Example #1
0
    def _bundle_metadata(self, metadata):
        """
        Bundles the metadata into a tarfile formatted bundle

        @param metadata: The metadata string to bundle
        """
        #print >> sys.stderr, "Bundle meta!"
        #print >> sys.stderr, metadata

        metadata_file = None
        try:
            metadata_file = tempfile.TemporaryFile()
        except IOError:
            task_error("Can't create metadata file in working directory")

        metadata_file.write(metadata)
        metadata_file.seek(0)

        if self.empty_tar:
            tarball = tarfile.TarFile(name=self.bundle_path, mode='w')
            self.empty_tar = False
        else:
            tarball = tarfile.TarFile(name=self.bundle_path, mode='a')

        tar_info = tarfile.TarInfo("metadata.txt")
        tar_info.size = len(metadata)
        tar_info.mtime = time.time()
        tarball.addfile(tar_info, metadata_file)
        metadata_file.close()
        tarball.close()
Example #2
0
    def _bundle_file(self, file_paths):
        """
        'Pure Virtual' placeholder for file bundling; a child class is
        expected to supply the real implementation

        :Parameters:
            file_paths
                The files to bundle (not used by the base class)
        """
        message = "Can't bundle a file with the base class"
        task_error(message)
Example #3
0
    def __init__(self, bundle_path):
        """
        Set up a Tar_Bundler that writes to the given path

        An empty tar archive is created at the bundle path straight away and
        the bundler is flagged as empty.

        :Parameters:
            bundle_path
                The path to the target bundle file
        """
        path_missing = bundle_path is None or bundle_path == ''
        if path_missing:
            task_error('no bundle path')

        # Let the base bundler class do its own setup first
        FileBundler.__init__(self, bundle_path)

        # Opening in 'w' mode and closing immediately leaves an empty archive
        tarfile.TarFile(name=self.bundle_path, mode='w').close()
        self.empty_tar = True

        created_msg = "Successfully created tarfile bundle %s" % self.bundle_path
        print >> sys.stderr, created_msg
Example #4
0
    def bundle_metadata(self, metadata):
        """
        Bundles the metadata into a tarfile formatted bundle

        The metadata is written to a named temporary file, which is then
        added to the archive at ``self.bundle_path`` as ``metadata.txt``.
        The temporary file is removed and the archive handle is closed even
        when adding the entry fails.

        @param metadata: The metadata string to bundle
        @raise Exception: an error raised while adding the entry is printed
            (with its traceback) to stdout and re-raised unchanged
        """
        try:
            metadata_file = tempfile.NamedTemporaryFile(delete=False)
        except IOError:
            task_error('Cannot create metadata file in working directory')
            # Nothing was staged, so there is nothing to bundle.
            return

        fname = metadata_file.name
        try:
            # Write and close first so the data is flushed to disk; the file
            # is reopened read-only below for tarfile to stream from.
            try:
                metadata_file.write(metadata)
            finally:
                metadata_file.close()

            # 'w' truncates the archive, so it is only used while the bundle
            # is still empty; later calls append.
            tar_mode = 'w' if self.empty_tar else 'a'
            tarball = tarfile.TarFile(name=self.bundle_path, mode=tar_mode)
            self.empty_tar = False

            try:
                staged = open(fname, mode='rb')
                try:
                    tar_info = tarfile.TarInfo('metadata.txt')
                    tar_info.size = len(metadata)
                    tar_info.mtime = time.time()
                    tarball.addfile(tar_info, staged)
                finally:
                    staged.close()
            except Exception as ex:
                print(ex)
                traceback.print_exc(file=sys.stdout)
                # A bare raise keeps the original traceback intact.
                raise
            finally:
                tarball.close()
        finally:
            # delete=False means the temporary file must be removed by hand.
            if os.path.exists(fname):
                os.remove(fname)
Example #5
0
    def bundle_file(self, file_paths, bundle_size=0):
        """
        Bundles files into a tarfile formatted bundle

        Each file is first passed to ``self.hash_file`` and then added to the
        archive at ``self.bundle_path`` under a ``data/`` prefix.  The
        progress counters on the instance are reset before bundling starts.

        :Parameters:
            file_paths
                An iterable of (file_path, file_arcname) pairs: the path of
                each file to bundle and the name to give it inside the
                bundle.  A file_arcname of None falls back to the base name
                of file_path.
            bundle_size
                Total size of the files being bundled; stored on the
                instance as ``self.bundle_size``
        """
        # 'w' truncates the archive, so it is only used while the bundle is
        # still empty; later calls append.
        tar_mode = 'w' if self.empty_tar else 'a'
        tarball = tarfile.TarFile(name=self.bundle_path, mode=tar_mode)
        self.empty_tar = False

        self.running_size = 0
        self.last_percent = 0
        self.percent_complete = 0
        self.bundle_size = bundle_size

        try:
            self.report_percent_complete()

            for (file_path, file_arcname) in file_paths:
                if file_arcname is None:
                    # Same fallback hash_file applies to a missing arcname
                    file_arcname = os.path.basename(
                        os.path.abspath(file_path))

                # hash the file and store in hash_dict
                # percent complete is reported only as read and hashed
                # hopefully that being the slowest part and all we have
                # access to for completion statistics
                self.hash_file(file_path, file_arcname)

                # for version 1.2, push files to a data/ directory
                # to avoid collisions with metadata.txt in the root
                modified_arc_name = os.path.join('data', file_arcname)
                tarball.add(file_path, arcname=modified_arc_name,
                            recursive=False)

        except BundlerError as err:
            task_error("Failed to bundle file: %s" % (err.msg))
        finally:
            # Closing writes the end-of-archive marker and releases the
            # file handle.
            tarball.close()
Example #6
0
    def __init__(self,
                 bundle_path,
                 proposal_ID='',
                 instrument_name='',
                 instrument_ID='',
                 groups=None):
        """
        Initializes a Tar_Bundler

        Creates an empty tar archive at the bundle path and flags the
        bundler as empty.

        :Parameters:
            bundle_path
                The path to the target bundle file; 'bundle.tar' is used
                when this is empty or None
            proposal_ID
                An optional string describing the proposal associated with this bundle
            instrument_name
                The name of the instrument that produced the data packaged in the bundle
            instrument_ID
                Forwarded to the base bundler as instrument_id
            groups
                Forwarded unchanged to the base bundler
        """

        if bundle_path is None or bundle_path == '':
            bundle_path = 'bundle.tar'

        # Initialize the Base Bundler Class
        FileBundler.__init__(self, bundle_path,
                             proposal_id=proposal_ID,
                             instrument_name=instrument_name,
                             instrument_id=instrument_ID,
                             groups=groups)

        try:
            tarball = tarfile.TarFile(name=self.bundle_path, mode='w')
            tarball.close()
        except Exception:
            # Catching Exception rather than using a bare except lets
            # KeyboardInterrupt and SystemExit propagate instead of being
            # reported as a tarball failure.
            task_error("Couldn't create bundle tarball: %s" % self.bundle_path)

        self.empty_tar = True

        print >> sys.stderr, "Successfully created tarfile bundle %s" % self.bundle_path
Example #7
0
def progress(_download_t, _download_d, upload_t, upload_d):
    """
    pycurl progress callback that publishes the upload percentage

    A PROGRESS task state is pushed only once the percentage has moved more
    than 5 points past the value last stored in TrackPercent.percent.
    """
    if not upload_t > 0:
        return

    try:
        done = float(upload_d)
        total = float(upload_t)
        percent = 100.0 * done / total

        advanced = percent - TrackPercent.percent
        if advanced > 5:
            status_text = "upload percent complete: " + str(int(percent))
            task_state("PROGRESS", {'Status': status_text})
            TrackPercent.percent = percent

    except Exception as e:
        raise task_error("Error during callback: " + e.message)
Example #8
0
def bundle(bundle_name='', file_list=None, bundle_size=0, meta_list=None):
    """
    Aggregate a list of files, plus their metadata, into one bundle file

    :Parameters:
        bundle_name
            The target bundle file in which to aggregate the file list
        file_list
            The list of files to bundle
        bundle_size
            total size of the files to be bundled
        meta_list
            list of metadata items; handed to the bundler together with the
            files and then serialized as JSON into the bundle
    """

    # reject unusable arguments up front
    if bundle_name == '' or bundle_name is None:
        task_error("Missing bundle name")

    if file_list is None or not len(file_list):
        task_error("Missing file list")

    # the bundler works on the absolute location of the bundle file
    bundler = TarBundler(os.path.abspath(bundle_name))

    # files go in first, then the metadata describing them
    bundler.bundle_file(file_list, bundle_size, meta_list)
    bundler.bundle_metadata(json.dumps(meta_list))

    TaskComm.set_state('PROGRESS', 'Bundling complete')
Example #9
0
def upload_files(ingest_server='',
                 bundle_name='',
                 file_list=None,
                 bundle_size=0,
                 meta_list=None,
                 auth={},
                 verify=True,
                 tartar=False):
    """
    task created on a separate Celery process to bundle and upload in the background
    status and errors are pushed by celery to the main server through RabbitMQ

    :Parameters:
        ingest_server
            Passed to the Uploader as the upload target
        bundle_name
            Path of the bundle file to create; its directory must already
            exist
        file_list
            The files to bundle, forwarded to bundle()
        bundle_size
            Total size of the files to bundle, forwarded to bundle()
        meta_list
            Metadata items, forwarded to bundle()
        auth
            Passed to the Uploader unchanged
        verify
            Passed to the Uploader unchanged
        tartar
            When true, the finished bundle is itself bundled again into
            bundle_name + '.tar' and that outer file is uploaded
    """
    # NOTE(review): `auth={}` is a mutable default argument shared between
    # calls; it is only read in the code visible here -- confirm nothing
    # downstream mutates it.
    # one big-ass exception handler for upload.
    try:

        # The bundle is written into an existing directory; bail out early
        # if that directory is missing.
        target_dir = os.path.dirname(bundle_name)
        if not os.path.isdir(target_dir):
            task_error('Bundle directory does not exist')
            return

        # NOTE(review): this state is reported, but no cleanup code follows
        # it in this function.
        TaskComm.set_state("PROGRESS", "Cleaning previous uploads")

        # initial state pushed through celery
        TaskComm.set_state("PROGRESS", "Starting Bundle/Upload Process")

        bundle(bundle_name=bundle_name,
               file_list=file_list,
               meta_list=meta_list,
               bundle_size=bundle_size)

        TaskComm.set_state("PROGRESS", "Completed Bundling")

        if tartar:
            # Wrap the bundle that was just written inside a second bundle.
            # create the file tuple list of 1 file
            fname = os.path.basename(bundle_name)

            file_tuples = []
            file_tuples.append((bundle_name, fname))

            bundle_size = os.path.getsize(bundle_name)

            # dual extension indicates tartar
            bundle_name += '.tar'

            bundle(bundle_name=bundle_name,
                   file_list=file_tuples,
                   meta_list=meta_list,
                   bundle_size=bundle_size)

        # NOTE(review): str(auth) is included in the reported state text;
        # if auth carries credentials they are exposed here -- confirm.
        TaskComm.set_state(
            "PROGRESS", "Starting Uploady: " + str(bundle_name) + ": " +
            ingest_server + ": " + str(auth))

        uploader = Uploader(bundle_name, ingest_server, auth, verify)

        TaskComm.set_state("PROGRESS", "Uploader Initialized")

        result = uploader.upload_bundle()

        TaskComm.set_state("PROGRESS", "Finished Upload")

        # The upload result is expected to be a JSON document; anything
        # that does not parse is reported as an upload error.
        try:
            status = json.loads(result)
        except Exception, e:
            print 'Upload Error'
            print result
            raise Exception('Upload error:  ' + result)
            # NOTE(review): the print below is unreachable (it follows the
            # raise).
            print 'End Upload Error'

        # check for a valid job id.  Ingest error should return -99
        job_id = status['job_id']
        if job_id < 0:
            # NOTE(review): `err` is not assigned anywhere above this line
            # in the visible code, so this call looks like it would raise
            # NameError -- confirm the intended message.
            task_error(err)
            raise Exception('Upload error:  ' + bundle_name)

        TaskComm.set_state("PROGRESS", "Rename Tar File")

        # Failures while renaming are re-raised with the bundle name and
        # job id appended to the message.
        try:
            rename_tar_file(target_dir, bundle_name, job_id)
        except Exception, ex:
            raise Exception(ex.message + ':  ' + bundle_name + ':  ' +
                            str(job_id))
Example #10
0
            rename_tar_file(target_dir, bundle_name, job_id)
        except Exception, ex:
            # Re-raise with the bundle name and job id appended for context
            raise Exception(ex.message + ':  ' + bundle_name + ':  ' +
                            str(job_id))

        print status

        if TaskComm.USE_CELERY:
            # The result is handed back by deliberately raising it as a
            # StandardError; the handler below re-raises it untouched.
            # set job ID here
            print 'exit with deliberate error'
            print result
            raise StandardError(result)
        else:
            TaskComm.set_state("DONE", result)

    # NOTE(review): in Python 2 most built-in errors (KeyError, NameError,
    # IOError, ...) derive from StandardError, so they are re-raised here
    # without being logged; only exceptions outside that hierarchy, such as
    # the plain Exception instances raised above, reach the handler below.
    except StandardError, error:
        raise error

    except Exception, ex:
        # Dump the traceback to stderr between two separator lines
        print >> sys.stderr, "Exception in upload_files:"
        print >> sys.stderr, '-' * 60
        traceback.print_exc(file=sys.stderr)
        print >> sys.stderr, '-' * 60

        # NOTE(review): `result` is read here; if the exception was raised
        # before `result` was assigned, building this message would itself
        # fail -- confirm.
        err = 'Task exception: upload_files :' + str(
            ex.message) + ': ' + result + ': ' + traceback.format_exc()
        task_error(err)
        print err

        raise Exception(err)
Example #11
0
    def hash_file(self, file_path, file_arcname):
        """
        Hash a file and collect the metadata describing it

        The file is read 1Mb at a time to compute its SHA-1 digest, and the
        bundle progress counters on the instance are updated as each chunk
        is read.  Progress is left untouched when ``self.bundle_size`` is 0,
        because no percentage can be computed from it.

        :Parameters:
            file_path
                The path to the file to hash
            file_arcname
                An alternative name to use for the file inside of the bundle;
                when None the base name of file_path is used

        :Returns:
            A dict with the keys size, mimetype, name, mtime, ctime,
            destinationTable, subdir, hashsum and hashtype; None when the
            file could not be opened or its arcname is already present in
            self.file_meta
        """

        file_path = os.path.abspath(file_path)

        # If the file_arcname argument is None use the base file name as the
        # arc name
        if file_arcname is None:
            file_arcname = os.path.basename(file_path)

        if not os.path.exists(file_path):
            task_error("%s doesn't exist" % file_path)
        if not os.access(file_path, os.R_OK):
            task_error("Can't read from %s" % file_path)

        file_mode = os.stat(file_path)[stat.ST_MODE]
        if not stat.S_ISDIR(file_mode) and not stat.S_ISREG(file_mode):
            task_error("Unknown file type for %s" % file_path)

        try:
            # open to read binary.  This is important.
            file_in = open(file_path, 'rb')
        except IOError:
            task_error("Couldn't read from file: %s" % file_path)
            # There is no handle to hash from, so stop here.
            return None

        # hash file 1Mb at a time
        hashval = hashlib.sha1()
        with file_in:
            while True:
                data = file_in.read(1024 * 1024)
                if not data:
                    break
                hashval.update(data)

                # update file bundle status
                self.running_size += len(data)

                # A bundle size of 0 would divide by zero; skip the
                # percentage update in that case.
                if self.bundle_size:
                    self.percent_complete = (
                        100.0 * self.running_size / self.bundle_size)

                # only update significant progress
                if self.percent_complete - self.last_percent > 1:
                    self.report_percent_complete()
                    self.last_percent = self.percent_complete

        file_hash = hashval.hexdigest()

        modified_name = os.path.join('data', file_arcname)
        (file_dir, file_name) = os.path.split(modified_name)

        # linuxfy the directory
        file_dir = file_dir.replace('\\', '/')

        mime_type = mimetypes.guess_type(file_path, strict=True)[0]
        if mime_type is None:
            mime_type = 'application/octet-stream'

        info = {}
        info['size'] = os.path.getsize(file_path)
        info['mimetype'] = mime_type
        info['name'] = file_name
        info['mtime'] = DT.datetime.utcfromtimestamp(
            int(os.path.getmtime(file_path))).isoformat()
        info['ctime'] = DT.datetime.utcfromtimestamp(
            int(os.path.getctime(file_path))).isoformat()
        info['destinationTable'] = 'Files'
        info['subdir'] = file_dir
        info['hashsum'] = file_hash
        info['hashtype'] = 'sha1'

        # todo make sure errors bubble up without crashing
        if file_arcname in self.file_meta:
            print(file_arcname)
            task_error(
                "Different file with the same arcname is already in the bundle"
            )
            return None

        return info
Example #12
0
def bundle(bundle_name='',
           instrument_name='',
           proposal='',
           file_list=None,
           groups=None,
           bundle_size=0):
    """
    Bundles a list of files into a single aggregated bundle file

    :Parameters:
        bundle_name
            The target bundle file in which to aggregate the file list
        instrument_name
            The name of the instrument that produced the data files that will be bundled
        proposal
            The proposal ID to attach to the bundle
        file_list
            The list of files to bundle
        groups
            A hash of type/name groups to attach to
        bundle_size
            Forwarded to the bundler's bundle_file call
    """

    # validate parameters
    if bundle_name is None or bundle_name == '':
        task_error("Missing bundle name")

    if instrument_name is None or instrument_name == '':
        task_error("Missing instrument name")

    if proposal is None or proposal == '':
        task_error("Missing proposal")

    if file_list is None or len(file_list) == 0:
        task_error("Missing file list")

    if groups is None or groups == '':
        task_error("Missing groups")

    # Set up the bundle file
    bundle_path = os.path.abspath(bundle_name)

    # dfh note we are setting the instrument name and ID to the same thing,
    # which is being
    # sent in as the instrument name but is actually the instrument ID.  Fix
    # this.
    bundler = TarBundler(bundle_path, proposal_ID=proposal,
                         instrument_name=instrument_name,
                         instrument_ID=instrument_name,
                         groups=groups)

    bundler.bundle_file(file_list, bundle_size)

    bundler.bundle_metadata()

    task_state('PROGRESS', "Bundling complete")
Example #13
0
 def _bundle_metadata(self, metadata):
     """
     'Pure Virtual' placeholder for metadata bundling; a child class is
     expected to supply the real implementation
     @param metadata: The metadata string to bundle (not used here)
     """
     message = "Can't bundle metadata with the base class"
     task_error(message)
Example #14
0
    def hash_file(self, file_path, file_arcname):
        """
        Hash a file and record the digest under its arcname

        The file is read 1Mb at a time to compute its SHA-1 digest, and the
        bundle progress counters on the instance are updated as each chunk
        is read.  Progress is left untouched when ``self.bundle_size`` is 0,
        because no percentage can be computed from it.  The digest is stored
        in ``self.hash_dict`` keyed by arcname; hashing the same content
        under an arcname that is already recorded is a no-op, while
        different content under the same arcname is reported as an error.

        :Parameters:
            file_path
                The path to the file to hash
            file_arcname
                An alternative name to use for the file inside of the bundle;
                when None the base name of file_path is used
        """

        file_path = os.path.abspath(file_path)

        # If the file_arcname argument is None use the base file name as the
        # arc name
        if file_arcname is None:
            file_arcname = os.path.basename(file_path)

        if not os.path.exists(file_path):
            task_error("%s doesn't exist" % file_path)
        if not os.access(file_path, os.R_OK):
            task_error("Can't read from %s" % file_path)

        file_mode = os.stat(file_path)[stat.ST_MODE]
        if not stat.S_ISDIR(file_mode) and not stat.S_ISREG(file_mode):
            task_error("Unknown file type for %s" % file_path)

        try:
            # open to read binary.  This is important.
            file_in = open(file_path, 'rb')
        except IOError:
            task_error("Couldn't read from file: %s" % file_path)
            # There is no handle to hash from, so stop here.
            return

        # hash file 1Mb at a time
        hashval = hashlib.sha1()
        with file_in:
            while True:
                data = file_in.read(1024 * 1024)
                if not data:
                    break
                hashval.update(data)

                # update file bundle status
                self.running_size += len(data)

                # A bundle size of 0 would divide by zero; skip the
                # percentage update in that case.
                if self.bundle_size:
                    self.percent_complete = (
                        100.0 * self.running_size / self.bundle_size)

                # only update significant progress
                if self.percent_complete - self.last_percent > 1:
                    self.report_percent_complete()
                    self.last_percent = self.percent_complete

        file_hash = hashval.hexdigest()

        # todo make sure errors bubble up without crashing
        if file_arcname in self.hash_dict:
            # Compare the digest just computed (not the builtin `hash`)
            # against the one already recorded for this arcname.
            if file_hash != self.hash_dict[file_arcname]:
                print(file_arcname)
                task_error("Different file with the same arcname is already in the bundle")
            return
        self.hash_dict[file_arcname] = file_hash