Ejemplo n.º 1
0
    def __call__(self, t_path, name_info, i_str):
        '''
        Load chunk from t_path and put it into the right place in s3
        using the output_name template from the config.

        The chunk is first passed through compress_and_encrypt_path();
        the resulting file is read into memory and handed to self.put().
        If config['verify_via_http'] is true, self.verify() is called
        after every put and the put is repeated until verify stops
        raising.

        :param t_path: path of the chunk to upload
        :param name_info: dict of naming fields; updated in place with
          the result of get_name_info() and with 's3_output_path'
        :param i_str: input identifier, passed to get_name_info() and
          included in the log output

        :returns: None.  When name_info['num'] is 0 the method returns
          before compressing or uploading anything.

        Config keys read: 'output_name', 's3_path_prefix',
        'tmp_dir_path' and 'verify_via_http' (required);
        'gpg_encryption_key_path' and 'gpg_recipient' (optional).
        '''
        name_info.update(get_name_info(t_path, i_str=i_str))
        if name_info['num'] == 0:
            ## num == 0: skip compression and upload entirely
            return None

        key_path = self.config.get('gpg_encryption_key_path')

        o_fname = self.config['output_name'] % name_info
        o_path = os.path.join(self.config['s3_path_prefix'], o_fname + '.sc.xz')
        if key_path:
            ## an encryption key is configured, so mark the output as gpg
            o_path += '.gpg'

        name_info['s3_output_path'] = o_path

        logger.info('to_s3_chunks: \n\t%r\n\tfrom: %r\n\tby way of %r ',
                    o_path, i_str, t_path)

        ## compress and encrypt
        logger.info('key path: %r', key_path)
        _errors, t_path2 = compress_and_encrypt_path(
            t_path,
            key_path,
            gpg_recipient=self.config.get('gpg_recipient'),
            tmp_dir=self.config['tmp_dir_path'],
        )
        logger.info('\n'.join(_errors))

        ## the payload is compressed (and maybe encrypted), so read it
        ## as bytes; the with-block closes the handle before uploading
        with open(t_path2, 'rb') as f_in:
            data = f_in.read()
        logger.debug('compressed size: %d', len(data))

        verify_via_http = self.config['verify_via_http']
        ## NOTE(review): there is no retry limit or back-off here; a
        ## persistently failing verify re-uploads forever.
        while True:
            start_time = time.time()
            self.put(o_path, data)
            elapsed = time.time() - start_time
            if elapsed > 0:
                logger.debug('put %.1f bytes/second', len(data) / elapsed)

            if not verify_via_http:
                ## not verifying, so don't attempt multiple puts
                break

            try:
                start_time = time.time()
                self.verify(o_path, name_info['md5'])
            except Exception as exc:
                ## keep looping if verify raises anything
                logger.critical('verify_via_http failed so retrying: %r', exc)
                continue

            elapsed = time.time() - start_time
            if elapsed > 0:
                logger.debug('verify %.1f bytes/second', len(data) / elapsed)
            break
Ejemplo n.º 2
0
    def __call__(self, t_path, name_info, i_str):
        '''
        Load chunk from t_path and put it into the right place in s3
        using the output_name template from the config.

        The chunk is run through compress_and_encrypt_path(), the
        resulting file is read into memory, and the bytes are passed to
        self.put().  When config['verify_via_http'] is true, each put is
        followed by self.verify(); any exception from verify triggers
        another put.

        :param t_path: path of the chunk to upload
        :param name_info: dict of naming fields; updated in place with
          the result of get_name_info() and with 's3_output_path'
        :param i_str: input identifier, passed to get_name_info() and
          included in the log output

        :returns: None.  When name_info['num'] is 0 the method returns
          before compressing or uploading anything.

        Config keys read: 'output_name', 's3_path_prefix',
        'tmp_dir_path' and 'verify_via_http' (required);
        'gpg_encryption_key_path' and 'gpg_recipient' (optional).
        '''
        name_info.update(get_name_info(t_path, i_str=i_str))
        if name_info['num'] == 0:
            ## num == 0: skip compression and upload entirely
            return None

        gpg_key_path = self.config.get('gpg_encryption_key_path')

        o_fname = self.config['output_name'] % name_info
        o_path = os.path.join(self.config['s3_path_prefix'],
                              o_fname + '.sc.xz')
        if gpg_key_path:
            ## an encryption key is configured, so mark the output as gpg
            o_path += '.gpg'

        name_info['s3_output_path'] = o_path

        logger.info('to_s3_chunks: \n\t%r\n\tfrom: %r\n\tby way of %r ',
                    o_path, i_str, t_path)

        ## compress and encrypt
        logger.info('key path: %r', gpg_key_path)
        _errors, t_path2 = compress_and_encrypt_path(
            t_path,
            gpg_key_path,
            gpg_recipient=self.config.get('gpg_recipient'),
            tmp_dir=self.config['tmp_dir_path'],
        )
        logger.info('\n'.join(_errors))

        ## binary mode: the file holds compressed/encrypted bytes, and
        ## the with-block guarantees the handle is closed
        with open(t_path2, 'rb') as f_in:
            data = f_in.read()
        logger.debug('compressed size: %d', len(data))

        verify_via_http = self.config['verify_via_http']
        ## NOTE(review): retries are unbounded and immediate; a verify
        ## that always fails will re-upload forever.
        while True:
            start_time = time.time()
            self.put(o_path, data)
            elapsed = time.time() - start_time
            if elapsed > 0:
                logger.debug('put %.1f bytes/second', len(data) / elapsed)

            if not verify_via_http:
                ## not verifying, so don't attempt multiple puts
                break

            try:
                start_time = time.time()
                self.verify(o_path, name_info['md5'])
            except Exception as exc:
                ## keep looping if verify raises anything
                logger.critical('verify_via_http failed so retrying: %r',
                                exc)
                continue

            elapsed = time.time() - start_time
            if elapsed > 0:
                logger.debug('verify %.1f bytes/second',
                             len(data) / elapsed)
            break
    def __call__(self, t_path, name_info, i_str):
        """
        Place the chunk at t_path at a local output path selected by
        config["output_type"], compressing it on the way.

        Output path by output_type:

        * "samedir": i_str must end in ".sc"; the output is i_str with
          that suffix replaced by "-<output_name>.sc".
        * "inplace": the output path is i_str itself; compression is
          switched on when i_str ends in ".xz".
        * "otherdir": the file is named ``output_name % name_info`` plus
          ".sc" inside config["output_path"] (taken relative to the
          current directory unless it starts with "/").

        ".xz" is appended for "samedir"/"otherdir" when compression is
        on, and missing output directories are created.

        :param t_path: path of the chunk file to move into place
        :param name_info: dict of naming fields; updated in place with
          the result of get_name_info() and, when "input" appears in
          config["output_name"], with "input_fname"
        :param i_str: input path/identifier used to derive the output
          name

        :returns: the output path when the chunk was compressed and
          written; None when name_info["num"] is 0.  NOTE(review): when
          compression is off, the code visible here writes nothing and
          returns None -- confirm whether that is intended.

        :raises ValueError: if config["output_type"] is not one of the
          three values above
        :raises OSError: if moving the compressed file fails for any
          reason other than a cross-device rename
        """
        import errno  # function-scope import: only used to name EXDEV

        o_type = self.config["output_type"]

        name_info.update(get_name_info(t_path, i_str=i_str))

        if name_info["num"] == 0:
            return None

        if "input" in self.config["output_name"]:
            ## strip the .gpg/.xz/.sc suffixes, outermost first, to get
            ## the bare input file name
            i_fname = i_str.split("/")[-1]
            for suffix in (".gpg", ".xz", ".sc"):
                if i_fname.endswith(suffix):
                    i_fname = i_fname[: -len(suffix)]
            name_info["input_fname"] = i_fname

        ## prepare to compress the output
        compress = self.config.get("compress", None)
        assert compress in [None, "xz"], compress

        if o_type == "samedir":
            ## assume that i_str was a local path
            assert i_str[-3:] == ".sc", repr(i_str[-3:])
            o_path = i_str[:-3] + "-%s.sc" % self.config["output_name"]
            if compress:
                o_path += ".xz"

        elif o_type == "inplace":
            ## replace the input chunks with the newly created
            o_path = i_str
            if o_path.endswith(".xz"):
                compress = True

        elif o_type == "otherdir":
            ## put the output in the configured directory
            o_dir = self.config["output_path"]
            if not o_dir.startswith("/"):
                o_dir = os.path.join(os.getcwd(), o_dir)

            if not os.path.exists(o_dir):
                os.makedirs(o_dir)

            o_fname = self.config["output_name"] % name_info
            o_path = os.path.join(o_dir, o_fname + ".sc")
            if compress:
                o_path += ".xz"

        else:
            ## fail clearly instead of hitting an unbound o_path below
            raise ValueError("unrecognized output_type: %r" % (o_type,))

        ## if dir is missing make it
        dirname = os.path.dirname(o_path)
        if dirname and not os.path.exists(dirname):
            os.makedirs(dirname)

        if not compress:
            ## nothing below applies to uncompressed output
            return None

        assert o_path.endswith(".xz"), o_path
        logger.info("compress_and_encrypt_path(%r, tmp_dir=%r)", t_path, self.config["tmp_dir_path"])

        errors, t_path2 = streamcorpus.compress_and_encrypt_path(t_path, tmp_dir=self.config["tmp_dir_path"])
        assert not errors, errors

        if not self.config.get("cleanup_tmp_files", True):
            # for debugging, leave temp file, copy to output
            shutil.copy(t_path2, o_path)
            logger.info("copied %r -> %r", t_path2, o_path)
            return o_path

        # default action, move tmp file to output position
        try:
            logger.debug("attempting renamed(%r, %r)", t_path2, o_path)
            os.rename(t_path2, o_path)
            logger.debug("renamed(%r, %r)", t_path2, o_path)
        except OSError as exc:
            if exc.errno != errno.EXDEV:
                logger.critical("rename failed (%r -> %r)", t_path2, o_path, exc_info=True)
                raise
            ## EXDEV: source and destination are on different devices,
            ## so a plain rename cannot work; fall back to patient_move
            logger.debug("resorting to patient_move(%r, %r)", t_path2, o_path, exc_info=True)
            patient_move(t_path2, o_path)
            logger.debug("patient_move succeeded")
        return o_path
Ejemplo n.º 4
0
    def __call__(self, t_path, name_info, i_str):
        '''
        Place the chunk at t_path at a local output path selected by
        config['output_type'], compressing it on the way.

        Output path by output_type:

        * 'samedir': i_str must end in '.sc'; the output is i_str with
          that suffix replaced by '-<output_name>.sc'.
        * 'inplace': the output path is i_str itself; compression is
          switched on when i_str ends in '.xz'.
        * 'otherdir': the file is named ``output_name % name_info`` plus
          '.sc' inside config['output_path'] (taken relative to the
          current directory unless it starts with '/').

        '.xz' is appended for 'samedir'/'otherdir' when compression is
        on, and missing output directories are created.

        :param t_path: path of the chunk file to move into place
        :param name_info: dict of naming fields; updated in place with
          the result of get_name_info() and, when 'input' appears in
          config['output_name'], with 'input_fname'
        :param i_str: input path/identifier used to derive the output
          name

        :returns: the output path when the chunk was compressed and
          written; None when name_info['num'] is 0.  NOTE(review): when
          compression is off, the code visible here writes nothing and
          returns None -- confirm whether that is intended.

        :raises ValueError: if config['output_type'] is not one of the
          three values above
        :raises OSError: if moving the compressed file fails for any
          reason other than a cross-device rename
        '''
        import errno  # function-scope import: only used to name EXDEV

        o_type = self.config['output_type']

        name_info.update(get_name_info(t_path, i_str=i_str))

        if name_info['num'] == 0:
            return None

        if 'input' in self.config['output_name']:
            ## strip the .gpg/.xz/.sc suffixes, outermost first, to get
            ## the bare input file name
            i_fname = i_str.split('/')[-1]
            for suffix in ('.gpg', '.xz', '.sc'):
                if i_fname.endswith(suffix):
                    i_fname = i_fname[:-len(suffix)]
            name_info['input_fname'] = i_fname

        ## prepare to compress the output
        compress = self.config.get('compress', None)

        if o_type == 'samedir':
            ## assume that i_str was a local path
            assert i_str[-3:] == '.sc', repr(i_str[-3:])
            o_path = i_str[:-3] + '-%s.sc' % self.config['output_name']
            if compress:
                o_path += '.xz'

        elif o_type == 'inplace':
            ## replace the input chunks with the newly created
            o_path = i_str
            if o_path.endswith('.xz'):
                compress = True

        elif o_type == 'otherdir':
            ## put the output in the configured directory
            o_dir = self.config['output_path']
            if not o_dir.startswith('/'):
                o_dir = os.path.join(os.getcwd(), o_dir)

            if not os.path.exists(o_dir):
                os.makedirs(o_dir)

            o_fname = self.config['output_name'] % name_info
            o_path = os.path.join(o_dir, o_fname + '.sc')
            if compress:
                o_path += '.xz'

        else:
            ## fail clearly instead of hitting an unbound o_path below
            raise ValueError('unrecognized output_type: %r' % (o_type,))

        logger.info('writing chunk file to {}'.format(o_path))
        logger.debug('temporary chunk in {}'.format(t_path))

        ## if dir is missing make it
        dirname = os.path.dirname(o_path)
        if dirname and not os.path.exists(dirname):
            os.makedirs(dirname)

        if not compress:
            ## nothing below applies to uncompressed output
            return None

        assert o_path.endswith('.xz'), o_path
        logger.info('compress_and_encrypt_path(%r, tmp_dir=%r)',
                    t_path, self.config['tmp_dir_path'])

        errors, t_path2 = streamcorpus.compress_and_encrypt_path(
            t_path, tmp_dir=self.config['tmp_dir_path'])
        assert not errors, errors

        ## cleaning up is the default, so a missing key means True
        if not self.config.get('cleanup_tmp_files', True):
            # for debugging, leave temp file, copy to output
            shutil.copy(t_path2, o_path)
            logger.info('copied %r -> %r', t_path2, o_path)
            return o_path

        # default action, move tmp file to output position
        try:
            logger.debug('attempting renamed(%r, %r)', t_path2, o_path)
            os.rename(t_path2, o_path)
            logger.debug('renamed(%r, %r)', t_path2, o_path)
        except OSError as exc:
            if exc.errno != errno.EXDEV:
                logger.critical('rename failed (%r -> %r)', t_path2, o_path, exc_info=True)
                raise
            ## EXDEV: source and destination are on different devices,
            ## so a plain rename cannot work; fall back to patient_move
            logger.debug('resorting to patient_move(%r, %r)',
                         t_path2, o_path, exc_info=True)
            patient_move(t_path2, o_path)
            logger.debug('patient_move succeeded')
        return o_path