def __call__(self, t_path, name_info, i_str):
        '''
        Load chunk from t_path and put it into the right place in s3
        using the output_name template from the config.

        :param t_path: local path of the temporary chunk file
        :param name_info: dict of naming template fields; updated in place
        :param i_str: input string identifying the source of this chunk
        :returns: the s3 output path, or None if the chunk is empty
        '''
        name_info.update(get_name_info(t_path, i_str=i_str))
        ## an empty chunk produces no output
        if name_info['num'] == 0:
            return None

        o_fname = self.config['output_name'] % name_info
        o_path = os.path.join(self.config['s3_path_prefix'], o_fname + '.tar.gz')

        logger.info('to_s3_tarballs: \n\t%r\n\tfrom: %r\n\tby way of %r ' % (o_path, i_str, t_path))

        ## forcibly collect dereferenced objects
        #gc.collect()

        t_path2 = tarball_export(t_path, name_info)

        ## read in binary mode so the md5 covers the raw tarball bytes,
        ## and close the handle promptly instead of leaking it
        with open(t_path2, 'rb') as f:
            data = f.read()
        name_info['md5'] = hashlib.md5(data).hexdigest() # pylint: disable=E1101

        self.upload(o_path, data, name_info)
        self.cleanup(t_path)
        self.cleanup(t_path2)

        logger.info('to_s3_tarballs finished:\n\t input: %s\n\toutput: %s' % (i_str, o_path))
        ## return the final output path
        return o_path
    def __call__(self, t_path, name_info, i_str):
        '''
        Load chunk from t_path and put it into the right place in s3
        using the output_name template from the config.

        :param t_path: local path of the temporary chunk file
        :param name_info: dict of naming template fields; updated in place
        :param i_str: input string identifying the source of this chunk
        :returns: the s3 output path, or None if the chunk is empty
        '''
        name_info.update(get_name_info(t_path, i_str=i_str))
        ## an empty chunk produces no output
        if name_info['num'] == 0:
            return None

        o_fname = self.config['output_name'] % name_info
        o_path = os.path.join(self.config['s3_path_prefix'],
                              o_fname + '.tar.gz')

        logger.info('to_s3_tarballs: \n\t%r\n\tfrom: %r\n\tby way of %r ' %
                    (o_path, i_str, t_path))

        ## forcibly collect dereferenced objects
        #gc.collect()

        t_path2 = tarball_export(t_path, name_info)

        ## read in binary mode so the md5 covers the raw tarball bytes,
        ## and close the handle promptly instead of leaking it
        with open(t_path2, 'rb') as f:
            data = f.read()
        name_info['md5'] = hashlib.md5(data).hexdigest()  # pylint: disable=E1101

        self.upload(o_path, data, name_info)
        self.cleanup(t_path)
        self.cleanup(t_path2)

        logger.info('to_s3_tarballs finished:\n\t input: %s\n\toutput: %s' %
                    (i_str, o_path))
        ## return the final output path
        return o_path
Example #3
0
    def __call__(self, t_path, name_info, i_str):
        '''
        Move the exported tarball for t_path to its final location under
        config['output_path'], named by the config['output_name'] template.

        :param t_path: local path of the temporary chunk file
        :param name_info: dict of naming template fields; updated in place
        :param i_str: input string identifying the source of this chunk
        :returns: the final output path, or None if the chunk is empty
        '''
        name_info.update(get_name_info(t_path, i_str=i_str))

        ## an empty chunk produces no output
        if name_info['num'] == 0:
            return None

        o_fname = self.config['output_name'] % name_info
        o_dir = self.config['output_path']
        o_path = os.path.join(o_dir, o_fname + '.tar.gz')

        ## if dir is missing make it
        dirname = os.path.dirname(o_path)
        if dirname and not os.path.exists(dirname):
            os.makedirs(dirname)

        t_path2 = tarball_export(self.config, t_path, name_info)

        ## do an atomic renaming
        try:
            logger.debug('attempting os.rename(%r, %r)' % (t_path2, o_path))
            os.rename(t_path2, o_path)
        except OSError as exc:
            ## errno 18 is EXDEV: rename across filesystems is impossible,
            ## so fall back to a copy-then-remove move
            if exc.errno == 18:
                patient_move(t_path2, o_path)
            else:
                logger.critical(traceback.format_exc(exc))
                raise
        ## return the final output path, consistent with the other writers
        return o_path
    def __call__(self, t_path, name_info, i_str):
        '''
        Load chunk from t_path and put it into the right place in s3
        using the output_name template from the config.

        :param t_path: local path of the temporary chunk file
        :param name_info: dict of naming template fields; updated in place
        :param i_str: input string identifying the source of this chunk
        :returns: None if the chunk is empty; otherwise uploads and falls
            off the end (implicit None), matching the original behavior
        '''
        name_info.update(get_name_info(t_path, i_str=i_str))
        if name_info['num'] == 0:
            return None

        o_fname = self.config['output_name'] % name_info
        o_path = os.path.join(self.config['s3_path_prefix'], o_fname + '.sc.xz')
        if self.config.get('gpg_encryption_key_path'):
            o_path += '.gpg'

        name_info['s3_output_path'] = o_path

        logger.info('to_s3_chunks: \n\t%r\n\tfrom: %r\n\tby way of %r ' % (o_path, i_str, t_path))

        ## forcibly collect dereferenced objects
        #gc.collect()

        ## compress and encrypt
        logger.info('key path: %r', self.config.get('gpg_encryption_key_path'))
        _errors, t_path2 = compress_and_encrypt_path(
            t_path,
            self.config.get('gpg_encryption_key_path'),
            gpg_recipient=self.config.get('gpg_recipient'),
            tmp_dir=self.config['tmp_dir_path'],
            )
        logger.info('\n'.join(_errors))

        ## read in binary mode and close the handle promptly instead of
        ## leaking it
        with open(t_path2, 'rb') as f:
            data = f.read()
        logger.debug('compressed size: %d' % len(data))
        while True:
            start_time = time.time()
            self.put(o_path, data)
            elapsed = time.time() - start_time
            if elapsed > 0:
                logger.debug('put %.1f bytes/second' % (len(data) / elapsed))

            if self.config['verify_via_http']:
                try:
                    start_time = time.time()
                    ## NOTE(review): name_info['md5'] is assumed to be set
                    ## upstream (e.g. by get_name_info) -- confirm
                    self.verify(o_path, name_info['md5'])
                    elapsed = time.time() - start_time
                    if elapsed > 0:
                        logger.debug('verify %.1f bytes/second' % (len(data) / elapsed))

                    break
                except Exception as exc:
                    logger.critical('verify_via_http failed so retrying: %r' % exc)
                    ## keep looping if verify raises anything
                    continue

            else:
                ## not verifying, so don't attempt multiple puts
                break
def test_get_name_info(tmpdir):
    # write a one-item chunk so get_name_info has a real file to inspect
    chunk_path = str(tmpdir.join('test_path'))
    chunk = Chunk(chunk_path, mode='wb')
    chunk.add(make_stream_item(28491, 'abs_url'))

    info = get_name_info(chunk_path, i_str='foo')
    # date_time_now is the date portion followed by '-' and the time portion
    assert info['date_now'] == info['date_time_now'][:10]
    assert '-'.join([info['date_now'], info['time_now']]) == info['date_time_now']
    def __call__(self, t_path, name_info, i_str):
        '''
        Load chunk from t_path and put it into the right place in s3
        using the output_name template from the config.

        :param t_path: local path of the temporary chunk file
        :param name_info: dict of naming template fields; updated in place
        :param i_str: input string identifying the source of this chunk
        :returns: None if the chunk is empty; otherwise uploads and falls
            off the end (implicit None), matching the original behavior
        '''
        name_info.update(get_name_info(t_path, i_str=i_str))
        if name_info['num'] == 0:
            return None

        o_fname = self.config['output_name'] % name_info
        o_path = os.path.join(self.config['s3_path_prefix'],
                              o_fname + '.sc.xz')
        if self.config.get('gpg_encryption_key_path'):
            o_path += '.gpg'

        name_info['s3_output_path'] = o_path

        logger.info('to_s3_chunks: \n\t%r\n\tfrom: %r\n\tby way of %r ' %
                    (o_path, i_str, t_path))

        ## forcibly collect dereferenced objects
        #gc.collect()

        ## compress and encrypt
        logger.info('key path: %r', self.config.get('gpg_encryption_key_path'))
        _errors, t_path2 = compress_and_encrypt_path(
            t_path,
            self.config.get('gpg_encryption_key_path'),
            gpg_recipient=self.config.get('gpg_recipient'),
            tmp_dir=self.config['tmp_dir_path'],
        )
        logger.info('\n'.join(_errors))

        ## read in binary mode and close the handle promptly instead of
        ## leaking it
        with open(t_path2, 'rb') as f:
            data = f.read()
        logger.debug('compressed size: %d' % len(data))
        while True:
            start_time = time.time()
            self.put(o_path, data)
            elapsed = time.time() - start_time
            if elapsed > 0:
                logger.debug('put %.1f bytes/second' % (len(data) / elapsed))

            if self.config['verify_via_http']:
                try:
                    start_time = time.time()
                    ## NOTE(review): name_info['md5'] is assumed to be set
                    ## upstream (e.g. by get_name_info) -- confirm
                    self.verify(o_path, name_info['md5'])
                    elapsed = time.time() - start_time
                    if elapsed > 0:
                        logger.debug('verify %.1f bytes/second' %
                                     (len(data) / elapsed))

                    break
                except Exception as exc:
                    logger.critical('verify_via_http failed so retrying: %r' %
                                    exc)
                    ## keep looping if verify raises anything
                    continue

            else:
                ## not verifying, so don't attempt multiple puts
                break
Example #7
0
    def __call__(self, t_path, name_info, i_str):
        '''
        Move the temporary chunk file at t_path to its final location,
        chosen by config['output_type']:

        - 'samedir':  alongside the input file (i_str must be a local
          path ending in '.sc')
        - 'inplace':  overwrite the input path itself
        - 'otherdir': under config['output_path'], named by the
          config['output_name'] template

        Output is optionally xz-compressed when config['compress'] is
        truthy (forced on for 'inplace' when the input ends in '.xz').

        NOTE(review): only the compress branch visibly returns o_path;
        the non-compress path may continue beyond this excerpt -- confirm
        against the full file.
        '''
        o_type = self.config['output_type']
        
        name_info.update( get_name_info( t_path, i_str=i_str ) )

        ## an empty chunk produces no output
        if name_info['num'] == 0:
            return None

        if 'input' in self.config['output_name']:
            ## derive the bare input filename by stripping transport
            ## suffixes outermost-first: .gpg, then .xz, then .sc
            i_fname = i_str.split('/')[-1]
            if i_fname.endswith('.gpg'):
                i_fname = i_fname[:-4]
            if i_fname.endswith('.xz'):
                i_fname = i_fname[:-3]
            if i_fname.endswith('.sc'):
                i_fname = i_fname[:-3]
            name_info['input_fname'] = i_fname 

        ## prepare to compress the output
        compress = self.config.get('compress', None)

        if o_type == 'samedir':
            ## assume that i_str was a local path
            assert i_str[-3:] == '.sc', repr(i_str[-3:])
            o_path = i_str[:-3] + '-%s.sc' % self.config['output_name']
            if compress:
                o_path += '.xz'
            #print 'creating %s' % o_path
            
        elif o_type == 'inplace':
            ## replace the input chunks with the newly created
            o_path = i_str
            ## input was compressed, so the replacement must be too
            if o_path.endswith('.xz'):
                compress = True

        elif o_type == 'otherdir':
            ## put the 
            ## resolve a relative output_path against the current directory
            if not self.config['output_path'].startswith('/'):
                o_dir = os.path.join(os.getcwd(), self.config['output_path'])
            else:
                o_dir = self.config['output_path']

            if not os.path.exists(o_dir):
                os.makedirs(o_dir)

            o_fname = self.config['output_name'] % name_info
            o_path = os.path.join(o_dir, o_fname + '.sc')
            if compress:
                o_path += '.xz'

        ## NOTE(review): an unrecognized o_type leaves o_path unbound and
        ## raises NameError on the next line -- confirm valid values are
        ## enforced upstream
        logger.info('writing chunk file to {}'.format(o_path))
        logger.debug('temporary chunk in {}'.format(t_path))

        ## if dir is missing make it
        dirname = os.path.dirname(o_path)
        if dirname and not os.path.exists(dirname):
            os.makedirs(dirname)

        if compress:
            assert o_path.endswith('.xz'), o_path
            logger.info('compress_and_encrypt_path(%r, tmp_dir=%r)', 
                        t_path, self.config['tmp_dir_path'])

            ## forcibly collect dereferenced objects
            #gc.collect()

            errors, t_path2 = streamcorpus.compress_and_encrypt_path(
                t_path, tmp_dir=self.config['tmp_dir_path'])
            assert not errors, errors

            if self.config['cleanup_tmp_files']:
                # default action, move tmp file to output position
                try:
                    logger.debug('attempting renamed(%r, %r)', t_path2, o_path)
                    os.rename(t_path2, o_path)
                    logger.debug('renamed(%r, %r)', t_path2, o_path)
                except OSError, exc:
                    ## errno 18 is EXDEV: cross-filesystem rename fails, so
                    ## fall back to a copy-then-remove move
                    if exc.errno==18:
                        logger.debug('resorting to patient_move(%r, %r)',
                                     t_path2, o_path, exc_info=True)
                        patient_move(t_path2, o_path)
                        logger.debug('patient_move succeeded')
                    else:
                        logger.critical('rename failed (%r -> %r)', t_path2, o_path, exc_info=True)
                        raise
                return o_path
            else:
                # for debugging, leave temp file, copy to output
                shutil.copy(t_path2, o_path)
                logger.info('copied %r -> %r', t_path2, o_path)
                return o_path