# Standard-library imports used below; project helpers (S3Helper, LOG,
# the bucket constants, get_galaxy_file_name, get_block_dir, NUMBER_PROCESSES)
# are assumed to be imported elsewhere in this module.
import glob
import multiprocessing
import os


def copy_files_from_bucket_to_bucket(s3_helper, key_pogs, key_aws_pogs, galaxy_name, run_id, galaxy_id):
    # Stage the HDF5 file on fast local disk first
    filename_aws_pogs = '{0}/{1}.hdf5'.format(
        FAST_DISK,
        get_galaxy_file_name(galaxy_name, run_id, galaxy_id)
    )
    s3_helper.get_file_from_bucket(BUCKET_NAME_OLD_POGS, key_pogs, filename_aws_pogs)

    # Get the size to decide between a multipart and a single-part upload
    if should_be_multipart(filename_aws_pogs):
        s3_helper.add_file_to_bucket_multipart(
            BUCKET_NAME,
            key_aws_pogs,
            filename_aws_pogs,
            reduced_redundancy=True,
            delete_source=True
        )
    else:
        s3_helper.add_file_to_bucket(
            BUCKET_NAME,
            key_aws_pogs,
            filename_aws_pogs,
            reduced_redundancy=True,
            delete_source=True
        )
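
# should_be_multipart() is defined elsewhere in this project; the sketch
# below is a hypothetical stand-in showing the intent: choose the multipart
# upload path once a file passes a size threshold. The 100 MB cut-off is an
# assumption for illustration, not the project's actual value (S3 rejects
# single PUTs above 5 GB, and multipart is commonly used well below that).
MULTIPART_THRESHOLD = 100 * 1024 * 1024  # bytes; hypothetical cut-off


def should_be_multipart(file_name):
    """Return True when the file is large enough to warrant a multipart upload."""
    return os.path.getsize(file_name) >= MULTIPART_THRESHOLD
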
def process_block_ids(block_ids):
    # Create the queue and start the consumer processes
    queue = multiprocessing.JoinableQueue()
    consumers = [Consumer(queue) for _ in xrange(NUMBER_PROCESSES)]
    for consumer in consumers:
        consumer.start()

    for block_id in block_ids:
        # Re-establish the S3 connection just in case
        s3_helper = S3Helper(PROFILE_NAME)
        bucket = s3_helper.get_bucket(BUCKET_NAME)

        block_name = get_block_dir(block_id)
        directory_name = os.path.join(HDF_PATH, block_name)
        for file_name in sorted(glob.glob(os.path.join(directory_name, '*'))):
            if file_name.endswith('.hdf5'):
                LOG.info('Looking at {0}'.format(file_name))
                (head, tail) = os.path.split(file_name)
                key_name = '{0}/{1}'.format(block_name, tail)

                # Only queue files that are not already in the bucket
                if not bucket.get_key(key_name):
                    if should_be_multipart(file_name):
                        queue.put(TaskAddMultipart(key_name, file_name))
                    else:
                        queue.put(TaskAdd(key_name, file_name))
                else:
                    LOG.info('The file {0} already exists in S3'.format(tail))

    # Add a poison pill for each consumer
    for _ in xrange(NUMBER_PROCESSES):
        queue.put(None)

    # Wait for the queue to terminate
    queue.join()
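
# The Consumer / Task classes referenced above are defined elsewhere in this
# project; the definitions below are a hypothetical sketch of the usual
# JoinableQueue worker pattern they appear to follow: each worker loops until
# it receives a None "poison pill", calling task_done() for every item so
# queue.join() can return. The exact keyword arguments passed to S3Helper
# here are assumptions mirroring the calls above.
class Consumer(multiprocessing.Process):
    def __init__(self, queue):
        multiprocessing.Process.__init__(self)
        self._queue = queue

    def run(self):
        while True:
            task = self._queue.get()
            if task is None:
                # Poison pill: acknowledge it and stop this worker
                self._queue.task_done()
                break
            task()
            self._queue.task_done()


class TaskAdd(object):
    """Hypothetical task: upload one file to S3 with a single-part put."""
    def __init__(self, key_name, file_name):
        self._key_name = key_name
        self._file_name = file_name

    def __call__(self):
        # Each worker process opens its own S3 connection
        s3_helper = S3Helper(PROFILE_NAME)
        s3_helper.add_file_to_bucket(BUCKET_NAME, self._key_name, self._file_name, reduced_redundancy=True)


class TaskAddMultipart(TaskAdd):
    """Hypothetical task: the same upload via the multipart path."""
    def __call__(self):
        s3_helper = S3Helper(PROFILE_NAME)
        s3_helper.add_file_to_bucket_multipart(BUCKET_NAME, self._key_name, self._file_name, reduced_redundancy=True)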