def copy_files_from_bucket_to_bucket(s3_helper, key_pogs, key_aws_pogs, galaxy_name, run_id, galaxy_id):
    # Download the HDF5 file from the old POGS bucket to the fast local disk
    filename_aws_pogs = '{0}/{1}.hdf5'.format(
        FAST_DISK,
        get_galaxy_file_name(galaxy_name, run_id, galaxy_id)
    )
    s3_helper.get_file_from_bucket(BUCKET_NAME_OLD_POGS, key_pogs, filename_aws_pogs)

    # Upload to the destination bucket, using a multipart upload for large files
    if should_be_multipart(filename_aws_pogs):
        s3_helper.add_file_to_bucket_multipart(
            BUCKET_NAME, key_aws_pogs, filename_aws_pogs, reduced_redundancy=True, delete_source=True
        )
    else:
        s3_helper.add_file_to_bucket(BUCKET_NAME, key_aws_pogs, filename_aws_pogs, reduced_redundancy=True, delete_source=True)
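Both branches hinge on should_be_multipart(), which isn't shown in this snippet. A minimal sketch of one plausible implementation, assuming it simply compares the file's on-disk size against a fixed threshold (S3 rejects single PUTs above 5 GB, so large files must go multipart anyway); the MULTIPART_THRESHOLD name and the 1 GB value are assumptions, not the project's actual setting:

import os

# Hypothetical cut-off: S3 caps single PUTs at 5 GB, and multipart uploads are
# typically used well below that. The real project may use a different value.
MULTIPART_THRESHOLD = 1024 * 1024 * 1024  # 1 GB

def should_be_multipart(file_name):
    # True when the file is large enough to warrant a multipart upload
    return os.path.getsize(file_name) >= MULTIPART_THRESHOLD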
import glob
import logging
import multiprocessing
import os

LOG = logging.getLogger(__name__)  # assuming the usual module-level logger


def process_block_ids(block_ids):
    # Create the queue that feeds the worker processes
    queue = multiprocessing.JoinableQueue()

    consumers = [Consumer(queue) for _ in xrange(NUMBER_PROCESSES)]

    for consumer in consumers:
        consumer.start()

    for block_id in block_ids:
        # Re-establish the S3 connection just in case
        s3_helper = S3Helper(PROFILE_NAME)
        bucket = s3_helper.get_bucket(BUCKET_NAME)

        block_name = get_block_dir(block_id)
        directory_name = os.path.join(HDF_PATH, block_name)
        for file_name in sorted(glob.glob(os.path.join(directory_name, '*'))):
            if file_name.endswith('.hdf5'):
                LOG.info('Looking at {0}'.format(file_name))

                # Only the basename is needed to build the S3 key
                tail = os.path.basename(file_name)

                key_name = '{0}/{1}'.format(block_name, tail)
                if not bucket.get_key(key_name):
                    if should_be_multipart(file_name):
                        queue.put(TaskAddMultipart(key_name, file_name))
                    else:
                        queue.put(TaskAdd(key_name, file_name))
                else:
                    LOG.info('The file {0} already exists in S3'.format(tail))

    # Add a poison pill for each consumer
    for _ in xrange(NUMBER_PROCESSES):
        queue.put(None)

    # Block until every queued task has been marked done
    queue.join()
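process_block_ids() relies on three helpers defined elsewhere in the project: Consumer, TaskAdd and TaskAddMultipart. A minimal sketch of the usual shape of this producer/consumer setup, assuming the standard poison-pill pattern; the task bodies below reuse the S3Helper calls seen in the first snippet, but everything else here is an assumption about the project's code:

import multiprocessing

class Consumer(multiprocessing.Process):
    # Worker process: pull tasks off the shared queue until a None poison pill arrives
    def __init__(self, queue):
        multiprocessing.Process.__init__(self)
        self._queue = queue

    def run(self):
        while True:
            task = self._queue.get()
            if task is None:
                # Poison pill: acknowledge it so queue.join() can return, then exit
                self._queue.task_done()
                break
            try:
                task()  # each task is a callable
            finally:
                self._queue.task_done()

class TaskAdd(object):
    # Upload one file to S3 with a single PUT
    def __init__(self, key_name, file_name):
        self._key_name = key_name
        self._file_name = file_name

    def __call__(self):
        s3_helper = S3Helper(PROFILE_NAME)
        s3_helper.add_file_to_bucket(BUCKET_NAME, self._key_name, self._file_name, reduced_redundancy=True)

class TaskAddMultipart(object):
    # Upload one large file to S3 in parts
    def __init__(self, key_name, file_name):
        self._key_name = key_name
        self._file_name = file_name

    def __call__(self):
        s3_helper = S3Helper(PROFILE_NAME)
        s3_helper.add_file_to_bucket_multipart(BUCKET_NAME, self._key_name, self._file_name, reduced_redundancy=True)

Note that each worker acknowledges its poison pill with task_done() before exiting; without that, queue.join() would block forever waiting for the NUMBER_PROCESSES sentinel entries to be marked complete.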