Example 1
from sqlalchemy import create_engine, select
from boto.s3.key import Key

# S3Helper, LOG, the GALAXY and STEP_DONE tables, the get_* helpers and the
# BUCKET_NAME / DB_LOGIN / STEP_DONE_ID_* constants come from the surrounding
# project modules.


def copy_hdf5_files(args):
    """Copy the HDF5 file of every galaxy that has no STEP_DONE entry yet."""
    s3_helper = S3Helper()
    bucket_aws_pogs = s3_helper.get_bucket(BUCKET_NAME)
    bucket_pogs = s3_helper.get_bucket(BUCKET_NAME_OLD_POGS)

    engine = create_engine(DB_LOGIN)
    connection = engine.connect()

    # Anti-join: only process galaxies that have no STEP_DONE entry yet
    subquery = select([STEP_DONE.c.galaxy_id]).distinct()
    query = select([GALAXY]).where(~GALAXY.c.galaxy_id.in_(subquery)).order_by(GALAXY.c.galaxy_id)
    for galaxy in connection.execute(query):
        if args.verbose:
            LOG.info('Checking galaxy_id: {0}, name: {1}'.format(galaxy[GALAXY.c.galaxy_id], galaxy[GALAXY.c.name]))

        # noinspection PyBroadException
        try:
            block_dir = get_block_dir(galaxy[GALAXY.c.galaxy_id] // 1000)
            name_aws_pogs = get_galaxy_file_name(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id])
            key_aws_pogs = Key(bucket_aws_pogs)
            key_aws_pogs.key = '{0}/{1}.hdf5'.format(block_dir, name_aws_pogs)

            if not key_aws_pogs.exists():
                # Does it exist in POGS?
                name_pogs = get_galaxy_file_name_pogs(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id])
                key_pogs = Key(bucket_pogs)
                key_pogs.key = '{0}/{0}.hdf5'.format(name_pogs)

                if key_pogs.exists():
                    check_and_copy_or_restore(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id], args.verbose)
                else:
                    add_step_done_id(connection, galaxy[GALAXY.c.galaxy_id], STEP_DONE_ID_NO_HDF5_FILE)

        except BaseException:
            LOG.exception('Error during fetch, quitting')
            break

    if args.verbose:
        LOG.info('Done')
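
Example 1 is only the worker function; it expects an argparse-style object and reads nothing but args.verbose. A minimal driver sketch, assuming a --verbose flag (the flag name and the wiring are illustrative, not from the source):

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Copy POGS HDF5 files into the new bucket layout')
    # Hypothetical flag: copy_hdf5_files() only ever reads args.verbose
    parser.add_argument('-v', '--verbose', action='store_true', help='log each galaxy as it is checked')
    copy_hdf5_files(parser.parse_args())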
Example 2
def check_and_copy_or_restore(galaxy_name, run_id, galaxy_id, verbose):
    """Copy the galaxy's HDF5 file between buckets, or start a Glacier restore first."""
    # noinspection PyBroadException
    try:
        s3_helper = S3Helper(PROFILE_NAME)
        bucket_aws_pogs = s3_helper.get_bucket(BUCKET_NAME)
        bucket_pogs = s3_helper.get_bucket(BUCKET_NAME_OLD_POGS)

        name_aws_pogs = get_galaxy_file_name(galaxy_name, run_id, galaxy_id)
        name_pogs = get_galaxy_file_name_pogs(galaxy_name, run_id, galaxy_id)

        # Get the new key from the old key
        block_dir = get_block_dir(galaxy_id // 1000)
        key_aws_pogs = Key(bucket_aws_pogs)
        key_aws_pogs.key = '{0}/{1}.hdf5'.format(block_dir, name_aws_pogs)

        keyname_pogs = '{0}/{0}.hdf5'.format(name_pogs)
        # Work around a silly bug in boto: fetch the key so its storage class
        # and restore metadata are actually populated
        key_pogs = bucket_pogs.get_key(keyname_pogs)

        if key_pogs.storage_class == 'GLACIER' and key_pogs.expiry_date is None:
            # In Glacier with no restored copy available yet
            if key_pogs.ongoing_restore is None:
                # No restore has been requested - start one
                if verbose:
                    LOG.info('Retrieving from glacier: {0}'.format(key_pogs.key))
                key_pogs.restore(days=10)
            else:
                if verbose:
                    LOG.info('The file is being retrieved from glacier: {0}'.format(key_pogs.key))

        else:
            if verbose:
                LOG.info('Copy: {0} to {1}'.format(key_pogs.key, key_aws_pogs.key))

            copy_files_from_bucket_to_bucket(s3_helper, key_pogs.key, key_aws_pogs.key, galaxy_name, run_id, galaxy_id)

    except BaseException:
        LOG.exception('Error during copy')
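
copy_files_from_bucket_to_bucket is a project helper whose body is not shown here. A minimal stand-in sketch using boto's server-side copy, assuming the helper's intent is a straight bucket-to-bucket object copy (the function name and signature below are illustrative, not the project's actual implementation):

def copy_key_between_buckets(s3_connection, src_bucket_name, src_key_name, dst_bucket_name, dst_key_name):
    # Server-side copy: S3 duplicates the object internally, so nothing is
    # downloaded to or uploaded from this machine.
    dst_bucket = s3_connection.get_bucket(dst_bucket_name)
    dst_bucket.copy_key(dst_key_name, src_bucket_name, src_key_name)

A server-side copy_key call keeps the transfer inside S3, which matters for multi-gigabyte HDF5 files; the real helper may additionally re-derive names via get_galaxy_file_name, which this sketch omits.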