from boto.s3.key import Key
from sqlalchemy import create_engine, select

# S3Helper, LOG, the bucket/profile constants, the GALAXY and STEP_DONE tables
# and the get_* helpers are assumed to be imported from this project's own modules.


def copy_hdf5_files(args):
    s3_helper = S3Helper()
    bucket_aws_pogs = s3_helper.get_bucket(BUCKET_NAME)
    bucket_pogs = s3_helper.get_bucket(BUCKET_NAME_OLD_POGS)

    engine = create_engine(DB_LOGIN)
    connection = engine.connect()

    # Only look at galaxies that have not already completed this step
    subquery = select([STEP_DONE.c.galaxy_id]).distinct()
    for galaxy in connection.execute(select([GALAXY]).where(~GALAXY.c.galaxy_id.in_(subquery)).order_by(GALAXY.c.galaxy_id)):
        if args.verbose:
            LOG.info('Checking galaxy_id: {0}, name: {1}'.format(galaxy[GALAXY.c.galaxy_id], galaxy[GALAXY.c.name]))

        # noinspection PyBroadException
        try:
            # Integer division so the block directory is correct under Python 3 as well
            block_dir = get_block_dir(galaxy[GALAXY.c.galaxy_id] // 1000)
            name_aws_pogs = get_galaxy_file_name(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id])
            key_aws_pogs = Key(bucket_aws_pogs)
            key_aws_pogs.key = '{0}/{1}.hdf5'.format(block_dir, name_aws_pogs)

            if not key_aws_pogs.exists():
                # Does it exist in POGS?
                name_pogs = get_galaxy_file_name_pogs(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id])
                key_pogs = Key(bucket_pogs)
                key_pogs.key = '{0}/{0}.hdf5'.format(name_pogs)

                if key_pogs.exists():
                    check_and_copy_or_restore(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id], args.verbose)
                else:
                    # No HDF5 file anywhere - record that so we skip this galaxy next run
                    add_step_done_id(connection, galaxy[GALAXY.c.galaxy_id], STEP_DONE_ID_NO_HDF5_FILE)
        except BaseException:
            LOG.exception('Error during fetch, quitting')
            break

    if args.verbose:
        LOG.info('Done')
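
# For reference, a minimal sketch of what add_step_done_id is assumed to do:
# insert a marker row so the NOT IN subquery above skips the galaxy on the
# next run. The step_done_id column name is an assumption; the real helper
# lives elsewhere in this project.
def _example_add_step_done_id(connection, galaxy_id, step_done_id):
    connection.execute(STEP_DONE.insert().values(galaxy_id=galaxy_id, step_done_id=step_done_id))
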
def check_and_copy_or_restore(galaxy_name, run_id, galaxy_id, verbose):
    # noinspection PyBroadException
    try:
        s3_helper = S3Helper(PROFILE_NAME)
        bucket_aws_pogs = s3_helper.get_bucket(BUCKET_NAME)
        bucket_pogs = s3_helper.get_bucket(BUCKET_NAME_OLD_POGS)
        name_aws_pogs = get_galaxy_file_name(galaxy_name, run_id, galaxy_id)
        name_pogs = get_galaxy_file_name_pogs(galaxy_name, run_id, galaxy_id)

        # Build the new key from the old key
        block_dir = get_block_dir(galaxy_id // 1000)
        key_aws_pogs = Key(bucket_aws_pogs)
        key_aws_pogs.key = '{0}/{1}.hdf5'.format(block_dir, name_aws_pogs)

        keyname_pogs = '{0}/{0}.hdf5'.format(name_pogs)

        # Work around a boto bug: fetch the key through the bucket so the
        # storage class and restore metadata are actually populated
        key_pogs = bucket_pogs.get_key(keyname_pogs)
        if key_pogs.storage_class == 'GLACIER' and key_pogs.expiry_date is None:
            if key_pogs.ongoing_restore is None:
                if verbose:
                    LOG.info('Retrieving from glacier: {0}'.format(key_pogs.key))
                key_pogs.restore(days=10)
            elif verbose:
                LOG.info('The file is being retrieved from glacier: {0}'.format(key_pogs.key))
        else:
            if verbose:
                LOG.info('Copy: {0} to {1}'.format(key_pogs.key, key_aws_pogs.key))
            copy_files_from_bucket_to_bucket(s3_helper, key_pogs.key, key_aws_pogs.key, galaxy_name, run_id, galaxy_id)
    except BaseException:
        LOG.exception('Error during copy')
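
# A minimal sketch of how copy_hdf5_files might be driven from the command
# line. The real entry point is not part of this excerpt, so everything here
# beyond the --verbose flag (which args.verbose implies) is an assumption.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Copy galaxy HDF5 files from the old POGS bucket to the AWS POGS bucket')
    parser.add_argument('-v', '--verbose', action='store_true', help='log each galaxy as it is checked')
    copy_hdf5_files(parser.parse_args())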