Code example #1
0
def copy_files_from_bucket_to_bucket(s3_helper, key_pogs, key_aws_pogs, galaxy_name, run_id, galaxy_id):
    """Move one galaxy's HDF5 file from the old POGS bucket to the new bucket.

    The file is staged on the fast local disk, then re-uploaded with
    reduced redundancy; ``delete_source=True`` removes the staged copy
    after a successful upload.
    """
    local_path = '{0}/{1}.hdf5'.format(
        FAST_DISK,
        get_galaxy_file_name(galaxy_name, run_id, galaxy_id)
    )
    s3_helper.get_file_from_bucket(BUCKET_NAME_OLD_POGS, key_pogs, local_path)

    # File size decides whether the multipart upload API is needed
    if should_be_multipart(local_path):
        upload = s3_helper.add_file_to_bucket_multipart
    else:
        upload = s3_helper.add_file_to_bucket
    upload(BUCKET_NAME, key_aws_pogs, local_path, reduced_redundancy=True, delete_source=True)
Code example #2
0
File: process_galaxy_mask.py  Project: ICRAR/aws-pogs
    def run(self):
        """Fetch the galaxy row for ``self._galaxy_id``; if it exists, record
        its dimensions and run the mask search."""
        row = self._connection.execute(
            select([GALAXY]).where(GALAXY.c.galaxy_id == self._galaxy_id)
        ).first()

        # Guard clause: nothing to do when the galaxy is not in the database
        if row is None:
            return

        LOG.info(
            'Processing {0}'.format(
                get_galaxy_file_name(
                    row[GALAXY.c.name],
                    row[GALAXY.c.run_id],
                    row[GALAXY.c.galaxy_id],
                )
            )
        )
        self._dimension_x = row[GALAXY.c.dimension_x]
        self._dimension_y = row[GALAXY.c.dimension_y]
        self._find_mask()
Code example #3
0
def copy_hdf5_files(args):
    """Copy each unprocessed galaxy's HDF5 file from the old POGS bucket.

    Iterates over galaxies that have no STEP_DONE record yet.  For each:
      * if the file already exists in the new bucket, it is skipped;
      * if it exists in the old bucket, it is copied (or a Glacier
        restore is started) via check_and_copy_or_restore;
      * otherwise STEP_DONE_ID_NO_HDF5_FILE is recorded so the galaxy is
        excluded from future runs.

    Any error aborts the whole loop: a single S3/DB failure usually means
    the remaining galaxies would fail the same way.
    """
    s3_helper = S3Helper()
    bucket_aws_pogs = s3_helper.get_bucket(BUCKET_NAME)
    bucket_pogs = s3_helper.get_bucket(BUCKET_NAME_OLD_POGS)

    engine = create_engine(DB_LOGIN)
    connection = engine.connect()

    subquery = select([STEP_DONE.c.galaxy_id]).distinct()
    for galaxy in connection.execute(select([GALAXY]).where(~GALAXY.c.galaxy_id.in_(subquery)).order_by(GALAXY.c.galaxy_id)):
        if args.verbose:
            LOG.info('Checking galaxy_id: {0}, name: {1}'.format(galaxy[GALAXY.c.galaxy_id], galaxy[GALAXY.c.name]))

        # noinspection PyBroadException
        try:
            # Block directories group galaxies by thousands.  Floor division
            # ('//') keeps the argument an int under Python 3; plain '/'
            # would produce a float and a wrong key.  Identical in Python 2.
            block_dir = get_block_dir(galaxy[GALAXY.c.galaxy_id] // 1000)
            name_aws_pogs = get_galaxy_file_name(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id])
            key_aws_pogs = Key(bucket_aws_pogs)
            key_aws_pogs.key = '{0}/{1}.hdf5'.format(block_dir, name_aws_pogs)

            if not key_aws_pogs.exists():

                # Does it exist in the old POGS bucket?
                name_pogs = get_galaxy_file_name_pogs(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id])
                key_pogs = Key(bucket_pogs)
                # Old bucket layout is <name>/<name>.hdf5 — both fields are
                # deliberately the same value.
                key_pogs.key = '{0}/{0}.hdf5'.format(name_pogs)

                if key_pogs.exists():
                    check_and_copy_or_restore(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id], args.verbose)
                else:
                    add_step_done_id(connection, galaxy[GALAXY.c.galaxy_id], STEP_DONE_ID_NO_HDF5_FILE)

        except BaseException:
            LOG.exception('error during fetch, quitting')
            break

    if args.verbose:
        LOG.info('Done')
Code example #4
0
def check_and_copy_or_restore(galaxy_name, run_id, galaxy_id, verbose):
    """Copy one galaxy's HDF5 file from the old bucket to the new bucket,
    first triggering a Glacier restore if the object is archived.

    If the old key is in GLACIER and not yet restored, a 10-day restore is
    requested (or, if one is already in flight, just logged) and the copy
    is deferred to a later run.  Errors are logged and swallowed so the
    caller's loop continues.
    """
    # noinspection PyBroadException
    try:
        s3_helper = S3Helper(PROFILE_NAME)
        bucket_aws_pogs = s3_helper.get_bucket(BUCKET_NAME)
        bucket_pogs = s3_helper.get_bucket(BUCKET_NAME_OLD_POGS)

        name_aws_pogs = get_galaxy_file_name(galaxy_name, run_id, galaxy_id)
        name_pogs = get_galaxy_file_name_pogs(galaxy_name, run_id, galaxy_id)

        # Build the new key from the old key.  Floor division ('//') keeps
        # the block-dir argument an int under Python 3; '/' would yield a
        # float and a wrong directory.  Identical in Python 2.
        block_dir = get_block_dir(galaxy_id // 1000)
        key_aws_pogs = Key(bucket_aws_pogs)
        key_aws_pogs.key = '{0}/{1}.hdf5'.format(block_dir, name_aws_pogs)

        keyname_pogs = '{0}/{0}.hdf5'.format(name_pogs)
        # Work around to silly bug in boto: fetch the key via the bucket so
        # storage_class / restore metadata are populated.
        key_pogs = bucket_pogs.get_key(keyname_pogs)

        if key_pogs.storage_class == 'GLACIER' and key_pogs.expiry_date is None:
            if key_pogs.ongoing_restore is None:
                if verbose:
                    LOG.info('Retrieving from glacier: {0}'.format(key_pogs.key))
                key_pogs.restore(days=10)
            else:
                if verbose:
                    LOG.info('The file is being retrieved from glacier: {0}'.format(key_pogs.key))

        else:
            if verbose:
                LOG.info('Copy: {0} to {1}'.format(key_pogs.key, key_aws_pogs.key))

            copy_files_from_bucket_to_bucket(s3_helper, key_pogs.key, key_aws_pogs.key, galaxy_name, run_id, galaxy_id)

    except BaseException:
        LOG.exception('Error during copy')
Code example #5
0
File: load_database.py  Project: ICRAR/aws-pogs
def process_galaxy(connection, galaxies, verbosity, profile_name='aws-pogs'):
    """Load SED and original-pixel data for each galaxy into the database.

    For every galaxy that still needs SED or original data (per its
    STEP_DONE records), the HDF5 file is copied from S3 to the fast local
    disk, the required tables are populated, and the local file is removed.

    :param connection: open SQLAlchemy connection
    :param galaxies: iterable of GALAXY rows to process
    :param verbosity: >= 1 enables "nothing to add" progress messages
    :param profile_name: AWS profile used for S3 access
    """
    # Build the tables
    build_dynamic_tables(connection)

    for galaxy in galaxies:
        galaxy_id = galaxy[GALAXY.c.galaxy_id]
        steps_done = get_step_done_ids(
            connection,
            galaxy_id,
            [STEP_DONE_ID_NO_HDF5_FILE, STEP_DONE_ID_ORIGINAL_VALUES, STEP_DONE_ID_SED_DATA],
            True,
            True
        )

        # Work out what SED data is required
        data_required_sed = get_data_required_sed(connection, steps_done)
        data_required_original = get_data_required_original(steps_done)

        if len(data_required_sed) > 0 or len(data_required_original) > 0:
            # Copy the file from S3
            s3_helper = S3Helper(profile_name=profile_name)
            galaxy_id = int(galaxy[GALAXY.c.galaxy_id])
            galaxy_name = get_galaxy_file_name(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy_id)
            # Floor division ('//') keeps the value an int: under Python 3,
            # '/' yields a float and '{0:04d}' then raises ValueError.
            s3_name = os.path.join('{0:04d}000'.format(galaxy_id // 1000), galaxy_name) + '.hdf5'
            hdf5_file_name = os.path.join(FAST_DISK, galaxy_name) + '.hdf5'

            copy_ok = s3_helper.copy_file_to_disk(BUCKET_NAME, s3_name, hdf5_file_name)

            if copy_ok:
                h5_file = h5py.File(hdf5_file_name, 'r')
                # try/finally guarantees the file is closed and the local
                # copy removed even if an unexpected exception escapes.
                try:
                    LOG.info('Processing SED for name: {0}, run_id: {1}, galaxy_id: {2}'.format(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy_id))
                    # Do we have anything to do?
                    if len(data_required_sed) > 0:
                        # noinspection PyBroadException
                        try:
                            # Store the SED fit values
                            add_sed_data(connection, galaxy_id, h5_file, data_required_sed)
                        except Exception:
                            LOG.exception('An exception occurred in process_galaxy processing the SED values')
                    else:
                        if verbosity >= 1:
                            LOG.info('Nothing to add - SED')

                    if len(data_required_original) > 0:
                        # noinspection PyBroadException
                        try:
                            add_original_data(connection, galaxy_id, h5_file, data_required_original)
                        except Exception:
                            LOG.exception('An exception occurred in process_galaxy processing the original values')
                    else:
                        if verbosity >= 1:
                            LOG.info('Nothing to add - Original Data')
                finally:
                    # Clean up after ourselves
                    h5_file.close()
                    os.remove(hdf5_file_name)

            else:
                LOG.error('The file for name: {0}, run_id: {1}, galaxy_id: {2} does not exist'.format(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy_id))