Example #1
def get_glacier_data_size(connection, bucket_name):
    """
    Returns the total number of bytes that we have stored in glacier.
    Checks the database first for a cached copy of this value so it does not have to be re-requested every time.
    :param connection: The database connection.
    :param bucket_name: Name of the bucket to count.
    :return: Total size, in bytes, of the data stored in glacier.
    """

    # Load the most recent entry from the database.
    # If the timestamp on the most recent entry is less than 24 hours old, use it;
    # if not, do the full check and add a new entry in the db specifying the glacier size.

    day_ago = seconds_since_epoch(get_hours_ago(24))
    result = connection.execute(select([HDF5_GLACIER_STORAGE_SIZE])
                                .where(HDF5_GLACIER_STORAGE_SIZE.c.count_time > day_ago))

    latest_time = 0
    latest_size = 0
    for row in result:
        if row['count_time'] > latest_time:
            latest_size = row['size']
            latest_time = row['count_time']

    if latest_time == 0 or latest_size == 0:
        # No cached value from the last 24 hours; need to re-count
        s3helper = S3Helper()
        LOG.info("Glacier data size expired, recounting...")
        size = s3helper.glacier_data_size(bucket_name)
        LOG.info("Glacier data size counted: {0} bytes".format(size))

        connection.execute(HDF5_GLACIER_STORAGE_SIZE.insert(),
                           size=size,
                           count_time=seconds_since_epoch(datetime.now()))
    else:
        size = latest_size

    return size
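
These snippets reference module-level tables and helpers (HDF5_GLACIER_STORAGE_SIZE, seconds_since_epoch, get_hours_ago, S3Helper, LOG) that are defined elsewhere in the project. As a rough sketch of what Example #1 assumes (the table columns are inferred from usage and the column types are guesses, not the project's actual schema), something like the following would be needed to run it against SQLAlchemy 1.x:

# Hypothetical supporting definitions inferred from usage; not the
# project's actual schema or helpers.
import calendar
import logging
from datetime import datetime, timedelta

from sqlalchemy import BigInteger, Column, Integer, MetaData, Table, select

LOG = logging.getLogger(__name__)
METADATA = MetaData()

# Example #1 reads the 'size' and 'count_time' columns and inserts both,
# so the table needs at least those two plus a primary key.
HDF5_GLACIER_STORAGE_SIZE = Table(
    'hdf5_glacier_storage_size', METADATA,
    Column('hdf5_glacier_storage_size_id', Integer, primary_key=True),
    Column('size', BigInteger),
    Column('count_time', Integer))


def get_hours_ago(hours):
    """Return a datetime the given number of hours in the past."""
    return datetime.now() - timedelta(hours=hours)


def seconds_since_epoch(time):
    """Convert a datetime to integer seconds since the Unix epoch."""
    return calendar.timegm(time.utctimetuple())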
Example #2
def get_day_start_request_size(connection):
    """
    Get the volume of data that has been received from glacier since the start of the day.
    :param connection: Database connection.
    :return: amount of data, in bytes, that has been requested since the start of the day.
    """
    # Sum the size of every request whose request_time falls after the
    # start of the current day.

    start_time = seconds_since_epoch(get_start_of_day())

    result = connection.execute(select([HDF5_REQUEST_GALAXY_SIZE])
                                .where(HDF5_REQUEST_GALAXY_SIZE.c.request_time > start_time))

    size = 0
    for line in result:
        # Add up the sizes
        size += line['size']

    return size
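
Example #3 below gates Glacier restores with restore_file_size_check, whose body is not shown in these snippets. Given the daily total computed here, a plausible sketch (the cap constant and the policy are assumptions, not the project's actual limits) is to allow a restore only while the day's running total stays under a fixed budget:

# Hypothetical sketch of restore_file_size_check as used in Example #3.
# MAX_DAILY_RESTORE_BYTES is an assumed budget, not the project's real cap.
MAX_DAILY_RESTORE_BYTES = 100 * 1024 ** 3  # e.g. 100 GiB of restores per day


def restore_file_size_check(connection, bucket_name, file_size):
    """Return True if restoring file_size more bytes stays within today's budget."""
    # bucket_name is accepted to match the call site; this sketch does not use it.
    requested_today = get_day_start_request_size(connection)
    return requested_today + file_size <= MAX_DAILY_RESTORE_BYTES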
Example #3
def generate_files(connection, hdf5_request_galaxy_ids, email, features, layers, pixel_types):
    """
    Get the FITS files for this request.

    :param connection: The database connection.
    :param hdf5_request_galaxy_ids: the HDF5 request galaxy records to process.
    :param email: the email address to notify when the files are ready.
    :param features: the features requested.
    :param layers: the layers requested.
    :param pixel_types: the pixel types requested.
    :return:
    """
    uuid_string = str(uuid.uuid4())
    results = []
    available_galaxies = []
    s3_helper = S3Helper()
    bucket_name = get_saved_files_bucket()

    # Check whether all the requested galaxies are available or not.
    for hdf5_request_galaxy in hdf5_request_galaxy_ids:
        galaxy = connection.execute(select([GALAXY]).where(GALAXY.c.galaxy_id == hdf5_request_galaxy.galaxy_id)).first()
        hdf5_request_galaxy = connection.execute(select([HDF5_REQUEST_GALAXY])
                                                 .where(HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id == hdf5_request_galaxy.hdf5_request_galaxy_id)).first()
        state = hdf5_request_galaxy.state

        if state != 0:
            LOG.info('Skipping {0}, state is {1}'.format(galaxy[GALAXY.c.name], state))
            continue  # Skip

        key = get_key_hdf5(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id])

        if s3_helper.file_exists(bucket_name, key):
            if s3_helper.file_archived(bucket_name, key):
                # file is archived
                if s3_helper.file_restoring(bucket_name, key):
                    # if file is restoring, just need to wait for it
                    LOG.info('Galaxy {0} is still restoring from glacier'.format(galaxy[GALAXY.c.name]))
                else:
                    # if file is not restoring, need to request.
                    file_size = s3_helper.file_size(bucket_name, key)

                    if restore_file_size_check(connection, bucket_name, file_size):
                        # We're good to restore
                        LOG.info('Making request for archived galaxy {0}'.format(galaxy[GALAXY.c.name]))
                        s3_helper.restore_archived_file(bucket_name, key)

                        connection.execute(HDF5_REQUEST_GALAXY_SIZE.insert(),
                                           hdf5_request_galaxy_id=hdf5_request_galaxy['hdf5_request_galaxy_id'],
                                           size=file_size,
                                           request_time=seconds_since_epoch(datetime.now()))
                    else:
                        # Don't restore or we risk spending a lot of money
                        LOG.info('Daily galaxy restore size hit. Cannot request archived galaxy.')
            else:
                # file is not archived
                LOG.info('Galaxy {0} is available in s3'.format(galaxy[GALAXY.c.name]))
                available_galaxies.append(hdf5_request_galaxy)
        else:
            LOG.error('Galaxy {0} does not exist on s3 or glacier!'.format(galaxy[GALAXY.c.name]))

    total_request_galaxies = len(hdf5_request_galaxy_ids)
    LOG.info('Need to have {0} galaxies available ({1} currently available)'.format(total_request_galaxies * GALAXY_EMAIL_THRESHOLD, len(available_galaxies)))
    if len(available_galaxies) >= total_request_galaxies * GALAXY_EMAIL_THRESHOLD:  # Only proceed if at least the threshold of galaxies is available
        LOG.info('{0}/{1} (>= {2}%) galaxies are available. Email will be sent'.format(
            len(available_galaxies),
            total_request_galaxies,
            GALAXY_EMAIL_THRESHOLD * 100)
        )
        remaining_galaxies = total_request_galaxies - len(available_galaxies)

        for hdf5_request_galaxy in available_galaxies:
            result = HDF5ToFitsResult()
            results.append(result)
            connection.execute(HDF5_REQUEST_GALAXY.update().where(HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id == hdf5_request_galaxy.hdf5_request_galaxy_id).values(state=1))
            # noinspection PyBroadException
            try:
                galaxy = connection.execute(select([GALAXY]).where(GALAXY.c.galaxy_id == hdf5_request_galaxy.galaxy_id)).first()
                result.galaxy_name = galaxy[GALAXY.c.name]
                LOG.info('Processing {0} ({1}) for {2}'.format(galaxy[GALAXY.c.name], galaxy[GALAXY.c.galaxy_id], email))

                # make sure the galaxy is available
                if galaxy[GALAXY.c.status_id] == STORED or galaxy[GALAXY.c.status_id] == DELETED:
                    output_dir = tempfile.mkdtemp()
                    try:
                        s3_helper = S3Helper()
                        LOG.info('Getting HDF5 file to {0}'.format(output_dir))
                        tmp_file = get_hdf5_file(s3_helper, output_dir, galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id])
                        LOG.info('File stored in {0}'.format(tmp_file))

                        # We have the file
                        if os.path.isfile(tmp_file):
                            int_flux_output = os.path.join(output_dir, 'intflux')
                            rad_output = os.path.join(output_dir, 'rad')

                            if not os.path.exists(int_flux_output):
                                os.mkdir(int_flux_output)

                            if not os.path.exists(rad_output):
                                os.mkdir(rad_output)

                            file_names = process_hdf5_file(
                                tmp_file,
                                galaxy[GALAXY.c.name],
                                galaxy[GALAXY.c.galaxy_id],
                                pixel_types,
                                features,
                                result,
                                layers,
                                output_dir,
                                rad_output,
                                int_flux_output,
                            )

                            url = zip_files(
                                s3_helper,
                                get_galaxy_file_name(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id]),
                                uuid_string,
                                file_names,
                                output_dir
                            )

                            connection.execute(
                                HDF5_REQUEST_GALAXY.update().
                                where(HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id == hdf5_request_galaxy.hdf5_request_galaxy_id).
                                values(state=2, link=url, link_expires_at=datetime.now() + timedelta(days=10)))

                            result.error = None
                            result.link = url

                    except S3ResponseError as e:  # Handling for a strange s3 error
                        LOG.error('Error retrieving galaxy {0} from s3. Retrying next run'.format(galaxy[GALAXY.c.name]))
                        LOG.error('{0}'.format(str(e)))
                        key = get_key_hdf5(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id])
                        LOG.info('Key: {0}'.format(key))
                        LOG.info('Exists: {0}'.format(s3_helper.file_exists(bucket_name, key)))
                        result.error = traceback.format_exc()
                        remaining_galaxies += 1
                    finally:
                        # Delete the temp files now we're done
                        shutil.rmtree(output_dir)

                else:
                    connection.execute(HDF5_REQUEST_GALAXY.update().
                                       where(HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id == hdf5_request_galaxy.hdf5_request_galaxy_id).
                                       values(state=3))
                    result.error = 'Cannot process {0} ({1}) as the HDF5 file has not been generated'.format(galaxy[GALAXY.c.name], galaxy[GALAXY.c.galaxy_id])
                    LOG.info(result.error)
            except Exception:
                LOG.error('Major error')
                result.error = traceback.format_exc()
                connection.execute(HDF5_REQUEST_GALAXY.update().
                                   where(HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id == hdf5_request_galaxy.hdf5_request_galaxy_id).
                                   values(state=3))

        send_email(email, results, features, layers, pixel_types, remaining_galaxies)
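
The integer states written to HDF5_REQUEST_GALAXY above are magic numbers. From the transitions in Example #3, the lifecycle appears to be: 0 is pending (anything else is skipped), 1 is set just before processing, 2 is set on success together with the zip link and a 10-day expiry, and 3 is set when the HDF5 file was never generated or processing failed. Named constants (hypothetical names; the project may define its own elsewhere) make that intent explicit:

# Hypothetical named constants for the request states used in Example #3.
STATE_PENDING = 0     # not yet processed; any other state is skipped
STATE_PROCESSING = 1  # set just before the HDF5 file is fetched and converted
STATE_COMPLETED = 2   # set on success, with the download link and 10-day expiry
STATE_ERROR = 3       # set when the HDF5 file is missing or processing fails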