# Example 1
def make_request(connection, email_address, galaxy_ids, features, layers, pixel_types):
    """
    Makes a request in the database for the provided galaxies.

    :param connection: The database connection
    :param email_address: The email address requesting the galaxies
    :param galaxy_ids: The database IDs of the galaxies
    :param features: The features of the galaxies to request
    :param layers: The layers of the galaxies to request
    :param pixel_types: The pixel types of the galaxies to request
    :return: None
    :raises: re-raises any exception after rolling the transaction back
    """

    # All inserts run in one transaction so a failed request never leaves
    # partial rows behind in the database.
    transaction = connection.begin()

    try:

        LOG.info("Making HDF5_REQUEST entry...")
        result = connection.execute(HDF5_REQUEST.insert(), profile_id=0, email=email_address,
                                    created_at=datetime.datetime.utcnow())

        LOG.info("Making features, layers, pixel types entries...")
        insert_features_layers_pixel_types_db_ids(connection, result.inserted_primary_key, features, layers, pixel_types)

        LOG.info("Making galaxy entries...")
        for galaxy in galaxy_ids:
            # We already checked to ensure these are valid, so throw em in.
            connection.execute(HDF5_REQUEST_GALAXY.insert(), hdf5_request_id=result.inserted_primary_key,
                               galaxy_id=galaxy)

        transaction.commit()
    except BaseException:
        # Explicit BaseException instead of a bare "except:": identical
        # rollback-on-anything behaviour (including KeyboardInterrupt),
        # but no longer trips linters. The exception is always re-raised.
        transaction.rollback()
        raise
# Example 2
def make_request(connection, email_address, galaxy_ids, features, layers,
                 pixel_types):
    """
    Makes a request in the database for the provided galaxies.

    :param connection: The database connection
    :param email_address: The email address requesting the galaxies
    :param galaxy_ids: The database IDs of the galaxies
    :param features: The features of the galaxies to request
    :param layers: The layers of the galaxies to request
    :param pixel_types: The pixel types of the galaxies to request
    :return: None
    :raises: re-raises any exception after rolling the transaction back
    """

    # All inserts run in one transaction so a failed request never leaves
    # partial rows behind in the database.
    transaction = connection.begin()

    try:

        LOG.info("Making HDF5_REQUEST entry...")
        result = connection.execute(HDF5_REQUEST.insert(),
                                    profile_id=0,
                                    email=email_address,
                                    created_at=datetime.datetime.utcnow())

        LOG.info("Making features, layers, pixel types entries...")
        insert_features_layers_pixel_types_db_ids(connection,
                                                  result.inserted_primary_key,
                                                  features, layers,
                                                  pixel_types)

        LOG.info("Making galaxy entries...")
        for galaxy in galaxy_ids:
            # We already checked to ensure these are valid, so throw em in.
            connection.execute(HDF5_REQUEST_GALAXY.insert(),
                               hdf5_request_id=result.inserted_primary_key,
                               galaxy_id=galaxy)

        transaction.commit()
    except BaseException:
        # Explicit BaseException instead of a bare "except:": identical
        # rollback-on-anything behaviour (including KeyboardInterrupt),
        # but no longer trips linters. The exception is always re-raised.
        transaction.rollback()
        raise
def generate_files(connection, hdf5_request_galaxy_ids, email, features, layers):
    """
    Get the FITS files for this request.

    :param connection: The database connection
    :param hdf5_request_galaxy_ids: rows identifying the requested galaxies
    :param email: address the results are reported to via send_email
    :param features: the features to extract from each HDF5 file
    :param layers: the layers to extract from each HDF5 file
    :return: None (outcome is reported per-galaxy through send_email)
    """
    uuid_string = str(uuid.uuid4())
    results = []
    for hdf5_request_galaxy in hdf5_request_galaxy_ids:
        result = HDF5ToFitsResult()
        results.append(result)
        # Mark this request row as "in progress" (state=1).
        connection.execute(HDF5_REQUEST_GALAXY.update().where(HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id == hdf5_request_galaxy.hdf5_request_galaxy_id).values(state=1))
        try:
            galaxy = connection.execute(select([GALAXY]).where(GALAXY.c.galaxy_id == hdf5_request_galaxy.galaxy_id)).first()
            result.galaxy_name = galaxy[GALAXY.c.name]
            LOG.info('Processing {0} ({1}) for {2}'.format(galaxy[GALAXY.c.name], galaxy[GALAXY.c.galaxy_id], email))

            # make sure the galaxy is available
            if galaxy[GALAXY.c.status_id] == STORED or galaxy[GALAXY.c.status_id] == DELETED:
                output_dir = tempfile.mkdtemp()
                try:
                    s3_helper = S3Helper()
                    LOG.info('Getting HDF5 file to {0}'.format(output_dir))
                    tmp_file = get_hdf5_file(s3_helper, output_dir, galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id])
                    LOG.info('File stored in {0}'.format(tmp_file))

                    # We have the file
                    if os.path.isfile(tmp_file):
                        h5_file = h5py.File(tmp_file, 'r')
                        try:
                            galaxy_group = h5_file['galaxy']
                            pixel_group = galaxy_group['pixel']

                            file_names = []
                            for feature in features:
                                for layer in layers:
                                    LOG.info('Processing {0} - {1}'.format(feature, layer))
                                    file_names.append(build_fits_image(feature, layer, output_dir, galaxy_group, pixel_group, galaxy[GALAXY.c.name]))
                        finally:
                            # BUG FIX: previously the HDF5 handle leaked if
                            # build_fits_image raised; always close it.
                            h5_file.close()
                        url = zip_files(s3_helper, get_galaxy_file_name(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id]), uuid_string, file_names, output_dir)
                        # state=2 == done; the download link expires in 10 days.
                        connection.execute(HDF5_REQUEST_GALAXY.update().
                                           where(HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id == hdf5_request_galaxy.hdf5_request_galaxy_id).
                                           values(state=2, link=url, link_expires_at=datetime.now() + timedelta(days=10)))
                        result.error = None
                        result.link = url
                finally:
                    # Delete the temp files now we're done
                    shutil.rmtree(output_dir)

            else:
                connection.execute(HDF5_REQUEST_GALAXY.update().
                                   where(HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id == hdf5_request_galaxy.hdf5_request_galaxy_id).
                                   values(state=3))
                result.error = 'Cannot process {0} ({1}) as the HDF5 file has not been generated'.format(galaxy[GALAXY.c.name], galaxy[GALAXY.c.galaxy_id])
                LOG.info(result.error)
        except Exception:
            # Best-effort per galaxy: record the failure (state=3) and carry
            # on with the next one. Narrowed from a bare "except:" so that
            # KeyboardInterrupt/SystemExit are no longer swallowed here.
            LOG.error('Major error')
            result.error = traceback.format_exc()
            connection.execute(HDF5_REQUEST_GALAXY.update().
                               where(HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id == hdf5_request_galaxy.hdf5_request_galaxy_id).
                               values(state=3))

    send_email(email, results, features, layers)
# Example 4
def generate_files(connection, hdf5_request_galaxy_ids, email, features,
                   layers, pixel_types):
    """
    Get the FITS files for this request.

    :param connection: The database connection
    :param pixel_types: the pixel types to extract from each HDF5 file
    :param hdf5_request_galaxy_ids: rows identifying the requested galaxies
    :param email: address the results are reported to via send_email
    :param features: the features to extract from each HDF5 file
    :param layers: the layers to extract from each HDF5 file
    :return: None (outcome is reported per-galaxy through send_email)
    """
    uuid_string = str(uuid.uuid4())
    results = []
    available_galaxies = []
    s3_helper = S3Helper()
    bucket_name = get_saved_files_bucket()

    # Check whether all the requested galaxies are available or not.
    for hdf5_request_galaxy in hdf5_request_galaxy_ids:
        galaxy = connection.execute(
            select([GALAXY]).where(
                GALAXY.c.galaxy_id == hdf5_request_galaxy.galaxy_id)).first()
        hdf5_request_galaxy = connection.execute(
            select([
                HDF5_REQUEST_GALAXY
            ]).where(HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id ==
                     hdf5_request_galaxy.hdf5_request_galaxy_id)).first()
        state = hdf5_request_galaxy.state

        # BUG FIX: was "state is not 0" — identity comparison on an int is
        # implementation-dependent; use value inequality.
        if state != 0:
            LOG.info('Skipping {0}, state is {1}'.format(
                galaxy[GALAXY.c.name], state))
            continue  # Skip

        key = get_key_hdf5(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id],
                           galaxy[GALAXY.c.galaxy_id])

        if s3_helper.file_exists(bucket_name, key):
            if s3_helper.file_archived(bucket_name, key):
                # file is archived
                if s3_helper.file_restoring(bucket_name, key):
                    # if file is restoring, just need to wait for it
                    LOG.info(
                        'Galaxy {0} is still restoring from glacier'.format(
                            galaxy[GALAXY.c.name]))
                else:
                    # if file is not restoring, need to request.
                    file_size = s3_helper.file_size(bucket_name, key)

                    if restore_file_size_check(connection, bucket_name,
                                               file_size):
                        # We're good to restore
                        LOG.info(
                            'Making request for archived galaxy {0}'.format(
                                galaxy[GALAXY.c.name]))
                        s3_helper.restore_archived_file(bucket_name, key)

                        connection.execute(
                            HDF5_REQUEST_GALAXY_SIZE.insert(),
                            hdf5_request_galaxy_id=hdf5_request_galaxy[
                                'hdf5_request_galaxy_id'],
                            size=file_size,
                            request_time=seconds_since_epoch(datetime.now()))
                    else:
                        # Don't restore or we risk spending a lot of money
                        LOG.info(
                            'Daily galaxy restore size hit. Cannot request archived galaxy.'
                        )
            else:
                # file is not archived
                LOG.info('Galaxy {0} is available in s3'.format(
                    galaxy[GALAXY.c.name]))
                available_galaxies.append(hdf5_request_galaxy)
        else:
            LOG.error('Galaxy {0} does not exist on s3 or glacier!'.format(
                galaxy[GALAXY.c.name]))

    total_request_galaxies = len(hdf5_request_galaxy_ids)
    LOG.info(
        'Need to have {0} galaxies available ({1} currently available)'.format(
            total_request_galaxies * GALAXY_EMAIL_THRESHOLD,
            len(available_galaxies)))
    if len(
            available_galaxies
    ) >= total_request_galaxies * GALAXY_EMAIL_THRESHOLD:  # Only proceed if more than the threshold of galaxies are available
        LOG.info('{0}/{1} (> {2}%) galaxies are available. Email will be sent'.
                 format(len(available_galaxies), total_request_galaxies,
                        GALAXY_EMAIL_THRESHOLD * 100))
        remaining_galaxies = total_request_galaxies - len(available_galaxies)

        for hdf5_request_galaxy in available_galaxies:
            result = HDF5ToFitsResult()
            results.append(result)
            # Mark this request row as "in progress" (state=1).
            connection.execute(HDF5_REQUEST_GALAXY.update().where(
                HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id ==
                hdf5_request_galaxy.hdf5_request_galaxy_id).values(state=1))
            # noinspection PyBroadException
            try:
                galaxy = connection.execute(
                    select([GALAXY
                            ]).where(GALAXY.c.galaxy_id ==
                                     hdf5_request_galaxy.galaxy_id)).first()
                result.galaxy_name = galaxy[GALAXY.c.name]
                LOG.info('Processing {0} ({1}) for {2}'.format(
                    galaxy[GALAXY.c.name], galaxy[GALAXY.c.galaxy_id], email))

                # make sure the galaxy is available
                if galaxy[GALAXY.c.status_id] == STORED or galaxy[
                        GALAXY.c.status_id] == DELETED:
                    output_dir = tempfile.mkdtemp()
                    try:
                        s3_helper = S3Helper()
                        LOG.info('Getting HDF5 file to {0}'.format(output_dir))
                        tmp_file = get_hdf5_file(s3_helper, output_dir,
                                                 galaxy[GALAXY.c.name],
                                                 galaxy[GALAXY.c.run_id],
                                                 galaxy[GALAXY.c.galaxy_id])
                        LOG.info('File stored in {0}'.format(tmp_file))

                        # We have the file
                        if os.path.isfile(tmp_file):
                            int_flux_output = os.path.join(
                                output_dir, 'intflux')
                            rad_output = os.path.join(output_dir, 'rad')

                            if not os.path.exists(int_flux_output):
                                os.mkdir(int_flux_output)

                            if not os.path.exists(rad_output):
                                os.mkdir(rad_output)

                            file_names = process_hdf5_file(
                                tmp_file,
                                galaxy[GALAXY.c.name],
                                galaxy[GALAXY.c.galaxy_id],
                                pixel_types,
                                features,
                                result,
                                layers,
                                output_dir,
                                rad_output,
                                int_flux_output,
                            )

                            url = zip_files(
                                s3_helper,
                                get_galaxy_file_name(
                                    galaxy[GALAXY.c.name],
                                    galaxy[GALAXY.c.run_id],
                                    galaxy[GALAXY.c.galaxy_id]), uuid_string,
                                file_names, output_dir)

                            # state=2 == done; link expires in 10 days.
                            connection.execute(HDF5_REQUEST_GALAXY.update(
                            ).where(
                                HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id ==
                                hdf5_request_galaxy.hdf5_request_galaxy_id
                            ).values(state=2,
                                     link=url,
                                     link_expires_at=datetime.now() +
                                     timedelta(days=10)))

                            result.error = None
                            result.link = url

                    except S3ResponseError as e:  # Handling for a strange s3 error
                        LOG.error(
                            'Error retrieving galaxy {0} from s3. Retrying next run'
                            .format(galaxy[GALAXY.c.name]))
                        LOG.error('{0}'.format(str(e)))
                        key = get_key_hdf5(galaxy[GALAXY.c.name],
                                           galaxy[GALAXY.c.run_id],
                                           galaxy[GALAXY.c.galaxy_id])
                        LOG.info('Key: {0}'.format(key))
                        LOG.info('Exists: {0}'.format(
                            s3_helper.file_exists(bucket_name, key)))
                        result.error = traceback.format_exc()
                        remaining_galaxies += 1
                    finally:
                        # Delete the temp files now we're done
                        shutil.rmtree(output_dir)

                else:
                    connection.execute(HDF5_REQUEST_GALAXY.update().where(
                        HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id ==
                        hdf5_request_galaxy.hdf5_request_galaxy_id).values(
                            state=3))
                    result.error = 'Cannot process {0} ({1}) as the HDF5 file has not been generated'.format(
                        galaxy[GALAXY.c.name], galaxy[GALAXY.c.galaxy_id])
                    LOG.info(result.error)
            except Exception:
                # Best-effort per galaxy: record the failure (state=3) and
                # continue. Narrowed from a bare "except:" so that
                # KeyboardInterrupt/SystemExit are no longer swallowed here.
                LOG.error('Major error')
                result.error = traceback.format_exc()
                connection.execute(HDF5_REQUEST_GALAXY.update().where(
                    HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id ==
                    hdf5_request_galaxy.hdf5_request_galaxy_id).values(
                        state=3))

        send_email(email, results, features, layers, pixel_types,
                   remaining_galaxies)
# Example 5
def generate_files(connection, hdf5_request_galaxy_ids, email, features, layers, pixel_types):
    """
    Get the FITS files for this request.

    :param connection: The database connection
    :param pixel_types: the pixel types to extract from each HDF5 file
    :param hdf5_request_galaxy_ids: rows identifying the requested galaxies
    :param email: address the results are reported to via send_email
    :param features: the features to extract from each HDF5 file
    :param layers: the layers to extract from each HDF5 file
    :return: None (outcome is reported per-galaxy through send_email)
    """
    uuid_string = str(uuid.uuid4())
    results = []
    available_galaxies = []
    s3_helper = S3Helper()
    bucket_name = get_saved_files_bucket()

    # Check whether all the requested galaxies are available or not.
    for hdf5_request_galaxy in hdf5_request_galaxy_ids:
        galaxy = connection.execute(select([GALAXY]).where(GALAXY.c.galaxy_id == hdf5_request_galaxy.galaxy_id)).first()
        hdf5_request_galaxy = connection.execute(select([HDF5_REQUEST_GALAXY])
                                                 .where(HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id == hdf5_request_galaxy.hdf5_request_galaxy_id)).first()
        state = hdf5_request_galaxy.state

        # BUG FIX: was "state is not 0" — identity comparison on an int is
        # implementation-dependent; use value inequality.
        if state != 0:
            LOG.info('Skipping {0}, state is {1}'.format(galaxy[GALAXY.c.name], state))
            continue  # Skip

        key = get_key_hdf5(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id])

        if s3_helper.file_exists(bucket_name, key):
            if s3_helper.file_archived(bucket_name, key):
                # file is archived
                if s3_helper.file_restoring(bucket_name, key):
                    # if file is restoring, just need to wait for it
                    LOG.info('Galaxy {0} is still restoring from glacier'.format(galaxy[GALAXY.c.name]))
                else:
                    # if file is not restoring, need to request.
                    file_size = s3_helper.file_size(bucket_name, key)

                    if restore_file_size_check(connection, bucket_name, file_size):
                        # We're good to restore
                        LOG.info('Making request for archived galaxy {0}'.format(galaxy[GALAXY.c.name]))
                        s3_helper.restore_archived_file(bucket_name, key)

                        connection.execute(HDF5_REQUEST_GALAXY_SIZE.insert(),
                                           hdf5_request_galaxy_id=hdf5_request_galaxy['hdf5_request_galaxy_id'],
                                           size=file_size,
                                           request_time=seconds_since_epoch(datetime.now()))
                    else:
                        # Don't restore or we risk spending a lot of money
                        LOG.info('Daily galaxy restore size hit. Cannot request archived galaxy.')
            else:
                # file is not archived
                LOG.info('Galaxy {0} is available in s3'.format(galaxy[GALAXY.c.name]))
                available_galaxies.append(hdf5_request_galaxy)
        else:
            LOG.error('Galaxy {0} does not exist on s3 or glacier!'.format(galaxy[GALAXY.c.name]))

    total_request_galaxies = len(hdf5_request_galaxy_ids)
    LOG.info('Need to have {0} galaxies available ({1} currently available)'.format(total_request_galaxies * GALAXY_EMAIL_THRESHOLD, len(available_galaxies)))
    if len(available_galaxies) >= total_request_galaxies * GALAXY_EMAIL_THRESHOLD:  # Only proceed if more than the threshold of galaxies are available
        LOG.info('{0}/{1} (> {2}%) galaxies are available. Email will be sent'.format(
            len(available_galaxies),
            total_request_galaxies,
            GALAXY_EMAIL_THRESHOLD * 100)
        )
        remaining_galaxies = total_request_galaxies - len(available_galaxies)

        for hdf5_request_galaxy in available_galaxies:
            result = HDF5ToFitsResult()
            results.append(result)
            # Mark this request row as "in progress" (state=1).
            connection.execute(HDF5_REQUEST_GALAXY.update().where(HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id == hdf5_request_galaxy.hdf5_request_galaxy_id).values(state=1))
            # noinspection PyBroadException
            try:
                galaxy = connection.execute(select([GALAXY]).where(GALAXY.c.galaxy_id == hdf5_request_galaxy.galaxy_id)).first()
                result.galaxy_name = galaxy[GALAXY.c.name]
                LOG.info('Processing {0} ({1}) for {2}'.format(galaxy[GALAXY.c.name], galaxy[GALAXY.c.galaxy_id], email))

                # make sure the galaxy is available
                if galaxy[GALAXY.c.status_id] == STORED or galaxy[GALAXY.c.status_id] == DELETED:
                    output_dir = tempfile.mkdtemp()
                    try:
                        s3_helper = S3Helper()
                        LOG.info('Getting HDF5 file to {0}'.format(output_dir))
                        tmp_file = get_hdf5_file(s3_helper, output_dir, galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id])
                        LOG.info('File stored in {0}'.format(tmp_file))

                        # We have the file
                        if os.path.isfile(tmp_file):
                            int_flux_output = os.path.join(output_dir, 'intflux')
                            rad_output = os.path.join(output_dir, 'rad')

                            if not os.path.exists(int_flux_output):
                                os.mkdir(int_flux_output)

                            if not os.path.exists(rad_output):
                                os.mkdir(rad_output)

                            file_names = process_hdf5_file(
                                tmp_file,
                                galaxy[GALAXY.c.name],
                                galaxy[GALAXY.c.galaxy_id],
                                pixel_types,
                                features,
                                result,
                                layers,
                                output_dir,
                                rad_output,
                                int_flux_output,
                            )

                            url = zip_files(
                                s3_helper,
                                get_galaxy_file_name(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id]),
                                uuid_string,
                                file_names,
                                output_dir
                            )

                            # state=2 == done; link expires in 10 days.
                            connection.execute(
                                HDF5_REQUEST_GALAXY.update().
                                where(HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id == hdf5_request_galaxy.hdf5_request_galaxy_id).
                                values(state=2, link=url, link_expires_at=datetime.now() + timedelta(days=10)))

                            result.error = None
                            result.link = url

                    except S3ResponseError as e:  # Handling for a strange s3 error
                        LOG.error('Error retrieving galaxy {0} from s3. Retrying next run'.format(galaxy[GALAXY.c.name]))
                        LOG.error('{0}'.format(str(e)))
                        key = get_key_hdf5(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy[GALAXY.c.galaxy_id])
                        LOG.info('Key: {0}'.format(key))
                        LOG.info('Exists: {0}'.format(s3_helper.file_exists(bucket_name, key)))
                        result.error = traceback.format_exc()
                        remaining_galaxies += 1
                    finally:
                        # Delete the temp files now we're done
                        shutil.rmtree(output_dir)

                else:
                    connection.execute(HDF5_REQUEST_GALAXY.update().
                                       where(HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id == hdf5_request_galaxy.hdf5_request_galaxy_id).
                                       values(state=3))
                    result.error = 'Cannot process {0} ({1}) as the HDF5 file has not been generated'.format(galaxy[GALAXY.c.name], galaxy[GALAXY.c.galaxy_id])
                    LOG.info(result.error)
            except Exception:
                # Best-effort per galaxy: record the failure (state=3) and
                # continue. Narrowed from a bare "except:" so that
                # KeyboardInterrupt/SystemExit are no longer swallowed here.
                LOG.error('Major error')
                result.error = traceback.format_exc()
                connection.execute(HDF5_REQUEST_GALAXY.update().
                                   where(HDF5_REQUEST_GALAXY.c.hdf5_request_galaxy_id == hdf5_request_galaxy.hdf5_request_galaxy_id).
                                   values(state=3))

        send_email(email, results, features, layers, pixel_types, remaining_galaxies)
    # BUG FIX: removed a stray unconditional exit(1) here — it terminated the
    # whole process after every call (an artifact merged in from CLI code).

# Create a new request
# Need to make:
# HDF5_request
# HDF5_request_feature
# HDF5_request_galaxy
# HDF5_request_layer
# HDF5_request_pixel
transaction = connection.begin()
result = connection.execute(HDF5_REQUEST.insert(),
                            profile_id=47016,
                            email=args['email'],
                            created_at=time.time())

connection.execute(HDF5_REQUEST_GALAXY.insert(),
                   hdf5_request_id=result.inserted_primary_key,
                   galaxy_id=args['galaxy_id'])

for pixel_type in pixel_types:
    connection.execute(HDF5_REQUEST_PIXEL_TYPE.insert(),
                       hdf5_request_id=result.inserted_primary_key,
                       hdf5_pixel_type_id=pixel_type +
                       1)  # database entries start at 1

for layer in layers:
    connection.execute(HDF5_REQUEST_LAYER.insert(),
                       hdf5_request_id=result.inserted_primary_key,
                       hdf5_layer_id=LAYERS[layer] + 1)

for feature in features:
# Example 7
connection = engine.connect()

features, layers, pixel_types = get_features_and_layers_pixeltypes_cmd_line(args)
if len(features) == 0 or len(layers) == 0:
    parser.print_help()
    exit(1)

# Create a new request
# Need to make:
# HDF5_request
# HDF5_request_feature
# HDF5_request_galaxy
# HDF5_request_layer
# HDF5_request_pixel
transaction = connection.begin()
try:
    # ROBUSTNESS FIX: the inserts now roll back as a unit on any failure
    # (matching the make_request pattern) and the connection is always
    # closed, instead of leaking an open transaction on error.
    result = connection.execute(HDF5_REQUEST.insert(), profile_id=47016, email=args['email'], created_at=time.time())

    connection.execute(HDF5_REQUEST_GALAXY.insert(), hdf5_request_id=result.inserted_primary_key, galaxy_id=args['galaxy_id'])

    for pixel_type in pixel_types:
        connection.execute(HDF5_REQUEST_PIXEL_TYPE.insert(), hdf5_request_id=result.inserted_primary_key, hdf5_pixel_type_id=pixel_type + 1)  # database entries start at 1

    for layer in layers:
        connection.execute(HDF5_REQUEST_LAYER.insert(), hdf5_request_id=result.inserted_primary_key, hdf5_layer_id=LAYERS[layer] + 1)

    for feature in features:
        connection.execute(HDF5_REQUEST_FEATURE.insert(), hdf5_request_id=result.inserted_primary_key, hdf5_feature_id=FEATURES[feature] + 1)
    transaction.commit()
    LOG.info('All done')
except BaseException:
    transaction.rollback()
    raise
finally:
    connection.close()