def process_galaxy(connection, galaxies, verbosity):
    """
    Calculate and store the mean / standard deviation details of each galaxy.

    For every galaxy the step-done ids recorded against
    STEP_DONE_ID_MEAN_STANDARD_DEVIATION are fetched and handed to
    ``get_tables_required``. Each table returned is then processed inside its
    own ``connection.begin()`` block and recorded with ``add_step_done_id``.

    :param connection: the database connection to use
    :param galaxies: iterable of galaxy rows, indexed by the GALAXY columns
    :param verbosity: when truthy, the galaxy being looked at is logged
    """
    # The dynamic tables have to be built before any galaxy is looked at
    build_dynamic_tables(connection)

    for galaxy_row in galaxies:
        galaxy_id = galaxy_row[GALAXY.c.galaxy_id]
        done_steps = get_step_done_ids(connection, galaxy_id, STEP_DONE_ID_MEAN_STANDARD_DEVIATION, True)
        if verbosity:
            message = 'galaxy_id: {0}, galaxy_name: {1}'.format(galaxy_id, galaxy_row[GALAXY.c.name])
            LOG.info(message)

        pending_tables = get_tables_required(done_steps)
        if len(pending_tables) == 0:
            # Nothing outstanding for this galaxy
            continue

        for pending_table in pending_tables:
            # The calculation, the update and the step-done record share one begin() block
            with connection.begin():
                calculator = CreateMeanStandardDeviation(connection, pending_table, galaxy_id)
                calculator.calculate_details()
                calculator.update_details()
                add_step_done_id(connection, galaxy_id, STEP_DONE_ID_MEAN_STANDARD_DEVIATION, pending_table.name)
def process_galaxy(connection, galaxies, verbosity):
    """
    Build the mask of every galaxy that has no build-mask step recorded.

    A galaxy is skipped when STEP_DONE_ID_BUILD_MASK is already among the
    step-done ids returned for it. Otherwise ``ProcessGalaxyMasks`` is run
    and the step is recorded, both inside one ``connection.begin()`` block.

    :param connection: the database connection to use
    :param galaxies: iterable of galaxy rows, indexed by the GALAXY columns
    :param verbosity: when truthy, the galaxy being looked at is logged
    """
    # The dynamic tables have to be built before any galaxy is looked at
    build_dynamic_tables(connection)

    for galaxy_row in galaxies:
        galaxy_id = galaxy_row[GALAXY.c.galaxy_id]
        done_steps = get_step_done_ids(connection, galaxy_id, STEP_DONE_ID_BUILD_MASK)
        if verbosity:
            message = 'galaxy_id: {0}, galaxy_name: {1}'.format(galaxy_id, galaxy_row[GALAXY.c.name])
            LOG.info(message)

        if STEP_DONE_ID_BUILD_MASK in done_steps:
            # The mask step is already recorded for this galaxy
            continue

        with connection.begin():
            mask_builder = ProcessGalaxyMasks(connection, galaxy_id)
            mask_builder.run()
            add_step_done_id(connection, galaxy_id, STEP_DONE_ID_BUILD_MASK)
# Example #3
# 0
def add_original_data(connection, galaxy_id, h5_file, data_required):
    """
    Insert the original values of a galaxy for each required filter.

    The pixel data is taken from ``h5_file['galaxy']['pixel']``. Every filter
    is inserted inside its own ``connection.begin()`` block and then recorded
    with ``add_step_done_id``. Once all filters are handled the total and
    average time are logged; only filters whose reported time is greater
    than zero contribute to those figures.

    :param connection: the database connection to use
    :param galaxy_id: the id of the galaxy the data belongs to
    :param h5_file: mapping-like object that provides the 'galaxy' group
    :param data_required: iterable of the filter names to insert
    """
    galaxy_group = h5_file['galaxy']
    pixel_group = galaxy_group['pixel']

    timed_filters = 0
    elapsed_total = 0
    for filter_name in data_required:
        with connection.begin():
            attributes = galaxy_group.attrs
            elapsed = insert_original_data(
                connection,
                filter_name,
                pixel_group,
                attributes['output_format'],
                attributes['dimension_x'],
                attributes['dimension_y'],
                galaxy_id
            )
            add_step_done_id(connection, galaxy_id, STEP_DONE_ID_ORIGINAL_VALUES, filter_name)

            # Filters that report no time are left out of the average
            if elapsed > 0:
                timed_filters += 1
                elapsed_total += elapsed

    if timed_filters > 0:
        elapsed_average = elapsed_total / timed_filters
    else:
        elapsed_average = 0

    summary = 'add_original_data, galaxy_id: {0}, total_time: {1:.3f}s, average_time: {2:.3f}s'.format(
        galaxy_id,
        elapsed_total,
        elapsed_average
    )
    LOG.info(summary)
# Example #4
# 0
def add_sed_data(connection, galaxy_id, h5_file, data_required):
    """
    Insert the SED data of a galaxy for each required pair.

    The pixel data is taken from ``h5_file['galaxy']['pixel']``. Every pair
    is inserted inside its own ``connection.begin()`` block and then recorded
    with ``add_step_done_id`` using the first two elements of the pair. Once
    all pairs are handled the total and average time are logged; every pair
    contributes to those figures.

    :param connection: the database connection to use
    :param galaxy_id: the id of the galaxy the data belongs to
    :param h5_file: mapping-like object that provides the 'galaxy' group
    :param data_required: iterable of indexable pairs to insert
    """
    galaxy_group = h5_file['galaxy']
    pixel_group = galaxy_group['pixel']

    pairs_done = 0
    elapsed_total = 0
    for pair in data_required:
        with connection.begin():
            attributes = galaxy_group.attrs
            elapsed = insert_sed_data(
                connection,
                pair,
                pixel_group,
                attributes['output_format'],
                attributes['dimension_x'],
                attributes['dimension_y'],
                galaxy_id
            )
            add_step_done_id(connection, galaxy_id, STEP_DONE_ID_SED_DATA, pair[0], pair[1])
            pairs_done += 1
            elapsed_total += elapsed

    if pairs_done > 0:
        elapsed_average = elapsed_total / pairs_done
    else:
        elapsed_average = 0

    summary = 'add_sed_data, galaxy_id: {0}, total_time: {1:.3f}s, average_time: {2:.3f}s'.format(
        galaxy_id,
        elapsed_total,
        elapsed_average
    )
    LOG.info(summary)
def process_galaxy(connection, galaxies, verbosity):
    """
    Store the image centre, centroid and centre of mass of each galaxy.

    A galaxy is only processed when its mask has been built
    (STEP_DONE_ID_BUILD_MASK is recorded) and the centroid step
    (STEP_DONE_ID_CENTROID_CENTRE_OF_MASS) has not been recorded yet. For
    such a galaxy one IMAGE_DETAIL row is inserted and the step is recorded,
    both inside a single ``connection.begin()`` block.

    When the calculation produced no layers, -1.0 is stored for the centroid
    and centre of mass coordinates.

    :param connection: the database connection to use
    :param galaxies: iterable of galaxy rows, indexed by the GALAXY columns
    :param verbosity: a value >= 1 logs the galaxies that are skipped; it is
        also passed on to CentroidCentreOfMass
    """
    # Build the tables
    build_dynamic_tables(connection)

    for galaxy in galaxies:
        galaxy_id = galaxy[GALAXY.c.galaxy_id]
        steps_done = get_step_done_ids(
            connection,
            galaxy_id,
            [STEP_DONE_ID_BUILD_MASK, STEP_DONE_ID_CENTROID_CENTRE_OF_MASS]
        )

        # The mask is a prerequisite
        if STEP_DONE_ID_BUILD_MASK not in steps_done:
            if verbosity >= 1:
                LOG.info('The mask has not been built for the galaxy {0}'.format(galaxy_id))
            continue

        # Already calculated on an earlier run
        if STEP_DONE_ID_CENTROID_CENTRE_OF_MASS in steps_done:
            if verbosity >= 1:
                LOG.info('Nothing to do for the galaxy {0}'.format(galaxy_id))
            continue

        with connection.begin():
            centroid_center_of_mass = CentroidCentreOfMass(connection, galaxy, verbosity)
            centroid_center_of_mass.calculate_results()

            if len(centroid_center_of_mass.layers) > 0:
                # numpy.asscalar() was deprecated in NumPy 1.16 and removed in 1.23;
                # it simply called .item() on its argument, so call that directly.
                centroid = centroid_center_of_mass.centroid
                centre_of_mass = centroid_center_of_mass.centre_of_mass
                x_centroid = centroid[0].item()
                y_centroid = centroid[1].item()
                x_centre_of_mass = centre_of_mass[0].item()
                y_centre_of_mass = centre_of_mass[1].item()
            else:
                # No layers: store the -1.0 placeholder for every coordinate
                x_centroid = y_centroid = -1.0
                x_centre_of_mass = y_centre_of_mass = -1.0

            connection.execute(
                IMAGE_DETAIL.insert(),
                galaxy_id=galaxy_id,
                x_image=centroid_center_of_mass.dimension_x / 2.0,
                y_image=centroid_center_of_mass.dimension_y / 2.0,
                x_centroid=x_centroid,
                y_centroid=y_centroid,
                x_centre_of_mass=x_centre_of_mass,
                y_centre_of_mass=y_centre_of_mass
            )
            add_step_done_id(connection, galaxy_id, STEP_DONE_ID_CENTROID_CENTRE_OF_MASS)
def process_galaxy(connection, galaxies, verbosity):
    """
    Build the galaxy details of each galaxy and log how long it took.

    For every galaxy the step-done ids recorded against
    STEP_DONE_ID_BUILD_GALAXY_DETAILS are fetched and handed to
    ``get_tables_required``. Each table returned is processed inside its own
    ``connection.begin()`` block, recorded with ``add_step_done_id`` and
    timed. The elapsed time per galaxy is logged regardless of whether any
    table needed work.

    :param connection: the database connection to use
    :param galaxies: iterable of galaxy rows, indexed by the GALAXY columns
    :param verbosity: when truthy, the galaxy being looked at is logged
    """
    # The dynamic tables have to be built before any galaxy is looked at
    build_dynamic_tables(connection)

    for galaxy_row in galaxies:
        galaxy_started = time.time()

        galaxy_id = galaxy_row[GALAXY.c.galaxy_id]
        done_steps = get_step_done_ids(connection, galaxy_id, STEP_DONE_ID_BUILD_GALAXY_DETAILS, True)
        if verbosity:
            message = 'galaxy_id: {0}, galaxy_name: {1}'.format(galaxy_id, galaxy_row[GALAXY.c.name])
            LOG.info(message)
        pending_tables = get_tables_required(done_steps)

        if len(pending_tables) > 0:
            for pending_table in pending_tables:
                with connection.begin():
                    table_started = time.time()
                    detail_builder = CreateGalaxyDetail(connection, pending_table, galaxy_id)
                    detail_builder.calculate_details()
                    detail_builder.insert_details()
                    add_step_done_id(connection, galaxy_id, STEP_DONE_ID_BUILD_GALAXY_DETAILS, pending_table.name)
                    table_elapsed = time.time() - table_started
                    table_message = 'galaxy_id: {0}, table: {1}, time: {2:.3f}s'.format(
                        galaxy_id,
                        pending_table.name,
                        table_elapsed
                    )
                    LOG.info(table_message)

        # Logged for every galaxy, including those with nothing to do
        galaxy_elapsed = time.time() - galaxy_started
        LOG.info('galaxy_id: {0}, time: {1:.3f}s'.format(galaxy_id, galaxy_elapsed))
def copy_hdf5_files(args):
    """
    Check the HDF5 file of every galaxy that has no step-done record.

    Galaxies whose id does not appear in STEP_DONE are visited in galaxy_id
    order. For each one:

    * if its key already exists in the BUCKET_NAME bucket nothing is done;
    * otherwise, if its key exists in the BUCKET_NAME_OLD_POGS bucket,
      ``check_and_copy_or_restore`` is called for it;
    * otherwise STEP_DONE_ID_NO_HDF5_FILE is recorded against the galaxy.

    The first failure is logged and ends the loop. The database connection
    is always closed before returning.

    :param args: object with a ``verbose`` attribute; when truthy, progress
        is logged
    """
    s3_helper = S3Helper()
    bucket_aws_pogs = s3_helper.get_bucket(BUCKET_NAME)
    bucket_pogs = s3_helper.get_bucket(BUCKET_NAME_OLD_POGS)

    engine = create_engine(DB_LOGIN)
    connection = engine.connect()

    # Make sure the connection is released even if the query itself fails
    try:
        subquery = select([STEP_DONE.c.galaxy_id]).distinct()
        query = select([GALAXY]).where(~GALAXY.c.galaxy_id.in_(subquery)).order_by(GALAXY.c.galaxy_id)
        for galaxy in connection.execute(query):
            galaxy_id = galaxy[GALAXY.c.galaxy_id]
            galaxy_name = galaxy[GALAXY.c.name]
            run_id = galaxy[GALAXY.c.run_id]

            if args.verbose:
                LOG.info('Checking galaxy_id: {0}, name: {1}'.format(galaxy_id, galaxy_name))

            # The broad catch is kept from the original: any failure, of whatever
            # kind, is logged and stops the run rather than propagating.
            # noinspection PyBroadException
            try:
                # Floor division keeps the block number an integer on Python 3 too
                block_dir = get_block_dir(galaxy_id // 1000)
                name_aws_pogs = get_galaxy_file_name(galaxy_name, run_id, galaxy_id)
                key_aws_pogs = Key(bucket_aws_pogs)
                key_aws_pogs.key = '{0}/{1}.hdf5'.format(block_dir, name_aws_pogs)

                if not key_aws_pogs.exists():

                    # Does it exist in POGS?
                    name_pogs = get_galaxy_file_name_pogs(galaxy_name, run_id, galaxy_id)
                    key_pogs = Key(bucket_pogs)
                    key_pogs.key = '{0}/{0}.hdf5'.format(name_pogs)

                    if key_pogs.exists():
                        check_and_copy_or_restore(galaxy_name, run_id, galaxy_id, args.verbose)
                    else:
                        add_step_done_id(connection, galaxy_id, STEP_DONE_ID_NO_HDF5_FILE)

            except BaseException:
                LOG.exception('error during fetch, quitting')
                break
    finally:
        connection.close()

    if args.verbose:
        LOG.info('Done')
# Example #8
# 0
def process_galaxy(connection, galaxies):
    """
    Populate the ``<table>__extended`` tables with normalised values.

    For every galaxy the step-done ids recorded against
    STEP_DONE_ID_BUILD_GALAXY_DETAILS and STEP_DONE_ID_NORMALISE_Z_MIN_MAX
    are fetched and handed to ``get_tables_required``. Each table returned
    is processed inside its own ``connection.begin()`` block: the normalised
    rows are built, inserted when there are any, and
    STEP_DONE_ID_NORMALISE_Z_MIN_MAX is recorded for the table whether or
    not rows were inserted. Timings are logged per table and per galaxy.

    :param connection: the database connection to use
    :param galaxies: iterable of galaxy rows, indexed by the GALAXY columns
    """
    # Build the tables
    build_dynamic_tables(connection)

    for galaxy in galaxies:
        galaxy_id = galaxy[GALAXY.c.galaxy_id]
        steps_done = get_step_done_ids(
            connection,
            galaxy_id,
            [STEP_DONE_ID_BUILD_GALAXY_DETAILS, STEP_DONE_ID_NORMALISE_Z_MIN_MAX],
            True
        )

        tables_required = get_tables_required(steps_done)

        if len(tables_required) > 0:
            time_start = time.time()
            for table in tables_required:
                with connection.begin():
                    time_start_update = time.time()
                    insert_data = _build_normalised_rows(connection, table, galaxy_id)

                    if len(insert_data) > 0:
                        table_extended = DATA_TABLES_EXTENDED[table.name + '__extended']
                        connection.execute(table_extended.insert(), insert_data)

                    # Recorded even when nothing was inserted, so the table is not revisited
                    add_step_done_id(
                        connection,
                        galaxy_id,
                        STEP_DONE_ID_NORMALISE_Z_MIN_MAX,
                        table.name
                    )
                    time_end_update = time.time()
                    LOG.info(
                        'galaxy_id: {0}, table: {1}__extended, time: {2:.3f}s, inserts: {3}'.format(
                            galaxy_id,
                            table.name,
                            time_end_update - time_start_update,
                            len(insert_data)
                        )
                    )

            time_end = time.time()
            LOG.info('galaxy_id: {0}, time: {1:.3f}s'.format(galaxy_id, time_end - time_start))


def _build_normalised_rows(connection, table, galaxy_id):
    """
    Build the rows to insert into the extended table of ``table``.

    The mean, standard deviation, minimum and maximum are read from the
    GALAXY_DETAIL row of the galaxy / table. An empty list is returned when
    that row is missing, when the minimum, maximum or standard deviation is
    None, or when the range or the standard deviation is zero (both are used
    as divisors).

    :param connection: the database connection to use
    :param table: the data table whose values are normalised
    :param galaxy_id: the id of the galaxy being processed
    :return: list of dicts, one per selected row of ``table``
    """
    mean_standard = connection.execute(
        select([GALAXY_DETAIL]).where(
            and_(
                GALAXY_DETAIL.c.galaxy_id == galaxy_id,
                GALAXY_DETAIL.c.table_name == table.name
            )
        )
    ).first()
    if mean_standard is None:
        return []

    mean = mean_standard[GALAXY_DETAIL.c.mean]
    standard_deviation = mean_standard[GALAXY_DETAIL.c.standard_deviation]
    min_value = mean_standard[GALAXY_DETAIL.c.min_value]
    max_value = mean_standard[GALAXY_DETAIL.c.max_value]

    # The None checks have to happen on the operands: subtracting with a None
    # operand raises TypeError, it never yields None.
    if min_value is None or max_value is None or standard_deviation is None:
        return []

    max_minus_min = max_value - min_value
    if max_minus_min == 0.0 or standard_deviation == 0.0:
        return []

    # Only the points of this galaxy that are also present in MASK_POINT
    conditions = [
        table.c.galaxy_id == galaxy_id,
        table.c.galaxy_id == MASK_POINT.c.galaxy_id,
        table.c.x == MASK_POINT.c.x,
        table.c.y == MASK_POINT.c.y
    ]
    if table.name.startswith('original_value__'):
        # The original value tables additionally ignore non-positive values
        conditions.append(table.c.value > 0.0)
    select_statement = select([table]).where(and_(*conditions))

    insert_data = []
    for table_data in connection.execute(select_statement):
        value = table_data[table.c.value]
        z_score = (value - mean) / standard_deviation
        insert_data.append(
            {
                'original_id': table_data[table.c.id],
                'value_zscore': z_score,
                'value_min_max': (value - min_value) / max_minus_min,
                'value_ln': math.log(value - min_value + 1),
                'value_softmax': 1.0 / (1.0 + math.exp(-z_score))
            }
        )
    return insert_data