def process_galaxy(connection, galaxies, verbosity):
    """
    Run the mean / standard deviation step for each galaxy

    For every galaxy the steps already recorded against
    STEP_DONE_ID_MEAN_STANDARD_DEVIATION are fetched and handed to
    get_tables_required; each table that comes back is processed with
    CreateMeanStandardDeviation and then recorded as done.

    :param connection: the database connection used for all queries and
        for the per-table transactions
    :param galaxies: iterable of rows indexable by the GALAXY columns
    :param verbosity: when truthy the galaxy id and name are logged
    """
    # The dynamic tables have to be built before they are used below
    build_dynamic_tables(connection)

    for galaxy_row in galaxies:
        galaxy_id = galaxy_row[GALAXY.c.galaxy_id]
        # NOTE(review): the meaning of the trailing True flag is defined by
        # get_step_done_ids, which is not visible here
        completed_steps = get_step_done_ids(
            connection,
            galaxy_id,
            STEP_DONE_ID_MEAN_STANDARD_DEVIATION,
            True
        )

        if verbosity:
            message = 'galaxy_id: {0}, galaxy_name: {1}'.format(galaxy_id, galaxy_row[GALAXY.c.name])
            LOG.info(message)

        pending_tables = get_tables_required(completed_steps)
        if len(pending_tables) == 0:
            continue

        for table in pending_tables:
            # One transaction per table so the results and the step-done
            # marker are written together
            with connection.begin():
                calculator = CreateMeanStandardDeviation(connection, table, galaxy_id)
                calculator.calculate_details()
                calculator.update_details()
                add_step_done_id(connection, galaxy_id, STEP_DONE_ID_MEAN_STANDARD_DEVIATION, table.name)
Example #2
0
def process_galaxy(connection, galaxies, verbosity):
    """
    Build the mask for each galaxy that does not have one yet

    A galaxy is skipped when STEP_DONE_ID_BUILD_MASK is already among the
    steps returned by get_step_done_ids; otherwise ProcessGalaxyMasks is
    run and the step is recorded, both inside a single transaction.

    :param connection: the database connection used for all queries and
        for the per-galaxy transaction
    :param galaxies: iterable of rows indexable by the GALAXY columns
    :param verbosity: when truthy the galaxy id and name are logged
    """
    # The dynamic tables have to be built before they are used below
    build_dynamic_tables(connection)

    for galaxy_row in galaxies:
        galaxy_id = galaxy_row[GALAXY.c.galaxy_id]
        completed_steps = get_step_done_ids(connection, galaxy_id, STEP_DONE_ID_BUILD_MASK)

        if verbosity:
            message = 'galaxy_id: {0}, galaxy_name: {1}'.format(galaxy_id, galaxy_row[GALAXY.c.name])
            LOG.info(message)

        # Already built - nothing to do for this galaxy
        if STEP_DONE_ID_BUILD_MASK in completed_steps:
            continue

        with connection.begin():
            mask_builder = ProcessGalaxyMasks(connection, galaxy_id)
            mask_builder.run()
            add_step_done_id(connection, galaxy_id, STEP_DONE_ID_BUILD_MASK)
Example #3
0
def _copy_galaxy_to_mongo(connection_mysql, galaxy, galaxy_id, radius, galaxies_target, sed_target, original_target):
    """
    Copy a single galaxy from MySQL into the three Mongo collections

    Nothing is written unless the galaxy has an IMAGE_DETAIL row and both
    get_galaxy_data calls return at least one entry.
    """
    # The original author notes that IMAGE_DETAIL is needed for later
    # calculations, so galaxies without one are skipped
    image_detail_query = select([IMAGE_DETAIL]).where(IMAGE_DETAIL.c.galaxy_id == galaxy_id)
    if connection_mysql.execute(image_detail_query).first() is None:
        return

    sed_entries = get_galaxy_data(connection_mysql, galaxy_id, DATA_TABLES_SED, radius)
    original_entries = get_galaxy_data(connection_mysql, galaxy_id, DATA_TABLES_ORIGINAL, radius)
    if len(sed_entries) == 0 or len(original_entries) == 0:
        return

    galaxy_document = {
        'galaxy_id': galaxy_id,
        'run_id': galaxy[GALAXY.c.run_id],
        'name': galaxy[GALAXY.c.name],
        'dimension_x': galaxy[GALAXY.c.dimension_x],
        'dimension_y': galaxy[GALAXY.c.dimension_y],
        'dimension_z': galaxy[GALAXY.c.dimension_z],
        'redshift': float(galaxy[GALAXY.c.redshift]),
        'galaxy_type': galaxy[GALAXY.c.galaxy_type],
        'galaxy_detail': get_galaxy_details(connection_mysql, galaxy_id),
        'mask_radius': radius
    }

    # Put it into Mongo
    galaxies_target.insert_one(galaxy_document)
    sed_target.insert_many(sed_entries)
    original_target.insert_many(original_entries)


def process_galaxy(connection_mysql, connection_mongodb, database_name, galaxies, radius):
    """
    Copy the galaxies from MySQL into MongoDB

    For each galaxy the summary document goes into COLLECTION_GALAXIES and
    the two data sets returned by get_galaxy_data go into
    COLLECTION_GALAXY_DATA_SED and COLLECTION_GALAXY_DATA_ORIGINAL. The
    time taken per galaxy is always logged, whether or not anything was
    written.

    :param connection_mysql: the MySQL connection the data is read from
    :param connection_mongodb: the Mongo connection the data is written to
    :param database_name: the name of the Mongo database to use
    :param galaxies: iterable of rows indexable by the GALAXY columns
    :param radius: passed to get_galaxy_data and stored as 'mask_radius'
    """
    # Get the mongodb database and the collections we write to
    mongo_db = Database(connection_mongodb, database_name)
    galaxies_target = Collection(mongo_db, COLLECTION_GALAXIES)
    sed_target = Collection(mongo_db, COLLECTION_GALAXY_DATA_SED)
    original_target = Collection(mongo_db, COLLECTION_GALAXY_DATA_ORIGINAL)

    # Build the tables
    build_dynamic_tables(connection_mysql)

    for galaxy in galaxies:
        started_at = time.time()
        galaxy_id = galaxy[GALAXY.c.galaxy_id]

        _copy_galaxy_to_mongo(
            connection_mysql,
            galaxy,
            galaxy_id,
            radius,
            galaxies_target,
            sed_target,
            original_target
        )

        elapsed = time.time() - started_at
        LOG.info('galaxy_id: {0}, time: {1:.3f}s'.format(galaxy_id, elapsed))
def process_galaxy(connection, galaxies, verbosity):
    """
    Calculate the centroid and centre of mass for each galaxy

    A galaxy is only processed when STEP_DONE_ID_BUILD_MASK has been
    recorded for it and STEP_DONE_ID_CENTROID_CENTRE_OF_MASS has not. The
    result is written as one IMAGE_DETAIL row and the step is recorded as
    done, both inside a single transaction.

    :param connection: the database connection used for all queries and
        for the per-galaxy transaction
    :param galaxies: iterable of rows indexable by the GALAXY columns
    :param verbosity: passed to CentroidCentreOfMass; when >= 1 the reason
        a galaxy was skipped is logged
    """
    # Build the tables
    build_dynamic_tables(connection)

    for galaxy in galaxies:
        galaxy_id = galaxy[GALAXY.c.galaxy_id]
        steps_done = get_step_done_ids(
            connection,
            galaxy_id,
            [STEP_DONE_ID_BUILD_MASK, STEP_DONE_ID_CENTROID_CENTRE_OF_MASS]
        )

        # Have we built the mask?
        if STEP_DONE_ID_BUILD_MASK not in steps_done:
            if verbosity >= 1:
                LOG.info('The mask has not been built for the galaxy {0}'.format(galaxy_id))
            continue

        if STEP_DONE_ID_CENTROID_CENTRE_OF_MASS in steps_done:
            if verbosity >= 1:
                LOG.info('Nothing to do for the galaxy {0}'.format(galaxy_id))
            continue

        with connection.begin():
            centroid_center_of_mass = CentroidCentreOfMass(connection, galaxy, verbosity)
            centroid_center_of_mass.calculate_results()

            if len(centroid_center_of_mass.layers) > 0:
                # numpy.asscalar was deprecated in NumPy 1.16 and is gone
                # from current releases; .item() is the documented
                # replacement and returns the same Python scalar
                x_centroid = centroid_center_of_mass.centroid[0].item()
                y_centroid = centroid_center_of_mass.centroid[1].item()
                x_centre_of_mass = centroid_center_of_mass.centre_of_mass[0].item()
                y_centre_of_mass = centroid_center_of_mass.centre_of_mass[1].item()
            else:
                # No layers - store -1.0 in place of the missing positions
                x_centroid = -1.0
                y_centroid = -1.0
                x_centre_of_mass = -1.0
                y_centre_of_mass = -1.0

            connection.execute(
                IMAGE_DETAIL.insert(),
                galaxy_id=galaxy_id,
                x_image=centroid_center_of_mass.dimension_x / 2.0,
                y_image=centroid_center_of_mass.dimension_y / 2.0,
                x_centroid=x_centroid,
                y_centroid=y_centroid,
                x_centre_of_mass=x_centre_of_mass,
                y_centre_of_mass=y_centre_of_mass
            )
            add_step_done_id(connection, galaxy_id, STEP_DONE_ID_CENTROID_CENTRE_OF_MASS)
Example #5
0
def process_galaxy(connection, galaxies, verbosity):
    """
    Build the galaxy details for each galaxy

    For every galaxy the steps already recorded against
    STEP_DONE_ID_BUILD_GALAXY_DETAILS are fetched and handed to
    get_tables_required; each table that comes back is processed with
    CreateGalaxyDetail and then recorded as done. The time taken is
    logged per table and per galaxy.

    :param connection: the database connection used for all queries and
        for the per-table transactions
    :param galaxies: iterable of rows indexable by the GALAXY columns
    :param verbosity: when truthy the galaxy id and name are logged
    """
    # The dynamic tables have to be built before they are used below
    build_dynamic_tables(connection)

    for galaxy_row in galaxies:
        galaxy_started = time.time()

        galaxy_id = galaxy_row[GALAXY.c.galaxy_id]
        # NOTE(review): the meaning of the trailing True flag is defined by
        # get_step_done_ids, which is not visible here
        completed_steps = get_step_done_ids(
            connection,
            galaxy_id,
            STEP_DONE_ID_BUILD_GALAXY_DETAILS,
            True
        )

        if verbosity:
            message = 'galaxy_id: {0}, galaxy_name: {1}'.format(galaxy_id, galaxy_row[GALAXY.c.name])
            LOG.info(message)

        pending_tables = get_tables_required(completed_steps)
        if len(pending_tables) > 0:
            for table in pending_tables:
                # One transaction per table so the details and the
                # step-done marker are written together
                with connection.begin():
                    table_started = time.time()
                    detail_builder = CreateGalaxyDetail(connection, table, galaxy_id)
                    detail_builder.calculate_details()
                    detail_builder.insert_details()
                    add_step_done_id(connection, galaxy_id, STEP_DONE_ID_BUILD_GALAXY_DETAILS, table.name)
                    table_elapsed = time.time() - table_started
                    LOG.info(
                        'galaxy_id: {0}, table: {1}, time: {2:.3f}s'.format(galaxy_id, table.name, table_elapsed)
                    )

        # The overall time is logged even when there was nothing to do
        galaxy_elapsed = time.time() - galaxy_started
        LOG.info('galaxy_id: {0}, time: {1:.3f}s'.format(galaxy_id, galaxy_elapsed))
Example #6
0
    def run(self):
        """
        Load the data for every galaxy id, then shuffle and write it out

        A fresh connection is opened from DB_LOGIN and always closed again.
        Any exception raised while loading is logged and clears
        self._still_valid, in which case the shuffle and write are skipped.
        The time taken by each phase is logged.
        """
        started_at = time.time()
        self._connection = create_engine(DB_LOGIN).connect()
        # noinspection PyBroadException
        try:
            build_dynamic_tables(self._connection)
            for galaxy_id in self._galaxy_ids:
                fetch_started = time.time()
                self._get_data(galaxy_id)
                build_started = time.time()
                self._build_csv_lines()
                build_finished = time.time()
                LOG.info(
                    "galaxy_id: {0}, get_data: {1:.3f}s, build_csv_lines: {2:.3f}s".format(
                        galaxy_id,
                        build_started - fetch_started,
                        build_finished - build_started
                    )
                )
        except Exception:
            # Remember the failure so nothing partial is written below
            LOG.exception("Getting data")
            self._still_valid = False
        finally:
            self._connection.close()

        load_finished = time.time()
        LOG.info("get_all_data: {0:.3f}s".format(load_finished - started_at))

        if self._still_valid:
            # Now shuffle the elements
            shuffle(self._csv_line)

            shuffle_finished = time.time()
            LOG.info("shuffle: {0:.3f}s".format(shuffle_finished - load_finished))
            self._write_to_file()
            LOG.info("write_to_file: {0:.3f}s".format(time.time() - shuffle_finished))

        LOG.info("all: {0:.3f}s".format(time.time() - started_at))
Example #7
0
def process_galaxy(connection, galaxies):
    """
    Store the normalised values for each galaxy

    For every table returned by get_tables_required the matching
    GALAXY_DETAIL row supplies the mean, standard deviation, minimum and
    maximum. Each selected value of the table is then written to the
    matching '<table name>__extended' table as a z-score, a min-max
    scaled value, a log value and a softmax value, and
    STEP_DONE_ID_NORMALISE_Z_MIN_MAX is recorded for the table.

    Nothing is inserted for a table when there is no GALAXY_DETAIL row,
    when any of the four statistics is NULL, or when the standard
    deviation or the range (max - min) is zero. The step is still
    recorded as done in those cases.

    :param connection: the database connection used for all queries and
        for the per-table transactions
    :param galaxies: iterable of rows indexable by the GALAXY columns
    """
    # Build the tables
    build_dynamic_tables(connection)

    for galaxy in galaxies:
        galaxy_id = galaxy[GALAXY.c.galaxy_id]
        steps_done = get_step_done_ids(
            connection,
            galaxy_id,
            [STEP_DONE_ID_BUILD_GALAXY_DETAILS, STEP_DONE_ID_NORMALISE_Z_MIN_MAX],
            True
        )

        tables_required = get_tables_required(steps_done)
        if len(tables_required) == 0:
            continue

        time_start = time.time()
        for table in tables_required:
            with connection.begin():
                time_start_update = time.time()
                mean_standard = connection.execute(
                    select([GALAXY_DETAIL]).where(
                        and_(
                            GALAXY_DETAIL.c.galaxy_id == galaxy_id,
                            GALAXY_DETAIL.c.table_name == table.name
                        )
                    )
                ).first()

                insert_data = []
                if mean_standard is not None:
                    mean = mean_standard[GALAXY_DETAIL.c.mean]
                    standard_deviation = mean_standard[GALAXY_DETAIL.c.standard_deviation]
                    min_value = mean_standard[GALAXY_DETAIL.c.min_value]
                    max_value = mean_standard[GALAXY_DETAIL.c.max_value]

                    # Check for NULL statistics BEFORE doing any arithmetic:
                    # subtracting None raises TypeError, so testing the
                    # difference for None afterwards can never help
                    statistics = (mean, standard_deviation, min_value, max_value)
                    have_statistics = all(statistic is not None for statistic in statistics)

                    max_minus_min = max_value - min_value if have_statistics else None

                    # A zero range or standard deviation would divide by zero
                    if have_statistics and max_minus_min != 0.0 and standard_deviation != 0.0:
                        # Only points inside the mask are normalised
                        conditions = [
                            table.c.galaxy_id == galaxy_id,
                            table.c.galaxy_id == MASK_POINT.c.galaxy_id,
                            table.c.x == MASK_POINT.c.x,
                            table.c.y == MASK_POINT.c.y
                        ]
                        if table.name.startswith('original_value__'):
                            # The original value tables also ignore
                            # non-positive values
                            conditions.append(table.c.value > 0.0)
                        select_statement = select([table]).where(and_(*conditions))

                        for table_data in connection.execute(select_statement):
                            value = table_data[table.c.value]
                            z_score = (value - mean) / standard_deviation
                            insert_data.append(
                                {
                                    'original_id': table_data[table.c.id],
                                    'value_zscore': z_score,
                                    'value_min_max': (value - min_value) / max_minus_min,
                                    'value_ln': math.log(value - min_value + 1),
                                    'value_softmax': 1.0 / (1.0 + math.exp(-z_score))
                                }
                            )

                if len(insert_data) > 0:
                    table_extended = DATA_TABLES_EXTENDED[table.name + '__extended']
                    connection.execute(table_extended.insert(), insert_data)

                add_step_done_id(
                    connection,
                    galaxy_id,
                    STEP_DONE_ID_NORMALISE_Z_MIN_MAX,
                    table.name
                )
                time_end_update = time.time()
                LOG.info(
                    'galaxy_id: {0}, table: {1}__extended, time: {2:.3f}s, inserts: {3}'.format(
                        galaxy_id,
                        table.name,
                        time_end_update - time_start_update,
                        len(insert_data)
                    )
                )

        time_end = time.time()
        LOG.info('galaxy_id: {0}, time: {1:.3f}s'.format(galaxy_id, time_end - time_start))
Example #8
0
def process_galaxy(connection, galaxies, verbosity, profile_name='aws-pogs'):
    """
    Load the SED and original data for each galaxy from its HDF5 file

    For every galaxy that still needs SED or original data the HDF5 file
    is copied from S3 to FAST_DISK, the missing data is added with
    add_sed_data / add_original_data, and the local copy is removed
    again. An exception raised while adding either data set is logged and
    does not stop the other data set or the remaining galaxies.

    :param connection: the database connection used for all queries
    :param galaxies: iterable of rows indexable by the GALAXY columns
    :param verbosity: when >= 1 a message is logged for each data set that
        has nothing to add
    :param profile_name: the profile name handed to S3Helper
    """
    # Build the tables
    build_dynamic_tables(connection)

    for galaxy in galaxies:
        galaxy_id = galaxy[GALAXY.c.galaxy_id]
        steps_done = get_step_done_ids(
            connection,
            galaxy_id,
            [STEP_DONE_ID_NO_HDF5_FILE, STEP_DONE_ID_ORIGINAL_VALUES, STEP_DONE_ID_SED_DATA],
            True,
            True
        )

        # Work out what data is required
        data_required_sed = get_data_required_sed(connection, steps_done)
        data_required_original = get_data_required_original(steps_done)

        if len(data_required_sed) == 0 and len(data_required_original) == 0:
            continue

        # Copy the file from S3
        s3_helper = S3Helper(profile_name=profile_name)
        galaxy_id = int(galaxy[GALAXY.c.galaxy_id])
        galaxy_name = get_galaxy_file_name(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy_id)
        # Floor division keeps the bucket prefix an int on Python 3 as well;
        # true division would give a float, which the 'd' format rejects
        s3_name = os.path.join('{0:04d}000'.format(galaxy_id // 1000), galaxy_name) + '.hdf5'
        hdf5_file_name = os.path.join(FAST_DISK, galaxy_name) + '.hdf5'

        copy_ok = s3_helper.copy_file_to_disk(BUCKET_NAME, s3_name, hdf5_file_name)

        if not copy_ok:
            LOG.error('The file for name: {0}, run_id: {1}, galaxy_id: {2} does not exist'.format(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy_id))
            continue

        # Clean up after ourselves even if something unexpected goes wrong,
        # so no open handle or local copy is left behind
        try:
            h5_file = h5py.File(hdf5_file_name, 'r')
            try:
                LOG.info('Processing SED for name: {0}, run_id: {1}, galaxy_id: {2}'.format(galaxy[GALAXY.c.name], galaxy[GALAXY.c.run_id], galaxy_id))
                # Do we have anything to do?
                if len(data_required_sed) > 0:
                    # noinspection PyBroadException
                    try:
                        # Store the SED fit values
                        add_sed_data(connection, galaxy_id, h5_file, data_required_sed)
                    except Exception:
                        LOG.exception('An exception occurred in process_galaxy processing the SED values')
                elif verbosity >= 1:
                    LOG.info('Nothing to add - SED')

                if len(data_required_original) > 0:
                    # noinspection PyBroadException
                    try:
                        add_original_data(connection, galaxy_id, h5_file, data_required_original)
                    except Exception:
                        LOG.exception('An exception occurred in process_galaxy processing the original values')
                elif verbosity >= 1:
                    LOG.info('Nothing to add - Original Data')
            finally:
                h5_file.close()
        finally:
            os.remove(hdf5_file_name)