def _save_results(self, connection, map_pixel_results, list_insert_filters, list_pixel_parameters, map_pixel_histograms):
    """
    Persist one pixel's fit results to the database.

    Updates the already-created PIXEL_RESULT row identified by
    ``self._pxresult_id`` with the computed values, bulk-inserts the filter
    rows, then inserts each pixel parameter individually (we need the
    generated primary key back) and stamps that key onto the parameter's
    histogram rows before bulk-inserting them.

    Does nothing when no result row exists yet or when ``self.noinsert``
    is set.

    :param connection: open SQLAlchemy connection
    :param map_pixel_results: column -> value mapping for the result row
    :param list_insert_filters: rows for PIXEL_FILTER
    :param list_pixel_parameters: rows for PIXEL_PARAMETER
    :param map_pixel_histograms: parameter_name_id -> list of histogram rows
    """
    if self._pxresult_id is None or self.noinsert:
        return

    # Update the previously inserted result row with the fitted values
    connection.execute(
        PIXEL_RESULT.update()
        .where(PIXEL_RESULT.c.pxresult_id == self._pxresult_id)
        .values(map_pixel_results))

    # Guard the executemany: SQLAlchemy rejects an empty parameter list
    if list_insert_filters:
        connection.execute(PIXEL_FILTER.insert(), list_insert_filters)

    # Because I need to get the PK back we have to do each one separately
    for pixel_parameter in list_pixel_parameters:
        result = connection.execute(PIXEL_PARAMETER.insert(), pixel_parameter)
        # Renamed from `id` to avoid shadowing the builtin
        pxparameter_id = result.inserted_primary_key[0]

        # Add the new PK to every histogram row for this parameter
        list_pixel_histograms = map_pixel_histograms[pixel_parameter['parameter_name_id']]
        for map_values in list_pixel_histograms:
            map_values['pxparameter_id'] = pxparameter_id
        if list_pixel_histograms:
            connection.execute(PIXEL_HISTOGRAM.insert(), list_pixel_histograms)
def _save_results(self, connection, map_pixel_results, list_insert_filters, list_pixel_parameters, map_pixel_histograms):
    """ Add the pixel to the database """
    # Nothing to do unless a result row exists and inserts are enabled
    if self._pxresult_id is None or self.noinsert:
        return

    # Update the filters
    update_statement = PIXEL_RESULT.update().where(
        PIXEL_RESULT.c.pxresult_id == self._pxresult_id
    ).values(map_pixel_results)
    connection.execute(update_statement)
    connection.execute(PIXEL_FILTER.insert(), list_insert_filters)

    # Because I need to get the PK back we have to do each one separately
    for parameter_row in list_pixel_parameters:
        insert_result = connection.execute(PIXEL_PARAMETER.insert(), parameter_row)
        generated_pk = insert_result.inserted_primary_key

        # Add the ID to the list
        histogram_rows = map_pixel_histograms[parameter_row['parameter_name_id']]
        for histogram_row in histogram_rows:
            histogram_row['pxparameter_id'] = generated_pk[0]
        connection.execute(PIXEL_HISTOGRAM.insert(), histogram_rows)
LOG.info('Range from %s to %s', list[0], list[1]) galaxy_ids = range(int(list[0]), int(list[1]) + 1) else: galaxy_ids = args['galaxy_id'] for galaxy_id_str in galaxy_ids: galaxy_id1 = int(galaxy_id_str) galaxy = connection.execute(select([GALAXY]).where(GALAXY.c.galaxy_id == galaxy_id1)).first() if galaxy is None: LOG.info('Error: Galaxy with galaxy_id of %d was not found', galaxy_id1) else: # Have we got work units out there for this galaxy? LOG.info('Working on galaxy %s (%d)', galaxy[GALAXY.c.name], galaxy[GALAXY.c.version_number]) deleted_galaxy = 0 transaction = connection.begin() count = 0 for pxresult_id in connection.execute(select([PIXEL_RESULT.c.pxresult_id]).where(PIXEL_RESULT.c.galaxy_id == galaxy[GALAXY.c.galaxy_id]).order_by(PIXEL_RESULT.c.pxresult_id)): if not (count % 1000): LOG.info('Deleting low pixel_histogram values from galaxy {0} pixel {1} : Deleted total {2} galaxy {3}'.format(galaxy[GALAXY.c.galaxy_id], pxresult_id[0], deleted_total, deleted_galaxy)) result_proxy = connection.execute(PIXEL_HISTOGRAM.delete().where(and_(PIXEL_HISTOGRAM.c.pxresult_id == pxresult_id[0], PIXEL_HISTOGRAM.c.hist_value < MIN_HIST_VALUE))) deleted_total += result_proxy.rowcount deleted_galaxy += result_proxy.rowcount count += 1 transaction.commit() LOG.info('Removed %d really small histogram values from %s (%d)', deleted_galaxy, galaxy.name, galaxy.version_number) connection.close() LOG.info('Done - removed %d really small histogram values.', deleted_total)
def store_pixels(connection, galaxy_id, group, dimension_x, dimension_y, dimension_z, pixel_count):
    """
    Store the pixel data for one galaxy into the HDF5 group.

    Builds a (x, y, parameter, image) numpy cube of best-fit / percentile
    values plus HDF5 datasets for per-pixel details, parameters, filters and
    histograms, then walks every PIXEL_RESULT row for the galaxy filling
    them in. Histogram rows are streamed into one flat list dataset, with a
    region reference per (x, y, parameter) cell pointing at its slice.

    :param connection: open SQLAlchemy connection
    :param galaxy_id: galaxy whose pixels are being stored
    :param group: h5py group to create the datasets in
    :param dimension_x: x extent of the pixel grid
    :param dimension_y: y extent of the pixel grid
    :param dimension_z: number of filter layers per pixel
    :param pixel_count: expected number of pixels (progress logging only)
    :return: the number of pixel rows actually processed
    """
    LOG.info('Storing the pixel data - {0} pixels to process'.format(pixel_count))

    # Total histogram rows for this galaxy - sizes the flat histogram dataset
    count = connection.execute(
        select([func.count(PIXEL_HISTOGRAM.c.pxhistogram_id)],
               from_obj=PIXEL_HISTOGRAM.join(PIXEL_RESULT))
        .where(PIXEL_RESULT.c.galaxy_id == galaxy_id)).first()[0]

    # numpy.float was removed in numpy >= 1.24; float64 is what it aliased
    data = numpy.empty((dimension_x, dimension_y, NUMBER_PARAMETERS, NUMBER_IMAGES), dtype=numpy.float64)
    # numpy.NaN was removed in numpy 2.0; numpy.nan is the portable spelling
    data.fill(numpy.nan)

    data_pixel_details = group.create_dataset('pixel_details', (dimension_x, dimension_y), dtype=data_type_pixel, compression='gzip')
    data_pixel_parameters = group.create_dataset('pixel_parameters', (dimension_x, dimension_y, NUMBER_PARAMETERS), dtype=data_type_pixel_parameter, compression='gzip')
    data_pixel_filter = group.create_dataset('pixel_filters', (dimension_x, dimension_y, dimension_z), dtype=data_type_pixel_filter, compression='gzip')
    data_pixel_histograms_grid = group.create_dataset('pixel_histograms_grid', (dimension_x, dimension_y, NUMBER_PARAMETERS), dtype=h5py.special_dtype(ref=h5py.RegionReference), compression='gzip')
    data_pixel_histograms_list = group.create_dataset('pixel_histograms_list', (count,), dtype=data_type_pixel_histogram, compression='gzip')

    # `count` is reused from here on as the processed-pixel counter
    count = 0
    pixel_histogram_count = 0
    for pixel_result in connection.execute(select([PIXEL_RESULT]).where(PIXEL_RESULT.c.galaxy_id == galaxy_id)):
        count += 1
        if count % 500 == 0:
            LOG.info('Processed {0} of {1}'.format(count, pixel_count))
        pxresult_id = pixel_result[PIXEL_RESULT.c.pxresult_id]
        x = pixel_result[PIXEL_RESULT.c.x]
        y = pixel_result[PIXEL_RESULT.c.y]

        # Best-fit values for every physical parameter of this pixel
        data[x, y, INDEX_F_MU_SFH, INDEX_BEST_FIT] = pixel_result[PIXEL_RESULT.c.fmu_sfh]
        data[x, y, INDEX_F_MU_IR, INDEX_BEST_FIT] = pixel_result[PIXEL_RESULT.c.fmu_ir]
        data[x, y, INDEX_MU_PARAMETER, INDEX_BEST_FIT] = pixel_result[PIXEL_RESULT.c.mu]
        data[x, y, INDEX_TAU_V, INDEX_BEST_FIT] = pixel_result[PIXEL_RESULT.c.tauv]
        data[x, y, INDEX_SSFR_0_1GYR, INDEX_BEST_FIT] = pixel_result[PIXEL_RESULT.c.s_sfr]
        data[x, y, INDEX_M_STARS, INDEX_BEST_FIT] = pixel_result[PIXEL_RESULT.c.m]
        data[x, y, INDEX_L_DUST, INDEX_BEST_FIT] = pixel_result[PIXEL_RESULT.c.ldust]
        data[x, y, INDEX_T_C_ISM, INDEX_BEST_FIT] = pixel_result[PIXEL_RESULT.c.t_c_ism]
        data[x, y, INDEX_T_W_BC, INDEX_BEST_FIT] = pixel_result[PIXEL_RESULT.c.t_w_bc]
        data[x, y, INDEX_XI_C_TOT, INDEX_BEST_FIT] = pixel_result[PIXEL_RESULT.c.xi_c_tot]
        data[x, y, INDEX_XI_PAH_TOT, INDEX_BEST_FIT] = pixel_result[PIXEL_RESULT.c.xi_pah_tot]
        data[x, y, INDEX_XI_MIR_TOT, INDEX_BEST_FIT] = pixel_result[PIXEL_RESULT.c.xi_mir_tot]
        data[x, y, INDEX_XI_W_TOT, INDEX_BEST_FIT] = pixel_result[PIXEL_RESULT.c.x_w_tot]
        data[x, y, INDEX_TAU_V_ISM, INDEX_BEST_FIT] = pixel_result[PIXEL_RESULT.c.tvism]
        data[x, y, INDEX_M_DUST, INDEX_BEST_FIT] = pixel_result[PIXEL_RESULT.c.mdust]
        data[x, y, INDEX_SFR_0_1GYR, INDEX_BEST_FIT] = pixel_result[PIXEL_RESULT.c.sfr]

        data_pixel_details[x, y] = (
            pxresult_id,
            pixel_result[PIXEL_RESULT.c.area_id],
            pixel_result[PIXEL_RESULT.c.i_sfh],
            pixel_result[PIXEL_RESULT.c.i_ir],
            pixel_result[PIXEL_RESULT.c.chi2],
            pixel_result[PIXEL_RESULT.c.redshift],
            pixel_result[PIXEL_RESULT.c.i_opt],
            pixel_result[PIXEL_RESULT.c.dmstar],
            pixel_result[PIXEL_RESULT.c.dfmu_aux],
            pixel_result[PIXEL_RESULT.c.dz],
        )

        # Per-parameter percentiles, probability bins, and histogram slices
        for pixel_parameter in connection.execute(select([PIXEL_PARAMETER]).where(PIXEL_PARAMETER.c.pxresult_id == pxresult_id).order_by(PIXEL_PARAMETER.c.pxparameter_id)):
            if pixel_parameter[PIXEL_PARAMETER.c.parameter_name_id] is not None:
                # parameter_name_id is 1-based; the cube's axis is 0-based
                z = pixel_parameter[PIXEL_PARAMETER.c.parameter_name_id] - 1
                data[x, y, z, INDEX_PERCENTILE_50] = pixel_parameter[PIXEL_PARAMETER.c.percentile50]
                data[x, y, z, INDEX_HIGHEST_PROB_BIN] = pixel_parameter[PIXEL_PARAMETER.c.high_prob_bin] if pixel_parameter[PIXEL_PARAMETER.c.high_prob_bin] is not None else numpy.nan
                data[x, y, z, INDEX_PERCENTILE_2_5] = pixel_parameter[PIXEL_PARAMETER.c.percentile2_5]
                data[x, y, z, INDEX_PERCENTILE_16] = pixel_parameter[PIXEL_PARAMETER.c.percentile16]
                data[x, y, z, INDEX_PERCENTILE_84] = pixel_parameter[PIXEL_PARAMETER.c.percentile84]
                data[x, y, z, INDEX_PERCENTILE_97_5] = pixel_parameter[PIXEL_PARAMETER.c.percentile97_5]

                # NULL database values become NaN in the HDF5 output
                first_prob_bin = pixel_parameter[PIXEL_PARAMETER.c.first_prob_bin] if pixel_parameter[PIXEL_PARAMETER.c.first_prob_bin] is not None else numpy.nan
                last_prob_bin = pixel_parameter[PIXEL_PARAMETER.c.last_prob_bin] if pixel_parameter[PIXEL_PARAMETER.c.last_prob_bin] is not None else numpy.nan
                bin_step = pixel_parameter[PIXEL_PARAMETER.c.bin_step] if pixel_parameter[PIXEL_PARAMETER.c.bin_step] is not None else numpy.nan
                data_pixel_parameters[x, y, z] = (
                    first_prob_bin,
                    last_prob_bin,
                    bin_step,
                )

                # Append this parameter's histogram rows to the flat list
                pixel_histogram_start = pixel_histogram_count
                for pixel_histogram in connection.execute(select([PIXEL_HISTOGRAM]).where(PIXEL_HISTOGRAM.c.pxparameter_id == pixel_parameter[PIXEL_PARAMETER.c.pxparameter_id]).order_by(PIXEL_HISTOGRAM.c.pxhistogram_id)):
                    data_pixel_histograms_list[pixel_histogram_count] = (
                        pixel_histogram[PIXEL_HISTOGRAM.c.x_axis],
                        pixel_histogram[PIXEL_HISTOGRAM.c.hist_value],
                    )
                    pixel_histogram_count += 1
                # only store it if we have it
                if pixel_histogram_count - pixel_histogram_start > 0:
                    data_pixel_histograms_grid[x, y, z] = data_pixel_histograms_list.regionref[pixel_histogram_start:pixel_histogram_count]

        filter_layer = 0
        for pixel_filter in connection.execute(select([PIXEL_FILTER]).where(PIXEL_FILTER.c.pxresult_id == pxresult_id).order_by(PIXEL_FILTER.c.pxfilter_id)):
            # Sometimes we seem to get two versions of the data - just use the first set
            if filter_layer < dimension_z:
                data_pixel_filter[x, y, filter_layer] = (
                    pixel_filter[PIXEL_FILTER.c.observed_flux],
                    pixel_filter[PIXEL_FILTER.c.observational_uncertainty],
                    pixel_filter[PIXEL_FILTER.c.flux_bfm],
                )
            filter_layer += 1

    # Record which cube index each parameter / image plane lives at
    pixel_dataset = group.create_dataset('pixels', data=data, compression='gzip')
    pixel_dataset.attrs['DIM3_F_MU_SFH'] = INDEX_F_MU_SFH
    pixel_dataset.attrs['DIM3_F_MU_IR'] = INDEX_F_MU_IR
    pixel_dataset.attrs['DIM3_MU_PARAMETER'] = INDEX_MU_PARAMETER
    pixel_dataset.attrs['DIM3_TAU_V'] = INDEX_TAU_V
    pixel_dataset.attrs['DIM3_SSFR_0_1GYR'] = INDEX_SSFR_0_1GYR
    pixel_dataset.attrs['DIM3_M_STARS'] = INDEX_M_STARS
    pixel_dataset.attrs['DIM3_L_DUST'] = INDEX_L_DUST
    pixel_dataset.attrs['DIM3_T_C_ISM'] = INDEX_T_C_ISM
    pixel_dataset.attrs['DIM3_T_W_BC'] = INDEX_T_W_BC
    pixel_dataset.attrs['DIM3_XI_C_TOT'] = INDEX_XI_C_TOT
    pixel_dataset.attrs['DIM3_XI_PAH_TOT'] = INDEX_XI_PAH_TOT
    pixel_dataset.attrs['DIM3_XI_MIR_TOT'] = INDEX_XI_MIR_TOT
    pixel_dataset.attrs['DIM3_XI_W_TOT'] = INDEX_XI_W_TOT
    pixel_dataset.attrs['DIM3_TAU_V_ISM'] = INDEX_TAU_V_ISM
    pixel_dataset.attrs['DIM3_M_DUST'] = INDEX_M_DUST
    pixel_dataset.attrs['DIM3_SFR_0_1GYR'] = INDEX_SFR_0_1GYR
    pixel_dataset.attrs['DIM4_BEST_FIT'] = INDEX_BEST_FIT
    pixel_dataset.attrs['DIM4_PERCENTILE_50'] = INDEX_PERCENTILE_50
    pixel_dataset.attrs['DIM4_HIGHEST_PROB_BIN'] = INDEX_HIGHEST_PROB_BIN
    pixel_dataset.attrs['DIM4_PERCENTILE_2_5'] = INDEX_PERCENTILE_2_5
    pixel_dataset.attrs['DIM4_PERCENTILE_16'] = INDEX_PERCENTILE_16
    pixel_dataset.attrs['DIM4_PERCENTILE_84'] = INDEX_PERCENTILE_84
    pixel_dataset.attrs['DIM4_PERCENTILE_97_5'] = INDEX_PERCENTILE_97_5

    # Map filter names to their layer index, taken from one sample pixel
    # (assumes every pixel carries the same filter ordering - TODO confirm)
    pxresult_id = connection.execute(select([PIXEL_RESULT.c.pxresult_id]).where(PIXEL_RESULT.c.galaxy_id == galaxy_id)).first()[0]
    filter_layer = 0
    for pixel_filter in connection.execute(select([PIXEL_FILTER]).where(PIXEL_FILTER.c.pxresult_id == pxresult_id).order_by(PIXEL_FILTER.c.pxfilter_id)):
        data_pixel_filter.attrs[pixel_filter[PIXEL_FILTER.c.filter_name]] = filter_layer
        filter_layer += 1

    return count
select([ AREA.c.area_id ]).where(AREA.c.galaxy_id == galaxy_id_aws).order_by( AREA.c.area_id)): deleted_area_count += 1 for pxresult_id1 in connection_aws.execute( select([PIXEL_RESULT.c.pxresult_id ]).where(PIXEL_RESULT.c.area_id == area_id1[0]).order_by( PIXEL_RESULT.c.pxresult_id)): deleted_pixel_count += 1 connection_aws.execute(PIXEL_FILTER.delete().where( PIXEL_FILTER.c.pxresult_id == pxresult_id1[0])) connection_aws.execute(PIXEL_PARAMETER.delete().where( PIXEL_PARAMETER.c.pxresult_id == pxresult_id1[0])) connection_aws.execute(PIXEL_HISTOGRAM.delete().where( PIXEL_HISTOGRAM.c.pxresult_id == pxresult_id1[0])) connection_aws.execute(PIXEL_RESULT.delete().where( PIXEL_RESULT.c.area_id == area_id1[0])) transaction_aws.commit() transaction_aws = connection_aws.begin() # Give the rest of the world a chance to access the database time.sleep(1) transaction_aws.commit() end_time = time.time() LOG.info('Galaxy with galaxy_id of %d was archived.', galaxy_id1) LOG.info('Copied %d areas %d pixels.', area_count, pixel_count) LOG.info('Deleted %d areas %d pixels.', deleted_area_count,
insert_only(IMAGE_FILTERS_USED, image_filters_used, connection_pleiades) transaction_pleiades.commit() copy_end_time = time.time() # Now we can delete the bits we don't need deleted_area_count = 0 deleted_pixel_count = 0 if False: for area_id1 in connection_aws.execute(select([AREA.c.area_id]).where(AREA.c.galaxy_id == galaxy_id_aws).order_by(AREA.c.area_id)): deleted_area_count += 1 for pxresult_id1 in connection_aws.execute(select([PIXEL_RESULT.c.pxresult_id]).where(PIXEL_RESULT.c.area_id == area_id1[0]).order_by(PIXEL_RESULT.c.pxresult_id)): deleted_pixel_count += 1 connection_aws.execute(PIXEL_FILTER.delete().where(PIXEL_FILTER.c.pxresult_id == pxresult_id1[0])) connection_aws.execute(PIXEL_PARAMETER.delete().where(PIXEL_PARAMETER.c.pxresult_id == pxresult_id1[0])) connection_aws.execute(PIXEL_HISTOGRAM.delete().where(PIXEL_HISTOGRAM.c.pxresult_id == pxresult_id1[0])) connection_aws.execute(PIXEL_RESULT.delete().where(PIXEL_RESULT.c.area_id == area_id1[0])) transaction_aws.commit() transaction_aws = connection_aws.begin() # Give the rest of the world a chance to access the database time.sleep(1) transaction_aws.commit() end_time = time.time() LOG.info('Galaxy with galaxy_id of %d was archived.', galaxy_id1) LOG.info('Copied %d areas %d pixels.', area_count, pixel_count) LOG.info('Deleted %d areas %d pixels.', deleted_area_count, deleted_pixel_count) total_time = end_time - start_time
): deleted_area_count += 1 for pxresult_id1 in connection_aws.execute( select([PIXEL_RESULT.c.pxresult_id]) .where(PIXEL_RESULT.c.area_id == area_id1[0]) .order_by(PIXEL_RESULT.c.pxresult_id) ): deleted_pixel_count += 1 connection_aws.execute( PIXEL_FILTER.delete().where(PIXEL_FILTER.c.pxresult_id == pxresult_id1[0]) ) connection_aws.execute( PIXEL_PARAMETER.delete().where(PIXEL_PARAMETER.c.pxresult_id == pxresult_id1[0]) ) connection_aws.execute( PIXEL_HISTOGRAM.delete().where(PIXEL_HISTOGRAM.c.pxresult_id == pxresult_id1[0]) ) connection_aws.execute(PIXEL_RESULT.delete().where(PIXEL_RESULT.c.area_id == area_id1[0])) transaction_aws.commit() transaction_aws = connection_aws.begin() # Give the rest of the world a chance to access the database time.sleep(1) transaction_aws.commit() end_time = time.time() LOG.info("Galaxy with galaxy_id of %d was archived.", galaxy_id1) LOG.info("Copied %d areas %d pixels.", area_count, pixel_count) LOG.info("Deleted %d areas %d pixels.", deleted_area_count, deleted_pixel_count)
# Have we got work units out there for this galaxy? LOG.info('Working on galaxy %s (%d)', galaxy[GALAXY.c.name], galaxy[GALAXY.c.version_number]) deleted_galaxy = 0 transaction = connection.begin() count = 0 for pxresult_id in connection.execute( select([ PIXEL_RESULT.c.pxresult_id ]).where(PIXEL_RESULT.c.galaxy_id == galaxy[ GALAXY.c.galaxy_id]).order_by(PIXEL_RESULT.c.pxresult_id)): if not (count % 1000): LOG.info( 'Deleting low pixel_histogram values from galaxy {0} pixel {1} : Deleted total {2} galaxy {3}' .format(galaxy[GALAXY.c.galaxy_id], pxresult_id[0], deleted_total, deleted_galaxy)) result_proxy = connection.execute(PIXEL_HISTOGRAM.delete().where( and_(PIXEL_HISTOGRAM.c.pxresult_id == pxresult_id[0], PIXEL_HISTOGRAM.c.hist_value < MIN_HIST_VALUE))) deleted_total += result_proxy.rowcount deleted_galaxy += result_proxy.rowcount count += 1 transaction.commit() LOG.info('Removed %d really small histogram values from %s (%d)', deleted_galaxy, galaxy.name, galaxy.version_number) connection.close() LOG.info('Done - removed %d really small histogram values.', deleted_total)