def get_timings(session_id: (int, NoneType)=None, test_id: (int, NoneType)=None): now = flux.current_timeline.time() total_clause = case( [ (Timing.total < 0, now + Timing.total) ], else_=Timing.total) if session_id is None and test_id is None: return {} if session_id is not None: total_sum_subquery = db.session.query(Timing.name, func.sum(total_clause).label('total_time')).\ group_by(Timing.session_id, Timing.name).filter_by(session_id=session_id).subquery() query = db.session.query(func.json_object_agg(total_sum_subquery.c.name, total_sum_subquery.c.total_time)) else: query = db.session.query(func.json_object_agg(Timing.name, total_clause)).\ filter_by(test_id=test_id) return query.scalar() or {}
def _get_metadata_query(*, entity_type, entity_id):
    """Build the query aggregating an entity's metadata into one JSON object.

    :param entity_type: ``'session'`` or ``'test'``; anything else is reported
        through ``error_abort`` with a "bad request" status code.
    :param entity_id: an ``int`` is matched against the related model's ``id``
        column, any other value against its ``logical_id`` column.
    :returns: the (not yet executed) query object.
    """
    metadata_model = _get_metadata_model(entity_type)
    aggregated = func.json_object_agg(
        metadata_model.key, metadata_model.metadata_item)
    query = db.session.query(aggregated)

    if entity_type == 'session':
        related = Session
    elif entity_type == 'test':
        related = Test
    else:
        error_abort('Invalid entity type', requests.codes.bad_request)

    # Integer ids address the primary key, everything else the logical id.
    id_column = related.id if isinstance(entity_id, int) else related.logical_id
    return query.join(related).filter(id_column == entity_id)
def _get_metadata_query(*, entity_type, entity_id):
    """Return a query that folds the metadata of one entity into a JSON object.

    The metadata model is looked up from ``entity_type`` and joined to
    ``Session`` (for ``'session'``) or ``Test`` (for ``'test'``); other entity
    types go to ``error_abort`` with a "bad request" status code. The entity
    is selected by ``id`` when ``entity_id`` is an ``int`` and by
    ``logical_id`` otherwise.
    """
    model = _get_metadata_model(entity_type)
    base_query = db.session.query(
        func.json_object_agg(model.key, model.metadata_item))

    if entity_type == 'session':
        related = Session
    elif entity_type == 'test':
        related = Test
    else:
        error_abort('Invalid entity type', requests.codes.bad_request)

    joined = base_query.join(related)
    if isinstance(entity_id, int):
        return joined.filter(related.id == entity_id)
    return joined.filter(related.logical_id == entity_id)
def get_fields_query(self):
    """Build the SELECT that yields one row per session.

    Each row carries the literal ``_type``/``_index`` value ``'session'``, the
    session id as ``_id``, every ``Session`` column except ``timespan`` and
    ``parameters``, the owning user's email, and four correlated sub-selects:
    ``session_errors``, ``session_warnings``, ``session_labels`` and
    ``session_metadata``.

    NOTE(review): the ``_type``/``_index``/``_id`` names look like search-index
    document fields -- confirm with the consumer of this query.
    """
    session_model = models.Session
    session_label = models.session_label.c
    excluded_columns = {'timespan', 'parameters'}

    # Distinct (session, label) pairs, correlated to the outer session row.
    labels_query = (
        select([session_label.session_id, session_label.label_id])
        .where(session_label.session_id == session_model.id)
        .distinct()
        .correlate(session_model)
        .alias()
    )

    plain_columns = [
        getattr(session_model, column_name)
        for column_name in session_model.__table__.columns.keys()
        if column_name not in excluded_columns
    ]

    errors = select([
        func.array_agg(func.json_build_object(
            'timestamp', models.Error.timestamp,
            'message', models.Error.message,
        ))
    ]).where(models.Error.session_id == session_model.id)

    warnings = select([
        func.array_agg(func.json_build_object(
            'timestamp', models.Warning.timestamp,
            'message', models.Warning.message,
        ))
    ]).where(models.Warning.session_id == session_model.id)

    labels = select([
        func.array_agg(func.json_build_object("name", models.Label.name))
    ]).select_from(
        labels_query.join(
            models.Label, models.Label.id == labels_query.c.label_id)
    )

    metadata = select([
        func.json_object_agg(
            models.SessionMetadata.key,
            models.SessionMetadata.metadata_item,
        )
    ]).where(models.SessionMetadata.session_id == session_model.id)

    columns = [
        label("_type", text("'session'")),
        label("_index", text("'session'")),
        label("_id", session_model.id),
        *plain_columns,
        models.User.email.label('user_email'),
        errors.label('session_errors'),
        warnings.label('session_warnings'),
        labels.label('session_labels'),
        metadata.label('session_metadata'),
    ]

    # Sessions without a user are kept (outer join).
    source = session_model.__table__.outerjoin(
        models.User.__table__, session_model.user_id == models.User.id)
    return select(columns).select_from(source)
def get_team(self, team_id):
    """Fetch one team together with its leads and members.

    Team leads and ordinary members are aggregated separately into JSON
    objects keyed by user id; each member additionally carries a
    ``permissions`` object (``{}`` when the member has none).

    :param team_id: value compared against ``Team.id``.
    :returns: the result row converted with ``_asdict()`` (columns: id, name,
        ``emailAddress``, status, ``teamLeads``, ``teamMembers``), or ``None``
        when no team matches.
    """
    # --- team leads -------------------------------------------------------
    leads_sq = (
        db.session
        .query(TeamMember.team_id, User.id, User.name, User.email_address)
        .join(User)
        .filter(TeamMember.is_team_lead.is_(True))
        .order_by(User.name)
        .subquery()
    )
    lead_json = func.json_build_object(
        'emailAddress', leads_sq.c.email_address,
        'name', leads_sq.c.name,
    )
    leads_by_team = (
        db.session
        .query(
            leads_sq.c.team_id,
            func.json_object_agg(leads_sq.c.id, lead_json).label('teamLeads'),
        )
        .group_by(leads_sq.c.team_id)
        .subquery()
    )

    # --- ordinary members and their permissions ---------------------------
    members_sq = (
        db.session
        .query(
            TeamMember.id.label('team_member_id'),
            TeamMember.team_id,
            User.id,
            User.name,
            User.email_address,
        )
        .join(User)
        .filter(TeamMember.is_team_lead.is_(False))
        .order_by(User.name)
        .subquery()
    )
    # The FILTER drops the NULL permission produced by the outer join for
    # members without permissions; COALESCE then supplies an empty object.
    permissions_json = func.coalesce(
        func.json_object_agg(TeamMemberPermission.permission, True)
        .filter(TeamMemberPermission.permission.isnot(None)),
        '{}',
    ).label('permissions')
    member_permissions_sq = (
        db.session
        .query(members_sq, permissions_json)
        .join(
            TeamMemberPermission,
            TeamMemberPermission.team_member_id == members_sq.c.team_member_id,
            isouter=True,
        )
        .group_by(
            members_sq.c.team_member_id,
            members_sq.c.team_id,
            members_sq.c.id,
            members_sq.c.name,
            members_sq.c.email_address,
        )
        .order_by(members_sq.c.name)
        .subquery()
    )
    member_json = func.json_build_object(
        'emailAddress', member_permissions_sq.c.email_address,
        'name', member_permissions_sq.c.name,
        'permissions', member_permissions_sq.c.permissions,
    )
    members_by_team = (
        db.session
        .query(
            member_permissions_sq.c.team_id,
            func.json_object_agg(
                member_permissions_sq.c.id, member_json
            ).label('teamMembers'),
        )
        .group_by(member_permissions_sq.c.team_id)
        .subquery()
    )

    # --- the team itself --------------------------------------------------
    team = (
        db.session
        .query(
            Team.id,
            Team.name,
            func.coalesce(Team.email_address, '').label('emailAddress'),
            Team.status,
            leads_by_team.c.teamLeads,
            members_by_team.c.teamMembers,
        )
        .join(leads_by_team, leads_by_team.c.team_id == Team.id, isouter=True)
        .join(members_by_team, members_by_team.c.team_id == Team.id, isouter=True)
        .filter(Team.id == team_id)
        .one_or_none()
    )
    if not team:
        return None
    return team._asdict()
def _get_results(database_model_name, spectrum_index=None, filter_by_kwargs=None, limit=None):
    """Collect task results of one database model as a list of flat rows.

    :param database_model_name: attribute name of the result model on
        ``astradb``.
    :param spectrum_index: when ``None``, one row is emitted per spectrum
        (``len(result.snr)`` rows per result, each with a ``spectrum_index``
        entry); otherwise a single row per result is emitted, taking this
        index from every list/tuple valued column.
    :param filter_by_kwargs: optional keyword filters passed to ``filter_by``.
    :param limit: optional maximum number of query rows.
    :returns: a list of ``OrderedDict`` rows.

    Missing (``None``) values are replaced by NaN. Legitimate falsy values
    such as ``0``, ``0.0``, ``False`` or ``""`` are kept as they are; the
    previous ``value or np.nan`` spelling silently turned them into NaN.
    """
    def _nan_if_none(value):
        # Only a truly absent value becomes NaN; zero is real data.
        return np.nan if value is None else value

    # Result columns that are bookkeeping rather than data.
    ignore_keys = ("output_pk", "ti_pk", "associated_ti_pks")

    def _copy_result_fields(row, result, index):
        # Copy every public result column into `row`, picking element `index`
        # out of list/tuple valued (per-spectrum) columns.
        for key in result.__table__.columns.keys():
            if key in ignore_keys or key.startswith("_"):
                continue
            value = getattr(result, key)
            if isinstance(value, (tuple, list)):
                value = value[index]
            row[key] = _nan_if_none(value)

    # Get the database model
    database_model = getattr(astradb, database_model_name)

    if filter_by_kwargs is None:
        filter_by_kwargs = dict()

    q = astra_session.query(
        astradb.TaskInstance,
        func.json_object_agg(
            astradb.Parameter.parameter_name,
            astradb.Parameter.parameter_value,
        ),
        astradb.TaskInstanceMeta,
        database_model,
    ).filter(
        astradb.TaskInstance.output_pk == database_model.output_pk,
        astradb.TaskInstance.pk == astradb.TaskInstanceMeta.ti_pk,
        astradb.TaskInstance.pk == astradb.TaskInstanceParameter.ti_pk,
        astradb.TaskInstanceParameter.parameter_pk == astradb.Parameter.pk,
    ).filter_by(
        **filter_by_kwargs
    ).group_by(astradb.TaskInstance, astradb.TaskInstanceMeta, database_model)

    if limit is not None:
        q = q.limit(limit)

    rows = []
    for ti, parameters, meta, result in tqdm(q.yield_per(1), total=q.count()):
        row = OrderedDict([
            # Source information.
            ("catalogid", meta.catalogid),
            ("ra", _nan_if_none(meta.ra)),
            ("dec", _nan_if_none(meta.dec)),
            ("pmra", _nan_if_none(meta.pmra)),
            ("pmdec", _nan_if_none(meta.pmdec)),
            ("parallax", _nan_if_none(meta.parallax)),
            ("gaia_dr2_source_id", meta.gaia_dr2_source_id or -1),
            # Task information.
            ("ti_pk", ti.pk),
            # Parameters (minimal)
            ("release", parameters.get("release", "")),
            ("obj", parameters.get("obj", "")),
            ("healpix", parameters.get("healpix", -1)),
            ("telescope", parameters.get("telescope", "")),
        ])

        # Add the result information.
        if spectrum_index is None:
            # Get all results: one output row per spectrum.
            for i in range(len(result.snr)):
                this_row = row.copy()
                this_row["spectrum_index"] = i
                _copy_result_fields(this_row, result, i)
                rows.append(this_row)
        else:
            # Only single result.
            _copy_result_fields(row, result, spectrum_index)
            rows.append(row)

    return rows
def individual_visit_data():
    """
    Return an ordered dictionary of arrays comparing per-visit ApogeeNet
    labels with the labels of the stacked spectrum.

    For every result, entries with index 2 and above are each compared with
    entry 0: the differences in ``teff``, ``logg`` and ``fe_h`` are stored
    together with the S/N values, bitmask flags, the task's ``release``
    parameter and a fractional-year date parsed from the task's ``run_id``.

    NOTE(review): entry 0 is taken to be the stacked spectrum and entries 2+
    the individual visits, as the output key names suggest -- confirm.
    """
    # Task parameters folded into one JSON object per ApogeeNet output.
    parameters_sq = (
        session.query(
            astradb.ApogeeNet.output_pk.label("output_pk"),
            func.json_object_agg(
                astradb.Parameter.parameter_name,
                astradb.Parameter.parameter_value,
            ).label("parameters"),
        )
        .filter(astradb.ApogeeNet.output_pk == astradb.TaskInstance.output_pk)
        .filter(astradb.TaskInstance.pk == astradb.TaskInstanceParameter.ti_pk)
        .filter(astradb.TaskInstanceParameter.parameter_pk == astradb.Parameter.pk)
        .group_by(astradb.ApogeeNet)
        .subquery(with_labels=True)
    )

    q = (
        session.query(
            astradb.TaskInstance,
            astradb.ApogeeNet,
            func.cardinality(astradb.ApogeeNet.snr),
            parameters_sq.c.parameters,
        )
        .filter(parameters_sq.c.output_pk == astradb.ApogeeNet.output_pk)
        .filter(parameters_sq.c.output_pk == astradb.TaskInstance.output_pk)
    )

    # Total number of spectra, used only to size the progress bar.
    total, = session.query(func.sum(func.cardinality(astradb.ApogeeNet.snr))).first()

    keys = (
        "ti_pk",
        "snr_stacked", "snr_visit",
        "teff_stacked", "logg_stacked", "fe_h_stacked",
        "delta_teff", "delta_logg", "delta_fe_h",
        "bitmask_stacked", "bitmask_visit",
        "release", "date",
    )
    data = OrderedDict((key, []) for key in keys)

    with tqdm(total=total, unit="spectra") as pb:
        for task_instance, result, N, parameters in q.yield_per(1):
            # The date is the last "_"-separated piece before the "T".
            date_text = task_instance.run_id.split("T")[0].split("_")[-1]
            date = datetime.strptime(date_text, "%Y-%m-%d")

            for i in range(2, N):
                # One value per key, in the same order as `keys`.
                visit = (
                    task_instance.pk,
                    result.snr[0],
                    result.snr[i],
                    result.teff[0],
                    result.logg[0],
                    result.fe_h[0],
                    result.teff[i] - result.teff[0],
                    result.logg[i] - result.logg[0],
                    result.fe_h[i] - result.fe_h[0],
                    result.bitmask_flag[0],
                    result.bitmask_flag[i],
                    parameters["release"],
                    date.year + (int(date.strftime("%j")) / 366),
                )
                for key, value in zip(keys, visit):
                    data[key].append(value)

            pb.update(N)

    for key in keys:
        data[key] = np.array(data[key])

    return data
def export_to_table(output_path, overwrite=True):
    """
    Export the APOGEENet database results to a table.

    One table row is written per spectrum of every ApogeeNet result.

    :param output_path:
        The disk location where to write the table to. ``~`` and environment
        variables are expanded.

    :param overwrite: [optional]
        Overwrite any existing file at ``output_path``. When ``False`` and
        the path exists, an ``OSError`` is raised before any query is made.

    :returns:
        The ordered dictionary of column lists the table was built from.
    """
    output_path = os.path.expandvars(os.path.expanduser(output_path))
    if os.path.exists(output_path) and not overwrite:
        raise OSError(f"path '{output_path}' already exists and asked not to overwrite it")

    # Task parameters folded into one JSON object per ApogeeNet output.
    parameters_sq = (
        session.query(
            astradb.ApogeeNet.output_pk.label("output_pk"),
            func.json_object_agg(
                astradb.Parameter.parameter_name,
                astradb.Parameter.parameter_value,
            ).label("parameters"),
        )
        .filter(astradb.ApogeeNet.output_pk == astradb.TaskInstance.output_pk)
        .filter(astradb.TaskInstance.pk == astradb.TaskInstanceParameter.ti_pk)
        .filter(astradb.TaskInstanceParameter.parameter_pk == astradb.Parameter.pk)
        .group_by(astradb.ApogeeNet)
        .subquery(with_labels=True)
    )

    q = (
        session.query(
            astradb.TaskInstance,
            astradb.ApogeeNet,
            func.cardinality(astradb.ApogeeNet.snr),
            parameters_sq.c.parameters,
        )
        .filter(parameters_sq.c.output_pk == astradb.ApogeeNet.output_pk)
        .filter(parameters_sq.c.output_pk == astradb.TaskInstance.output_pk)
    )

    # Total number of spectra: progress-bar size and the logged row count.
    total, = session.query(func.sum(func.cardinality(astradb.ApogeeNet.snr))).first()

    task_names = (
        "ti_pk", "run_id", "release", "apred", "field",
        "healpix", "telescope", "obj", "spectrum_index",
    )
    column_names = ("snr", "teff", "u_teff", "logg", "u_logg", "fe_h", "u_fe_h", "bitmask_flag")
    table_columns = OrderedDict((name, []) for name in task_names + column_names)

    with tqdm(total=total, unit="spectra") as pb:
        for task_instance, result, N, parameters in q.yield_per(1):
            for i in range(N):
                # Values in the same order as the keys of `table_columns`.
                values = [
                    result.ti_pk,
                    task_instance.run_id,
                    parameters["release"],
                    parameters["apred"],
                    parameters.get("field", ""),
                    parameters.get("healpix", ""),
                    parameters["telescope"],
                    parameters["obj"],
                    i,
                ]
                values.extend(getattr(result, name)[i] for name in column_names)
                for column, value in zip(table_columns.values(), values):
                    column.append(value)
                pb.update(1)

    log.info(f"Creating table with {total} rows")
    table = Table(data=table_columns)
    log.info("Table created.")
    log.info(f"Writing to {output_path}")
    table.write(output_path, overwrite=overwrite)
    log.info("Done")
    return table_columns
def _get_tests_to_replicate_query(replica, bulk_size=200):
    """Build the SELECT returning the next batch of tests to replicate.

    Every row describes one test: literal ``_type`` (``'test'``) and
    ``_index`` (``'backslash'``) values, the test id as ``_id``, all ``Test``
    columns except ``timespan`` and ``parameters``, the user's email, the
    parameters cast to text, a ``test`` JSON object, and correlated
    sub-selects for errors, session/test metadata, session/test entities and
    subjects. Rows are restricted by ``_REPLICATION_TEST_FILTER``.

    Paging depends on ``replica``:

    * ``untimed_done`` false: only tests whose ``updated_at`` is NULL, with an
      id above ``last_replicated_id`` (when set), ordered by id.
    * ``untimed_done`` true: tests after the
      ``(last_replicated_timestamp, last_replicated_id)`` position (when a
      timestamp is set), ordered by ``updated_at`` then id.

    :param replica: object providing ``untimed_done``,
        ``last_replicated_timestamp`` and ``last_replicated_id``.
    :param bulk_size: maximum number of rows (``LIMIT``).
    """
    test_model = models.Test
    excluded_columns = {'timespan', 'parameters'}

    def entity_json():
        # Fresh expression per use: {"name": ..., "type": ...} of an entity.
        return func.json_build_object(
            "name", models.Entity.name,
            "type", models.Entity.type,
        )

    # Distinct entity links of the outer test's session / of the test itself.
    session_entities_query = (
        select([models.session_entity.c.session_id, models.session_entity.c.entity_id])
        .where(models.session_entity.c.session_id == test_model.session_id)
        .distinct()
        .correlate(test_model)
        .alias()
    )
    test_entities_query = (
        select([models.test_entity.c.test_id, models.test_entity.c.entity_id])
        .where(models.test_entity.c.test_id == test_model.id)
        .distinct()
        .correlate(test_model)
        .alias()
    )

    plain_columns = [
        getattr(test_model, column_name)
        for column_name in test_model.__table__.columns.keys()
        if column_name not in excluded_columns
    ]

    test_info = func.json_build_object(
        "file_name", models.TestInformation.file_name,
        "class_name", models.TestInformation.class_name,
        "name", models.TestInformation.name,
        "variation", cast(models.TestVariation.variation, sqlalchemy.Text),
    )

    errors = select([
        func.array_agg(func.json_build_object(
            'timestamp', models.Error.timestamp,
            'message', models.Error.message,
        ))
    ]).where(models.Error.test_id == test_model.id)

    session_metadata = select([
        func.json_object_agg(
            models.SessionMetadata.key,
            models.SessionMetadata.metadata_item,
        ).label('session_metadata')
    ]).where(models.SessionMetadata.session_id == test_model.session_id)

    test_metadata = select([
        func.json_object_agg(
            models.TestMetadata.key,
            models.TestMetadata.metadata_item,
        )
    ]).where(models.TestMetadata.test_id == test_model.id)

    session_entities = select([func.array_agg(entity_json())]).select_from(
        session_entities_query.join(
            models.Entity,
            models.Entity.id == session_entities_query.c.entity_id,
        )
    )
    test_entities = select([func.array_agg(entity_json())]).select_from(
        test_entities_query.join(
            models.Entity,
            models.Entity.id == test_entities_query.c.entity_id,
        )
    )

    subjects = select([
        func.array_agg(func.json_build_object(
            "name", models.Subject.name,
            "product", models.Product.name,
            "version", models.ProductVersion.version,
            "revision", models.ProductRevision.revision,
        ))
    ]).select_from(
        models.session_subject
        .join(models.SubjectInstance)
        .join(models.Subject)
        .join(models.ProductRevision)
        .join(models.ProductVersion)
        .join(models.Product)
    ).where(models.session_subject.c.session_id == test_model.session_id)

    columns = [
        label("_type", text("'test'")),
        label("_index", text("'backslash'")),
        label("_id", test_model.id),
        *plain_columns,
        models.User.email.label('user_email'),
        cast(test_model.parameters, sqlalchemy.Text).label('parameters'),
        test_info.label('test'),
        errors.label('errors'),
        session_metadata.label('session_metadata'),
        test_metadata.label('test_metadata'),
        session_entities.label('session_entities'),
        test_entities.label('test_entities'),
        subjects.label('subjects'),
    ]

    source = (
        test_model.__table__.join(models.Session.__table__)
        .outerjoin(models.User.__table__, models.Session.user_id == models.User.id)
        .outerjoin(models.TestInformation)
        .outerjoin(models.TestVariation)
    )

    query = select(columns).select_from(source).where(_REPLICATION_TEST_FILTER)

    if not replica.untimed_done:
        # First pass: tests without an update timestamp, paged by id.
        query = query.where(test_model.updated_at.is_(None))
        if replica.last_replicated_id is not None:
            query = query.where(test_model.id > replica.last_replicated_id)
        query = query.order_by(test_model.id.asc())
    else:
        # Second pass: paged by (updated_at, id) after the last position.
        if replica.last_replicated_timestamp is not None:
            later_timestamp = test_model.updated_at > replica.last_replicated_timestamp
            same_timestamp_later_id = and_(
                test_model.updated_at == replica.last_replicated_timestamp,
                test_model.id > replica.last_replicated_id,
            )
            query = query.where(or_(later_timestamp, same_timestamp_later_id))
        query = query.order_by(test_model.updated_at.asc(), test_model.id.asc())

    return query.limit(bulk_size)
def get_fields_query(self):
    """Build the SELECT that yields one row per test.

    Each row carries the literal ``_type``/``_index`` value ``'test'``, the
    test id as ``_id``, every ``Test`` column except ``timespan`` and
    ``parameters``, the session's logical id, the user's email, the
    parameters cast to text, a ``test`` JSON object, and correlated
    sub-selects: ``errors``, ``warnings``, ``session_metadata``,
    ``test_metadata``, ``session_entities``, ``test_entities`` and
    ``subjects``.
    """
    test_model = models.Test
    excluded_columns = {'timespan', 'parameters'}

    def timestamped_messages(model):
        # array_agg of {"timestamp": ..., "message": ...} for this test.
        return select([
            func.array_agg(func.json_build_object(
                'timestamp', model.timestamp,
                'message', model.message,
            ))
        ]).where(model.test_id == test_model.id)

    def entities_of(links):
        # array_agg of {"name": ..., "type": ...} for the linked entities.
        return select([
            func.array_agg(func.json_build_object(
                "name", models.Entity.name,
                "type", models.Entity.type,
            ))
        ]).select_from(
            links.join(models.Entity, models.Entity.id == links.c.entity_id)
        )

    # Distinct entity links of the outer test's session / of the test itself.
    session_entities_query = (
        select([models.session_entity.c.session_id, models.session_entity.c.entity_id])
        .where(models.session_entity.c.session_id == test_model.session_id)
        .distinct()
        .correlate(test_model)
        .alias()
    )
    test_entities_query = (
        select([models.test_entity.c.test_id, models.test_entity.c.entity_id])
        .where(models.test_entity.c.test_id == test_model.id)
        .distinct()
        .correlate(test_model)
        .alias()
    )

    plain_columns = [
        getattr(test_model, column_name)
        for column_name in test_model.__table__.columns.keys()
        if column_name not in excluded_columns
    ]

    test_info = func.json_build_object(
        "file_name", models.TestInformation.file_name,
        "class_name", models.TestInformation.class_name,
        "name", models.TestInformation.name,
        "variation", cast(models.TestVariation.variation, sqlalchemy.Text),
    )

    session_metadata = select([
        func.json_object_agg(
            models.SessionMetadata.key,
            models.SessionMetadata.metadata_item,
        ).label('session_metadata')
    ]).where(models.SessionMetadata.session_id == test_model.session_id)

    test_metadata = select([
        func.json_object_agg(
            models.TestMetadata.key,
            models.TestMetadata.metadata_item,
        )
    ]).where(models.TestMetadata.test_id == test_model.id)

    subjects = select([
        func.array_agg(func.json_build_object(
            "name", models.Subject.name,
            "product", models.Product.name,
            "version", models.ProductVersion.version,
            "revision", models.ProductRevision.revision,
        ))
    ]).select_from(
        models.session_subject
        .join(models.SubjectInstance)
        .join(models.Subject)
        .join(models.ProductRevision)
        .join(models.ProductVersion)
        .join(models.Product)
    ).where(models.session_subject.c.session_id == test_model.session_id)

    columns = [
        label("_type", text("'test'")),
        label("_index", text("'test'")),
        label("_id", test_model.id),
        *plain_columns,
        models.Session.logical_id.label('session_logical_id'),
        models.User.email.label('user_email'),
        cast(test_model.parameters, sqlalchemy.Text).label('parameters'),
        test_info.label('test'),
        timestamped_messages(models.Error).label('errors'),
        timestamped_messages(models.Warning).label('warnings'),
        session_metadata.label('session_metadata'),
        test_metadata.label('test_metadata'),
        entities_of(session_entities_query).label('session_entities'),
        entities_of(test_entities_query).label('test_entities'),
        subjects.label('subjects'),
    ]

    source = (
        test_model.__table__.join(models.Session.__table__)
        .outerjoin(models.User.__table__, models.Session.user_id == models.User.id)
        .outerjoin(models.TestInformation)
        .outerjoin(models.TestVariation)
    )
    return select(columns).select_from(source)