Beispiel #1
0
def get_timings(session_id: (int, NoneType)=None, test_id: (int, NoneType)=None):
    """Return a ``{timing name: total time}`` mapping for a session or a test.

    A stored ``Timing.total`` below zero is replaced by ``now + total``, where
    ``now`` is the current flux timeline time (presumably such rows are timers
    that are still running -- confirm against the Timing model).

    ``session_id`` takes precedence: when given, totals are summed per timing
    name over that session. Otherwise the timings of ``test_id`` are
    aggregated directly. With neither id, or when the query yields nothing,
    an empty dict is returned.
    """
    now = flux.current_timeline.time()
    effective_total = case([(Timing.total < 0, now + Timing.total)], else_=Timing.total)

    if session_id is None and test_id is None:
        return {}

    if session_id is None:
        # Only a test id was supplied: aggregate that test's timings as-is.
        query = (
            db.session.query(func.json_object_agg(Timing.name, effective_total))
            .filter_by(test_id=test_id)
        )
        return query.scalar() or {}

    # Sum per timing name first, then fold the sums into one JSON object.
    per_name_totals = (
        db.session.query(Timing.name, func.sum(effective_total).label('total_time'))
        .group_by(Timing.session_id, Timing.name)
        .filter_by(session_id=session_id)
        .subquery()
    )
    query = db.session.query(
        func.json_object_agg(per_name_totals.c.name, per_name_totals.c.total_time))
    return query.scalar() or {}
Beispiel #2
0
def _get_metadata_query(*, entity_type, entity_id):
    """Build a query aggregating one entity's metadata into a JSON object.

    :param entity_type: ``'session'`` or ``'test'``; any other value is
        reported through ``error_abort`` with a bad-request status.
    :param entity_id: an ``int`` is matched against the related model's
        ``id``; anything else is matched against its ``logical_id``.
    :return: the (unexecuted) query object.
    """
    metadata_model = _get_metadata_model(entity_type)
    aggregate = func.json_object_agg(metadata_model.key, metadata_model.metadata_item)
    query = db.session.query(aggregate)

    if entity_type == 'session':
        related = Session
    elif entity_type == 'test':
        related = Test
    else:
        # NOTE(review): relies on error_abort raising; otherwise `related` is unbound below.
        error_abort('Invalid entity type', requests.codes.bad_request)

    query = query.join(related)
    match_column = related.id if isinstance(entity_id, int) else related.logical_id
    return query.filter(match_column == entity_id)
Beispiel #3
0
def _get_metadata_query(*, entity_type, entity_id):
    """Return a query yielding a JSON object of metadata key -> item.

    The metadata model is resolved from ``entity_type`` and joined to either
    ``Session`` or ``Test``. Integer ids select by primary ``id``; any other
    id value selects by ``logical_id``. An unknown ``entity_type`` is passed
    to ``error_abort`` with a bad-request code.
    """
    model = _get_metadata_model(entity_type)
    query = db.session.query(func.json_object_agg(model.key, model.metadata_item))

    if entity_type == 'session':
        owner = Session
    elif entity_type == 'test':
        owner = Test
    else:
        # NOTE(review): assumes error_abort does not return -- confirm.
        error_abort('Invalid entity type', requests.codes.bad_request)

    joined = query.join(owner)
    if not isinstance(entity_id, int):
        return joined.filter(owner.logical_id == entity_id)
    return joined.filter(owner.id == entity_id)
Beispiel #4
0
 def get_fields_query(self):
     """Build the SELECT producing one flat row per session.

     Besides the literal ``_type``/``_index`` markers and ``_id``, the row
     carries every ``Session`` table column except ``timespan`` and
     ``parameters``, the owning user's email (outer join, so it may be NULL),
     and four correlated scalar subqueries: ``session_errors``,
     ``session_warnings``, ``session_labels`` and ``session_metadata``.
     """
     # Distinct (session, label) pairs of the enclosing Session row.
     session_label_pairs = select([
         models.session_label.c.session_id,
         models.session_label.c.label_id,
     ]).where(
         models.session_label.c.session_id == models.Session.id
     ).distinct().correlate(models.Session).alias()

     plain_columns = [
         getattr(models.Session, column_name)
         for column_name in models.Session.__table__.columns.keys()
         if column_name not in {'timespan', 'parameters'}
     ]

     errors = select([
         func.array_agg(func.json_build_object(
             'timestamp', models.Error.timestamp,
             'message', models.Error.message,
         ))
     ]).where(
         models.Error.session_id == models.Session.id
     ).label('session_errors')

     warnings = select([
         func.array_agg(func.json_build_object(
             'timestamp', models.Warning.timestamp,
             'message', models.Warning.message,
         ))
     ]).where(
         models.Warning.session_id == models.Session.id
     ).label('session_warnings')

     labels = select([
         func.array_agg(func.json_build_object("name", models.Label.name))
     ]).select_from(
         session_label_pairs.join(
             models.Label,
             models.Label.id == session_label_pairs.c.label_id)
     ).label('session_labels')

     metadata = select([
         func.json_object_agg(
             models.SessionMetadata.key,
             models.SessionMetadata.metadata_item)
     ]).where(
         models.SessionMetadata.session_id == models.Session.id
     ).label('session_metadata')

     sessions_with_users = models.Session.__table__.outerjoin(
         models.User.__table__,
         models.Session.user_id == models.User.id)

     return select([
         label("_type", text("'session'")),
         label("_index", text("'session'")),
         label("_id", models.Session.id),
         *plain_columns,
         models.User.email.label('user_email'),
         errors,
         warnings,
         labels,
         metadata,
     ]).select_from(sessions_with_users)
    def get_team(self, team_id):
        """Fetch one team with its leads and members, as a dict.

        The result has the keys ``id``, ``name``, ``emailAddress`` (empty
        string when the team has none), ``status``, ``teamLeads`` and
        ``teamMembers``. The last two are JSON objects keyed by user id; each
        member entry also carries a ``permissions`` object mapping every
        granted permission to ``true``. Both may be NULL when the team has no
        leads / no members, because the aggregates are outer-joined.

        :param team_id: id of the team to load.
        :return: the team as a dict, or ``None`` if no team has that id.
        """
        leads = (
            db.session
            .query(TeamMember.team_id, User.id, User.name, User.email_address)
            .join(User)
            .filter(TeamMember.is_team_lead.is_(True))
            .order_by(User.name)
            .subquery()
        )

        # One row per team: {user id: {emailAddress, name}} for its leads.
        leads_by_team = (
            db.session
            .query(
                leads.c.team_id,
                func.json_object_agg(
                    leads.c.id,
                    func.json_build_object(
                        'emailAddress', leads.c.email_address,
                        'name', leads.c.name,
                    ),
                ).label('teamLeads'),
            )
            .group_by(leads.c.team_id)
            .subquery()
        )

        members = (
            db.session
            .query(
                TeamMember.id.label('team_member_id'),
                TeamMember.team_id,
                User.id,
                User.name,
                User.email_address,
            )
            .join(User)
            .filter(TeamMember.is_team_lead.is_(False))
            .order_by(User.name)
            .subquery()
        )

        # Members without any permission row still appear (outer join); the
        # aggregate FILTER skips their NULL permission and coalesce yields '{}'.
        granted = func.coalesce(
            func.json_object_agg(
                TeamMemberPermission.permission, True
            ).filter(TeamMemberPermission.permission.isnot(None)),
            '{}',
        ).label('permissions')

        members_with_permissions = (
            db.session
            .query(members, granted)
            .outerjoin(
                TeamMemberPermission,
                TeamMemberPermission.team_member_id == members.c.team_member_id,
            )
            .group_by(
                members.c.team_member_id,
                members.c.team_id,
                members.c.id,
                members.c.name,
                members.c.email_address,
            )
            .order_by(members.c.name)
            .subquery()
        )

        # One row per team: {user id: {emailAddress, name, permissions}}.
        members_by_team = (
            db.session
            .query(
                members_with_permissions.c.team_id,
                func.json_object_agg(
                    members_with_permissions.c.id,
                    func.json_build_object(
                        'emailAddress', members_with_permissions.c.email_address,
                        'name', members_with_permissions.c.name,
                        'permissions', members_with_permissions.c.permissions,
                    ),
                ).label('teamMembers'),
            )
            .group_by(members_with_permissions.c.team_id)
            .subquery()
        )

        team = (
            db.session
            .query(
                Team.id,
                Team.name,
                func.coalesce(Team.email_address, '').label('emailAddress'),
                Team.status,
                leads_by_team.c.teamLeads,
                members_by_team.c.teamMembers,
            )
            .outerjoin(leads_by_team, leads_by_team.c.team_id == Team.id)
            .outerjoin(members_by_team, members_by_team.c.team_id == Team.id)
            .filter(Team.id == team_id)
            .one_or_none()
        )

        if not team:
            return None
        return team._asdict()
Beispiel #6
0
def _nan_if_none(value):
    """Return ``np.nan`` in place of ``None``; pass every other value through."""
    return np.nan if value is None else value


def _result_columns(result, index, ignore_keys):
    """Yield ``(column name, value)`` pairs of ``result`` for one spectrum.

    Columns listed in ``ignore_keys`` or starting with an underscore are
    skipped. Tuple/list valued columns are indexed at ``index``.
    """
    for key in result.__table__.columns.keys():
        if key in ignore_keys or key.startswith("_"):
            continue

        value = getattr(result, key)
        if isinstance(value, (tuple, list)):
            value = value[index]
        yield key, _nan_if_none(value)


def _get_results(database_model_name,
                 spectrum_index=None,
                 filter_by_kwargs=None,
                 limit=None):
    """Collect result rows of one database model, together with task metadata.

    Missing (``None``) values are reported as ``np.nan``. Legitimate falsy
    values such as ``0``, ``0.0`` or ``False`` are kept as they are; the
    previous ``value or np.nan`` form silently turned them into NaN.

    :param database_model_name:
        Name of the model, looked up as an attribute of ``astradb``.

    :param spectrum_index: [optional]
        If ``None``, one row is produced per spectrum (``len(result.snr)``
        rows per result), each tagged with a ``spectrum_index`` entry.
        Otherwise a single row per result is produced, with list-valued
        columns indexed at ``spectrum_index``.

    :param filter_by_kwargs: [optional]
        Keyword arguments forwarded to the query's ``filter_by``.

    :param limit: [optional]
        Maximum number of database rows to fetch.

    :returns:
        A list of ``OrderedDict`` rows.
    """

    # Get the database model
    database_model = getattr(astradb, database_model_name)
    if filter_by_kwargs is None:
        filter_by_kwargs = dict()

    q = astra_session.query(
        astradb.TaskInstance,
        func.json_object_agg(astradb.Parameter.parameter_name,
                             astradb.Parameter.parameter_value),
        astradb.TaskInstanceMeta,
        database_model,
    ).filter(
        astradb.TaskInstance.output_pk == database_model.output_pk,
        astradb.TaskInstance.pk == astradb.TaskInstanceMeta.ti_pk,
        astradb.TaskInstance.pk == astradb.TaskInstanceParameter.ti_pk,
        astradb.TaskInstanceParameter.parameter_pk == astradb.Parameter.pk,
    ).filter_by(**filter_by_kwargs).group_by(astradb.TaskInstance,
                                             astradb.TaskInstanceMeta,
                                             database_model)

    if limit is not None:
        q = q.limit(limit)

    ignore_keys = ("output_pk", "ti_pk", "associated_ti_pks")

    rows = []
    # q.count() is only used to give the progress bar a total.
    for ti, parameters, meta, result in tqdm(q.yield_per(1), total=q.count()):

        row = OrderedDict([
            # Source information. Zero is a valid coordinate / proper motion,
            # so only a missing value becomes NaN.
            ("catalogid", meta.catalogid),
            ("ra", _nan_if_none(meta.ra)),
            ("dec", _nan_if_none(meta.dec)),
            ("pmra", _nan_if_none(meta.pmra)),
            ("pmdec", _nan_if_none(meta.pmdec)),
            ("parallax", _nan_if_none(meta.parallax)),
            ("gaia_dr2_source_id", meta.gaia_dr2_source_id or -1),
            # Task information.
            ("ti_pk", ti.pk),
            # Parameters (minimal)
            ("release", parameters.get("release", "")),
            ("obj", parameters.get("obj", "")),
            ("healpix", parameters.get("healpix", -1)),
            ("telescope", parameters.get("telescope", "")),
        ])

        # Add the result information.
        if spectrum_index is None:
            # Get all results: one output row per spectrum.
            for i in range(len(result.snr)):
                this_row = row.copy()
                this_row["spectrum_index"] = i
                this_row.update(_result_columns(result, i, ignore_keys))
                rows.append(this_row)

        else:
            # Only single result.
            row.update(_result_columns(result, spectrum_index, ignore_keys))
            rows.append(row)

    return rows
Beispiel #7
0
def individual_visit_data():
    """
    Return per-visit differences against the stacked-spectrum labels.

    For every APOGEENet result, the entry at index 0 of each array column is
    taken as the stacked value, and every entry from index 2 onwards is treated
    as an individual visit (index 1 is skipped -- NOTE(review): presumably a
    second stacked spectrum; confirm against the data model).

    :returns:
        An ``OrderedDict`` of numpy arrays, one element per visit, with the
        keys ``ti_pk``, ``snr_stacked``, ``snr_visit``, ``teff_stacked``,
        ``logg_stacked``, ``fe_h_stacked``, ``delta_teff``, ``delta_logg``,
        ``delta_fe_h``, ``bitmask_stacked``, ``bitmask_visit``, ``release``
        and ``date`` (a fractional year derived from the task's ``run_id``).
    """
    # Parameters of each output, folded into one JSON object per output_pk.
    parameters_by_output = (
        session.query(
            astradb.ApogeeNet.output_pk.label("output_pk"),
            func.json_object_agg(
                astradb.Parameter.parameter_name,
                astradb.Parameter.parameter_value,
            ).label("parameters"),
        )
        .filter(astradb.ApogeeNet.output_pk == astradb.TaskInstance.output_pk)
        .filter(astradb.TaskInstance.pk == astradb.TaskInstanceParameter.ti_pk)
        .filter(astradb.TaskInstanceParameter.parameter_pk == astradb.Parameter.pk)
        .group_by(astradb.ApogeeNet)
        .subquery(with_labels=True)
    )

    q = (
        session.query(
            astradb.TaskInstance,
            astradb.ApogeeNet,
            func.cardinality(astradb.ApogeeNet.snr),
            parameters_by_output.c.parameters,
        )
        .filter(parameters_by_output.c.output_pk == astradb.ApogeeNet.output_pk)
        .filter(parameters_by_output.c.output_pk == astradb.TaskInstance.output_pk)
    )

    # Total number of spectra, used only as the progress bar total.
    total, = session.query(func.sum(func.cardinality(astradb.ApogeeNet.snr))).first()

    keys = (
        "ti_pk", "snr_stacked", "snr_visit",
        "teff_stacked", "logg_stacked", "fe_h_stacked",
        "delta_teff", "delta_logg", "delta_fe_h",
        "bitmask_stacked", "bitmask_visit", "release", "date"
    )
    data = OrderedDict((key, []) for key in keys)

    with tqdm(total=total, unit="spectra") as pb:
        for task_instance, result, N, parameters in q.yield_per(1):
            # The run date is the last "_"-separated token before the "T".
            date_token = task_instance.run_id.split("T")[0].split("_")[-1]
            date = datetime.strptime(date_token, "%Y-%m-%d")
            fractional_year = date.year + (date.timetuple().tm_yday / 366)

            for i in range(2, N):
                record = {
                    "ti_pk": task_instance.pk,
                    "snr_stacked": result.snr[0],
                    "snr_visit": result.snr[i],
                    "teff_stacked": result.teff[0],
                    "logg_stacked": result.logg[0],
                    "fe_h_stacked": result.fe_h[0],
                    "delta_teff": result.teff[i] - result.teff[0],
                    "delta_logg": result.logg[i] - result.logg[0],
                    "delta_fe_h": result.fe_h[i] - result.fe_h[0],
                    "bitmask_stacked": result.bitmask_flag[0],
                    "bitmask_visit": result.bitmask_flag[i],
                    "release": parameters["release"],
                    "date": fractional_year,
                }
                for key, value in record.items():
                    data[key].append(value)

            pb.update(N)

    for key, values in data.items():
        data[key] = np.array(values)

    return data
Beispiel #8
0
def export_to_table(output_path, overwrite=True):
    """
    Export the APOGEENet database results to a table on disk.

    One table row is written per spectrum of every result.

    :param output_path:
        The disk location where to write the table to. Environment variables
        and ``~`` are expanded.

    :param overwrite: [optional]
        Overwrite an existing file at ``output_path``. When false and the
        path already exists, an ``OSError`` is raised before any query runs.

    :returns:
        The ordered dictionary of column lists the table was built from
        (not the table object itself).
    """

    output_path = os.path.expandvars(os.path.expanduser(output_path))
    if os.path.exists(output_path) and not overwrite:
        raise OSError(f"path '{output_path}' already exists and asked not to overwrite it")

    # Parameters of each output, folded into one JSON object per output_pk.
    parameters_by_output = (
        session.query(
            astradb.ApogeeNet.output_pk.label("output_pk"),
            func.json_object_agg(
                astradb.Parameter.parameter_name,
                astradb.Parameter.parameter_value,
            ).label("parameters"),
        )
        .filter(astradb.ApogeeNet.output_pk == astradb.TaskInstance.output_pk)
        .filter(astradb.TaskInstance.pk == astradb.TaskInstanceParameter.ti_pk)
        .filter(astradb.TaskInstanceParameter.parameter_pk == astradb.Parameter.pk)
        .group_by(astradb.ApogeeNet)
        .subquery(with_labels=True)
    )

    q = (
        session.query(
            astradb.TaskInstance,
            astradb.ApogeeNet,
            func.cardinality(astradb.ApogeeNet.snr),
            parameters_by_output.c.parameters,
        )
        .filter(parameters_by_output.c.output_pk == astradb.ApogeeNet.output_pk)
        .filter(parameters_by_output.c.output_pk == astradb.TaskInstance.output_pk)
    )

    # Total number of spectra; used for the progress bar and the log message.
    total, = session.query(func.sum(func.cardinality(astradb.ApogeeNet.snr))).first()

    identifier_names = (
        "ti_pk", "run_id", "release", "apred", "field",
        "healpix", "telescope", "obj", "spectrum_index",
    )
    column_names = ("snr", "teff", "u_teff", "logg", "u_logg", "fe_h", "u_fe_h", "bitmask_flag")
    table_columns = OrderedDict((name, []) for name in identifier_names + column_names)

    with tqdm(total=total, unit="spectra") as pb:
        for task_instance, result, N, parameters in q.yield_per(1):
            for i in range(N):
                # Values in the same order as identifier_names.
                identifiers = (
                    result.ti_pk,
                    task_instance.run_id,
                    parameters["release"],
                    parameters["apred"],
                    parameters.get("field", ""),
                    parameters.get("healpix", ""),
                    parameters["telescope"],
                    parameters["obj"],
                    i,
                )
                for name, value in zip(identifier_names, identifiers):
                    table_columns[name].append(value)

                for name in column_names:
                    table_columns[name].append(getattr(result, name)[i])

                pb.update(1)

    log.info(f"Creating table with {total} rows")
    table = Table(data=table_columns)
    log.info("Table created.")

    log.info(f"Writing to {output_path}")
    table.write(output_path, overwrite=overwrite)
    log.info("Done")

    return table_columns
Beispiel #9
0
def _get_tests_to_replicate_query(replica, bulk_size=200):
    """Build the SELECT for the next batch of tests to replicate.

    Each row describes one test: literal ``_type``/``_index`` markers, ``_id``,
    every ``Test`` table column except ``timespan`` and ``parameters``, the
    user's email, the parameters cast to text, a ``test`` JSON object, and
    correlated scalar subqueries for errors, session/test metadata,
    session/test entities and subjects.

    Batching depends on the replica's progress:

    * while ``replica.untimed_done`` is false, only tests whose ``updated_at``
      is NULL are selected, ordered by id and resuming after
      ``replica.last_replicated_id`` when it is set;
    * afterwards tests are ordered by ``(updated_at, id)`` and, when
      ``replica.last_replicated_timestamp`` is set, resumed strictly after the
      ``(last_replicated_timestamp, last_replicated_id)`` position.

    :param replica: object exposing ``untimed_done``,
        ``last_replicated_timestamp`` and ``last_replicated_id``.
    :param bulk_size: maximum number of rows the query returns.
    :return: the (unexecuted) select statement.
    """

    def entities_of(pairs, name):
        # Array of {name, type} objects for the entities referenced by `pairs`.
        return select([
            func.array_agg(func.json_build_object(
                "name", models.Entity.name,
                "type", models.Entity.type,
            ))
        ]).select_from(
            pairs.join(models.Entity, models.Entity.id == pairs.c.entity_id)
        ).label(name)

    # Distinct entity links of the enclosing Test row (by session / by test).
    session_entity_pairs = select([
        models.session_entity.c.session_id,
        models.session_entity.c.entity_id,
    ]).where(
        models.session_entity.c.session_id == models.Test.session_id
    ).distinct().correlate(models.Test).alias()

    test_entity_pairs = select([
        models.test_entity.c.test_id,
        models.test_entity.c.entity_id,
    ]).where(
        models.test_entity.c.test_id == models.Test.id
    ).distinct().correlate(models.Test).alias()

    test_columns = [
        getattr(models.Test, column_name)
        for column_name in models.Test.__table__.columns.keys()
        if column_name not in {'timespan', 'parameters'}
    ]

    test_info = func.json_build_object(
        "file_name", models.TestInformation.file_name,
        "class_name", models.TestInformation.class_name,
        "name", models.TestInformation.name,
        "variation", cast(models.TestVariation.variation, sqlalchemy.Text),
    ).label('test')

    errors = select([
        func.array_agg(func.json_build_object(
            'timestamp', models.Error.timestamp,
            'message', models.Error.message,
        ))
    ]).where(models.Error.test_id == models.Test.id).label('errors')

    session_metadata = select([
        func.json_object_agg(
            models.SessionMetadata.key,
            models.SessionMetadata.metadata_item,
        ).label('session_metadata')
    ]).where(
        models.SessionMetadata.session_id == models.Test.session_id
    ).label('session_metadata')

    test_metadata = select([
        func.json_object_agg(
            models.TestMetadata.key,
            models.TestMetadata.metadata_item,
        )
    ]).where(models.TestMetadata.test_id == models.Test.id).label('test_metadata')

    session_entities = entities_of(session_entity_pairs, 'session_entities')
    test_entities = entities_of(test_entity_pairs, 'test_entities')

    subject_tables = (
        models.session_subject
        .join(models.SubjectInstance)
        .join(models.Subject)
        .join(models.ProductRevision)
        .join(models.ProductVersion)
        .join(models.Product)
    )
    subjects = select([
        func.array_agg(func.json_build_object(
            "name", models.Subject.name,
            "product", models.Product.name,
            "version", models.ProductVersion.version,
            "revision", models.ProductRevision.revision,
        ))
    ]).select_from(subject_tables).where(
        models.session_subject.c.session_id == models.Test.session_id
    ).label('subjects')

    source = (
        models.Test.__table__.join(models.Session.__table__)
        .outerjoin(models.User.__table__, models.Session.user_id == models.User.id)
        .outerjoin(models.TestInformation)
        .outerjoin(models.TestVariation)
    )

    query = select([
        label("_type", text("'test'")),
        label("_index", text("'backslash'")),
        label("_id", models.Test.id),
        *test_columns,
        models.User.email.label('user_email'),
        cast(models.Test.parameters, sqlalchemy.Text).label('parameters'),
        test_info,
        errors,
        session_metadata,
        test_metadata,
        session_entities,
        test_entities,
        subjects,
    ]).select_from(source).where(_REPLICATION_TEST_FILTER)

    if not replica.untimed_done:
        # First pass: tests that have no update timestamp, walked by id.
        query = query.where(models.Test.updated_at == None)  # noqa: E711 -- renders IS NULL
        if replica.last_replicated_id is not None:
            query = query.where(models.Test.id > replica.last_replicated_id)
        return query.order_by(models.Test.id.asc()).limit(bulk_size)

    if replica.last_replicated_timestamp is not None:
        # Resume strictly after the last replicated (updated_at, id) position.
        later_update = models.Test.updated_at > replica.last_replicated_timestamp
        same_update_later_id = and_(
            models.Test.updated_at == replica.last_replicated_timestamp,
            models.Test.id > replica.last_replicated_id,
        )
        query = query.where(or_(later_update, same_update_later_id))

    ordered = query.order_by(models.Test.updated_at.asc(), models.Test.id.asc())
    return ordered.limit(bulk_size)
Beispiel #10
0
    def get_fields_query(self):
        """Build the SELECT producing one flat row per test.

        Each row holds the literal ``_type``/``_index`` markers, ``_id``, every
        ``Test`` table column except ``timespan`` and ``parameters``, the
        session's logical id, the user's email (outer join, may be NULL), the
        parameters cast to text, a ``test`` JSON object, and correlated scalar
        subqueries labelled ``errors``, ``warnings``, ``session_metadata``,
        ``test_metadata``, ``session_entities``, ``test_entities`` and
        ``subjects``.
        """

        def entities_of(pairs, name):
            # Array of {name, type} objects for the entities referenced by `pairs`.
            return select([
                func.array_agg(func.json_build_object(
                    "name", models.Entity.name,
                    "type", models.Entity.type,
                ))
            ]).select_from(
                pairs.join(models.Entity, models.Entity.id == pairs.c.entity_id)
            ).label(name)

        # Distinct entity links of the enclosing Test row (by session / by test).
        session_entity_pairs = select([
            models.session_entity.c.session_id,
            models.session_entity.c.entity_id,
        ]).where(
            models.session_entity.c.session_id == models.Test.session_id
        ).distinct().correlate(models.Test).alias()

        test_entity_pairs = select([
            models.test_entity.c.test_id,
            models.test_entity.c.entity_id,
        ]).where(
            models.test_entity.c.test_id == models.Test.id
        ).distinct().correlate(models.Test).alias()

        test_columns = [
            getattr(models.Test, column_name)
            for column_name in models.Test.__table__.columns.keys()
            if column_name not in {'timespan', 'parameters'}
        ]

        test_info = func.json_build_object(
            "file_name", models.TestInformation.file_name,
            "class_name", models.TestInformation.class_name,
            "name", models.TestInformation.name,
            "variation", cast(models.TestVariation.variation, sqlalchemy.Text),
        ).label('test')

        errors = select([
            func.array_agg(func.json_build_object(
                'timestamp', models.Error.timestamp,
                'message', models.Error.message,
            ))
        ]).where(models.Error.test_id == models.Test.id).label('errors')

        warnings = select([
            func.array_agg(func.json_build_object(
                'timestamp', models.Warning.timestamp,
                'message', models.Warning.message,
            ))
        ]).where(models.Warning.test_id == models.Test.id).label('warnings')

        session_metadata = select([
            func.json_object_agg(
                models.SessionMetadata.key,
                models.SessionMetadata.metadata_item,
            ).label('session_metadata')
        ]).where(
            models.SessionMetadata.session_id == models.Test.session_id
        ).label('session_metadata')

        test_metadata = select([
            func.json_object_agg(
                models.TestMetadata.key,
                models.TestMetadata.metadata_item,
            )
        ]).where(
            models.TestMetadata.test_id == models.Test.id
        ).label('test_metadata')

        session_entities = entities_of(session_entity_pairs, 'session_entities')
        test_entities = entities_of(test_entity_pairs, 'test_entities')

        subject_tables = (
            models.session_subject
            .join(models.SubjectInstance)
            .join(models.Subject)
            .join(models.ProductRevision)
            .join(models.ProductVersion)
            .join(models.Product)
        )
        subjects = select([
            func.array_agg(func.json_build_object(
                "name", models.Subject.name,
                "product", models.Product.name,
                "version", models.ProductVersion.version,
                "revision", models.ProductRevision.revision,
            ))
        ]).select_from(subject_tables).where(
            models.session_subject.c.session_id == models.Test.session_id
        ).label('subjects')

        source = (
            models.Test.__table__.join(models.Session.__table__)
            .outerjoin(models.User.__table__, models.Session.user_id == models.User.id)
            .outerjoin(models.TestInformation)
            .outerjoin(models.TestVariation)
        )

        return select([
            label("_type", text("'test'")),
            label("_index", text("'test'")),
            label("_id", models.Test.id),
            *test_columns,
            models.Session.logical_id.label('session_logical_id'),
            models.User.email.label('user_email'),
            cast(models.Test.parameters, sqlalchemy.Text).label('parameters'),
            test_info,
            errors,
            warnings,
            session_metadata,
            test_metadata,
            session_entities,
            test_entities,
            subjects,
        ]).select_from(source)