예제 #1
0
    def get(self, datastack_name: str, version: int, table_name: str):
        """Return the metadata of a frozen table.

        The serialized analysis-table record is merged with the annotation
        table metadata, after the ``id`` and ``deleted`` keys have been
        removed from the latter.

        Args:
            datastack_name (str): datastack name
            version (int): version number
            table_name (str): table name

        Returns:
            tuple: dictionary of table metadata and the status code 200
        """
        # Only the aligned volume name is needed from the datastack info.
        aligned_volume_name, _ = get_relevant_datastack_info(datastack_name)
        session = sqlalchemy_cache.get(aligned_volume_name)
        _, analysis_table = get_analysis_version_and_table(
            datastack_name, table_name, version, session)

        table_info = AnalysisTableSchema().dump(analysis_table)

        annotation_db = dynamic_annotation_cache.get_db(aligned_volume_name)
        annotation_metadata = annotation_db.get_table_metadata(table_name)
        # Drop the keys that must not be merged into the response.
        for dropped_key in ("id", "deleted"):
            annotation_metadata.pop(dropped_key)
        table_info.update(annotation_metadata)
        return table_info, 200
예제 #2
0
    def post(self, aligned_volume_name: str):
        """Create the tables of an aligned volume database.

        Args:
            aligned_volume_name (str): name of aligned_volume from infoservice

        Returns:
            int: 200 once ``create_all`` has run
        """
        check_aligned_volume(aligned_volume_name)
        volume_db = dynamic_annotation_cache.get_db(aligned_volume_name)

        # Bind the shared declarative metadata to this volume's engine so
        # that create_all() runs against that database.
        declarative_metadata = Base.metadata
        declarative_metadata.bind = volume_db.engine
        declarative_metadata.create_all()
        return 200
예제 #3
0
def get_analysis_table(aligned_volume: str,
                       datastack: str,
                       table_name: str,
                       mat_version: int = 1):
    """Helper method that returns a table model.

    The analysis database for ``datastack`` at ``mat_version`` is reflected.
    If ``table_name`` is among the reflected tables, that reflected table is
    returned; otherwise a new model class is built from the flattened
    annotation schema of the table.

    Args:
        aligned_volume (str): aligned_volume name
        datastack (str): datastack name
        table_name (str): table to reflect a model
        mat_version (int, optional): target database version

    Returns:
        SQLAlchemy model: the reflected table when it exists in the analysis
        database, otherwise a model class derived from ``Base``
    """
    anno_db = dynamic_annotation_cache.get_db(aligned_volume)
    schema_name = anno_db.get_table_schema(table_name)
    SQL_URI_CONFIG = get_config_param("SQLALCHEMY_DATABASE_URI")
    analysis_sql_uri = create_analysis_sql_uri(SQL_URI_CONFIG, datastack,
                                               mat_version)
    analysis_engine = create_engine(analysis_sql_uri)

    # The engine exists only for this call; dispose of it even when
    # reflection or schema construction fails so connections are not leaked.
    try:
        meta = MetaData()
        meta.reflect(bind=analysis_engine)

        anno_schema = get_schema(schema_name)
        flat_schema = create_flattened_schema(anno_schema)

        # The metadata was just reflected, so membership in ``meta.tables``
        # answers "does the table exist" without another database round trip
        # and can never disagree with the lookup below.
        if table_name in meta.tables:
            return meta.tables[table_name]

        annotation_dict = create_table_dict(
            table_name=table_name,
            Schema=flat_schema,
            segmentation_source=None,
            table_metadata=None,
            with_crud_columns=False,
        )
        return type(table_name, (Base, ), annotation_dict)
    finally:
        analysis_engine.dispose()
예제 #4
0
def get_flat_model(datastack_name: str, table_name: str, version: int,
                   Session):
    """Build the flat model of a frozen table.

    Aborts with 404 when no analysis table is found and with 410 when the
    analysis version is not flagged as valid.

    Args:
        datastack_name (str): datastack name
        table_name (str): table name
        version (int): version of table
        Session (Sqlalchemy session): session to connect to database

    Returns:
        sqlalchemy.Model: model of table
    """
    aligned_volume_name, _ = get_relevant_datastack_info(datastack_name)
    analysis_version, analysis_table = get_analysis_version_and_table(
        datastack_name, table_name, version, Session)

    if analysis_table is None:
        not_found_message = (
            "Cannot find table {} in datastack {} at version {}".format(
                table_name, datastack_name, version))
        abort(404, not_found_message)
    if not analysis_version.valid:
        abort(410, "This materialization version is not available")

    annotation_db = dynamic_annotation_cache.get_db(aligned_volume_name)
    annotation_metadata = annotation_db.get_table_metadata(table_name)
    reference_table = annotation_metadata.get("reference_table")
    # Only forward table metadata when a reference table is set.
    table_metadata = ({"reference_table": reference_table}
                      if reference_table else None)

    return make_flat_model(
        table_name=table_name,
        schema_type=analysis_table.schema,
        segmentation_source=None,
        table_metadata=table_metadata,
    )
예제 #5
0
def check_tables(self, mat_info: list, analysis_version: int):
    """Check if each materialized table has the same number of rows as
    the aligned volumes tables in the live database that are set as valid.
    If row numbers match, set the validity of both the analysis tables as well
    as the analysis version (materialized database) as True.

    Sessions are closed and engines disposed on every exit path, so a failed
    validation does not leave uncommitted ``valid`` flags behind in a session.

    Args:
        mat_info (list): list of dicts containing metadata for each
            materialized table. The first entry supplies "aligned_volume" and
            "analysis_database"; every entry is read for
            "annotation_table_name", "schema" and, optionally,
            "reference_table".
        analysis_version (int): the materialized version number

    Raises:
        ValueError: if a table's row counts differ, or if the number of
            validated tables differs from ``len(mat_info)`` (tables with zero
            rows are skipped and therefore not counted as valid).
        IndexMatchError: if the indexes derived from the model differ from
            the indexes found on the materialized table.

    Returns:
        str: returns statement if all tables are valid; ``None`` if the final
        commit failed (the error is logged and the session rolled back).
    """
    aligned_volume = mat_info[0][
        "aligned_volume"]  # get aligned_volume name from datastack
    table_count = len(mat_info)
    analysis_database = mat_info[0]["analysis_database"]

    session = sqlalchemy_cache.get(aligned_volume)
    engine = sqlalchemy_cache.get_engine(aligned_volume)
    mat_session = sqlalchemy_cache.get(analysis_database)
    mat_engine = sqlalchemy_cache.get_engine(analysis_database)
    mat_client = dynamic_annotation_cache.get_db(analysis_database)

    # Everything that can raise runs inside this try so that the cleanup in
    # ``finally`` also happens when validation fails part-way through.
    try:
        versioned_database = (session.query(AnalysisVersion).filter(
            AnalysisVersion.version == analysis_version).one())

        valid_table_count = 0
        for mat_metadata in mat_info:
            annotation_table_name = mat_metadata["annotation_table_name"]

            # Row count recorded in the materialized metadata table.
            live_table_row_count = (mat_session.query(
                MaterializedMetadata.row_count).filter(
                    MaterializedMetadata.table_name ==
                    annotation_table_name).scalar())
            # Row count reported by the materialized database client.
            mat_row_count = mat_client._get_table_row_count(
                annotation_table_name)
            celery_logger.info(
                f"ROW COUNTS: {live_table_row_count} {mat_row_count}")

            if mat_row_count == 0:
                celery_logger.warning(
                    f"{annotation_table_name} has {mat_row_count} rows, skipping.")
                continue

            if live_table_row_count != mat_row_count:
                raise ValueError(
                    f"""Row count doesn't match for table '{annotation_table_name}': 
                    Row count in '{aligned_volume}': {live_table_row_count} - Row count in {analysis_database}: {mat_row_count}"""
                )
            celery_logger.info(f"{annotation_table_name} row counts match")
            schema = mat_metadata["schema"]
            table_metadata = None
            if mat_metadata.get("reference_table"):
                table_metadata = {
                    "reference_table": mat_metadata.get("reference_table")
                }

            anno_model = make_flat_model(
                table_name=annotation_table_name,
                schema_type=schema,
                segmentation_source=None,
                table_metadata=table_metadata,
            )
            live_mapped_indexes = index_cache.get_index_from_model(
                anno_model, mat_engine)
            mat_mapped_indexes = index_cache.get_table_indices(
                annotation_table_name, mat_engine)

            if live_mapped_indexes != mat_mapped_indexes:
                raise IndexMatchError(
                    f"Indexes did not match: annotation indexes {live_mapped_indexes}; materialized indexes {mat_mapped_indexes}"
                )

            celery_logger.info(
                f"Indexes matches: {live_mapped_indexes} {mat_mapped_indexes}")

            table_validity = (session.query(AnalysisTable).filter(
                AnalysisTable.analysisversion_id ==
                versioned_database.id).filter(
                    AnalysisTable.table_name == annotation_table_name).one())
            table_validity.valid = True
            valid_table_count += 1
        celery_logger.info(
            f"Valid tables {valid_table_count}, Mat tables {table_count}")

        if valid_table_count != table_count:
            raise ValueError(
                f"Valid table amounts don't match {valid_table_count} {table_count}"
            )
        versioned_database.valid = True

        # A failing commit is logged rather than raised, as before.
        try:
            session.commit()
        except Exception as e:
            session.rollback()
            celery_logger.error(e)
            return None
        return "All materialized tables match valid row number from live tables"
    finally:
        # Closing the session discards any validity flags that were set but
        # never committed.
        session.close()
        mat_client.cached_session.close()
        mat_session.close()
        engine.dispose()
        mat_engine.dispose()
예제 #6
0
def create_materialized_metadata(
    self,
    datastack_info: dict,
    analysis_version: int,
    materialization_time_stamp: datetime.datetime,
):
    """Creates a metadata table in a materialized database. Reads row counts
    from annotation tables copied to the materialized database. Inserts row count
    and table info into the metadata table.

    Failures while creating the metadata table or while inserting rows are
    logged and not re-raised. Rows are committed one table at a time, so
    rows inserted before a failure stay committed.

    Args:
        datastack_info (dict): datastack description; read for
            ``["aligned_volume"]["name"]`` and ``["datastack"]``
        analysis_version (int): the materialized version number
        materialization_time_stamp (datetime.datetime): timestamp stored with
            every inserted metadata row

    Returns:
        bool: always True when the function returns; it does not indicate
        whether every row was inserted.
    """
    aligned_volume = datastack_info["aligned_volume"]["name"]
    datastack = datastack_info["datastack"]
    SQL_URI_CONFIG = get_config_param("SQLALCHEMY_DATABASE_URI")
    sql_base_uri = SQL_URI_CONFIG.rpartition("/")[0]
    sql_uri = make_url(f"{sql_base_uri}/{aligned_volume}")
    analysis_sql_uri = create_analysis_sql_uri(SQL_URI_CONFIG, datastack,
                                               analysis_version)

    session, engine = create_session(sql_uri)
    analysis_session, analysis_engine = create_session(analysis_sql_uri)
    mat_client = None

    # One outer try/finally guarantees sessions and engines are released even
    # if fetching the client or the annotation metadata raises.
    try:
        try:
            mat_table = MaterializedMetadata()
            mat_table.__table__.create(bind=analysis_engine)  # pylint: disable=maybe-no-member
        except Exception as e:
            celery_logger.error(
                f"Materialized Metadata table creation failed {e}")

        mat_client = dynamic_annotation_cache.get_db(
            f"{datastack}__mat{analysis_version}")

        tables = session.query(AnnoMetadata).all()
        try:
            for table in tables:
                # only create table if marked as valid in the metadata table
                if not table.valid:
                    continue
                table_name = table.table_name

                valid_row_count = mat_client._get_table_row_count(
                    table_name, filter_valid=True)
                celery_logger.info(f"Row count {valid_row_count}")
                if valid_row_count == 0:
                    continue

                # ``schema_type`` is already loaded on the row fetched above,
                # so no additional per-table query is needed.
                mat_metadata = MaterializedMetadata(
                    schema=table.schema_type,
                    table_name=table_name,
                    row_count=valid_row_count,
                    materialized_timestamp=materialization_time_stamp,
                )
                analysis_session.add(mat_metadata)
                analysis_session.commit()
        except Exception as database_error:
            analysis_session.rollback()
            session.rollback()
            celery_logger.error(database_error)
    finally:
        session.close()
        engine.dispose()
        if mat_client is not None:
            mat_client.cached_session.close()
        analysis_session.close()
        analysis_engine.dispose()
    return True
예제 #7
0
        analysis_version (int, optional): Analysis version to use for frozen materialization. Defaults to None.
        skip_table (bool, optional): Triggers row count for skipping tables larger than row_size arg. Defaults to False.
        row_size (int, optional): Row size number to check. Defaults to 1_000_000.

    Returns:
        List[dict]: [description]
    """

    aligned_volume_name = datastack_info["aligned_volume"]["name"]
    pcg_table_name = datastack_info["segmentation_source"].split("/")[-1]
    segmentation_source = datastack_info.get("segmentation_source")

    if not materialization_time_stamp:
        materialization_time_stamp = datetime.datetime.utcnow()

    db = dynamic_annotation_cache.get_db(aligned_volume_name)

    annotation_tables = db.get_valid_table_names()
    metadata = []
    celery_logger.debug(f"Annotation tables: {annotation_tables}")
    for annotation_table in annotation_tables:
        row_count = db._get_table_row_count(annotation_table, filter_valid=True)
        max_id = db.get_max_id_value(annotation_table)
        min_id = db.get_min_id_value(annotation_table)
        if row_count == 0:
            continue

        if row_count >= row_size and skip_table:
            continue

        md = db.get_table_metadata(annotation_table)
 def test_get_mat_client(self, test_app, aligned_volume_name):
     """Check that the cache returns a DynamicMaterializationClient.

     The client is also stored on ``self.mat_client``.
     """
     client = dynamic_annotation_cache.get_db(aligned_volume_name)
     self.mat_client = client
     assert isinstance(client, DynamicMaterializationClient)