def get_aligned_volumes():
    server = get_config_param("GLOBAL_SERVER")
    api_version = int(get_config_param("INFO_API_VERSION"))
    auth = AuthClient(server_address=server, token=current_app.config["AUTH_TOKEN"])
    infoclient = InfoServiceClient(
        server_address=server,
        auth_client=auth,
        api_version=api_version,
    )
    aligned_volume_names = infoclient.get_aligned_volumes()
    return aligned_volume_names
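# Usage sketch (not from the original source): get_aligned_volumes() reads
# current_app.config["AUTH_TOKEN"], so it has to run inside a Flask application
# context. The app object and token below are placeholders for illustration.
from flask import Flask

app = Flask(__name__)
app.config["AUTH_TOKEN"] = "example-token"  # placeholder token

with app.app_context():
    print(get_aligned_volumes())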
Example 2
def insert_annotation_data(self, chunk: List[int], mat_metadata: dict):
    """Insert annotation data into database

    Args:
        chunk (List[int]): chunk of annotation ids
        mat_metadata (dict): materialized metadata
    Returns:
        bool: True if data was inserted
    """
    aligned_volume = mat_metadata["aligned_volume"]
    analysis_version = mat_metadata["analysis_version"]
    annotation_table_name = mat_metadata["annotation_table_name"]
    datastack = mat_metadata["datastack"]

    session = sqlalchemy_cache.get(aligned_volume)
    engine = sqlalchemy_cache.get_engine(aligned_volume)

    # build table models
    AnnotationModel = create_annotation_model(mat_metadata,
                                              with_crud_columns=False)
    SegmentationModel = create_segmentation_model(mat_metadata)
    analysis_table = get_analysis_table(aligned_volume, datastack,
                                        annotation_table_name,
                                        analysis_version)

    query_columns = []
    for col in AnnotationModel.__table__.columns:
        query_columns.append(col)
    for col in SegmentationModel.__table__.columns:
        if col.name != "id":
            query_columns.append(col)

    chunked_id_query = query_id_range(AnnotationModel.id, chunk[0], chunk[1])

    anno_ids = (session.query(
        AnnotationModel.id).filter(chunked_id_query).filter(
            AnnotationModel.valid == True))
    query = (session.query(*query_columns).join(SegmentationModel).filter(
        SegmentationModel.id == AnnotationModel.id).filter(
            SegmentationModel.id.in_(anno_ids)))
    data = query.all()
    mat_df = pd.DataFrame(data)
    mat_records = mat_df.to_dict(orient="records")
    SQL_URI_CONFIG = get_config_param("SQLALCHEMY_DATABASE_URI")
    analysis_sql_uri = create_analysis_sql_uri(SQL_URI_CONFIG, datastack,
                                               analysis_version)
    analysis_session, analysis_engine = create_session(analysis_sql_uri)

    try:
        analysis_engine.execute(analysis_table.insert(), mat_records)
    except Exception as e:
        celery_logger.error(e)
        analysis_session.rollback()
    finally:
        analysis_session.close()
        analysis_engine.dispose()
        session.close()
        engine.dispose()
    return True
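# Data-flow sketch (an assumption about how these examples fit together, not
# code from the original source): generate_chunked_model_ids() (Example 5)
# yields [start_id, end_id] ranges and insert_annotation_data() is run once per
# range. In the real service both run as bound Celery tasks; they are called
# directly here only to illustrate the flow, with `task_self` as a placeholder.
def materialize_table_sketch(task_self, mat_metadata: dict) -> None:
    for chunk in generate_chunked_model_ids(mat_metadata):
        insert_annotation_data(task_self, chunk, mat_metadata)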
Example 3
def materialize_database(days_to_expire: int = 5) -> bool:
    """
    Materialize database. Steps are as follows:
    1. Create new versioned database.
    2. Copy tables into versioned database
    3. Merge annotation and segmentation tables
    4. Re-index merged tables
    5. Check merge tables for row count and index consistency

    """
    try:
        datastacks = json.loads(os.environ["DATASTACKS"])
    except KeyError as e:
        celery_logger.error(f"KeyError: {e}")
        datastacks = get_config_param("DATASTACKS")
    for datastack in datastacks:
        try:
            celery_logger.info(f"Materializing {datastack} database")
            datastack_info = get_datastack_info(datastack)
            task = create_versioned_materialization_workflow.s(
                datastack_info, days_to_expire)
            task.apply_async()
        except Exception as e:
            celery_logger.error(e)
            raise e
    return True
Example 4
def run_periodic_materialization(days_to_expire: int = None) -> bool:
    """
    Run complete materialization workflow. Steps are as follows:
    1. Find missing segmentation data in a given datastack and lookup.
    2. Update expired root ids
    3. Copy database to new frozen version
    4. Merge annotation and segmentation tables together
    5. Drop non-materialized tables
    """
    try:
        datastacks = json.loads(os.environ["DATASTACKS"])
    except KeyError:
        datastacks = get_config_param("DATASTACKS")

    for datastack in datastacks:
        try:
            celery_logger.info(
                f"Start periodic materialization job for {datastack}")
            datastack_info = get_datastack_info(datastack)
            datastack_info["database_expires"] = True
            task = run_complete_workflow.s(datastack_info,
                                           days_to_expire=days_to_expire)
            task.apply_async()
        except Exception as e:
            celery_logger.error(e)
            raise e
    return True
Example 5
def generate_chunked_model_ids(
    mat_metadata: dict, use_segmentation_model=False
) -> List[List]:
    """Creates list of chunks with start:end index for chunking queries for materialization.

    Parameters
    ----------
    mat_metadata : dict
        Materialization metadata

    Returns
    -------
    List[List]
        list of lists containing start and end indices
    """
    celery_logger.info("Chunking supervoxel ids")
    if use_segmentation_model:
        AnnotationModel = create_segmentation_model(mat_metadata)
    else:
        AnnotationModel = create_annotation_model(mat_metadata)
    chunk_size = mat_metadata.get("chunk_size")

    if not chunk_size:
        ROW_CHUNK_SIZE = get_config_param("MATERIALIZATION_ROW_CHUNK_SIZE")
        chunk_size = ROW_CHUNK_SIZE

    chunked_ids = chunk_ids(mat_metadata, AnnotationModel.id, chunk_size)

    return list(chunked_ids)
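# query_id_range(), used by insert_annotation_data() (Example 2) to turn each
# chunk into a filter clause, is not shown in these examples. The sketch below
# is an assumption about its behavior, inferred purely from how it is called.
from sqlalchemy import and_

def query_id_range(id_column, start_id, end_id):
    """Return a filter selecting start_id <= id < end_id (open-ended if end_id is None)."""
    if end_id is not None:
        return and_(id_column >= start_id, id_column < end_id)
    return id_column >= start_id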
Example 6
def get_aligned_volumes_databases():
    aligned_volumes = get_aligned_volumes()
    SQL_URI_CONFIG = get_config_param("SQLALCHEMY_DATABASE_URI")
    sql_base_uri = SQL_URI_CONFIG.rpartition("/")[0]

    engine = create_engine(sql_base_uri)
    with engine.connect() as connection:
        result = connection.execute("SELECT datname FROM pg_database;")
        databases = [database[0] for database in result]
    aligned_volume_databases = list(
        set(aligned_volumes).intersection(databases))
    return aligned_volume_databases
Example 7
def drop_tables(self, mat_info: List[dict], analysis_version: int):
    """Drop all tables that don't match valid in the live 'aligned_volume' database
    as well as tables that were copied from the live table that are not needed in
    the frozen version (e.g. metadata tables).

    Args:
        mat_info (List[dict]): materialization metadata for each annotation table in the datastack
        analysis_version (int): materialized version number

    Raises:
        e: error if dropping table(s) fails.

    Returns:
        str: tables that have been dropped
    """
    datastack = mat_info[0]["datastack"]

    SQL_URI_CONFIG = get_config_param("SQLALCHEMY_DATABASE_URI")
    analysis_sql_uri = create_analysis_sql_uri(SQL_URI_CONFIG, datastack,
                                               analysis_version)

    mat_engine = create_engine(analysis_sql_uri)

    mat_inspector = reflection.Inspector.from_engine(mat_engine)
    mat_table_names = mat_inspector.get_table_names()
    mat_table_names.remove("materializedmetadata")

    annotation_tables = [
        table.get("annotation_table_name") for table in mat_info
    ]
    segmentation_tables = [
        table.get("segmentation_table_name") for table in mat_info
        if table.get("segmentation_table_name") is not None
    ]

    filtered_tables = annotation_tables + segmentation_tables

    tables_to_drop = set(mat_table_names) - set(filtered_tables)
    tables_to_drop.discard("spatial_ref_sys")  # keep the PostGIS spatial reference table

    try:
        connection = mat_engine.connect()
        for table in tables_to_drop:
            drop_statement = f"DROP TABLE {table} CASCADE"
            connection.execute(drop_statement)
    except Exception as e:
        celery_logger.error(e)
        raise e
    finally:
        connection.close()
        mat_engine.dispose()

    return {f"Tables dropped {tables_to_drop}"}
Example 8
def add_indices(self, mat_metadata: dict):
    """Find missing indices for a given table contained
    in the mat_metadata dict. Spawns a chain of celery
    tasks that run synchronously that add an index per task.

    Args:
        mat_metadata (dict): datastack info for the aligned_volume derived from the infoservice

    Returns:
        chain: chain of celery tasks
    """
    add_indices = mat_metadata.get("add_indices", False)
    if add_indices:
        analysis_version = mat_metadata.get("analysis_version")
        datastack = mat_metadata["datastack"]
        analysis_database = mat_metadata["analysis_database"]
        SQL_URI_CONFIG = get_config_param("SQLALCHEMY_DATABASE_URI")
        analysis_sql_uri = create_analysis_sql_uri(SQL_URI_CONFIG, datastack,
                                                   analysis_version)

        analysis_session, analysis_engine = create_session(analysis_sql_uri)

        annotation_table_name = mat_metadata.get("annotation_table_name")
        schema = mat_metadata.get("schema")

        table_metadata = None
        if mat_metadata.get("reference_table"):
            table_metadata = {
                "reference_table": mat_metadata.get("reference_table")
            }

        model = make_flat_model(
            table_name=annotation_table_name,
            schema_type=schema,
            segmentation_source=None,
            table_metadata=table_metadata,
        )

        commands = index_cache.add_indices_sql_commands(
            annotation_table_name, model, analysis_engine)
        analysis_session.close()
        analysis_engine.dispose()

        add_index_tasks = chain(
            [add_index.si(analysis_database, command) for command in commands])

        return self.replace(add_index_tasks)
    return "Indices already exist"
Example 9
def get_analysis_table(aligned_volume: str,
                       datastack: str,
                       table_name: str,
                       mat_version: int = 1):
    """Helper method that returns a table model.

    Args:
        aligned_volume (str): aligned_volume name
        datastack (str): datastack name
        table_name (str): table to reflect a model
        mat_version (int, optional): target database version

    Returns:
        SQLAlchemy model: returns a sqlalchemy model of a target table
    """
    anno_db = dynamic_annotation_cache.get_db(aligned_volume)
    schema_name = anno_db.get_table_schema(table_name)
    SQL_URI_CONFIG = get_config_param("SQLALCHEMY_DATABASE_URI")
    analysis_sql_uri = create_analysis_sql_uri(SQL_URI_CONFIG, datastack,
                                               mat_version)
    analysis_engine = create_engine(analysis_sql_uri)

    meta = MetaData()
    meta.reflect(bind=analysis_engine)

    anno_schema = get_schema(schema_name)
    flat_schema = create_flattened_schema(anno_schema)

    if not analysis_engine.dialect.has_table(analysis_engine, table_name):
        annotation_dict = create_table_dict(
            table_name=table_name,
            Schema=flat_schema,
            segmentation_source=None,
            table_metadata=None,
            with_crud_columns=False,
        )
        analysis_table = type(table_name, (Base, ), annotation_dict)
    else:
        analysis_table = meta.tables[table_name]

    analysis_engine.dispose()
    return analysis_table
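# create_analysis_sql_uri() appears throughout these examples but is not shown.
# Judging from the "{datastack}__mat{version}" database names used elsewhere
# (e.g. Example 13), it presumably rewrites the configured SQL URI to point at
# the versioned database. A minimal sketch under that assumption:
from sqlalchemy.engine.url import make_url

def create_analysis_sql_uri(sql_uri: str, datastack: str, mat_version: int):
    sql_base_uri = sql_uri.rpartition("/")[0]
    return make_url(f"{sql_base_uri}/{datastack}__mat{mat_version}")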
Example 10
def run_periodic_database_update() -> bool:
    """
    Run update database workflow. Steps are as follows:
    1. Find missing segmentation data in a given datastack and lookup.
    2. Update expired root ids

    """
    try:
        datastacks = json.loads(os.environ["DATASTACKS"])
    except KeyError:
        datastacks = get_config_param("DATASTACKS")

    for datastack in datastacks:
        try:
            celery_logger.info(
                f"Start periodic database update job for {datastack}")
            datastack_info = get_datastack_info(datastack)
            task = update_database_workflow.s(datastack_info)
            task.apply_async()
        except Exception as e:
            celery_logger.error(e)
            raise e
    return True
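# The run_periodic_* helpers are typically driven by Celery beat. A sketch,
# assuming a Celery app and task name as shown (both are placeholders, not
# taken from the original source):
from celery import Celery
from celery.schedules import crontab

celery = Celery("materialize")  # placeholder app; the real app is configured elsewhere
celery.conf.beat_schedule = {
    "periodic-database-update": {
        "task": "run_periodic_database_update",  # assumed task name
        "schedule": crontab(minute=0, hour="*/1"),  # hourly, for illustration
    },
}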
Example 11
def drop_indices(self, mat_metadata: dict):
    """Drop all indices of a given table.

    Args:
        mat_metadata (dict): datastack info for the aligned_volume derived from the infoservice

    Returns:
        str: string if indices were dropped or not.
    """
    add_indices = mat_metadata.get("add_indices", False)
    if add_indices:
        analysis_version = mat_metadata.get("analysis_version", None)
        datastack = mat_metadata["datastack"]
        temp_mat_table_name = mat_metadata["temp_mat_table_name"]
        SQL_URI_CONFIG = get_config_param("SQLALCHEMY_DATABASE_URI")
        analysis_sql_uri = create_analysis_sql_uri(SQL_URI_CONFIG, datastack,
                                                   analysis_version)

        analysis_session, analysis_engine = create_session(analysis_sql_uri)
        index_cache.drop_table_indices(temp_mat_table_name, analysis_engine)
        analysis_session.close()
        analysis_engine.dispose()
        return "Indices DROPPED"
    return "No indices dropped"
Example 12
def merge_tables(self, mat_metadata: dict):
    """Merge all the annotation and segmentation rows into a new table that are
    flagged as valid. Drop the original split tables after inserting all the rows
    into the new table.

    Args:
        mat_metadata (dict): materialization metadata for the table

    Raises:
        e: error during table merging operation

    Returns:
        str: number of rows copied
    """
    analysis_version = mat_metadata["analysis_version"]
    annotation_table_name = mat_metadata["annotation_table_name"]
    segmentation_table_name = mat_metadata["segmentation_table_name"]
    temp_table_name = mat_metadata["temp_mat_table_name"]
    schema = mat_metadata["schema"]
    datastack = mat_metadata["datastack"]

    # create dynamic sql_uri
    SQL_URI_CONFIG = get_config_param("SQLALCHEMY_DATABASE_URI")
    analysis_sql_uri = create_analysis_sql_uri(SQL_URI_CONFIG, datastack,
                                               analysis_version)

    # get schema and match column order for sql query
    anno_schema = get_schema(schema)
    flat_schema = create_flattened_schema(anno_schema)

    ordered_model_columns = create_table_dict(
        table_name=annotation_table_name,
        Schema=flat_schema,
        segmentation_source=None,
        table_metadata=None,
        with_crud_columns=False,
    )

    AnnotationModel = create_annotation_model(mat_metadata,
                                              with_crud_columns=False)
    SegmentationModel = create_segmentation_model(mat_metadata)

    query_columns = {}
    crud_columns = ["created", "deleted", "superceded_id"]
    for col in AnnotationModel.__table__.columns:
        if col.name not in crud_columns:
            query_columns[col.name] = col
    for col in SegmentationModel.__table__.columns:
        if col.name != "id":
            query_columns[col.name] = col

    sorted_columns = OrderedDict([(key, query_columns[key])
                                  for key in ordered_model_columns
                                  if key in query_columns.keys()])
    sorted_columns_list = list(sorted_columns.values())
    columns = [f'"{col.table}".{col.name}' for col in sorted_columns_list]

    mat_session, mat_engine = create_session(analysis_sql_uri)

    query = f"""
        SELECT 
            {', '.join(columns)}
        FROM 
            {AnnotationModel.__table__.name}
        JOIN 
            "{SegmentationModel.__table__.name}"
            ON {AnnotationModel.id} = "{SegmentationModel.__table__.name}".id
        WHERE
            {AnnotationModel.id} = "{SegmentationModel.__table__.name}".id
        AND {AnnotationModel.valid} = true

    """

    try:
        mat_db_connection = mat_engine.connect()
        with mat_db_connection.begin():
            insert_query = mat_db_connection.execute(
                f"CREATE TABLE {temp_table_name} AS ({query});")
            row_count = insert_query.rowcount
            drop_query = mat_db_connection.execute(
                f'DROP TABLE {annotation_table_name}, "{segmentation_table_name}" CASCADE;'
            )
            alter_query = mat_db_connection.execute(
                f"ALTER TABLE {temp_table_name} RENAME TO {annotation_table_name};"
            )
        mat_session.close()
        mat_engine.dispose()

        return f"Number of rows copied: {row_count}"
    except Exception as e:
        celery_logger.error(e)
        raise e
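# Illustration only (hypothetical table names, not from the original source):
# with an annotation table "synapses" and segmentation table "synapses__seg",
# the f-string above renders roughly the statement below, which then gets
# wrapped as CREATE TABLE temp__synapses AS (...).
example_rendered_query = """
    SELECT
        "synapses".id, "synapses".pre_pt_position, "synapses__seg".pre_pt_root_id
    FROM
        synapses
    JOIN
        "synapses__seg"
        ON synapses.id = "synapses__seg".id
    WHERE
        synapses.id = "synapses__seg".id
    AND synapses.valid = true
"""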
Example 13
def create_materialized_metadata(
    self,
    datastack_info: dict,
    analysis_version: int,
    materialization_time_stamp: datetime.datetime,
):
    """Creates a metadata table in a materialized database. Reads row counts
    from annotation tables copied to the materialized database. Inserts row count
    and table info into the metadata table.

    Args:
        datastack_info (dict): datastack metadata from the infoservice
        analysis_version (int): materialized version number
        materialization_time_stamp (datetime.datetime): UTC timestamp of the materialization run

    Raises:
       database_error:  sqlalchemy connection error

    Returns:
        bool: True if the metadata table was created and table info was inserted.
    """
    aligned_volume = datastack_info["aligned_volume"]["name"]
    datastack = datastack_info["datastack"]
    SQL_URI_CONFIG = get_config_param("SQLALCHEMY_DATABASE_URI")
    sql_base_uri = SQL_URI_CONFIG.rpartition("/")[0]
    sql_uri = make_url(f"{sql_base_uri}/{aligned_volume}")
    analysis_sql_uri = create_analysis_sql_uri(SQL_URI_CONFIG, datastack,
                                               analysis_version)

    session, engine = create_session(sql_uri)
    analysis_session, analysis_engine = create_session(analysis_sql_uri)

    try:
        mat_table = MaterializedMetadata()
        mat_table.__table__.create(bind=analysis_engine)  # pylint: disable=maybe-no-member
    except Exception as e:
        celery_logger.error(f"Materialized Metadata table creation failed {e}")

    mat_client = dynamic_annotation_cache.get_db(
        f"{datastack}__mat{analysis_version}")

    tables = session.query(AnnoMetadata).all()
    try:
        for table in tables:
            # only create table if marked as valid in the metadata table
            if table.valid:
                table_name = table.table_name
                schema_type = (session.query(AnnoMetadata.schema_type).filter(
                    AnnoMetadata.table_name == table_name).one())

                valid_row_count = mat_client._get_table_row_count(
                    table_name, filter_valid=True)
                celery_logger.info(f"Row count {valid_row_count}")
                if valid_row_count == 0:
                    continue

                mat_metadata = MaterializedMetadata(
                    schema=schema_type[0],
                    table_name=table_name,
                    row_count=valid_row_count,
                    materialized_timestamp=materialization_time_stamp,
                )
                analysis_session.add(mat_metadata)
                analysis_session.commit()
    except Exception as database_error:
        analysis_session.rollback()
        session.rollback()
        celery_logger.error(database_error)
    finally:
        session.close()
        engine.dispose()
        mat_client.cached_session.close()
        analysis_session.close()
        analysis_engine.dispose()
    return True
Example 14
def create_analysis_database(self, datastack_info: dict,
                             analysis_version: int) -> bool:
    """Copies live database to new versioned database for materializied annotations.

    Args:
        datastack_info (dict): datastack metadata
        analysis_version (int): analysis database version number

    Raises:
        OperationalError: if creating the new database fails; the task is retried.

    Returns:
        bool: True if analysis database creation is successful
    """

    aligned_volume = datastack_info["aligned_volume"]["name"]
    datastack = datastack_info["datastack"]
    SQL_URI_CONFIG = get_config_param("SQLALCHEMY_DATABASE_URI")
    sql_base_uri = SQL_URI_CONFIG.rpartition("/")[0]
    sql_uri = make_url(f"{sql_base_uri}/{aligned_volume}")
    analysis_sql_uri = create_analysis_sql_uri(str(sql_uri), datastack,
                                               analysis_version)

    engine = create_engine(sql_uri,
                           isolation_level="AUTOCOMMIT",
                           pool_pre_ping=True)

    connection = engine.connect()
    connection.connection.set_session(autocommit=True)

    result = connection.execute(f"SELECT 1 FROM pg_catalog.pg_database \
                WHERE datname = '{analysis_sql_uri.database}'")
    if not result.fetchone():
        try:
            # create new database from template_postgis database
            celery_logger.info(
                f"Creating new materialized database {analysis_sql_uri.database}"
            )

            drop_connections = f"""
            SELECT 
                pg_terminate_backend(pid) 
            FROM 
                pg_stat_activity
            WHERE 
                datname = '{aligned_volume}'
            AND pid <> pg_backend_pid()
            """

            connection.execute(drop_connections)

            connection.execute(f"""CREATE DATABASE {analysis_sql_uri.database} 
                    WITH TEMPLATE {aligned_volume}""")
            # let's reconnect
            try:
                connection = engine.connect()
                # check if database exists
                db_result = connection.execute(
                    f"SELECT 1 FROM pg_catalog.pg_database \
                        WHERE datname = '{analysis_sql_uri.database}'")
                db_result.fetchone()
            except Exception as e:
                celery_logger.error(f"Connection was lost: {e}")

        except OperationalError as sql_error:
            celery_logger.error(f"ERROR: {sql_error}")
            raise self.retry(exc=sql_error, countdown=3)
        finally:
            # invalidate caches since we killed connections to the live db
            dynamic_annotation_cache.invalidate_cache()
            sqlalchemy_cache.invalidate_cache()

    connection.close()
    engine.dispose()
    return True
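# The heart of the copy above is Postgres' CREATE DATABASE ... WITH TEMPLATE.
# A stripped-down sketch of that pattern (placeholder URI and names; retries,
# connection termination and cache invalidation are omitted):
from sqlalchemy import create_engine

def copy_database_sketch(sql_base_uri: str, source_db: str, target_db: str) -> None:
    engine = create_engine(f"{sql_base_uri}/{source_db}", isolation_level="AUTOCOMMIT")
    with engine.connect() as connection:
        connection.execute(f'CREATE DATABASE "{target_db}" WITH TEMPLATE "{source_db}"')
    engine.dispose()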
Example 15
def create_new_version(
    datastack_info: dict,
    materialization_time_stamp: datetime.datetime,
    days_to_expire: int = None,
):
    """Create new versioned database row in the analysis_version table.
    Sets the expiration date for the database.

    Args:
        datastack_info (dict): datastack info from infoservice
        materialization_time_stamp (datetime.datetime.utcnow): UTC timestamp of root_id lookup
        days_to_expire (int, optional): Number of days until the database is flagged as expired. Defaults to None (no expiration date is set).

    Returns:
        int: version number of the materialized database
    """
    aligned_volume = datastack_info["aligned_volume"]["name"]
    datastack = datastack_info.get("datastack")

    table_objects = [
        AnalysisVersion.__tablename__,
        AnalysisTable.__tablename__,
    ]
    SQL_URI_CONFIG = get_config_param("SQLALCHEMY_DATABASE_URI")
    sql_base_uri = SQL_URI_CONFIG.rpartition("/")[0]
    sql_uri = make_url(f"{sql_base_uri}/{aligned_volume}")

    session, engine = create_session(sql_uri)

    # create analysis metadata table if not exists
    for table in table_objects:
        if not engine.dialect.has_table(engine, table):
            Base.metadata.tables[table].create(bind=engine)

    top_version = session.query(func.max(AnalysisVersion.version)).scalar()

    if top_version is None:
        new_version_number = 1
    else:
        new_version_number = top_version + 1
    if days_to_expire is not None and days_to_expire > 0:
        expiration_date = materialization_time_stamp + datetime.timedelta(
            days=days_to_expire)
    else:
        expiration_date = None

    analysisversion = AnalysisVersion(
        datastack=datastack,
        time_stamp=materialization_time_stamp,
        version=new_version_number,
        valid=False,
        expires_on=expiration_date,
    )
    try:
        session.add(analysisversion)
        session.commit()
    except Exception as e:
        session.rollback()
        celery_logger.error(e)
    finally:
        session.close()
        engine.dispose()
    return new_version_number
Example 16
                if not last_updated_time_stamp:
                    last_updated_time_stamp = None
                else:
                    last_updated_time_stamp = str(last_updated_time_stamp)

                table_metadata.update(
                    {
                        "create_segmentation_table": create_segmentation_table,
                        "segmentation_table_name": segmentation_table_name,
                        "temp_mat_table_name": f"temp__{annotation_table}",
                        "pcg_table_name": pcg_table_name,
                        "segmentation_source": segmentation_source,
                        "last_updated_time_stamp": last_updated_time_stamp,
                        "chunk_size": get_config_param(
                            "MATERIALIZATION_ROW_CHUNK_SIZE"
                        ),
                        "find_all_expired_roots": datastack_info.get(
                            "find_all_expired_roots", False
                        ),
                    }
                )
            if analysis_version:
                table_metadata.update(
                    {
                        "analysis_version": analysis_version,
                        "analysis_database": f"{datastack_info['datastack']}__mat{analysis_version}",
                    }
                )

            metadata.append(table_metadata.copy())
Example 17
def remove_expired_databases(delete_threshold: int = 5) -> list:
    """
    Remove databases that have expired as of the time this method is called.
    """
    aligned_volume_databases = get_aligned_volumes_databases()
    datastacks = get_config_param("DATASTACKS")
    current_time = datetime.utcnow()
    remove_db_cron_info = []

    for datastack in datastacks:
        datastack_info = get_datastack_info(datastack)
        aligned_volume = datastack_info["aligned_volume"]["name"]
        if aligned_volume in aligned_volume_databases:
            SQL_URI_CONFIG = get_config_param("SQLALCHEMY_DATABASE_URI")
            sql_base_uri = SQL_URI_CONFIG.rpartition("/")[0]
            sql_uri = make_url(f"{sql_base_uri}/{aligned_volume}")
            session, engine = create_session(sql_uri)
            session.expire_on_commit = False
            # get number of expired dbs that are ready for deletion
            try:
                expired_results = (session.query(AnalysisVersion).filter(
                    AnalysisVersion.expires_on <= current_time).all())
                expired_versions = [
                    str(expired_db) for expired_db in expired_results
                ]

            except Exception as sql_error:
                celery_logger.error(f"Error: {sql_error}")
                continue

            # get databases that currently exist, filtered to materialized dbs
            result = engine.execute(
                "SELECT datname FROM pg_database;").fetchall()
            database_list = list(itertools.chain.from_iterable(result))
            databases = [
                database for database in database_list
                if database.startswith(datastack)
            ]

            # get databases to delete that are currently present
            databases_to_delete = [
                database for database in databases
                if database in expired_versions
            ]

            dropped_dbs_info = {
                "aligned_volume":
                aligned_volume,
                "materialized_databases":
                (databases, f"count={len(databases)}"),
                "expired_databases": (
                    expired_versions,
                    f"count={len(expired_versions)}",
                ),
                "delete_threshold":
                delete_threshold,
            }
            dropped_dbs = []

            if len(databases) > delete_threshold:
                with engine.connect() as conn:
                    conn.execution_options(isolation_level="AUTOCOMMIT")
                    for database in databases_to_delete:
                        try:
                            sql = (
                                "SELECT 1 FROM pg_database WHERE datname='%s'"
                                % database)
                            result_proxy = conn.execute(sql)
                            result = result_proxy.scalar()
                            if result:
                                drop_connections = f"""
                                SELECT 
                                    pg_terminate_backend(pid) 
                                FROM 
                                    pg_stat_activity
                                WHERE 
                                    datname = '{database}'
                                AND pid <> pg_backend_pid()
                                """

                                conn.execute(drop_connections)
                                celery_logger.info(
                                    f"Dropped connections to: {database}")
                                sql = "DROP DATABASE %s" % database
                                result_proxy = conn.execute(sql)
                                celery_logger.info(
                                    f"Database: {database} removed")

                                # strip version from database string
                                database_version = database.rsplit("__mat")[-1]

                                expired_database = (
                                    session.query(AnalysisVersion).filter(
                                        AnalysisVersion.version ==
                                        database_version).one())
                                expired_database.valid = False
                                session.commit()
                                celery_logger.info(
                                    f"Database '{expired_database}' dropped")
                                dropped_dbs.append(expired_database)
                                dropped_dbs_info[
                                    "dropped_databases"] = dropped_dbs
                        except Exception as e:
                            celery_logger.error(
                                f"ERROR: {e}: {database} does not exist")
            remove_db_cron_info.append(dropped_dbs_info)
            session.close()
    return remove_db_cron_info
Example 18
def _get_mat_client(self, database: str):
    sql_uri_config = get_config_param("SQLALCHEMY_DATABASE_URI")
    mat_client = DynamicMaterializationClient(database, sql_uri_config)
    self._clients[database] = mat_client
    return self._clients[database]