Example No. 1
    @classmethod
    def for_schema_base(cls, schema_base: DeclarativeMeta) -> Session:
        """Returns a new Session bound to the engine registered for |schema_base|."""
        engine = SQLAlchemyEngineManager.get_engine_for_schema_base(
            schema_base)
        if engine is None:
            raise ValueError(
                f"No engine set for base [{schema_base.__name__}]")

        session = Session(bind=engine)
        cls._apply_session_listener_for_schema_base(schema_base, session)
        return session
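A minimal call-site sketch, assuming the method above lives on a class named SessionFactory and that JailsBase (a declarative base seen in the later examples) has an engine registered; both names are assumptions here:

# Hypothetical usage; SessionFactory and JailsBase are assumed names,
# and some_entity stands in for any mapped object.
session = SessionFactory.for_schema_base(JailsBase)
try:
    session.add(some_entity)
    session.commit()
finally:
    session.close()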
Example No. 2
def stop_and_clear_on_disk_postgresql_database() -> None:
    """Drops all tables in the local postgres database and stops the postgres server. Should be called in the
    tearDownClass function so this only runs once per test class."""
    if environment.in_gae():
        raise ValueError('Running test-only code in Google App Engine.')

    for declarative_base in DECLARATIVE_BASES:
        use_on_disk_postgresql_database(declarative_base)
        declarative_base.metadata.drop_all(
            SQLAlchemyEngineManager.get_engine_for_schema_base(declarative_base))
        SQLAlchemyEngineManager.teardown_engine_for_schema(declarative_base)

    if not environment.in_travis():
        # If we are running locally, we must stop the local postgres server
        os.system('pg_ctl -D /usr/local/var/postgres stop &> /dev/null')
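For context, a hedged sketch of how this helper is meant to be wired into a test class; the unittest scaffolding below is an assumption based on the docstring, with use_on_disk_postgresql_database doubling as the per-class setup:

import unittest

class SchemaPostgresTest(unittest.TestCase):
    """Hypothetical test class; the teardown helper runs once per class."""

    @classmethod
    def setUpClass(cls) -> None:
        # Point each declarative base at the local on-disk postgres server.
        for declarative_base in DECLARATIVE_BASES:
            use_on_disk_postgresql_database(declarative_base)

    @classmethod
    def tearDownClass(cls) -> None:
        # Drops all tables and stops the local postgres server exactly once.
        stop_and_clear_on_disk_postgresql_database()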
Example No. 3
def _write_df_only_successful_rows(
        table: DeclarativeMeta, df: pd.DataFrame) -> None:
    """If the dataframe can't be written all at once (eg. some rows already
    exist in the database) then we write only the rows that we can."""
    for i in range(len(df)):
        row = df.iloc[i:i + 1]
        try:
            row.to_sql(table.__tablename__,
                       SQLAlchemyEngineManager.get_engine_for_schema_base(
                           JailsBase),
                       if_exists='append',
                       index=False)
        except IntegrityError:
            # Skip rows that can't be written
            logging.info("Skipping write_df to %s table: %s.", table, row)
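The trade-off here is deliberate: the fallback degrades to one INSERT per row, which is slow for large frames but ensures that a few duplicate rows cannot block the rest of the batch. This row-level IntegrityError handling is what backs the "existing rows are skipped" contract of write_df in the next example.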
Example No. 4
def write_df(table: DeclarativeMeta, df: pd.DataFrame) -> None:
    """
    Writes the |df| to the |table|.

    The column headers on |df| must match the column names in |table|. All rows
    in |df| will be appended to |table|. If a row in |df| already exists in
    |table|, then that row will be skipped.
    """
    try:
        df.to_sql(table.__tablename__,
                  SQLAlchemyEngineManager.get_engine_for_schema_base(JailsBase),
                  if_exists='append',
                  index=False)
    except IntegrityError:
        _write_df_only_successful_rows(table, df)
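A minimal call-site sketch; the Person model and the DataFrame contents are invented for illustration, and the frame's column headers must match the table's columns:

import pandas as pd

# Hypothetical data; Person is an assumed DeclarativeMeta model.
df = pd.DataFrame({
    "person_id": [1, 2, 3],
    "full_name": ["Ann", "Bo", "Cy"],
})
write_df(Person, df)  # appends all rows, silently skipping duplicates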
Example No. 5
def import_gcs_csv_to_cloud_sql(destination_table: str, gcs_uri: GcsfsFilePath,
                                columns: List[str]) -> None:
    """Implements the import of GCS CSV to Cloud SQL by creating a temporary table, uploading the
    results to the temporary table, and then swapping the contents of the table."""
    engine = SQLAlchemyEngineManager.get_engine_for_schema_base(
        SQLAlchemyEngineManager.declarative_method_for_schema(
            SchemaType.CASE_TRIAGE))
    if engine is None:
        raise RuntimeError("Could not create postgres sqlalchemy engine")

    # Drop old temporary table if it exists
    tmp_table_name = f"tmp__{destination_table}"
    with engine.connect() as conn:
        conn.execute(f"DROP TABLE IF EXISTS {tmp_table_name}")

    # Create temporary table
    with engine.connect() as conn:
        conn.execute(
            f"CREATE TABLE {tmp_table_name} AS TABLE {destination_table} WITH NO DATA"
        )

    try:
        # Start actual Cloud SQL import
        logging.info("Starting import from GCS URI: %s", gcs_uri)
        logging.info("Starting import to destination table: %s",
                     destination_table)
        logging.info("Starting import using columns: %s", columns)
        cloud_sql_client = CloudSQLClientImpl()
        instance_name = SQLAlchemyEngineManager.get_stripped_cloudsql_instance_id(
            SchemaType.CASE_TRIAGE)
        if instance_name is None:
            raise ValueError("Could not find instance name.")
        operation_id = cloud_sql_client.import_gcs_csv(
            instance_name=instance_name,
            table_name=tmp_table_name,
            gcs_uri=gcs_uri,
            columns=columns,
        )
        if operation_id is None:
            raise RuntimeError(
                "Cloud SQL import operation was not started successfully.")

        operation_succeeded = cloud_sql_client.wait_until_operation_completed(
            operation_id)

        if not operation_succeeded:
            raise RuntimeError("Cloud SQL import failed.")
    except Exception:
        logging.warning(
            "Dropping newly created table due to raised exception.")
        # The connection from the earlier `with` block is already closed, so
        # open a fresh one to clean up the temporary table.
        with engine.connect() as conn:
            conn.execute(f"DROP TABLE {tmp_table_name}")
        raise

    # Swap in new table
    old_table_name = f"old__{destination_table}"
    with engine.begin() as conn:
        conn.execute(
            f"ALTER TABLE {destination_table} RENAME TO {old_table_name}")
        conn.execute(
            f"ALTER TABLE {tmp_table_name} RENAME TO {destination_table}")
        conn.execute(f"DROP TABLE {old_table_name}")