def export_from_cloud_sql_to_gcs_csv(
    schema_type: SchemaType,
    table_name: str,
    gcs_uri: GcsfsFilePath,
    columns: List[str],
) -> None:
    """Exports the given columns of a Cloud SQL table to a CSV file at gcs_uri."""
    cloud_sql_client = CloudSQLClientImpl()
    instance_name = SQLAlchemyEngineManager.get_stripped_cloudsql_instance_id(
        schema_type)
    if instance_name is None:
        raise ValueError("Could not find instance name.")
    operation_id = cloud_sql_client.export_to_gcs_csv(
        instance_name=instance_name,
        table_name=table_name,
        gcs_uri=gcs_uri,
        columns=columns,
    )
    if operation_id is None:
        raise RuntimeError(
            "Cloud SQL export operation was not started successfully.")

    operation_succeeded = cloud_sql_client.wait_until_operation_completed(
        operation_id)

    if not operation_succeeded:
        raise RuntimeError("Cloud SQL export failed.")
Example 2
def confirm_correct_db(database: SchemaType) -> None:
    """Asks the user to confirm the target Cloud SQL instance before running migrations."""
    dbname = SQLAlchemyEngineManager.get_stripped_cloudsql_instance_id(
        database)
    if dbname is None:
        logging.error("Could not find database instance.")
        logging.error("Exiting...")
        sys.exit(1)

    prompt_for_confirmation(f"Running migrations on {dbname}.", dbname)
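prompt_for_confirmation is not shown either; a minimal sketch of what it plausibly does, mirroring the inline confirmation prompt in Example 8 below:

# Hypothetical sketch mirroring the inline prompt in Example 8 below.
import logging
import sys


def prompt_for_confirmation(intent: str, expected_input: str) -> None:
    check = input(
        f'{intent} Please type "{expected_input}" to confirm. (Anything else exits):\n'
    )
    if check != expected_input:
        logging.warning("\nConfirmation aborted.")
        sys.exit(1)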
Example 3
    def testGetStrippedCloudSqlInstanceId(
            self, mock_secrets: mock.MagicMock) -> None:
        # Arrange
        mock_secrets.side_effect = [
            "project:region:111",
        ]

        # Act
        instance_id = SQLAlchemyEngineManager.get_stripped_cloudsql_instance_id(
            schema_type=SchemaType.OPERATIONS)

        # Assert
        self.assertEqual(instance_id, "111")
        mock_secrets.assert_called_with("operations_cloudsql_instance_id")
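The test pins down the contract: the method reads a "<schema>_cloudsql_instance_id" secret holding a full "project:region:instance" connection name and returns only the trailing instance segment. Here is a sketch consistent with those assertions; the secret accessor and key derivation are assumptions:

# Sketch inferred from the test above; the real implementation may differ.
from typing import Optional


def get_stripped_cloudsql_instance_id_sketch(schema_type: SchemaType) -> Optional[str]:
    # Assumed key derivation, matching the "operations_cloudsql_instance_id" assertion.
    secret_key = f"{schema_type.value.lower()}_cloudsql_instance_id"
    full_instance_id = get_secret(secret_key)  # hypothetical accessor; e.g. "project:region:111"
    if full_instance_id is None:
        return None
    # Keep only the instance name, dropping the project and region prefix.
    return full_instance_id.split(":")[-1]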
Example 4
def export_table(table_name: str, cloud_sql_to_bq_config: CloudSqlToBQConfig) -> bool:
    """Export a Cloud SQL table to a CSV file on GCS.

    Given a table name, retrieve the export query and GCS export URI from
    cloud_sql_to_bq_config, then execute the export operation and wait until it
    completes.

    Args:
        table_name: Table to export.
        cloud_sql_to_bq_config: The export config class for the table's SchemaType.
    Returns:
        True if operation succeeded without errors, False if not.
    """
    schema_type = cloud_sql_to_bq_config.schema_type
    export_query = cloud_sql_to_bq_config.get_table_export_query(table_name)
    export_uri = cloud_sql_to_bq_config.get_gcs_export_uri_for_table(table_name)

    export_context = create_export_context(schema_type, export_uri, export_query)

    project_id = metadata.project_id()
    instance_id = SQLAlchemyEngineManager.get_stripped_cloudsql_instance_id(schema_type)
    export_request = (
        sqladmin_client()
        .instances()
        .export(project=project_id, instance=instance_id, body=export_context)
    )

    logging.info("GCS URI [%s] in project [%s]", export_uri, project_id)
    logging.info("Starting export: [%s]", str(export_request.to_json()))

    try:
        response = export_request.execute()
    except googleapiclient.errors.HttpError:
        logging.exception("Failed to export table [%s]", table_name)
        return False

    # We need to block until the operation is done because
    # the Cloud SQL API only supports one operation at a time.
    operation_id = response["name"]
    logging.info(
        "Waiting for export operation [%s] to complete for table [%s] "
        "in database [%s] in project [%s]",
        operation_id,
        table_name,
        instance_id,
        project_id,
    )
    operation_success = wait_until_operation_finished(operation_id)

    return operation_success
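wait_until_operation_finished is not shown; below is a minimal polling sketch against the Admin API's operations endpoint, which the real helper presumably resembles. The sleep interval and error handling are assumptions.

# Hypothetical polling sketch; the real wait_until_operation_finished may differ.
import logging
import time


def wait_until_operation_finished_sketch(operation_id: str) -> bool:
    while True:
        operation = (
            sqladmin_client()
            .operations()
            .get(project=metadata.project_id(), operation=operation_id)
            .execute()
        )
        if operation["status"] == "DONE":
            # A DONE operation can still carry errors.
            errors = operation.get("error", {}).get("errors", [])
            for error in errors:
                logging.error(
                    "Operation finished with error: [%s] %s",
                    error.get("code"),
                    error.get("message"),
                )
            return not errors
        time.sleep(3)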
Example 5
def _import_csv_to_temp_table(
    database_key: SQLAlchemyDatabaseKey,
    schema_type: SchemaType,
    destination_table: str,
    tmp_table_name: str,
    gcs_uri: GcsfsFilePath,
    columns: List[str],
    seconds_to_wait: int = 60,
) -> None:
    """Imports a GCS CSV file to a temp table that is created with the destination table as a template."""
    with SessionFactory.using_database(database_key=database_key) as session:
        # Drop old temp table if exists, Create new temp table
        session.execute(f"DROP TABLE IF EXISTS {tmp_table_name}")
        session.execute(
            f"CREATE TABLE {tmp_table_name} AS TABLE {destination_table} WITH NO DATA"
        )

    # Import CSV to temp table
    logging.info("Starting import from GCS URI: %s", gcs_uri)
    logging.info("Starting import to tmp destination table: %s",
                 tmp_table_name)
    logging.info("Starting import using columns: %s", columns)

    cloud_sql_client = CloudSQLClientImpl()
    instance_name = SQLAlchemyEngineManager.get_stripped_cloudsql_instance_id(
        schema_type=schema_type)
    if instance_name is None:
        raise ValueError("Could not find instance name.")

    # Create temp table with CSV file
    operation_id = cloud_sql_client.import_gcs_csv(
        instance_name=instance_name,
        table_name=tmp_table_name,
        gcs_uri=gcs_uri,
        columns=columns,
    )

    if operation_id is None:
        raise RuntimeError(
            "Cloud SQL import operation was not started successfully.")

    operation_succeeded = cloud_sql_client.wait_until_operation_completed(
        operation_id=operation_id,
        seconds_to_wait=seconds_to_wait,
    )

    if not operation_succeeded:
        raise RuntimeError(f"Cloud SQL import to {tmp_table_name} failed.")
Example 6
    def import_gcs_sql(
        self,
        database_key: SQLAlchemyDatabaseKey,
        gcs_uri: GcsfsFilePath,
    ) -> Optional[str]:
        """Triggers a Cloud SQL import of a SQL dump file from GCS and returns the operation id."""
        logging.debug("Starting Cloud SQL import operation.")
        instance_name = SQLAlchemyEngineManager.get_stripped_cloudsql_instance_id(
            database_key.schema_type)
        req = self.service.instances().import_(
            project=self.project_id,
            instance=instance_name,
            body={
                "importContext": {
                    "database": database_key.db_name,
                    "fileType": "SQL",
                    "uri": f"gs://{gcs_uri.abs_path()}",
                },
            },
        )
        resp = req.execute()
        return resp.get("name")
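Note the fileType "SQL" here: Cloud SQL replays the file as SQL statements against the named database, so no table or column mapping applies, unlike the CSV import sketched in Example 5.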
Example 7
def import_gcs_csv_to_cloud_sql(destination_table: str, gcs_uri: GcsfsFilePath,
                                columns: List[str]) -> None:
    """Implements the import of GCS CSV to Cloud SQL by creating a temporary table, uploading the
    results to the temporary table, and then swapping the contents of the table."""
    engine = SQLAlchemyEngineManager.get_engine_for_schema_base(
        SQLAlchemyEngineManager.declarative_method_for_schema(
            SchemaType.CASE_TRIAGE))
    if engine is None:
        raise RuntimeError("Could not create postgres sqlalchemy engine")

    # Drop the old temporary table if it exists, then recreate it empty,
    # using the destination table as a schema template.
    tmp_table_name = f"tmp__{destination_table}"
    with engine.connect() as conn:
        conn.execute(f"DROP TABLE IF EXISTS {tmp_table_name}")
        conn.execute(
            f"CREATE TABLE {tmp_table_name} AS TABLE {destination_table} WITH NO DATA"
        )

    try:
        # Start actual Cloud SQL import
        logging.info("Starting import from GCS URI: %s", gcs_uri)
        logging.info("Starting import to destination table: %s",
                     destination_table)
        logging.info("Starting import using columns: %s", columns)
        cloud_sql_client = CloudSQLClientImpl()
        instance_name = SQLAlchemyEngineManager.get_stripped_cloudsql_instance_id(
            SchemaType.CASE_TRIAGE)
        if instance_name is None:
            raise ValueError("Could not find instance name.")
        operation_id = cloud_sql_client.import_gcs_csv(
            instance_name=instance_name,
            table_name=tmp_table_name,
            gcs_uri=gcs_uri,
            columns=columns,
        )
        if operation_id is None:
            raise RuntimeError(
                "Cloud SQL import operation was not started successfully.")

        operation_succeeded = cloud_sql_client.wait_until_operation_completed(
            operation_id)

        if not operation_succeeded:
            raise RuntimeError("Cloud SQL import failed.")
    except Exception:
        logging.warning(
            "Dropping newly created table due to raised exception.")
        # The connection from the setup block above is closed; open a fresh one.
        with engine.connect() as conn:
            conn.execute(f"DROP TABLE {tmp_table_name}")
        raise

    # Swap in new table
    old_table_name = f"old__{destination_table}"
    with engine.begin() as conn:
        conn.execute(
            f"ALTER TABLE {destination_table} RENAME TO {old_table_name}")
        conn.execute(
            f"ALTER TABLE {tmp_table_name} RENAME TO {destination_table}")
        conn.execute(f"DROP TABLE {old_table_name}")
Example 8
def main(database: SchemaType, repo_root: str, ssl_cert_path: str,
         dry_run: bool):
    """
    Invokes the main code path for running migrations.

    This asks the user to confirm that the target database and git branch are correct, then runs any
    pending migrations.
    """

    requires_ssl = SQLAlchemyEngineManager.database_requires_ssl(
        metadata.project_id())

    if requires_ssl and not ssl_cert_path:
        logging.error(
            'An argument to --ssl-cert-path is required for the specified database.'
        )
        logging.error('Exiting...')
        sys.exit(1)

    if dry_run:
        if not local_postgres_helpers.can_start_on_disk_postgresql_database():
            logging.error('pg_ctl is not installed. Cannot perform a dry-run.')
            logging.error('Exiting...')
            sys.exit(1)
        logging.info('Performing a dry run...\n')

    is_prod = metadata.project_id() == GCP_PROJECT_PRODUCTION
    dbname = SQLAlchemyEngineManager.get_stripped_cloudsql_instance_id(
        database)
    if is_prod:
        logging.info('RUNNING AGAINST PRODUCTION\n')

    db_check = input(
        f'Running new migrations on {dbname}. Please type "{dbname}" to confirm. (Anything else exits):\n'
    )
    if db_check != dbname:
        logging.warning('\nConfirmation aborted.')
        sys.exit(1)

    # Get user to validate git branch
    try:
        repo = Repository(repo_root)
    except Exception as e:
        logging.error('improper project root provided: %s', e)
        sys.exit(1)

    current_branch = repo.head.shorthand

    if is_prod and not current_branch.startswith('releases/'):
        logging.error(
            'Migrations run against production must be from a release branch. The current branch is %s.',
            current_branch)
        sys.exit(1)

    branch_check = input(
        f'\nThis script will execute all migrations on this branch ({current_branch}) that have not yet been run '
        f'against {dbname}. Please type "{current_branch}" to confirm your branch. (Anything else exits):\n'
    )
    if branch_check != current_branch:
        logging.warning('\nConfirmation aborted.')
        sys.exit(1)

    # Fetch secrets
    if dry_run:
        overridden_env_vars = local_postgres_helpers.update_local_sqlalchemy_postgres_env_vars()
    else:
        overridden_env_vars = SQLAlchemyEngineManager.update_sqlalchemy_env_vars(
            database, ssl_cert_path=ssl_cert_path)

    # Run migrations
    try:
        if dry_run:
            logging.info(
                'Starting local postgres database for migrations dry run')
            db_dir = local_postgres_helpers.start_on_disk_postgresql_database()
        config = alembic.config.Config(
            SQLAlchemyEngineManager.get_alembic_file(database))
        alembic.command.upgrade(config, 'head')
    except Exception as e:
        logging.error('Migrations failed to run: %s', e)
        sys.exit(1)
    finally:
        local_postgres_helpers.restore_local_env_vars(overridden_env_vars)
        if dry_run:
            try:
                logging.info('Stopping local postgres database')
                local_postgres_helpers.stop_and_clear_on_disk_postgresql_database(
                    db_dir)
            except Exception as e2:
                logging.error('Error cleaning up postgres: %s', e2)
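A hedged sketch of how this entry point might be wired up as a CLI; of these flag names, only --ssl-cert-path is confirmed by the error message in main() above, and the SchemaType parsing is an assumption.

# Hypothetical CLI wiring; only --ssl-cert-path appears in the code above.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Run pending Alembic migrations.')
    parser.add_argument('--database', required=True,
                        choices=[s.name for s in SchemaType],
                        help='Schema type to run migrations against.')
    parser.add_argument('--repo-root', default='.',
                        help='Path to the repository root, used for the branch check.')
    parser.add_argument('--ssl-cert-path', default='',
                        help='Directory containing client SSL certs, if the database requires SSL.')
    parser.add_argument('--dry-run', action='store_true',
                        help='Run migrations against a local throwaway postgres instead.')
    args = parser.parse_args()
    main(SchemaType[args.database], args.repo_root, args.ssl_cert_path, args.dry_run)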