Beispiel #1
0
class TestCloudSqlDatabaseHook(unittest.TestCase):
    @mock.patch(
        'airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.get_connection'
    )
    def setUp(self, m):
        super(TestCloudSqlDatabaseHook, self).setUp()

        self.connection = Connection(
            conn_id='my_gcp_connection',
            login='******',
            password='******',
            host='host',
            schema='schema',
            extra=
            '{"database_type":"postgres", "location":"my_location", "instance":"my_instance", '
            '"use_proxy": true, "project_id":"my_project"}')

        m.return_value = self.connection
        self.db_hook = CloudSqlDatabaseHook('my_gcp_connection')

    def test_get_sqlproxy_runner(self):
        self.db_hook._generate_connection_uri()
        sqlproxy_runner = self.db_hook.get_sqlproxy_runner()
        self.assertEqual(sqlproxy_runner.gcp_conn_id, self.connection.conn_id)
        project = self.connection.extra_dejson['project_id']
        location = self.connection.extra_dejson['location']
        instance = self.connection.extra_dejson['instance']
        instance_spec = "{project}:{location}:{instance}".format(
            project=project, location=location, instance=instance)
        self.assertEqual(sqlproxy_runner.instance_specification, instance_spec)
Beispiel #2
0
class TestCloudSqlDatabaseHook(unittest.TestCase):
    @mock.patch(
        'airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.get_connection'
    )
    def setUp(self, m):
        super(TestCloudSqlDatabaseHook, self).setUp()

        self.sql_connection = Connection(
            conn_id='my_gcp_sql_connection',
            conn_type='gcpcloudsql',
            login='******',
            password='******',
            host='host',
            schema='schema',
            extra='{"database_type":"postgres", "location":"my_location", '
            '"instance":"my_instance", "use_proxy": true, '
            '"project_id":"my_project"}')
        self.connection = Connection(
            conn_id='my_gcp_connection',
            conn_type='google_cloud_platform',
        )
        scopes = [
            "https://www.googleapis.com/auth/pubsub",
            "https://www.googleapis.com/auth/datastore",
            "https://www.googleapis.com/auth/bigquery",
            "https://www.googleapis.com/auth/devstorage.read_write",
            "https://www.googleapis.com/auth/logging.write",
            "https://www.googleapis.com/auth/cloud-platform",
        ]
        conn_extra = {
            "extra__google_cloud_platform__scope":
            ",".join(scopes),
            "extra__google_cloud_platform__project":
            "your-gcp-project",
            "extra__google_cloud_platform__key_path":
            '/var/local/google_cloud_default.json'
        }
        conn_extra_json = json.dumps(conn_extra)
        self.connection.set_extra(conn_extra_json)

        m.side_effect = [self.sql_connection, self.connection]
        self.db_hook = CloudSqlDatabaseHook(
            gcp_cloudsql_conn_id='my_gcp_sql_connection',
            gcp_conn_id='my_gcp_connection')

    def test_get_sqlproxy_runner(self):
        self.db_hook._generate_connection_uri()
        sqlproxy_runner = self.db_hook.get_sqlproxy_runner()
        self.assertEqual(sqlproxy_runner.gcp_conn_id, self.connection.conn_id)
        project = self.sql_connection.extra_dejson['project_id']
        location = self.sql_connection.extra_dejson['location']
        instance = self.sql_connection.extra_dejson['instance']
        instance_spec = "{project}:{location}:{instance}".format(
            project=project, location=location, instance=instance)
        self.assertEqual(sqlproxy_runner.instance_specification, instance_spec)
Beispiel #3
0
    def setUp(self, m):
        super(TestCloudSqlDatabaseHook, self).setUp()

        self.connection = Connection(
            conn_id='my_gcp_connection',
            login='******',
            password='******',
            host='host',
            schema='schema',
            extra=
            '{"database_type":"postgres", "location":"my_location", "instance":"my_instance", '
            '"use_proxy": true, "project_id":"my_project"}')

        m.return_value = self.connection
        self.db_hook = CloudSqlDatabaseHook('my_gcp_connection')
Beispiel #4
0
 def __init__(self,
              sql,
              autocommit=False,
              parameters=None,
              gcp_conn_id='google_cloud_default',
              gcp_cloudsql_conn_id='google_cloud_sql_default',
              *args,
              **kwargs):
     super(CloudSqlQueryOperator, self).__init__(*args, **kwargs)
     self.sql = sql
     self.gcp_conn_id = gcp_conn_id
     self.gcp_cloudsql_conn_id = gcp_cloudsql_conn_id
     self.autocommit = autocommit
     self.parameters = parameters
     self.cloudsql_db_hook = CloudSqlDatabaseHook(
         gcp_cloudsql_conn_id=gcp_cloudsql_conn_id)
     self.cloud_sql_proxy_runner = None
     self.database_hook = None
Beispiel #5
0
 def __init__(self,
              sql,
              autocommit=False,
              parameters=None,
              gcp_conn_id='google_cloud_default',
              gcp_cloudsql_conn_id='google_cloud_sql_default',
              *args, **kwargs):
     super().__init__(*args, **kwargs)
     self.sql = sql
     self.gcp_conn_id = gcp_conn_id
     self.gcp_cloudsql_conn_id = gcp_cloudsql_conn_id
     self.autocommit = autocommit
     self.parameters = parameters
     self.gcp_connection = BaseHook.get_connection(self.gcp_conn_id)
     self.cloudsql_db_hook = CloudSqlDatabaseHook(
         gcp_cloudsql_conn_id=gcp_cloudsql_conn_id,
         default_gcp_project_id=self.gcp_connection.extra_dejson.get(
             'extra__google_cloud_platform__project'))
     self.cloud_sql_proxy_runner = None
     self.database_hook = None
Beispiel #6
0
    def setUp(self, m):
        super(TestCloudSqlDatabaseHook, self).setUp()

        self.sql_connection = Connection(
            conn_id='my_gcp_sql_connection',
            conn_type='gcpcloudsql',
            login='******',
            password='******',
            host='host',
            schema='schema',
            extra='{"database_type":"postgres", "location":"my_location", '
            '"instance":"my_instance", "use_proxy": true, '
            '"project_id":"my_project"}')
        self.connection = Connection(
            conn_id='my_gcp_connection',
            conn_type='google_cloud_platform',
        )
        scopes = [
            "https://www.googleapis.com/auth/pubsub",
            "https://www.googleapis.com/auth/datastore",
            "https://www.googleapis.com/auth/bigquery",
            "https://www.googleapis.com/auth/devstorage.read_write",
            "https://www.googleapis.com/auth/logging.write",
            "https://www.googleapis.com/auth/cloud-platform",
        ]
        conn_extra = {
            "extra__google_cloud_platform__scope":
            ",".join(scopes),
            "extra__google_cloud_platform__project":
            "your-gcp-project",
            "extra__google_cloud_platform__key_path":
            '/var/local/google_cloud_default.json'
        }
        conn_extra_json = json.dumps(conn_extra)
        self.connection.set_extra(conn_extra_json)

        m.side_effect = [self.sql_connection, self.connection]
        self.db_hook = CloudSqlDatabaseHook(
            gcp_cloudsql_conn_id='my_gcp_sql_connection',
            gcp_conn_id='my_gcp_connection')
 def __init__(self,
              sql,
              autocommit=False,
              parameters=None,
              gcp_conn_id='google_cloud_default',
              gcp_cloudsql_conn_id='google_cloud_sql_default',
              *args, **kwargs):
     super(CloudSqlQueryOperator, self).__init__(*args, **kwargs)
     self.sql = sql
     self.gcp_conn_id = gcp_conn_id
     self.gcp_cloudsql_conn_id = gcp_cloudsql_conn_id
     self.autocommit = autocommit
     self.parameters = parameters
     self.cloudsql_db_hook = CloudSqlDatabaseHook(
         gcp_cloudsql_conn_id=gcp_cloudsql_conn_id)
     self.cloud_sql_proxy_runner = None
     self.database_hook = None
 def __init__(self,
              sql,
              autocommit=False,
              parameters=None,
              gcp_conn_id='google_cloud_default',
              gcp_cloudsql_conn_id='google_cloud_sql_default',
              *args, **kwargs):
     super(CloudSqlQueryOperator, self).__init__(*args, **kwargs)
     self.sql = sql
     self.gcp_conn_id = gcp_conn_id
     self.gcp_cloudsql_conn_id = gcp_cloudsql_conn_id
     self.autocommit = autocommit
     self.parameters = parameters
     self.gcp_connection = BaseHook.get_connection(self.gcp_conn_id)
     self.cloudsql_db_hook = CloudSqlDatabaseHook(
         gcp_cloudsql_conn_id=gcp_cloudsql_conn_id,
         default_gcp_project_id=self.gcp_connection.extra_dejson.get(
             'extra__google_cloud_platform__project'))
     self.cloud_sql_proxy_runner = None
     self.database_hook = None
class CloudSqlQueryOperator(BaseOperator):
    """
    Performs DML or DDL query on an existing Cloud Sql instance. It optionally uses
    cloud-sql-proxy to establish secure connection with the database.

    :param sql: SQL query or list of queries to run (should be DML or DDL query -
        this operator does not return any data from the database,
        so it is useless to pass it DQL queries. Note that it is responsibility of the
        author of the queries to make sure that the queries are idempotent. For example
        you can use CREATE TABLE IF NOT EXISTS to create a table.
    :type sql: str or [str]
    :param parameters: (optional) the parameters to render the SQL query with.
    :type parameters: mapping or iterable
    :param autocommit: if True, each command is automatically committed.
        (default value: False)
    :type autocommit: bool
    :param gcp_conn_id: The connection ID used to connect to Google Cloud Platform for
        cloud-sql-proxy authentication.
    :type gcp_conn_id: str
    :param gcp_cloudsql_conn_id: The connection ID used to connect to Google Cloud SQL
       its schema should be gcpcloudsql://.
       See :class:`~airflow.contrib.hooks.gcp_sql_hooks.CloudSqlDatabaseHook` for
       details on how to define gcpcloudsql:// connection.
    :type gcp_cloudsql_conn_id: str
    """
    # [START gcp_sql_query_template_fields]
    template_fields = ('sql', 'gcp_cloudsql_conn_id', 'gcp_conn_id')
    template_ext = ('.sql',)
    # [END gcp_sql_query_template_fields]

    @apply_defaults
    def __init__(self,
                 sql,
                 autocommit=False,
                 parameters=None,
                 gcp_conn_id='google_cloud_default',
                 gcp_cloudsql_conn_id='google_cloud_sql_default',
                 *args, **kwargs):
        super(CloudSqlQueryOperator, self).__init__(*args, **kwargs)
        self.sql = sql
        self.gcp_conn_id = gcp_conn_id
        self.gcp_cloudsql_conn_id = gcp_cloudsql_conn_id
        self.autocommit = autocommit
        self.parameters = parameters
        self.cloudsql_db_hook = CloudSqlDatabaseHook(
            gcp_cloudsql_conn_id=gcp_cloudsql_conn_id)
        self.cloud_sql_proxy_runner = None
        self.database_hook = None

    def pre_execute(self, context):
        self.cloudsql_db_hook.create_connection()
        self.database_hook = self.cloudsql_db_hook.get_database_hook()
        if self.cloudsql_db_hook.use_proxy:
            self.cloud_sql_proxy_runner = self.cloudsql_db_hook.get_sqlproxy_runner()
            self.cloudsql_db_hook.free_reserved_port()
            # There is very, very slim chance that the socket will be taken over
            # here by another bind(0). It's quite unlikely to happen though!
            self.cloud_sql_proxy_runner.start_proxy()

    def execute(self, context):
        self.log.info('Executing: "%s"', self.sql)
        self.database_hook.run(self.sql, self.autocommit, parameters=self.parameters)

    def post_execute(self, context, result=None):
        # Make sure that all the cleanups happen, no matter if there are some
        # exceptions thrown
        try:
            self.cloudsql_db_hook.cleanup_database_hook()
        finally:
            try:
                if self.cloud_sql_proxy_runner:
                    self.cloud_sql_proxy_runner.stop_proxy()
                    self.cloud_sql_proxy_runner = None
            finally:
                self.cloudsql_db_hook.delete_connection()
                self.cloudsql_db_hook = None
Beispiel #10
0
 def get_hook(self):
     if self.conn_type == 'mysql':
         from airflow.hooks.mysql_hook import MySqlHook
         return MySqlHook(mysql_conn_id=self.conn_id)
     elif self.conn_type == 'google_cloud_platform':
         from airflow.contrib.hooks.bigquery_hook import BigQueryHook
         return BigQueryHook(bigquery_conn_id=self.conn_id)
     elif self.conn_type == 'postgres':
         from airflow.hooks.postgres_hook import PostgresHook
         return PostgresHook(postgres_conn_id=self.conn_id)
     elif self.conn_type == 'pig_cli':
         from airflow.hooks.pig_hook import PigCliHook
         return PigCliHook(pig_cli_conn_id=self.conn_id)
     elif self.conn_type == 'hive_cli':
         from airflow.hooks.hive_hooks import HiveCliHook
         return HiveCliHook(hive_cli_conn_id=self.conn_id)
     elif self.conn_type == 'presto':
         from airflow.hooks.presto_hook import PrestoHook
         return PrestoHook(presto_conn_id=self.conn_id)
     elif self.conn_type == 'hiveserver2':
         from airflow.hooks.hive_hooks import HiveServer2Hook
         return HiveServer2Hook(hiveserver2_conn_id=self.conn_id)
     elif self.conn_type == 'sqlite':
         from airflow.hooks.sqlite_hook import SqliteHook
         return SqliteHook(sqlite_conn_id=self.conn_id)
     elif self.conn_type == 'jdbc':
         from airflow.hooks.jdbc_hook import JdbcHook
         return JdbcHook(jdbc_conn_id=self.conn_id)
     elif self.conn_type == 'mssql':
         from airflow.hooks.mssql_hook import MsSqlHook
         return MsSqlHook(mssql_conn_id=self.conn_id)
     elif self.conn_type == 'oracle':
         from airflow.hooks.oracle_hook import OracleHook
         return OracleHook(oracle_conn_id=self.conn_id)
     elif self.conn_type == 'vertica':
         from airflow.contrib.hooks.vertica_hook import VerticaHook
         return VerticaHook(vertica_conn_id=self.conn_id)
     elif self.conn_type == 'cloudant':
         from airflow.contrib.hooks.cloudant_hook import CloudantHook
         return CloudantHook(cloudant_conn_id=self.conn_id)
     elif self.conn_type == 'jira':
         from airflow.contrib.hooks.jira_hook import JiraHook
         return JiraHook(jira_conn_id=self.conn_id)
     elif self.conn_type == 'redis':
         from airflow.contrib.hooks.redis_hook import RedisHook
         return RedisHook(redis_conn_id=self.conn_id)
     elif self.conn_type == 'wasb':
         from airflow.contrib.hooks.wasb_hook import WasbHook
         return WasbHook(wasb_conn_id=self.conn_id)
     elif self.conn_type == 'docker':
         from airflow.hooks.docker_hook import DockerHook
         return DockerHook(docker_conn_id=self.conn_id)
     elif self.conn_type == 'azure_data_lake':
         from airflow.contrib.hooks.azure_data_lake_hook import AzureDataLakeHook
         return AzureDataLakeHook(azure_data_lake_conn_id=self.conn_id)
     elif self.conn_type == 'azure_cosmos':
         from airflow.contrib.hooks.azure_cosmos_hook import AzureCosmosDBHook
         return AzureCosmosDBHook(azure_cosmos_conn_id=self.conn_id)
     elif self.conn_type == 'cassandra':
         from airflow.contrib.hooks.cassandra_hook import CassandraHook
         return CassandraHook(cassandra_conn_id=self.conn_id)
     elif self.conn_type == 'mongo':
         from airflow.contrib.hooks.mongo_hook import MongoHook
         return MongoHook(conn_id=self.conn_id)
     elif self.conn_type == 'gcpcloudsql':
         from airflow.contrib.hooks.gcp_sql_hook import CloudSqlDatabaseHook
         return CloudSqlDatabaseHook(gcp_cloudsql_conn_id=self.conn_id)
     elif self.conn_type == 'grpc':
         from airflow.contrib.hooks.grpc_hook import GrpcHook
         return GrpcHook(grpc_conn_id=self.conn_id)
     raise AirflowException("Unknown hook type {}".format(self.conn_type))
Beispiel #11
0
class CloudSqlQueryOperator(BaseOperator):
    """
    Performs DML or DDL query on an existing Cloud Sql instance. It optionally uses
    cloud-sql-proxy to establish secure connection with the database.

    .. seealso::
        For more information on how to use this operator, take a look at the guide:
        :ref:`howto/operator:CloudSqlQueryOperator`

    :param sql: SQL query or list of queries to run (should be DML or DDL query -
        this operator does not return any data from the database,
        so it is useless to pass it DQL queries. Note that it is responsibility of the
        author of the queries to make sure that the queries are idempotent. For example
        you can use CREATE TABLE IF NOT EXISTS to create a table.
    :type sql: str or list[str]
    :param parameters: (optional) the parameters to render the SQL query with.
    :type parameters: mapping or iterable
    :param autocommit: if True, each command is automatically committed.
        (default value: False)
    :type autocommit: bool
    :param gcp_conn_id: The connection ID used to connect to Google Cloud Platform for
        cloud-sql-proxy authentication.
    :type gcp_conn_id: str
    :param gcp_cloudsql_conn_id: The connection ID used to connect to Google Cloud SQL
       its schema should be gcpcloudsql://.
       See :class:`~airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook` for
       details on how to define gcpcloudsql:// connection.
    :type gcp_cloudsql_conn_id: str
    """
    # [START gcp_sql_query_template_fields]
    template_fields = ('sql', 'gcp_cloudsql_conn_id', 'gcp_conn_id')
    template_ext = ('.sql', )
    # [END gcp_sql_query_template_fields]

    @apply_defaults
    def __init__(self,
                 sql,
                 autocommit=False,
                 parameters=None,
                 gcp_conn_id='google_cloud_default',
                 gcp_cloudsql_conn_id='google_cloud_sql_default',
                 *args,
                 **kwargs):
        super().__init__(*args, **kwargs)
        self.sql = sql
        self.gcp_conn_id = gcp_conn_id
        self.gcp_cloudsql_conn_id = gcp_cloudsql_conn_id
        self.autocommit = autocommit
        self.parameters = parameters
        self.gcp_connection = BaseHook.get_connection(self.gcp_conn_id)
        self.cloudsql_db_hook = CloudSqlDatabaseHook(
            gcp_cloudsql_conn_id=gcp_cloudsql_conn_id,
            gcp_conn_id=gcp_conn_id,
            default_gcp_project_id=self.gcp_connection.extra_dejson.get(
                'extra__google_cloud_platform__project'))
        self.cloud_sql_proxy_runner = None
        self.database_hook = None

    def _execute_query(self):
        try:
            if self.cloudsql_db_hook.use_proxy:
                self.cloud_sql_proxy_runner = self.cloudsql_db_hook. \
                    get_sqlproxy_runner()
                self.cloudsql_db_hook.free_reserved_port()
                # There is very, very slim chance that the socket will
                # be taken over here by another bind(0).
                # It's quite unlikely to happen though!
                self.cloud_sql_proxy_runner.start_proxy()
            self.log.info('Executing: "%s"', self.sql)
            self.database_hook.run(self.sql,
                                   self.autocommit,
                                   parameters=self.parameters)
        finally:
            if self.cloud_sql_proxy_runner:
                self.cloud_sql_proxy_runner.stop_proxy()
                self.cloud_sql_proxy_runner = None

    def execute(self, context):
        self.cloudsql_db_hook.validate_ssl_certs()
        self.cloudsql_db_hook.create_connection()

        try:
            self.cloudsql_db_hook.validate_socket_path_length()
            self.database_hook = self.cloudsql_db_hook.get_database_hook()
            try:
                self._execute_query()
            finally:
                self.cloudsql_db_hook.cleanup_database_hook()
        finally:
            self.cloudsql_db_hook.delete_connection()
            self.cloudsql_db_hook = None