Exemple #1
0
class TestCloudSqlDatabaseHook(unittest.TestCase):
    """Checks the Cloud SQL proxy runner produced by CloudSqlDatabaseHook."""

    @mock.patch(
        'airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.get_connection'
    )
    def setUp(self, get_connection_mock):
        """Create a hook whose connection lookup yields a canned Connection.

        The patch applied by the decorator is only in effect while setUp
        itself is running.
        """
        super(TestCloudSqlDatabaseHook, self).setUp()

        # JSON "extra" payload describing the Cloud SQL instance.
        extra_json = (
            '{"database_type":"postgres", "location":"my_location", '
            '"instance":"my_instance", '
            '"use_proxy": true, "project_id":"my_project"}'
        )
        self.connection = Connection(
            conn_id='my_gcp_connection',
            login='******',
            password='******',
            host='host',
            schema='schema',
            extra=extra_json,
        )

        get_connection_mock.return_value = self.connection
        self.db_hook = CloudSqlDatabaseHook('my_gcp_connection')

    def test_get_sqlproxy_runner(self):
        """The runner carries the connection id and the instance specification."""
        self.db_hook._generate_connection_uri()
        runner = self.db_hook.get_sqlproxy_runner()
        self.assertEqual(runner.gcp_conn_id, self.connection.conn_id)

        # Expected specification is assembled from the connection extras.
        extras = self.connection.extra_dejson
        expected_spec = "{project}:{location}:{instance}".format(
            project=extras['project_id'],
            location=extras['location'],
            instance=extras['instance'],
        )
        self.assertEqual(runner.instance_specification, expected_spec)
Exemple #2
0
class TestCloudSqlDatabaseHook(unittest.TestCase):
    """Checks the Cloud SQL proxy runner when separate SQL and GCP connections are used."""

    @mock.patch(
        'airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook.get_connection'
    )
    def setUp(self, get_connection_mock):
        """Create a hook backed by two canned connections.

        The mocked lookup hands out the Cloud SQL connection first and the
        GCP connection second. The patch applied by the decorator is only in
        effect while setUp itself is running.
        """
        super(TestCloudSqlDatabaseHook, self).setUp()

        # Extras of the GCP connection, serialised to JSON further below.
        gcp_extras = {
            "extra__google_cloud_platform__scope": ",".join((
                "https://www.googleapis.com/auth/pubsub",
                "https://www.googleapis.com/auth/datastore",
                "https://www.googleapis.com/auth/bigquery",
                "https://www.googleapis.com/auth/devstorage.read_write",
                "https://www.googleapis.com/auth/logging.write",
                "https://www.googleapis.com/auth/cloud-platform",
            )),
            "extra__google_cloud_platform__project": "your-gcp-project",
            "extra__google_cloud_platform__key_path":
                '/var/local/google_cloud_default.json',
        }

        # JSON "extra" payload describing the Cloud SQL instance.
        sql_extra_json = (
            '{"database_type":"postgres", "location":"my_location", '
            '"instance":"my_instance", "use_proxy": true, '
            '"project_id":"my_project"}'
        )
        self.sql_connection = Connection(
            conn_id='my_gcp_sql_connection',
            conn_type='gcpcloudsql',
            login='******',
            password='******',
            host='host',
            schema='schema',
            extra=sql_extra_json,
        )

        self.connection = Connection(
            conn_id='my_gcp_connection',
            conn_type='google_cloud_platform',
        )
        self.connection.set_extra(json.dumps(gcp_extras))

        get_connection_mock.side_effect = [self.sql_connection, self.connection]
        self.db_hook = CloudSqlDatabaseHook(
            gcp_cloudsql_conn_id='my_gcp_sql_connection',
            gcp_conn_id='my_gcp_connection',
        )

    def test_get_sqlproxy_runner(self):
        """The runner uses the GCP connection id and the SQL instance specification."""
        self.db_hook._generate_connection_uri()
        runner = self.db_hook.get_sqlproxy_runner()
        self.assertEqual(runner.gcp_conn_id, self.connection.conn_id)

        # Expected specification is assembled from the Cloud SQL connection extras.
        sql_extras = self.sql_connection.extra_dejson
        expected_spec = "{project}:{location}:{instance}".format(
            project=sql_extras['project_id'],
            location=sql_extras['location'],
            instance=sql_extras['instance'],
        )
        self.assertEqual(runner.instance_specification, expected_spec)
class CloudSqlQueryOperator(BaseOperator):
    """
    Performs DML or DDL query on an existing Cloud Sql instance. It optionally uses
    cloud-sql-proxy to establish secure connection with the database.

    The temporary connection and the optional proxy are set up in
    :meth:`pre_execute` and released in :meth:`post_execute`. They are also
    released when :meth:`pre_execute` or :meth:`execute` fails, because
    ``post_execute`` is not reached in that case.

    :param sql: SQL query or list of queries to run (should be DML or DDL query -
        this operator does not return any data from the database,
        so it is useless to pass it DQL queries). Note that it is responsibility of the
        author of the queries to make sure that the queries are idempotent. For example
        you can use CREATE TABLE IF NOT EXISTS to create a table.
    :type sql: str or [str]
    :param parameters: (optional) the parameters to render the SQL query with.
    :type parameters: mapping or iterable
    :param autocommit: if True, each command is automatically committed.
        (default value: False)
    :type autocommit: bool
    :param gcp_conn_id: The connection ID used to connect to Google Cloud Platform for
        cloud-sql-proxy authentication.
    :type gcp_conn_id: str
    :param gcp_cloudsql_conn_id: The connection ID used to connect to Google Cloud SQL
       its schema should be gcpcloudsql://.
       See :class:`~airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook` for
       details on how to define gcpcloudsql:// connection.
    :type gcp_cloudsql_conn_id: str
    """
    # [START gcp_sql_query_template_fields]
    template_fields = ('sql', 'gcp_cloudsql_conn_id', 'gcp_conn_id')
    template_ext = ('.sql',)
    # [END gcp_sql_query_template_fields]

    @apply_defaults
    def __init__(self,
                 sql,
                 autocommit=False,
                 parameters=None,
                 gcp_conn_id='google_cloud_default',
                 gcp_cloudsql_conn_id='google_cloud_sql_default',
                 *args, **kwargs):
        super(CloudSqlQueryOperator, self).__init__(*args, **kwargs)
        self.sql = sql
        self.gcp_conn_id = gcp_conn_id
        self.gcp_cloudsql_conn_id = gcp_cloudsql_conn_id
        self.autocommit = autocommit
        self.parameters = parameters
        # NOTE(review): gcp_conn_id is stored above but is not handed to the
        # hook here - confirm whether the hook is expected to receive it.
        self.cloudsql_db_hook = CloudSqlDatabaseHook(
            gcp_cloudsql_conn_id=gcp_cloudsql_conn_id)
        self.cloud_sql_proxy_runner = None
        self.database_hook = None

    def pre_execute(self, context):
        """
        Create the connection, obtain the database hook and, when the hook is
        configured to use it, start the cloud-sql-proxy.

        If any step after the connection was created fails, everything set up
        so far is released before the error propagates.
        """
        self.cloudsql_db_hook.create_connection()
        prepared = False
        try:
            self.database_hook = self.cloudsql_db_hook.get_database_hook()
            if self.cloudsql_db_hook.use_proxy:
                self.cloud_sql_proxy_runner = self.cloudsql_db_hook.get_sqlproxy_runner()
                self.cloudsql_db_hook.free_reserved_port()
                # There is very, very slim chance that the socket will be taken over
                # here by another bind(0). It's quite unlikely to happen though!
                self.cloud_sql_proxy_runner.start_proxy()
            prepared = True
        finally:
            # A flag is used instead of except/raise so that the original
            # error is propagated unchanged on every supported interpreter.
            if not prepared:
                self._release_resources(
                    db_hook_created=self.database_hook is not None)

    def execute(self, context):
        """
        Run the configured SQL through the database hook.

        On failure the connection and proxy are released here, since
        post_execute only runs after a successful execute.
        """
        self.log.info('Executing: "%s"', self.sql)
        succeeded = False
        try:
            self.database_hook.run(self.sql, self.autocommit, parameters=self.parameters)
            succeeded = True
        finally:
            if not succeeded:
                self._release_resources()

    def post_execute(self, context, result=None):
        """Release the database hook, the proxy (if started) and the connection."""
        self._release_resources()

    def _release_resources(self, db_hook_created=True):
        """
        Tear down everything set up by pre_execute.

        Safe to call more than once: after the first call the Cloud SQL hook
        reference is cleared and later calls do nothing.

        :param db_hook_created: when False the database-hook cleanup step is
            skipped (used when pre_execute failed before the hook was obtained).
        :type db_hook_created: bool
        """
        if self.cloudsql_db_hook is None:
            return
        # Make sure that all the cleanups happen, no matter if there are some
        # exceptions thrown
        try:
            if db_hook_created:
                self.cloudsql_db_hook.cleanup_database_hook()
        finally:
            try:
                if self.cloud_sql_proxy_runner:
                    self.cloud_sql_proxy_runner.stop_proxy()
                    self.cloud_sql_proxy_runner = None
            finally:
                self.cloudsql_db_hook.delete_connection()
                self.cloudsql_db_hook = None
Exemple #4
0
class CloudSqlQueryOperator(BaseOperator):
    """
    Performs DML or DDL query on an existing Cloud Sql instance. It optionally uses
    cloud-sql-proxy to establish secure connection with the database.

    The temporary connection and the optional proxy are set up in
    :meth:`pre_execute` and released in :meth:`post_execute`. They are also
    released when :meth:`pre_execute` or :meth:`execute` fails, because
    ``post_execute`` is not reached in that case.

    :param sql: SQL query or list of queries to run (should be DML or DDL query -
        this operator does not return any data from the database,
        so it is useless to pass it DQL queries). Note that it is responsibility of the
        author of the queries to make sure that the queries are idempotent. For example
        you can use CREATE TABLE IF NOT EXISTS to create a table.
    :type sql: str or [str]
    :param parameters: (optional) the parameters to render the SQL query with.
    :type parameters: mapping or iterable
    :param autocommit: if True, each command is automatically committed.
        (default value: False)
    :type autocommit: bool
    :param gcp_conn_id: The connection ID used to connect to Google Cloud Platform for
        cloud-sql-proxy authentication.
    :type gcp_conn_id: str
    :param gcp_cloudsql_conn_id: The connection ID used to connect to Google Cloud SQL
       its schema should be gcpcloudsql://.
       See :class:`~airflow.contrib.hooks.gcp_sql_hook.CloudSqlDatabaseHook` for
       details on how to define gcpcloudsql:// connection.
    :type gcp_cloudsql_conn_id: str
    """
    # [START gcp_sql_query_template_fields]
    template_fields = ('sql', 'gcp_cloudsql_conn_id', 'gcp_conn_id')
    template_ext = ('.sql', )
    # [END gcp_sql_query_template_fields]

    @apply_defaults
    def __init__(self,
                 sql,
                 autocommit=False,
                 parameters=None,
                 gcp_conn_id='google_cloud_default',
                 gcp_cloudsql_conn_id='google_cloud_sql_default',
                 *args,
                 **kwargs):
        super(CloudSqlQueryOperator, self).__init__(*args, **kwargs)
        self.sql = sql
        self.gcp_conn_id = gcp_conn_id
        self.gcp_cloudsql_conn_id = gcp_cloudsql_conn_id
        self.autocommit = autocommit
        self.parameters = parameters
        # NOTE(review): gcp_conn_id is stored above but is not handed to the
        # hook here - confirm whether the hook is expected to receive it.
        self.cloudsql_db_hook = CloudSqlDatabaseHook(
            gcp_cloudsql_conn_id=gcp_cloudsql_conn_id)
        self.cloud_sql_proxy_runner = None
        self.database_hook = None

    def pre_execute(self, context):
        """
        Create the connection, obtain the database hook and, when the hook is
        configured to use it, start the cloud-sql-proxy.

        If any step after the connection was created fails, everything set up
        so far is released before the error propagates.
        """
        self.cloudsql_db_hook.create_connection()
        prepared = False
        try:
            self.database_hook = self.cloudsql_db_hook.get_database_hook()
            if self.cloudsql_db_hook.use_proxy:
                self.cloud_sql_proxy_runner = self.cloudsql_db_hook.get_sqlproxy_runner(
                )
                self.cloudsql_db_hook.free_reserved_port()
                # There is very, very slim chance that the socket will be taken over
                # here by another bind(0). It's quite unlikely to happen though!
                self.cloud_sql_proxy_runner.start_proxy()
            prepared = True
        finally:
            # A flag is used instead of except/raise so that the original
            # error is propagated unchanged on every supported interpreter.
            if not prepared:
                self._release_resources(
                    db_hook_created=self.database_hook is not None)

    def execute(self, context):
        """
        Run the configured SQL through the database hook.

        On failure the connection and proxy are released here, since
        post_execute only runs after a successful execute.
        """
        self.log.info('Executing: "%s"', self.sql)
        succeeded = False
        try:
            self.database_hook.run(self.sql,
                                   self.autocommit,
                                   parameters=self.parameters)
            succeeded = True
        finally:
            if not succeeded:
                self._release_resources()

    def post_execute(self, context, result=None):
        """Release the database hook, the proxy (if started) and the connection."""
        self._release_resources()

    def _release_resources(self, db_hook_created=True):
        """
        Tear down everything set up by pre_execute.

        Safe to call more than once: after the first call the Cloud SQL hook
        reference is cleared and later calls do nothing.

        :param db_hook_created: when False the database-hook cleanup step is
            skipped (used when pre_execute failed before the hook was obtained).
        :type db_hook_created: bool
        """
        if self.cloudsql_db_hook is None:
            return
        # Make sure that all the cleanups happen, no matter if there are some
        # exceptions thrown
        try:
            if db_hook_created:
                self.cloudsql_db_hook.cleanup_database_hook()
        finally:
            try:
                if self.cloud_sql_proxy_runner:
                    self.cloud_sql_proxy_runner.stop_proxy()
                    self.cloud_sql_proxy_runner = None
            finally:
                self.cloudsql_db_hook.delete_connection()
                self.cloudsql_db_hook = None