Example #1
 def test_create_operator_with_wrong_parameters(self,
                                                project_id,
                                                location,
                                                instance_name,
                                                database_type,
                                                use_proxy,
                                                use_ssl,
                                                sql,
                                                message,
                                                get_connections):
     uri = \
         "gcpcloudsql://*****:*****@127.0.0.1:3200/testdb?" \
         "database_type={database_type}&" \
         "project_id={project_id}&location={location}&instance={instance_name}&" \
         "use_proxy={use_proxy}&use_ssl={use_ssl}".format(
             database_type=database_type,
             project_id=project_id,
             location=location,
             instance_name=instance_name,
             use_proxy=use_proxy,
             use_ssl=use_ssl)
     self._setup_connections(get_connections, uri)
     with self.assertRaises(AirflowException) as cm:
         op = CloudSqlQueryOperator(
             sql=sql,
             task_id='task_id'
         )
         op.execute(None)
     err = cm.exception
     self.assertIn(message, str(err))
Example #2
    def test_cloudsql_hook_delete_connection_on_exception(
            self, get_connections, run, get_connection, delete_connection):
        connection = Connection()
        connection.parse_from_uri(
            "gcpcloudsql://*****:*****@127.0.0.1:3200/testdb?database_type=mysql&"
            "project_id=example-project&location=europe-west1&instance=testdb&"
            "use_proxy=False")
        get_connection.return_value = connection

        db_connection = Connection()
        db_connection.host = "127.0.0.1"
        db_connection.set_extra(json.dumps({"project_id": "example-project",
                                            "location": "europe-west1",
                                            "instance": "testdb",
                                            "database_type": "mysql"}))
        get_connections.return_value = [db_connection]
        run.side_effect = Exception("Exception when running a query")
        operator = CloudSqlQueryOperator(
            sql=['SELECT * FROM TABLE'],
            task_id='task_id'
        )
        with self.assertRaises(Exception) as cm:
            operator.execute(None)
        err = cm.exception
        self.assertEqual("Exception when running a query", str(err))
        delete_connection.assert_called_once_with()
Example #3
 def test_create_operator_with_too_long_unix_socket_path(self, get_connections):
     uri = "gcpcloudsql://*****:*****@127.0.0.1:3200/testdb?database_type=postgres&" \
           "project_id=example-project&location=europe-west1&" \
           "instance=" \
           "test_db_with_long_name_a_bit_above" \
           "_the_limit_of_UNIX_socket_asdadadasadasd&" \
           "use_proxy=True&sql_proxy_use_tcp=False"
     self._setup_connections(get_connections, uri)
     operator = CloudSqlQueryOperator(
         sql=['SELECT * FROM TABLE'],
         task_id='task_id'
     )
     with self.assertRaises(AirflowException) as cm:
         operator.execute(None)
     err = cm.exception
     self.assertIn("The UNIX socket path length cannot exceed", str(err))
Example #4
 def test_create_operator_with_not_too_long_unix_socket_path(self, get_connections):
     uri = "gcpcloudsql://*****:*****@127.0.0.1:3200/testdb?database_type=postgres&" \
           "project_id=example-project&location=europe-west1&" \
           "instance=" \
           "test_db_with_longname_but_with_limit_of_UNIX_socket&" \
           "use_proxy=True&sql_proxy_use_tcp=False"
     self._setup_connections(get_connections, uri)
     operator = CloudSqlQueryOperator(
         sql=['SELECT * FROM TABLE'],
         task_id='task_id'
     )
     operator.cloudsql_db_hook.create_connection()
     try:
         db_hook = operator.cloudsql_db_hook.get_database_hook()
         conn = db_hook._get_connections_from_db(db_hook.postgres_conn_id)[0]
     finally:
         operator.cloudsql_db_hook.delete_connection()
     self.assertEqual('postgres', conn.conn_type)
     self.assertEqual('testdb', conn.schema)
Example #5
 def test_create_operator_with_correct_parameters_mysql_tcp(self, get_connections):
     uri = "gcpcloudsql://*****:*****@127.0.0.1:3200/testdb?database_type=mysql&" \
           "project_id=example-project&location=europe-west1&instance=testdb&" \
           "use_proxy=True&sql_proxy_use_tcp=True"
     self._setup_connections(get_connections, uri)
     operator = CloudSqlQueryOperator(
         sql=['SELECT * FROM TABLE'],
         task_id='task_id'
     )
     operator.cloudsql_db_hook.create_connection()
     try:
         db_hook = operator.cloudsql_db_hook.get_database_hook()
         conn = db_hook._get_connections_from_db(db_hook.mysql_conn_id)[0]
     finally:
         operator.cloudsql_db_hook.delete_connection()
     self.assertEqual('mysql', conn.conn_type)
     self.assertEqual('127.0.0.1', conn.host)
     self.assertNotEqual(3200, conn.port)
     self.assertEqual('testdb', conn.schema)
Example #6
 def test_create_operator_with_correct_parameters_mysql(
         self, get_connections):
     connection = Connection()
     connection.parse_from_uri(
         "gcpcloudsql://*****:*****@8.8.8.8:3200/testdb?database_type=mysql&"
         "project_id=example-project&location=europe-west1&instance=testdb&"
         "use_proxy=False&use_ssl=False")
     get_connections.return_value = [connection]
     operator = CloudSqlQueryOperator(sql=['SELECT * FROM TABLE'],
                                      task_id='task_id')
     operator.cloudsql_db_hook.create_connection()
     try:
         db_hook = operator.cloudsql_db_hook.get_database_hook()
         conn = db_hook._get_connections_from_db(db_hook.mysql_conn_id)[0]
     finally:
         operator.cloudsql_db_hook.delete_connection()
     self.assertEqual('mysql', conn.conn_type)
     self.assertEqual('8.8.8.8', conn.host)
     self.assertEqual(3200, conn.port)
     self.assertEqual('testdb', conn.schema)
Example #7
 def test_create_operator_with_correct_parameters_postgres_proxy_socket(
         self, get_connections):
     uri = "gcpcloudsql://*****:*****@127.0.0.1:3200/testdb?database_type=postgres&" \
           "project_id=example-project&location=europe-west1&instance=testdb&" \
           "use_proxy=True&sql_proxy_use_tcp=False"
     self._setup_connections(get_connections, uri)
     operator = CloudSqlQueryOperator(sql=['SELECT * FROM TABLE'],
                                      task_id='task_id')
     operator.cloudsql_db_hook.create_connection()
     try:
         db_hook = operator.cloudsql_db_hook.get_database_hook()
         conn = db_hook._get_connections_from_db(
             db_hook.postgres_conn_id)[0]  # pylint: disable=no-member
     finally:
         operator.cloudsql_db_hook.delete_connection()
     self.assertEqual('postgres', conn.conn_type)
     self.assertIn('/tmp', conn.host)
     self.assertIn('example-project:europe-west1:testdb', conn.host)
     self.assertIsNone(conn.port)
     self.assertEqual('testdb', conn.schema)
Example #8
 def test_create_operator_with_wrong_parameters(self, project_id, location,
                                                instance_name,
                                                database_type, use_proxy,
                                                use_ssl, sql, message,
                                                get_connections):
     connection = Connection()
     connection.parse_from_uri(
         "gcpcloudsql://*****:*****@8.8.8.8:3200/testdb?database_type={database_type}&"
         "project_id={project_id}&location={location}&instance={instance_name}&"
         "use_proxy={use_proxy}&use_ssl={use_ssl}".format(
             database_type=database_type,
             project_id=project_id,
             location=location,
             instance_name=instance_name,
             use_proxy=use_proxy,
             use_ssl=use_ssl))
     get_connections.return_value = [connection]
     with self.assertRaises(AirflowException) as cm:
         CloudSqlQueryOperator(sql=sql, task_id='task_id')
     err = cm.exception
     self.assertIn(message, str(err))
Example #9
 def test_create_operator_with_correct_parameters_postgres_proxy_socket(
         self, get_connections):
     connection = Connection()
     connection.parse_from_uri(
         "gcpcloudsql://*****:*****@8.8.8.8:3200/testdb?database_type=postgres&"
         "project_id=example-project&location=europe-west1&instance=testdb&"
         "use_proxy=True&sql_proxy_use_tcp=False")
     get_connections.return_value = [connection]
     operator = CloudSqlQueryOperator(sql=['SELECT * FROM TABLE'],
                                      task_id='task_id')
     operator.cloudsql_db_hook.create_connection()
     try:
         db_hook = operator.cloudsql_db_hook.get_database_hook()
         conn = db_hook._get_connections_from_db(
             db_hook.postgres_conn_id)[0]
     finally:
         operator.cloudsql_db_hook.delete_connection()
     self.assertEqual('postgres', conn.conn_type)
     self.assertIn('/tmp', conn.host)
     self.assertIn('example-project:europe-west1:testdb', conn.host)
     self.assertIsNone(conn.port)
     self.assertEqual('testdb', conn.schema)
Example #10
 def test_create_operator_with_correct_parameters_postgres_ssl(
         self, get_connections):
     uri = "gcpcloudsql://*****:*****@127.0.0.1:3200/testdb?database_type=postgres&" \
           "project_id=example-project&location=europe-west1&instance=testdb&" \
           "use_proxy=False&use_ssl=True&sslcert=/bin/bash&" \
           "sslkey=/bin/bash&sslrootcert=/bin/bash"
     self._setup_connections(get_connections, uri)
     operator = CloudSqlQueryOperator(sql=['SELECT * FROM TABLE'],
                                      task_id='task_id')
     operator.cloudsql_db_hook.create_connection()
     try:
         db_hook = operator.cloudsql_db_hook.get_database_hook()
         conn = db_hook._get_connections_from_db(
             db_hook.postgres_conn_id)[0]  # pylint: disable=no-member
     finally:
         operator.cloudsql_db_hook.delete_connection()
     self.assertEqual('postgres', conn.conn_type)
     self.assertEqual('127.0.0.1', conn.host)
     self.assertEqual(3200, conn.port)
     self.assertEqual('testdb', conn.schema)
     self.assertEqual('/bin/bash', conn.extra_dejson['sslkey'])
     self.assertEqual('/bin/bash', conn.extra_dejson['sslcert'])
     self.assertEqual('/bin/bash', conn.extra_dejson['sslrootcert'])
Example #11
 def test_create_operator_with_correct_parameters_mysql_proxy_socket(self,
                                                                     get_connections):
     uri = "gcpcloudsql://*****:*****@8.8.8.8:3200/testdb?database_type=mysql&" \
           "project_id=example-project&location=europe-west1&instance=testdb&" \
           "use_proxy=True&sql_proxy_use_tcp=False"
     self._setup_connections(get_connections, uri)
     operator = CloudSqlQueryOperator(
         sql=['SELECT * FROM TABLE'],
         task_id='task_id'
     )
     operator.cloudsql_db_hook.create_connection()
     try:
         db_hook = operator.cloudsql_db_hook.get_database_hook()
         conn = db_hook._get_connections_from_db(db_hook.mysql_conn_id)[0]
     finally:
         operator.cloudsql_db_hook.delete_connection()
     self.assertEqual('mysql', conn.conn_type)
     self.assertEqual('localhost', conn.host)
     self.assertIn('/tmp', conn.extra_dejson['unix_socket'])
     self.assertIn('example-project:europe-west1:testdb',
                   conn.extra_dejson['unix_socket'])
     self.assertIsNone(conn.port)
     self.assertEqual('testdb', conn.schema)
Example #12
# [START howto_operator_cloudsql_query_operators]

connection_names = [
    "proxy_postgres_tcp",
    "proxy_postgres_socket",
    "public_postgres_tcp",
    "public_postgres_tcp_ssl",
    "proxy_mysql_tcp",
    "proxy_mysql_socket",
    "public_mysql_tcp",
    "public_mysql_tcp_ssl"
]

tasks = []

with models.DAG(
    dag_id='example_gcp_sql_query',
    default_args=default_args,
    schedule_interval=None
) as dag:
    for connection_name in connection_names:
        tasks.append(
            CloudSqlQueryOperator(
                gcp_cloudsql_conn_id=connection_name,
                task_id="example_gcp_sql_task_" + connection_name,
                sql=SQL
            )
        )
# [END howto_operator_cloudsql_query_operators]
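Each name in connection_names must resolve to an Airflow connection that uses the gcpcloudsql:// URI format exercised by the tests above. As a minimal sketch (not part of the original example), one of these connections could be supplied through Airflow's AIRFLOW_CONN_<CONN_ID> environment-variable convention; the credentials, host, project, location and instance below are placeholder values:

import os

# Placeholder values only; the query parameters mirror those asserted in the tests
# above (database_type, project_id, location, instance, use_proxy, use_ssl).
os.environ['AIRFLOW_CONN_PUBLIC_POSTGRES_TCP'] = (
    "gcpcloudsql://user:password@203.0.113.10:5432/testdb?"
    "database_type=postgres&"
    "project_id=example-project&"
    "location=europe-west1&"
    "instance=testdb&"
    "use_proxy=False&"
    "use_ssl=False"
)

The same URI scheme covers the proxy and SSL variants shown earlier: use_proxy=True with sql_proxy_use_tcp=True or False selects TCP or UNIX-socket proxying, and use_ssl=True expects sslcert, sslkey and sslrootcert parameters.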
Example #13
dag = DAG(
    'dmp_proc_report',
    default_args=default_args,
    # on_success_callback=cleanup_xcom,
    description='Prebuilt Report Generation pipeline',
    # GA data collection finishes by 7 AM KST, so a daily batch would use '0 7 * * *'
    schedule_interval=None,
    # Set an earlier date (e.g. start_date=datetime(2019, 11, 13)) to include periods before the base date
    start_date=airflow.utils.dates.days_ago(2),
    catchup=False)

# Delete all rows from the DP_TB_PR_CRT_RPRT_INF table in Cloud SQL
delete_mart_mysql = CloudSqlQueryOperator(
    task_id="delete_mart_in_mysql",
    gcp_cloudsql_conn_id=connection_name,
    sql="DELETE FROM DKDMPDB.DP_TB_PR_CRT_RPRT_INF",
    retries=1,
    dag=dag)

# Load all rows from the BigQuery report mart table into the Cloud SQL table
load_bq_mart_to_mysql = BigQueryToMySqlOperator(
    task_id="load_bqmart_to_mysql",
    dataset_table=reportMartDSTable,
    mysql_table="DP_TB_PR_CRT_RPRT_INF",
    gcp_conn_id="google_cloud_default",
    mysql_conn_id=cloudsql_connection,
    selected_fields="SGM_NO,ITM_ID,PRP_NM,SUB_PRP_NM,POPART_V,OJ_CONS_RT,AM_CONS_RT",
    replace=True,
    batch_size=50000,
    location=loc_ga,
    dag=dag)
Example #14
                            value="CREATE TABLE {} ({})".format(
                                TABLE, sqlcolumnas))


with DAG(ENV + "_" + OBJECT.upper(),
         schedule_interval=None,
         default_args=default_args,
         template_searchpath=''.format(
             cfg.ENV, OBJECT)) as dag:  # TODO: Template_searchpath

    get_df_and_types = PythonOperator(task_id='get_df_and_types',
                                      python_callable=get_df_and_types,
                                      provide_context=True)

    creartabla = CloudSqlQueryOperator(
        gcp_cloudsql_conn_id='cloudsql_pipeline',
        task_id="creartabla",
        sql="{{ti.xcom_pull(task_ids='get_df_and_types',key='SQL')}}")

    import_to_csql = CloudSqlInstanceImportOperator(
        task_id='import_to_csql',
        project_id=SQL_PROJECT,
        instance='servicedat-cal-mysql',
        body={
            "importContext": {
                "kind": "sql#importContext",
                "fileType": 'CSV',
                "uri": '{}/cloudSQLexport_temp.csv'.format(DIR_TMP),
                "database": DATABASE,
                "csvImportOptions": {
                    "table": TABLE
                }