    def test_convert_type(self, value, schema_type, expected):
        op = MySqlToGoogleCloudStorageOperator(task_id=TASK_ID,
                                               mysql_conn_id=MYSQL_CONN_ID,
                                               sql=SQL,
                                               bucket=BUCKET,
                                               filename=JSON_FILENAME)
        self.assertEqual(op.convert_type(value, schema_type), expected)
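The (value, schema_type, expected) signature implies this test is driven by a parameterized decorator that the excerpt omits. Below is a minimal, self-contained sketch of that pattern, assuming the `parameterized` package; the conversion cases and the stand-in converter are illustrative, not the operator's actual implementation.

import decimal
import unittest

from parameterized import parameterized


class ConvertTypeSketch(unittest.TestCase):
    """Illustrative only: shows how (value, schema_type, expected) cases are supplied."""

    @staticmethod
    def convert_type(value, schema_type):
        # Stand-in converter so the sketch runs without Airflow installed.
        if isinstance(value, decimal.Decimal):
            return float(value)
        return value

    @parameterized.expand([
        ('a_string', 'STRING', 'a_string'),
        (decimal.Decimal('5.5'), 'NUMERIC', 5.5),
    ])
    def test_convert_type(self, value, schema_type, expected):
        self.assertEqual(self.convert_type(value, schema_type), expected)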
Example #2
    def test_exec_success_csv_with_delimiter(self, gcs_hook_mock_class,
                                             mysql_hook_mock_class):
        """Test successful run of execute function for CSV with a field delimiter"""
        op = MySqlToGoogleCloudStorageOperator(task_id=TASK_ID,
                                               mysql_conn_id=MYSQL_CONN_ID,
                                               sql=SQL,
                                               export_format='csv',
                                               field_delimiter='|',
                                               bucket=BUCKET,
                                               filename=CSV_FILENAME)

        mysql_hook_mock = mysql_hook_mock_class.return_value
        mysql_hook_mock.get_conn().cursor().__iter__.return_value = iter(ROWS)
        mysql_hook_mock.get_conn().cursor().description = CURSOR_DESCRIPTION

        gcs_hook_mock = gcs_hook_mock_class.return_value

        def _assert_upload(bucket, obj, tmp_filename, mime_type=None):
            self.assertEqual(BUCKET, bucket)
            self.assertEqual(CSV_FILENAME.format(0), obj)
            self.assertEqual('text/csv', mime_type)
            with open(tmp_filename, 'rb') as f:
                self.assertEqual(b''.join(CSV_LINES_PIPE_DELIMITED), f.read())

        gcs_hook_mock.upload.side_effect = _assert_upload

        op.execute(None)

        mysql_hook_mock_class.assert_called_once_with(
            mysql_conn_id=MYSQL_CONN_ID)
        mysql_hook_mock.get_conn().cursor().execute.assert_called_once_with(
            SQL)
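The tests above rely on module-level fixtures (TASK_ID, SQL, BUCKET, the filename templates, ROWS, CURSOR_DESCRIPTION, and the expected output lines) that the excerpts never show. The block below is a plausible set of those fixtures, assumed purely so the assertions are easier to follow; the concrete values are illustrative.

# Illustrative fixtures assumed by the test excerpts; values are examples only.
TASK_ID = 'test-mysql-to-gcs'
MYSQL_CONN_ID = 'mysql_conn_test'
SQL = 'SELECT * FROM test_table'
BUCKET = 'test-bucket'
JSON_FILENAME = 'test_{}.ndjson'
CSV_FILENAME = 'test_{}.csv'
SCHEMA_FILENAME = 'schema_test.json'

ROWS = [
    ('mock_row_content_1', 42),
    ('mock_row_content_2', 43),
    ('mock_row_content_3', 44),
]
CURSOR_DESCRIPTION = (
    ('some_str', 0, 0, 0, 0, 0, False),
    ('some_num', 1005, 0, 0, 0, 0, False),
)
NDJSON_LINES = [
    b'{"some_num": 42, "some_str": "mock_row_content_1"}\n',
    b'{"some_num": 43, "some_str": "mock_row_content_2"}\n',
    b'{"some_num": 44, "some_str": "mock_row_content_3"}\n',
]
CSV_LINES = [
    b'some_str,some_num\r\n',
    b'mock_row_content_1,42\r\n',
    b'mock_row_content_2,43\r\n',
    b'mock_row_content_3,44\r\n',
]
CSV_LINES_PIPE_DELIMITED = [
    b'some_str|some_num\r\n',
    b'mock_row_content_1|42\r\n',
    b'mock_row_content_2|43\r\n',
    b'mock_row_content_3|44\r\n',
]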
Example #3
    def test_exec_success_json(self, gcs_hook_mock_class,
                               mysql_hook_mock_class):
        """Test successful run of execute function for JSON"""
        op = MySqlToGoogleCloudStorageOperator(task_id=TASK_ID,
                                               mysql_conn_id=MYSQL_CONN_ID,
                                               sql=SQL,
                                               bucket=BUCKET,
                                               filename=JSON_FILENAME)

        mysql_hook_mock = mysql_hook_mock_class.return_value
        mysql_hook_mock.get_conn().cursor().__iter__.return_value = iter(ROWS)
        mysql_hook_mock.get_conn().cursor().description = CURSOR_DESCRIPTION

        gcs_hook_mock = gcs_hook_mock_class.return_value

        def _assert_upload(bucket, obj, tmp_filename, mime_type=None):
            self.assertEqual(BUCKET, bucket)
            self.assertEqual(JSON_FILENAME.format(0), obj)
            self.assertEqual('application/json', mime_type)
            with open(tmp_filename, 'rb') as f:
                self.assertEqual(b''.join(NDJSON_LINES), f.read())

        gcs_hook_mock.upload.side_effect = _assert_upload

        op.execute(None)

        mysql_hook_mock_class.assert_called_once_with(
            mysql_conn_id=MYSQL_CONN_ID)
        mysql_hook_mock.get_conn().cursor().execute.assert_called_once_with(
            SQL)
    def test_write_local_data_files(self):

        # Configure
        task_id = "some_test_id"
        sql = "some_sql"
        bucket = "some_bucket"
        filename = "some_filename"
        row_iter = [[1, b'byte_str_1'], [2, b'byte_str_2']]
        schema = [{
            'name': 'location',
            'type': 'STRING',
            'mode': 'nullable',
        }, {
            'name': 'uuid',
            'type': 'BYTES',
            'mode': 'nullable',
        }]
        schema_str = json.dumps(schema)

        op = MySqlToGoogleCloudStorageOperator(
            task_id=task_id,
            sql=sql,
            bucket=bucket,
            filename=filename,
            schema=schema_str)

        cursor_mock = MagicMock()
        cursor_mock.__iter__.return_value = row_iter

        # Run
        op._write_local_data_files(cursor_mock)
    def test_schema_file(self, gcs_hook_mock_class, mysql_hook_mock_class):
        """Test writing schema files."""
        mysql_hook_mock = mysql_hook_mock_class.return_value
        mysql_hook_mock.get_conn().cursor().__iter__.return_value = iter(ROWS)
        mysql_hook_mock.get_conn().cursor().description = CURSOR_DESCRIPTION

        gcs_hook_mock = gcs_hook_mock_class.return_value

        def _assert_upload(bucket, obj, tmp_filename, mime_type, gzip):  # pylint: disable=unused-argument
            if obj == SCHEMA_FILENAME:
                self.assertFalse(gzip)
                with open(tmp_filename, 'rb') as file:
                    self.assertEqual(b''.join(SCHEMA_JSON), file.read())

        gcs_hook_mock.upload.side_effect = _assert_upload

        op = MySqlToGoogleCloudStorageOperator(
            task_id=TASK_ID,
            sql=SQL,
            bucket=BUCKET,
            filename=JSON_FILENAME,
            schema_filename=SCHEMA_FILENAME)
        op.execute(None)

        # once for the file and once for the schema
        self.assertEqual(2, gcs_hook_mock.upload.call_count)
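SCHEMA_JSON is another fixture the excerpt omits; the upload callback reads the temporary schema file back and compares it byte-for-byte. A plausible shape, assuming the operator derives BigQuery field definitions from the cursor description (values illustrative):

# Illustrative SCHEMA_JSON fixture: byte chunks that join into the schema file.
SCHEMA_JSON = [
    b'[{"mode": "NULLABLE", "name": "some_str", "type": "STRING"}, ',
    b'{"mode": "NULLABLE", "name": "some_num", "type": "INTEGER"}]',
]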
Example #6
    def test_write_local_data_files(self):

        # Configure
        task_id = "some_test_id"
        sql = "some_sql"
        bucket = "some_bucket"
        filename = "some_filename"
        row_iter = [[1, b'byte_str_1'], [2, b'byte_str_2']]
        schema = [{
            'name': 'location',
            'type': 'STRING',
            'mode': 'nullable',
        }, {
            'name': 'uuid',
            'type': 'BYTES',
            'mode': 'nullable',
        }]
        schema_str = json.dumps(schema)

        op = MySqlToGoogleCloudStorageOperator(task_id=task_id,
                                               sql=sql,
                                               bucket=bucket,
                                               filename=filename,
                                               schema=schema_str)

        cursor_mock = MagicMock()
        cursor_mock.__iter__.return_value = row_iter

        # Run
        op._write_local_data_files(cursor_mock)
    def test_exec_success_csv_with_delimiter(self, gcs_hook_mock_class, mysql_hook_mock_class):
        """Test successful run of execute function for CSV with a field delimiter"""
        op = MySqlToGoogleCloudStorageOperator(
            task_id=TASK_ID,
            mysql_conn_id=MYSQL_CONN_ID,
            sql=SQL,
            export_format='csv',
            field_delimiter='|',
            bucket=BUCKET,
            filename=CSV_FILENAME)

        mysql_hook_mock = mysql_hook_mock_class.return_value
        mysql_hook_mock.get_conn().cursor().__iter__.return_value = iter(ROWS)
        mysql_hook_mock.get_conn().cursor().description = CURSOR_DESCRIPTION

        gcs_hook_mock = gcs_hook_mock_class.return_value

        def _assert_upload(bucket, obj, tmp_filename, mime_type=None):
            self.assertEqual(BUCKET, bucket)
            self.assertEqual(CSV_FILENAME.format(0), obj)
            self.assertEqual('text/csv', mime_type)
            with open(tmp_filename, 'rb') as f:
                self.assertEqual(b''.join(CSV_LINES_PIPE_DELIMITED), f.read())

        gcs_hook_mock.upload.side_effect = _assert_upload

        op.execute(None)

        mysql_hook_mock_class.assert_called_once_with(mysql_conn_id=MYSQL_CONN_ID)
        mysql_hook_mock.get_conn().cursor().execute.assert_called_once_with(SQL)
    def test_exec_success_csv_ensure_utc(self, gcs_hook_mock_class, mysql_hook_mock_class):
        """Test successful run of execute function for CSV"""
        op = MySqlToGoogleCloudStorageOperator(
            task_id=TASK_ID,
            mysql_conn_id=MYSQL_CONN_ID,
            sql=SQL,
            export_format='CSV',
            bucket=BUCKET,
            filename=CSV_FILENAME,
            ensure_utc=True)

        mysql_hook_mock = mysql_hook_mock_class.return_value
        mysql_hook_mock.get_conn().cursor().__iter__.return_value = iter(ROWS)
        mysql_hook_mock.get_conn().cursor().description = CURSOR_DESCRIPTION

        gcs_hook_mock = gcs_hook_mock_class.return_value

        def _assert_upload(bucket, obj, tmp_filename, mime_type=None, gzip=False):
            self.assertEqual(BUCKET, bucket)
            self.assertEqual(CSV_FILENAME.format(0), obj)
            self.assertEqual('text/csv', mime_type)
            self.assertFalse(gzip)
            with open(tmp_filename, 'rb') as file:
                self.assertEqual(b''.join(CSV_LINES), file.read())

        gcs_hook_mock.upload.side_effect = _assert_upload

        op.execute(None)

        mysql_hook_mock_class.assert_called_once_with(mysql_conn_id=MYSQL_CONN_ID)
        mysql_hook_mock.get_conn().cursor().execute.assert_has_calls([mock.call(TZ_QUERY), mock.call(SQL)])
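The two execute calls asserted above come from `ensure_utc=True`: the operator first pins the MySQL session time zone, then runs the export query. TZ_QUERY is not shown in the excerpt; a plausible definition, assuming the session is forced to UTC:

# Illustrative fixture: the statement expected before SQL when ensure_utc=True.
TZ_QUERY = "SET time_zone = '+00:00'"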
    def test_file_splitting(self, gcs_hook_mock_class, mysql_hook_mock_class):
        """Test that ndjson is split by approx_max_file_size_bytes param."""
        mysql_hook_mock = mysql_hook_mock_class.return_value
        mysql_hook_mock.get_conn().cursor().__iter__.return_value = iter(ROWS)
        mysql_hook_mock.get_conn().cursor().description = CURSOR_DESCRIPTION

        gcs_hook_mock = gcs_hook_mock_class.return_value
        expected_upload = {
            JSON_FILENAME.format(0): b''.join(NDJSON_LINES[:2]),
            JSON_FILENAME.format(1): NDJSON_LINES[2],
        }

        def _assert_upload(bucket, obj, tmp_filename, mime_type=None):
            self.assertEqual(BUCKET, bucket)
            self.assertEqual('application/json', mime_type)
            with open(tmp_filename, 'rb') as f:
                self.assertEqual(expected_upload[obj], f.read())

        gcs_hook_mock.upload.side_effect = _assert_upload

        op = MySqlToGoogleCloudStorageOperator(
            task_id=TASK_ID,
            sql=SQL,
            bucket=BUCKET,
            filename=JSON_FILENAME,
            approx_max_file_size_bytes=len(expected_upload[JSON_FILENAME.format(0)]))
        op.execute(None)
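The split is controlled by `approx_max_file_size_bytes`: it is set to the byte length of the first two NDJSON lines, so the third line rolls over into a second file, and the `{}` placeholder in `filename` receives the chunk index. A small sketch of that arithmetic, using the illustrative fixtures sketched earlier:

# Chunking sketch (illustrative fixture values): threshold = size of two rows,
# so rows 1-2 land in chunk 0 and row 3 starts chunk 1.
max_bytes = len(b''.join(NDJSON_LINES[:2]))
first_chunk = JSON_FILENAME.format(0)    # e.g. 'test_0.ndjson'
second_chunk = JSON_FILENAME.format(1)   # e.g. 'test_1.ndjson'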
    def test_schema_file(self, gcs_hook_mock_class, mysql_hook_mock_class):
        """Test writing schema files."""
        mysql_hook_mock = mysql_hook_mock_class.return_value
        mysql_hook_mock.get_conn().cursor().__iter__.return_value = iter(ROWS)
        mysql_hook_mock.get_conn().cursor().description = CURSOR_DESCRIPTION

        gcs_hook_mock = gcs_hook_mock_class.return_value

        def _assert_upload(bucket, obj, tmp_filename, mime_type):
            if obj == SCHEMA_FILENAME:
                with open(tmp_filename, 'rb') as f:
                    self.assertEqual(b''.join(SCHEMA_JSON), f.read())

        gcs_hook_mock.upload.side_effect = _assert_upload

        op = MySqlToGoogleCloudStorageOperator(
            task_id=TASK_ID,
            sql=SQL,
            bucket=BUCKET,
            filename=JSON_FILENAME,
            schema_filename=SCHEMA_FILENAME)
        op.execute(None)

        # once for the file and once for the schema
        self.assertEqual(2, gcs_hook_mock.upload.call_count)
    def test_exec_success_json(self, gcs_hook_mock_class, mysql_hook_mock_class):
        """Test successful run of execute function for JSON"""
        op = MySqlToGoogleCloudStorageOperator(
            task_id=TASK_ID,
            mysql_conn_id=MYSQL_CONN_ID,
            sql=SQL,
            bucket=BUCKET,
            filename=JSON_FILENAME)

        mysql_hook_mock = mysql_hook_mock_class.return_value
        mysql_hook_mock.get_conn().cursor().__iter__.return_value = iter(ROWS)
        mysql_hook_mock.get_conn().cursor().description = CURSOR_DESCRIPTION

        gcs_hook_mock = gcs_hook_mock_class.return_value

        def _assert_upload(bucket, obj, tmp_filename, mime_type=None):
            self.assertEqual(BUCKET, bucket)
            self.assertEqual(JSON_FILENAME.format(0), obj)
            self.assertEqual('application/json', mime_type)
            with open(tmp_filename, 'rb') as f:
                self.assertEqual(b''.join(NDJSON_LINES), f.read())

        gcs_hook_mock.upload.side_effect = _assert_upload

        op.execute(None)

        mysql_hook_mock_class.assert_called_once_with(mysql_conn_id=MYSQL_CONN_ID)
        mysql_hook_mock.get_conn().cursor().execute.assert_called_once_with(SQL)
Example #12
    def test_exec_success_csv(self, gcs_hook_mock_class, mysql_hook_mock_class):
        """Test the execute function in case where the run is successful."""
        op = MySqlToGoogleCloudStorageOperator(
            task_id=TASK_ID,
            mysql_conn_id=MYSQL_CONN_ID,
            sql=SQL,
            export_format={'file_format': 'csv', 'csv_dialect': 'excel'},
            bucket=BUCKET,
            filename=FILENAME)

        mysql_hook_mock = mysql_hook_mock_class.return_value
        mysql_hook_mock.get_conn().cursor().__iter__.return_value = iter(ROWS)
        mysql_hook_mock.get_conn().cursor().description = CURSOR_DESCRIPTION

        gcs_hook_mock = gcs_hook_mock_class.return_value

        def _assert_upload(bucket, obj, tmp_filename, content_type):
            self.assertEqual(BUCKET, bucket)
            self.assertEqual(FILENAME.format(0), obj)
            self.assertEqual('application/csv', content_type)
            with open(tmp_filename, 'rb') as f:
                self.assertEqual(b''.join(CSV_LINES), f.read())

        gcs_hook_mock.upload.side_effect = _assert_upload

        op.execute(None)

        mysql_hook_mock_class.assert_called_once_with(mysql_conn_id=MYSQL_CONN_ID)
        mysql_hook_mock.get_conn().cursor().execute.assert_called_once_with(SQL)
Example #13
    def test_file_splitting(self, gcs_hook_mock_class, mysql_hook_mock_class):
        """Test that ndjson is split by approx_max_file_size_bytes param."""
        mysql_hook_mock = mysql_hook_mock_class.return_value
        mysql_hook_mock.get_conn().cursor().__iter__.return_value = iter(ROWS)
        mysql_hook_mock.get_conn().cursor().description = CURSOR_DESCRIPTION

        gcs_hook_mock = gcs_hook_mock_class.return_value
        expected_upload = {
            JSON_FILENAME.format(0): b''.join(NDJSON_LINES[:2]),
            JSON_FILENAME.format(1): NDJSON_LINES[2],
        }

        def _assert_upload(bucket, obj, tmp_filename, mime_type=None):
            self.assertEqual(BUCKET, bucket)
            self.assertEqual('application/json', mime_type)
            with open(tmp_filename, 'rb') as f:
                self.assertEqual(expected_upload[obj], f.read())

        gcs_hook_mock.upload.side_effect = _assert_upload

        op = MySqlToGoogleCloudStorageOperator(
            task_id=TASK_ID,
            sql=SQL,
            bucket=BUCKET,
            filename=JSON_FILENAME,
            approx_max_file_size_bytes=len(
                expected_upload[JSON_FILENAME.format(0)]))
        op.execute(None)
Example #14
def gen_export_table_task(table_config):
    export_task = MySqlToGoogleCloudStorageOperator(
        task_id='export_{}'.format(table_config.params['export_table']),
        dag=dag,
        sql=table_config.params['export_query'],
        bucket=table_config.params['export_bucket'],
        filename="cloudsql_to_bigquery/{}/{}".format(
            table_config.params['export_database'],
            table_config.params['export_table']) + "_{}",
        schema_filename="cloudsql_to_bigquery/schema/{}/schema_raw".format(
            table_config.params['export_table']),
        mysql_conn_id="gcp_dvh_cloudsql")
    export_task.doc_md = """\
    #### Export table from cloudsql to cloud storage
    task documentation
    """
    return export_task
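A factory like gen_export_table_task is normally invoked in a loop over per-table configuration objects. The sketch below shows hypothetical usage; `TableConfig` and `table_configs` are stand-ins invented here, and only the `params` keys read by the function come from the code above.

# Hypothetical usage of the factory above; TableConfig/table_configs are
# illustrative stand-ins, not part of the original DAG file.
class TableConfig:
    def __init__(self, params):
        self.params = params


table_configs = [
    TableConfig({
        'export_table': 'orders',
        'export_database': 'shop',
        'export_query': 'SELECT * FROM orders',
        'export_bucket': 'dvh-staging-bucket',
    }),
]

export_tasks = [gen_export_table_task(cfg) for cfg in table_configs]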
Example #15
    def test_init(self):
        """Test MySqlToGoogleCloudStorageOperator instance is properly initialized."""
        op = MySqlToGoogleCloudStorageOperator(
            task_id=TASK_ID, sql=SQL, bucket=BUCKET, filename=FILENAME)
        self.assertEqual(op.task_id, TASK_ID)
        self.assertEqual(op.sql, SQL)
        self.assertEqual(op.bucket, BUCKET)
        self.assertEqual(op.filename, FILENAME)

    def test_init(self):
        """Test MySqlToGoogleCloudStorageOperator instance is properly initialized."""
        op = MySqlToGoogleCloudStorageOperator(
            task_id=TASK_ID, sql=SQL, bucket=BUCKET, filename=JSON_FILENAME,
            export_format='CSV', field_delimiter='|')
        self.assertEqual(op.task_id, TASK_ID)
        self.assertEqual(op.sql, SQL)
        self.assertEqual(op.bucket, BUCKET)
        self.assertEqual(op.filename, JSON_FILENAME)
        self.assertEqual(op.export_format, 'csv')
        self.assertEqual(op.field_delimiter, '|')
Example #17
    def test_write_local_data_files(self):

        # Configure
        task_id = "some_test_id"
        sql = "some_sql"
        bucket = "some_bucket"
        filename = "some_filename"
        schema = "some_schema"
        description_list = [['col_integer'], ['col_byte']]
        row_iter = [[1, b'byte_str_1'], [2, b'byte_str_2']]

        op = MySqlToGoogleCloudStorageOperator(task_id=task_id,
                                               sql=sql,
                                               bucket=bucket,
                                               filename=filename,
                                               schema=schema)

        cursor_mock = MagicMock()
        cursor_mock.description = description_list
        cursor_mock.__iter__.return_value = row_iter

        # Run
        op._write_local_data_files(cursor_mock)
    def test_write_local_data_files(self):

        # Configure
        task_id = "some_test_id"
        sql = "some_sql"
        bucket = "some_bucket"
        filename = "some_filename"
        schema = "some_schema"
        description_list = [['col_integer'], ['col_byte']]
        row_iter = [[1, b'byte_str_1'], [2, b'byte_str_2']]

        op = MySqlToGoogleCloudStorageOperator(
            task_id=task_id,
            sql=sql,
            bucket=bucket,
            filename=filename,
            schema=schema)

        cursor_mock = MagicMock()
        cursor_mock.description = description_list
        cursor_mock.__iter__.return_value = row_iter

        # Run
        op._write_local_data_files(cursor_mock)

from datetime import datetime, timedelta
import os

from airflow import DAG
from airflow.contrib.operators.mysql_to_gcs import MySqlToGoogleCloudStorageOperator

yesterday = datetime.combine(datetime.today() - timedelta(1),
                             datetime.min.time())

default_args = {
    'start_date': yesterday,
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5)
}


def get_composer_gcs_bucket():
    return os.environ['GCS_BUCKET']


with DAG(dag_id='26_sql_operators',
         catchup=False,
         schedule_interval=timedelta(days=1),
         default_args=default_args,
         user_defined_macros={
             'get_composer_gcs_bucket': get_composer_gcs_bucket,
         }) as dag:

    sql_operators = MySqlToGoogleCloudStorageOperator(
        task_id='sql_operators',
        sql='SELECT * FROM PERSONS;',
        bucket='{{ get_composer_gcs_bucket() }}',
        filename='data/26_sql_operators/{{ execution_date }}/persons.json',
        mysql_conn_id='workshop_sql_conn_id',
        dag=dag)
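The Jinja expression in `bucket` works because `bucket` and `filename` are templated fields on the operator, and `get_composer_gcs_bucket` is available inside the template only because it is registered through `user_defined_macros` on the DAG.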
Example #20
    # 'end_date': datetime(2016, 1, 1),
}


dag = DAG("study_pn_campaign",
          default_args=default_args,
          schedule_interval=timedelta(1))

extract_recent_questions = MySqlToGoogleCloudStorageOperator(
    task_id="extract_recent_questions",
    dag=dag,
    sql="""
        SELECT qid, author_uid as uid, created
        FROM `qa_question`
        WHERE created >= 1605606503
        AND created < 1605692903
    """,
    bucket="pipeline-pn-campaign",
    filename="{{ ds }}/recent_questions.csv",
    mysql_conn_id='gotit_study_mysql',
    google_cloud_storage_conn_id='gotit_analytics_gc',
    export_format='csv'
)

extract_recently_active_users = MySqlToGoogleCloudStorageOperator(
    task_id="extract_recently_active_users",
    dag=dag,
    sql="""
        SELECT DISTINCT vc_account.uid as uid, vc_account.balance as balance
        FROM qa_question JOIN vc_account
        ON qa_question.author_uid = vc_account.uid
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

dag = DAG(
    'cloud_sql_proxy_to_gcs',
    default_args=default_args,
    description='Cloud SQL Proxy to GCS',
    schedule_interval=timedelta(days=1),
)

SQL = 'select * from sundar.supplier'
t1 = MySqlToGoogleCloudStorageOperator(
    mysql_conn_id="sundar-sql-con",
    task_id="sample_test_conn",
    bucket="hackathon-demo-238019",
    filename="test.json",
    google_cloud_storage_conn_id="sundar-gcs-con",
    export_format="json",
    sql=SQL,
    dag=dag)
t2 = BashOperator(task_id='sleep',
                  depends_on_past=False,
                  bash_command='sleep 5',
                  dag=dag)

t1 >> t2
slack_notify = SlackAPIPostOperator(
    task_id='slack_notify',
    token='xxxxxx',
    channel='data-status',
    username='******',
    text='Successfully performed sakila ETL operation',
    dag=dag)

for connection in sakila_connections:
    for table in sakila_tables:
        extract = MySqlToGoogleCloudStorageOperator(
            task_id="extract_mysql_%s_%s" % (connection, table),
            mysql_conn_id=connection,
            google_cloud_storage_conn_id='gcp_test',
            sql="SELECT *, '%s' as source FROM sakila.%s" %
            (connection, table),
            bucket='ghen-airflow',
            filename="%s/%s/%s{}.json" % (connection, table, table),
            schema_filename="%s/schemas/%s.json" % (connection, table),
            dag=dag)

        load = GoogleCloudStorageToBigQueryOperator(
            task_id="load_bq_%s_%s" % (connection, table),
            bigquery_conn_id='gcp_test',
            google_cloud_storage_conn_id='gcp_test',
            bucket='ghen-airflow',
            destination_project_dataset_table="spark-test-173322.%s.%s" %
            (connection, table),
            source_objects=["%s/%s/%s*.json" % (connection, table, table)],
            schema_object="%s/schemas/%s.json" % (connection, table),
            source_format='NEWLINE_DELIMITED_JSON',
gcp_connection_id = "google_cloud_default"

default_args = {'owner': 'airflow', 'start_date': datetime(2019, 3, 21)}


def query_to_join_posts_with_users():
    return 'SELECT p.owner_user_id as user_id, u.display_name as user_name, p.score as post_score, p.title as post_title FROM `demo_temp.stackoverflow_posts` as p join `demo_temp.stackoverflow_users` as u on p.owner_user_id = u.id'


with DAG('stackoverflow',
         default_args=default_args,
         schedule_interval="*/10 * * * *") as dag:

    extract_posts_data = MySqlToGoogleCloudStorageOperator(
        task_id='extract_posts_data',
        sql='SELECT * FROM stackoverflow_posts',
        export_format='JSON',
        bucket='demo-sample',
        filename='so_posts.json')

    extract_users_data = PostgresToGoogleCloudStorageOperator(
        task_id='extract_users_data',
        sql='SELECT * FROM stackoverflow_users',
        bucket='demo-sample',
        filename='so_users.json')

    move_users_data_to_staging = gcs_to_bq.GoogleCloudStorageToBigQueryOperator(
        task_id='move_users_data_to_staging',
        bucket='demo-sample',
        source_objects=['so_users.json'],
        write_disposition='WRITE_TRUNCATE',
        schema_fields=[{
Example #24
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

bucket_name = 'rendy-test'  # change to your bucket name here
sales_filename = 'sales_export.json'
sales_schema_filename = 'sales_schema.json'

with DAG('devfest2020',
         schedule_interval=timedelta(days=1),
         default_args=default_args) as dag:
    extract = MySqlToGoogleCloudStorageOperator(
        task_id='extract',
        sql='SELECT * FROM test_db.sales_table st',  # change to your mysql table
        bucket=bucket_name,
        filename=sales_filename,
        schema_filename=sales_schema_filename,
        mysql_conn_id='devfest2020',  # change to your mysql connection id
    )

    load = GoogleCloudStorageToBigQueryOperator(
        task_id='load',
        destination_project_dataset_table=
        'project.rendy_test.sales',  #change to your bq
        bucket=bucket_name,
        source_objects=[sales_filename],
        schema_object=sales_schema_filename,
        write_disposition='WRITE_TRUNCATE',
        create_disposition='CREATE_IF_NEEDED',
        source_format='NEWLINE_DELIMITED_JSON')
    def test_convert_type(self, value, schema_type, expected):
        self.assertEqual(
            MySqlToGoogleCloudStorageOperator._convert_type(
                value, schema_type), expected)
Example #26
from airflow import DAG
from airflow.contrib.operators.mysql_to_gcs import MySqlToGoogleCloudStorageOperator
from airflow.operators.bash_operator import BashOperator
from datetime import datetime, timedelta

default_args = {
    'owner': 'mikeghen',
    'start_date': datetime(2017, 8, 11),
    'depends_on_past': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

dag = DAG('mysql_to_gcs', default_args=default_args)

export_actor = MySqlToGoogleCloudStorageOperator(
    task_id='extract_actors',
    mysql_conn_id='sakila_test',
    google_cloud_storage_conn_id='gcp_test',
    sql='SELECT * FROM sakila.actor',
    bucket='ghen-airflow',
    filename='sakila/actors/actors{}.json',
    schema_filename='sakila/schemas/actors.json',
    dag=dag)
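As in the earlier examples, the `{}` in `filename='sakila/actors/actors{}.json'` is filled with a chunk index at run time, so an export that exceeds the size threshold is written as `actors0.json`, `actors1.json`, and so on, while `schema_filename` produces a single schema file that a downstream GoogleCloudStorageToBigQueryOperator can reference via `schema_object`.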