Beispiel #1
0
    def test_execute(self, mock_run, schema, prefix, s3_keys, columns_array):
        table = 'table'
        stage = 'stage'
        file_format = 'file_format'

        S3ToSnowflakeOperator(
            s3_keys=s3_keys,
            table=table,
            stage=stage,
            prefix=prefix,
            file_format=file_format,
            schema=schema,
            columns_array=columns_array,
            task_id="task_id",
            dag=None,
        ).execute(None)

        copy_query = "COPY INTO "
        if schema:
            copy_query += f"{schema}.{table}"
        else:
            copy_query += table
        if columns_array:
            copy_query += f"({','.join(columns_array)})"

        copy_query += f"\nFROM @{stage}/{prefix or ''}"

        if s3_keys:
            files = ", ".join(f"'{key}'" for key in s3_keys)
            copy_query += f"\nfiles=({files})"

        copy_query += f"\nfile_format={file_format}"

        mock_run.assert_called_once()
        assert mock_run.call_args[0][0] == copy_query
Beispiel #2
0
    def test_execute(self, mock_run):
        s3_keys = ['1.csv', '2.csv']
        table = 'table'
        stage = 'stage'
        file_format = 'file_format'
        schema = 'schema'

        S3ToSnowflakeOperator(
            s3_keys=s3_keys,
            table=table,
            stage=stage,
            file_format=file_format,
            schema=schema,
            columns_array=None,
            task_id="task_id",
            dag=None,
        ).execute(None)

        files = str(s3_keys)
        files = files.replace('[', '(')
        files = files.replace(']', ')')
        base_sql = """
                FROM @{stage}/
                files={files}
                file_format={file_format}
            """.format(stage=stage, files=files, file_format=file_format)

        copy_query = """
                COPY INTO {schema}.{table} {base_sql}
            """.format(schema=schema, table=table, base_sql=base_sql)

        assert mock_run.call_count == 1
        assert_equal_ignore_multiple_spaces(self, mock_run.call_args[0][0],
                                            copy_query)
Beispiel #3
0
    def test_execute(self, mock_run, prefix, s3_keys, columns_array):
        table = 'table'
        stage = 'stage'
        file_format = 'file_format'
        schema = 'schema'

        S3ToSnowflakeOperator(
            s3_keys=s3_keys,
            table=table,
            stage=stage,
            prefix=prefix,
            file_format=file_format,
            schema=schema,
            columns_array=columns_array,
            task_id="task_id",
            dag=None,
        ).execute(None)

        files = None
        if s3_keys:
            files = "files=({})".format(", ".join(f"'{key}'"
                                                  for key in s3_keys))
        base_sql = f"""
                FROM @{stage}/{prefix if prefix else ''}
                {files if files else ''}
                file_format={file_format}
            """

        columns = None
        if columns_array:
            columns = f"({','.join(columns_array)})"
        copy_query = f"""
                COPY INTO {schema}.{table}{columns if columns else ''} {base_sql}
            """
        copy_query = "\n".join(line.strip()
                               for line in copy_query.splitlines())

        mock_run.assert_called_once()
        assert mock_run.call_args[0][0] == copy_query
Beispiel #4
0
snowflake_op_template_file = SnowflakeOperator(
    task_id='snowflake_op_template_file',
    dag=dag,
    snowflake_conn_id=SNOWFLAKE_CONN_ID,
    sql='/path/to/sql/<filename>.sql',
)

# [END howto_operator_snowflake]

# [START howto_operator_s3_to_snowflake]

copy_into_table = S3ToSnowflakeOperator(
    task_id='copy_into_table',
    snowflake_conn_id=SNOWFLAKE_CONN_ID,
    s3_keys=[S3_FILE_PATH],
    table=SNOWFLAKE_SAMPLE_TABLE,
    schema=SNOWFLAKE_SCHEMA,
    stage=SNOWFLAKE_STAGE,
    file_format="(type = 'CSV',field_delimiter = ';')",
    dag=dag,
)

# [END howto_operator_s3_to_snowflake]

# [START howto_operator_snowflake_to_slack]

slack_report = SnowflakeToSlackOperator(
    task_id="slack_report",
    sql=SNOWFLAKE_SLACK_SQL,
    slack_message=SNOWFLAKE_SLACK_MESSAGE,
    snowflake_conn_id=SNOWFLAKE_CONN_ID,
    slack_conn_id=SLACK_CONN_ID,
Beispiel #5
0
)

slack_report = SnowflakeToSlackOperator(task_id="slack_report",
                                        sql=SNOWFLAKE_SLACK_SQL,
                                        slack_message=SNOWFLAKE_SLACK_MESSAGE,
                                        snowflake_conn_id=SNOWFLAKE_CONN_ID,
                                        slack_conn_id=SLACK_CONN_ID,
                                        dag=dag)

create_table = SnowflakeOperator(
    task_id='create_table',
    snowflake_conn_id=SNOWFLAKE_CONN_ID,
    sql=SNOWFLAKE_CREATE_TABLE_SQL,
    schema=SNOWFLAKE_SCHEMA,
    dag=dag,
)

copy_into_table = S3ToSnowflakeOperator(
    task_id='copy_into_table',
    snowflake_conn_id=SNOWFLAKE_CONN_ID,
    s3_keys=[SNOWFLAKE_LOAD_JSON_PATH],
    table=SNOWFLAKE_LOAD_TABLE,
    schema=SNOWFLAKE_SCHEMA,
    stage=SNOWFLAKE_STAGE,
    file_format="(type = 'JSON')",
    dag=dag,
)

select >> slack_report
create_table >> copy_into_table
Beispiel #6
0
         default_args=default_args,
         catchup=False) as dag:

    t0 = DummyOperator(task_id='start')

    pivot_data = SnowflakeOperator(task_id='call_pivot_sproc',
                                   snowflake_conn_id='snowflake',
                                   sql='call pivot_state_data();',
                                   role='KENTEND',
                                   schema='SANDBOX_KENTEND')

    for endpoint in endpoints:
        generate_files = PythonOperator(
            task_id='generate_file_{0}'.format(endpoint),
            python_callable=upload_to_s3,
            op_kwargs={
                'endpoint': endpoint,
                'date': date
            })

        snowflake = S3ToSnowflakeOperator(
            task_id='upload_{0}_snowflake'.format(endpoint),
            s3_keys=['{0}_{1}.csv'.format(endpoint, date)],
            stage='covid_stage',
            table='STATE_DATA',
            schema='SANDBOX_KENTEND',
            file_format='covid_csv',
            role='KENTEND',
            snowflake_conn_id='snowflake')

        t0 >> generate_files >> snowflake >> pivot_data