コード例 #1
0
    def extract(self) -> [StepMetadata]:
        # (1) Parse sql statement to obtain input / output tables.
        sql_meta: SqlMeta = SqlParser.parse(self.operator.sql)

        # (2) Default all inputs / outputs to current connection.
        # NOTE: We'll want to look into adding support for the `database`
        # property that is used to override the one defined in the connection.
        conn_id = self.operator.postgres_conn_id
        source = Source(type='POSTGRESQL',
                        name=conn_id,
                        connection_url=get_connection_uri(conn_id))

        # (3) Map input / output tables to dataset objects with source set
        # as the current connection. We need to also fetch the schema for the
        # input tables to format the dataset name as:
        # {schema_name}.{table_name}
        inputs = [
            Dataset.from_table(source=source,
                               table_name=in_table_schema.table_name.name,
                               schema_name=in_table_schema.schema_name)
            for in_table_schema in self._get_table_schemas(sql_meta.in_tables)
        ]
        outputs = [
            Dataset.from_table_schema(source=source,
                                      table_schema=out_table_schema) for
            out_table_schema in self._get_table_schemas(sql_meta.out_tables)
        ]

        return [
            StepMetadata(
                name=f"{self.operator.dag_id}.{self.operator.task_id}",
                inputs=inputs,
                outputs=outputs,
                context={'sql': self.operator.sql})
        ]
コード例 #2
0
def test_parse_simple_select_with_table_schema_prefix_and_extra_whitespace():
    sql_meta = SqlParser.parse(
        '''
        SELECT *
          FROM    schema0.table0   ;
        '''
    )

    assert sql_meta.in_tables == [DbTableName('schema0.table0')]
    assert sql_meta.out_tables == []
コード例 #3
0
def test_parse_simple_insert_into():
    sql_meta = SqlParser.parse(
        '''
        INSERT INTO table0 (col0, col1, col2)
        VALUES (val0, val1, val2);
        '''
    )

    assert sql_meta.in_tables == []
    assert sql_meta.out_tables == [DbTableName('table0')]
コード例 #4
0
def test_parse_simple_select_into():
    sql_meta = SqlParser.parse(
        '''
        SELECT *
          INTO table0
          FROM table1;
        '''
    )

    assert sql_meta.in_tables == [DbTableName('table1')]
    assert sql_meta.out_tables == [DbTableName('table0')]
コード例 #5
0
def test_parse_simple_select():
    sql_meta = SqlParser.parse(
        '''
        SELECT *
          FROM table0;
        '''
    )

    log.debug("sqlparser.parse() successful.")
    assert sql_meta.in_tables == [DbTableName('table0')]
    assert sql_meta.out_tables == []
コード例 #6
0
def test_parse_simple_insert_into_select():
    sql_meta = SqlParser.parse(
        '''
        INSERT INTO table1 (col0, col1, col2)
        SELECT col0, col1, col2
          FROM table0;
        '''
    )

    assert sql_meta.in_tables == [DbTableName('table0')]
    assert sql_meta.out_tables == [DbTableName('table1')]
コード例 #7
0
def test_parse_simple_left_join():
    sql_meta = SqlParser.parse(
        '''
        SELECT col0, col1, col2
          FROM table0
          LEFT JOIN table1
            ON t1.col0 = t2.col0
        '''
    )

    assert sql_meta.in_tables == [DbTableName('table0'), DbTableName('table1')]
    assert sql_meta.out_tables == []
コード例 #8
0
def test_parse_simple_right_outer_join():
    sql_meta = SqlParser.parse(
        '''
        SELECT col0, col1, col2
          FROM table0
          RIGHT OUTER JOIN table1
            ON t1.col0 = t2.col0;
        '''
    )

    assert sql_meta.in_tables == [DbTableName('table0'), DbTableName('table1')]
    assert sql_meta.out_tables == []
コード例 #9
0
 def parse_sql_context(self):
     context = {
         'sql': self.operator.sql,
     }
     try:
         sql_meta = SqlParser.parse(self.operator.sql)
         log.debug(f"bigquery sql parsed and obtained meta: {sql_meta}")
         context['bigquery.sql.parsed.inputs'] = json.dumps(
             [in_table.name for in_table in sql_meta.in_tables])
         context['bigquery.sql.parsed.outputs'] = json.dumps(
             [out_table.name for out_table in sql_meta.out_tables])
     except Exception as e:
         log.error(f"Cannot parse sql query. {e}", exc_info=True)
         context['bigquery.extractor.sql_parser_error'] = \
             f'{e}: {traceback.format_exc()}'
     self.log.info(context)
     return context