예제 #1
0
    def test_data_source_join_overlapping_fields(self):
        table_context = DatasetTableContext({
            'my_project': {
                'my_dataset': {
                    'my_table':
                    TypedDataFrame(
                        pd.DataFrame([[1, 9]], columns=['a', 'b']),
                        types=[BQScalarType.INTEGER, BQScalarType.INTEGER]),
                    'my_table2':
                    TypedDataFrame(
                        pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'd']),
                        types=[BQScalarType.INTEGER, BQScalarType.INTEGER])
                }
            }
        })
        initial_table = (TableReference(
            ('my_project', 'my_dataset', 'my_table')), EMPTY_NODE)
        join_type = 'INNER'
        join_table = (TableReference(
            ('my_project', 'my_dataset', 'my_table2')), EMPTY_NODE)
        join_on = EMPTY_NODE
        joins = [(join_type, join_table, join_on)]

        data_source = DataSource(initial_table, joins)
        context = data_source.create_context(table_context)

        self.assertEqual(context.table.to_list_of_lists(), [[1, 9, 1, 2]])
        self.assertEqual(
            list(context.table.dataframe),
            ['my_table.a', 'my_table.b', 'my_table2.a', 'my_table2.d'])
예제 #2
0
    def test_data_source_join_error(
            self,
            join_type,  # type: str
            join_on,  # type: ConditionsType
            error_type,  # type: Type[BaseException]
            error  # type: str
    ):
        # type: (...) -> None
        initial_table = (TableReference(
            ('my_project', 'my_dataset', 'my_table')), EMPTY_NODE)
        join_table = (TableReference(
            ('my_project', 'my_dataset', 'my_table2')), EMPTY_NODE)
        joins = [Join(join_type, join_table, join_on)]

        data_source = DataSource(initial_table, joins)
        with self.assertRaisesRegexp(error_type, error):
            data_source.create_context(self.table_context)
예제 #3
0
    def test_data_source(self):
        data_source = DataSource((TableReference(
            ('my_project', 'my_dataset', 'my_table')), EMPTY_NODE), [])
        data_source_context = data_source.create_context(self.table_context)

        self.assertEqual(data_source_context.table.to_list_of_lists(),
                         [[1], [2]])
        self.assertEqual(list(data_source_context.table.dataframe),
                         ['my_table.a'])
        self.assertEqual(data_source_context.table.types,
                         [BQScalarType.INTEGER])
예제 #4
0
    def test_data_source_join_multiple_joins(self):
        table_context = DatasetTableContext({
            'my_project': {
                'my_dataset': {
                    'my_table':
                    TypedDataFrame(pd.DataFrame([[1, 2, 3], [4, 5, 6]],
                                                columns=['a', 'b1', 'c1']),
                                   types=[
                                       BQScalarType.INTEGER,
                                       BQScalarType.INTEGER,
                                       BQScalarType.INTEGER
                                   ]),
                    'my_table2':
                    TypedDataFrame(pd.DataFrame([[1, 8, 9], [0, 7, 2]],
                                                columns=['a', 'b', 'c2']),
                                   types=[
                                       BQScalarType.INTEGER,
                                       BQScalarType.INTEGER,
                                       BQScalarType.INTEGER
                                   ]),
                    'my_table3':
                    TypedDataFrame(pd.DataFrame([[3, 4, 5], [6, 7, 8]],
                                                columns=['a3', 'b', 'c3']),
                                   types=[
                                       BQScalarType.INTEGER,
                                       BQScalarType.INTEGER,
                                       BQScalarType.INTEGER
                                   ])
                }
            }
        })
        initial_table = (TableReference(
            ('my_project', 'my_dataset', 'my_table')), EMPTY_NODE)
        join_type = 'FULL'
        join_table2 = (TableReference(
            ('my_project', 'my_dataset', 'my_table2')), EMPTY_NODE)
        join_table3 = (TableReference(
            ('my_project', 'my_dataset', 'my_table3')), EMPTY_NODE)
        joins = [(join_type, join_table2, ('a', )),
                 (join_type, join_table3, ('b', ))]
        data_source = DataSource(initial_table, joins)
        context = data_source.create_context(table_context)

        result = [[1, 2, 3, 1, 8, 9, None, None, None],
                  [4, 5, 6, None, None, None, None, None, None],
                  [None, None, None, 0, 7, 2, 6, 7, 8],
                  [None, None, None, None, None, None, 3, 4, 5]]
        self.assertEqual(context.table.to_list_of_lists(), result)
예제 #5
0
    def test_data_source_join_multiple_columns(self):
        table_context = DatasetTableContext({
            'my_project': {
                'my_dataset': {
                    'my_table':
                    TypedDataFrame(pd.DataFrame([[1, 2, 3], [1, 5, 6]],
                                                columns=['a', 'b', 'c']),
                                   types=[
                                       BQScalarType.INTEGER,
                                       BQScalarType.INTEGER,
                                       BQScalarType.INTEGER
                                   ]),
                    'my_table2':
                    TypedDataFrame(pd.DataFrame([[1, 2, 7], [3, 2, 8]],
                                                columns=['a', 'b', 'd']),
                                   types=[
                                       BQScalarType.INTEGER,
                                       BQScalarType.INTEGER,
                                       BQScalarType.INTEGER
                                   ])
                }
            }
        })
        initial_table = (TableReference(
            ('my_project', 'my_dataset', 'my_table')), EMPTY_NODE)
        join_type = 'FULL'
        join_table = (TableReference(
            ('my_project', 'my_dataset', 'my_table2')), EMPTY_NODE)
        join_on = ('a', 'b')
        joins = [(join_type, join_table, join_on)]

        data_source = DataSource(initial_table, joins)
        context = data_source.create_context(table_context)

        result = [[1, 2, 3, 1, 2, 7], [1, 5, 6, None, None, None],
                  [None, None, None, 3, 2, 8]]
        self.assertEqual(context.table.to_list_of_lists(), result)