Ejemplo n.º 1
0
    def test_to_dataframe(self):
        # A four-row, two-column API response should become a DataFrame whose
        # columns follow the schema order and carry the expected dtypes.
        from google.cloud.bigquery.table import RowIterator
        from google.cloud.bigquery.table import SchemaField

        name_field = SchemaField('name', 'STRING', mode='REQUIRED')
        age_field = SchemaField('age', 'INTEGER', mode='REQUIRED')
        people = (
            ('Phred Phlyntstone', '32'),
            ('Bharney Rhubble', '33'),
            ('Wylma Phlyntstone', '29'),
            ('Bhettye Rhubble', '27'),
        )
        # Wrap every (name, age) pair in the {'f': [{'v': ...}]} row layout
        # that the mocked API response is made of.
        payload = {
            'rows': [
                {'f': [{'v': name}, {'v': age}]} for name, age in people
            ],
        }
        fake_request = mock.Mock(return_value=payload)
        iterator = RowIterator(
            mock.sentinel.client, fake_request, '/foo',
            [name_field, age_field])

        frame = iterator.to_dataframe()

        self.assertIsInstance(frame, pandas.DataFrame)
        self.assertEqual(len(frame), 4)  # one DataFrame row per API row
        self.assertEqual(list(frame), ['name', 'age'])  # schema order kept
        self.assertEqual(frame.name.dtype.name, 'object')
        self.assertEqual(frame.age.dtype.name, 'int64')
Ejemplo n.º 2
0
    def test_to_dataframe_error_if_pandas_is_none(self):
        # ``to_dataframe`` is expected to raise ``ValueError`` when pandas is
        # not available to the table module.
        from google.cloud.bigquery.table import RowIterator
        from google.cloud.bigquery.table import SchemaField

        schema = [
            SchemaField('name', 'STRING', mode='REQUIRED'),
            SchemaField('age', 'INTEGER', mode='REQUIRED')
        ]
        rows = [
            {'f': [{'v': 'Phred Phlyntstone'}, {'v': '32'}]},
            {'f': [{'v': 'Bharney Rhubble'}, {'v': '33'}]},
        ]
        path = '/foo'
        api_request = mock.Mock(return_value={'rows': rows})
        row_iterator = RowIterator(
            mock.sentinel.client, api_request, path, schema)

        # Simulate a missing pandas installation explicitly so the outcome
        # does not depend on whether pandas happens to be importable in the
        # environment running this test.
        # NOTE(review): assumes the table module exposes a module-level
        # ``pandas`` name that is None when the import fails -- confirm.
        with mock.patch('google.cloud.bigquery.table.pandas', new=None):
            with self.assertRaises(ValueError):
                row_iterator.to_dataframe()
Ejemplo n.º 3
0
    def test_iterate(self):
        # Iterating a RowIterator should yield one row object per API row,
        # bump ``num_results`` as rows are consumed, and issue exactly one
        # GET request for the single page of results.
        from google.cloud.bigquery.table import RowIterator
        from google.cloud.bigquery.table import SchemaField

        schema = [
            SchemaField('name', 'STRING', mode='REQUIRED'),
            SchemaField('age', 'INTEGER', mode='REQUIRED')
        ]
        rows = [
            {'f': [{'v': 'Phred Phlyntstone'}, {'v': '32'}]},
            {'f': [{'v': 'Bharney Rhubble'}, {'v': '33'}]},
        ]
        path = '/foo'
        api_request = mock.Mock(return_value={'rows': rows})
        row_iterator = RowIterator(
            mock.sentinel.client, api_request, path, schema)
        # Nothing has been consumed before iteration starts.
        self.assertEqual(row_iterator.num_results, 0)

        rows_iter = iter(row_iterator)

        val1 = six.next(rows_iter)
        self.assertEqual(val1.name, 'Phred Phlyntstone')
        self.assertEqual(row_iterator.num_results, 1)

        val2 = six.next(rows_iter)
        self.assertEqual(val2.name, 'Bharney Rhubble')
        self.assertEqual(row_iterator.num_results, 2)

        # Both rows are consumed, so the iterator must now be exhausted.
        with self.assertRaises(StopIteration):
            six.next(rows_iter)

        api_request.assert_called_once_with(
            method='GET', path=path, query_params={})
Ejemplo n.º 4
0
    def test_to_dataframe_w_empty_results(self):
        # An API response with no rows should still produce a DataFrame: zero
        # rows long, with the schema's column names.
        from google.cloud.bigquery.table import RowIterator
        from google.cloud.bigquery.table import SchemaField

        name_field = SchemaField('name', 'STRING', mode='REQUIRED')
        age_field = SchemaField('age', 'INTEGER', mode='REQUIRED')
        empty_payload = {'rows': []}
        fake_request = mock.Mock(return_value=empty_payload)
        iterator = RowIterator(
            mock.sentinel.client, fake_request, '/foo',
            [name_field, age_field])

        frame = iterator.to_dataframe()

        self.assertIsInstance(frame, pandas.DataFrame)
        self.assertEqual(len(frame), 0)  # no rows came back
        self.assertEqual(list(frame), ['name', 'age'])  # columns still set
Ejemplo n.º 5
0
    def test_to_dataframe_w_various_types_nullable(self):
        # Mixed-type columns with an all-None first row: the first DataFrame
        # row must be entirely null, and the remaining rows must hold values
        # of the expected Python / pandas types.
        import datetime
        from google.cloud.bigquery.table import RowIterator
        from google.cloud.bigquery.table import SchemaField

        fields = [
            SchemaField('start_timestamp', 'TIMESTAMP'),
            SchemaField('seconds', 'INT64'),
            SchemaField('miles', 'FLOAT64'),
            SchemaField('payment_type', 'STRING'),
            SchemaField('complete', 'BOOL'),
            SchemaField('date', 'DATE'),
        ]
        raw_values = [
            [None, None, None, None, None, None],
            ['1.4338368E9', '420', '1.1', 'Cash', 'true', '1999-12-01'],
            ['1.3878117E9', '2580', '17.7', 'Cash', 'false', '1953-06-14'],
            ['1.3855653E9', '2280', '4.4', 'Credit', 'true', '1981-11-04'],
        ]
        # Wrap each cell in the {'f': [{'v': ...}]} row layout that the
        # mocked API response is made of.
        payload = {
            'rows': [
                {'f': [{'v': cell} for cell in values]}
                for values in raw_values
            ],
        }
        fake_request = mock.Mock(return_value=payload)
        iterator = RowIterator(
            mock.sentinel.client, fake_request, '/foo', fields)

        frame = iterator.to_dataframe()

        self.assertIsInstance(frame, pandas.DataFrame)
        self.assertEqual(len(frame), 4)  # one DataFrame row per API row
        expected_names = [column.name for column in fields]
        self.assertEqual(list(frame), expected_names)  # schema order kept

        for position, record in frame.iterrows():
            if position == 0:
                # The first input row was all None.
                self.assertTrue(record.isnull().all())
                continue
            self.assertIsInstance(record.start_timestamp, pandas.Timestamp)
            self.assertIsInstance(record.seconds, float)
            self.assertIsInstance(record.payment_type, str)
            self.assertIsInstance(record.complete, bool)
            self.assertIsInstance(record.date, datetime.date)
Ejemplo n.º 6
0
    def test_page_size(self):
        # A RowIterator built with ``page_size`` should pass that size to the
        # API as the ``maxResults`` query parameter when fetching a page.
        from google.cloud.bigquery.table import RowIterator
        from google.cloud.bigquery.table import SchemaField

        name_field = SchemaField('name', 'STRING', mode='REQUIRED')
        age_field = SchemaField('age', 'INTEGER', mode='REQUIRED')
        people = (
            ('Phred Phlyntstone', '32'),
            ('Bharney Rhubble', '33'),
        )
        payload = {
            'rows': [
                {'f': [{'v': name}, {'v': age}]} for name, age in people
            ],
        }
        endpoint = '/foo'
        fake_request = mock.Mock(return_value=payload)

        iterator = RowIterator(
            mock.sentinel.client, fake_request, endpoint,
            [name_field, age_field], page_size=4)
        iterator._get_next_page_response()

        expected_params = {'maxResults': iterator._page_size}
        fake_request.assert_called_once_with(
            method='GET', path=endpoint, query_params=expected_params)
Ejemplo n.º 7
0
    def test_to_dataframe_column_dtypes(self):
        # With no null values in the data, every DataFrame column should end
        # up with the dtype listed in ``expected_dtypes`` below.
        from google.cloud.bigquery.table import RowIterator
        from google.cloud.bigquery.table import SchemaField

        fields = [
            SchemaField('start_timestamp', 'TIMESTAMP'),
            SchemaField('seconds', 'INT64'),
            SchemaField('miles', 'FLOAT64'),
            SchemaField('payment_type', 'STRING'),
            SchemaField('complete', 'BOOL'),
            SchemaField('date', 'DATE'),
        ]
        raw_values = [
            ['1.4338368E9', '420', '1.1', 'Cash', 'true', '1999-12-01'],
            ['1.3878117E9', '2580', '17.7', 'Cash', 'false', '1953-06-14'],
            ['1.3855653E9', '2280', '4.4', 'Credit', 'true', '1981-11-04'],
        ]
        # Wrap each cell in the {'f': [{'v': ...}]} row layout that the
        # mocked API response is made of.
        payload = {
            'rows': [
                {'f': [{'v': cell} for cell in values]}
                for values in raw_values
            ],
        }
        fake_request = mock.Mock(return_value=payload)
        iterator = RowIterator(
            mock.sentinel.client, fake_request, '/foo', fields)

        frame = iterator.to_dataframe()

        self.assertIsInstance(frame, pandas.DataFrame)
        self.assertEqual(len(frame), 3)  # one DataFrame row per API row
        expected_names = [column.name for column in fields]
        self.assertEqual(list(frame), expected_names)  # schema order kept

        # Checked in schema order; the first mismatch fails the test.
        expected_dtypes = (
            ('start_timestamp', 'datetime64[ns, UTC]'),
            ('seconds', 'int64'),
            ('miles', 'float64'),
            ('payment_type', 'object'),
            ('complete', 'bool'),
            ('date', 'object'),
        )
        for column_name, dtype_name in expected_dtypes:
            self.assertEqual(
                getattr(frame, column_name).dtype.name, dtype_name)
Ejemplo n.º 8
0
    def test_constructor(self):
        # A freshly constructed RowIterator should store the arguments it was
        # given, use the row helpers from ``_helpers``, and start with all of
        # its paging state at the initial values.
        from google.cloud.bigquery.table import RowIterator
        from google.cloud.bigquery._helpers import _item_to_row
        from google.cloud.bigquery._helpers import _rows_page_start

        fake_client = mock.sentinel.client
        fake_request = mock.sentinel.api_request
        endpoint = '/foo'
        row_iter = RowIterator(fake_client, fake_request, endpoint, [])

        # Values fixed at construction time.
        self.assertFalse(row_iter._started)
        self.assertIs(row_iter.client, fake_client)
        self.assertEqual(row_iter.path, endpoint)
        self.assertIs(row_iter._item_to_value, _item_to_row)
        self.assertEqual(row_iter._items_key, 'rows')
        self.assertIsNone(row_iter.max_results)
        self.assertEqual(row_iter.extra_params, {})
        self.assertEqual(row_iter._page_start, _rows_page_start)
        # Paging state that changes as the iterator is consumed.
        self.assertEqual(row_iter.page_number, 0)
        self.assertIsNone(row_iter.next_page_token)
        self.assertEqual(row_iter.num_results, 0)