def test_to_dataframe(self):
        # A single page of API rows should be converted into a DataFrame
        # whose columns follow the schema order and carry typed values.
        from google.cloud.bigquery.table import RowIterator
        from google.cloud.bigquery.table import SchemaField

        people = [
            ('Phred Phlyntstone', '32'),
            ('Bharney Rhubble', '33'),
            ('Wylma Phlyntstone', '29'),
            ('Bhettye Rhubble', '27'),
        ]
        # Wire format used by the fake response: one {'f': [{'v': cell}, ...]}
        # mapping per row.
        response = {
            'rows': [
                {'f': [{'v': name}, {'v': age}]} for name, age in people
            ],
        }
        fields = [
            SchemaField('name', 'STRING', mode='REQUIRED'),
            SchemaField('age', 'INTEGER', mode='REQUIRED'),
        ]
        fake_api_request = mock.Mock(return_value=response)
        iterator = RowIterator(
            mock.sentinel.client, fake_api_request, '/foo', fields)

        frame = iterator.to_dataframe()

        self.assertIsInstance(frame, pandas.DataFrame)
        # Row count, then column names, then per-column dtypes.
        self.assertEqual(len(frame), 4)
        self.assertEqual(list(frame), ['name', 'age'])
        self.assertEqual(frame.name.dtype.name, 'object')
        self.assertEqual(frame.age.dtype.name, 'int64')
    def test_iterate(self):
        # Iterating a RowIterator should yield one row per API row, keep
        # ``num_results`` in step with the rows consumed so far, and issue
        # exactly one API request for this single-page response.
        from google.cloud.bigquery.table import RowIterator
        from google.cloud.bigquery.table import SchemaField

        schema = [
            SchemaField('name', 'STRING', mode='REQUIRED'),
            SchemaField('age', 'INTEGER', mode='REQUIRED')
        ]
        rows = [
            {'f': [{'v': 'Phred Phlyntstone'}, {'v': '32'}]},
            {'f': [{'v': 'Bharney Rhubble'}, {'v': '33'}]},
        ]
        path = '/foo'
        api_request = mock.Mock(return_value={'rows': rows})
        row_iterator = RowIterator(
            mock.sentinel.client, api_request, path, schema)
        # Nothing has been consumed yet.
        self.assertEqual(row_iterator.num_results, 0)

        rows_iter = iter(row_iterator)

        val1 = six.next(rows_iter)
        self.assertEqual(val1.name, 'Phred Phlyntstone')
        self.assertEqual(row_iterator.num_results, 1)

        val2 = six.next(rows_iter)
        self.assertEqual(val2.name, 'Bharney Rhubble')
        self.assertEqual(row_iterator.num_results, 2)

        # The fake response holds only two rows, so a third fetch must stop.
        with self.assertRaises(StopIteration):
            six.next(rows_iter)

        api_request.assert_called_once_with(
            method='GET', path=path, query_params={})
    def test_to_dataframe_error_if_pandas_is_none(self):
        # to_dataframe() is expected to raise ValueError when pandas is not
        # available to the table module.
        from google.cloud.bigquery.table import RowIterator
        from google.cloud.bigquery.table import SchemaField

        schema = [
            SchemaField('name', 'STRING', mode='REQUIRED'),
            SchemaField('age', 'INTEGER', mode='REQUIRED')
        ]
        rows = [
            {'f': [{'v': 'Phred Phlyntstone'}, {'v': '32'}]},
            {'f': [{'v': 'Bharney Rhubble'}, {'v': '33'}]},
        ]
        path = '/foo'
        api_request = mock.Mock(return_value={'rows': rows})
        row_iterator = RowIterator(mock.sentinel.client, api_request, path,
                                   schema)

        # Simulate a missing pandas installation for the duration of the
        # call; without this the test depends on the local environment.
        # NOTE(review): assumes the table module keeps its optional pandas
        # import in a module-level ``pandas`` name -- confirm.
        with mock.patch('google.cloud.bigquery.table.pandas', new=None):
            with self.assertRaises(ValueError):
                row_iterator.to_dataframe()
    def test_partition_experation_bad_type(self):
        # Assigning a non-numeric value to ``partition_expiration`` must be
        # rejected with ValueError.
        # NOTE: the method name is misspelled ("experation"); it is kept
        # as-is so selecting the test by name keeps working.
        from google.cloud.bigquery.table import SchemaField

        fields = [
            SchemaField('full_name', 'STRING', mode='REQUIRED'),
            SchemaField('age', 'INTEGER', mode='REQUIRED'),
        ]
        reference = DatasetReference(
            self.PROJECT, self.DS_ID).table(self.TABLE_NAME)
        table = self._make_one(reference, schema=fields)

        with self.assertRaises(ValueError):
            table.partition_expiration = "NEVER"
    def test_schema_setter(self):
        # A list of SchemaField objects assigned to ``schema`` should be
        # returned unchanged when the property is read back.
        from google.cloud.bigquery.table import SchemaField

        reference = DatasetReference(
            self.PROJECT, self.DS_ID).table(self.TABLE_NAME)
        table = self._make_one(reference)
        expected = [
            SchemaField('full_name', 'STRING', mode='REQUIRED'),
            SchemaField('age', 'INTEGER', mode='REQUIRED'),
        ]

        # Hand the setter its own list so the expectation stays independent.
        table.schema = list(expected)

        self.assertEqual(table.schema, expected)
    def test_partition_type_setter_w_known_value(self):
        # ``partitioning_type`` starts out unset and accepts 'DAY'.
        from google.cloud.bigquery.table import SchemaField

        fields = [
            SchemaField('full_name', 'STRING', mode='REQUIRED'),
            SchemaField('age', 'INTEGER', mode='REQUIRED'),
        ]
        reference = DatasetReference(
            self.PROJECT, self.DS_ID).table(self.TABLE_NAME)
        table = self._make_one(reference, schema=fields)

        self.assertIsNone(table.partitioning_type)

        table.partitioning_type = 'DAY'

        self.assertEqual(table.partitioning_type, 'DAY')
    def test_partition_expiration_w_none_no_partition_set(self):
        # Clearing ``partition_expiration`` on a table that has no
        # partitioning configured must leave both properties as None.
        from google.cloud.bigquery.table import SchemaField

        fields = [
            SchemaField('full_name', 'STRING', mode='REQUIRED'),
            SchemaField('age', 'INTEGER', mode='REQUIRED'),
        ]
        reference = DatasetReference(
            self.PROJECT, self.DS_ID).table(self.TABLE_NAME)
        table = self._make_one(reference, schema=fields)

        self.assertIsNone(table.partition_expiration)

        table.partition_expiration = None

        self.assertIsNone(table.partitioning_type)
        self.assertIsNone(table.partition_expiration)
    def test_partition_type_setter_w_none(self):
        # Setting ``partitioning_type`` to None on a table that already has
        # a 'timePartitioning' resource entry should drop that entry.
        from google.cloud.bigquery.table import SchemaField

        dataset = DatasetReference(self.PROJECT, self.DS_ID)
        table_ref = dataset.table(self.TABLE_NAME)
        full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
        age = SchemaField('age', 'INTEGER', mode='REQUIRED')
        table = self._make_one(table_ref, schema=[full_name, age])
        # Seed the raw resource directly so the starting state does not
        # depend on the setter under test.
        table._properties['timePartitioning'] = {'type': 'DAY'}
        table.partitioning_type = None
        self.assertIsNone(table.partitioning_type)
        # assertNotIn reports the offending container on failure, unlike
        # assertFalse(... in ...).
        self.assertNotIn('timePartitioning', table._properties)
    def test_schema_setter_invalid_field(self):
        # A schema list containing something that is not a SchemaField must
        # be rejected with ValueError.
        from google.cloud.bigquery.table import SchemaField

        reference = DatasetReference(
            self.PROJECT, self.DS_ID).table(self.TABLE_NAME)
        table = self._make_one(reference)
        valid_field = SchemaField('full_name', 'STRING', mode='REQUIRED')

        with self.assertRaises(ValueError):
            table.schema = [valid_field, object()]
Example #10
0
    def test_to_dataframe_w_empty_results(self):
        # An empty result page should still give a DataFrame with the
        # schema's columns, just with zero rows.
        from google.cloud.bigquery.table import RowIterator
        from google.cloud.bigquery.table import SchemaField

        fields = [
            SchemaField('name', 'STRING', mode='REQUIRED'),
            SchemaField('age', 'INTEGER', mode='REQUIRED'),
        ]
        fake_api_request = mock.Mock(return_value={'rows': []})
        iterator = RowIterator(
            mock.sentinel.client, fake_api_request, '/foo', fields)

        frame = iterator.to_dataframe()

        self.assertIsInstance(frame, pandas.DataFrame)
        self.assertEqual(len(frame), 0)
        self.assertEqual(list(frame), ['name', 'age'])
Example #11
0
    def test__row_from_mapping_w_schema(self):
        # The function under test should return values in schema order,
        # skip mapping keys that are not in the schema ('extra') and fill a
        # NULLABLE field missing from the mapping ('joined') with None.
        from google.cloud.bigquery.table import Table, SchemaField

        fields = [
            SchemaField('full_name', 'STRING', mode='REQUIRED'),
            SchemaField('age', 'INTEGER', mode='REQUIRED'),
            SchemaField('colors', 'DATETIME', mode='REPEATED'),
            SchemaField('joined', 'STRING', mode='NULLABLE'),
        ]
        reference = DatasetReference(
            self.PROJECT, self.DS_ID).table(self.TABLE_NAME)
        table = Table(reference, schema=fields)
        mapping = {
            'full_name': 'Phred Phlyntstone',
            'age': 32,
            'colors': ['red', 'green'],
            'extra': 'IGNORED',
        }

        result = self._call_fut(mapping, table.schema)

        expected = ('Phred Phlyntstone', 32, ['red', 'green'], None)
        self.assertEqual(result, expected)
Example #12
0
    def test__row_from_mapping_w_invalid_schema(self):
        # A schema field with an unrecognised mode should make the function
        # under test raise ValueError naming that mode.
        from google.cloud.bigquery.table import Table, SchemaField

        fields = [
            SchemaField('full_name', 'STRING', mode='REQUIRED'),
            SchemaField('age', 'INTEGER', mode='REQUIRED'),
            SchemaField('colors', 'DATETIME', mode='REPEATED'),
            # The deliberately invalid entry.
            SchemaField('joined', 'STRING', mode='BOGUS'),
        ]
        reference = DatasetReference(
            self.PROJECT, self.DS_ID).table(self.TABLE_NAME)
        table = Table(reference, schema=fields)
        mapping = {
            'full_name': 'Phred Phlyntstone',
            'age': 32,
            'colors': ['red', 'green'],
            'bogus': 'WHATEVER',
        }

        with self.assertRaises(ValueError) as caught:
            self._call_fut(mapping, table.schema)

        message = str(caught.exception)
        self.assertIn('Unknown field mode: BOGUS', message)
    def test_page_size(self):
        # A RowIterator built with ``page_size`` should pass that value to
        # the API as the 'maxResults' query parameter.
        from google.cloud.bigquery.table import RowIterator
        from google.cloud.bigquery.table import SchemaField

        fields = [
            SchemaField('name', 'STRING', mode='REQUIRED'),
            SchemaField('age', 'INTEGER', mode='REQUIRED'),
        ]
        response = {
            'rows': [
                {'f': [{'v': 'Phred Phlyntstone'}, {'v': '32'}]},
                {'f': [{'v': 'Bharney Rhubble'}, {'v': '33'}]},
            ],
        }
        target_path = '/foo'
        fake_api_request = mock.Mock(return_value=response)
        iterator = RowIterator(
            mock.sentinel.client, fake_api_request, target_path, fields,
            page_size=4)

        # Fetch one page directly; the rows themselves are not inspected.
        iterator._get_next_page_response()

        expected_params = {'maxResults': iterator._page_size}
        fake_api_request.assert_called_once_with(
            method='GET', path=target_path, query_params=expected_params)
Example #14
0
    def test_to_dataframe_w_various_types_nullable(self):
        # With a row made entirely of nulls followed by populated rows, the
        # DataFrame should keep the null row as nulls and expose typed
        # Python/pandas values for the remaining rows.
        import datetime
        from google.cloud.bigquery.table import RowIterator
        from google.cloud.bigquery.table import SchemaField

        fields = [
            SchemaField('start_timestamp', 'TIMESTAMP'),
            SchemaField('seconds', 'INT64'),
            SchemaField('miles', 'FLOAT64'),
            SchemaField('payment_type', 'STRING'),
            SchemaField('complete', 'BOOL'),
            SchemaField('date', 'DATE'),
        ]
        raw_rows = [
            [None, None, None, None, None, None],
            ['1.4338368E9', '420', '1.1', 'Cash', 'true', '1999-12-01'],
            ['1.3878117E9', '2580', '17.7', 'Cash', 'false', '1953-06-14'],
            ['1.3855653E9', '2280', '4.4', 'Credit', 'true', '1981-11-04'],
        ]
        # Wrap every cell in the {'v': ...} envelope of the wire format.
        wire_rows = []
        for raw in raw_rows:
            wire_rows.append({'f': [{'v': cell} for cell in raw]})
        fake_api_request = mock.Mock(return_value={'rows': wire_rows})
        iterator = RowIterator(
            mock.sentinel.client, fake_api_request, '/foo', fields)

        frame = iterator.to_dataframe()

        self.assertIsInstance(frame, pandas.DataFrame)
        self.assertEqual(len(frame), 4)
        self.assertEqual(list(frame), [field.name for field in fields])

        for position, record in frame.iterrows():
            if position == 0:
                # The all-null input row.
                self.assertTrue(record.isnull().all())
                continue
            self.assertIsInstance(record.start_timestamp, pandas.Timestamp)
            # Integers are expected as float here -- presumably because the
            # null in the first row rules out an integer column.
            self.assertIsInstance(record.seconds, float)
            self.assertIsInstance(record.payment_type, str)
            self.assertIsInstance(record.complete, bool)
            self.assertIsInstance(record.date, datetime.date)
Example #15
0
    def test_to_dataframe_column_dtypes(self):
        # With no nulls in the data, each DataFrame column should have the
        # dtype listed next to its schema field below.
        from google.cloud.bigquery.table import RowIterator
        from google.cloud.bigquery.table import SchemaField

        fields = [
            SchemaField('start_timestamp', 'TIMESTAMP'),
            SchemaField('seconds', 'INT64'),
            SchemaField('miles', 'FLOAT64'),
            SchemaField('payment_type', 'STRING'),
            SchemaField('complete', 'BOOL'),
            SchemaField('date', 'DATE'),
        ]
        raw_rows = [
            ['1.4338368E9', '420', '1.1', 'Cash', 'true', '1999-12-01'],
            ['1.3878117E9', '2580', '17.7', 'Cash', 'false', '1953-06-14'],
            ['1.3855653E9', '2280', '4.4', 'Credit', 'true', '1981-11-04'],
        ]
        # Wrap every cell in the {'v': ...} envelope of the wire format.
        wire_rows = []
        for raw in raw_rows:
            wire_rows.append({'f': [{'v': cell} for cell in raw]})
        fake_api_request = mock.Mock(return_value={'rows': wire_rows})
        iterator = RowIterator(
            mock.sentinel.client, fake_api_request, '/foo', fields)

        frame = iterator.to_dataframe()

        self.assertIsInstance(frame, pandas.DataFrame)
        self.assertEqual(len(frame), 3)
        self.assertEqual(list(frame), [field.name for field in fields])

        # Checked in schema order, so the first mismatch reported is the
        # same one the column-by-column assertions would hit.
        expected_dtypes = [
            ('start_timestamp', 'datetime64[ns, UTC]'),
            ('seconds', 'int64'),
            ('miles', 'float64'),
            ('payment_type', 'object'),
            ('complete', 'bool'),
            ('date', 'object'),
        ]
        for column, dtype_name in expected_dtypes:
            self.assertEqual(getattr(frame, column).dtype.name, dtype_name)