def test_to_dataframe(self):
    """to_dataframe() converts fetched rows into a typed DataFrame."""
    from google.cloud.bigquery.table import RowIterator
    from google.cloud.bigquery.table import SchemaField

    schema = [
        SchemaField('name', 'STRING', mode='REQUIRED'),
        SchemaField('age', 'INTEGER', mode='REQUIRED')
    ]
    row_data = [
        ('Phred Phlyntstone', '32'),
        ('Bharney Rhubble', '33'),
        ('Wylma Phlyntstone', '29'),
        ('Bhettye Rhubble', '27'),
    ]
    rows = [{'f': [{'v': cell} for cell in row]} for row in row_data]
    path = '/foo'
    api_request = mock.Mock(return_value={'rows': rows})
    row_iterator = RowIterator(
        mock.sentinel.client, api_request, path, schema)

    df = row_iterator.to_dataframe()

    self.assertIsInstance(df, pandas.DataFrame)
    # One DataFrame row per API row.
    self.assertEqual(len(df), 4)
    # Column names come from the schema field names.
    self.assertEqual(list(df), ['name', 'age'])
    # STRING -> object, INTEGER -> int64.
    self.assertEqual(df.name.dtype.name, 'object')
    self.assertEqual(df.age.dtype.name, 'int64')
def test_iterate(self):
    """Iterating a RowIterator yields Row values and updates num_results.

    Also verifies that exactly one GET request is issued for a
    single-page result set.
    """
    from google.cloud.bigquery.table import RowIterator
    from google.cloud.bigquery.table import SchemaField

    schema = [
        SchemaField('name', 'STRING', mode='REQUIRED'),
        SchemaField('age', 'INTEGER', mode='REQUIRED')
    ]
    rows = [
        {'f': [{'v': 'Phred Phlyntstone'}, {'v': '32'}]},
        {'f': [{'v': 'Bharney Rhubble'}, {'v': '33'}]},
    ]
    path = '/foo'
    api_request = mock.Mock(return_value={'rows': rows})
    row_iterator = RowIterator(
        mock.sentinel.client, api_request, path, schema)
    # Nothing consumed yet.
    self.assertEqual(row_iterator.num_results, 0)

    rows_iter = iter(row_iterator)

    val1 = six.next(rows_iter)
    # BUG FIX: removed stray debug ``print(val1)`` left in the test.
    self.assertEqual(val1.name, 'Phred Phlyntstone')
    self.assertEqual(row_iterator.num_results, 1)

    val2 = six.next(rows_iter)
    self.assertEqual(val2.name, 'Bharney Rhubble')
    self.assertEqual(row_iterator.num_results, 2)

    # Only one page of results: the iterator is now exhausted.
    with self.assertRaises(StopIteration):
        six.next(rows_iter)

    api_request.assert_called_once_with(
        method='GET', path=path, query_params={})
def test_to_dataframe_error_if_pandas_is_none(self):
    """to_dataframe() raises ValueError when pandas is unavailable."""
    from google.cloud.bigquery.table import RowIterator
    from google.cloud.bigquery.table import SchemaField

    schema = [
        SchemaField('name', 'STRING', mode='REQUIRED'),
        SchemaField('age', 'INTEGER', mode='REQUIRED')
    ]
    rows = [
        {'f': [{'v': 'Phred Phlyntstone'}, {'v': '32'}]},
        {'f': [{'v': 'Bharney Rhubble'}, {'v': '33'}]},
    ]
    path = '/foo'
    api_request = mock.Mock(return_value={'rows': rows})
    row_iterator = RowIterator(
        mock.sentinel.client, api_request, path, schema)

    with self.assertRaises(ValueError):
        row_iterator.to_dataframe()
def test_partition_expiration_bad_type(self):
    """Assigning a non-integer partition expiration raises ValueError.

    NOTE(review): renamed from ``test_partition_experation_bad_type``
    to fix the "experation" typo; test methods have no external callers,
    so the rename is safe.
    """
    from google.cloud.bigquery.table import SchemaField

    dataset = DatasetReference(self.PROJECT, self.DS_ID)
    table_ref = dataset.table(self.TABLE_NAME)
    full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
    age = SchemaField('age', 'INTEGER', mode='REQUIRED')
    table = self._make_one(table_ref, schema=[full_name, age])

    # Expiration must be numeric (milliseconds), not a string.
    with self.assertRaises(ValueError):
        table.partition_expiration = "NEVER"
def test_schema_setter(self):
    """The schema setter accepts a list of SchemaField and round-trips it."""
    from google.cloud.bigquery.table import SchemaField

    dataset = DatasetReference(self.PROJECT, self.DS_ID)
    table_ref = dataset.table(self.TABLE_NAME)
    table = self._make_one(table_ref)

    fields = [
        SchemaField('full_name', 'STRING', mode='REQUIRED'),
        SchemaField('age', 'INTEGER', mode='REQUIRED'),
    ]
    table.schema = fields

    self.assertEqual(table.schema, fields)
def test_partition_type_setter_w_known_value(self):
    """Setting partitioning_type to 'DAY' is stored and readable."""
    from google.cloud.bigquery.table import SchemaField

    dataset = DatasetReference(self.PROJECT, self.DS_ID)
    table_ref = dataset.table(self.TABLE_NAME)
    schema = [
        SchemaField('full_name', 'STRING', mode='REQUIRED'),
        SchemaField('age', 'INTEGER', mode='REQUIRED'),
    ]
    table = self._make_one(table_ref, schema=schema)

    # Unset by default.
    self.assertIsNone(table.partitioning_type)

    table.partitioning_type = 'DAY'

    self.assertEqual(table.partitioning_type, 'DAY')
def test_partition_expiration_w_none_no_partition_set(self):
    """Setting expiration to None on an unpartitioned table is a no-op."""
    from google.cloud.bigquery.table import SchemaField

    dataset = DatasetReference(self.PROJECT, self.DS_ID)
    table_ref = dataset.table(self.TABLE_NAME)
    schema = [
        SchemaField('full_name', 'STRING', mode='REQUIRED'),
        SchemaField('age', 'INTEGER', mode='REQUIRED'),
    ]
    table = self._make_one(table_ref, schema=schema)

    self.assertIsNone(table.partition_expiration)

    table.partition_expiration = None

    # Neither a partition type nor an expiration was introduced.
    self.assertIsNone(table.partitioning_type)
    self.assertIsNone(table.partition_expiration)
def test_partition_type_setter_w_none(self):
    """Setting partitioning_type to None clears the timePartitioning config."""
    from google.cloud.bigquery.table import SchemaField

    dataset = DatasetReference(self.PROJECT, self.DS_ID)
    table_ref = dataset.table(self.TABLE_NAME)
    full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')
    age = SchemaField('age', 'INTEGER', mode='REQUIRED')
    table = self._make_one(table_ref, schema=[full_name, age])
    # Seed an existing partitioning config so clearing is observable.
    table._properties['timePartitioning'] = {'type': 'DAY'}

    table.partitioning_type = None

    self.assertIsNone(table.partitioning_type)
    # Idiom fix: assertNotIn gives a clearer failure message than
    # assertFalse('key' in dict).
    self.assertNotIn('timePartitioning', table._properties)
def test_schema_setter_invalid_field(self):
    """Assigning a schema containing a non-SchemaField raises ValueError."""
    from google.cloud.bigquery.table import SchemaField

    dataset = DatasetReference(self.PROJECT, self.DS_ID)
    table_ref = dataset.table(self.TABLE_NAME)
    table = self._make_one(table_ref)
    full_name = SchemaField('full_name', 'STRING', mode='REQUIRED')

    # A bare object() is not a valid schema field.
    with self.assertRaises(ValueError):
        table.schema = [full_name, object()]
def test_to_dataframe_w_empty_results(self):
    """to_dataframe() on an empty result set yields an empty, named frame."""
    from google.cloud.bigquery.table import RowIterator
    from google.cloud.bigquery.table import SchemaField

    schema = [
        SchemaField('name', 'STRING', mode='REQUIRED'),
        SchemaField('age', 'INTEGER', mode='REQUIRED')
    ]
    api_request = mock.Mock(return_value={'rows': []})
    row_iterator = RowIterator(
        mock.sentinel.client, api_request, '/foo', schema)

    df = row_iterator.to_dataframe()

    self.assertIsInstance(df, pandas.DataFrame)
    # No rows, but the schema still provides the column names.
    self.assertEqual(len(df), 0)
    self.assertEqual(list(df), ['name', 'age'])
def test__row_from_mapping_w_schema(self):
    """_row_from_mapping picks schema fields from the mapping in order.

    Extra mapping keys are ignored; missing NULLABLE fields become None.
    """
    from google.cloud.bigquery.table import Table, SchemaField

    MAPPING = {
        'full_name': 'Phred Phlyntstone',
        'age': 32,
        'colors': ['red', 'green'],
        'extra': 'IGNORED',
    }
    dataset = DatasetReference(self.PROJECT, self.DS_ID)
    table_ref = dataset.table(self.TABLE_NAME)
    schema = [
        SchemaField('full_name', 'STRING', mode='REQUIRED'),
        SchemaField('age', 'INTEGER', mode='REQUIRED'),
        SchemaField('colors', 'DATETIME', mode='REPEATED'),
        SchemaField('joined', 'STRING', mode='NULLABLE'),
    ]
    table = Table(table_ref, schema=schema)

    row = self._call_fut(MAPPING, table.schema)

    self.assertEqual(
        row, ('Phred Phlyntstone', 32, ['red', 'green'], None))
def test__row_from_mapping_w_invalid_schema(self):
    """_row_from_mapping rejects a schema field with an unknown mode."""
    from google.cloud.bigquery.table import Table, SchemaField

    MAPPING = {
        'full_name': 'Phred Phlyntstone',
        'age': 32,
        'colors': ['red', 'green'],
        'bogus': 'WHATEVER',
    }
    dataset = DatasetReference(self.PROJECT, self.DS_ID)
    table_ref = dataset.table(self.TABLE_NAME)
    schema = [
        SchemaField('full_name', 'STRING', mode='REQUIRED'),
        SchemaField('age', 'INTEGER', mode='REQUIRED'),
        SchemaField('colors', 'DATETIME', mode='REPEATED'),
        # 'BOGUS' is not a valid field mode.
        SchemaField('joined', 'STRING', mode='BOGUS'),
    ]
    table = Table(table_ref, schema=schema)

    with self.assertRaises(ValueError) as exc:
        self._call_fut(MAPPING, table.schema)

    self.assertIn('Unknown field mode: BOGUS', str(exc.exception))
def test_page_size(self):
    """page_size is forwarded to the API as the maxResults query param."""
    from google.cloud.bigquery.table import RowIterator
    from google.cloud.bigquery.table import SchemaField

    schema = [
        SchemaField('name', 'STRING', mode='REQUIRED'),
        SchemaField('age', 'INTEGER', mode='REQUIRED')
    ]
    rows = [
        {'f': [{'v': 'Phred Phlyntstone'}, {'v': '32'}]},
        {'f': [{'v': 'Bharney Rhubble'}, {'v': '33'}]},
    ]
    path = '/foo'
    api_request = mock.Mock(return_value={'rows': rows})
    row_iterator = RowIterator(
        mock.sentinel.client, api_request, path, schema, page_size=4)

    row_iterator._get_next_page_response()

    api_request.assert_called_once_with(
        method='GET', path=path, query_params={
            'maxResults': row_iterator._page_size})
def test_to_dataframe_w_various_types_nullable(self):
    """Nullable columns: an all-NULL row plus typed values coexist.

    The NULL row forces pandas to use nullable representations
    (e.g. floats for the integer column).
    """
    import datetime
    from google.cloud.bigquery.table import RowIterator
    from google.cloud.bigquery.table import SchemaField

    schema = [
        SchemaField('start_timestamp', 'TIMESTAMP'),
        SchemaField('seconds', 'INT64'),
        SchemaField('miles', 'FLOAT64'),
        SchemaField('payment_type', 'STRING'),
        SchemaField('complete', 'BOOL'),
        SchemaField('date', 'DATE'),
    ]
    row_data = [
        [None, None, None, None, None, None],
        ['1.4338368E9', '420', '1.1', 'Cash', 'true', '1999-12-01'],
        ['1.3878117E9', '2580', '17.7', 'Cash', 'false', '1953-06-14'],
        ['1.3855653E9', '2280', '4.4', 'Credit', 'true', '1981-11-04'],
    ]
    rows = [{'f': [{'v': cell} for cell in row]} for row in row_data]
    api_request = mock.Mock(return_value={'rows': rows})
    row_iterator = RowIterator(
        mock.sentinel.client, api_request, '/foo', schema)

    df = row_iterator.to_dataframe()

    self.assertIsInstance(df, pandas.DataFrame)
    self.assertEqual(len(df), 4)
    expected_columns = [field.name for field in schema]
    self.assertEqual(list(df), expected_columns)

    for index, row in df.iterrows():
        if index == 0:
            # The first input row was entirely NULL.
            self.assertTrue(row.isnull().all())
        else:
            self.assertIsInstance(row.start_timestamp, pandas.Timestamp)
            self.assertIsInstance(row.seconds, float)
            self.assertIsInstance(row.payment_type, str)
            self.assertIsInstance(row.complete, bool)
            self.assertIsInstance(row.date, datetime.date)
def test_to_dataframe_column_dtypes(self):
    """Each BigQuery column type maps to the expected pandas dtype."""
    from google.cloud.bigquery.table import RowIterator
    from google.cloud.bigquery.table import SchemaField

    schema = [
        SchemaField('start_timestamp', 'TIMESTAMP'),
        SchemaField('seconds', 'INT64'),
        SchemaField('miles', 'FLOAT64'),
        SchemaField('payment_type', 'STRING'),
        SchemaField('complete', 'BOOL'),
        SchemaField('date', 'DATE'),
    ]
    row_data = [
        ['1.4338368E9', '420', '1.1', 'Cash', 'true', '1999-12-01'],
        ['1.3878117E9', '2580', '17.7', 'Cash', 'false', '1953-06-14'],
        ['1.3855653E9', '2280', '4.4', 'Credit', 'true', '1981-11-04'],
    ]
    rows = [{'f': [{'v': cell} for cell in row]} for row in row_data]
    api_request = mock.Mock(return_value={'rows': rows})
    row_iterator = RowIterator(
        mock.sentinel.client, api_request, '/foo', schema)

    df = row_iterator.to_dataframe()

    self.assertIsInstance(df, pandas.DataFrame)
    self.assertEqual(len(df), 3)
    expected_columns = [field.name for field in schema]
    self.assertEqual(list(df), expected_columns)

    # TIMESTAMP -> tz-aware datetime; INT64 -> int64; FLOAT64 -> float64;
    # STRING/DATE -> object; BOOL -> bool.
    self.assertEqual(df.start_timestamp.dtype.name, 'datetime64[ns, UTC]')
    self.assertEqual(df.seconds.dtype.name, 'int64')
    self.assertEqual(df.miles.dtype.name, 'float64')
    self.assertEqual(df.payment_type.dtype.name, 'object')
    self.assertEqual(df.complete.dtype.name, 'bool')
    self.assertEqual(df.date.dtype.name, 'object')