def setUp(self):
    """Read the target project from the environment and build the client.

    Exits the process when PROJECT_ID is not set, since no test can run
    without a project to talk to.
    """
    project = os.getenv('PROJECT_ID')
    if project is None:
        print('PROJECT_ID is not defined.')
        sys.exit(1)
    self.project_id = project
    self.dataset_id = os.getenv('DATASET_ID', 'test_dataset')
    self.bq = BigQuery(self.project_id)
def setUp(self):
    """Build the client and start each test from a freshly created dataset.

    Any pre-existing dataset with the same id is dropped (with its
    contents) so tests never see leftover state. Exits when PROJECT_ID
    is not set.
    """
    project = os.getenv('PROJECT_ID')
    if project is None:
        print('PROJECT_ID is not defined.')
        sys.exit(1)
    self.project_id = project
    self.dataset_id = os.getenv('DATASET_ID', 'test_dataset')
    self.bq = BigQuery(self.project_id)
    # Recreate the dataset from scratch for isolation.
    if self.bq.exists_dataset(self.dataset_id):
        self.bq.drop_dataset(self.dataset_id, delete_contents=True)
    self.bq.create_dataset(self.dataset_id)
    self.bq.dataset_id = self.dataset_id  # Set default datasetId
def setUp(self):
    """Create a fresh dataset plus one nested and one flat table with rows.

    Table ids are suffixed with the current epoch second so reruns do not
    collide. Exits when PROJECT_ID or BUCKET is not set, since the extract
    tests need both a project and a GCS destination.
    """
    self.project_id = os.getenv('PROJECT_ID')
    self.dataset_id = os.getenv('DATASET_ID', 'test_dataset')
    self.nested_table_id = os.getenv('TABLE_ID', 'test_table') + '_nested_' + str(int(time.time()))
    self.flat_table_id = os.getenv('TABLE_ID', 'test_table') + '_flat_' + str(int(time.time()))
    self.bucket = os.getenv('BUCKET')
    if self.project_id is None:
        print('PROJECT_ID is not defined.')
        sys.exit(1)
    if self.bucket is None:
        # BUG FIX: message previously read 'BCUKET is not defined.'
        print('BUCKET is not defined.')
        sys.exit(1)
    self.bq = BigQuery(self.project_id)
    # Recreate the dataset from scratch for isolation.
    if self.bq.exists_dataset(self.dataset_id):
        self.bq.drop_dataset(self.dataset_id, delete_contents=True)
    self.bq.create_dataset(self.dataset_id)
    self.bq.dataset_id = self.dataset_id  # Set default datasetId
    # create & load nested table
    schema = [
        {'name': 'id', 'type': 'INTEGER', 'mode': 'REQUIRED'},
        {'name': 'name', 'type': 'STRING', 'mode': 'REQUIRED'},
        {'name': 'birth', 'type': 'RECORD', 'mode': 'NULLABLE', 'fields': [
            {'name': 'year', 'type': 'INTEGER', 'mode': 'REQUIRED'},
            {'name': 'month', 'type': 'INTEGER', 'mode': 'REQUIRED'},
            {'name': 'day', 'type': 'INTEGER', 'mode': 'REQUIRED'},
        ]},
        {'name': 'url', 'type': 'STRING', 'mode': 'REPEATED'},
    ]
    self.bq.create_table(self.nested_table_id, schema=schema)
    rows = [
        {'id': 1, 'name': 'foo'},
        {'id': 2, 'name': 'bar', 'birth': {'year': 2015, 'month': 10, 'day': 28}},
        {'id': 3, 'name': 'baz', 'url': [
            'http://www.yahoo.co.jp/',
            'http://www.google.co.jp/',
        ]},
    ]
    self.bq.load(self.nested_table_id, rows)
    # create & load flat table
    schema = [
        {'name': 'id', 'type': 'INTEGER', 'mode': 'REQUIRED'},
        {'name': 'name', 'type': 'STRING', 'mode': 'REQUIRED'},
    ]
    self.bq.create_table(self.flat_table_id, schema=schema)
    rows = [
        {'id': 1, 'name': 'foo'},
        {'id': 2, 'name': 'bar'},
        {'id': 3, 'name': 'baz'},
    ]
    self.bq.load(self.flat_table_id, rows)
def setUp(self):
    """Build the client, pick timestamped table/view ids, recreate the dataset.

    The epoch-second suffix keeps table and view names unique across runs.
    Exits when PROJECT_ID is not set.
    """
    suffix = '_' + str(int(time.time()))
    project = os.getenv('PROJECT_ID')
    if project is None:
        print('PROJECT_ID is not defined.')
        sys.exit(1)
    self.project_id = project
    self.dataset_id = os.getenv('DATASET_ID', 'test_dataset')
    self.table_id = os.getenv('TABLE_ID', 'test_table') + suffix
    self.view_id = os.getenv('VIEW_ID', 'test_view') + suffix
    self.bq = BigQuery(self.project_id)
    # Recreate the dataset from scratch for isolation.
    if self.bq.exists_dataset(self.dataset_id):
        self.bq.drop_dataset(self.dataset_id, delete_contents=True)
    self.bq.create_dataset(self.dataset_id)
    self.bq.dataset_id = self.dataset_id  # Set default datasetId
def setUp(self):
    """Build the client with fixed table/view ids and a fresh dataset.

    Exits when PROJECT_ID is not set.
    """
    project = os.getenv("PROJECT_ID")
    if project is None:
        print("PROJECT_ID is not defined.")
        sys.exit(1)
    self.project_id = project
    self.dataset_id = os.getenv("DATASET_ID", "test_dataset")
    self.table_id = os.getenv("TABLE_ID", "test_table")
    self.view_id = os.getenv("VIEW_ID", "test_view")
    self.bq = BigQuery(self.project_id)
    # Recreate the dataset from scratch for isolation.
    if self.bq.exists_dataset(self.dataset_id):
        self.bq.drop_dataset(self.dataset_id, delete_contents=True)
    self.bq.create_dataset(self.dataset_id)
    self.bq.dataset_id = self.dataset_id  # Set default datasetId
def setUp(self):
    """Recreate the dataset and create an empty nested-schema test table.

    Table and view ids carry an epoch-second suffix so runs never collide.
    Exits when PROJECT_ID is not set.
    """
    suffix = '_' + str(int(time.time()))
    project = os.getenv('PROJECT_ID')
    if project is None:
        print('PROJECT_ID is not defined.')
        sys.exit(1)
    self.project_id = project
    self.dataset_id = os.getenv('DATASET_ID', 'test_dataset')
    self.table_id = os.getenv('TABLE_ID', 'test_table') + suffix
    self.view_id = os.getenv('VIEW_ID', 'test_view') + suffix
    self.bq = BigQuery(self.project_id)
    # Recreate the dataset from scratch for isolation.
    if self.bq.exists_dataset(self.dataset_id):
        self.bq.drop_dataset(self.dataset_id, delete_contents=True)
    self.bq.create_dataset(self.dataset_id)
    self.bq.dataset_id = self.dataset_id  # Set default datasetId
    # Schema exercising REQUIRED, nested RECORD and REPEATED fields.
    schema = [
        {'name': 'id', 'type': 'INTEGER', 'mode': 'REQUIRED'},
        {'name': 'name', 'type': 'STRING', 'mode': 'REQUIRED'},
        {'name': 'birth', 'type': 'RECORD', 'mode': 'NULLABLE', 'fields': [
            {'name': 'year', 'type': 'INTEGER', 'mode': 'REQUIRED'},
            {'name': 'month', 'type': 'INTEGER', 'mode': 'REQUIRED'},
            {'name': 'day', 'type': 'INTEGER', 'mode': 'REQUIRED'},
        ]},
        {'name': 'url', 'type': 'STRING', 'mode': 'REPEATED'},
    ]
    self.bq.create_table(self.table_id, schema=schema)
class BigQueryTest(unittest.TestCase):
    """Integration tests for BigQuery.extract() to GCS in every format.

    Requires PROJECT_ID and BUCKET in the environment; creates one nested
    and one flat table per run and extracts them as json/avro/csv/tsv,
    plain and gzipped.
    """

    def setUp(self):
        """Create a fresh dataset plus populated nested and flat tables."""
        self.project_id = os.getenv('PROJECT_ID')
        self.dataset_id = os.getenv('DATASET_ID', 'test_dataset')
        self.nested_table_id = os.getenv('TABLE_ID', 'test_table') + '_nested_' + str(int(time.time()))
        self.flat_table_id = os.getenv('TABLE_ID', 'test_table') + '_flat_' + str(int(time.time()))
        self.bucket = os.getenv('BUCKET')
        if self.project_id is None:
            print('PROJECT_ID is not defined.')
            sys.exit(1)
        if self.bucket is None:
            # BUG FIX: message previously read 'BCUKET is not defined.'
            print('BUCKET is not defined.')
            sys.exit(1)
        self.bq = BigQuery(self.project_id)
        # Recreate the dataset from scratch for isolation.
        if self.bq.exists_dataset(self.dataset_id):
            self.bq.drop_dataset(self.dataset_id, delete_contents=True)
        self.bq.create_dataset(self.dataset_id)
        self.bq.dataset_id = self.dataset_id  # Set default datasetId
        # create & load nested table
        schema = [
            {'name': 'id', 'type': 'INTEGER', 'mode': 'REQUIRED'},
            {'name': 'name', 'type': 'STRING', 'mode': 'REQUIRED'},
            {'name': 'birth', 'type': 'RECORD', 'mode': 'NULLABLE', 'fields': [
                {'name': 'year', 'type': 'INTEGER', 'mode': 'REQUIRED'},
                {'name': 'month', 'type': 'INTEGER', 'mode': 'REQUIRED'},
                {'name': 'day', 'type': 'INTEGER', 'mode': 'REQUIRED'},
            ]},
            {'name': 'url', 'type': 'STRING', 'mode': 'REPEATED'},
        ]
        self.bq.create_table(self.nested_table_id, schema=schema)
        rows = [
            {'id': 1, 'name': 'foo'},
            {'id': 2, 'name': 'bar', 'birth': {'year': 2015, 'month': 10, 'day': 28}},
            {'id': 3, 'name': 'baz', 'url': [
                'http://www.yahoo.co.jp/',
                'http://www.google.co.jp/',
            ]},
        ]
        self.bq.load(self.nested_table_id, rows)
        # create & load flat table
        schema = [
            {'name': 'id', 'type': 'INTEGER', 'mode': 'REQUIRED'},
            {'name': 'name', 'type': 'STRING', 'mode': 'REQUIRED'},
        ]
        self.bq.create_table(self.flat_table_id, schema=schema)
        rows = [
            {'id': 1, 'name': 'foo'},
            {'id': 2, 'name': 'bar'},
            {'id': 3, 'name': 'baz'},
        ]
        self.bq.load(self.flat_table_id, rows)

    def tearDown(self):
        """Drop the test tables and the dataset.

        BUG FIX: this was named 'TearDown', which unittest never invokes,
        so the dataset and tables leaked after every test. Also drop the
        flat table explicitly for symmetry with setUp.
        """
        self.bq.drop_table(self.nested_table_id)
        self.bq.drop_table(self.flat_table_id)
        self.bq.drop_dataset(self.dataset_id, delete_contents=True)

    def test_normal(self):
        """Extract both tables to GCS in every supported format."""
        # json
        destination_uris = [self.bucket + '/test-*.json']
        print("extract start (json)")
        res = self.bq.extract(self.nested_table_id, destination_uris)
        pprint(res)
        print("extract end (json)")
        # avro
        destination_uris = [self.bucket + '/test-*.avro']
        print("extract start (avro)")
        res = self.bq.extract(self.nested_table_id, destination_uris)
        pprint(res)
        print("extract end (avro)")
        # csv
        destination_uris = [self.bucket + '/test-*.csv']
        print("extract start (csv)")
        res = self.bq.extract(self.flat_table_id, destination_uris)
        pprint(res)
        print("extract end (csv)")
        # tsv
        destination_uris = [self.bucket + '/test-*.tsv']
        print("extract start (tsv)")
        res = self.bq.extract(self.flat_table_id, destination_uris)
        pprint(res)
        print("extract end (tsv)")
        # json + gz
        destination_uris = [self.bucket + '/test-*.json.gz']
        print("extract start (json+gz)")
        res = self.bq.extract(self.nested_table_id, destination_uris)
        pprint(res)
        print("extract end (json+gz)")
        # csv + gz
        destination_uris = [self.bucket + '/test-*.csv.gz']
        print("extract start (csv+gz)")
        res = self.bq.extract(self.flat_table_id, destination_uris)
        pprint(res)
        print("extract end (csv+gz)")
        # tsv + gz
        destination_uris = [self.bucket + '/test-*.tsv.gz']
        print("extract start (tsv+gz)")
        res = self.bq.extract(self.flat_table_id, destination_uris)
        pprint(res)
        print("extract end (tsv+gz)")
class BigQueryTest(unittest.TestCase): def setUp(self): self.project_id = os.getenv('PROJECT_ID') self.dataset_id = os.getenv('DATASET_ID', 'test_dataset') self.table_id = os.getenv('TABLE_ID', 'test_table') + '_' + str(int(time.time())) self.view_id = os.getenv('VIEW_ID', 'test_view') + '_' + str(int(time.time())) if self.project_id is None: print('PROJECT_ID is not defined.') sys.exit(1) self.bq = BigQuery(self.project_id) if self.bq.exists_dataset(self.dataset_id): self.bq.drop_dataset(self.dataset_id, delete_contents=True) self.bq.create_dataset(self.dataset_id) self.bq.dataset_id = self.dataset_id # Set default datasetId #schema = [ # { 'name': 'title', 'type': 'STRING', 'mode': 'REQUIRED' }, # { 'name': 'count', 'type': 'INTEGER', 'mode': 'REQUIRED' } #] #self.bq.create_table(self.table_id, schema=schema) def TearDown(self): self.bq.drop_table(self.table_id) self.bq.drop_dataset(self.dataset_id, delete_contents=True) # def test_error_allow_large_results(self): # schema = [ # { 'name': 'word', 'type': 'STRING', 'mode': 'REQUIRED' }, # { 'name': 'word_count', 'type': 'INTEGER', 'mode': 'REQUIRED' }, # ] # self.bq.create_table(self.table_id, schema=schema) # # query = 'SELECT word,word_count FROM [publicdata:samples.shakespeare] LIMIT 10' # # # Cannnot append to required fields when allowLargeResults is True # with self.assertRaises(BigQueryError): # res = self.bq.insert_from_select(self.table_id, query, allow_large_results=True) def test_error_no_required_field(self): schema = [ { 'name': 'title', 'type': 'STRING', 'mode': 'REQUIRED' }, { 'name': 'unique_words', 'type': 'INTEGER', 'mode': 'REQUIRED' }, { 'name': 'required_field', 'type': 'STRING', 'mode': 'REQUIRED' }, ] self.bq.create_table(self.table_id, schema=schema) query = 'SELECT TOP(corpus, 10) as title, COUNT(*) as unique_words ' \ + 'FROM [publicdata:samples.shakespeare]' # No Required Field with self.assertRaises(BigQueryError): self.bq.insert_from_select(self.table_id, query) def 
test_error_schema_mismatch(self): schema = [ { 'name': 'title', 'type': 'STRING', 'mode': 'REQUIRED' }, { 'name': 'id', 'type': 'INTEGER', 'mode': 'REQUIRED' }, ] self.bq.create_table(self.table_id, schema=schema) query = 'SELECT title,id FROM [publicdata:samples.wikipedia] LIMIT 10' # Can't change type form NULLABLE to REQUIRED with self.assertRaises(BigQueryError): self.bq.insert_from_select(self.table_id, query) def test_normal_allow_large_results(self): schema = [ { 'name': 'word', 'type': 'STRING', 'mode': 'NULLABLE' }, { 'name': 'word_count', 'type': 'INTEGER', 'mode': 'NULLABLE' }, ] self.bq.create_table(self.table_id, schema=schema) query = 'SELECT word,word_count FROM [publicdata:samples.shakespeare] LIMIT 10' res = self.bq.insert_from_select(self.table_id, query, allow_large_results=True) self.assertTrue(bool(res)) pprint(res) res = self.bq.dump_table(self.table_id) self.assertEqual(10, len(res)) pprint(res) def test_normal_insert_into_exists_table(self): schema = [ { 'name': 'word', 'type': 'STRING', 'mode': 'REQUIRED' }, { 'name': 'word_count', 'type': 'INTEGER', 'mode': 'REQUIRED' }, ] self.bq.create_table(self.table_id, schema=schema) query = 'SELECT word,word_count FROM [publicdata:samples.shakespeare] LIMIT 10' res = self.bq.insert_from_select(self.table_id, query) self.assertTrue(bool(res)) pprint(res) res = self.bq.dump_table(self.table_id) self.assertEqual(10, len(res)) pprint(res) def test_normal_with_args(self): query = 'SELECT TOP(corpus, 10) as title, COUNT(*) as unique_words FROM shakespeare' res = self.bq.insert_from_select( dest_project_id=self.project_id, dest_dataset_id=self.dataset_id, dest_table_id=self.table_id, query=query, src_project_id='publicdata', src_dataset_id='samples' ) self.assertTrue(bool(res)) pprint(res) res = self.bq.dump_table(self.table_id) self.assertEqual(10, len(res)) pprint(res) def test_normal_async(self): query = 'SELECT TOP(corpus, 10) as title, COUNT(*) as unique_words ' \ + 'FROM 
[publicdata:samples.shakespeare]' res = self.bq.insert_from_select(self.table_id, query, async=True) self.assertTrue(re.match(r'job_', res)) print(res) self.bq.wait_job(res) res = self.bq.dump_table(self.table_id) self.assertEqual(10, len(res)) pprint(res) def test_normal(self): query = 'SELECT TOP(corpus, 10) as title, COUNT(*) as unique_words ' \ + 'FROM [publicdata:samples.shakespeare]' res = self.bq.insert_from_select(self.table_id, query) self.assertTrue(bool(res)) pprint(res) res = self.bq.dump_table(self.table_id) self.assertEqual(10, len(res)) pprint(res)
class BigQueryTest(unittest.TestCase): def setUp(self): self.project_id = os.getenv('PROJECT_ID') self.dataset_id = os.getenv('DATASET_ID', 'test_dataset') if self.project_id is None: print('PROJECT_ID is not defined.') sys.exit(1) self.bq = BigQuery(self.project_id) if self.bq.exists_dataset(self.dataset_id): self.bq.drop_dataset(self.dataset_id, delete_contents=True) self.bq.create_dataset(self.dataset_id) self.bq.dataset_id = self.dataset_id # Set default datasetId def TearDown(self): self.bq.drop_dataset(self.dataset_id, delete_contents=True) def test_normal_page_token(self): query = 'SELECT TOP(corpus, 10) as title, COUNT(*) as unique_words ' \ + 'FROM [publicdata:samples.shakespeare] ' res = self.bq.select(query, max_results=1) self.assertEqual(10, len(res)) pprint(res) def test_normal_empty(self): query = 'SELECT TOP(corpus, 10) as title, COUNT(*) as unique_words ' \ + 'FROM [publicdata:samples.shakespeare] ' \ + 'WHERE corpus = "hoge" ' res = self.bq.select(query) self.assertEqual(0, len(res)) pprint(res) def test_normal_async(self): query = 'SELECT TOP(corpus, 10) as title, COUNT(*) as unique_words ' \ + 'FROM [publicdata:samples.shakespeare]' res = self.bq.select(query, async=True) self.assertTrue(re.match(r'job_', res)) pprint(res) def test_normal(self): query = 'SELECT TOP(corpus, 10) as title, COUNT(*) as unique_words ' \ + 'FROM [publicdata:samples.shakespeare]' res = self.bq.select(query) self.assertEqual(10, len(res)) pprint(res)
class BigQueryTest(unittest.TestCase):
    """Integration tests for table and view management operations.

    Requires PROJECT_ID in the environment; covers create/info/show/drop
    for tables and views, error paths, and fully-qualified argument forms.
    """

    def setUp(self):
        """Build the client and recreate the working dataset."""
        self.project_id = os.getenv("PROJECT_ID")
        self.dataset_id = os.getenv("DATASET_ID", "test_dataset")
        self.table_id = os.getenv("TABLE_ID", "test_table")
        self.view_id = os.getenv("VIEW_ID", "test_view")
        if self.project_id is None:
            print("PROJECT_ID is not defined.")
            sys.exit(1)
        self.bq = BigQuery(self.project_id)
        # Recreate the dataset from scratch for isolation.
        if self.bq.exists_dataset(self.dataset_id):
            self.bq.drop_dataset(self.dataset_id, delete_contents=True)
        self.bq.create_dataset(self.dataset_id)
        self.bq.dataset_id = self.dataset_id  # Set default datasetId

    def tearDown(self):
        """Drop the dataset.

        BUG FIX: this was named 'TearDown', which unittest never invokes,
        so the dataset leaked after every test.
        """
        self.bq.drop_dataset(self.dataset_id, delete_contents=True)

    def test_error(self):
        """Error paths: view over a missing table, missing dataset, view+schema."""
        query = "SELECT * FROM " + self.dataset_id + "." + self.table_id
        # The backing table does not exist yet.
        with self.assertRaises(NotFoundError):
            self.bq.create_view(self.view_id, query)
        schema = [
            {"name": "id", "type": "INTEGER", "mode": "REQUIRED"},
            {"name": "name", "type": "STRING", "mode": "REQUIRED"},
            {
                "name": "birth",
                "type": "RECORD",
                "mode": "NULLABLE",
                "fields": [
                    {"name": "year", "type": "INTEGER", "mode": "REQUIRED"},
                    {"name": "month", "type": "INTEGER", "mode": "REQUIRED"},
                    {"name": "day", "type": "INTEGER", "mode": "REQUIRED"},
                ],
            },
            {"name": "url", "type": "STRING", "mode": "REPEATED"},
        ]
        with self.assertRaises(NotFoundError):
            self.bq.create_table(self.table_id, schema=schema, dataset_id="not_found_dataset")
        self.bq.create_table(self.table_id, schema=schema)
        # "Schema field shouldn't be used as input with a view"
        with self.assertRaises(Http4xxError):
            self.bq.create_view(self.view_id, query, schema=schema)

    def test_normal_with_args(self):
        """Fully-qualified project/dataset keyword arguments are honored."""
        print("exists table?")
        if self.bq.exists_table(project_id=self.project_id, dataset_id=self.dataset_id, table_id=self.table_id):
            print("exists")
            print("drop table")
            res = self.bq.drop_table(project_id=self.project_id, dataset_id=self.dataset_id, table_id=self.table_id)
            self.assertTrue(bool(res))
        else:
            print("no exists")
        print("create table")
        schema = [
            {"name": "id", "type": "INTEGER", "mode": "REQUIRED"},
            {"name": "name", "type": "STRING", "mode": "REQUIRED"},
            {
                "name": "birth",
                "type": "RECORD",
                "mode": "NULLABLE",
                "fields": [
                    {"name": "year", "type": "INTEGER", "mode": "REQUIRED"},
                    {"name": "month", "type": "INTEGER", "mode": "REQUIRED"},
                    {"name": "day", "type": "INTEGER", "mode": "REQUIRED"},
                ],
            },
            {"name": "url", "type": "STRING", "mode": "REPEATED"},
        ]
        # expirationTime is epoch milliseconds; build from epoch seconds + "000".
        expiration_time = str(int(time.time()) + 86400) + "000"
        res = self.bq.create_table(
            project_id=self.project_id,
            dataset_id=self.dataset_id,
            table_id=self.table_id,
            schema=schema,
            description="Description",
            expiration_time=expiration_time,
            friendly_name="Friendly Name",
        )
        self.assertTrue(bool(res))
        print("info table")
        res = self.bq.info_table(project_id=self.project_id, dataset_id=self.dataset_id, table_id=self.table_id)
        self.assertEqual("Description", res["description"])
        self.assertEqual(expiration_time, res["expirationTime"])
        self.assertEqual("Friendly Name", res["friendlyName"])
        pprint(res)
        print("create view")
        query = "SELECT * FROM " + self.dataset_id + "." + self.table_id
        res = self.bq.create_view(
            project_id=self.project_id, dataset_id=self.dataset_id, table_id=self.view_id, query=query
        )
        self.assertTrue(bool(res))
        print("show tables")
        res = self.bq.show_tables(project_id=self.project_id, dataset_id=self.dataset_id, max_results=1)
        self.assertIn(self.table_id, res)
        self.assertIn(self.view_id, res)
        print("\n".join(res))
        print("drop view")
        res = self.bq.drop_table(self.view_id)
        self.assertFalse(bool(res))
        print("drop table")
        res = self.bq.drop_table(self.table_id)
        self.assertFalse(bool(res))

    def test_normal(self):
        """Happy path for create/info/show/drop of a table and a view."""
        print("exists table?")
        if self.bq.exists_table(self.table_id):
            print("exists")
            print("drop table")
            res = self.bq.drop_table(self.table_id)
            self.assertTrue(bool(res))
        else:
            print("no exists")
        print("create table")
        schema = [
            {"name": "id", "type": "INTEGER", "mode": "REQUIRED"},
            {"name": "name", "type": "STRING", "mode": "REQUIRED"},
            {
                "name": "birth",
                "type": "RECORD",
                "mode": "NULLABLE",
                "fields": [
                    {"name": "year", "type": "INTEGER", "mode": "REQUIRED"},
                    {"name": "month", "type": "INTEGER", "mode": "REQUIRED"},
                    {"name": "day", "type": "INTEGER", "mode": "REQUIRED"},
                ],
            },
            {"name": "url", "type": "STRING", "mode": "REPEATED"},
        ]
        res = self.bq.create_table(self.table_id, schema=schema)
        self.assertTrue(bool(res))
        print("create exists table")
        # Creating an already existing table is reported as falsy, not raised.
        res = self.bq.create_table(self.table_id, schema=schema)
        self.assertFalse(bool(res))
        print("info table")
        res = self.bq.info_table(self.table_id)
        self.assertTrue(bool(res))
        pprint(res)
        print("create view")
        query = "SELECT * FROM " + self.dataset_id + "." + self.table_id
        self.bq.create_view(self.view_id, query)
        self.assertTrue(bool(res))
        print("info view")
        res = self.bq.info_table(self.view_id)
        self.assertTrue(bool(res))
        pprint(res)
        print("show tables")
        res = self.bq.show_tables()
        self.assertIn(self.table_id, res)
        self.assertIn(self.view_id, res)
        print("\n".join(res))
        print("drop view")
        res = self.bq.drop_table(self.view_id)
        self.assertFalse(bool(res))
        print("drop table")
        res = self.bq.drop_table(self.table_id)
        self.assertFalse(bool(res))
        print("drop no exists table")
        res = self.bq.drop_table(self.table_id)
        self.assertFalse(bool(res))
class BigQueryTest(unittest.TestCase):
    """Integration tests for dataset management operations.

    Requires PROJECT_ID in the environment; covers create/info/show/drop
    for datasets, error paths, and fully-qualified argument forms.
    """

    def setUp(self):
        """Build the client; the tests manage the dataset themselves."""
        self.project_id = os.getenv('PROJECT_ID')
        self.dataset_id = os.getenv('DATASET_ID', 'test_dataset')
        if self.project_id is None:
            print('PROJECT_ID is not defined.')
            sys.exit(1)
        self.bq = BigQuery(self.project_id)

    def tearDown(self):
        # BUG FIX: was named 'TearDown', which unittest never invokes.
        # Intentionally a no-op: each test cleans up its own dataset.
        pass

    def test_normal(self):
        """Happy path for create/show/drop of a dataset."""
        print('exists dataset')
        if self.bq.exists_dataset(self.dataset_id):
            print('delete dataset')
            res = self.bq.drop_dataset(self.dataset_id, delete_contents=True)
            self.assertTrue(bool(res))
        print('create dataset')
        res = self.bq.create_dataset(self.dataset_id)
        self.assertTrue(bool(res))
        print('create exists dataset')
        # Creating an existing dataset is reported as falsy, not raised.
        res = self.bq.create_dataset(self.dataset_id)
        self.assertFalse(bool(res))
        print('show datasets')
        res = self.bq.show_datasets()
        self.assertIn(self.dataset_id, res)
        print('\n'.join(res))
        print('delete dataset')
        res = self.bq.drop_dataset(self.dataset_id)
        self.assertFalse(bool(res))
        print('delete no exists dataset')
        res = self.bq.drop_dataset(self.dataset_id)
        self.assertFalse(bool(res))

    def test_normal_with_args(self):
        """create_dataset honors access, expiration, metadata and location."""
        print('exists dataset: ' + self.dataset_id)
        if self.bq.exists_dataset(self.dataset_id, project_id=self.project_id):
            print('exists')
            print('delete dataset: ' + self.dataset_id)
            res = self.bq.drop_dataset(self.dataset_id, project_id=self.project_id, delete_contents=True)
            self.assertTrue(bool(res))
        else:
            print('no exists')
        print('create dataset')
        access = [
            {'role': 'OWNER', 'specialGroup': 'projectOwners'},
        ]
        res = self.bq.create_dataset(self.dataset_id,
                                     project_id=self.project_id,
                                     access=access,
                                     default_table_expiration_ms=3600000,
                                     description='Description',
                                     friendly_name='Friendly Name',
                                     location='EU')
        self.assertTrue(bool(res))
        print('info dataset')
        res = self.bq.info_dataset(self.dataset_id, project_id=self.project_id)
        self.assertEqual(1, len(res['access']))
        self.assertEqual('OWNER', res['access'][0]['role'])
        self.assertEqual('projectOwners', res['access'][0]['specialGroup'])
        self.assertEqual(3600000, int(res['defaultTableExpirationMs']))
        self.assertEqual('Description', res['description'])
        self.assertEqual('Friendly Name', res['friendlyName'])
        self.assertEqual('EU', res['location'])
        pprint(res)
        print('show datasets')
        res = self.bq.show_datasets(project_id=self.project_id, all=True, max_results=10)
        self.assertIn(self.dataset_id, res)
        print('\n'.join(res))
        print('delete dataset: ' + self.dataset_id)
        res = self.bq.drop_dataset(self.dataset_id, delete_contents=True)
        self.assertFalse(bool(res))

    def test_error(self):
        """Missing or None dataset_id is rejected before any API call."""
        with self.assertRaises(TypeError):
            self.bq.create_dataset()
        with self.assertRaises(ParameterError):
            self.bq.create_dataset(dataset_id=None)
class BigQueryTest(unittest.TestCase): def setUp(self): self.project_id = os.getenv('PROJECT_ID') self.dataset_id = os.getenv('DATASET_ID', 'test_dataset') self.table_id = os.getenv('TABLE_ID', 'test_table') + '_' + str(int(time.time())) self.view_id = os.getenv('VIEW_ID', 'test_view') + '_' + str(int(time.time())) if self.project_id is None: print('PROJECT_ID is not defined.') sys.exit(1) self.bq = BigQuery(self.project_id) if self.bq.exists_dataset(self.dataset_id): self.bq.drop_dataset(self.dataset_id, delete_contents=True) self.bq.create_dataset(self.dataset_id) self.bq.dataset_id = self.dataset_id # Set default datasetId schema = [ { 'name': 'id', 'type': 'INTEGER', 'mode': 'REQUIRED' }, { 'name': 'name', 'type': 'STRING', 'mode': 'REQUIRED' }, { 'name': 'birth', 'type': 'RECORD', 'mode': 'NULLABLE', 'fields': [ { 'name': 'year', 'type': 'INTEGER', 'mode': 'REQUIRED' }, { 'name': 'month', 'type': 'INTEGER', 'mode': 'REQUIRED' }, { 'name': 'day', 'type': 'INTEGER', 'mode': 'REQUIRED' }, ]}, { 'name': 'url', 'type': 'STRING', 'mode': 'REPEATED' }, ] self.bq.create_table(self.table_id, schema=schema) def TearDown(self): self.bq.drop_table(self.table_id) self.bq.drop_dataset(self.dataset_id, delete_contents=True) def test_error_invalid_rows(self): rows = [ { 'id': 1, 'name': 'foo' }, { 'id': 2 }, { 'id': 'three', 'name': 'baz' }, ] with self.assertRaises(BigQueryError): self.bq.load(self.table_id, rows) res = self.bq.dump_table(self.table_id) self.assertEqual(0, len(res)) pprint(res) def test_error_unknown_values(self): rows = [ { 'id': 1, 'name': 'foo', 'unknown_field': 'unknown_value' }, ] with self.assertRaises(BigQueryError): self.bq.load(self.table_id, rows) def test_normal_unknown_values(self): rows = [ { 'id': 1, 'name': 'foo', 'unknown_field': 'unknown_value' }, ] self.bq.load(self.table_id, rows, ignore_unknown_values=True) res = self.bq.dump_table(self.table_id) self.assertEqual(1, len(res)) pprint(res) def test_normal_async(self): filepath = 
os.path.dirname(os.path.abspath(__file__)) + '/data.json' job_id = self.bq.load(self.table_id, filepath, async=True) self.assertTrue(re.match(r'job_', job_id)) print(job_id) while True: res = self.bq.info_job(job_id) state = res['status']['state'] print(state) if state == 'DONE': break time.sleep(2) res = self.bq.dump_table(self.table_id) self.assertEqual(3, len(res)) pprint(res) def test_normal_from_csv(self): filepath = os.path.dirname(os.path.abspath(__file__)) + '/data.csv' schema = [ { 'name': 'id', 'type': 'INTEGER', 'mode': 'REQUIRED' }, { 'name': 'name', 'type': 'STRING', 'mode': 'REQUIRED' }, ] res = self.bq.load(self.table_id, filepath, schema=schema, skip_leading_rows=1) self.assertTrue(bool(res)) res = self.bq.dump_table(self.table_id) self.assertEqual(3, len(res)) pprint(res) def test_normal_from_json(self): filepath = os.path.dirname(os.path.abspath(__file__)) + '/data.json' res = self.bq.load(self.table_id, filepath) self.assertTrue(bool(res)) res = self.bq.dump_table(self.table_id) self.assertEqual(3, len(res)) pprint(res) def test_normal_from_obj(self): rows = [ { 'id': 1, 'name': 'foo' }, { 'id': 2, 'name': 'bar', 'birth': { 'year': 2015, 'month': 10, 'day': 28 } }, { 'id': 3, 'name': 'baz', 'url': [ 'http://www.yahoo.co.jp/', 'http://www.google.co.jp/', ]} ] res = self.bq.load(self.table_id, rows) self.assertTrue(bool(res)) res = self.bq.dump_table(self.table_id) self.assertEqual(3, len(res)) pprint(res)
class BigQueryTest(unittest.TestCase):
    """Integration tests for BigQuery.insert() (streaming inserts).

    Requires PROJECT_ID in the environment. Streaming inserts are
    eventually visible, so wait_insert() polls the table under a SIGALRM
    timeout until rows appear.
    """

    def setUp(self):
        """Recreate the dataset and create an empty nested-schema table."""
        self.project_id = os.getenv('PROJECT_ID')
        self.dataset_id = os.getenv('DATASET_ID', 'test_dataset')
        self.table_id = os.getenv('TABLE_ID', 'test_table') + '_' + str(int(time.time()))
        self.view_id = os.getenv('VIEW_ID', 'test_view') + '_' + str(int(time.time()))
        if self.project_id is None:
            print('PROJECT_ID is not defined.')
            sys.exit(1)
        self.bq = BigQuery(self.project_id)
        # Recreate the dataset from scratch for isolation.
        if self.bq.exists_dataset(self.dataset_id):
            self.bq.drop_dataset(self.dataset_id, delete_contents=True)
        self.bq.create_dataset(self.dataset_id)
        self.bq.dataset_id = self.dataset_id  # Set default datasetId
        schema = [
            {'name': 'id', 'type': 'INTEGER', 'mode': 'REQUIRED'},
            {'name': 'name', 'type': 'STRING', 'mode': 'REQUIRED'},
            {'name': 'birth', 'type': 'RECORD', 'mode': 'NULLABLE', 'fields': [
                {'name': 'year', 'type': 'INTEGER', 'mode': 'REQUIRED'},
                {'name': 'month', 'type': 'INTEGER', 'mode': 'REQUIRED'},
                {'name': 'day', 'type': 'INTEGER', 'mode': 'REQUIRED'},
            ]},
            {'name': 'url', 'type': 'STRING', 'mode': 'REPEATED'},
        ]
        self.bq.create_table(self.table_id, schema=schema)

    def tearDown(self):
        """Drop the test table and dataset.

        BUG FIX: this was named 'TearDown', which unittest never invokes,
        so the table and dataset leaked after every test.
        """
        self.bq.drop_table(self.table_id)
        self.bq.drop_dataset(self.dataset_id, delete_contents=True)

    @staticmethod
    def sigalrm_handler(x, y):
        """SIGALRM handler: abort a wait_insert() poll that ran too long."""
        raise Exception("Timeout")

    def wait_insert(self):
        """Poll the table until rows become visible; 120 s SIGALRM timeout.

        Returns the dumped rows once the table is non-empty.
        """
        signal.signal(signal.SIGALRM, BigQueryTest.sigalrm_handler)
        signal.alarm(120)
        while True:
            res = self.bq.dump_table(self.table_id)
            if bool(res):
                signal.alarm(0)  # cancel the pending alarm
                return res
            print('sleep...')
            time.sleep(2)

    def test_error(self):
        """Invalid rows raise unless the matching skip/ignore flag is set."""
        rows = [
            {'id': 1, 'name': 'foo'},       # normal
            {'id': 2},                      # missing required field
            {'id': 'three', 'name': 'baz'}, # invalid data type
        ]
        # ignore_unknown_values does not excuse invalid rows.
        with self.assertRaises(BigQueryError):
            self.bq.insert(self.table_id, rows, ignore_unknown_values=True)
        rows = [
            {'id': 1, 'name': 'foo'},
            {'id': 2, 'name': 'bar', 'unknown_fields': 'unknown_value'},
        ]
        # skip_invalid_rows does not excuse unknown fields.
        with self.assertRaises(BigQueryError):
            self.bq.insert(self.table_id, rows, skip_invalid_rows=True)

    def test_normal_skip_invalid_rows(self):
        """skip_invalid_rows=True inserts only the valid row."""
        rows = [
            {'id': 1, 'name': 'foo'},       # normal
            {'id': 2},                      # missing required field
            {'id': 'three', 'name': 'baz'}, # invalid data type
        ]
        res = self.bq.insert(self.table_id, rows, skip_invalid_rows=True)
        self.assertTrue(bool(res))
        res = self.wait_insert()
        self.assertEqual(1, len(res))
        pprint(res)

    def test_normal_ignore_unknown_values(self):
        """ignore_unknown_values=True drops extra fields and inserts both rows."""
        rows = [
            {'id': 1, 'name': 'foo'},       # normal
            {'id': 2, 'name': 'bar', 'unknown_field': 'unknown_value'},
        ]
        res = self.bq.insert(self.table_id, rows, ignore_unknown_values=True)
        self.assertTrue(bool(res))
        res = self.wait_insert()
        self.assertEqual(2, len(res))
        pprint(res)

    def test_normal(self):
        """Valid rows, including nested and repeated fields, all insert."""
        rows = [
            {'id': 1, 'name': 'foo'},
            {'id': 2, 'name': 'bar', 'birth': {'year': 2015, 'month': 10, 'day': 28}},
            {'id': 3, 'name': 'baz', 'url': [
                'http://www.yahoo.co.jp/',
                'http://www.google.co.jp/',
            ]},
        ]
        res = self.bq.insert(self.table_id, rows)
        self.assertTrue(bool(res))
        res = self.wait_insert()
        self.assertEqual(3, len(res))
        pprint(res)