def test_dofn_client_start_bundle_create_called(self):
  """start_bundle() should create the table when Get reports it missing.

  With CREATE_IF_NEEDED, a 404 from tables.Get must be followed by a
  tables.Insert call.
  """
  mock_client = mock.Mock()
  # Simulate a missing table: Get raises a 404 HttpError.
  mock_client.tables.Get.side_effect = HttpError(
      response={'status': 404}, content=None, url=None)
  mock_client.tables.Insert.return_value = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId='project_id',
          datasetId='dataset_id',
          tableId='table_id'))
  table_schema = {
      'fields': [{'name': 'month', 'type': 'INTEGER', 'mode': 'NULLABLE'}]}
  write_fn = beam.io.gcp.bigquery.BigQueryWriteFn(
      table_id='table_id',
      dataset_id='dataset_id',
      project_id='project_id',
      batch_size=2,
      schema=table_schema,
      create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
      write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
      kms_key='kms_key',
      test_client=mock_client)
  write_fn.start_bundle()
  self.assertTrue(mock_client.tables.Get.called)
  self.assertTrue(mock_client.tables.Insert.called)
def create_table(self, table_name):
  """Create a test table with int64/bytes/date/time columns and no rows."""
  # (name, type, mode) — mode is only set where the original schema had one.
  field_specs = [
      ('int64', 'INT64', 'REQUIRED'),
      ('bytes', 'BYTES', None),
      ('date', 'DATE', None),
      ('time', 'TIME', None),
  ]
  table_schema = bigquery.TableSchema()
  for field_name, field_type, field_mode in field_specs:
    field = bigquery.TableFieldSchema()
    field.name = field_name
    field.type = field_type
    if field_mode is not None:
      field.mode = field_mode
    table_schema.fields.append(field)
  new_table = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId=self.project,
          datasetId=self.dataset_id,
          tableId=table_name),
      schema=table_schema)
  insert_request = bigquery.BigqueryTablesInsertRequest(
      projectId=self.project, datasetId=self.dataset_id, table=new_table)
  self.bigquery_client.client.tables.Insert(insert_request)
def create_table(cls, table_name):
  """Create a table covering many BigQuery types and populate sample rows.

  Inserts one fully-populated row plus, for every column, a row containing
  only that single column (all other columns left unset/NULL).
  """
  # Column name/type pairs for the schema.
  field_specs = [
      ('float', 'FLOAT'),
      ('numeric', 'NUMERIC'),
      ('bytes', 'BYTES'),
      ('date', 'DATE'),
      ('time', 'TIME'),
      ('datetime', 'DATETIME'),
      ('timestamp', 'TIMESTAMP'),
      ('geo', 'GEOGRAPHY'),
  ]
  table_schema = bigquery.TableSchema()
  for field_name, field_type in field_specs:
    field = bigquery.TableFieldSchema()
    field.name = field_name
    field.type = field_type
    table_schema.fields.append(field)
  new_table = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId=cls.project,
          datasetId=cls.dataset_id,
          tableId=table_name),
      schema=table_schema)
  insert_request = bigquery.BigqueryTablesInsertRequest(
      projectId=cls.project, datasetId=cls.dataset_id, table=new_table)
  cls.bigquery_client.client.tables.Insert(insert_request)
  row_data = {
      'float': 0.33,
      'numeric': Decimal('10'),
      'bytes': base64.b64encode(b'\xab\xac').decode('utf-8'),
      'date': '3000-12-31',
      'time': '23:59:59',
      'datetime': '2018-12-31T12:44:31',
      'timestamp': '2018-12-31 12:44:31.744957 UTC',
      'geo': 'POINT(30 10)'
  }
  table_data = [row_data]
  # One extra row per column: a single key/value pair, every other column
  # implicitly NULL.
  for key, value in iteritems(row_data):
    table_data.append({key: value})
  cls.bigquery_client.insert_rows(
      cls.project, cls.dataset_id, table_name, table_data)
def test_dofn_client_finish_bundle_flush_called(self):
  """A buffered row below batch_size must be flushed by finish_bundle()."""
  mock_client = mock.Mock()
  mock_client.tables.Get.return_value = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId='project_id',
          datasetId='dataset_id',
          tableId='table_id'))
  mock_client.tabledata.InsertAll.return_value = (
      bigquery.TableDataInsertAllResponse(insertErrors=[]))
  table_schema = {
      'fields': [{'name': 'month', 'type': 'INTEGER', 'mode': 'NULLABLE'}]}
  write_fn = beam.io.gcp.bigquery.BigQueryWriteFn(
      table_id='table_id',
      dataset_id='dataset_id',
      project_id='project_id',
      batch_size=2,
      schema=table_schema,
      create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER,
      write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
      kms_key=None,
      test_client=mock_client)
  write_fn.start_bundle()
  write_fn.process({'month': 1})
  self.assertTrue(mock_client.tables.Get.called)
  # Only one row buffered — batch_size (2) not reached, so no flush yet.
  self.assertFalse(mock_client.tabledata.InsertAll.called)
  write_fn.finish_bundle()
  # finish_bundle() must flush the remaining buffered row.
  self.assertTrue(mock_client.tabledata.InsertAll.called)
def test_rows_are_written(self):
  """A row written through BigQuerySink reaches tabledata.InsertAll intact."""
  sample_row = {'i': 1, 'b': True, 's': 'abc', 'f': 3.14}
  mock_client = mock.Mock()
  mock_client.tables.Get.return_value = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId='project', datasetId='dataset', tableId='table'),
      schema=bigquery.TableSchema())
  insert_response = mock.Mock()
  insert_response.insertErrors = []
  mock_client.tabledata.InsertAll.return_value = insert_response
  sink = beam.io.BigQuerySink(
      'project:dataset.table',
      write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND)
  with sink.writer(mock_client) as writer:
    writer.Write(sample_row)
  # Build the JSON payload we expect the writer to have produced.
  json_object = bigquery.JsonObject()
  for key, value in iteritems(sample_row):
    json_object.additionalProperties.append(
        bigquery.JsonObject.AdditionalProperty(
            key=key, value=to_json_value(value)))
  expected_rows = [
      bigquery.TableDataInsertAllRequest.RowsValueListEntry(
          insertId='_1',  # First row ID generated with prefix ''
          json=json_object)
  ]
  mock_client.tabledata.InsertAll.assert_called_with(
      bigquery.BigqueryTabledataInsertAllRequest(
          projectId='project',
          datasetId='dataset',
          tableId='table',
          tableDataInsertAllRequest=bigquery.TableDataInsertAllRequest(
              rows=expected_rows)))
def test_dofn_client_process_performs_batching(self):
  """process() must buffer rows and not flush before batch_size is reached."""
  mock_client = mock.Mock()
  mock_client.tables.Get.return_value = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId='project_id',
          datasetId='dataset_id',
          tableId='table_id'))
  mock_client.tabledata.InsertAll.return_value = (
      bigquery.TableDataInsertAllResponse(insertErrors=[]))
  write_fn = beam.io.gcp.bigquery.BigQueryWriteFn(
      table_id='table_id',
      dataset_id='dataset_id',
      project_id='project_id',
      batch_size=2,
      schema='month:INTEGER',
      create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER,
      write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
      client=mock_client)
  write_fn.start_bundle()
  write_fn.process({'month': 1})
  self.assertTrue(mock_client.tables.Get.called)
  # One buffered row < batch_size (2), so InsertAll must not fire yet.
  self.assertFalse(mock_client.tabledata.InsertAll.called)
def test_dofn_client_finish_bundle_flush_called(self):
  """finish_bundle() flushes a row buffered below batch_size."""
  mock_client = mock.Mock()
  mock_client.tables.Get.return_value = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId='project_id',
          datasetId='dataset_id',
          tableId='table_id'))
  mock_client.tabledata.InsertAll.return_value = (
      bigquery.TableDataInsertAllResponse(insertErrors=[]))
  write_fn = beam.io.gcp.bigquery.BigQueryWriteFn(
      batch_size=2,
      create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
      write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
      kms_key=None,
      test_client=mock_client)
  write_fn.start_bundle()
  # Element is (destination, (row, insert_id)); providing the destination
  # ensures the table is created.
  write_fn.process(('project_id:dataset_id.table_id', ({
      'month': 1
  }, 'insertid3')))
  self.assertTrue(mock_client.tables.Get.called)
  # Batch size (2) not reached — nothing flushed during process().
  self.assertFalse(mock_client.tabledata.InsertAll.called)
  write_fn.finish_bundle()
  # The remaining buffered row is flushed at bundle end.
  self.assertTrue(mock_client.tabledata.InsertAll.called)
def test_dofn_client_process_flush_called(self):
  """process() flushes as soon as the buffered rows hit batch_size."""
  mock_client = mock.Mock()
  mock_client.tables.Get.return_value = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId='project_id',
          datasetId='dataset_id',
          tableId='table_id'))
  mock_client.tabledata.InsertAll.return_value = (
      bigquery.TableDataInsertAllResponse(insertErrors=[]))
  write_fn = beam.io.gcp.bigquery.BigQueryWriteFn(
      batch_size=2,
      create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER,
      write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
      kms_key=None,
      test_client=mock_client)
  write_fn.start_bundle()
  write_fn.process(('project_id:dataset_id.table_id', ({
      'month': 1
  }, 'insertid1')))
  write_fn.process(('project_id:dataset_id.table_id', ({
      'month': 2
  }, 'insertid2')))
  # Two rows buffered == batch_size, so the flush happens inside process().
  self.assertTrue(mock_client.tabledata.InsertAll.called)
def _setup_new_types_env(self):
  """Create NEW_TYPES_INPUT_TABLE (bytes/date/time) and load sample rows."""
  field_specs = [('bytes', 'BYTES'), ('date', 'DATE'), ('time', 'TIME')]
  table_schema = bigquery.TableSchema()
  for field_name, field_type in field_specs:
    field = bigquery.TableFieldSchema()
    field.name = field_name
    field.type = field_type
    table_schema.fields.append(field)
  new_table = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId=self.project,
          datasetId=self.dataset_id,
          tableId=NEW_TYPES_INPUT_TABLE),
      schema=table_schema)
  insert_request = bigquery.BigqueryTablesInsertRequest(
      projectId=self.project, datasetId=self.dataset_id, table=new_table)
  self.bigquery_client.client.tables.Insert(insert_request)
  table_data = [
      {'bytes': b'xyw=', 'date': '2011-01-01', 'time': '23:59:59.999999'},
      {'bytes': b'abc=', 'date': '2000-01-01', 'time': '00:00:00'},
      {'bytes': b'dec=', 'date': '3000-12-31', 'time': '23:59:59.990000'},
  ]
  self.bigquery_client.insert_rows(
      self.project, self.dataset_id, NEW_TYPES_INPUT_TABLE, table_data)
def test_dofn_client_no_records(self):
  """With no rows processed, neither process nor finish_bundle inserts."""
  mock_client = mock.Mock()
  mock_client.tables.Get.return_value = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId='project_id',
          datasetId='dataset_id',
          tableId='table_id'))
  mock_client.tabledata.InsertAll.return_value = (
      bigquery.TableDataInsertAllResponse(insertErrors=[]))
  write_fn = beam.io.gcp.bigquery.BigQueryWriteFn(
      batch_size=2,
      create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER,
      write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
      kms_key=None,
      test_client=mock_client)
  write_fn.start_bundle()
  # No rows yet — nothing to flush.
  self.assertFalse(mock_client.tabledata.InsertAll.called)
  write_fn.finish_bundle()
  # Still no rows — finish_bundle must not issue an empty insert.
  self.assertFalse(mock_client.tabledata.InsertAll.called)
def create_table(self, tablename):
  """Create a (number INTEGER, str STRING) table and load four sample rows,
  including non-ASCII strings."""
  field_specs = [('number', 'INTEGER'), ('str', 'STRING')]
  table_schema = bigquery.TableSchema()
  for field_name, field_type in field_specs:
    field = bigquery.TableFieldSchema()
    field.name = field_name
    field.type = field_type
    table_schema.fields.append(field)
  new_table = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId=self.project,
          datasetId=self.dataset_id,
          tableId=tablename),
      schema=table_schema)
  insert_request = bigquery.BigqueryTablesInsertRequest(
      projectId=self.project, datasetId=self.dataset_id, table=new_table)
  self.bigquery_client.client.tables.Insert(insert_request)
  table_data = [
      {'number': 1, 'str': 'abc'},
      {'number': 2, 'str': 'def'},
      {'number': 3, 'str': u'你好'},
      {'number': 4, 'str': u'привет'},
  ]
  self.bigquery_client.insert_rows(
      self.project, self.dataset_id, tablename, table_data)
def create_table_new_types(self, table_name):
  """Create a bytes/date/time table and load rows with raw byte payloads."""
  field_specs = [('bytes', 'BYTES'), ('date', 'DATE'), ('time', 'TIME')]
  table_schema = bigquery.TableSchema()
  for field_name, field_type in field_specs:
    field = bigquery.TableFieldSchema()
    field.name = field_name
    field.type = field_type
    table_schema.fields.append(field)
  new_table = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId=self.project,
          datasetId=self.dataset_id,
          tableId=table_name),
      schema=table_schema)
  insert_request = bigquery.BigqueryTablesInsertRequest(
      projectId=self.project, datasetId=self.dataset_id, table=new_table)
  self.bigquery_client.client.tables.Insert(insert_request)
  table_data = [
      {'bytes': b'xyw', 'date': '2011-01-01', 'time': '23:59:59.999999'},
      {'bytes': b'abc', 'date': '2000-01-01', 'time': '00:00:00'},
      {'bytes': b'\xe4\xbd\xa0\xe5\xa5\xbd', 'date': '3000-12-31',
       'time': '23:59:59'},
      {'bytes': b'\xab\xac\xad', 'date': '2000-01-01', 'time': '00:00:00'},
  ]
  # The bigquery client expects BYTES values as base64-encoded text.
  for row in table_data:
    row['bytes'] = base64.b64encode(row['bytes']).decode('utf-8')
  self.bigquery_client.insert_rows(
      self.project, self.dataset_id, table_name, table_data)
def _create_table(self, project_id, dataset_id, table_id, schema):
  """Create a BigQuery table with the given reference and schema.

  Returns:
    The created table as a ``bigquery.Table`` instance.
  """
  insert_request = bigquery.BigqueryTablesInsertRequest(
      projectId=project_id,
      datasetId=dataset_id,
      table=bigquery.Table(
          tableReference=bigquery.TableReference(
              projectId=project_id,
              datasetId=dataset_id,
              tableId=table_id),
          schema=schema))
  # Insert returns the created bigquery.Table.
  return self.client.tables.Insert(insert_request)
def create_table(cls, table_name, data, table_schema):
  """Create a table with the given schema, load `data` into it, and return
  the schema for the caller's convenience."""
  new_table = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId=cls.project,
          datasetId=cls.dataset_id,
          tableId=table_name),
      schema=table_schema)
  insert_request = bigquery.BigqueryTablesInsertRequest(
      projectId=cls.project, datasetId=cls.dataset_id, table=new_table)
  cls.bigquery_client.client.tables.Insert(insert_request)
  cls.bigquery_client.insert_rows(
      cls.project, cls.dataset_id, table_name, data)
  return table_schema
def side_effect(request):
  """Mock tables.Get: 404 for the sample-info table, a table otherwise."""
  sample_info_request = bigquery.BigqueryTablesGetRequest(
      projectId='project',
      datasetId='dataset',
      tableId='table__sample_info')
  if request == sample_info_request:
    # The sample-info table does not exist.
    raise exceptions.HttpError(
        response={'status': '404'}, url='', content='')
  # Any other lookup resolves to an existing sharded table.
  return bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId='project',
          datasetId='dataset',
          tableId='table__chr1_part1'))
def _create_table(cls, table_name):
  """Create a table with scalar columns plus a doubly nested RECORD column,
  then load cls.TABLE_DATA into it."""

  def _field(name, field_type):
    # Small factory for a TableFieldSchema with just name/type set.
    f = bigquery.TableFieldSchema()
    f.name = name
    f.type = field_type
    return f

  table_schema = bigquery.TableSchema()
  table_schema.fields.append(_field('number', 'INTEGER'))
  table_schema.fields.append(_field('string', 'STRING'))
  table_schema.fields.append(_field('time', 'TIME'))
  table_schema.fields.append(_field('datetime', 'DATETIME'))
  # rec RECORD { rec_datetime DATETIME, rec_rec RECORD { rec_rec_datetime } }
  rec = _field('rec', 'RECORD')
  rec.fields.append(_field('rec_datetime', 'DATETIME'))
  rec_rec = _field('rec_rec', 'RECORD')
  rec_rec.fields.append(_field('rec_rec_datetime', 'DATETIME'))
  rec.fields.append(rec_rec)
  table_schema.fields.append(rec)
  new_table = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId=cls.project,
          datasetId=cls.dataset_id,
          tableId=table_name),
      schema=table_schema)
  insert_request = bigquery.BigqueryTablesInsertRequest(
      projectId=cls.project, datasetId=cls.dataset_id, table=new_table)
  cls.bigquery_client.client.tables.Insert(insert_request)
  cls.bigquery_client.insert_rows(
      cls.project, cls.dataset_id, table_name, cls.TABLE_DATA)
def test_table_with_write_disposition_append(self):
  """WRITE_APPEND to an existing table must neither delete nor recreate it."""
  mock_client = mock.Mock()
  existing_table = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId='project', datasetId='dataset', tableId='table'),
      schema=bigquery.TableSchema())
  mock_client.tables.Get.return_value = existing_table
  mock_client.tables.Insert.return_value = existing_table
  sink = beam.io.BigQuerySink(
      'project:dataset.table',
      write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND)
  with sink.writer(mock_client):
    pass
  self.assertTrue(mock_client.tables.Get.called)
  # Appending leaves the existing table untouched.
  self.assertFalse(mock_client.tables.Delete.called)
  self.assertFalse(mock_client.tables.Insert.called)
def _setup_new_types_env(self):
  """Create NEW_TYPES_INPUT_TABLE, load rows, and assert the load worked."""
  field_specs = [('bytes', 'BYTES'), ('date', 'DATE'), ('time', 'TIME')]
  table_schema = bigquery.TableSchema()
  for field_name, field_type in field_specs:
    field = bigquery.TableFieldSchema()
    field.name = field_name
    field.type = field_type
    table_schema.fields.append(field)
  new_table = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId=self.project,
          datasetId=self.dataset_id,
          tableId=NEW_TYPES_INPUT_TABLE),
      schema=table_schema)
  insert_request = bigquery.BigqueryTablesInsertRequest(
      projectId=self.project, datasetId=self.dataset_id, table=new_table)
  self.bigquery_client.client.tables.Insert(insert_request)
  table_data = [
      {'bytes': b'xyw', 'date': '2011-01-01', 'time': '23:59:59.999999'},
      {'bytes': b'abc', 'date': '2000-01-01', 'time': '00:00:00'},
      {'bytes': b'\xe4\xbd\xa0\xe5\xa5\xbd', 'date': '3000-12-31',
       'time': '23:59:59.990000'},
      {'bytes': b'\xab\xac\xad', 'date': '2000-01-01', 'time': '00:00:00'},
  ]
  # the API Tools bigquery client expects byte values to be base-64 encoded
  # TODO https://github.com/apache/beam/issues/19073: upgrade to
  # google-cloud-bigquery which does not require handling the encoding in
  # beam
  for row in table_data:
    row['bytes'] = base64.b64encode(row['bytes']).decode('utf-8')
  passed, errors = self.bigquery_client.insert_rows(
      self.project, self.dataset_id, NEW_TYPES_INPUT_TABLE, table_data)
  self.assertTrue(passed, 'Error in BQ setup: %s' % errors)
def test_table_not_empty_and_write_disposition_empty(
    self, patched_time_sleep):
  """WRITE_EMPTY against a non-empty table must raise RuntimeError."""
  mock_client = mock.Mock()
  mock_client.tables.Get.return_value = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId='project', datasetId='dataset', tableId='table'),
      schema=bigquery.TableSchema())
  # Report one existing row so the table is considered non-empty.
  mock_client.tabledata.List.return_value = bigquery.TableDataList(
      totalRows=1)
  sink = beam.io.BigQuerySink(
      'project:dataset.table',
      write_disposition=beam.io.BigQueryDisposition.WRITE_EMPTY)
  with self.assertRaisesRegexp(
      RuntimeError,
      r'Table project:dataset\.table is not empty but write '
      r'disposition is WRITE_EMPTY'):
    with sink.writer(mock_client):
      pass
def test_no_table_and_create_if_needed(self):
  """CREATE_IF_NEEDED must Insert the table when Get returns 404."""
  mock_client = mock.Mock()
  created_table = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId='project', datasetId='dataset', tableId='table'),
      schema=bigquery.TableSchema())
  # Simulate a missing table.
  mock_client.tables.Get.side_effect = HttpError(
      response={'status': '404'}, url='', content='')
  mock_client.tables.Insert.return_value = created_table
  sink = beam.io.BigQuerySink(
      'project:dataset.table',
      schema='somefield:INTEGER',
      create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED)
  with sink.writer(mock_client):
    pass
  self.assertTrue(mock_client.tables.Get.called)
  self.assertTrue(mock_client.tables.Insert.called)
def _create_table(self, project_id, dataset_id, table_id, schema,
                  additional_parameters=None):
  """Create a table, forwarding any extra keyword args to bigquery.Table.

  Returns:
    The created table as a ``bigquery.Table`` instance.
  """
  extra_kwargs = additional_parameters or {}
  new_table = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId=project_id, datasetId=dataset_id, tableId=table_id),
      schema=schema,
      **extra_kwargs)
  insert_request = bigquery.BigqueryTablesInsertRequest(
      projectId=project_id, datasetId=dataset_id, table=new_table)
  response = self.client.tables.Insert(insert_request)
  logging.debug("Created the table with id %s", table_id)
  # The response is a bigquery.Table instance.
  return response
def test_existing_sample_table(self):
  """Validation fails when the sample-info output table already exists."""
  args = self._make_args([
      '--append', 'False',
      '--output_table', 'project:dataset.table',
      '--sharding_config_path',
      'gcp_variant_transforms/testing/data/sharding_configs/'
      'residual_at_end.yaml'
  ])
  mock_client = mock.Mock()
  # Get succeeds, i.e. the sample-info table is already present.
  mock_client.tables.Get.return_value = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId='project',
          datasetId='dataset',
          tableId='table__sample_info'))
  with self.assertRaisesRegexp(
      ValueError, 'project:dataset.table__sample_info already exists'):
    self._options.validate(args, mock_client)
def test_table_empty_and_write_disposition_empty(self):
  """WRITE_EMPTY on an empty table proceeds without delete or recreate."""
  mock_client = mock.Mock()
  existing_table = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId='project', datasetId='dataset', tableId='table'),
      schema=bigquery.TableSchema())
  mock_client.tables.Get.return_value = existing_table
  # Zero rows — the emptiness check should pass.
  mock_client.tabledata.List.return_value = bigquery.TableDataList(
      totalRows=0)
  mock_client.tables.Insert.return_value = existing_table
  sink = beam.io.BigQuerySink(
      'project:dataset.table',
      write_disposition=beam.io.BigQueryDisposition.WRITE_EMPTY)
  with sink.writer(mock_client):
    pass
  self.assertTrue(mock_client.tables.Get.called)
  self.assertTrue(mock_client.tabledata.List.called)
  self.assertFalse(mock_client.tables.Delete.called)
  self.assertFalse(mock_client.tables.Insert.called)
def test_table_exist(self):
  """table_exist(): True on success, False on 404, re-raise other errors."""
  mock_client = mock.Mock()
  # Table present.
  mock_client.tables.Get.return_value = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId='project', datasetId='dataset', tableId='table'))
  self.assertEqual(
      bigquery_util.table_exist(mock_client, 'project', 'dataset', 'table'),
      True)
  # 404 means "does not exist", not an error.
  mock_client.tables.Get.side_effect = exceptions.HttpError(
      response={'status': '404'}, url='', content='')
  self.assertEqual(
      bigquery_util.table_exist(mock_client, 'project', 'dataset', 'table'),
      False)
  # Any other HTTP status propagates as an exception.
  mock_client.tables.Get.side_effect = exceptions.HttpError(
      response={'status': '401'}, url='', content='')
  self.assertRaises(
      exceptions.HttpError,
      bigquery_util.table_exist,
      mock_client, 'project', 'dataset', 'table')
def test_dofn_client_start_bundle_called(self):
  """start_bundle() looks the table up via tables.Get."""
  mock_client = mock.Mock()
  mock_client.tables.Get.return_value = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId='project_id',
          datasetId='dataset_id',
          tableId='table_id'))
  write_fn = beam.io.gcp.bigquery.BigQueryWriteFn(
      table_id='table_id',
      dataset_id='dataset_id',
      project_id='project_id',
      batch_size=2,
      schema='month:INTEGER',
      create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER,
      write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
      client=mock_client)
  write_fn.start_bundle()
  self.assertTrue(mock_client.tables.Get.called)
def create_table(cls, table_name):
  """Create a (number INTEGER, str STRING) table and load cls.TABLE_DATA."""
  field_specs = [('number', 'INTEGER'), ('str', 'STRING')]
  table_schema = bigquery.TableSchema()
  for field_name, field_type in field_specs:
    field = bigquery.TableFieldSchema()
    field.name = field_name
    field.type = field_type
    table_schema.fields.append(field)
  new_table = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId=cls.project,
          datasetId=cls.dataset_id,
          tableId=table_name),
      schema=table_schema)
  insert_request = bigquery.BigqueryTablesInsertRequest(
      projectId=cls.project, datasetId=cls.dataset_id, table=new_table)
  cls.bigquery_client.client.tables.Insert(insert_request)
  cls.bigquery_client.insert_rows(
      cls.project, cls.dataset_id, table_name, cls.TABLE_DATA)
def test_rows_are_written(self):
  """A row written through BigQuerySink reaches insert_rows_json intact."""
  sample_row = {'i': 1, 'b': True, 's': 'abc', 'f': 3.14}
  mock_client = mock.Mock()
  mock_client.tables.Get.return_value = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId='project', datasetId='dataset', tableId='table'),
      schema=bigquery.TableSchema())
  # Empty error list means the insert succeeded.
  mock_client.insert_rows_json.return_value = []
  sink = beam.io.BigQuerySink(
      'project:dataset.table',
      write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND)
  with sink.writer(mock_client) as writer:
    writer.Write({'i': 1, 'b': True, 's': 'abc', 'f': 3.14})
  mock_client.insert_rows_json.assert_called_with(
      gcp_bigquery.TableReference(
          gcp_bigquery.DatasetReference('project', 'dataset'), 'table'),
      json_rows=[sample_row],
      row_ids=['_1'],
      skip_invalid_rows=True)
def test_dofn_client_start_bundle_create_called(self):
  """start_bundle() creates the table when Get yields no table."""
  mock_client = mock.Mock()
  # Get returns None — no table found.
  mock_client.tables.Get.return_value = None
  mock_client.tables.Insert.return_value = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId='project_id',
          datasetId='dataset_id',
          tableId='table_id'))
  table_schema = {
      'fields': [{'name': 'month', 'type': 'INTEGER', 'mode': 'NULLABLE'}]}
  write_fn = beam.io.gcp.bigquery.BigQueryWriteFn(
      table_id='table_id',
      dataset_id='dataset_id',
      project_id='project_id',
      batch_size=2,
      schema=table_schema,
      create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER,
      write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
      client=mock_client)
  write_fn.start_bundle()
  self.assertTrue(mock_client.tables.Get.called)
  self.assertTrue(mock_client.tables.Insert.called)