def test_rows_are_written(self):
    """Check that a written row is forwarded to ``tabledata.InsertAll``.

    The mocked client returns an existing table from ``tables.Get`` and an
    ``InsertAll`` response with no insert errors. One row is written through
    the sink's writer, and once the writer context has been left the most
    recent ``InsertAll`` call is compared against a request holding that row
    as a single JSON entry.
    """
    client = mock.Mock()
    table = bigquery.Table(
        tableReference=bigquery.TableReference(
            projectId='project', datasetId='dataset', tableId='table'),
        schema=bigquery.TableSchema())
    client.tables.Get.return_value = table
    write_disposition = beam.io.BigQueryDisposition.WRITE_APPEND
    insert_response = mock.Mock()
    insert_response.insertErrors = []
    client.tabledata.InsertAll.return_value = insert_response

    # A single dict is both written and used to build the expectation, so the
    # two cannot drift apart and the key iteration order is shared.
    sample_row = {'i': 1, 'b': True, 's': 'abc', 'f': 3.14}
    with beam.io.BigQuerySink(
            'project:dataset.table',
            write_disposition=write_disposition).writer(client) as writer:
        writer.Write(sample_row)

    json_object = bigquery.JsonObject()
    # items() instead of iteritems(): the latter does not exist on Python 3,
    # while items() behaves the same here on both Python 2 and 3.
    for k, v in sample_row.items():
        json_object.additionalProperties.append(
            bigquery.JsonObject.AdditionalProperty(
                key=k, value=to_json_value(v)))
    expected_rows = [
        bigquery.TableDataInsertAllRequest.RowsValueListEntry(
            insertId='_1',  # First row ID generated with prefix ''
            json=json_object)]
    client.tabledata.InsertAll.assert_called_with(
        bigquery.BigqueryTabledataInsertAllRequest(
            projectId='project', datasetId='dataset', tableId='table',
            tableDataInsertAllRequest=bigquery.TableDataInsertAllRequest(
                rows=expected_rows)))
def _create_table(self, project_id, dataset_id, table_id, schema):
    """Issue a ``tables.Insert`` request for the given table and schema.

    Args:
      project_id: Value used for the project ID of the table reference and
        of the insert request.
      dataset_id: Value used for the dataset ID of the table reference and
        of the insert request.
      table_id: Value used for the table ID of the table reference.
      schema: Schema object attached to the new table definition.

    Returns:
      Whatever ``self.client.tables.Insert`` returns; the response is a
      bigquery.Table instance.
    """
    reference = bigquery.TableReference(
        projectId=project_id, datasetId=dataset_id, tableId=table_id)
    new_table = bigquery.Table(tableReference=reference, schema=schema)
    insert_request = bigquery.BigqueryTablesInsertRequest(
        projectId=project_id, datasetId=dataset_id, table=new_table)
    return self.client.tables.Insert(insert_request)
def test_table_with_write_disposition_append(self):
    """WRITE_APPEND on an existing table: looked up, not deleted or created."""
    table_ref = bigquery.TableReference(
        projectId='project', datasetId='dataset', tableId='table')
    existing = bigquery.Table(
        tableReference=table_ref, schema=bigquery.TableSchema())

    client = mock.Mock()
    client.tables.Get.return_value = existing
    client.tables.Insert.return_value = existing

    sink = beam.io.BigQuerySink(
        'project:dataset.table',
        write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND)
    # Entering and leaving the writer context is all this test exercises.
    with sink.writer(client):
        pass

    tables_api = client.tables
    self.assertTrue(tables_api.Get.called)
    self.assertFalse(tables_api.Delete.called)
    self.assertFalse(tables_api.Insert.called)
def test_no_table_and_create_if_needed(self):
    """CREATE_IF_NEEDED with a 404 from ``tables.Get``: Insert is called."""
    table_ref = bigquery.TableReference(
        projectId='project', datasetId='dataset', tableId='table')
    created = bigquery.Table(
        tableReference=table_ref, schema=bigquery.TableSchema())

    client = mock.Mock()
    # The lookup fails with an HTTP 404 error, i.e. the table is missing.
    not_found = HttpError(response={'status': '404'}, url='', content='')
    client.tables.Get.side_effect = not_found
    client.tables.Insert.return_value = created

    sink = beam.io.BigQuerySink(
        'project:dataset.table',
        schema='somefield:INTEGER',
        create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED)
    with sink.writer(client):
        pass

    tables_api = client.tables
    self.assertTrue(tables_api.Get.called)
    self.assertTrue(tables_api.Insert.called)
def test_table_empty_and_write_disposition_empty(self):
    """WRITE_EMPTY on a table with zero rows: listed, not deleted or created."""
    table_ref = bigquery.TableReference(
        projectId='project', datasetId='dataset', tableId='table')
    existing = bigquery.Table(
        tableReference=table_ref, schema=bigquery.TableSchema())

    client = mock.Mock()
    client.tables.Get.return_value = existing
    client.tables.Insert.return_value = existing
    # The row listing reports zero total rows.
    client.tabledata.List.return_value = bigquery.TableDataList(totalRows=0)

    sink = beam.io.BigQuerySink(
        'project:dataset.table',
        write_disposition=beam.io.BigQueryDisposition.WRITE_EMPTY)
    with sink.writer(client):
        pass

    self.assertTrue(client.tables.Get.called)
    self.assertTrue(client.tabledata.List.called)
    self.assertFalse(client.tables.Delete.called)
    self.assertFalse(client.tables.Insert.called)
def test_table_not_empty_and_write_disposition_empty(
        self, patched_time_sleep):
    """WRITE_EMPTY on a table that already has rows raises RuntimeError.

    Args:
      patched_time_sleep: Unused in the body. Presumably injected by a
        ``mock.patch`` decorator on ``time.sleep`` that is not visible in
        this view -- confirm against the full file.
    """
    client = mock.Mock()
    client.tables.Get.return_value = bigquery.Table(
        tableReference=bigquery.TableReference(
            projectId='project', datasetId='dataset', tableId='table'),
        schema=bigquery.TableSchema())
    # The row listing reports one existing row.
    client.tabledata.List.return_value = bigquery.TableDataList(totalRows=1)
    write_disposition = beam.io.BigQueryDisposition.WRITE_EMPTY
    with self.assertRaises(RuntimeError) as exn:
        with beam.io.BigQuerySink(
                'project:dataset.table',
                write_disposition=write_disposition).writer(client):
            pass
    # str() instead of the .message attribute: BaseException.message was
    # deprecated by PEP 352 and does not exist on Python 3, whereas str()
    # of a single-argument exception yields the same text on 2 and 3.
    self.assertEqual(
        str(exn.exception),
        'Table project:dataset.table is not empty but write disposition is '
        'WRITE_EMPTY.')