def insert_rows(self, project_id, dataset_id, table_id, rows):
  """Inserts rows into the specified table.

  Args:
    project_id: The project id owning the table.
    dataset_id: The dataset id owning the table.
    table_id: The table id.
    rows: A list of plain Python dictionaries. Each dictionary is a row and
      each key in it is the name of a field.

  Returns:
    A tuple (bool, errors). If the first element is False then the second
    element will be a bigquery.InsertErrorsValueListEntry instance containing
    specific errors.
  """
  # Prepare rows for insertion. Of special note is the row ID that we add to
  # each row in order to help BigQuery avoid inserting a row multiple times.
  # BigQuery will make a best-effort attempt to deduplicate rows if unique
  # IDs are provided. Duplicate inserts can happen during retries on failures.
  # TODO(silviuc): Must add support to writing TableRow's instead of dicts.
  final_rows = []
  for row in rows:
    json_object = bigquery.JsonObject()
    for k, v in row.iteritems():
      json_object.additionalProperties.append(
          bigquery.JsonObject.AdditionalProperty(
              key=k, value=to_json_value(v)))
    final_rows.append(
        bigquery.TableDataInsertAllRequest.RowsValueListEntry(
            insertId=str(self.unique_row_id),
            json=json_object))
  result, errors = self._insert_all_rows(
      project_id, dataset_id, table_id, final_rows)
  return result, errors
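
# insert_rows relies on self.unique_row_id to give every row a distinct
# insertId, which is what lets BigQuery deduplicate retried inserts. The
# property itself is not shown above; the following is a minimal,
# hypothetical sketch of one way it could work (the class and attribute
# names are assumptions), chosen to match the '_1' value the test below
# expects when the ID prefix is the empty string.
class _UniqueRowIdSketch(object):

  def __init__(self, prefix=''):
    self._row_id_prefix = prefix  # Assumed: '' in the test scenario below.
    self._row_id_counter = 0

  @property
  def unique_row_id(self):
    # Each access yields a fresh ID: '_1', '_2', ... for an empty prefix.
    self._row_id_counter += 1
    return '%s_%d' % (self._row_id_prefix, self._row_id_counter)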
def test_rows_are_written(self):
  client = mock.Mock()
  table = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId='project', datasetId='dataset', tableId='table'),
      schema=bigquery.TableSchema())
  client.tables.Get.return_value = table
  write_disposition = beam.io.BigQueryDisposition.WRITE_APPEND

  insert_response = mock.Mock()
  insert_response.insertErrors = []
  client.tabledata.InsertAll.return_value = insert_response

  with beam.io.BigQuerySink(
      'project:dataset.table',
      write_disposition=write_disposition).writer(client) as writer:
    writer.Write({'i': 1, 'b': True, 's': 'abc', 'f': 3.14})

  sample_row = {'i': 1, 'b': True, 's': 'abc', 'f': 3.14}
  expected_rows = []
  json_object = bigquery.JsonObject()
  for k, v in sample_row.iteritems():
    json_object.additionalProperties.append(
        bigquery.JsonObject.AdditionalProperty(
            key=k, value=to_json_value(v)))
  expected_rows.append(
      bigquery.TableDataInsertAllRequest.RowsValueListEntry(
          insertId='_1',  # First row ID generated, with prefix ''.
          json=json_object))
  client.tabledata.InsertAll.assert_called_with(
      bigquery.BigqueryTabledataInsertAllRequest(
          projectId='project', datasetId='dataset', tableId='table',
          tableDataInsertAllRequest=bigquery.TableDataInsertAllRequest(
              rows=expected_rows)))
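
# Both snippets call to_json_value to wrap plain Python values into the JSON
# value wrappers expected by the generated BigQuery client. A simplified,
# hypothetical sketch of that conversion is below, assuming the apitools
# extra_types.JsonValue message with boolean_value/integer_value/double_value/
# string_value fields; the real helper also handles None, lists, and dicts.
from apitools.base.py import extra_types


def to_json_value_sketch(obj):
  # bool must be tested before int because bool is a subclass of int.
  if isinstance(obj, bool):
    return extra_types.JsonValue(boolean_value=obj)
  elif isinstance(obj, int):
    return extra_types.JsonValue(integer_value=obj)
  elif isinstance(obj, float):
    return extra_types.JsonValue(double_value=obj)
  elif isinstance(obj, str):
    return extra_types.JsonValue(string_value=obj)
  raise TypeError('Unsupported type: %s' % type(obj))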