Example #1
  def insert_rows(self, project_id, dataset_id, table_id, rows):
    """Inserts rows into the specified table.

    Args:
      project_id: The project id owning the table.
      dataset_id: The dataset id owning the table.
      table_id: The table id.
      rows: A list of plain Python dictionaries. Each dictionary is a row and
        each key in it is the name of a field.

    Returns:
      A tuple (bool, errors). If the first element is False then the second
      element will be a bigquery.InsertErrorsValueListEntry instance containing
      specific errors.
    """

    # Prepare rows for insertion. Of special note is the row ID that we add to
    # each row in order to help BigQuery avoid inserting the same row multiple
    # times. BigQuery does best-effort deduplication when unique insert IDs
    # are provided; duplicate inserts can otherwise happen during retries on
    # failures.
    # TODO(silviuc): Must add support for writing TableRow's instead of dicts.
    final_rows = []
    for row in rows:
      json_object = bigquery.JsonObject()
      # Map each dictionary key/value onto a JSON property of the request row.
      for k, v in row.items():
        json_object.additionalProperties.append(
            bigquery.JsonObject.AdditionalProperty(
                key=k, value=to_json_value(v)))
      final_rows.append(
          bigquery.TableDataInsertAllRequest.RowsValueListEntry(
              insertId=str(self.unique_row_id),
              json=json_object))
    result, errors = self._insert_all_rows(
        project_id, dataset_id, table_id, final_rows)
    return result, errors
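
A minimal usage sketch for the method above, assuming `wrapper` is an instance of the class defining insert_rows (a BigQueryWrapper-style helper); the name `wrapper` and the project/dataset/table values are illustrative only:

# Illustrative caller; `wrapper` and the identifiers below are assumptions.
rows = [
    {'name': 'Alice', 'score': 42},
    {'name': 'Bob', 'score': 17},
]
passed, errors = wrapper.insert_rows(
    'my-project', 'my_dataset', 'my_table', rows)
if not passed:
  # Per the docstring, `errors` carries the per-row insert errors returned
  # by the BigQuery tabledata.insertAll API.
  raise RuntimeError('Failed to insert rows: %s' % errors)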
Example #2
  def test_rows_are_written(self):
    client = mock.Mock()
    table = bigquery.Table(
        tableReference=bigquery.TableReference(
            projectId='project', datasetId='dataset', tableId='table'),
        schema=bigquery.TableSchema())
    client.tables.Get.return_value = table
    write_disposition = beam.io.BigQueryDisposition.WRITE_APPEND

    insert_response = mock.Mock()
    insert_response.insertErrors = []
    client.tabledata.InsertAll.return_value = insert_response

    with beam.io.BigQuerySink(
        'project:dataset.table',
        write_disposition=write_disposition).writer(client) as writer:
      writer.Write({'i': 1, 'b': True, 's': 'abc', 'f': 3.14})

    sample_row = {'i': 1, 'b': True, 's': 'abc', 'f': 3.14}
    expected_rows = []
    json_object = bigquery.JsonObject()
    for k, v in sample_row.items():
      json_object.additionalProperties.append(
          bigquery.JsonObject.AdditionalProperty(
              key=k, value=to_json_value(v)))
    expected_rows.append(
        bigquery.TableDataInsertAllRequest.RowsValueListEntry(
            insertId='_1',  # First row ID generated with prefix ''
            json=json_object))
    client.tabledata.InsertAll.assert_called_with(
        bigquery.BigqueryTabledataInsertAllRequest(
            projectId='project', datasetId='dataset', tableId='table',
            tableDataInsertAllRequest=bigquery.TableDataInsertAllRequest(
                rows=expected_rows)))
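
The `insertId='_1'` assertion above depends on how the writer generates row IDs. Below is a hedged sketch of one counter-based generator consistent with the "prefix plus counter" scheme the test's comment implies; the class name and fields are assumptions for illustration, not Beam's actual internals:

class RowIdGenerator(object):
  """Illustrative row ID generator (an assumption, not Beam's real code)."""

  def __init__(self, prefix=''):
    # With the default empty prefix, successive IDs are '_1', '_2', ...,
    # matching the '_1' expected for the first row in the test above.
    self._prefix = prefix
    self._count = 0

  @property
  def unique_row_id(self):
    self._count += 1
    return '%s_%d' % (self._prefix, self._count)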