Ejemplo n.º 1
0
    def test_rows_are_written(self):
        """Checks that a row handed to the writer reaches tabledata.InsertAll.

        The client is a mock whose tables.Get returns an existing table and
        whose tabledata.InsertAll reports no insert errors. One row is written
        under WRITE_APPEND, and the test then asserts that the most recent
        InsertAll call carried a request holding exactly that row, converted
        with to_json_value, under insertId '_1'.
        """
        client = mock.Mock()
        table = bigquery.Table(tableReference=bigquery.TableReference(
            projectId='project', datasetId='dataset', tableId='table'),
                               schema=bigquery.TableSchema())
        client.tables.Get.return_value = table
        write_disposition = beam.io.BigQueryDisposition.WRITE_APPEND

        insert_response = mock.Mock()
        insert_response.insertErrors = []
        client.tabledata.InsertAll.return_value = insert_response

        # Single source of truth for the row that is both written and expected.
        sample_row = {'i': 1, 'b': True, 's': 'abc', 'f': 3.14}

        with beam.io.BigQuerySink(
                'project:dataset.table',
                write_disposition=write_disposition).writer(client) as writer:
            # The writer gets its own copy so the expectation below is built
            # from a dict the code under test never touched.
            writer.Write(dict(sample_row))

        json_object = bigquery.JsonObject()
        # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
        for k, v in sample_row.items():
            json_object.additionalProperties.append(
                bigquery.JsonObject.AdditionalProperty(key=k,
                                                       value=to_json_value(v)))
        expected_rows = [
            bigquery.TableDataInsertAllRequest.RowsValueListEntry(
                insertId='_1',  # First row ID generated with prefix ''
                json=json_object),
        ]
        client.tabledata.InsertAll.assert_called_with(
            bigquery.BigqueryTabledataInsertAllRequest(
                projectId='project',
                datasetId='dataset',
                tableId='table',
                tableDataInsertAllRequest=bigquery.TableDataInsertAllRequest(
                    rows=expected_rows)))
Ejemplo n.º 2
0
 def _create_table(self, project_id, dataset_id, table_id, schema):
   """Issues a tables.Insert request for a new table and returns the response.

   Args:
     project_id: Value used as projectId in both the table reference and the
       insert request.
     dataset_id: Value used as datasetId in both the table reference and the
       insert request.
     table_id: Value used as tableId in the table reference.
     schema: Schema attached to the new bigquery.Table message.

   Returns:
     Whatever self.client.tables.Insert returns for the request.
   """
   reference = bigquery.TableReference(
       projectId=project_id, datasetId=dataset_id, tableId=table_id)
   insert_request = bigquery.BigqueryTablesInsertRequest(
       projectId=project_id,
       datasetId=dataset_id,
       table=bigquery.Table(tableReference=reference, schema=schema))
   # The response is a bigquery.Table instance.
   return self.client.tables.Insert(insert_request)
Ejemplo n.º 3
0
def _parse_table_reference(table, dataset=None, project=None):
    """Builds a bigquery.TableReference from a table spec.

    Args:
        table: The ID of the table. The ID must contain only letters
            (a-z, A-Z), numbers (0-9), or underscores (_). When dataset is
            None this must be the entire table reference, either
            'DATASET.TABLE' or 'PROJECT:DATASET.TABLE'. It may also be a
            bigquery.TableReference instance, in which case dataset and
            project are ignored and the instance itself is returned.
            For date partitioned tables a '$YYYYmmdd' suffix on the table
            name is accepted, e.g. 'DATASET.TABLE$YYYYmmdd'.
        dataset: The ID of the dataset containing the table, or None if the
            table argument carries the whole reference.
        project: The ID of the project containing the table, or None if the
            reference is given entirely by table (and possibly dataset).

    Returns:
        A bigquery.TableReference object with projectId, datasetId and
        tableId set.

    Raises:
        ValueError: if dataset is None and the table string does not match
            the expected format.
    """
    # An already-built reference is passed through untouched.
    if isinstance(table, bigquery.TableReference):
        return table

    # With an explicit dataset the arguments are used verbatim.
    if dataset is not None:
        return bigquery.TableReference(
            projectId=project, datasetId=dataset, tableId=table)

    # Otherwise the table argument has to hold the full reference rather
    # than just a table name.
    parsed = re.match(
        r'^((?P<project>.+):)?(?P<dataset>\w+)\.(?P<table>[\w\$]+)$',
        table)
    if parsed is None:
        raise ValueError(
            'Expected a table reference (PROJECT:DATASET.TABLE or '
            'DATASET.TABLE) instead of %s.' % table)
    return bigquery.TableReference(
        projectId=parsed.group('project'),
        datasetId=parsed.group('dataset'),
        tableId=parsed.group('table'))
Ejemplo n.º 4
0
 def test_table_with_write_disposition_append(self):
     """Opening a WRITE_APPEND writer on an existing table only calls Get.

     The mocked client returns a table from tables.Get; the test asserts
     that Get was called while neither tables.Delete nor tables.Insert was.
     """
     existing_table = bigquery.Table(
         tableReference=bigquery.TableReference(
             projectId='project', datasetId='dataset', tableId='table'),
         schema=bigquery.TableSchema())
     client = mock.Mock()
     client.tables.Get.return_value = existing_table
     client.tables.Insert.return_value = existing_table

     sink = beam.io.BigQuerySink(
         'project:dataset.table',
         write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND)
     # Entering and leaving the writer context is all this test exercises.
     with sink.writer(client):
         pass

     self.assertTrue(client.tables.Get.called)
     self.assertFalse(client.tables.Delete.called)
     self.assertFalse(client.tables.Insert.called)
Ejemplo n.º 5
0
 def test_no_table_and_create_if_needed(self):
     """With CREATE_IF_NEEDED and a failing Get, the writer calls Insert.

     tables.Get on the mocked client raises an HttpError built with status
     '404'; the test asserts that both tables.Get and tables.Insert were
     called while the writer context was open.
     """
     created_table = bigquery.Table(
         tableReference=bigquery.TableReference(
             projectId='project', datasetId='dataset', tableId='table'),
         schema=bigquery.TableSchema())
     not_found = HttpError(response={'status': '404'}, url='', content='')

     client = mock.Mock()
     client.tables.Get.side_effect = not_found
     client.tables.Insert.return_value = created_table

     sink = beam.io.BigQuerySink(
         'project:dataset.table',
         schema='somefield:INTEGER',
         create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED)
     # Entering and leaving the writer context is all this test exercises.
     with sink.writer(client):
         pass

     self.assertTrue(client.tables.Get.called)
     self.assertTrue(client.tables.Insert.called)
Ejemplo n.º 6
0
 def test_table_empty_and_write_disposition_empty(self):
     """WRITE_EMPTY on a table reporting zero rows leaves the table alone.

     The mocked client returns a table from tables.Get and a TableDataList
     with totalRows=0 from tabledata.List. The test asserts that Get and
     List were called and that neither tables.Delete nor tables.Insert was.
     """
     existing_table = bigquery.Table(
         tableReference=bigquery.TableReference(
             projectId='project', datasetId='dataset', tableId='table'),
         schema=bigquery.TableSchema())
     no_rows = bigquery.TableDataList(totalRows=0)

     client = mock.Mock()
     client.tables.Get.return_value = existing_table
     client.tabledata.List.return_value = no_rows
     client.tables.Insert.return_value = existing_table

     sink = beam.io.BigQuerySink(
         'project:dataset.table',
         write_disposition=beam.io.BigQueryDisposition.WRITE_EMPTY)
     # Entering and leaving the writer context is all this test exercises.
     with sink.writer(client):
         pass

     self.assertTrue(client.tables.Get.called)
     self.assertTrue(client.tabledata.List.called)
     self.assertFalse(client.tables.Delete.called)
     self.assertFalse(client.tables.Insert.called)
Ejemplo n.º 7
0
 def test_table_not_empty_and_write_disposition_empty(
     self, patched_time_sleep):
   """WRITE_EMPTY on a table reporting one row must raise RuntimeError.

   The mocked client returns a table from tables.Get and a TableDataList
   with totalRows=1 from tabledata.List. Opening the writer is expected to
   raise RuntimeError with a message naming the table and the disposition.

   Args:
     patched_time_sleep: Not used in the body. NOTE(review): presumably
       injected by a mock.patch decorator on time.sleep that sits outside
       this view -- confirm.
   """
   client = mock.Mock()
   client.tables.Get.return_value = bigquery.Table(
       tableReference=bigquery.TableReference(
           projectId='project', datasetId='dataset', tableId='table'),
       schema=bigquery.TableSchema())
   client.tabledata.List.return_value = bigquery.TableDataList(totalRows=1)
   write_disposition = beam.io.BigQueryDisposition.WRITE_EMPTY
   with self.assertRaises(RuntimeError) as exn:
     with beam.io.BigQuerySink(
         'project:dataset.table',
         write_disposition=write_disposition).writer(client):
       pass
   # BaseException.message was removed in Python 3; str() yields the same
   # text for a single-argument exception on both Python 2 and 3.
   self.assertEqual(
       str(exn.exception),
       'Table project:dataset.table is not empty but write disposition is '
       'WRITE_EMPTY.')