def test_rows_are_written(self):
    """Check that a written row is sent to BigQuery via tabledata.InsertAll.

    The client is a mock whose tables.Get returns an existing table and
    whose tabledata.InsertAll reports no insert errors. One row is written
    through a WRITE_APPEND sink writer, and the test asserts that InsertAll
    was called with a request carrying exactly that row.
    """
    client = mock.Mock()
    table = bigquery.Table(
        tableReference=bigquery.TableReference(
            projectId='project', datasetId='dataset', tableId='table'),
        schema=bigquery.TableSchema())
    client.tables.Get.return_value = table
    write_disposition = df.io.BigQueryDisposition.WRITE_APPEND

    insert_response = mock.Mock()
    insert_response.insertErrors = []
    client.tabledata.InsertAll.return_value = insert_response

    # Single source of truth for the row: the same dict is written and used
    # to build the expectation, so the two can never drift apart.
    sample_row = {'i': 1, 'b': True, 's': 'abc', 'f': 3.14}

    with df.io.BigQuerySink(
            'project:dataset.table',
            write_disposition=write_disposition).writer(client) as writer:
        writer.Write(sample_row)

    # items() (rather than the Python 2-only iteritems()) yields the same
    # pairs in the same order and works on every Python version.
    json_object = bigquery.JsonObject()
    for k, v in sample_row.items():
        json_object.additionalProperties.append(
            bigquery.JsonObject.AdditionalProperty(
                key=k, value=to_json_value(v)))

    expected_rows = [
        bigquery.TableDataInsertAllRequest.RowsValueListEntry(
            insertId='_1',  # First row ID generated with prefix ''
            json=json_object),
    ]
    client.tabledata.InsertAll.assert_called_with(
        bigquery.BigqueryTabledataInsertAllRequest(
            projectId='project',
            datasetId='dataset',
            tableId='table',
            tableDataInsertAllRequest=bigquery.TableDataInsertAllRequest(
                rows=expected_rows)))
def _create_table(self, project_id, dataset_id, table_id, schema):
    """Issue a tables.Insert request for a table with the given schema.

    Args:
      project_id: The project ID placed in the table reference and request.
      dataset_id: The dataset ID placed in the table reference and request.
      table_id: The table ID placed in the table reference.
      schema: The schema attached to the table being inserted.

    Returns:
      Whatever self.client.tables.Insert returns for the request.
    """
    reference = bigquery.TableReference(
        projectId=project_id, datasetId=dataset_id, tableId=table_id)
    new_table = bigquery.Table(tableReference=reference, schema=schema)
    insert_request = bigquery.BigqueryTablesInsertRequest(
        projectId=project_id, datasetId=dataset_id, table=new_table)
    # The response is a bigquery.Table instance.
    return self.client.tables.Insert(insert_request)
def test_table_with_write_disposition_append(self):
    """Opening a WRITE_APPEND writer on an existing table only looks it up.

    With tables.Get returning a table, entering and leaving the writer
    context must call tables.Get and must call neither tables.Delete nor
    tables.Insert.
    """
    existing_table = bigquery.Table(
        tableReference=bigquery.TableReference(
            projectId='project', datasetId='dataset', tableId='table'),
        schema=bigquery.TableSchema())

    client = mock.Mock()
    client.tables.Get.return_value = existing_table
    client.tables.Insert.return_value = existing_table

    sink = df.io.BigQuerySink(
        'project:dataset.table',
        write_disposition=df.io.BigQueryDisposition.WRITE_APPEND)
    with sink.writer(client):
        pass

    self.assertTrue(client.tables.Get.called)
    self.assertFalse(client.tables.Delete.called)
    self.assertFalse(client.tables.Insert.called)
def _parse_table_reference(table, dataset=None, project=None):
    """Builds a bigquery.TableReference from its string or split-out parts.

    Args:
      table: Either a bigquery.TableReference (returned unchanged, with
        dataset and project ignored), or a string. When dataset is None the
        string must be a complete reference of the form 'DATASET.TABLE' or
        'PROJECT:DATASET.TABLE'; otherwise it is used as the table ID as-is.
      dataset: The ID of the dataset containing the table, or None when the
        reference is given entirely by the table argument.
      project: The ID of the project containing the table, or None when the
        reference is given entirely by the table (and possibly dataset)
        argument. Only used when dataset is not None.

    Returns:
      A bigquery.TableReference with projectId, datasetId and tableId set.
      projectId is None when a 'DATASET.TABLE' string is parsed.

    Raises:
      ValueError: if dataset is None and the table string does not match
        the expected format.
    """
    # An existing reference is handed back untouched.
    if isinstance(table, bigquery.TableReference):
        return table

    if dataset is not None:
        # The caller supplied the pieces separately; use them verbatim.
        project_id, dataset_id, table_id = project, dataset, table
    else:
        # Without a dataset argument the table string must carry the whole
        # reference, with an optional 'PROJECT:' prefix.
        parsed = re.match(
            r'^((?P<project>.+):)?(?P<dataset>\w+)\.(?P<table>\w+)$', table)
        if parsed is None:
            raise ValueError(
                'Expected a table reference (PROJECT:DATASET.TABLE or '
                'DATASET.TABLE) instead of %s.' % table)
        project_id = parsed.group('project')
        dataset_id = parsed.group('dataset')
        table_id = parsed.group('table')

    reference = bigquery.TableReference()
    reference.projectId = project_id
    reference.datasetId = dataset_id
    reference.tableId = table_id
    return reference
def test_no_table_and_create_if_needed(self):
    """A missing table is created when the disposition is CREATE_IF_NEEDED.

    tables.Get is mocked to raise an HttpError with status '404', and the
    sink is given a schema. Entering the writer context must call both
    tables.Get and tables.Insert.
    """
    created_table = bigquery.Table(
        tableReference=bigquery.TableReference(
            projectId='project', datasetId='dataset', tableId='table'),
        schema=bigquery.TableSchema())

    client = mock.Mock()
    # Simulate "table not found" on lookup.
    client.tables.Get.side_effect = HttpError(
        response={'status': '404'}, url='', content='')
    client.tables.Insert.return_value = created_table

    sink = df.io.BigQuerySink(
        'project:dataset.table',
        schema='somefield:INTEGER',
        create_disposition=df.io.BigQueryDisposition.CREATE_IF_NEEDED)
    with sink.writer(client):
        pass

    self.assertTrue(client.tables.Get.called)
    self.assertTrue(client.tables.Insert.called)
def test_table_empty_and_write_disposition_empty(self):
    """WRITE_EMPTY succeeds on an existing table that reports zero rows.

    tables.Get returns a table and tabledata.List returns totalRows=0.
    Entering the writer context must call tables.Get and tabledata.List,
    and must call neither tables.Delete nor tables.Insert.
    """
    empty_table = bigquery.Table(
        tableReference=bigquery.TableReference(
            projectId='project', datasetId='dataset', tableId='table'),
        schema=bigquery.TableSchema())
    no_rows = bigquery.TableDataList(totalRows=0)

    client = mock.Mock()
    client.tables.Get.return_value = empty_table
    client.tabledata.List.return_value = no_rows
    client.tables.Insert.return_value = empty_table

    sink = df.io.BigQuerySink(
        'project:dataset.table',
        write_disposition=df.io.BigQueryDisposition.WRITE_EMPTY)
    with sink.writer(client):
        pass

    self.assertTrue(client.tables.Get.called)
    self.assertTrue(client.tabledata.List.called)
    self.assertFalse(client.tables.Delete.called)
    self.assertFalse(client.tables.Insert.called)
def test_table_not_empty_and_write_disposition_empty(self):
    """WRITE_EMPTY on a table that already has rows raises RuntimeError.

    tables.Get returns a table and tabledata.List returns totalRows=1.
    Entering the writer context must raise a RuntimeError whose text names
    the table and the WRITE_EMPTY disposition.
    """
    client = mock.Mock()
    client.tables.Get.return_value = bigquery.Table(
        tableReference=bigquery.TableReference(
            projectId='project', datasetId='dataset', tableId='table'),
        schema=bigquery.TableSchema())
    client.tabledata.List.return_value = bigquery.TableDataList(totalRows=1)
    write_disposition = df.io.BigQueryDisposition.WRITE_EMPTY

    with self.assertRaises(RuntimeError) as exn:
        with df.io.BigQuerySink(
                'project:dataset.table',
                write_disposition=write_disposition).writer(client):
            pass

    # str(exception) is used instead of the .message attribute, which has
    # been deprecated since Python 2.6 and does not exist in Python 3; for
    # an exception raised with a single string argument both give the same
    # text.
    self.assertEqual(
        str(exn.exception),
        'Table project:dataset.table is not empty but write disposition is '
        'WRITE_EMPTY.')