Example 1
  def test_dofn_client_start_bundle_create_called(self):
    client = mock.Mock()
    client.tables.Get.side_effect = HttpError(
        response={'status': '404'}, url='', content='')
    client.tables.Insert.return_value = bigquery.Table(
        tableReference=bigquery.TableReference(
            projectId='project_id', datasetId='dataset_id', tableId='table_id'))
    create_disposition = beam.io.BigQueryDisposition.CREATE_IF_NEEDED
    write_disposition = beam.io.BigQueryDisposition.WRITE_APPEND
    schema = {'fields': [
        {'name': 'month', 'type': 'INTEGER', 'mode': 'NULLABLE'}]}

    fn = beam.io.gcp.bigquery.BigQueryWriteFn(
        table_id='table_id',
        dataset_id='dataset_id',
        project_id='project_id',
        batch_size=2,
        schema=schema,
        create_disposition=create_disposition,
        write_disposition=write_disposition,
        kms_key='kms_key',
        test_client=client)

    fn.start_bundle()
    self.assertTrue(client.tables.Get.called)
    self.assertTrue(client.tables.Insert.called)
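
These snippets are lifted from test modules and omit their imports. A minimal preamble that would make Example 1 runnable, assuming the apitools-based BigQuery client bundled with older Apache Beam releases (the module paths below are an assumption based on that layout):

# Presumed imports for these test snippets.
import mock

import apache_beam as beam
from apache_beam.io.gcp.internal.clients import bigquery
from apitools.base.py.exceptions import HttpError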
Example 2
 def create_table(self, table_name):
     table_schema = bigquery.TableSchema()
     table_field = bigquery.TableFieldSchema()
     table_field.name = 'int64'
     table_field.type = 'INT64'
     table_field.mode = 'REQUIRED'
     table_schema.fields.append(table_field)
     table_field = bigquery.TableFieldSchema()
     table_field.name = 'bytes'
     table_field.type = 'BYTES'
     table_schema.fields.append(table_field)
     table_field = bigquery.TableFieldSchema()
     table_field.name = 'date'
     table_field.type = 'DATE'
     table_schema.fields.append(table_field)
     table_field = bigquery.TableFieldSchema()
     table_field.name = 'time'
     table_field.type = 'TIME'
     table_schema.fields.append(table_field)
     table = bigquery.Table(tableReference=bigquery.TableReference(
         projectId=self.project,
         datasetId=self.dataset_id,
         tableId=table_name),
                            schema=table_schema)
     request = bigquery.BigqueryTablesInsertRequest(
         projectId=self.project, datasetId=self.dataset_id, table=table)
     self.bigquery_client.client.tables.Insert(request)
Example 3
    def create_table(cls, table_name):
        table_schema = bigquery.TableSchema()
        table_field = bigquery.TableFieldSchema()
        table_field.name = 'float'
        table_field.type = 'FLOAT'
        table_schema.fields.append(table_field)
        table_field = bigquery.TableFieldSchema()
        table_field.name = 'numeric'
        table_field.type = 'NUMERIC'
        table_schema.fields.append(table_field)
        table_field = bigquery.TableFieldSchema()
        table_field.name = 'bytes'
        table_field.type = 'BYTES'
        table_schema.fields.append(table_field)
        table_field = bigquery.TableFieldSchema()
        table_field.name = 'date'
        table_field.type = 'DATE'
        table_schema.fields.append(table_field)
        table_field = bigquery.TableFieldSchema()
        table_field.name = 'time'
        table_field.type = 'TIME'
        table_schema.fields.append(table_field)
        table_field = bigquery.TableFieldSchema()
        table_field.name = 'datetime'
        table_field.type = 'DATETIME'
        table_schema.fields.append(table_field)
        table_field = bigquery.TableFieldSchema()
        table_field.name = 'timestamp'
        table_field.type = 'TIMESTAMP'
        table_schema.fields.append(table_field)
        table_field = bigquery.TableFieldSchema()
        table_field.name = 'geo'
        table_field.type = 'GEOGRAPHY'
        table_schema.fields.append(table_field)
        table = bigquery.Table(tableReference=bigquery.TableReference(
            projectId=cls.project,
            datasetId=cls.dataset_id,
            tableId=table_name),
                               schema=table_schema)
        request = bigquery.BigqueryTablesInsertRequest(
            projectId=cls.project, datasetId=cls.dataset_id, table=table)
        cls.bigquery_client.client.tables.Insert(request)
        row_data = {
            'float': 0.33,
            'numeric': Decimal('10'),
            'bytes': base64.b64encode(b'\xab\xac').decode('utf-8'),
            'date': '3000-12-31',
            'time': '23:59:59',
            'datetime': '2018-12-31T12:44:31',
            'timestamp': '2018-12-31 12:44:31.744957 UTC',
            'geo': 'POINT(30 10)'
        }

        table_data = [row_data]
        # Add rows that set only one field each; the remaining fields are left
        # unset and therefore NULL.
        for key, value in iteritems(row_data):
            table_data.append({key: value})

        cls.bigquery_client.insert_rows(cls.project, cls.dataset_id,
                                        table_name, table_data)
Example 4
  def test_dofn_client_finish_bundle_flush_called(self):
    client = mock.Mock()
    client.tables.Get.return_value = bigquery.Table(
        tableReference=bigquery.TableReference(
            projectId='project_id', datasetId='dataset_id', tableId='table_id'))
    client.tabledata.InsertAll.return_value = \
        bigquery.TableDataInsertAllResponse(insertErrors=[])
    create_disposition = beam.io.BigQueryDisposition.CREATE_NEVER
    write_disposition = beam.io.BigQueryDisposition.WRITE_APPEND
    schema = {'fields': [
        {'name': 'month', 'type': 'INTEGER', 'mode': 'NULLABLE'}]}

    fn = beam.io.gcp.bigquery.BigQueryWriteFn(
        table_id='table_id',
        dataset_id='dataset_id',
        project_id='project_id',
        batch_size=2,
        schema=schema,
        create_disposition=create_disposition,
        write_disposition=write_disposition,
        kms_key=None,
        test_client=client)

    fn.start_bundle()
    fn.process({'month': 1})

    self.assertTrue(client.tables.Get.called)
    # InsertAll is not called: the batch size has not been reached.
    self.assertFalse(client.tabledata.InsertAll.called)

    fn.finish_bundle()
    # InsertAll is called from finish_bundle to flush the buffered rows.
    self.assertTrue(client.tabledata.InsertAll.called)
Example 5
    def test_rows_are_written(self):
        client = mock.Mock()
        table = bigquery.Table(tableReference=bigquery.TableReference(
            projectId='project', datasetId='dataset', tableId='table'),
                               schema=bigquery.TableSchema())
        client.tables.Get.return_value = table
        write_disposition = beam.io.BigQueryDisposition.WRITE_APPEND

        insert_response = mock.Mock()
        insert_response.insertErrors = []
        client.tabledata.InsertAll.return_value = insert_response

        with beam.io.BigQuerySink(
                'project:dataset.table',
                write_disposition=write_disposition).writer(client) as writer:
            writer.Write({'i': 1, 'b': True, 's': 'abc', 'f': 3.14})

        sample_row = {'i': 1, 'b': True, 's': 'abc', 'f': 3.14}
        expected_rows = []
        json_object = bigquery.JsonObject()
        for k, v in iteritems(sample_row):
            json_object.additionalProperties.append(
                bigquery.JsonObject.AdditionalProperty(key=k,
                                                       value=to_json_value(v)))
        expected_rows.append(
            bigquery.TableDataInsertAllRequest.RowsValueListEntry(
                insertId='_1',  # First row ID generated with prefix ''
                json=json_object))
        client.tabledata.InsertAll.assert_called_with(
            bigquery.BigqueryTabledataInsertAllRequest(
                projectId='project',
                datasetId='dataset',
                tableId='table',
                tableDataInsertAllRequest=bigquery.TableDataInsertAllRequest(
                    rows=expected_rows)))
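
The expected-request construction in Example 5 depends on a JSON helper that the excerpt does not show; presumably Beam's internal converter:

# Presumed import: to_json_value wraps native Python values in the JsonValue
# messages that the apitools client expects.
from apache_beam.internal.gcp.json_value import to_json_value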
Example 6
    def test_dofn_client_process_performs_batching(self):
        client = mock.Mock()
        client.tables.Get.return_value = bigquery.Table(
            tableReference=bigquery.TableReference(projectId='project_id',
                                                   datasetId='dataset_id',
                                                   tableId='table_id'))
        client.tabledata.InsertAll.return_value = \
            bigquery.TableDataInsertAllResponse(insertErrors=[])
        create_disposition = beam.io.BigQueryDisposition.CREATE_NEVER
        write_disposition = beam.io.BigQueryDisposition.WRITE_APPEND

        fn = beam.io.gcp.bigquery.BigQueryWriteFn(
            table_id='table_id',
            dataset_id='dataset_id',
            project_id='project_id',
            batch_size=2,
            schema='month:INTEGER',
            create_disposition=create_disposition,
            write_disposition=write_disposition,
            client=client)

        fn.start_bundle()
        fn.process({'month': 1})

        self.assertTrue(client.tables.Get.called)
        # InsertAll is not called: the batch size has not been reached yet.
        self.assertFalse(client.tabledata.InsertAll.called)
Example 7
    def test_dofn_client_finish_bundle_flush_called(self):
        client = mock.Mock()
        client.tables.Get.return_value = bigquery.Table(
            tableReference=bigquery.TableReference(projectId='project_id',
                                                   datasetId='dataset_id',
                                                   tableId='table_id'))
        client.tabledata.InsertAll.return_value = (
            bigquery.TableDataInsertAllResponse(insertErrors=[]))
        create_disposition = beam.io.BigQueryDisposition.CREATE_IF_NEEDED
        write_disposition = beam.io.BigQueryDisposition.WRITE_APPEND

        fn = beam.io.gcp.bigquery.BigQueryWriteFn(
            batch_size=2,
            create_disposition=create_disposition,
            write_disposition=write_disposition,
            kms_key=None,
            test_client=client)

        fn.start_bundle()

        # The element is a (destination, (row, insert_id)) tuple; processing it
        # triggers creation of the destination table.
        fn.process(
            ('project_id:dataset_id.table_id', ({'month': 1}, 'insertid3')))

        self.assertTrue(client.tables.Get.called)
        # InsertAll is not called: the batch size has not been reached.
        self.assertFalse(client.tabledata.InsertAll.called)

        fn.finish_bundle()
        # InsertAll is called from finish_bundle to flush the buffered rows.
        self.assertTrue(client.tabledata.InsertAll.called)
Example 8
    def test_dofn_client_process_flush_called(self):
        client = mock.Mock()
        client.tables.Get.return_value = bigquery.Table(
            tableReference=bigquery.TableReference(projectId='project_id',
                                                   datasetId='dataset_id',
                                                   tableId='table_id'))
        client.tabledata.InsertAll.return_value = (
            bigquery.TableDataInsertAllResponse(insertErrors=[]))
        create_disposition = beam.io.BigQueryDisposition.CREATE_NEVER
        write_disposition = beam.io.BigQueryDisposition.WRITE_APPEND

        fn = beam.io.gcp.bigquery.BigQueryWriteFn(
            batch_size=2,
            create_disposition=create_disposition,
            write_disposition=write_disposition,
            kms_key=None,
            test_client=client)

        fn.start_bundle()
        fn.process(
            ('project_id:dataset_id.table_id', ({'month': 1}, 'insertid1')))
        fn.process(
            ('project_id:dataset_id.table_id', ({'month': 2}, 'insertid2')))
        # InsertAll is called once the batch size is reached.
        self.assertTrue(client.tabledata.InsertAll.called)
Example 9
 def _setup_new_types_env(self):
   table_schema = bigquery.TableSchema()
   table_field = bigquery.TableFieldSchema()
   table_field.name = 'bytes'
   table_field.type = 'BYTES'
   table_schema.fields.append(table_field)
   table_field = bigquery.TableFieldSchema()
   table_field.name = 'date'
   table_field.type = 'DATE'
   table_schema.fields.append(table_field)
   table_field = bigquery.TableFieldSchema()
   table_field.name = 'time'
   table_field.type = 'TIME'
   table_schema.fields.append(table_field)
   table = bigquery.Table(
       tableReference=bigquery.TableReference(
           projectId=self.project,
           datasetId=self.dataset_id,
           tableId=NEW_TYPES_INPUT_TABLE),
       schema=table_schema)
   request = bigquery.BigqueryTablesInsertRequest(
       projectId=self.project, datasetId=self.dataset_id, table=table)
   self.bigquery_client.client.tables.Insert(request)
   table_data = [
       # The 'bytes' values are already base64-encoded strings, as the
       # apitools client expects.
       {'bytes': 'xyw=', 'date': '2011-01-01', 'time': '23:59:59.999999'},
       {'bytes': 'abc=', 'date': '2000-01-01', 'time': '00:00:00'},
       {'bytes': 'dec=', 'date': '3000-12-31', 'time': '23:59:59.990000'}
   ]
   self.bigquery_client.insert_rows(
       self.project, self.dataset_id, NEW_TYPES_INPUT_TABLE, table_data)
Example 10
    def test_dofn_client_no_records(self):
        client = mock.Mock()
        client.tables.Get.return_value = bigquery.Table(
            tableReference=bigquery.TableReference(projectId='project_id',
                                                   datasetId='dataset_id',
                                                   tableId='table_id'))
        client.tabledata.InsertAll.return_value = (
            bigquery.TableDataInsertAllResponse(insertErrors=[]))
        create_disposition = beam.io.BigQueryDisposition.CREATE_NEVER
        write_disposition = beam.io.BigQueryDisposition.WRITE_APPEND

        fn = beam.io.gcp.bigquery.BigQueryWriteFn(
            batch_size=2,
            create_disposition=create_disposition,
            write_disposition=write_disposition,
            kms_key=None,
            test_client=client)

        fn.start_bundle()
        # InsertAll is not called: no rows have been buffered.
        self.assertFalse(client.tabledata.InsertAll.called)

        fn.finish_bundle()
        # InsertAll is not called from finish_bundle: there are no records.
        self.assertFalse(client.tabledata.InsertAll.called)
Example 11
 def create_table(self, tablename):
     table_schema = bigquery.TableSchema()
     table_field = bigquery.TableFieldSchema()
     table_field.name = 'number'
     table_field.type = 'INTEGER'
     table_schema.fields.append(table_field)
     table_field = bigquery.TableFieldSchema()
     table_field.name = 'str'
     table_field.type = 'STRING'
     table_schema.fields.append(table_field)
     table = bigquery.Table(tableReference=bigquery.TableReference(
         projectId=self.project,
         datasetId=self.dataset_id,
         tableId=tablename),
                            schema=table_schema)
     request = bigquery.BigqueryTablesInsertRequest(
         projectId=self.project, datasetId=self.dataset_id, table=table)
     self.bigquery_client.client.tables.Insert(request)
     table_data = [{
         'number': 1,
         'str': 'abc'
     }, {
         'number': 2,
         'str': 'def'
     }, {
         'number': 3,
         'str': u'你好'
     }, {
         'number': 4,
         'str': u'привет'
     }]
     self.bigquery_client.insert_rows(self.project, self.dataset_id,
                                      tablename, table_data)
Example 12
 def create_table_new_types(self, table_name):
   table_schema = bigquery.TableSchema()
   table_field = bigquery.TableFieldSchema()
   table_field.name = 'bytes'
   table_field.type = 'BYTES'
   table_schema.fields.append(table_field)
   table_field = bigquery.TableFieldSchema()
   table_field.name = 'date'
   table_field.type = 'DATE'
   table_schema.fields.append(table_field)
   table_field = bigquery.TableFieldSchema()
   table_field.name = 'time'
   table_field.type = 'TIME'
   table_schema.fields.append(table_field)
   table = bigquery.Table(
       tableReference=bigquery.TableReference(
           projectId=self.project,
           datasetId=self.dataset_id,
           tableId=table_name),
       schema=table_schema)
   request = bigquery.BigqueryTablesInsertRequest(
       projectId=self.project, datasetId=self.dataset_id, table=table)
   self.bigquery_client.client.tables.Insert(request)
   table_data = [
       {'bytes': b'xyw', 'date': '2011-01-01', 'time': '23:59:59.999999'},
       {'bytes': b'abc', 'date': '2000-01-01', 'time': '00:00:00'},
       {'bytes': b'\xe4\xbd\xa0\xe5\xa5\xbd', 'date': '3000-12-31',
        'time': '23:59:59'},
       {'bytes': b'\xab\xac\xad', 'date': '2000-01-01', 'time': '00:00:00'}
   ]
    # The apitools BigQuery client expects base64-encoded byte values.
   for row in table_data:
     row['bytes'] = base64.b64encode(row['bytes']).decode('utf-8')
   self.bigquery_client.insert_rows(
       self.project, self.dataset_id, table_name, table_data)
Example 13
 def _create_table(self, project_id, dataset_id, table_id, schema):
   table = bigquery.Table(
       tableReference=bigquery.TableReference(
           projectId=project_id, datasetId=dataset_id, tableId=table_id),
       schema=schema)
   request = bigquery.BigqueryTablesInsertRequest(
       projectId=project_id, datasetId=dataset_id, table=table)
   response = self.client.tables.Insert(request)
   # The response is a bigquery.Table instance.
   return response
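
A hypothetical call to the helper above, using placeholder project, dataset, and table names and a single-field schema built from the same apitools message classes:

# Illustrative only: the names are placeholders, and self.client is assumed
# to be an apitools BigQuery client (bigquery.BigqueryV2).
schema = bigquery.TableSchema(fields=[
    bigquery.TableFieldSchema(name='month', type='INTEGER', mode='NULLABLE'),
])
table = self._create_table('my-project', 'my_dataset', 'my_table', schema)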
Example 14
 def create_table(cls, table_name, data, table_schema):
     table = bigquery.Table(tableReference=bigquery.TableReference(
         projectId=cls.project,
         datasetId=cls.dataset_id,
         tableId=table_name),
                            schema=table_schema)
     request = bigquery.BigqueryTablesInsertRequest(
         projectId=cls.project, datasetId=cls.dataset_id, table=table)
     cls.bigquery_client.client.tables.Insert(request)
     cls.bigquery_client.insert_rows(cls.project, cls.dataset_id,
                                     table_name, data)
     return table_schema
Example 15
 def side_effect(request):
     if (request == bigquery.BigqueryTablesGetRequest(
             projectId='project',
             datasetId='dataset',
             tableId='table__sample_info')):
         raise exceptions.HttpError(response={'status': '404'},
                                    url='',
                                    content='')
     return bigquery.Table(tableReference=bigquery.TableReference(
         projectId='project',
         datasetId='dataset',
         tableId='table__chr1_part1'))
Example 16
    def _create_table(cls, table_name):
        table_schema = bigquery.TableSchema()

        number = bigquery.TableFieldSchema()
        number.name = 'number'
        number.type = 'INTEGER'
        table_schema.fields.append(number)

        string = bigquery.TableFieldSchema()
        string.name = 'string'
        string.type = 'STRING'
        table_schema.fields.append(string)

        time = bigquery.TableFieldSchema()
        time.name = 'time'
        time.type = 'TIME'
        table_schema.fields.append(time)

        datetime = bigquery.TableFieldSchema()
        datetime.name = 'datetime'
        datetime.type = 'DATETIME'
        table_schema.fields.append(datetime)

        rec = bigquery.TableFieldSchema()
        rec.name = 'rec'
        rec.type = 'RECORD'
        rec_datetime = bigquery.TableFieldSchema()
        rec_datetime.name = 'rec_datetime'
        rec_datetime.type = 'DATETIME'
        rec.fields.append(rec_datetime)
        rec_rec = bigquery.TableFieldSchema()
        rec_rec.name = 'rec_rec'
        rec_rec.type = 'RECORD'
        rec_rec_datetime = bigquery.TableFieldSchema()
        rec_rec_datetime.name = 'rec_rec_datetime'
        rec_rec_datetime.type = 'DATETIME'
        rec_rec.fields.append(rec_rec_datetime)
        rec.fields.append(rec_rec)
        table_schema.fields.append(rec)

        table = bigquery.Table(tableReference=bigquery.TableReference(
            projectId=cls.project,
            datasetId=cls.dataset_id,
            tableId=table_name),
                               schema=table_schema)
        request = bigquery.BigqueryTablesInsertRequest(
            projectId=cls.project, datasetId=cls.dataset_id, table=table)
        cls.bigquery_client.client.tables.Insert(request)
        cls.bigquery_client.insert_rows(cls.project, cls.dataset_id,
                                        table_name, cls.TABLE_DATA)
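
The field-by-field construction above is equivalent to a single nested expression, since the protorpc message classes accept their fields as constructor keyword arguments; a sketch:

# Same schema as above, built in one expression; behavior is unchanged.
table_schema = bigquery.TableSchema(fields=[
    bigquery.TableFieldSchema(name='number', type='INTEGER'),
    bigquery.TableFieldSchema(name='string', type='STRING'),
    bigquery.TableFieldSchema(name='time', type='TIME'),
    bigquery.TableFieldSchema(name='datetime', type='DATETIME'),
    bigquery.TableFieldSchema(name='rec', type='RECORD', fields=[
        bigquery.TableFieldSchema(name='rec_datetime', type='DATETIME'),
        bigquery.TableFieldSchema(name='rec_rec', type='RECORD', fields=[
            bigquery.TableFieldSchema(name='rec_rec_datetime',
                                      type='DATETIME'),
        ]),
    ]),
])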
Example 17
 def test_table_with_write_disposition_append(self):
     client = mock.Mock()
     table = bigquery.Table(tableReference=bigquery.TableReference(
         projectId='project', datasetId='dataset', tableId='table'),
                            schema=bigquery.TableSchema())
     client.tables.Get.return_value = table
     client.tables.Insert.return_value = table
     write_disposition = beam.io.BigQueryDisposition.WRITE_APPEND
     with beam.io.BigQuerySink(
             'project:dataset.table',
             write_disposition=write_disposition).writer(client):
         pass
     self.assertTrue(client.tables.Get.called)
     self.assertFalse(client.tables.Delete.called)
     self.assertFalse(client.tables.Insert.called)
Example 18
 def _setup_new_types_env(self):
     table_schema = bigquery.TableSchema()
     table_field = bigquery.TableFieldSchema()
     table_field.name = 'bytes'
     table_field.type = 'BYTES'
     table_schema.fields.append(table_field)
     table_field = bigquery.TableFieldSchema()
     table_field.name = 'date'
     table_field.type = 'DATE'
     table_schema.fields.append(table_field)
     table_field = bigquery.TableFieldSchema()
     table_field.name = 'time'
     table_field.type = 'TIME'
     table_schema.fields.append(table_field)
     table = bigquery.Table(tableReference=bigquery.TableReference(
         projectId=self.project,
         datasetId=self.dataset_id,
         tableId=NEW_TYPES_INPUT_TABLE),
                            schema=table_schema)
     request = bigquery.BigqueryTablesInsertRequest(
         projectId=self.project, datasetId=self.dataset_id, table=table)
     self.bigquery_client.client.tables.Insert(request)
     table_data = [{
         'bytes': b'xyw',
         'date': '2011-01-01',
         'time': '23:59:59.999999'
     }, {
         'bytes': b'abc',
         'date': '2000-01-01',
         'time': '00:00:00'
     }, {
         'bytes': b'\xe4\xbd\xa0\xe5\xa5\xbd',
         'date': '3000-12-31',
         'time': '23:59:59.990000'
     }, {
         'bytes': b'\xab\xac\xad',
         'date': '2000-01-01',
         'time': '00:00:00'
     }]
      # The apitools BigQuery client expects byte values to be base64-encoded.
      # TODO(https://github.com/apache/beam/issues/19073): upgrade to
      # google-cloud-bigquery, which does not require handling the encoding in
      # Beam.
     for row in table_data:
         row['bytes'] = base64.b64encode(row['bytes']).decode('utf-8')
     passed, errors = self.bigquery_client.insert_rows(
         self.project, self.dataset_id, NEW_TYPES_INPUT_TABLE, table_data)
     self.assertTrue(passed, 'Error in BQ setup: %s' % errors)
Example 19
 def test_table_not_empty_and_write_disposition_empty(
     self, patched_time_sleep):
   client = mock.Mock()
   client.tables.Get.return_value = bigquery.Table(
       tableReference=bigquery.TableReference(
           projectId='project', datasetId='dataset', tableId='table'),
       schema=bigquery.TableSchema())
   client.tabledata.List.return_value = bigquery.TableDataList(totalRows=1)
   write_disposition = beam.io.BigQueryDisposition.WRITE_EMPTY
   with self.assertRaisesRegexp(
       RuntimeError, r'Table project:dataset\.table is not empty but write '
                     r'disposition is WRITE_EMPTY'):
     with beam.io.BigQuerySink(
         'project:dataset.table',
         write_disposition=write_disposition).writer(client):
       pass
Example 20
 def test_no_table_and_create_if_needed(self):
     client = mock.Mock()
     table = bigquery.Table(tableReference=bigquery.TableReference(
         projectId='project', datasetId='dataset', tableId='table'),
                            schema=bigquery.TableSchema())
     client.tables.Get.side_effect = HttpError(response={'status': '404'},
                                               url='',
                                               content='')
     client.tables.Insert.return_value = table
     create_disposition = beam.io.BigQueryDisposition.CREATE_IF_NEEDED
     with beam.io.BigQuerySink(
             'project:dataset.table',
             schema='somefield:INTEGER',
             create_disposition=create_disposition).writer(client):
         pass
     self.assertTrue(client.tables.Get.called)
     self.assertTrue(client.tables.Insert.called)
Example 21
 def _create_table(self,
                   project_id,
                   dataset_id,
                   table_id,
                   schema,
                   additional_parameters=None):
     additional_parameters = additional_parameters or {}
     table = bigquery.Table(tableReference=bigquery.TableReference(
         projectId=project_id, datasetId=dataset_id, tableId=table_id),
                            schema=schema,
                            **additional_parameters)
     request = bigquery.BigqueryTablesInsertRequest(projectId=project_id,
                                                    datasetId=dataset_id,
                                                    table=table)
     response = self.client.tables.Insert(request)
     logging.debug("Created the table with id %s", table_id)
     # The response is a bigquery.Table instance.
     return response
Example 22
    def test_existing_sample_table(self):
        args = self._make_args([
            '--append', 'False', '--output_table', 'project:dataset.table',
            '--sharding_config_path',
            'gcp_variant_transforms/testing/data/sharding_configs/'
            'residual_at_end.yaml'
        ])

        client = mock.Mock()
        client.tables.Get.return_value = bigquery.Table(
            tableReference=bigquery.TableReference(
                projectId='project',
                datasetId='dataset',
                tableId='table__sample_info'))
        with self.assertRaisesRegexp(
                ValueError,
                'project:dataset.table__sample_info already exists'):
            self._options.validate(args, client)
Example 23
 def test_table_empty_and_write_disposition_empty(self):
     client = mock.Mock()
     table = bigquery.Table(tableReference=bigquery.TableReference(
         projectId='project', datasetId='dataset', tableId='table'),
                            schema=bigquery.TableSchema())
     client.tables.Get.return_value = table
     client.tabledata.List.return_value = bigquery.TableDataList(
         totalRows=0)
     client.tables.Insert.return_value = table
     write_disposition = beam.io.BigQueryDisposition.WRITE_EMPTY
     with beam.io.BigQuerySink(
             'project:dataset.table',
             write_disposition=write_disposition).writer(client):
         pass
     self.assertTrue(client.tables.Get.called)
     self.assertTrue(client.tabledata.List.called)
     self.assertFalse(client.tables.Delete.called)
     self.assertFalse(client.tables.Insert.called)
Example 24
    def test_table_exist(self):
        client = mock.Mock()
        client.tables.Get.return_value = bigquery.Table(
            tableReference=bigquery.TableReference(
                projectId='project', datasetId='dataset', tableId='table'))
        self.assertEqual(
            bigquery_util.table_exist(client, 'project', 'dataset', 'table'),
            True)

        client.tables.Get.side_effect = exceptions.HttpError(
            response={'status': '404'}, url='', content='')
        self.assertEqual(
            bigquery_util.table_exist(client, 'project', 'dataset', 'table'),
            False)

        client.tables.Get.side_effect = exceptions.HttpError(
            response={'status': '401'}, url='', content='')
        self.assertRaises(exceptions.HttpError, bigquery_util.table_exist,
                          client, 'project', 'dataset', 'table')
Example 25
    def test_dofn_client_start_bundle_called(self):
        client = mock.Mock()
        client.tables.Get.return_value = bigquery.Table(
            tableReference=bigquery.TableReference(projectId='project_id',
                                                   datasetId='dataset_id',
                                                   tableId='table_id'))
        create_disposition = beam.io.BigQueryDisposition.CREATE_NEVER
        write_disposition = beam.io.BigQueryDisposition.WRITE_APPEND
        fn = beam.io.gcp.bigquery.BigQueryWriteFn(
            table_id='table_id',
            dataset_id='dataset_id',
            project_id='project_id',
            batch_size=2,
            schema='month:INTEGER',
            create_disposition=create_disposition,
            write_disposition=write_disposition,
            client=client)

        fn.start_bundle()
        self.assertTrue(client.tables.Get.called)
Example 26
 def create_table(cls, table_name):
     table_schema = bigquery.TableSchema()
     table_field = bigquery.TableFieldSchema()
     table_field.name = 'number'
     table_field.type = 'INTEGER'
     table_schema.fields.append(table_field)
     table_field = bigquery.TableFieldSchema()
     table_field.name = 'str'
     table_field.type = 'STRING'
     table_schema.fields.append(table_field)
     table = bigquery.Table(tableReference=bigquery.TableReference(
         projectId=cls.project,
         datasetId=cls.dataset_id,
         tableId=table_name),
                            schema=table_schema)
     request = bigquery.BigqueryTablesInsertRequest(
         projectId=cls.project, datasetId=cls.dataset_id, table=table)
     cls.bigquery_client.client.tables.Insert(request)
     cls.bigquery_client.insert_rows(cls.project, cls.dataset_id,
                                     table_name, cls.TABLE_DATA)
Example 27
    def test_rows_are_written(self):
        client = mock.Mock()
        table = bigquery.Table(tableReference=bigquery.TableReference(
            projectId='project', datasetId='dataset', tableId='table'),
                               schema=bigquery.TableSchema())
        client.tables.Get.return_value = table
        write_disposition = beam.io.BigQueryDisposition.WRITE_APPEND

        client.insert_rows_json.return_value = []

        with beam.io.BigQuerySink(
                'project:dataset.table',
                write_disposition=write_disposition).writer(client) as writer:
            writer.Write({'i': 1, 'b': True, 's': 'abc', 'f': 3.14})

        sample_row = {'i': 1, 'b': True, 's': 'abc', 'f': 3.14}
        client.insert_rows_json.assert_called_with(gcp_bigquery.TableReference(
            gcp_bigquery.DatasetReference('project', 'dataset'), 'table'),
                                                   json_rows=[sample_row],
                                                   row_ids=['_1'],
                                                   skip_invalid_rows=True)
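
Unlike the other examples, Example 27 targets the google-cloud-bigquery client (insert_rows_json, TableReference, DatasetReference) rather than the apitools one; the gcp_bigquery alias is presumably:

# Presumed import: aliased to avoid clashing with the apitools-generated
# `bigquery` module used in the other examples.
from google.cloud import bigquery as gcp_bigquery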
Example 28
  def test_dofn_client_start_bundle_create_called(self):
    client = mock.Mock()
    client.tables.Get.return_value = None
    client.tables.Insert.return_value = bigquery.Table(
        tableReference=bigquery.TableReference(
            projectId='project_id', datasetId='dataset_id', tableId='table_id'))
    create_disposition = beam.io.BigQueryDisposition.CREATE_IF_NEEDED
    write_disposition = beam.io.BigQueryDisposition.WRITE_APPEND
    schema = {'fields': [
        {'name': 'month', 'type': 'INTEGER', 'mode': 'NULLABLE'}]}

    fn = beam.io.gcp.bigquery.BigQueryWriteFn(
        table_id='table_id',
        dataset_id='dataset_id',
        project_id='project_id',
        batch_size=2,
        schema=schema,
        create_disposition=create_disposition,
        write_disposition=write_disposition,
        client=client)

    fn.start_bundle()
    self.assertTrue(client.tables.Get.called)
    self.assertTrue(client.tables.Insert.called)