Example #1
0
    def test_dofn_client_no_records(self):
        """InsertAll must never be called when no rows are processed."""
        mock_client = mock.Mock()
        mock_client.tables.Get.return_value = bigquery.Table(
            tableReference=bigquery.TableReference(
                projectId='project_id',
                datasetId='dataset_id',
                tableId='table_id'))
        # Successful response with no per-row insert errors.
        mock_client.tabledata.InsertAll.return_value = (
            bigquery.TableDataInsertAllResponse(insertErrors=[]))

        fn = beam.io.gcp.bigquery.BigQueryWriteFn(
            batch_size=2,
            create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER,
            write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
            kms_key=None,
            test_client=mock_client)

        fn.start_bundle()
        # Nothing buffered yet, so the batch threshold cannot have been hit.
        self.assertFalse(mock_client.tabledata.InsertAll.called)

        fn.finish_bundle()
        # finish_bundle has no pending rows either, so still no flush.
        self.assertFalse(mock_client.tabledata.InsertAll.called)
Example #2
0
    def test_dofn_client_finish_bundle_flush_called(self):
        """finish_bundle must flush a row that never reached batch_size."""
        mock_client = mock.Mock()
        mock_client.tables.Get.return_value = bigquery.Table(
            tableReference=bigquery.TableReference(
                projectId='project_id',
                datasetId='dataset_id',
                tableId='table_id'))
        # Successful response with no per-row insert errors.
        mock_client.tabledata.InsertAll.return_value = (
            bigquery.TableDataInsertAllResponse(insertErrors=[]))

        fn = beam.io.gcp.bigquery.BigQueryWriteFn(
            batch_size=2,
            create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
            write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
            kms_key=None,
            test_client=mock_client)

        fn.start_bundle()

        # Each element is (destination table spec, (row, insert id)).
        row_with_id = ({'month': 1}, 'insertid3')
        fn.process(('project_id:dataset_id.table_id', row_with_id))

        self.assertTrue(mock_client.tables.Get.called)
        # One buffered row < batch_size of 2, so nothing is flushed yet.
        self.assertFalse(mock_client.tabledata.InsertAll.called)

        fn.finish_bundle()
        # The pending row is written out when the bundle finishes.
        self.assertTrue(mock_client.tabledata.InsertAll.called)
Example #3
0
  def test_dofn_client_finish_bundle_flush_called(self):
    """finish_bundle must flush a buffered row that never hit batch_size."""
    mock_client = mock.Mock()
    mock_client.tables.Get.return_value = bigquery.Table(
        tableReference=bigquery.TableReference(
            projectId='project_id',
            datasetId='dataset_id',
            tableId='table_id'))
    # Successful response with no per-row insert errors.
    mock_client.tabledata.InsertAll.return_value = (
        bigquery.TableDataInsertAllResponse(insertErrors=[]))
    month_schema = {
        'fields': [
            {'name': 'month', 'type': 'INTEGER', 'mode': 'NULLABLE'}]}

    fn = beam.io.gcp.bigquery.BigQueryWriteFn(
        table_id='table_id',
        dataset_id='dataset_id',
        project_id='project_id',
        batch_size=2,
        schema=month_schema,
        create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER,
        write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
        kms_key=None,
        test_client=mock_client)

    fn.start_bundle()
    fn.process({'month': 1})

    self.assertTrue(mock_client.tables.Get.called)
    # Only one row buffered (< batch_size of 2), so no flush yet.
    self.assertFalse(mock_client.tabledata.InsertAll.called)

    fn.finish_bundle()
    # The buffered row is written out when the bundle finishes.
    self.assertTrue(mock_client.tabledata.InsertAll.called)
Example #4
0
    def test_dofn_client_process_flush_called(self):
        """process must flush as soon as buffered rows reach batch_size."""
        mock_client = mock.Mock()
        mock_client.tables.Get.return_value = bigquery.Table(
            tableReference=bigquery.TableReference(
                projectId='project_id',
                datasetId='dataset_id',
                tableId='table_id'))
        # Successful response with no per-row insert errors.
        mock_client.tabledata.InsertAll.return_value = (
            bigquery.TableDataInsertAllResponse(insertErrors=[]))

        fn = beam.io.gcp.bigquery.BigQueryWriteFn(
            batch_size=2,
            create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER,
            write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
            kms_key=None,
            test_client=mock_client)

        fn.start_bundle()
        # Each element is (destination table spec, (row, insert id)).
        for row_with_id in [({'month': 1}, 'insertid1'),
                            ({'month': 2}, 'insertid2')]:
            fn.process(('project_id:dataset_id.table_id', row_with_id))
        # Two rows == batch_size, so the flush happens inside process().
        self.assertTrue(mock_client.tabledata.InsertAll.called)
Example #5
0
    def test_dofn_client_process_performs_batching(self):
        """A single processed row below batch_size must not trigger InsertAll."""
        client = mock.Mock()
        client.tables.Get.return_value = bigquery.Table(
            tableReference=bigquery.TableReference(projectId='project_id',
                                                   datasetId='dataset_id',
                                                   tableId='table_id'))
        # Successful response with no per-row insert errors.
        client.tabledata.InsertAll.return_value = \
            bigquery.TableDataInsertAllResponse(insertErrors=[])
        create_disposition = beam.io.BigQueryDisposition.CREATE_NEVER
        write_disposition = beam.io.BigQueryDisposition.WRITE_APPEND

        # NOTE(review): this example passes `client=` while sibling tests use
        # `test_client=` -- confirm which keyword the targeted Beam version's
        # BigQueryWriteFn constructor actually accepts.
        fn = beam.io.gcp.bigquery.BigQueryWriteFn(
            table_id='table_id',
            dataset_id='dataset_id',
            project_id='project_id',
            batch_size=2,
            schema='month:INTEGER',
            create_disposition=create_disposition,
            write_disposition=write_disposition,
            client=client)

        fn.start_bundle()
        fn.process({'month': 1})

        self.assertTrue(client.tables.Get.called)
        # Only one row buffered (< batch_size of 2), so no flush yet.
        self.assertFalse(client.tabledata.InsertAll.called)