Example #1
    def test_failure_has_same_insert_ids(self):
        tempdir = '%s%s' % (self._new_tempdir(), os.sep)
        file_name_1 = os.path.join(tempdir, 'file1')
        file_name_2 = os.path.join(tempdir, 'file2')

        def store_callback(arg):
            insert_ids = [
                r.insertId for r in arg.tableDataInsertAllRequest.rows
            ]
            colA_values = [
                r.json.additionalProperties[0].value.string_value
                for r in arg.tableDataInsertAllRequest.rows
            ]
            json_output = {'insertIds': insert_ids, 'colA_values': colA_values}
            # On the first insert attempt, record the request in file_name_1
            # and raise to force a retry; the retried request is written to
            # file_name_2 so the two insertAll payloads can be compared.
            if not os.path.exists(file_name_1):
                with open(file_name_1, 'w') as f:
                    json.dump(json_output, f)
                raise RuntimeError()
            else:
                with open(file_name_2, 'w') as f:
                    json.dump(json_output, f)

            # Mimic a successful insertAll response: no per-row insert errors.
            res = mock.Mock()
            res.insertErrors = []
            return res

        client = mock.Mock()
        client.tabledata.InsertAll = mock.Mock(side_effect=store_callback)

        # Using the bundle based direct runner to avoid pickling problems
        # with mocks.
        with beam.Pipeline(runner='BundleBasedDirectRunner') as p:
            _ = (p
                 | beam.Create([{
                     'columnA': 'value1',
                     'columnB': 'value2'
                 }, {
                     'columnA': 'value3',
                     'columnB': 'value4'
                 }, {
                     'columnA': 'value5',
                     'columnB': 'value6'
                 }])
                 | _StreamToBigQuery('project:dataset.table', [], [],
                                     'anyschema',
                                     None,
                                     'CREATE_NEVER',
                                     None,
                                     None,
                                     None, [],
                                     test_client=client))

        with open(file_name_1) as f1, open(file_name_2) as f2:
            self.assertEqual(json.load(f1), json.load(f2))
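Note: both examples assume imports from the surrounding Beam test module. A plausible minimal set, inferred from the names used (the _StreamToBigQuery import path is an assumption about where the Beam version at hand defines that private transform):

import json
import os

import mock  # the standalone mock package; unittest.mock also works

import apache_beam as beam
from apache_beam.io.gcp.bigquery import _StreamToBigQuery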
Example #2
  def test_failure_has_same_insert_ids(self):

    def store_callback(arg):
      insert_ids = [r.insertId for r in arg.tableDataInsertAllRequest.rows]
      colA_values = [r.json.additionalProperties[0].value.string_value
                     for r in arg.tableDataInsertAllRequest.rows]
      json_output = {'insertIds': insert_ids,
                     'colA_values': colA_values}
      # On the first insert attempt, record the request in insert_calls1 and
      # raise to force a retry; the retried request goes to insert_calls2.
      if not os.path.exists('insert_calls1'):
        with open('insert_calls1', 'w') as f:
          json.dump(json_output, f)
        raise RuntimeError()
      else:
        with open('insert_calls2', 'w') as f:
          json.dump(json_output, f)

      res = mock.Mock()
      res.insertErrors = []
      return res

    # Stub the BigQuery client so every insertAll call runs store_callback.
    client = mock.Mock()
    client.tabledata.InsertAll = mock.Mock(side_effect=store_callback)

    with beam.Pipeline(runner='BundleBasedDirectRunner') as p:
      _ = (p
           | beam.Create([{'columnA': 'value1', 'columnB': 'value2'},
                          {'columnA': 'value3', 'columnB': 'value4'},
                          {'columnA': 'value5', 'columnB': 'value6'}])
           | _StreamToBigQuery(
               'project:dataset.table',
               [], [],
               'anyschema',
               None,
               'CREATE_NEVER', None,
               None, None,
               [], test_client=client))

    with open('insert_calls1') as f1, open('insert_calls2') as f2:
      self.assertEqual(json.load(f1), json.load(f2))
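Both versions of the test check the same property: when the first tabledata.insertAll call fails and the bundle is retried, the retried request must carry the same insertId for each row, which is what lets BigQuery's best-effort streaming deduplication drop the duplicates. As a toy sketch of the idea (not Beam's actual implementation), an insertId only needs to be derived from inputs that are stable across retries:

import hashlib

def deterministic_insert_id(bundle_id, row_index):
    # Toy illustration: derive a stable insertId from inputs that are
    # identical across retries of the same bundle, so a retried insertAll
    # request repeats the same insertIds and BigQuery can deduplicate.
    seed = '%s:%d' % (bundle_id, row_index)
    return hashlib.sha1(seed.encode('utf-8')).hexdigest()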