def test_failure_has_same_insert_ids(self):
  tempdir = '%s%s' % (self._new_tempdir(), os.sep)
  file_name_1 = os.path.join(tempdir, 'file1')
  file_name_2 = os.path.join(tempdir, 'file2')

  def store_callback(arg):
    insert_ids = [r.insertId for r in arg.tableDataInsertAllRequest.rows]
    colA_values = [
        r.json.additionalProperties[0].value.string_value
        for r in arg.tableDataInsertAllRequest.rows
    ]
    json_output = {'insertIds': insert_ids, 'colA_values': colA_values}
    # On the first insert attempt, record the request in file_name_1 and
    # fail; on the retry, record it in file_name_2 and succeed.
    if not os.path.exists(file_name_1):
      with open(file_name_1, 'w') as f:
        json.dump(json_output, f)
      raise RuntimeError()
    else:
      with open(file_name_2, 'w') as f:
        json.dump(json_output, f)

    res = mock.Mock()
    res.insertErrors = []
    return res

  client = mock.Mock()
  client.tabledata.InsertAll = mock.Mock(side_effect=store_callback)

  # Using the bundle based direct runner to avoid pickling problems
  # with mocks.
  with beam.Pipeline(runner='BundleBasedDirectRunner') as p:
    _ = (
        p
        | beam.Create([{
            'columnA': 'value1', 'columnB': 'value2'
        }, {
            'columnA': 'value3', 'columnB': 'value4'
        }, {
            'columnA': 'value5', 'columnB': 'value6'
        }])
        | _StreamToBigQuery(
            'project:dataset.table', [], [],
            'anyschema',
            None,
            'CREATE_NEVER',
            None,
            None,
            None, [],
            test_client=client))

  with open(file_name_1) as f1, open(file_name_2) as f2:
    self.assertEqual(json.load(f1), json.load(f2))
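
# Note on the pattern above (sketch-level commentary, not part of the
# original test): mock.Mock(side_effect=callback) routes every call to the
# mocked tabledata.InsertAll through store_callback, which records the
# request and either raises (simulating a transient insert failure, so the
# bundle is retried) or returns a canned success response. A minimal
# illustration of the same side_effect mechanism, with purely hypothetical
# names:
#
#   import unittest.mock as mock
#
#   calls = []
#
#   def flaky(request):
#     calls.append(request)
#     if len(calls) == 1:
#       raise RuntimeError('simulated transient failure')
#     return 'ok'
#
#   api = mock.Mock(side_effect=flaky)
#   try:
#     api('req-1')  # First call raises.
#   except RuntimeError:
#     pass
#   assert api('req-1') == 'ok'  # The retry of the same request succeeds.
#
# The test then asserts that both attempts carried identical insertIds and
# values, which is what makes the streaming inserts idempotent on retry.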