def testDriverJsonContract(self):
  # Same scenario as testDriverWithoutSpan, but the driver is fed a raw JSON
  # invocation string and its raw JSON output is compared directly, so this
  # test also documents the JSON I/O contract of the driver.

  # Create one data file per split. os.utime pins (atime, mtime) to fixed
  # values so the file timestamps are the same on every run.
  for split_dir, payload, mtime in (('split1', 'testing', 1),
                                    ('split2', 'testing2', 3)):
    data_path = os.path.join(_TEST_INPUT_DIR, split_dir, 'data')
    io_utils.write_string_file(data_path, payload)
    os.utime(data_path, (0, mtime))

  # Invoke the driver with the serialized invocation.
  # NOTE(review): self._executor_invocation_from_file is presumably loaded by
  # the test fixture (e.g. setUp) -- it is not defined in this method.
  argv = [
      'driver.py',
      '--json_serialized_invocation_args',
      self._executor_invocation_from_file,
  ]
  driver.main(argv)

  # Read back the output metadata file written by the driver.
  with open(_TEST_OUTPUT_METADATA_JSON) as metadata_file:
    actual_text = metadata_file.read()

  # Both documents have all whitespace removed before parsing, then are
  # compared as dictionaries.
  actual = json.loads(re.sub(r'\s+', '', actual_text))
  expected = json.loads(re.sub(r'\s+', '', self._expected_result_from_file))
  self.assertDictEqual(actual, expected)
def testDriverWithSpan(self):
  # Checks span handling in two phases:
  #   1. span01 has both splits but span02 only has split1, so the driver is
  #      expected to raise ValueError.
  #   2. once span02/split2 exists, the driver is expected to pick span 2 and
  #      report input patterns that point at the span02 directories.
  # NOTE(review): the span-aware input_config is presumably set on
  # self._executor_invocation by the test fixture -- confirm against setUp.

  # Phase 1: misaligned spans.
  span1_split1 = os.path.join(_TEST_INPUT_DIR, 'span01', 'split1', 'data')
  io_utils.write_string_file(span1_split1, 'testing11')
  span1_split2 = os.path.join(_TEST_INPUT_DIR, 'span01', 'split2', 'data')
  io_utils.write_string_file(span1_split2, 'testing12')
  span2_split1 = os.path.join(_TEST_INPUT_DIR, 'span02', 'split1', 'data')
  io_utils.write_string_file(span2_split1, 'testing21')

  serialized_args = [
      'driver.py',
      '--json_serialized_invocation_args',
      json_format.MessageToJson(message=self._executor_invocation),
  ]
  # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias, which
  # was removed from unittest.TestCase in Python 3.12.
  with self.assertRaisesRegex(
      ValueError, 'Latest span should be the same for each split'):
    driver.main(serialized_args)

  # Phase 2: add the missing split so the latest span is aligned, then rerun
  # with the same arguments.
  span2_split2 = os.path.join(_TEST_INPUT_DIR, 'span02', 'split2', 'data')
  io_utils.write_string_file(span2_split2, 'testing22')
  driver.main(serialized_args)

  # Check the output metadata file for the expected outputs.
  with open(_TEST_OUTPUT_METADATA_JSON) as output_meta_json:
    output_metadata = pipeline_pb2.ExecutorOutput()
    json_format.Parse(
        output_meta_json.read(), output_metadata, ignore_unknown_fields=True)
    self.assertEqual(output_metadata.parameters['span'].string_value, '2')
    self.assertEqual(
        output_metadata.parameters['input_config'].string_value,
        json_format.MessageToJson(
            example_gen_pb2.Input(splits=[
                example_gen_pb2.Input.Split(
                    name='s1', pattern='span02/split1/*'),
                example_gen_pb2.Input.Split(
                    name='s2', pattern='span02/split2/*')
            ])))
def testDriverWithoutSpan(self):
  # Runs the driver against two splits whose patterns carry no span, then
  # checks the span, input fingerprint and input config it reports.

  # Create one data file per split. os.utime pins (atime, mtime) to fixed
  # values so the file timestamps are the same on every run.
  for split_dir, payload, mtime in (('split1', 'testing', 1),
                                    ('split2', 'testing2', 3)):
    data_path = os.path.join(_TEST_INPUT_DIR, split_dir, 'data')
    io_utils.write_string_file(data_path, payload)
    os.utime(data_path, (0, mtime))

  # The same serialized Input message is used as the driver's input and as
  # the expected 'input_config' output, so build it once.
  input_config_json = json_format.MessageToJson(
      example_gen_pb2.Input(splits=[
          example_gen_pb2.Input.Split(name='s1', pattern='split1/*'),
          example_gen_pb2.Input.Split(name='s2', pattern='split2/*')
      ]))
  invocation_params = self._executor_invocation.inputs.parameters
  invocation_params['input_config'].string_value = input_config_json

  # Invoke the driver.
  argv = [
      'driver.py',
      '--json_serialized_invocation_args',
      json_format.MessageToJson(message=self._executor_invocation),
  ]
  driver.main(argv)

  # Parse the output metadata file written by the driver and verify it.
  with open(_TEST_OUTPUT_METADATA_JSON) as metadata_file:
    result = pipeline_pb2.ExecutorOutput()
    json_format.Parse(
        metadata_file.read(), result, ignore_unknown_fields=True)
    self.assertEqual(result.parameters['span'].string_value, '0')
    # total_bytes 7 and 8 are the lengths of 'testing' and 'testing2'; the
    # checksum values 1 and 3 match the mtimes set above.
    self.assertEqual(
        result.parameters['input_fingerprint'].string_value,
        'split:s1,num_files:1,total_bytes:7,xor_checksum:1,sum_checksum:1\n'
        'split:s2,num_files:1,total_bytes:8,xor_checksum:3,sum_checksum:3')
    self.assertEqual(
        result.parameters['input_config'].string_value, input_config_json)