Ejemplo n.º 1
0
    def testDriverJsonContract(self):
        """Runs the driver on a raw JSON invocation and checks its raw JSON output.

        Same scenario as testDriverWithoutSpan, but the invocation is passed as
        a raw JSON string and the result is compared against the raw JSON the
        driver writes, to better illustrate the driver's JSON I/O contract.
        """
        # One data file per split; os.utime pins (atime, mtime) to fixed values.
        fixtures = (
            ('split1', 'testing', (0, 1)),
            ('split2', 'testing2', (0, 3)),
        )
        for split_dir, content, file_times in fixtures:
            data_path = os.path.join(_TEST_INPUT_DIR, split_dir, 'data')
            io_utils.write_string_file(data_path, content)
            os.utime(data_path, file_times)

        # Invoke the driver with the JSON invocation held on the test instance.
        flags = driver._parse_flags([
            '--json_serialized_invocation_args',
            self._executor_invocation_from_file,
        ])
        driver.main(flags)

        def _canonical(raw_json):
            # Round-trip through json so formatting and key order do not matter.
            return json.dumps(json.loads(raw_json), indent=2, sort_keys=True)

        # Compare the written output metadata against the expected JSON.
        with open(_TEST_OUTPUT_METADATA_JSON) as output_meta_json:
            actual = _canonical(output_meta_json.read())
        expected = _canonical(self._expected_result_from_file)
        self.assertEqual(actual, expected)
Ejemplo n.º 2
0
    def testDriverWithSpan(self):
        """Checks span alignment across splits and selection of the latest span.

        First writes data for span1 (both splits) and span2 (split1 only) and
        expects the driver to raise ValueError because the latest span differs
        between splits. Then adds span2/split2 and expects the driver to report
        span '2' with both split patterns pointing at span2.

        The invocation comes from self._executor_invocation, which is prepared
        outside this method.
        """
        # Test align of span number.
        span1_split1 = os.path.join(_TEST_INPUT_DIR, 'span1', 'split1', 'data')
        io_utils.write_string_file(span1_split1, 'testing11')
        span1_split2 = os.path.join(_TEST_INPUT_DIR, 'span1', 'split2', 'data')
        io_utils.write_string_file(span1_split2, 'testing12')
        span2_split1 = os.path.join(_TEST_INPUT_DIR, 'span2', 'split1', 'data')
        io_utils.write_string_file(span2_split1, 'testing21')

        serialized_args = [
            '--json_serialized_invocation_args',
            json_format.MessageToJson(message=self._executor_invocation)
        ]
        # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
        # which was removed from unittest in Python 3.12.
        with self.assertRaisesRegex(
                ValueError, 'Latest span should be the same for each split'):
            driver.main(driver._parse_flags(serialized_args))

        # Test if latest span is selected when span aligns for each split.
        span2_split2 = os.path.join(_TEST_INPUT_DIR, 'span2', 'split2', 'data')
        io_utils.write_string_file(span2_split2, 'testing22')

        driver.main(driver._parse_flags(serialized_args))

        # Read the output metadata file, then close it before asserting.
        with open(_TEST_OUTPUT_METADATA_JSON) as output_meta_json:
            output_json = output_meta_json.read()

        output_metadata = pipeline_pb2.ExecutorOutput()
        json_format.Parse(output_json,
                          output_metadata,
                          ignore_unknown_fields=True)
        self.assertEqual(output_metadata.parameters['span'].string_value,
                         '2')
        self.assertEqual(
            output_metadata.parameters['input_config'].string_value,
            json_format.MessageToJson(
                example_gen_pb2.Input(splits=[
                    example_gen_pb2.Input.Split(name='s1',
                                                pattern='span2/split1/*'),
                    example_gen_pb2.Input.Split(name='s2',
                                                pattern='span2/split2/*')
                ])))
Ejemplo n.º 3
0
    def testDriverWithoutSpan(self):
        """Runs the driver on span-less split patterns and checks its outputs.

        Writes one data file per split with fixed timestamps, points the
        invocation's 'input_config' at 'split1/*' and 'split2/*', and expects
        span '0', the fingerprint asserted below, and an unchanged input config
        in the output metadata.
        """
        # One data file per split; os.utime pins (atime, mtime) to fixed values.
        for split_dir, content, file_times in (
                ('split1', 'testing', (0, 1)),
                ('split2', 'testing2', (0, 3)),
        ):
            data_path = os.path.join(_TEST_INPUT_DIR, split_dir, 'data')
            io_utils.write_string_file(data_path, content)
            os.utime(data_path, file_times)

        # The same serialized config is both the driver input and the expected
        # 'input_config' output parameter.
        input_config_json = json_format.MessageToJson(
            example_gen_pb2.Input(splits=[
                example_gen_pb2.Input.Split(name='s1', pattern='split1/*'),
                example_gen_pb2.Input.Split(name='s2', pattern='split2/*'),
            ]))
        input_params = self._executor_invocation.inputs.parameters
        input_params['input_config'].string_value = input_config_json

        # Invoke the driver
        invocation_json = json_format.MessageToJson(
            message=self._executor_invocation)
        driver.main(
            driver._parse_flags(
                ['--json_serialized_invocation_args', invocation_json]))

        # Check the output metadata file for the expected outputs
        with open(_TEST_OUTPUT_METADATA_JSON) as output_meta_json:
            output_json = output_meta_json.read()
        output_metadata = pipeline_pb2.ExecutorOutput()
        json_format.Parse(output_json,
                          output_metadata,
                          ignore_unknown_fields=True)
        output_params = output_metadata.parameters

        expected_fingerprint = (
            'split:s1,num_files:1,total_bytes:7,xor_checksum:1,sum_checksum:1\n'
            'split:s2,num_files:1,total_bytes:8,xor_checksum:3,sum_checksum:3')
        self.assertEqual(output_params['span'].string_value, '0')
        self.assertEqual(output_params['input_fingerprint'].string_value,
                         expected_fingerprint)
        self.assertEqual(output_params['input_config'].string_value,
                         input_config_json)