def testDo(self):
    """Run the executor on 'train' statistics and expect a non-empty output dir.

    The statistics artifact points at ``<parent of this file's dir>/testdata/
    statistics_gen``; the schema artifact points at a per-test directory under
    ``TEST_UNDECLARED_OUTPUTS_DIR`` (or the test's temp dir when that
    environment variable is unset).
    """
    # Input artifact: the statistics fixture, restricted to the 'train' split.
    testdata_root = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), 'testdata')
    stats = standard_artifacts.ExampleStatistics()
    stats.uri = os.path.join(testdata_root, 'statistics_gen')
    stats.split_names = artifact_utils.encode_split_names(['train'])

    # Output artifact: isolated per test method via its name.
    output_root = os.environ.get(
        'TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir())
    schema_output = standard_artifacts.Schema()
    schema_output.uri = os.path.join(
        output_root, self._testMethodName, 'schema_output')

    inputs = {'stats': [stats]}
    outputs = {'output': [schema_output]}
    properties = {'infer_feature_shape': False}

    executor.Executor().Do(inputs, outputs, properties)

    # The only expectation here is that something was written to the URI.
    produced = tf.io.gfile.listdir(schema_output.uri)
    self.assertNotEqual(0, len(produced))
def testDoWithStatistics(self):
    """Run the executor with the fixture dicts and compare the schema.

    Uses ``self.input_dict``, ``self.output_dict`` and ``self.exec_properties``
    unchanged, then checks that the schema output directory is non-empty and
    that the result matches ``self.expected_schema``.
    """
    executor.Executor().Do(
        self.input_dict, self.output_dict, self.exec_properties)

    written = tf.io.gfile.listdir(self.schema_output.uri)
    self.assertNotEqual(0, len(written))
    self._assertSchemaEqual(self.expected_schema, self.schema_output)
def test_do(self):
    """Run the executor on the 'train' statistics artifact with no properties.

    The input is an ``ExampleStatsPath`` artifact for the ``train`` split whose
    URI is ``<parent of this file's dir>/testdata/statistics_gen/train/``. The
    test passes when the schema output directory is non-empty afterwards.
    """
    # Input artifact built with the generic Artifact type and a split tag.
    testdata_root = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), 'testdata')
    train_stats = types.Artifact('ExampleStatsPath', split='train')
    train_stats.uri = os.path.join(testdata_root, 'statistics_gen/train/')

    # Output artifact: isolated per test method via its name.
    output_root = os.environ.get(
        'TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir())
    schema_output = standard_artifacts.Schema()
    schema_output.uri = os.path.join(
        output_root, self._testMethodName, 'schema_output')

    inputs = {'stats': [train_stats]}
    outputs = {'output': [schema_output]}
    properties = {}

    executor.Executor().Do(inputs, outputs, properties)

    # NOTE(review): tf.gfile.ListDirectory is the TF 1.x spelling; confirm the
    # supported TensorFlow version before switching to tf.io.gfile.listdir.
    produced = tf.gfile.ListDirectory(schema_output.uri)
    self.assertNotEqual(0, len(produced))
def testDo(self):
    """Run the executor with the 'test' split excluded and expect output files.

    The statistics artifact declares the splits 'train', 'eval' and 'test';
    the exec properties pass ``['test']`` under ``EXCLUDE_SPLITS_KEY``. The
    test passes when the schema output directory is non-empty afterwards.
    """
    # Input artifact: the statistics fixture declaring three splits.
    testdata_root = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), 'testdata')
    stats = standard_artifacts.ExampleStatistics()
    stats.uri = os.path.join(testdata_root, 'statistics_gen')
    stats.split_names = artifact_utils.encode_split_names(
        ['train', 'eval', 'test'])

    # Output artifact: isolated per test method via its name.
    output_root = os.environ.get(
        'TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir())
    schema_output = standard_artifacts.Schema()
    schema_output.uri = os.path.join(
        output_root, self._testMethodName, 'schema_output')

    inputs = {standard_component_specs.STATISTICS_KEY: [stats]}
    # The split list is handed to Do in serialized form, not as a raw list.
    excluded_splits = json_utils.dumps(['test'])
    properties = {standard_component_specs.EXCLUDE_SPLITS_KEY: excluded_splits}
    outputs = {standard_component_specs.SCHEMA_KEY: [schema_output]}

    executor.Executor().Do(inputs, outputs, properties)

    produced = fileio.listdir(schema_output.uri)
    self.assertNotEqual(0, len(produced))
def testDoWithSchema(self):
    """Run the executor with a schema input in place of statistics.

    Replaces the 'stats' entry of ``self.input_dict`` with a 'schema' entry
    holding ``self.schema``, then checks that the output directory is
    non-empty and that the output schema equals the supplied one.
    """
    # Swap the inputs: add the schema first, then drop the statistics entry.
    self.input_dict.update({'schema': [self.schema]})
    self.input_dict.pop('stats')

    executor.Executor().Do(
        self.input_dict, self.output_dict, self.exec_properties)

    written = tf.io.gfile.listdir(self.schema_output.uri)
    self.assertNotEqual(0, len(written))
    self._assertSchemaEqual(self.schema, self.schema_output)
def testDoWithNonExistentSchema(self):
    """Expect ``Do`` to raise ValueError when the schema URI does not exist.

    Replaces the 'stats' entry of ``self.input_dict`` with a 'schema' entry
    whose artifact URI is a path that is not expected to exist, then asserts
    that running the executor raises ``ValueError``.
    """
    non_existent_schema = standard_artifacts.Schema()
    non_existent_schema.uri = '/path/to/non_existent/schema'
    self.input_dict['schema'] = [non_existent_schema]
    self.input_dict.pop('stats')

    # Build the executor outside the assertRaises block so that only the Do()
    # call is under assertion; a ValueError raised during construction would
    # otherwise make this test pass for the wrong reason.
    schema_gen_executor = executor.Executor()
    with self.assertRaises(ValueError):
        schema_gen_executor.Do(
            self.input_dict, self.output_dict, self.exec_properties)