예제 #1
0
  def testPrepareOutputArtifacts(self):
    examples = standard_artifacts.Examples()
    output_dict = {utils.EXAMPLES_KEY: channel_utils.as_channel([examples])}
    exec_properties = {
        utils.SPAN_PROPERTY_NAME: 2,
        utils.VERSION_PROPERTY_NAME: 1,
        utils.FINGERPRINT_PROPERTY_NAME: 'fp'
    }

    pipeline_info = data_types.PipelineInfo(
        pipeline_name='name', pipeline_root=self._test_dir, run_id='rid')
    component_info = data_types.ComponentInfo(
        component_type='type', component_id='cid', pipeline_info=pipeline_info)

    input_artifacts = {}
    output_artifacts = self._example_gen_driver._prepare_output_artifacts(  # pylint: disable=protected-access
        input_artifacts, output_dict, exec_properties, 1, pipeline_info,
        component_info)
    examples = artifact_utils.get_single_instance(
        output_artifacts[utils.EXAMPLES_KEY])
    base_output_dir = os.path.join(self._test_dir, component_info.component_id)
    expected_uri = base_driver._generate_output_uri(  # pylint: disable=protected-access
        base_output_dir, 'examples', 1)

    self.assertEqual(examples.uri, expected_uri)
    self.assertEqual(
        examples.get_string_custom_property(utils.FINGERPRINT_PROPERTY_NAME),
        'fp')
    self.assertEqual(
        examples.get_string_custom_property(utils.SPAN_PROPERTY_NAME), '2')
    self.assertEqual(
        examples.get_string_custom_property(utils.VERSION_PROPERTY_NAME), '1')
예제 #2
0
파일: base_handler.py 프로젝트: 00mjk/tfx
    def _read_schema_from_pipeline_root(self, pipeline_name, pipeline_root):
        # Check if pipeline root created. If not, it means that the user has not
        # created a run yet or the pipeline is still running for the first time.

        if not fileio.exists(pipeline_root):
            sys.exit(
                'Create a run before inferring schema. If pipeline is already running, then wait for it to successfully finish.'
            )

        # If pipeline_root exists, then check if SchemaGen output exists.
        components = fileio.listdir(pipeline_root)
        if 'SchemaGen' not in components:
            sys.exit(
                'Either SchemaGen component does not exist or pipeline is still running. If pipeline is running, then wait for it to successfully finish.'
            )

        # Get the latest SchemaGen output.
        component_output_dir = os.path.join(pipeline_root, 'SchemaGen')
        schema_dir = os.path.join(component_output_dir, 'schema')
        schemagen_outputs = fileio.listdir(schema_dir)
        latest_schema_folder = max(schemagen_outputs, key=int)

        # Copy schema to current dir.
        latest_schema_uri = base_driver._generate_output_uri(  # pylint: disable=protected-access
            component_output_dir, 'schema', int(latest_schema_folder))
        latest_schema_path = os.path.join(latest_schema_uri, 'schema.pbtxt')
        curr_dir_path = os.path.join(os.getcwd(), 'schema.pbtxt')
        io_utils.copy_file(latest_schema_path, curr_dir_path, overwrite=True)

        # Print schema and path to schema
        click.echo('Path to schema: {}'.format(curr_dir_path))
        click.echo('*********SCHEMA FOR {}**********'.format(
            pipeline_name.upper()))
        with open(curr_dir_path, 'r') as f:
            click.echo(f.read())
예제 #3
0
    def testPipelineSchemaSuccessfulRun(self):
        # First create a pipeline.
        flags_dict = {
            labels.ENGINE_FLAG: self.engine,
            labels.PIPELINE_DSL_PATH: self.pipeline_path
        }
        handler = beam_handler.BeamHandler(flags_dict)
        handler.create_pipeline()

        flags_dict = {
            labels.ENGINE_FLAG: self.engine,
            labels.PIPELINE_NAME: self.pipeline_name,
        }
        handler = beam_handler.BeamHandler(flags_dict)
        # Create fake schema in pipeline root.
        component_output_dir = os.path.join(self.pipeline_root, 'SchemaGen')
        schema_path = base_driver._generate_output_uri(  # pylint: disable=protected-access
            component_output_dir, 'schema', 3)

        fileio.makedirs(schema_path)
        with open(os.path.join(schema_path, 'schema.pbtxt'), 'w') as f:
            f.write('SCHEMA')
        with self.captureWritesToStream(sys.stdout) as captured:
            handler.get_schema()
            curr_dir_path = os.path.abspath('schema.pbtxt')
            self.assertIn('Path to schema: {}'.format(curr_dir_path),
                          captured.contents())
            self.assertIn(
                '*********SCHEMA FOR {}**********'.format(
                    self.pipeline_name.upper()), captured.contents())
            self.assertTrue(fileio.exists(curr_dir_path))
예제 #4
0
파일: e2etest.py 프로젝트: NeoTim/nitroml
    def artifact_dir(self,
                     artifact_root: str,
                     artifact_subdir: str = "") -> str:
        """Returns the full path to the artifact subdir under the pipeline root.

    For example to get the transformed examples for the train split:

      `self.artifact_dir('Transform.AutoData/transformed_examples', 'train/*')`

    would return the path

      '<pipeline_root>/Transform.AutoData/transformed_examples/4/train/*'.

    Assumes there is a single execution per component.

    Args:
      artifact_root: Root subdirectory to specify the component's artifacts.
      artifact_subdir: Optional subdirectory to append after the execution
        ID.

    Returns:
      The full path to the artifact subdir under the pipeline root.
    """

        root = os.path.join(self.pipeline_root, artifact_root)
        artifact_subdirs = tf.io.gfile.listdir(root)
        if len(artifact_subdirs) != 1:
            raise ValueError(
                f"Expected a single artifact dir, got: {artifact_subdirs}")
        base_uri = base_driver._generate_output_uri(  # pylint: disable=protected-access
            self.pipeline_root, artifact_root, artifact_subdirs[0])
        return os.path.join(base_uri, artifact_subdir)
 def assertInfraValidatorPassed(self) -> None:
   infra_validator_path = os.path.join(self._pipeline_root, 'InfraValidator')
   blessing_path = os.path.join(self._pipeline_root, 'InfraValidator',
                                'blessing')
   executions = fileio.listdir(blessing_path)
   self.assertGreaterEqual(len(executions), 1)
   for exec_id in executions:
     blessing_uri = base_driver._generate_output_uri(  # pylint: disable=protected-access
         infra_validator_path, 'blessing', exec_id)
     blessed = os.path.join(blessing_uri, 'INFRA_BLESSED')
     self.assertTrue(fileio.exists(blessed))
예제 #6
0
    def testGetSchema(self):
        flags_dict = {
            labels.ENGINE_FLAG: self.engine,
            labels.PIPELINE_DSL_PATH: self.pipeline_path,
            labels.ENDPOINT: self.endpoint,
            labels.IAP_CLIENT_ID: self.iap_client_id,
            labels.NAMESPACE: self.namespace,
            labels.PIPELINE_PACKAGE_PATH: self.pipeline_package_path
        }
        handler = kubeflow_handler.KubeflowHandler(flags_dict)
        handler.create_pipeline()

        flags_dict = {
            labels.ENGINE_FLAG: self.engine,
            labels.PIPELINE_NAME: self.pipeline_name,
        }

        # No pipeline root
        handler = kubeflow_handler.KubeflowHandler(flags_dict)
        with self.assertRaises(SystemExit) as err:
            handler.get_schema()
        self.assertEqual(
            str(err.exception),
            'Create a run before inferring schema. If pipeline is already running, then wait for it to successfully finish.'
        )

        # No SchemaGen output.
        fileio.makedirs(self.pipeline_root)
        with self.assertRaises(SystemExit) as err:
            handler.get_schema()
        self.assertEqual(
            str(err.exception),
            'Either SchemaGen component does not exist or pipeline is still running. If pipeline is running, then wait for it to successfully finish.'
        )

        # Successful pipeline run.
        # Create fake schema in pipeline root.
        component_output_dir = os.path.join(self.pipeline_root, 'SchemaGen')
        schema_path = base_driver._generate_output_uri(  # pylint: disable=protected-access
            component_output_dir, 'schema', 3)
        fileio.makedirs(schema_path)
        with open(os.path.join(schema_path, 'schema.pbtxt'), 'w') as f:
            f.write('SCHEMA')
        with self.captureWritesToStream(sys.stdout) as captured:
            handler.get_schema()
            curr_dir_path = os.path.join(os.getcwd(), 'schema.pbtxt')
            self.assertIn('Path to schema: {}'.format(curr_dir_path),
                          captured.contents())
            self.assertIn(
                '*********SCHEMA FOR {}**********'.format(
                    self.pipeline_name.upper()), captured.contents())
            self.assertTrue(fileio.exists(curr_dir_path))
예제 #7
0
    def get_schema(self):
        pipeline_name = self.flags_dict[labels.PIPELINE_NAME]

        # Check if pipeline exists.
        self._check_pipeline_existence(pipeline_name)

        # Path to pipeline args.
        pipeline_args_path = os.path.join(
            self._handler_home_dir, self.flags_dict[labels.PIPELINE_NAME],
            'pipeline_args.json')

        # Get pipeline_root.
        with open(pipeline_args_path, 'r') as f:
            pipeline_args = json.load(f)

        # Check if pipeline root created. If not, it means that the user has not
        # created a run yet or the pipeline is still running for the first time.
        pipeline_root = pipeline_args[labels.PIPELINE_ROOT]
        if not tf.io.gfile.exists(pipeline_root):
            sys.exit(
                'Create a run before inferring schema. If pipeline is already running, then wait for it to successfully finish.'
            )

        # If pipeline_root exists, then check if SchemaGen output exists.
        components = tf.io.gfile.listdir(pipeline_root)
        if 'SchemaGen' not in components:
            sys.exit(
                'Either SchemaGen component does not exist or pipeline is still running. If pipeline is running, then wait for it to successfully finish.'
            )

        # Get the latest SchemaGen output.
        component_output_dir = os.path.join(pipeline_root, 'SchemaGen')
        schema_dir = os.path.join(component_output_dir, 'schema')
        schemagen_outputs = tf.io.gfile.listdir(schema_dir)
        latest_schema_folder = max(schemagen_outputs, key=int)

        # Copy schema to current dir.
        latest_schema_uri = base_driver._generate_output_uri(  # pylint: disable=protected-access
            component_output_dir, 'schema', latest_schema_folder)
        latest_schema_path = os.path.join(latest_schema_uri, 'schema.pbtxt')
        curr_dir_path = os.path.join(os.getcwd(), 'schema.pbtxt')
        io_utils.copy_file(latest_schema_path, curr_dir_path, overwrite=True)

        # Print schema and path to schema
        click.echo('Path to schema: {}'.format(curr_dir_path))
        click.echo('*********SCHEMA FOR {}**********'.format(
            pipeline_name.upper()))
        with open(curr_dir_path, 'r') as f:
            click.echo(f.read())
예제 #8
0
    def testGetSchema(self, mock_extract_pipeline_args):
        temp_pipeline_root = os.path.join(self.tmp_dir, 'pipeline_root')
        mock_extract_pipeline_args.return_value = {
            labels.PIPELINE_NAME: self.pipeline_name,
            labels.PIPELINE_ROOT: temp_pipeline_root
        }

        flags_dict = {
            labels.ENGINE_FLAG: self.engine,
            labels.PIPELINE_NAME: self.pipeline_name,
        }
        handler = kubeflow_handler.KubeflowHandler(flags_dict)

        # No pipeline root
        with self.assertRaises(SystemExit) as err:
            handler.get_schema()
        self.assertEqual(
            str(err.exception),
            'Create a run before inferring schema. If pipeline is already running, then wait for it to successfully finish.'
        )

        # No SchemaGen output.
        fileio.makedirs(temp_pipeline_root)
        with self.assertRaises(SystemExit) as err:
            handler.get_schema()
        self.assertEqual(
            str(err.exception),
            'Either SchemaGen component does not exist or pipeline is still running. If pipeline is running, then wait for it to successfully finish.'
        )

        # Successful pipeline run.
        # Create fake schema in pipeline root.
        component_output_dir = os.path.join(temp_pipeline_root, 'SchemaGen')
        schema_path = base_driver._generate_output_uri(  # pylint: disable=protected-access
            component_output_dir, 'schema', 3)
        fileio.makedirs(schema_path)
        with open(os.path.join(schema_path, 'schema.pbtxt'), 'w') as f:
            f.write('SCHEMA')
        with self.captureWritesToStream(sys.stdout) as captured:
            handler.get_schema()
            curr_dir_path = os.path.join(os.getcwd(), 'schema.pbtxt')
            self.assertIn('Path to schema: {}'.format(curr_dir_path),
                          captured.contents())
            self.assertIn(
                '*********SCHEMA FOR {}**********'.format(
                    self.pipeline_name.upper()), captured.contents())
            self.assertTrue(fileio.exists(curr_dir_path))
예제 #9
0
파일: driver.py 프로젝트: kp425/tfx
    def _prepare_output_artifacts(
        self,
        input_artifacts: Dict[Text, List[types.Artifact]],
        output_dict: Dict[Text, types.Channel],
        exec_properties: Dict[Text, Any],
        execution_id: int,
        pipeline_info: data_types.PipelineInfo,
        component_info: data_types.ComponentInfo,
    ) -> Dict[Text, List[types.Artifact]]:
        """Overrides BaseDriver._prepare_output_artifacts()."""
        del input_artifacts

        example_artifact = output_dict[utils.EXAMPLES_KEY].type()
        base_output_dir = os.path.join(pipeline_info.pipeline_root,
                                       component_info.component_id)

        example_artifact.uri = base_driver._generate_output_uri(  # pylint: disable=protected-access
            base_output_dir, utils.EXAMPLES_KEY, execution_id)
        update_output_artifact(exec_properties, example_artifact.mlmd_artifact)
        base_driver._prepare_output_paths(example_artifact)  # pylint: disable=protected-access

        return {utils.EXAMPLES_KEY: [example_artifact]}