def testTaxiPipelineConstructionAndDefinitionFileExists(self):
        logical_pipeline = taxi_pipeline_kubeflow_simple._create_pipeline(
            pipeline_name=taxi_pipeline_kubeflow_simple._pipeline_name,
            pipeline_root=taxi_pipeline_kubeflow_simple._pipeline_root,
            data_root=taxi_pipeline_kubeflow_simple._data_root,
            module_file=taxi_pipeline_kubeflow_simple._module_file,
            serving_model_dir=taxi_pipeline_kubeflow_simple._serving_model_dir,
            direct_num_workers=1)
        self.assertEqual(9, len(logical_pipeline.components))

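        # KubeflowDagRunner compiles the logical pipeline into a
        # <pipeline_name>.tar.gz KFP package; the test's setUp (not shown
        # here) directs that output to self._tmp_dir.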
        KubeflowDagRunner().run(logical_pipeline)
        file_path = os.path.join(
            self._tmp_dir, 'chicago_taxi_pipeline_kubeflow_simple.tar.gz')
        self.assertTrue(tf.io.gfile.exists(file_path))
    def testTaxiPipelineConstructionAndDefinitionFileExists(self):
        logical_pipeline = taxi_pipeline_kubeflow_local._create_pipeline(
            pipeline_name=taxi_pipeline_kubeflow_local._pipeline_name,
            pipeline_root=taxi_pipeline_kubeflow_local._pipeline_root,
            data_root=taxi_pipeline_kubeflow_local._data_root,
            module_file=taxi_pipeline_kubeflow_local._module_file,
            serving_model_dir=taxi_pipeline_kubeflow_local._serving_model_dir,
            beam_pipeline_args=[])
        self.assertEqual(10, len(logical_pipeline.components))

        KubeflowDagRunner().run(logical_pipeline)
        file_path = os.path.join(
            self.tmp_dir, 'chicago_taxi_pipeline_kubeflow_local.tar.gz')
        self.assertTrue(fileio.exists(file_path))
    def testTaxiPipelineConstructionAndDefinitionFileExists(self):
        logical_pipeline = taxi_pipeline_kubeflow_gcp.create_pipeline(
            pipeline_name=taxi_pipeline_kubeflow_gcp._pipeline_name,
            pipeline_root=taxi_pipeline_kubeflow_gcp._pipeline_root,
            module_file=taxi_pipeline_kubeflow_gcp._module_file,
            ai_platform_training_args=taxi_pipeline_kubeflow_gcp.
            _ai_platform_training_args,
            ai_platform_serving_args=taxi_pipeline_kubeflow_gcp.
            _ai_platform_serving_args)
        self.assertEqual(9, len(logical_pipeline.components))

        KubeflowDagRunner().run(logical_pipeline)
        file_path = os.path.join(self._tmp_dir,
                                 'chicago_taxi_pipeline_kubeflow_gcp.tar.gz')
        self.assertTrue(tf.io.gfile.exists(file_path))
    def testTaxiPipelineConstructionAndDefinitionFileExists(self):
        logical_pipeline = taxi_pipeline_kubeflow_simple._create_pipeline(
            pipeline_name=taxi_pipeline_kubeflow_simple._pipeline_name,
            pipeline_root=taxi_pipeline_kubeflow_simple._pipeline_root,
            query=taxi_pipeline_kubeflow_simple._query,
            module_file=taxi_pipeline_kubeflow_simple._module_file,
            beam_pipeline_args=taxi_pipeline_kubeflow_simple.
            _beam_pipeline_args,
        )
        self.assertEqual(9, len(logical_pipeline.components))

        KubeflowDagRunner().run(logical_pipeline)
        file_path = os.path.join(
            self._tmp_dir, 'chicago_taxi_pipeline_kubeflow_simple.tar.gz')
        self.assertTrue(tf.io.gfile.exists(file_path))
    def testPipelineConstruction(self):
        logical_pipeline = penguin_pipeline_sklearn_gcp._create_pipeline(
            pipeline_name=self._pipeline_name,
            pipeline_root=self._pipeline_root,
            data_root=self._data_root,
            trainer_module_file=self._trainer_module_file,
            evaluator_module_file=self._evaluator_module_file,
            ai_platform_training_args=self._ai_platform_training_args,
            ai_platform_serving_args=self._ai_platform_serving_args,
            beam_pipeline_args=[])
        self.assertEqual(8, len(logical_pipeline.components))

        KubeflowDagRunner().run(logical_pipeline)
        file_path = os.path.join(self.tmp_dir, 'sklearn_test.tar.gz')
        self.assertTrue(fileio.exists(file_path))
    def testPenguinPipelineConstructionAndDefinitionFileExists(self):
        logical_pipeline = penguin_pipeline_kubeflow_gcp.create_pipeline(
            pipeline_name=penguin_pipeline_kubeflow_gcp._pipeline_name,
            pipeline_root=penguin_pipeline_kubeflow_gcp._pipeline_root,
            data_root=penguin_pipeline_kubeflow_gcp._data_root,
            module_file=penguin_pipeline_kubeflow_gcp._module_file,
            enable_tuning=True,
            ai_platform_training_args=penguin_pipeline_kubeflow_gcp.
            _ai_platform_training_args,
            ai_platform_serving_args=penguin_pipeline_kubeflow_gcp.
            _ai_platform_serving_args,
            beam_pipeline_args=penguin_pipeline_kubeflow_gcp.
            _beam_pipeline_args)
        self.assertEqual(10, len(logical_pipeline.components))

        KubeflowDagRunner().run(logical_pipeline)
        file_path = os.path.join(self.tmp_dir, 'penguin_kubeflow_gcp.tar.gz')
        self.assertTrue(fileio.exists(file_path))
    def testPipelineConstruction(self, resolve_mock):
        # Avoid actually performing user module packaging because the relative
        # path is not valid with respect to the temporary directory.
        resolve_mock.side_effect = lambda pipeline_root: None

        logical_pipeline = penguin_pipeline_sklearn_gcp._create_pipeline(
            pipeline_name=self._pipeline_name,
            pipeline_root=self._pipeline_root,
            data_root=self._data_root,
            trainer_module_file=self._trainer_module_file,
            evaluator_module_file=self._evaluator_module_file,
            ai_platform_training_args=self._ai_platform_training_args,
            ai_platform_serving_args=self._ai_platform_serving_args,
            beam_pipeline_args=[])
        self.assertEqual(8, len(logical_pipeline.components))

        KubeflowDagRunner().run(logical_pipeline)
        file_path = os.path.join(self.tmp_dir, 'sklearn_test.tar.gz')
        self.assertTrue(fileio.exists(file_path))
    def testTaxiPipelineConstructionAndDefinitionFileExists(
            self, resolve_mock):
        # Avoid actually performing user module packaging because a placeholder
        # GCS bucket is used.
        resolve_mock.side_effect = lambda pipeline_root: None

        logical_pipeline = taxi_pipeline_kubeflow_local._create_pipeline(
            pipeline_name=taxi_pipeline_kubeflow_local._pipeline_name,
            pipeline_root=taxi_pipeline_kubeflow_local._pipeline_root,
            data_root=taxi_pipeline_kubeflow_local._data_root,
            module_file=taxi_pipeline_kubeflow_local._module_file,
            serving_model_dir=taxi_pipeline_kubeflow_local._serving_model_dir,
            beam_pipeline_args=[])
        self.assertEqual(10, len(logical_pipeline.components))

        KubeflowDagRunner().run(logical_pipeline)
        file_path = os.path.join(
            self.tmp_dir, 'chicago_taxi_pipeline_kubeflow_local.tar.gz')
        self.assertTrue(fileio.exists(file_path))
  def testPenguinPipelineConstructionAndDefinitionFileExists(
      self, resolve_mock):
    # Avoid actually performing user module packaging because a placeholder
    # GCS bucket is used.
    resolve_mock.side_effect = lambda pipeline_root: None

    logical_pipeline = penguin_pipeline_kubeflow_gcp.create_pipeline(
        pipeline_name=penguin_pipeline_kubeflow_gcp._pipeline_name,
        pipeline_root=penguin_pipeline_kubeflow_gcp._pipeline_root,
        data_root=penguin_pipeline_kubeflow_gcp._data_root,
        module_file=penguin_pipeline_kubeflow_gcp._module_file,
        enable_tuning=True,
        ai_platform_training_args=penguin_pipeline_kubeflow_gcp
        ._ai_platform_training_args,
        ai_platform_serving_args=penguin_pipeline_kubeflow_gcp
        ._ai_platform_serving_args,
        beam_pipeline_args=penguin_pipeline_kubeflow_gcp._beam_pipeline_args)
    self.assertEqual(10, len(logical_pipeline.components))

    KubeflowDagRunner().run(logical_pipeline)
    file_path = os.path.join(self.tmp_dir, 'penguin_kubeflow_gcp.tar.gz')
    self.assertTrue(fileio.exists(file_path))
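
The tests above receive a resolve_mock argument, but the @mock.patch decorator that injects it was dropped when these snippets were extracted. Below is a minimal, self-contained sketch of the assumed wiring; _resolve_user_module_package is a hypothetical stand-in for whatever packaging helper the real tests patch:

from unittest import mock

import tensorflow as tf


def _resolve_user_module_package(pipeline_root):
  # Hypothetical stand-in for the packaging helper the real tests patch.
  raise RuntimeError('would build and upload a user module package')


class _WiringExample(tf.test.TestCase):

  # mock.patch swaps the helper out for a MagicMock for the duration of
  # the test and passes that mock in as the extra `resolve_mock` argument.
  @mock.patch(__name__ + '._resolve_user_module_package')
  def testPackagingIsSkipped(self, resolve_mock):
    # Same no-op used by the tests above: accept the call, do nothing.
    resolve_mock.side_effect = lambda pipeline_root: None
    _resolve_user_module_package('gs://placeholder/root')
    resolve_mock.assert_called_once()


if __name__ == '__main__':
  tf.test.main()
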
# Assumed imports for this snippet (older TFX APIs, in which csv_input and
# CsvExampleGen(input=...) were current); _data_root, _pipeline_root,
# _project_id, _output_dir, and _gcp_region are module-level constants
# defined elsewhere in the example.
import os

from tfx.components import CsvExampleGen
from tfx.components import SchemaGen
from tfx.components import StatisticsGen
from tfx.orchestration import pipeline
from tfx.orchestration.kubeflow.kubeflow_dag_runner import KubeflowDagRunner
from tfx.utils.dsl_utils import csv_input


def _create_pipeline():
  """Implements a three-component Chicago taxi pipeline for Kubeflow."""
  examples = csv_input(_data_root)

  # Brings data into the pipeline or otherwise joins/converts training data.
  example_gen = CsvExampleGen(input=examples)

  # Computes statistics over data for visualization and example validation.
  statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])

  # Generates schema based on statistics files.
  infer_schema = SchemaGen(statistics=statistics_gen.outputs['statistics'])

  return pipeline.Pipeline(
      pipeline_name='chicago_taxi_pipeline_kubeflow',
      pipeline_root=_pipeline_root,
      components=[example_gen, statistics_gen, infer_schema],
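      # The beam_pipeline_args below are forwarded to each Beam-powered
      # component, sending its data processing to Dataflow.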
      additional_pipeline_args={
          'beam_pipeline_args': [
              '--runner=DataflowRunner',
              '--experiments=shuffle_mode=auto',
              '--project=' + _project_id,
              '--temp_location=' + os.path.join(_output_dir, 'tmp'),
              '--region=' + _gcp_region,
          ],
      },
      log_root='/var/tmp/tfx/logs',
  )


_ = KubeflowDagRunner().run(_create_pipeline())
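
Running this module compiles the pipeline: by default, KubeflowDagRunner writes a <pipeline_name>.tar.gz package (here chicago_taxi_pipeline_kubeflow.tar.gz) to the current working directory, which is the same artifact the tests above check for with fileio.exists / tf.io.gfile.exists. The tarball can then be uploaded to a Kubeflow Pipelines deployment.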