Example #1
def testPrepareExecution(self):
    with kubeflow_metadata_adapter.KubeflowMetadataAdapter(
            connection_config=self._connection_config) as m:
        contexts = m.register_contexts_if_not_exists(
            self._pipeline_info, self._component_info)
        exec_properties = {'arg_one': 1}
        os.environ['KFP_POD_NAME'] = 'fake_pod_name'
        m.register_execution(exec_properties=exec_properties,
                             pipeline_info=self._pipeline_info,
                             component_info=self._component_info,
                             contexts=contexts)
        [execution] = m.store.get_executions_by_context(contexts[0].id)
        self.assertProtoEquals(
             """
     id: 1
     type_id: 4
     properties {
       key: "state"
       value {
         string_value: "new"
       }
     }
     properties {
       key: "pipeline_name"
       value {
         string_value: "fake_pipeline_name"
       }
     }
     properties {
       key: "pipeline_root"
       value {
         string_value: "/fake_pipeline_root"
       }
     }
     properties {
       key: "run_id"
       value {
         string_value: "fake_run_id"
       }
     }
     properties {
       key: "component_id"
       value {
         string_value: "fake_component_id"
       }
     }
     properties {
       key: "arg_one"
       value {
         string_value: "1"
       }
     }
     properties {
       key: "kfp_pod_name"
       value {
         string_value: "fake_pod_name"
       }
     }""", execution)
Example #2
def main():
    # Log to the container's stdout so Kubeflow Pipelines UI can display logs to
    # the user.
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    logging.getLogger().setLevel(logging.INFO)

    parser = argparse.ArgumentParser()
    parser.add_argument('--pipeline_name', type=str, required=True)
    parser.add_argument('--pipeline_root', type=str, required=True)
    parser.add_argument('--kubeflow_metadata_config', type=str, required=True)
    parser.add_argument('--beam_pipeline_args', type=str, required=True)
    parser.add_argument('--additional_pipeline_args', type=str, required=True)
    parser.add_argument('--component_launcher_class_path',
                        type=str,
                        required=True)
    parser.add_argument('--enable_cache', action='store_true')
    parser.add_argument('--serialized_component', type=str, required=True)
    parser.add_argument('--component_config', type=str, required=True)

    args = parser.parse_args()

    component = json_utils.loads(args.serialized_component)
    component_config = json_utils.loads(args.component_config)
    component_launcher_class = import_utils.import_class_by_path(
        args.component_launcher_class_path)
    if not issubclass(component_launcher_class,
                      base_component_launcher.BaseComponentLauncher):
        raise TypeError(
            'component_launcher_class "%s" is not a subclass of '
            'base_component_launcher.BaseComponentLauncher' %
            component_launcher_class)

    kubeflow_metadata_config = kubeflow_pb2.KubeflowMetadataConfig()
    json_format.Parse(args.kubeflow_metadata_config, kubeflow_metadata_config)
    metadata_connection = kubeflow_metadata_adapter.KubeflowMetadataAdapter(
        _get_metadata_connection_config(kubeflow_metadata_config))
    driver_args = data_types.DriverArgs(enable_cache=args.enable_cache)

    beam_pipeline_args = _make_beam_pipeline_args(args.beam_pipeline_args)

    additional_pipeline_args = json.loads(args.additional_pipeline_args)

    launcher = component_launcher_class.create(
        component=component,
        pipeline_info=data_types.PipelineInfo(
            pipeline_name=args.pipeline_name,
            pipeline_root=args.pipeline_root,
            run_id=os.environ['WORKFLOW_ID']),
        driver_args=driver_args,
        metadata_connection=metadata_connection,
        beam_pipeline_args=beam_pipeline_args,
        additional_pipeline_args=additional_pipeline_args,
        component_config=component_config)

    execution_info = launcher.launch()

    # Dump the UI metadata.
    _dump_ui_metadata(component, execution_info)
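Unlike Example #4 below, which decodes --beam_pipeline_args with a plain json.loads, this variant routes the flag through a _make_beam_pipeline_args helper that the snippet doesn't define. A minimal sketch of what it plausibly does (decode the JSON list, with room for normalization), offered as an assumption rather than the original implementation:

import json
import logging
from typing import List

def _make_beam_pipeline_args(json_beam_pipeline_args: str) -> List[str]:
    """Sketch: turn the JSON-serialized CLI value into a list of Beam flags."""
    beam_pipeline_args = json.loads(json_beam_pipeline_args)
    # Hypothetical normalization hook: a real implementation could inject
    # container-specific defaults (e.g. a setup file) here before returning.
    logging.info('Beam pipeline args: %s', beam_pipeline_args)
    return beam_pipeline_args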
Example #3
def testIsEligiblePreviousExecution(self):
    with kubeflow_metadata_adapter.KubeflowMetadataAdapter(
            connection_config=self._connection_config) as m:
        contexts = m.register_contexts_if_not_exists(
            self._pipeline_info, self._component_info)
        exec_properties = {'arg_one': 1}
        os.environ['KFP_POD_NAME'] = 'fake_pod_name1'
        m.register_execution(exec_properties=exec_properties,
                             pipeline_info=self._pipeline_info,
                             component_info=self._component_info,
                             contexts=contexts)
        os.environ['KFP_POD_NAME'] = 'fake_pod_name2'
        m.register_execution(exec_properties=exec_properties,
                             pipeline_info=self._pipeline_info,
                             component_info=self._component_info,
                             contexts=contexts)
        [execution1,
         execution2] = m.store.get_executions_by_context(contexts[0].id)
        self.assertTrue(
            m._is_eligible_previous_execution(execution1, execution2))
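The adapter's _is_eligible_previous_execution is what lets caching ignore per-pod details: the two registered executions differ only in KFP_POD_NAME, yet the test expects them to match. A sketch of the comparison the test implies, written as a free function over MLMD Execution protos; this illustrates the idea and is not the adapter's verbatim code:

_PER_RUN_PROPERTY_KEYS = frozenset(['run_id', 'kfp_pod_name'])

def _is_eligible_previous_execution(execution1, execution2):
    """Sketch: treat executions as equal up to per-run properties."""
    def stable_properties(execution):
        # Keep only the properties that should be identical across re-runs.
        return {
            key: value.string_value
            for key, value in execution.properties.items()
            if key not in _PER_RUN_PROPERTY_KEYS
        }
    return (execution1.type_id == execution2.type_id and
            stable_properties(execution1) == stable_properties(execution2))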
Example #4
def main():
  # Log to the container's stdout so Kubeflow Pipelines UI can display logs to
  # the user.
  logging.basicConfig(stream=sys.stdout, level=logging.INFO)
  logging.getLogger().setLevel(logging.INFO)

  parser = argparse.ArgumentParser()
  parser.add_argument('--pipeline_name', type=str, required=True)
  parser.add_argument('--pipeline_root', type=str, required=True)
  parser.add_argument('--kubeflow_metadata_config', type=str, required=True)
  parser.add_argument('--beam_pipeline_args', type=str, required=True)
  parser.add_argument('--additional_pipeline_args', type=str, required=True)
  parser.add_argument(
      '--component_launcher_class_path', type=str, required=True)
  parser.add_argument('--enable_cache', action='store_true')
  parser.add_argument('--serialized_component', type=str, required=True)
  parser.add_argument('--component_config', type=str, required=True)

  args = parser.parse_args()

  component = json_utils.loads(args.serialized_component)
  component_config = json_utils.loads(args.component_config)
  component_launcher_class = import_utils.import_class_by_path(
      args.component_launcher_class_path)
  if not issubclass(component_launcher_class,
                    base_component_launcher.BaseComponentLauncher):
    raise TypeError(
        'component_launcher_class "%s" is not a subclass of '
        'base_component_launcher.BaseComponentLauncher' %
        component_launcher_class)

  kubeflow_metadata_config = kubeflow_pb2.KubeflowMetadataConfig()
  json_format.Parse(args.kubeflow_metadata_config, kubeflow_metadata_config)
  metadata_connection = kubeflow_metadata_adapter.KubeflowMetadataAdapter(
      _get_metadata_connection_config(kubeflow_metadata_config))
  driver_args = data_types.DriverArgs(enable_cache=args.enable_cache)

  beam_pipeline_args = json.loads(args.beam_pipeline_args)

  additional_pipeline_args = json.loads(args.additional_pipeline_args)

  launcher = component_launcher_class.create(
      component=component,
      pipeline_info=data_types.PipelineInfo(
          pipeline_name=args.pipeline_name,
          pipeline_root=args.pipeline_root,
          run_id=os.environ['WORKFLOW_ID']),
      driver_args=driver_args,
      metadata_connection=metadata_connection,
      beam_pipeline_args=beam_pipeline_args,
      additional_pipeline_args=additional_pipeline_args,
      component_config=component_config)

  # Attach necessary labels to distinguish different runner and DSL.
  # TODO(zhitaoli): Pass this from KFP runner side when the same container
  # entrypoint can be used by a different runner.
  with telemetry_utils.scoped_labels({
      telemetry_utils.LABEL_TFX_RUNNER: 'kfp',
  }):
    execution_info = launcher.launch()

  # Dump the UI metadata.
  _dump_ui_metadata(component, execution_info)
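Each of these entrypoints ends by calling _dump_ui_metadata, which is how the run surfaces information in the Kubeflow Pipelines UI; the snippets don't include it. A minimal sketch under the assumption that it follows the KFP UI metadata convention (an 'outputs' list written to /mlpipeline-ui-metadata.json); the markdown payload and the component.id attribute are illustrative assumptions:

import json

def _dump_ui_metadata(component, execution_info):
    """Sketch: write KFP UI metadata describing the finished component."""
    # Assumes the deserialized component exposes an `id` attribute.
    outputs = [{
        'type': 'markdown',
        'storage': 'inline',
        'source': 'Component `{}` finished.'.format(component.id),
    }]
    # /mlpipeline-ui-metadata.json is the well-known path the KFP UI reads.
    with open('/mlpipeline-ui-metadata.json', 'w') as f:
        json.dump({'outputs': outputs}, f)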
Example #5
def testPrepareExecution(self):
  with kubeflow_metadata_adapter.KubeflowMetadataAdapter(
      connection_config=self._connection_config) as m:
    contexts = m.register_pipeline_contexts_if_not_exists(self._pipeline_info)
    exec_properties = {'arg_one': 1}
    os.environ['KFP_POD_NAME'] = 'fake_pod_name'
    m.register_execution(
        exec_properties=exec_properties,
        pipeline_info=self._pipeline_info,
        component_info=self._component_info,
        contexts=contexts)
    [execution] = m.store.get_executions_by_context(contexts[0].id)
    # Skip verifying time-sensitive fields.
    execution.ClearField('create_time_since_epoch')
    execution.ClearField('last_update_time_since_epoch')
    self.assertProtoEquals(
         """
       id: 1
       type_id: 3
       last_known_state: RUNNING
       properties {
         key: "state"
         value {
           string_value: "new"
         }
       }
       properties {
         key: "pipeline_name"
         value {
           string_value: "fake_pipeline_name"
         }
       }
       properties {
         key: "pipeline_root"
         value {
           string_value: "/fake_pipeline_root"
         }
       }
       properties {
         key: "run_id"
         value {
           string_value: "fake_run_id"
         }
       }
       properties {
         key: "component_id"
         value {
           string_value: "fake_component_id"
         }
       }
       properties {
         key: "arg_one"
         value {
           string_value: "1"
         }
       }
       properties {
         key: "kfp_pod_name"
         value {
           string_value: "fake_pod_name"
         }
       }""", execution)
Example #6
def main():
    # Log to the container's stdout so Kubeflow Pipelines UI can display logs to
    # the user.
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    logging.getLogger().setLevel(logging.INFO)

    parser = argparse.ArgumentParser()
    parser.add_argument('--pipeline_root', type=str, required=True)
    parser.add_argument('--kubeflow_metadata_config', type=str, required=True)
    parser.add_argument('--serialized_component', type=str, required=True)
    parser.add_argument('--tfx_ir', type=str, required=True)
    parser.add_argument('--node_id', type=str, required=True)
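    # Monkey-patch the launcher module so executions are registered through
    # this entrypoint's _register_execution hook (defined elsewhere in this
    # file).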
    launcher._register_execution = _register_execution  # pylint: disable=protected-access

    args = parser.parse_args()

    tfx_ir = pipeline_pb2.Pipeline()
    json_format.Parse(args.tfx_ir, tfx_ir)
    # Substitute the runtime parameter with a concrete run_id.
    runtime_parameter_utils.substitute_runtime_parameter(
        tfx_ir, {
            constants.PIPELINE_RUN_ID_PARAMETER_NAME:
            os.environ['WORKFLOW_ID'],
        })

    deployment_config = runner_utils.extract_local_deployment_config(tfx_ir)

    kubeflow_metadata_config = kubeflow_pb2.KubeflowMetadataConfig()
    json_format.Parse(args.kubeflow_metadata_config, kubeflow_metadata_config)
    metadata_connection = kubeflow_metadata_adapter.KubeflowMetadataAdapter(
        _get_metadata_connection_config(kubeflow_metadata_config))

    node_id = args.node_id
    # Attach necessary labels to distinguish different runner and DSL.
    # TODO(zhitaoli): Pass this from KFP runner side when the same container
    # entrypoint can be used by a different runner.
    with telemetry_utils.scoped_labels({
            telemetry_utils.LABEL_TFX_RUNNER: 'kfp',
    }):
        custom_executor_operators = {
            executable_spec_pb2.ContainerExecutableSpec:
            kubernetes_executor_operator.KubernetesExecutorOperator
        }

        executor_spec = runner_utils.extract_executor_spec(
            deployment_config, node_id)
        custom_driver_spec = runner_utils.extract_custom_driver_spec(
            deployment_config, node_id)

        pipeline_node = _get_pipeline_node(tfx_ir, node_id)
        component_launcher = launcher.Launcher(
            pipeline_node=pipeline_node,
            mlmd_connection=metadata_connection,
            pipeline_info=tfx_ir.pipeline_info,
            pipeline_runtime_spec=tfx_ir.runtime_spec,
            executor_spec=executor_spec,
            custom_driver_spec=custom_driver_spec,
            custom_executor_operators=custom_executor_operators)
        logging.info('Component %s is running.', node_id)
        execution_info = component_launcher.launch()
        logging.info('Component %s is finished.', node_id)

    # Dump the UI metadata.
    _dump_ui_metadata(pipeline_node, execution_info)
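This IR-based entrypoint resolves the node to run with _get_pipeline_node, which the snippet doesn't define. A plausible sketch, assuming the standard pipeline_pb2.Pipeline IR layout in which each entry of pipeline.nodes wraps a pipeline_node; this is an illustration, not the original helper:

def _get_pipeline_node(pipeline, node_id):
    """Sketch: find the PipelineNode in the IR whose node id matches."""
    for node in pipeline.nodes:
        # Each repeated entry wraps either a pipeline_node or a sub-pipeline;
        # this sketch only handles plain nodes.
        if node.pipeline_node.node_info.id == node_id:
            return node.pipeline_node
    raise ValueError('Node %s not found in pipeline IR.' % node_id)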