Example 1
    def schedule_pipeline(self, experiment_id, job_name, pipeline_package_path=None, params=None, pipeline_id=None, namespace=None):
        """Schedule pipeline on kubeflow to run based upon a cron job.

        Arguments:
            experiment_id {string} -- The experiment within which the scheduled job is created
            job_name {string} -- The name of the scheduled job

        Keyword Arguments:
            pipeline_package_path {string} -- The path to the pipeline package (default: {None})
            params {dict} -- The pipeline parameters (default: {None}, treated as empty)
            pipeline_id {string} -- The id of the pipeline which should run on schedule (default: {None})
            namespace {string} -- The namespace with which the pipeline should run (default: {None})

        Returns:
            The API response from creating the scheduled job.
        """
        # Fix: avoid a shared mutable default argument (`params={}`).
        if params is None:
            params = {}

        # If a local package is supplied, embed its workflow manifest in the spec.
        pipeline_json_string = None
        if pipeline_package_path:
            pipeline_obj = self._extract_pipeline_yaml(pipeline_package_path)
            pipeline_json_string = json.dumps(pipeline_obj)
        api_params = [kfp_server_api.ApiParameter(
            name=sanitize_k8s_name(name=k, allow_capital_underscore=True),
            value=str(v)) for k, v in params.items()]
        resource_references = []

        # The experiment owns the scheduled job.
        key = kfp_server_api.models.ApiResourceKey(id=experiment_id,
                                                   type=kfp_server_api.models.ApiResourceType.EXPERIMENT)
        reference = kfp_server_api.models.ApiResourceReference(key=key,
                                                               relationship=kfp_server_api.models.ApiRelationship.OWNER)
        resource_references.append(reference)
        if namespace is not None:
            key = kfp_server_api.models.ApiResourceKey(id=namespace,
                                                       type=kfp_server_api.models.ApiResourceType.NAMESPACE)
            reference = kfp_server_api.models.ApiResourceReference(key=key,
                                                                   name=namespace,
                                                                   relationship=kfp_server_api.models.ApiRelationship.OWNER)
            resource_references.append(reference)
        spec = kfp_server_api.models.ApiPipelineSpec(
            pipeline_id=pipeline_id,
            workflow_manifest=pipeline_json_string,
            parameters=api_params)

        # Fixed cron schedule: 09:00 on days 2-6 of the week.
        trigger = kfp_server_api.models.api_cron_schedule.ApiCronSchedule(
            cron="0 0 9 ? * 2-6")
        # Random 10-character alphanumeric id for the job.
        job_id = ''.join(random.choices(
            string.ascii_uppercase + string.digits, k=10))
        schedule_body = kfp_server_api.models.ApiJob(
            id=job_id,
            name=job_name,  # fix: was hard-coded to "TestScheduling", ignoring job_name
            description="Schedule the pipeline using the API",
            pipeline_spec=spec,
            resource_references=resource_references,
            max_concurrency=10,
            trigger=trigger,
            enabled=True,
        )
        # Fix: the job body was built but never submitted to the server.
        return self._job_api.create_job(body=schedule_body)
Example 2
  def run_pipeline(self, experiment_id, job_name, pipeline_package_path=None, params=None, pipeline_id=None, namespace=None):
    """Run a specified pipeline.

    Args:
      experiment_id: The string id of an experiment.
      job_name: name of the job.
      pipeline_package_path: local path of the pipeline package(the filename should end with one of the following .tar.gz, .tgz, .zip, .yaml, .yml).
      params: a dictionary with key (string) as param name and value (string) as param value.
      pipeline_id: the string ID of a pipeline.
      namespace: kubernetes namespace where the pipeline runs are created.
        For single user deployment, leave it as None;
        For multi user, input a namespace where the user is authorized

    Returns:
      A run object. Most important field is id.
    """
    # Fix: avoid a shared mutable default argument (`params={}`).
    if params is None:
      params = {}

    # If a local package is supplied, embed its workflow manifest in the spec.
    pipeline_json_string = None
    if pipeline_package_path:
      pipeline_obj = self._extract_pipeline_yaml(pipeline_package_path)
      pipeline_json_string = json.dumps(pipeline_obj)
    api_params = [kfp_server_api.ApiParameter(
        name=sanitize_k8s_name(name=k, allow_capital_underscore=True),
        value=str(v)) for k, v in params.items()]
    resource_references = []

    # The experiment owns the run.
    key = kfp_server_api.models.ApiResourceKey(id=experiment_id,
                                        type=kfp_server_api.models.ApiResourceType.EXPERIMENT)
    reference = kfp_server_api.models.ApiResourceReference(key=key,
                                                           relationship=kfp_server_api.models.ApiRelationship.OWNER)
    resource_references.append(reference)
    if namespace is not None:
      key = kfp_server_api.models.ApiResourceKey(id=namespace,
                                                 type=kfp_server_api.models.ApiResourceType.NAMESPACE)
      reference = kfp_server_api.models.ApiResourceReference(key=key,
                                                             name=namespace,
                                                             relationship=kfp_server_api.models.ApiRelationship.OWNER)
      resource_references.append(reference)
    spec = kfp_server_api.models.ApiPipelineSpec(
        pipeline_id=pipeline_id,
        workflow_manifest=pipeline_json_string,
        parameters=api_params)
    run_body = kfp_server_api.models.ApiRun(
        pipeline_spec=spec, resource_references=resource_references, name=job_name)

    response = self._run_api.create_run(body=run_body)

    # In a notebook, render a clickable link to the run's detail page.
    if self._is_ipython():
      import IPython
      html = ('Run link <a href="%s/#/runs/details/%s" target="_blank" >here</a>'
              % (self._get_url_prefix(), response.run.id))
      IPython.display.display(IPython.display.HTML(html))
    return response.run
Example 3
    def _create_job_config(self, experiment_id, params, pipeline_package_path,
                           pipeline_id, version_id):
        """Create a JobConfig with spec and resource_references.

    Args:
      experiment_id: The id of an experiment.
      pipeline_package_path: Local path of the pipeline package(the filename should end with one of the following .tar.gz, .tgz, .zip, .yaml, .yml).
      params: A dictionary with key (string) as param name and value (string) as param value.
      pipeline_id: The id of a pipeline.
      version_id: The id of a pipeline version.
        If both pipeline_id and version_id are specified, version_id will take precedence.
        If only pipeline_id is specified, the default version of this pipeline is used to create the run.

    Returns:
      A JobConfig object with attributes spec and resource_reference.
    """
        # Lightweight result holder; only spec and resource_references are needed.
        class JobConfig:
            def __init__(self, spec, resource_references):
                self.spec = spec
                self.resource_references = resource_references

        # If a local package is supplied, embed its workflow manifest in the spec.
        pipeline_json_string = None
        if pipeline_package_path:
            pipeline_obj = self._extract_pipeline_yaml(pipeline_package_path)
            pipeline_json_string = json.dumps(pipeline_obj)
        api_params = [
            kfp_server_api.ApiParameter(
                name=sanitize_k8s_name(name=k, allow_capital_underscore=True),
                # Fix: isinstance instead of a type() comparison, so subclasses of
                # list/dict are also JSON-serialized rather than str()-mangled.
                value=json.dumps(v) if isinstance(v, (list, dict)) else str(v))
            for k, v in params.items()
        ]
        resource_references = []
        # The experiment owns the job.
        key = kfp_server_api.models.ApiResourceKey(
            id=experiment_id,
            type=kfp_server_api.models.ApiResourceType.EXPERIMENT)
        reference = kfp_server_api.models.ApiResourceReference(
            key=key, relationship=kfp_server_api.models.ApiRelationship.OWNER)
        resource_references.append(reference)

        # An explicit version takes precedence over the pipeline's default version.
        if version_id:
            key = kfp_server_api.models.ApiResourceKey(
                id=version_id,
                type=kfp_server_api.models.ApiResourceType.PIPELINE_VERSION)
            reference = kfp_server_api.models.ApiResourceReference(
                key=key,
                relationship=kfp_server_api.models.ApiRelationship.CREATOR)
            resource_references.append(reference)

        spec = kfp_server_api.models.ApiPipelineSpec(
            pipeline_id=pipeline_id,
            workflow_manifest=pipeline_json_string,
            parameters=api_params)
        return JobConfig(spec=spec, resource_references=resource_references)
Example 4
    def run_pipeline(self,
                     experiment_id,
                     job_name,
                     pipeline_package_path=None,
                     params=None,
                     pipeline_id=None):
        """Run a specified pipeline.

    Args:
      experiment_id: The string id of an experiment.
      job_name: name of the job.
      pipeline_package_path: local path of the pipeline package(the filename should end with one of the following .tar.gz, .tgz, .zip, .yaml, .yml).
      params: a dictionary with key (string) as param name and value (string) as param value.
      pipeline_id: the string ID of a pipeline.

    Returns:
      A run object. Most important field is id.
    """
        # Fix: avoid a shared mutable default argument (`params={}`).
        if params is None:
            params = {}

        # If a local package is supplied, embed its workflow manifest in the spec.
        pipeline_json_string = None
        if pipeline_package_path:
            pipeline_obj = self._extract_pipeline_yaml(pipeline_package_path)
            pipeline_json_string = json.dumps(pipeline_obj)
        api_params = [
            kfp_server_api.ApiParameter(
                name=_k8s_helper.K8sHelper.sanitize_k8s_name(k), value=str(v))
            for k, v in params.items()
        ]
        # The experiment owns the run.
        key = kfp_server_api.models.ApiResourceKey(
            id=experiment_id,
            type=kfp_server_api.models.ApiResourceType.EXPERIMENT)
        reference = kfp_server_api.models.ApiResourceReference(
            key, kfp_server_api.models.ApiRelationship.OWNER)
        spec = kfp_server_api.models.ApiPipelineSpec(
            pipeline_id=pipeline_id,
            workflow_manifest=pipeline_json_string,
            parameters=api_params)
        run_body = kfp_server_api.models.ApiRun(
            pipeline_spec=spec, resource_references=[reference], name=job_name)

        response = self._run_api.create_run(body=run_body)

        # In a notebook, render a clickable link to the run's detail page.
        if self._is_ipython():
            import IPython
            html = (
                'Run link <a href="%s/#/runs/details/%s" target="_blank" >here</a>'
                % (self._get_url_prefix(), response.run.id))
            IPython.display.display(IPython.display.HTML(html))
        return response.run
Example 5
  def schedule_pipeline(self, experiment_id, job_name, pipeline_package_path=None, params=None, pipeline_id=None,
    namespace=None, cron_schedule=None, description=None, max_concurrency=10, no_catchup=None):
    """Schedule pipeline on kubeflow to run based upon a cron job.

    Arguments:
        experiment_id {string} -- The experiment within which the scheduled job is created
        job_name {string} -- The name of the scheduled job

    Keyword Arguments:
        pipeline_package_path {string} -- The path to the pipeline package (default: {None})
        params {dict} -- The pipeline parameters (default: {None}, treated as empty)
        pipeline_id {string} -- The id of the pipeline which should run on schedule (default: {None})
        namespace {string} -- The name space with which the pipeline should run (default: {None})
        cron_schedule {string} -- Cron expression for the trigger, e.g. "0 0 9 ? * 2-6" (default: {None})
        description {string} -- Free-text description of the scheduled job (default: {None})
        max_concurrency {int} -- Max number of concurrent runs scheduled (default: {10})
        no_catchup {boolean} -- Whether the recurring run should catch up if behind schedule.
          For example, if the recurring run is paused for a while and re-enabled
          afterwards. If no_catchup=False, the scheduler will catch up on (backfill) each
          missed interval. Otherwise, it only schedules the latest interval if more than one interval
          is ready to be scheduled.
          Usually, if your pipeline handles backfill internally, you should turn catchup
          off to avoid duplicate backfill. (default: {False})

    Returns:
        The API response from creating the scheduled job.
    """
    # Fix: avoid a shared mutable default argument (`params={}`).
    if params is None:
      params = {}

    # If a local package is supplied, embed its workflow manifest in the spec.
    pipeline_json_string = None
    if pipeline_package_path:
      pipeline_obj = self._extract_pipeline_yaml(pipeline_package_path)
      pipeline_json_string = json.dumps(pipeline_obj)
    api_params = [kfp_server_api.ApiParameter(
        name=sanitize_k8s_name(name=k, allow_capital_underscore=True),
        value=str(v)) for k, v in params.items()]
    resource_references = []

    # The experiment owns the scheduled job.
    key = kfp_server_api.models.ApiResourceKey(id=experiment_id,
                                        type=kfp_server_api.models.ApiResourceType.EXPERIMENT)
    reference = kfp_server_api.models.ApiResourceReference(key=key,
                                                           relationship=kfp_server_api.models.ApiRelationship.OWNER)
    resource_references.append(reference)
    if namespace is not None:
      key = kfp_server_api.models.ApiResourceKey(id=namespace,
                                                 type=kfp_server_api.models.ApiResourceType.NAMESPACE)
      reference = kfp_server_api.models.ApiResourceReference(key=key,
                                                             name=namespace,
                                                             relationship=kfp_server_api.models.ApiRelationship.OWNER)
      resource_references.append(reference)
    spec = kfp_server_api.models.ApiPipelineSpec(
        pipeline_id=pipeline_id,
        workflow_manifest=pipeline_json_string,
        parameters=api_params)

    trigger = kfp_server_api.models.api_cron_schedule.ApiCronSchedule(cron=cron_schedule) #Example:cron_schedule="0 0 9 ? * 2-6"
    # Random 10-character alphanumeric id for the job.
    job_id = ''.join(random.choices(string.ascii_uppercase + string.digits, k=10))
    schedule_body = kfp_server_api.models.ApiJob(
        id=job_id,
        name=job_name,
        description=description,
        pipeline_spec=spec,
        resource_references=resource_references,
        max_concurrency=max_concurrency,
        no_catchup=no_catchup,
        trigger=trigger,
        enabled=True,
        )
    #[TODO] Add link to the scheduled job.
    # Fix: return the server response (previously discarded) so callers can
    # inspect the created job.
    response = self._job_api.create_job(body=schedule_body)
    return response
Example 6
    def run_pipeline(self,
                     experiment_id,
                     job_name,
                     pipeline_package_path=None,
                     params=None,
                     pipeline_id=None,
                     version_id=None):
        """Run a specified pipeline.

    Args:
      experiment_id: The string id of an experiment.
      job_name: name of the job.
      pipeline_package_path: local path of the pipeline package(the filename should end with one of the following .tar.gz, .tgz, .zip, .yaml, .yml).
      params: a dictionary with key (string) as param name and value (string) as param value.
      pipeline_id: the string ID of a pipeline.
      version_id: the string ID of a pipeline version.
        If both pipeline_id and version_id are specified, version_id will take precedence.
        If only pipeline_id is specified, the default version of this pipeline is used to create the run.

    Returns:
      A run object. Most important field is id.
    """
        # Fix: avoid a shared mutable default argument (`params={}`).
        if params is None:
            params = {}

        # If a local package is supplied, embed its workflow manifest in the spec.
        pipeline_json_string = None
        if pipeline_package_path:
            pipeline_obj = self._extract_pipeline_yaml(pipeline_package_path)
            pipeline_json_string = json.dumps(pipeline_obj)
        api_params = [
            kfp_server_api.ApiParameter(
                name=sanitize_k8s_name(name=k, allow_capital_underscore=True),
                value=str(v)) for k, v in params.items()
        ]
        resource_references = []
        # The experiment owns the run.
        key = kfp_server_api.models.ApiResourceKey(
            id=experiment_id,
            type=kfp_server_api.models.ApiResourceType.EXPERIMENT)
        reference = kfp_server_api.models.ApiResourceReference(
            key=key, relationship=kfp_server_api.models.ApiRelationship.OWNER)
        resource_references.append(reference)

        # An explicit version takes precedence over the pipeline's default version.
        if version_id:
            key = kfp_server_api.models.ApiResourceKey(
                id=version_id,
                type=kfp_server_api.models.ApiResourceType.PIPELINE_VERSION)
            reference = kfp_server_api.models.ApiResourceReference(
                key=key,
                relationship=kfp_server_api.models.ApiRelationship.CREATOR)
            resource_references.append(reference)

        spec = kfp_server_api.models.ApiPipelineSpec(
            pipeline_id=pipeline_id,
            workflow_manifest=pipeline_json_string,
            parameters=api_params)
        run_body = kfp_server_api.models.ApiRun(
            pipeline_spec=spec,
            resource_references=resource_references,
            name=job_name)

        response = self._run_api.create_run(body=run_body)

        # In a notebook, render a clickable link to the run's detail page.
        if self._is_ipython():
            import IPython
            html = (
                'Run link <a href="%s/#/runs/details/%s" target="_blank" >here</a>'
                % (self._get_url_prefix(), response.run.id))
            IPython.display.display(IPython.display.HTML(html))
        return response.run
Example 7
def pipeline_deploy(request):
    """Upload a pipeline template to a KFP host and start a run for it.

    Reads the request JSON for the KFP host, template URL, dataset info and a
    name suffix; refuses duplicate pipeline names; then creates a pipeline,
    an experiment and a run, returning the new run's id.
    """
    payload = request.get_json()
    kfp_id = payload['kfp']
    pipeline_file_url = payload.get(
        'template_url',
        'https://storage.googleapis.com/your_bucket_name/pipeline.yaml')
    dataset_display_name = payload['dataset_display_name']
    dataset_path = payload['dataset_path']

    # Authenticate with application-default credentials and pass the token on.
    creds, projects = google.auth.default()
    auth_req = google.auth.transport.requests.Request()
    creds.refresh(auth_req)

    client = kfp.Client(host=kfp_id, existing_token=creds.token)

    #create pipeline using url
    suffix = payload['name']
    pipeline_name = 'nlp-pipeline-' + suffix
    run_name = 'nlp-run-' + suffix
    # Bail out early if a pipeline with this name already exists.
    listing = client.pipelines.list_pipelines(async_req=True).get()
    if any(existing.name == pipeline_name for existing in listing.pipelines):
        return 'Please specify a new name.'
    api_url = kfp_server_api.models.ApiUrl(pipeline_file_url)
    api_pipeline = kfp_server_api.models.ApiPipeline(name=pipeline_name,
                                                     url=api_url)
    created = client.pipelines.create_pipeline(api_pipeline, async_req=True).get()
    default_version_id = created.default_version.id  # pipeline id
    logging.info('pipeline id: {}'.format(default_version_id))

    # Create an experiment.
    experiment_name = 'nlp-experiment-' + suffix
    experiment = client.experiments.create_experiment(
        body={'name': experiment_name})
    experiment_id = experiment.id
    logging.info('experiment id: {}'.format(experiment_id))

    # Create a run owned by the new experiment.
    owner_key = kfp_server_api.models.ApiResourceKey(
        id=experiment_id,
        type=kfp_server_api.models.ApiResourceType.EXPERIMENT)
    resource_references = [
        kfp_server_api.models.ApiResourceReference(
            key=owner_key,
            relationship=kfp_server_api.models.ApiRelationship.OWNER)
    ]

    # Pipeline parameters: fixed names paired with request-derived values.
    parameters = [
        kfp_server_api.ApiParameter(name=param_name, value=param_value)
        for param_name, param_value in (
            ('gcp_project_id', projects),
            ('gcp_region', 'us-central1'),
            ('dataset_display_name', dataset_display_name),
            ('api_endpoint', ''),
            ('gcs_path', dataset_path),
            ('model_prefix', 'nlpmodel'),
        )
    ]
    pipeline_spec = kfp_server_api.ApiPipelineSpec(
        parameters=parameters, pipeline_id=default_version_id)

    run = client.runs.create_run(
        body={
            'name': run_name,
            'resource_references': resource_references,
            'pipeline_spec': pipeline_spec
        })
    run_id = run.run.id
    logging.info('run id: {}'.format(run_id))

    return run_id