Example #1
    def __init__(self, trans_id=None, nb_path=None):
        # fall back to a random 10-character transaction id when none is given
        if not trans_id:
            trans_id = random_string(size=10)
        self.log = create_adapter(logging.getLogger(__name__), trans_id,
                                  nb_path)
        self.trans_id = trans_id
        self.nb_path = nb_path
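
Every example on this page leans on random_string. A minimal stand-in, assuming it draws lowercase letters and digits (the exact character set and module location are assumptions):

import random
import string

def random_string(size=5, chars=string.ascii_lowercase + string.digits):
    """Return a random string of `size` characters drawn from `chars`."""
    return "".join(random.choice(chars) for _ in range(size))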
Example #2
def parse_metadata(notebook_metadata):
    """Parse the Notebook's metadata and update it when needed.

    Args:
        notebook_metadata (dict): metadata annotated by Kale.
            Refer to DEFAULT_METADATA for defaults.

    Returns (dict): updated and validated metadata
    """
    # check for required fields before adding all possible defaults
    validated_notebook_metadata = copy.deepcopy(notebook_metadata)
    for required in METADATA_REQUIRED_KEYS:
        if required not in validated_notebook_metadata:
            raise ValueError(
                "Key {} not found. Add this field either in"
                " the notebook metadata or as an override".format(required))

    metadata = copy.deepcopy(DEFAULT_METADATA)
    metadata.update(validated_notebook_metadata)

    if not re.match(KALE_STEP_NAME_REGEX, metadata['pipeline_name']):
        raise ValueError("Pipeline name {}".format(KALE_NAME_MSG))

    # update the pipeline name with a random string
    random_pipeline_name = "{}-{}".format(metadata['pipeline_name'],
                                          random_string())
    metadata['pipeline_name'] = random_pipeline_name

    volumes = metadata.get('volumes', [])
    if isinstance(volumes, list):
        metadata.update({'volumes': _parse_volumes_metadata(volumes)})
    else:
        raise ValueError("Volumes spec must be a list")
    return metadata
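
A hedged usage sketch of parse_metadata, assuming "pipeline_name" and "experiment_name" are among METADATA_REQUIRED_KEYS (an assumption) and that the call runs with the module's constants in scope:

notebook_metadata = {
    "pipeline_name": "titanic",    # must satisfy KALE_STEP_NAME_REGEX
    "experiment_name": "default",  # assumed required key
    "volumes": [],                 # must be a list, or ValueError is raised
}
metadata = parse_metadata(notebook_metadata)
# the pipeline name now carries a random suffix, e.g. "titanic-a1b2c"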
Example #3
    def __init__(self):
        self.store = self._connect()
        self.pod_name = pod_utils.get_pod_name()
        self.pod_namespace = pod_utils.get_namespace()
        self.pod = pod_utils.get_pod(self.pod_name, self.pod_namespace)
        self.workflow_name = self.pod.metadata.labels.get(
            ARGO_WORKFLOW_LABEL_KEY)
        self.workflow = pod_utils.get_workflow(self.workflow_name,
                                               self.pod_namespace)

        workflow_labels = self.workflow["metadata"].get("labels", {})
        self.run_uuid = workflow_labels.get(pod_utils.KFP_RUN_ID_LABEL_KEY,
                                            self.workflow_name)

        workflow_annotations = self.workflow["metadata"].get("annotations", {})
        pipeline_spec = json.loads(
            workflow_annotations.get("pipelines.kubeflow.org/pipeline_spec",
                                     "{}"))
        self.pipeline_name = pipeline_spec.get("name", self.workflow_name)

        self.component_id = pod_utils.compute_component_id(self.pod)
        self.execution_hash = self.pod.metadata.annotations.get(
            MLMD_EXECUTION_HASH_PROPERTY_KEY, utils.random_string(10))

        self.run_context = self._get_or_create_run_context()
        self.execution = self._create_execution_in_run_context()
        self._label_with_context_and_execution()
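
Note that the annotation lookup above must fall back to the JSON string "{}" rather than an empty dict, since json.loads accepts only text. A standalone check of that fallback:

import json

annotations = {}  # pod metadata with no pipeline_spec annotation
spec = json.loads(
    annotations.get("pipelines.kubeflow.org/pipeline_spec", "{}"))
print(spec.get("name", "workflow-name-fallback"))  # -> workflow-name-fallback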
Example #4
    def save_pipeline(self, pipeline_code, output_path=None):
        if output_path is None:
            # create tmp path
            tmp_dir = tempfile.mkdtemp()
            filename = "kale_pipeline_code_{}.py".format(
                utils.random_string(5))
            output_path = os.path.join(tmp_dir, filename)

        with open(output_path, "w") as f:
            f.write(pipeline_code)
        self.logger.info("Pipeline code saved at {}".format(output_path))
        return output_path
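
When output_path is None, the method builds a path from a fresh temporary directory plus a random suffix. The fallback in isolation (the function name is illustrative, not from the source):

import os
import tempfile

def default_pipeline_path(random_suffix):
    # mirrors the fallback branch of save_pipeline above
    tmp_dir = tempfile.mkdtemp()
    return os.path.join(tmp_dir,
                        "kale_pipeline_code_{}.py".format(random_suffix))

print(default_pipeline_path("a1b2c"))  # e.g. /tmp/tmpk3x9w/kale_pipeline_code_a1b2c.py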
Example #5
def generate_run_name(pipeline_name: str):
    """Generate a new run name based on pipeline name."""
    return "{}_run-{}".format(pipeline_name, utils.random_string(5))