コード例 #1
0
ファイル: objects.py プロジェクト: pferreiro92/kubespawner
def make_pvc(name, storage_class, access_modes, storage, labels=None):
    """
    Make a k8s pvc specification for running a user notebook.

    Parameters:
      - name:
        Name of persistent volume claim. Must be unique within the namespace the object is
        going to be created in. Must be a valid DNS label.
      - storage_class:
        String of the name of the k8s Storage Class to use.
      - access_modes:
        A list specifying what access mode the pod should have towards the pvc
      - storage:
        The amount of storage needed for the pvc
      - labels:
        Dictionary of labels to set on the pvc metadata (optional).

    """
    pvc = V1PersistentVolumeClaim()
    pvc.kind = "PersistentVolumeClaim"
    pvc.api_version = "v1"
    pvc.metadata = V1ObjectMeta()
    pvc.metadata.name = name
    pvc.metadata.annotations = {}
    if storage_class:
        # Storage class is requested via the beta annotation here.
        pvc.metadata.annotations.update(
            {"volume.beta.kubernetes.io/storage-class": storage_class})
    # Copy so the caller's dict is never aliased into (or mutated through)
    # the pvc object; None is accepted for backward-compatible convenience.
    pvc.metadata.labels = (labels or {}).copy()
    pvc.spec = V1PersistentVolumeClaimSpec()
    pvc.spec.access_modes = access_modes
    pvc.spec.resources = V1ResourceRequirements()
    pvc.spec.resources.requests = {"storage": storage}

    return pvc
コード例 #2
0
def make_pvc(
    name,
    storage_class,
    access_modes,
    selector,
    storage,
    labels=None,
    annotations=None,
):
    """
    Make a k8s pvc specification for running a user notebook.

    Parameters
    ----------
    name:
        Name of persistent volume claim. Must be unique within the namespace the object is
        going to be created in. Must be a valid DNS label.
    storage_class:
        String of the name of the k8s Storage Class to use.
    access_modes:
        A list specifying what access mode the pod should have towards the pvc
    selector:
        Dictionary Selector to match pvc to pv.
    storage:
        The amount of storage needed for the pvc
    labels:
        Dictionary of labels to set on the pvc metadata (optional).
    annotations:
        Dictionary of annotations to set on the pvc metadata (optional).

    """
    pvc = V1PersistentVolumeClaim()
    pvc.kind = "PersistentVolumeClaim"
    pvc.api_version = "v1"
    pvc.metadata = V1ObjectMeta()
    pvc.metadata.name = name
    # Copy the callers' dicts so they are never mutated through the pvc.
    pvc.metadata.annotations = (annotations or {}).copy()
    pvc.metadata.labels = (labels or {}).copy()
    pvc.spec = V1PersistentVolumeClaimSpec()
    pvc.spec.access_modes = access_modes
    pvc.spec.resources = V1ResourceRequirements()
    pvc.spec.resources.requests = {"storage": storage}

    if storage_class is not None:
        # Set both the spec field and the legacy beta annotation so the
        # storage class is honored either way.
        pvc.metadata.annotations.update(
            {"volume.beta.kubernetes.io/storage-class": storage_class}
        )
        pvc.spec.storage_class_name = storage_class

    if selector:
        pvc.spec.selector = selector

    return pvc
コード例 #3
0
def make_pvc(old_pvc, storage_class):
    """Build a fresh PVC spec that mirrors an existing claim.

    Copies name, labels, access modes and requested storage from *old_pvc*
    (a plain dict as returned by the k8s API), keeps only the
    ``hub.jupyter.org/username`` annotation, and optionally applies a new
    storage class.
    """
    meta = V1ObjectMeta()
    meta.name = old_pvc['metadata']['name']
    username = old_pvc['metadata']['annotations']['hub.jupyter.org/username']
    meta.annotations = {'hub.jupyter.org/username': username}
    # Copy mutable fields so the new object never aliases the old dict.
    meta.labels = dict(old_pvc['metadata']['labels'])

    spec = V1PersistentVolumeClaimSpec()
    spec.access_modes = list(old_pvc['spec']['accessModes'])
    spec.resources = V1ResourceRequirements()
    spec.resources.requests = {
        "storage": old_pvc['spec']['resources']['requests']['storage']
    }

    if storage_class:
        meta.annotations["volume.beta.kubernetes.io/storage-class"] = \
            storage_class
        spec.storage_class_name = storage_class

    pvc = V1PersistentVolumeClaim()
    pvc.kind = "PersistentVolumeClaim"
    pvc.api_version = "v1"
    pvc.metadata = meta
    pvc.spec = spec
    return pvc
コード例 #4
0
    def __init__(self,
                 resource_name: str = None,
                 size: str = None,
                 storage_class: str = None,
                 modes: List[str] = None,
                 annotations: Dict[str, str] = None,
                 data_source=None,
                 volume_name=None,
                 **kwargs):
        """Create a VolumeOp that emits a PersistentVolumeClaim resource.

        Args:
            resource_name: Desired PVC name; the final metadata name is
                "{{workflow.name}}-<resource_name>" (sanitized unless it is
                a serialized PipelineParam).
            size: Requested storage size. Required unless a ready-made
                ``k8s_resource`` is passed via kwargs.
            storage_class: Storage class name for the dynamically created PVC.
            modes: Access modes for the PVC; falls back to VOLUME_MODE_RWM
                when not given.
            annotations: Annotations to set on the PVC metadata.
            data_source: A V1TypedLocalObjectReference used as-is, or a
                str/PipelineParam treated as a VolumeSnapshot name.
            volume_name: Passed through to the PVC spec's ``volume_name``
                field (binds the claim to a specific PersistentVolume).
            kwargs: Forwarded to the parent constructor; may contain a
                pre-built ``k8s_resource`` instead of the arguments above.

        Raises:
            ValueError: if k8s_resource is provided along with other
                arguments; if k8s_resource is not a V1PersistentVolumeClaim;
                if size is missing or an invalid memory string; if
                data_source has an unsupported type.
        """
        # Add size to attribute outputs
        self.attribute_outputs = {"size": "{.status.capacity.storage}"}

        # Short-circuit path: caller supplies a fully-formed PVC resource.
        if "k8s_resource" in kwargs:
            if resource_name or size or storage_class or modes or annotations:
                raise ValueError("You cannot provide k8s_resource along with "
                                 "other arguments.")
            if not isinstance(kwargs["k8s_resource"], V1PersistentVolumeClaim):
                raise ValueError("k8s_resource in VolumeOp must be an instance"
                                 " of V1PersistentVolumeClaim")
            super().__init__(**kwargs)
            # Expose the created claim as a pipeline volume for later steps.
            self.volume = PipelineVolume(name=sanitize_k8s_name(self.name),
                                         pvc=self.outputs["name"])
            return

        if not size:
            raise ValueError("Please provide size")
        elif not match_serialized_pipelineparam(str(size)):
            # Only validate literal sizes; PipelineParams resolve at runtime.
            self._validate_memory_string(size)

        if data_source and not isinstance(
                data_source,
            (str, PipelineParam, V1TypedLocalObjectReference)):
            raise ValueError("data_source can be one of (str, PipelineParam, "
                             "V1TypedLocalObjectReference).")
        if data_source and isinstance(data_source, (str, PipelineParam)):
            # Strings/params name a VolumeSnapshot (Alpha feature).
            data_source = V1TypedLocalObjectReference(
                api_group="snapshot.storage.k8s.io",
                kind="VolumeSnapshot",
                name=data_source)

        # Set the k8s_resource
        if not match_serialized_pipelineparam(str(resource_name)):
            resource_name = sanitize_k8s_name(resource_name)
        pvc_metadata = V1ObjectMeta(name="{{workflow.name}}-%s" %
                                    resource_name,
                                    annotations=annotations)
        requested_resources = V1ResourceRequirements(
            requests={"storage": size})
        pvc_spec = V1PersistentVolumeClaimSpec(
            access_modes=modes or VOLUME_MODE_RWM,
            resources=requested_resources,
            storage_class_name=storage_class,
            data_source=data_source,
            volume_name=volume_name)
        k8s_resource = V1PersistentVolumeClaim(api_version="v1",
                                               kind="PersistentVolumeClaim",
                                               metadata=pvc_metadata,
                                               spec=pvc_spec)

        super().__init__(
            k8s_resource=k8s_resource,
            **kwargs,
        )
        self.volume = PipelineVolume(name=sanitize_k8s_name(self.name),
                                     pvc=self.outputs["name"])
コード例 #5
0
    def __init__(self,
                 resource_name: str = None,
                 size: str = None,
                 storage_class: str = None,
                 modes: List[str] = None,
                 annotations: Dict[str, str] = None,
                 data_source=None,
                 **kwargs):
        """Create a new instance of VolumeOp.

        Args:
            resource_name: A desired name for the PVC which will be created
            size: The size of the PVC which will be created
            storage_class: The storage class to use for the dynamically created
                PVC
            modes: The access modes for the PVC (defaults to VOLUME_MODE_RWM)
            annotations: Annotations to be patched in the PVC
            data_source: May be a V1TypedLocalObjectReference, and then it is
                used in the data_source field of the PVC as is. Can also be a
                string/PipelineParam, and in that case it will be used as a
                VolumeSnapshot name (Alpha feature)
            kwargs: See ResourceOp definition
        Raises:
            ValueError: if k8s_resource is provided along with other arguments
                        if k8s_resource is not a V1PersistentVolumeClaim
                        if size is None
                        if size is an invalid memory string (when not a
                            PipelineParam)
                        if data_source is not one of (str, PipelineParam,
                            V1TypedLocalObjectReference)
        """
        # Add size to attribute outputs
        self.attribute_outputs = {"size": "{.status.capacity.storage}"}

        if "k8s_resource" in kwargs:
            # NOTE: `modes` defaults to None rather than VOLUME_MODE_RWM.
            # A truthy (list) default would make this guard fire for every
            # caller passing a ready-made k8s_resource, and a mutable default
            # argument would be shared across calls.
            if resource_name or size or storage_class or modes or annotations:
                raise ValueError("You cannot provide k8s_resource along with "
                                 "other arguments.")
            if not isinstance(kwargs["k8s_resource"], V1PersistentVolumeClaim):
                raise ValueError("k8s_resource in VolumeOp must be an instance"
                                 " of V1PersistentVolumeClaim")
            super().__init__(**kwargs)
            self.volume = PipelineVolume(name=sanitize_k8s_name(self.name),
                                         pvc=self.outputs["name"])
            return

        if not size:
            raise ValueError("Please provide size")
        elif not match_serialized_pipelineparam(str(size)):
            # Only literal sizes can be validated; params resolve at runtime.
            self._validate_memory_string(size)

        if data_source and not isinstance(
                data_source,
            (str, PipelineParam, V1TypedLocalObjectReference)):
            raise ValueError("data_source can be one of (str, PipelineParam, "
                             "V1TypedLocalObjectReference).")
        if data_source and isinstance(data_source, (str, PipelineParam)):
            # Strings/params name a VolumeSnapshot (Alpha feature).
            data_source = V1TypedLocalObjectReference(
                api_group="snapshot.storage.k8s.io",
                kind="VolumeSnapshot",
                name=data_source)

        # Set the k8s_resource
        if not match_serialized_pipelineparam(str(resource_name)):
            resource_name = sanitize_k8s_name(resource_name)
        pvc_metadata = V1ObjectMeta(name="{{workflow.name}}-%s" %
                                    resource_name,
                                    annotations=annotations)
        requested_resources = V1ResourceRequirements(
            requests={"storage": size})
        pvc_spec = V1PersistentVolumeClaimSpec(
            access_modes=modes or VOLUME_MODE_RWM,
            resources=requested_resources,
            storage_class_name=storage_class,
            data_source=data_source)
        k8s_resource = V1PersistentVolumeClaim(api_version="v1",
                                               kind="PersistentVolumeClaim",
                                               metadata=pvc_metadata,
                                               spec=pvc_spec)

        super().__init__(
            k8s_resource=k8s_resource,
            **kwargs,
        )
        self.volume = PipelineVolume(name=sanitize_k8s_name(self.name),
                                     pvc=self.outputs["name"])
コード例 #6
0
def nlp_pipeline(
        csv_url="https://raw.githubusercontent.com/axsauze/reddit-classification-exploration/master/data/reddit_train.csv",
        csv_encoding="ISO-8859-1",
        features_column="BODY",
        labels_column="REMOVED",
        raw_text_path='/mnt/text.data',
        labels_path='/mnt/labels.data',
        clean_text_path='/mnt/clean.data',
        spacy_tokens_path='/mnt/tokens.data',
        tfidf_vectors_path='/mnt/tfidf.data',
        lr_prediction_path='/mnt/prediction.data',
        tfidf_model_path='/mnt/tfidf.model',
        lr_model_path='/mnt/lr.model',
        lr_c_param=0.1,
        tfidf_max_features=10000,
        tfidf_ngram_range=3,
        batch_size='100',
        github_branch='master'):
    """NLP training pipeline.

    Chains download -> clean -> tokenize -> vectorize -> predict steps over
    a shared PVC mounted at /mnt, deploys a Seldon graph, then deletes and
    force-finalizes PVCs left over from previous runs of the same branch.
    """

    # PVC shared by all steps; labelled so stale claims from other runs of
    # the same branch can be found and deleted at the end of the pipeline.
    pvc_metadata = V1ObjectMeta(name="{{workflow.name}}-my-pvc",
                                labels={
                                    "branch":
                                    "{{workflow.parameters.github-branch}}",
                                    "app": "nlp"
                                })
    requested_resources = V1ResourceRequirements(requests={"storage": "1Gi"})
    pvc_spec = V1PersistentVolumeClaimSpec(access_modes=["ReadWriteOnce"],
                                           resources=requested_resources)
    pvc = V1PersistentVolumeClaim(api_version="v1",
                                  kind="PersistentVolumeClaim",
                                  metadata=pvc_metadata,
                                  spec=pvc_spec)

    # modes=None is required when handing VolumeOp a ready-made k8s_resource.
    vop = dsl.VolumeOp(name="create-pvc", k8s_resource=pvc, modes=None)

    download_step = dsl.ContainerOp(
        name='data_downloader',
        image='maximmold/data_downloader:0.1',
        command="python",
        arguments=[
            "/microservice/pipeline_step.py", "--labels-path", labels_path,
            "--features-path", raw_text_path, "--csv-url", csv_url,
            "--csv-encoding", csv_encoding, "--features-column",
            features_column, "--labels-column", labels_column
        ],
        pvolumes={"/mnt": vop.volume})

    clean_step = dsl.ContainerOp(name='clean_text',
                                 image='maximmold/clean_text_transformer:0.1',
                                 command="python",
                                 arguments=[
                                     "/microservice/pipeline_step.py",
                                     "--in-path",
                                     raw_text_path,
                                     "--out-path",
                                     clean_text_path,
                                 ],
                                 pvolumes={"/mnt": download_step.pvolume})

    tokenize_step = dsl.ContainerOp(name='tokenize',
                                    image='maximmold/spacy_tokenizer:0.1',
                                    command="python",
                                    arguments=[
                                        "/microservice/pipeline_step.py",
                                        "--in-path",
                                        clean_text_path,
                                        "--out-path",
                                        spacy_tokens_path,
                                    ],
                                    pvolumes={"/mnt": clean_step.pvolume})

    vectorize_step = dsl.ContainerOp(name='vectorize',
                                     image='maximmold/tfidf_vectorizer:0.1',
                                     command="python",
                                     arguments=[
                                         "/microservice/pipeline_step.py",
                                         "--in-path",
                                         spacy_tokens_path,
                                         "--out-path",
                                         tfidf_vectors_path,
                                         "--max-features",
                                         tfidf_max_features,
                                         "--ngram-range",
                                         tfidf_ngram_range,
                                         "--action",
                                         "train",
                                         "--model-path",
                                         tfidf_model_path,
                                     ],
                                     pvolumes={"/mnt": tokenize_step.pvolume})

    predict_step = dsl.ContainerOp(name='predictor',
                                   image='maximmold/lr_text_classifier:0.1',
                                   command="python",
                                   arguments=[
                                       "/microservice/pipeline_step.py",
                                       "--in-path",
                                       tfidf_vectors_path,
                                       "--labels-path",
                                       labels_path,
                                       "--out-path",
                                       lr_prediction_path,
                                       "--c-param",
                                       lr_c_param,
                                       "--action",
                                       "train",
                                       "--model-path",
                                       lr_model_path,
                                   ],
                                   pvolumes={"/mnt": vectorize_step.pvolume})

    # The deployment manifest lives in deploy_pipeline/; try the relative
    # path first and fall back for when this file is run from the project
    # core directory. safe_load avoids arbitrary-object construction and
    # `with` guarantees the file handle is closed.
    try:
        with open("../deploy_pipeline/seldon_production_pipeline.yaml") as f:
            seldon_config = yaml.safe_load(f)
    except FileNotFoundError:
        # If this file is run from the project core directory
        with open("deploy_pipeline/seldon_production_pipeline.yaml") as f:
            seldon_config = yaml.safe_load(f)

    deploy_step = dsl.ResourceOp(
        action="apply",
        name="seldondeploy",
        k8s_resource=seldon_config,
        attribute_outputs={"name": "{.metadata.name}"})

    deploy_step.after(predict_step)

    # Remove PVCs from earlier runs of this branch (everything matching the
    # labels except the claim created by this workflow).
    delete_previous_pvc = dsl.ContainerOp(
        name="deletepreviouspvc",
        image="bitnami/kubectl",
        command="kubectl",
        arguments=[
            "delete", "pvc", "-l",
            "app=nlp,branch={{workflow.parameters.github-branch}}",
            "--field-selector", "metadata.name!={{workflow.name}}-my-pvc",
            "--grace-period=0", "--force", "--wait=false"
        ])

    delete_previous_pvc.after(deploy_step)

    # Clear finalizers so the force-deleted PVCs are actually released.
    patch_pvc_finalizer = dsl.ContainerOp(
        name="patchpvcfinalizer",
        image="bitnami/kubectl",
        command=["bash"],
        arguments=[
            "-c",
            'for j in $(kubectl get pvc -o name -l app=nlp,branch={{workflow.parameters.github-branch}} --field-selector metadata.name!={{workflow.name}}-my-pvc -n kubeflow); do kubectl patch $j -p '
            "'"
            '{"metadata":{"finalizers": []}}'
            "'"
            ' -n kubeflow --type=merge; done'
        ])

    patch_pvc_finalizer.after(delete_previous_pvc)