Example #1
def randomforest():
    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale.marshal import utils as _kale_marshal_utils
    _kale_marshal_utils.set_kale_data_directory("/marshal")
    _kale_marshal_utils.set_kale_directory_file_names()
    train_df = _kale_marshal_utils.load("train_df")
    train_labels = _kale_marshal_utils.load("train_labels")
    # -----------------------DATA LOADING END----------------------------------
    '''

    block1 = '''
    import numpy as np
    import pandas as pd
    import seaborn as sns
    from matplotlib import pyplot as plt
    from matplotlib import style

    from sklearn import linear_model
    from sklearn.linear_model import LogisticRegression
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import Perceptron
    from sklearn.linear_model import SGDClassifier
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import SVC
    from sklearn.naive_bayes import GaussianNB
    '''

    block2 = '''
    random_forest = RandomForestClassifier(n_estimators=100)
    random_forest.fit(train_df, train_labels)
    acc_random_forest = round(random_forest.score(train_df, train_labels) * 100, 2)
    '''

    data_saving_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale.marshal import utils as _kale_marshal_utils
    _kale_marshal_utils.set_kale_data_directory("/marshal")
    _kale_marshal_utils.save(acc_random_forest, "acc_random_forest")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (data_loading_block,
              block1,
              block2,
              data_saving_block)
    html_artifact = _kale_run_code(blocks)
    with open("/randomforest.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('randomforest')

    _kale_mlmd_utils.call("mark_execution_complete")
Example #2
def test():
    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    block1 = '''
    v1 = "Hello"
    '''

    block2 = '''
    print(v1)
    '''

    data_saving_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale.marshal import utils as _kale_marshal_utils
    _kale_marshal_utils.set_kale_data_directory("")
    _kale_marshal_utils.save(v1, "v1")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (block1, block2, data_saving_block)
    html_artifact = _kale_run_code(blocks)
    with open("/test.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('test')

    _kale_mlmd_utils.call("mark_execution_complete")
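
The pattern shared by these examples (and all that follow) is worth spelling out: a tuple of code-block strings is executed in order inside one Jupyter kernel, so later blocks see the names defined by earlier ones. A minimal, library-free sketch of that idea, assuming _kale_run_code simply dedents and runs each string in a shared namespace (run_blocks is a hypothetical stand-in, not Kale's API):

import textwrap

def run_blocks(blocks):
    ns = {}
    for block in blocks:
        # later blocks see names defined by earlier ones,
        # like cells executed in a single kernel
        exec(textwrap.dedent(block), ns)
    return ns

run_blocks((
    "v1 = 'Hello'",
    "print(v1)",  # prints Hello: v1 survives across blocks
))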
Example #3
def results():
    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale.marshal import utils as _kale_marshal_utils
    _kale_marshal_utils.set_kale_data_directory("/marshal")
    _kale_marshal_utils.set_kale_directory_file_names()
    acc_decision_tree = _kale_marshal_utils.load("acc_decision_tree")
    acc_gaussian = _kale_marshal_utils.load("acc_gaussian")
    acc_linear_svc = _kale_marshal_utils.load("acc_linear_svc")
    acc_log = _kale_marshal_utils.load("acc_log")
    acc_random_forest = _kale_marshal_utils.load("acc_random_forest")
    # -----------------------DATA LOADING END----------------------------------
    '''

    block1 = '''
    import numpy as np
    import pandas as pd
    import seaborn as sns
    from matplotlib import pyplot as plt
    from matplotlib import style

    from sklearn import linear_model
    from sklearn.linear_model import LogisticRegression
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import Perceptron
    from sklearn.linear_model import SGDClassifier
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import SVC
    from sklearn.naive_bayes import GaussianNB
    '''

    block2 = '''
    results = pd.DataFrame({
        'Model': ['Support Vector Machines', 'Logistic Regression',
                  'Random Forest', 'Naive Bayes', 'Decision Tree'],
        'Score': [acc_linear_svc, acc_log,
                  acc_random_forest, acc_gaussian, acc_decision_tree]})
    result_df = results.sort_values(by='Score', ascending=False)
    result_df = result_df.set_index('Score')
    print(result_df)
    '''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (data_loading_block,
              block1,
              block2,
              )
    html_artifact = _kale_run_code(blocks)
    with open("/results.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('results')

    _kale_mlmd_utils.call("mark_execution_complete")
Example #4
def loaddata():
    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    block1 = '''
    import numpy as np
    import pandas as pd
    import seaborn as sns
    from matplotlib import pyplot as plt
    from matplotlib import style

    from sklearn import linear_model
    from sklearn.linear_model import LogisticRegression
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import Perceptron
    from sklearn.linear_model import SGDClassifier
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import SVC
    from sklearn.naive_bayes import GaussianNB
    '''

    block2 = '''
    path = "data/"

    PREDICTION_LABEL = 'Survived'

    test_df = pd.read_csv(path + "test.csv")
    train_df = pd.read_csv(path + "train.csv")
    '''

    data_saving_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale.marshal import utils as _kale_marshal_utils
    _kale_marshal_utils.set_kale_data_directory("/marshal")
    _kale_marshal_utils.save(PREDICTION_LABEL, "PREDICTION_LABEL")
    _kale_marshal_utils.save(test_df, "test_df")
    _kale_marshal_utils.save(train_df, "train_df")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (
        block1,
        block2,
        data_saving_block)
    html_artifact = _kale_run_code(blocks)
    with open("/loaddata.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('loaddata')

    _kale_mlmd_utils.call("mark_execution_complete")
Example #5
def final_auto_snapshot():
    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    from kale.utils import pod_utils as _kale_pod_utils
    _kale_mlmd_utils.call("link_input_rok_artifacts")
    _rok_snapshot_task = _kale_pod_utils.snapshot_pipeline_step(
        "T", "final_auto_snapshot", "/path/to/nb", before=False)
    _kale_mlmd_utils.call("submit_output_rok_artifact", _rok_snapshot_task)

    _kale_mlmd_utils.call("mark_execution_complete")
Example #6
def create_matrix(d1: int, d2: int):
    pipeline_parameters_block = '''
    d1 = {}
    d2 = {}
    '''.format(d1, d2)

    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    block1 = '''
    import numpy as np
    '''

    block2 = '''
    rnd_matrix = np.random.rand(d1, d2)
    '''

    block3 = '''
    from kale.utils import kfp_utils as _kale_kfp_utils
    _kale_kfp_metrics = {
        "d1": d1,
        "d2": d2
    }
    _kale_kfp_utils.generate_mlpipeline_metrics(_kale_kfp_metrics)
    '''

    data_saving_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale.marshal import utils as _kale_marshal_utils
    _kale_marshal_utils.set_kale_data_directory("/marshal")
    _kale_marshal_utils.save(rnd_matrix, "rnd_matrix")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (pipeline_parameters_block, block1, block2, block3,
              data_saving_block)
    html_artifact = _kale_run_code(blocks)
    with open("/create_matrix.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('create_matrix')

    _kale_mlmd_utils.call("mark_execution_complete")
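
The pipeline parameters block is plain string interpolation: the call arguments are inlined as source text and executed before every other block. Calling create_matrix(5, 3), for instance, yields a block equivalent to this sketch:

pipeline_parameters_block = '''
d1 = {}
d2 = {}
'''.format(5, 3)
print(pipeline_parameters_block)  # d1 = 5 and d2 = 3, defined before block1 runs

Because values are interpolated as source text, string parameters must be wrapped in quotes, which is exactly what the later MinIO examples do with their "{}" placeholders.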
Example #7
def sum_matrix():
    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale.marshal import utils as _kale_marshal_utils
    _kale_marshal_utils.set_kale_data_directory("/marshal")
    _kale_marshal_utils.set_kale_directory_file_names()
    rnd_matrix = _kale_marshal_utils.load("rnd_matrix")
    # -----------------------DATA LOADING END----------------------------------
    '''

    block1 = '''
    import numpy as np
    '''

    block2 = '''
    sum_result = rnd_matrix.sum()
    '''

    block3 = '''
    from kale.utils import kfp_utils as _kale_kfp_utils
    _kale_kfp_metrics = {
        "sum-result": sum_result
    }
    _kale_kfp_utils.generate_mlpipeline_metrics(_kale_kfp_metrics)
    '''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (
        data_loading_block,
        block1,
        block2,
        block3,
    )
    html_artifact = _kale_run_code(blocks)
    with open("/sum_matrix.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('sum_matrix')

    _kale_mlmd_utils.call("mark_execution_complete")
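
create_matrix and sum_matrix hand data to each other through the marshal volume: the producer saves rnd_matrix under a well-known name, the consumer loads it back. A standalone sketch of that hand-off using plain pickle in a temporary directory (an illustrative assumption; kale.marshal handles serialization and paths itself):

import os
import pickle
import tempfile

import numpy as np

marshal_dir = tempfile.mkdtemp()

# producer step: save the artifact under an agreed-upon name
rnd_matrix = np.random.rand(5, 3)
with open(os.path.join(marshal_dir, "rnd_matrix.pkl"), "wb") as f:
    pickle.dump(rnd_matrix, f)

# consumer step: load it back by name and aggregate
with open(os.path.join(marshal_dir, "rnd_matrix.pkl"), "rb") as f:
    sum_result = pickle.load(f).sum()
print(sum_result)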
Example #8
def test():
    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    block1 = '''
    print("hello")
    '''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (block1, )
    html_artifact = _kale_run_code(blocks)
    with open("/test.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('test')

    _kale_mlmd_utils.call("mark_execution_complete")
Example #9
def test():
    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale.marshal import utils as _kale_marshal_utils
    _kale_marshal_utils.set_kale_data_directory("")
    _kale_marshal_utils.set_kale_directory_file_names()
    v1 = _kale_marshal_utils.load("v1")
    # -----------------------DATA LOADING END----------------------------------
    '''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (data_loading_block, )
    html_artifact = _kale_run_code(blocks)
    with open("/test.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('test')

    _kale_mlmd_utils.call("mark_execution_complete")
Example #10
def build_model(INCOME_MODEL_PATH: str, MINIO_ACCESS_KEY: str, MINIO_HOST: str,
                MINIO_MODEL_BUCKET: str, MINIO_SECRET_KEY: str):
    pipeline_parameters_block = '''
    INCOME_MODEL_PATH = "{}"
    MINIO_ACCESS_KEY = "{}"
    MINIO_HOST = "{}"
    MINIO_MODEL_BUCKET = "{}"
    MINIO_SECRET_KEY = "{}"
    '''.format(INCOME_MODEL_PATH, MINIO_ACCESS_KEY, MINIO_HOST,
               MINIO_MODEL_BUCKET, MINIO_SECRET_KEY)

    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    block1 = '''
    import numpy as np
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.compose import ColumnTransformer
    from sklearn.pipeline import Pipeline
    from sklearn.impute import SimpleImputer
    from sklearn.metrics import accuracy_score
    from sklearn.preprocessing import StandardScaler, OneHotEncoder
    from alibi.explainers import AnchorTabular
    from alibi.datasets import fetch_adult
    from minio import Minio
    from minio.error import ResponseError
    from joblib import dump, load
    import dill
    import time
    import json
    from subprocess import run, Popen, PIPE
    from alibi_detect.utils.data import create_outlier_batch
    '''

    block2 = '''
    def get_minio():
        return Minio(MINIO_HOST,
                        access_key=MINIO_ACCESS_KEY,
                        secret_key=MINIO_SECRET_KEY,
                        secure=False)
    '''

    block3 = '''
    adult = fetch_adult()
    adult.keys()
    '''

    block4 = '''
    data = adult.data
    target = adult.target
    feature_names = adult.feature_names
    category_map = adult.category_map
    '''

    block5 = '''
    from alibi.utils.data import gen_category_map
    '''

    block6 = '''
    np.random.seed(0)
    data_perm = np.random.permutation(np.c_[data, target])
    data = data_perm[:,:-1]
    target = data_perm[:,-1]
    '''

    block7 = '''
    idx = 30000
    X_train,Y_train = data[:idx,:], target[:idx]
    X_test, Y_test = data[idx+1:,:], target[idx+1:]
    '''

    block8 = '''
    ordinal_features = [x for x in range(len(feature_names)) if x not in list(category_map.keys())]
    ordinal_transformer = Pipeline(steps=[('imputer', SimpleImputer(strategy='median')),
                                          ('scaler', StandardScaler())])
    '''

    block9 = '''
    categorical_features = list(category_map.keys())
    categorical_transformer = Pipeline(steps=[('imputer', SimpleImputer(strategy='median')),
                                              ('onehot', OneHotEncoder(handle_unknown='ignore'))])
    '''

    block10 = '''
    preprocessor = ColumnTransformer(transformers=[('num', ordinal_transformer, ordinal_features),
                                                   ('cat', categorical_transformer, categorical_features)])
    '''

    block11 = '''
    np.random.seed(0)
    clf = RandomForestClassifier(n_estimators=50)
    '''

    block12 = '''
    model=Pipeline(steps=[("preprocess",preprocessor),("model",clf)])
    model.fit(X_train,Y_train)
    '''

    block13 = '''
    def predict_fn(x):
        return model.predict(x)
    '''

    block14 = '''
    #predict_fn = lambda x: clf.predict(preprocessor.transform(x))
    print('Train accuracy: ', accuracy_score(Y_train, predict_fn(X_train)))
    print('Test accuracy: ', accuracy_score(Y_test, predict_fn(X_test)))
    '''

    block15 = '''
    dump(model, 'model.joblib') 
    '''

    block16 = '''
    print(get_minio().fput_object(MINIO_MODEL_BUCKET, f"{INCOME_MODEL_PATH}/model.joblib", 'model.joblib'))
    '''

    data_saving_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale.marshal import utils as _kale_marshal_utils
    _kale_marshal_utils.set_kale_data_directory("/marshal")
    _kale_marshal_utils.save(X_test, "X_test")
    _kale_marshal_utils.save(X_train, "X_train")
    _kale_marshal_utils.save(Y_train, "Y_train")
    _kale_marshal_utils.save(adult, "adult")
    _kale_marshal_utils.save(category_map, "category_map")
    _kale_marshal_utils.save(feature_names, "feature_names")
    _kale_marshal_utils.save(model, "model")
    _kale_marshal_utils.save(predict_fn, "predict_fn")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (pipeline_parameters_block, block1, block2, block3, block4,
              block5, block6, block7, block8, block9, block10, block11,
              block12, block13, block14, block15, block16, data_saving_block)
    html_artifact = _kale_run_code(blocks)
    with open("/build_model.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('build_model')

    _kale_mlmd_utils.call("mark_execution_complete")
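
The modelling core of blocks 8 through 12 is ordinary scikit-learn; condensed into a self-contained snippet on synthetic data (column layout, sizes, and values are made up for illustration):

import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# two numeric columns plus one categorical column encoded as integers
X = np.c_[np.random.rand(100, 2), np.random.randint(0, 3, (100, 1))]
y = np.random.randint(0, 2, 100)

preprocessor = ColumnTransformer(transformers=[
    ("num", Pipeline([("imputer", SimpleImputer(strategy="median")),
                      ("scaler", StandardScaler())]), [0, 1]),
    ("cat", Pipeline([("imputer", SimpleImputer(strategy="median")),
                      ("onehot", OneHotEncoder(handle_unknown="ignore"))]), [2]),
])
model = Pipeline(steps=[("preprocess", preprocessor),
                        ("model", RandomForestClassifier(n_estimators=50))])
model.fit(X, y)
print("Train accuracy:", model.score(X, y))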
Example #11
def setup(MINIO_ACCESS_KEY: str, MINIO_HOST: str, MINIO_MODEL_BUCKET: str,
          MINIO_SECRET_KEY: str):
    pipeline_parameters_block = '''
    MINIO_ACCESS_KEY = "{}"
    MINIO_HOST = "{}"
    MINIO_MODEL_BUCKET = "{}"
    MINIO_SECRET_KEY = "{}"
    '''.format(MINIO_ACCESS_KEY, MINIO_HOST, MINIO_MODEL_BUCKET,
               MINIO_SECRET_KEY)

    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    block1 = '''
    import numpy as np
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.compose import ColumnTransformer
    from sklearn.pipeline import Pipeline
    from sklearn.impute import SimpleImputer
    from sklearn.metrics import accuracy_score
    from sklearn.preprocessing import StandardScaler, OneHotEncoder
    from alibi.explainers import AnchorTabular
    from alibi.datasets import fetch_adult
    from minio import Minio
    from minio.error import ResponseError
    from joblib import dump, load
    import dill
    import time
    import json
    from subprocess import run, Popen, PIPE
    from alibi_detect.utils.data import create_outlier_batch
    '''

    block2 = '''
    def get_minio():
        return Minio(MINIO_HOST,
                        access_key=MINIO_ACCESS_KEY,
                        secret_key=MINIO_SECRET_KEY,
                        secure=False)
    '''

    block3 = '''
    minioClient = get_minio()
    buckets = minioClient.list_buckets()
    for bucket in buckets:
        print(bucket.name, bucket.creation_date)
    '''

    block4 = '''
    if not minioClient.bucket_exists(MINIO_MODEL_BUCKET):
        minioClient.make_bucket(MINIO_MODEL_BUCKET)
    '''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (
        pipeline_parameters_block,
        block1,
        block2,
        block3,
        block4,
    )
    html_artifact = _kale_run_code(blocks)
    with open("/setup.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('setup')

    _kale_mlmd_utils.call("mark_execution_complete")
Example #12
def deploy_outlier(DEPLOY_NAMESPACE: str, MINIO_ACCESS_KEY: str,
                   MINIO_HOST: str, MINIO_MODEL_BUCKET: str,
                   MINIO_SECRET_KEY: str, OUTLIER_MODEL_PATH: str):
    pipeline_parameters_block = '''
    DEPLOY_NAMESPACE = "{}"
    MINIO_ACCESS_KEY = "{}"
    MINIO_HOST = "{}"
    MINIO_MODEL_BUCKET = "{}"
    MINIO_SECRET_KEY = "{}"
    OUTLIER_MODEL_PATH = "{}"
    '''.format(DEPLOY_NAMESPACE, MINIO_ACCESS_KEY, MINIO_HOST,
               MINIO_MODEL_BUCKET, MINIO_SECRET_KEY, OUTLIER_MODEL_PATH)

    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    block1 = '''
    import numpy as np
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.compose import ColumnTransformer
    from sklearn.pipeline import Pipeline
    from sklearn.impute import SimpleImputer
    from sklearn.metrics import accuracy_score
    from sklearn.preprocessing import StandardScaler, OneHotEncoder
    from alibi.explainers import AnchorTabular
    from alibi.datasets import fetch_adult
    from minio import Minio
    from minio.error import ResponseError
    from joblib import dump, load
    import dill
    import time
    import json
    from subprocess import run, Popen, PIPE
    from alibi_detect.utils.data import create_outlier_batch
    '''

    block2 = '''
    def get_minio():
        return Minio(MINIO_HOST,
                        access_key=MINIO_ACCESS_KEY,
                        secret_key=MINIO_SECRET_KEY,
                        secure=False)
    '''

    block3 = '''
    outlier_yaml=f"""apiVersion: serving.knative.dev/v1
    kind: Service
    metadata:
      name: income-outlier
      namespace: {DEPLOY_NAMESPACE}
    spec:
      template:
        metadata:
          annotations:
            autoscaling.knative.dev/minScale: "1"
        spec:
          containers:
          - image: seldonio/alibi-detect-server:1.2.2-dev_alibidetect
            imagePullPolicy: IfNotPresent
            args:
            - --model_name
            - adultod
            - --http_port
            - '8080'
            - --protocol
            - seldon.http
            - --storage_uri
            - s3://{MINIO_MODEL_BUCKET}/{OUTLIER_MODEL_PATH}
            - --reply_url
            - http://default-broker       
            - --event_type
            - io.seldon.serving.inference.outlier
            - --event_source
            - io.seldon.serving.incomeod
            - OutlierDetector
            envFrom:
            - secretRef:
                name: seldon-init-container-secret
    """
    with open("outlier.yaml","w") as f:
        f.write(outlier_yaml)
    run("kubectl apply -f outlier.yaml", shell=True)
    '''

    block4 = '''
    trigger_outlier_yaml=f"""apiVersion: eventing.knative.dev/v1alpha1
    kind: Trigger
    metadata:
      name: income-outlier-trigger
      namespace: {DEPLOY_NAMESPACE}
    spec:
      filter:
        sourceAndType:
          type: io.seldon.serving.inference.request
      subscriber:
        ref:
          apiVersion: serving.knative.dev/v1alpha1
          kind: Service
          name: income-outlier
    """
    with open("outlier_trigger.yaml","w") as f:
        f.write(trigger_outlier_yaml)
    run("kubectl apply -f outlier_trigger.yaml", shell=True)
    '''

    block5 = '''
    run(f"kubectl rollout status -n {DEPLOY_NAMESPACE} deploy/$(kubectl get deploy -l serving.knative.dev/service=income-outlier -o jsonpath='{{.items[0].metadata.name}}' -n {DEPLOY_NAMESPACE})", shell=True)
    '''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (
        pipeline_parameters_block,
        block1,
        block2,
        block3,
        block4,
        block5,
    )
    html_artifact = _kale_run_code(blocks)
    with open("/deploy_outlier.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('deploy_outlier')

    _kale_mlmd_utils.call("mark_execution_complete")
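
Each deploy step follows the same recipe: render a manifest with an f-string, write it to disk, shell out to kubectl apply. If the intermediate file is unwanted, the manifest can also be piped straight to kubectl (a variation on the generated code, with a hypothetical namespace value):

from subprocess import run

DEPLOY_NAMESPACE = "admin"  # placeholder value for illustration

manifest = f"""apiVersion: v1
kind: ConfigMap
metadata:
  name: demo-config
  namespace: {DEPLOY_NAMESPACE}
"""
# pipe the rendered manifest to kubectl instead of writing a file first
run("kubectl apply -f -", shell=True, input=manifest.encode(), check=True)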
Example #13
def test_model(DEPLOY_NAMESPACE: str, MINIO_ACCESS_KEY: str, MINIO_HOST: str,
               MINIO_SECRET_KEY: str):
    pipeline_parameters_block = '''
    DEPLOY_NAMESPACE = "{}"
    MINIO_ACCESS_KEY = "{}"
    MINIO_HOST = "{}"
    MINIO_SECRET_KEY = "{}"
    '''.format(DEPLOY_NAMESPACE, MINIO_ACCESS_KEY, MINIO_HOST,
               MINIO_SECRET_KEY)

    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    block1 = '''
    import numpy as np
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.compose import ColumnTransformer
    from sklearn.pipeline import Pipeline
    from sklearn.impute import SimpleImputer
    from sklearn.metrics import accuracy_score
    from sklearn.preprocessing import StandardScaler, OneHotEncoder
    from alibi.explainers import AnchorTabular
    from alibi.datasets import fetch_adult
    from minio import Minio
    from minio.error import ResponseError
    from joblib import dump, load
    import dill
    import time
    import json
    from subprocess import run, Popen, PIPE
    from alibi_detect.utils.data import create_outlier_batch
    '''

    block2 = '''
    def get_minio():
        return Minio(MINIO_HOST,
                        access_key=MINIO_ACCESS_KEY,
                        secret_key=MINIO_SECRET_KEY,
                        secure=False)
    '''

    block3 = '''
    payload='{"data": {"ndarray": [[53,4,0,2,8,4,4,0,0,0,60,9]]}}'
    cmd=f"""curl -d '{payload}' \\
       http://income-classifier-default.{DEPLOY_NAMESPACE}:8000/api/v1.0/predictions \\
       -H "Content-Type: application/json"
    """
    ret = Popen(cmd, shell=True,stdout=PIPE)
    raw = ret.stdout.read().decode("utf-8")
    print(raw)
    '''

    block4 = '''
    payload='{"data": {"ndarray": [[53,4,0,2,8,4,4,0,0,0,60,9]]}}'
    cmd=f"""curl -d '{payload}' \\
       http://income-classifier-default-explainer.{DEPLOY_NAMESPACE}:9000/api/v1.0/explain \\
       -H "Content-Type: application/json"
    """
    ret = Popen(cmd, shell=True,stdout=PIPE)
    raw = ret.stdout.read().decode("utf-8")
    print(raw)
    '''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (
        pipeline_parameters_block,
        block1,
        block2,
        block3,
        block4,
    )
    html_artifact = _kale_run_code(blocks)
    with open("/test_model.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('test_model')

    _kale_mlmd_utils.call("mark_execution_complete")
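
The same prediction call without shelling out to curl, using requests (an added dependency; the endpoint path comes from the example above, the namespace value is a placeholder):

import requests

DEPLOY_NAMESPACE = "admin"  # placeholder value for illustration

payload = {"data": {"ndarray": [[53, 4, 0, 2, 8, 4, 4, 0, 0, 0, 60, 9]]}}
resp = requests.post(
    f"http://income-classifier-default.{DEPLOY_NAMESPACE}:8000/api/v1.0/predictions",
    json=payload,
    timeout=30,
)
print(resp.text)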
Example #14
def deploy_seldon(DEPLOY_NAMESPACE: str, EXPLAINER_MODEL_PATH: str,
                  INCOME_MODEL_PATH: str, MINIO_ACCESS_KEY: str,
                  MINIO_HOST: str, MINIO_MODEL_BUCKET: str,
                  MINIO_SECRET_KEY: str):
    pipeline_parameters_block = '''
    DEPLOY_NAMESPACE = "{}"
    EXPLAINER_MODEL_PATH = "{}"
    INCOME_MODEL_PATH = "{}"
    MINIO_ACCESS_KEY = "{}"
    MINIO_HOST = "{}"
    MINIO_MODEL_BUCKET = "{}"
    MINIO_SECRET_KEY = "{}"
    '''.format(DEPLOY_NAMESPACE, EXPLAINER_MODEL_PATH, INCOME_MODEL_PATH,
               MINIO_ACCESS_KEY, MINIO_HOST, MINIO_MODEL_BUCKET,
               MINIO_SECRET_KEY)

    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    block1 = '''
    import numpy as np
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.compose import ColumnTransformer
    from sklearn.pipeline import Pipeline
    from sklearn.impute import SimpleImputer
    from sklearn.metrics import accuracy_score
    from sklearn.preprocessing import StandardScaler, OneHotEncoder
    from alibi.explainers import AnchorTabular
    from alibi.datasets import fetch_adult
    from minio import Minio
    from minio.error import ResponseError
    from joblib import dump, load
    import dill
    import time
    import json
    from subprocess import run, Popen, PIPE
    from alibi_detect.utils.data import create_outlier_batch
    '''

    block2 = '''
    def get_minio():
        return Minio(MINIO_HOST,
                        access_key=MINIO_ACCESS_KEY,
                        secret_key=MINIO_SECRET_KEY,
                        secure=False)
    '''

    block3 = '''
    secret = f"""apiVersion: v1
    kind: Secret
    metadata:
      name: seldon-init-container-secret
      namespace: {DEPLOY_NAMESPACE}
    type: Opaque
    stringData:
      AWS_ACCESS_KEY_ID: {MINIO_ACCESS_KEY}
      AWS_SECRET_ACCESS_KEY: {MINIO_SECRET_KEY}
      AWS_ENDPOINT_URL: http://{MINIO_HOST}
      USE_SSL: "false"
    """
    with open("secret.yaml","w") as f:
        f.write(secret)
    run("cat secret.yaml | kubectl apply -f -", shell=True)
    '''

    block4 = '''
    sa = f"""apiVersion: v1
    kind: ServiceAccount
    metadata:
      name: minio-sa
      namespace: {DEPLOY_NAMESPACE}
    secrets:
      - name: seldon-init-container-secret
    """
    with open("sa.yaml","w") as f:
        f.write(sa)
    run("kubectl apply -f sa.yaml", shell=True)
    '''

    block5 = '''
    model_yaml=f"""apiVersion: machinelearning.seldon.io/v1
    kind: SeldonDeployment
    metadata:
      name: income-classifier
      namespace: {DEPLOY_NAMESPACE}
    spec:
      predictors:
      - componentSpecs:
        graph:
          implementation: SKLEARN_SERVER
          modelUri: s3://{MINIO_MODEL_BUCKET}/{INCOME_MODEL_PATH}
          envSecretRefName: seldon-init-container-secret
          name: classifier
          logger:
             mode: all
        explainer:
          type: AnchorTabular
          modelUri: s3://{MINIO_MODEL_BUCKET}/{EXPLAINER_MODEL_PATH}
          envSecretRefName: seldon-init-container-secret
        name: default
        replicas: 1
    """
    with open("model.yaml","w") as f:
        f.write(model_yaml)
    run("kubectl apply -f model.yaml", shell=True)
    '''

    block6 = '''
    run(f"kubectl rollout status -n {DEPLOY_NAMESPACE} deploy/$(kubectl get deploy -l seldon-deployment-id=income-classifier -o jsonpath='{{.items[0].metadata.name}}' -n {DEPLOY_NAMESPACE})", shell=True)
    '''

    block7 = '''
    run(f"kubectl rollout status -n {DEPLOY_NAMESPACE} deploy/$(kubectl get deploy -l seldon-deployment-id=income-classifier -o jsonpath='{{.items[1].metadata.name}}' -n {DEPLOY_NAMESPACE})", shell=True)
    '''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (
        pipeline_parameters_block,
        block1,
        block2,
        block3,
        block4,
        block5,
        block6,
        block7,
    )
    html_artifact = _kale_run_code(blocks)
    with open("/deploy_seldon.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('deploy_seldon')

    _kale_mlmd_utils.call("mark_execution_complete")
Example #15
def train_explainer(EXPLAINER_MODEL_PATH: str, MINIO_ACCESS_KEY: str,
                    MINIO_HOST: str, MINIO_MODEL_BUCKET: str,
                    MINIO_SECRET_KEY: str):
    pipeline_parameters_block = '''
    EXPLAINER_MODEL_PATH = "{}"
    MINIO_ACCESS_KEY = "{}"
    MINIO_HOST = "{}"
    MINIO_MODEL_BUCKET = "{}"
    MINIO_SECRET_KEY = "{}"
    '''.format(EXPLAINER_MODEL_PATH, MINIO_ACCESS_KEY, MINIO_HOST,
               MINIO_MODEL_BUCKET, MINIO_SECRET_KEY)

    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale.marshal import utils as _kale_marshal_utils
    _kale_marshal_utils.set_kale_data_directory("/marshal")
    _kale_marshal_utils.set_kale_directory_file_names()
    X_train = _kale_marshal_utils.load("X_train")
    category_map = _kale_marshal_utils.load("category_map")
    feature_names = _kale_marshal_utils.load("feature_names")
    model = _kale_marshal_utils.load("model")
    predict_fn = _kale_marshal_utils.load("predict_fn")
    # -----------------------DATA LOADING END----------------------------------
    '''

    block1 = '''
    import numpy as np
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.compose import ColumnTransformer
    from sklearn.pipeline import Pipeline
    from sklearn.impute import SimpleImputer
    from sklearn.metrics import accuracy_score
    from sklearn.preprocessing import StandardScaler, OneHotEncoder
    from alibi.explainers import AnchorTabular
    from alibi.datasets import fetch_adult
    from minio import Minio
    from minio.error import ResponseError
    from joblib import dump, load
    import dill
    import time
    import json
    from subprocess import run, Popen, PIPE
    from alibi_detect.utils.data import create_outlier_batch
    '''

    block2 = '''
    def get_minio():
        return Minio(MINIO_HOST,
                        access_key=MINIO_ACCESS_KEY,
                        secret_key=MINIO_SECRET_KEY,
                        secure=False)
    '''

    block3 = '''
    model.predict(X_train)
    explainer = AnchorTabular(predict_fn, feature_names, categorical_names=category_map)
    '''

    block4 = '''
    explainer.fit(X_train, disc_perc=[25, 50, 75])
    '''

    block5 = '''
    with open("explainer.dill", "wb") as dill_file:
        dill.dump(explainer, dill_file)    
        dill_file.close()
    print(get_minio().fput_object(MINIO_MODEL_BUCKET, f"{EXPLAINER_MODEL_PATH}/explainer.dill", 'explainer.dill'))
    '''

    data_saving_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale.marshal import utils as _kale_marshal_utils
    _kale_marshal_utils.set_kale_data_directory("/marshal")
    _kale_marshal_utils.save(X_train, "X_train")
    _kale_marshal_utils.save(explainer, "explainer")
    _kale_marshal_utils.save(model, "model")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (pipeline_parameters_block, data_loading_block, block1, block2,
              block3, block4, block5, data_saving_block)
    html_artifact = _kale_run_code(blocks)
    with open("/train_explainer.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('train_explainer')

    _kale_mlmd_utils.call("mark_execution_complete")
Example #16
def build_outlier(MINIO_ACCESS_KEY: str, MINIO_HOST: str,
                  MINIO_MODEL_BUCKET: str, MINIO_SECRET_KEY: str,
                  OUTLIER_MODEL_PATH: str):
    pipeline_parameters_block = '''
    MINIO_ACCESS_KEY = "{}"
    MINIO_HOST = "{}"
    MINIO_MODEL_BUCKET = "{}"
    MINIO_SECRET_KEY = "{}"
    OUTLIER_MODEL_PATH = "{}"
    '''.format(MINIO_ACCESS_KEY, MINIO_HOST, MINIO_MODEL_BUCKET,
               MINIO_SECRET_KEY, OUTLIER_MODEL_PATH)

    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale.marshal import utils as _kale_marshal_utils
    _kale_marshal_utils.set_kale_data_directory("/marshal")
    _kale_marshal_utils.set_kale_directory_file_names()
    X_train = _kale_marshal_utils.load("X_train")
    Y_train = _kale_marshal_utils.load("Y_train")
    # -----------------------DATA LOADING END----------------------------------
    '''

    block1 = '''
    import numpy as np
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.compose import ColumnTransformer
    from sklearn.pipeline import Pipeline
    from sklearn.impute import SimpleImputer
    from sklearn.metrics import accuracy_score
    from sklearn.preprocessing import StandardScaler, OneHotEncoder
    from alibi.explainers import AnchorTabular
    from alibi.datasets import fetch_adult
    from minio import Minio
    from minio.error import ResponseError
    from joblib import dump, load
    import dill
    import time
    import json
    from subprocess import run, Popen, PIPE
    from alibi_detect.utils.data import create_outlier_batch
    '''

    block2 = '''
    def get_minio():
        return Minio(MINIO_HOST,
                        access_key=MINIO_ACCESS_KEY,
                        secret_key=MINIO_SECRET_KEY,
                        secure=False)
    '''

    block3 = '''
    from alibi_detect.od import IForest

    od = IForest(
        threshold=0.,
        n_estimators=200,
    )
    '''

    block4 = '''
    od.fit(X_train)
    '''

    block5 = '''
    np.random.seed(0)
    perc_outlier = 5
    threshold_batch = create_outlier_batch(X_train, Y_train, n_samples=1000, perc_outlier=perc_outlier)
    X_threshold, y_threshold = threshold_batch.data.astype('float'), threshold_batch.target
    #X_threshold = (X_threshold - mean) / stdev
    print('{}% outliers'.format(100 * y_threshold.mean()))
    '''

    block6 = '''
    od.infer_threshold(X_threshold, threshold_perc=100-perc_outlier)
    print('New threshold: {}'.format(od.threshold))
    threshold = od.threshold
    '''

    block7 = '''
    X_outlier = [[300,  4,  4,  2,  1,  4,  4,  0,  0,  0, 600,  9]]
    '''

    block8 = '''
    od.predict(
        X_outlier
    )
    '''

    block9 = '''
    from alibi_detect.utils.saving import save_detector, load_detector
    from os import listdir
    from os.path import isfile, join

    filepath="ifoutlier"
    save_detector(od, filepath) 
    onlyfiles = [f for f in listdir(filepath) if isfile(join(filepath, f))]
    for filename in onlyfiles:
        print(filename)
        print(get_minio().fput_object(MINIO_MODEL_BUCKET, f"{OUTLIER_MODEL_PATH}/{filename}", join(filepath, filename)))
    '''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (
        pipeline_parameters_block,
        data_loading_block,
        block1,
        block2,
        block3,
        block4,
        block5,
        block6,
        block7,
        block8,
        block9,
    )
    html_artifact = _kale_run_code(blocks)
    with open("/build_outlier.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('build_outlier')

    _kale_mlmd_utils.call("mark_execution_complete")
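
Blocks 3 through 6 isolated as a self-contained alibi-detect snippet on synthetic data (no MinIO upload; the 95th-percentile threshold mirrors perc_outlier = 5 above):

import numpy as np
from alibi_detect.od import IForest

X = np.random.rand(1000, 4)

od = IForest(threshold=0., n_estimators=200)
od.fit(X)
od.infer_threshold(X, threshold_perc=95)  # top 5% of scores count as outliers
print("New threshold:", od.threshold)

# a point far outside the training range should be flagged
preds = od.predict(np.array([[10., 10., 10., 10.]]))
print(preds["data"]["is_outlier"])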
Example #17
def explain(MINIO_ACCESS_KEY: str, MINIO_HOST: str, MINIO_SECRET_KEY: str):
    pipeline_parameters_block = '''
    MINIO_ACCESS_KEY = "{}"
    MINIO_HOST = "{}"
    MINIO_SECRET_KEY = "{}"
    '''.format(MINIO_ACCESS_KEY, MINIO_HOST, MINIO_SECRET_KEY)

    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale.marshal import utils as _kale_marshal_utils
    _kale_marshal_utils.set_kale_data_directory("/marshal")
    _kale_marshal_utils.set_kale_directory_file_names()
    X_test = _kale_marshal_utils.load("X_test")
    X_train = _kale_marshal_utils.load("X_train")
    adult = _kale_marshal_utils.load("adult")
    explainer = _kale_marshal_utils.load("explainer")
    model = _kale_marshal_utils.load("model")
    # -----------------------DATA LOADING END----------------------------------
    '''

    block1 = '''
    import numpy as np
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.compose import ColumnTransformer
    from sklearn.pipeline import Pipeline
    from sklearn.impute import SimpleImputer
    from sklearn.metrics import accuracy_score
    from sklearn.preprocessing import StandardScaler, OneHotEncoder
    from alibi.explainers import AnchorTabular
    from alibi.datasets import fetch_adult
    from minio import Minio
    from minio.error import ResponseError
    from joblib import dump, load
    import dill
    import time
    import json
    from subprocess import run, Popen, PIPE
    from alibi_detect.utils.data import create_outlier_batch
    '''

    block2 = '''
    def get_minio():
        return Minio(MINIO_HOST,
                        access_key=MINIO_ACCESS_KEY,
                        secret_key=MINIO_SECRET_KEY,
                        secure=False)
    '''

    block3 = '''
    model.predict(X_train)
    idx = 0
    class_names = adult.target_names
    print('Prediction: ', class_names[explainer.predict_fn(X_test[idx].reshape(1, -1))[0]])
    '''

    block4 = '''
    explanation = explainer.explain(X_test[idx], threshold=0.95)
    print('Anchor: %s' % (' AND '.join(explanation['names'])))
    print('Precision: %.2f' % explanation['precision'])
    print('Coverage: %.2f' % explanation['coverage'])
    '''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (
        pipeline_parameters_block,
        data_loading_block,
        block1,
        block2,
        block3,
        block4,
    )
    html_artifact = _kale_run_code(blocks)
    with open("/explain.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('explain')

    _kale_mlmd_utils.call("mark_execution_complete")
Example #18
def deploy_event_display(DEPLOY_NAMESPACE: str, MINIO_ACCESS_KEY: str,
                         MINIO_HOST: str, MINIO_SECRET_KEY: str):
    pipeline_parameters_block = '''
    DEPLOY_NAMESPACE = "{}"
    MINIO_ACCESS_KEY = "{}"
    MINIO_HOST = "{}"
    MINIO_SECRET_KEY = "{}"
    '''.format(DEPLOY_NAMESPACE, MINIO_ACCESS_KEY, MINIO_HOST,
               MINIO_SECRET_KEY)

    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    block1 = '''
    import numpy as np
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.compose import ColumnTransformer
    from sklearn.pipeline import Pipeline
    from sklearn.impute import SimpleImputer
    from sklearn.metrics import accuracy_score
    from sklearn.preprocessing import StandardScaler, OneHotEncoder
    from alibi.explainers import AnchorTabular
    from alibi.datasets import fetch_adult
    from minio import Minio
    from minio.error import ResponseError
    from joblib import dump, load
    import dill
    import time
    import json
    from subprocess import run, Popen, PIPE
    from alibi_detect.utils.data import create_outlier_batch
    '''

    block2 = '''
    def get_minio():
        return Minio(MINIO_HOST,
                        access_key=MINIO_ACCESS_KEY,
                        secret_key=MINIO_SECRET_KEY,
                        secure=False)
    '''

    block3 = '''
    event_display=f"""apiVersion: apps/v1
    kind: Deployment
    metadata:
      name: event-display
      namespace: {DEPLOY_NAMESPACE}          
    spec:
      replicas: 1
      selector:
        matchLabels: &labels
          app: event-display
      template:
        metadata:
          labels: *labels
        spec:
          containers:
            - name: helloworld-go
              # Source code: https://github.com/knative/eventing-contrib/tree/master/cmd/event_display
              image: gcr.io/knative-releases/knative.dev/eventing-contrib/cmd/event_display@sha256:f4628e97a836c77ed38bd3b6fd3d0b06de4d5e7db6704772fe674d48b20bd477
    ---
    kind: Service
    apiVersion: v1
    metadata:
      name: event-display
      namespace: {DEPLOY_NAMESPACE}
    spec:
      selector:
        app: event-display
      ports:
        - protocol: TCP
          port: 80
          targetPort: 8080
    ---
    apiVersion: eventing.knative.dev/v1alpha1
    kind: Trigger
    metadata:
      name: income-outlier-display
      namespace: {DEPLOY_NAMESPACE}
    spec:
      broker: default
      filter:
        attributes:
          type: io.seldon.serving.inference.outlier
      subscriber:
        ref:
          apiVersion: v1
          kind: Service
          name: event-display
    """
    with open("event_display.yaml","w") as f:
        f.write(event_display)
    run("kubectl apply -f event_display.yaml", shell=True)
    '''

    block4 = '''
    run(f"kubectl rollout status -n {DEPLOY_NAMESPACE} deploy/event-display -n {DEPLOY_NAMESPACE}", shell=True)
    '''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (
        pipeline_parameters_block,
        block1,
        block2,
        block3,
        block4,
    )
    html_artifact = _kale_run_code(blocks)
    with open("/deploy_event_display.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('deploy_event_display')

    _kale_mlmd_utils.call("mark_execution_complete")
Example #19
def featureengineering():
    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale.marshal import utils as _kale_marshal_utils
    _kale_marshal_utils.set_kale_data_directory("/marshal")
    _kale_marshal_utils.set_kale_directory_file_names()
    PREDICTION_LABEL = _kale_marshal_utils.load("PREDICTION_LABEL")
    test_df = _kale_marshal_utils.load("test_df")
    train_df = _kale_marshal_utils.load("train_df")
    # -----------------------DATA LOADING END----------------------------------
    '''

    block1 = '''
    import numpy as np
    import pandas as pd
    import seaborn as sns
    from matplotlib import pyplot as plt
    from matplotlib import style

    from sklearn import linear_model
    from sklearn.linear_model import LogisticRegression
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import Perceptron
    from sklearn.linear_model import SGDClassifier
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import SVC
    from sklearn.naive_bayes import GaussianNB
    '''

    block2 = '''
    data = [train_df, test_df]

    for dataset in data:
        dataset['Fare'] = dataset['Fare'].fillna(0)
        dataset['Fare'] = dataset['Fare'].astype(int)
    '''

    block3 = '''
    data = [train_df, test_df]
    titles = {"Mr": 1, "Miss": 2, "Mrs": 3, "Master": 4, "Rare": 5}

    for dataset in data:
        # extract titles
        dataset['Title'] = dataset.Name.str.extract(' ([A-Za-z]+)\\.', expand=False)
        # replace titles with a more common title or as Rare
        dataset['Title'] = dataset['Title'].replace(['Lady', 'Countess','Capt', 'Col','Don', 'Dr',\\
                                                'Major', 'Rev', 'Sir', 'Jonkheer', 'Dona'], 'Rare')
        dataset['Title'] = dataset['Title'].replace('Mlle', 'Miss')
        dataset['Title'] = dataset['Title'].replace('Ms', 'Miss')
        dataset['Title'] = dataset['Title'].replace('Mme', 'Mrs')
        # convert titles into numbers
        dataset['Title'] = dataset['Title'].map(titles)
        # filling NaN with 0 to be safe
        dataset['Title'] = dataset['Title'].fillna(0)
    train_df = train_df.drop(['Name'], axis=1)
    test_df = test_df.drop(['Name'], axis=1)
    '''

    block4 = '''
    genders = {"male": 0, "female": 1}
    data = [train_df, test_df]

    for dataset in data:
        dataset['Sex'] = dataset['Sex'].map(genders)
    '''

    block5 = '''
    train_df = train_df.drop(['Ticket'], axis=1)
    test_df = test_df.drop(['Ticket'], axis=1)
    '''

    block6 = '''
    ports = {"S": 0, "C": 1, "Q": 2}
    data = [train_df, test_df]

    for dataset in data:
        dataset['Embarked'] = dataset['Embarked'].map(ports)
    '''

    block7 = '''
    data = [train_df, test_df]
    for dataset in data:
        dataset['Age'] = dataset['Age'].astype(int)
        dataset.loc[ dataset['Age'] <= 11, 'Age'] = 0
        dataset.loc[(dataset['Age'] > 11) & (dataset['Age'] <= 18), 'Age'] = 1
        dataset.loc[(dataset['Age'] > 18) & (dataset['Age'] <= 22), 'Age'] = 2
        dataset.loc[(dataset['Age'] > 22) & (dataset['Age'] <= 27), 'Age'] = 3
        dataset.loc[(dataset['Age'] > 27) & (dataset['Age'] <= 33), 'Age'] = 4
        dataset.loc[(dataset['Age'] > 33) & (dataset['Age'] <= 40), 'Age'] = 5
        dataset.loc[(dataset['Age'] > 40) & (dataset['Age'] <= 66), 'Age'] = 6
        dataset.loc[ dataset['Age'] > 66, 'Age'] = 6

    # let's see how it's distributed: train_df['Age'].value_counts()
    '''

    block8 = '''
    data = [train_df, test_df]

    for dataset in data:
        dataset.loc[ dataset['Fare'] <= 7.91, 'Fare'] = 0
        dataset.loc[(dataset['Fare'] > 7.91) & (dataset['Fare'] <= 14.454), 'Fare'] = 1
        dataset.loc[(dataset['Fare'] > 14.454) & (dataset['Fare'] <= 31), 'Fare']   = 2
        dataset.loc[(dataset['Fare'] > 31) & (dataset['Fare'] <= 99), 'Fare']   = 3
        dataset.loc[(dataset['Fare'] > 99) & (dataset['Fare'] <= 250), 'Fare']   = 4
        dataset.loc[ dataset['Fare'] > 250, 'Fare'] = 5
        dataset['Fare'] = dataset['Fare'].astype(int)
    '''

    block9 = '''
    data = [train_df, test_df]
    for dataset in data:
        dataset['Age_Class']= dataset['Age']* dataset['Pclass']
    '''

    block10 = '''
    for dataset in data:
        dataset['Fare_Per_Person'] = dataset['Fare']/(dataset['relatives']+1)
        dataset['Fare_Per_Person'] = dataset['Fare_Per_Person'].astype(int)
    # Let's take a last look at the training set, before we start training the models.
    train_df.head(10)
    '''

    block11 = '''
    train_labels = train_df[PREDICTION_LABEL]
    train_df = train_df.drop(PREDICTION_LABEL, axis=1)
    '''

    data_saving_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale.marshal import utils as _kale_marshal_utils
    _kale_marshal_utils.set_kale_data_directory("/marshal")
    _kale_marshal_utils.save(train_df, "train_df")
    _kale_marshal_utils.save(train_labels, "train_labels")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (data_loading_block,
              block1,
              block2,
              block3,
              block4,
              block5,
              block6,
              block7,
              block8,
              block9,
              block10,
              block11,
              data_saving_block)
    html_artifact = _kale_run_code(blocks)
    with open("/featureengineering.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('featureengineering')

    _kale_mlmd_utils.call("mark_execution_complete")
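
Block7's manual .loc binning of Age can be written more compactly with pd.cut; note that the generated code maps both the 40-66 and the over-66 ranges to bucket 6, which this sketch reproduces (an alternative formulation, not the generated code):

import pandas as pd

ages = pd.Series([5, 15, 20, 25, 30, 38, 50, 70])
bins = [-1, 11, 18, 22, 27, 33, 40, 200]  # upper edges of the buckets in block7
labels = [0, 1, 2, 3, 4, 5, 6]
print(pd.cut(ages, bins=bins, labels=labels).astype(int))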
Example #20
def test_outliers(DEPLOY_NAMESPACE: str, MINIO_ACCESS_KEY: str,
                  MINIO_HOST: str, MINIO_SECRET_KEY: str):
    pipeline_parameters_block = '''
    DEPLOY_NAMESPACE = "{}"
    MINIO_ACCESS_KEY = "{}"
    MINIO_HOST = "{}"
    MINIO_SECRET_KEY = "{}"
    '''.format(DEPLOY_NAMESPACE, MINIO_ACCESS_KEY, MINIO_HOST,
               MINIO_SECRET_KEY)

    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    block1 = '''
    import numpy as np
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.compose import ColumnTransformer
    from sklearn.pipeline import Pipeline
    from sklearn.impute import SimpleImputer
    from sklearn.metrics import accuracy_score
    from sklearn.preprocessing import StandardScaler, OneHotEncoder
    from alibi.explainers import AnchorTabular
    from alibi.datasets import fetch_adult
    from minio import Minio
    from minio.error import ResponseError
    from joblib import dump, load
    import dill
    import time
    import json
    from subprocess import run, Popen, PIPE
    from alibi_detect.utils.data import create_outlier_batch
    '''

    block2 = '''
    def get_minio():
        return Minio(MINIO_HOST,
                        access_key=MINIO_ACCESS_KEY,
                        secret_key=MINIO_SECRET_KEY,
                        secure=False)
    '''

    block3 = '''
    def predict():
        payload='{"data": {"ndarray": [[300,  4,  4,  2,  1,  4,  4,  0,  0,  0, 600,  9]]}}'
        cmd=f"""curl -d '{payload}' \\
           http://income-classifier-default.{DEPLOY_NAMESPACE}:8000/api/v1.0/predictions \\
           -H "Content-Type: application/json"
        """
        ret = Popen(cmd, shell=True,stdout=PIPE)
        raw = ret.stdout.read().decode("utf-8")
        print(raw)
    '''

    block4 = '''
    def get_outlier_event_display_logs():
        cmd=f"kubectl logs $(kubectl get pod -l app=event-display -o jsonpath='{{.items[0].metadata.name}}' -n {DEPLOY_NAMESPACE}) -n {DEPLOY_NAMESPACE}"
        ret = Popen(cmd, shell=True,stdout=PIPE)
        res = ret.stdout.read().decode("utf-8").split("\\n")
        data= []
        for i in range(0,len(res)):
            if res[i] == 'Data,':
                j = json.loads(json.loads(res[i+1]))
                if "is_outlier"in j["data"].keys():
                    data.append(j)
        if len(data) > 0:
            return data[-1]
        else:
            return None
    j = None
    while j is None:
        predict()
        print("Waiting for outlier logs, sleeping")
        time.sleep(2)
        j = get_outlier_event_display_logs()
        
    print(j)
    print("Outlier",j["data"]["is_outlier"]==[1])
    '''

    block5 = '''
    
    '''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (
        pipeline_parameters_block,
        block1,
        block2,
        block3,
        block4,
        block5,
    )
    html_artifact = _kale_run_code(blocks)
    with open("/test_outliers.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('test_outliers')

    _kale_mlmd_utils.call("mark_execution_complete")
Example #21
def datapreprocessing():
    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale.marshal import utils as _kale_marshal_utils
    _kale_marshal_utils.set_kale_data_directory("/marshal")
    _kale_marshal_utils.set_kale_directory_file_names()
    test_df = _kale_marshal_utils.load("test_df")
    train_df = _kale_marshal_utils.load("train_df")
    # -----------------------DATA LOADING END----------------------------------
    '''

    block1 = '''
    import numpy as np
    import pandas as pd
    import seaborn as sns
    from matplotlib import pyplot as plt
    from matplotlib import style

    from sklearn import linear_model
    from sklearn.linear_model import LogisticRegression
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import Perceptron
    from sklearn.linear_model import SGDClassifier
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import SVC
    from sklearn.naive_bayes import GaussianNB
    '''

    block2 = '''
    data = [train_df, test_df]
    for dataset in data:
        dataset['relatives'] = dataset['SibSp'] + dataset['Parch']
        dataset.loc[dataset['relatives'] > 0, 'not_alone'] = 0
        dataset.loc[dataset['relatives'] == 0, 'not_alone'] = 1
        dataset['not_alone'] = dataset['not_alone'].astype(int)
    train_df['not_alone'].value_counts()
    '''

    block3 = '''
    # This does not contribute to a person's survival probability
    train_df = train_df.drop(['PassengerId'], axis=1)
    '''

    block4 = '''
    import re
    deck = {"A": 1, "B": 2, "C": 3, "D": 4, "E": 5, "F": 6, "G": 7, "U": 8}
    data = [train_df, test_df]

    for dataset in data:
        dataset['Cabin'] = dataset['Cabin'].fillna("U0")
        dataset['Deck'] = dataset['Cabin'].map(lambda x: re.compile("([a-zA-Z]+)").search(x).group())
        dataset['Deck'] = dataset['Deck'].map(deck)
        dataset['Deck'] = dataset['Deck'].fillna(0)
        dataset['Deck'] = dataset['Deck'].astype(int)
    # we can now drop the cabin feature
    train_df = train_df.drop(['Cabin'], axis=1)
    test_df = test_df.drop(['Cabin'], axis=1)
    '''

    block5 = '''
    data = [train_df, test_df]

    for dataset in data:
        mean = train_df["Age"].mean()
        std = test_df["Age"].std()
        is_null = dataset["Age"].isnull().sum()
        # draw random ages between mean - std and mean + std, one per missing value
        rand_age = np.random.randint(mean - std, mean + std, size = is_null)
        # fill NaN values in Age column with random values generated
        age_slice = dataset["Age"].copy()
        age_slice[np.isnan(age_slice)] = rand_age
        dataset["Age"] = age_slice
        dataset["Age"] = train_df["Age"].astype(int)
    train_df["Age"].isnull().sum()
    '''

    block6 = '''
    train_df['Embarked'].describe()
    '''

    block7 = '''
    # fill with most common value
    common_value = 'S'
    data = [train_df, test_df]

    for dataset in data:
        dataset['Embarked'] = dataset['Embarked'].fillna(common_value)
    '''

    block8 = '''
    train_df.info()
    '''

    data_saving_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale.marshal import utils as _kale_marshal_utils
    _kale_marshal_utils.set_kale_data_directory("/marshal")
    _kale_marshal_utils.save(test_df, "test_df")
    _kale_marshal_utils.save(train_df, "train_df")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (data_loading_block,
              block1,
              block2,
              block3,
              block4,
              block5,
              block6,
              block7,
              block8,
              data_saving_block)
    html_artifact = _kale_run_code(blocks)
    with open("/datapreprocessing.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('datapreprocessing')

    _kale_mlmd_utils.call("mark_execution_complete")
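
Block5's random-age imputation, isolated on a tiny synthetic series: missing values are filled with integer draws from [mean - std, mean + std) (illustrative data; the generated code applies this to train_df and test_df):

import numpy as np
import pandas as pd

ages = pd.Series([22., np.nan, 38., np.nan, 35.])
mean, std = ages.mean(), ages.std()

# one random age per missing value, drawn around the observed mean
rand_age = np.random.randint(mean - std, mean + std, size=ages.isnull().sum())
ages[ages.isnull()] = rand_age
print(ages.astype(int))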