Example #1
def test_mlops_explained_variance_score_apis():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    labels_pred = [1.0, 0.5, 2.5, 4.75, 7.0, 0.75]
    labels_actual = [1.5, 0.75, 2.75, 4.5, 7.50, 0.25]

    evs = sklearn.metrics.explained_variance_score(labels_actual, labels_pred)

    # first way
    pm.set_stat(RegressionMetrics.EXPLAINED_VARIANCE_SCORE, evs)

    # second way
    pm.metrics.explained_variance_score(y_true=labels_actual,
                                        y_pred=labels_pred)

    # should throw an error if a non-numeric value is provided
    with pytest.raises(MLOpsStatisticsException):
        pm.set_stat(RegressionMetrics.EXPLAINED_VARIANCE_SCORE, [1, 2, 3])

    # should throw an error if predicted labels differ in length from actuals
    with pytest.raises(ValueError):
        labels_pred_missing_values = [1.0, 0.5, 7.0, 0.75]
        pm.metrics.explained_variance_score(y_true=labels_actual,
                                            y_pred=labels_pred_missing_values)

    sample_weight = [0.9, 0.1, 0.5, 0.9, 1.0, 0]

    # testing with sample weights as well
    pm.metrics.explained_variance_score(y_true=labels_actual,
                                        y_pred=labels_pred,
                                        sample_weight=sample_weight)

    pm.done()
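
Note: the snippets in this listing omit their imports. A minimal sketch of the imports they rely on is given below; the module paths are an assumption based on the parallelm.mlops (MCenter) package layout and may differ between library versions.

# Assumed imports for the metric-reporting tests in this listing.
# NOTE: module paths are a best guess; adjust them to the installed version.
import sklearn.metrics
import pytest

from parallelm.mlops import mlops as pm   # some snippets use "pm", others use "mlops" directly
from parallelm.mlops.mlops_mode import MLOpsMode
from parallelm.mlops.metrics_constants import (ClassificationMetrics,
                                               ClusteringMetrics,
                                               RegressionMetrics)
from parallelm.mlops.mlops_exception import MLOpsStatisticsException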
Example #2
def test_mlops_mean_squared_error_apis():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    labels_pred = [1.0, 0.5, 2.5, 4.75, 7.0, 0.75]
    labels_actual = [1.5, 0.75, 2.75, 4.5, 7.50, 0.25]

    mse = sklearn.metrics.mean_squared_error(labels_actual, labels_pred)

    # first way
    pm.set_stat(RegressionMetrics.MEAN_SQUARED_ERROR, mse)

    # second way
    pm.metrics.mean_squared_error(y_true=labels_actual, y_pred=labels_pred)

    # should throw an error if a non-numeric value is provided
    with pytest.raises(MLOpsStatisticsException):
        pm.set_stat(RegressionMetrics.MEAN_SQUARED_ERROR, [1, 2, 3])

    # should throw an error if predicted labels differ in length from actuals
    with pytest.raises(ValueError):
        labels_pred_missing_values = [1.0, 0.5, 7.0, 0.75]
        pm.metrics.mean_squared_error(y_true=labels_actual,
                                      y_pred=labels_pred_missing_values)

    sample_weight = [0.9, 0.1, 0.5, 0.9, 1.0, 0]

    # testing with sample weights as well
    pm.metrics.mean_squared_error(y_true=labels_actual,
                                  y_pred=labels_pred,
                                  sample_weight=sample_weight)

    pm.done()
Example #3
def main():

    print("args: {}".format(sys.argv))
    options = parse_args()
    print("- inside test-python-aux Running main.py")
    print("arg1:         {}".format(options.arg1))
    print("input_model:  {}".format(options.input_model))
    print("use-mlops:    {}".format(options.use_mlops))
    print("iter:         {}".format(options.iter))
    print("exit_value:   {}".format(options.exit_value))

    print("Calling mlops.init()")
    if options.use_mlops:
        mlops.init()

    # Some output - to test logs
    for idx in range(options.iter):
        print("stdout - Idx {}".format(idx))
        print("stderr - Idx {}".format(idx), file=sys.stderr)
        if options.use_mlops:
            mlops.set_stat("aux_stat", 1)
        time.sleep(1)

    if options.use_mlops:
        mlops.done()

    # Exit status
    if options.exit_value >= 0:
        print("About to exit with value: {}".format(options.exit_value))
        sys.exit(options.exit_value)
    else:
        print("About to raise exception: {}".format(options.exit_value))
        raise Exception("Exiting main using exception")
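
Note: the parse_args() helper used by main() above is not included in the listing. A hypothetical argparse-based reconstruction, with option names mirroring the attributes main() reads, might look like this:

import argparse

def parse_args():
    # Hypothetical sketch of the missing parse_args() helper; the real
    # component defines its own options, these merely mirror main() above.
    parser = argparse.ArgumentParser(description="test-python-aux")
    parser.add_argument("--arg1", default=None)
    parser.add_argument("--input-model", dest="input_model", default=None)
    parser.add_argument("--use-mlops", dest="use_mlops", action="store_true", default=False)
    parser.add_argument("--iter", type=int, default=5)
    parser.add_argument("--exit-value", dest="exit_value", type=int, default=0)
    return parser.parse_args()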
Example #4
def main():
    print("Starting example")
    mlops.init(run_in_non_pm_mode=True, mlops_mode=MLOpsMode.PYTHON)

    # Line graphs
    mlops.set_stat("myCounterDouble", 5.5)
    mlops.set_stat("myCounterDouble2", 7.3)

    # Multi-line graphs
    mlt = MultiLineGraph().name("Multi Line").labels(["l1",
                                                      "l2"]).data([5, 16])
    mlops.set_stat(mlt)

    tbl = Table().name("MyTable").cols(["Date", "Some number"])
    tbl.add_row(["2001Q1", "55"])
    tbl.add_row(["2001Q2", "66"])
    tbl.add_row(["2003Q3", "33"])
    tbl.add_row(["2003Q2", "22"])
    mlops.set_stat(tbl)

    bar = BarGraph().name("MyBar").cols(["aa", "bb", "cc", "dd",
                                         "ee"]).data([10, 15, 12, 9, 8])
    mlops.set_stat(bar)

    mlops.done()
    print("Example done")
Example #5
def test_mlops_bas_apis():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    labels_pred = [1, 0, 1, 1, 1, 0]
    labels_actual = [0, 1, 0, 0, 0, 1]

    bas = sklearn.metrics.balanced_accuracy_score(labels_actual, labels_pred)

    # first way
    pm.set_stat(ClassificationMetrics.BALANCED_ACCURACY_SCORE, bas)

    # second way
    pm.metrics.balanced_accuracy_score(y_true=labels_actual, y_pred=labels_pred)

    # should throw an error if a non-numeric value is provided
    with pytest.raises(MLOpsStatisticsException):
        pm.set_stat(ClassificationMetrics.BALANCED_ACCURACY_SCORE, [1, 2, 3])

    # should throw an error if predicted labels differ in length from actuals
    with pytest.raises(ValueError):
        labels_pred_missing_values = [0, 0, 0, 1]
        pm.metrics.balanced_accuracy_score(y_true=labels_actual, y_pred=labels_pred_missing_values)

    sample_weight = [0.9, 0.1, 0.5, 0.9, 1.0, 0]

    # testing with sample weights as well
    pm.metrics.balanced_accuracy_score(y_true=labels_actual,
                                       y_pred=labels_pred,
                                       sample_weight=sample_weight)

    pm.done()
Example #6
def test_mlops_median_absolute_error_apis():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    labels_pred = [1.0, 0.5, 2.5, 4.75, 7.0, 0.75]
    labels_actual = [1.5, 0.75, 2.75, 4.5, 7.50, 0.25]

    mae = sklearn.metrics.median_absolute_error(labels_actual, labels_pred)

    # first way
    pm.set_stat(RegressionMetrics.MEDIAN_ABSOLUTE_ERROR, mae)

    # second way
    pm.metrics.median_absolute_error(y_true=labels_actual, y_pred=labels_pred)

    # should throw an error if a non-numeric value is provided
    with pytest.raises(MLOpsStatisticsException):
        pm.set_stat(RegressionMetrics.MEDIAN_ABSOLUTE_ERROR, [1, 2, 3])

    # should throw an error if predicted labels differ in length from actuals
    with pytest.raises(ValueError):
        labels_pred_missing_values = [1.0, 0.5, 7.0, 0.75]
        pm.metrics.median_absolute_error(y_true=labels_actual,
                                         y_pred=labels_pred_missing_values)

    pm.done()
Example #7
def test_mlops_completeness_score_apis():
    mlops.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    labels_pred = [1, 0, 1, 2, 3, 0]
    labels_actual = [0, 1, 0, 1, 3, 1]

    cs = metrics.completeness_score(labels_actual, labels_pred)

    # first way
    mlops.set_stat(ClusteringMetrics.COMPLETENESS_SCORE, cs)

    # second way
    mlops.metrics.completeness_score(labels_true=labels_actual,
                                     labels_pred=labels_pred)

    # should throw an error if a non-numeric value is provided
    with pytest.raises(MLOpsStatisticsException):
        mlops.set_stat(ClusteringMetrics.COMPLETENESS_SCORE, [1, 2, 3])

    # should throw an error if predicted labels differ in length from actuals
    with pytest.raises(ValueError):
        labels_pred_missing_values = [0, 0, 0, 1]
        mlops.metrics.completeness_score(
            labels_true=labels_actual, labels_pred=labels_pred_missing_values)

    mlops.done()
Example #8
def main():
    root = logging.getLogger()
    root.setLevel(logging.INFO)

    ch = logging.StreamHandler(sys.stdout)
    ch.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    ch.setFormatter(formatter)
    root.addHandler(ch)

    options = parse_args()

    mlops.init()
    print("MLOps testing")
    print("My {} Node id:   {}".format(Constants.ION_LITERAL,
                                       mlops.get_current_node().id))
    print("My {} Node name: {}".format(Constants.ION_LITERAL,
                                       mlops.get_current_node().name))
    print("Test name:        {}".format(options.test_name))
    print("Taking test directory from main __file__ value: Name: {} File: {} ".
          format(__name__, __file__))

    run_mlops_tests(package_to_scan=parallelm.mlops.e2e_tests.health_node,
                    test_to_run=options.test_name)
    mlops.done()
Example #9
def test_mlops_calinski_harabaz_score_apis():
    mlops.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    X = [[1, 2], [1, 3], [1, 2], [2, 4], [4, 5], [9, 9]]
    labels_pred = [1, 0, 1, 2, 3, 0]

    chs = metrics.calinski_harabaz_score(X, labels_pred)

    # first way
    mlops.set_stat(ClusteringMetrics.CALINSKI_HARABAZ_SCORE, chs)

    # second way
    mlops.metrics.calinski_harabaz_score(X=X, labels=labels_pred)

    # should throw an error if a non-numeric value is provided
    with pytest.raises(MLOpsStatisticsException):
        mlops.set_stat(ClusteringMetrics.CALINSKI_HARABAZ_SCORE, [1, 2, 3])

    # should throw an error if predicted labels differ in length from actuals
    with pytest.raises(ValueError):
        labels_pred_missing_values = [0, 0, 0, 1]
        mlops.metrics.calinski_harabaz_score(X=X,
                                             labels=labels_pred_missing_values)

    mlops.done()
Example #10
def test_mlops_normalized_mutual_info_score_apis():
    mlops.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    labels_pred = [1, 0, 1, 2, 3, 0]
    labels_actual = [0, 1, 0, 1, 3, 1]

    nmis = metrics.normalized_mutual_info_score(labels_actual, labels_pred)

    # first way
    mlops.set_stat(ClusteringMetrics.NORMALIZED_MUTUAL_INFO_SCORE, nmis)

    # second way
    mlops.metrics.normalized_mutual_info_score(labels_true=labels_actual,
                                               labels_pred=labels_pred)

    # should throw an error if a non-numeric value is provided
    with pytest.raises(MLOpsStatisticsException):
        mlops.set_stat(ClusteringMetrics.NORMALIZED_MUTUAL_INFO_SCORE,
                       [1, 2, 3])

    # should throw an error if predicted labels differ in length from actuals
    with pytest.raises(ValueError):
        labels_pred_missing_values = [0, 0, 0, 1]
        mlops.metrics.normalized_mutual_info_score(
            labels_true=labels_actual, labels_pred=labels_pred_missing_values)

    mlops.done()
Example #11
def test_mlops_homogeneity_completeness_v_measure_apis():
    mlops.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    labels_pred = [1, 0, 1, 2, 3, 0]
    labels_actual = [0, 1, 0, 1, 3, 1]

    homogeneity, completeness, v_measure = metrics \
        .homogeneity_completeness_v_measure(labels_true=labels_actual, labels_pred=labels_pred)

    # first way
    mlops.set_stat(ClusteringMetrics.HOMOGENEITY_COMPLETENESS_V_MEASURE,
                   [homogeneity, completeness, v_measure])

    # second way
    mlops.metrics.homogeneity_completeness_v_measure(labels_true=labels_actual,
                                                     labels_pred=labels_pred)

    # should throw an error if the list is not of size three
    with pytest.raises(MLOpsStatisticsException):
        mlops.set_stat(ClusteringMetrics.HOMOGENEITY_COMPLETENESS_V_MEASURE,
                       [1, 1])

    # should throw an error if the list is not of size three
    with pytest.raises(MLOpsStatisticsException):
        mlops.set_stat(ClusteringMetrics.HOMOGENEITY_COMPLETENESS_V_MEASURE,
                       [1, 1, 1, 1])

    # should throw an error if predicted labels differ in length from actuals
    with pytest.raises(ValueError):
        labels_pred_missing_values = [0, 0, 0, 1]
        mlops.metrics.homogeneity_completeness_v_measure(
            labels_true=labels_actual, labels_pred=labels_pred_missing_values)

    mlops.done()
Example #12
def main():
    options = parse_args()
    print("PM: Configuration:")
    print("PM: Data file:            {}".format(options.data_file))
    print("PM: Output model:         {}".format(options.output_model))

    print()
    print("PM: Starting  code")
    print()

    print("PM: imported function!")
    print("PM: creating spark session")
    spark = SparkSession.builder.appName("KmeansTrain").getOrCreate()

    print("PM: calling pm.init()")
    pm.init(spark.sparkContext)

    print("PM: calling Kmeans_train")

    model = kmeans_train(pm_options=options, spark=spark)
    print("PM: json returned from Kmeans_train function!")

    print("PM: Saving model")
    save_model_locally(model, options, spark=spark)
    print("PM: model file saved locally!")

    print("PM: calling spark.stop")
    spark.stop()

    print("PM: calling pm.done()")
    pm.done()
    print("PM: after pm.done")
Example #13
    def _save_file(self, file_path):
        self._merge_params()

        # Initialize mlops
        mlops.init()

        if self._params["sink_get_save_file_size"]:
            file_size = os.stat(file_path).st_size / (1024 * 1024)
            mlops.set_stat("s3.outputFileSizeMB", file_size)

        if self._params["sink_get_save_line_count"]:
            line_count = len(open(file_path).readlines())
            mlops.set_stat("s3.outputFileLineCount", line_count)

        client = boto3.client(
            's3',
            aws_access_key_id=self._params["sink_aws_access_key_id"],
            aws_secret_access_key=self._params["sink_aws_secret_access_key"],
        )

        save_start_time = time.time()
        with open(file_path, 'rb') as data:
            client.put_object(Bucket=self._params["sink_bucket"],
                              Key=self._params["sink_key"],
                              Body=data)
        save_elapsed_time = time.time() - save_start_time
        if self._params["sink_get_save_time"]:
            # Convert seconds to milliseconds to match the stat name
            mlops.set_stat("s3.outputSaveTimemsec", save_elapsed_time * 1000)

        return
Example #14
def test_mlops_matthews_corrcoef_apis():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    labels_pred = [1, 0, 1, 1, 1, 0]
    labels_actual = [0, 1, 0, 0, 0, 1]

    mcc = sklearn.metrics.matthews_corrcoef(labels_actual, labels_pred)

    # first way
    pm.set_stat(ClassificationMetrics.MATTHEWS_CORRELATION_COEFFICIENT, mcc)

    # second way
    pm.metrics.matthews_corrcoef(labels_actual, labels_pred)

    # should throw an error if a non-numeric value is provided
    with pytest.raises(MLOpsStatisticsException):
        pm.set_stat(ClassificationMetrics.MATTHEWS_CORRELATION_COEFFICIENT,
                    [1, 2, 3])

    # should throw an error if predicted labels differ in length from actuals
    with pytest.raises(ValueError):
        labels_prob_missing_values = [1, 0, 1, 1]
        pm.metrics.matthews_corrcoef(y_true=labels_actual,
                                     y_pred=labels_prob_missing_values)

    sample_weight = [0.9, 0.1, 0.5, 0.9, 1.0, 0]

    # testing with sample weights as well
    pm.metrics.matthews_corrcoef(y_true=labels_actual,
                                 y_pred=labels_pred,
                                 sample_weight=sample_weight)

    pm.done()
Example #15
def test_bar_graph():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    with pytest.raises(MLOpsException):
        BarGraph().name("bar").cols(["g1", "g2"]).data(["aa", "bb"])

    with pytest.raises(MLOpsException):
        BarGraph().name("bar").data(["aa", "bb"])

    with pytest.raises(MLOpsException):
        mlt = BarGraph().name("mlt").cols(["g1"]).data([55, 66])
        pm.set_stat(mlt)

    with pytest.raises(MLOpsException):
        mlt_cont = BarGraph().name("mlt").cols([1,
                                                2]).data([55,
                                                          66]).as_continuous()
        pm.set_stat(mlt_cont)

    mlt = BarGraph().name("mlt").cols(["g1", "g2"]).data([55, 66])
    pm.set_stat(mlt)

    mlt_cont = BarGraph().name("mlt").cols([1, 2,
                                            3]).data([55, 66]).as_continuous()
    pm.set_stat(mlt_cont)

    pm.done()
Example #16
def test_feature_importance():
    num_significant_features = 6
    ion_instance_id = ION1.ION_INSTANCE_ID
    ion_node_id = ION1.NODE_1_ID
    pipeline_instance_id = ION1.PIPELINE_INST_ID_1
    set_mlops_env(ion_id=ion_instance_id, ion_node_id=ion_node_id, model_id=ION1.MODEL_ID)
    rest_helper = MlOpsRestFactory().get_rest_helper(MLOpsMode.AGENT, mlops_server="localhost",
                                                     mlops_port="3456", token="")
    rest_helper.set_prefix(Constants.URL_MLOPS_PREFIX)
    with requests_mock.mock() as m:
        m.get(rest_helper.url_get_workflow_instance(ion_instance_id), json=test_workflow_instances)
        m.get(rest_helper.url_get_ees(), json=test_ee_info)
        m.get(rest_helper.url_get_agents(), json=test_agents_info)
        m.get(rest_helper.url_get_model_list(), json=test_models_info)
        m.get(rest_helper.url_get_health_thresholds(ion_instance_id), json=test_health_info)
        m.get(rest_helper.url_get_model_stats(ION1.MODEL_ID), json=test_model_stats)
        m.get(rest_helper.url_get_uuid("model"), json={"id": "model_5906255e-0a3d-4fef-8653-8d41911264fb"})
        m.post(rest_helper.url_post_stat(pipeline_instance_id), json={})

        # Test Python channel
        mlops.init(ctx=None, mlops_mode=MLOpsMode.AGENT)
        published_model = mlops.Model(name="dtr_mlops_model",
                                      model_format=ModelFormat.SPARKML,
                                      description="model of decision tree regression with explainability")
        published_model.feature_importance(model=FinalModel, feature_names=FinalModel.feature_names,
                                           num_significant_features=num_significant_features)
        mlops.done()
Example #17
def test_publish_model_api():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    model_data = "MODEL_DATA"
    annotation = {"a": "b"}
    model = pm.Model(name="my model",
                     model_format=ModelFormat.TEXT,
                     description="test model")

    model_file = os.path.join(os.path.sep, "tmp", str(uuid.uuid4()))
    f = open(model_file, 'w')
    f.write(model_data)
    f.close()

    model.set_model_path(model_file)
    model.set_annotations(annotation)

    model_id = pm.publish_model(model)
    assert (model_id == model.get_id())
    os.remove(model_file)

    model_df = pm.get_model_by_id(model_id, download=True)

    pm.done()

    # accessing 0th row, 'data' column of returned model dataframe
    print(model_df.iloc[0])
    assert (model_data == model_df.iloc[0]['data'])
    assert (annotation == model_df.iloc[0]['annotations'])
Example #18
def test_multi_graph():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)
    with pytest.raises(MLOpsException):
        MultiGraph().name("mg").add_series(label="a",
                                           x="not-a-vec",
                                           y="not-a-vec")

    with pytest.raises(MLOpsException):
        MultiGraph().name("gg").add_series(label="a", x=[1, 2], y=["a", "b"])

    with pytest.raises(MLOpsException):
        # Y is not the same size of X
        MultiGraph().name("gg").add_series(x=[1, 2, 3, 4, 5, 6],
                                           label="rate",
                                           y=[1, 2, 3, 4, 5])

    x1_series = [0, 2, 4, 6]
    y1_series = [11, 12, 13, 14]

    x2_series = [1, 3, 5, 7]
    y2_series = [15, 16, 17, 18]

    gg = MultiGraph().name("gg")
    gg.add_series(x=x1_series, label="y1", y=y1_series)
    gg.add_series(x=x2_series, label="y2", y=y2_series)
    pm.set_stat(gg)
    pm.done()
Example #19
    def __init__(self,
                 print_interval,
                 stats_type,
                 num_categories,
                 conf_thresh,
                 hot_label=True):
        super(CategoricalStatistics, self).__init__(print_interval)
        self._num_categories = num_categories
        self._hot_label = hot_label
        self._stats_type = stats_type
        self._conf_thresh = conf_thresh / 100.0

        # These are useful for development, but should be replaced by mlops library functions
        self._label_hist = []
        self._infer_hist = []
        for i in range(0, self._num_categories):
            self._label_hist.append(0)
            self._infer_hist.append(0)

        if self._stats_type == "python":
            mlops.init(ctx=None,
                       connect_mlops=True,
                       mlops_mode=MLOpsMode.AGENT)
        elif self._stats_type == "file":
            mlops.init(ctx=None,
                       connect_mlops=False,
                       mlops_mode=MLOpsMode.STAND_ALONE)
        else:
            self._stats_type = "none"

        if self._stats_type != "none":
            self._infer_tbl = Table().name("inferences").cols(
                ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"])
Example #20
def test_table():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    with pytest.raises(MLOpsException):
        Table().name("mytable").cols(["a", "b",
                                      "c"]).add_row([1, 2, 3]).add_row([1, 2])

    with pytest.raises(MLOpsException):
        tbl = Table().name("mytable").cols(["a", "b"])
        pm.set_stat(tbl)

    tbl = Table().name("good-1").cols(["a", "b", "c"]).add_rows([[1, 2, 3],
                                                                 [1, 2, 3]])
    pm.set_stat(tbl)

    tbl = Table().name("good-2").cols(["a", "b", "c"])
    tbl.add_row("r1", [1, 2, 3])
    tbl.add_row("r2", [3, 4, 5])
    pm.set_stat(tbl)

    tbl = Table().name("good-3").cols(["a", "b", "c"])
    tbl.add_row([6, 7, 8])
    tbl.add_row([9, 0, 1])
    pm.set_stat(tbl)

    pm.done()
Example #21
def main():
    pm_options = parse_args()
    # Initialize MLOps Library
    mlops.init()
    # Load the model
    if pm_options.input_model is not None:
        try:
            filename = pm_options.input_model
            file_obj = open(filename, 'rb')
            mlops.set_stat("model_file", 1)
        except Exception as e:
            print("Model not found")
            print("Got exception: {}".format(e))
            mlops.set_stat("model_file", 0)
            mlops.done()
            return 0
    else:
        print("No input model was provided; exiting")
        mlops.set_stat("model_file", 0)
        mlops.done()
        return 0

    classifier = pickle.load(file_obj)

    # Create synthetic data (Gaussian Distribution, Poisson Distribution and Beta Distribution)
    num_samples = int(pm_options.num_samples)
    num_features = int(pm_options.num_features)

    np.random.seed(0)
    g = np.random.normal(0, 1, (num_samples, num_features))
    p = np.random.poisson(0.7, (num_samples, num_features))
    b = np.random.beta(2, 2, (num_samples, num_features))

    test_data = np.concatenate((g, p, b), axis=0)
    np.random.seed()
    test_features = test_data[np.random.choice(test_data.shape[0],
                                               num_samples,
                                               replace=False)]

    # Output health statistics to MCenter.
    # The MLOps API reports the distribution statistics of each feature in the
    # data; these are compared automatically with the statistics reported during
    # training to generate the similarity score.
    mlops.set_data_distribution_stat(test_features)

    # Output the number of samples being processed using MCenter
    mlops.set_stat(PredefinedStats.PREDICTIONS_COUNT, num_samples,
                   st.TIME_SERIES)

    # Predict labels
    result = classifier.predict(test_features)

    # Label distribution in prediction
    value, counts = np.unique(result, return_counts=True)
    label_distribution = np.asarray((value, counts)).T
    column_names = value.astype(str).tolist()
    print("Label distributions: \n {0}".format(label_distribution))

    # Output label distribution as a BarGraph using MCenter
    bar = BarGraph().name("Label Distribution").cols(
        (label_distribution[:, 0]).astype(str).tolist()).data(
            (label_distribution[:, 1]).tolist())
    mlops.set_stat(bar)

    # Terminate MLOPs
    mlops.done()
Example #22
def test_mlops_bsl_apis():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    labels_actual = [1, 0, 1, 1, 1, 0]
    labels_pred_prob = [0.9, 0.8, 0.7, 0.9, 0.75, 1]

    bsl = sklearn.metrics.brier_score_loss(labels_actual, labels_pred_prob)

    # first way
    pm.set_stat(ClassificationMetrics.BRIER_SCORE_LOSS, bsl)

    # second way
    pm.metrics.brier_score_loss(y_true=labels_actual, y_prob=labels_pred_prob)

    # should throw an error if a non-numeric value is provided
    with pytest.raises(MLOpsStatisticsException):
        pm.set_stat(ClassificationMetrics.BRIER_SCORE_LOSS, [1, 2, 3])

    # should throw an error if predicted labels differ in length from actuals
    with pytest.raises(ValueError):
        labels_prob_missing_values = [0, 0, 0, 1]
        pm.metrics.brier_score_loss(y_true=labels_actual, y_prob=labels_prob_missing_values)

    sample_weight = [0.9, 0.1, 0.5, 0.9, 1.0, 0]

    # testing with sample weights as well
    pm.metrics.brier_score_loss(y_true=labels_actual,
                                y_prob=labels_pred_prob,
                                sample_weight=sample_weight)

    pm.done()
Example #23
def test_mlops_roc_auc_apis():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    labels_pred_prob = [0.9, 0.4, 0.6, 0.9, 0.1, 0.9]
    labels_actual = [0, 1, 0, 0, 0, 1]

    roc_auc_score = sklearn.metrics.roc_auc_score(labels_actual,
                                                  labels_pred_prob)

    # first way
    pm.set_stat(ClassificationMetrics.ROC_AUC_SCORE, roc_auc_score)

    # second way
    pm.metrics.roc_auc_score(labels_actual, labels_pred_prob)

    # should throw an error if a non-numeric value is provided
    with pytest.raises(MLOpsStatisticsException):
        pm.set_stat(ClassificationMetrics.ROC_AUC_SCORE, [1, 2, 3])

    # should throw an error if predicted labels differ in length from actuals
    with pytest.raises(ValueError):
        labels_prob_missing_values = [0.0, 0.9, 1.0, 0.85]
        pm.metrics.roc_auc_score(y_true=labels_actual,
                                 y_score=labels_prob_missing_values)

    sample_weight = [0.9, 0.1, 0.5, 0.9, 1.0, 0]

    # testing with sample weights as well
    pm.metrics.roc_auc_score(y_true=labels_actual,
                             y_score=labels_pred_prob,
                             sample_weight=sample_weight)

    pm.done()
Example #24
def test_get_models_api():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)
    pm._set_api_test_mode()

    with pytest.raises(MLOpsException):
        pm.get_models_by_time(start_time=None, end_time=None)

    pm.done()
Example #25
    def _materialize(self, parent_data_objs, user_data):
        # Initialize MLOps Library
        mlops.init()
        df_data = parent_data_objs[0]
        df_clean = do_nan_removal(df_data)
        get_data_distribution_stat(df_clean)
        # Terminate MLOps
        mlops.done()
        return [df_clean]
Example #26
def test_mlops_structure_api():
    ion_instance_id = ION1.ION_INSTANCE_ID
    ion_node_id = ION1.NODE_1_ID
    token = ION1.TOKEN

    set_mlops_env(ion_id=ion_instance_id,
                  ion_node_id=ion_node_id,
                  token=token,
                  model_id=ION1.MODEL_ID)
    rest_helper = MlOpsRestFactory().get_rest_helper(MLOpsMode.AGENT,
                                                     mlops_server="localhost",
                                                     mlops_port="3456",
                                                     token=token)

    rest_helper.set_prefix(Constants.URL_MLOPS_PREFIX)
    with requests_mock.mock() as m:
        m.get(rest_helper.url_get_workflow_instance(ion_instance_id),
              json=test_workflow_instances)
        m.get(rest_helper.url_get_ees(), json=test_ee_info)
        m.get(rest_helper.url_get_agents(), json=test_agents_info)
        m.get(rest_helper.url_get_model_list(), json=test_models_info)
        m.get(rest_helper.url_get_health_thresholds(ion_instance_id),
              json=test_health_info)
        m.get(rest_helper.url_get_model_stats(ION1.MODEL_ID),
              json=test_model_stats)
        m.get(rest_helper.url_get_uuid("model"),
              json={"id": "model_5906255e-0a3d-4fef-8653-8d41911264fb"})

        pm.init(ctx=None, mlops_mode=MLOpsMode.AGENT)
        assert pm.get_mlapp_id() == ION1.ION_ID
        assert pm.get_mlapp_name() == ION1.ION_NAME

        curr_node = pm.get_current_node()
        assert curr_node.id == ion_node_id

        nodes = pm.get_nodes()
        assert len(nodes) == 2

        node0 = pm.get_node('1')
        assert node0 is not None
        assert node0.pipeline_pattern_id == ION1.PIPELINE_PATTERN_ID_1
        assert node0.pipeline_instance_id == ION1.PIPELINE_INST_ID_1

        node0_agents = pm.get_agents('1')
        assert len(node0_agents) == 1
        assert node0_agents[0].id == ION1.AGENT_ID_0
        assert node0_agents[0].hostname == 'localhost'

        agent = pm.get_agent('1', ION1.AGENT_ID_0)
        assert agent.id == ION1.AGENT_ID_0
        assert agent.hostname == 'localhost'

        model = pm.current_model()
        assert model is not None
        assert model.metadata.modelId == ION1.MODEL_ID

        pm.done()
Example #27
def test_set_stat_basic():
    with pytest.raises(MLOpsException):
        pm.set_stat(name=None, data=None)

    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)
    pm.set_stat("st1", data=5.5, category=StatCategory.TIME_SERIES)
    pm.set_stat("st1", data=5.5)

    pm.done()
Example #28
def main():
    options = parse_args()

    sc = SparkContext(appName="predict-test")

    pm.init(sc)
    predict_node(options)
    sc.stop()
    pm.done()
Example #29
    def _df_to_db(self, engine, df_sink, table, database):
        """
        Save a DataFrame to the database and report the row count to MLOps.
        """
        mlops.init()
        df_sink.to_sql(con=engine, name=table, if_exists='replace', index=False)
        # Report the row count under a "<database>.<table>" stat name
        mlops.set_stat("{}.{}".format(database, table), df_sink.shape[0])
        mlops.done()
        return df_sink.shape[0]
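
Note: the save-and-report flow inside _df_to_db can also be exercised standalone. A minimal sketch, assuming SQLAlchemy and pandas with an in-memory SQLite engine (and the mlops import path assumed earlier):

import pandas as pd
from sqlalchemy import create_engine

from parallelm.mlops import mlops   # module path assumed, as noted above

# Build an engine and a small DataFrame, then save it and report the row count.
engine = create_engine("sqlite://")   # in-memory SQLite database
df = pd.DataFrame({"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]})

mlops.init()
df.to_sql(con=engine, name="my_table", if_exists="replace", index=False)
mlops.set_stat("my_db.my_table", df.shape[0])   # row count, named "<database>.<table>"
mlops.done()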
Example #30
    def _materialize(self, parent_data_objs, user_data):
        df_infer_set = self._gen_inf_dataset(parent_data_objs[0])
        # Initialize MLOps Library
        mlops.init()
        # Record the data distribution stats for the DataFrame
        mlops.set_data_distribution_stat(df_infer_set)
        # Terminate MLOps
        mlops.done()
        return [df_infer_set]