def test_mlops_explained_variance_score_apis():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    labels_pred = [1.0, 0.5, 2.5, 4.75, 7.0, 0.75]
    labels_actual = [1.5, 0.75, 2.75, 4.5, 7.50, 0.25]

    evs = sklearn.metrics.explained_variance_score(labels_actual, labels_pred)

    # first way
    pm.set_stat(RegressionMetrics.EXPLAINED_VARIANCE_SCORE, evs)

    # second way
    pm.metrics.explained_variance_score(y_true=labels_actual, y_pred=labels_pred)

    # should throw an error if a non-numeric value is provided
    with pytest.raises(MLOpsStatisticsException):
        pm.set_stat(RegressionMetrics.EXPLAINED_VARIANCE_SCORE, [1, 2, 3])

    # should throw an error if predicted labels and actual labels have different lengths
    with pytest.raises(ValueError):
        labels_pred_missing_values = [1.0, 0.5, 7.0, 0.75]
        pm.metrics.explained_variance_score(y_true=labels_actual,
                                            y_pred=labels_pred_missing_values)

    # testing with sample weights as well
    sample_weight = [0.9, 0.1, 0.5, 0.9, 1.0, 0]
    pm.metrics.explained_variance_score(y_true=labels_actual,
                                        y_pred=labels_pred,
                                        sample_weight=sample_weight)

    pm.done()
def test_mlops_mean_squared_error_apis():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    labels_pred = [1.0, 0.5, 2.5, 4.75, 7.0, 0.75]
    labels_actual = [1.5, 0.75, 2.75, 4.5, 7.50, 0.25]

    mse = sklearn.metrics.mean_squared_error(labels_actual, labels_pred)

    # first way
    pm.set_stat(RegressionMetrics.MEAN_SQUARED_ERROR, mse)

    # second way
    pm.metrics.mean_squared_error(y_true=labels_actual, y_pred=labels_pred)

    # should throw an error if a non-numeric value is provided
    with pytest.raises(MLOpsStatisticsException):
        pm.set_stat(RegressionMetrics.MEAN_SQUARED_ERROR, [1, 2, 3])

    # should throw an error if predicted labels and actual labels have different lengths
    with pytest.raises(ValueError):
        labels_pred_missing_values = [1.0, 0.5, 7.0, 0.75]
        pm.metrics.mean_squared_error(y_true=labels_actual, y_pred=labels_pred_missing_values)

    # testing with sample weights as well
    sample_weight = [0.9, 0.1, 0.5, 0.9, 1.0, 0]
    pm.metrics.mean_squared_error(y_true=labels_actual,
                                  y_pred=labels_pred,
                                  sample_weight=sample_weight)

    pm.done()
def main(): print("args: {}".format(sys.argv)) options = parse_args() print("- inside test-python-aux Running main.py") print("arg1: {}".format(options.arg1)) print("input_model: {}".format(options.input_model)) print("use-mlops: {}".format(options.use_mlops)) print("iter: {}".format(options.iter)) print("exit_value: {}".format(options.exit_value)) print("Calling mlops.init()") if options.use_mlops: mlops.init() # Some output - to test logs for idx in range(options.iter): print("stdout - Idx {}".format(idx)) print("stderr - Idx {}".format(idx), file=sys.stderr) if options.use_mlops: mlops.set_stat("aux_stat", 1) time.sleep(1) if options.use_mlops: mlops.done() # Exit status if options.exit_value >= 0: print("About to exit with value: {}".format(options.exit_value)) sys.exit(options.exit_value) else: print("About to raise exception: {}".format(options.exit_value)) raise Exception("Exiting main using exception")
def main(): print("Starting example") mlops.init(run_in_non_pm_mode=True, mlops_mode=MLOpsMode.PYTHON) # Line graphs mlops.set_stat("myCounterDouble", 5.5) mlops.set_stat("myCounterDouble2", 7.3) # Multi-line graphs mlt = MultiLineGraph().name("Multi Line").labels(["l1", "l2"]).data([5, 16]) mlops.set_stat(mlt) tbl = Table().name("MyTable").cols(["Date", "Some number"]) tbl.add_row(["2001Q1", "55"]) tbl.add_row(["2001Q2", "66"]) tbl.add_row(["2003Q3", "33"]) tbl.add_row(["2003Q2", "22"]) mlops.set_stat(tbl) bar = BarGraph().name("MyBar").cols(["aa", "bb", "cc", "dd", "ee"]).data([10, 15, 12, 9, 8]) mlops.set_stat(bar) mlops.done() print("Example done")
def test_mlops_bas_apis():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    labels_pred = [1, 0, 1, 1, 1, 0]
    labels_actual = [0, 1, 0, 0, 0, 1]

    bas = sklearn.metrics.balanced_accuracy_score(labels_actual, labels_pred)

    # first way
    pm.set_stat(ClassificationMetrics.BALANCED_ACCURACY_SCORE, bas)

    # second way
    pm.metrics.balanced_accuracy_score(y_true=labels_actual, y_pred=labels_pred)

    # should throw an error if a non-numeric value is provided
    with pytest.raises(MLOpsStatisticsException):
        pm.set_stat(ClassificationMetrics.BALANCED_ACCURACY_SCORE, [1, 2, 3])

    # should throw an error if predicted labels and actual labels have different lengths
    with pytest.raises(ValueError):
        labels_pred_missing_values = [0, 0, 0, 1]
        pm.metrics.balanced_accuracy_score(y_true=labels_actual, y_pred=labels_pred_missing_values)

    # testing with sample weights as well
    sample_weight = [0.9, 0.1, 0.5, 0.9, 1.0, 0]
    pm.metrics.balanced_accuracy_score(y_true=labels_actual,
                                       y_pred=labels_pred,
                                       sample_weight=sample_weight)

    pm.done()
def test_mlops_median_absolute_error_apis():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    labels_pred = [1.0, 0.5, 2.5, 4.75, 7.0, 0.75]
    labels_actual = [1.5, 0.75, 2.75, 4.5, 7.50, 0.25]

    mae = sklearn.metrics.median_absolute_error(labels_actual, labels_pred)

    # first way
    pm.set_stat(RegressionMetrics.MEDIAN_ABSOLUTE_ERROR, mae)

    # second way
    pm.metrics.median_absolute_error(y_true=labels_actual, y_pred=labels_pred)

    # should throw an error if a non-numeric value is provided
    with pytest.raises(MLOpsStatisticsException):
        pm.set_stat(RegressionMetrics.MEDIAN_ABSOLUTE_ERROR, [1, 2, 3])

    # should throw an error if predicted labels and actual labels have different lengths
    with pytest.raises(ValueError):
        labels_pred_missing_values = [1.0, 0.5, 7.0, 0.75]
        pm.metrics.median_absolute_error(y_true=labels_actual, y_pred=labels_pred_missing_values)

    pm.done()
def test_mlops_completeness_score_apis():
    mlops.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    labels_pred = [1, 0, 1, 2, 3, 0]
    labels_actual = [0, 1, 0, 1, 3, 1]

    cs = metrics.completeness_score(labels_actual, labels_pred)

    # first way
    mlops.set_stat(ClusteringMetrics.COMPLETENESS_SCORE, cs)

    # second way
    mlops.metrics.completeness_score(labels_true=labels_actual, labels_pred=labels_pred)

    # should throw an error if a non-numeric value is provided
    with pytest.raises(MLOpsStatisticsException):
        mlops.set_stat(ClusteringMetrics.COMPLETENESS_SCORE, [1, 2, 3])

    # should throw an error if predicted labels and actual labels have different lengths
    with pytest.raises(ValueError):
        labels_pred_missing_values = [0, 0, 0, 1]
        mlops.metrics.completeness_score(
            labels_true=labels_actual, labels_pred=labels_pred_missing_values)

    mlops.done()
def main():
    root = logging.getLogger()
    root.setLevel(logging.INFO)

    ch = logging.StreamHandler(sys.stdout)
    ch.setLevel(logging.DEBUG)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    ch.setFormatter(formatter)
    root.addHandler(ch)

    options = parse_args()

    mlops.init()
    print("MLOps testing")
    print("My {} Node id: {}".format(Constants.ION_LITERAL, mlops.get_current_node().id))
    print("My {} Node name: {}".format(Constants.ION_LITERAL, mlops.get_current_node().name))
    print("Test name: {}".format(options.test_name))
    print("Taking test directory from main __file__ value: Name: {} File: {} ".format(__name__, __file__))

    run_mlops_tests(package_to_scan=parallelm.mlops.e2e_tests.health_node,
                    test_to_run=options.test_name)
    mlops.done()
def test_mlops_calinski_harabaz_score_apis():
    mlops.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    X = [[1, 2], [1, 3], [1, 2], [2, 4], [4, 5], [9, 9]]
    labels_pred = [1, 0, 1, 2, 3, 0]

    chs = metrics.calinski_harabaz_score(X, labels_pred)

    # first way
    mlops.set_stat(ClusteringMetrics.CALINSKI_HARABAZ_SCORE, chs)

    # second way
    mlops.metrics.calinski_harabaz_score(X=X, labels=labels_pred)

    # should throw an error if a non-numeric value is provided
    with pytest.raises(MLOpsStatisticsException):
        mlops.set_stat(ClusteringMetrics.CALINSKI_HARABAZ_SCORE, [1, 2, 3])

    # should throw an error if the number of labels differs from the number of samples in X
    with pytest.raises(ValueError):
        labels_pred_missing_values = [0, 0, 0, 1]
        mlops.metrics.calinski_harabaz_score(X=X, labels=labels_pred_missing_values)

    mlops.done()
def test_mlops_normalized_mutual_info_score_apis():
    mlops.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    labels_pred = [1, 0, 1, 2, 3, 0]
    labels_actual = [0, 1, 0, 1, 3, 1]

    nmis = metrics.normalized_mutual_info_score(labels_actual, labels_pred)

    # first way
    mlops.set_stat(ClusteringMetrics.NORMALIZED_MUTUAL_INFO_SCORE, nmis)

    # second way
    mlops.metrics.normalized_mutual_info_score(labels_true=labels_actual, labels_pred=labels_pred)

    # should throw an error if a non-numeric value is provided
    with pytest.raises(MLOpsStatisticsException):
        mlops.set_stat(ClusteringMetrics.NORMALIZED_MUTUAL_INFO_SCORE, [1, 2, 3])

    # should throw an error if predicted labels and actual labels have different lengths
    with pytest.raises(ValueError):
        labels_pred_missing_values = [0, 0, 0, 1]
        mlops.metrics.normalized_mutual_info_score(
            labels_true=labels_actual, labels_pred=labels_pred_missing_values)

    mlops.done()
def test_mlops_homogeneity_completeness_v_measure_apis():
    mlops.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    labels_pred = [1, 0, 1, 2, 3, 0]
    labels_actual = [0, 1, 0, 1, 3, 1]

    homogeneity, completeness, v_measure = metrics \
        .homogeneity_completeness_v_measure(labels_true=labels_actual, labels_pred=labels_pred)

    # first way
    mlops.set_stat(ClusteringMetrics.HOMOGENEITY_COMPLETENESS_V_MEASURE,
                   [homogeneity, completeness, v_measure])

    # second way
    mlops.metrics.homogeneity_completeness_v_measure(labels_true=labels_actual, labels_pred=labels_pred)

    # should throw an error if the list is not of size three
    with pytest.raises(MLOpsStatisticsException):
        mlops.set_stat(ClusteringMetrics.HOMOGENEITY_COMPLETENESS_V_MEASURE, [1, 1])

    # should throw an error if the list is not of size three
    with pytest.raises(MLOpsStatisticsException):
        mlops.set_stat(ClusteringMetrics.HOMOGENEITY_COMPLETENESS_V_MEASURE, [1, 1, 1, 1])

    # should throw an error if predicted labels and actual labels have different lengths
    with pytest.raises(ValueError):
        labels_pred_missing_values = [0, 0, 0, 1]
        mlops.metrics.homogeneity_completeness_v_measure(
            labels_true=labels_actual, labels_pred=labels_pred_missing_values)

    mlops.done()
def main(): options = parse_args() print("PM: Configuration:") print("PM: Data file: {}".format(options.data_file)) print("PM: Output model: {}".format(options.output_model)) print() print("PM: Starting code") print() print("PM: imported function!") print("PM: creating spark session") spark = SparkSession.builder.appName("KmeansTrain").getOrCreate() print("PM: calling pm.init()") pm.init(spark.sparkContext) print("PM: calling Kmeans_train") model = kmeans_train(pm_options=options, spark=spark) print("PM: json returned from Kmeans_train function!") print("PM: Saving model") save_model_locally(model, options, spark=spark) print("PM: model file saved locally!") print("PM: calling spark.stop") spark.stop() print("PM: calling pm.done()") pm.done() print("PM: after pm.done")
def _save_file(self, file_path):
    self._merge_params()

    # Initialize mlops
    mlops.init()

    if self._params["sink_get_save_file_size"]:
        file_size = os.stat(file_path).st_size / (1024 * 1024)
        mlops.set_stat("s3.outputFileSizeMB", file_size)

    if self._params["sink_get_save_line_count"]:
        with open(file_path) as f:
            line_count = len(f.readlines())
        mlops.set_stat("s3.outputFileLineCount", line_count)

    client = boto3.client(
        's3',
        aws_access_key_id=self._params["sink_aws_access_key_id"],
        aws_secret_access_key=self._params["sink_aws_secret_access_key"],
    )

    with open(file_path, 'rb') as data:
        save_start_time = time.time()
        client.put_object(Bucket=self._params["sink_bucket"],
                          Key=self._params["sink_key"],
                          Body=data)
        save_elapsed_time = time.time() - save_start_time

    if self._params["sink_get_save_time"]:
        # note: the elapsed time is measured in seconds, despite the stat name
        mlops.set_stat("s3.outputSaveTimemsec", save_elapsed_time)

    return
def test_mlops_matthews_corrcoef_apis():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    labels_pred = [1, 0, 1, 1, 1, 0]
    labels_actual = [0, 1, 0, 0, 0, 1]

    mcc = sklearn.metrics.matthews_corrcoef(labels_actual, labels_pred)

    # first way
    pm.set_stat(ClassificationMetrics.MATTHEWS_CORRELATION_COEFFICIENT, mcc)

    # second way
    pm.metrics.matthews_corrcoef(labels_actual, labels_pred)

    # should throw an error if a non-numeric value is provided
    with pytest.raises(MLOpsStatisticsException):
        pm.set_stat(ClassificationMetrics.MATTHEWS_CORRELATION_COEFFICIENT, [1, 2, 3])

    # should throw an error if predicted labels and actual labels have different lengths
    with pytest.raises(ValueError):
        labels_pred_missing_values = [1, 0, 1, 1]
        pm.metrics.matthews_corrcoef(y_true=labels_actual, y_pred=labels_pred_missing_values)

    # testing with sample weights as well
    sample_weight = [0.9, 0.1, 0.5, 0.9, 1.0, 0]
    pm.metrics.matthews_corrcoef(y_true=labels_actual,
                                 y_pred=labels_pred,
                                 sample_weight=sample_weight)

    pm.done()
def test_bar_graph():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    with pytest.raises(MLOpsException):
        BarGraph().name("bar").cols(["g1", "g2"]).data(["aa", "bb"])

    with pytest.raises(MLOpsException):
        BarGraph().name("bar").data(["aa", "bb"])

    with pytest.raises(MLOpsException):
        mlt = BarGraph().name("mlt").cols(["g1"]).data([55, 66])
        pm.set_stat(mlt)

    with pytest.raises(MLOpsException):
        mlt_cont = BarGraph().name("mlt").cols([1, 2]).data([55, 66]).as_continuous()
        pm.set_stat(mlt_cont)

    mlt = BarGraph().name("mlt").cols(["g1", "g2"]).data([55, 66])
    pm.set_stat(mlt)

    mlt_cont = BarGraph().name("mlt").cols([1, 2, 3]).data([55, 66]).as_continuous()
    pm.set_stat(mlt_cont)

    pm.done()
def test_feature_importance():
    num_significant_features = 6

    ion_instance_id = ION1.ION_INSTANCE_ID
    ion_node_id = ION1.NODE_1_ID
    pipeline_instance_id = ION1.PIPELINE_INST_ID_1
    set_mlops_env(ion_id=ion_instance_id, ion_node_id=ion_node_id, model_id=ION1.MODEL_ID)

    rest_helper = MlOpsRestFactory().get_rest_helper(MLOpsMode.AGENT, mlops_server="localhost",
                                                     mlops_port="3456", token="")
    rest_helper.set_prefix(Constants.URL_MLOPS_PREFIX)

    with requests_mock.mock() as m:
        m.get(rest_helper.url_get_workflow_instance(ion_instance_id), json=test_workflow_instances)
        m.get(rest_helper.url_get_ees(), json=test_ee_info)
        m.get(rest_helper.url_get_agents(), json=test_agents_info)
        m.get(rest_helper.url_get_model_list(), json=test_models_info)
        m.get(rest_helper.url_get_health_thresholds(ion_instance_id), json=test_health_info)
        m.get(rest_helper.url_get_model_stats(ION1.MODEL_ID), json=test_model_stats)
        m.get(rest_helper.url_get_uuid("model"),
              json={"id": "model_5906255e-0a3d-4fef-8653-8d41911264fb"})
        m.post(rest_helper.url_post_stat(pipeline_instance_id), json={})

        # Test Python channel
        mlops.init(ctx=None, mlops_mode=MLOpsMode.AGENT)

        published_model = mlops.Model(name="dtr_mlops_model",
                                      model_format=ModelFormat.SPARKML,
                                      description="model of decision tree regression with explainability")
        published_model.feature_importance(model=FinalModel,
                                           feature_names=FinalModel.feature_names,
                                           num_significant_features=num_significant_features)

        mlops.done()
def test_publish_model_api():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    model_data = "MODEL_DATA"
    annotation = {"a": "b"}

    model = pm.Model(name="my model", model_format=ModelFormat.TEXT, description="test model")

    model_file = os.path.join(os.path.sep, "tmp", str(uuid.uuid4()))
    f = open(model_file, 'w')
    f.write(model_data)
    f.close()

    model.set_model_path(model_file)
    model.set_annotations(annotation)

    model_id = pm.publish_model(model)
    assert (model_id == model.get_id())
    os.remove(model_file)

    model_df = pm.get_model_by_id(model_id, download=True)
    pm.done()

    # accessing 0th row, 'data' column of returned model dataframe
    print(model_df.iloc[0])
    assert (model_data == model_df.iloc[0]['data'])
    assert (annotation == model_df.iloc[0]['annotations'])
def test_multi_graph():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    with pytest.raises(MLOpsException):
        MultiGraph().name("mg").add_series(label="a", x="not-a-vec", y="not-a-vec")

    with pytest.raises(MLOpsException):
        MultiGraph().name("gg").add_series(label="a", x=[1, 2], y=["a", "b"])

    with pytest.raises(MLOpsException):
        # Y is not the same size as X
        MultiGraph().name("gg").add_series(x=[1, 2, 3, 4, 5, 6], label="rate", y=[1, 2, 3, 4, 5])

    x1_series = [0, 2, 4, 6]
    y1_series = [11, 12, 13, 14]

    x2_series = [1, 3, 5, 7]
    y2_series = [15, 16, 17, 18]

    gg = MultiGraph().name("gg")
    gg.add_series(x=x1_series, label="y1", y=y1_series)
    gg.add_series(x=x2_series, label="y2", y=y2_series)

    pm.set_stat(gg)

    pm.done()
def __init__(self, print_interval, stats_type, num_categories, conf_thresh, hot_label=True):
    super(CategoricalStatistics, self).__init__(print_interval)
    self._num_categories = num_categories
    self._hot_label = hot_label
    self._stats_type = stats_type
    self._conf_thresh = conf_thresh / 100.0

    # These are useful for development, but should be replaced by mlops library functions
    self._label_hist = []
    self._infer_hist = []
    for i in range(0, self._num_categories):
        self._label_hist.append(0)
        self._infer_hist.append(0)

    if self._stats_type == "python":
        mlops.init(ctx=None, connect_mlops=True, mlops_mode=MLOpsMode.AGENT)
    elif self._stats_type == "file":
        mlops.init(ctx=None, connect_mlops=False, mlops_mode=MLOpsMode.STAND_ALONE)
    else:
        self._stats_type = "none"

    if self._stats_type != "none":
        self._infer_tbl = Table().name("inferences").cols(
            ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"])
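The constructor above keeps raw per-class counters, and its own comment notes they should eventually be replaced by mlops library calls. A minimal sketch of what that could look like, reusing the BarGraph API already exercised elsewhere in this repository; the method name report_inference_histogram is hypothetical and not part of the original class:

def report_inference_histogram(self):
    # Hypothetical companion method (sketch only): publish the accumulated
    # inference counts as a BarGraph instead of keeping them as plain lists.
    if self._stats_type == "none":
        return
    bar = BarGraph().name("Inference Distribution") \
        .cols([str(i) for i in range(self._num_categories)]) \
        .data(list(self._infer_hist))
    mlops.set_stat(bar)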
def test_table():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    with pytest.raises(MLOpsException):
        Table().name("mytable").cols(["a", "b", "c"]).add_row([1, 2, 3]).add_row([1, 2])

    with pytest.raises(MLOpsException):
        tbl = Table().name("mytable").cols(["a", "b"])
        pm.set_stat(tbl)

    tbl = Table().name("good-1").cols(["a", "b", "c"]).add_rows([[1, 2, 3], [1, 2, 3]])
    pm.set_stat(tbl)

    tbl = Table().name("good-2").cols(["a", "b", "c"])
    tbl.add_row("r1", [1, 2, 3])
    tbl.add_row("r2", [3, 4, 5])
    pm.set_stat(tbl)

    tbl = Table().name("good-3").cols(["a", "b", "c"])
    tbl.add_row([6, 7, 8])
    tbl.add_row([9, 0, 1])
    pm.set_stat(tbl)

    pm.done()
def main():
    pm_options = parse_args()

    # Initialize MLOps Library
    mlops.init()

    # Load the model
    if pm_options.input_model is not None:
        try:
            filename = pm_options.input_model
            file_obj = open(filename, 'rb')
            mlops.set_stat("model_file", 1)
        except Exception as e:
            print("Model not found")
            print("Got exception: {}".format(e))
            mlops.set_stat("model_file", 0)
            mlops.done()
            return 0

    classifier = pickle.load(file_obj)

    # Create synthetic data (Gaussian, Poisson and Beta distributions)
    num_samples = int(pm_options.num_samples)
    num_features = int(pm_options.num_features)

    np.random.seed(0)
    g = np.random.normal(0, 1, (num_samples, num_features))
    p = np.random.poisson(0.7, (num_samples, num_features))
    b = np.random.beta(2, 2, (num_samples, num_features))

    test_data = np.concatenate((g, p, b), axis=0)
    np.random.seed()
    test_features = test_data[np.random.choice(test_data.shape[0], num_samples, replace=False)]

    # Output Health Statistics to MCenter
    # MLOps API to report the distribution statistics of each feature in the data and compare them
    # automatically with the distributions reported during training to generate the similarity score.
    mlops.set_data_distribution_stat(test_features)

    # Output the number of samples being processed using MCenter
    mlops.set_stat(PredefinedStats.PREDICTIONS_COUNT, num_samples, st.TIME_SERIES)

    # Predict labels
    result = classifier.predict(test_features)

    # Label distribution in prediction
    value, counts = np.unique(result, return_counts=True)
    label_distribution = np.asarray((value, counts)).T
    column_names = value.astype(str).tolist()
    print("Label distributions: \n {0}".format(label_distribution))

    # Output label distribution as a BarGraph using MCenter
    bar = BarGraph().name("Label Distribution").cols(
        (label_distribution[:, 0]).astype(str).tolist()).data(
        (label_distribution[:, 1]).tolist())
    mlops.set_stat(bar)

    # Terminate MLOps
    mlops.done()
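The comments in the script above note that inference-time distributions are compared against the distributions reported during training. A minimal sketch of the training-side counterpart, assuming the same mlops call used above; the helper name and its argument are illustrative only:

def report_training_distribution(train_features):
    # Illustrative sketch: during training, report the feature distributions so that the
    # inference-time set_data_distribution_stat() call above has a baseline to compare against.
    mlops.set_data_distribution_stat(train_features)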
def test_mlops_bsl_apis():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    labels_actual = [1, 0, 1, 1, 1, 0]
    labels_pred_prob = [0.9, 0.8, 0.7, 0.9, 0.75, 1]

    bsl = sklearn.metrics.brier_score_loss(labels_actual, labels_pred_prob)

    # first way
    pm.set_stat(ClassificationMetrics.BRIER_SCORE_LOSS, bsl)

    # second way
    pm.metrics.brier_score_loss(y_true=labels_actual, y_prob=labels_pred_prob)

    # should throw an error if a non-numeric value is provided
    with pytest.raises(MLOpsStatisticsException):
        pm.set_stat(ClassificationMetrics.BRIER_SCORE_LOSS, [1, 2, 3])

    # should throw an error if predicted probabilities and actual labels have different lengths
    with pytest.raises(ValueError):
        labels_prob_missing_values = [0, 0, 0, 1]
        pm.metrics.brier_score_loss(y_true=labels_actual, y_prob=labels_prob_missing_values)

    # testing with sample weights as well
    sample_weight = [0.9, 0.1, 0.5, 0.9, 1.0, 0]
    pm.metrics.brier_score_loss(y_true=labels_actual,
                                y_prob=labels_pred_prob,
                                sample_weight=sample_weight)

    pm.done()
def test_mlops_roc_auc_apis():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    labels_pred_prob = [0.9, 0.4, 0.6, 0.9, 0.1, 0.9]
    labels_actual = [0, 1, 0, 0, 0, 1]

    roc_auc_score = sklearn.metrics.roc_auc_score(labels_actual, labels_pred_prob)

    # first way
    pm.set_stat(ClassificationMetrics.ROC_AUC_SCORE, roc_auc_score)

    # second way
    pm.metrics.roc_auc_score(labels_actual, labels_pred_prob)

    # should throw an error if a non-numeric value is provided
    with pytest.raises(MLOpsStatisticsException):
        pm.set_stat(ClassificationMetrics.ROC_AUC_SCORE, [1, 2, 3])

    # should throw an error if predicted scores and actual labels have different lengths
    with pytest.raises(ValueError):
        labels_prob_missing_values = [0.0, 0.9, 1.0, 0.85]
        pm.metrics.roc_auc_score(y_true=labels_actual, y_score=labels_prob_missing_values)

    # testing with sample weights as well
    sample_weight = [0.9, 0.1, 0.5, 0.9, 1.0, 0]
    pm.metrics.roc_auc_score(y_true=labels_actual,
                             y_score=labels_pred_prob,
                             sample_weight=sample_weight)

    pm.done()
def test_get_models_api():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)
    pm._set_api_test_mode()

    with pytest.raises(MLOpsException):
        pm.get_models_by_time(start_time=None, end_time=None)

    pm.done()
def _materialize(self, parent_data_objs, user_data):
    # Initialize MLOps Library
    mlops.init()

    df_data = parent_data_objs[0]
    df_clean = do_nan_removal(df_data)
    get_data_distribution_stat(df_clean)

    # Terminate MLOps
    mlops.done()

    return [df_clean]
def test_mlops_structure_api():
    ion_instance_id = ION1.ION_INSTANCE_ID
    ion_node_id = ION1.NODE_1_ID
    token = ION1.TOKEN

    set_mlops_env(ion_id=ion_instance_id, ion_node_id=ion_node_id, token=token, model_id=ION1.MODEL_ID)
    rest_helper = MlOpsRestFactory().get_rest_helper(MLOpsMode.AGENT, mlops_server="localhost",
                                                     mlops_port="3456", token=token)
    rest_helper.set_prefix(Constants.URL_MLOPS_PREFIX)

    with requests_mock.mock() as m:
        m.get(rest_helper.url_get_workflow_instance(ion_instance_id), json=test_workflow_instances)
        m.get(rest_helper.url_get_ees(), json=test_ee_info)
        m.get(rest_helper.url_get_agents(), json=test_agents_info)
        m.get(rest_helper.url_get_model_list(), json=test_models_info)
        m.get(rest_helper.url_get_health_thresholds(ion_instance_id), json=test_health_info)
        m.get(rest_helper.url_get_model_stats(ION1.MODEL_ID), json=test_model_stats)
        m.get(rest_helper.url_get_uuid("model"),
              json={"id": "model_5906255e-0a3d-4fef-8653-8d41911264fb"})

        pm.init(ctx=None, mlops_mode=MLOpsMode.AGENT)

        assert pm.get_mlapp_id() == ION1.ION_ID
        assert pm.get_mlapp_name() == ION1.ION_NAME

        curr_node = pm.get_current_node()
        assert curr_node.id == ion_node_id

        nodes = pm.get_nodes()
        assert len(nodes) == 2

        node0 = pm.get_node('1')
        assert node0 is not None
        assert node0.pipeline_pattern_id == ION1.PIPELINE_PATTERN_ID_1
        assert node0.pipeline_instance_id == ION1.PIPELINE_INST_ID_1

        node0_agents = pm.get_agents('1')
        assert len(node0_agents) == 1
        assert node0_agents[0].id == ION1.AGENT_ID_0
        assert node0_agents[0].hostname == 'localhost'

        agent = pm.get_agent('1', ION1.AGENT_ID_0)
        assert agent.id == ION1.AGENT_ID_0
        assert agent.hostname == 'localhost'

        model = pm.current_model()
        assert model is not None
        assert model.metadata.modelId == ION1.MODEL_ID

        pm.done()
def test_set_stat_basic():
    with pytest.raises(MLOpsException):
        pm.set_stat(name=None, data=None)

    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    pm.set_stat("st1", data=5.5, category=StatCategory.TIME_SERIES)
    pm.set_stat("st1", data=5.5)

    pm.done()
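Most of the stand-alone tests above repeat the same pm.init()/pm.done() bracketing. A small sketch, assuming pytest is already in use here, of how that bracketing could be factored into a fixture; the fixture name standalone_mlops and the example test are hypothetical and not part of the original suite:

import pytest

@pytest.fixture
def standalone_mlops():
    # Bring mlops up in stand-alone mode for the duration of a single test.
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)
    yield pm
    pm.done()


def test_set_stat_with_fixture(standalone_mlops):
    # Same reporting calls as test_set_stat_basic, with setup/teardown handled by the fixture.
    standalone_mlops.set_stat("st1", data=5.5, category=StatCategory.TIME_SERIES)
    standalone_mlops.set_stat("st1", data=5.5)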
def main():
    options = parse_args()

    sc = SparkContext(appName="predict-test")
    pm.init(sc)

    predict_node(options)

    sc.stop()
    pm.done()
def _df_to_db(self, engine, df_sink, table, database):
    """ Save DataFrame to Database """
    mlops.init()
    df_sink.to_sql(con=engine, name=table, if_exists='replace', index=False)
    # report the row count under a "<database>.<table>" stat name
    mlops.set_stat("{}.{}".format(database, table), df_sink.shape[0])
    mlops.done()
    return df_sink.shape[0]
def _materialize(self, parent_data_objs, user_data):
    df_infer_set = self._gen_inf_dataset(parent_data_objs[0])

    # Initialize MLOps Library
    mlops.init()

    # Record the data distribution stats for the DataFrame
    mlops.set_data_distribution_stat(df_infer_set)

    # Terminate MLOps
    mlops.done()

    return [df_infer_set]