def test_mlops_log_loss_apis():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    labels_pred_prob = [[0.9, 0.1], [0.6, 0.4], [0.6, 0.4], [0.1, 0.9],
                        [0.2, 0.8], [0.1, 0.9]]
    labels_actual = [0, 1, 0, 0, 0, 1]

    log_loss = sklearn.metrics.log_loss(labels_actual, labels_pred_prob)

    # first way
    pm.set_stat(ClassificationMetrics.LOG_LOSS, log_loss)

    # second way
    pm.metrics.log_loss(labels_actual, labels_pred_prob)

    # should throw an error if a non-numeric value is provided
    with pytest.raises(MLOpsStatisticsException):
        pm.set_stat(ClassificationMetrics.LOG_LOSS, [1, 2, 3])

    # should throw an error if predicted labels and actuals differ in length
    with pytest.raises(ValueError):
        labels_prob_missing_values = [[0.9, 0.1], [0.6, 0.4], [0.6, 0.4]]
        pm.metrics.log_loss(y_true=labels_actual,
                            y_pred=labels_prob_missing_values)

    sample_weight = [0.9, 0.1, 0.5, 0.9, 1.0, 0]

    # testing with sample weights as well
    pm.metrics.log_loss(y_true=labels_actual,
                        y_pred=labels_pred_prob,
                        sample_weight=sample_weight)

    pm.done()
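A note on imports: the test snippets in this listing omit their import lines. A minimal preamble they appear to assume is sketched below; the exact module paths are an assumption about the parallelm mlops package layout, so adjust them to the installed version.

import pytest
import sklearn.metrics
from sklearn import metrics

from parallelm.mlops import mlops
from parallelm.mlops import mlops as pm
from parallelm.mlops import StatCategory as st
from parallelm.mlops.mlops_mode import MLOpsMode
from parallelm.mlops.metrics_constants import (ClassificationMetrics,
                                               ClusteringMetrics,
                                               RegressionMetrics)
from parallelm.mlops.predefined_stats import PredefinedStats
from parallelm.mlops.stats.bar_graph import BarGraph
from parallelm.mlops.stats.table import Table
from parallelm.mlops.stats.multi_line_graph import MultiLineGraph
# exception types raised by the stats APIs; their module is also an assumption
from parallelm.mlops.mlops_exception import MLOpsException, MLOpsStatisticsException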
Example #2
def main():

    print("args: {}".format(sys.argv))
    options = parse_args()
    print("- inside test-python-aux Running main.py")
    print("arg1:         {}".format(options.arg1))
    print("input_model:  {}".format(options.input_model))
    print("use-mlops:    {}".format(options.use_mlops))
    print("iter:         {}".format(options.iter))
    print("exit_value:   {}".format(options.exit_value))

    print("Calling mlops.init()")
    if options.use_mlops:
        mlops.init()

    # Some output - to test logs
    for idx in range(options.iter):
        print("stdout - Idx {}".format(idx))
        print("stderr - Idx {}".format(idx), file=sys.stderr)
        if options.use_mlops:
            mlops.set_stat("aux_stat", 1)
        time.sleep(1)

    if options.use_mlops:
        mlops.done()

    # Exit status
    if options.exit_value >= 0:
        print("About to exit with value: {}".format(options.exit_value))
        sys.exit(options.exit_value)
    else:
        print("About to raise exception: {}".format(options.exit_value))
        raise Exception("Exiting main using exception")
def test_mlops_v_measure_score_apis():
    mlops.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    labels_pred = [1, 0, 1, 2, 3, 0]
    labels_actual = [0, 1, 0, 1, 3, 1]

    vms = metrics.v_measure_score(labels_actual, labels_pred)

    # first way
    mlops.set_stat(ClusteringMetrics.V_MEASURE_SCORE, vms)

    # second way
    mlops.metrics.v_measure_score(labels_true=labels_actual,
                                  labels_pred=labels_pred)

    # should throw an error if a non-numeric value is provided
    with pytest.raises(MLOpsStatisticsException):
        mlops.set_stat(ClusteringMetrics.V_MEASURE_SCORE, [1, 2, 3])

    # should throw an error if predicted labels and actuals differ in length
    with pytest.raises(ValueError):
        labels_pred_missing_values = [0, 0, 0, 1]
        mlops.metrics.v_measure_score(labels_true=labels_actual,
                                      labels_pred=labels_pred_missing_values)

    mlops.done()
Example #4
def test_mlops_mean_squared_error_apis():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    labels_pred = [1.0, 0.5, 2.5, 4.75, 7.0, 0.75]
    labels_actual = [1.5, 0.75, 2.75, 4.5, 7.50, 0.25]

    mse = sklearn.metrics.mean_squared_error(labels_actual, labels_pred)

    # first way
    pm.set_stat(RegressionMetrics.MEAN_SQUARED_ERROR, mse)

    # second way
    pm.metrics.mean_squared_error(y_true=labels_actual, y_pred=labels_pred)

    # should throw an error if a non-numeric value is provided
    with pytest.raises(MLOpsStatisticsException):
        pm.set_stat(RegressionMetrics.MEAN_SQUARED_ERROR, [1, 2, 3])

    # should throw an error if predicted labels and actuals differ in length
    with pytest.raises(ValueError):
        labels_pred_missing_values = [1.0, 0.5, 7.0, 0.75]
        pm.metrics.mean_squared_error(y_true=labels_actual,
                                      y_pred=labels_pred_missing_values)

    sample_weight = [0.9, 0.1, 0.5, 0.9, 1.0, 0]

    # testing with sample weights as well
    pm.metrics.mean_squared_error(y_true=labels_actual,
                                  y_pred=labels_pred,
                                  sample_weight=sample_weight)

    pm.done()
Example #5
def test_mlops_explained_variance_score_apis():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    labels_pred = [1.0, 0.5, 2.5, 4.75, 7.0, 0.75]
    labels_actual = [1.5, 0.75, 2.75, 4.5, 7.50, 0.25]

    evs = sklearn.metrics.explained_variance_score(labels_actual, labels_pred)

    # first way
    pm.set_stat(RegressionMetrics.EXPLAINED_VARIANCE_SCORE, evs)

    # second way
    pm.metrics.explained_variance_score(y_true=labels_actual,
                                        y_pred=labels_pred)

    # should throw an error if a non-numeric value is provided
    with pytest.raises(MLOpsStatisticsException):
        pm.set_stat(RegressionMetrics.EXPLAINED_VARIANCE_SCORE, [1, 2, 3])

    # should throw an error if predicted labels and actuals differ in length
    with pytest.raises(ValueError):
        labels_pred_missing_values = [1.0, 0.5, 7.0, 0.75]
        pm.metrics.explained_variance_score(y_true=labels_actual,
                                            y_pred=labels_pred_missing_values)

    sample_weight = [0.9, 0.1, 0.5, 0.9, 1.0, 0]

    # testing with sample weights as well
    pm.metrics.explained_variance_score(y_true=labels_actual,
                                        y_pred=labels_pred,
                                        sample_weight=sample_weight)

    pm.done()
Example #6
def main():
    print("Starting example")
    mlops.init(run_in_non_pm_mode=True, mlops_mode=MLOpsMode.PYTHON)

    # Line graphs
    mlops.set_stat("myCounterDouble", 5.5)
    mlops.set_stat("myCounterDouble2", 7.3)

    # Multi-line graphs
    mlt = MultiLineGraph().name("Multi Line").labels(["l1",
                                                      "l2"]).data([5, 16])
    mlops.set_stat(mlt)

    tbl = Table().name("MyTable").cols(["Date", "Some number"])
    tbl.add_row(["2001Q1", "55"])
    tbl.add_row(["2001Q2", "66"])
    tbl.add_row(["2003Q3", "33"])
    tbl.add_row(["2003Q2", "22"])
    mlops.set_stat(tbl)

    bar = BarGraph().name("MyBar").cols(["aa", "bb", "cc", "dd",
                                         "ee"]).data([10, 15, 12, 9, 8])
    mlops.set_stat(bar)

    mlops.done()
    print("Example done")
Example #7
def test_mlops_median_absolute_error_apis():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    labels_pred = [1.0, 0.5, 2.5, 4.75, 7.0, 0.75]
    labels_actual = [1.5, 0.75, 2.75, 4.5, 7.50, 0.25]

    mae = sklearn.metrics.median_absolute_error(labels_actual, labels_pred)

    # first way
    pm.set_stat(RegressionMetrics.MEDIAN_ABSOLUTE_ERROR, mae)

    # second way
    pm.metrics.median_absolute_error(y_true=labels_actual, y_pred=labels_pred)

    # should throw an error if a non-numeric value is provided
    with pytest.raises(MLOpsStatisticsException):
        pm.set_stat(RegressionMetrics.MEDIAN_ABSOLUTE_ERROR, [1, 2, 3])

    # should throw an error if predicted labels and actuals differ in length
    with pytest.raises(ValueError):
        labels_pred_missing_values = [1.0, 0.5, 7.0, 0.75]
        pm.metrics.median_absolute_error(y_true=labels_actual,
                                         y_pred=labels_pred_missing_values)

    pm.done()
def test_mlops_bas_apis():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    labels_pred = [1, 0, 1, 1, 1, 0]
    labels_actual = [0, 1, 0, 0, 0, 1]

    bas = sklearn.metrics.balanced_accuracy_score(labels_actual, labels_pred)

    # first way
    pm.set_stat(ClassificationMetrics.BALANCED_ACCURACY_SCORE, bas)

    # second way
    pm.metrics.balanced_accuracy_score(y_true=labels_actual,
                                       y_pred=labels_pred)

    # should throw an error if a non-numeric value is provided
    with pytest.raises(MLOpsStatisticsException):
        pm.set_stat(ClassificationMetrics.BALANCED_ACCURACY_SCORE, [1, 2, 3])

    # should throw an error if predicted labels and actuals differ in length
    with pytest.raises(ValueError):
        labels_pred_missing_values = [0, 0, 0, 1]
        pm.metrics.balanced_accuracy_score(y_true=labels_actual,
                                           y_pred=labels_pred_missing_values)

    sample_weight = [0.9, 0.1, 0.5, 0.9, 1.0, 0]

    # testing with sample weights as well
    pm.metrics.balanced_accuracy_score(y_true=labels_actual,
                                       y_pred=labels_pred,
                                       sample_weight=sample_weight)

    pm.done()
def test_mlops_roc_auc_apis():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    labels_pred_prob = [0.9, 0.4, 0.6, 0.9, 0.1, 0.9]
    labels_actual = [0, 1, 0, 0, 0, 1]

    roc_auc_score = sklearn.metrics.roc_auc_score(labels_actual,
                                                  labels_pred_prob)

    # first way
    pm.set_stat(ClassificationMetrics.ROC_AUC_SCORE, roc_auc_score)

    # second way
    pm.metrics.roc_auc_score(labels_actual, labels_pred_prob)

    # should throw an error if a non-numeric value is provided
    with pytest.raises(MLOpsStatisticsException):
        pm.set_stat(ClassificationMetrics.ROC_AUC_SCORE, [1, 2, 3])

    # should throw an error if predicted labels and actuals differ in length
    with pytest.raises(ValueError):
        labels_prob_missing_values = [0.0, 0.9, 1.0, 0.85]
        pm.metrics.roc_auc_score(y_true=labels_actual,
                                 y_score=labels_prob_missing_values)

    sample_weight = [0.9, 0.1, 0.5, 0.9, 1.0, 0]

    # testing with sample weights as well
    pm.metrics.roc_auc_score(y_true=labels_actual,
                             y_score=labels_pred_prob,
                             sample_weight=sample_weight)

    pm.done()
def test_mlops_matthews_corrcoef_apis():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    labels_pred = [1, 0, 1, 1, 1, 0]
    labels_actual = [0, 1, 0, 0, 0, 1]

    mcc = sklearn.metrics.matthews_corrcoef(labels_actual, labels_pred)

    # first way
    pm.set_stat(ClassificationMetrics.MATTHEWS_CORRELATION_COEFFICIENT, mcc)

    # second way
    pm.metrics.matthews_corrcoef(labels_actual, labels_pred)

    # should throw an error if a non-numeric value is provided
    with pytest.raises(MLOpsStatisticsException):
        pm.set_stat(ClassificationMetrics.MATTHEWS_CORRELATION_COEFFICIENT,
                    [1, 2, 3])

    # should throw an error if predicted labels and actuals differ in length
    with pytest.raises(ValueError):
        labels_prob_missing_values = [1, 0, 1, 1]
        pm.metrics.matthews_corrcoef(y_true=labels_actual,
                                     y_pred=labels_prob_missing_values)

    sample_weight = [0.9, 0.1, 0.5, 0.9, 1.0, 0]

    # testing with sample weights as well
    pm.metrics.matthews_corrcoef(y_true=labels_actual,
                                 y_pred=labels_pred,
                                 sample_weight=sample_weight)

    pm.done()
Example #11
    def job_secondary_transitions(rows):
        tbl = Table().name("SageMaker Job Transitions")\
                     .cols(["Start Time", "End Time", "Time Span", "Status", "Description"])
        for row in rows:
            tbl.add_row(row)

        mlops.set_stat(tbl)
Example #12
    def job_host_metrics(job_name, metrics_data):
        tbl = Table().name("Job Host Metrics").cols(["Metric", "Value"])
        for metric_data in metrics_data:
            tbl.add_row([
                metric_data['Label'],
                metric_data['Values'][0] if metric_data['Values'] else 0
            ])
        mlops.set_stat(tbl)
Example #13
def gen_data_dist_stats(spark_ctx):

    spark_session = SparkSession(spark_ctx)

    # Import Data
    ##################################
    K = 3  # fixed number of centers
    num_attr = 10  # fixed number of attributes
    num_rows = 60000  # number of rows in the dataset
    input_data = generate_dataset(num_attr, num_rows, K, spark_ctx)

    column_names_all = input_data.columns
    for col_index in range(0, len(column_names_all)):
        input_data = input_data.withColumnRenamed(column_names_all[col_index],
                                                  'c' + str(col_index))

    input_data = input_data.cache()

    input_train = input_data

    # SparkML pipeline
    ##################################
    exclude_cols = []
    column_names = input_train.columns
    input_col_names = []
    for elmts in column_names:
        ind = True
        for excludes in exclude_cols:
            if elmts == excludes:
                ind = False
        if ind:
            input_col_names.append(elmts)
    print(input_col_names)

    vector_assembler = VectorAssembler(inputCols=input_col_names,
                                       outputCol="features")

    kmeans_pipe = KMeans(k=K,
                         initMode="k-means||",
                         initSteps=5,
                         tol=1e-4,
                         maxIter=100,
                         featuresCol="features")
    full_pipe = [vector_assembler, kmeans_pipe]
    model_kmeans = Pipeline(stages=full_pipe).fit(input_train)

    try:
        mlops.set_data_distribution_stat(data=input_train, model=model_kmeans)
        m = mlops.Model(model_format=ModelFormat.SPARKML)
        m.set_data_distribution_stat(data=input_train)
        print("PM: done generating histogram")
    except Exception as e:
        print("PM: failed to generate histogram using pm.stat")
        print(e)

    # Indicating that model statistics were reported
    mlops.set_stat(E2EConstants.MODEL_STATS_REPORTED_STAT_NAME, 1)
    return model_kmeans
def db_to_df(engine, table):
    """
    Load a DataFrame from the database and report its row count
    """
    mlops.init()
    df_sink = pandas.read_sql("{} {}".format(SELECT_STATEMENT, table), con=engine)
    mlops.set_stat(table, df_sink.shape[0])
    mlops.done()
    return df_sink, df_sink.shape[0]
Example #15
    def _materialize(self, parent_data_objs, user_data):
        for k,v in self._params.items():
            params_info = "key: {key} ==> value: {value}".format(key=k, value=v)
            print(params_info)
            self._logger.info(params_info)

        for x in range(100):
            mlops.set_stat("k graph", x)
        return ["s3://kenshoo/this is your report/report.txt"]
Example #16
    def _df_to_db(self, engine, df_sink, table, database):
        """
        Save DataFrame to Database
        """
        mlops.init()
        df_sink.to_sql(con=engine, name=table, if_exists='replace', index=False)
        # stat name combines the database and table names
        mlops.set_stat("{}.{}".format(database, table), df_sink.shape[0])
        mlops.done()
        return df_sink.shape[0]
Example #17
    def _report_metrics_collection(self, metrics):
        for name, value in metrics.items():
            metric_meta = Metric.metric_by_name(name)
            self._logger.debug("Reporting metrics ... {}".format(metric_meta))
            if not metric_meta.hidden:
                if metric_meta.metric_relation == MetricRelation.BAR_GRAPH:
                    self._report_bar_graph_metric(metric_meta, metrics)
                else:
                    mlops.set_stat(metric_meta.title, value)
Example #18
    def job_status(job_name, running_time_sec, billing_time_sec, status=""):
        Report._last_metric_values[job_name] = status
        tbl = Table().name("SageMaker Job Status").cols(
            ["Job Name", "Total Running Time", "Time for Billing", "Status"])
        tbl.add_row([
            job_name,
            Report.seconds_fmt(running_time_sec),
            Report.seconds_fmt(billing_time_sec), status
        ])
        mlops.set_stat(tbl)
Example #19
def export_bar_table(bar_names, bar_data, title_name):
    """
    This function presents bar-type data as a bar graph in the MCenter data scientist view
    :param bar_names: Bar graph column names
    :param bar_data: Bar graph data
    :param title_name: Title of the bar graph
    :return:
    """
    bar_graph_data = BarGraph().name(title_name).cols(
        bar_names.astype(str).tolist()).data(bar_data.tolist())
    mlops.set_stat(bar_graph_data)
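A hypothetical call for context, assuming numpy arrays for the names and data (which the .astype(str).tolist() conversion above implies):

import numpy as np

class_labels = np.array([0, 1, 2])       # bar names, e.g. predicted classes
class_counts = np.array([120, 45, 80])   # bar heights, e.g. per-class counts
export_bar_table(class_labels, class_counts, "Prediction Distribution")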
Example #20
    def _materialize(self, parent_data_objs, user_data):
        for param in parent_data_objs:
            parent_param = "parent param is: {param}".format(param=param)
            print(parent_param)
            self._logger.info(parent_param)

        tbl = Table().name("Table example").cols(["Worker", "Requests"])
        for index in range(0, 10):
            tbl.add_row(["kenshoo-worker-{}".format(index), index + 3])
        mlops.set_stat(tbl)

        return ["s3://Kenshoo/this is the logistic model path/model.pmml"]
Example #21
    def _report_event(self, tb_parse_event, time_stamp_start):
        """
        Process TensorBoard events.
        Only `summary` events are scanned and reported through the
        mlops stats API; only scalar values are supported.
        """
        if tb_parse_event.HasField('summary') and (time_stamp_start <
                                                   tb_parse_event.wall_time):
            for tf_value in tb_parse_event.summary.value:
                self._print_verbose("calling mlops.set_stat {}".format(
                    tf_value.tag))
                mlops.set_stat(tf_value.tag, data=tf_value.simple_value)
Example #22
    def _report_bar_graph_metric(self, metric_meta, metrics):
        cols = []
        data = []
        for related_m, bar_name in metric_meta.related_metric:
            cols.append(bar_name)
            data.append(metrics[related_m.metric_name])

        if not all(v == 0
                   for v in data) or not metric_meta.metric_already_displayed:
            metric_meta.metric_already_displayed = True
            mlt = BarGraph().name(metric_meta.title).cols(cols).data(data)
            mlops.set_stat(mlt)
Example #23
def test_init_done():
    """
    Testing api for information such as ION id, ion name and such
    :return:
    """
    with pytest.raises(MLOpsException):
        pm.set_stat("st1", 5.5)

    with pytest.raises(MLOpsException):
        pm.done()

    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)
    pm.done()
Example #24
    def _report_acc_requests_and_status(self):
        self._logger.debug("Reporting about workers requests & status ...")
        tbl = Table().name(StatsConstants.ACC_REQS_TABLE_NAME).cols([
            StatsConstants.ACC_REQS_NUM_REQS_COL_NAME,
            StatsConstants.ACC_REQS_STATUS_COL_NAME
        ])
        for col, value, status in self._curr_stats_snapshot.sorted_worker_stats:
            tbl.add_row(col, [value, status])
        tbl.add_row(StatsConstants.ACC_REQS_LAST_ROW_NAME,
                    [self._curr_stats_snapshot.total_requests, "---"])
        mlops.set_stat(tbl)
        mlops.set_stat(PredefinedStats.PREDICTIONS_COUNT,
                       self._curr_stats_snapshot.total_requests_diff)
Example #25
    def _materialize(self, parent_data_objs, user_data):
        for param in parent_data_objs:
            parent_param = "parent param is: {param}".format(param=param)
            print(parent_param)
            self._logger.info(parent_param)

        mlt = MultiLineGraph().name("Multi-line graph example").labels(
            ["label-1", "label-2", "label-3"])
        for x in range(100):
            mlt.data([x, x + 1, 0.5 * x])
            mlops.set_stat(mlt)

        return ["s3://Kenshoo/this is the linear model path/model.pmml"]
Example #26
def export_confusion_table(confmat, algo):
    """
    This function presents the confusion matrix as a table in the MCenter data scientist view
    :param confmat: Confusion matrix
    :param algo: text for the algorithm type
    :return:
    """

    tbl = Table()\
        .name("Confusion Matrix for " + str(algo))\
        .cols(["Predicted label: " + str(i) for i in range(0, confmat.shape[0])])
    for i in range(confmat.shape[1]):
        tbl.add_row("True Label: " + str(i), [str(confmat[i, j]) for j in range(0, confmat.shape[0])])
    mlops.set_stat(tbl)
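A hypothetical usage, computing the matrix with scikit-learn before handing it to the helper above (the label vectors and algorithm name are made up for illustration):

from sklearn.metrics import confusion_matrix

y_true = [0, 1, 0, 0, 1, 1]
y_pred = [0, 1, 1, 0, 1, 0]
confmat = confusion_matrix(y_true, y_pred)   # square matrix of counts
export_confusion_table(confmat, "RandomForest")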
Example #27
def test_bar_graph():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    with pytest.raises(MLOpsException):
        BarGraph().name("bar").cols(["g1", "g2"]).data(["aa", "bb"])

    with pytest.raises(MLOpsException):
        BarGraph().name("bar").data(["aa", "bb"])

    with pytest.raises(MLOpsException):
        mlt = BarGraph().name("mlt").cols(["g1"]).data([55, 66])
        pm.set_stat(mlt)

    with pytest.raises(MLOpsException):
        mlt_cont = BarGraph().name("mlt").cols([1,
                                                2]).data([55,
                                                          66]).as_continuous()
        pm.set_stat(mlt_cont)

    mlt = BarGraph().name("mlt").cols(["g1", "g2"]).data([55, 66])
    pm.set_stat(mlt)

    mlt_cont = BarGraph().name("mlt").cols([1, 2,
                                            3]).data([55, 66]).as_continuous()
    pm.set_stat(mlt_cont)

    pm.done()
Example #28
def test_table():
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    with pytest.raises(MLOpsException):
        Table().name("mytable").cols(["a", "b",
                                      "c"]).add_row([1, 2, 3]).add_row([1, 2])

    with pytest.raises(MLOpsException):
        tbl = Table().name("mytable").cols(["a", "b"])
        pm.set_stat(tbl)

    tbl = Table().name("good-1").cols(["a", "b", "c"]).add_rows([[1, 2, 3],
                                                                 [1, 2, 3]])
    pm.set_stat(tbl)

    tbl = Table().name("good-2").cols(["a", "b", "c"])
    tbl.add_row("r1", [1, 2, 3])
    tbl.add_row("r2", [3, 4, 5])
    pm.set_stat(tbl)

    tbl = Table().name("good-3").cols(["a", "b", "c"])
    tbl.add_row([6, 7, 8])
    tbl.add_row([9, 0, 1])
    pm.set_stat(tbl)

    pm.done()
Example #29
def main():
    pm_options = parse_args()
    # Initialize MLOps Library
    mlops.init()
    # Load the model; report and exit early if no model is available
    if pm_options.input_model is None:
        print("No input model provided")
        mlops.set_stat("model_file", 0)
        mlops.done()
        return 0
    try:
        filename = pm_options.input_model
        file_obj = open(filename, 'rb')
        mlops.set_stat("model_file", 1)
    except Exception as e:
        print("Model not found")
        print("Got exception: {}".format(e))
        mlops.set_stat("model_file", 0)
        mlops.done()
        return 0

    classifier = pickle.load(file_obj)

    # Create synthetic data (Gaussian Distribution, Poisson Distribution and Beta Distribution)
    num_samples = int(pm_options.num_samples)
    num_features = int(pm_options.num_features)

    np.random.seed(0)
    g = np.random.normal(0, 1, (num_samples, num_features))
    p = np.random.poisson(0.7, (num_samples, num_features))
    b = np.random.beta(2, 2, (num_samples, num_features))

    test_data = np.concatenate((g, p, b), axis=0)
    np.random.seed()
    test_features = test_data[np.random.choice(test_data.shape[0],
                                               num_samples,
                                               replace=False)]

    # Output Health Statistics to MCenter
    # MLOps API to report the distribution statistics of each feature in the data and compare it automatically with the ones
    # reported during training to generate the similarity score.
    mlops.set_data_distribution_stat(test_features)

    # Output the number of samples being processed using MCenter
    mlops.set_stat(PredefinedStats.PREDICTIONS_COUNT, num_samples,
                   st.TIME_SERIES)

    # Predict labels
    result = classifier.predict(test_features)

    # Label distribution in prediction
    value, counts = np.unique(result, return_counts=True)
    label_distribution = np.asarray((value, counts)).T
    column_names = value.astype(str).tolist()
    print("Label distributions: \n {0}".format(label_distribution))

    # Output label distribution as a BarGraph using MCenter
    bar = BarGraph().name("Label Distribution").cols(
        (label_distribution[:, 0]).astype(str).tolist()).data(
            (label_distribution[:, 1]).tolist())
    mlops.set_stat(bar)

    # Terminate MLOPs
    mlops.done()
Example #30
def count_words(sc, words_file):
    lines = sc.textFile(words_file)
    words = lines.flatMap(lambda line: line.split(" ")).map(lambda word:
                                                            (word, 1))
    counts = words.reduceByKey(operator.add)
    sorted_counts = counts.sortBy(lambda x: x[1], False)
    total_words = sorted_counts.count()
    mlops.set_stat("total_words_1_push_7", total_words)

    total_words = 0
    for word, count in sorted_counts.toLocalIterator():
        print(u"{} --> {}".format(word, count))
        total_words += 1
    mlops.set_stat("total_words_2_push_7", total_words)