Example #1
def __evaluate(confusion_matrix: tf.Tensor, auc: float, precision: np.ndarray,
               recall: np.ndarray) -> None:
    # Confusion Matrix
    fig, ax = plt.subplots(figsize=(16, 8))

    sns.heatmap(confusion_matrix,
                annot=True,
                fmt='d',
                cmap=sns.color_palette("Blues"),
                ax=ax)

    ax.set_xlabel('Predicted')
    ax.set_ylabel('Ground Truth')

    mlflow.log_figure(fig, f'confusion_matrix.png')
    plt.close(fig)

    # AUC
    mlflow.log_metric('val_auc', auc)

    # Precision Recall
    fig, ax = plt.subplots(figsize=(16, 8))

    sns.lineplot(x=recall, y=precision, ax=ax)

    ax.set_xlabel('Recall')
    ax.set_xlim(0., 1.)

    ax.set_ylabel('Precision')
    ax.set_ylim(0., 1.)

    mlflow.log_figure(fig, f'precision_recall.png')
    plt.close(fig)
Example #2
def log_plots(exp_id, run_id, density, boxplot):
    # If the model resides in an existing MLflow run
    if exp_id is not None and run_id is not None:
        with mlflow.start_run(experiment_id=exp_id, run_id=run_id):
            print("\nCalculate and log model's residuals...")
            mlflow.log_figure(density, "./plots/eval_distplot.png")
            mlflow.log_figure(boxplot, "./plots/eval_boxplots.png")
Example #3
def feature_conductance(test_input_tensor):
    ig = IntegratedGradients(net)
    test_input_tensor.requires_grad_()
    attr, _ = ig.attribute(test_input_tensor, target=1, return_convergence_delta=True)
    attr = attr.detach().numpy()
    # To understand these attributions, we can first average them across all the inputs
    # and print and visualize the average attribution for each feature.
    feature_imp, feature_imp_dict = visualize_importances(feature_names, np.mean(attr, axis=0))
    mlflow.log_metrics(feature_imp_dict)
    mlflow.log_text(str(feature_imp), "feature_imp_summary.txt")
    fig, (ax1, ax2) = plt.subplots(2, 1)
    fig.tight_layout(pad=3)
    ax1.hist(attr[:, 1], 100)
    ax1.set(title="Distribution of Sibsp Attribution Values")

    # we can bucket the examples by the value of the sibsp feature and
    # plot the average attribution for the feature.
    # In the plot below, the size of the dot is proportional to
    # the number of examples with that value.

    bin_means, bin_edges, _ = stats.binned_statistic(
        test_features[:, 1], attr[:, 1], statistic="mean", bins=6
    )
    bin_count, _, _ = stats.binned_statistic(
        test_features[:, 1], attr[:, 1], statistic="count", bins=6
    )

    bin_width = bin_edges[1] - bin_edges[0]
    bin_centers = bin_edges[1:] - bin_width / 2
    ax2.scatter(bin_centers, bin_means, s=bin_count)
    ax2.set(xlabel="Average Sibsp Feature Value", ylabel="Average Attribution")
    mlflow.log_figure(fig, "Average_Sibsp_Feature_Value.png")
Example #4
def layer_conductance(net, test_input_tensor):
    """
    To use Layer Conductance, we create a LayerConductance object passing in the model as well as the module (layer) whose output we would like to understand.
    In this case, we choose net.sigmoid1, the output of the first hidden layer.
    Now obtain the conductance values for all the test examples by calling attribute on the LayerConductance object.
    LayerConductance also requires a target index for networks with multiple outputs, defining the index of the output for which gradients are computed.
    Similar to feature attributions, we provide target = 1, corresponding to survival.
    LayerConductance also utilizes a baseline, but we simply use the default zero baseline as in integrated gradients.
    """

    cond = LayerConductance(net, net.sigmoid1)

    cond_vals = cond.attribute(test_input_tensor, target=1)
    cond_vals = cond_vals.detach().numpy()
    # We can begin by visualizing the average conductance for each neuron.
    neuron_names = ["neuron " + str(x) for x in range(12)]
    avg_neuron_imp, neuron_imp_dict = visualize_importances(
        neuron_names,
        np.mean(cond_vals, axis=0),
        title="Average Neuron Importances",
        axis_title="Neurons",
    )
    mlflow.log_metrics(neuron_imp_dict)
    mlflow.log_text(str(avg_neuron_imp), "neuron_imp_summary.txt")
    # We can also look at the distribution of each neuron's attributions. Below we look at the distributions for neurons 7 and 9,
    # and we can confirm that their attribution distributions are very close to 0, suggesting they are not learning substantial features.
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(9, 6))
    fig.tight_layout(pad=3)
    ax1.hist(cond_vals[:, 9], 100)
    ax1.set(title="Neuron 9 Distribution")
    ax2.hist(cond_vals[:, 7], 100)
    ax2.set(title="Neuron 7 Distribution")
    mlflow.log_figure(fig, "Neurons_Distribution.png")
Example #5
def plotSamples(train):
    msg = """
    ====================================================
    Saving a sample in MLflow to later visually validate results...
    ====================================================
    """
    messages(msg)
    fig = plt.figure(figsize=(20, 12))
    for i in range(0, 10):
        plt.subplot(2, 5, i + 1)
        for X_batch, Y_batch in train:
            image = X_batch[0]
            dic = {0: 'NORMAL', 1: 'PNEUMONIA'}
            plt.title(dic.get(Y_batch[0]))
            plt.axis('off')
            plt.imshow(np.squeeze(image), cmap='gray', interpolation='nearest')
            break
    plt.tight_layout()
    # plt.savefig("data/batch_images.pdf")
    # mlflow.log_artifact("data/batch_images.pdf","data")
    mlflow.log_figure(fig, "samples/sample.png")
    msg = """
    ====================================================
    done...move to next section
    ====================================================
    """
    messages(msg)
Example #6
def log_model_residuals(model, X_train, y_train, X_val, y_val):
    tic = time.time()
    print(f"\nCalculate model's residuals and log them...")
    fig = plot_residuals_errors(model, X_train, y_train, X_val, y_val)
    mlflow.log_figure(fig, "./plots/residuals_errors.png")
    min, sec = divmod(time.time() - tic, 60)
    print(f"Calculating residuals took: {int(min)}min {int(sec)}sec")
Example #7
def main(cfg: DictConfig) -> None:

    # read WP thresholds from mlflow artifacts
    path_to_mlflow = to_absolute_path(cfg['create_df']['path_to_mlflow'])
    mlflow.set_tracking_uri(f"file://{path_to_mlflow}")
    experiment_id = cfg['create_df']['experiment_id']
    run_id = cfg['create_df']['run_id']
    path_to_wp_definitions = f"{path_to_mlflow}/{experiment_id}/{run_id}/artifacts/working_points.json"
    with open(path_to_wp_definitions, 'r') as f:
        wp_definitions = json.load(f)

    # instantiate partial create_df object from hydra cfg
    OmegaConf.register_new_resolver("get_method", hydra.utils.get_method)
    partial_create_df = instantiate(cfg["create_df"])
    partial_plot_efficiency = instantiate(cfg["var_cfg"])

    # make dataframes with specified input features and predictions for each tau_type
    vs_type = cfg['vs_type']
    df_sel = {}
    if not cfg['from_skims']:  # create dataframes and log to mlflow
        with mlflow.start_run(experiment_id=experiment_id,
                              run_id=run_id) as active_run:
            for tau_type in ['tau', vs_type]:
                df = partial_create_df(
                    tau_type_to_select=tau_type,
                    pred_samples=cfg['pred_samples'][tau_type])
                df.to_csv(f'{tau_type}.csv')
                mlflow.log_artifact(f'{tau_type}.csv',
                                    cfg['output_skim_folder'])
                df_sel[tau_type] = df
    else:  # read already existing skimmed dataframes
        for tau_type in ['tau', vs_type]:
            df = pd.read_csv(
                f"{path_to_mlflow}/{experiment_id}/{run_id}/artifacts/{cfg['output_skim_folder']}/{tau_type}.csv"
            )
            df_sel[tau_type] = df

    # compute and plot efficiency curves
    wp_thrs, wp_names = list(wp_definitions[vs_type].values()), list(
        wp_definitions[vs_type].keys())
    WPs_to_require = OmegaConf.to_object(cfg['WPs_to_require'])
    del WPs_to_require[vs_type]  # remove vs_type key
    eff, eff_up, eff_down = differential_efficiency(
        df_sel['tau'], df_sel[vs_type], cfg['var_cfg']['var_name'],
        cfg['var_cfg']['var_bins'], vs_type, 'score_vs_', wp_thrs,
        cfg['require_WPs_in_numerator'], cfg['require_WPs_in_denominator'],
        WPs_to_require, wp_definitions)
    fig = partial_plot_efficiency(eff=eff,
                                  eff_up=eff_up,
                                  eff_down=eff_down,
                                  labels=wp_names)

    # log to mlflow
    with mlflow.start_run(experiment_id=experiment_id,
                          run_id=run_id) as active_run:
        mlflow.log_figure(fig, cfg['output_filename'])
    print(
        f'\n[INFO] logged plots to artifacts for experiment ({experiment_id}), run ({run_id})\n'
    )
Example #8
 def _log_figures_with_mlflow(self, report) -> None:
     """Log figures with MLflow in the artifact folder."""
     if report.feature_importance_figure is not None:
         mlflow.log_figure(
             report.feature_importance_figure, "figures/weight_plot.html"
         )
     for key, figure in report.data_series_figures.items():
         mlflow.log_figure(figure, f"figures/{key}.html")
     self.logger.info("Logged figures to MLflow.")
Example #9
def __evaluate(
    weights: tff.learning.ModelWeights, client_states: Dict[int, ClientState],
    dataset: FederatedDataset, evaluation_fn: Callable[
        [tff.learning.ModelWeights, List[tf.data.Dataset], List[ClientState]],
        Tuple[tf.Tensor, Dict[Text, tf.Tensor], Dict[Text, tf.Tensor]]]
) -> None:
    confusion_matrix, aggregated_metrics, client_metrics = evaluation_fn(
        weights, [dataset.data[client] for client in dataset.clients],
        [client_states[client] for client in dataset.clients])

    # Confusion Matrix
    fig, ax = plt.subplots(figsize=(16, 8))

    sns.heatmap(confusion_matrix,
                annot=True,
                fmt='d',
                cmap=sns.color_palette("Blues"),
                ax=ax)

    ax.set_xlabel('Predicted')
    ax.set_ylabel('Ground Truth')

    mlflow.log_figure(fig, f'confusion_matrix.png')
    plt.close(fig)

    # Precision Recall
    fig, ax = plt.subplots(figsize=(16, 8))

    sns.lineplot(x=aggregated_metrics['recall'],
                 y=aggregated_metrics['precision'],
                 ax=ax)

    ax.set_xlabel('Recall')
    ax.set_xlim(0., 1.)

    ax.set_ylabel('Precision')
    ax.set_ylim(0., 1.)

    mlflow.log_figure(fig, f'precision_recall.png')
    plt.close(fig)

    # Client Metrics
    auc = metrics.SigmoidDecorator(tf.keras.metrics.AUC(curve='PR'),
                                   name='auc')
    accuracy = metrics.SigmoidDecorator(tf.keras.metrics.BinaryAccuracy(),
                                        name='accuracy')

    for client, metric in zip(client_states.keys(), iter(client_metrics)):
        tf.nest.map_structure(lambda v, t: v.assign(t), auc.variables,
                              list(metric['auc']))
        tf.nest.map_structure(lambda v, t: v.assign(t), accuracy.variables,
                              list(metric['accuracy']))

        mlflow.log_metric(f'client_{client}_val_auc', auc.result().numpy())
        mlflow.log_metric(f'client_{client}_val_acc',
                          accuracy.result().numpy())
Example #10
def test_log_figure_plotly(subdir):
    from plotly import graph_objects as go

    filename = "figure.html"
    artifact_file = filename if subdir is None else posixpath.join(subdir, filename)

    fig = go.Figure(go.Scatter(x=[0, 1], y=[2, 3]))

    with mlflow.start_run():
        mlflow.log_figure(fig, artifact_file)

        artifact_path = None if subdir is None else posixpath.normpath(subdir)
        artifact_uri = mlflow.get_artifact_uri(artifact_path)
        run_artifact_dir = local_file_uri_to_path(artifact_uri)
        assert os.listdir(run_artifact_dir) == [filename]
Example #11
def test_log_figure_matplotlib(subdir):
    import matplotlib.pyplot as plt

    filename = "figure.png"
    artifact_file = filename if subdir is None else posixpath.join(subdir, filename)

    fig, ax = plt.subplots()
    ax.plot([0, 1], [2, 3])

    with mlflow.start_run():
        mlflow.log_figure(fig, artifact_file)
        plt.close(fig)

        artifact_path = None if subdir is None else posixpath.normpath(subdir)
        artifact_uri = mlflow.get_artifact_uri(artifact_path)
        run_artifact_dir = local_file_uri_to_path(artifact_uri)
        assert os.listdir(run_artifact_dir) == [filename]
Example #12
def log_evaluated_results(
    eval_results,
    mlflow_tracking,
    fraction,
    n_splits,
    n_repeats,
):
    """Logs the genereted plot

    Args:
        plots (list of pandas.plot): genereted plots via Pandas
        df_names (list of strings): short names of dataset used
        model_name (string): short name of chosen model
    """
    if mlflow_tracking:
        print(f"Log artifacts for model evaluated...")
        model_name = eval_results[0]
        plots = eval_results[1]
        scores = eval_results[2]
        df_names = eval_results[3]
        time_spent = eval_results[4]

        exp_id = mlflow_set_exp_id("Model:Choose")
        run_name = f"{model_name} : Best"
        with mlflow.start_run(experiment_id=exp_id, run_name=run_name):
            for i in range(2):
                fig = plots[i].get_figure()
                path = f"./plots/{model_name}_on_{df_names[i]}.png"
                mlflow.log_figure(fig, path)
                mlflow.log_params(
                    {
                        "time_spent": time_spent,
                        "fraction": fraction,
                        "cv_n_splits": n_splits,
                        "cv_n_repeats": n_repeats,
                        "random_state": rnd_state,
                    }
                )
                mlflow.log_metrics(
                    {
                        "score_on_train": scores[0],
                        "score_on_val": scores[1],
                    }
                )
Example #13
def visualize_importances(
    feature_names,
    importances,
    title="Average Feature Importances",
    plot=True,
    axis_title="Features",
):
    feature_imp = PrettyTable(["feature_name", "importances"])
    feature_imp_dict = {}
    for i in range(len(feature_names)):
        print(feature_names[i], ": ", "%.3f" % (importances[i]))
        feature_imp.add_row([feature_names[i], importances[i]])
        feature_imp_dict[str(feature_names[i])] = importances[i]
    x_pos = np.arange(len(feature_names))
    if plot:
        fig, ax = plt.subplots(figsize=(12, 6))
        ax.bar(x_pos, importances, align="center")
        ax.set(title=title, xlabel=axis_title)
        ax.set_xticks(x_pos)
        ax.set_xticklabels(feature_names, rotation="vertical")
        mlflow.log_figure(fig, title + ".png")
    return feature_imp, feature_imp_dict
Example #14
def main():
    with open("data/featurized.pkl", "rb") as f:
        dataset = pickle.load(f)

    with open("params.yaml", "r") as f:
        cfg = yaml.safe_load(f)

    mlflow.set_tracking_uri("http://localhost:5000")

    with mlflow.start_run():
        mlflow.sklearn.autolog()

        clf = LogisticRegression(**cfg["evaluate_model"])
        clf.fit(dataset["train"]["X"], dataset["train"]["y"])

        y_pred = clf.predict(dataset["test"]["X"])

        report = classification_report(
            y_pred=y_pred,
            y_true=dataset["test"]["y"],
            output_dict=True,
        )
        metrics = {
            "accuracy": report["accuracy"],
            "f1-score": report["macro avg"]["f1-score"],
        }

        fig = plot_metrics_per_class(report, labels=dataset["labels"])
        mlflow.log_params(cfg["download_data"])
        mlflow.log_metrics(metrics=metrics)
        mlflow.sklearn.eval_and_log_metrics(
            clf, X=dataset["test"]["X"], y_true=dataset["test"]["y"], prefix="test_"
        )
        mlflow.log_figure(fig, artifact_file="metrics.png")

    with open("data/results.json", "w") as f:
        json.dump(obj=metrics, fp=f)
Example #15
def test_log_figure_raises_error_for_unsupported_figure_object_type():
    with mlflow.start_run(), pytest.raises(TypeError, match="Unsupported figure object type"):
        mlflow.log_figure("not_figure", "figure.png")
Example #16
            # setup figure
            ax = fig.add_subplot(5, 5, idx)
            xs,ys = score_df[param], score_df[metric]
            # scatter
            ax.scatter(xs, ys)
            # fit line
            z = np.polyfit(xs, ys, 1)
            p = np.poly1d(z)
            sample_xs = np.linspace(xs.min(), xs.max(), 100)
            ax.plot(sample_xs, p(sample_xs), linestyle='--', color='k')
            # label axes
            ax.set(xlabel=param.replace('_',' ').title(), ylabel=ylabel_map[metric])
            # prepare for next round
            idx += 1
    fig.tight_layout()
    mlflow.log_figure(fig, 'param_vs_metric.png')

    # visualization via pca
    from sklearn.decomposition import PCA
    pca = PCA(n_components=2, svd_solver='full')
    pca.fit(score_df.iloc[:,:5].T)  # we're only using the parameters
    # - per param
    fig = plt.figure(figsize=[20,4])
    idx = 1
    for param in ['n_estimators','max_depth','n_bins','max_samples','max_features']:
        ax = fig.add_subplot(1, 5, idx)
        ax.scatter(pca.components_[0], pca.components_[1], c=score_df[param])
        ax.set(xlabel='PC1', ylabel='PC2', title=param)
        idx += 1
    fig.tight_layout()
    mlflow.log_figure(fig, 'pca_params.png')
Example #17
  # Define mlflow artifacts to log with the experiment run
  mlflow.log_metric("precision", metrics.precision(1.0))
  mlflow.log_metric("recall", metrics.recall(1.0))
  mlflow.log_metric("f1", metrics.fMeasure(1.0))
  
  mlflow.spark.log_model(pipelineTrained, "turbine_anomalies")
  mlflow.set_tag("model", "gbt") 
  
  # Add confusion matrix to the model
  labels = pipelineTrained.stages[2].labels
  fig = plt.figure()
  sn.heatmap(pd.DataFrame(metrics.confusionMatrix().toArray()), annot=True, fmt='g', xticklabels=labels, yticklabels=labels)
  plt.suptitle("Turbine Damage Prediction. F1={:.2f}".format(metrics.fMeasure(1.0)), fontsize = 18)
  plt.xlabel("Predicted Labels")
  plt.ylabel("True Labels")
  mlflow.log_figure(fig, "confusion_matrix.png") # needs mlflow version >=1.13.1

# COMMAND ----------

# MAGIC %md ## Saving our model to the MLflow registry

# COMMAND ----------

# DBTITLE 1,Save our new model to the registry as a new version
# Get the best model run (highest f1 metric) that matches our search criteria
best_models = mlflow.search_runs(filter_string='tags.model="gbt" and attributes.status = "FINISHED" and metrics.f1 > 0',
                                 order_by=['metrics.f1 DESC'], max_results=1)
model_uri = best_models.iloc[0].artifact_uri
print(f"Model is stored at '{model_uri}'.")

# Register the model
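# A minimal sketch of the registration step announced above, assuming the model was
# logged under the "turbine_anomalies" artifact path; the registry model name is an
# illustrative placeholder, not taken from the original snippet.
best_run_id = best_models.iloc[0].run_id
model_version = mlflow.register_model(
    model_uri=f"runs:/{best_run_id}/turbine_anomalies",
    name="turbine_anomaly_detection",  # hypothetical registry model name
)
print(f"Registered version {model_version.version} of 'turbine_anomaly_detection'.")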
Example #18
import mlflow
import matplotlib.pyplot as plt

if __name__ == '__main__':
    fig, ax = plt.subplots()
    ax.plot([0, 1], [2, 3])

    with mlflow.start_run():
        mlflow.log_figure(fig, "figure.png")
Example #19
def run(experiment_name: str, run_name: str, config: Config) -> None:
    mlflow.set_experiment(experiment_name)

    with mlflow.start_run(run_name=run_name):
        mlflow.log_params(config._asdict())

        def reduce_fn(state, data):
            model = __model_fn(config.window_size, config.hidden_size,
                               config.dropout)
            loss_fn = losses.WeightedBinaryCrossEntropy(config.pos_weight)
            optimizer = __optimizer_fn(config.learning_rate)

            input_spec = (tf.TensorSpec(
                (None, config.window_size, len(SENSORS)),
                dtype=tf.float32), tf.TensorSpec((None, 1), dtype=tf.float32))

            train_step = tf.function(partial(__train_step,
                                             model=model,
                                             optimizer=optimizer,
                                             loss_fn=loss_fn),
                                     input_signature=input_spec)

            val_state = tf.zeros((2, 2), dtype=tf.int32)
            val_metrics = __metrics_fn()
            val_step = partial(__validation_step,
                               model=model,
                               metrics=val_metrics)

            for _ in range(1, config.epochs + 1):
                for X, y in data[0]:
                    train_step(X, y)

            confusion_matrix = data[1].reduce(val_state, val_step)
            auc = val_metrics[0].result().numpy()
            precision = val_metrics[1].result().numpy()
            recall = val_metrics[2].result().numpy()

            return (state[0] + confusion_matrix, state[1] + [auc],
                    state[2] + [precision], state[3] + [recall])

        confusion_matrix, auc, precision, recall = reduce(
            reduce_fn,
            __load_data(config.path, config.window_size, config.batch_size),
            (tf.zeros((2, 2), dtype=tf.int32), [], [], []))

        # Confusion matrix
        fig, ax = plt.subplots(figsize=(16, 8))

        sns.heatmap(confusion_matrix,
                    annot=True,
                    fmt='d',
                    cmap=sns.color_palette("Blues"),
                    ax=ax)

        ax.set_xlabel('Predicted')
        ax.set_ylabel('Ground Truth')

        mlflow.log_figure(fig, 'confusion_matrix.png')
        plt.close(fig)

        # AUC
        mlflow.log_metric('val_auc', np.mean(auc))

        # Precision Recall
        fig, ax = plt.subplots(figsize=(16, 8))

        sns.lineplot(x=np.mean(recall, axis=0),
                     y=np.mean(precision, axis=0),
                     ax=ax)

        ax.set_xlabel('Recall')
        ax.set_xlim(0., 1.)

        ax.set_ylabel('Precision')
        ax.set_ylim(0., 1.)

        mlflow.log_figure(fig, 'precision_recall.png')
        plt.close(fig)
Example #20
        hparams = json.load(hfile)
    mlflow.set_tracking_uri(hparams['meta']["mlflow_uri"])
    mlflow.set_experiment(hparams['meta']["name"])

    with mlflow.start_run(run_name=hparams['meta']["method"] + "-" +
                          hparams["meta"]["optimizer"]) as run:
        mlflow.log_dict(hparams, artifact_file="hparams/hparams.json")
        mlflow.log_text("", artifact_file="output/_touch.txt")
        artifact_uri = mlflow.get_artifact_uri("output/")
        hparams["meta"]["output_dir"] = artifact_uri
        print(f"URI: {artifact_uri}")
        start_time = datetime.now()

        if hparams["n_perturbs"] > 1:
            for perturb in range(hparams["n_perturbs"]):
                print(f"Running perturb {perturb}")
                continuation = ContinuationCreator(
                    problem=problem, hparams=hparams,
                    key=perturb).get_continuation_method()
                continuation.run()
        else:
            continuation = ContinuationCreator(
                problem=problem, hparams=hparams).get_continuation_method()
            continuation.run()

        end_time = datetime.now()
        print(f"Duration: {end_time-start_time}")

        figure = bif_plot(hparams["meta"]["output_dir"], pick_array)
        mlflow.log_figure(figure, artifact_file="plots/fig.png")
Example #21
def plot_predictions(
    y,
    ypred,
    metrics,
    use_mlflow=False,
    pdf_path="prediction_plots.pdf",
    persistence=False,
    lead=None,
    unit="minutes",
):
    if use_mlflow:
        import mlflow
    elif pdf_path is not None:
        # Save plots into pdf instead
        from matplotlib.backends.backend_pdf import PdfPages

        pdf = PdfPages(pdf_path)

    # Use last metric if there are more than one
    if isinstance(metrics, (list, tuple)):
        if len(metrics) > 1:
            metric = metrics[-1]
        else:
            metric = metrics[0]
    else:
        metric = metrics

    y = y.squeeze()
    ypred = ypred.squeeze()

    if has_storm_index(y):
        fig = []
        ax = []
        # Plot predictions for each storm individually
        for storm in y.storms.level:
            storm_fig, storm_ax = plot_prediction(
                y,
                ypred,
                metric,
                storm=storm,
                persistence=persistence,
                lead=lead,
                unit=unit,
            )

            fig.append(storm_fig)
            ax.append(storm_ax)

            if use_mlflow:
                mlflow.log_figure(storm_fig, f"prediction_plots/storm_{storm}.png")
            elif pdf_path is not None:
                pdf.savefig(storm_fig)
    else:
        fig, ax = plot_prediction(y,
                                  ypred,
                                  metric,
                                  persistence=persistence,
                                  lead=lead,
                                  unit=unit)
        if use_mlflow:
            mlflow.log_figure(fig, "prediction_plot.png")
        elif pdf_path is not None:
            pdf.savefig(fig)

    if not use_mlflow and pdf_path is not None:
        pdf.close()

    return fig, ax
Example #22
                        f'f1': model_scores['f1'],
                        f'f1_micro': model_scores['f1_micro'],
                        f'f1_macro': model_scores['f1_macro'],
                        f'precision': model_scores['precision'],
                        f'recall': model_scores['recall'],
                        f'roc_auc': model_scores['roc_auc']
                    })

                    f1_timestep = calc_score_and_std_per_timestep(
                        X_test_df, y_test, y_pred)
                    fig, ax = plt.subplots()
                    ax.plot(range(LAST_TRAIN_TIMESTEP + 1, LAST_TIMESTEP + 1),
                            f1_timestep)
                    ax.set_xlabel('timestep')
                    ax.set_ylabel('f1')
                    mlflow.log_figure(fig, f'f1_timestep_{m}.png')

                    plot_confusion_matrix(
                        y_test,
                        y_pred,
                        path=
                        f'{PLOTS_ROOT}/{m}_{cv}_{c_weight}_confusion_matrix.png'
                    )
                    plot_precision_recall_roc(
                        y_test,
                        y_prob,
                        path=f'{PLOTS_ROOT}/{m}_{cv}_{c_weight}')
                    mlflow.log_artifact(
                        f'{PLOTS_ROOT}/{m}_{cv}_{c_weight}_confusion_matrix.png'
                    )
                    mlflow.log_artifact(
Example #23
 for fold_n, (train_idx,
              test_idx) in enumerate(folds.split(X_train_all)):
     print(f'FOLD: {fold_n}')
     print(f'TRAIN: {train_idx}, TEST: {test_idx}')
     X_train, y_train = X_train_all[train_idx], y_train_all[train_idx]
     X_valid, y_valid = X_train_all[test_idx], y_train_all[test_idx]
     print(f'Train shape: {X_train.shape}')
     print(f'Val shape: {X_valid.shape}')
     assert X_train.shape[0] == y_train.shape[0]
     assert X_valid.shape[0] == y_valid.shape[0]
     fig, ax = plt.subplots()
     sns.countplot(y=y_train, label='train', color='slateblue')
     sns.countplot(y=y_valid, label='val', color='turquoise')
     ax.set_title('Train/validation labels count')
     plt.legend()
     mlflow.log_figure(fig, f'labels_count_fold_{fold_n}.png')
     mlflow.lightgbm.autolog()
     run_name = f'{EXPERIMENT_NAME}_fold_{fold_n}'
     with mlflow.start_run(run_name=run_name, nested=True):
         dtrain = lgb.Dataset(
             X_train, label=y_train
         )  # weight=get_unbalanced_weights(y_train, 0.3, 0.7))
         dvalid = lgb.Dataset(
             X_valid, label=y_valid
         )  # weight=get_unbalanced_weights(y_valid, 0.3, 0.7))
         res = {}
         clf = lgb.train(params,
                         dtrain,
                         valid_sets=[dtrain, dvalid],
                         evals_result=res,
                         early_stopping_rounds=50,
Example #24
def train_model(
    model,
    X_dev,
    y_dev,
    X_train,
    y_train,
    X_val,
    y_val,
    mlflow_tracking=True,
    log_residuals=True,
    save_mlmodel_separatly=True,
):
    print(f"\nTrain final model on Development set...")

    tic = time.time()
    model_name = type(model).__name__
    if mlflow_tracking:
        # Setup MLflow tracking server
        exp_id = mlflow_set_exp_id("Model:Train")
        run_name = f"{model_name}"
        ## Enable autologging
        mlflow.sklearn.autolog()
        ##* Fit model with MLflow logging
        with mlflow.start_run(experiment_id=exp_id, run_name=run_name) as run:
            run_id = run.info.run_id
            print(f"Active run_id: {run_id} ...\n")
            model = model.fit(X_dev, y_dev)
            toc = time.time()
            ## Disable autologging
            mlflow.sklearn.autolog(disable=True)

            ##* Log custom metrics
            mare_on_dev = mare(y_dev, model.predict(X_dev))
            mare_on_train = mare(y_train, model.predict(X_train))
            mare_on_val = mare(y_val, model.predict(X_val))
            print(f"\nMARE on DEV: {mare_on_dev}")
            print(f"MARE on TRAIN: {mare_on_train}")
            print(f"MARE on VAL: {mare_on_val}")
            mlflow.log_metrics({
                "mare_on_dev": mare_on_dev,
                "mare_on_train": mare_on_train,
                "mare_on_val": mare_on_val,
            })
            ##* Log custom plots
            if log_residuals:
                print(f"\nCalculate and log model's residuals...")
                fig = plot_residuals_errors(model, X_train, y_train, X_val,
                                            y_val)
                mlflow.log_figure(fig, "./plots/residuals_errors.png")
    else:
        ##* Fit trivial
        model = model.fit(X_dev, y_dev)
        toc = time.time()
        exp_id, run_id = None, None

    ## Evaluate time spent
    min, sec = divmod(toc - tic, 60)
    print(f"Model training took: {int(min)}min {int(sec)}sec\n")

    ## Save trained pipeline
    if save_mlmodel_separatly:
        folder = save_mlmodel_aside(model, run_id)
    else:
        print(f"No one model was NOT saved separatly...")
        folder = None

    print(f"\nExperiment ID: {exp_id}")
    print(f"Run ID: {run_id}")
    print(f"Folder: {folder}")

    return exp_id, run_id, folder
Example #25
def run(experiment_name: str, run_name: str, config: Config) -> None:
    mlflow.set_experiment(experiment_name)
    train, val, test = __load_data(config.path, config.window_size,
                                   config.batch_size)

    model = __model_fn(config.window_size, config.hidden_size, config.dropout)
    loss_fn = losses.WeightedBinaryCrossEntropy(config.pos_weight)
    optimizer = __optimizer_fn(config.learning_rate)

    input_spec = (tf.TensorSpec((None, config.window_size, len(SENSORS)),
                                dtype=tf.float32),
                  tf.TensorSpec((None, 1), dtype=tf.float32))

    ckpt = tf.train.Checkpoint(model=model, optimizer=optimizer)
    ckpt_manager = tf.train.CheckpointManager(ckpt,
                                              config.output,
                                              max_to_keep=5)

    train_loss = tf.keras.metrics.Mean(name='loss')
    train_metrics = __training_metrics_fn()
    train_step = tf.function(partial(__train_step,
                                     model=model,
                                     optimizer=optimizer,
                                     loss_fn=loss_fn,
                                     loss=train_loss,
                                     metrics=train_metrics),
                             input_signature=input_spec)

    val_loss = tf.keras.metrics.Mean(name='loss')
    val_metrics = __training_metrics_fn()
    val_step = tf.function(partial(__validation_step,
                                   model=model,
                                   loss_fn=loss_fn,
                                   loss=val_loss,
                                   metrics=val_metrics),
                           input_signature=input_spec)

    eval_state = tf.zeros((2, 2), dtype=tf.int32)
    eval_metrics = __evaluation_metrics_fn()
    eval_step = partial(__evaluation_step, model=model, metrics=eval_metrics)

    with mlflow.start_run(run_name=run_name):
        mlflow.log_params(config._asdict())

        # Fitting
        for epoch in range(1, config.epochs + 1):
            train_loss.reset_states()
            for metric in train_metrics:
                metric.reset_states()

            val_loss.reset_states()
            for metric in val_metrics:
                metric.reset_states()

            # Training
            for X, y in train:
                train_step(X, y)

            mlflow.log_metric(train_loss.name,
                              train_loss.result().numpy(),
                              step=epoch)
            mlflow.log_metrics(
                {
                    metric.name: metric.result().numpy()
                    for metric in train_metrics
                },
                step=epoch)

            # Validation
            for X, y in val:
                val_step(X, y)

            mlflow.log_metric(f'val_{val_loss.name}',
                              val_loss.result().numpy(),
                              step=epoch)
            mlflow.log_metrics(
                {
                    f'val_{metric.name}': metric.result().numpy()
                    for metric in val_metrics
                },
                step=epoch)

            # Checkpoint
            if epoch % config.checkpoint_rate == 0:
                ckpt_manager.save()

        # Evaluation
        def evaluate(confusion_matrix, client):
            # Reset PR-AUC and Accuracy metrics
            eval_metrics[0].reset_states()
            eval_metrics[3].reset_states()

            results = test[client].reduce(eval_state, eval_step)
            mlflow.log_metric(f'client_{client}_val_auc',
                              eval_metrics[0].result().numpy())
            mlflow.log_metric(f'client_{client}_val_acc',
                              eval_metrics[3].result().numpy())

            return confusion_matrix + results

        confusion_matrix = reduce(evaluate, test.clients,
                                  tf.zeros((2, 2), dtype=tf.int32))

        # Confusion matrix
        fig, ax = plt.subplots(figsize=(16, 8))

        sns.heatmap(confusion_matrix,
                    annot=True,
                    fmt='d',
                    cmap=sns.color_palette("Blues"),
                    ax=ax)

        ax.set_xlabel('Predicted')
        ax.set_ylabel('Ground Truth')

        mlflow.log_figure(fig, 'confusion_matrix.png')
        plt.close(fig)

        # Precision Recall
        fig, ax = plt.subplots(figsize=(16, 8))
        sns.lineplot(x=eval_metrics[2].result().numpy(),
                     y=eval_metrics[1].result().numpy(),
                     ax=ax)

        ax.set_xlabel('Recall')
        ax.set_xlim(0., 1.)

        ax.set_ylabel('Precision')
        ax.set_ylim(0., 1.)

        mlflow.log_figure(fig, 'precision_recall.png')
        plt.close(fig)
Example #26
                f'f1': model_scores['f1'],
                f'f1_micro': model_scores['f1_micro'],
                f'f1_macro': model_scores['f1_macro'],
                f'precision': model_scores['precision'],
                f'recall': model_scores['recall'],
                f'roc_auc': model_scores['roc_auc']
            })

            f1_timestep = calc_score_and_std_per_timestep(
                X_test_df, y_test, y_pred)
            fig, ax = plt.subplots()
            ax.plot(range(LAST_TRAIN_TIMESTEP + 1, LAST_TIMESTEP + 1),
                    f1_timestep)
            ax.set_xlabel('timestep')
            ax.set_ylabel('f1')
            mlflow.log_figure(fig, f'f1_timestep_{m}_automl.png')

            plot_confusion_matrix(
                y_test,
                y_pred,
                path=f'{PLOTS_ROOT}/automl_{m}_confusion_matrix.png')
            plot_precision_recall_roc(y_test,
                                      y_prob,
                                      path=f'{PLOTS_ROOT}/automl_{m}')
            mlflow.log_artifact(
                f'{PLOTS_ROOT}/automl_{m}_confusion_matrix.png')
            mlflow.log_artifact(
                f'{PLOTS_ROOT}/automl_{m}_precision_recall.png')
            mlflow.log_artifact(f'{PLOTS_ROOT}/automl_{m}_roc.png')

            plt.close('all')
Example #27
def mlflow_log_figure(config: DictConfig, fig, target=None):
    figure_config = config["training"]["figure"]
    figure_name = (target + "_" + figure_config["name"]
                   if target is not None else figure_config["name"])
    mlflow.log_figure(fig, "fig/" + figure_name)
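A minimal usage sketch for the helper above, assuming a Hydra/OmegaConf config that carries a training.figure.name entry; the concrete values are illustrative:

import matplotlib.pyplot as plt
import mlflow
from omegaconf import OmegaConf

cfg = OmegaConf.create({"training": {"figure": {"name": "loss_curve.png"}}})

fig, ax = plt.subplots()
ax.plot([0, 1, 2], [3.0, 2.1, 1.7])

with mlflow.start_run():
    # ends up in the run's artifacts as fig/train_loss_curve.png
    mlflow_log_figure(cfg, fig, target="train")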