Beispiel #1
0
def residuals(regressor, X, y):
    """Fit *regressor* on a train split and build a residuals chart.

    The data is split 80/20, the regressor is fitted on the train part,
    and the residuals (prediction minus truth) of both splits, together
    with the split scores, are packed into a wandb visualization.
    """
    # Hold out 20% of the data to compute test residuals.
    X_train, X_test, y_train, y_test = model_selection.train_test_split(
        X, y, test_size=0.2)

    regressor.fit(X_train, y_train)

    # Split scores reported alongside the residuals.
    score_train = regressor.score(X_train, y_train)
    score_test = regressor.score(X_test, y_test)

    # Residual = prediction - ground truth, per split.
    pred_train = regressor.predict(X_train)
    pred_test = regressor.predict(X_test)

    table = make_table(
        pred_train,
        pred_train - y_train,
        pred_test,
        pred_test - y_test,
        score_train,
        score_test,
    )
    return wandb.visualize("wandb/residuals_plot/v1", table)
Beispiel #2
0
def learning_curve(
    model,
    X,
    y,
    cv=None,
    shuffle=False,
    random_state=None,
    train_sizes=None,
    n_jobs=1,
    scoring=None,
):
    """Score *model* on increasingly large training subsets and chart it.

    Thin wrapper around sklearn's ``model_selection.learning_curve``; the
    per-fold train/test scores are averaged and packed into a wandb
    visualization.  Called by plot_learning_curve(); prefer that function
    if you wish to visualize your learning curves.
    """
    sizes, scores_train, scores_test = model_selection.learning_curve(
        model,
        X,
        y,
        cv=cv,
        n_jobs=n_jobs,
        train_sizes=train_sizes,
        scoring=scoring,
        shuffle=shuffle,
        random_state=random_state,
    )

    # Collapse the cross-validation folds (axis 1) into a single mean score.
    mean_train = scores_train.mean(axis=1)
    mean_test = scores_test.mean(axis=1)

    table = make_table(mean_train, mean_test, sizes)
    return wandb.visualize("wandb/learning_curve/v1", table)
Beispiel #3
0
        def roc_table(fpr_dict, tpr_dict, classes, indices_to_plot):
            """Build a wandb ROC-curve visualization, one curve per selected class.

            Mutates ``fpr_dict``/``tpr_dict`` in place with per-class
            roc_curve output.  Relies on ``y_true``, ``probas``, ``labels``
            and ``chart_limit`` from the enclosing scope.
            """
            data = []
            count = 0

            for i, to_plot in enumerate(indices_to_plot):
                # One-vs-rest ROC for class i — computed even when not
                # plotted, so the caller's dicts are fully populated.
                fpr_dict[i], tpr_dict[i], _ = roc_curve(y_true,
                                                        probas[:, i],
                                                        pos_label=classes[i])
                if to_plot:
                    roc_auc = auc(fpr_dict[i], tpr_dict[i])  # NOTE(review): computed but never used
                    for j in range(len(fpr_dict[i])):
                        # Map integer class ids to readable labels when given.
                        # NOTE(review): second isinstance checks classes[0],
                        # not classes[i] — presumably classes is homogeneous;
                        # confirm.
                        if labels is not None and (
                                isinstance(classes[i], int)
                                or isinstance(classes[0], np.integer)):
                            class_dict = labels[classes[i]]
                        else:
                            class_dict = classes[i]
                        # One table row: [class, fpr, tpr], rounded to 3 places.
                        fpr = [
                            class_dict,
                            round(fpr_dict[i][j], 3),
                            round(tpr_dict[i][j], 3)
                        ]
                        data.append(fpr)
                        count += 1
                        if count >= chart_limit:
                            # NOTE(review): this break only exits the inner
                            # loop — later classes still append rows; and the
                            # warning cites MAX_ROWS while the cap is
                            # chart_limit. Confirm intent.
                            wandb.termwarn(
                                "wandb uses only the first %d datapoints to create the plots."
                                % wandb.Table.MAX_ROWS)
                            break
            return wandb.visualize(
                'wandb/roc/v1',
                wandb.Table(columns=['class', 'fpr', 'tpr'], data=data))
Beispiel #4
0
 def heatmap_table(x_labels, y_labels, matrix_values, show_text):
     """Flatten a 2-D matrix into a wandb heatmap visualization.

     Emits one row per (x, y) cell, values read as ``matrix_values[j][i]``
     (rows indexed by y, columns by x), capped at ``chart_limit``
     datapoints (from the enclosing scope).

     Fix: the limit check previously only broke out of the inner loop, so
     one extra cell per remaining x label was still appended after the
     warning; a flag now stops both loops at the cap.
     """
     x_axis = []
     y_axis = []
     values = []
     count = 0
     truncated = False
     for i, x in enumerate(x_labels):
         if truncated:
             break
         for j, y in enumerate(y_labels):
             x_axis.append(x)
             y_axis.append(y)
             values.append(matrix_values[j][i])
             count += 1
             if count >= chart_limit:
                 wandb.termwarn(
                     "wandb uses only the first %d datapoints to create the plots."
                     % wandb.Table.MAX_ROWS)
                 truncated = True
                 break
     heatmap_key = "wandb/heatmap/v1" if show_text else "wandb/heatmap_no_text/v1"
     return wandb.visualize(
         heatmap_key,
         wandb.Table(
             columns=["x_axis", "y_axis", "values"],
             data=[[x_axis[i], y_axis[i],
                    round(values[i], 2)] for i in range(len(x_axis))],
         ),
     )
Beispiel #5
0
        def decision_boundaries(decision_boundary_x, decision_boundary_y,
                                decision_boundary_color, train_x, train_y,
                                train_color, test_x, test_y, test_color):
            """Pack decision-boundary, test and train points into one chart.

            Keeps at most 100 boundary points (all sharing one color),
            300 test points and 600 train points.

            Fix: the test loop used a hard ``range(300)`` and raised
            IndexError whenever fewer than 300 test points were supplied;
            it is now bounded by ``len(test_x)`` like the other two loops.
            """
            x_dict = []
            y_dict = []
            color_dict = []
            # Up to 100 points tracing the decision boundary.
            for i in range(min(len(decision_boundary_x), 100)):
                x_dict.append(decision_boundary_x[i])
                y_dict.append(decision_boundary_y[i])
                color_dict.append(decision_boundary_color)
            # Up to 300 test points.
            for i in range(min(len(test_x), 300)):
                x_dict.append(test_x[i])
                y_dict.append(test_y[i])
                color_dict.append(test_color[i])
            # Up to 600 train points.
            for i in range(min(len(train_x), 600)):
                x_dict.append(train_x[i])
                y_dict.append(train_y[i])
                color_dict.append(train_color[i])

            return wandb.visualize(
                'wandb/decision_boundaries/v1', wandb.Table(
                columns=['x', 'y', 'color'],
                data=[
                    [x_dict[i], y_dict[i], color_dict[i]] for i in range(len(x_dict))
                ]
            ))
Beispiel #6
0
        def residuals(y_pred_train, residuals_train, y_pred_test, residuals_test, train_score_, test_score_):
            """Build a wandb residuals-plot table from train/test predictions.

            Each row holds [dataset, y_pred, residual, train_score,
            test_score]; both splits are capped at ``max_datapoints`` rows.

            Fix: removed a dead ``max_datapoints_train = 900`` assignment
            that was immediately overwritten by 100 (the effective cap of
            100 per split is unchanged) and corrected copy-pasted comments.
            """
            y_pred_dict = []
            dataset_dict = []
            residuals_dict = []
            datapoints = 0
            max_datapoints = 100
            # Rows from the training split.
            for pred, residual in zip(y_pred_train, residuals_train):
                y_pred_dict.append(pred)
                dataset_dict.append("train")
                residuals_dict.append(residual)
                datapoints += 1
                if datapoints >= max_datapoints:
                    wandb.termwarn("wandb uses only the first %d datapoints to create the plots." % wandb.Table.MAX_ROWS)
                    break
            datapoints = 0
            # Rows from the test split.
            for pred, residual in zip(y_pred_test, residuals_test):
                y_pred_dict.append(pred)
                dataset_dict.append("test")
                residuals_dict.append(residual)
                datapoints += 1
                if datapoints >= max_datapoints:
                    wandb.termwarn("wandb uses only the first %d datapoints to create the plots." % wandb.Table.MAX_ROWS)
                    break

            return wandb.visualize(
                'wandb/residuals_plot/v1', wandb.Table(
                columns=['dataset', 'y_pred', 'residuals', 'train_score', 'test_score'],
                data=[
                    [dataset_dict[i], y_pred_dict[i], residuals_dict[i], train_score_, test_score_] for i in range(len(y_pred_dict))
                ]
            ))
def decision_boundaries(
    decision_boundary_x,
    decision_boundary_y,
    decision_boundary_color,
    train_x,
    train_y,
    train_color,
    test_x,
    test_y,
    test_color,
):
    """Pack decision-boundary, test and train points into one wandb chart.

    Keeps at most 100 boundary points (all sharing one color), 300 test
    points and 600 train points.

    Fix: the test loop used a hard ``range(300)`` and raised IndexError
    whenever fewer than 300 test points were supplied; it is now bounded
    by ``len(test_x)`` like the other two loops.
    """
    x_dict, y_dict, color_dict = [], [], []
    # Up to 100 points tracing the decision boundary.
    for i in range(min(len(decision_boundary_x), 100)):
        x_dict.append(decision_boundary_x[i])
        y_dict.append(decision_boundary_y[i])
        color_dict.append(decision_boundary_color)
    # Up to 300 test points.
    for i in range(min(len(test_x), 300)):
        x_dict.append(test_x[i])
        y_dict.append(test_y[i])
        color_dict.append(test_color[i])
    # Up to 600 train points.
    for i in range(min(len(train_x), 600)):
        x_dict.append(train_x[i])
        y_dict.append(train_y[i])
        color_dict.append(train_color[i])

    return wandb.visualize(
        "wandb/decision_boundaries/v1",
        wandb.Table(
            columns=["x", "y", "color"],
            data=[[x_dict[i], y_dict[i], color_dict[i]]
                  for i in range(len(x_dict))],
        ),
    )
Beispiel #8
0
        def class_proportions(classes_, class_counts_train, class_counts_test):
            """Chart per-class sample counts for the train and test splits.

            Emits one "train" and one "test" row per class.  Relies on
            ``labels``, ``chart_limit`` and ``get_named_labels`` from the
            enclosing scope.
            """
            class_dict = []
            dataset_dict = []
            count_dict = []
            for i in range(len(classes_)):
                # add class counts from training set
                class_dict.append(classes_[i])
                dataset_dict.append("train")
                count_dict.append(class_counts_train[i])
                # add class counts from test set
                class_dict.append(classes_[i])
                dataset_dict.append("test")
                count_dict.append(class_counts_test[i])
                # NOTE(review): compares the class index against chart_limit,
                # though rows grow by 2 per iteration, and the warning cites
                # MAX_ROWS — confirm the intended cap.
                if i >= chart_limit:
                    wandb.termwarn("wandb uses only the first %d datapoints to create the plots."% wandb.Table.MAX_ROWS)
                    break

            # Map integer class ids to readable names when labels are given.
            if labels is not None and (isinstance(class_dict[0], int)
                                or isinstance(class_dict[0], np.integer)):
                class_dict = get_named_labels(labels, class_dict)
            return wandb.visualize(
                'wandb/class_proportions/v1', wandb.Table(
                columns=['class', 'dataset', 'count'],
                data=[
                    [class_dict[i], dataset_dict[i], count_dict[i]] for i in range(len(class_dict))
                ]
            ))
 def pr_table(pr_curves):
     """Build a wandb precision-recall chart from per-class curve data.

     ``pr_curves`` maps class name -> (precision array, recall array).
     Relies on ``labels`` and ``chart_limit`` from the enclosing scope.
     """
     data = []
     count = 0
     for i, class_name in enumerate(pr_curves.keys()):
         precision, recall = pr_curves[class_name]
         for p, r in zip(precision, recall):
             # if class_names are ints and labels are set
             if labels is not None and (isinstance(class_name, int)
                                        or isinstance(
                                            class_name, np.integer)):
                 class_name = labels[class_name]
             # if class_names are ints and labels are not set
             # or, if class_names have something other than ints
             # (string, float, date) - user class_names
             data.append([class_name, round(p, 3), round(r, 3)])
             count += 1
             if count >= chart_limit:
                 # NOTE(review): this break only exits the inner loop —
                 # remaining classes still contribute rows after the warning.
                 wandb.termwarn(
                     "wandb uses only the first %d datapoints to create the plots."
                     % wandb.Table.MAX_ROWS)
                 break
     return wandb.visualize(
         'wandb/pr_curve/v1',
         wandb.Table(columns=['class', 'precision', 'recall'],
                     data=data))
Beispiel #10
0
 def feature_importances_table(feature_names, importances):
     """Pair each feature name with its importance in a wandb chart."""
     rows = [[feature_names[i], importances[i]]
             for i in range(len(feature_names))]
     return wandb.visualize(
         'wandb/feature_importances/v1',
         wandb.Table(columns=['feature_names', 'importances'], data=rows))
Beispiel #11
0
 def calibration_curves(model_dict, frac_positives_dict, mean_pred_value_dict, hist_dict, edge_dict):
     """Assemble per-model calibration-curve rows into a wandb chart."""
     columns = ['model', 'fraction_of_positives', 'mean_predicted_value',
                'hist_dict', 'edge_dict']
     rows = [
         [model_dict[i], frac_positives_dict[i], mean_pred_value_dict[i],
          hist_dict[i], edge_dict[i]]
         for i in range(len(model_dict))
     ]
     return wandb.visualize('wandb/calibration/v1',
                            wandb.Table(columns=columns, data=rows))
Beispiel #12
0
 def outlier_candidates(distance, outlier_percentage, influence_threshold):
     """Chart outlier candidates: one row per sample's Cook's distance.

     Each row is [distance, index, outlier_percentage, influence_threshold].

     Fix: the table referenced ``outlier_percentage_`` and
     ``influence_threshold_`` — names not defined in this function — while
     the parameters of the same meaning went unused; the parameters are
     now used.  (``round_3`` is a module-level helper.)
     """
     return wandb.visualize(
         'wandb/outliers/v1', wandb.Table(
         columns=['distance', 'instance_indicies', 'outlier_percentage', 'influence_threshold'],
         data=[
             [distance[i], i, round_3(outlier_percentage), influence_threshold] for i in range(len(distance))
         ]
     ))
Beispiel #13
0
def summary_metrics(model=None, X=None, y=None, X_test=None, y_test=None):
    """
    Calculates summary metrics (like mse, mae, r2 score) for both regression and
    classification algorithms.

    Called by plot_summary_metrics to visualize metrics. Please use the function
    plot_summary_metric() if you wish to visualize your summary metrics.

    Returns a wandb visualization, or implicitly None when the input
    validation fails.  Relies on test_missing/test_types/test_fitted and
    round_2 from the enclosing module.
    """
    # Only proceed when inputs are present, well-typed and the model is fitted.
    if (test_missing(model=model, X=X, y=y, X_test=X_test, y_test=y_test) and
        test_types(model=model, X=X, y=y, X_test=X_test, y_test=y_test) and
        test_fitted(model)):
        y = np.asarray(y)
        y_test = np.asarray(y_test)
        metric_name=[]
        metric_value=[]
        model_name = model.__class__.__name__

        params = {}
        # Log model params to wandb.config
        # NOTE(review): only scalar-valued attributes are collected, and
        # ``params`` is never used after this loop — confirm whether it
        # should actually be written to wandb.config.
        for v in vars(model):
            if isinstance(getattr(model, v), str) \
                or isinstance(getattr(model, v), bool) \
                    or isinstance(getattr(model, v), int) \
                    or isinstance(getattr(model, v), float):
                params[v] = getattr(model, v)

        # Classifier Metrics
        if sklearn.base.is_classifier(model):
            y_pred = model.predict(X_test)
            y_probas = model.predict_proba(X_test)  # NOTE(review): unused here

            metric_name.append("accuracy_score")
            metric_value.append(round_2(sklearn.metrics.accuracy_score(y_test, y_pred)))
            metric_name.append("precision")
            metric_value.append(round_2(sklearn.metrics.precision_score(y_test, y_pred, average="weighted")))
            metric_name.append("recall")
            metric_value.append(round_2(sklearn.metrics.recall_score(y_test, y_pred, average="weighted")))
            metric_name.append("f1_score")
            metric_value.append(round_2(sklearn.metrics.f1_score(y_test, y_pred, average="weighted")))

        # Regression Metrics
        elif sklearn.base.is_regressor(model):
            y_pred = model.predict(X_test)

            metric_name.append("mae")
            metric_value.append(round_2(sklearn.metrics.mean_absolute_error(y_test, y_pred)))
            metric_name.append("mse")
            metric_value.append(round_2(sklearn.metrics.mean_squared_error(y_test, y_pred)))
            metric_name.append("r2_score")
            metric_value.append(round_2(sklearn.metrics.r2_score(y_test, y_pred)))

        # One table row per metric, all tagged with the model's class name.
        return wandb.visualize(
            'wandb/metrics/v1', wandb.Table(
            columns=['metric_name', 'metric_value', 'model_name'],
            data= [
                [metric_name[i], metric_value[i], model_name] for i in range(len(metric_name))
            ]
        ))
Beispiel #14
0
 def elbow_curve(cluster_ranges, clfs, times):
     """Chart clustering error and fit time against cluster count."""
     rows = [[cluster_ranges[i], clfs[i], times[i]]
             for i in range(len(cluster_ranges))]
     return wandb.visualize(
         'wandb/elbow/v1',
         wandb.Table(columns=['cluster_ranges', 'errors', 'clustering_time'],
                     data=rows))
Beispiel #15
0
 def silhouette(x, y, colors, centerx, centery, y_sil, x_sil, color_sil, silhouette_avg):
     """Pack scatter points and silhouette segments into one wandb table.

     Each row holds a sample (x, y, color), placeholder center columns,
     and one silhouette segment (x1=0 to x2=x_sil at height y_sil) colored
     by cluster, plus the overall average silhouette score.

     NOTE(review): ``centerx``/``centery`` are accepted but every row
     hard-codes None for the center columns — confirm whether centers
     should be emitted.  Rows are truncated to len(color_sil); assumes
     x/y/colors are at least that long.
     """
     return wandb.visualize(
         'wandb/silhouette_/v1', wandb.Table(
         columns=['x', 'y', 'colors', 'centerx', 'centery', 'y_sil', 'x1', 'x2', 'color_sil', 'silhouette_avg'],
         data=[
             [x[i], y[i], colors[i], None, None,
             y_sil[i], 0, x_sil[i], color_sil[i], silhouette_avg]
             for i in range(len(color_sil))
         ]
     ))
Beispiel #16
0
def confusion_matrix(
    y_true=None,
    y_pred=None,
    labels=None,
    true_labels=None,
    pred_labels=None,
    normalize=False,
):
    """Build a wandb confusion-matrix chart for a classification.

    The matrix may be row-normalized and restricted to subsets of true
    and/or predicted labels.  Called by plot_confusion_matrix(); prefer
    that function if you wish to visualize your confusion matrix.
    """
    matrix = metrics.confusion_matrix(y_true, y_pred)

    # Class axis: explicit labels when given, otherwise whatever classes
    # actually appear in the data.
    if labels is not None:
        classes = np.asarray(labels)
    else:
        classes = unique_labels(y_true, y_pred)

    if normalize:
        # Normalize each row to proportions; all-zero rows produce NaN,
        # which is mapped back to 0.
        matrix = matrix.astype("float") / matrix.sum(axis=1)[:, np.newaxis]
        matrix = np.around(matrix, decimals=2)
        matrix[np.isnan(matrix)] = 0.0

    # Optionally keep only a subset of rows (true classes).
    if true_labels is None:
        true_classes = classes
    else:
        validate_labels(classes, true_labels, "true_labels")
        keep_rows = np.in1d(classes, true_labels)
        true_classes = classes[keep_rows]
        matrix = matrix[keep_rows]

    # Optionally keep only a subset of columns (predicted classes).
    if pred_labels is None:
        pred_classes = classes
    else:
        validate_labels(classes, pred_labels, "pred_labels")
        keep_cols = np.in1d(classes, pred_labels)
        pred_classes = classes[keep_cols]
        matrix = matrix[:, keep_cols]

    table = make_table(matrix, pred_classes, true_classes, labels)
    return wandb.visualize("wandb/confusion_matrix/v1", table)
Beispiel #17
0
 def learning_curve_table(train, test, trainsize):
     """Interleave train/test scores per training size into a wandb chart.

     Emits one "train" and one "test" row per size, stopping once half of
     ``chart_limit`` (from the enclosing scope) sizes have been emitted.
     """
     data = []
     for idx in range(len(train)):
         if idx >= chart_limit / 2:
             wandb.termwarn("wandb uses only the first %d datapoints to create the plots." % wandb.Table.MAX_ROWS)
             break
         data.append(["train", round(train[idx], 2), trainsize[idx]])
         data.append(["test", round(test[idx], 2), trainsize[idx]])
     return wandb.visualize(
         'wandb/learning_curve/v1',
         wandb.Table(columns=['dataset', 'score', 'train_size'], data=data))
Beispiel #18
0
def elbow_curve(clusterer, X, cluster_ranges, n_jobs, show_cluster_time):
    """Fit ``clusterer`` across a range of cluster counts and chart the elbow.

    NOTE(review): ``show_cluster_time`` is accepted but not used in this
    function — presumably consumed downstream; confirm.
    """
    # Default: odd cluster counts 1..9; otherwise sort what the caller gave.
    if cluster_ranges is None:
        cluster_ranges = range(1, 10, 2)
    else:
        cluster_ranges = sorted(cluster_ranges)

    # Fit one clusterer per candidate count (possibly in parallel).
    clfs, times = _compute_results_parallel(n_jobs, clusterer, X,
                                            cluster_ranges)

    # Scores may be negative (e.g. negated inertia); chart magnitudes.
    errors = np.absolute(clfs)

    table = make_table(cluster_ranges, errors, times)
    return wandb.visualize("wandb/elbow/v1", table)
Beispiel #19
0
 def confusion_matrix_table(cm, label):
     """Build a one-vs-rest confusion-matrix chart for ``label``.

     Both axes are [label, 'Rest']; rows are capped at wandb.Table.MAX_ROWS.
     """
     pred_classes = [label, 'Rest']
     true_classes = [label, 'Rest']
     data = []
     count = 0
     for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
         data.append([pred_classes[i], true_classes[j], cm[i, j]])
         count += 1
         if count >= wandb.Table.MAX_ROWS:
             wandb.termwarn(
                 f"wandb uses only the first {wandb.Table.MAX_ROWS} datapoints to create plots."
             )
             break
     return wandb.visualize(
         'wandb/confusion_matrix/v1',
         wandb.Table(columns=['Predicted', 'Actual', 'Count'], data=data))
Beispiel #20
0
def feature_importances(model, feature_names):
    """Chart a model's feature importances, most important first.

    Looks for one of feature_importances_ / feature_log_prob_ / coef_ on
    the model; warns and returns None when none is present or when the
    importances array has more than one non-trivial dimension.
    """
    attributes_to_check = [
        "feature_importances_", "feature_log_prob_", "coef_"
    ]
    found_attribute = check_for_attribute_on(model, attributes_to_check)
    if found_attribute is None:
        wandb.termwarn(
            f"could not find any of attributes {', '.join(attributes_to_check)} on classifier. Cannot plot feature importances."
        )
        return

    # Pull the importances out of whichever attribute was found.
    if found_attribute == "feature_importances_":
        importances = model.feature_importances_
    elif found_attribute == "coef_":  # ElasticNet-like models
        importances = model.coef_
    elif found_attribute == "feature_log_prob_":
        # coef_ was deprecated in sklearn 0.24, replaced with
        # feature_log_prob_
        importances = model.feature_log_prob_

    if len(importances.shape) > 1:
        # A shape like (1, n) can be squeezed away; two or more
        # non-trivial axes cannot be plotted as a bar chart.
        n_significant_dims = sum(i > 1 for i in importances.shape)
        if n_significant_dims > 1:
            nd = len(importances.shape)
            wandb.termwarn(
                f"{nd}-dimensional feature importances array passed to plot_feature_importances. "
                f"{nd}-dimensional and higher feature importances arrays are not currently supported. "
                f"These importances will not be plotted.")
            return
        importances = np.squeeze(importances)

    # Sort descending by importance, reordering names to match; fall back
    # to the sorted indices as names when none were supplied.
    order = np.argsort(importances)[::-1]
    importances = importances[order]
    if feature_names is None:
        feature_names = order
    else:
        feature_names = np.array(feature_names)[order]

    table = make_table(feature_names, importances)
    return wandb.visualize("wandb/feature_importances/v1", table)
Beispiel #21
0
def summary_metrics(model=None, X=None, y=None, X_test=None, y_test=None):
    """Compute headline metrics for a fitted classifier or regressor.

    Classifiers get weighted accuracy/precision/recall/f1; regressors get
    mae/mse/r2.  Results are rounded and charted via wandb.  Called by
    plot_summary_metrics(); prefer that function for plotting.
    """
    y, y_test = np.asarray(y), np.asarray(y_test)
    model_name = model.__class__.__name__
    y_pred = model.predict(X_test)

    results = {}
    if sklearn.base.is_classifier(model):
        results["accuracy_score"] = sklearn.metrics.accuracy_score(y_test, y_pred)
        results["precision"] = sklearn.metrics.precision_score(
            y_test, y_pred, average="weighted")
        results["recall"] = sklearn.metrics.recall_score(
            y_test, y_pred, average="weighted")
        results["f1_score"] = sklearn.metrics.f1_score(
            y_test, y_pred, average="weighted")
    elif sklearn.base.is_regressor(model):
        results["mae"] = sklearn.metrics.mean_absolute_error(y_test, y_pred)
        results["mse"] = sklearn.metrics.mean_squared_error(y_test, y_pred)
        results["r2_score"] = sklearn.metrics.r2_score(y_test, y_pred)

    # Round every metric for display.
    results = {name: utils.round_2(value) for name, value in results.items()}

    table = make_table(results, model_name)
    return wandb.visualize("wandb/metrics/v1", table)
Beispiel #22
0
def outlier_candidates(regressor, X, y):
    """Flag influential observations via Cook's distance and chart them.

    Fits the regressor, computes each sample's Cook's distance from the
    studentized residuals and leverage, and charts the distances together
    with the 4/n influence-threshold rule of thumb and the percentage of
    samples above it.
    """
    # Fit a linear model to X and y to compute MSE
    regressor.fit(X, y)

    # Leverage is computed as the diagonal of the projection matrix of X
    leverage = (X * np.linalg.pinv(X).T).sum(1)

    # Compute the rank and the degrees of freedom of the OLS model
    rank = np.linalg.matrix_rank(X)
    df = X.shape[0] - rank

    # Compute the MSE from the residuals
    residuals = y - regressor.predict(X)
    mse = np.dot(residuals, residuals) / df

    # Compute Cook's distance
    residuals_studentized = residuals / np.sqrt(mse) / np.sqrt(1 - leverage)
    distance_ = residuals_studentized**2 / X.shape[1]
    distance_ *= leverage / (1 - leverage)

    # Compute the influence threshold rule of thumb
    influence_threshold_ = 4 / X.shape[0]
    outlier_percentage_ = sum(distance_ >= influence_threshold_) / X.shape[0]
    outlier_percentage_ *= 100.0

    # Collect distances up to the chart limit; check_against_limit returns
    # True (and warns) once the cap is reached.
    distance_dict, count = [], 0
    for d in distance_:
        distance_dict.append(d)
        count += 1
        if utils.check_against_limit(
                count,
                "outlier_candidates",
                utils.chart_limit,
        ):
            break

    table = make_table(distance_dict, outlier_percentage_,
                       influence_threshold_)
    chart = wandb.visualize("wandb/outliers/v1", table)

    return chart
Beispiel #23
0
 def confusion_matrix_table(cm, pred_classes, true_classes):
     """Flatten confusion-matrix cells into [Predicted, Actual, Count] rows.

     Relies on ``labels`` and ``chart_limit`` from the enclosing scope.
     """
     data=[]
     count = 0
     for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
         # Map integer class ids through labels when provided.
         # NOTE(review): the second isinstance checks pred_classes[0], not
         # pred_classes[i] — presumably the classes are homogeneous; confirm.
         if labels is not None and (isinstance(pred_classes[i], int)
                             or isinstance(pred_classes[0], np.integer)):
             pred_dict = labels[pred_classes[i]]
             true_dict = labels[true_classes[j]]
         else:
             pred_dict = pred_classes[i]
             true_dict = true_classes[j]
         data.append([pred_dict, true_dict, cm[i,j]])
         count+=1
         # Cap the number of rows; the warning cites MAX_ROWS though the
         # cap actually applied is chart_limit.
         if count >= chart_limit:
             wandb.termwarn("wandb uses only the first %d datapoints to create the plots."% wandb.Table.MAX_ROWS)
             break
     return wandb.visualize(
         'wandb/confusion_matrix/v1', wandb.Table(
         columns=['Predicted', 'Actual', 'Count'],
         data=data
     ))
Beispiel #24
0
def class_proportions(y_train, y_test, labels):
    """Chart per-class sample counts for the train and test splits.

    Class ids are taken from the union of both splits (test may be None);
    integer ids are mapped through ``labels`` when provided.
    """
    # Collect every class id seen in either split.
    splits = (y_train,) if y_test is None else (y_train, y_test)
    class_ids = np.array(unique_labels(*splits))

    # Count occurrences of each class in each split.
    counts_train = np.array([(y_train == c).sum() for c in class_ids])
    counts_test = np.array([(y_test == c).sum() for c in class_ids])

    class_column, dataset_column, count_column = make_columns(
        class_ids, counts_train, counts_test
    )

    # Swap integer class ids for their human-readable names.
    if labels is not None and isinstance(class_column[0], (int, np.integer)):
        class_column = get_named_labels(labels, class_column)

    table = make_table(class_column, dataset_column, count_column)
    return wandb.visualize("wandb/class_proportions/v1", table)
Beispiel #25
0
def silhouette(clusterer, X, cluster_labels, labels, metric, kmeans):
    """Build a wandb silhouette chart for a fitted clustering.

    Computes the average silhouette score plus per-sample silhouette
    values laid out as stacked horizontal bands (one per cluster), paired
    with the first two feature columns of X as a scatter.  When ``kmeans``
    is truthy the clusterer's cluster centers are charted as well.

    NOTE(review): assumes X has at least two columns (X[:, 0], X[:, 1]);
    ``labels`` is converted to an array but not otherwise used here —
    confirm.
    """
    # Run clusterer for n_clusters in range(len(cluster_ranges), get cluster labels
    # TODO - keep/delete once we decide if we should train clusterers
    # or ask for trained models
    # clusterer.set_params(n_clusters=n_clusters, random_state=42)
    # cluster_labels = clusterer.fit_predict(X)
    cluster_labels = np.asarray(cluster_labels)
    labels = np.asarray(labels)

    le = LabelEncoder()
    _ = le.fit_transform(cluster_labels)  # NOTE(review): result unused
    n_clusters = len(np.unique(cluster_labels))

    # The silhouette_score gives the average value for all the samples.
    # This gives a perspective into the density and separation of the formed
    # clusters
    silhouette_avg = silhouette_score(X, cluster_labels, metric=metric)

    # Compute the silhouette scores for each sample
    sample_silhouette_values = silhouette_samples(X,
                                                  cluster_labels,
                                                  metric=metric)

    x_sil, y_sil, color_sil = [], [], []

    # y_lower tracks the vertical offset of the current cluster's band.
    count, y_lower = 0, 10
    for i in range(n_clusters):
        # Aggregate the silhouette scores for samples belonging to
        # cluster i, and sort them
        ith_cluster_silhouette_values = sample_silhouette_values[cluster_labels
                                                                 == i]

        ith_cluster_silhouette_values.sort()

        size_cluster_i = ith_cluster_silhouette_values.shape[0]
        y_upper = y_lower + size_cluster_i

        y_values = np.arange(y_lower, y_upper)

        for j in range(len(y_values)):
            y_sil.append(y_values[j])
            x_sil.append(ith_cluster_silhouette_values[j])
            color_sil.append(i)
            count += 1
            # NOTE(review): this break only exits the inner loop; later
            # clusters still append points after the limit is reached.
            if utils.check_against_limit(count, "silhouette",
                                         utils.chart_limit):
                break

        # Compute the new y_lower for next plot
        y_lower = y_upper + 10  # 10 for the 0 samples

    if kmeans:
        centers = clusterer.cluster_centers_
        centerx = centers[:, 0]
        centery = centers[:, 1]

    else:
        # Without centers, pad with None so the table columns line up.
        centerx = [None] * len(color_sil)
        centery = [None] * len(color_sil)

    table = make_table(
        X[:, 0],
        X[:, 1],
        cluster_labels,
        centerx,
        centery,
        y_sil,
        x_sil,
        color_sil,
        silhouette_avg,
    )
    chart = wandb.visualize("wandb/silhouette_/v1", table)

    return chart