Example #1
0
def plot_centroid_covariance_matrices_evolution(covariance_matrices,
                                                distribution,
                                                name=None):
    """Plot the covariance determinant of each centroid against epoch.

    For every epoch and centroid the product of the diagonal entries of
    the covariance matrix is plotted. This product equals the determinant
    only for diagonal (or triangular) matrices.

    Arguments:
        covariance_matrices: Array whose shape unpacks to
            ``(n_epochs, n_centroids, _, _)``.
        distribution: Name of the distribution; normalised and used in
            the figure name and the y-axis label.
        name: Optional extra name passed on to
            ``saving.build_figure_name``.

    Returns:
        A tuple ``(figure, figure_name)``.
    """

    distribution = normalise_string(distribution)
    figure_name = "centroids_evolution-{}-covariance_matrices".format(
        distribution)
    figure_name = saving.build_figure_name(figure_name, name)

    y_label = _axis_label_for_symbol(symbol="\\Sigma",
                                     distribution=distribution,
                                     prefix="|",
                                     suffix="(y = k)|")

    n_epochs, n_centroids, __, __ = covariance_matrices.shape

    # Product of the diagonal of every matrix, shape (n_epochs, n_centroids)
    determinants = numpy.prod(
        numpy.diagonal(covariance_matrices, axis1=2, axis2=3), axis=2)

    # Default to a linear axis so that the scale is always defined; a
    # logarithmic axis is only meaningful for strictly positive values
    y_scale = "linear"
    if determinants.size > 0 and (determinants > 0).all():
        # Per-centroid ratio between largest and smallest value over epochs
        line_range_ratio = determinants.max(axis=0) / determinants.min(axis=0)
        range_ratio = line_range_ratio.max() / line_range_ratio.min()
        if range_ratio > 1e2:
            y_scale = "log"

    centroids_palette = style.darker_palette(n_centroids)
    epochs = numpy.arange(n_epochs) + 1

    figure = pyplot.figure()
    axis = figure.add_subplot(1, 1, 1)
    seaborn.despine()

    for k in range(n_centroids):
        axis.plot(epochs,
                  determinants[:, k],
                  color=centroids_palette[k],
                  label="$k = {}$".format(k))

    axis.set_xlabel("Epochs")
    axis.set_ylabel(y_label)

    axis.set_yscale(y_scale)

    axis.legend(loc="best")

    return figure, figure_name
Example #2
0
def plot_centroid_probabilities_evolution(probabilities,
                                          distribution,
                                          linestyle="solid",
                                          name=None):
    """Draw one line per centroid showing its probability at every epoch.

    Arguments:
        probabilities: Array whose shape unpacks to
            ``(n_epochs, n_centroids)``.
        distribution: Name of the distribution; normalised and used in
            the figure name and the y-axis label.
        linestyle: Line style forwarded to every ``axis.plot`` call.
        name: Optional extra name passed on to
            ``saving.build_figure_name``.

    Returns:
        A tuple ``(figure, figure_name)``.
    """

    distribution = normalise_string(distribution)

    probability_label = _axis_label_for_symbol(
        symbol="\\pi",
        distribution=distribution,
        suffix="^k",
    )

    figure_name = saving.build_figure_name(
        "centroids_evolution-{}-probabilities".format(distribution), name)

    epoch_count, centroid_count = probabilities.shape
    palette = style.darker_palette(centroid_count)

    # Epochs are numbered from one on the x-axis
    epoch_numbers = numpy.arange(1, epoch_count + 1)

    figure = pyplot.figure()
    axis = figure.add_subplot(1, 1, 1)
    seaborn.despine()

    for centroid_index in range(centroid_count):
        line_options = {
            "color": palette[centroid_index],
            "linestyle": linestyle,
            "label": "$k = {}$".format(centroid_index),
        }
        axis.plot(epoch_numbers,
                  probabilities[:, centroid_index],
                  **line_options)

    axis.set_xlabel("Epochs")
    axis.set_ylabel(probability_label)
    axis.legend(loc="best")

    return figure, figure_name
Example #3
0
def analyse_centroid_probabilities(centroids,
                                   name=None,
                                   analysis_level=None,
                                   export_options=None,
                                   analyses_directory=None):
    """Plot and save the centroid probabilities of the given centroids.

    Arguments:
        centroids: Container that may hold the keys ``"posterior"`` and
            ``"prior"``; each non-empty entry must provide a
            ``"probabilities"`` item. ``None`` is treated as empty.
        name: Optional name; normalised and prepended to the plot name.
        analysis_level: Falls back to the configured default when
            ``None``. It is resolved but not otherwise used here.
        export_options: Forwarded to ``figures.save_figure``.
        analyses_directory: Directory forwarded to
            ``figures.save_figure``; falls back to the configured default
            when ``None``.

    Raises:
        ValueError: If neither posterior nor prior probabilities are
            available.
    """

    if name:
        name = normalise_string(name)
    if analysis_level is None:
        analysis_level = defaults["analyses"]["analysis_level"]
    if analyses_directory is None:
        analyses_directory = defaults["analyses"]["directory"]

    print("Plotting centroid probabilities.")
    plot_time_start = time()

    if centroids is None:
        centroids = {}

    posterior_probabilities = None
    prior_probabilities = None

    if "posterior" in centroids and centroids["posterior"]:
        posterior_probabilities = centroids["posterior"]["probabilities"]
    if "prior" in centroids and centroids["prior"]:
        prior_probabilities = centroids["prior"]["probabilities"]

    # The prior takes precedence when determining the number of centroids
    if prior_probabilities is not None:
        n_centroids = len(prior_probabilities)
    elif posterior_probabilities is not None:
        n_centroids = len(posterior_probabilities)
    else:
        raise ValueError(
            "No posterior or prior centroid probabilities to plot.")

    # Distributions available for plotting, posterior first
    distributions = []
    if posterior_probabilities is not None:
        distributions.append("posterior")
    if prior_probabilities is not None:
        distributions.append("prior")

    centroids_palette = style.darker_palette(n_centroids)
    x_label = "$k$"

    # The axis is labelled after the posterior whenever it is available
    y_label = _axis_label_for_symbol(
        symbol="\\pi",
        distribution=normalise_string(distributions[0]),
        suffix="^k")

    # A single unnamed distribution is passed as a plain string
    if name:
        plot_name = [name] + distributions
    elif len(distributions) > 1:
        plot_name = distributions
    else:
        plot_name = distributions[0]

    figure, figure_name = figures.plot_probabilities(posterior_probabilities,
                                                     prior_probabilities,
                                                     x_label=x_label,
                                                     y_label=y_label,
                                                     palette=centroids_palette,
                                                     uniform=False,
                                                     name=plot_name)
    figures.save_figure(figure=figure,
                        name=figure_name,
                        options=export_options,
                        directory=analyses_directory)

    plot_duration = time() - plot_time_start
    print("Centroid probabilities plotted and saved ({}).".format(
        format_duration(plot_duration)))
Example #4
0
def plot_centroid_means_evolution(means,
                                  distribution,
                                  decomposed=False,
                                  name=None):
    """Plot the path of every centroid mean in two dimensions over epochs.

    Each centroid is drawn as a line through its means with a scatter
    point per epoch, shaded by epoch number. The colour bar is built from
    an extra neutral-coloured scatter of the first centroid.

    Arguments:
        means: Array whose shape unpacks to
            ``(n_epochs, n_centroids, latent_size)``; ``latent_size``
            must be 2.
        distribution: Name of the distribution; normalised and used in
            the figure name and the axis labels.
        decomposed: If true, the axis labels are built with the
            decomposition method ``"PCA"``.
        name: Optional extra name passed on to
            ``saving.build_figure_name``.

    Returns:
        A tuple ``(figure, figure_name)``.

    Raises:
        ValueError: If the means do not have exactly two dimensions.
    """

    n_epochs, n_centroids, latent_size = means.shape

    # Coordinates 0 and 1 of every mean are plotted, so both fewer and more
    # than two dimensions are rejected before any other work is done
    if latent_size != 2:
        raise ValueError("Dimensions of means should be 2.")

    symbol = "\\mu"
    if decomposed:
        decomposition_method = "PCA"
    else:
        decomposition_method = ""
    distribution = normalise_string(distribution)
    suffix = "(y = k)"

    x_label = _axis_label_for_symbol(symbol=symbol,
                                     coordinate=1,
                                     decomposition_method=decomposition_method,
                                     distribution=distribution,
                                     suffix=suffix)
    y_label = _axis_label_for_symbol(symbol=symbol,
                                     coordinate=2,
                                     decomposition_method=decomposition_method,
                                     distribution=distribution,
                                     suffix=suffix)

    figure_name = "centroids_evolution-{}-means".format(distribution)
    figure_name = saving.build_figure_name(figure_name, name)

    centroids_palette = style.darker_palette(n_centroids)
    epochs = numpy.arange(n_epochs) + 1

    figure = pyplot.figure()
    axis = figure.add_subplot(1, 1, 1)
    seaborn.despine()

    # Neutral scatter plot at the bottom of the z-order; it only serves as
    # the mappable for the epoch colour bar
    colour_bar_scatter_plot = axis.scatter(means[:, 0, 0],
                                           means[:, 0, 1],
                                           c=epochs,
                                           cmap=seaborn.dark_palette(
                                               style.NEUTRAL_COLOUR,
                                               as_cmap=True),
                                           zorder=0)

    for k in range(n_centroids):
        colour = centroids_palette[k]
        colour_map = seaborn.dark_palette(colour, as_cmap=True)
        axis.plot(means[:, k, 0],
                  means[:, k, 1],
                  color=colour,
                  label="$k = {}$".format(k),
                  zorder=k + 1)
        # Points are drawn above all lines
        axis.scatter(means[:, k, 0],
                     means[:, k, 1],
                     c=epochs,
                     cmap=colour_map,
                     zorder=n_centroids + k + 1)

    axis.legend(loc="best")

    colour_bar = figure.colorbar(colour_bar_scatter_plot)
    colour_bar.outline.set_linewidth(0)
    colour_bar.set_label("Epochs")

    axis.set_xlabel(x_label)
    axis.set_ylabel(y_label)

    return figure, figure_name
Example #5
0
def plot_values(values,
                colour_coding=None,
                colouring_data_set=None,
                centroids=None,
                sampled_values=None,
                class_name=None,
                feature_index=None,
                figure_labels=None,
                example_tag=None,
                name="scatter"):
    """Scatter-plot the first two columns of values with optional colouring.

    Examples are shuffled with a fixed seed before plotting to remove any
    prior order.

    Arguments:
        values: Two-dimensional array or SciPy sparse matrix; only the
            first two columns are plotted.
        colour_coding: Optional colour-coding name. After normalisation,
            names containing ``"labels"``, ``"ids"`` or ``"class"``, and
            the names ``"batches"``, ``"count_sum"`` and ``"feature"``
            are recognised. ``None`` plots all examples in black.
        colouring_data_set: Data set providing the labels, palettes,
            count sums or feature values used for colouring. Required
            whenever ``colour_coding`` is given.
        centroids: Optional container; if its ``"prior"`` entry holds
            more than one centroid, means are drawn as crosses and
            covariance matrices as ellipses.
        sampled_values: Optional sampled values drawn as a hexagonal
            binning behind the examples, which are then drawn
            half-transparent.
        class_name: Class to highlight for class colour codings; all
            other examples are drawn in the neutral colour.
        feature_index: Index of the feature used for the ``"feature"``
            colour coding.
        figure_labels: Optional dictionary with the keys ``"title"``,
            ``"x label"`` and ``"y label"``.
        example_tag: Not used by this function.
        name: Prefix of the figure name.

    Returns:
        A tuple ``(figure, figure_name)``.

    Raises:
        ValueError: If a colour coding is given without a colouring data
            set, if the feature index is missing or out of range, or if
            the colour coding is not recognised.
    """

    # Fail before the data set is accessed or the plot style is changed
    if colour_coding and colouring_data_set is None:
        raise ValueError("Colouring data set not given.")

    figure_name = name

    if figure_labels:
        title = figure_labels.get("title")
        x_label = figure_labels.get("x label")
        y_label = figure_labels.get("y label")
    else:
        title = "none"
        x_label = "$x$"
        y_label = "$y$"

    if not title:
        title = "none"

    figure_name += "-" + normalise_string(title)

    if colour_coding:
        colour_coding = normalise_string(colour_coding)
        figure_name += "-" + colour_coding
        if "predicted" in colour_coding:
            if colouring_data_set.prediction_specifications:
                figure_name += "-" + (
                    colouring_data_set.prediction_specifications.name)
            else:
                figure_name += "-unknown_prediction_method"

    if sampled_values is not None:
        figure_name += "-samples"

    values = values[:, :2].copy()
    if scipy.sparse.issparse(values):
        values = values.toarray()

    # Randomise examples in values to remove any prior order
    n_examples, __ = values.shape
    random_state = numpy.random.RandomState(117)
    shuffled_indices = random_state.permutation(n_examples)
    values = values[shuffled_indices]

    # Adjust marker size based on number of examples
    style._adjust_marker_size_for_scatter_plots(n_examples)

    figure = pyplot.figure()
    axis = figure.add_subplot(1, 1, 1)
    seaborn.despine()

    axis.set_xlabel(x_label)
    axis.set_ylabel(y_label)

    colour_map = seaborn.dark_palette(style.STANDARD_PALETTE[0], as_cmap=True)

    alpha = 1
    if sampled_values is not None:
        alpha = 0.5

    if colour_coding and ("labels" in colour_coding or "ids" in colour_coding
                          or "class" in colour_coding
                          or colour_coding == "batches"):

        if colour_coding == "predicted_cluster_ids":
            labels = colouring_data_set.predicted_cluster_ids
            class_names = numpy.unique(labels).tolist()
            number_of_classes = len(class_names)
            class_palette = None
            label_sorter = None
        elif colour_coding == "predicted_labels":
            labels = colouring_data_set.predicted_labels
            class_names = colouring_data_set.predicted_class_names
            number_of_classes = colouring_data_set.number_of_predicted_classes
            class_palette = colouring_data_set.predicted_class_palette
            label_sorter = colouring_data_set.predicted_label_sorter
        elif colour_coding == "predicted_superset_labels":
            labels = colouring_data_set.predicted_superset_labels
            class_names = colouring_data_set.predicted_superset_class_names
            number_of_classes = (
                colouring_data_set.number_of_predicted_superset_classes)
            class_palette = colouring_data_set.predicted_superset_class_palette
            label_sorter = colouring_data_set.predicted_superset_label_sorter
        elif "superset" in colour_coding:
            labels = colouring_data_set.superset_labels
            class_names = colouring_data_set.superset_class_names
            number_of_classes = colouring_data_set.number_of_superset_classes
            class_palette = colouring_data_set.superset_class_palette
            label_sorter = colouring_data_set.superset_label_sorter
        elif colour_coding == "batches":
            labels = colouring_data_set.batch_indices.flatten()
            class_names = colouring_data_set.batch_names
            number_of_classes = colouring_data_set.number_of_batches
            class_palette = None
            label_sorter = None
        else:
            labels = colouring_data_set.labels
            class_names = colouring_data_set.class_names
            number_of_classes = colouring_data_set.number_of_classes
            class_palette = colouring_data_set.class_palette
            label_sorter = colouring_data_set.label_sorter

        if not class_palette:
            index_palette = style.lighter_palette(number_of_classes)
            class_palette = {
                class_name: index_palette[i]
                for i, class_name in enumerate(
                    sorted(class_names, key=label_sorter))
            }

        # Legend entries are (label, handle) pairs; always sort on the label
        # alone so that handles are never compared with each other
        if label_sorter:
            def legend_sort_key(label_and_handle):
                return label_sorter(label_and_handle[0])
        else:
            def legend_sort_key(label_and_handle):
                return label_and_handle[0]

        # Examples are shuffled, so should their labels be
        labels = labels[shuffled_indices]

        if ("labels" in colour_coding or "ids" in colour_coding
                or colour_coding == "batches"):
            colours = []
            classes = set()

            for i, label in enumerate(labels):
                colour = class_palette[label]
                colours.append(colour)

                # Plot one example for each class to add labels
                if label not in classes:
                    classes.add(label)
                    axis.scatter(values[i, 0],
                                 values[i, 1],
                                 color=colour,
                                 label=label,
                                 alpha=alpha)

            axis.scatter(values[:, 0], values[:, 1], c=colours, alpha=alpha)

            class_handles, class_labels = axis.get_legend_handles_labels()

            if class_labels:
                class_labels, class_handles = zip(
                    *sorted(zip(class_labels, class_handles),
                            key=legend_sort_key))
                class_label_maximum_width = max(map(len, class_labels))
                if class_label_maximum_width <= 5 and number_of_classes <= 20:
                    axis.legend(class_handles, class_labels, loc="best")
                else:
                    if number_of_classes <= 20:
                        class_label_columns = 2
                    else:
                        class_label_columns = 3
                    axis.legend(
                        class_handles,
                        class_labels,
                        bbox_to_anchor=(-0.1, 1.05, 1.1, 0.95),
                        loc="lower left",
                        ncol=class_label_columns,
                        mode="expand",
                        borderaxespad=0.,
                    )

        elif "class" in colour_coding:
            colours = []
            figure_name += "-" + normalise_string(str(class_name))
            ordered_indices_set = {str(class_name): [], "Remaining": []}

            for i, label in enumerate(labels):
                if label == class_name:
                    colour = class_palette[label]
                    ordered_indices_set[str(class_name)].append(i)
                else:
                    colour = style.NEUTRAL_COLOUR
                    ordered_indices_set["Remaining"].append(i)
                colours.append(colour)

            colours = numpy.array(colours)

            # Remaining examples are drawn below the highlighted class
            z_order_index = 1
            for label, ordered_indices in sorted(ordered_indices_set.items()):
                if label == "Remaining":
                    z_order = 0
                else:
                    z_order = z_order_index
                    z_order_index += 1
                ordered_values = values[ordered_indices]
                ordered_colours = colours[ordered_indices]
                axis.scatter(ordered_values[:, 0],
                             ordered_values[:, 1],
                             c=ordered_colours,
                             label=label,
                             alpha=alpha,
                             zorder=z_order)

            # Build the legend once, after all groups have been drawn
            handles, legend_labels = axis.get_legend_handles_labels()
            if legend_labels:
                legend_labels, handles = zip(
                    *sorted(zip(legend_labels, handles),
                            key=legend_sort_key))
                axis.legend(handles,
                            legend_labels,
                            bbox_to_anchor=(-0.1, 1.05, 1.1, 0.95),
                            loc="lower left",
                            ncol=2,
                            mode="expand",
                            borderaxespad=0.)

    elif colour_coding == "count_sum":

        n = colouring_data_set.count_sum[shuffled_indices].flatten()
        scatter_plot = axis.scatter(values[:, 0],
                                    values[:, 1],
                                    c=n,
                                    cmap=colour_map,
                                    alpha=alpha)
        colour_bar = figure.colorbar(scatter_plot)
        colour_bar.outline.set_linewidth(0)
        colour_bar.set_label("Total number of {}s per {}".format(
            colouring_data_set.terms["item"],
            colouring_data_set.terms["example"]))

    elif colour_coding == "feature":
        if feature_index is None:
            raise ValueError("Feature number not given.")
        # Valid indices run from 0 to number_of_features - 1
        if feature_index >= colouring_data_set.number_of_features:
            raise ValueError("Feature number higher than number of features.")

        feature_name = colouring_data_set.feature_names[feature_index]
        figure_name += "-{}".format(normalise_string(feature_name))

        f = colouring_data_set.values[shuffled_indices, feature_index]
        if scipy.sparse.issparse(f):
            f = f.toarray()
        f = f.squeeze()

        scatter_plot = axis.scatter(values[:, 0],
                                    values[:, 1],
                                    c=f,
                                    cmap=colour_map,
                                    alpha=alpha)
        colour_bar = figure.colorbar(scatter_plot)
        colour_bar.outline.set_linewidth(0)
        colour_bar.set_label(feature_name)

    elif colour_coding is None:
        axis.scatter(values[:, 0],
                     values[:, 1],
                     c="k",
                     alpha=alpha,
                     edgecolors="none")

    else:
        raise ValueError("Colour coding `{}` not found.".format(colour_coding))

    if centroids:
        # Only prior centroids are drawn; a missing entry means none
        if "prior" in centroids:
            prior_centroids = centroids["prior"]
        else:
            prior_centroids = None

        if prior_centroids:
            n_centroids = prior_centroids["probabilities"].shape[0]
        else:
            n_centroids = 0

        if n_centroids > 1:
            centroids_palette = style.darker_palette(n_centroids)

            means = prior_centroids["means"]
            covariance_matrices = prior_centroids["covariance_matrices"]

            for k in range(n_centroids):
                # Larger black cross below the coloured cross
                axis.scatter(means[k, 0],
                             means[k, 1],
                             s=60,
                             marker="x",
                             color="black",
                             linewidth=3)
                axis.scatter(means[k, 0],
                             means[k, 1],
                             marker="x",
                             facecolor=centroids_palette[k],
                             edgecolors="black")
                ellipse_fill, ellipse_edge = _covariance_matrix_as_ellipse(
                    covariance_matrices[k],
                    means[k],
                    colour=centroids_palette[k])
                axis.add_patch(ellipse_edge)
                axis.add_patch(ellipse_fill)

    if sampled_values is not None:

        sampled_values = sampled_values[:, :2].copy()
        if scipy.sparse.issparse(sampled_values):
            sampled_values = sampled_values.toarray()

        sample_colour_map = seaborn.blend_palette(("white", "purple"),
                                                  as_cmap=True)

        # Keep the axis limits set by the examples, so that the samples do
        # not rescale the plot
        x_limits = axis.get_xlim()
        y_limits = axis.get_ylim()

        axis.hexbin(sampled_values[:, 0],
                    sampled_values[:, 1],
                    gridsize=75,
                    cmap=sample_colour_map,
                    linewidths=0.,
                    edgecolors="none",
                    zorder=-100)

        axis.set_xlim(x_limits)
        axis.set_ylim(y_limits)

    # Reset marker size
    style.reset_plot_look()

    return figure, figure_name