def plot_centroid_covariance_matrices_evolution(covariance_matrices,
                                                distribution, name=None):
    """Plot the covariance "determinant" of every centroid over the epochs.

    The plotted quantity is the product of the diagonal entries of each
    covariance matrix (which equals the determinant for diagonal matrices).

    Arguments:
        covariance_matrices: Array whose shape unpacks into
            ``(n_epochs, n_centroids, _, _)``.
        distribution: Name of the distribution; it is normalised and used
            in both the figure name and the y-axis label.
        name: Optional name forwarded to ``saving.build_figure_name``.

    Returns:
        A tuple ``(figure, figure_name)``.
    """
    distribution = normalise_string(distribution)
    figure_name = "centroids_evolution-{}-covariance_matrices".format(
        distribution)
    figure_name = saving.build_figure_name(figure_name, name)
    y_label = _axis_label_for_symbol(
        symbol="\\Sigma",
        distribution=distribution,
        prefix="|",
        suffix="(y = k)|"
    )

    n_epochs, n_centroids, __, __ = covariance_matrices.shape

    # Product of the diagonal of each matrix, for all epochs and centroids
    # at once; cast to float to match the previously pre-allocated array.
    determinants = numpy.asarray(
        numpy.diagonal(covariance_matrices, axis1=2, axis2=3).prod(axis=2),
        dtype=float
    )

    # Default to a linear axis so that the scale is always defined. A
    # logarithmic axis is only considered when every value is strictly
    # positive (``determinants.all() > 0`` would merely test for non-zero
    # values) and there is at least one value to inspect.
    y_scale = "linear"
    if determinants.size > 0 and (determinants > 0).all():
        line_range_ratio = determinants.max(axis=0) / determinants.min(axis=0)
        range_ratio = line_range_ratio.max() / line_range_ratio.min()
        if range_ratio > 1e2:
            y_scale = "log"

    centroids_palette = style.darker_palette(n_centroids)
    epochs = numpy.arange(n_epochs) + 1

    figure = pyplot.figure()
    axis = figure.add_subplot(1, 1, 1)
    seaborn.despine()

    for k in range(n_centroids):
        axis.plot(
            epochs,
            determinants[:, k],
            color=centroids_palette[k],
            label="$k = {}$".format(k)
        )

    axis.set_xlabel("Epochs")
    axis.set_ylabel(y_label)
    axis.set_yscale(y_scale)
    axis.legend(loc="best")

    return figure, figure_name
def plot_centroid_probabilities_evolution(probabilities, distribution,
                                          linestyle="solid", name=None):
    """Plot the probability of every centroid as a function of the epoch.

    Arguments:
        probabilities: Array whose shape unpacks into
            ``(n_epochs, n_centroids)``; column ``k`` is drawn as one line.
        distribution: Name of the distribution; it is normalised and used
            in both the figure name and the y-axis label.
        linestyle: Line style passed on to every ``axis.plot`` call.
        name: Optional name forwarded to ``saving.build_figure_name``.

    Returns:
        A tuple ``(figure, figure_name)``.
    """
    distribution = normalise_string(distribution)
    y_label = _axis_label_for_symbol(
        symbol="\\pi",
        distribution=distribution,
        suffix="^k"
    )
    base_name = "centroids_evolution-{}-probabilities".format(distribution)
    figure_name = saving.build_figure_name(base_name, name)

    epoch_count, centroid_count = probabilities.shape
    palette = style.darker_palette(centroid_count)
    # Epochs are numbered from one on the x-axis.
    epoch_numbers = numpy.arange(epoch_count) + 1

    figure = pyplot.figure()
    axis = figure.add_subplot(1, 1, 1)
    seaborn.despine()

    label_template = "$k = {}$"
    for centroid in range(centroid_count):
        series = probabilities[:, centroid]
        line_options = {
            "color": palette[centroid],
            "linestyle": linestyle,
            "label": label_template.format(centroid),
        }
        axis.plot(epoch_numbers, series, **line_options)

    axis.set_xlabel("Epochs")
    axis.set_ylabel(y_label)
    axis.legend(loc="best")

    return figure, figure_name
def analyse_centroid_probabilities(centroids, name=None,
                                   analysis_level=None,
                                   export_options=None,
                                   analyses_directory=None):
    """Plot the centroid probabilities and save the resulting figure.

    Arguments:
        centroids: Container that may hold ``"posterior"`` and/or
            ``"prior"`` entries. Every non-empty entry is indexed with
            ``"probabilities"``.
        name: Optional name; it is normalised and placed first in the plot
            name.
        analysis_level: Replaced by ``defaults["analyses"]["analysis_level"]``
            when ``None``; not used otherwise in this function.
        export_options: Passed to ``figures.save_figure`` as ``options``.
        analyses_directory: Directory passed to ``figures.save_figure``;
            replaced by ``defaults["analyses"]["directory"]`` when ``None``.

    Raises:
        ValueError: If ``centroids`` provides neither posterior nor prior
            probabilities, since there is then nothing to plot.
    """
    if name:
        name = normalise_string(name)
    if analysis_level is None:
        analysis_level = defaults["analyses"]["analysis_level"]
    if analyses_directory is None:
        analyses_directory = defaults["analyses"]["directory"]

    posterior_probabilities = None
    prior_probabilities = None

    if "posterior" in centroids and centroids["posterior"]:
        posterior_probabilities = centroids["posterior"]["probabilities"]
    if "prior" in centroids and centroids["prior"]:
        prior_probabilities = centroids["prior"]["probabilities"]

    # Fail with a clear message instead of an unbound-variable error later.
    if posterior_probabilities is None and prior_probabilities is None:
        raise ValueError(
            "No posterior or prior centroid probabilities to plot.")

    print("Plotting centroid probabilities.")
    plot_time_start = time()

    # The prior takes precedence for the number of centroids when both
    # distributions are available.
    if prior_probabilities is not None:
        n_centroids = len(prior_probabilities)
    else:
        n_centroids = len(posterior_probabilities)

    centroids_palette = style.darker_palette(n_centroids)
    x_label = "$k$"

    distributions = []
    if posterior_probabilities is not None:
        distributions.append("posterior")
    if prior_probabilities is not None:
        distributions.append("prior")

    # The axis is labelled after the posterior whenever it is available.
    y_label = _axis_label_for_symbol(
        symbol="\\pi",
        distribution=normalise_string(distributions[0]),
        suffix="^k"
    )

    # A lone distribution without a name is passed as a plain string; every
    # other combination is passed as a list of name parts.
    if name:
        plot_name = [name] + distributions
    elif len(distributions) > 1:
        plot_name = distributions
    else:
        plot_name = distributions[0]

    figure, figure_name = figures.plot_probabilities(
        posterior_probabilities,
        prior_probabilities,
        x_label=x_label,
        y_label=y_label,
        palette=centroids_palette,
        uniform=False,
        name=plot_name
    )
    figures.save_figure(
        figure=figure,
        name=figure_name,
        options=export_options,
        directory=analyses_directory
    )

    plot_duration = time() - plot_time_start
    print("Centroid probabilities plotted and saved ({}).".format(
        format_duration(plot_duration)))
def plot_centroid_means_evolution(means, distribution, decomposed=False,
                                  name=None):
    """Plot the path of every centroid mean through a 2-d space over epochs.

    Arguments:
        means: Array whose shape unpacks into
            ``(n_epochs, n_centroids, latent_size)``; ``latent_size`` must
            be 2.
        distribution: Name of the distribution; it is normalised and used
            in both the figure name and the axis labels.
        decomposed: If true, the axis labels are built with ``"PCA"`` as
            the decomposition method, otherwise with an empty string.
        name: Optional name forwarded to ``saving.build_figure_name``.

    Returns:
        A tuple ``(figure, figure_name)``.

    Raises:
        ValueError: If the last dimension of ``means`` is not 2.
    """
    # Validate first: both coordinates 0 and 1 are indexed below, so fewer
    # than two dimensions is as invalid as more than two.
    n_epochs, n_centroids, latent_size = means.shape
    if latent_size != 2:
        raise ValueError("Dimensions of means should be 2.")

    symbol = "\\mu"
    decomposition_method = "PCA" if decomposed else ""
    distribution = normalise_string(distribution)
    suffix = "(y = k)"

    x_label = _axis_label_for_symbol(
        symbol=symbol,
        coordinate=1,
        decomposition_method=decomposition_method,
        distribution=distribution,
        suffix=suffix
    )
    y_label = _axis_label_for_symbol(
        symbol=symbol,
        coordinate=2,
        decomposition_method=decomposition_method,
        distribution=distribution,
        suffix=suffix
    )

    figure_name = "centroids_evolution-{}-means".format(distribution)
    figure_name = saving.build_figure_name(figure_name, name)

    centroids_palette = style.darker_palette(n_centroids)
    epochs = numpy.arange(n_epochs) + 1

    figure = pyplot.figure()
    axis = figure.add_subplot(1, 1, 1)
    seaborn.despine()

    # Scatter plot of the first centroid in the neutral colour, drawn at the
    # bottom; it is the mappable handed to the colour bar for the epochs.
    colour_bar_scatter_plot = axis.scatter(
        means[:, 0, 0], means[:, 0, 1],
        c=epochs,
        cmap=seaborn.dark_palette(style.NEUTRAL_COLOUR, as_cmap=True),
        zorder=0
    )

    for k in range(n_centroids):
        colour = centroids_palette[k]
        colour_map = seaborn.dark_palette(colour, as_cmap=True)
        axis.plot(
            means[:, k, 0], means[:, k, 1],
            color=colour,
            label="$k = {}$".format(k),
            zorder=k + 1
        )
        # Epoch-coloured markers are layered above all of the lines.
        axis.scatter(
            means[:, k, 0], means[:, k, 1],
            c=epochs,
            cmap=colour_map,
            zorder=n_centroids + k + 1
        )

    axis.legend(loc="best")

    colour_bar = figure.colorbar(colour_bar_scatter_plot)
    colour_bar.outline.set_linewidth(0)
    colour_bar.set_label("Epochs")

    axis.set_xlabel(x_label)
    axis.set_ylabel(y_label)

    return figure, figure_name
def _dense_first_two_columns(array):
    """Return a dense copy of the first two columns of ``array``."""
    array = array.copy()[:, :2]
    if scipy.sparse.issparse(array):
        # ``toarray`` exists on both sparse matrices and sparse arrays,
        # unlike the ``A`` shorthand.
        array = array.toarray()
    return array


def _legend_entry_sort_key(label_sorter):
    """Return a sort key for ``(label, handle)`` legend entries."""
    if label_sorter:
        return lambda entry: label_sorter(entry[0])
    # Sort on the label only, so that handles are never compared.
    return lambda entry: entry[0]


def _class_colouring_specifications(colour_coding, colouring_data_set):
    """Look up labels, class count, palette, and sorter for a colour coding."""
    if colour_coding == "predicted_cluster_ids":
        labels = colouring_data_set.predicted_cluster_ids
        class_names = numpy.unique(labels).tolist()
        number_of_classes = len(class_names)
        class_palette = None
        label_sorter = None
    elif colour_coding == "predicted_labels":
        labels = colouring_data_set.predicted_labels
        class_names = colouring_data_set.predicted_class_names
        number_of_classes = colouring_data_set.number_of_predicted_classes
        class_palette = colouring_data_set.predicted_class_palette
        label_sorter = colouring_data_set.predicted_label_sorter
    elif colour_coding == "predicted_superset_labels":
        labels = colouring_data_set.predicted_superset_labels
        class_names = colouring_data_set.predicted_superset_class_names
        number_of_classes = (
            colouring_data_set.number_of_predicted_superset_classes)
        class_palette = colouring_data_set.predicted_superset_class_palette
        label_sorter = colouring_data_set.predicted_superset_label_sorter
    elif "superset" in colour_coding:
        labels = colouring_data_set.superset_labels
        class_names = colouring_data_set.superset_class_names
        number_of_classes = colouring_data_set.number_of_superset_classes
        class_palette = colouring_data_set.superset_class_palette
        label_sorter = colouring_data_set.superset_label_sorter
    elif colour_coding == "batches":
        labels = colouring_data_set.batch_indices.flatten()
        class_names = colouring_data_set.batch_names
        number_of_classes = colouring_data_set.number_of_batches
        class_palette = None
        label_sorter = None
    else:
        labels = colouring_data_set.labels
        class_names = colouring_data_set.class_names
        number_of_classes = colouring_data_set.number_of_classes
        class_palette = colouring_data_set.class_palette
        label_sorter = colouring_data_set.label_sorter

    # Fall back to an index-based palette over the sorted class names.
    if not class_palette:
        index_palette = style.lighter_palette(number_of_classes)
        class_palette = {
            class_name: index_palette[i]
            for i, class_name in enumerate(
                sorted(class_names, key=label_sorter))
        }

    return labels, number_of_classes, class_palette, label_sorter


def plot_values(values, colour_coding=None, colouring_data_set=None,
                centroids=None, sampled_values=None, class_name=None,
                feature_index=None, figure_labels=None, example_tag=None,
                name="scatter"):
    """Draw a scatter plot of the first two columns of ``values``.

    The examples are shuffled with a fixed seed before plotting to remove
    any prior order.

    Arguments:
        values: Dense array or SciPy sparse container; only the first two
            columns are plotted.
        colour_coding: ``None`` for black markers, ``"count_sum"``,
            ``"feature"``, ``"batches"``, or a name containing ``"labels"``,
            ``"ids"``, or ``"class"``. The name is normalised first.
        colouring_data_set: Data set supplying labels, palettes, count sums,
            or feature values; required whenever ``colour_coding`` is given.
        centroids: Optional container; when its ``"prior"`` entry describes
            more than one centroid, their means and covariance ellipses are
            drawn.
        sampled_values: Optional values drawn as a hexagonal-bin background
            without changing the axis limits.
        class_name: Class to highlight when the colour coding contains
            ``"class"`` (and none of ``"labels"``/``"ids"``).
        feature_index: Index of the feature to colour by when
            ``colour_coding`` is ``"feature"``.
        figure_labels: Optional mapping with ``"title"``, ``"x label"``, and
            ``"y label"`` entries.
        example_tag: Unused in this function.
        name: Base of the figure name.

    Returns:
        A tuple ``(figure, figure_name)``.

    Raises:
        ValueError: If a colour coding is given without a colouring data
            set, if the colour coding is not recognised, or if
            ``feature_index`` is missing or out of range for the
            ``"feature"`` colour coding.
    """
    # Checked first, since the data set is dereferenced while the figure
    # name is being built.
    if colour_coding and colouring_data_set is None:
        raise ValueError("Colouring data set not given.")

    figure_name = name

    if figure_labels:
        title = figure_labels.get("title")
        x_label = figure_labels.get("x label")
        y_label = figure_labels.get("y label")
    else:
        title = "none"
        x_label = "$x$"
        y_label = "$y$"

    if not title:
        title = "none"

    figure_name += "-" + normalise_string(title)

    if colour_coding:
        colour_coding = normalise_string(colour_coding)
        figure_name += "-" + colour_coding
        if "predicted" in colour_coding:
            if colouring_data_set.prediction_specifications:
                figure_name += "-" + (
                    colouring_data_set.prediction_specifications.name)
            else:
                figure_name += "-unknown_prediction_method"

    if sampled_values is not None:
        figure_name += "-samples"

    values = _dense_first_two_columns(values)

    # Randomise examples in values to remove any prior order
    n_examples, __ = values.shape
    random_state = numpy.random.RandomState(117)
    shuffled_indices = random_state.permutation(n_examples)
    values = values[shuffled_indices]

    # Adjust marker size based on number of examples
    style._adjust_marker_size_for_scatter_plots(n_examples)

    try:
        figure = pyplot.figure()
        axis = figure.add_subplot(1, 1, 1)
        seaborn.despine()

        axis.set_xlabel(x_label)
        axis.set_ylabel(y_label)

        colour_map = seaborn.dark_palette(
            style.STANDARD_PALETTE[0], as_cmap=True)

        # Markers are made translucent when samples are drawn behind them.
        alpha = 0.5 if sampled_values is not None else 1

        if colour_coding and (
                "labels" in colour_coding
                or "ids" in colour_coding
                or "class" in colour_coding
                or colour_coding == "batches"):

            labels, number_of_classes, class_palette, label_sorter = (
                _class_colouring_specifications(
                    colour_coding, colouring_data_set))

            # Examples are shuffled, so should their labels be
            labels = labels[shuffled_indices]

            if ("labels" in colour_coding or "ids" in colour_coding
                    or colour_coding == "batches"):
                colours = []
                classes = set()

                for i, label in enumerate(labels):
                    colour = class_palette[label]
                    colours.append(colour)

                    # Plot one example for each class to add labels
                    if label not in classes:
                        classes.add(label)
                        axis.scatter(
                            values[i, 0], values[i, 1],
                            color=colour, label=label, alpha=alpha)

                axis.scatter(
                    values[:, 0], values[:, 1], c=colours, alpha=alpha)

                class_handles, class_labels = (
                    axis.get_legend_handles_labels())

                if class_labels:
                    class_labels, class_handles = zip(*sorted(
                        zip(class_labels, class_handles),
                        key=_legend_entry_sort_key(label_sorter)
                    ))
                    class_label_maximum_width = max(map(len, class_labels))
                    if (class_label_maximum_width <= 5
                            and number_of_classes <= 20):
                        axis.legend(class_handles, class_labels, loc="best")
                    else:
                        # Long or numerous labels go in a multi-column
                        # legend above the axes.
                        if number_of_classes <= 20:
                            class_label_columns = 2
                        else:
                            class_label_columns = 3
                        axis.legend(
                            class_handles,
                            class_labels,
                            bbox_to_anchor=(-0.1, 1.05, 1.1, 0.95),
                            loc="lower left",
                            ncol=class_label_columns,
                            mode="expand",
                            borderaxespad=0.,
                        )

            elif "class" in colour_coding:
                colours = []
                figure_name += "-" + normalise_string(str(class_name))
                ordered_indices_set = {
                    str(class_name): [],
                    "Remaining": []
                }

                for i, label in enumerate(labels):
                    if label == class_name:
                        colour = class_palette[label]
                        ordered_indices_set[str(class_name)].append(i)
                    else:
                        colour = style.NEUTRAL_COLOUR
                        ordered_indices_set["Remaining"].append(i)
                    colours.append(colour)

                colours = numpy.array(colours)

                z_order_index = 1
                for label, ordered_indices in sorted(
                        ordered_indices_set.items()):
                    # The remaining examples are drawn below the
                    # highlighted class.
                    if label == "Remaining":
                        z_order = 0
                    else:
                        z_order = z_order_index
                        z_order_index += 1
                    ordered_values = values[ordered_indices]
                    ordered_colours = colours[ordered_indices]
                    axis.scatter(
                        ordered_values[:, 0], ordered_values[:, 1],
                        c=ordered_colours, label=label, alpha=alpha,
                        zorder=z_order)

                legend_handles, legend_labels = (
                    axis.get_legend_handles_labels())
                if legend_labels:
                    legend_labels, legend_handles = zip(*sorted(
                        zip(legend_labels, legend_handles),
                        key=_legend_entry_sort_key(label_sorter)
                    ))
                    axis.legend(
                        legend_handles,
                        legend_labels,
                        bbox_to_anchor=(-0.1, 1.05, 1.1, 0.95),
                        loc="lower left",
                        ncol=2,
                        mode="expand",
                        borderaxespad=0.
                    )

        elif colour_coding == "count_sum":
            n = colouring_data_set.count_sum[shuffled_indices].flatten()
            scatter_plot = axis.scatter(
                values[:, 0], values[:, 1],
                c=n, cmap=colour_map, alpha=alpha)
            colour_bar = figure.colorbar(scatter_plot)
            colour_bar.outline.set_linewidth(0)
            colour_bar.set_label("Total number of {}s per {}".format(
                colouring_data_set.terms["item"],
                colouring_data_set.terms["example"]))

        elif colour_coding == "feature":
            if feature_index is None:
                raise ValueError("Feature number not given.")
            # Valid indices stop at ``number_of_features - 1``.
            if feature_index >= colouring_data_set.number_of_features:
                raise ValueError(
                    "Feature number higher than number of features.")

            feature_name = colouring_data_set.feature_names[feature_index]
            figure_name += "-{}".format(normalise_string(feature_name))

            f = colouring_data_set.values[shuffled_indices, feature_index]
            if scipy.sparse.issparse(f):
                f = f.toarray()
            f = f.squeeze()

            scatter_plot = axis.scatter(
                values[:, 0], values[:, 1],
                c=f, cmap=colour_map, alpha=alpha)
            colour_bar = figure.colorbar(scatter_plot)
            colour_bar.outline.set_linewidth(0)
            colour_bar.set_label(feature_name)

        elif colour_coding is None:
            axis.scatter(
                values[:, 0], values[:, 1],
                c="k", alpha=alpha, edgecolors="none")

        else:
            raise ValueError(
                "Colour coding `{}` not found.".format(colour_coding))

        if centroids:
            # The prior entry is optional.
            prior_centroids = (
                centroids["prior"] if "prior" in centroids else None)

            if prior_centroids:
                n_centroids = prior_centroids["probabilities"].shape[0]
            else:
                n_centroids = 0

            if n_centroids > 1:
                centroids_palette = style.darker_palette(n_centroids)
                means = prior_centroids["means"]
                covariance_matrices = prior_centroids["covariance_matrices"]

                for k in range(n_centroids):
                    # A larger black cross underneath outlines the coloured
                    # cross drawn on top of it.
                    axis.scatter(
                        means[k, 0], means[k, 1],
                        s=60, marker="x", color="black", linewidth=3)
                    axis.scatter(
                        means[k, 0], means[k, 1],
                        marker="x", facecolor=centroids_palette[k],
                        edgecolors="black")
                    ellipse_fill, ellipse_edge = (
                        _covariance_matrix_as_ellipse(
                            covariance_matrices[k],
                            means[k],
                            colour=centroids_palette[k]
                        )
                    )
                    axis.add_patch(ellipse_edge)
                    axis.add_patch(ellipse_fill)

        if sampled_values is not None:
            sampled_values = _dense_first_two_columns(sampled_values)

            sample_colour_map = seaborn.blend_palette(
                ("white", "purple"), as_cmap=True)

            # Keep the limits set by the scatter plot; the samples should
            # not rescale the axes.
            x_limits = axis.get_xlim()
            y_limits = axis.get_ylim()

            axis.hexbin(
                sampled_values[:, 0], sampled_values[:, 1],
                gridsize=75,
                cmap=sample_colour_map,
                linewidths=0., edgecolors="none",
                zorder=-100
            )

            axis.set_xlim(x_limits)
            axis.set_ylim(y_limits)
    finally:
        # Reset marker size, also when plotting fails part-way through
        style.reset_plot_look()

    return figure, figure_name