Exemple #1
0
def analyse_centroid_probabilities(centroids,
                                   name=None,
                                   analysis_level=None,
                                   export_options=None,
                                   analyses_directory=None):
    """Plot and save the prior and/or posterior centroid probabilities.

    Args:
        centroids: Mapping which may contain the keys ``"posterior"`` and
            ``"prior"``. Each entry that is present and truthy must itself
            provide a ``"probabilities"`` sequence.
        name: Optional name put first in the figure name (after being
            passed through ``normalise_string``).
        analysis_level: Falls back to the configured default when ``None``.
            It is not otherwise used by this function.
        export_options: Passed on to ``figures.save_figure`` as ``options``.
        analyses_directory: Directory in which the figure is saved. Falls
            back to the configured default when ``None``.

    Returns:
        None. If neither distribution is available, a message is printed
        and nothing is plotted.
    """

    if name:
        name = normalise_string(name)
    if analysis_level is None:
        analysis_level = defaults["analyses"]["analysis_level"]
    if analyses_directory is None:
        analyses_directory = defaults["analyses"]["directory"]

    # Distributions actually provided, always in the order posterior, prior.
    available_kinds = [
        kind for kind in ("posterior", "prior")
        if kind in centroids and centroids[kind]
    ]

    # Without any distribution there is no palette size, axis label, or
    # figure name to derive, so there is nothing meaningful to plot.
    if not available_kinds:
        print("No centroid probabilities to plot.")
        return

    print("Plotting centroid probabilities.")
    plot_time_start = time()

    probabilities = {
        kind: centroids[kind]["probabilities"] for kind in available_kinds
    }
    posterior_probabilities = probabilities.get("posterior")
    prior_probabilities = probabilities.get("prior")

    # When both are given, the prior determines the number of centroids.
    n_centroids = len(probabilities[available_kinds[-1]])

    centroids_palette = style.darker_palette(n_centroids)
    x_label = "$k$"

    # The axis is labelled for the posterior whenever it is present.
    y_label = _axis_label_for_symbol(
        symbol="\\pi",
        distribution=normalise_string(available_kinds[0]),
        suffix="^k")

    # A single unnamed distribution is passed as a plain string; every other
    # combination is passed as a list of name parts.
    if name:
        plot_name = [name] + available_kinds
    elif len(available_kinds) == 1:
        plot_name = available_kinds[0]
    else:
        plot_name = available_kinds

    figure, figure_name = figures.plot_probabilities(posterior_probabilities,
                                                     prior_probabilities,
                                                     x_label=x_label,
                                                     y_label=y_label,
                                                     palette=centroids_palette,
                                                     uniform=False,
                                                     name=plot_name)
    figures.save_figure(figure=figure,
                        name=figure_name,
                        options=export_options,
                        directory=analyses_directory)

    plot_duration = time() - plot_time_start
    print("Centroid probabilities plotted and saved ({}).".format(
        format_duration(plot_duration)))
Exemple #2
0
def analyse_distributions(data_set,
                          colouring_data_set=None,
                          cutoffs=None,
                          preprocessed=False,
                          analysis_level="normal",
                          export_options=None,
                          analyses_directory=None):
    """Plot and save histograms describing the values of a data set.

    The following figures are produced in the ``histograms`` subdirectory
    of the analyses directory:

    * class and superset-class distributions (only when the data set is
      its own colouring data set),
    * the distribution of all values, with linear and logarithmic x axes,
    * value distributions with cut-offs (``"extensive"`` level only, for
      data sets with ``example_type == "counts"``),
    * the distribution of ``data_set.count_sum``, and
    * per-class value and count-sum distributions (``"extensive"`` level
      only, when the colouring data set has labels).

    Args:
        data_set: Data set to analyse.
        colouring_data_set: Data set providing labels, class names and
            palettes for the per-class plots. Defaults to ``data_set``.
        cutoffs: Iterable of cut-off values for the cut-off histograms.
        preprocessed: If true, the values are never plotted as discrete.
        analysis_level: Level of analysis; ``None`` selects the configured
            default.
        export_options: Passed on to ``figures.save_figure`` as ``options``.
        analyses_directory: Base directory for the figures; ``None``
            selects the configured default.
    """

    if not colouring_data_set:
        colouring_data_set = data_set

    if analysis_level is None:
        analysis_level = defaults["analyses"]["analysis_level"]

    if analyses_directory is None:
        analyses_directory = defaults["analyses"]["directory"]
    distribution_directory = os.path.join(analyses_directory, "histograms")

    data_set_title = data_set.kind + " set"
    data_set_name = data_set.kind
    if data_set.version != "original":
        # Non-original versions get the version in the title and no data
        # set name in the figure names.
        data_set_title = data_set.version + " " + data_set_title
        data_set_name = None

    data_set_discreteness = data_set.discreteness and not preprocessed

    print("Plotting distributions for {}.".format(data_set_title))

    # Class distribution
    if (data_set.number_of_classes and data_set.number_of_classes < 100
            and colouring_data_set == data_set):
        distribution_time_start = time()
        figure, figure_name = figures.plot_class_histogram(
            labels=data_set.labels,
            class_names=data_set.class_names,
            class_palette=data_set.class_palette,
            normed=True,
            scale="linear",
            label_sorter=data_set.label_sorter,
            name=data_set_name)
        figures.save_figure(figure=figure,
                            name=figure_name,
                            options=export_options,
                            directory=distribution_directory)
        distribution_duration = time() - distribution_time_start
        print("    Class distribution plotted and saved ({}).".format(
            format_duration(distribution_duration)))

    # Superset class distribution
    if data_set.label_superset and colouring_data_set == data_set:
        distribution_time_start = time()
        figure, figure_name = figures.plot_class_histogram(
            labels=data_set.superset_labels,
            class_names=data_set.superset_class_names,
            class_palette=data_set.superset_class_palette,
            normed=True,
            scale="linear",
            label_sorter=data_set.superset_label_sorter,
            name=[data_set_name, "superset"])
        figures.save_figure(figure=figure,
                            name=figure_name,
                            options=export_options,
                            directory=distribution_directory)
        distribution_duration = time() - distribution_time_start
        print("    Superset class distribution plotted and saved ({}).".format(
            format_duration(distribution_duration)))

    # Count distribution
    series, excess_zero_count = _flattened_values_with_zero_count(
        data_set.values)
    distribution_time_start = time()
    for x_scale in ["linear", "log"]:
        figure, figure_name = figures.plot_histogram(
            series=series,
            excess_zero_count=excess_zero_count,
            label=data_set.tags["value"].capitalize() + "s",
            discrete=data_set_discreteness,
            normed=True,
            x_scale=x_scale,
            y_scale="log",
            name=["counts", data_set_name])
        figures.save_figure(figure=figure,
                            name=figure_name,
                            options=export_options,
                            directory=distribution_directory)
    distribution_duration = time() - distribution_time_start
    print("    Count distribution plotted and saved ({}).".format(
        format_duration(distribution_duration)))

    # Count distributions with cut-off
    if (analysis_level == "extensive" and cutoffs
            and data_set.example_type == "counts"):
        distribution_time_start = time()
        for cutoff in cutoffs:
            figure, figure_name = figures.plot_cutoff_count_histogram(
                series=series,
                excess_zero_count=excess_zero_count,
                cutoff=cutoff,
                normed=True,
                scale="log",
                name=data_set_name)
            figures.save_figure(figure=figure,
                                name=figure_name,
                                options=export_options,
                                directory=distribution_directory + "-counts")
        distribution_duration = time() - distribution_time_start
        print("    Count distributions with cut-offs plotted and saved ({}).".
              format(format_duration(distribution_duration)))

    # Count sum distribution
    distribution_time_start = time()
    figure, figure_name = figures.plot_histogram(
        series=data_set.count_sum,
        label="Total number of {}s per {}".format(data_set.tags["item"],
                                                  data_set.tags["example"]),
        normed=True,
        y_scale="log",
        name=["count sum", data_set_name])
    figures.save_figure(figure=figure,
                        name=figure_name,
                        options=export_options,
                        directory=distribution_directory)
    distribution_duration = time() - distribution_time_start
    print("    Count sum distribution plotted and saved ({}).".format(
        format_duration(distribution_duration)))

    # Count distributions and count sum distributions for each class
    if analysis_level == "extensive" and colouring_data_set.labels is not None:

        class_count_distribution_directory = distribution_directory
        if data_set.version == "original":
            class_count_distribution_directory += "-classes"

        # Superset labels take precedence over regular labels when present.
        if colouring_data_set.label_superset:
            labels = colouring_data_set.superset_labels
            class_names = colouring_data_set.superset_class_names
            class_palette = colouring_data_set.superset_class_palette
            label_sorter = colouring_data_set.superset_label_sorter
        else:
            labels = colouring_data_set.labels
            class_names = colouring_data_set.class_names
            class_palette = colouring_data_set.class_palette
            label_sorter = colouring_data_set.label_sorter

        if not class_palette:
            # Fall back to a generated palette, assigned in sorted order.
            index_palette = style.lighter_palette(
                colouring_data_set.number_of_classes)
            class_palette = {
                class_name: index_palette[i]
                for i, class_name in enumerate(
                    sorted(class_names, key=label_sorter))
            }

        distribution_time_start = time()
        for class_name in class_names:

            class_indices = labels == class_name

            # Classes without any examples are skipped.
            if not class_indices.any():
                continue

            # Only the values of the current class are plotted, for dense
            # and sparse values alike.
            class_series, class_excess_zero_count = (
                _flattened_values_with_zero_count(
                    data_set.values[class_indices]))

            figure, figure_name = figures.plot_histogram(
                series=class_series,
                excess_zero_count=class_excess_zero_count,
                label=data_set.tags["value"].capitalize() + "s",
                discrete=data_set_discreteness,
                normed=True,
                y_scale="log",
                colour=class_palette[class_name],
                name=["counts", data_set_name, "class", class_name])
            figures.save_figure(figure=figure,
                                name=figure_name,
                                options=export_options,
                                directory=class_count_distribution_directory)

        distribution_duration = time() - distribution_time_start
        print("    Count distributions for each class plotted and saved ({}).".
              format(format_duration(distribution_duration)))

        distribution_time_start = time()
        for class_name in class_names:

            class_indices = labels == class_name
            if not class_indices.any():
                continue

            figure, figure_name = figures.plot_histogram(
                series=data_set.count_sum[class_indices],
                label="Total number of {}s per {}".format(
                    data_set.tags["item"], data_set.tags["example"]),
                normed=True,
                y_scale="log",
                colour=class_palette[class_name],
                name=["count sum", data_set_name, "class", class_name])
            figures.save_figure(figure=figure,
                                name=figure_name,
                                options=export_options,
                                directory=class_count_distribution_directory)

        distribution_duration = time() - distribution_time_start
        print("    "
              "Count sum distributions for each class plotted and saved ({}).".
              format(format_duration(distribution_duration)))

    print()


def _flattened_values_with_zero_count(values):
    """Return the entries of `values` as a 1-D series plus the number of
    zeros that a sparse matrix does not store explicitly.

    The zero count is derived from the shape rather than from ``size``,
    because ``size`` of a standard SciPy sparse matrix is the number of
    stored entries and would always give zero here.
    """
    if scipy.sparse.issparse(values):
        stored_values = values.data
        number_of_entries = values.shape[0] * values.shape[1]
        return stored_values, number_of_entries - stored_values.size
    return values.reshape(-1), 0
Exemple #3
0
def analyse_decompositions(data_sets,
                           other_data_sets=None,
                           centroids=None,
                           colouring_data_set=None,
                           sampled_data_set=None,
                           decomposition_methods=None,
                           highlight_feature_indices=None,
                           symbol=None,
                           title="data set",
                           specifier=None,
                           analysis_level=None,
                           export_options=None,
                           analyses_directory=None):
    """Decompose data sets to two dimensions and plot them in several ways.

    Each data set is first plotted as is (only if it has exactly two
    features) and then once per decomposition method. Every plot is made
    without colour-coding and, where the colouring data set provides the
    information, colour-coded by labels, superset labels, individual
    classes (``"extensive"`` level only), batches, predicted cluster IDs,
    predicted (superset) labels, count sum, and highlighted features.

    Args:
        data_sets: A data set, or a list, tuple, or dictionary of data
            sets. Data sets with at most one feature are skipped.
        other_data_sets: Optional alternative data set(s), one per data
            set. When given, the alternative values are plotted after
            having been passed to ``decompose`` together with the values
            of the data set.
        centroids: Centroids to plot; only used for data sets whose
            version is ``"z"`` or ``"z1"``.
        colouring_data_set: Data set used for colour-coding. Defaults to
            the data set being plotted.
        sampled_data_set: Optional data set whose values are plotted as
            samples in an additional figure.
        decomposition_methods: Method name or list/tuple of method names;
            ``None`` selects the configured default.
        highlight_feature_indices: Feature index or list/tuple of feature
            indices to colour-code by; ``None`` selects the configured
            default.
        symbol: Symbol for the axis labels; defaults to the specification
            returned by ``specifier``.
        title: Base title of the data sets, also used for figure names.
        specifier: Optional callable mapping a data set to a string added
            to its title and name.
        analysis_level: Level of analysis; ``None`` selects the configured
            default.
        export_options: Passed on to ``figures.save_figure`` as ``options``.
        analyses_directory: Base directory for the figures; ``None``
            selects the configured default.

    Raises:
        ValueError: If the numbers of data sets and alternative data sets
            differ.
    """

    if analysis_level is None:
        analysis_level = defaults["analyses"]["analysis_level"]

    centroids_original = centroids

    if isinstance(data_sets, dict):
        data_sets = list(data_sets.values())

    if not isinstance(data_sets, (list, tuple)):
        data_sets = [data_sets]

    if other_data_sets is None:
        other_data_sets = [None] * len(data_sets)
    elif not isinstance(other_data_sets, (list, tuple)):
        other_data_sets = [other_data_sets]

    if len(data_sets) != len(other_data_sets):
        raise ValueError(
            "Lists of data sets and alternative data sets do not have the "
            "same length.")

    specification = None
    base_symbol = symbol
    original_title = title

    # Remember what the caller asked for, so that the default colouring
    # data set can be chosen separately for every data set.
    requested_colouring_data_set = colouring_data_set

    if decomposition_methods is None:
        decomposition_methods = [defaults["decomposition_method"]]
    elif not isinstance(decomposition_methods, (list, tuple)):
        decomposition_methods = [decomposition_methods]
    # A new list is built so that tuples are accepted and the caller's
    # sequence is left untouched. The leading ``None`` stands for plotting
    # the values without decomposing them.
    decomposition_methods = [None] + list(decomposition_methods)

    if highlight_feature_indices is None:
        highlight_feature_indices = defaults["analyses"][
            "highlight_feature_indices"]
    elif not isinstance(highlight_feature_indices, (list, tuple)):
        highlight_feature_indices = [highlight_feature_indices]
    else:
        highlight_feature_indices = list(highlight_feature_indices)

    if analyses_directory is None:
        analyses_directory = defaults["analyses"]["directory"]

    for data_set, other_data_set in zip(data_sets, other_data_sets):

        # Nothing to decompose or plot in two dimensions.
        if data_set.values.shape[1] <= 1:
            continue

        title = original_title
        name = normalise_string(title)

        if specifier:
            specification = specifier(data_set)

        if specification:
            name += "-" + str(specification)
            title += " for " + specification

        title += " set"

        if requested_colouring_data_set:
            colouring_data_set = requested_colouring_data_set
        else:
            colouring_data_set = data_set

        if data_set.version in ["z", "z1"]:
            centroids = copy.deepcopy(centroids_original)
        else:
            centroids = None

        if other_data_set:
            title = "{} set values in {}".format(other_data_set.version, title)
            name = other_data_set.version + "-" + name

        decompositions_directory = os.path.join(analyses_directory, name)

        for decomposition_method in decomposition_methods:

            other_values = None
            sampled_values = None

            if other_data_set:
                other_values = other_data_set.values

            if sampled_data_set:
                sampled_values = sampled_data_set.values

            values_decomposed = data_set.values
            other_values_decomposed = other_values
            sampled_values_decomposed = sampled_values
            centroids_decomposed = centroids

            if not decomposition_method:
                # Undecomposed values can only be plotted if they already
                # are two-dimensional.
                if data_set.number_of_features != 2:
                    continue
            else:
                decomposition_method = proper_string(
                    decomposition_method, DECOMPOSITION_METHOD_NAMES)

                other_value_sets_decomposed = {}
                if other_values is not None:
                    other_value_sets_decomposed["other"] = other_values
                if sampled_values is not None:
                    other_value_sets_decomposed["sampled"] = sampled_values
                if not other_value_sets_decomposed:
                    other_value_sets_decomposed = None

                if decomposition_method == "t-SNE":
                    if (data_set.number_of_examples >
                            MAXIMUM_NUMBER_OF_EXAMPLES_FOR_TSNE):
                        print(
                            "The number of examples for {}".format(title),
                            "is too large to decompose it",
                            "using {}. Skipping.".format(decomposition_method))
                        print()
                        continue

                    elif (data_set.number_of_features >
                          MAXIMUM_NUMBER_OF_FEATURES_FOR_TSNE):
                        number_of_pca_components_before_tsne = min(
                            MAXIMUM_NUMBER_OF_PCA_COMPONENTS_BEFORE_TSNE,
                            data_set.number_of_examples - 1)
                        print(
                            "The number of features for {}".format(title),
                            "is too large to decompose it",
                            "using {} in due time.".format(
                                decomposition_method))
                        print("Decomposing {} to {} components using PCA "
                              "beforehand.".format(
                                  title, number_of_pca_components_before_tsne))
                        decompose_time_start = time()
                        (values_decomposed, other_value_sets_decomposed,
                         centroids_decomposed) = decompose(
                             values_decomposed,
                             other_value_sets=other_value_sets_decomposed,
                             centroids=centroids_decomposed,
                             method="pca",
                             number_of_components=(
                                 number_of_pca_components_before_tsne))
                        decompose_duration = time() - decompose_time_start
                        print("{} pre-decomposed ({}).".format(
                            capitalise_string(title),
                            format_duration(decompose_duration)))

                    else:
                        # Hand dense values to t-SNE, including the other
                        # value sets which are passed on to ``decompose``.
                        values_decomposed = _dense_values(values_decomposed)
                        if other_value_sets_decomposed:
                            other_value_sets_decomposed = {
                                key: _dense_values(value_set)
                                for key, value_set in (
                                    other_value_sets_decomposed.items())
                            }

                print("Decomposing {} using {}.".format(
                    title, decomposition_method))
                decompose_time_start = time()
                (values_decomposed, other_value_sets_decomposed,
                 centroids_decomposed) = decompose(
                     values_decomposed,
                     other_value_sets=other_value_sets_decomposed,
                     centroids=centroids_decomposed,
                     method=decomposition_method,
                     number_of_components=2)
                decompose_duration = time() - decompose_time_start
                print("{} decomposed ({}).".format(
                    capitalise_string(title),
                    format_duration(decompose_duration)))
                print()

                if other_value_sets_decomposed:
                    other_values_decomposed = other_value_sets_decomposed.get(
                        "other")
                    sampled_values_decomposed = (
                        other_value_sets_decomposed.get("sampled"))

            if base_symbol:
                symbol = base_symbol
            else:
                symbol = specification

            x_label = _axis_label_for_symbol(
                symbol=symbol,
                coordinate=1,
                decomposition_method=decomposition_method,
            )
            y_label = _axis_label_for_symbol(
                symbol=symbol,
                coordinate=2,
                decomposition_method=decomposition_method,
            )

            figure_labels = {
                "title": decomposition_method,
                "x label": x_label,
                "y label": y_label
            }

            if other_data_set:
                plot_values_decomposed = other_values_decomposed
            else:
                plot_values_decomposed = values_decomposed

            if plot_values_decomposed is None:
                # Skip only this combination, like the other skip paths, so
                # that remaining methods and data sets are still analysed.
                print("No values to plot.\n")
                continue

            print("Plotting {}{}.".format(
                "decomposed " if decomposition_method else "", title))

            # Arguments common to every figure of this decomposition.
            plot_options = {
                "values": plot_values_decomposed,
                "shared_arguments": {
                    "centroids": centroids_decomposed,
                    "figure_labels": figure_labels,
                    "example_tag": data_set.tags["example"],
                    "name": name,
                },
                "title": title,
                "export_options": export_options,
                "directory": decompositions_directory,
            }
            coloured_plot_options = dict(
                plot_options, colouring_data_set=colouring_data_set)

            # No colour-coding
            _plot_and_save_decomposed_values("", **plot_options)

            # Samples
            if sampled_data_set:
                _plot_and_save_decomposed_values(
                    " (with samples)",
                    variations=[
                        {"sampled_values": sampled_values_decomposed}],
                    **plot_options)

            # Labels
            if colouring_data_set.labels is not None:
                _plot_and_save_decomposed_values(
                    " (with labels)",
                    colour_coding="labels",
                    **coloured_plot_options)

                # Superset labels
                if colouring_data_set.superset_labels is not None:
                    _plot_and_save_decomposed_values(
                        " (with superset labels)",
                        colour_coding="superset labels",
                        **coloured_plot_options)

                # For each class
                if analysis_level == "extensive":
                    if colouring_data_set.number_of_classes <= 10:
                        _plot_and_save_decomposed_values(
                            " (for each class)",
                            colour_coding="class",
                            variations=[
                                {"class_name": class_name}
                                for class_name in (
                                    colouring_data_set.class_names)
                            ],
                            **coloured_plot_options)

                    # The limit is checked on the data set which supplies
                    # the superset classes being plotted.
                    if (colouring_data_set.superset_labels is not None
                            and colouring_data_set.number_of_superset_classes
                            <= 10):
                        _plot_and_save_decomposed_values(
                            " (for each superset class)",
                            colour_coding="superset class",
                            variations=[
                                {"class_name": superset_class_name}
                                for superset_class_name in (
                                    colouring_data_set.superset_class_names)
                            ],
                            **coloured_plot_options)

            # Batches
            if colouring_data_set.has_batches:
                _plot_and_save_decomposed_values(
                    " (with batches)",
                    colour_coding="batches",
                    **coloured_plot_options)

            # Cluster IDs
            if colouring_data_set.has_predicted_cluster_ids:
                _plot_and_save_decomposed_values(
                    " (with predicted cluster IDs)",
                    colour_coding="predicted cluster IDs",
                    **coloured_plot_options)

            # Predicted labels
            if colouring_data_set.has_predicted_labels:
                _plot_and_save_decomposed_values(
                    " (with predicted labels)",
                    colour_coding="predicted labels",
                    **coloured_plot_options)

            # Predicted superset labels
            if colouring_data_set.has_predicted_superset_labels:
                _plot_and_save_decomposed_values(
                    " (with predicted superset labels)",
                    colour_coding="predicted superset labels",
                    **coloured_plot_options)

            # Count sum
            _plot_and_save_decomposed_values(
                " (with count sum)",
                colour_coding="count sum",
                **coloured_plot_options)

            # Features
            for feature_index in highlight_feature_indices:
                _plot_and_save_decomposed_values(
                    " (with {})".format(
                        data_set.feature_names[feature_index]),
                    colour_coding="feature",
                    variations=[{"feature_index": feature_index}],
                    **coloured_plot_options)

            print()


def _dense_values(values):
    """Return `values` as a dense array if it is a SciPy sparse container."""
    # ``toarray()`` is used because it is available on both sparse matrices
    # and sparse arrays.
    if scipy.sparse.issparse(values):
        return values.toarray()
    return values


def _plot_and_save_decomposed_values(description,
                                     values,
                                     shared_arguments,
                                     title,
                                     export_options,
                                     directory,
                                     colour_coding=None,
                                     colouring_data_set=None,
                                     variations=None):
    """Plot `values` once per variation, save each figure, and report the
    total time taken.

    `variations` is a list of dictionaries with extra keyword arguments for
    ``figures.plot_values``; by default a single figure without extra
    arguments is made. `colouring_data_set` is only passed on when a
    `colour_coding` is given.
    """
    if variations is None:
        variations = [{}]

    base_arguments = dict(shared_arguments)
    if colour_coding is not None:
        base_arguments["colour_coding"] = colour_coding
        base_arguments["colouring_data_set"] = colouring_data_set

    plot_time_start = time()
    for variation in variations:
        plot_arguments = dict(base_arguments)
        plot_arguments.update(variation)
        figure, figure_name = figures.plot_values(values, **plot_arguments)
        figures.save_figure(figure=figure,
                            name=figure_name,
                            options=export_options,
                            directory=directory)
    plot_duration = time() - plot_time_start
    print("    {}{} plotted and saved ({}).".format(
        capitalise_string(title), description,
        format_duration(plot_duration)))
Exemple #4
0
def analyse_matrices(data_set,
                     plot_distances=False,
                     name=None,
                     export_options=None,
                     analyses_directory=None):
    """Plot and save matrix figures (heat maps or distances) for a data set.

    One figure is produced per combination of sorting method and distance
    metric. Examples are always sorted using hierarchical clustering and,
    when the data set has labels, also by labels. Euclidean and cosine
    metrics are used when plotting distances or when sorting using
    hierarchical clustering; otherwise no metric is passed on.

    Large data sets are reduced before plotting: examples are randomly
    subsampled (with a fixed seed) when their number exceeds the applicable
    module-level maximum, and for heat maps only the highest-variance
    features are kept when the number of features exceeds
    ``MAXIMUM_NUMBER_OF_FEATURES_FOR_HEAT_MAPS``.

    Arguments:
        data_set: Data set to plot. The attributes ``values``, ``labels``,
            ``version``, ``tags``, ``number_of_examples``,
            ``number_of_features``, ``class_palette``, and (when labels are
            present without a palette) ``number_of_classes``,
            ``class_names``, and ``label_sorter`` are read.
        plot_distances: If true, plot pairwise distances and save them
            under ``"distances"``; otherwise plot heat maps and save them
            under ``"heat_maps"``.
        name: Optional name part or list of name parts for the figure
            names. The list is not modified.
        export_options: Passed on unchanged to ``figures.save_figure``.
        analyses_directory: Base directory for the figures; defaults to
            ``defaults["analyses"]["directory"]``. Figures are saved in a
            subdirectory named after the kind of plot.
    """

    base_name = "distances" if plot_distances else "heat_maps"

    if analyses_directory is None:
        analyses_directory = defaults["analyses"]["directory"]
    analyses_directory = os.path.join(analyses_directory, base_name)

    # Build the name parts on a copy so that a list passed in by the caller
    # is never mutated (inserting in place would accumulate prefixes when
    # the same list is reused across calls).
    if not name:
        name_parts = []
    elif isinstance(name, list):
        name_parts = list(name)
    else:
        name_parts = [name]
    name_parts.insert(0, base_name)

    # Subsampling indices (if necessary); the seed is fixed so the same
    # permutation is drawn on every call for a given number of examples
    random_state = numpy.random.RandomState(57)
    shuffled_indices = random_state.permutation(data_set.number_of_examples)

    # Feature selection for plotting (if necessary)
    feature_indices_for_plotting = None
    if (not plot_distances and data_set.number_of_features >
            MAXIMUM_NUMBER_OF_FEATURES_FOR_HEAT_MAPS):
        feature_variances = data_set.values.var(axis=0)
        if isinstance(feature_variances, numpy.matrix):
            feature_variances = feature_variances.A.squeeze()
        # Keep the highest-variance features, restored to original order
        feature_indices_for_plotting = numpy.argsort(
            feature_variances)[-MAXIMUM_NUMBER_OF_FEATURES_FOR_HEAT_MAPS:]
        feature_indices_for_plotting.sort()

    # Class palette
    class_palette = data_set.class_palette
    if data_set.labels is not None and not class_palette:
        index_palette = style.lighter_palette(data_set.number_of_classes)
        class_palette = {
            class_name: tuple(index_palette[i])
            for i, class_name in enumerate(
                sorted(data_set.class_names, key=data_set.label_sorter))
        }

    # Axis labels
    example_label = data_set.tags["example"].capitalize() + "s"
    feature_label = data_set.tags["feature"].capitalize() + "s"
    value_label = data_set.tags["value"].capitalize() + "s"

    version = data_set.version
    symbol = None
    value_name = "values"

    if version in ["z", "x"]:
        symbol = "$\\mathbf{{{}}}$".format(version)
        value_name = "component"
    elif version in ["y"]:
        symbol = "${}$".format(version)
        value_name = "value"

    if version in ["y", "z"]:
        feature_label = " ".join([symbol, value_name + "s"])

    if plot_distances:
        if version in ["y", "z"]:
            value_label = symbol
        else:
            value_label = version

    if feature_indices_for_plotting is not None:
        feature_label = "{} most varying {}".format(
            len(feature_indices_for_plotting), feature_label.lower())

    plot_string = "Plotting heat map for {} values."
    if plot_distances:
        plot_string = "Plotting pairwise distances in {} space."
    print(plot_string.format(data_set.version))

    sorting_methods = ["hierarchical_clustering"]

    if data_set.labels is not None:
        sorting_methods.insert(0, "labels")

    for sorting_method in sorting_methods:

        distance_metrics = [None]

        if plot_distances or sorting_method == "hierarchical_clustering":
            distance_metrics = ["Euclidean", "cosine"]

        for distance_metric in distance_metrics:

            start_time = time()

            if (sorting_method == "hierarchical_clustering"
                    and data_set.number_of_examples >
                    MAXIMUM_NUMBER_OF_EXAMPLES_FOR_DENDROGRAM):
                sample_size = MAXIMUM_NUMBER_OF_EXAMPLES_FOR_DENDROGRAM
            elif (data_set.number_of_examples >
                  MAXIMUM_NUMBER_OF_EXAMPLES_FOR_HEAT_MAPS):
                sample_size = MAXIMUM_NUMBER_OF_EXAMPLES_FOR_HEAT_MAPS
            else:
                sample_size = None

            # The example label is derived per figure so that a subsampled
            # label from an earlier iteration never leaks into a figure
            # that shows all examples.
            if sample_size:
                indices = shuffled_indices[:sample_size]
                figure_example_label = "{} randomly sampled {}".format(
                    sample_size, data_set.tags["example"] + "s")
            else:
                indices = numpy.arange(data_set.number_of_examples)
                figure_example_label = example_label

            if data_set.labels is not None:
                labels = data_set.labels[indices]
            else:
                labels = None

            figure, figure_name = figures.plot_matrix(
                feature_matrix=data_set.values[indices],
                plot_distances=plot_distances,
                example_label=figure_example_label,
                feature_label=feature_label,
                value_label=value_label,
                sorting_method=sorting_method,
                distance_metric=distance_metric,
                labels=labels,
                label_kind=data_set.tags["class"],
                class_palette=class_palette,
                feature_indices_for_plotting=feature_indices_for_plotting,
                name_parts=name_parts +
                [data_set.version, distance_metric, sorting_method])
            figures.save_figure(figure=figure,
                                name=figure_name,
                                options=export_options,
                                directory=analyses_directory)

            duration = time() - start_time

            plot_kind_string = "Heat map for {} values".format(
                data_set.version)

            if plot_distances:
                plot_kind_string = "{} distances in {} space".format(
                    distance_metric.capitalize(), data_set.version)

            subsampling_string = ""

            if sample_size:
                subsampling_string = "{} {} randomly sampled examples".format(
                    "for" if plot_distances else "of", sample_size)

            sort_string = "sorted using {}".format(
                sorting_method.replace("_", " "))

            if (not plot_distances
                    and sorting_method == "hierarchical_clustering"):
                sort_string += " (with {} distances)".format(distance_metric)

            # Empty parts (e.g. no subsampling) are dropped from the message
            print("    " + " ".join([
                s for s in [
                    plot_kind_string, subsampling_string, sort_string,
                    "plotted and saved", "({})".format(
                        format_duration(duration))
                ] if s
            ]) + ".")

    print()