Example #1
0
def build_training_string(model_string, epoch_start, number_of_epochs,
                          data_string):
    """Build a sentence describing the training that is about to happen.

    Args:
        model_string: Description of the model, inserted into the message.
        epoch_start: Number of epochs the model has already been trained for.
        number_of_epochs: Total number of epochs the model should reach.
        data_string: Description of the data, inserted into the message.

    Returns:
        A message for one of four situations: training from scratch,
        continuing training, already trained for exactly
        ``number_of_epochs``, or already trained for more than that.

    Raises:
        ValueError: If ``epoch_start`` or ``number_of_epochs`` is negative,
            or if the two values cannot be ordered (for example NaN).
    """

    # The comparison chain below covers every orderable value, so negative
    # input has to be rejected up front; otherwise a negative start would
    # be reported as "Continue training ...".
    if epoch_start < 0 or number_of_epochs < 0:
        raise ValueError("Cannot train a negative amount.")

    if epoch_start == 0:
        training_string = "Training {} for {} epochs on {}.".format(
            model_string, number_of_epochs, data_string)
    elif epoch_start < number_of_epochs:
        training_string = (
            "Continue training {} for {} additionally epochs (up to {} epochs)"
            " on {}.".format(model_string, number_of_epochs - epoch_start,
                             number_of_epochs, data_string))
    elif epoch_start == number_of_epochs:
        training_string = (
            "{} has already been trained for {} epochs on {}.".format(
                capitalise_string(model_string), number_of_epochs,
                data_string))
    elif epoch_start > number_of_epochs:
        training_string = (
            "{} has already been trained for more than {} epochs on {}. "
            "Loading model trained for {} epochs.".format(
                capitalise_string(model_string), number_of_epochs, data_string,
                epoch_start))
    else:
        # Only reached when every comparison above is false, e.g. for NaN.
        raise ValueError("Cannot train a negative amount.")

    return training_string
Example #2
0
def plot_series(series, x_label, y_label, sort=False, scale="linear",
                bar=False, colour=None, name=None):
    """Plot a series as a line or as bars and return the figure.

    Args:
        series: Values to plot; ``series.shape[0]`` gives the number of
            points.
        x_label: Label for the x axis (capitalised before use).
        y_label: Label for the y axis (capitalised before use).
        sort: If true, plot the values in descending order, prefix the
            x label with "sorted " and append "-sorted" to the figure name.
        scale: Scale of the y axis; ``"log"`` enables logarithmic bars.
        bar: If true, draw bars instead of a line.
        colour: Plot colour; the first standard palette colour is used when
            this is falsy.
        name: Optional name passed on to the figure-name builder.

    Returns:
        A tuple of the figure and its name.
    """

    figure_name = saving.build_figure_name("series", name)
    colour = colour or style.STANDARD_PALETTE[0]

    number_of_values = series.shape[0]
    positions = numpy.linspace(0, number_of_values, number_of_values)

    if sort:
        # Ascending sort reversed, so the largest value comes first
        series = numpy.sort(series)[::-1]
        x_label = "sorted " + x_label
        figure_name += "-sorted"

    figure = pyplot.figure()
    axis = figure.add_subplot(1, 1, 1)
    seaborn.despine()

    if not bar:
        axis.plot(positions, series, color=colour)
        axis.set_yscale(scale)
    else:
        # Bars take the scale through their own ``log`` switch
        axis.bar(positions, series, log=(scale == "log"), color=colour,
                 alpha=0.4)

    axis.set_xlabel(capitalise_string(x_label))
    axis.set_ylabel(capitalise_string(y_label))

    return figure, figure_name
Example #3
0
def plot_accuracy_evolution(accuracies, name=None):
    """Plot accuracies in percent against the epoch number.

    Args:
        accuracies: Mapping from a kind of accuracy (such as "training" or
            "validation") to a sequence with one accuracy per epoch, or to
            ``None`` to leave that kind out.
        name: Optional name passed on to the figure-name builder.

    Returns:
        A tuple of the figure and its name.
    """

    figure_name = saving.build_figure_name("accuracies", name)
    figure = pyplot.figure()
    axis = figure.add_subplot(1, 1, 1)
    seaborn.despine()

    # Line style and index into the standard palette for the known kinds
    known_kind_styles = {
        "training": ("solid", 0),
        "validation": ("dashed", 1),
    }

    for accuracies_kind, kind_accuracies in sorted(accuracies.items()):
        if kind_accuracies is None:
            continue

        if accuracies_kind in known_kind_styles:
            line_style, palette_index = known_kind_styles[accuracies_kind]
            plot_options = {
                "linestyle": line_style,
                "color": style.STANDARD_PALETTE[palette_index],
            }
        else:
            # Unknown kinds used to hit unbound (or stale) style variables;
            # draw them dotted and let matplotlib choose the colour instead.
            plot_options = {"linestyle": "dotted"}

        label = "{} set".format(capitalise_string(accuracies_kind))
        epochs = numpy.arange(len(kind_accuracies)) + 1
        # Convert to an array so that plain lists are scaled, not repeated
        axis.plot(epochs,
                  100 * numpy.asarray(kind_accuracies),
                  label=label,
                  **plot_options)

    handles, labels = axis.get_legend_handles_labels()

    # Unpacking an empty zip fails, so only build a legend if lines exist
    if handles:
        labels, handles = zip(
            *sorted(zip(labels, handles), key=lambda pair: pair[0]))
        axis.legend(handles, labels, loc="best")

    axis.set_xlabel("Epoch")
    axis.set_ylabel("Accuracies")

    return figure, figure_name
Example #4
0
def plot_variable_label_correlations(variable_vector,
                                     variable_name,
                                     colouring_data_set,
                                     name="variable_label_correlations"):
    """Scatter a variable against the class IDs of the examples' labels.

    The examples are shuffled with a fixed seed before plotting. Each point
    is coloured by its label, using the class palette of the colouring data
    set or, when that is falsy, a palette built from ``style``.

    Args:
        variable_vector: Values of the variable, indexed by example along
            the first axis.
        variable_name: Label for the x axis.
        colouring_data_set: Data set providing labels, class names, the
            mappings between class names and class IDs, the class palette,
            the label sorter and the term used for "class".
        name: Name passed on to the figure-name builder.

    Returns:
        A tuple of the figure and its name.
    """

    figure_name = saving.build_figure_name(name)
    number_of_examples = variable_vector.shape[0]

    def _class_id_for(class_name):
        return colouring_data_set.class_name_to_class_id[class_name]

    def _class_name_for(class_id):
        return colouring_data_set.class_id_to_class_name[class_id]

    to_class_ids = numpy.vectorize(_class_id_for)
    to_class_names = numpy.vectorize(_class_name_for)

    labels = colouring_data_set.labels
    class_names = colouring_data_set.class_names
    number_of_classes = colouring_data_set.number_of_classes
    class_palette = colouring_data_set.class_palette
    label_sorter = colouring_data_set.label_sorter

    if not class_palette:
        # Assign palette colours to the classes in sorted order
        index_palette = style.lighter_palette(number_of_classes)
        ordered_class_names = sorted(class_names, key=label_sorter)
        class_palette = {
            class_name: index_palette[position]
            for position, class_name in enumerate(ordered_class_names)
        }

    # Fixed seed, so the drawing order is the same on every call
    shuffled_indices = numpy.random.RandomState(117).permutation(
        number_of_examples)
    variable_vector = variable_vector[shuffled_indices]
    labels = labels[shuffled_indices]

    label_ids = numpy.expand_dims(to_class_ids(labels), axis=-1)
    point_colours = [class_palette[label] for label in labels]

    tick_class_ids = numpy.unique(label_ids)
    tick_class_names = to_class_names(tick_class_ids)

    figure = pyplot.figure()
    axis = figure.add_subplot(1, 1, 1)
    seaborn.despine()

    axis.scatter(variable_vector, label_ids, c=point_colours, s=1)

    axis.set_yticks(tick_class_ids)
    axis.set_yticklabels(tick_class_names)

    axis.set_xlabel(variable_name)
    axis.set_ylabel(capitalise_string(colouring_data_set.terms["class"]))

    return figure, figure_name
Example #5
0
def validate_model_parameters(reconstruction_distribution=None,
                              number_of_reconstruction_classes=None,
                              model_type=None,
                              latent_distribution=None,
                              parameterise_latent_posterior=None):
    """Reject unsupported combinations of model parameters.

    Two independent checks are made, each only when all of the arguments
    it needs are truthy:

    * With a positive number of reconstruction classes, the reconstruction
      distribution may not be "bernoulli" and may not contain
      "zero-inflated" or "constrained".
    * For model types containing "VAE", parameterising the latent posterior
      is only accepted for model type "VAE" with a "gaussian mixture"
      latent distribution.

    Raises:
        ValueError: If either check fails.
    """

    # Piecewise categorical likelihood
    if (reconstruction_distribution and number_of_reconstruction_classes
            and number_of_reconstruction_classes > 0):
        incompatibility_checks = (
            (reconstruction_distribution == "bernoulli",
             "the Bernoulli distribution"),
            ("zero-inflated" in reconstruction_distribution,
             "zero-inflated distributions"),
            ("constrained" in reconstruction_distribution,
             "constrained distributions"),
        )
        incompatible_kinds = [
            kind for is_incompatible, kind in incompatibility_checks
            if is_incompatible
        ]

        if incompatible_kinds:
            listed_kinds = enumerate_strings(
                incompatible_kinds, conjunction="or")
            raise ValueError("{} cannot be piecewise categorical.".format(
                capitalise_string(listed_kinds)))

    # Parameterisation of the latent posterior for VAE-like models
    if not (model_type and latent_distribution
            and parameterise_latent_posterior):
        return

    if "VAE" not in model_type:
        return

    supports_parameterisation = (
        model_type in ("VAE",)
        and latent_distribution == "gaussian mixture")

    if not supports_parameterisation:
        raise ValueError(
            "Cannot parameterise latent posterior parameters for {} "
            "or {} distribution.".format(model_type, latent_distribution))
Example #6
0
def _plot_and_save_decomposition_figures(values, description,
                                         plot_option_sets, title,
                                         shared_plot_options, export_options,
                                         directory):
    """Plot and save one figure per option set, then print the time spent."""
    plot_time_start = time()
    for plot_options in plot_option_sets:
        options = dict(shared_plot_options)
        options.update(plot_options)
        figure, figure_name = figures.plot_values(values, **options)
        figures.save_figure(figure=figure,
                            name=figure_name,
                            options=export_options,
                            directory=directory)
    plot_duration = time() - plot_time_start
    print("    {}{} plotted and saved ({}).".format(
        capitalise_string(title), description,
        format_duration(plot_duration)))


def analyse_decompositions(data_sets,
                           other_data_sets=None,
                           centroids=None,
                           colouring_data_set=None,
                           sampled_data_set=None,
                           decomposition_methods=None,
                           highlight_feature_indices=None,
                           symbol=None,
                           title="data set",
                           specifier=None,
                           analysis_level=None,
                           export_options=None,
                           analyses_directory=None):
    """Decompose data sets to two components and plot them in several ways.

    Data sets with at most one feature are skipped. The remaining ones are
    plotted as they are (only when they have exactly two features) and
    after each of the decomposition methods. Every set of points is plotted
    without colour coding, together with sampled values if given, and
    colour-coded by what the colouring data set offers: labels, superset
    labels, single classes (only at the "extensive" analysis level and for
    at most 10 classes), batches, predicted cluster IDs, predicted labels,
    predicted superset labels, count sum and each highlighted feature.
    Figures are saved below ``analyses_directory`` and progress is printed.

    For t-SNE, data sets with more examples than
    ``MAXIMUM_NUMBER_OF_EXAMPLES_FOR_TSNE`` are skipped, and data sets with
    more features than ``MAXIMUM_NUMBER_OF_FEATURES_FOR_TSNE`` are first
    decomposed using PCA.

    Args:
        data_sets: A data set, or a list, tuple or dict of data sets.
        other_data_sets: Optional data set(s) paired with ``data_sets``;
            when given, their decomposed values are the ones plotted.
        centroids: Optional centroids; only used for data sets whose
            version is "z" or "z1".
        colouring_data_set: Data set used for colour coding.
        sampled_data_set: Optional data set with sampled values to plot
            alongside the other values.
        decomposition_methods: A method name or a list or tuple of names;
            defaults to ``defaults["decomposition_method"]``.
        highlight_feature_indices: A feature index or a list or tuple of
            indices to colour-code by; defaults to
            ``defaults["analyses"]["highlight_feature_indices"]``.
        symbol: Symbol used for the axis labels; the specification is used
            when this is falsy.
        title: Base of the title used in messages and of the figure name.
        specifier: Optional callable mapping a data set to a specification
            appended to the name and the title.
        analysis_level: Defaults to
            ``defaults["analyses"]["analysis_level"]``.
        export_options: Passed on to ``figures.save_figure``.
        analyses_directory: Defaults to
            ``defaults["analyses"]["directory"]``.

    Raises:
        ValueError: If ``data_sets`` and ``other_data_sets`` differ in
            length.
    """

    if analysis_level is None:
        analysis_level = defaults["analyses"]["analysis_level"]

    centroids_original = centroids

    if isinstance(data_sets, dict):
        data_sets = list(data_sets.values())

    if not isinstance(data_sets, (list, tuple)):
        data_sets = [data_sets]

    if other_data_sets is None:
        other_data_sets = [None] * len(data_sets)
    elif not isinstance(other_data_sets, (list, tuple)):
        other_data_sets = [other_data_sets]

    if len(data_sets) != len(other_data_sets):
        raise ValueError(
            "Lists of data sets and alternative data sets do not have the "
            "same length.")

    specification = None
    base_symbol = symbol
    original_title = title

    if decomposition_methods is None:
        decomposition_methods = [defaults["decomposition_method"]]
    elif not isinstance(decomposition_methods, (list, tuple)):
        decomposition_methods = [decomposition_methods]
    else:
        # list() copies a list and also accepts a tuple, which has no
        # copy() method and cannot be inserted into below.
        decomposition_methods = list(decomposition_methods)
    # A leading None stands for plotting the values without decomposition
    decomposition_methods.insert(0, None)

    if highlight_feature_indices is None:
        highlight_feature_indices = defaults["analyses"][
            "highlight_feature_indices"]
    elif not isinstance(highlight_feature_indices, (list, tuple)):
        highlight_feature_indices = [highlight_feature_indices]
    else:
        highlight_feature_indices = list(highlight_feature_indices)

    if analyses_directory is None:
        analyses_directory = defaults["analyses"]["directory"]

    for data_set, other_data_set in zip(data_sets, other_data_sets):

        if data_set.values.shape[1] <= 1:
            continue

        title = original_title
        name = normalise_string(title)

        if specifier:
            specification = specifier(data_set)

        if specification:
            name += "-" + str(specification)
            title += " for " + str(specification)

        title += " set"

        # NOTE(review): this fallback persists across iterations, so with
        # several data sets and no colouring data set, all of them are
        # coloured using the first plotted data set -- confirm intended.
        if not colouring_data_set:
            colouring_data_set = data_set

        if data_set.version in ["z", "z1"]:
            centroids = copy.deepcopy(centroids_original)
        else:
            centroids = None

        if other_data_set:
            title = "{} set values in {}".format(other_data_set.version, title)
            name = other_data_set.version + "-" + name

        decompositions_directory = os.path.join(analyses_directory, name)

        # These do not depend on the decomposition method
        other_values = other_data_set.values if other_data_set else None
        sampled_values = (
            sampled_data_set.values if sampled_data_set else None)

        for decomposition_method in decomposition_methods:

            if not decomposition_method:
                # Values can only be plotted directly in two dimensions
                if data_set.number_of_features != 2:
                    continue
                values_decomposed = data_set.values
                other_values_decomposed = other_values
                sampled_values_decomposed = sampled_values
                centroids_decomposed = centroids
            else:
                decomposition_method = proper_string(
                    decomposition_method, DECOMPOSITION_METHOD_NAMES)

                values_decomposed = data_set.values
                other_values_decomposed = other_values
                sampled_values_decomposed = sampled_values
                centroids_decomposed = centroids

                other_value_sets_decomposed = {}
                if other_values is not None:
                    other_value_sets_decomposed["other"] = other_values
                if sampled_values is not None:
                    other_value_sets_decomposed["sampled"] = sampled_values
                if not other_value_sets_decomposed:
                    other_value_sets_decomposed = None

                if decomposition_method == "t-SNE":
                    if (data_set.number_of_examples >
                            MAXIMUM_NUMBER_OF_EXAMPLES_FOR_TSNE):
                        print(
                            "The number of examples for {}".format(title),
                            "is too large to decompose it",
                            "using {}. Skipping.".format(decomposition_method))
                        print()
                        continue

                    elif (data_set.number_of_features >
                          MAXIMUM_NUMBER_OF_FEATURES_FOR_TSNE):
                        number_of_pca_components_before_tsne = min(
                            MAXIMUM_NUMBER_OF_PCA_COMPONENTS_BEFORE_TSNE,
                            data_set.number_of_examples - 1)
                        print(
                            "The number of features for {}".format(title),
                            "is too large to decompose it",
                            "using {} in due time.".format(
                                decomposition_method))
                        print("Decomposing {} to {} components using PCA "
                              "beforehand.".format(
                                  title, number_of_pca_components_before_tsne))
                        decompose_time_start = time()
                        (values_decomposed, other_value_sets_decomposed,
                         centroids_decomposed) = decompose(
                             values_decomposed,
                             other_value_sets=other_value_sets_decomposed,
                             centroids=centroids_decomposed,
                             method="pca",
                             number_of_components=(
                                 number_of_pca_components_before_tsne))
                        decompose_duration = time() - decompose_time_start
                        print("{} pre-decomposed ({}).".format(
                            capitalise_string(title),
                            format_duration(decompose_duration)))

                    else:
                        # NOTE(review): only ``values_decomposed`` reaches
                        # decompose() in dense form. The other two values
                        # are passed through ``other_value_sets_decomposed``
                        # (left untouched here), and these variables are
                        # reassigned from its result further down.
                        if scipy.sparse.issparse(values_decomposed):
                            values_decomposed = values_decomposed.toarray()
                        if scipy.sparse.issparse(other_values_decomposed):
                            other_values_decomposed = (
                                other_values_decomposed.toarray())
                        if scipy.sparse.issparse(sampled_values_decomposed):
                            sampled_values_decomposed = (
                                sampled_values_decomposed.toarray())

                print("Decomposing {} using {}.".format(
                    title, decomposition_method))
                decompose_time_start = time()
                (values_decomposed, other_value_sets_decomposed,
                 centroids_decomposed) = decompose(
                     values_decomposed,
                     other_value_sets=other_value_sets_decomposed,
                     centroids=centroids_decomposed,
                     method=decomposition_method,
                     number_of_components=2)
                decompose_duration = time() - decompose_time_start
                print("{} decomposed ({}).".format(
                    capitalise_string(title),
                    format_duration(decompose_duration)))
                print()

                if other_value_sets_decomposed:
                    other_values_decomposed = other_value_sets_decomposed.get(
                        "other")
                    sampled_values_decomposed = (
                        other_value_sets_decomposed.get("sampled"))

            symbol = base_symbol or specification

            x_label = _axis_label_for_symbol(
                symbol=symbol,
                coordinate=1,
                decomposition_method=decomposition_method,
            )
            y_label = _axis_label_for_symbol(
                symbol=symbol,
                coordinate=2,
                decomposition_method=decomposition_method,
            )

            figure_labels = {
                "title": decomposition_method,
                "x label": x_label,
                "y label": y_label
            }

            if other_data_set:
                plot_values_decomposed = other_values_decomposed
            else:
                plot_values_decomposed = values_decomposed

            if plot_values_decomposed is None:
                # NOTE(review): this leaves the whole function, so any
                # remaining methods and data sets are not analysed --
                # confirm that ``continue`` is not what is wanted.
                print("No values to plot.\n")
                return

            print("Plotting {}{}.".format(
                "decomposed " if decomposition_method else "", title))

            # Arguments shared by every plot of these decomposed values
            plot_context = {
                "values": plot_values_decomposed,
                "title": title,
                "shared_plot_options": {
                    "centroids": centroids_decomposed,
                    "figure_labels": figure_labels,
                    "example_tag": data_set.tags["example"],
                    "name": name,
                },
                "export_options": export_options,
                "directory": decompositions_directory,
            }
            colouring = {"colouring_data_set": colouring_data_set}

            # No colour-coding
            _plot_and_save_decomposition_figures(
                description="", plot_option_sets=[{}], **plot_context)

            # Samples
            if sampled_data_set:
                _plot_and_save_decomposition_figures(
                    description=" (with samples)",
                    plot_option_sets=[
                        {"sampled_values": sampled_values_decomposed}],
                    **plot_context)

            # Labels
            if colouring_data_set.labels is not None:
                _plot_and_save_decomposition_figures(
                    description=" (with labels)",
                    plot_option_sets=[
                        dict(colouring, colour_coding="labels")],
                    **plot_context)

                # Superset labels
                if colouring_data_set.superset_labels is not None:
                    _plot_and_save_decomposition_figures(
                        description=" (with superset labels)",
                        plot_option_sets=[
                            dict(colouring, colour_coding="superset labels")],
                        **plot_context)

                # For each class
                if analysis_level == "extensive":
                    if colouring_data_set.number_of_classes <= 10:
                        _plot_and_save_decomposition_figures(
                            description=" (for each class)",
                            plot_option_sets=[
                                dict(colouring,
                                     colour_coding="class",
                                     class_name=class_name)
                                for class_name in (
                                    colouring_data_set.class_names)
                            ],
                            **plot_context)

                    # The count is read from the colouring data set, since
                    # that is where the superset class names come from.
                    if (colouring_data_set.superset_labels is not None
                            and colouring_data_set.number_of_superset_classes
                            <= 10):
                        _plot_and_save_decomposition_figures(
                            description=" (for each superset class)",
                            plot_option_sets=[
                                dict(colouring,
                                     colour_coding="superset class",
                                     class_name=superset_class_name)
                                for superset_class_name in (
                                    colouring_data_set.superset_class_names)
                            ],
                            **plot_context)

            # Batches, cluster IDs, predicted labels and predicted superset
            # labels: each is plotted if the colouring data set has them.
            optional_colour_codings = (
                ("has_batches", "batches"),
                ("has_predicted_cluster_ids", "predicted cluster IDs"),
                ("has_predicted_labels", "predicted labels"),
                ("has_predicted_superset_labels",
                 "predicted superset labels"),
            )
            for availability_attribute, colour_coding in (
                    optional_colour_codings):
                if getattr(colouring_data_set, availability_attribute):
                    _plot_and_save_decomposition_figures(
                        description=" (with {})".format(colour_coding),
                        plot_option_sets=[
                            dict(colouring, colour_coding=colour_coding)],
                        **plot_context)

            # Count sum
            _plot_and_save_decomposition_figures(
                description=" (with count sum)",
                plot_option_sets=[dict(colouring, colour_coding="count sum")],
                **plot_context)

            # Features
            for feature_index in highlight_feature_indices:
                _plot_and_save_decomposition_figures(
                    description=" (with {})".format(
                        data_set.feature_names[feature_index]),
                    plot_option_sets=[
                        dict(colouring,
                             colour_coding="feature",
                             feature_index=feature_index)],
                    **plot_context)

            print()
Example #7
0
def plot_profile_comparison(observed_series, expected_series,
                            expected_series_total_standard_deviations=None,
                            expected_series_explained_standard_deviations=None,
                            x_name="feature", y_name="value", sort=True,
                            sort_by="expected", sort_direction="ascending",
                            x_scale="linear", y_scale="linear", y_cutoff=None,
                            name=None):
    """Plot an observed series against an expected series.

    The series selected by ``sort_by`` is drawn as a solid line on top, and
    the other series is drawn as circular markers below it. Optional
    standard deviations are drawn as filled bands around the expected
    series.

    Arguments:
        observed_series: Observed values; a SciPy sparse input is converted
            to a dense array and squeezed.
        expected_series: Expected values; treated like ``observed_series``.
        expected_series_total_standard_deviations: Optional half-widths of
            the "Total standard deviation" band around ``expected_series``.
        expected_series_explained_standard_deviations: Optional half-widths
            of the "Explained standard deviation" band. The band is only
            drawn if the mean of these values is positive.
        x_name: Singular name used to build the x-axis label.
        y_name: Singular name used to build the y-axis label.
        sort: Whether to reorder all series by the ``sort_by`` series.
        sort_by: ``"expected"`` or ``"observed"`` (after being passed
            through ``normalise_string``).
        sort_direction: ``"ascending"`` or ``"descending"`` (after being
            passed through ``normalise_string``); only checked when
            ``sort`` is true.
        x_scale: Scale passed to ``set_xscale``.
        y_scale: ``"both"`` creates two stacked axes sharing the x-axis
            (upper one logarithmic, lower one linear); any other value is
            passed to ``set_yscale`` of a single axis.
        y_cutoff: For ``"both"``, the lower limit of the upper axis and the
            upper limit of the lower axis. For a single axis, the upper
            limit when ``y_scale`` is ``"linear"`` and the lower limit when
            it is ``"log"``.
        name: Passed on to ``saving.build_figure_name``.

    Returns:
        The tuple ``(figure, figure_name)``.

    Raises:
        ValueError: If ``sort_by`` is neither ``"expected"`` nor
            ``"observed"``, or if ``sort`` is true and ``sort_direction``
            is neither ``"ascending"`` nor ``"descending"``.
    """

    sort_by = normalise_string(sort_by)
    sort_direction = normalise_string(sort_direction)
    figure_name = saving.build_figure_name("profile_comparison", name)

    # Every series is indexed and combined arithmetically below, so all
    # sparse inputs (including the expected series) are made dense first.
    if scipy.sparse.issparse(observed_series):
        observed_series = observed_series.toarray().squeeze()

    if scipy.sparse.issparse(expected_series):
        expected_series = expected_series.toarray().squeeze()

    if scipy.sparse.issparse(expected_series_total_standard_deviations):
        expected_series_total_standard_deviations = (
            expected_series_total_standard_deviations.toarray().squeeze())

    if scipy.sparse.issparse(expected_series_explained_standard_deviations):
        expected_series_explained_standard_deviations = (
            expected_series_explained_standard_deviations.toarray().squeeze())

    observed_colour = style.STANDARD_PALETTE[0]
    expected_palette = seaborn.light_palette(style.STANDARD_PALETTE[1], 5)

    expected_colour = expected_palette[-1]
    expected_total_standard_deviations_colour = expected_palette[1]
    expected_explained_standard_deviations_colour = expected_palette[3]

    if sort:
        x_label = "{}s sorted {} by {} {}s [sort index]".format(
            capitalise_string(x_name), sort_direction, sort_by, y_name.lower())
    else:
        x_label = "{}s [original index]".format(capitalise_string(x_name))
    y_label = capitalise_string(y_name) + "s"

    observed_label = "Observed"
    expected_label = "Expected"
    expected_total_standard_deviations_label = "Total standard deviation"
    expected_explained_standard_deviations_label = (
        "Explained standard deviation")

    # Sorting: the series sorted by becomes the solid line in front, the
    # other one is shown as markers behind it.
    if sort_by == "expected":
        sort_series = expected_series
        expected_marker = ""
        expected_line_style = "solid"
        expected_z_order = 3
        observed_marker = "o"
        observed_line_style = ""
        observed_z_order = 2
    elif sort_by == "observed":
        sort_series = observed_series
        expected_marker = "o"
        expected_line_style = ""
        expected_z_order = 2
        observed_marker = ""
        observed_line_style = "solid"
        observed_z_order = 3
    else:
        # Without this, the markers and line styles would be undefined and
        # plotting would fail with a NameError further down.
        raise ValueError(
            "Series to sort by can either be expected or observed.")

    if sort:
        sort_indices = numpy.argsort(sort_series)
        if sort_direction == "descending":
            sort_indices = sort_indices[::-1]
        elif sort_direction != "ascending":
            raise ValueError(
                "Sort direction can either be ascending or descending.")
    else:
        # Selects everything in the original order.
        sort_indices = slice(None)

    # Standard deviations
    if expected_series_total_standard_deviations is not None:
        with_total_standard_deviations = True
        expected_series_total_standard_deviations_lower = (
            expected_series - expected_series_total_standard_deviations)
        expected_series_total_standard_deviations_upper = (
            expected_series + expected_series_total_standard_deviations)
    else:
        with_total_standard_deviations = False

    if (expected_series_explained_standard_deviations is not None
            and expected_series_explained_standard_deviations.mean() > 0):
        with_explained_standard_deviations = True
        expected_series_explained_standard_deviations_lower = (
            expected_series - expected_series_explained_standard_deviations)
        expected_series_explained_standard_deviations_upper = (
            expected_series + expected_series_explained_standard_deviations)
    else:
        with_explained_standard_deviations = False

    # Figure
    if y_scale == "both":
        figure, axes = pyplot.subplots(nrows=2, sharex=True)
        figure.subplots_adjust(hspace=0.1)
        axis_upper = axes[0]
        axis_lower = axes[1]
        # ``set_zorder`` is a method; it has to be called rather than
        # assigned to for the upper axis to be stacked above the lower one.
        axis_upper.set_zorder(1)
        axis_lower.set_zorder(0)
    else:
        figure = pyplot.figure()
        axis = figure.add_subplot(1, 1, 1)
        axes = [axis]

    handles = []
    # One-based positions along the x-axis.
    feature_indices = numpy.arange(len(observed_series)) + 1

    for i, axis in enumerate(axes):
        # The same artists are drawn on every axis, so legend handles are
        # only collected from the first one.
        is_first_axis = i == 0

        observed_plot, = axis.plot(
            feature_indices,
            observed_series[sort_indices],
            label=observed_label,
            color=observed_colour,
            marker=observed_marker,
            linestyle=observed_line_style,
            zorder=observed_z_order
        )
        expected_plot, = axis.plot(
            feature_indices,
            expected_series[sort_indices],
            label=expected_label,
            color=expected_colour,
            marker=expected_marker,
            linestyle=expected_line_style,
            zorder=expected_z_order
        )
        if is_first_axis:
            handles.append(observed_plot)
            handles.append(expected_plot)

        if with_total_standard_deviations:
            axis.fill_between(
                feature_indices,
                expected_series_total_standard_deviations_lower[sort_indices],
                expected_series_total_standard_deviations_upper[sort_indices],
                color=expected_total_standard_deviations_colour,
                zorder=0
            )
            if is_first_axis:
                # A patch stands in for the filled band in the legend.
                handles.append(matplotlib.patches.Patch(
                    label=expected_total_standard_deviations_label,
                    color=expected_total_standard_deviations_colour
                ))

        if with_explained_standard_deviations:
            axis.fill_between(
                feature_indices,
                expected_series_explained_standard_deviations_lower[
                    sort_indices],
                expected_series_explained_standard_deviations_upper[
                    sort_indices],
                color=expected_explained_standard_deviations_colour,
                zorder=1
            )
            if is_first_axis:
                handles.append(matplotlib.patches.Patch(
                    label=expected_explained_standard_deviations_label,
                    color=expected_explained_standard_deviations_colour
                ))

    if y_scale == "both":
        axis_upper.legend(
            handles=handles,
            loc="best"
        )
        seaborn.despine(ax=axis_upper)
        seaborn.despine(ax=axis_lower)

        # NOTE(review): newer Matplotlib releases renamed ``nonposy`` to
        # ``nonpositive`` -- confirm against the version this project pins.
        axis_upper.set_yscale("log", nonposy="clip")
        axis_lower.set_yscale("linear")
        # Shared y-axis label placed on the figure, centred vertically.
        figure.text(0.04, 0.5, y_label, va="center", rotation="vertical")

        axis_lower.set_xscale(x_scale)
        axis_lower.set_xlabel(x_label)

        # The two axes meet at the cut-off: logarithmic above, linear below.
        y_upper_min, y_upper_max = axis_upper.get_ylim()
        y_lower_min, y_lower_max = axis_lower.get_ylim()
        axis_upper.set_ylim(y_cutoff, y_upper_max)

        y_lower_min = max(-1, y_lower_min)
        axis_lower.set_ylim(y_lower_min, y_cutoff)

    else:
        axis.legend(
            handles=handles,
            loc="best"
        )
        seaborn.despine()

        y_scale_arguments = {}
        if y_scale == "log":
            # NOTE(review): see the remark on ``nonposy`` in the branch
            # above.
            y_scale_arguments["nonposy"] = "clip"
        axis.set_yscale(y_scale, **y_scale_arguments)
        axis.set_ylabel(y_label)

        axis.set_xscale(x_scale)
        axis.set_xlabel(x_label)

        y_min, y_max = axis.get_ylim()
        # The lower limit is never allowed to go below -1.
        y_min = max(-1, y_min)

        if y_cutoff:
            if y_scale == "linear":
                y_max = y_cutoff
            elif y_scale == "log":
                y_min = y_cutoff

        axis.set_ylim(y_min, y_max)

    return figure, figure_name
Example #8
0
def plot_histogram(series, excess_zero_count=0, label=None, normed=False,
                   discrete=False, x_scale="linear", y_scale="linear",
                   colour=None, name=None):
    """Plot a histogram of a series as a bar chart.

    Arguments:
        series: Values to bin. A copy is made, so the input is not changed
            by the shift applied for a logarithmic x-axis.
        excess_zero_count: Number of additional values that are not part
            of ``series``; it is added to the count of the first bin and
            to the total used for normalisation.
        label: Optional x-axis label. If ``None``, no x-axis label is set.
        normed: Whether to divide the bin counts by the total number of
            values (including ``excess_zero_count``).
        discrete: Whether to use one bin per integer value, provided the
            maximum of the series is below
            ``MAXIMUM_NUMBER_OF_BINS_FOR_HISTOGRAMS``.
        x_scale: Scale passed to ``set_xscale``. For ``"log"`` the series
            and the bin range are shifted by one first.
        y_scale: ``"log"`` makes the bar heights logarithmic.
        colour: Bar colour; defaults to the first colour of
            ``style.STANDARD_PALETTE``.
        name: Passed on to ``saving.build_figure_name``.

    Returns:
        The tuple ``(figure, figure_name)``.
    """

    series = series.copy()

    figure_name = "histogram"

    if normed:
        figure_name += "-normed"

    figure_name = saving.build_figure_name(figure_name, name)

    figure = pyplot.figure()
    axis = figure.add_subplot(1, 1, 1)
    seaborn.despine()

    series_length = len(series) + excess_zero_count

    series_max = series.max()

    if discrete and series_max < MAXIMUM_NUMBER_OF_BINS_FOR_HISTOGRAMS:
        # Bins of width one centred on the integers 0, ..., ceil(maximum).
        number_of_bins = int(numpy.ceil(series_max)) + 1
        bin_range = numpy.array((-0.5, series_max + 0.5))
    else:
        if series_max < MAXIMUM_NUMBER_OF_BINS_FOR_HISTOGRAMS:
            number_of_bins = "auto"
        else:
            number_of_bins = MAXIMUM_NUMBER_OF_BINS_FOR_HISTOGRAMS
        bin_range = numpy.array((series.min(), series_max))

    if colour is None:
        colour = style.STANDARD_PALETTE[0]

    if x_scale == "log":
        # Shift values and bin range together by one before binning.
        series += 1
        bin_range += 1
        # ``label`` defaults to ``None``, which cannot be concatenated
        # with a string.
        if label is not None:
            label += " (shifted one)"
        figure_name += "-log_values"

    y_log = y_scale == "log"

    histogram, bin_edges = numpy.histogram(
        series,
        bins=number_of_bins,
        range=bin_range
    )

    histogram[0] += excess_zero_count

    # All bins have the same width, so the first one is representative.
    width = bin_edges[1] - bin_edges[0]
    bin_centres = bin_edges[:-1] + width / 2

    if normed:
        histogram = histogram / series_length

    axis.bar(
        bin_centres,
        histogram,
        width=width,
        log=y_log,
        color=colour,
        alpha=0.4
    )

    axis.set_xscale(x_scale)
    if label is not None:
        axis.set_xlabel(capitalise_string(label))

    if normed:
        axis.set_ylabel("Frequency")
    else:
        axis.set_ylabel("Number of counts")

    return figure, figure_name