Example #1
def bayes_factor_f_score_heatmap(
    bayes_factors_df,
    save_to_file=None,
):
    lf = lfig.LatexFigure()
    ax1 = lf.new_axis()

    bayes_factor_by_f_score = pd.pivot_table(bayes_factors_df,
                                             values='log10_bayes_factor',
                                             index=['f_score_a'],
                                             columns=['f_score_b'],
                                             aggfunc=np.median)
    print("bayes_factor_by_id: \n")

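    # hide the upper triangle (and the diagonal) so that each
    # (a, b) comparison appears exactly once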
    mask = np.tri(bayes_factor_by_f_score.shape[0], k=0).T
    sns.heatmap(
        bayes_factor_by_f_score,
        cmap=matplotlib.cm.PRGn,  # TODO get from ES?
        mask=mask,
        annot=True,
        ax=ax1,
        cbar_kws={
            "orientation": "vertical",
            "label": r"$\log_{10}\left(B_{a,b}\right)$"
        })
    ax1.set_ylabel('$F(a)$')
    ax1.set_xlabel('$F(b)$')
    ax1.set_title('$F(a) > F(b)$')

    lf.fig.suptitle(r"$\log_{10}$ Bayes factor by F score",
                    fontsize=25,
                    y=1.15)

    if save_to_file is not None:
        lf.save(save_to_file)
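
A minimal usage sketch for the function above. The expected input shape is inferred from the pivot call: one row per model comparison, with columns f_score_a, f_score_b and log10_bayes_factor. The data here are hypothetical, and the module-level imports the function relies on (np, pd, sns, matplotlib, lfig) are assumed to be in scope.

import numpy as np
import pandas as pd

# hypothetical pairwise comparisons between models with known F-scores
rng = np.random.default_rng(seed=1)
f_grid = [0.0, 0.25, 0.5, 0.75, 1.0]
demo = pd.DataFrame({
    'f_score_a': rng.choice(f_grid, size=200),
    'f_score_b': rng.choice(f_grid, size=200),
    'log10_bayes_factor': rng.normal(loc=0.0, scale=2.0, size=200),
})

bayes_factor_f_score_heatmap(demo, save_to_file='bf_by_f_score')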
Example #2
def plot_configuration_sweep(results, save_to_file=None):
    import matplotlib

    colours = {
        'low': matplotlib.colors.BASE_COLORS['g'],
        'medium-low': matplotlib.colors.CSS4_COLORS['gold'],
        'medium-high': matplotlib.colors.CSS4_COLORS['goldenrod'],
        'high': matplotlib.colors.BASE_COLORS['r']
    }

    lf = lfig.LatexFigure()
    ax = lf.new_axis()

    sns.swarmplot(x='config_id',
                  y='champion_f_score',
                  data=results,
                  color='grey',
                  size=200 / len(results),
                  dodge=False,
                  ax=ax)
    sns.boxplot(x='config_id',
                y='champion_f_score',
                data=results,
                color='grey',
                hue='ResourceRequirement',
                hue_order=['low', 'medium-low', 'medium-high', 'high'],
                palette=colours,
                dodge=False,
                ax=ax)

    ax.set_xlabel('Configuration ID')
    ax.set_ylabel('Champion F-scores')

    if save_to_file is not None:
        plt.savefig(save_to_file)
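
A usage sketch with synthetic sweep results (hypothetical values; pandas, seaborn and lfig are assumed importable as in the module). Each row represents one QMLA instance, recording its configuration ID and the F-score of its champion model.

import numpy as np
import pandas as pd

rng = np.random.default_rng(seed=2)
results = pd.DataFrame({
    'config_id': rng.integers(1, 5, size=40),
    'champion_f_score': rng.uniform(0.4, 1.0, size=40),
})
# each configuration maps to one resource-requirement level
results['ResourceRequirement'] = results['config_id'].map(
    {1: 'low', 2: 'medium-low', 3: 'medium-high', 4: 'high'})

plot_configuration_sweep(results, save_to_file='config_sweep.png')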
Example #3
def plot_scores(
    scores,
    exploration_classes,
    unique_exploration_classes,
    exploration_strategies,
    coefficients_of_determination=None,
    coefficient_determination_latex_name=None,
    f_scores=None,
    plot_r_squared=True,
    plot_f_scores=False,
    entropy=None,
    inf_gain=None,
    true_model=None,
    exploration_rule=None,
    batch_nearest_num_params_as_winners=True,
    collective_analysis_pickle_file=None,
    save_file='model_scores.png',
    figure_format="png"
):
    models = list(scores.keys())
    latex_true_op = unique_exploration_classes[exploration_rule].latex_name(
        name=true_model
    )

    latex_model_names = [
        exploration_classes[model].latex_name(model)
        for model in models
    ]
    coeff_of_determination = [
        coefficient_determination_latex_name[latex_mod]
        for latex_mod in latex_model_names
    ]

    f_scores_list = [
        f_scores[latex_mod]
        for latex_mod in latex_model_names
    ]

    latex_scores_dict = {}
    for mod in models:
        latex_mod = exploration_classes[mod].latex_name(mod)
        latex_scores_dict[latex_mod] = scores[mod]

    batch_correct_models = []
    if batch_nearest_num_params_as_winners:
        num_true_params = len(
            qmla.construct_models.get_constituent_names_from_name(
                true_model
            )
        )
        for mod in models:
            num_params = len(
                qmla.construct_models.get_constituent_names_from_name(mod)
            )

            # selection criterion currently disabled: models within exactly
            # one parameter of the true model would count as "close":
            # if np.abs(num_true_params - num_params) == 1:
            #     batch_correct_models.append(mod)

    mod_scores = scores
    scores = list(scores.values())
    num_runs = sum(scores)

    width = 0.75  # the width of the bars
    ind = np.arange(len(scores))  # the x locations for the groups
    colours = ['blue' for _ in ind]
    batch_success_rate = correct_success_rate = 0
    for mod in batch_correct_models:
        mod_latex = exploration_classes[mod].latex_name(mod)
        mod_idx = latex_model_names.index(mod_latex)
        colours[mod_idx] = 'orange'
        batch_success_rate += mod_scores[mod]
    if true_model in models:
        batch_success_rate += mod_scores[true_model]
        correct_success_rate = mod_scores[true_model]

    batch_success_rate /= num_runs
    correct_success_rate /= num_runs
    batch_success_rate *= 100
    correct_success_rate *= 100  # percent

    results_collection = {
        'type': exploration_rule,
        'true_model': latex_true_op,
        'scores': latex_scores_dict
    }
    if collective_analysis_pickle_file is not None:
        # no longer used/accessed by this function
        if os.path.isfile(collective_analysis_pickle_file) is False:
            combined_analysis = {
                'scores': results_collection
            }
            pickle.dump(
                combined_analysis,
                open(collective_analysis_pickle_file, 'wb')
            )
        else:
            # load current analysis dict, add to it and rewrite it.
            combined_analysis = pickle.load(
                open(collective_analysis_pickle_file, 'rb')
            )
            combined_analysis['scores'] = results_collection
            pickle.dump(
                combined_analysis,
                open(collective_analysis_pickle_file, 'wb')
            )

    try:
        true_idx = latex_model_names.index(latex_true_op)
        colours[true_idx] = 'green'
    except ValueError:
        # true model not among the plotted models
        pass

    lf = lfig.LatexFigure(
        fraction=0.5, 
        auto_label=False
    )
    ax1 = lf.new_axis()

    ax1.barh(ind, scores, width, color=colours)
    ax1.set_yticks(ind + width / 2)
    ax1.set_yticklabels(
        latex_model_names,
        minor=False
    )
    ax1.set_xlabel('Wins')
    xticks_pos = list(range(max(scores) + 1))
    ax1.set_xticks(
        xticks_pos,
        minor=False
    )
    custom_lines = [
        Line2D([0], [0], color='green', lw=4),
        # Line2D([0], [0], color='orange', lw=4),
        Line2D([0], [0], color='blue', lw=4),
        # Line2D([0], [0], color='black', lw=4, ls='--'),
    ]
    custom_handles = [
        r'$\hat{{H}}_0$ ({}$\%$)'.format(int(correct_success_rate)),
        # r'True/Close ({}$\%$)'.format(int(batch_success_rate)),
        'Other',
        # '$R^2$'
    ]

    if plot_r_squared:
        ax2 = ax1.twiny()
        ax2.barh(
            ind,
            coeff_of_determination,
            width / 2,
            color=colours,
            label='$R^2$',
            linestyle='--',
            fill=False,
        )
        # ax2.invert_xaxis()
        ax2.set_xlabel('$R^2$')
        ax2.xaxis.tick_top()

        r_sq_x_ticks = [
            min(coeff_of_determination),
            0,
            1
        ]
        ax2.set_xticks(r_sq_x_ticks)
        ax2.legend(
            bbox_to_anchor=(1.0, 0.9),
        )
    elif plot_f_scores:
        ax2 = ax1.twiny()
        ax2.barh(
            ind,
            f_scores_list,
            width / 2,
            color=colours,
            label='F-score',
            linestyle='--',
            fill=False,
        )
        # ax2.invert_xaxis()
        ax2.set_xlabel('F-score')
        ax2.xaxis.tick_top()

        f_score_x_ticks = [0, 1]
        ax2.set_xticks(f_score_x_ticks)
        ax2.legend(
            bbox_to_anchor=(1.0, 0.9),
        )

    plot_title = 'Number of QMD instances won by models with $R^2$.'

    if entropy is not None:
        plot_title += '\n' + r'$\mathcal{S}$=' + str(round(entropy, 2))
    if inf_gain is not None:
        plot_title += '\t' + r'$\mathcal{IG}$=' + str(round(inf_gain, 2))
    ax1.set_title(plot_title)
    ax1.legend(
        custom_lines,
        custom_handles,
        bbox_to_anchor=(1.0, 0.8),
    )

    # ax1.set_ylabel('Model')
    lf.save(save_file, file_format=figure_format)
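
A usage sketch for plot_scores. A hypothetical stub stands in for the QMLA exploration-strategy classes, since only their latex_name() method is called here; passing batch_nearest_num_params_as_winners=False avoids the qmla.construct_models dependency. Note that coefficient_determination_latex_name and f_scores are required in practice despite their None defaults. Model names and win counts are invented; module-level imports (lfig, np, Line2D) are assumed in scope.

class _StubExplorationStrategy:
    # minimal stand-in: plot_scores only calls latex_name()
    def latex_name(self, name=None, **kwargs):
        return r'${}$'.format(name)

stub = _StubExplorationStrategy()
models = ['xTx', 'xTx+yTy', 'xTx+yTy+zTz']
latex_names = [stub.latex_name(m) for m in models]

plot_scores(
    scores=dict(zip(models, [3, 12, 5])),
    exploration_classes={m: stub for m in models},
    unique_exploration_classes={'demo_rule': stub},
    exploration_strategies=None,  # accepted but unused by the function body
    coefficient_determination_latex_name={l: 0.8 for l in latex_names},
    f_scores={l: 0.5 for l in latex_names},
    true_model='xTx+yTy',
    exploration_rule='demo_rule',
    batch_nearest_num_params_as_winners=False,
    save_file='model_scores',
)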
Example #4
def plot_terms_and_parameters(results_path,
                              save_to_file=None,
                              figure_format="png"):
    storage_instances = glob.glob(os.path.join(results_path, 'storage*'))

    all_learned_params = {}

    for s in storage_instances:
        storage = pickle.load(open(s, "rb"))
        learned_params = storage.LearnedParameters

        for p in learned_params:
            all_learned_params.setdefault(p, []).append(learned_params[p])

    run_info = pickle.load(open(os.path.join(results_path, "run_info.p"),
                                "rb"))
    exploration_strategy = qmla.get_exploration_strategy.get_exploration_class(
        run_info['exploration_rule'])
    true_params = run_info['params_dict']

    # Draw figure
    lf = lfig.LatexFigure(auto_label=False,
                          auto_gridspec=len(all_learned_params),
                          gridspec_params={
                              'wspace': 0.15,
                              'hspace': 0.3
                          })
    params = sorted(all_learned_params.keys())
    for p in params:
        ax = lf.new_axis()

        param_occurences = all_learned_params[p]
        label = r"$\hat{{t}} \in \hat{{H}}^{{\prime}}$"

        ax.hist(param_occurences, color='grey', label=label)

        # median
        param_median = np.round(np.median(param_occurences), 1)
        ax.axvline(param_median,
                   color='blue',
                   ls=':',
                   label=r"$\bar{{ \alpha^{{\prime}} }}$")

        if p in true_params:
            true_p = np.round(true_params[p], 1)
            ax.axvline(true_p, color='red', ls='--', label=r"$\alpha_0$")

        ax.set_title(exploration_strategy.latex_name(p))
        if ax.row == 0 and ax.col == lf.num_cols - 1:
            ax.legend(bbox_to_anchor=(1.025, 0.85),
                      # ncol=3
                      )

    lf.fig.text(
        0.5,
        -0.12,
        r"Parameter ($\alpha$) value",
        ha='center',
    )
    lf.fig.text(
        0.04,
        0.5,
        "Occurences",
        va='center',
        rotation='vertical',
    )

    if save_to_file is not None:
        lf.save(save_to_file, file_format=figure_format)
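
Called on a completed run directory, e.g. (hypothetical path; the directory is expected to contain the pickled storage* files and run_info.p read above):

plot_terms_and_parameters(
    results_path='results/Jan_01/12_00',
    save_to_file='terms_and_parameters',
    figure_format='pdf',
)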
Example #5
def average_parameter_estimates(
    directory_name,
    results_path,
    results_file_name_start='results',
    exploration_rule=None,
    unique_exploration_classes=None,
    top_number_models=2,
    true_params_dict=None,
    save_to_file=None,
    save_directory=None,
    figure_format='png',
    plot_prefix='',
):
    r"""
    Plots progression of parameter estimates against experiment number
    for the top models, i.e. those which win the most. 

    TODO: refactor this code - it should not need to unpickle
    all the files which have already been unpickled and stored in the summary
    results CSV.

    :param directory_name: path to directory where results .p files are stored.
    :param results_path: path to CSV with all results for this run.
    :param results_file_name_start: prefix of the result files to include.
    :param exploration_rule: the name of the exploration strategy used.
    :param unique_exploration_classes: dict with a single instance of each
        exploration strategy class used in this run.
    :param top_number_models: number of models to compute averages for
        (top by number of instance wins).
    :param true_params_dict: dict with the true value of each parameter in the
        true model.
    :param save_to_file: if not None, path to save the figure to.
    :param save_directory: if not None, directory to save per-model figures in.

    :returns None:
    """

    from matplotlib import cm
    plt.switch_backend('agg')  # to try fix plt issue on BC
    results = pd.read_csv(results_path, index_col='QID')
    all_winning_models = list(results.loc[:, 'NameAlphabetical'])
    if len(all_winning_models) > top_number_models:
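        # rank_models sorts distinct models by win count; it is defined as a
        # helper inside plot_dynamics_multiple_models (Example #6) and is
        # assumed to be available at module scope here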
        winning_models = rank_models(all_winning_models)[0:top_number_models]
    else:
        winning_models = list(set(all_winning_models))

    os.chdir(directory_name)
    pickled_files = []
    for file in os.listdir(directory_name):
        if file.endswith(".p") and file.startswith(results_file_name_start):
            pickled_files.append(file)

    parameter_estimates_from_qmd = {}
    num_experiments_by_name = {}

    latex_terms = {}
    exploration_strategies = {}

    for f in pickled_files:
        fname = directory_name + '/' + str(f)
        result = pickle.load(open(fname, 'rb'))
        track_parameter_estimates = result['Trackplot_parameter_estimates']

        alph = result['NameAlphabetical']
        if alph in parameter_estimates_from_qmd.keys():
            parameter_estimates_from_qmd[alph].append(
                track_parameter_estimates)
        else:
            parameter_estimates_from_qmd[alph] = [track_parameter_estimates]
            num_experiments_by_name[alph] = result['NumExperiments']

        if alph not in list(exploration_strategies.keys()):
            try:
                exploration_strategies[alph] = result['ExplorationRule']
            except BaseException:
                exploration_strategies[alph] = exploration_rule

    exploration_classes = {}
    for g in exploration_strategies:
        try:
            exploration_classes[g] = unique_exploration_classes[
                exploration_strategies[g]]
        except (KeyError, TypeError):
            exploration_classes[g] = None

    for name in winning_models:
        num_experiments = num_experiments_by_name[name]
        epochs = range(num_experiments + 1)

        parameters_for_this_name = parameter_estimates_from_qmd[name]
        num_wins_for_name = len(parameters_for_this_name)
        terms = sorted(
            qmla.construct_models.get_constituent_names_from_name(name))
        num_terms = len(terms)
        lf = lfig.LatexFigure(auto_label=False, auto_gridspec=num_terms)

        cm_subsection = np.linspace(0, 0.8, num_terms)
        colours = [cm.Paired(x) for x in cm_subsection]

        parameters = {}
        for t in terms:
            parameters[t] = {}

            for e in epochs:
                parameters[t][e] = []

        for i in range(len(parameters_for_this_name)):
            track_params = parameters_for_this_name[i]
            for t in terms:
                for e in epochs:
                    try:
                        parameters[t][e].append(track_params[t][e])
                    except (KeyError, IndexError):
                        # this instance has no estimate for term t at epoch e
                        pass

        avg_parameters = {}
        std_devs = {}
        for p in terms:
            avg_parameters[p] = {}
            std_devs[p] = {}

            for e in epochs:
                avg_parameters[p][e] = np.median(parameters[p][e])
                std_devs[p][e] = np.std(parameters[p][e])

        for term in sorted(terms):
            ax = lf.new_axis()

            latex_terms[term] = exploration_classes[name].latex_name(term)
            averages = np.array([avg_parameters[term][e] for e in epochs])
            standard_dev = np.array([std_devs[term][e] for e in epochs])

            param_lw = 3
            try:
                true_val = true_params_dict[term]
                ax.axhline(true_val,
                           label='True',
                           ls='--',
                           color='red',
                           lw=param_lw)
            except (KeyError, TypeError):
                # no true value available for this term
                pass

            fill_between_sigmas(
                ax,
                parameters[term],
                epochs,
                legend=False,
                only_one_sigma=True,
            )
            ax.plot([e + 1 for e in epochs],
                    averages,
                    lw=param_lw,
                    label="Estimate",
                    color='blue')
            latex_term = exploration_classes[name].latex_name(term)
            ax.set_title(str(latex_term))

            if (ax.row == 0 and ax.col == lf.num_cols - 1):
                ax.legend(bbox_to_anchor=(1.05, 0.85))

        lf.fig.text(0.45, -0.04, 'Experiment', ha='center')
        lf.fig.text(-0.04, 0.5, 'Parameter', va='center', rotation='vertical')

        if save_directory is not None:
            save_file = os.path.join(save_directory,
                                     '{}params_{}'.format(plot_prefix, name))
            lf.save(save_file, file_format=figure_format)
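
A usage sketch (hypothetical paths; demo_es stands for an exploration-strategy instance providing latex_name(), e.g. the stub from Example #3, keyed by the rule name recorded in the run):

average_parameter_estimates(
    directory_name='results/Jan_01/12_00',
    results_path='results/Jan_01/12_00/results.csv',
    exploration_rule='demo_rule',
    unique_exploration_classes={'demo_rule': demo_es},
    top_number_models=3,
    true_params_dict={'xTx': 0.5},
    save_directory='results/Jan_01/12_00/plots',
)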
Example #6
def plot_dynamics_multiple_models(directory_name,
                                  results_path,
                                  results_file_name_start='results',
                                  use_experimental_data=False,
                                  dataset=None,
                                  true_expectation_value_path=None,
                                  probes_plot_file=None,
                                  exploration_rule=None,
                                  unique_exploration_classes=None,
                                  top_number_models=2,
                                  save_true_expec_vals_alone_plot=True,
                                  collective_analysis_pickle_file=None,
                                  return_results=False,
                                  save_to_file=None,
                                  figure_format='png'):
    r"""
    Plots reproduced dynamics against time
    for the top models, i.e. those which win the most. 

    TODO: refactor this code - it should not need to unpickle
    all the files which have already been unpickled and stored in the summary
    results CSV.
    TODO: this is a very old method and can surely be improved using the Pandas dataframes now stored.

    :param directory_name: path to directory where results .p files are stored.
    :param results_path: path to CSV with all results for this run.
    :param results_file_name_start: prefix of the result files to include.
    :param use_experimental_data: bool, whether experimental (fixed) data was used.
    :param true_expectation_value_path: path to file containing pre-computed expectation 
        values.
    :param probes_plot_file: path to file with specific probes (states) to use
        when plotting, for consistency.
    :param exploration_rule: the name of the exploration strategy used. 
    :param unique_exploration_classes: dict with single instance of each exploration strategy class
        used in this run.
    :param top_number_models: Number of models to compute averages for 
        (top by number of instance wins). 
    :param save_true_expec_vals_alone_plot: bool, whether to save a 
        separate plot only of true expectation values, in addition
        to reproduced dynamics.
    :param collective_analysis_pickle_file: if not None, store analysed data
        to this path. 
    :param return_results: bool, to return the analysed data upon function call.
    :param save_to_file: if not None, path to save PNG. 

    :returns None:
    """
    from matplotlib import cm
    plt.switch_backend('agg')

    results = pd.read_csv(results_path, index_col='QID')

    all_winning_models = list(results.loc[:, 'NameAlphabetical'])

    def rank_models(n):
        # sort distinct models by win count, descending; from
        # https://codegolf.stackexchange.com/questions/17287/sort-the-distinct-elements-of-a-list-in-descending-order-by-frequency
        return sorted(set(n), key=n.count)[::-1]

    if len(all_winning_models) > top_number_models:
        winning_models = rank_models(all_winning_models)[0:top_number_models]
    else:
        winning_models = list(set(all_winning_models))

    cm_subsection = np.linspace(0, 0.8, len(winning_models))
    colours = [cm.viridis(x) for x in cm_subsection]

    experimental_measurements = pickle.load(
        open(true_expectation_value_path, 'rb'))

    expectation_values_by_name = {}
    os.chdir(directory_name)
    pickled_files = []
    for file in os.listdir(directory_name):
        if file.endswith(".p") and file.startswith(results_file_name_start):
            pickled_files.append(file)
    num_results_files = len(pickled_files)
    exploration_strategies = {}
    for f in pickled_files:
        fname = directory_name + '/' + str(f)
        result = pickle.load(open(fname, 'rb'))
        alph = result['NameAlphabetical']
        expec_values = result['ExpectationValues']

        if alph in expectation_values_by_name.keys():
            expectation_values_by_name[alph].append(expec_values)
        else:
            expectation_values_by_name[alph] = [expec_values]

        if alph not in list(exploration_strategies.keys()):
            exploration_strategies[alph] = result['ExplorationRule']

    exploration_classes = {}
    for g in list(exploration_strategies.keys()):
        try:
            exploration_classes[g] = unique_exploration_classes[
                exploration_strategies[g]]
        except BaseException:
            exploration_classes[g] = None

    try:
        true_model = unique_exploration_classes[exploration_rule].true_model
    except BaseException:
        print("Couldn't find exploration strategy of {} in \n {}".format(
            exploration_rule, unique_exploration_classes))
        raise

    collect_expectation_values = {
        'means': {},
        'medians': {},
        'true': {},
        'mean_std_dev': {},
        'success_rate': {},
        'r_squared': {}
    }
    success_rate_by_term = {}
    nmod = len(winning_models)
    if nmod == 1:
        lf = lfig.LatexFigure(auto_label=False, )
    else:
        ncols = int(np.ceil(np.sqrt(nmod)))
        nrows = int(np.ceil(nmod / ncols)) + 1  # 1 extra row for "master"
        lf = lfig.LatexFigure(auto_label=False, gridspec_layout=(nrows, ncols))

    full_plot_axis = lf.new_axis(force_position=(0, 0), span=(1, 'all'))
    model_statistics = {}

    for term in winning_models:
        expectation_values = {}
        num_sets_of_this_name = len(expectation_values_by_name[term])
        for i in range(num_sets_of_this_name):
            learned_expectation_values = (expectation_values_by_name[term][i])

            for t in list(experimental_measurements.keys()):
                try:
                    expectation_values[t].append(learned_expectation_values[t])
                except BaseException:
                    try:
                        expectation_values[t] = [learned_expectation_values[t]]
                    except BaseException:
                        # if t can't be found, move on
                        pass

        means = {}
        std_dev = {}
        true = {}
        t_values = {}
        lower_iqr_expectation_values = {}
        higher_iqr_expectation_values = {}

        true_times = sorted(expectation_values.keys())
        # assumes stored times are already rounded (2 d.p. for t > 0.1), so
        # the rounded values still index expectation_values correctly
        times = [np.round(t, 2) if t > 0.1 else t for t in true_times]
        for t in times:
            means[t] = np.mean(expectation_values[t])
            std_dev[t] = np.std(expectation_values[t])
            lower_iqr_expectation_values[t] = np.percentile(
                expectation_values[t], 25)
            higher_iqr_expectation_values[t] = np.percentile(
                expectation_values[t], 75)
            true[t] = experimental_measurements[t]
            if num_sets_of_this_name > 1:
                expec_values_array = np.array(
                    [[i] for i in expectation_values[t]])
                if use_experimental_data:
                    t_val = stats.ttest_1samp(
                        expec_values_array,  # list of expec vals for this t
                        true[t],  # true expec val of t
                        axis=0,
                        nan_policy='omit')
                else:
                    true_dist = stats.norm.rvs(
                        loc=true[t],
                        scale=0.001,
                        size=np.shape(expec_values_array))
                    t_val = stats.ttest_ind(
                        expec_values_array,  # list of expec vals for this t
                        true_dist,  # true expec val of t
                        axis=0,
                        nan_policy='omit')

                if not np.isnan(float(t_val[1])):
                    t_values[t] = t_val[1]
                else:
                    print("t_val is nan for t=", t)

        true_exp = [true[t] for t in times]
        # TODO should this be the number of times this model won???
        num_runs = num_sets_of_this_name
        success_rate = 0

        for t in times:

            true_likelihood = true[t]
            mean = means[t]
            std = std_dev[t]
            credible_region = (2 / np.sqrt(num_runs)) * std

            if ((true_likelihood < (mean + credible_region))
                    and (true_likelihood > (mean - credible_region))):
                success_rate += 1 / len(times)

        mean_exp = np.array([means[t] for t in times])
        std_dev_exp = np.array([std_dev[t] for t in times])
        lower_iqr_exp = np.array(
            [lower_iqr_expectation_values[t] for t in times])
        higher_iqr_exp = np.array(
            [higher_iqr_expectation_values[t] for t in times])
        residuals = (mean_exp - true_exp)**2
        sum_residuals = np.sum(residuals)
        mean_true_val = np.mean(true_exp)
        true_mean_minus_val = (true_exp - mean_true_val)**2
        sum_of_squares = np.sum(true_mean_minus_val)
        if sum_of_squares != 0:
            final_r_squared = 1 - sum_residuals / sum_of_squares
        else:
            print("[multiQMD plots] sum of squares 0")
            final_r_squared = -100

        # R^2 for interquartile range
        lower_iqr_sum_residuals = np.sum((lower_iqr_exp - true_exp)**2)
        lower_iqr_sum_of_squares = np.sum(
            (lower_iqr_exp - np.mean(lower_iqr_exp))**2)
        lower_iqr_r_sq = 1 - (lower_iqr_sum_residuals /
                              lower_iqr_sum_of_squares)
        higher_iqr_sum_residuals = np.sum((higher_iqr_exp - true_exp)**2)
        higher_iqr_sum_of_squares = np.sum(
            (higher_iqr_exp - np.mean(higher_iqr_exp))**2)
        higher_iqr_r_sq = 1 - (higher_iqr_sum_residuals /
                               higher_iqr_sum_of_squares)

        name = exploration_classes[term].latex_name(term)
        description = r"{}".format(name)

        if term == true_model:
            description += r' (= $\hat{{H}}_0$)'

        collect_expectation_values['means'][name] = mean_exp
        collect_expectation_values['mean_std_dev'][name] = std_dev_exp
        collect_expectation_values['success_rate'][name] = success_rate
        model_statistics[name] = {
            'r_squared_median_exp_val': final_r_squared,
            'mean_expectation_values': mean_exp,
            'mean_std_dev': std_dev_exp,
            'success_rate_t_test': success_rate,
            'num_wins': num_sets_of_this_name,
            'win_percentage':
            int(100 * num_sets_of_this_name / num_results_files),
            'num_instances': num_results_files,
            'lower_iqr_exp_val': lower_iqr_exp,
            'higher_iqr_exp_val': higher_iqr_exp,
            'lower_iqr_r_sq': lower_iqr_r_sq,
            'higher_iqr_r_sq': higher_iqr_r_sq,
            'times': times
        }
        if nmod > 1:
            ax = lf.new_axis()
            ax.plot(times,
                    mean_exp,
                    c=colours[winning_models.index(term)],
                    label=description)
            ax.fill_between(
                times,
                mean_exp - std_dev_exp,
                mean_exp + std_dev_exp,
                alpha=0.2,
                facecolor=colours[winning_models.index(term)],
            )
            ax.set_ylim(0, 1)
            ax.set_xlim(0, max(times))

            success_rate_by_term[term] = success_rate
            ax.scatter(times, true_exp, color='r', s=5, label='System')
            ax.plot(times, true_exp, color='r', alpha=0.3)
            ax.set_yticks([0, 0.5, 1.0])
            ax.set_title(description)

        # Add this model to "master" plot

        high_level_label = str(name)
        if term == true_model:
            high_level_label += r' (= $\hat{{H}}_0$)'

        full_plot_axis.plot(times,
                            mean_exp,
                            c=colours[winning_models.index(term)],
                            label=high_level_label)
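    # times and true_exp carry over from the last model processed above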
    full_plot_axis.scatter(times, true_exp, color='r', s=5, label='System')
    full_plot_axis.plot(times, true_exp, color='r', alpha=0.3)

    full_plot_axis.legend(ncol=5, )
    full_plot_axis.set_ylim(0, 1.25)
    full_plot_axis.set_yticks([0, 0.5, 1.0])
    full_plot_axis.set_xlim(0, max(times))
    if nmod > 1:
        lf.fig.text(0.45, -0.04, 'Time', ha='center')
        lf.fig.text(-0.04,
                    0.5,
                    'Expectation Value',
                    va='center',
                    rotation='vertical')
    else:
        full_plot_axis.set_ylabel("Expectation value")
        full_plot_axis.set_xlabel("Time (a.u)")

    if save_to_file is not None:
        lf.fig.suptitle("Dynamics of trained models")
        lf.save(save_to_file, file_format=figure_format)

    # Also save an image of only the system dynamics
    if save_true_expec_vals_alone_plot and save_to_file is not None:
        lf = lfig.LatexFigure(fraction=0.75, auto_label=False)
        ax = lf.new_axis()
        ax.plot(times,
                true_exp,
                marker='o',
                color='r',
                label='System')
        ax.set_xlabel('Time')
        ax.set_ylabel('Expectation Value')
        ax.legend()
        true_only_fig_file = save_to_file + '_system'
        ax.set_title("True model dynamics")
        lf.save(true_only_fig_file, file_format=figure_format)

    # add the combined analysis dict
    collect_expectation_values['times'] = true_times
    collect_expectation_values['true'] = true_exp

    if collective_analysis_pickle_file is not None:
        if os.path.isfile(collective_analysis_pickle_file) is False:
            pickle.dump(model_statistics,
                        open(collective_analysis_pickle_file, 'wb'))
        else:
            # load current analysis dict, add to it and rewrite it.
            combined_analysis = pickle.load(
                open(collective_analysis_pickle_file, 'rb'))
            for model in model_statistics.keys():
                new_keys = list(model_statistics[model].keys())
                for key in new_keys:
                    combined_analysis[model][key] = model_statistics[model][
                        key]
            pickle.dump(combined_analysis,
                        open(collective_analysis_pickle_file, 'wb'))
    else:
        print("[analyse] collective analysis path:",
              collective_analysis_pickle_file)

    if return_results:
        expectation_values_by_latex_name = {}
        for term in winning_models:
            latex_name = unique_exploration_classes[
                exploration_rule].latex_name(term)
            expectation_values_by_latex_name[
                latex_name] = expectation_values_by_name[term]

        return times, mean_exp, std_dev_exp, winning_models, term, true, description, expectation_values_by_latex_name, expectation_values_by_name
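
A usage sketch (hypothetical paths; demo_es as in the earlier examples). With return_results=True the analysed data are returned for further processing:

returned = plot_dynamics_multiple_models(
    directory_name='results/Jan_01/12_00',
    results_path='results/Jan_01/12_00/results.csv',
    true_expectation_value_path='results/Jan_01/12_00/system_measurements.p',
    exploration_rule='demo_rule',
    unique_exploration_classes={'demo_rule': demo_es},
    top_number_models=3,
    return_results=True,
    save_to_file='dynamics',
)
(times, mean_exp, std_dev_exp, winning_models, term, true,
 description, expec_vals_by_latex_name, expec_vals_by_name) = returned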