fscoring.InfoGain())
 tree_learner = octree.TreeLearner(split=split_const, min_instances=10,
                         same_majority_pruning=True, store_instances=True)
 from Orange.data import Table
 
 # TEST for equality of "original" vs. "pickled/unpickled" Orange trees
 from PyMTL.util import pickle_obj, unpickle_obj
 import numpy as np
 for i in range(10):
     data = Table(os.path.join(results_path, "bool_func-a8d4n100g2tg5nse0.0rs15"
                           "nls10-seed63-complete_test/orange_merged_learn-"
                           "repetition{}.tab".format(i)))
     tree = tree_learner(data)
     pickle_path = os.path.join(results_path, "test-pickle.pkl")
     pickle_obj(tree, pickle_path)
     unpickled_tree = unpickle_obj(pickle_path)
     print ("Repetition {} original vs. pickled/unpickled tree equality:".
            format(i)),
     print np.all(tree[e] == unpickled_tree[e] for e in data)
 os.remove(pickle_path)
 
 data = Table(os.path.join(results_path, "bool_func-a8d4n100g2tg5nse0.0rs15"
                           "nls10-seed63-complete_test/orange_merged_learn-"
                           "repetition0.tab"))
 tree = tree_learner(data)
 tex_file = os.path.join(results_path, "test-tree.tex")
 pdf_file = os.path.join(results_path, "test-tree.pdf")
 draw_and_save_tikz_tree_document(tree, tex_file)
 import subprocess
 subprocess.call(["-c", "pdflatex -interaction=batchmode {0} && "
                  "rm {1}.{{aux,log}} && pdfcrop --margins 10 {1}.pdf {2}".
Esempio n. 2
0
def combine_experiment_results(results_path_fmt, chan_par_values,
        file_name_fmt, repeats, error_measure="std", title="", xlabel=""):
    """Combine the results of experiments matching the given results_path_fmt,
    where the value of a particular parameter changes according to the given
    chan_par_values list.
    Draw a plot showing the combined results and save it to the given file name
    pattern.
    
    For every value in chan_par_values, the file "overall_results.pkl" is
    unpickled from the directory obtained by formatting results_path_fmt with
    that value. The AUC entry of each unpickled dictionary is expected to map
    base learner name -> learner name -> (average, error) pair. The names of
    base learners and learners are taken from the results of the first value
    in chan_par_values.
    
    Parameters
    ----------
    results_path_fmt : string
        Template for directory names containing pickled results. It must contain
        exactly one pair of braces ({}) where the value of the changing
        parameter will be put.
    chan_par_values : list
        A list of values of the changing parameter (i.e. the parameter, whose
        value is not fixed during this experiment).
    file_name_fmt : string
        Template for the paths where to save the drawn plots. It must contain
        exactly one pair of braces ({}) where the base learner's name will be
        put.
    repeats : int
        Number of repetitions of each experiment.
    error_measure : string
        Indicator of which measure to use for plots' error bars.
        Currently, only "std" (standard deviation) and "95ci" (95% confidence
        intervals) are supported.
    title : string (optional)
        The title of each plot.
    xlabel : string (optional)
        Each plot's x-axis label.
    
    Raises
    ------
    ValueError
        If error_measure is not one of the supported values. The check is done
        before any results are unpickled.
    
    """
    # key for extracting the AUC results from the pickled dictionaries
    AUC_STRING = ("Results for AUC (weighting method: all_equal, error margin "
                  "measure: std)")
    
    ERROR_MEASURE_TO_PRINT = {"std" : "std. dev.",
                              "95ci" : "95% conf. intervals"}
    
    # fail fast: reject an unsupported error measure before doing any I/O
    if error_measure not in ERROR_MEASURE_TO_PRINT:
        raise ValueError("Unknown error measure: {}".format(error_measure))
    
    def _convert_error(err):
        """Convert the given error value according to the error_measure
        parameter (which has already been validated above).
        
        """
        if error_measure == "95ci":
            return convert_std_to_ci95(err, repeats)
        return err
    
    # unpickle the results
    res = dict()
    for v in chan_par_values:
        results_path = results_path_fmt.format(v)
        res[v] = unpickle_obj(os.path.join(results_path, "overall_results.pkl"))
    
    # extract the names of base learners and learners
    # NOTE: the keys are materialized as lists since dictionary views are not
    # indexable on Python 3
    first_auc = res[chan_par_values[0]][AUC_STRING]
    base_learners = list(first_auc.keys())
    learners = list(first_auc[base_learners[0]].keys())
    
    # generate the plot description objects (one list of line descriptions per
    # base learner, one line per learner)
    plot_desc = OrderedDict()
    for bl in base_learners:
        plot_desc[bl] = []
        for l in learners:
            avgs = []
            errs = []
            for v in chan_par_values:
                avg, err = res[v][AUC_STRING][bl][l]
                avgs.append(avg)
                errs.append(_convert_error(err))
            # each line gets its own copy of the x-axis points
            plot_desc[bl].append(LinePlotDesc(list(chan_par_values), avgs,
                errs, l, color=LEARNERS_TO_COLORS[l],
                ecolor=LEARNERS_TO_COLORS[l]))
    
    # draw and save the plots
    error_desc = ERROR_MEASURE_TO_PRINT[error_measure]
    if title == "":
        title = "Error bars show {}".format(error_desc)
    else:
        title = title + " (error bars show {})".format(error_desc)
    plot_multiple_separate(plot_desc, file_name_fmt,
        title=title, xlabel=xlabel, ylabel="AUC",
        x_tick_points=chan_par_values,
        ylim_bottom=0, ylim_top=1, error_bars=True)
Esempio n. 3
0
def combine_experiment_results(results_path_fmt,
                               chan_par_values,
                               file_name_fmt,
                               repeats,
                               error_measure="std",
                               title="",
                               xlabel=""):
    """Combine the results of experiments matching the given results_path_fmt,
    where the value of a particular parameter changes according to the given
    chan_par_values list.
    Draw a plot showing the combined results and save it to the given file name
    pattern.

    The pickled file "overall_results.pkl" is loaded from each directory
    produced by formatting results_path_fmt with one of chan_par_values. Its
    AUC entry is expected to be a mapping of the form
    base learner name -> learner name -> (average, error). Base learner and
    learner names are read from the results belonging to the first value of
    chan_par_values.

    Parameters
    ----------
    results_path_fmt : string
        Template for directory names containing pickled results. It must contain
        exactly one pair of braces ({}) where the value of the changing
        parameter will be put.
    chan_par_values : list
        A list of values of the changing parameter (i.e. the parameter, whose
        value is not fixed during this experiment).
    file_name_fmt : string
        Template for the paths where to save the drawn plots. It must contain
        exactly one pair of braces ({}) where the base learner's name will be
        put.
    repeats : int
        Number of repetitions of each experiment.
    error_measure : string
        Indicator of which measure to use for plots' error bars.
        Currently, only "std" (standard deviation) and "95ci" (95% confidence
        intervals) are supported.
    title : string (optional)
        The title of each plot.
    xlabel : string (optional)
        Each plot's x-axis label.

    Raises
    ------
    ValueError
        If error_measure is not one of the supported values; raised before
        any result file is read.

    """
    # key for extracting the AUC results from the pickled dictionaries
    AUC_STRING = ("Results for AUC (weighting method: all_equal, error margin "
                  "measure: std)")

    ERROR_MEASURE_TO_PRINT = {
        "std": "std. dev.",
        "95ci": "95% conf. intervals"
    }

    # validate up-front so that a typo does not cost a full round of
    # unpickling before being reported
    if error_measure not in ERROR_MEASURE_TO_PRINT:
        raise ValueError("Unknown error measure: {}".format(error_measure))
    convert_to_ci95 = error_measure == "95ci"

    # unpickle the results
    res = dict()
    for v in chan_par_values:
        results_path = results_path_fmt.format(v)
        res[v] = unpickle_obj(os.path.join(results_path,
                                           "overall_results.pkl"))

    # extract the names of base learners and learners; list() is needed
    # because dict.keys() cannot be indexed on Python 3
    reference_auc = res[chan_par_values[0]][AUC_STRING]
    base_learners = list(reference_auc.keys())
    learners = list(reference_auc[base_learners[0]].keys())

    # transform the results to match the input of the plot_multiple_separate()
    # function: one list of line descriptions per base learner
    plot_desc = OrderedDict()
    for bl in base_learners:
        lines = []
        for l in learners:
            x_points = []
            avgs = []
            errs = []
            for v in chan_par_values:
                avg, err = res[v][AUC_STRING][bl][l]
                if convert_to_ci95:
                    err = convert_std_to_ci95(err, repeats)
                x_points.append(v)
                avgs.append(avg)
                errs.append(err)
            lines.append(
                LinePlotDesc(x_points,
                             avgs,
                             errs,
                             l,
                             color=LEARNERS_TO_COLORS[l],
                             ecolor=LEARNERS_TO_COLORS[l]))
        plot_desc[bl] = lines

    # draw and save the plots
    error_bars_desc = ERROR_MEASURE_TO_PRINT[error_measure]
    if title == "":
        title = "Error bars show {}".format(error_bars_desc)
    else:
        title = title + " (error bars show {})".format(error_bars_desc)
    plot_multiple_separate(plot_desc,
                           file_name_fmt,
                           title=title,
                           xlabel=xlabel,
                           ylabel="AUC",
                           x_tick_points=chan_par_values,
                           ylim_bottom=0,
                           ylim_top=1,
                           error_bars=True)
                                      store_instances=True)
    from Orange.data import Table

    # TEST for equality of "original" vs. "pickled/unpickled" Orange trees
    from PyMTL.util import pickle_obj, unpickle_obj
    import numpy as np
    for i in range(10):
        data = Table(
            os.path.join(
                results_path, "bool_func-a8d4n100g2tg5nse0.0rs15"
                "nls10-seed63-complete_test/orange_merged_learn-"
                "repetition{}.tab".format(i)))
        tree = tree_learner(data)
        pickle_path = os.path.join(results_path, "test-pickle.pkl")
        pickle_obj(tree, pickle_path)
        unpickled_tree = unpickle_obj(pickle_path)
        print("Repetition {} original vs. pickled/unpickled tree equality:".
              format(i)),
        print np.all(tree[e] == unpickled_tree[e] for e in data)
    os.remove(pickle_path)

    data = Table(
        os.path.join(
            results_path, "bool_func-a8d4n100g2tg5nse0.0rs15"
            "nls10-seed63-complete_test/orange_merged_learn-"
            "repetition0.tab"))
    tree = tree_learner(data)
    tex_file = os.path.join(results_path, "test-tree.tex")
    pdf_file = os.path.join(results_path, "test-tree.pdf")
    draw_and_save_tikz_tree_document(tree, tex_file)
    import subprocess