fscoring.InfoGain()) tree_learner = octree.TreeLearner(split=split_const, min_instances=10, same_majority_pruning=True, store_instances=True) from Orange.data import Table # TEST for equality of "original" vs. "pickled/unpickled" Orange trees from PyMTL.util import pickle_obj, unpickle_obj import numpy as np for i in range(10): data = Table(os.path.join(results_path, "bool_func-a8d4n100g2tg5nse0.0rs15" "nls10-seed63-complete_test/orange_merged_learn-" "repetition{}.tab".format(i))) tree = tree_learner(data) pickle_path = os.path.join(results_path, "test-pickle.pkl") pickle_obj(tree, pickle_path) unpickled_tree = unpickle_obj(pickle_path) print ("Repetition {} original vs. pickled/unpickled tree equality:". format(i)), print np.all(tree[e] == unpickled_tree[e] for e in data) os.remove(pickle_path) data = Table(os.path.join(results_path, "bool_func-a8d4n100g2tg5nse0.0rs15" "nls10-seed63-complete_test/orange_merged_learn-" "repetition0.tab")) tree = tree_learner(data) tex_file = os.path.join(results_path, "test-tree.tex") pdf_file = os.path.join(results_path, "test-tree.pdf") draw_and_save_tikz_tree_document(tree, tex_file) import subprocess subprocess.call(["-c", "pdflatex -interaction=batchmode {0} && " "rm {1}.{{aux,log}} && pdfcrop --margins 10 {1}.pdf {2}".
def combine_experiment_results(results_path_fmt, chan_par_values,
                               file_name_fmt, repeats, error_measure="std",
                               title="", xlabel=""):
    """Combine the results of experiments matching the given results_path_fmt,
    where the value of a particular parameter changes according to the given
    chan_par_values list.
    Draw a plot showing the combined results and save it to the given file
    name pattern.

    Parameters
    ----------
    results_path_fmt : string
        Template for directory names containing pickled results. It must
        contain exactly one pair of braces ({}) where the value of the
        changing parameter will be put.
    chan_par_values : list
        A list of values of the changing parameter (i.e. the parameter, whose
        value is not fixed during this experiment).
    file_name_fmt : string
        Template for the paths where to save the drawn plots. It must contain
        exactly one pair of braces ({}) where the base learner's name will be
        put.
    repeats : int
        Number of repetitions of each experiment.
    error_measure : string
        Indicator of which measure to use for plots' error bars.
        Currently, only "std" (standard deviation) and "95ci" (95% confidence
        intervals) are supported.
    title : string (optional)
        The title of each plot.
    xlabel : string (optional)
        Each plot's x-axis label.

    Raises
    ------
    ValueError
        If error_measure is not one of the supported values.

    """
    # key for extracting the AUC results from the pickled dictionaries
    AUC_STRING = ("Results for AUC (weighting method: all_equal, error margin "
                  "measure: std)")
    ERROR_MEASURE_TO_PRINT = {"std": "std. dev.",
                              "95ci": "95% conf. intervals"}

    # Fail fast: reject an unsupported error measure before any result file
    # is read, so the caller always gets a ValueError (and never a KeyError
    # from the title lookup at the end of the function).
    if error_measure not in ERROR_MEASURE_TO_PRINT:
        raise ValueError("Unknown error measure: {}".format(error_measure))

    def _convert_error(err):
        """Convert the given error value according to the error_measure
        parameter (which has already been validated above).

        """
        if error_measure == "95ci":
            return convert_std_to_ci95(err, repeats)
        return err

    # unpickle the results
    res = dict()
    for v in chan_par_values:
        results_path = results_path_fmt.format(v)
        res[v] = unpickle_obj(os.path.join(results_path,
                                           "overall_results.pkl"))

    # extract the names of base learners and learners from the results of the
    # first parameter value; the keys are copied into lists because dictionary
    # views cannot be indexed on Python 3
    first_auc = res[chan_par_values[0]][AUC_STRING]
    base_learners = list(first_auc.keys())
    learners = list(first_auc[base_learners[0]].keys())

    # transform the results to match the input of the plot_multiple_separate()
    # function
    x_points = OrderedDict()
    avgs = OrderedDict()
    errs = OrderedDict()
    for v in chan_par_values:
        res_auc = res[v][AUC_STRING]
        for bl in base_learners:
            bl_x_points = x_points.setdefault(bl, OrderedDict())
            bl_avgs = avgs.setdefault(bl, OrderedDict())
            bl_errs = errs.setdefault(bl, OrderedDict())
            for l in learners:
                avg, err = res_auc[bl][l]
                bl_x_points.setdefault(l, []).append(v)
                bl_avgs.setdefault(l, []).append(avg)
                bl_errs.setdefault(l, []).append(_convert_error(err))

    # generate the plot description objects
    plot_desc = OrderedDict()
    for bl in base_learners:
        plot_desc[bl] = [
            LinePlotDesc(x_points[bl][l], avgs[bl][l], errs[bl][l], l,
                         color=LEARNERS_TO_COLORS[l],
                         ecolor=LEARNERS_TO_COLORS[l])
            for l in learners
        ]

    # draw and save the plots
    error_desc = ERROR_MEASURE_TO_PRINT[error_measure]
    if title == "":
        title = "Error bars show {}".format(error_desc)
    else:
        title = title + " (error bars show {})".format(error_desc)
    plot_multiple_separate(plot_desc, file_name_fmt, title=title,
                           xlabel=xlabel, ylabel="AUC",
                           x_tick_points=chan_par_values, ylim_bottom=0,
                           ylim_top=1, error_bars=True)
def combine_experiment_results(results_path_fmt, chan_par_values,
                               file_name_fmt, repeats, error_measure="std",
                               title="", xlabel=""):
    """Combine the results of experiments matching the given results_path_fmt,
    where the value of a particular parameter changes according to the given
    chan_par_values list.
    Draw a plot showing the combined results and save it to the given file
    name pattern.

    Parameters
    ----------
    results_path_fmt : string
        Template for directory names containing pickled results. It must
        contain exactly one pair of braces ({}) where the value of the
        changing parameter will be put.
    chan_par_values : list
        A list of values of the changing parameter (i.e. the parameter, whose
        value is not fixed during this experiment).
    file_name_fmt : string
        Template for the paths where to save the drawn plots. It must contain
        exactly one pair of braces ({}) where the base learner's name will be
        put.
    repeats : int
        Number of repetitions of each experiment.
    error_measure : string
        Indicator of which measure to use for plots' error bars.
        Currently, only "std" (standard deviation) and "95ci" (95% confidence
        intervals) are supported.
    title : string (optional)
        The title of each plot.
    xlabel : string (optional)
        Each plot's x-axis label.

    Raises
    ------
    ValueError
        If error_measure is not one of the supported values.

    """
    # key for extracting the AUC results from the pickled dictionaries
    AUC_STRING = ("Results for AUC (weighting method: all_equal, error margin "
                  "measure: std)")
    ERROR_MEASURE_TO_PRINT = {
        "std": "std. dev.",
        "95ci": "95% conf. intervals"
    }

    # Fail fast: reject an unsupported error measure before any result file
    # is read, so the caller always gets a ValueError (and never a KeyError
    # from the title lookup at the end of the function).
    if error_measure not in ERROR_MEASURE_TO_PRINT:
        raise ValueError("Unknown error measure: {}".format(error_measure))

    def _convert_error(err):
        """Convert the given error value according to the error_measure
        parameter (which has already been validated above).

        """
        if error_measure == "95ci":
            return convert_std_to_ci95(err, repeats)
        return err

    # unpickle the results
    res = dict()
    for v in chan_par_values:
        results_path = results_path_fmt.format(v)
        res[v] = unpickle_obj(
            os.path.join(results_path, "overall_results.pkl"))

    # extract the names of base learners and learners from the results of the
    # first parameter value; the keys are copied into lists because dictionary
    # views cannot be indexed on Python 3
    first_auc = res[chan_par_values[0]][AUC_STRING]
    base_learners = list(first_auc.keys())
    learners = list(first_auc[base_learners[0]].keys())

    # transform the results to match the input of the plot_multiple_separate()
    # function
    x_points = OrderedDict()
    avgs = OrderedDict()
    errs = OrderedDict()
    for v in chan_par_values:
        res_auc = res[v][AUC_STRING]
        for bl in base_learners:
            bl_x_points = x_points.setdefault(bl, OrderedDict())
            bl_avgs = avgs.setdefault(bl, OrderedDict())
            bl_errs = errs.setdefault(bl, OrderedDict())
            for l in learners:
                avg, err = res_auc[bl][l]
                bl_x_points.setdefault(l, []).append(v)
                bl_avgs.setdefault(l, []).append(avg)
                bl_errs.setdefault(l, []).append(_convert_error(err))

    # generate the plot description objects
    plot_desc = OrderedDict()
    for bl in base_learners:
        plot_desc[bl] = [
            LinePlotDesc(x_points[bl][l],
                         avgs[bl][l],
                         errs[bl][l],
                         l,
                         color=LEARNERS_TO_COLORS[l],
                         ecolor=LEARNERS_TO_COLORS[l])
            for l in learners
        ]

    # draw and save the plots
    error_desc = ERROR_MEASURE_TO_PRINT[error_measure]
    if title == "":
        title = "Error bars show {}".format(error_desc)
    else:
        title = title + " (error bars show {})".format(error_desc)
    plot_multiple_separate(plot_desc,
                           file_name_fmt,
                           title=title,
                           xlabel=xlabel,
                           ylabel="AUC",
                           x_tick_points=chan_par_values,
                           ylim_bottom=0,
                           ylim_top=1,
                           error_bars=True)
store_instances=True) from Orange.data import Table # TEST for equality of "original" vs. "pickled/unpickled" Orange trees from PyMTL.util import pickle_obj, unpickle_obj import numpy as np for i in range(10): data = Table( os.path.join( results_path, "bool_func-a8d4n100g2tg5nse0.0rs15" "nls10-seed63-complete_test/orange_merged_learn-" "repetition{}.tab".format(i))) tree = tree_learner(data) pickle_path = os.path.join(results_path, "test-pickle.pkl") pickle_obj(tree, pickle_path) unpickled_tree = unpickle_obj(pickle_path) print("Repetition {} original vs. pickled/unpickled tree equality:". format(i)), print np.all(tree[e] == unpickled_tree[e] for e in data) os.remove(pickle_path) data = Table( os.path.join( results_path, "bool_func-a8d4n100g2tg5nse0.0rs15" "nls10-seed63-complete_test/orange_merged_learn-" "repetition0.tab")) tree = tree_learner(data) tex_file = os.path.join(results_path, "test-tree.tex") pdf_file = os.path.join(results_path, "test-tree.pdf") draw_and_save_tikz_tree_document(tree, tex_file) import subprocess