def test_tasks(self, learners, base_learners, measures, results_path,
               save_orange_data=False):
    """Repeat the following experiment self._repeats times:
    Prepare tasks' data with the _prepare_tasks_data() function.
    Test the performance of the given learning algorithms with the given
    base learning algorithms and compute the testing results using the
    given scoring measures.
    Process the obtained repetition scores with the
    _process_repetition_scores() function.
    Note: This function only tests some specific combinations of
    base_learners and learners as used by the binarization experiment:
    - a learner that is an instance of bin_exp.TreeMarkedAndMergedLearner
      is called with the task names, the merged Orange learning data and
      the base learner as given,
    - any other learner requires the base learner to be an
      Orange.core.Learner, which is wrapped in an OrangeClassifierWrapper
      before the learner is called with self._tasks.
    Any other combination raises a ValueError.

    If calling a learner fails, the error is logged, the scores of the
    completed repetitions (if any) are processed and pickled to
    results_path, and the original exception is re-raised.

    Arguments:
    learners -- ordered dictionary with items of the form (name, learner),
        where name is a string representing the learner's name and
        learner is a MTL method (e.g. ERM, NoMerging, ...)
    base_learners -- ordered dictionary with items of the form (name,
        learner), where name is a string representing the base learner's
        name and learner is the base learner object (see the note above
        for the accepted types)
    measures -- list of strings representing measure's names; passed
        unchanged to self._test_tasks()
    results_path -- string representing the path where to save any extra
        information about the running of this test (the Orange data
        tables if save_orange_data is True, the pickled/SVG/TikZ decision
        trees of TreeMarkedAndMergedLearner learners and the pickled
        partial results when there is an error in calling the learner)
    save_orange_data -- boolean indicating whether to save the Orange data
        tables created with the call to self._prepare_tasks_data()
        function

    """
    rpt_scores = OrderedDict()
    dend_info = {bl: OrderedDict() for bl in base_learners}
    for i in range(self._repeats):
        self._repetition_number = i
        self._prepare_tasks_data(**self._tasks_data_params)
        if save_orange_data:
            self._save_orange_data(i, results_path)
        rpt_scores[i] = {bl: dict() for bl in base_learners}
        for bl in base_learners:
            base_learner = base_learners[bl]
            for l in learners:
                learner = learners[l]
                # NOTE(review): time.clock() measures CPU time on Unix and
                # no longer exists in Python >= 3.8; kept because the rest
                # of the file targets Python 2.
                start = time.clock()
                try:
                    is_tree_learner = isinstance(
                        learner, bin_exp.TreeMarkedAndMergedLearner)
                    if is_tree_learner:
                        R = learner(self._tasks.keys(),
                                    self._merged_learn_data_orange,
                                    base_learner)
                    elif isinstance(base_learner, Orange.core.Learner):
                        wrapped_bl = OrangeClassifierWrapper(
                            orange_learner=base_learner)
                        R = learner(self._tasks, wrapped_bl)
                    else:
                        raise ValueError("An unexpected combination of "
                            "base_learner and leaner detected: {} and "
                            "{}".format(type(base_learner), type(learner)))
                except Exception:
                    logger.exception("There was an error during repetition:"
                        " {} with base learner: {} and learner: {}.".format(
                            i, bl, l))
                    if i > 0:
                        logger.info("Saving the results of previous "
                                    "repetitions.")
                        # remove the scores of the last repetition
                        # NOTE(review): dend_info may still hold entries of
                        # repetition i stored by learners that succeeded
                        # before the failure -- confirm that
                        # _process_repetition_scores() tolerates that.
                        del rpt_scores[i]
                        # process the remaining repetition scores
                        self._process_repetition_scores(rpt_scores,
                                                        dend_info)
                        # pickle them to a file
                        pickle_path_fmt = os.path.join(results_path,
                                                       "bl-{}.pkl")
                        self.pickle_test_results(pickle_path_fmt)
                    # re-raise the original exception; a bare raise keeps
                    # its traceback and is valid in Python 2 and Python 3
                    raise
                rpt_scores[i][bl][l] = self._test_tasks(R["task_models"],
                                                        measures)
                end = time.clock()
                logger.debug("Finished repetition: {}, base learner: {}, "
                    "learner: {} in {:.2f}s".format(i, bl, l, end - start))
                # store dendrogram info if the results contain it
                if "dend_info" in R:
                    dend_info[bl][i] = R["dend_info"]
                # pickle and visualize the decision tree if the learner is a
                # (sub)class of TreeMarkedAndMergedLearner
                if is_tree_learner:
                    # the first task model is the one that gets saved
                    tree = list(R["task_models"].values())[0]
                    prefix = os.path.join(
                        results_path, "{}-{}-repeat{}".format(bl, l, i))
                    pickle_obj(tree, prefix + ".pkl")
                    save_treegraph_image(tree, prefix + ".svg")
                    draw_and_save_tikz_tree_document(tree,
                                                     prefix + "-tikz.tex")
    self._process_repetition_scores(rpt_scores, dend_info)
def test_tasks(self, learners, base_learners, measures, results_path,
               save_orange_data=False):
    """Repeat the following experiment self._repeats times:
    Prepare tasks' data with the _prepare_tasks_data() function.
    Test the performance of the given learning algorithms with the given
    base learning algorithms and compute the testing results using the
    given scoring measures.
    Process the obtained repetition scores with the
    _process_repetition_scores() function.
    Note: This function only tests some specific combinations of
    base_learners and learners as used by the binarization experiment.
    A bin_exp.TreeMarkedAndMergedLearner learner receives the task names,
    the merged Orange learning data and the unmodified base learner. Every
    other learner receives self._tasks and the base learner wrapped in an
    OrangeClassifierWrapper, which requires the base learner to be an
    Orange.core.Learner; otherwise a ValueError is raised.

    When a learner call raises, the exception is logged and re-raised.
    Before re-raising, and only if at least one repetition has already
    completed, the scores of the completed repetitions are processed and
    pickled under results_path.

    Arguments:
    learners -- ordered dictionary with items of the form (name, learner),
        where name is a string representing the learner's name and
        learner is a MTL method (e.g. ERM, NoMerging, ...)
    base_learners -- ordered dictionary with items of the form (name,
        learner), where name is a string representing the base learner's
        name and learner is the base learner object (an
        Orange.core.Learner unless the MTL method is a
        TreeMarkedAndMergedLearner)
    measures -- list of strings representing measure's names; handed
        unchanged to self._test_tasks()
    results_path -- string representing the directory that receives all
        files written by this test: saved Orange data tables (if
        requested), pickled/SVG/TikZ decision trees and, on error, the
        pickled partial results
    save_orange_data -- boolean indicating whether to save the Orange data
        tables created with the call to self._prepare_tasks_data()
        function

    """
    rpt_scores = OrderedDict()
    dend_info = {bl_name: OrderedDict() for bl_name in base_learners}
    for rep in range(self._repeats):
        self._repetition_number = rep
        self._prepare_tasks_data(**self._tasks_data_params)
        if save_orange_data:
            self._save_orange_data(rep, results_path)
        rpt_scores[rep] = {bl_name: dict() for bl_name in base_learners}
        for bl_name in base_learners:
            base_learner = base_learners[bl_name]
            for l_name in learners:
                learner = learners[l_name]
                # NOTE(review): time.clock() is CPU time on Unix and was
                # dropped in Python 3.8; left as is for this Python 2 file.
                start = time.clock()
                try:
                    merges_trees = isinstance(
                        learner, bin_exp.TreeMarkedAndMergedLearner)
                    if merges_trees:
                        R = learner(self._tasks.keys(),
                                    self._merged_learn_data_orange,
                                    base_learner)
                    elif isinstance(base_learner, Orange.core.Learner):
                        wrapped_bl = OrangeClassifierWrapper(
                            orange_learner=base_learner)
                        R = learner(self._tasks, wrapped_bl)
                    else:
                        raise ValueError(
                            "An unexpected combination of "
                            "base_learner and leaner detected: {} and "
                            "{}".format(type(base_learner), type(learner)))
                except Exception:
                    logger.exception(
                        "There was an error during repetition:"
                        " {} with base learner: {} and learner: {}.".format(
                            rep, bl_name, l_name))
                    if rep > 0:
                        logger.info("Saving the results of previous "
                                    "repetitions.")
                        # remove the scores of the last repetition
                        # NOTE(review): dend_info can still contain entries
                        # for this repetition from learners that finished
                        # before the failure -- verify that
                        # _process_repetition_scores() copes with that.
                        del rpt_scores[rep]
                        # process the remaining repetition scores
                        self._process_repetition_scores(
                            rpt_scores, dend_info)
                        # pickle them to a file
                        pickle_path_fmt = os.path.join(
                            results_path, "bl-{}.pkl")
                        self.pickle_test_results(pickle_path_fmt)
                    # re-raise the original exception with its traceback;
                    # the bare form works on both Python 2 and Python 3
                    raise
                rpt_scores[rep][bl_name][l_name] = self._test_tasks(
                    R["task_models"], measures)
                end = time.clock()
                logger.debug("Finished repetition: {}, base learner: {}, "
                             "learner: {} in {:.2f}s".format(
                                 rep, bl_name, l_name, end - start))
                # store dendrogram info if the results contain it
                if "dend_info" in R:
                    dend_info[bl_name][rep] = R["dend_info"]
                # pickle and visualize the decision tree if the learner is a
                # (sub)class of TreeMarkedAndMergedLearner
                if not merges_trees:
                    continue
                # only the first task model is saved
                tree = list(R["task_models"].values())[0]
                out_base = os.path.join(
                    results_path,
                    "{}-{}-repeat{}".format(bl_name, l_name, rep))
                pickle_obj(tree, out_base + ".pkl")
                save_treegraph_image(tree, out_base + ".svg")
                draw_and_save_tikz_tree_document(tree,
                                                 out_base + "-tikz.tex")
    self._process_repetition_scores(rpt_scores, dend_info)
"nls10-seed63-complete_test/orange_merged_learn-" "repetition{}.tab".format(i))) tree = tree_learner(data) pickle_path = os.path.join(results_path, "test-pickle.pkl") pickle_obj(tree, pickle_path) unpickled_tree = unpickle_obj(pickle_path) print ("Repetition {} original vs. pickled/unpickled tree equality:". format(i)), print np.all(tree[e] == unpickled_tree[e] for e in data) os.remove(pickle_path) data = Table(os.path.join(results_path, "bool_func-a8d4n100g2tg5nse0.0rs15" "nls10-seed63-complete_test/orange_merged_learn-" "repetition0.tab")) tree = tree_learner(data) tex_file = os.path.join(results_path, "test-tree.tex") pdf_file = os.path.join(results_path, "test-tree.pdf") draw_and_save_tikz_tree_document(tree, tex_file) import subprocess subprocess.call(["-c", "pdflatex -interaction=batchmode {0} && " "rm {1}.{{aux,log}} && pdfcrop --margins 10 {1}.pdf {2}". format(tex_file, tex_file[:-4], pdf_file)], shell=True, cwd=results_path) # initialize Qt application import sys from OWWidget import QApplication a = QApplication(sys.argv) from PyMTL.orange_visualizations import save_treegraph_image save_treegraph_image(tree, os.path.join(results_path, "test-tree.svg"))
unpickled_tree = unpickle_obj(pickle_path) print("Repetition {} original vs. pickled/unpickled tree equality:". format(i)), print np.all(tree[e] == unpickled_tree[e] for e in data) os.remove(pickle_path) data = Table( os.path.join( results_path, "bool_func-a8d4n100g2tg5nse0.0rs15" "nls10-seed63-complete_test/orange_merged_learn-" "repetition0.tab")) tree = tree_learner(data) tex_file = os.path.join(results_path, "test-tree.tex") pdf_file = os.path.join(results_path, "test-tree.pdf") draw_and_save_tikz_tree_document(tree, tex_file) import subprocess subprocess.call([ "-c", "pdflatex -interaction=batchmode {0} && " "rm {1}.{{aux,log}} && pdfcrop --margins 10 {1}.pdf {2}".format( tex_file, tex_file[:-4], pdf_file) ], shell=True, cwd=results_path) # initialize Qt application import sys from OWWidget import QApplication a = QApplication(sys.argv) from PyMTL.orange_visualizations import save_treegraph_image save_treegraph_image(tree, os.path.join(results_path, "test-tree.svg"))