def do_analysis(self):
    """Run a greedy heuristic search over partitioning schemes.

    The search starts from the most partitioned scheme (description
    ``range(partnum)``) and scores it with the criterion named by
    ``self.cfg.model_selection``. At every step, each description returned
    by ``algorithm.lumpings`` for the current description is turned into a
    scheme and analysed. If the best of them has a strictly lower score
    than the best seen so far, it becomes the new starting point;
    otherwise the search stops. The search also stops once the accepted
    description contains a single distinct value, or when there are no
    lumpings to try.

    Side effects: sets ``self.total_scheme_num``, ``self.total_subset_num``
    and ``self.schemes_analysed`` for progress reporting, clears
    ``self.cfg.schemes``, and stores the winning result in
    ``self.best_result``.

    Raises:
        AnalysisError: if ``self.cfg.model_selection`` is not one of
            "aic", "aicc" or "bic".
    """
    log.info("Performing greedy analysis")

    models = self.cfg.models
    model_selection = self.cfg.model_selection
    partnum = len(self.cfg.partitions)

    self.total_scheme_num = submodels.count_greedy_schemes(partnum)
    log.info("This will result in a maximum of %s schemes being created",
             self.total_scheme_num)

    self.total_subset_num = submodels.count_greedy_subsets(partnum)
    log.info("PartitionFinder will have to analyse a maximum of %d subsets "
             "of sites to complete this analysis", self.total_subset_num)

    if self.total_subset_num > 10000:
        log.warning("%d is a lot of subsets, this might take a long time to analyse",
                    self.total_subset_num)
        log.warning("Perhaps consider using a different search scheme instead (see Manual)")

    # Clear any schemes that are currently loaded.
    # TODO: not sure we need this...
    self.cfg.schemes.clear_schemes()

    # Start with the most partitioned scheme.
    start_description = range(partnum)
    start_scheme = scheme.create_scheme(self.cfg, 1, start_description)
    log.info("Analysing starting scheme (scheme %s)", start_scheme.name)
    result = self.analyse_scheme(start_scheme, models)

    def get_score(my_result):
        """Return the score of my_result under the configured criterion."""
        # TODO: this is bad. Should use self.cfg.model_selection, or write
        # a new model_selection for scheme.py
        if model_selection == "aic":
            return my_result.aic
        if model_selection == "aicc":
            return my_result.aicc
        if model_selection == "bic":
            return my_result.bic
        # Report the offending setting itself. The previous code formatted
        # the not-yet-assigned local `score`, which raised
        # UnboundLocalError before AnalysisError could be raised.
        log.error("Unrecognised model_selection variable '%s', please check",
                  model_selection)
        raise AnalysisError

    best_result = result
    best_score = get_score(result)

    step = 1
    cur_s = 2  # scheme number handed to the next scheme we create

    # Now we try out all lumpings of the current scheme, to see if we can
    # find a better one, and if we do, we just keep going.
    while True:
        log.info("***Greedy algorithm step %d***", step)

        # Get a list of all possible lumpings of the current best description.
        lumpings = algorithm.lumpings(start_description)

        # We reset the counters as we go, for better user information.
        self.total_scheme_num = len(lumpings)
        self.schemes_analysed = 0

        best_lumping_score = None
        best_lumping_result = None
        best_lumping_desc = None
        for lumped_description in lumpings:
            lumped_scheme = scheme.create_scheme(self.cfg, cur_s, lumped_description)
            cur_s += 1
            result = self.analyse_scheme(lumped_scheme, models)
            new_score = get_score(result)

            if best_lumping_score is None or new_score < best_lumping_score:
                best_lumping_score = new_score
                best_lumping_result = result
                best_lumping_desc = lumped_description

        # Stop when nothing could be lumped (no candidates were produced) or
        # when the best candidate is not a strict improvement.
        if best_lumping_score is None or not best_lumping_score < best_score:
            break

        best_score = best_lumping_score
        best_result = best_lumping_result
        start_description = best_lumping_desc

        if len(set(best_lumping_desc)) == 1:
            # Then it's the scheme with everything equal, so quit.
            break
        step += 1

    log.info("Greedy algorithm finished after %d steps", step)
    log.info("Highest scoring scheme is scheme %s, with %s score of %.3f",
             best_result.scheme.name, model_selection, best_score)

    self.best_result = best_result
def do_analysis(self):
    """Greedy heuristic search for a partitioning scheme.

    Analyses the most partitioned scheme first, then repeatedly analyses
    every lumping of the current description (as produced by
    ``algorithm.lumpings``). A round counts as an improvement when
    ``self.results.best_score`` differs from its value before the round;
    the search then continues from the description of the new best result.
    It ends when a round brings no change, or when the current description
    holds a single distinct value.

    Each improving round renames the best scheme to ``step_<n>`` and writes
    its summary through ``self.cfg.reporter``; the overall best scheme is
    written at the end.
    """
    log.info("Performing greedy analysis")

    n_partitions = len(self.cfg.partitions)
    max_schemes = submodels.count_greedy_schemes(n_partitions)
    max_subsets = submodels.count_greedy_subsets(n_partitions)
    self.cfg.progress.begin(max_schemes, max_subsets)

    # The initial description is the most partitioned scheme
    current_description = range(len(self.cfg.partitions))
    initial_scheme = scheme.create_scheme(
        self.cfg, "start_scheme", current_description)
    log.info("Analysing starting scheme (scheme %s)" % initial_scheme.name)
    self.analyse_scheme(initial_scheme)

    round_no = 1
    next_scheme_id = 2

    # Keep lumping for as long as a round of lumpings improves the best score
    searching = True
    while searching:
        log.info("***Greedy algorithm step %d***" % round_no)

        # Every possible lumping of the description we are working from
        candidates = algorithm.lumpings(current_description)

        # Remember the best score before this round so we can spot a change
        score_before = self.results.best_score

        for candidate_description in candidates:
            candidate = scheme.create_scheme(
                self.cfg, next_scheme_id, candidate_description)
            next_scheme_id += 1
            # Analysing is enough here: a good scheme is remembered by the
            # results object and written out later
            self.analyse_scheme(candidate)

        if self.results.best_score == score_before:
            # No change in the best score (per the original note it only
            # ever gets better -- see results.py), so the search is over
            searching = False
        else:
            # The best result is the one that changed during this round;
            # its description seeds the next round
            current_description = self.results.best_result.scheme.description

            # Rename and record the best scheme for this step
            self.results.best_scheme.name = "step_%d" % round_no
            self.cfg.reporter.write_scheme_summary(
                self.results.best_scheme, self.results.best_result)

            if len(set(current_description)) == 1:
                # Everything is lumped together; nothing further to try
                searching = False
            else:
                round_no += 1

    log.info("Greedy algorithm finished after %d steps" % round_no)
    summary = "Highest scoring scheme is scheme %s, with %s score of %.3f" % (
        self.results.best_scheme.name,
        self.cfg.model_selection,
        self.results.best_score,
    )
    log.info(summary)

    txt = "Best scheme according to Greedy algorithm, analysed with %s" % self.cfg.model_selection
    self.cfg.reporter.write_best_scheme(txt, self.results)
def do_analysis(self):
    """Greedy heuristic search for a partitioning scheme.

    The most partitioned scheme is analysed first. After that, each step
    analyses all lumpings of the current description (from
    ``algorithm.lumpings``) and compares ``self.results.best_score`` with
    its value before the step. If it changed, the best scheme is renamed
    ``step_<n>``, its summary is written via ``self.cfg.reporter``, and the
    next step starts from the best result's description. The search stops
    when a step leaves the best score unchanged or the description has only
    one distinct value. The final best scheme is then written out.
    """
    log.info("Performing greedy analysis")

    partition_count = len(self.cfg.partitions)
    total_schemes = submodels.count_greedy_schemes(partition_count)
    total_subsets = submodels.count_greedy_subsets(partition_count)
    self.cfg.progress.begin(total_schemes, total_subsets)

    def analyse_lumpings(descriptions, first_number):
        """Analyse one scheme per description; return the next free number."""
        number = first_number
        for description in descriptions:
            lumped = scheme.create_scheme(self.cfg, number, description)
            number += 1
            # We only analyse here; if the scheme is any good the results
            # object remembers it and it is written later
            self.analyse_scheme(lumped)
        return number

    # Begin from the most partitioned scheme
    description = range(len(self.cfg.partitions))
    first_scheme = scheme.create_scheme(self.cfg, "start_scheme", description)
    log.info("Analysing starting scheme (scheme %s)" % first_scheme.name)
    self.analyse_scheme(first_scheme)

    step_number = 1
    scheme_number = 2
    keep_going = True

    # Try all lumpings of the current scheme; carry on while they help
    while keep_going:
        log.info("***Greedy algorithm step %d***" % step_number)

        # All possible lumpings of the current best description
        lumpings = algorithm.lumpings(description)

        # Snapshot of the best score before this step's lumpings are analysed
        previous_best = self.results.best_score
        scheme_number = analyse_lumpings(lumpings, scheme_number)

        # Did our best score change? (The original note says it only ever
        # gets better -- see results.py.) If not, we are finished.
        if self.results.best_score == previous_best:
            keep_going = False
            continue

        # Look further, starting from the description of the best scheme,
        # which is the one that just changed during this step
        description = self.results.best_result.scheme.description

        # Rename and record the best scheme for this step
        self.results.best_scheme.name = "step_%d" % step_number
        self.cfg.reporter.write_scheme_summary(self.results.best_scheme,
                                               self.results.best_result)

        # A description with a single distinct value cannot be lumped further
        if len(set(description)) == 1:
            keep_going = False
            continue

        step_number += 1

    log.info("Greedy algorithm finished after %d steps" % step_number)
    best_summary = (self.results.best_scheme.name,
                    self.cfg.model_selection,
                    self.results.best_score)
    log.info("Highest scoring scheme is scheme %s, with %s score of %.3f" % best_summary)

    self.cfg.reporter.write_best_scheme(self.results)