def _main_loop(self, ref_set, scenario_builder, population):
    """Run the combine -> improve -> reference-set-update loop.

    Each iteration combines solutions from ``ref_set`` into a pool,
    improves the pool, merges it with a copy of ``ref_set`` and derives a
    new reference set from that union.  The loop ends when the reference
    set has stayed the same for five consecutive iterations or when the
    stop-flag file exists.

    ref_set - current reference set container
    scenario_builder - passed on to ``_combine``, ``_mp_improve`` and
                       ``_report``
    population - container asked for diverse solutions (``get_diverse``)
                 when the regular update leaves the reference set unchanged
    """
    stop = False
    last_changed = 0
    iteration = 0

    while not stop:
        start_loop = datetime.now()

        # create the pool from combining solutions from ref_set
        logging.info("Performing combinations")
        start = datetime.now()
        pool = self._combine(ref_set, scenario_builder)
        logging.info("Combinations %s", datetime.now() - start)

        # improve pool
        logging.info("Improving best combinations")
        start = datetime.now()
        pool = self._mp_improve(pool, scenario_builder)
        logging.info("Improvements %s", datetime.now() - start)

        # join ref_set and pool together
        union = deepcopy(ref_set)
        union.add_container(pool)
        union.sort()

        new_ref_set = self._ref_set_update(union)

        if ref_set.same(new_ref_set):
            logging.info("Ref_set not changed")
            new_ref_set = ScatterPhenoScenarioContainer()
            # floor division: range() needs an int, and "/" yields a
            # float under Python 3
            for i in range(self._b // 2):
                new_ref_set.add(union.get(i))

            # get the most diverse solutions to what
            # we already have in ref_set
            while new_ref_set.len() < self._b:
                new_ref_set.add(population.get_diverse(new_ref_set))

        # count consecutive iterations without any change in ref_set
        if ref_set.same(new_ref_set):
            last_changed += 1
        else:
            last_changed = 0

        if last_changed >= 5:
            logging.info("Reached optimal solution, terminating...")
            stop = True

        # NOTE(review): hard-coded, user-specific flag file that lets an
        # operator stop the search from outside the process
        if os.path.exists('/home/eey9/.stop_scatter_search'):
            stop = True
            logging.info("Stopping because of file flag...")

        ref_set = new_ref_set
        iteration += 1
        logging.info("Completed iteration %d", iteration)
        self._report(ref_set, scenario_builder)

        t_delta = datetime.now() - start_loop
        logging.info("Iteration time %s", t_delta)

    return
def _ref_set_update(self, source):
    """Build a new reference set of ``self._b`` solutions from ``source``.

    ``source`` is sorted in place.  The first ``self._b // 2`` entries of
    the sorted source are copied over, then the set is filled up to
    ``self._b`` entries with whatever ``source.get_diverse`` returns for
    the set built so far.

    source - container to draw solutions from (sorted as a side effect)

    Returns the newly built ScatterPhenoScenarioContainer.
    """
    source.sort()
    ref_set = ScatterPhenoScenarioContainer()

    # floor division: range() needs an int, and "/" yields a float under
    # Python 3
    for i in range(self._b // 2):
        ref_set.add(source.get(i))

    # get the most diverse solutions to what we already have in ref_set
    # NOTE(review): assumes every add() grows ref_set, otherwise this
    # loop does not terminate - confirm against the container
    while ref_set.len() < self._b:
        ref_set.add(source.get_diverse(ref_set))

    return ref_set
def _improve(self, individual, scenario_builder):
    """Evaluate the flip and swap neighbours of ``individual``.

    Every variable returned by ``_build_candidate_list`` is flipped and
    every pair of solution positions is swapped.  Neighbours that are the
    same as ``individual`` or that are not valid are skipped; the rest are
    evaluated and collected without duplicates.  Evaluated swap
    neighbours are also added to ``self._database`` when not yet there.

    individual - scenario whose neighbourhood is explored
    scenario_builder - provides ``flip`` and ``swap``

    Returns a dict with the untouched ``individual`` and the sorted
    container of ``improvements``.
    """
    began = datetime.now()
    # presumably the R "base" package handle (rpy2) - passed to _evaluate
    base = importr("base")
    flip_candidates = self._build_candidate_list(individual)
    improvements = ScatterPhenoScenarioContainer()

    # neighbours obtained by flipping a single candidate variable
    for variable in flip_candidates:
        flipped = scenario_builder.flip(individual, variable)
        if flipped.same(individual):
            continue
        if not flipped.valid(process=True):
            continue

        flipped = self._evaluate(flipped, base)
        if not improvements.contains(flipped):
            improvements.add(flipped)

    # neighbours obtained by swapping each pair of positions (first < second)
    for first in range(len(individual.get_solution())):
        for second in range(first + 1, len(individual.get_solution())):
            swapped = scenario_builder.swap(individual, first, second)
            if swapped.same(individual):
                continue
            if not swapped.valid(process=True):
                continue

            swapped = self._evaluate(swapped, base)
            if not improvements.contains(swapped):
                improvements.add(swapped)

            if not self._database.contains(swapped):
                self._database.add(swapped)

    improvements.sort()
    logging.info("self._improve finished - %s" % (datetime.now() - began))
    return {'individual': individual, 'improvements': improvements}
def g1(self, count):
    """Generate ``count`` solutions from a random seed solution.

    A random seed is drawn, then for each step ``h`` from 2 up to
    ``self._n - 1`` a copy of the seed has every ``h``-th position
    (starting at 0) toggled.  Copies are accepted while they are valid
    and fewer than ``count`` solutions have been collected; the first
    rejected copy ends the attempt.  An attempt that ends with too few
    solutions is retried with a fresh seed.

    count - number of solutions wanted, the seed included

    Returns a ScatterPhenoScenarioContainer holding ``count`` solutions.
    Raises Exception when ``count`` exceeds ``self._n - 1`` or once more
    than ten attempts have failed.
    """
    failed_attempts = 0

    while True:
        generated = ScatterPhenoScenarioContainer()
        seed = self._random_solution()
        generated.add(seed)

        h_max = self._n
        # maximum number of solutions is h_max - 1
        if count > h_max - 1:
            raise Exception("Could not generate %d solutions with "
                            "G1 and h_max %d" % (count, h_max))

        for step in range(2, h_max):
            variant = deepcopy(seed)
            # toggle positions 0, step, 2 * step, ... below self._n
            for position in range(0, self._n, step):
                variant.toggle(position)

            # stop this attempt once enough solutions exist or the
            # variant is rejected; validity is only checked when more
            # solutions are still needed
            if generated.len() >= count or \
                    not variant.valid(process=self._process):
                break
            generated.add(variant)

        if generated.len() == count:
            return generated

        failed_attempts += 1
        if failed_attempts > 10:
            raise Exception("Could not generate enough valid solutions")
def _combine(self, container, scenario_builder):
    """Combine the solutions in ``container`` into a pool of new scenarios.

    Each combination from ``_build_combinations`` is handed to
    ``scenario_builder.combine`` together with the score table.
    Combinations without a valid solution are logged and skipped.  Every
    new scenario updates the score table and joins the pool unless the
    pool already contains it.  When the new scenario's utility is lower
    than that of some entry of ``container``, the builder is notified via
    ``success`` with ``self._b`` minus the index of the first such entry.

    container - reference set the combinations are built from
    scenario_builder - provides ``combine`` and ``success``

    Returns the ScatterPhenoScenarioContainer of new scenarios.
    """
    # build subsets
    combinations = self._build_combinations(container)
    total = len(combinations)
    pool = ScatterPhenoScenarioContainer()

    # enumerate gives the true 1-based position; looking it up with
    # list.index() is a linear scan per iteration and reports the first
    # match when two combinations compare equal
    for position, combination in enumerate(combinations, 1):
        start = datetime.now()

        try:
            new_scenario = scenario_builder.combine(combination[0],
                                                    combination[1],
                                                    self._score_table)
        except NoValidSolutionException:
            logging.info("Combination %d/%d - %s: no valid solution",
                         position, total, datetime.now() - start)
            continue

        self._update_score_table(new_scenario)
        if not pool.contains(new_scenario):
            pool.add(new_scenario)

        # see where does the scenario qualify to be in container
        try:
            beaten = next(x for x in container.get_all()
                          if new_scenario.get_utility() < x.get_utility())
        except StopIteration:
            # Worse than anything in ref_set, does not qualify
            continue

        scenario_builder.success(self._b - container.index(beaten))

        logging.info("Combination %d/%d - %s",
                     position, total, datetime.now() - start)

    return pool
def __init__(self, data, scenario, root_dir, load=False):
    """Set up the search state and immediately run the algorithm.

    data - {'cd_data': [], 'ml_data': []}; passed through ``_hack_data``
           before use
    scenario - 'simple_ml' or 'compound'
    root_dir - directory holding 'score_table.csv' and 'database.csv',
               which are read when ``load`` is True
    load - when True, restore the score table and solution database from
           ``root_dir`` and run ``_run_algorithm2``; otherwise start from
           an empty state and run ``_run_algorithm``

    Raises ValueError for any other ``scenario``.
    """
    # hardcoded algorithm variables, could supply them to the
    # constructor if needed
    # TODO real value: self._PSize = 45
    self._PSize = 12
    # weight for previous score entry, when updating the score table
    self._alpha = 0.3
    # TODO real value: self._b = 20
    self._b = 8
    self._proc_count = 4

    # set class variables
    self._variables = [formats.STEM_COUNT, formats.CANOPY_HEIGHT,
                       formats.TRANSMISSION, formats.FLOWERING_SCORE,
                       formats.LEAF_AREA_INDEX, formats.COL, formats.ROW,
                       formats.DD, formats.GENOTYPE, formats.RAINFALL,
                       formats.DOY, formats.PAR]
    self._variables.sort()

    self._scenario = scenario
    self._root_dir = root_dir

    if scenario == "simple_ml":
        self._methods = ['rf', 'knn', 'gbm']
    elif scenario == "compound":
        self._methods = ['NaiveMLProcessModelMemfix', 'GAWinModel']
    else:
        # TODO other scenarios are not implemented yet
        raise ValueError("Unsupported scenario %r, expected 'simple_ml' "
                         "or 'compound'" % (scenario,))

    self._data = self._hack_data(data)

    # distinct month names occurring in the ml data, in alphabetical order
    self._months = sorted(set(x[formats.DATE].strftime("%B")
                              for x in self._data['ml_data']))

    # find maximum RMSE for methods
    self._max_rmse = self._get_max_rmse()

    # DB to contain all solutions ever explored
    self._database = ScatterPhenoScenarioContainer()
    self._score_table = self._empty_score_table()

    if load:
        sc_file = os.path.join(self._root_dir, 'score_table.csv')
        self._score_table = CSVFileReader(sc_file).get_content()
        # the CSV reader hands back strings, restore the real types
        for entry in self._score_table:
            entry['score'] = float(entry['score'])
            entry['value'] = (entry['value'] == "True")

        db_file = os.path.join(self._root_dir, 'database.csv')
        self._database.load_file(db_file, self._data)
        self._update_score_table()

        self._run_algorithm2()
    else:
        self._run_algorithm()