def elite_evaluate(self, models, n=1):
    """Queue every model, merged with the elite, for evaluation and record the results.

    For each entry of ``models`` the elite's weights and the entry's weights
    are combined with ``merge_all`` and the resulting job is put on
    ``self.training_queue`` ``n`` times.  Afterwards ``len(models) * n``
    results are read from ``self.results_queue`` and each one is handed to
    the originating model through ``add_results``.

    Args:
        models: Sequence of models to evaluate; the index into this sequence
            is used as the job id.
        n: Number of times each job is queued.
    """
    # Template with the combined (own + elite) layer sizes; only its pickle
    # object is shipped with each job.
    template = Model(
        rnn_size=self.rnn_size + self.elite.rnn_size,
        controller_size=self.controller_size + self.elite.controller_size,
        output_size=self.output_size + self.elite.output_size,
        no_rew_early_stop=self.no_rew_early_stop)

    for model_id, member in enumerate(models):
        job = {
            'weights': merge_all(
                [self.elite.get_weights(), member.get_weights()]),
            'attr': template.get_pickle_obj(),
            'id': model_id,
        }
        for _ in range(n):
            self.training_queue.put(job)

    # The template is no longer needed once all jobs are queued.
    tf.keras.backend.clear_session()
    del template
    gc.collect()

    # Collect everything first, then dispatch, so no result is attributed
    # before the full batch has been read.
    expected = len(models) * n
    collected = [self.results_queue.get() for _ in range(expected)]
    for outcome in collected:
        models[outcome['id']].add_results(outcome['result'])
def evolutionary_leap(self, p_leap_keep=0.2, n_ensemble_eval=2):
    """Replace the population with larger models seeded from elite-merged survivors.

    The current population is scored together with the elite
    (``elite_evaluate``), the best ``int(pop_size * p_leap_keep)`` models are
    merged with the elite's weights into freshly built models of combined
    size, and the rest of the new population is filled with mutated
    tournament winners.

    Args:
        p_leap_keep: Fraction of ``self.pop_size`` kept as seeds for the new
            population.
        n_ensemble_eval: Number of evaluations per model in the initial
            elite-merged scoring.

    Returns:
        The best candidate after re-evaluation (its ``fitness`` is forced to
        1000 before it is returned).
    """
    by_fitness = attrgetter('fitness')

    self.elite_evaluate(self.models, n=n_ensemble_eval)
    self.models.sort(key=by_fitness, reverse=True)
    n_init_pop = int(self.pop_size * p_leap_keep)
    survivors = self.models[:n_init_pop]

    # Build the whole next generation at the combined (own + elite) size.
    new_models = [
        Model(rnn_size=self.rnn_size + self.elite.rnn_size,
              controller_size=self.controller_size + self.elite.controller_size,
              output_size=self.output_size + self.elite.output_size,
              no_rew_early_stop=self.no_rew_early_stop)
        for _ in range(self.pop_size)
    ]
    # Seed the first n_init_pop new models with elite + survivor weights.
    for i, model in enumerate(survivors):
        new_models[i].set_weights(
            merge_all([self.elite.get_weights(), model.get_weights()]))

    # Drop the old population before installing the new one.
    tf.keras.backend.clear_session()
    del self.models
    gc.collect()
    self.models = new_models

    new_pop = self.models[:n_init_pop]
    self.evaluate(new_pop)
    new_pop.sort(key=by_fitness, reverse=True)

    # Re-evaluate the top candidates to pick the champion.
    candidates = new_pop[:self.n_candidates]
    self.evaluate(candidates, n=self.n_candidate_eval)
    candidates.sort(key=by_fitness, reverse=True)
    # Pin the champion's fitness so it wins any tournament it takes part in.
    candidates[0].fitness = 1000

    # Fill the remaining pop_size - n_init_pop slots (from the end of the
    # list) with mutated copies of tournament winners.
    for i in range(1, self.pop_size - n_init_pop + 1):
        competitors = np.random.choice(
            new_pop, size=2, replace=False)  # tournament selection
        winner = max(competitors, key=by_fitness)
        self.models[-i].copy_model(winner)
        self.models[-i].mutate()

    # NOTE(review): self.rnn_size / controller_size / output_size are not
    # updated here although the new models are larger - confirm intended.
    return candidates[0]
def evolve_elite_support(self):
    """Run one generation of evolution for the models that support the elite.

    The population is scored together with the elite, the top
    ``int(p_keep * pop_size)`` models survive, the best ``n_candidates`` of
    them are re-scored, and the remaining population slots are overwritten
    with mutated tournament winners drawn from the survivors.

    Returns:
        A model of combined (own + elite) size carrying the merged weights of
        the elite and the best candidate.
    """
    combined = Model(
        rnn_size=self.rnn_size + self.elite.rnn_size,
        controller_size=self.controller_size + self.elite.controller_size,
        output_size=self.output_size + self.elite.output_size,
        no_rew_early_stop=self.no_rew_early_stop)
    by_fitness = attrgetter('fitness')

    # Score and rank the whole population.
    self.elite_evaluate(self.models)
    self.models.sort(key=by_fitness, reverse=True)
    n_keep = int(self.p_keep * self.pop_size)
    survivors = self.models[:n_keep]

    # Re-score the front-runners and pin the champion's fitness.
    candidates = survivors[:self.n_candidates]
    self.elite_evaluate(candidates, n=self.n_candidate_eval)
    candidates.sort(key=by_fitness, reverse=True)
    champion = candidates[0]
    champion.fitness = 1000

    survivors.sort(key=by_fitness, reverse=True)
    for rank, survivor in enumerate(survivors):
        survivor.add_rank(rank)

    # Overwrite the tail of the population with mutated tournament winners.
    for offset in range(1, self.pop_size - len(survivors) + 1):
        pair = np.random.choice(
            survivors, size=2, replace=False)  # tournament selection
        winner = max(pair, key=by_fitness)
        target = self.models[-offset]
        target.copy_model(winner)
        target.mutate()

    # Assemble the returned model: merged weights first, then the champion's
    # pickled attributes, then the combined sizes restored on top.
    combined.set_weights(
        merge_all([self.elite.get_weights(), champion.get_weights()]))
    combined.copy_model(champion.get_pickle_obj(), from_pickle=True)
    combined.rnn_size = self.rnn_size + self.elite.rnn_size
    combined.controller_size = self.controller_size + self.elite.controller_size
    combined.output_size = self.output_size + self.elite.output_size
    return combined
# Disabled alternative: load one saved model
# ('./saved/0132result_500.64363311437586', rnn 1024 / controller 512 /
# output 48) instead of building the ensemble below.

# Merge all sub-models into a single ensemble whose layer sizes are the sums
# of the parts; model_0 is kept separate and passed alongside the ensemble.
models = b_models + s_models + t_models
tot_rnn = sum(m.rnn_size for m in models)
tot_con = sum(m.controller_size for m in models)
tot_out = sum(m.output_size for m in models)

ensemble_model = Model(rnn_size=tot_rnn,
                       controller_size=tot_con,
                       output_size=tot_out)
merged_weights = merge_all([m.get_weights() for m in models])
ensemble_model.set_weights(merged_weights)
models = [ensemble_model, model_0]

# Run the test episodes and report mean / standard deviation of the rewards.
rew = test(env, models, vae_model, disp=True, n=40, avg=True)
print('avg:', np.mean(rew))
print('std', np.std(rew))
env.close()
def evolutionary_leap(self,
                      p_mate=0.25,
                      n_alpha=3,
                      p_mate_keep=0.25,
                      n_init_eval=2,
                      n_ensemble_eval=3):
    """Double the model size by pairing the best models and keeping the best pairs.

    The population is evaluated and ranked; the top ``p_mate`` fraction
    become mates and the top ``n_alpha`` of those become alphas.  Every
    alpha is merged with every lower-ranked mate, each merged pair is queued
    for ``n_ensemble_eval`` evaluations, and the best ``p_mate_keep``
    fraction of pairs seeds a new population of double-sized models.  The
    remaining slots are filled with mutated tournament winners, and
    ``self.rnn_size``, ``self.controller_size`` and ``self.output_size`` are
    doubled.

    Args:
        p_mate: Fraction of ``self.pop_size`` eligible for pairing.
        n_alpha: Number of top mates paired against the other mates.
        p_mate_keep: Fraction of the evaluated pairs kept as seeds.
        n_init_eval: Evaluations per model in the initial ranking.
        n_ensemble_eval: Times each merged pair is queued for evaluation.

    Returns:
        The best candidate after re-evaluation (the model whose ``fitness``
        is pinned to 1000).
    """
    by_fitness = attrgetter('fitness')

    self.evaluate(self.models, n=n_init_eval)
    self.models.sort(key=by_fitness, reverse=True)
    mates = self.models[:int(p_mate * self.pop_size)]
    alphas = mates[:n_alpha]

    # Template of the doubled size; its pickle object travels with each job.
    ensemble_model = Model(rnn_size=2 * self.rnn_size,
                           controller_size=2 * self.controller_size,
                           output_size=2 * self.output_size,
                           no_rew_early_stop=self.no_rew_early_stop)

    all_combined_weights = {}
    n_put = 0
    for i, alpha in enumerate(alphas):
        for y, mate in enumerate(mates):
            # alphas are the head of mates, so y > i skips self-pairs and
            # the mirrored duplicate of each alpha/alpha pair.
            if i >= y:
                continue
            combined_weights = merge_all(
                [alpha.get_weights(), mate.get_weights()])
            all_combined_weights[(i, y)] = combined_weights
            info = {
                'weights': combined_weights,
                'attr': ensemble_model.get_pickle_obj(),
                'id': (i, y)
            }
            for _ in range(n_ensemble_eval):
                self.training_queue.put(info)
                n_put += 1

    # Gather one result per queued job, grouped by pair id.
    results = {}
    for _ in range(n_put):
        result = self.results_queue.get()
        results.setdefault(result['id'], []).extend(result['result'])

    # Rank pairs by summed results and keep the best fraction.
    ranked_pairs = sorted(results.items(),
                          key=lambda x: sum(x[1]),
                          reverse=True)
    n_init_pop = int(len(ranked_pairs) * p_mate_keep)
    ranked_pairs = ranked_pairs[:n_init_pop]
    print(ranked_pairs)

    # Replace the old population with double-sized models.
    tf.keras.backend.clear_session()
    del self.models
    gc.collect()
    self.models = [
        Model(rnn_size=2 * self.rnn_size,
              controller_size=2 * self.controller_size,
              output_size=2 * self.output_size,
              no_rew_early_stop=self.no_rew_early_stop)
        for _ in range(self.pop_size)
    ]
    for i, (key, res) in enumerate(ranked_pairs):
        self.models[i].add_results(res)
        self.models[i].set_weights(all_combined_weights[key])

    new_pop = self.models[:n_init_pop]
    new_pop.sort(key=by_fitness, reverse=True)

    # Re-evaluate the top candidates to pick the champion.
    candidates = new_pop[:self.n_candidates]
    self.evaluate(candidates, n=self.n_candidate_eval)
    candidates.sort(key=by_fitness, reverse=True)
    # Pin the champion's fitness so it wins any tournament it takes part in.
    candidates[0].fitness = 1000

    # Fill the remaining pop_size - n_init_pop slots (from the end of the
    # list) with mutated copies of tournament winners.
    for i in range(1, self.pop_size - n_init_pop + 1):
        competitors = np.random.choice(
            new_pop, size=2, replace=False)  # tournament selection
        winner = max(competitors, key=by_fitness)
        self.models[-i].copy_model(winner)
        self.models[-i].mutate()

    self.rnn_size *= 2
    self.controller_size *= 2
    self.output_size *= 2
    # Return the re-evaluated champion; new_pop[0] is only the leader from
    # before the candidate re-evaluation and may be a different model.
    return candidates[0]