# Standard-library and third-party imports used by the functions below.
# Project-specific names (MazeSimulator, NoveltySearch, player, ns, ea, Node,
# getBestChild, half, rollout, do_debug) are assumed to be defined or imported
# elsewhere in the repository.
import copy
import logging
import random
import time

import numpy as np
import pygame


def main4():
    sim = MazeSimulator(render=True, xml_file='hardmaze_env.xml')
    csim = MazeSimulator(render=False, xml_file='hardmaze_env.xml')
    bs = NoveltySearch()
    backup_robot = copy.deepcopy(sim.env.robot)
    for t in range(10000):
        #if(t % 500 == 0):
        #    sim.env.robot = copy.deepcopy(backup_robot)
        print(t)
        print('archive size: ', len(bs.behavior_archive))
        #bs.reset_archive()
        actions = [[0, 0.4, 0], [0.05, 0.2, 0], [0, 0.2, 0.05]]
        time.sleep(0.005)
        sim.render()
        bestval = -9999
        bestact = actions[0]
        values = [0, 0, 0]
        actions_behvs = [[], [], []]
        for i in range(len(actions)):
            val = 0
            for j in range(2):
                # First step with the candidate action, then a short rollout
                # on the non-rendered copy.
                finder_obs, radar_obs, done = csim.step(actions[i], 0.2)
                new_val, behv = rollout(csim, 10)
                # Comment the block below out for fitness-only evaluation.
                #new_val = bs.get_novelty(behv)
                bs.put_behavior(behv)
                actions_behvs[i].append(behv)
                csim.env.robot = copy.deepcopy(sim.env.robot)
        best_val = 0
        # Score each candidate action by the summed novelty of its rollout behaviors.
        for i in range(len(actions_behvs)):
            for behv in actions_behvs[i]:
                v = bs.get_novelty(behv)
                values[i] += v
                #if(v > best_val):
                #    bestact = actions[i]
                #    best_val = v
            if(values[i] > bestval):
                bestval = values[i]
                bestact = actions[i]
        print('values:', values)
        # If every candidate looks stale (total novelty below 1), reset the robot.
        if(sum(values) < 1):
            sim.env.robot = copy.deepcopy(backup_robot)
        sim.step(bestact, 0.2)
        pygame.event.pump()
        # `done` is the flag from the last simulated csim.step above.
        if done:
            break
    print("Episode finished after {} timesteps".format(t + 1))
    print(sim.evaluate_fitness())
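# main4 above (and main3/main5 below) call a rollout(simulator, steps) helper
# that is defined elsewhere in the repository. The sketch below is NOT the
# original implementation; it is a minimal, hypothetical version inferred from
# how the helper is used: it must return a (fitness_value, behavior) pair,
# where the behavior matches the (x, y) cell tuples built elsewhere in this
# file from env.robot.location.
def rollout_sketch(simulator, steps):
    """Hypothetical random rollout: take `steps` random actions on `simulator`
    and return (final fitness, final (x, y) behavior)."""
    done = False
    for _ in range(steps):
        possible = simulator.get_possible_actions()
        action = possible[random.randint(0, len(possible) - 1)]
        _, _, done = simulator.step(action, 0.2)
        if done:
            break
    value = simulator.evaluate_fitness()
    behavior = (int(simulator.env.robot.location[0]),
                int(simulator.env.robot.location[1]))
    return value, behavior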
def main5():
    sim = MazeSimulator(render=True, xml_file='hardmaze_env.xml')
    csim = MazeSimulator(render=False, xml_file='hardmaze_env.xml')
    for t in range(30000):
        time.sleep(0.005)
        sim.render()
        # Keyboard control for the rendered simulator.
        keys = pygame.key.get_pressed()
        action = [0, 0.4, 0]
        if keys[pygame.K_LEFT]:
            action = [0.05, 0.2, 0]
        if keys[pygame.K_RIGHT]:
            action = [0, 0.2, 0.05]
        actions = [[0, 0.4, 0], [0.05, 0.2, 0], [0, 0.2, 0.05]]
        values = [0, 0, 0]
        for i in range(len(actions)):
            finder_obs, radar_obs, done = csim.step(actions[i], 0.2)
            val = 0
            for j in range(20):
                # Note: these rollouts run on the rendered `sim`, not on the
                # copy `csim` stepped above.
                new_val, behv = rollout(sim, 5)
                val += new_val
            csim.env.robot = copy.deepcopy(sim.env.robot)
            values[i] = val
        print('values: ', values)
        finder_obs, radar_obs, done = sim.step(action, 0.2)
        pygame.event.pump()
        if done:
            print("Episode finished after {} timesteps".format(t + 1))
            print(sim.evaluate_fitness())
            break
def main6(path):
    #sim = MazeSimulator(render=False, xml_file=path)
    csim = MazeSimulator(render=False, xml_file=path)
    #m = player.mcts_act(csim, 500, 20, do_ns=True)
    #actions = m.run(csim, path)
    for i in range(1):
        m = player.mcts_act(csim, 1000, 20, do_ns=True)
        actions = m.run(csim, path)
    # Replay the action sequence returned by the MCTS player.
    for act in actions:
        #time.sleep(0.005)
        #sim.render()
        _, _, done = csim.step(act, 0.2)
        #print('Reward: ', sim.evaluate_fitness())
        pygame.event.pump()
def __init__(self, rollout_actions_length, environment, mutation_probability,
             num_pop, use_shift_buffer=False, flip_at_least_one=True,
             discount_factor=1, ignore_frames=0, do_ns=False, path=''):
    self._logger = logging.getLogger('RHEA')
    self._rollout_actions_length = rollout_actions_length
    # Rendered copy of the environment used for evaluating rollouts.
    self._environment = MazeSimulator(render=True, xml_file=path)
    self.path = path
    self._environment.env.robot = copy.deepcopy(environment.env.robot)
    self.environment = environment
    self._use_shift_buffer = use_shift_buffer
    self._flip_at_least_one = flip_at_least_one
    self._mutation_probability = mutation_probability
    self._discount_factor = discount_factor
    self.num_pop = num_pop
    self._ignore_frames = ignore_frames
    # All-time and per-population bests for fitness and novelty.
    self.best_solution_val = -99999
    self.best_solution = None
    self.cur_best_solution_val = -99999
    self.cur_best_solution = None
    self.cur_best_novelty = -99999
    self.cur_best_novelty_sol = None
    self.history = []
    self.old_pop = []
    # Novelty-search archive; alternative behavior characterizations are kept
    # commented out for reference.
    self.ns = ns.NoveltySearch(behavior_type='ad_hoc')
    #self.ns = ns.NoveltySearch(behavior_type='trajectory')
    #self.ns = ns.NoveltySearch(behavior_type='hamming')
    #self.ns = ns.NoveltySearch(behavior_type='entropy')
    #self.ns = ns.NoveltySearch(behavior_switch=True)
    self.ea = ea.EA('default')
    self.do_ns = do_ns
    #self.behv_state = copy.deepcopy(environment)
    #self.behv_last_visit = copy.deepcopy(environment)
    #self.behv_rewards = copy.deepcopy(environment)
    self.playout_count = 0
    # Initialize the solution to a random sequence
    if self._use_shift_buffer:
        self._solution = self._random_solution()
    self.tree = {}
    self.store_tree = False
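# __init__ above calls self._random_solution(), which is defined elsewhere in
# the RHEA class and is not shown in this file. The helper below is a
# hypothetical, stand-alone sketch of what such a method typically does in
# rolling-horizon evolution: sample a random action-index sequence of length
# rollout_actions_length. The name and the num_actions parameter are
# assumptions, not the original code.
def random_solution_sketch(rollout_actions_length, num_actions=3):
    """Return a random sequence of 1-based action indices, matching the
    actions[act - 1] indexing used by evaluate_rollouts below."""
    return [random.randint(1, num_actions) for _ in range(rollout_actions_length)]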
def main2():
    sim = MazeSimulator(render=False, xml_file='hardmaze_env.xml')
    for t in range(1000):
        print(t)
        #time.sleep(0.005)
        #sim.render()
        act = random.randint(0, 2)
        if(act == 0):
            action = [0, 0.4, 0]
        elif(act == 1):
            action = [0.05, 0.2, 0]
        elif(act == 2):
            action = [0, 0.2, 0.05]
        finder_obs, radar_obs, done = sim.step(action, 0.2)
        #pygame.event.pump()
        if done:
            break
    print("Episode finished after {} timesteps".format(t + 1))
    print(sim.evaluate_fitness())
def main():
    sim = MazeSimulator(render=True, xml_file='hardmaze_env.xml')
    for t in range(30000):
        time.sleep(0.005)
        sim.render()
        keys = pygame.key.get_pressed()
        action = [0, 0.4, 0]
        if keys[pygame.K_LEFT]:
            action = [0.05, 0.2, 0]
        if keys[pygame.K_RIGHT]:
            action = [0, 0.2, 0.05]
        finder_obs, radar_obs, done = sim.step(action, 0.2)
        pygame.event.pump()
        if done:
            print("Episode finished after {} timesteps".format(t + 1))
            print(sim.evaluate_fitness())
            break
def main7(path):
    #sim = MazeSimulator(render=False, xml_file=path)
    csim = MazeSimulator(render=False, xml_file=path)
    player.rhea_act(csim, it=1, pop_evolution=300, pop_num=6, rollout_limit=40,
                    mutation_prob=0.1, do_ns=True, run_type=3, path=path)
def main3():
    sim = MazeSimulator(render=True, xml_file='hardmaze_env.xml')
    csim = MazeSimulator(render=False, xml_file='hardmaze_env.xml')
    for t in range(10000):
        print(t)
        actions = [[0, 0.4, 0], [0.05, 0.2, 0], [0, 0.2, 0.05]]
        time.sleep(0.005)
        sim.render()
        bestval = -9999
        bestact = actions[0]
        values = [0, 0, 0]
        for i in range(len(actions)):
            val = 0
            for j in range(5):
                # First step with the candidate action, then a 20-step rollout
                # on the non-rendered copy.
                finder_obs, radar_obs, done = csim.step(actions[i], 0.2)
                new_val, behv = rollout(csim, 20)
                val += new_val
                csim.env.robot = copy.deepcopy(sim.env.robot)
            values[i] = val
            if(val > bestval):
                bestact = actions[i]
                bestval = val
        print('values: ', values)
        sim.step(bestact, 0.2)
        print('Reward: ', sim.evaluate_fitness())
        pygame.event.pump()
        if done:
            break
    print("Episode finished after {} timesteps".format(t + 1))
    print(sim.evaluate_fitness())
def evaluate_rollouts(self, env, solutions, discount=1, ignore_frames=0):
    rewards = []
    nv_rews = []
    behvs = []
    backup_env = MazeSimulator(render=False, xml_file=self.path)
    backup_env.env.robot = copy.deepcopy(env.env.robot)
    for sol in solutions:
        return_r = 0
        return_n = 0
        acts = []
        for act in sol:
            acts.append(act)
            #time.sleep(0.005)
            env.render()
            actions = [a for a in env.get_possible_actions()]
            _, _, done = env.step(actions[act - 1], 0.2)
            pygame.event.pump()
            r = env.evaluate_fitness()
            if (r > 0):
                print('reward: ', r)
                # Deliberate crash (division by zero) to halt the run as soon
                # as a positive reward, i.e. the goal, is reached.
                a = 2 / 0
            #return_r += r * discount
            return_r = r
            if (self.do_ns):
                behv = (int(env.env.robot.location[0]), int(env.env.robot.location[1]))
                # Accumulates the behavior of each step. When done = True (the
                # episode ends), the whole episode behavior is added to the
                # archive, but only if store_behavior is True.
                self.ns.build_behavior(behv, act, done, False)
            if (done):
                break
        if (self.do_ns):
            #behv = (env.posx, env.posy)
            behvs.append(self.ns.episode_behavior)
            #return_n = self.ns.get_novelty(behv)
            #print('solution behavior: ', self.ns.episode_behavior)
            return_n = self.ns.get_approx_novelty(self.ns.episode_behavior, k=1000, done=True)
            #nv_rews.append(return_n)
            nv_rews.append(return_r * 0 + return_n * 1.0)
            print('novelty reward: ', return_n)
            print('distance reward: ', return_r)
        self.playout_count += 1
        # Close the episode behavior if the loop ended without `done` (only
        # meaningful when do_ns is enabled).
        if (done == False):
            self.ns.build_behavior(behv, act, True, False)
        # Store in tree if allowed
        if (self.store_tree):
            self.expand_tree(sol, return_r)
        ############# Evaluating rollouts by ##########
        ############# rewards and/or diversity ########
        # In the novelty case, the reward (novelty of the behavior) is only
        # computed at the end of the rollout.
        # Save the all-time best rollout and best return obtained so far.
        if (return_r > self.best_solution_val):
            if (do_debug):
                print('best reward now: ', return_r)
            #self.best_solution = np.concatenate((np.asarray(self.history), sol))
            self.best_solution = self.history + acts
            self.best_solution_val = return_r
        # Save the most diverse solution and its diversity score.
        # Only meant to be used within the current population.
        if (return_n > self.cur_best_novelty):
            #print('best novelty from current pop: ', return_n)
            self.cur_best_novelty_sol = self.history + acts
            self.cur_best_novelty = return_n
        # Save the best return and solution of the current population.
        if (return_r > self.cur_best_solution_val):
            #self.best_solution = np.concatenate((np.asarray(self.history), sol))
            self.cur_best_solution = self.history + acts
            self.cur_best_solution_val = return_r
        rewards.append(return_r)
        env.env.robot = copy.deepcopy(backup_env.env.robot)
    if (self.do_ns):
        return np.asarray(nv_rews), behvs
    return np.asarray(rewards), behvs
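# get_approx_novelty used above is provided by the NoveltySearch module (ns)
# and its implementation is not shown in this file. For reference, the usual
# novelty score in novelty search is the mean distance of a behavior to its k
# nearest neighbours in the behavior archive. The function below is a minimal,
# hypothetical sketch of that idea for (x, y) behavior points only; it is not
# the project's actual NoveltySearch code.
def knn_novelty_sketch(behavior, archive, k=15):
    """Mean Euclidean distance from `behavior` to its k nearest neighbours
    in `archive` (a list of (x, y) tuples)."""
    if not archive:
        return 0.0
    dists = sorted(
        ((behavior[0] - b[0]) ** 2 + (behavior[1] - b[1]) ** 2) ** 0.5
        for b in archive
    )
    return sum(dists[:k]) / min(k, len(dists))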
def run(self, env, path):
    best_rewards = []
    start_time = time.time()
    root = Node(None, None)
    best_actions = []
    best_reward = float("-inf")
    state = MazeSimulator(render=True, xml_file=path)
    real_move = 50
    c = 0
    for p in range(self.playouts):
        print("Playout: ", p)
        state.env.robot = copy.deepcopy(env.env.robot)
        sum_reward = 0
        node = root
        terminal = False
        actions = []

        # selection
        while node.children:
            if node.explored_children < len(node.children):
                child = node.children[node.explored_children]
                node.explored_children += 1
                node = child
            else:
                if(not self.do_ns):
                    #node = max(node.children, key=avg)
                    #node = max(node.children, key=ucb)
                    node = getBestChild(node)
                else:
                    # Epsilon-greedy mix between novelty-based and UCB selection.
                    r = random.random()
                    if(r < self.e):
                        #node = max(node.children, key=avgn)
                        node = max(node.children, key=half)
                    else:
                        node = getBestChild(node)
                        #node = max(node.children, key=ucb)
                    self.e *= self.decay
            #print(node.action)
            #time.sleep(0.005)
            state.render()
            _, _, terminal = state.step(node.action, 0.2)
            if(self.do_ns):
                behv = (int(state.env.robot.location[0]), int(state.env.robot.location[1]))
                self.ns.build_behavior(behv, node.action, False, False)
            pygame.event.pump()
            reward = state.evaluate_fitness()
            if(reward > 0):
                print('reward: ', reward)
                # Deliberate crash (division by zero) to halt the run as soon
                # as a positive reward, i.e. the goal, is reached.
                a = 2 / 0
            #sum_reward += reward
            actions.append(node.action)

        # expansion
        if not terminal:
            #node.children = [Node(node, a) for a in combinations(state.action_space)]
            node.children = [Node(node, a) for a in state.get_possible_actions()]
            random.shuffle(node.children)

        # playout
        while not terminal:
            pactions = state.get_possible_actions()
            action = pactions[random.randint(0, len(pactions) - 1)]
            _, _, terminal = state.step(action, 0.2)
            reward = state.evaluate_fitness()
            #sum_reward += reward
            actions.append(action)
            if(self.do_ns):
                behv = state.env.robot.location
                self.ns.build_behavior(behv, action, False, False)
            if len(actions) > self.max_depth:
                sum_reward -= 100
                break
        sum_reward = state.evaluate_fitness()

        # remember best
        #if best_reward < sum_reward:
        #    best_reward = sum_reward
        #    best_actions = actions

        nv_reward = 0
        # behavior
        if(self.do_ns):
            behv = (int(state.env.robot.location[0]), int(state.env.robot.location[1]))
            #behv = (state.posx, state.posy)
            #nv_reward = self.ns.get_novelty(behv)
            self.ns.build_behavior(behv, action, False, store_behavior=False)
            nv_reward = self.ns.get_approx_novelty(self.ns.episode_behavior, done=True)
            self.ns.build_behavior(behv, action, True, store_behavior=True)
            #self.ns.set_behavior_in_archive(behv, self.ns.behavior_archive, True)
            #self.ns.put_behavior(behv)

        # backpropagate
        print('reward:', sum_reward)
        print('nv_reward:', nv_reward)
        while node:
            node.visits += 1
            node.value += sum_reward
            node.nv_value += nv_reward
            node = node.parent
        sum_reward = 0
        nv_reward = 0
        print('e: ', self.e)
        best_actions = actions
    return best_actions
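# run() above relies on a Node class and a getBestChild selection helper that
# are defined elsewhere in the repository (together with the avg/avgn/ucb/half
# keys referenced in the comments). The sketch below is a hypothetical
# reconstruction based only on the attributes run() accesses (children,
# explored_children, visits, value, nv_value, parent, action) and on standard
# UCB1 selection; it is not the project's original code.
import math


class NodeSketch:
    def __init__(self, parent, action):
        self.parent = parent          # parent node (None for the root)
        self.action = action          # action that leads to this node
        self.children = []            # child nodes, filled during expansion
        self.explored_children = 0    # children already visited at least once
        self.visits = 0               # backpropagated visit count
        self.value = 0.0              # backpropagated fitness return
        self.nv_value = 0.0           # backpropagated novelty return


def get_best_child_sketch(node, c=1.41):
    """UCB1 selection: pick the child with the best average value plus an
    exploration bonus that shrinks as the child is visited more often."""
    return max(
        node.children,
        key=lambda ch: ch.value / max(ch.visits, 1)
        + c * math.sqrt(2 * math.log(max(node.visits, 1)) / max(ch.visits, 1)),
    )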