Code example #1
def main4():
    sim = MazeSimulator(render=True, xml_file='hardmaze_env.xml')
    csim = MazeSimulator(render=False, xml_file='hardmaze_env.xml')
    bs = NoveltySearch()
    backup_robot = copy.deepcopy(sim.env.robot)

    for t in range(10000):
        #if(t%500==0):
        #    sim.env.robot = copy.deepcopy(backup_robot)

        print(t)
        print('archive size: ',len(bs.behavior_archive))
        #bs.reset_archive()
        # Candidate actions: the same forward / turn-left / turn-right triples used in main().
        actions = [[0, 0.4, 0],[0.05, 0.2, 0],[0, 0.2, 0.05]]
        time.sleep(0.005)
        sim.render()
        bestval = -9999
        bestact = actions[0]
        values = [0,0,0]
        actions_behvs = [[],[],[]]
        for i in range(len(actions)):
            for j in range(2):
                #first step
                finder_obs, radar_obs, done = csim.step(actions[i], 0.2)
                new_val,behv = rollout(csim,10)

                #comment block below for fitness only
                #new_val = bs.get_novelty(behv) 
                bs.put_behavior(behv)
                actions_behvs[i].append(behv)
                
                csim.env.robot = copy.deepcopy(sim.env.robot) 
        # Score each candidate action by the total novelty of its rollout behaviors.
        best_val = 0
        for i in range(len(actions_behvs)):
            for behv in actions_behvs[i]:
                v = bs.get_novelty(behv)
                values[i] += v

                #if(v>best_val):
                #    bestact = actions[i]
                #    best_val = v

            if(values[i]>bestval):
                bestval = values[i]
                bestact = actions[i]
        print('values:',values)
        # If total novelty collapses, reset the robot to its initial backup state.
        if sum(values) < 1:
            sim.env.robot = copy.deepcopy(backup_robot)

        finder_obs, radar_obs, done = sim.step(bestact, 0.2)

        pygame.event.pump()
        if done:
            break
    print("Episode finished after {} timesteps".format(t+1))
    print(sim.evaluate_fitness())
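
The examples on this page call a rollout helper that is not shown here. The snippet below is a minimal sketch of what such a helper might look like, inferred only from how it is called (new_val, behv = rollout(csim, depth)) and from the (x, y) behavior descriptors used in rhea.py; the random action policy and the names are assumptions, not the project's actual code.

import random

# Hypothetical sketch of the rollout helper used by these examples.
# It takes `depth` random steps on the given simulator and returns the fitness
# reached together with the final (x, y) position as the behavior descriptor.
def rollout(sim, depth):
    actions = [[0, 0.4, 0], [0.05, 0.2, 0], [0, 0.2, 0.05]]
    for _ in range(depth):
        _, _, done = sim.step(random.choice(actions), 0.2)
        if done:
            break
    behv = (int(sim.env.robot.location[0]), int(sim.env.robot.location[1]))
    return sim.evaluate_fitness(), behv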
Code example #2
def main5():
    sim = MazeSimulator(render=True, xml_file='hardmaze_env.xml')
    csim = MazeSimulator(render=False, xml_file='hardmaze_env.xml')
    
    for t in range(30000):
        time.sleep(0.005)
        sim.render()
        keys = pygame.key.get_pressed()
        action = [0, 0.4, 0]
        if keys[pygame.K_LEFT]:
            action = [0.05, 0.2, 0]
        if keys[pygame.K_RIGHT]:
            action = [0, 0.2, 0.05]

        actions = [[0, 0.4, 0],[0.05, 0.2, 0],[0, 0.2, 0.05]]
        values = [0,0,0]
        for i in range(len(actions)):
            val = 0
            for j in range(20):
                # Apply the candidate action on the clone, then roll out from there.
                finder_obs, radar_obs, done = csim.step(actions[i], 0.2)
                new_val, behv = rollout(csim, 5)
                val += new_val
                # Restore the clone to the real simulator's state.
                csim.env.robot = copy.deepcopy(sim.env.robot)
            values[i] = val
        print('values: ',values)

        finder_obs, radar_obs, done = sim.step(action, 0.2)

        pygame.event.pump()
        if done:
            print("Episode finished after {} timesteps".format(t+1))
            print(sim.evaluate_fitness())
            break
Code example #3
def main6(path):
    #sim = MazeSimulator(render=False, xml_file=path)
    csim = MazeSimulator(render=False, xml_file=path)
    #m = player.mcts_act(csim,500,20,do_ns=True)
    #actions = m.run(csim,path)
    
    for i in range(1):
        m =player.mcts_act(csim,1000,20,do_ns=True)
        actions = m.run(csim,path)
        for act in actions:
            #time.sleep(0.005)
            #sim.render()
            _,_,done = csim.step(act, 0.2)
            #print('Reward: ',sim.evaluate_fitness())

            pygame.event.pump()
Code example #4
File: rhea.py Project: abcp4/SFPNovelty
    def __init__(self,
                 rollout_actions_length,
                 environment,
                 mutation_probability,
                 num_pop,
                 use_shift_buffer=False,
                 flip_at_least_one=True,
                 discount_factor=1,
                 ignore_frames=0,
                 do_ns=False,
                 path=''):

        self._logger = logging.getLogger('RHEA')

        self._rollout_actions_length = rollout_actions_length
        self._environment = MazeSimulator(render=True, xml_file=path)
        self.path = path
        self._environment.env.robot = copy.deepcopy(environment.env.robot)
        self.environment = environment
        self._use_shift_buffer = use_shift_buffer
        self._flip_at_least_one = flip_at_least_one
        self._mutation_probability = mutation_probability
        self._discount_factor = discount_factor
        self.num_pop = num_pop
        self._ignore_frames = ignore_frames
        self.best_solution_val = -99999
        self.best_solution = None
        self.cur_best_solution_val = -99999
        self.cur_best_solution = None
        self.cur_best_novelty = -99999
        self.cur_best_novelty_sol = None

        self.history = []
        self.old_pop = []
        self.ns = ns.NoveltySearch(behavior_type='ad_hoc')
        #self.ns = ns.NoveltySearch(behavior_type='trajectory')
        #self.ns = ns.NoveltySearch(behavior_type='hamming')
        #self.ns = ns.NoveltySearch(behavior_type='entropy')
        #self.ns = ns.NoveltySearch(behavior_switch=True)
        self.ea = ea.EA('default')
        self.do_ns = do_ns
        #self.behv_state =copy.deepcopy(environment)
        #self.behv_last_visit =copy.deepcopy(environment)
        #self.behv_rewards =copy.deepcopy(environment)
        self.playout_count = 0

        # Initialize the solution to a random sequence
        if self._use_shift_buffer:
            self._solution = self._random_solution()

        self.tree = {}
        self.store_tree = False
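
For orientation, a constructor call might look like the sketch below. The class name RHEA is only inferred from the file name and logger name, and all parameter values are illustrative, not taken from the project.

# Hypothetical usage; the class name RHEA and the parameter values below are
# assumptions for illustration only.
env = MazeSimulator(render=False, xml_file='hardmaze_env.xml')
agent = RHEA(rollout_actions_length=40,
             environment=env,
             mutation_probability=0.1,
             num_pop=6,
             do_ns=True,
             path='hardmaze_env.xml')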
Code example #5
def main2():
    sim = MazeSimulator(render=False, xml_file='hardmaze_env.xml')
    
    for t in range(1000):
        print(t)
        #time.sleep(0.005)
        #sim.render()
        act = random.randint(0,2)
        if(act==0):
            action = [0, 0.4, 0]
        elif(act==1):
            action = [0.05, 0.2, 0]
        elif(act==2):
            action = [0, 0.2, 0.05]

        finder_obs, radar_obs, done = sim.step(action, 0.2)

        #pygame.event.pump()
        if done:
            break
    print("Episode finished after {} timesteps".format(t+1))
    print(sim.evaluate_fitness())
Code example #6
def main():
    sim = MazeSimulator(render=True, xml_file='hardmaze_env.xml')

    for t in range(30000):
        time.sleep(0.005)
        sim.render()
        keys = pygame.key.get_pressed()
        action = [0, 0.4, 0]
        if keys[pygame.K_LEFT]:
            action = [0.05, 0.2, 0]
        if keys[pygame.K_RIGHT]:
            action = [0, 0.2, 0.05]

        finder_obs, radar_obs, done = sim.step(action, 0.2)

        pygame.event.pump()
        if done:
            print("Episode finished after {} timesteps".format(t+1))
            print(sim.evaluate_fitness())
            break
Code example #7
def main7(path):
    #sim = MazeSimulator(render=False, xml_file=path)
    csim = MazeSimulator(render=False, xml_file=path)
    player.rhea_act(csim,it=1,pop_evolution=300,pop_num=6,rollout_limit=40,mutation_prob = 0.1,
                          do_ns=True,run_type=3,path=path)
Code example #8
def main3():
    sim = MazeSimulator(render=True, xml_file='hardmaze_env.xml')
    csim = MazeSimulator(render=False, xml_file='hardmaze_env.xml')

    for t in range(10000):
        print(t)
        actions = [[0, 0.4, 0],[0.05, 0.2, 0],[0, 0.2, 0.05]]
        time.sleep(0.005)
        sim.render()
        bestval = -9999
        bestact = actions[0]
        values = [0,0,0]
        for i in range(len(actions)):
            val = 0
            for j in range(5):
                #first step
                finder_obs, radar_obs, done = csim.step(actions[i], 0.2)
                new_val,behv = rollout(csim,20)
                val+=new_val
                csim.env.robot = copy.deepcopy(sim.env.robot) 
            values[i] = val
            if(val>bestval):
                bestact = actions[i]
                bestval = val
        print('values: ',values)
            
        finder_obs, radar_obs, done = sim.step(bestact, 0.2)
        print('Reward: ',sim.evaluate_fitness())

        pygame.event.pump()
        if done:
            break
    print("Episode finished after {} timesteps".format(t+1))
    print(sim.evaluate_fitness())
Code example #9
File: rhea.py Project: abcp4/SFPNovelty
    def evaluate_rollouts(
        self,
        env,
        solutions,
        discount=1,
        ignore_frames=0,
    ):
        rewards = []
        nv_rews = []
        behvs = []
        backup_env = MazeSimulator(render=False, xml_file=self.path)
        backup_env.env.robot = copy.deepcopy(env.env.robot)

        for sol in solutions:
            return_r = 0
            return_n = 0
            acts = []

            for act in sol:
                acts.append(act)
                #time.sleep(0.005)
                env.render()
                actions = [a for a in env.get_possible_actions()]
                _, _, done = env.step(actions[act - 1], 0.2)
                pygame.event.pump()
                r = env.evaluate_fitness()
                if (r > 0):
                    print('reward: ', r)
                    # Intentional hard stop as soon as a positive reward (goal) is found.
                    raise RuntimeError('goal reached during rollout')
                #return_r+=r*discount
                return_r = r

                if (self.do_ns):
                    behv = (int(env.env.robot.location[0]),
                            int(env.env.robot.location[1]))

                    # Accumulates the behavior at each step. When done = True (the
                    # episode ends), the whole episode behavior is added to the archive,
                    # but only if store_behavior is True.
                    self.ns.build_behavior(behv, act, done, False)

                if (done):
                    break

            if (self.do_ns):
                #behv =(env.posx,env.posy)
                behvs.append(self.ns.episode_behavior)
                #return_n =self.ns.get_novelty(behv)
                #print('solution behavior: ',self.ns.episode_behavior)
                return_n = self.ns.get_approx_novelty(self.ns.episode_behavior,
                                                      k=1000,
                                                      done=True)

                #nv_rews.append(return_n)
                # Weighted mix of fitness and novelty (currently novelty only).
                nv_rews.append(return_r * 0 + return_n * 1.0)

                print('novelty reward: ', return_n)
                print('distance reward: ', return_r)

                self.playout_count += 1

                if (not done):
                    self.ns.build_behavior(behv, act, True, False)

            #Store in tree if allowed
            if (self.store_tree):
                self.expand_tree(sol, return_r)
            #############Evaluating Rollouts by ##########
            #############rewards and/or Diversity ########

            # In the novelty case, the reward (novelty of the behavior) is only
            # computed at the end of the rollout.

            # Save the best rollout and the best return obtained so far (all-time best).
            if (return_r > self.best_solution_val):
                if (do_debug):
                    print('best reward now: ', return_r)
                #self.best_solution = np.concatenate((np.asarray(self.history),sol))
                self.best_solution = self.history + acts
                self.best_solution_val = return_r

            # Save the most diverse solution and its diversity score.
            # Should only be used within the current population.
            if (return_n > self.cur_best_novelty):
                #print('best novelty from current pop: ',return_n)
                self.cur_best_novelty_sol = self.history + acts
                self.cur_best_novelty = return_n

            # Save the best return and solution of the current population.
            if (return_r > self.cur_best_solution_val):
                #self.best_solution = np.concatenate((np.asarray(self.history),sol))
                self.cur_best_solution = self.history + acts
                self.cur_best_solution_val = return_r

            rewards.append(return_r)

            env.env.robot = copy.deepcopy(backup_env.env.robot)

        if (self.do_ns):
            return np.asarray(nv_rews), behvs

        return np.asarray(rewards), behvs
Code example #10
    def run(self,env,path):
        best_rewards = []
        start_time = time.time()

        root = Node(None, None)

        best_actions = []
        best_reward = float("-inf")
        state = MazeSimulator(render=True, xml_file=path)
        real_move=50
        c=0
        for p in range(self.playouts):
            print("Playout: ",p)
            state.env.robot = copy.deepcopy(env.env.robot) 
            sum_reward = 0
            node = root
            terminal = False
            actions = []

            # selection
            while node.children:
                if node.explored_children < len(node.children):
                    child = node.children[node.explored_children]
                    node.explored_children += 1
                    node = child
                else:
                    if(not self.do_ns):
                        #node = max(node.children, key=avg)
                        #node = max(node.children, key=ucb)
                        node = getBestChild(node)
                    else:
                        r =random.random()
                        if(r<self.e):
                            #node = max(node.children, key=avgn)
                            node = max(node.children, key=half)
                        else:
                            
                            node = getBestChild(node)
                            #node = max(node.children, key=ucb)
                        self.e*=self.decay
               
                #print(node.action)
                #time.sleep(0.005)
                state.render()
                _, _, terminal = state.step(node.action,0.2)

                if(self.do_ns):
                    behv = (int(state.env.robot.location[0]),int(state.env.robot.location[1]) ) 
                    self.ns.build_behavior(behv,node.action,False,False)

                pygame.event.pump()
                reward = state.evaluate_fitness()
                if(reward>0):
                    print('reward: ',reward)
                    # Intentional hard stop as soon as a positive reward (goal) is found.
                    raise RuntimeError('goal reached during tree descent')
                #sum_reward += reward
                actions.append(node.action)

            # expansion
            if not terminal:
                #node.children = [Node(node, a) for a in combinations(state.action_space)]
                node.children = [Node(node, a) for a in state.get_possible_actions()]
                random.shuffle(node.children)

            # playout
            while not terminal:
                pactions =state.get_possible_actions()
                action =pactions[random.randint(0,len(pactions)-1)]
    
                _, _, terminal = state.step(action,0.2)
                reward = state.evaluate_fitness()
                #sum_reward += reward
                actions.append(action)

                if(self.do_ns):
                    behv = state.env.robot.location
                    self.ns.build_behavior(behv,action,False,False)

                if len(actions) > self.max_depth:
                    sum_reward -= 100
                    break
            sum_reward = state.evaluate_fitness()

            # remember best
            #if best_reward < sum_reward:
            #    best_reward = sum_reward
            #    best_actions = actions
            
            nv_reward = 0
            #behavior
            if(self.do_ns):
                behv = (int(state.env.robot.location[0]),int(state.env.robot.location[1]) )  
                    
                #behv =(state.posx,state.posy)
                #nv_reward =self.ns.get_novelty(behv)

                # Finish the episode behavior, compute its approximate novelty,
                # then store it in the archive.
                self.ns.build_behavior(behv,action,False,store_behavior=False)
                nv_reward = self.ns.get_approx_novelty(self.ns.episode_behavior,done=True)
                self.ns.build_behavior(behv,action,True,store_behavior=True)
                #self.ns.set_behavior_in_archive(behv,self.ns.behavior_archive,True)
                #self.ns.put_behavior(behv)

            # backpropagate
            print('reward:',sum_reward)
            print('nv_reward:',nv_reward)
            while node:
                node.visits += 1
                node.value += sum_reward
                node.nv_value += nv_reward
                node =node.parent
            

        sum_reward = 0
        nv_reward = 0
        print('e: ',self.e)
        # Return the action sequence of the last playout (best-reward tracking is commented out above).
        best_actions = actions
        
        return best_actions
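
run() relies on selection helpers (avg, ucb, half, getBestChild) that are defined elsewhere in the project. Below is a minimal sketch of plausible implementations based only on the node statistics run() maintains (visits, value, nv_value); the 50/50 mix in half and the UCB constant are assumptions, not the project's definitions.

import math

# Hypothetical sketches of the selection helpers referenced in run(); the
# project's own definitions may differ. Both assume every child has been
# visited at least once, which the selection loop guarantees.
def half(child):
    # Equal mix of average reward and average novelty for one child node.
    return 0.5 * child.value / child.visits + 0.5 * child.nv_value / child.visits

def getBestChild(node, c=1.41):
    # Standard UCB1 choice among the children of `node`.
    return max(node.children,
               key=lambda ch: ch.value / ch.visits
               + c * math.sqrt(math.log(node.visits) / ch.visits))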