Example #1
    def add(self, data):
        # prepend a new node at the head of the list
        self.no_of_nodes += 1
        if self.head is None:
            node = Node(data)
            node.node_next = None
            self.head = node
        else:
            node = Node(data)
            node.node_next = self.head
            self.head = node
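The Node class and the containing list class are not shown in Example #1; a minimal sketch of what the method appears to assume (a data payload, a node_next pointer, and a list holder with head and no_of_nodes — the LinkedList name here is illustrative) could look like this:

class Node:
    def __init__(self, data):
        self.data = data
        self.node_next = None  # link to the next node


class LinkedList:
    def __init__(self):
        self.head = None
        self.no_of_nodes = 0

With these definitions, calling add(1) and then add(2) leaves 2 at the head with 1 behind it.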
Example #2
    def start(self, observation):
        self.episode_counter += 1
        if self.keep_tree and self.root is None:
            self.root = Node(None, observation)
            self.expansion(self.root)

        if self.keep_tree:
            self.subtree_node = self.root
        else:
            self.subtree_node = Node(None, observation)
            self.expansion(self.subtree_node)

        action = BaseDynaAgent.start(self, observation)
        return action
Example #3
    def expansion(self, node):
        for a in self.action_list:
            next_state, is_terminal, reward = self.true_model(
                node.get_state(),
                a)  # with the assumption of deterministic model
            # if np.array_equal(next_state, node.get_state()):
            #     continue
            value = self.get_initial_value(next_state)
            child = Node(node,
                         next_state,
                         is_terminal=is_terminal,
                         action_from_par=a,
                         reward_from_par=reward,
                         value=value)
            node.add_child(child)

            buffer_prev_state = self.getStateRepresentation(node.get_state())
            act_ind = self.getActionIndex(a)
            buffer_prev_action = torch.tensor([act_ind],
                                              device=self.device).view(1, 1)
            buffer_reward = torch.tensor([reward], device=self.device)
            buffer_state = None
            buffer_action = None
            if not is_terminal:
                buffer_state = self.getStateRepresentation(next_state)
                buffer_action = self.policy(buffer_state)
            self.updateTransitionBuffer(
                utils.transition(buffer_prev_state, buffer_prev_action,
                                 buffer_reward, buffer_state, buffer_action,
                                 is_terminal, self.time_step, 0))
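Examples #3, #4, #9, and #10 call a tree Node whose definition is not included. A minimal sketch of the interface they rely on (parent and state positional arguments, keyword metadata, plus get_state and add_child; the stored field names are assumptions) would be:

class Node:
    def __init__(self, parent, state, is_terminal=False, action_from_par=None,
                 reward_from_par=0.0, value=0.0):
        self.parent = parent
        self.state = state
        self.is_terminal = is_terminal
        self.action_from_par = action_from_par
        self.reward_from_par = reward_from_par
        self.value = value
        self.children = []

    def get_state(self):
        return self.state

    def add_child(self, child):
        self.children.append(child)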
Example #4
    def expansion(self, node):
        children_list = []
        sort_list = []
        for a in self.action_list:
            next_state, is_terminal, reward = self.true_model(
                node.get_state(),
                a)  # with the assumption of deterministic model
            # if np.array_equal(next_state, node.get_state()):
            #     continue
            value = self.get_initial_value(next_state)
            child = Node(node,
                         next_state,
                         is_terminal=is_terminal,
                         action_from_par=a,
                         reward_from_par=reward,
                         value=value)
            children_list.append(child)
            sort_value = self.get_state_value(next_state)
            sort_list.append(sort_value)

        children_list = [
            x for _, x in sorted(zip(sort_list, children_list),
                                 key=lambda pair: pair[0],
                                 reverse=True)
        ]
        # attach only the branch_factor highest-valued children
        for child in children_list[:self.branch_factor]:
            node.add_child(child)
Example #5
    def enqueue(self, data):
        n = Node(data)
        if self.count() == 0:
            self.first = self.last = n
            return

        self.last.next = n
        self.last = n
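Example #5 relies on a count helper that is not shown. A sketch of count and a matching dequeue, assuming the same first/last pointers and a data field on Node, could look like this:

    def count(self):
        # walk the chain; a stored length field would work equally well
        n, total = self.first, 0
        while n is not None:
            total += 1
            n = n.next
        return total

    def dequeue(self):
        # remove and return the oldest element, or None if the queue is empty
        if self.first is None:
            return None
        data = self.first.data
        self.first = self.first.next
        if self.first is None:
            self.last = None
        return data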
Example #6
    def step(self, reward, observation):
        if not self.keep_subtree:
            self.subtree_node = Node(None, observation)
            self.expansion(self.subtree_node)

        for i in range(self.num_iterations):
            self.MCTS_iteration()
        action, sub_tree = self.choose_action()
        self.subtree_node = sub_tree
        return action
Example #7
    def start(self, observation):
        if self.keep_tree and self.root is None:
            self.root = Node(None, observation)
            self.expansion(self.root)

        if self.keep_tree:
            self.subtree_node = self.root
            print(self.subtree_node.get_avg_value())
        else:
            self.subtree_node = Node(None, observation)
            self.expansion(self.subtree_node)

        # self.render_tree()

        for i in range(self.num_iterations):
            self.MCTS_iteration()
        action, sub_tree = self.choose_action()
        self.subtree_node = sub_tree
        return action
Example #8
    def append_to_tail(self, data):
        end = Node(data)
        n = self.head

        if n is None:
            self.head = end
            return

        while n.next is not None:
            n = n.next

        n.next = end
Example #9
    def expansion(self, node):
        for a in self.action_list:
            next_state, is_terminal, reward = self.true_model(
                node.get_state(),
                a)  # with the assumption of deterministic model
            # if np.array_equal(next_state, node.get_state()):
            #     continue
            value = self.get_initial_value(next_state)
            child = Node(node,
                         next_state,
                         is_terminal=is_terminal,
                         action_from_par=a,
                         reward_from_par=reward,
                         value=value)
            node.add_child(child)
Example #10
    def expansion(self, node):
        for a in self.action_list:
            next_state, is_terminal, reward = self.true_model(
                node.get_state(),
                a)  # with the assumption of deterministic model
            # if np.array_equal(next_state, node.get_state()):
            #     continue
            value = self.get_initial_value(next_state)
            child = Node(node,
                         next_state,
                         is_terminal=is_terminal,
                         action_from_par=a,
                         reward_from_par=reward,
                         value=value)
            node.add_child(child)

            buffer_prev_state = self.getStateRepresentation(node.get_state())
            act_ind = self.getActionIndex(a)
            buffer_prev_action = torch.tensor([act_ind],
                                              device=self.device).view(1, 1)
            buffer_reward = torch.tensor([reward], device=self.device)
            buffer_state = None
            buffer_action = None
            if not is_terminal:
                buffer_state = self.getStateRepresentation(next_state)
                buffer_action = self.policy(buffer_state)
            with torch.no_grad():
                # compute the one-step TD error for this simulated transition
                real_prev_action = self.action_list[buffer_prev_action.item()]
                prev_state_value = self.getStateActionValue(
                    buffer_prev_state, real_prev_action).item()
                state_value = 0
                if not is_terminal:
                    # bootstrap with the maximum Q-value of the next state
                    state_value = self._vf['q']['network'](
                        buffer_state).max(1)[0].item()
                    buffer_state = buffer_state.float()
                td_error = (buffer_reward.item() +
                            self.gamma * state_value - prev_state_value)
                # keep only transitions whose TD error is above the running average
                if td_error >= self.td_average:
                    self.updateTransitionBuffer(
                        utils.transition(buffer_prev_state.float(),
                                         buffer_prev_action,
                                         buffer_reward.float(), buffer_state,
                                         None, is_terminal, self.time_step, 0))
                    self.mcts_count += 1
                    self.update_average_td_error(td_error)
Example #11
    def step(self, reward, observation):
        if not self.keep_subtree:
            self.subtree_node = Node(None, observation)
            self.expansion(self.subtree_node)

        self.time_step += 1

        self.state = self.getStateRepresentation(observation)

        reward = torch.tensor([reward], device=self.device)
        self.action = self.policy(self.state)

        # store the new transition in buffer
        if self.episode_counter % 2 == 1:
            self.updateTransitionBuffer(
                utils.transition(self.prev_state, self.prev_action, reward,
                                 self.state, self.action, False,
                                 self.time_step, 0))
        # update target
        if self._target_vf['counter'] >= self._target_vf['update_rate']:
            self.setTargetValueFunction(self._vf['q'], 'q')
            # self.setTargetValueFunction(self._vf['s'], 's')

        # update value function with the buffer
        if self._vf['q']['training']:
            if len(self.transition_buffer) >= self._vf['q']['batch_size']:
                transition_batch = self.getTransitionFromBuffer(
                    n=self._vf['q']['batch_size'])
                self.updateValueFunction(transition_batch, 'q')
        if self._vf['s']['training']:
            if len(self.transition_buffer) >= self._vf['s']['batch_size']:
                transition_batch = self.getTransitionFromBuffer(
                    n=self._vf['s']['batch_size'])
                self.updateValueFunction(transition_batch, 's')

        # train/plan with model
        self.trainModel()
        self.plan()

        self.updateStateRepresentation()

        self.prev_state = self.getStateRepresentation(observation)
        self.prev_action = self.action  # another option: call self.policy again here

        return self.action_list[self.prev_action.item()]
Example #12
    def add(self, element):
        new_node = Node(element)
        # new_node.get_children()
        new_node.set_children(self.head_node)
        self.head_node = new_node
        self.size += 1
Example #13
    def push(self, data):
        n = Node(data)
        n.next = self.top
        self.top = n
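A matching pop for the stack in Example #13, assuming the same top pointer and a data field on Node, might be sketched as:

    def pop(self):
        # remove and return the element on top, or None if the stack is empty
        if self.top is None:
            return None
        data = self.top.data
        self.top = self.top.next
        return data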