Example 1
    def update(self, initial_goals, desired_goals):
        # Until at least one trajectory has been collected, fall back to the
        # desired goals themselves.
        if self.achieved_trajectory_pool.counter == 0:
            self.pool = copy.deepcopy(desired_goals)
            return

        achieved_pool, achieved_pool_init_state = self.achieved_trajectory_pool.pad()
        candidate_goals = []
        candidate_edges = []
        candidate_id = []

        agent = self.args.agent
        achieved_value = []
        # Estimate the critic's value of every goal visited along each stored
        # trajectory; with a sparse -1/0 reward the return lies in
        # [-1/(1-gamma), 0], so the Q-estimates are clipped to that range.
        for i in range(len(achieved_pool)):
            obs = [
                goal_concat(achieved_pool_init_state[i], achieved_pool[i][j])
                for j in range(achieved_pool[i].shape[0])
            ]
            feed_dict = {agent.raw_obs_ph: obs}
            value = agent.sess.run(agent.q_pi, feed_dict)[:, 0]
            value = np.clip(value, -1.0 / (1.0 - self.args.gamma), 0)
            achieved_value.append(value.copy())

        # Assign flow-network vertex ids: 0 is the source, every achieved
        # trajectory and every desired goal gets its own vertex, and n is
        # the sink.
        n = 0
        graph_id = {'achieved': [], 'desired': []}
        for i in range(len(achieved_pool)):
            n += 1
            graph_id['achieved'].append(n)
        for i in range(len(desired_goals)):
            n += 1
            graph_id['desired'].append(n)
        n += 1
        self.match_lib.clear(n)

        # Source -> trajectory edges: capacity 1, cost 0.
        for i in range(len(achieved_pool)):
            self.match_lib.add(0, graph_id['achieved'][i], 1, 0)
        # One edge per (trajectory, desired goal) pair. The edge cost is a
        # value-regularized distance: per-step Euclidean distance to the
        # desired goal plus a penalty that shrinks as the critic's value
        # estimate approaches 0, so goals the policy can already reach are
        # cheaper to match.
        for i in range(len(achieved_pool)):
            for j in range(len(desired_goals)):
                res = np.sqrt(
                    np.sum(np.square(achieved_pool[i] - desired_goals[j]),
                           axis=1)) - achieved_value[i] / (
                               self.args.hgg_L / self.max_dis /
                               (1 - self.args.gamma))
                # Best step along the trajectory, plus a term penalizing the
                # distance between the trajectory's start and the episode's
                # initial goal.
                match_dis = np.min(res) + self.goal_distance(
                    achieved_pool[i][0], initial_goals[j]) * self.args.hgg_c
                match_idx = np.argmin(res)

                edge = self.match_lib.add(graph_id['achieved'][i],
                                          graph_id['desired'][j], 1,
                                          c_double(match_dis))
                candidate_goals.append(achieved_pool[i][match_idx])
                candidate_edges.append(edge)
                candidate_id.append(j)
        # Desired-goal -> sink edges: capacity 1, cost 0.
        for i in range(len(desired_goals)):
            self.match_lib.add(graph_id['desired'][i], n, 1, 0)

        # Solve the min-cost flow; every desired goal must end up matched to
        # a distinct achieved trajectory.
        match_count = self.match_lib.cost_flow(0, n)
        assert match_count == self.length

        # Read off the matching: the candidate goal of every saturated edge
        # becomes the exploration goal for the corresponding desired goal.
        explore_goals = [0] * self.length
        for i in range(len(candidate_goals)):
            if self.match_lib.check_match(candidate_edges[i]) == 1:
                explore_goals[candidate_id[i]] = candidate_goals[i].copy()
        assert len(explore_goals) == self.length
        self.pool = np.array(explore_goals)
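
To make the edge cost concrete, here is a minimal, self-contained sketch of the value-regularized distance for a single (trajectory, desired goal) pair. All numbers, and the scalar stand-ins for self.args.hgg_L, self.max_dis and self.args.gamma, are made up for illustration:

import numpy as np

# Toy stand-ins for self.args.hgg_L, self.max_dis and self.args.gamma.
hgg_L, max_dis, gamma = 5.0, 1.0, 0.98

# One padded trajectory of achieved goals (T x goal_dim) and a desired goal.
achieved = np.array([[0.0, 0.0], [0.4, 0.1], [0.8, 0.3]])
desired = np.array([1.0, 0.5])

# Clipped per-step value estimates, as the critic loop above would produce.
value = np.array([-40.0, -25.0, -10.0])

# Per-step Euclidean distance plus a penalty that shrinks as the value
# estimate approaches 0 (here hgg_L / max_dis / (1 - gamma) == 250).
res = np.linalg.norm(achieved - desired, axis=1) \
    - value / (hgg_L / max_dis / (1 - gamma))

match_idx = res.argmin()
print(match_idx, achieved[match_idx])  # step picked as the candidate goal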
Example 2
    def update(self, initial_goals, desired_goals):
        if self.achieved_trajectory_pool.counter == 0:
            self.pool = copy.deepcopy(desired_goals)
            return

        achieved_pool, achieved_pool_init_state = self.achieved_trajectory_pool.pad()
        candidate_goals = []
        candidate_edges = []
        candidate_id = []

        agent = self.args.agent
        achieved_value = []
        for i in range(len(achieved_pool)):
            obs = [
                goal_concat(achieved_pool_init_state[i], achieved_pool[i][j])
                for j in range(achieved_pool[i].shape[0])
            ]
            feed_dict = {agent.raw_obs_ph: obs}
            value = agent.sess.run(agent.q_pi, feed_dict)[:, 0]
            value = np.clip(value, -1.0 / (1.0 - self.args.gamma), 0)
            achieved_value.append(value.copy())

        n = 0
        graph_id = {'achieved': [], 'desired': []}
        for i in range(len(achieved_pool)):
            n += 1
            graph_id['achieved'].append(n)
        for i in range(len(desired_goals)):
            n += 1
            graph_id['desired'].append(n)
        n += 1
        self.match_lib.clear(n)

        for i in range(len(achieved_pool)):
            self.match_lib.add(0, graph_id['achieved'][i], 1, 0)
        for i in range(len(achieved_pool)):
            for j in range(len(desired_goals)):

                # Variant: when enabled, swap the per-step Euclidean distance
                # for a graph-based (obstacle-aware) goal distance.
                if self.args.graph:
                    size = achieved_pool[i].shape[0]
                    res_1 = np.zeros(size)
                    for k in range(size):
                        res_1[k] = self.get_graph_goal_distance(
                            achieved_pool[i][k], desired_goals[j])
                    res = res_1 - achieved_value[i] / (self.args.hgg_L /
                                                       self.max_dis /
                                                       (1 - self.args.gamma))
                # Route-based distance for the obstacle environment.
                elif self.args.route and self.args.env == 'FetchPickObstacle-v1':
                    size = achieved_pool[i].shape[0]
                    res_1 = np.zeros(size)
                    for k in range(size):
                        res_1[k] = self.get_route_goal_distance(
                            achieved_pool[i][k], desired_goals[j])
                    res = res_1 - achieved_value[i] / (self.args.hgg_L /
                                                       self.max_dis /
                                                       (1 - self.args.gamma))
                else:
                    # Original HGG cost: per-step Euclidean distance.
                    res = np.sqrt(
                        np.sum(np.square(achieved_pool[i] - desired_goals[j]),
                               axis=1)) - achieved_value[i] / (
                                   self.args.hgg_L / self.max_dis /
                                   (1 - self.args.gamma))

                # Distance between initial positions still uses the plain L2
                # norm, as before.
                match_dis = np.min(res) + goal_distance(
                    achieved_pool[i][0], initial_goals[j]) * self.args.hgg_c
                match_idx = np.argmin(res)

                edge = self.match_lib.add(graph_id['achieved'][i],
                                          graph_id['desired'][j], 1,
                                          c_double(match_dis))
                candidate_goals.append(achieved_pool[i][match_idx])
                candidate_edges.append(edge)
                candidate_id.append(j)
        for i in range(len(desired_goals)):
            self.match_lib.add(graph_id['desired'][i], n, 1, 0)

        match_count = self.match_lib.cost_flow(0, n)
        assert match_count == self.length

        explore_goals = [0] * self.length
        for i in range(len(candidate_goals)):
            if self.match_lib.check_match(candidate_edges[i]) == 1:
                explore_goals[candidate_id[i]] = candidate_goals[i].copy()
        assert len(explore_goals) == self.length
        self.pool = np.array(explore_goals)
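
A note on match_lib: cost_flow is a compiled routine driven through ctypes (hence the c_double cost above), so its source is not shown here. Because every edge in the network has capacity 1, the min-cost flow reduces to a rectangular assignment problem: pick a distinct achieved trajectory for each desired goal so that the summed match_dis is minimal. A rough stand-in sketch using SciPy's linear_sum_assignment (not the library this code uses; the cost matrix is random placeholder data):

import numpy as np
from scipy.optimize import linear_sum_assignment

# Placeholder cost matrix: match_dis[i][j] for each
# (achieved trajectory i, desired goal j) pair, as computed above.
rng = np.random.default_rng(0)
match_dis = rng.uniform(0.0, 2.0, size=(6, 4))  # 6 trajectories, 4 goals

# Every desired goal (column) is matched to a distinct trajectory (row)
# so that the total cost is minimal, mirroring what cost_flow computes.
rows, cols = linear_sum_assignment(match_dis)
for i, j in zip(rows, cols):
    print(f"desired goal {j} <- trajectory {i} (cost {match_dis[i, j]:.3f})")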