# Module-level imports required by both versions of update below;
# goal_concat is a helper from the surrounding codebase that concatenates
# an initial state with a goal vector.
import copy
from ctypes import c_double

import numpy as np


# Original HGG version of the goal-matching update.
def update(self, initial_goals, desired_goals):
    if self.achieved_trajectory_pool.counter == 0:
        self.pool = copy.deepcopy(desired_goals)
        return

    achieved_pool, achieved_pool_init_state = self.achieved_trajectory_pool.pad()
    candidate_goals = []
    candidate_edges = []
    candidate_id = []

    # Estimate the value of each achieved goal along every stored trajectory.
    agent = self.args.agent
    achieved_value = []
    for i in range(len(achieved_pool)):
        obs = [
            goal_concat(achieved_pool_init_state[i], achieved_pool[i][j])
            for j in range(achieved_pool[i].shape[0])
        ]
        feed_dict = {agent.raw_obs_ph: obs}
        value = agent.sess.run(agent.q_pi, feed_dict)[:, 0]
        # Q-values are bounded below by -1 / (1 - gamma) under a -1-per-step reward.
        value = np.clip(value, -1.0 / (1.0 - self.args.gamma), 0)
        achieved_value.append(value.copy())

    # Assign node ids for the bipartite flow graph: 0 is the source, n the sink.
    n = 0
    graph_id = {'achieved': [], 'desired': []}
    for i in range(len(achieved_pool)):
        n += 1
        graph_id['achieved'].append(n)
    for i in range(len(desired_goals)):
        n += 1
        graph_id['desired'].append(n)
    n += 1
    self.match_lib.clear(n)

    # Source -> achieved-trajectory nodes, unit capacity, zero cost.
    for i in range(len(achieved_pool)):
        self.match_lib.add(0, graph_id['achieved'][i], 1, 0)
    for i in range(len(achieved_pool)):
        for j in range(len(desired_goals)):
            # Matching cost: L2 distance of the closest point on trajectory i
            # to desired goal j, discounted by the value estimate.
            res = np.sqrt(
                np.sum(np.square(achieved_pool[i] - desired_goals[j]), axis=1)
            ) - achieved_value[i] / (
                self.args.hgg_L / self.max_dis / (1 - self.args.gamma))
            match_dis = np.min(res) + self.goal_distance(
                achieved_pool[i][0], initial_goals[j]) * self.args.hgg_c
            match_idx = np.argmin(res)

            edge = self.match_lib.add(graph_id['achieved'][i],
                                      graph_id['desired'][j], 1,
                                      c_double(match_dis))
            candidate_goals.append(achieved_pool[i][match_idx])
            candidate_edges.append(edge)
            candidate_id.append(j)
    # Desired-goal nodes -> sink, unit capacity, zero cost.
    for i in range(len(desired_goals)):
        self.match_lib.add(graph_id['desired'][i], n, 1, 0)

    # Solve the min-cost flow; every desired goal must end up matched.
    match_count = self.match_lib.cost_flow(0, n)
    assert match_count == self.length

    explore_goals = [0] * self.length
    for i in range(len(candidate_goals)):
        if self.match_lib.check_match(candidate_edges[i]) == 1:
            explore_goals[candidate_id[i]] = candidate_goals[i].copy()
    assert len(explore_goals) == self.length
    self.pool = np.array(explore_goals)
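# --- Illustration (not part of the source) --------------------------------
# match_lib above is a compiled min-cost-flow solver. Because every edge has
# unit capacity (each trajectory serves at most one desired goal, each goal is
# matched once), the flow problem reduces to a one-to-one assignment, so a
# Hungarian solver produces the same matching. This is a minimal sketch under
# that assumption; match_goals and the random cost matrix are hypothetical
# illustrations, not the author's API.
import numpy as np
from scipy.optimize import linear_sum_assignment


def match_goals(cost):
    """Return (trajectory, desired-goal) pairs minimizing the total cost.

    cost[i, j] plays the role of match_dis for trajectory i and goal j.
    """
    rows, cols = linear_sum_assignment(cost)
    return list(zip(rows, cols))


# Example: 5 achieved trajectories matched one-to-one against 5 desired goals.
pairs = match_goals(np.random.default_rng(0).random((5, 5)))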
# Variant of the update above: when args.graph is set (or args.route on
# FetchPickObstacle-v1), the Euclidean goal distance is replaced by a
# graph-based or route-based distance that respects obstacles.
def update(self, initial_goals, desired_goals):
    if self.achieved_trajectory_pool.counter == 0:
        self.pool = copy.deepcopy(desired_goals)
        return

    achieved_pool, achieved_pool_init_state = self.achieved_trajectory_pool.pad()
    candidate_goals = []
    candidate_edges = []
    candidate_id = []

    agent = self.args.agent
    achieved_value = []
    for i in range(len(achieved_pool)):
        obs = [
            goal_concat(achieved_pool_init_state[i], achieved_pool[i][j])
            for j in range(achieved_pool[i].shape[0])
        ]
        feed_dict = {agent.raw_obs_ph: obs}
        value = agent.sess.run(agent.q_pi, feed_dict)[:, 0]
        value = np.clip(value, -1.0 / (1.0 - self.args.gamma), 0)
        achieved_value.append(value.copy())

    n = 0
    graph_id = {'achieved': [], 'desired': []}
    for i in range(len(achieved_pool)):
        n += 1
        graph_id['achieved'].append(n)
    for i in range(len(desired_goals)):
        n += 1
        graph_id['desired'].append(n)
    n += 1
    self.match_lib.clear(n)

    for i in range(len(achieved_pool)):
        self.match_lib.add(0, graph_id['achieved'][i], 1, 0)
    for i in range(len(achieved_pool)):
        for j in range(len(desired_goals)):
            if self.args.graph:
                # Use the graph-based goal distance instead of the L2 norm.
                size = achieved_pool[i].shape[0]
                res_1 = np.zeros(size)
                for k in range(size):
                    res_1[k] = self.get_graph_goal_distance(
                        achieved_pool[i][k], desired_goals[j])
                res = res_1 - achieved_value[i] / (
                    self.args.hgg_L / self.max_dis / (1 - self.args.gamma))
            elif self.args.route and self.args.env == 'FetchPickObstacle-v1':
                # Use the route-based goal distance.
                size = achieved_pool[i].shape[0]
                res_1 = np.zeros(size)
                for k in range(size):
                    res_1[k] = self.get_route_goal_distance(
                        achieved_pool[i][k], desired_goals[j])
                res = res_1 - achieved_value[i] / (
                    self.args.hgg_L / self.max_dis / (1 - self.args.gamma))
            else:
                # Original HGG distance: plain L2 norm.
                res = np.sqrt(
                    np.sum(np.square(achieved_pool[i] - desired_goals[j]),
                           axis=1)
                ) - achieved_value[i] / (
                    self.args.hgg_L / self.max_dis / (1 - self.args.gamma))
            # Distance between initial positions still uses the L2 norm,
            # as before.
            match_dis = np.min(res) + goal_distance(
                achieved_pool[i][0], initial_goals[j]) * self.args.hgg_c
            match_idx = np.argmin(res)

            edge = self.match_lib.add(graph_id['achieved'][i],
                                      graph_id['desired'][j], 1,
                                      c_double(match_dis))
            candidate_goals.append(achieved_pool[i][match_idx])
            candidate_edges.append(edge)
            candidate_id.append(j)
    for i in range(len(desired_goals)):
        self.match_lib.add(graph_id['desired'][i], n, 1, 0)

    match_count = self.match_lib.cost_flow(0, n)
    assert match_count == self.length

    explore_goals = [0] * self.length
    for i in range(len(candidate_goals)):
        if self.match_lib.check_match(candidate_edges[i]) == 1:
            explore_goals[candidate_id[i]] = candidate_goals[i].copy()
    assert len(explore_goals) == self.length
    self.pool = np.array(explore_goals)
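# --- Illustration (not part of the source) --------------------------------
# get_graph_goal_distance / get_route_goal_distance are defined elsewhere in
# the codebase. A plausible sketch of the underlying primitive, assuming goals
# are snapped to vertices of a precomputed obstacle-aware graph and distance
# is the shortest-path length; the adjacency format is an assumption made for
# illustration, not the source's data structure.
import heapq


def shortest_path_distance(adj, src, dst):
    """Dijkstra over adj = {node: [(neighbor, edge_weight), ...]}."""
    dist = {src: 0.0}
    heap = [(0.0, src)]
    while heap:
        d, u = heapq.heappop(heap)
        if u == dst:
            return d
        if d > dist.get(u, float('inf')):
            continue  # stale heap entry, already settled with a shorter path
        for v, w in adj.get(u, ()):
            nd = d + w
            if nd < dist.get(v, float('inf')):
                dist[v] = nd
                heapq.heappush(heap, (nd, v))
    return float('inf')  # dst unreachable (e.g., blocked by obstacles)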