def viz_convergence(states, idx_trajs, log, queue_size=1000, cstr_enabled=False):
    """Plot the expected goal (and constraint) count distributions over a
    demonstration trajectory."""
    import bn_irl_common as bic
    import matplotlib.pyplot as plt

    # de-duplicate the first demonstration's state indices, preserving order
    new_idx_traj = []
    for idx in idx_trajs[0]:
        if not (idx in new_idx_traj):
            new_idx_traj.append(idx)
    idx_traj = new_idx_traj

    # expected goals and constraints from a demonstration
    goal_features, cstr_ids, cstr_mus, param_dict = bic.get_expected_goal(
        log, states, enable_cstr=cstr_enabled, queue_size=queue_size,
        idx_traj=idx_traj, return_params=True)
    goal_states = param_dict.get('goal_states', [])
    cstr_counts = param_dict.get('cstr_counts', [])
    expected_n_goal = len(goal_states)

    # count how often each trajectory state was sampled as the j-th goal
    goal_counts = [np.zeros(len(idx_traj)) for _ in range(expected_n_goal)]
    for goal, _ in zip(log['goals'], log['z']):
        for j in range(expected_n_goal):
            if j < len(goal):
                i = idx_traj.index(goal[j][0])
                goal_counts[j][i] += 1

    # one row per goal: goal counts (left) and constraint counts (right)
    fig = plt.figure()
    for i in range(min(expected_n_goal, 10)):  # plot at most 10 goals
        ax = fig.add_subplot(expected_n_goal, 2, i * 2 + 1)
        ax.bar(range(len(goal_counts[i])), goal_counts[i])
        if cstr_enabled:
            ax = fig.add_subplot(expected_n_goal, 2, i * 2 + 2)
            ax.bar(range(len(cstr_counts[i])), cstr_counts[i])
    plt.show()

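
# Example usage (a sketch; `states`, `idx_trajs`, and the sampler `log` are
# assumed to come from a finished BN-IRL run):
#
#   viz_convergence(states, idx_trajs, log, queue_size=1000, cstr_enabled=True)
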
def find_goal(mdp, env, log, states, feature_fn, roadmap, error=1e-10,
              ths=1e-3, queue_size=1000, enable_cstr=True, cstr_ths=2.33,
              use_discrete_state=True, use_nearest_goal=True,
              return_policy=True, **kwargs):
    """Find sub-goal states (and, optionally, constraints and per-goal
    policies) in a new environment from the BN-IRL posterior samples in
    `log`."""
    irl_support_feature_ids = log['support_feature_ids']
    irl_support_feature_values = log['support_feature_values']
    cstr_feat_id = log['cstr_feat_id']

    # expected goals and constraints from a demonstration
    goal_features, cstr_ids, cstr_mus, _ = bic.get_expected_goal(
        log, states, enable_cstr=enable_cstr, queue_size=queue_size)

    # Now, find the goals and constraints in the new environment.
    # n_cstr equals n_partitions.
    cstr_fns = make_test_cstr_fns(env, feature_fn, states, cstr_mus,
                                  cstr_feat_id, ths=cstr_ths)

    if use_discrete_state:
        T_org = copy.copy(mdp.T)

        # feature values of all discrete states
        features = fu.get_features_from_states(env, states, feature_fn)
        distFunc = kwargs.get('distFunc', None)

        # compute a policy per sub-goal
        new_goals = []
        for i, (f_id, c_id, c_mu) in enumerate(zip(goal_features, cstr_ids,
                                                   cstr_mus)):
            print "Find {}th goal, cstr={}".format(i, c_id)

            # feature goal
            idx = irl_support_feature_ids.index(f_id)
            f = irl_support_feature_values[idx]

            # reset any positive rewards
            rewards = np.array(mdp.get_rewards())
            rewards[np.where(rewards > 0)] = 0.

            # distance from each state's feature to the goal feature
            d = np.linalg.norm(features - f, ord=np.inf, axis=-1)
            dist_ths = ths

            # grow the distance threshold until a reachable,
            # constraint-satisfying goal candidate is found
            bad_goals = []
            while True:
                s_ids = [j for j in range(len(d)) if d[j] <= dist_ths]
                if len(s_ids) > 0:
                    goal_found = False
                    for idx in s_ids:
                        if idx in bad_goals:
                            continue
                        # discard goals that violate the constraint
                        if c_id == 0 and cstr_fns[i](idx) is False:
                            print "Removed a bad goal violating constraints"
                            print features[idx]
                            bad_goals.append(idx)
                            continue
                        # keep only goals reachable from the start state
                        rx1, _ = dijkstra_planning.dijkstra_planning(
                            env, env.start_state, states[idx], env.roadmap,
                            env.states, distFunc=distFunc)
                        if rx1 is not None:
                            goal_found = True
                            break
                        bad_goals.append(idx)
                    print "{}: Goal found? {} (dist ths: {})".format(
                        s_ids, goal_found, dist_ths)
                    if goal_found:
                        break
                    print "The goal feature may not match the current goal setup."
                # no valid goal yet: relax the distance threshold
                dist_ths += ths
            print "Found goals: ", s_ids

            # Select the candidate nearest to both the start and goal states
            if len(s_ids) > 1 and use_nearest_goal is False:
                dist = []
                for j, idx in enumerate(s_ids):
                    rx1, _ = dijkstra_planning.dijkstra_planning(
                        env, env.start_state, states[idx], env.roadmap,
                        env.states, distFunc=distFunc)
                    if rx1 is None:
                        dist.append(np.inf)
                        continue
                    rx2, _ = dijkstra_planning.dijkstra_planning(
                        env, states[idx], env.goal_state, env.roadmap,
                        env.states, distFunc=distFunc)
                    if rx2 is None:
                        dist.append(np.inf)
                        continue
                    dist.append(len(rx1) + len(rx2))
                min_j = np.argmin(dist)
                s_ids = s_ids[min_j:min_j + 1]
                print "Selected a reachable state as a goal {}".format(s_ids)

            if return_policy:
                rewards[s_ids] = 1.
                mdp.set_rewards(rewards)

                # NOTE: we only use a single constraint
                # (0: constrained, 1: free)
                if enable_cstr is False or (cstr_fns is None or c_id > 0 or
                                            c_id == -1):
                    # no-constraint case: restore the original transitions
                    mdp.T = copy.copy(T_org)
                else:
                    # constrained case: mask transitions along roadmap edges
                    # that lead into constraint-violating states
                    validity_map = cstr_fns[i](range(len(states)))[roadmap]
                    validity_map[:, 0] = True
                    T = T_org * validity_map[:, np.newaxis, :]
                    # re-normalize the transition probabilities
                    sum_T = np.sum(T, axis=-1)
                    sum_T[np.where(sum_T == 0.)] = 1.
                    T /= sum_T[:, :, np.newaxis]
                    mdp.T = T

                mdp.set_goal(s_ids)
                policy, values = mdp.find_policy(error)
            else:
                policy = []

            # among the candidates, pick the one closest to the start state
            if distFunc is None:
                idx = np.argmin(np.linalg.norm(
                    states[s_ids] - env.get_start_state(), axis=-1))
            else:
                idx = np.argmin(distFunc(states[s_ids], env.get_start_state()))

            if enable_cstr:
                new_goals.append([s_ids[idx], copy.copy(policy), f_id,
                                  c_mu, c_id])
            else:
                new_goals.append([s_ids[idx], copy.copy(policy), f_id])
        return new_goals

    else:
        # continuous-state case: chain minimum-cost states from the start
        new_goals = []
        state = env.get_start_state()
        for i, (f_id, c_id, c_mu) in enumerate(zip(goal_features, cstr_ids,
                                                   cstr_mus)):
            print "Find {}th goal, cstr={}".format(i, c_id)

            # feature goal
            idx = irl_support_feature_ids.index(f_id)
            f = irl_support_feature_values[idx]

            if enable_cstr:
                # find the closest state to the feature f under constraints
                s = find_minimum_cost_state(state, env, f, feature_fn,
                                            cstr_feat_id, c_id, c_mu, cstr_ths)
                new_goals.append([s, None, f_id, c_mu, c_id])
            else:
                # find the closest state to the feature f
                s = find_minimum_cost_state(state, env, f, feature_fn)
                new_goals.append([s, None, f_id])
            state = s
        return new_goals

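
# Example usage (a sketch; `mdp`, `env`, `roadmap`, `states`, and `feature_fn`
# are assumed to be constructed by the surrounding pipeline):
#
#   new_goals = find_goal(mdp, env, log, states, feature_fn, roadmap,
#                         enable_cstr=True, cstr_ths=2.33)
#   # each entry: [goal_state_id, policy, feature_id, cstr_mu, cstr_id]
#   # (or [goal_state_id, policy, feature_id] when enable_cstr is False)

def _demo_mask_transitions(T_org, validity_map):
    """A minimal, self-contained sketch (not part of the original pipeline) of
    the constraint-masking step in find_goal: zero out transitions along
    roadmap edges that lead into constraint-violating states, then
    re-normalize so each transition distribution still sums to one.

    T_org:        (n_states, n_actions, n_neighbors) transition tensor
    validity_map: (n_states, n_neighbors) boolean edge-validity mask
    """
    validity_map = np.array(validity_map, dtype=bool, copy=True)
    # keep the first edge (assumed to be the self/stay edge, as in find_goal)
    validity_map[:, 0] = True
    T = T_org * validity_map[:, np.newaxis, :]  # mask invalid edges
    sum_T = np.sum(T, axis=-1)
    sum_T[np.where(sum_T == 0.)] = 1.           # avoid division by zero
    return T / sum_T[:, :, np.newaxis]
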
def find_goal_simple(mdp, env, log, states, feature_fn, cstr_fn=None,
                     error=1e-10, ths=1e-3, queue_size=1000,
                     use_nearest_goal=True, **kwargs):
    """Constraint-free variant of find_goal: find sub-goal states and per-goal
    policies without any constraint handling. `cstr_fn` is unused and kept for
    signature compatibility."""
    irl_support_feature_ids = log['support_feature_ids']
    irl_support_feature_values = log['support_feature_values']

    goal_features, _, _, _ = bic.get_expected_goal(log, states,
                                                   queue_size=queue_size)

    T_org = copy.copy(mdp.T)

    # feature values of all discrete states
    features = fu.get_features_from_states(env, states, feature_fn)
    distFunc = kwargs.get('distFunc', None)

    # compute a policy per sub-goal
    new_goals = []
    for i, f_id in enumerate(goal_features):
        print "Find {}th goal".format(i)

        # feature goal
        idx = irl_support_feature_ids.index(f_id)
        f = irl_support_feature_values[idx]

        # reset any positive rewards
        rewards = np.array(mdp.get_rewards())
        rewards[np.where(rewards > 0)] = 0.

        # distance from each state's feature to the goal feature
        d = np.linalg.norm(features - f, ord=np.inf, axis=-1)
        dist_ths = ths
        if np.amin(d) > dist_ths:
            dist_ths = np.amin(d)

        # grow the distance threshold until a reachable goal is found
        bad_goals = []
        while True:
            s_ids = [j for j in range(len(d)) if d[j] <= dist_ths]
            if len(s_ids) > 0:
                goal_found = False
                for idx in s_ids:
                    if idx in bad_goals:
                        continue
                    # keep only goals reachable from the start state
                    rx1, _ = dijkstra_planning.dijkstra_planning(
                        env, env.start_state, states[idx], env.roadmap,
                        env.states, distFunc=distFunc)
                    if rx1 is not None:
                        goal_found = True
                        break
                    bad_goals.append(idx)
                print "{}: Goal found? {} (dist ths: {})".format(
                    s_ids, goal_found, dist_ths)
                if goal_found:
                    break
            # no reachable goal yet: relax the distance threshold
            dist_ths += ths

        print "----------------------------"
        print "Found {} sub-goal(s): {}".format(len(s_ids), s_ids)
        print "----------------------------"

        # Select the candidate nearest to both the start and goal states
        if len(s_ids) > 1 and use_nearest_goal is False:
            dist = []
            for j, idx in enumerate(s_ids):
                rx1, _ = dijkstra_planning.dijkstra_planning(
                    env, env.start_state, states[idx], env.roadmap,
                    env.states, distFunc=distFunc)
                if rx1 is None:
                    dist.append(np.inf)
                    continue
                rx2, _ = dijkstra_planning.dijkstra_planning(
                    env, states[idx], env.goal_state, env.roadmap,
                    env.states, distFunc=distFunc)
                if rx2 is None:
                    dist.append(np.inf)
                    continue
                dist.append(len(rx1) + len(rx2))
            min_j = np.argmin(dist)
            s_ids = s_ids[min_j:min_j + 1]
            print "Selected a reachable state as a goal {}".format(s_ids)

        # set the new reward and solve with the original transitions
        rewards[s_ids] = 1.
        mdp.set_rewards(rewards)
        mdp.T = copy.copy(T_org)
        policy, values = mdp.find_policy(error)

        new_goals.append([s_ids[0], copy.copy(policy), f_id])
    return new_goals

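
# Example usage (a sketch under the same assumptions as above; no constraint
# handling is applied in this variant):
#
#   new_goals = find_goal_simple(mdp, env, log, states, feature_fn)
#   goal_state_id, policy, f_id = new_goals[0]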