def post_process(z, goals):
    """Remove empty partitions and keep ``goals`` aligned with the labels in ``z``.

    ``ut.tally(z)`` is presumably a per-label occurrence count indexed by
    label value -- TODO confirm against ``ut``.

    Steps:
      1. If the number of goals differs from the number of tallied labels,
         the last goal is dropped.
      2. Labels are scanned from the highest to the lowest.  For each label
         whose tally is zero, every entry of ``z`` larger than that label is
         decremented by one and the goal at that index is removed.

    Args:
        z: mutable, indexable sequence of integer partition labels.  It is
            modified IN PLACE and the same object is returned.
        goals: sequence of goals, one per partition label.

    Returns:
        ``(z, goals)`` after compaction.

    Raises:
        AssertionError: if the number of goals still differs from the number
            of tallied labels afterwards.
    """
    counts = ut.tally(z)
    n_labels = len(counts)

    if len(goals) != n_labels:
        goals = goals[:-1]

    # Walk downwards so that removing a label never shifts the indices of
    # labels that are still to be inspected.
    for label in range(n_labels - 1, -1, -1):
        if counts[label] != 0:
            continue
        for pos, assigned in enumerate(z):
            if assigned > label:
                z[pos] -= 1
        goals = [g for k, g in enumerate(goals) if k != label]

    n_remaining = len(ut.tally(z))
    assert len(goals) == n_remaining, \
        "number of partitions {} and goals {} are different".format(n_remaining, len(goals))
    return z, goals
def get_expected_goal(log, states, queue_size=1000, enable_cstr=False, idx_traj=None, return_params=False):
    """Return a set of expected goal features and constraints.

    Only the last ``queue_size`` entries of ``log['goals']`` are considered.
    "Expected" values are taken as ``np.argmax(ut.tally(...))``; ``ut.tally``
    is presumably a per-value occurrence count indexed by value, which would
    make this the most frequent value -- TODO confirm against ``ut``.

    Args:
        log: dict read at ``'goals'`` and ``'support_states'``, plus
            ``'support_feature_values'`` when ``enable_cstr`` is true.  Each
            entry of ``log['goals']`` is a sequence of goals; a goal is
            indexed at ``[0]`` (state), ``[2]`` (feature id) and, with
            constraints enabled, ``[-1]`` (constraint id) and ``[-2]['mu']``
            (one mu value per feature).
        states: not read by this function.
        queue_size: number of most recent samples to use.
        enable_cstr: also estimate constraint ids and constraint mu values.
        idx_traj: NOTE(review): ignored -- it is unconditionally replaced by
            ``log['support_states']``.  Confirm whether callers expect the
            argument to be honoured before changing this.
        return_params: when true, the returned dict carries
            ``'goal_states'`` and ``'cstr_counts'``; otherwise it is empty.

    Returns:
        ``(goal_features, cstr_ids, cstr_mu, d)``.  Without constraints the
        second and third items are lists of ``None`` with the same length as
        ``goal_features``.

    Raises:
        AssertionError: if ``enable_cstr`` is true and a goal has three or
            fewer fields.
    """
    idx_traj = log['support_states']
    recent_goals = log['goals'][-queue_size:]

    # 1. expected number of goals
    n_g_queue = collections.deque(maxlen=queue_size)
    for goals in recent_goals:
        n_g_queue.append(len(goals))
    n_goal = np.argmax(ut.tally(n_g_queue))
    # Single-argument print() behaves the same on Python 2 and 3.  The two
    # spaces reproduce the output of the former `print "expected goals: ", n`.
    print("expected goals:  {}".format(n_goal))

    # 2. collect queues for expected feature ids
    state_deque = [collections.deque(maxlen=queue_size) for _ in range(n_goal)]
    feature_deque = [collections.deque(maxlen=queue_size) for _ in range(n_goal)]
    if enable_cstr:
        # a sequence of mu per partition
        feat_len = len(log['support_feature_values'][0])
        cstr_deque = [collections.deque(maxlen=queue_size) for _ in range(n_goal)]
        cstr_mu_deque = [[collections.deque(maxlen=queue_size) for _ in range(feat_len)]
                         for _ in range(n_goal)]

    for goals in recent_goals:
        for j, goal in enumerate(goals):
            if j >= n_goal:
                # goals beyond the expected count are ignored
                break
            state_deque[j].append(goal[0])
            feature_deque[j].append(goal[2])
            if enable_cstr:
                assert len(goal) > 3, "no cstr info"
                cstr_deque[j].append(goal[-1])
                for k, mu in enumerate(goal[-2]['mu']):
                    cstr_mu_deque[j][k].append(mu)

    # 3. compute expected feature and constraint indices
    n_bins = 20
    goal_states = []
    goal_features = []
    cstr_ids = []
    cstr_mu = []
    cstr_counts = []
    for i in range(n_goal):
        goal_features.append(np.argmax(ut.tally(feature_deque[i])))

        # expected goal id: position of the expected state within the
        # support states (idx_traj)
        g_id = idx_traj.index(np.argmax(ut.tally(state_deque[i])))
        goal_states.append(g_id)

        if not enable_cstr:
            continue

        # expected constraint, tallied only over the samples whose state
        # equals the expected goal state
        cstr_list = np.array(list(cstr_deque[i]))
        at_goal = np.where(np.array(state_deque[i]) == idx_traj[g_id])[0]
        cstr_count = ut.tally(cstr_list[at_goal])
        cstr_id = np.argmax(cstr_count)
        cstr_ids.append(cstr_id)
        cstr_counts.append(cstr_count)
        print("cstr_id:  {}".format(cstr_id))

        # expected mu per feature: left edge of the most populated bin of a
        # 20-bin histogram spanning [min, max] of the collected values
        mus = []
        for k in range(feat_len):
            samples = cstr_mu_deque[i][k]
            f_max = np.amax(samples)
            f_min = np.amin(samples)
            hist, _ = np.histogram(samples, bins=n_bins, range=(f_min, f_max))
            mus.append(np.argmax(hist) / float(n_bins) * float(f_max - f_min) + f_min)
        cstr_mu.append(mus)

    if return_params:
        d = {'goal_states': goal_states, 'cstr_counts': cstr_counts}
    else:
        d = {}

    if enable_cstr:
        return goal_features, cstr_ids, cstr_mu, d
    return goal_features, \
        [None for _ in range(len(goal_features))], \
        [None for _ in range(len(goal_features))], d
def visualization(env, z, goals, observations, states, support_states, support_feature_ids, trajs=None, alpha=1., punishment=5., q_mat=None):
    """Plot partitions, their sub-goals and optional trajectories, then show the figure.

    Each partition label in ``z`` gets one colour from the rainbow colormap.
    The states of the observations assigned to that partition are drawn as
    translucent dots, and the states returned by
    ``bic.get_state_goal_from_feature`` for the partition's goal feature
    (``goals[i][2]``) are drawn as large 'P' markers in the same colour.

    Args:
        env: object read at ``objects`` (indexed ``[:, 0]`` / ``[:, 1]``),
            ``start_state``, ``goal_state`` and
            ``observation_space.low`` / ``.high``.
        z: partition label per observation.
        goals: one goal per partition; ``goals[i][2]`` is passed to
            ``bic.get_state_goal_from_feature``.
        observations: sequence of objects with a ``state`` attribute used to
            index ``states``.
        states: array indexed by state id; columns 0 and 1 are plotted as
            x and y.
        support_states, support_feature_ids: forwarded unchanged to
            ``bic.get_state_goal_from_feature``.
        trajs: optional iterable of trajectories, each convertible to an
            array whose columns 0 and 1 are plotted as a faint red line.
        alpha, punishment: currently unused.  They only fed a per-point
            likelihood shading whose result was never drawn; kept so that
            existing calls stay valid.
        q_mat: currently unused; kept so that existing calls stay valid.

    Returns:
        None.  Blocks in ``plt.show()``.
    """
    from mdp.bn_irl import bn_irl_common as bic
    import matplotlib.pyplot as plt
    import matplotlib.cm as cm

    plt.subplots()

    if trajs is not None:
        for traj in trajs:
            path = np.array(traj)
            plt.plot(path[:, 0], path[:, 1], 'r-', alpha=0.2)

    plt.plot(env.objects[:, 0], env.objects[:, 1], 'ko')

    occurrence = ut.tally(z)
    colors = cm.rainbow(np.linspace(0, 1, len(occurrence)))

    for i in range(len(occurrence)):
        members = [observations[j] for j, v in enumerate(z) if v == i]
        if not members:
            continue
        obs = np.array([states[o.state] for o in members])

        # One call draws every member of the partition; the 'o' format draws
        # markers only, no connecting line.
        plt.plot(obs[:, 0], obs[:, 1], 'o', c=colors[i], markersize=5, alpha=0.2)

        goal_states = bic.get_state_goal_from_feature(goals[i][2], support_states,
                                                      support_feature_ids)
        goal_states = states[goal_states]
        # A single-row 2-D colour is unambiguous for scatter(): a bare RGBA
        # vector is value-mapped instead when there happen to be 4 points.
        plt.scatter(goal_states[:, 0], goal_states[:, 1], marker='P',
                    c=colors[i:i + 1], s=400)

    plt.text(env.start_state[0] - 3, env.start_state[1] - 5, "Start", fontsize=16)
    plt.text(env.goal_state[0] - 3, env.goal_state[1] - 5, "Goal", fontsize=16)

    # Proxy artists for the legend.  The partition proxy uses the last
    # partition's colour (what the leaked loop index used to select) without
    # depending on the loop having run.
    legend_xy = np.linspace(-10, -5, len(colors))
    lines = [plt.scatter(legend_xy, legend_xy, marker='P', c=colors, s=50),
             plt.scatter(legend_xy, legend_xy, marker='o', c=colors[-1:], s=50, alpha=0.2)]
    labels = ['Sub-goal', 'Partition']
    plt.legend(lines, labels, fontsize='large', loc=1)

    plt.xlim(env.observation_space.low[0], env.observation_space.high[0])
    plt.ylim(env.observation_space.low[1], env.observation_space.high[1])
    plt.xticks([])
    plt.yticks([])
    plt.show()