Example #1
0
def post_process(z, goals):
    """Remove empty partitions and keep ``goals`` aligned with ``z``.

    ``z`` holds one partition label per entry and is renumbered IN PLACE so
    that labels stay contiguous once empty partitions are dropped.  ``goals``
    is rebuilt (not mutated) with the entries of the dropped partitions
    removed.

    NOTE(review): assumes ``ut.tally(z)`` returns a per-label occurrence
    count indexed by label -- confirm against the helper.

    Returns:
        The (mutated) ``z`` and the pruned ``goals``.

    Raises:
        AssertionError: if the number of goals still differs from the number
            of partitions after pruning.
    """
    counts = ut.tally(z)
    n_parts = len(counts)

    # On a length mismatch the trailing goal is discarded before pruning.
    if len(goals) != n_parts:
        goals = goals[:-1]

    # Visit the highest label first: shifting the labels above an empty slot
    # then never disturbs the lower slots that are still to be examined.
    for part in range(n_parts - 1, -1, -1):
        if counts[part] != 0:
            continue
        for pos, label in enumerate(z):
            if label > part:
                z[pos] -= 1
        goals = [g for k, g in enumerate(goals) if k != part]

    n_final = len(ut.tally(z))
    assert len(goals) == n_final, \
        "number of partitions {} and goals {} are different".format(n_final, len(goals))

    return z, goals
Example #2
0
def get_expected_goal(log, states, queue_size=1000, enable_cstr=False, idx_traj=None,
                      return_params=False):
    """Return the expected goal features (and optionally constraints).

    Only the last ``queue_size`` entries of ``log['goals']`` are considered.
    The expected number of goals is the argmax of the tally of per-entry goal
    counts; for each goal slot below that number the most frequent feature id,
    state and (optionally) constraint are selected.

    NOTE(review): assumes ``ut.tally`` returns occurrence counts indexed by
    value, and that each goal is indexable as ``goal[0]`` (state),
    ``goal[2]`` (feature id), ``goal[-2]['mu']`` and ``goal[-1]``
    (constraint info) -- confirm against the producer of ``log``.

    Args:
        log: mapping with keys ``'goals'``, ``'support_states'`` and, when
            ``enable_cstr`` is set, ``'support_feature_values'``.
        states: unused; kept for interface compatibility.
        queue_size: number of most recent log entries to use.
        enable_cstr: also estimate the constraint id and constraint ``mu``
            for every goal slot.
        idx_traj: ignored -- it is always replaced by
            ``log['support_states']`` (behaviour kept from the original).
        return_params: when true, the returned dict carries ``'goal_states'``
            and ``'cstr_counts'``; otherwise it is empty.

    Returns:
        ``(goal_features, cstr_ids, cstr_mu, d)``.  Without ``enable_cstr``
        the two middle items are lists of ``None`` with one entry per goal.

    Raises:
        AssertionError: if ``enable_cstr`` is set and a goal has no
            constraint fields (``len(goal) <= 3``).
    """
    # NOTE(review): the caller-supplied idx_traj is discarded here.
    idx_traj = log['support_states']
    recent_goals = log['goals'][-queue_size:]

    # 1. expected number of goals and expected feature ids
    n_g_queue = collections.deque((len(goals) for goals in recent_goals),
                                  maxlen=queue_size)
    n_goal = np.argmax(ut.tally(n_g_queue))
    # Single-argument print: same output on Python 2 and valid on Python 3.
    print("expected goals:  %s" % (n_goal,))

    # 2. collect queues for expected feature ids
    state_deque = [collections.deque(maxlen=queue_size) for _ in range(n_goal)]
    feature_deque = [collections.deque(maxlen=queue_size) for _ in range(n_goal)]
    if enable_cstr:
        # a sequence of mu per partition
        feat_len = len(log['support_feature_values'][0])
        cstr_deque = [collections.deque(maxlen=queue_size) for _ in range(n_goal)]
        cstr_mu_deque = [[collections.deque(maxlen=queue_size) for _ in range(feat_len)]
                         for _ in range(n_goal)]

    for goals in recent_goals:
        for j, goal in enumerate(goals):
            # Slots beyond the expected goal count are not tracked.
            if j >= n_goal:
                break
            state_deque[j].append(goal[0])
            feature_deque[j].append(goal[2])
            if enable_cstr:
                assert len(goal) > 3, "no cstr info"
                cstr_deque[j].append(goal[-1])
                for k, mu in enumerate(goal[-2]['mu']):
                    cstr_mu_deque[j][k].append(mu)

    # 3. compute expected feature and constraint indices
    n_bins = 20
    goal_states = []
    goal_features = []
    cstr_ids = []
    cstr_mu = []
    cstr_counts = []
    for i in range(n_goal):
        goal_features.append(np.argmax(ut.tally(feature_deque[i])))

        # expected goal id from support states (idx_traj)
        g_id = idx_traj.index(np.argmax(ut.tally(state_deque[i])))
        goal_states.append(g_id)

        if not enable_cstr:
            continue

        # expected constraints: tally only the samples recorded at the
        # selected (most frequent) state
        cstr_list = np.array(list(cstr_deque[i]))
        at_goal_state = np.where(np.array(state_deque[i]) == idx_traj[g_id])[0]
        cstr_count = ut.tally(cstr_list[at_goal_state])
        cstr_id = np.argmax(cstr_count)
        cstr_ids.append(cstr_id)
        cstr_counts.append(cstr_count)
        print("cstr_id:  %s" % (cstr_id,))

        # Per feature: lower edge of the most populated of n_bins histogram
        # bins spanning [min, max] of the collected mu values.
        mus = []
        for k in range(feat_len):
            samples = cstr_mu_deque[i][k]
            f_max = np.amax(samples)
            f_min = np.amin(samples)
            hist, _ = np.histogram(samples, bins=n_bins, range=(f_min, f_max))
            mus.append(np.argmax(hist) / float(n_bins) * float(f_max - f_min) + f_min)
        cstr_mu.append(mus)

    if return_params:
        d = {'goal_states': goal_states,
             'cstr_counts': cstr_counts}
    else:
        d = {}

    if enable_cstr:
        return goal_features, cstr_ids, cstr_mu, d
    return goal_features, \
        [None for _ in range(len(goal_features))], \
        [None for _ in range(len(goal_features))], d
Example #3
0
def visualization(env, z, goals, observations, states, support_states, support_feature_ids, trajs=None,
                  alpha=1., punishment=5., q_mat=None):
    """Plot the partitioned observations and the sub-goal of each partition.

    Draws, on a new matplotlib figure: the optional trajectories (faint red
    lines), ``env.objects`` (black dots), every observation coloured by its
    partition label in ``z``, one large 'P' marker per partition at the
    state(s) returned by ``bic.get_state_goal_from_feature`` for
    ``goals[i][2]``, and "Start"/"Goal" text labels.  The axes are limited to
    ``env.observation_space`` and the figure is shown with ``plt.show()``.

    NOTE(review): assumes ``ut.tally(z)`` has one entry per partition label,
    that each observation exposes a ``.state`` index into ``states``, and
    that ``states`` is a 2-D array whose first two columns are x/y -- confirm.

    Args:
        env: object providing ``objects``, ``start_state``, ``goal_state``
            and ``observation_space.low`` / ``.high``.
        z: partition label of every observation.
        goals: per-partition goals; only ``goals[i][2]`` is read here.
        observations: sequence of observations, aligned with ``z``.
        states: array of state coordinates.
        support_states, support_feature_ids: forwarded to
            ``bic.get_state_goal_from_feature``.
        trajs: optional iterable of trajectories to draw in the background.
        alpha, punishment, q_mat: unused; kept for interface compatibility
            (they only fed a likelihood-based shading whose result was never
            used).
    """
    from mdp.bn_irl import bn_irl_common as bic
    import matplotlib.pyplot as plt
    import matplotlib.cm as cm

    plt.subplots()

    if trajs is not None:
        for traj in trajs:
            traj = np.array(traj)
            plt.plot(traj[:, 0], traj[:, 1], 'r-', alpha=0.2)

    plt.plot(env.objects[:, 0], env.objects[:, 1], 'ko')

    occurrence = ut.tally(z)
    colors = cm.rainbow(np.linspace(0, 1, len(occurrence)))
    obs_array = np.array(observations)
    for i, color in enumerate(colors):
        ids = [j for j, v in enumerate(z) if v == i]
        if not ids:
            continue
        obs = np.array([states[o.state] for o in obs_array[ids]])

        for o in obs:
            plt.plot([o[0]], [o[1]], 'o', c=color, markersize=5, alpha=0.2)

        goal_states = bic.get_state_goal_from_feature(goals[i][2], support_states, support_feature_ids)
        goal_states = states[goal_states]
        plt.scatter(goal_states[:, 0], goal_states[:, 1], marker='P', c=color, s=400)

    plt.text(env.start_state[0] - 3, env.start_state[1] - 5, "Start", fontsize=16)
    plt.text(env.goal_state[0] - 3, env.goal_state[1] - 5, "Goal", fontsize=16)

    # Off-screen proxy artists that exist only to populate the legend.
    # The 'Partition' proxy keeps the last partition's colour, but as a
    # one-row 2-D slice: this does not depend on a loop variable leaking out
    # of the loop above, and cannot be mistaken by scatter for a vector of
    # scalar values to colormap.
    legend_xy = np.linspace(-10, -5, len(colors))
    lines = [plt.scatter(legend_xy, legend_xy, marker='P', c=colors, s=50),
             plt.scatter(legend_xy, legend_xy, marker='o', c=colors[-1:], s=50, alpha=0.2)]
    labels = ['Sub-goal', 'Partition']
    plt.legend(lines, labels, fontsize='large', loc=1)

    plt.xlim(env.observation_space.low[0], env.observation_space.high[0])
    plt.ylim(env.observation_space.low[1], env.observation_space.high[1])
    plt.xticks([])
    plt.yticks([])

    plt.show()