Example 1
def viz_convergence(states,
                    idx_trajs,
                    log,
                    queue_size=1000,
                    cstr_enabled=False):
    """Plot the expected goal distributions
    """
    import numpy as np
    import bn_irl_common as bic
    import matplotlib.cm as cm

    # de-duplicate the state indices while preserving their order in the demonstration
    new_idx_traj = []
    for idx in idx_trajs[0]:
        if idx not in new_idx_traj:
            new_idx_traj.append(idx)
    idx_traj = new_idx_traj

    # expected goals and constraints from a demonstration
    goal_features, cstr_ids, cstr_mus, param_dict = bic.get_expected_goal(
        log,
        states,
        enable_cstr=cstr_enabled,
        queue_size=queue_size,
        idx_traj=idx_traj,
        return_params=True)
    goal_states = param_dict.get('goal_states', [])
    cstr_counts = param_dict.get('cstr_counts', [])
    expected_n_goal = len(goal_states)

    # count how often each demonstration state was sampled as the j-th sub-goal
    goal_counts = [np.zeros(len(idx_traj)) for _ in range(expected_n_goal)]
    for goal, _ in zip(log['goals'], log['z']):
        for j in range(expected_n_goal):
            if j < len(goal):
                i = idx_traj.index(goal[j][0])
                goal_counts[j][i] += 1

    import matplotlib.pyplot as plt
    fig = plt.figure()
    colors = cm.rainbow(np.linspace(0, 1, expected_n_goal))
    for i in range(expected_n_goal):
        if i > 9: continue  # keep the figure readable when there are many sub-goals
        ax = fig.add_subplot(expected_n_goal, 2, i * 2 + 1)
        plt.bar(range(len(goal_counts[i])), goal_counts[i])
        if cstr_enabled:
            ax = fig.add_subplot(expected_n_goal, 2, i * 2 + 2)
            plt.bar(range(len(cstr_counts[i])), cstr_counts[i])
            ## plt.plot(exp_cstr_ids[i], color=colors[i])

    plt.show()
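
For reference, the counting step in viz_convergence reduces to the following standalone sketch. The toy idx_traj and samples values are illustrative assumptions; in the real call, log['goals'] comes from the BN-IRL sampler history.

# Standalone sketch of the sub-goal counting step (toy data; illustrative only).
import numpy as np
import matplotlib.pyplot as plt

idx_traj = [0, 3, 5, 9]                        # de-duplicated demonstration state indices
samples  = [[(3, 0)], [(5, 1)], [(3, 0)]]      # each sample: one (state_idx, ...) tuple per sub-goal
n_goals  = 1

counts = [np.zeros(len(idx_traj)) for _ in range(n_goals)]
for goal in samples:
    for j in range(n_goals):
        if j < len(goal):
            counts[j][idx_traj.index(goal[j][0])] += 1

plt.bar(range(len(counts[0])), counts[0])      # the tallest bar marks the most frequently sampled sub-goal
plt.show()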
Example 2
def find_goal(mdp,
              env,
              log,
              states,
              feature_fn,
              roadmap,
              error=1e-10,
              ths=1e-3,
              queue_size=1000,
              enable_cstr=True,
              cstr_ths=2.33,
              use_discrete_state=True,
              use_nearest_goal=True,
              return_policy=True,
              **kwargs):

    """Find goal states (and constraints) in a new environment that match the
    goal features inferred from a demonstration, and optionally compute a
    policy for each sub-goal.
    """
    irl_support_feature_ids = log['support_feature_ids']
    irl_support_feature_values = log['support_feature_values']
    cstr_feat_id = log['cstr_feat_id']

    # expected goals and constraints from a demonstration
    goal_features, cstr_ids, cstr_mus, _ = bic.get_expected_goal(
        log, states, enable_cstr=enable_cstr, queue_size=queue_size)

    # Now, we find goal and constraint on a new environment
    # n_cstr equals n_partitions
    cstr_fns = make_test_cstr_fns(env,
                                  feature_fn,
                                  states,
                                  cstr_mus,
                                  cstr_feat_id,
                                  ths=cstr_ths)

    if use_discrete_state:
        T_org = copy.copy(mdp.T)

        # find feature goals
        features = fu.get_features_from_states(env, states, feature_fn)
        distFunc = kwargs.get('distFunc', None)

        # compute q_mat per sub-goal
        new_goals = []
        for i, (f_id, c_id,
                c_mu) in enumerate(zip(goal_features, cstr_ids, cstr_mus)):
            print "Find {}th goal, cstr={} ".format(i, c_id)
            # feature goal
            idx = irl_support_feature_ids.index(f_id)
            f = irl_support_feature_values[idx]

            # get rewards
            rewards = mdp.get_rewards()
            rewards = np.array(rewards)
            rewards[np.where(rewards > 0)] = 0.

            # find the closest feature from goal features
            d = np.linalg.norm(features - f, ord=np.inf, axis=-1)
            dist_ths = ths

            ## if np.amin(d) > dist_ths:
            ##     dist_ths = np.amin(d)


            bad_goals = []
            while True:
                s_ids = [j for j in range(len(d)) if d[j] <= dist_ths]
                if len(s_ids) > 0:
                    goal_found = False
                    for idx in s_ids:
                        if idx in bad_goals: continue

                        # skip candidate goals that violate the constraint
                        # (c_id == 0 means this partition is constrained)
                        if c_id == 0 and cstr_fns[i](idx) is False:
                            print "Removed a candidate goal violating the constraint"
                            print features[idx]
                            bad_goals.append(idx)
                            continue

                        rx1, _ = dijkstra_planning.dijkstra_planning(
                            env,
                            env.start_state,
                            states[idx],
                            env.roadmap,
                            env.states,
                            distFunc=distFunc)
                        if rx1 is not None:
                            goal_found = True
                            break
                        bad_goals.append(idx)
                    print s_ids, " : Goal found? ", goal_found, " dist ths: ", dist_ths
                    if goal_found is False:
                        print "Goal feature may not match with current goal setup?"
                    if goal_found: break
                dist_ths += ths
            print "Found goals: ", s_ids
            ## print states[s_ids]
            ## print env.get_goal_state()

            # When not simply taking the nearest candidate, pick the candidate that is
            # reachable and has the shortest combined start-to-goal path length
            if len(s_ids) > 1 and use_nearest_goal is False:
                dist = []
                for j, idx in enumerate(s_ids):
                    rx1, _ = dijkstra_planning.dijkstra_planning(
                        env,
                        env.start_state,
                        states[idx],
                        env.roadmap,
                        env.states,
                        distFunc=distFunc)
                    if rx1 is None:
                        dist.append(np.inf)
                        continue
                    rx2, _ = dijkstra_planning.dijkstra_planning(
                        env,
                        states[idx],
                        env.goal_state,
                        env.roadmap,
                        env.states,
                        distFunc=distFunc)
                    if rx2 is None:
                        dist.append(np.inf)
                        continue
                    dist.append(len(rx1) + len(rx2))

                min_j = np.argmin(dist)
                s_ids = s_ids[min_j:min_j + 1]
                print "Selected a reachable state as a goal {}".format(s_ids)

            if return_policy:

                rewards[s_ids] = 1.
                mdp.set_rewards(rewards)

                # NOTE: we only use single constraint (0: constrained, 1: free)
                if enable_cstr is False or (cstr_fns is None or c_id > 0
                                            or c_id == -1):
                    #or (type(cstr_fns[i]) is list and c_id == len(cstr_fns[i])) \
                    #or c_id == -1:
                    # no constraint case
                    mdp.T = copy.copy(T_org)
                else:
                    # constraint case: zero out transitions that lead into
                    # constraint-violating neighbor states, then renormalize
                    # each (state, action) slice of T
                    validity_map = cstr_fns[i](range(len(states)))[roadmap]
                    validity_map[:, 0] = True
                    T = T_org * validity_map[:, np.newaxis, :]
                    sum_T = np.sum(T, axis=-1)
                    sum_T[np.where(sum_T == 0.)] = 1.
                    T /= sum_T[:, :, np.newaxis]
                    mdp.T = T

                    ## #from IPython import embed; embed()#; sys.exit()
                    ## #sys.path.insert(0,'..')
                    ## from viz import viz as v
                    ## r = cstr_fns[i](range(len(states)))
                    ## v.reward_plot(r, states)
                    ## ## v.reward_plot_3d(r, states, env)
                    ## #sys.exit()

                mdp.set_goal(s_ids)
                ## values, param_dict = mdp.solve_mdp(error, return_params=True)#, max_cnt=100)
                policy, values = mdp.find_policy(error)
            else:
                policy = []

            if distFunc is None:
                idx = np.argmin(
                    np.linalg.norm(states[s_ids] - env.get_start_state(),
                                   axis=-1))
            else:
                idx = np.argmin(distFunc(states[s_ids], env.get_start_state()))

            if enable_cstr:
                new_goals.append(
                    [s_ids[idx],
                     copy.copy(policy), f_id, c_mu, c_id])
            else:
                new_goals.append([s_ids[idx], copy.copy(policy), f_id])

        return new_goals

    else:
        new_goals = []
        state = env.get_start_state()
        for i, (f_id, c_id,
                c_mu) in enumerate(zip(goal_features, cstr_ids, cstr_mus)):
            print "Find {}th goal, cstr={} ".format(i, c_id)
            # feature goal
            idx = irl_support_feature_ids.index(f_id)
            f = irl_support_feature_values[idx]

            if enable_cstr:
                # find the closest state from a feature f
                s = find_minimum_cost_state(state, env, f, feature_fn,
                                            cstr_feat_id, c_id, c_mu, cstr_ths)
                new_goals.append([s, None, f_id, c_mu, c_id])
            else:
                # find the closest state from a feature f
                s = find_minimum_cost_state(state, env, f, feature_fn)
                new_goals.append([s, None, f_id])
            state = s

        return new_goals
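
The constraint branch above masks and renormalizes the transition model. Below is a minimal standalone sketch of that step, with toy dimensions standing in for the real T_org, roadmap, and cstr_fns[i]; the shapes are assumptions inferred from the indexing used in the example.

# Toy-sized sketch of the transition-masking step (assumed shapes:
# T is (n_states, n_actions, n_neighbors), validity is (n_states, n_neighbors)).
import numpy as np

n_states, n_actions, n_neighbors = 5, 3, 4
T = np.random.rand(n_states, n_actions, n_neighbors)
T /= T.sum(axis=-1, keepdims=True)                       # a valid transition model

validity = np.random.rand(n_states, n_neighbors) > 0.3   # stand-in for cstr_fns[i](...)[roadmap]
validity[:, 0] = True                                    # keep the first neighbor always allowed

T_masked = T * validity[:, np.newaxis, :]                # zero out transitions into invalid states
row_sum = T_masked.sum(axis=-1)
row_sum[row_sum == 0.] = 1.                              # avoid division by zero for fully masked rows
T_masked /= row_sum[:, :, np.newaxis]                    # renormalize each (state, action) row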
Example 3
def find_goal(mdp, env, log, states, feature_fn, cstr_fn=None, error=1e-10, ths=1e-3,
              queue_size=1000, use_nearest_goal=True, **kwargs):
    """Find goal states in a new environment that match the goal features
    inferred from a demonstration, and compute a policy for each sub-goal.
    """
    irl_support_feature_ids    = log['support_feature_ids']
    irl_support_feature_values = log['support_feature_values']

    goal_features, _, _, _ = bic.get_expected_goal(log, states, queue_size=queue_size)
    T_org = copy.copy(mdp.T)

    # find feature goals
    features = fu.get_features_from_states(env, states, feature_fn)    

    distFunc = kwargs.get('distFunc', None)

    # compute q_mat for a sub-goal
    new_goals = []
    for i, f_id in enumerate(goal_features):
        print "Find {}th goal".format(i)
        # feature goal
        idx  = irl_support_feature_ids.index(f_id)
        f    = irl_support_feature_values[idx]
       
        # get rewards
        rewards = mdp.get_rewards()
        rewards = np.array(rewards)
        rewards[np.where(rewards>0)]=0.

        # find the closest state from a goal
        d = np.linalg.norm(features-f, ord=np.inf, axis=-1)        
        dist_ths = ths

        if np.amin(d) > dist_ths:
            dist_ths = np.amin(d)
        
        bad_goals = []
        while True:
            s_ids = [j for j in range(len(d)) if d[j] <= dist_ths]            
            if len(s_ids)>0:
                goal_found=False
                for idx in s_ids:
                    if idx in bad_goals: continue
                    rx1, _ = dijkstra_planning.dijkstra_planning(env, env.start_state, states[idx],
                                                                 env.roadmap, env.states,
                                                                 distFunc=distFunc)
                    if rx1 is not None:
                        goal_found = True
                        break
                    bad_goals.append(idx)
                print s_ids, goal_found, dist_ths
                if goal_found: break            
            dist_ths += ths
        print "----------------------------"
        print "Found sub-goals: ", s_ids
        print "----------------------------", len(s_ids)

        # When not simply taking the nearest candidate, pick the candidate that is
        # reachable and has the shortest combined start-to-goal path length
        if len(s_ids)>1 and use_nearest_goal is False:
            dist = []
            for j, idx in enumerate(s_ids):
                rx1, _ = dijkstra_planning.dijkstra_planning(env, env.start_state, states[idx],
                                                             env.roadmap, env.states,
                                                             distFunc=distFunc)
                if rx1 is None:
                    dist.append(np.inf)
                    continue
                rx2, _ = dijkstra_planning.dijkstra_planning(env, states[idx], env.goal_state,
                                                             env.roadmap, env.states,
                                                             distFunc=distFunc)
                if rx2 is None:
                    dist.append(np.inf)
                    continue
                dist.append(len(rx1)+len(rx2))

            min_j = np.argmin(dist)
            s_ids = s_ids[min_j:min_j+1]
            print "Selected a reachable state as a goal {}".format(s_ids)

        # set new rewards
        rewards[s_ids] = 1.
        mdp.set_rewards(rewards)

        print "Solving the policy with the new rewards and the original T"
        mdp.T          = copy.copy(T_org)
        ## values, param_dict = mdp.solve_mdp(error, return_params=True)
        policy, values = mdp.find_policy(error)
        new_goals.append([s_ids[0], copy.copy(policy), f_id])
        ## new_goals.append([s_ids[0], copy.copy(param_dict['q']), f_id])

    return new_goals
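
Both find_goal variants rely on the same threshold-expansion search: collect every state whose feature distance to the goal feature is below a threshold, discard unreachable candidates, and widen the threshold until a reachable candidate is found. The sketch below isolates that pattern; is_reachable is a stubbed stand-in for the dijkstra_planning check, and the data are toy values.

# Sketch of the threshold-expansion goal search (toy data; illustrative only).
import numpy as np

def expanding_threshold_match(features, target, ths=1e-3, is_reachable=lambda i: True):
    # L-infinity distance between each candidate feature vector and the target feature
    d = np.linalg.norm(features - target, ord=np.inf, axis=-1)
    dist_ths = max(ths, np.amin(d))        # start at the closest candidate, as in Example 3
    bad_goals = set()
    while True:
        s_ids = [j for j in range(len(d)) if d[j] <= dist_ths]
        for idx in s_ids:
            if idx in bad_goals:
                continue
            if is_reachable(idx):
                return idx, s_ids
            bad_goals.add(idx)
        dist_ths += ths                    # widen the match threshold and retry

features = np.random.rand(20, 3)
goal_idx, candidates = expanding_threshold_match(features, features[7])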