Example #1
demonstrations = utils.rollout_from_usa(10, 100, u_sa_A, mdp_env_A)
print("demonstration")
print(demonstrations)

#Now let's run Bayesian IRL on this demo in this mdp with a placeholder feature to see what happens.

beta = 100.0
step_stdev = 0.1
burn = 100
skip = 5
num_samples = 2000
sample_norm = None
birl = bayesian_irl.BayesianIRL(mdp_env_A,
                                beta,
                                step_stdev,
                                debug=False,
                                mcmc_norm=sample_norm)

map_w, map_u, r_chain, u_chain = birl.sample_posterior(demonstrations,
                                                       num_samples, True)

import numpy as np
import matplotlib.pyplot as plt
for w in range(len(r_chain[0])):
    plt.plot(r_chain[:, w], label="feature {}".format(w))
plt.legend()
plt.show()

print("MAP")
print("map_weights", map_w)
map_r = np.dot(mdp_env_A.state_features, map_w)
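# burn and skip are set above but never applied in this excerpt; a minimal sketch
# of thinning the chain the same way Example #4 does (r_chain[burn::skip]) and
# printing the MAP reward as a grid. Using print_as_grid with mdp_env_A is an
# assumption based on its use with mdp_env in the later examples.
r_chain_burned = r_chain[burn::skip]
print("posterior mean weights", np.mean(r_chain_burned, axis=0))
utils.print_as_grid(map_r, mdp_env_A)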
    print("demo", d, demo)
    traj_demonstrations.append(demo)
    for s_a in demo:
        demo_set.add(s_a)
demonstrations = list(demo_set)
print("demonstration")
print(demonstrations)

state_feature_list = [tuple(fs) for fs in mdp_env.state_features]
# pg.get_policy_string_from_trajectory(traj_demonstrations[0], state_feature_list, mdp_env, filename="./figs/lava_corridor/demo_lava_world.png")


#Now let's run Bayesian IRL on this demo in this mdp with a placeholder feature to see what happens.


birl = bayesian_irl.BayesianIRL(mdp_env, beta, step_stdev, debug=False, mcmc_norm=mcmc_norm, likelihood=likelihood)
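# The excerpt stops before this sampler is run; a minimal sketch of the usual next
# step, following the call pattern in Examples #1 and #4 (the sample count here is
# an assumption, not a value from this example):
num_samples = 1000
map_w, map_u, r_chain, u_chain = birl.sample_posterior(demonstrations, num_samples, True)
print("map_weights", map_w)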

# r_hyps = [#[0,-1],
#         [-1,0],
#         #[-0.1232111, -0.8767889],
#         #[-0.25691869, -0.74308131],
#         [-0.05609254, -0.94390746],
#         [-0.57066167,  0.42933833],
#         [-0.55, 0.45],
#         [-.1,-0.9]
#         ]

# for i,r in enumerate(r_hyps):
#     print()
#     print("TRIAL ", i)
#     #generate random rewards
#demonstrations = [(1,0), (0,0)]

# mdp_env = mdp_worlds.machine_teaching_toy_featurized()
# demonstrations = [(2,3),(5,0),(4,0),(3,2)]

mdp_env = mdp_worlds.lava_ambiguous_aaai18()
u_sa = mdp.solve_mdp_lp(mdp_env)
#generate demo from state 5 to terminal
demonstrations = utils.rollout_from_usa(5, 10, u_sa, mdp_env)
print(demonstrations)

traj_demonstrations = [demonstrations]

beta = 10.0
step_stdev = 0.1
birl = bayesian_irl.BayesianIRL(mdp_env, beta, step_stdev, debug=False)

num_samples = 200
burn = 50
skip = 2
map_w, map_u, r_chain, u_chain = birl.sample_posterior(
    demonstrations, num_samples)
print("map_weights", map_w)
map_r = np.dot(mdp_env.state_features, map_w)
utils.print_as_grid(map_r, mdp_env)
#print("Map policy")
#utils.print_policy_from_occupancies(map_u, mdp_env)

# print("chain")
# for r in r_chain:
#     print(r)
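# A minimal sanity check on the recovered reward, assuming this MDP exposes its
# ground-truth weights as mdp_env.feature_weights the way train_mdp does in
# Example #4:
print("true weights", mdp_env.feature_weights)
print("map weights", map_w)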
Example #4
print(demonstrations)

state_feature_list = [tuple(fs) for fs in train_mdp.state_features]

#Now let's run Bayesian IRL on this demo in this mdp with a placeholder feature to see what happens.
beta = 10.0
step_stdev = 0.05

num_samples = 1000
mcmc_norm = "inf"
likelihood = "birl"

birl = bayesian_irl.BayesianIRL(train_mdp,
                                beta,
                                step_stdev,
                                debug=False,
                                mcmc_norm=mcmc_norm,
                                likelihood=likelihood,
                                prior="non-pos")

map_w, map_u, r_chain, u_chain = birl.sample_posterior(demonstrations,
                                                       num_samples, True)

print(train_mdp.feature_weights)

burn = 200
skip = 10
r_chain_burned = r_chain[burn::skip]

u_expert = utils.u_sa_from_demos(traj_demonstrations, train_mdp)
expert_returns = np.sort(np.dot(r_chain_burned, u_expert))
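# expert_returns is sorted, which supports reading off a lower-tail quantile of the
# expert's return under the sampled rewards; a minimal sketch (the alpha value is
# an assumption, not taken from the source):
alpha = 0.05
worst_case_idx = int(alpha * len(expert_returns))
print("{}-quantile expert return:".format(alpha), expert_returns[worst_case_idx])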