def worldA():
    """Run the world-A experiment on the 25-state grid and print the results.

    State ``5*x + y`` gets a three-component feature vector.  An expert model
    drives a ``Simulator`` that produces a 125-element training collection and
    a 40-element test collection via ``trajectories(125, 20)`` and
    ``trajectories(40, 20)``.  For each batch ``1..5`` a fresh IRL learner and
    a fresh EM learner (``ignore_tau=True``, tau supplied by ``tau_EM()``) are
    trained on the first ``batch * 25`` training entries, and the value
    returned by ``test`` on the test collection is recorded for both learners
    and for the expert model.

    Nothing is returned; progress labels and the three result lists are
    printed.
    """
    T = getT()

    # Feature layout: every cell starts as [0, 1, 0], the outer ring of the
    # 5x5 grid becomes [1, 0, 0], then two individual cells are overridden.
    state_features = np.zeros((25, 3))
    state_features[:, 1] = 1
    cells = np.arange(25)
    xs, ys = cells // 5, cells % 5
    on_border = (xs == 0) | (ys == 0) | (xs == 4) | (ys == 4)
    state_features[on_border] = np.array([1, 0, 0])
    state_features[5 * 2 + 2] = np.array([0, 0, 0])
    state_features[5 * 2 + 4] = np.array([1, 0, 1])

    def build_model(**extra):
        # All models in this experiment share the same positional arguments.
        return IRLModel(25, 4, 2, 3, T, 0.95, 0.01, state_features, **extra)

    def build_em_model():
        # EM baseline: constructed with ignore_tau=True, tau set from tau_EM().
        em_model = build_model(ignore_tau=True)
        em_model.set_tau(tau_EM())
        return em_model

    modelA = build_model()
    # NOTE(review): these two learners are rebuilt at the top of every batch
    # iteration below before being trained or tested; the instances created
    # here are never used.
    modelA_IRL = build_model()
    modelA_EM = build_em_model()

    nu = np.zeros(25)
    nu[5 * 2 + 0] = 1.0
    sigma = np.array([0.5, 0.5])
    Theta = np.array([[0, 20, 30], [20, 0, 30]])

    # tau[0][0] and tau[1][1] are 1.0 for every state except the two cells
    # handled in the loop, which get their own values.
    tau = np.zeros((2, 2, 25))
    tau[0, 0, :] = 1.0
    tau[1, 1, :] = 1.0
    for cell in (5 * 2, 5 * 2 + 2):
        tau[0, :, cell] = (0.3, 0.7)
        tau[1, :, cell] = 0.5

    simA = Simulator(modelA, nu, T, sigma, Theta, tau=tau)
    trajectories = simA.trajectories(125, 20)
    trajectories_test = simA.trajectories(40, 20)

    irl, mlirl, expert = [], [], []
    for batch in range(1, 6):
        n_train = batch * 25
        modelA_IRL = build_model()
        modelA_EM = build_em_model()

        modelA_IRL.learn(trajectories[:n_train], 1e-3, 7)
        # Single-argument parenthesised print emits the same text under the
        # Python 2 print statement.
        print("Us")
        irl.append(modelA_IRL.test(trajectories_test))

        print("MLIRL")
        modelA_EM.learn(trajectories[:n_train], 1e-3, 7)
        mlirl.append(modelA_EM.test(trajectories_test))

        print("Expert")
        expert.append(modelA.test(trajectories_test))

    # Same space-separated output as printing the three lists in one
    # Python 2 print statement.
    print("%s %s %s" % (irl, mlirl, expert))
def worldB():
    """Run the world-B experiment on the 25-state grid and print the results.

    State ``5*x + y`` gets a two-component state feature vector and a
    two-component dynamic feature vector.  An expert model built with both
    feature sets drives a ``Simulator`` (configured with ``omega``) that
    produces a 125-element training collection and a 40-element test
    collection via ``trajectories(125, 20)`` and ``trajectories(40, 20)``.

    For each batch ``1..5`` a fresh IRL learner (with dynamic features) and a
    fresh EM learner (``ignore_tau=True``, tau supplied by ``tau_EM()``) are
    trained on the first ``batch * 25`` training entries, and the value
    returned by ``test`` on the test collection is recorded for both learners
    and for the expert model.

    Nothing is returned; the three result lists are printed at the end.
    """
    T = getT()

    # State features are all-zero except for two cells.
    state_features = np.zeros((25, 2))
    state_features[5 * 4 + 0] = np.array([0, 1])
    state_features[5 * 0 + 4] = np.array([1, 0])

    # Dynamic features are [0, 1] everywhere except five cells set to [1, 1].
    dynamic_features = np.zeros((25, 2))
    dynamic_features[:, 1] = 1
    for cell in (5 * 2 + 0, 5 * 2 + 1, 5 * 2 + 2, 5 * 3 + 2, 5 * 4 + 2):
        dynamic_features[cell] = np.array([1, 1])

    # NOTE(review): the expert is built with 0.1 as the seventh argument while
    # the learners below use 0.01 - confirm this difference is intended.
    modelB = IRLModel(25, 4, 2, 2, T, 0.95, 0.1, state_features,
                      dynamic_features=dynamic_features)

    nu = np.zeros(25)
    nu[5 * 2 + 0] = 1.0
    sigma = np.array([0.0, 1.0])
    Theta = np.array([[30, 0], [0, 30]])
    omega = np.zeros([2, 2, 2])
    omega[0, 0] = np.array([-11, 12])
    omega[1, 0] = np.array([13, -12])

    simB = Simulator(modelB, nu, T, sigma, Theta, omega=omega)
    trajectories = simB.trajectories(125, 20)
    trajectories_test = simB.trajectories(40, 20)

    irl, mlirl, expert = [], [], []
    for batch in range(1, 6):
        # Both learners see the same growing prefix of the training data so
        # that the irl and mlirl lists are comparable entry by entry.
        n_train = batch * 25

        modelB_IRL = IRLModel(25, 4, 2, 2, T, 0.95, 0.01, state_features,
                              dynamic_features=dynamic_features)
        modelB_EM = IRLModel(25, 4, 2, 2, T, 0.95, 0.01, state_features,
                             ignore_tau=True)
        modelB_EM.set_tau(tau_EM())

        modelB_IRL.learn(trajectories[:n_train], 1e-3, 7)
        irl.append(modelB_IRL.test(trajectories_test))

        modelB_EM.learn(trajectories[:n_train], 1e-3, 7)
        mlirl.append(modelB_EM.test(trajectories_test))

        expert.append(modelB.test(trajectories_test))

    # Same space-separated output as printing the three lists in one
    # Python 2 print statement.
    print("%s %s %s" % (irl, mlirl, expert))