예제 #1
0
def worldA():
    """Run the world-A comparison on a 5x5 grid and print the scores.

    States are indexed as ``5*x + y`` for ``x, y`` in ``0..4`` and each carries
    three state features.  A Simulator built around a reference model produces
    a training set (``trajectories(125, 20)``) and a test set
    (``trajectories(40, 20)``); the second argument is presumably the
    trajectory length -- confirm against Simulator.

    For training prefixes of 25, 50, ..., 125 trajectories, two fresh learners
    are fitted and then scored on the test set next to the reference model:

    * "Us"     -- an IRLModel with default keyword settings,
    * "MLIRL"  -- an IRLModel with ``ignore_tau=True`` whose tau is set from
      ``tau_EM()``,
    * "Expert" -- the reference model that was handed to the simulator.

    Returns nothing.  The three growing score lists are printed after every
    batch.
    """
    T = getT()

    # Feature layout: border cells get [1,0,0], interior cells [0,1,0];
    # two individual cells are then overridden.
    state_features = np.zeros((25, 3))
    for x in range(5):
        for y in range(5):
            on_border = x in (0, 4) or y in (0, 4)
            state_features[5*x + y] = np.array([1, 0, 0] if on_border else [0, 1, 0])
    state_features[5*2 + 2] = np.array([0, 0, 0])  # centre cell: no feature active
    state_features[5*2 + 4] = np.array([1, 0, 1])  # border cell that also has the third feature

    # Reference model: used to generate data and scored as "Expert".
    # The learner models are created fresh inside the batch loop, so none are
    # built up front.
    modelA = IRLModel(25, 4, 2, 3, T, 0.95, 0.01, state_features)

    # Simulator inputs.
    nu = np.zeros(25)
    nu[5*2 + 0] = 1.0  # all mass on state (x=2, y=0)
    sigma = np.array([0.5, 0.5])
    Theta = np.array([[0, 20, 30],
                      [20, 0, 30]])

    # tau has shape (2, 2, 25).  By default tau[k][k][s] = 1 for every state s;
    # states (2,0) and (2,2) get the mixed values below.
    tau = np.zeros((2, 2, 25))
    tau[0, 0, :] = 1.0
    tau[1, 1, :] = 1.0
    for s in (5*2, 5*2 + 2):
        tau[0][1][s] = 0.7
        tau[0][0][s] = 0.3
        tau[1][0][s] = tau[1][1][s] = 0.5

    simA = Simulator(modelA, nu, T, sigma, Theta, tau=tau)
    trajectories = simA.trajectories(125, 20)
    trajectories_test = simA.trajectories(40, 20)

    irl, mlirl, expert = [], [], []

    # The print calls use the single-argument parenthesised form, which
    # produces the same output under the Python 2 print statement and the
    # Python 3 print function.
    for batch in range(1, 6):
        # Fresh learners per batch so results do not carry over between sizes.
        modelA_IRL = IRLModel(25, 4, 2, 3, T, 0.95, 0.01, state_features)
        modelA_EM = IRLModel(25, 4, 2, 3, T, 0.95, 0.01, state_features, ignore_tau=True)
        modelA_EM.set_tau(tau_EM())

        train = trajectories[:batch*25]

        modelA_IRL.learn(train, 1e-3, 7)
        print("Us")
        irl.append(modelA_IRL.test(trajectories_test))

        print("MLIRL")
        modelA_EM.learn(train, 1e-3, 7)
        mlirl.append(modelA_EM.test(trajectories_test))

        print("Expert")
        expert.append(modelA.test(trajectories_test))
        print("%s %s %s" % (irl, mlirl, expert))
예제 #2
0
def worldB():
    """Run the world-B comparison on a 5x5 grid and print the scores.

    States are indexed as ``5*x + y`` for ``x, y`` in ``0..4``.  Each state has
    two state features and two dynamic features.  A Simulator built around a
    reference model (which receives the dynamic features and an ``omega``
    array) produces a training set (``trajectories(125, 20)``) and a test set
    (``trajectories(40, 20)``); the second argument is presumably the
    trajectory length -- confirm against Simulator.

    For training prefixes of 25, 50, ..., 125 trajectories, two fresh learners
    are fitted on the same prefix and scored on the test set next to the
    reference model:

    * ``irl``    -- an IRLModel given the dynamic features,
    * ``mlirl``  -- an IRLModel with ``ignore_tau=True`` whose tau is set from
      ``tau_EM()``,
    * ``expert`` -- the reference model that was handed to the simulator.

    Returns nothing.  The three growing score lists are printed after every
    batch.
    """
    T = getT()

    # State features: all zero except two corner cells.
    state_features = np.zeros((25, 2))
    state_features[5*4 + 0] = np.array([0, 1])
    state_features[5*0 + 4] = np.array([1, 0])

    # Dynamic features: [0,1] everywhere, [1,1] on the listed cells.
    dynamic_features = np.zeros((25, 2))
    dynamic_features[:] = np.array([0, 1])
    for s in (5*2 + 0, 5*2 + 1, 5*2 + 2, 5*3 + 2, 5*4 + 2):
        dynamic_features[s] = np.array([1, 1])

    # Reference model: used to generate data and scored as the expert.
    # The learner models are created fresh inside the batch loop, so none are
    # built up front.
    # NOTE(review): the seventh positional argument is 0.1 here but 0.01 for
    # the learners below -- confirm this difference is intended.
    modelB = IRLModel(25, 4, 2, 2, T, 0.95, 0.1, state_features, dynamic_features=dynamic_features)

    # Simulator inputs.
    nu = np.zeros(25)
    nu[5*2 + 0] = 1.0  # all mass on state (x=2, y=0)
    sigma = np.array([0.0, 1.0])
    Theta = np.array([[30, 0],
                      [0, 30]])
    omega = np.zeros([2, 2, 2])
    omega[0, 0] = np.array([-11, 12])
    omega[1, 0] = np.array([13, -12])

    simB = Simulator(modelB, nu, T, sigma, Theta, omega=omega)
    trajectories = simB.trajectories(125, 20)
    trajectories_test = simB.trajectories(40, 20)

    irl, mlirl, expert = [], [], []

    for batch in range(1, 6):
        # Fresh learners per batch so results do not carry over between sizes.
        modelB_IRL = IRLModel(25, 4, 2, 2, T, 0.95, 0.01, state_features, dynamic_features=dynamic_features)
        modelB_EM = IRLModel(25, 4, 2, 2, T, 0.95, 0.01, state_features, ignore_tau=True)
        modelB_EM.set_tau(tau_EM())

        # Both learners must see the same training prefix; otherwise the
        # per-batch comparison between them is meaningless.
        train = trajectories[:batch*25]

        modelB_IRL.learn(train, 1e-3, 7)
        irl.append(modelB_IRL.test(trajectories_test))

        modelB_EM.learn(train, 1e-3, 7)
        mlirl.append(modelB_EM.test(trajectories_test))

        expert.append(modelB.test(trajectories_test))
        # Single-argument parenthesised print: same output under the Python 2
        # print statement and the Python 3 print function.
        print("%s %s %s" % (irl, mlirl, expert))