Code Example #1
File: rayTests.py  Project: royf/ddo
def planner():
    print("-----Planning Demonstration-----")
    MAP_NAME = 'resources/GridWorldMaps/experiment1.txt'
    gmap = np.loadtxt(MAP_NAME, dtype=np.uint8)

    g = GridWorldEnv(copy.copy(gmap), noise=0.1)
    g.generateRandomStartGoal()
    v = ValueIterationPlanner(g)
    traj = v.plan(max_depth=100)
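    # convert each planner step (state, action index) into a (state column vector, one-hot action) pair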
    new_traj = []
    for t in traj:
        a = np.zeros(shape=(4, 1))
        s = np.zeros(shape=(2, 1))
        a[t[1]] = 1
        s[0:2, 0] = t[0]
        new_traj.append((s, a))

    return new_traj
Code Example #2
def runPolicies(demonstrations=100,
        super_iterations=2000,
        sub_iterations=0,
        learning_rate=1e-2,
        env_noise=0.3):

    m  = GridWorldNNModel(2, statedim=(2,1))

    MAP_NAME = 'resources/GridWorldMaps/experiment1.txt'
    gmap = np.loadtxt(MAP_NAME, dtype=np.uint8)
    full_traj = []
    vis_traj = []

    for i in range(0,demonstrations):
        print("Traj",i)
        g = GridWorldEnv(copy.copy(gmap), noise=env_noise)

        g.generateRandomStartGoal() 
        start = np.argwhere(g.map == g.START)[0]
        goal = np.argwhere(g.map == g.GOAL)[0]
        # resample until the start and the goal lie in different rooms
        while not ((inRoom1(start) and inRoom2(goal)) or
                   (inRoom2(start) and inRoom1(goal))):
            g.generateRandomStartGoal()
            start = np.argwhere(g.map == g.START)[0]
            goal = np.argwhere(g.map == g.GOAL)[0]


        print(np.argwhere(g.map == g.START), np.argwhere(g.map == g.GOAL))

        v = ValueIterationPlanner(g)
        traj = v.plan(max_depth=100)
        
        new_traj = []
        for t in traj:
            a = np.zeros(shape=(4,1))

            s = np.zeros(shape=(2,1))

            a[t[1]] = 1

            s[:,0] = t[0]
            #s[2:4,0] = np.argwhere(g.map == g.START)[0]
            #s[4:6,0] = np.argwhere(g.map == g.GOAL)[0]

            new_traj.append((s,a))

        full_traj.append(new_traj)
        vis_traj.extend(new_traj)

    #raise ValueError("")

    #g.visualizePlan(vis_traj,blank=True, filename="resources/results/exp1-trajs.png")


    m.sess.run(tf.initialize_all_variables())

    with tf.variable_scope("optimizer"):
        opt = tf.train.AdamOptimizer(learning_rate=learning_rate)

        m.train(opt, full_traj, super_iterations, sub_iterations)

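    # rows of the identity matrix serve as one-hot encodings of the four grid actions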
    actions = np.eye(4)


    g = GridWorldEnv(copy.copy(gmap), noise=0.1)
    g.generateRandomStartGoal()

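    # for each latent option i, record its highest-scoring feasible action (via evalpi) and its evalpsi value at every grid state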
    for i in range(m.k):
        states = g.getAllStates()
        policy_hash = {}
        trans_hash = {}

        for s in states:

            t = np.zeros(shape=(2,1))

            t[:,0] = s
            #t[2:4,0] = np.argwhere(g.map == g.START)[0]
            #t[4:6,0] = np.argwhere(g.map == g.GOAL)[0]


            l = [
                np.ravel(m.evalpi(i, [(t, actions[j, :])]))
                for j in g.possibleActions(s)
            ]

            if len(l) == 0:
                continue

            #print(i, s,l, m.evalpsi(i,ns))
            action = g.possibleActions(s)[np.argmax(l)]

            policy_hash[s] = action

            print("Transition: ",m.evalpsi(i, [(t, actions[1,:])]), t)
            trans_hash[s] = np.ravel(m.evalpsi(i, [(t, actions[1,:])]))

        g.visualizePolicy(policy_hash, trans_hash, blank=True, filename="resources/results/exp1a-policy"+str(i)+".png")
Code Example #3
def runPolicies(demonstrations=200,
                super_iterations=1000,
                sub_iterations=1,
                learning_rate=1e-3,
                env_noise=0.1):

    m = GridWorldModel(4)

    MAP_NAME = 'resources/GridWorldMaps/experiment2.txt'
    gmap = np.loadtxt(MAP_NAME, dtype=np.uint8)
    full_traj = []
    vis_traj = []

    for i in range(0, demonstrations):
        print("Traj", i)
        g = GridWorldEnv(copy.copy(gmap), noise=env_noise)
        g.generateRandomStartGoal()
        v = ValueIterationPlanner(g)
        traj = v.plan(max_depth=100)

        new_traj = []
        for t in traj:
            a = np.zeros(shape=(4, 1))
            a[t[1]] = 1

            new_traj.append((t[0], a))

        full_traj.append(new_traj)
        vis_traj.extend(new_traj)

    #g.visualizePlan(vis_traj,blank=True, filename="resources/results/exp2-trajs.png")

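    # fit the model to the collected demonstration trajectories using the Adam optimizer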
    opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
    m.train(opt, full_traj, super_iterations, sub_iterations)

    actions = np.eye(4)

    g = GridWorldEnv(copy.copy(gmap), noise=0.0)

    for i in range(m.k):
        states = g.getAllStates()
        policy_hash = {}
        trans_hash = {}

        for s in states:

            #print([m.evalpi(i,ns, actions[:,j]) for j in range(4)])
            l = [
                np.ravel(m.evalpi(i, [(s, actions[j, :])]))
                for j in g.possibleActions(s)
            ]

            if len(l) == 0:
                continue

            #print(i, s,l, m.evalpsi(i,ns))
            action = g.possibleActions(s)[np.argmax(l)]

            policy_hash[s] = action

            #print(transitions[i].eval(np.array(ns)))
            trans_hash[s] = np.ravel(m.evalpsi(i, [(s, actions[1, :])]))

        g.visualizePolicy(policy_hash,
                          trans_hash,
                          blank=True,
                          filename="resources/results/exp2-policy" + str(i) +
                          ".png")
Code Example #4
File: experiment3.py  Project: abalakrishna123/ddo
def runPolicies(demonstrations=100,
                super_iterations=100,
                sub_iterations=1000,
                learning_rate=1e-3,
                env_noise=0.1):

    m = GridWorldModel((2, 1), (4, 1), 3)

    MAP_NAME = 'resources/GridWorldMaps/experiment3.txt'
    gmap = np.loadtxt(MAP_NAME, dtype=np.uint8)
    full_traj = []
    vis_traj = []

    for i in range(0, demonstrations):
        print("Traj", i)
        g = GridWorldEnv(copy.copy(gmap), noise=env_noise)
        g.generateRandomStartGoal()
        v = ValueIterationPlanner(g)
        traj = v.plan(max_depth=100)

        new_traj = []
        for t in traj:
            a = np.zeros(shape=(4, 1))
            a[t[1]] = 1

            new_traj.append((t[0], a))

        full_traj.append(new_traj)
        vis_traj.extend(new_traj)

    g.visualizePlan(vis_traj,
                    blank=True,
                    filename="resources/results/exp3-trajs.png")

    opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
    loss = m.getLossFunction()[0]
    train = opt.minimize(loss)
    init = tf.initialize_all_variables()

    #with m.sess as sess:
    m.sess.run(init)

    for it in range(super_iterations):
        print("Iteration", it)
        batch = m.sampleBatch(full_traj)
        for i in range(sub_iterations):
            m.sess.run(train, batch)

    actions = np.eye(4)

    g = GridWorldEnv(copy.copy(gmap), noise=0.0)

    for i in range(m.k):
        states = g.getAllStates()
        policy_hash = {}
        trans_hash = {}

        for s in states:

            #print([m.evalpi(i,ns, actions[:,j]) for j in range(4)])
            l = [
                np.ravel(m.evalpi(i, [(s, actions[j, :])]))
                for j in g.possibleActions(s)
            ]

            if len(l) == 0:
                continue

            #print(i, s,l, m.evalpsi(i,ns))
            action = g.possibleActions(s)[np.argmax(l)]

            policy_hash[s] = action

            #print(transitions[i].eval(np.array(ns)))
            trans_hash[s] = 0

        g.visualizePolicy(policy_hash,
                          trans_hash,
                          blank=True,
                          filename="resources/results/exp3-policy" + str(i) +
                          ".png")
Code Example #5
mmap = np.loadtxt('resources/GridWorldMaps/11x11-Rooms-Modes.txt',
                  dtype=np.uint8)

m = GridWorldModel(3, statedim=(gmap.shape[0], gmap.shape[1]))

demonstrations = 100

full_traj = []
vis_traj = []

print(gmap.shape)

for i in range(demonstrations):
    print("Traj", i)
    # g = SwitchedGridWorldEnv(copy.copy(gmap), copy.copy(mmap), noise=0.3)
    g = GridWorldEnv(copy.copy(gmap), noise=0.3)

    v = ValueIterationPlanner(g)
    traj = v.plan(max_depth=100)
    # g.visualizePlan(traj)
    print("Beg Traj")
    print(traj)
    print("End Traj")

    new_traj = []
    for t in traj:
        a = np.zeros(shape=(4, 1))

        s = np.zeros(shape=(gmap.shape[0], gmap.shape[1]))

        a[t[1]] = 1
Code Example #6
File: experiment1.py  Project: vrmehta29/DDO
def runPolicies(
        demonstrations=20,
        super_iterations=1000,  #10000
        sub_iterations=0,
        learning_rate=10,
        env_noise=0.3):

    m = GridWorldModel(4, statedim=(8, 9))

    MAP_NAME = 'resources/GridWorldMaps/experiment1.txt'
    gmap = np.loadtxt(MAP_NAME, dtype=np.uint8)
    full_traj = []
    vis_traj = []

    for i in range(0, demonstrations):
        print("Traj", i)
        g = GridWorldEnv(copy.copy(gmap), noise=env_noise)
        # print("Initialized")

        g.generateRandomStartGoal()
        start = np.argwhere(g.map == g.START)[0]
        goal = np.argwhere(g.map == g.GOAL)[0]
        # resample until the start and the goal lie in different rooms
        while not ((inRoom1(start) and inRoom2(goal)) or
                   (inRoom2(start) and inRoom1(goal))):
            # print(inr)
            g.generateRandomStartGoal()
            start = np.argwhere(g.map == g.START)[0]
            goal = np.argwhere(g.map == g.GOAL)[0]

        print(np.argwhere(g.map == g.START), np.argwhere(g.map == g.GOAL))

        v = ValueIterationPlanner(g)
        traj = v.plan(max_depth=100)

        print(len(traj), 'length of the trajectory')
        # the trajectory length depends on the start state, the goal state, and the planner output

        new_traj = []
        for t in traj:

            # iterate over each step of the planned trajectory
            a = np.zeros(shape=(4, 1))

            s = np.zeros(shape=(8, 9))

            a[t[1]] = 1

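            # mark the agent's (row, col) position in an otherwise-zero 8x9 state grid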
            s[t[0][0], t[0][1]] = 1
            #s[2:4,0] = np.argwhere(g.map == g.START)[0]
            #s[4:6,0] = np.argwhere(g.map == g.GOAL)[0]

            new_traj.append((s, a))

        full_traj.append(new_traj)
        vis_traj.extend(new_traj)
    print(np.shape(full_traj[0][0][1]), "full trajectory")
    #raise ValueError("")

    #g.visualizePlan(vis_traj,blank=True, filename="resources/results/exp1-trajs.png")

    m.sess.run(tf.initialize_all_variables())

    with tf.variable_scope("optimizer"):
        opt = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
        # define the optimizer and train on the full trajectory set (super_iterations outer, sub_iterations inner steps)
        closs, tloss = m.train(opt, full_traj, super_iterations,
                               sub_iterations)

    print(closs, len(closs), 'this is closs')
    plt.plot(range(len(closs)), closs)
    plt.savefig('closs.png')
    plt.figure()  # start a new figure so the tloss curve is not drawn on top of closs
    plt.plot(range(len(tloss)), tloss)
    plt.savefig('tloss.png')

    actions = np.eye(4)

    g = GridWorldEnv(copy.copy(gmap), noise=0.0)

    g.generateRandomStartGoal()

    for i in range(m.k):
        states = g.getAllStates()
        print('\n', states, '\n', 'this is all states', m.k)
        policy_hash = {}
        trans_hash = {}

        for s in states:

            t = np.zeros(shape=(8, 9))

            t[s[0], s[1]] = 1
            #t[2:4,0] = np.argwhere(g.map == g.START)[0]
            #t[4:6,0] = np.argwhere(g.map == g.GOAL)[0]

            # np.ravel flattens each evaluated value into a 1-D array
            l = [
                np.ravel(m.evalpi(i, [(t, actions[j, :])]))
                for j in g.possibleActions(s)
            ]
            print('\n', l, 'l', g.possibleActions(s), 'possible actions')

            if len(l) == 0:
                continue

            #print(i, s,l, m.evalpsi(i,ns))
            action = g.possibleActions(s)[np.argmax(l)]

            policy_hash[s] = action

            #print("Transition: ",m.evalpsi(i, [(t, actions[1,:])]), t)
            trans_hash[s] = np.ravel(m.evalpsi(i, [(t, actions[1, :])]))

        g.visualizePolicy(policy_hash,
                          trans_hash,
                          blank=True,
                          filename="resources/results/exp1-policy" + str(i) +
                          ".png")