def planner():
    print("-----Planning Demonstration-----")

    MAP_NAME = 'resources/GridWorldMaps/experiment1.txt'
    gmap = np.loadtxt(MAP_NAME, dtype=np.uint8)

    g = GridWorldEnv(copy.copy(gmap), noise=0.1)
    g.generateRandomStartGoal()

    v = ValueIterationPlanner(g)
    traj = v.plan(max_depth=100)

    new_traj = []
    for t in traj:
        a = np.zeros(shape=(4, 1))
        s = np.zeros(shape=(2, 1))
        a[t[1]] = 1
        s[0:2, 0] = t[0]
        new_traj.append((s, a))

    return new_traj
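# Example usage (a minimal sketch): planner() returns a list of (state, action)
# pairs, where each state is a (2, 1) coordinate vector and each action is a
# one-hot (4, 1) vector. The loop below only inspects the returned demonstration
# and assumes nothing beyond what planner() itself provides.
if __name__ == '__main__':
    demo = planner()
    for s, a in demo:
        print("state", np.ravel(s), "action", int(np.argmax(a)))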
def runPolicies(demonstrations=100,
                super_iterations=2000,
                sub_iterations=0,
                learning_rate=1e-2,
                env_noise=0.3):

    m = GridWorldNNModel(2, statedim=(2, 1))

    MAP_NAME = 'resources/GridWorldMaps/experiment1.txt'
    gmap = np.loadtxt(MAP_NAME, dtype=np.uint8)

    full_traj = []
    vis_traj = []

    for i in range(0, demonstrations):
        print("Traj", i)

        g = GridWorldEnv(copy.copy(gmap), noise=env_noise)
        g.generateRandomStartGoal()

        start = np.argwhere(g.map == g.START)[0]
        goal = np.argwhere(g.map == g.GOAL)[0]

        # only keep trajectories that start in one room and end in the other
        while not ((inRoom1(start) and inRoom2(goal)) or
                   (inRoom2(start) and inRoom1(goal))):
            g.generateRandomStartGoal()
            start = np.argwhere(g.map == g.START)[0]
            goal = np.argwhere(g.map == g.GOAL)[0]

        print(np.argwhere(g.map == g.START), np.argwhere(g.map == g.GOAL))

        v = ValueIterationPlanner(g)
        traj = v.plan(max_depth=100)

        new_traj = []
        for t in traj:
            a = np.zeros(shape=(4, 1))
            s = np.zeros(shape=(2, 1))
            a[t[1]] = 1
            s[:, 0] = t[0]
            #s[2:4,0] = np.argwhere(g.map == g.START)[0]
            #s[4:6,0] = np.argwhere(g.map == g.GOAL)[0]
            new_traj.append((s, a))

        full_traj.append(new_traj)
        vis_traj.extend(new_traj)

    #g.visualizePlan(vis_traj, blank=True, filename="resources/results/exp1-trajs.png")

    m.sess.run(tf.initialize_all_variables())

    with tf.variable_scope("optimizer"):
        opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
        m.train(opt, full_traj, super_iterations, sub_iterations)

    actions = np.eye(4)

    g = GridWorldEnv(copy.copy(gmap), noise=0.1)
    g.generateRandomStartGoal()

    for i in range(m.k):
        states = g.getAllStates()
        policy_hash = {}
        trans_hash = {}

        for s in states:
            t = np.zeros(shape=(2, 1))
            t[:, 0] = s
            #t[2:4,0] = np.argwhere(g.map == g.START)[0]
            #t[4:6,0] = np.argwhere(g.map == g.GOAL)[0]

            l = [np.ravel(m.evalpi(i, [(t, actions[j, :])]))
                 for j in g.possibleActions(s)]

            if len(l) == 0:
                continue

            action = g.possibleActions(s)[np.argmax(l)]
            policy_hash[s] = action

            print("Transition: ", m.evalpsi(i, [(t, actions[1, :])]), t)
            trans_hash[s] = np.ravel(m.evalpsi(i, [(t, actions[1, :])]))

        g.visualizePolicy(policy_hash, trans_hash, blank=True,
                          filename="resources/results/exp1a-policy" + str(i) + ".png")
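# inRoom1 and inRoom2 are called above but are not defined in this snippet. The
# sketch below only illustrates the expected interface -- a (row, col) position in,
# a boolean out. The column split used here is a hypothetical placeholder, not the
# actual room layout of experiment1.txt, and must be replaced with the real boundary.
def inRoom1(pos):
    # hypothetical: the left room spans columns 0-3
    return pos[1] <= 3

def inRoom2(pos):
    # hypothetical: the right room spans columns 5 and up (column 4 being the wall)
    return pos[1] >= 5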
def runPolicies(demonstrations=200,
                super_iterations=1000,
                sub_iterations=1,
                learning_rate=1e-3,
                env_noise=0.1):

    m = GridWorldModel(4)

    MAP_NAME = 'resources/GridWorldMaps/experiment2.txt'
    gmap = np.loadtxt(MAP_NAME, dtype=np.uint8)

    full_traj = []
    vis_traj = []

    for i in range(0, demonstrations):
        print("Traj", i)

        g = GridWorldEnv(copy.copy(gmap), noise=env_noise)
        g.generateRandomStartGoal()

        v = ValueIterationPlanner(g)
        traj = v.plan(max_depth=100)

        new_traj = []
        for t in traj:
            a = np.zeros(shape=(4, 1))
            a[t[1]] = 1
            new_traj.append((t[0], a))

        full_traj.append(new_traj)
        vis_traj.extend(new_traj)

    #g.visualizePlan(vis_traj, blank=True, filename="resources/results/exp2-trajs.png")

    opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
    m.train(opt, full_traj, super_iterations, sub_iterations)

    actions = np.eye(4)

    g = GridWorldEnv(copy.copy(gmap), noise=0.0)

    for i in range(m.k):
        states = g.getAllStates()
        policy_hash = {}
        trans_hash = {}

        for s in states:
            l = [np.ravel(m.evalpi(i, [(s, actions[j, :])]))
                 for j in g.possibleActions(s)]

            if len(l) == 0:
                continue

            action = g.possibleActions(s)[np.argmax(l)]
            policy_hash[s] = action

            trans_hash[s] = np.ravel(m.evalpsi(i, [(s, actions[1, :])]))

        g.visualizePolicy(policy_hash, trans_hash, blank=True,
                          filename="resources/results/exp2-policy" + str(i) + ".png")
def runPolicies(demonstrations=100,
                super_iterations=100,
                sub_iterations=1000,
                learning_rate=1e-3,
                env_noise=0.1):

    m = GridWorldModel((2, 1), (4, 1), 3)

    MAP_NAME = 'resources/GridWorldMaps/experiment3.txt'
    gmap = np.loadtxt(MAP_NAME, dtype=np.uint8)

    full_traj = []
    vis_traj = []

    for i in range(0, demonstrations):
        print("Traj", i)

        g = GridWorldEnv(copy.copy(gmap), noise=env_noise)
        g.generateRandomStartGoal()

        v = ValueIterationPlanner(g)
        traj = v.plan(max_depth=100)

        new_traj = []
        for t in traj:
            a = np.zeros(shape=(4, 1))
            a[t[1]] = 1
            new_traj.append((t[0], a))

        full_traj.append(new_traj)
        vis_traj.extend(new_traj)

    g.visualizePlan(vis_traj, blank=True, filename="resources/results/exp3-trajs.png")

    opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
    loss = m.getLossFunction()[0]
    train = opt.minimize(loss)
    init = tf.initialize_all_variables()

    m.sess.run(init)

    for it in range(super_iterations):
        print("Iteration", it)
        batch = m.sampleBatch(full_traj)
        for i in range(sub_iterations):
            m.sess.run(train, batch)

    actions = np.eye(4)

    g = GridWorldEnv(copy.copy(gmap), noise=0.0)

    for i in range(m.k):
        states = g.getAllStates()
        policy_hash = {}
        trans_hash = {}

        for s in states:
            l = [np.ravel(m.evalpi(i, [(s, actions[j, :])]))
                 for j in g.possibleActions(s)]

            if len(l) == 0:
                continue

            action = g.possibleActions(s)[np.argmax(l)]
            policy_hash[s] = action

            trans_hash[s] = 0

        g.visualizePolicy(policy_hash, trans_hash, blank=True,
                          filename="resources/results/exp3-policy" + str(i) + ".png")
mmap = np.loadtxt('resources/GridWorldMaps/11x11-Rooms-Modes.txt', dtype=np.uint8)

m = GridWorldModel(3, statedim=(gmap.shape[0], gmap.shape[1]))

demonstrations = 100

full_traj = []
vis_traj = []

print(gmap.shape)

for i in range(demonstrations):
    print("Traj", i)

    # g = SwitchedGridWorldEnv(copy.copy(gmap), copy.copy(mmap), noise=0.3)
    g = GridWorldEnv(copy.copy(gmap), noise=0.3)

    v = ValueIterationPlanner(g)
    traj = v.plan(max_depth=100)
    # g.visualizePlan(traj)

    print("Beg Traj")
    print(traj)
    print("End Traj")

    new_traj = []
    for t in traj:
        a = np.zeros(shape=(4, 1))
        s = np.zeros(shape=(gmap.shape[0], gmap.shape[1]))
        a[t[1]] = 1
        # the rest of the loop body is assumed to follow the same encoding used in
        # the other experiments: mark the visited cell and collect the (state, action) pair
        s[t[0][0], t[0][1]] = 1
        new_traj.append((s, a))

    full_traj.append(new_traj)
    vis_traj.extend(new_traj)
def runPolicies(demonstrations=20,
                super_iterations=1000,  # 10000
                sub_iterations=0,
                learning_rate=10,
                env_noise=0.3):

    m = GridWorldModel(4, statedim=(8, 9))

    MAP_NAME = 'resources/GridWorldMaps/experiment1.txt'
    gmap = np.loadtxt(MAP_NAME, dtype=np.uint8)

    full_traj = []
    vis_traj = []

    for i in range(0, demonstrations):
        print("Traj", i)

        g = GridWorldEnv(copy.copy(gmap), noise=env_noise)
        g.generateRandomStartGoal()

        start = np.argwhere(g.map == g.START)[0]
        goal = np.argwhere(g.map == g.GOAL)[0]

        # only keep trajectories that start in one room and end in the other
        while not ((inRoom1(start) and inRoom2(goal)) or
                   (inRoom2(start) and inRoom1(goal))):
            g.generateRandomStartGoal()
            start = np.argwhere(g.map == g.START)[0]
            goal = np.argwhere(g.map == g.GOAL)[0]

        print(np.argwhere(g.map == g.START), np.argwhere(g.map == g.GOAL))

        v = ValueIterationPlanner(g)
        traj = v.plan(max_depth=100)
        print(len(traj), 'length of the trajectory')
        # the trajectory length depends on the start state, the goal state, and the planner output

        new_traj = []
        # iterate over every step of the planned trajectory
        for t in traj:
            # encode each step as a one-hot grid state and a one-hot action
            a = np.zeros(shape=(4, 1))
            s = np.zeros(shape=(8, 9))
            a[t[1]] = 1
            s[t[0][0], t[0][1]] = 1
            #s[2:4,0] = np.argwhere(g.map == g.START)[0]
            #s[4:6,0] = np.argwhere(g.map == g.GOAL)[0]
            new_traj.append((s, a))

        full_traj.append(new_traj)
        vis_traj.extend(new_traj)

    print(np.shape(full_traj[0][0][1]), "full trajectory")

    #g.visualizePlan(vis_traj, blank=True, filename="resources/results/exp1-trajs.png")

    m.sess.run(tf.initialize_all_variables())

    with tf.variable_scope("optimizer"):
        # define the optimizer and train on the full set of demonstration trajectories
        opt = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
        closs, tloss = m.train(opt, full_traj, super_iterations, sub_iterations)

    print(closs, len(closs), 'this is closs')
    plt.plot(range(len(closs)), closs)
    plt.savefig('closs.png')

    plt.figure()  # start a new figure so the two loss curves are not saved on the same axes
    plt.plot(range(len(tloss)), tloss)
    plt.savefig('tloss.png')

    actions = np.eye(4)

    g = GridWorldEnv(copy.copy(gmap), noise=0.0)
    g.generateRandomStartGoal()

    for i in range(m.k):
        states = g.getAllStates()
        print('\n', states, '\n', 'this is all states', m.k)

        policy_hash = {}
        trans_hash = {}

        for s in states:
            t = np.zeros(shape=(8, 9))
            t[s[0], s[1]] = 1
            #t[2:4,0] = np.argwhere(g.map == g.START)[0]
            #t[4:6,0] = np.argwhere(g.map == g.GOAL)[0]

            # np.ravel flattens each evaluated policy output into a 1-D array
            l = [np.ravel(m.evalpi(i, [(t, actions[j, :])]))
                 for j in g.possibleActions(s)]
            print('\n', l, 'l', g.possibleActions(s), 'possible actions')

            if len(l) == 0:
                continue

            action = g.possibleActions(s)[np.argmax(l)]
            policy_hash[s] = action

            #print("Transition: ", m.evalpsi(i, [(t, actions[1,:])]), t)
            trans_hash[s] = np.ravel(m.evalpsi(i, [(t, actions[1, :])]))

        g.visualizePolicy(policy_hash, trans_hash, blank=True,
                          filename="resources/results/exp1-policy" + str(i) + ".png")
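# Minimal entry point (a sketch): simply calls runPolicies() with its defaults.
# The default hyperparameters above (20 demonstrations, plain gradient descent
# with learning rate 10) come from the original script and are not a recommendation.
if __name__ == '__main__':
    runPolicies()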