def publishIterationPolicies(self, policies):
    # Translate the policies from numeric action codes into strings
    for row in xrange(0, len(policies)):
        for col in xrange(0, len(policies[row])):
            s = State(Point(row, col))
            if (s in self.map.wall_locs) or (s.loc.row >= self.map.map_height) \
                    or (s.loc.row < 0) or (s.loc.col >= self.map.map_width) \
                    or (s.loc.col < 0):
                policies[row][col] = "WALL"
            elif s in self.map.pit_locs:
                policies[row][col] = "PIT"
            elif self.map.isGoalState(s):
                policies[row][col] = "GOAL"
            elif policies[row][col] == AStarSearch.ACTION_LEFT:
                policies[row][col] = "W"
            elif policies[row][col] == AStarSearch.ACTION_RIGHT:
                policies[row][col] = "E"
            elif policies[row][col] == AStarSearch.ACTION_UP:
                policies[row][col] = "N"
            elif policies[row][col] == AStarSearch.ACTION_DOWN:
                policies[row][col] = "S"

            if policies[row][col] == -1 or policies[row][col] is None:
                print "Bad policy value at (" + str(row) + ", " + str(col) + ")"

    # Flatten the 2D policy grid into a single list for the message
    flattened = itertools.chain.from_iterable(policies)
    msg = PolicyList()
    msg.data = list(flattened)
    self.mdp_path_pub.publish(msg)
def mdp():
    config = read_config()
    move_list = config["move_list"]
    mapWidth = config["map_size"][1]
    mapHeight = config["map_size"][0]
    obstacles = config["walls"]
    pits = config["pits"]
    start = config["start"]
    goal = config["goal"]
    iter_limit = config["max_iterations"]
    threshold = config["threshold_difference"]

    # Build the initial board: terminal cells get their reward and a label,
    # every other cell starts at value 0 with an arbitrary "E" policy.
    board = []
    for i in range(mapHeight):
        board.append([])
        for j in range(mapWidth):
            cur = boardPos(0, "E")
            if [i, j] in obstacles:
                cur.value = config["reward_for_hitting_wall"]
                cur.policy = "WALL"
            elif [i, j] in pits:
                cur.value = config["reward_for_falling_in_pit"]
                cur.policy = "PIT"
            elif [i, j] == goal:
                cur.value = config["reward_for_reaching_goal"]
                cur.policy = "GOAL"
            board[i].append(cur)

    # Publish the initial policy
    policy = []
    for i in range(mapHeight):
        for j in range(mapWidth):
            policy.append(board[i][j].policy)
    policy_publisher.publish(PolicyList(policy))
    rospy.sleep(1)

    # Value iteration: sweep the board until it converges or the iteration
    # limit is reached, publishing the policy after every sweep.
    iter_count = 0
    while iter_count < iter_limit:
        board, stop = update_board(board, config)
        policy = []
        for i in range(mapHeight):
            for j in range(mapWidth):
                policy.append(board[i][j].policy)
        policy_publisher.publish(PolicyList(policy))
        if stop:
            break
        iter_count += 1

    for i in range(mapHeight):
        for j in range(mapWidth):
            print i, j, board[i][j]
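# --- Hypothetical sketch (not part of the snippet above) ---------------------
# update_board() is called above but not defined here. The version below is a
# simplified, deterministic one-sweep Bellman backup over the boardPos grid; it
# assumes the "reward_for_each_step", "discount_factor" and
# "threshold_difference" config keys used elsewhere in this file, and it omits
# the stochastic forward/backward/left/right move model for brevity (see the
# expected-value sketch further down for that part).
import copy

def update_board(board, config):
    # Every non-terminal cell takes the best one-step value over N/S/W/E and
    # records that move as its policy.
    moves = {"N": (-1, 0), "S": (1, 0), "W": (0, -1), "E": (0, 1)}
    new_board = copy.deepcopy(board)
    biggest_change = 0.0
    for i in range(len(board)):
        for j in range(len(board[i])):
            if board[i][j].policy in ("WALL", "PIT", "GOAL"):
                continue  # terminal/blocked cells keep their fixed value
            best_value, best_move = None, board[i][j].policy
            for name, (di, dj) in moves.items():
                ni, nj = i + di, j + dj
                if (ni < 0 or ni >= len(board) or nj < 0 or nj >= len(board[0])
                        or board[ni][nj].policy == "WALL"):
                    # Bumping the map edge or a wall bounces back to this cell
                    value = config["reward_for_hitting_wall"] \
                        + config["discount_factor"] * board[i][j].value
                else:
                    value = config["reward_for_each_step"] \
                        + config["discount_factor"] * board[ni][nj].value
                if best_value is None or value > best_value:
                    best_value, best_move = value, name
            new_board[i][j].value = best_value
            new_board[i][j].policy = best_move
            biggest_change = max(biggest_change, abs(best_value - board[i][j].value))
    # Converged once no cell changed by more than the threshold
    return new_board, biggest_change < config["threshold_difference"]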
def __init__(self):
    rospy.init_node('Robot', anonymous=True)
    self.path_pub = rospy.Publisher("results/path_list", AStarPath, queue_size=10)
    self.policy_pub = rospy.Publisher("results/policy_list", PolicyList, queue_size=10)
    self.sim_pub = rospy.Publisher("map_node/sim_complete", Bool, queue_size=10)

    # Publish the A* path one step at a time
    path_list = astar.astar()
    for pl in path_list:
        asp = AStarPath()
        asp.data = pl
        rospy.sleep(1)
        self.path_pub.publish(asp)

    # Run MDP value iteration and publish the resulting policy once
    markov = mdp()
    pl = PolicyList()
    pl.data = markov.policy_list
    rospy.sleep(3)
    self.policy_pub.publish(pl)

    self.sim_pub.publish(True)
    rospy.sleep(1)
    rospy.signal_shutdown("Done.")
from std_msgs.msg import String, Bool
def __init__(self):
    self.config = read_config()
    rospy.init_node("robot")

    # Map and problem parameters (read once from the config)
    self.map_y = self.config["map_size"][0]
    self.map_x = self.config["map_size"][1]
    self.move_list = self.config["move_list"]
    self.start = self.config["start"]
    self.goal = self.config["goal"]
    self.walls = self.config["walls"]
    self.pits = self.config["pits"]
    self.max_iterations = self.config["max_iterations"]
    self.threshold_difference = self.config["threshold_difference"]
    self.reward_step = self.config["reward_for_each_step"]
    self.reward_wall = self.config["reward_for_hitting_wall"]
    self.reward_goal = self.config["reward_for_reaching_goal"]
    self.reward_pit = self.config["reward_for_falling_in_pit"]
    self.discount = self.config["discount_factor"]
    self.prob_forward = self.config["prob_move_forward"]
    self.prob_back = self.config["prob_move_backward"]
    self.prob_left = self.config["prob_move_left"]
    self.prob_right = self.config["prob_move_right"]

    # Value maps with four value slots per cell; goal and pit cells are
    # initialised with their terminal rewards.
    self.mapV = [[[0, 0, 0, 0] for x in range(self.map_x)] for y in range(self.map_y)]
    self.mapVpre = [[[0, 0, 0, 0] for x in range(self.map_x)] for y in range(self.map_y)]
    self.mapV[self.goal[0]][self.goal[1]] = [self.reward_goal] * 4
    self.mapVpre[self.goal[0]][self.goal[1]] = [self.reward_goal] * 4
    for pit in self.pits:
        self.mapV[pit[0]][pit[1]] = [self.reward_pit] * 4
        self.mapVpre[pit[0]][pit[1]] = [self.reward_pit] * 4

    self.rate = rospy.Rate(1)
    self.astar_publisher = rospy.Publisher("/results/path_list", AStarPath, queue_size=10)
    self.mdp_publisher = rospy.Publisher("/results/policy_list", PolicyList, queue_size=10)
    self.sim_complete = rospy.Publisher("/map_node/sim_complete", Bool, queue_size=10)
    self.rate.sleep()

    # Publish the A* path one step at a time
    self.path = getAstar()
    self.mess = AStarPath()
    self.mess1 = PolicyList()
    for step in self.path:
        self.mess.data = step
        self.astar_publisher.publish(self.mess)
        self.rate.sleep()

    # Value iteration: recompute the value map and publish the flattened
    # policy after every sweep until the iteration limit is reached.
    self.count = 0
    while self.count < self.max_iterations:
        for y in range(self.map_y):
            for x in range(self.map_x):
                self.mapVpre[y][x] = self.mapV[y][x]
        self.mapV, self.policy = getMdp(self.mapV)
        self.mess1.data = []
        for y in range(self.map_y):
            for x in range(self.map_x):
                self.mess1.data.append(self.policy[y][x])
        self.mdp_publisher.publish(self.mess1)
        self.count += 1

    self.sim_complete.publish(Bool(True))
    rospy.signal_shutdown("complete")
    rospy.spin()
            correct += 1  # end of the (elided) policy-comparison loop; a sketch follows this snippet

# Report how closely the learned policy matches the MDP policy
percentage = correct * 100.0 / (len(policy) * len(policy[0]))
print "Optimal policy from MDP: \n", policy, "\n\n"
print "Optimal policy from model based learning: \n", policy1, "\n\n"
print "Percentage of correctness: \n", percentage, "%", "\n\n"

# Publish the A* path one step at a time
for x in mypath:
    path = AStarPath()
    path.data = x
    rospy.sleep(1)
    rb.astar_publisher.publish(path)

# Flatten the 2D policy into a single list and publish it
newpo = []
for x in policy:
    for y in x:
        newpo.append(y)
po = PolicyList()
po.data = newpo
rospy.sleep(1)
rb.policy_publisher.publish(po)
rospy.sleep(4)

# Signal that the simulation is complete, then shut down
rb.simu_publisher.publish(True)
rospy.sleep(4)
rospy.signal_shutdown("Done")
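# --- Hypothetical sketch (not part of the snippet above) ---------------------
# The fragment above starts mid-computation: the loop that increments `correct`
# is not shown. A minimal reconstruction of such an agreement count, assuming
# `policy` (from MDP) and `policy1` (from model-based learning) are same-sized
# 2D grids of direction strings:
correct = 0
for r in range(len(policy)):
    for c in range(len(policy[r])):
        if policy[r][c] == policy1[r][c]:
            correct += 1
percentage = correct * 100.0 / (len(policy) * len(policy[0]))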
def __init__(self):
    self.config = read_config()
    self.probF = self.config["prob_move_forward"]
    self.probB = self.config["prob_move_backward"]
    self.probL = self.config["prob_move_left"]
    self.probR = self.config["prob_move_right"]
    self.probAct = [self.probF, self.probB, self.probL, self.probR]
    self.maxI = self.config["max_iterations"]
    self.threshDiff = self.config["threshold_difference"]
    self.stepRwrd = self.config["reward_for_each_step"]
    self.wallRwrd = self.config["reward_for_hitting_wall"]
    self.goalRwrd = self.config["reward_for_reaching_goal"]
    self.pitRwrd = self.config["reward_for_falling_in_pit"]
    self.row = self.config["map_size"][0]
    self.column = self.config["map_size"][1]
    self.start = self.config["start"]
    self.goal = self.config["goal"]
    self.goal_r = self.goal[0]
    self.goal_c = self.goal[1]
    self.start_r = self.start[0]
    self.start_c = self.start[1]
    self.pits = self.config["pits"]
    self.walls = self.config["walls"]

    # Row/column offsets for [forward, backward, left, right], indexed by the
    # intended direction:
    #           forward   backward  left      right
    #   south   r+1,c+0   r-1,c+0   r+0,c-1   r+0,c+1
    #   north   r-1,c+0   r+1,c+0   r+0,c+1   r+0,c-1
    #   west    r+0,c-1   r+0,c+1   r-1,c+0   r+1,c+0
    #   east    r+0,c+1   r+0,c-1   r+1,c+0   r-1,c+0
    self.act = [[[1, 0], [-1, 0], [0, -1], [0, 1]],
                [[-1, 0], [1, 0], [0, 1], [0, -1]],
                [[0, -1], [0, 1], [-1, 0], [1, 0]],
                [[0, 1], [0, -1], [1, 0], [-1, 0]]]

    self.mmap1 = []
    self.mmap2 = []
    self.policyMap = []

    # Create the value maps and the (string-valued) policy map
    self.createMap(self.mmap1, 0, False)
    self.createMap(self.mmap2, 0, False)
    self.createMap(self.policyMap, "", True)

    self.MDP_Algo()

    # Flatten the policy map and publish it
    self.resPub = rospy.Publisher("/results/policy_list", PolicyList, queue_size=10)
    policyList = PolicyList()
    policyL = []
    for r in range(len(self.policyMap)):
        for c in range(len(self.policyMap[r])):
            policyL.append(self.policyMap[r][c])
    policyList.data = policyL
    rospy.sleep(0.3)
    self.resPub.publish(policyList)
def __init__(self):
    self.config = read_config()
    self.probF = self.config["prob_move_forward"]
    self.probB = self.config["prob_move_backward"]
    self.probL = self.config["prob_move_left"]
    self.probR = self.config["prob_move_right"]
    self.probAct = [self.probF, self.probB, self.probL, self.probR]
    self.maxI = self.config["max_iterations"]
    self.threshDiff = self.config["threshold_difference"]
    self.stepRwrd = self.config["reward_for_each_step"]
    self.wallRwrd = self.config["reward_for_hitting_wall"]
    self.goalRwrd = self.config["reward_for_reaching_goal"]
    self.pitRwrd = self.config["reward_for_falling_in_pit"]
    self.row = self.config["map_size"][0]
    self.column = self.config["map_size"][1]
    self.start = self.config["start"]
    self.goal = self.config["goal"]
    self.goal_r = self.goal[0]
    self.goal_c = self.goal[1]
    self.start_r = self.start[0]
    self.start_c = self.start[1]
    self.pits = self.config["pits"]
    self.walls = self.config["walls"]

    # Row/column offsets for [forward, backward, left, right], indexed by the
    # intended direction (0 = south, 1 = north, 2 = west, 3 = east):
    #           forward   backward  left      right
    #   south   r+1,c+0   r-1,c+0   r+0,c-1   r+0,c+1
    #   north   r-1,c+0   r+1,c+0   r+0,c+1   r+0,c-1
    #   west    r+0,c-1   r+0,c+1   r-1,c+0   r+1,c+0
    #   east    r+0,c+1   r+0,c-1   r+1,c+0   r-1,c+0
    self.act = [[[1, 0], [-1, 0], [0, -1], [0, 1]],
                [[-1, 0], [1, 0], [0, 1], [0, -1]],
                [[0, -1], [0, 1], [-1, 0], [1, 0]],
                [[0, 1], [0, -1], [1, 0], [-1, 0]]]

    self.mmap1 = []
    self.mmap2 = []
    self.policyMap = []
    self.noChange = False
    self.setValueMap = True

    # mmap2 is the value map holding temporary values from the temporary
    # policy; policyMap starts with an arbitrary policy (1) in every cell.
    self.createMap(self.mmap1, 0, False)
    self.createMap(self.mmap2, 0, False)
    self.createMap(self.policyMap, 1, False)

    self.MDP_Algo()
    print "\n"
    print np.reshape(self.policyMap, (self.row, self.column))

    # Translate the numeric policy into direction strings and publish it
    self.resPub = rospy.Publisher("/results/policy_list", PolicyList, queue_size=10)
    policyList = PolicyList()
    policyL = []
    for r in range(len(self.policyMap)):
        for c in range(len(self.policyMap[r])):
            # Label terminal cells first so each cell yields exactly one entry
            if self.is_Wall(r, c):
                policyL.append("WALL")
            elif self.is_Pit(r, c):
                policyL.append("PIT")
            elif self.goal_r == r and self.goal_c == c:
                policyL.append("GOAL")
            elif self.policyMap[r][c] == 0:
                policyL.append("S")
            elif self.policyMap[r][c] == 1:
                policyL.append("N")
            elif self.policyMap[r][c] == 2:
                policyL.append("W")
            elif self.policyMap[r][c] == 3:
                policyL.append("E")
    print policyL
    policyList.data = policyL
    rospy.sleep(0.3)
    self.resPub.publish(policyList)
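# --- Hypothetical sketch (not part of the snippet above) ---------------------
# MDP_Algo() is not defined here. The method below shows how self.act and
# self.probAct are typically combined into the expected value of one
# state-action pair; expected_value is a made-up helper name, value_map is
# assumed to be a 2D grid of floats (like mmap1/mmap2), and "discount_factor"
# is assumed to be present in the same config.
def expected_value(self, value_map, r, c, a):
    # a is the intended direction (0=S, 1=N, 2=W, 3=E); the agent actually
    # moves forward/backward/left/right of it with probabilities self.probAct.
    total = 0.0
    for outcome in range(4):
        dr, dc = self.act[a][outcome]
        nr, nc = r + dr, c + dc
        if (nr < 0 or nr >= self.row or nc < 0 or nc >= self.column
                or [nr, nc] in self.walls):
            # Bumping the map edge or a wall: stay in place, pay the penalty
            reward, nr, nc = self.wallRwrd, r, c
        else:
            reward = self.stepRwrd
        total += self.probAct[outcome] * (
            reward + self.config["discount_factor"] * value_map[nr][nc])
    return total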
def __init__(self):
    rospy.init_node("Robot")
    self.config = read_config()

    # Publishers (latched so late subscribers still receive the results)
    self.results_AStar_pub = rospy.Publisher("/results/path_list", AStarPath,
                                             queue_size=10, latch=True)
    self.results_MDP_pub = rospy.Publisher("/results/policy_list", PolicyList,
                                           queue_size=10, latch=True)
    self.sim_complete_pub = rospy.Publisher("/map_node/sim_complete", Bool,
                                            queue_size=10, latch=True)

    # A* inputs
    self.move_list = self.config["move_list"]
    self.map_size = self.config["map_size"]
    self.start = self.config["start"]
    self.goal = self.config["goal"]
    self.walls = self.config["walls"]
    self.pits = self.config["pits"]

    # MDP inputs: transition probabilities
    self.p_f = self.config["prob_move_forward"]
    self.p_b = self.config["prob_move_backward"]
    self.p_l = self.config["prob_move_left"]
    self.p_r = self.config["prob_move_right"]

    # MDP inputs: rewards
    self.r_step = self.config["reward_for_each_step"]
    self.r_wall = self.config["reward_for_hitting_wall"]
    self.r_goal = self.config["reward_for_reaching_goal"]
    self.r_pit = self.config["reward_for_falling_in_pit"]

    # Iteration limits
    max_iterations = self.config["max_iterations"]
    threshold_difference = self.config["threshold_difference"]

    index = 0
    last_reward = 1
    this_reward = 0
    opt_move = AStarPath()

    # pLearn
    plearn.p_learn()
    # aLearn
    alearn.a_learn()

    # A*: publish the optimal path one move at a time
    rospy.sleep(0.1)
    opt_path = astar.a_opt_search()
    for move in opt_path:
        opt_move.data = move
        rospy.sleep(0.1)
        self.results_AStar_pub.publish(opt_move)

    # MDP: publish the optimal policy
    opt_policy = PolicyList()
    opt_policy.data = mdp.m_opt_search()
    self.results_MDP_pub.publish(opt_policy)
    rospy.sleep(0.1)

    # Signal that the simulation is complete, then shut down
    rospy.sleep(1)
    self.sim_complete_pub.publish(True)
    rospy.sleep(1)
    rospy.signal_shutdown("Done")
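# --- Usage note (not part of the snippet above) -------------------------------
# The publishers above use latch=True: a latched topic re-delivers its most
# recent message to subscribers that connect later, so the node does not need
# long sleeps before publishing. Minimal standalone illustration (hypothetical
# node name, same topic as above):
import rospy
from std_msgs.msg import Bool

rospy.init_node("latch_demo")
pub = rospy.Publisher("/map_node/sim_complete", Bool, queue_size=10, latch=True)
pub.publish(Bool(True))   # published once, immediately
rospy.sleep(2.0)          # a subscriber started now still receives the latched True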
print "Percentage of correctness: \n", percentage, "%", "\n\n" #publish path and policy for x in mypath: path = AStarPath() path.data = x rospy.sleep(1) rb.astar_publisher.publish(path) newpo = [] for x in policy: for y in x: newpo.append(y) po = PolicyList() po.data = newpo rospy.sleep(1) rb.policy_publisher.publish(po) rospy.sleep(4) # publish finish simu rb.simu_publisher.publish(True) rospy.sleep(4) # shutdown rospy.signal_shutdown("Done")