Example #1
    def publishIterationPolicies(self, policies):
        # Translate the numeric action values into direction strings
        for row in xrange(len(policies)):
            for col in xrange(len(policies[row])):
                s = State(Point(row, col))
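                # Out-of-bounds cells and walls are both published as "WALL"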
                if (s in self.map.wall_locs) or (s.loc.row >= self.map.map_height) \
                           or (s.loc.row < 0) or (s.loc.col >= self.map.map_width) \
                           or (s.loc.col < 0):
                    policies[row][col] = "WALL"

                elif s in self.map.pit_locs:
                    policies[row][col] = "PIT"
                elif self.map.isGoalState(s):
                    policies[row][col] = "GOAL"
                elif policies[row][col] == AStarSearch.ACTION_LEFT:
                    policies[row][col] = "W"
                elif policies[row][col] == AStarSearch.ACTION_RIGHT:
                    policies[row][col] = "E"
                elif policies[row][col] == AStarSearch.ACTION_UP:
                    policies[row][col] = "N"
                elif policies[row][col] == AStarSearch.ACTION_DOWN:
                    policies[row][col] = "S"

                if policies[row][col] == -1 or policies[row][col] is None:
                    print "Unrecognized policy value at (" + str(row) + ", " + str(
                        col) + ")"

        # Flatten the 2D grid row-major, as the PolicyList message expects
        flattened = itertools.chain.from_iterable(policies)
        msg = PolicyList()
        msg.data = list(flattened)
        self.mdp_path_pub.publish(msg)
Example #2
def mdp():
	config = read_config()
	move_list = config["move_list"]
	mapWidth = config["map_size"][1]
	mapHeight = config["map_size"][0]	
	obstacles = config["walls"]
	pits = config["pits"]
	start = config["start"]
	goal = config["goal"]
	iter_limit = config["max_iterations"]
	threshold = config["threshold_difference"]

	board = []

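	# Build the board, seeding walls, pits, and the goal with their configured rewards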
	for i in range(mapHeight):
		board.append([])
		for j in range(mapWidth):
			cur = boardPos(0, "E")
			if [i, j] in obstacles:			
				cur.value = config["reward_for_hitting_wall"]
				cur.policy = "WALL"
			elif [i, j] in pits:
				cur.value = config["reward_for_falling_in_pit"]
				cur.policy = "PIT"
			elif [i, j] == goal:
				cur.value = config["reward_for_reaching_goal"]
				cur.policy = "GOAL"
			board[i].append(cur)
		

	policy = []
	for i in range(mapHeight):
		for j in range(mapWidth):
			policy.append(board[i][j].policy)
	policy_publisher.publish(PolicyList(policy))
	
	rospy.sleep(1)

	iter_count = 0

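	# Sweep until update_board reports convergence or the iteration limit is hit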
	while iter_count < iter_limit:
		board, stop = update_board(board, config)
		policy = []
		for i in range(mapHeight):
			for j in range(mapWidth):
				policy.append(board[i][j].policy)
		policy_publisher.publish(PolicyList(policy))
		if stop:
			break
		iter_count += 1
		
	for i in range(mapHeight):
		for j in range(mapWidth):
			print i, j, board[i][j]
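
Several of these examples call a read_config() helper that is not shown on this page. A minimal sketch, assuming the configuration is a JSON file located via a ROS parameter; the parameter name, default path, and JSON format are assumptions, not taken from the source:

import json
import rospy

def read_config():
    # Hypothetical helper: load the task configuration (map size, walls,
    # pits, start/goal, rewards, move probabilities) from a JSON file.
    # The "config_file_path" parameter name and default are assumptions.
    path = rospy.get_param("config_file_path", "configuration.json")
    with open(path) as config_file:
        return json.load(config_file)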
Example #3
    def __init__(self):
        rospy.init_node('Robot', anonymous=True)

        self.path_pub = rospy.Publisher(
            "results/path_list",
            AStarPath,
            queue_size = 10
        )
        self.policy_pub = rospy.Publisher(
            "results/policy_list",
            PolicyList,
            queue_size = 10
        )
        self.sim_pub = rospy.Publisher(
            "map_node/sim_complete",
            Bool,
            queue_size = 10
        )

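        # Publish the A* path one waypoint at a time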
        path_list = astar.astar()
        for pl in path_list:
            asp = AStarPath()
            asp.data = pl
            rospy.sleep(1)
            self.path_pub.publish(asp)

        markov = mdp()
        pl = PolicyList()
        pl.data = markov.policy_list
        rospy.sleep(1)
        self.policy_pub.publish(pl)

        self.sim_pub.publish(True)
        rospy.sleep(1)
        rospy.signal_shutdown("Done.")
Example #4
from std_msgs.msg import String, Bool
Example #6
    def __init__(self):
	self.config = read_config()
	rospy.init_node("robot")
	self.map_y = self.config["map_size"][0]
	self.map_x = self.config["map_size"][1]
	self.move_list = self.config["move_list"]
	self.start = self.config["start"]
	self.goal = self.config["goal"]
	self.walls = self.config["walls"]
	self.pits = self.config["pits"]
	self.max_iterations = self.config["max_iterations"]
	self.threshold_difference = self.config["threshold_difference"]
	self.reward_step = self.config["reward_for_each_step"]
	self.reward_wall = self.config["reward_for_hitting_wall"]
	self.reward_goal = self.config["reward_for_reaching_goal"]
	self.reward_pit = self.config["reward_for_falling_in_pit"]

	self.discount = self.config["discount_factor"]
	self.prob_forward = self.config["prob_move_forward"]
	self.prob_back = self.config["prob_move_backward"]
	self.prob_left = self.config["prob_move_left"]
	self.prob_right = self.config["prob_move_right"]
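	# Each cell stores four values (presumably one per heading); goal and pit cells are pinned to their rewards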
	self.mapV = [[[0, 0, 0, 0] for x in range(self.map_x)] for y in range(self.map_y)]
	self.mapVpre = [[[0, 0, 0, 0] for x in range(self.map_x)] for y in range(self.map_y)]
	self.mapV[self.goal[0]][self.goal[1]] = [self.reward_goal] * 4
	self.mapVpre[self.goal[0]][self.goal[1]] = [self.reward_goal] * 4
	for pit in self.pits:
	    self.mapV[pit[0]][pit[1]] = [self.reward_pit] * 4
	    self.mapVpre[pit[0]][pit[1]] = [self.reward_pit] * 4
	self.rate = rospy.Rate(1)
	self.astar_publisher = rospy.Publisher(
		"/results/path_list",
		AStarPath,
		queue_size = 10
	)
	self.mdp_publisher = rospy.Publisher(
		"/results/policy_list",
		PolicyList,
		queue_size = 10
	)
	self.sim_complete = rospy.Publisher(
		"/map_node/sim_complete",
		Bool,
		queue_size = 10
	)
	self.rate.sleep()
	self.path = getAstar()
	self.mess = AStarPath()
	self.mess1 = PolicyList()
	for step in self.path:
		self.mess.data = step
		self.astar_publisher.publish(self.mess)
		self.rate.sleep()
	self.count = 0
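	# Value iteration: snapshot the current values, recompute them, and republish the policy each sweep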
	while(self.count < self.max_iterations):
		for y in range(self.map_y):
			for x in range(self.map_x):
				self.mapVpre[y][x] = self.mapV[y][x]
		self.mapV,self.policy = getMdp(self.mapV)
		self.mess1.data = []
		for y in range(self.map_y):
			for x in range(self.map_x):
				self.mess1.data.append(self.policy[y][x])
		self.mdp_publisher.publish(self.mess1)
		
		self.count +=1
	self.sim_complete.publish(Bool(True))
	rospy.signal_shutdown("complete")
	rospy.spin()
Example #7
                correct += 1

    percentage = correct * 100.0 / (len(policy) * len(policy[0]))
    print "Optimal policy from MDP: \n", policy, "\n\n"
    print "Optimal policy from model based learning: \n", policy1, "\n\n"
    print "Percentage of correctness: \n", percentage, "%", "\n\n"
    # Publish the A* path and the MDP policy

    for x in mypath:
        path = AStarPath()
        path.data = x
        rospy.sleep(1)
        rb.astar_publisher.publish(path)

    newpo = []
    for x in policy:
        for y in x:
            newpo.append(y)

    po = PolicyList()
    po.data = newpo
    rospy.sleep(1)
    rb.policy_publisher.publish(po)

    rospy.sleep(4)
    # Signal that the simulation is complete
    rb.simu_publisher.publish(True)
    rospy.sleep(4)
    # shutdown
    rospy.signal_shutdown("Done")
Example #8
    def __init__(self):
        self.config = read_config()

        self.probF = self.config["prob_move_forward"]
        self.probB = self.config["prob_move_backward"]
        self.probL = self.config["prob_move_left"]
        self.probR = self.config["prob_move_right"]
        self.probAct = [self.probF, self.probB, self.probL, self.probR]

        self.maxI = self.config["max_iterations"]
        self.threshDiff = self.config["threshold_difference"]

        self.stepRwrd = self.config["reward_for_each_step"]
        self.wallRwrd = self.config["reward_for_hitting_wall"]
        self.goalRwrd = self.config["reward_for_reaching_goal"]
        self.pitRwrd = self.config["reward_for_falling_in_pit"]

        self.row = self.config["map_size"][0]
        self.column = self.config["map_size"][1]

        self.start = self.config["start"]
        self.goal = self.config["goal"]
        self.goal_r = self.goal[0]
        self.goal_c = self.goal[1]
        self.start_r = self.start[0]
        self.start_c = self.start[1]
        self.pits = self.config["pits"]
        self.walls = self.config["walls"]

        self.act = [[[1, 0], [-1, 0], [0, -1], [0, 1]],
                    [[-1, 0], [1, 0], [0, 1], [0, -1]],
                    [[0, -1], [0, 1], [-1, 0], [1, 0]],
                    [[0, 1], [0, -1], [1, 0], [-1, 0]]]
        #forward   backward   left      right
        #south  r+1, c+0   r-1, c+0   r+0, c-1  r+0, c+1
        #north  r-1, c+0   r+1, c+0   r+0, c+1  r+0, c-1
        #west   r+0, c-1   r+0, c+1   r-1, c+0  r+1, c+0
        #east   r+0, c+1   r+0, c-1   r+1,c+0   r-1, c+0

        self.mmap1 = []
        self.mmap2 = []
        self.policyMap = []

        #creating maps
        self.createMap(self.mmap1, 0, False)
        self.createMap(self.mmap2, 0, False)
        self.createMap(self.policyMap, "", True)

        self.MDP_Algo()

        self.resPub = rospy.Publisher("/results/policy_list",
                                      PolicyList,
                                      queue_size=10)
        policyList = PolicyList()

        policyL = []

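        # Flatten the 2D policy map row-major for the PolicyList message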
        for r in range(len(self.policyMap)):
            for c in range(len(self.policyMap[r])):
                policyL.append(self.policyMap[r][c])


        policyList.data = policyL
        rospy.sleep(0.3)
        self.resPub.publish(policyList)
Example #10
    def __init__(self):
        self.config = read_config()

        self.probF = self.config["prob_move_forward"]
        self.probB = self.config["prob_move_backward"]
        self.probL = self.config["prob_move_left"]
        self.probR = self.config["prob_move_right"]
        self.probAct = [self.probF, self.probB, self.probL, self.probR]

        self.maxI = self.config["max_iterations"]
        self.threshDiff = self.config["threshold_difference"]

        self.stepRwrd = self.config["reward_for_each_step"]
        self.wallRwrd = self.config["reward_for_hitting_wall"]
        self.goalRwrd = self.config["reward_for_reaching_goal"]
        self.pitRwrd = self.config["reward_for_falling_in_pit"]

        self.row = self.config["map_size"][0]
        self.column = self.config["map_size"][1]

        self.start = self.config["start"]
        self.goal = self.config["goal"]
        self.goal_r = self.goal[0]
        self.goal_c = self.goal[1]
        self.start_r = self.start[0]
        self.start_c = self.start[1]
        self.pits = self.config["pits"]
        self.walls = self.config["walls"]

        self.act = [[[1, 0], [-1, 0], [0, -1], [0, 1]],
                    [[-1, 0], [1, 0], [0, 1], [0, -1]],
                    [[0, -1], [0, 1], [-1, 0], [1, 0]],
                    [[0, 1], [0, -1], [1, 0], [-1, 0]]]
        #forward   backward   left      right
        #south  r+1, c+0   r-1, c+0   r+0, c-1  r+0, c+1
        #north  r-1, c+0   r+1, c+0   r+0, c+1  r+0, c-1
        #west	r+0, c-1   r+0, c+1   r-1, c+0  r+1, c+0
        #east   r+0, c+1   r+0, c-1   r+1,c+0   r-1, c+0

        self.mmap1 = []
        self.mmap2 = []
        self.policyMap = []
        self.noChange = False
        self.setValueMap = True

        #creating maps
        self.createMap(self.mmap1, 0, False)
        self.createMap(
            self.mmap2, 0, False
        )  #this is the value map, and it contains temp values from temp policy
        self.createMap(
            self.policyMap, 1,
            False)  #every location contains an arbitrary starting policy

        self.MDP_Algo()

        print "\n"
        print np.reshape(self.policyMap, (self.row, self.column))

        self.resPub = rospy.Publisher("/results/policy_list",
                                      PolicyList,
                                      queue_size=10)
        policyList = PolicyList()

        policyL = []

        for r in range(len(self.policyMap)):
            for c in range(len(self.policyMap[r])):
                # Emit exactly one label per cell: terminal and blocked
                # cells come first, so a pit, wall, or goal never also
                # emits a direction letter for its stored policy value.
                if self.is_Wall(r, c):
                    policyL.append("WALL")
                elif self.is_Pit(r, c):
                    policyL.append("PIT")
                elif self.goal_r == r and self.goal_c == c:
                    policyL.append("GOAL")
                elif self.policyMap[r][c] == 0:
                    policyL.append("S")
                elif self.policyMap[r][c] == 1:
                    policyL.append("N")
                elif self.policyMap[r][c] == 2:
                    policyL.append("W")
                elif self.policyMap[r][c] == 3:
                    policyL.append("E")

        print policyL

        policyList.data = policyL
        rospy.sleep(0.3)
        self.resPub.publish(policyList)
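
The MDP_Algo, update_board, and getMdp helpers these examples call are not shown on this page. A minimal sketch of the value-iteration sweep they presumably perform, using the forward/backward/left/right transition model from the act tables above; the function name, the scalar value map, and the wall-bump convention are assumptions, not the source's:

def value_sweep(values, config):
    # Illustrative single Bellman-update sweep over a scalar value grid.
    # Offsets per heading are (forward, backward, left, right), matching
    # the act tables in the examples above.
    rows, cols = config["map_size"]
    walls, pits, goal = config["walls"], config["pits"], config["goal"]
    probs = [config["prob_move_forward"], config["prob_move_backward"],
             config["prob_move_left"], config["prob_move_right"]]
    discount = config["discount_factor"]
    moves = {"S": [(1, 0), (-1, 0), (0, -1), (0, 1)],
             "N": [(-1, 0), (1, 0), (0, 1), (0, -1)],
             "W": [(0, -1), (0, 1), (-1, 0), (1, 0)],
             "E": [(0, 1), (0, -1), (1, 0), (-1, 0)]}
    new_values = [row[:] for row in values]
    policy = [["GOAL" if [r, c] == goal else
               "PIT" if [r, c] in pits else
               "WALL" if [r, c] in walls else "E"
               for c in range(cols)] for r in range(rows)]
    for r in range(rows):
        for c in range(cols):
            if [r, c] in walls or [r, c] in pits or [r, c] == goal:
                continue  # terminal and blocked cells keep their values
            best = None
            for action, offsets in moves.items():
                q = 0.0
                for p, (dr, dc) in zip(probs, offsets):
                    nr, nc = r + dr, c + dc
                    if not (0 <= nr < rows and 0 <= nc < cols) or [nr, nc] in walls:
                        # Bumping a wall or the map edge: stay put and take
                        # the wall penalty (a common convention, assumed here).
                        q += p * (config["reward_for_hitting_wall"]
                                  + discount * values[r][c])
                    else:
                        q += p * (config["reward_for_each_step"]
                                  + discount * values[nr][nc])
                if best is None or q > best:
                    best, policy[r][c] = q, action
            new_values[r][c] = best
    return new_values, policy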
Example #11
	def __init__(self):
		rospy.init_node("Robot")
		self.config = read_config()
		#Publishers
		self.results_AStar_pub = rospy.Publisher("/results/path_list", 
			AStarPath, 
			queue_size=10, 
			latch=True
		)
		self.results_MDP_pub = rospy.Publisher("/results/policy_list",
			PolicyList,
			queue_size=10,
			latch=True
		)
		self.sim_complete_pub = rospy.Publisher("/map_node/sim_complete",
			Bool,
			queue_size=10,
			latch=True
		)
		#A* Inputs
		self.move_list = self.config["move_list"]
		self.map_size = self.config["map_size"]
		self.start = self.config["start"]
		self.goal = self.config["goal"]
		self.walls = self.config["walls"]
		self.pits = self.config["pits"]
		#MDP Inputs
		#probabilities
		self.p_f = self.config["prob_move_forward"]
		self.p_b = self.config["prob_move_backward"]
		self.p_l = self.config["prob_move_left"]
		self.p_r = self.config["prob_move_right"]
		#rewards
		self.r_step = self.config["reward_for_each_step"]
		self.r_wall = self.config["reward_for_hitting_wall"]
		self.r_goal = self.config["reward_for_reaching_goal"]
		self.r_pit = self.config["reward_for_falling_in_pit"]
		#iteration
		max_iterations = self.config["max_iterations"]
		threshold_difference = self.config["threshold_difference"]
		index = 0		
		last_reward = 1
		this_reward = 0
		opt_move = AStarPath()
		#self.p_learning = self.config["p_learning"]
		#self.a_learning = self.config["a_learning"]
		#pLearn
		plearn.p_learn()
		#aLearn
		alearn.a_learn()
		#A*
		rospy.sleep(0.1)
		opt_path = astar.a_opt_search()
		for move in opt_path:
			opt_move.data = move
			rospy.sleep(0.1)
			self.results_AStar_pub.publish(opt_move)
		#MDP
		opt_policy = PolicyList()
		opt_policy.data = mdp.m_opt_search()
		self.results_MDP_pub.publish(opt_policy)
		rospy.sleep(0.1)
		#Sim Complete
		rospy.sleep(1)
		self.sim_complete_pub.publish(True)
		rospy.sleep(1)
		rospy.signal_shutdown("simulation complete")