def solve(world, goalInWorld):
    solution = GridWorld(world.width, world.height)
    solution.cells = [Cell(cell) for cell in world.cells]
    goal = solution.get(goalInWorld.col, goalInWorld.line)
    goal.cost = 0
    closed = []
    opened = [goal]
    while opened:
        cell = opened.pop()
        closed.append(cell)
        for adj in solution.getAccessibleCells(cell):
            if adj.reachable:  # we ignore obstacles
                direction = Direction.fromTo(adj, cell)
                cost = cell.cost + direction.cost()
                # relax the neighbour if it has not been visited yet (cost == -1)
                # or if we found a cheaper path to it
                if adj.cost == -1 or adj.cost > cost:
                    adj.direction = direction
                    adj.cost = cost
                    opened.append(adj)
    return solution
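# Minimal usage sketch (an illustration, not from the original sources; it mirrors
# the unit tests further below): solve() floods costs outward from the goal, so
# afterwards every reachable cell stores its distance to the goal and the first
# step to take towards it. The queried cell (0, 0) is a hypothetical start.
world = GridWorld(10, 10)
goal = next(cell for cell in world.cells if cell.reachable)
solution = solve(world, goal)
start = solution.get(0, 0)
print(start.cost, start.direction)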
def setUp(self):
    self.world = GridWorld(10, 10)
    self.obstaclesProb = 0.2
    self.world.addRandomObstacles(
        math.floor(self.world.getLength() * self.obstaclesProb))
    for cell in self.world.cells:
        if cell.reachable:
            self.goal = cell
            break
def createSmallMaze(self):
    # should be GridWorldSmall()
    self.GridWorldGame = GridWorld((5, 5))
    cols = self.GridWorldGame.size[0]
    rows = self.GridWorldGame.size[1]
    self.MAZE_X = cols * 32
    self.MAZE_Y = rows * 32
    FRAME = 8
    # what happens if it's not 0 modulo 32?
    self.START_X = (self.MAX_X - cols * 32) / 2 + FRAME
    self.START_Y = (self.MAX_Y - rows * 32) / 2 + FRAME
    self.smileyPos = (self.START_X, self.START_Y)
def main():
    env = GridWorld()
    _, es1, ts1 = independentQLearning(env, lambda x: x < 100, 0)
    qList, es2, ts2 = shareStateQLearning(env, lambda x: x < 100, 0)
    iQL = plt.scatter(es1, ts1, c='red')
    ssQL = plt.scatter(es2, ts2, c='blue')
    iQL.set_label("Independent")
    ssQL.set_label("5 Predators, 2 Prey, Share State")
    plt.xlabel("Episodes")
    plt.ylabel("Cumulative TimeSteps")
    plt.legend()
    plt.show()
    env.simulateTrajectory(qList)
class Tests(unittest.TestCase):

    def setUp(self):
        self.world = GridWorld(10, 10)
        self.obstaclesProb = 0.2
        self.world.addRandomObstacles(
            math.floor(self.world.getLength() * self.obstaclesProb))
        for cell in self.world.cells:
            if cell.reachable:
                self.goal = cell
                break

    def test_runs(self):
        solution = solve(self.world, self.goal)
        print(enigmaAsStr(solution, self.goal))
def evaluate(goals, EQ):
    env = GridWorld(goals=goals, T_states=T_states)
    policy = EQ_P(EQ)
    state = env.reset()
    done = False
    t = 0
    G = 0
    while not done and t < 100:
        action = policy[state]
        state_, reward, done, _ = env.step(action)
        state = state_
        G += reward
        t += 1
    return G
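# Hedged usage sketch (not from the original sources): evaluate() rolls out the
# policy derived from an extended Q-function EQ for at most 100 steps and returns
# the undiscounted return G. Averaging several rollouts smooths out the
# stochasticity of env.reset(); `goals` and `EQ` are assumed to exist already.
returns = [evaluate(goals, EQ) for _ in range(20)]
print("mean return:", sum(returns) / len(returns))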
def openMDPGUI(self):
    global w, g
    if self.checkSettingValues():
        self.master.destroy()
        df = float(self.discFactor.get())
        rews = list(map(lambda x: float(x.get()), self.rewValue))
        probs = list(map(lambda x: float(x.get()), self.probValue))
        w = GridWorld([[GridWorld.CELL_VOID, GridWorld.CELL_VOID,
                        GridWorld.CELL_VOID, GridWorld.CELL_EXIT],
                       [GridWorld.CELL_VOID, GridWorld.CELL_WALL,
                        GridWorld.CELL_VOID, GridWorld.CELL_PIT],
                       [GridWorld.CELL_VOID, GridWorld.CELL_VOID,
                        GridWorld.CELL_VOID, GridWorld.CELL_VOID]])
        w.setDiscountFactor(df)
        w.setRewards(rews[0], rews[1], rews[2])
        w.setProbabilities(probs[0], probs[1], probs[2], probs[3])
        g = MDPGUI(w)
def objectiveFunction(args):
    learning_rate, min_epsilon, max_epsilon, epsilon_decay, discount_factor = args
    num_of_episodes = 500
    max_steps = 1000
    environment = GridWorld()
    agentQ = Q_Agent(environment,
                     epsilon=max_epsilon,
                     learning_rate=learning_rate,
                     discount_factor=discount_factor)
    train(environment, agentQ,
          episodes=num_of_episodes,
          max_steps_per_episode=max_steps,
          min_epsilon=min_epsilon,
          max_epsilon=max_epsilon,
          epsilon_decay=epsilon_decay)
    mean_reward = test(environment, agentQ, episodes=1000)
    value_map = np.zeros((environment.height, environment.width))
    for x in range(environment.height):
        for y in range(environment.width):
            q_values_of_state = agentQ.q_table[(x, y)]
            maxValue = max(q_values_of_state.values())
            value_map[x, y] = maxValue
    if save:
        utils.plotValueFunction(value_map, os.path.join(save_path, 'heatmap.jpg'))
    return -mean_reward
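# Hedged sketch (an assumption, not shown in the original code): an objective of
# this shape, taking a flat list of hyper-parameters and returning the negated
# mean reward, is typically minimised with a black-box optimiser such as
# scikit-optimize's gp_minimize. The search ranges below are illustrative only.
from skopt import gp_minimize
from skopt.space import Real

search_space = [
    Real(0.01, 1.0, name='learning_rate'),
    Real(0.001, 0.1, name='min_epsilon'),
    Real(0.5, 1.0, name='max_epsilon'),
    Real(0.001, 0.1, name='epsilon_decay'),
    Real(0.8, 0.999, name='discount_factor'),
]
result = gp_minimize(objectiveFunction, search_space, n_calls=25)
print("best hyper-parameters:", result.x, "best mean reward:", -result.fun)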
def gridworld():
    '''Create the complete discrete environment for MDP modelling (InSpace tiled),
    including rewards and transition probabilities.'''
    w = GridWorld([[GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_EXIT, GridWorld.CELL_VOID, GridWorld.CELL_VOID],
                   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID],
                   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID],
                   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_PIT, GridWorld.CELL_VOID],
                   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID],
                   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID],
                   [GridWorld.CELL_VOID, GridWorld.CELL_PIT, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID],
                   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID],
                   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID],
                   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID]],
                  discountFactor=1)
    w.setRewards(-0.04, -1, 1)
    w.setProbabilities(0.8, 0.1, 0.1, 0)
    # w.setDiscountFactor(0.6)
    return w
def baseTester():
    '''Runs a somewhat comprehensive test.'''
    try:
        import QLearner as ql
    except ImportError:
        pass

    # It is worth noting here that num_states can be 100 for any grid < 10x10
    # using the tuckerHash; we need a new hash algorithm to use a grid outside
    # those parameters.
    baseKwargs = {'num_states': 100, 'alpha': 1.0, 'gamma': 0.9, 'rar': 0.5,
                  'radr': 0.99, 'dyna': 0, 'verbose': False}

    # If you want to add your own test, add it here. Each tuple is one test:
    # (csv file, expected convergence iterations, expected policy length,
    #  kwarg modifier, test name)
    myTestList = [
        ('testEasyWorld.csv', 800, 13, {}, 'easy test'),
        ('world01.csv', 7000, 16, {}, 'Tucker Test 1'),
        ('world02.csv', 7000, 17, {}, 'Tucker Test 2'),
        ('testGridWorld.csv', 5000, 20, {}, 'Leo Base Test'),
        ('testGridWorld.csv', 18000, 20, {'alpha': .2}, 'Test Learning Rate'),
        ('testEasyWorld.csv', 700, 13, {'rar': 0.05}, 'Test Exploration'),
        ('testEasyWorld.csv', 700, 13, {'radr': 0.8}, 'Test Exploration Decay'),
        ('testGridWorld.csv', 3000, 20, {'gamma': 0.8}, 'Test Discount Rate'),
        ('testGridWorld.csv', 1100, 20, {'dyna': 100}, 'Test Dyna'),
    ]

    fdtest = myTestList[7:9]
    # for test in myTestList:
    for test in fdtest:
        print('-------------------------------')
        print(test[4])
        world = GridWorld(test[0])
        testKwargs = copy(baseKwargs)
        for k in test[3].keys():
            testKwargs[k] = test[3][k]
        print('parameters %s' % str(testKwargs))
        learner = ql.QLearner(**testKwargs)
        print(world.grid)
        myTester = QTester(world, learner)
        nIter = test[1]
        totalIter = nIter
        lastPolicyLength = 0
        # someone let me know if there's a better way to check for convergence time
        while totalIter < (test[1] * 1.4):
            myTester.nIter(nIter)
            nIter = int(.05 * test[1])
            myPolicy = myTester.getPolicy()
            policyLength = len(myPolicy)
            totalIter += nIter
            if (lastPolicyLength == policyLength) and (policyLength < 100):
                print('converged in approx %i iterations' % totalIter)
                print(policyLength, myPolicy, test[2])
                break
            lastPolicyLength = policyLength
        if (test[1] * 1.2 >= totalIter) and (policyLength == test[2]):
            print('*** TEST PASSED ***')
        else:
            print('xxx TEST FAILED xxx')
def setUp(self):
    self.n = 5
    self.p = 1
    self.gridworld = GridWorld(self.n, self.p)
    self.go_right_policy = np.ones(self.n * self.n, dtype=int)
    self.discount = 0.9
    self.large_discount = 0.2
    self.policy = np.array(
        [['TERMINAL', 'RIGHT', 'RIGHT', 'RIGHT', 'TERMINAL'],
         ['RIGHT', 'RIGHT', 'RIGHT', 'RIGHT', 'UP'],
         ['RIGHT', 'RIGHT', 'RIGHT', 'RIGHT', 'UP'],
         ['RIGHT', 'RIGHT', 'RIGHT', 'RIGHT', 'UP'],
         ['RIGHT', 'RIGHT', 'RIGHT', 'RIGHT', 'UP']])
    self.policy_large_discount = np.array(
        [['TERMINAL', 'LEFT', 'RIGHT', 'RIGHT', 'TERMINAL'],
         ['UP', 'LEFT', 'RIGHT', 'RIGHT', 'UP'],
         ['UP', 'LEFT', 'RIGHT', 'RIGHT', 'UP'],
         ['UP', 'LEFT', 'RIGHT', 'RIGHT', 'UP'],
         ['UP', 'LEFT', 'RIGHT', 'RIGHT', 'UP']])
def __init__(self, epsilon=0.01, greedy=False, alpha=0.1, gamma=0.95,
             visual=True, goal=(10, 8), agentPose=(1, 1, 'up'),
             showTrial=True, randomReset=False, epsilonStrat=1,
             epsilonFactor=500):
    """
    gridWorld: GridWorld object
    epsilon: value used for epsilon-greedy search
    alpha: step size
    gamma: discount factor
    """
    self.actionValues = Counter()
    self.epsilonFactor = epsilonFactor
    self.randomReset = randomReset
    self.epsilon = epsilon
    self.greedy = greedy
    self.epsilonStrat = epsilonStrat
    self.goal = goal
    self.Q = dict()
    self.gridWorld = GridWorld(goal, agentPose, visual=visual,
                               showTrial=showTrial, randomReset=randomReset)
    self.actions = self.gridWorld.getActions()
    self.Model = dict()
    self.alpha = alpha
    self.PriorityQueue = PriorityQueue()
    self.gamma = gamma
    self.exp = []
    self.rewards = dict()
    self.rewardNums = dict()
    self.predecessors = defaultdict(set)
    self.initQValues()
def start_grid_mdp():
    """Starts the program; restarts if the user wants to."""
    grid = load_grid(get_file_path())
    world = GridWorld(grid)
    move_costs = get_move_cost()
    gamma = get_gamma()
    eval_steps = get_evaluation_steps()
    MDP(world, eval_steps, gamma, move_costs)
    if start_again():
        start_grid_mdp()
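# Hedged entry-point sketch (not part of the original sources): the helpers above
# (load_grid, get_move_cost, get_gamma, ...) are assumed to prompt the user
# interactively, so the whole program starts with a single call when the module
# is executed as a script.
if __name__ == "__main__":
    start_grid_mdp()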
def buildBiasEngine(self):
    """Simulates MDPs with varying bias to build a bias inference engine."""
    print("Loading MDPs...\n")

    # Unnecessary progress bar for terminal
    bar = pyprind.ProgBar(len(self.test))
    for i in self.test:
        self.sims.append(
            GridWorld(self.grid, i, self.discount, self.tau, self.epsilon))
        bar.update()

    print("\nDone loading MDPs...")
import numpy as np
from matplotlib import pyplot as plt
import deepdish as dd

from GridWorld import GridWorld
from library import *

env = GridWorld()
T_states = [(3, 3), (3, 9), (9, 3), (9, 9),
            (1, 1), (1, 2), (1, 3), (1, 4), (1, 5), (1, 7), (1, 8), (1, 9), (1, 10), (1, 11),
            (11, 1), (11, 2), (11, 3), (11, 4), (11, 5), (11, 7), (11, 8), (11, 9), (11, 10),
            (2, 1), (3, 1), (4, 1), (5, 1), (7, 1), (8, 1), (9, 1), (10, 1),
            (2, 11), (3, 11), (4, 11), (5, 11), (6, 11), (8, 11), (9, 11), (10, 11), (11, 11)]

###################################### Qs
BTasksQ = [[t] for t in T_states]

###################################### EQs
Bases = []
n = int(np.ceil(np.log2(len(T_states))))
m = (2**n) / 2
for i in range(n):
    Bases.append([])
    b = False
    for j in range(0, 2**n):
        if j >= len(T_states):
            break
        if b:
            Bases[i].append(1)  # 1 = True = rmax
        else:
            Bases[i].append(0)  # 0 = False = rmin
        if (j + 1) % m == 0:
from GridWorld import GridWorld
from GridWorld import GridWorldAdditive
from ValueIteration import ValueIteration

# Run Value Iteration in different Grid World environments
if __name__ == "__main__":
    gamma = 0.9
    print("Grid world Value Iteration with discounted rewards gamma = %.2f\n" % gamma)
    terminals = {(0, 3): +1, (1, 3): -1}
    gw = GridWorld((3, 4), 0.8, [(1, 1)], terminals)
    vi = ValueIteration()
    values = vi.valueIteration(gw, gamma)
    gw.printValues(values)
    qvalues = vi.getQValues(gw, values, gamma)
    gw.printQValues(qvalues)
    policy = vi.getPolicy(gw, values, gamma)
    gw.printPolicy(policy)

    reward = -0.01
    print("Grid world Value Iteration with additive rewards = %.2f\n" % reward)
    gwa = GridWorldAdditive((3, 4), 0.8, [(1, 1)], terminals, reward)
    values = vi.valueIteration(gwa, 1, 100)
    gwa.printValues(values)
    qvalues = vi.getQValues(gwa, values, 1)
    gwa.printQValues(qvalues)
    policy = vi.getPolicy(gwa, values, 1)
    gwa.printPolicy(policy)

    reward = -0.04
    print("Grid World with additive rewards = %.2f\n" % reward)
    gwa = GridWorldAdditive((3, 4), 0.8, [(1, 1)], terminals, reward)
def showhelp():
    hlpStr = ("Markov Decision Process Examples\n"
              " Examples:\n"
              " gridworld 1: std grid world as the book (step cost -0.04, discount factor 1)\n"
              " gridworld 2: low discount factor 0.6 (step cost -0.04)\n"
              " gridworld 3: low step cost -0.01\n"
              " gridworld 4: suicide mode (step cost -2)\n")
    print(hlpStr)
    exit()

if len(sys.argv) == 1:
    showhelp()

if sys.argv[1] == "gridworld":
    w = GridWorld([[GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_EXIT],
                   [GridWorld.CELL_VOID, GridWorld.CELL_WALL, GridWorld.CELL_VOID, GridWorld.CELL_PIT],
                   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID]],
                  discountFactor=1)
    if len(sys.argv) < 3:
        mdpc = MDPChooser()
    elif sys.argv[2] == "1":
        w.setRewards(-0.04, -1, 1)
        w.setProbabilities(0.8, 0.1, 0.1, 0)
        w.setDiscountFactor(1)
        g = MDPGUI(w)
    elif sys.argv[2] == "2":
        w.setRewards(-0.04, -1, 1)
        w.setProbabilities(0.8, 0.1, 0.1, 0)
        w.setDiscountFactor(0.9)
        g = MDPGUI(w)
    elif sys.argv[2] == "3":
def __init__(self):
    self.game = GridWorld((5, 5))
    self.squareCountGrid = self.game.createSquareCount()
    self.alpha = 0.1
    self.gamma = 0.9
from GridWorld import GridWorld

g = GridWorld(3, 4)

policy = {
    (0, 0): 'R', (0, 1): 'R', (0, 2): 'R',
    (1, 0): 'U', (1, 1): 'U', (1, 2): 'U', (1, 3): 'U',
    (2, 0): 'R', (2, 1): 'R', (2, 2): 'U', (2, 3): 'L',
}

def print_policy(p, g):
    for r in range(g.row):
        print('------------------')
        for c in range(g.col):
            a = p.get((r, c), ' ')
            print(' %s |' % a, end="")
        print("")

def print_value(V, g):
    for r in range(g.row):
        print('------------------')
        for c in range(g.col):
            v = V.get((r, c), 0)
    t = 0
    G = 0
    while not done and t < 100:
        action = policy[state]
        state_, reward, done, _ = env.step(action)
        state = state_
        G += reward
        t += 1
    return G

for t in range(len(types)):
    print("type: ", t)

    # Learning universal bounds (min and max tasks)
    env = GridWorld(goals=T_states, dense_rewards=not types[t][0])
    EQ_max, _ = Goal_Oriented_Q_learning(env, maxiter=maxiter)
    env = GridWorld(goals=T_states, goal_reward=-0.1, dense_rewards=not types[t][0])
    EQ_min, _ = Goal_Oriented_Q_learning(env, maxiter=maxiter)

    # Learning base tasks and doing composed tasks
    goals = Bases[0]
    goals = [[pos, pos] for pos in goals]
    env = GridWorld(goals=goals, dense_rewards=not types[t][0],
                    T_states=T_states if types[t][1] else goals)
    A, stats1 = Goal_Oriented_Q_learning(
        env, maxiter=maxiter, T_states=None if types[t][1] else T_states)
class TDL_solution:

    def __init__(self):
        self.game = GridWorld((5, 5))
        self.squareCountGrid = self.game.createSquareCount()
        self.alpha = 0.1
        self.gamma = 0.9

    def playTDLGame(self, startSquare, randomMove):
        self.game.currentSquare = startSquare
        keepPlaying = not self.game.gameOver()
        squares_and_returns = [(self.game.currentSquare, 0)]
        while keepPlaying:
            # policy
            i = self.game.currentSquare[0]
            j = self.game.currentSquare[1]
            move = self.game.policyGrid[i][j]
            if randomMove < np.random.rand():
                moves = self.game.possibleMoves((i, j))
                moves.remove(move)
                if len(moves) > 0:
                    idx = np.random.randint(0, len(moves))
                    move = moves[idx]
            # move
            self.game.move(move)
            i = self.game.currentSquare[0]
            j = self.game.currentSquare[1]
            theReturn = self.game.returnGrid[i][j]
            squares_and_returns.append((self.game.currentSquare, theReturn))
            keepPlaying = not self.game.gameOver()
        G = 0
        self.squares_and_values = []
        for square, theReturn in reversed(squares_and_returns):
            self.squares_and_values.append((square, G))
            G = theReturn + self.game.gamma * G
        # self.squares_and_values.reverse()

    def playSarsa(self, startSquare, randomMove):
        self.game.currentSquare = startSquare
        keepPlaying = not self.game.gameOver()
        while keepPlaying:
            # policy
            i1 = self.game.currentSquare[0]
            j1 = self.game.currentSquare[1]
            move = self.game.policyGrid[i1][j1]
            if randomMove < np.random.rand():
                moves = self.game.possibleMoves((i1, j1))
                print(str(i1) + " " + str(j1) + " " + str(moves) + " " + str(move))
                moves.remove(move)
                if len(moves) > 0:
                    idx = np.random.randint(0, len(moves))
                    move = moves[idx]
            # move
            self.game.move(move)
            i2 = self.game.currentSquare[0]
            j2 = self.game.currentSquare[1]
            theReturn = self.game.returnGrid[i2][j2]
            self.game.valueGrid[i1][j1] = self.game.valueGrid[i1][j1] + self.alpha * (
                theReturn + self.gamma * self.game.valueGrid[i2][j2] - self.game.valueGrid[i1][j1])
            keepPlaying = not self.game.gameOver()

    def playQLearning(self, startSquare, randomMove):
        self.game.currentSquare = startSquare
        keepPlaying = not self.game.gameOver()
        while keepPlaying:
            # policy
            i1 = self.game.currentSquare[0]
            j1 = self.game.currentSquare[1]
            move = self.game.policyGrid[i1][j1]
            # we use the best move even if random runs over it
            i3 = self.game.currentSquare[0]
            j3 = self.game.currentSquare[1]
            if randomMove < np.random.rand():
                moves = self.game.possibleMoves((i1, j1))
                print(str(i1) + " " + str(j1) + " " + str(moves) + " " + str(move))
                moves.remove(move)
                if len(moves) > 0:
                    idx = np.random.randint(0, len(moves))
                    move = moves[idx]
            # move
            self.game.move(move)
            i2 = self.game.currentSquare[0]
            j2 = self.game.currentSquare[1]
            theReturn = self.game.returnGrid[i2][j2]
            self.game.valueGrid[i1][j1] = self.game.valueGrid[i1][j1] + self.alpha * (
                theReturn + self.gamma * self.game.valueGrid[i3][j3] - self.game.valueGrid[i1][j1])
            keepPlaying = not self.game.gameOver()

    def updateValueGrid(self):
        for t in range(len(self.squares_and_values) - 1):
            square, _ = self.squares_and_values[t]
            nextSquare, value = self.squares_and_values[t + 1]
            i1 = square[0]
            j1 = square[1]
            i2 = nextSquare[0]
            j2 = nextSquare[1]
            self.game.valueGrid[i1][j1] = self.game.valueGrid[i1][j1] + self.alpha * (
                value + self.gamma * self.game.valueGrid[i2][j2] - self.game.valueGrid[i1][j1])

    def updatePolicyGrid(self):
        # check if policy changed
        # hasChanged = False
        # if bestMove is new, set to True
        rows = self.game.size[0]
        cols = self.game.size[1]
        change = False
        for i in range(rows):
            for j in range(cols):
                if self.game.policyGrid[i][j] in [0, 1, 2, 3]:
                    self.game.currentSquare = (i, j)
                    oldMove = self.game.policyGrid[i][j]
                    self.game.policyGrid[i][j] = self.game.bestMove()
                    if oldMove != self.game.policyGrid[i][j]:
                        change = True
        return change

    def printGrids(self):
        self.game.printPolicyGrid()
        self.game.printReturnGrid()
        self.game.printValueGrid()
from Evaluation import Evaluation
from GridWorld import GridWorld
from Learning import Learning

# Specify the size of the grid world
row = 5
column = 5

LearningAgentSpan = 10    # lifespan of the learning agent
LearningTimes = 100       # number of learning runs
P = 5                     # reward
T = 10                    # number of steps to trace back
EvaluationAgentSpan = 10  # lifespan of the evaluation agent
EvaluationTimes = 100     # number of evaluation trials

grid_world = GridWorld(row, column)
grid_world.make_grid_world()

learning = Learning(grid_world.get_grid_world(), row, column)
learning.do_learning(LearningAgentSpan, LearningTimes, P, T)

evaluation = Evaluation(learning.get_grid_world(), row, column)
evaluation.evaluation(EvaluationAgentSpan, EvaluationTimes)
vehState = start

env_file = open("Environment.txt", "w")
gridWorld = CreateEnvironment()
gridWorld.create(env_file, size_row='10', size_col='10',
                 agent_row=str(vehState[0]), agent_col=str(vehState[1]),
                 goal_row=str(goal[0]), goal_col=str(goal[1]),
                 static_number='2', static_list=[0, 3, 2, 4])

env_file = open("Environment.txt", "r")
text_in_file = env_file.readline()
print(text_in_file)

grid = GridWorld(text_in_file)
gw = grid.gridDefine()

# -------------------------------------------------------
# initialize agent class and uav class
Agent = agent(vehState)

# define a model dictionary, which maps user inputs of learning model names
# to learning model functions
modelType = {
    "random": Agent.predict_Random,
    "standard": Agent.predict_Standard,
    "NN": Agent.predict_NN,
}

UAV = uav(vehState)

# initialize decision model (options = "random", "standard", or "NN")
model = "random"  # will be a user input
import copy
import hashlib
import json
import random

import numpy as np
from matplotlib import pyplot as plt
import torch
import torch.nn as nn
from torch.nn.modules.loss import SmoothL1Loss
from torch.optim import Adam

from GridWorld import GridWorld

grid_world = GridWorld()
rewards_to_plot = []
stats_ax = None
rewards_ax = None
model = None


def init_gridworld(random_player=False, random_mines=False, maze=False):
    global grid_world
    grid_world = GridWorld(random_player, random_mines, maze)


class NeuralNetwork(nn.Module):
    def __init__(self, iterations=500):
        super(NeuralNetwork, self).__init__()
from GridWorld import GridWorld
from Robot import Robot

env = GridWorld("grid-small.txt")
env.print_map()

gamma = 0.9
start = [0, 0]
agent = Robot(env, gamma)

epochs = 500
decay = 0.99
rvm_max_iter = 500
max_step = 1000
epsilon = 1
epsilon_threshold = 0.001
verbose = True
verbose_iteration = 1

steps, rewards = agent.learn(epochs, decay, rvm_max_iter, max_step,
                             epsilon, start, verbose, verbose_iteration)
path = agent.get_path(start)
print(path)
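# Hedged follow-up sketch (not in the original script): agent.learn() returns
# per-epoch step counts and rewards, so a learning curve can be plotted directly.
# The axis labels are assumptions about what those sequences contain.
from matplotlib import pyplot as plt

plt.plot(rewards)
plt.xlabel("epoch")
plt.ylabel("total reward")
plt.show()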
def __createEmptyPolicy(self):
    """we create a partial function that is undefined in all points"""
    c, r = self.world.size
    return [[(None if self.world.cellAt(x, y) == GridWorld.CELL_VOID
              else GridWorld.randomAction())
             for x in range(c)]
            for y in range(r)]
        self.drawUtilities(canvas)
        self.drawQValues(canvas)
        self.drawPolicy(canvas)


# ===========================================================================
# TEST
# ===========================================================================
if __name__ == '__main__':
    w = GridWorld([[GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_EXIT],
                   [GridWorld.CELL_VOID, GridWorld.CELL_WALL, GridWorld.CELL_VOID, GridWorld.CELL_PIT],
                   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID]],
                  discountFactor=1)
    w.setRewards(-0.04, -1, 1)
    w.setProbabilities(0.8, 0.1, 0.1, 0)
    print("GridWorld-----------")
    print(w)
    print("----------------")
    print("\nPolicy----------")
    p = Policy(w)
def init_gridworld(random_player=False, random_mines=False, maze=False):
    global grid_world
    grid_world = GridWorld(random_player, random_mines, maze)
        # RL takes an action and gets the next state and reward
        _, next_state_index, reward, done = env.step(action)

        # RL chooses an action based on the next state
        next_action = RL.choose_action(str(next_state_index))

        # RL learns from this transition (s, a, r, s', a') ==> Sarsa
        RL.learn(str(state), action, reward, str(next_state_index), next_action)

        # swap state and action
        state = next_state_index
        action = next_action

        # break the while loop at the end of this episode
        if done:
            break

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    env = GridWorld()
    RL = Sarsa(actions=list(range(env.n_actions)))

    env.after(10000, update)
    env.mainloop()
    print(RL.q_table)
from GridWorld import GridWorld

g = GridWorld(3, 4)

policy = {
    (0, 0): {'R': 1},
    (0, 1): {'R': 1},
    (0, 2): {'R': 1},
    (1, 0): {'U': 1},
    (1, 1): {'U': 1},
    (1, 2): {'U': 1},
    (1, 3): {'U': 1},
    (2, 0): {'R': 0.5, 'U': 0.5},
    (2, 1): {'R': 1
import numpy as np
import tensorflow as tf

from GridWorld import GridWorld

np.random.seed(20)
tf.set_random_seed(20)

MAX_EPISODE = 1000
MAX_EP_STEPS = 1000   # maximum time steps in one episode
GAMMA = 0.9           # reward discount in TD error
lr_actor = 0.001
lr_critic = 0.01

grid_world_h = 5
grid_world_w = 5
env = GridWorld(grid_world_h, grid_world_w)
n_features = 2
n_actions = 4


class Actor(object):
    def __init__(self, sess, n_features, n_actions, lr=0.001):
        self.sess = sess
        self.state = tf.placeholder(tf.float32, [1, n_features], "state")
        self.action = tf.placeholder(tf.int32, None, "act")
        self.td_error = tf.placeholder(tf.float32, None, "td_error")

        with tf.variable_scope('Actor'):
            state_layer = tf.layers.dense(
                inputs=self.state,
        self.world.draw(canvas)
        self.drawUtilities(canvas)
        self.drawQValues(canvas)
        self.drawPolicy(canvas)


# ===========================================================================
# TEST
# ===========================================================================
if __name__ == '__main__':
    w = GridWorld([[GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_EXIT, GridWorld.CELL_VOID, GridWorld.CELL_VOID],
                   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID],
                   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID],
                   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_PIT, GridWorld.CELL_VOID],
                   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID],
                   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID],
                   [GridWorld.CELL_VOID, GridWorld.CELL_PIT, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID],
                   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID],
                   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID],
                   [GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID, GridWorld.CELL_VOID]],
                  discountFactor=1)
    w.setRewards(-0.04, -1, 1)
    w.setProbabilities(0.8, 0.1, 0.1, 0)
    # w.setDiscountFactor(0.9)
    # print("-GridWorld-")
    # print(w)
    # print("-----------")
    print("\n---Policy---")
        return -1

    grid_world.is_visited[x][y] = 1
    grid_world.dfs_route.append((x, y))
    random.shuffle(adjacent_nodes)
    for l in adjacent_nodes:
        if grid_world.is_visited[l[0]][l[1]] == 0:
            ret_val = random_dfs(grid_world, str(l[0]) + "," + str(l[1]))
            if ret_val == -1:
                grid_world.dfs_best_route.append((l[0], l[1]))
                return -1


def run_dfs(grid_world):
    # dfs(grid_world, grid_world.start_key)
    random_dfs(grid_world, grid_world.start_key)
    grid_world.dfs_best_route.append((grid_world.start_x, grid_world.start_y))
    grid_world.dfs_best_route = grid_world.dfs_best_route[::-1]


grid_world = GridWorld()
Functions.create_obstacles_from_hex(grid_world)
# Functions.create_random_obstacles(grid_world, 0.205)
# Functions.create_fixed_obstacles(grid_world, 6)
grid_world.scan_grid_and_generate_graph()
grid_world.print_graph()
grid_world.create_grid_ui(grid_world.m, grid_world.n,
                          (grid_world.start_x, grid_world.start_y),
                          (grid_world.end_x, grid_world.end_y),
                          grid_world.obstacles)
run_dfs(grid_world)
grid_world.move_on_given_route()
tk.mainloop()