""" import numpy as np import matplotlib.pyplot as plt from gridWorld import standardGrid, negativeGrid from iterativePolicyEvaluation import printPolicy, printValues from monteCarloControl import argMax from td0Prediction import randomAction GAMMA = 0.9 ALPHA = 0.1 ACTIONS = {'U','D','L','R'} if __name__ == '__main__': grid = negativeGrid(stepCost = -0.1) print ("Rewards:") printValues(grid.rewards,grid) Q = {} states = grid.allStates() for s in states: Q[s] = {} for a in ACTIONS: Q[s][a] = 0 updateCountsSarsa = {}
Created on Mon Jan 7 11:37:45 2019 @author: user """ import numpy as np from gridWorld import standardGrid, negativeGrid from iterativePolicyEvaluation import printValues, printPolicy EPSILON = 10e-4 GAMMA = 0.9 ACTIONS = ('U', 'D', 'L', 'R') if __name__ == '__main__': grid = negativeGrid() policy = {} for s in grid.actions.keys(): policy[s] = np.random.choice(ACTIONS) print("Rewards:") printValues(grid.rewards, grid) print("Initial Policy:") printPolicy(policy, grid) V = {} states = grid.allStates() for s in states: