taskToPerform = args.task
epsilon = args.epsilon
verbose = args.verbose
inputMDP = args.input
outputPath = args.output
optionsToLoad = args.load
bothDirections = args.both
num_seeds = args.num_seeds
max_length_episode = args.max_length_ep
num_episodes = args.num_episodes

if not verbose:
    warnings.filterwarnings('ignore')

# Create environment
env = GridWorld(path=inputMDP, useNegativeRewards=False)
numStates = env.getNumStates()
numRows, numCols = env.getGridDimensions()

# Load any options passed on the command line, plotting each one's policy:
loadedOptions = None
if optionsToLoad is not None:
    loadedOptions = []
    for i in range(len(optionsToLoad)):
        loadedOptions.append(Utils.loadOption(optionsToLoad[i]))
        plot = Plotter(outputPath, env)
        plot.plotPolicy(loadedOptions[i], str(i + 1) + '_')

if taskToPerform == 1:
    # Discover options
    optionDiscoveryThroughPVFs(env=env, epsilon=epsilon,
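# ---------------------------------------------------------------------------
# Not part of the original script: a minimal sketch of the idea behind
# optionDiscoveryThroughPVFs, assuming the standard proto-value-function
# recipe (build the state-transition graph of the grid, form its normalized
# Laplacian, and take the smoothest eigenvectors as proto-value functions).
# The helper names (build_adjacency, proto_value_functions) are hypothetical
# and do not come from this repository.
# ---------------------------------------------------------------------------
import numpy as np

def build_adjacency(rows, cols):
    """4-connected adjacency matrix for an obstacle-free rows x cols grid."""
    n = rows * cols
    A = np.zeros((n, n))
    for r in range(rows):
        for c in range(cols):
            s = r * cols + c
            for dr, dc in ((1, 0), (-1, 0), (0, 1), (0, -1)):
                nr, nc = r + dr, c + dc
                if 0 <= nr < rows and 0 <= nc < cols:
                    A[s, nr * cols + nc] = 1.0
    return A

def proto_value_functions(A, k):
    """Return the k eigenvectors of the normalized Laplacian with the
    smallest eigenvalues; these vary most smoothly over the state graph."""
    d = A.sum(axis=1)
    D_inv_sqrt = np.diag(1.0 / np.sqrt(d))
    L = np.eye(len(A)) - D_inv_sqrt @ A @ D_inv_sqrt  # normalized Laplacian
    eigvals, eigvecs = np.linalg.eigh(L)              # ascending eigenvalues
    return eigvecs[:, :k]

# Example: the first 4 PVFs of a 5 x 5 grid.
pvfs = proto_value_functions(build_adjacency(5, 5), 4)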
# Tail of an earlier commented-out test grid (its opening rows are missing):
# [' ','#',' ',' ',' '],
# [' ','#', 1,'#', 10],
# ['S',' ',' ',' ',' '],
# [-10,-10, -10, -10, -10]]

# grid = [[' ',' ',' ',+1],
#         ['#','#',' ','#'],
#         [' ','#',' ',' '],
#         [' ','#','#',' '],
#         ['S',' ',' ',' ']]

# A cliff-walk style corridor: terminal rewards of 1 and 10 at either end,
# with -100 cliff cells along the top and bottom rows.
grid = [['#', -100, -100, -100, -100, -100, '#'],
        [  1,  'S',  ' ',  ' ',  ' ',  ' ',  10],
        ['#', -100, -100, -100, -100, -100, '#']]

Grid = GridWorld(grid, 0.01, -1.0)

# Exact solution: value iteration on the MDP, then print the value and
# greedy action for every state.
M = MarkovDecisonProcess(0.4, Grid, 100)
M.ValueIteration()
values = M.getValues()
actions = M.getActions()
qvalues = M.getQValues()
for i in values.keys():
    print(f"{i} : {values[i]}, {actions[i]}")

# Approximate solution: tabular Q-learning on the same grid, printing the
# learned values and greedy actions for comparison.
RL = QLearning(Grid, 50000, alpha=0.6, epsilon=0.2, discount=0.4)
RL.train()
values = RL.getValues()
actions = RL.getActions()
qvalues = RL.getQValues()
for i in values.keys():
    print(f"{i} : {values[i]}, {actions[i]}")
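# ---------------------------------------------------------------------------
# Not part of the original script: a minimal sketch of the tabular
# Q-learning loop that a class like QLearning above presumably runs.
# The environment interface (env.reset, env.step, env.actions) is an
# assumption in the Gym style, not this repository's API.
# ---------------------------------------------------------------------------
import random
from collections import defaultdict

def q_learning(env, episodes, alpha=0.6, epsilon=0.2, discount=0.4):
    Q = defaultdict(float)  # (state, action) -> value, implicitly zero
    for _ in range(episodes):
        state, done = env.reset(), False
        while not done:
            # Epsilon-greedy action selection.
            if random.random() < epsilon:
                action = random.choice(env.actions)
            else:
                action = max(env.actions, key=lambda a: Q[(state, a)])
            next_state, reward, done = env.step(action)
            # One-step temporal-difference update:
            # Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
            best_next = max(Q[(next_state, a)] for a in env.actions)
            Q[(state, action)] += alpha * (
                reward + discount * best_next - Q[(state, action)])
            state = next_state
    return Q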