def __init__(self, alpha, epsilon, gamma, actionList, size):
    """Set up the learner: screen geometry, action set, and one SARSA
    sub-agent per tracked entity (all / world / turtle / coin)."""
    self.size = size
    self.width, self.height = size
    self.actionList = actionList
    self.epsilon = epsilon
    # Four independent SARSA learners; index order is significant:
    # 0 = all, 1 = world, 2 = turtle, 3 = coin.
    self.agent = [
        SARSA.SARSA(alpha, epsilon, gamma, actionList)
        for _ in range(4)
    ]
def read_argument():
    """Parse the CLI rewards/trials/epsilon and build the environment
    plus the SARSA learner from them.

    Returns:
        (env, sarsa) tuple.
    """
    parser = argparse.ArgumentParser('Reinforcement Learning')
    # (name, type, help) for each positional argument, in CLI order.
    specs = (
        ('goal_state_reward', float, 'The reward for reaching the goal state'),
        ('pit_fall_reward', float, 'The reward for falling into a pit'),
        ('move_reward', float, 'The reward for moving'),
        ('give_up_reward', float, 'The reward for giving up'),
        ('number_of_trials', int, 'The number of learning trials to run'),
        ('exploration_epsilon', float, 'The weight for exploration'),
    )
    for arg_name, arg_type, arg_help in specs:
        parser.add_argument(arg_name, type=arg_type, help=arg_help)
    args = vars(parser.parse_args())

    env = environment.Environment(
        args['goal_state_reward'],
        args['pit_fall_reward'],
        args['move_reward'],
        args['give_up_reward'])
    sarsa = SARSA.SARSA(
        env,
        args['number_of_trials'],
        args['exploration_epsilon'])
    return env, sarsa
import EmptySARSA import HORDQ import SARSA if __name__ == "__main__": alpha = 0.2 epsilon = 0.1 gamma = 0.9 #ob = (-1, -1, -1, (1, 2)) #ob2 = (-1, -1, -1, (5, 4)) #ob3 = (-1, -1, -1, (0, 0)) ob4 = (-1, -1, -1, (1, 1)) punishment = 10 isRORDQ = False hordQ = HORDQ.HORDQ(alpha, epsilon, gamma, [1, -1], isRORDQ) probQ = SARSA.SARSA(alpha, epsilon, gamma, [1, -1]) controller = RMax(epsilon, gamma, hordQ, probQ, punishment) #unit test for get room val = controller.getRoom(ob4) print "value: ", val assert( val == 0) #controller.start(ob) #for i in range(0, 1000): # #controller.step(1, ob) #controller.step(1, ob) #controller.step(1, ob) #controller.step(1, ob2) #controller.end(10)
def BusRun(type, punishment, maxStep, isRORDQ, isRandomPlanner, isShow, framRate, loadFile): discrete_size = 6 objSet = (1, 1) monsterMoveProb = 0.3 isEpisodeEnd = False #maxStep = 200000 size = 800, 800 gridSize = (discrete_size, discrete_size) delay = 100 interval = 50 pygame.init() pygame.key.set_repeat(delay, interval) clock = pygame.time.Clock() screen = pygame.display.set_mode(size) actionList = ((0, 1), (0, -1), (1, 0), (-1, 0)) #controller = RelationalQ.RelationalQ(0.05, 0.1, 0.9, actionList) alpha = 0.2 probAlpha = 0.1 gamma = 1 if type == 'SARSA': epsilon = 0.1 controller = SARSA.SARSA(alpha, epsilon, gamma, actionList) else: epsilon = 0.05 #isRORDQ = True hordQ = HORDQ.HORDQ(alpha, epsilon, gamma, actionList, isRORDQ) probQ = SARSA.SARSA(probAlpha, epsilon, gamma, [0]) if isRandomPlanner: epsilon = 1 controller = RMax.RMax(epsilon, gamma, hordQ, probQ, punishment) if loadFile != '': print "load:", loadFile controller = tool.Load(loadFile) env = BusEnv((discrete_size, discrete_size), size, actionList) numOfTurtle = objSet[0] numOfCoin = objSet[1] print "# coin ", numOfCoin print "# Turtle ", numOfTurtle print "isEpisodeEnd ", isEpisodeEnd isTraining = not isEpisodeEnd count = 0 totalReward = 0 rewardList = [] stepCount = 0 while stepCount < maxStep: #randomly choose a sub goal at the beginning of the episode goalDiff = actionList[int(random.random() * len(actionList))] world = env.start(numOfTurtle, numOfCoin) action = controller.start(env.getSarsaFeature()) count += 1 prevStepCount = stepCount episodeReward = 0 while stepCount < maxStep: if stepCount % 1000 == 0: print "Time: ", stepCount / 1000 stepCount = stepCount + 1 clock.tick(frameRate) reward, flag = env.step(action) fea = env.getSarsaFeature() totalReward = totalReward + reward episodeReward = episodeReward + reward if flag: controller.end(reward) break action = controller.step(reward, fea) for event in pygame.event.get(): #action = 0 if event.type == pygame.QUIT: sys.exit() if isShow: 
screen.blit(env.getScreen(), (0, 0)) pygame.display.flip() rewardList.append((prevStepCount, stepCount, episodeReward)) print totalReward #for conf in controller.agent: #print controller.agent[conf].Q #controller.dumpObj() #controller.dumpCoinAndGoal() #controller.dumpCoinAndGoalEx(controller.prob) #controller.dumpCoinAndGoalEx(controller.realReward) tool.Save(controller, type) tool.Save(rewardList, 'reward_' + type)