Example #1
def __init__(self, alpha, epsilon, gamma, actionList, size):
    self.size = self.width, self.height = size
    self.actionList = actionList
    self.epsilon = epsilon
    self.agent = [
        SARSA.SARSA(alpha, epsilon, gamma, actionList),  # all
        SARSA.SARSA(alpha, epsilon, gamma, actionList),  # world
        SARSA.SARSA(alpha, epsilon, gamma, actionList),  # turtle
        SARSA.SARSA(alpha, epsilon, gamma, actionList),  # coin
    ]
Example #2
def read_argument():
    parser = argparse.ArgumentParser('Reinforcement Learning')

    parser.add_argument('goal_state_reward', type=float, help='The reward for reaching the goal state')
    parser.add_argument('pit_fall_reward', type=float, help='The reward for falling into a pit')
    parser.add_argument('move_reward', type=float, help='The reward for moving')
    parser.add_argument('give_up_reward', type=float, help='The reward for giving up')
    parser.add_argument('number_of_trials', type=int, help='The number of learning trials to run')
    parser.add_argument('exploration_epsilon', type=float, help='The weight for exploration')

    args = vars(parser.parse_args())

    env = environment.Environment(
        args['goal_state_reward'],
        args['pit_fall_reward'],
        args['move_reward'],
        args['give_up_reward'])

    sarsa = SARSA.SARSA(
        env,
        args['number_of_trials'],
        args['exploration_epsilon']
    )

    return env, sarsa
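Because the parser above declares only positional arguments, the script must be given all six reward/learning values in order on the command line. The snippet below is a minimal, self-contained sketch of that parsing step; the sample values and the explicit argument list passed to parse_args are assumptions for illustration, not values taken from the project.

import argparse

parser = argparse.ArgumentParser('Reinforcement Learning')
parser.add_argument('goal_state_reward', type=float)
parser.add_argument('pit_fall_reward', type=float)
parser.add_argument('move_reward', type=float)
parser.add_argument('give_up_reward', type=float)
parser.add_argument('number_of_trials', type=int)
parser.add_argument('exploration_epsilon', type=float)

# hypothetical command line: python main.py 100 -100 -1 -5 10000 0.1
args = vars(parser.parse_args(['100', '-100', '-1', '-5', '10000', '0.1']))
print(args['goal_state_reward'])   # 100.0
print(args['number_of_trials'])    # 10000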
Example #3
import EmptySARSA
import HORDQ
import SARSA
if __name__ == "__main__":

    alpha = 0.2
    epsilon = 0.1
    gamma = 0.9
    #ob = (-1, -1, -1, (1, 2))
    #ob2 = (-1, -1, -1, (5, 4))
    #ob3 = (-1, -1, -1, (0, 0))
    ob4 = (-1, -1, -1, (1, 1))
    punishment = 10
    isRORDQ = False
    hordQ = HORDQ.HORDQ(alpha, epsilon, gamma, [1, -1], isRORDQ)
    probQ = SARSA.SARSA(alpha, epsilon, gamma, [1, -1])
    controller = RMax(epsilon, gamma, hordQ, probQ, punishment)

    # unit test for getRoom
    val = controller.getRoom(ob4)
    print "value: ", val
    assert val == 0
    #controller.start(ob)
    #for i in range(0, 1000):
        #
        #controller.step(1, ob)
        #controller.step(1, ob)
        #controller.step(1, ob)
        #controller.step(1, ob2)
    #controller.end(10)
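Examples #3 and #4 drive their controllers through the same episodic interface: start(observation) returns the first action, step(reward, observation) returns the next action, and end(reward) closes the episode. The sketch below is a minimal tabular epsilon-greedy SARSA written against that interface, assuming observations are hashable (tuples, as in the examples); it is only an illustration of the assumed contract, not the project's SARSA.SARSA implementation, and the name MinimalSARSA is made up.

import random


class MinimalSARSA(object):
    """Illustrative tabular SARSA exposing the start/step/end contract above."""

    def __init__(self, alpha, epsilon, gamma, actionList):
        self.alpha = alpha          # learning rate
        self.epsilon = epsilon      # exploration probability
        self.gamma = gamma          # discount factor
        self.actionList = list(actionList)
        self.Q = {}                 # (state, action) -> estimated value

    def _choose(self, state):
        # epsilon-greedy selection over the fixed action list
        if random.random() < self.epsilon:
            return random.choice(self.actionList)
        return max(self.actionList, key=lambda a: self.Q.get((state, a), 0.0))

    def start(self, state):
        self.prevState, self.prevAction = state, self._choose(state)
        return self.prevAction

    def step(self, reward, state):
        action = self._choose(state)
        key = (self.prevState, self.prevAction)
        old = self.Q.get(key, 0.0)
        target = reward + self.gamma * self.Q.get((state, action), 0.0)
        self.Q[key] = old + self.alpha * (target - old)   # SARSA update
        self.prevState, self.prevAction = state, action
        return action

    def end(self, reward):
        # terminal update: no successor state-action value
        key = (self.prevState, self.prevAction)
        old = self.Q.get(key, 0.0)
        self.Q[key] = old + self.alpha * (reward - old)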
Example #4
def BusRun(type, punishment, maxStep, isRORDQ, isRandomPlanner, isShow,
           frameRate, loadFile):
    discrete_size = 6
    objSet = (1, 1)
    monsterMoveProb = 0.3
    isEpisodeEnd = False
    #maxStep = 200000
    size = 800, 800
    gridSize = (discrete_size, discrete_size)
    delay = 100
    interval = 50
    pygame.init()
    pygame.key.set_repeat(delay, interval)
    clock = pygame.time.Clock()
    screen = pygame.display.set_mode(size)

    actionList = ((0, 1), (0, -1), (1, 0), (-1, 0))
    #controller = RelationalQ.RelationalQ(0.05, 0.1, 0.9, actionList)
    alpha = 0.2
    probAlpha = 0.1
    gamma = 1
    if type == 'SARSA':
        epsilon = 0.1
        controller = SARSA.SARSA(alpha, epsilon, gamma, actionList)

    else:
        epsilon = 0.05
        #isRORDQ = True
        hordQ = HORDQ.HORDQ(alpha, epsilon, gamma, actionList, isRORDQ)
        probQ = SARSA.SARSA(probAlpha, epsilon, gamma, [0])
        if isRandomPlanner:
            epsilon = 1
        controller = RMax.RMax(epsilon, gamma, hordQ, probQ, punishment)
    if loadFile != '':
        print "load:", loadFile
        controller = tool.Load(loadFile)
    env = BusEnv((discrete_size, discrete_size), size, actionList)

    numOfTurtle = objSet[0]
    numOfCoin = objSet[1]

    print "# coin ", numOfCoin
    print "# Turtle ", numOfTurtle
    print "isEpisodeEnd ", isEpisodeEnd

    isTraining = not isEpisodeEnd

    count = 0

    totalReward = 0
    rewardList = []
    stepCount = 0
    while stepCount < maxStep:
        #randomly choose a sub goal at the beginning of the episode
        goalDiff = actionList[int(random.random() * len(actionList))]
        world = env.start(numOfTurtle, numOfCoin)
        action = controller.start(env.getSarsaFeature())

        count += 1
        prevStepCount = stepCount
        episodeReward = 0
        while stepCount < maxStep:
            if stepCount % 1000 == 0:
                print "Time: ", stepCount / 1000
            stepCount = stepCount + 1
            clock.tick(frameRate)
            reward, flag = env.step(action)
            fea = env.getSarsaFeature()
            totalReward = totalReward + reward
            episodeReward = episodeReward + reward
            if flag:
                controller.end(reward)
                break
            action = controller.step(reward, fea)

            for event in pygame.event.get():
                #action = 0
                if event.type == pygame.QUIT: sys.exit()
            if isShow:
                screen.blit(env.getScreen(), (0, 0))
                pygame.display.flip()
        rewardList.append((prevStepCount, stepCount, episodeReward))
    print totalReward
    #for conf in controller.agent:
    #print controller.agent[conf].Q
    #controller.dumpObj()
    #controller.dumpCoinAndGoal()
    #controller.dumpCoinAndGoalEx(controller.prob)
    #controller.dumpCoinAndGoalEx(controller.realReward)
    tool.Save(controller, type)
    tool.Save(rewardList, 'reward_' + type)
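A call to BusRun might look like the line below. The argument values are illustrative assumptions: only maxStep = 200000 is suggested by the commented-out default inside the function, and punishment = 10 mirrors Example #3.

# hypothetical invocation; frameRate, isShow and the other flags are assumed values
BusRun('SARSA', punishment=10, maxStep=200000, isRORDQ=False,
       isRandomPlanner=False, isShow=True, frameRate=30, loadFile='')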