Code Example #1
def drawForLambdaZero():
    montecarlo = MonteCarlo(100)
    print('Training Monte Carlo')
    montecarlo.train(500000)
    print('Training of Monte Carlo Completed')
    lambdaValue = 0
    learningRate = []
    learningRateIndex = []
    sarsa = SARSA(100, lambdaValue)
    print('Training SARSA and plotting graph')
    for i in range(1000):
        learningRateIndex.append(i)
        sarsa.train(1)
        # mean squared error between the SARSA and Monte Carlo Q-tables (denominator hard-coded to 1000)
        squareMean = np.sum(np.square(sarsa.Q - montecarlo.Q)) / float(1000)
        learningRate.append(squareMean)

    fig = plt.figure("SARSAZERO")
    surf = plt.plot(learningRateIndex, learningRate)
    fig.savefig('lambdaZero.png')
    plt.show()
Code Example #2
def drawForAllLambdas():
    montecarlo = MonteCarlo(100)
    print('Training Monte Carlo')
    montecarlo.train(500000)
    print('Training of Monte Carlo Completed')
    lambdas = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    squareMean = []
    # number of Q-table entries (assumes two actions per state)
    numberElements = montecarlo.Q.shape[0] * montecarlo.Q.shape[1] * 2
    for lambdaValue in lambdas:
        sarsa = SARSA(100, lambdaValue)
        print('Training SARSA', lambdaValue)
        sarsa.train(1000)
        print('Training of SARSA Completed')
        squareMeanCalc = np.sum(
            np.square(sarsa.Q - montecarlo.Q)) / float(numberElements)
        squareMean.append(squareMeanCalc)
    fig = plt.figure("SARSA")
    # plot only the interior lambda values (0.1 to 0.9); the endpoints 0 and 1.0 are skipped
    surf = plt.plot(lambdas[1:10], squareMean[1:10])
    fig.savefig('lambdaALL.png')
    plt.show()
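
Both helpers above report the same quantity: the mean squared error between the SARSA(λ) action-value table and a long-trained Monte Carlo estimate used as the reference (the first example hard-codes the denominator to 1000, while the second derives it from the table shape). A minimal sketch of that comparison, assuming both Q-tables are NumPy arrays of the same shape; the function name is illustrative and not part of the project:

import numpy as np

def q_mse(q_learned, q_reference):
    # mean squared error over every (state, action) entry of the two Q-tables
    diff = np.asarray(q_learned) - np.asarray(q_reference)
    return float(np.mean(np.square(diff)))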
Code Example #3
        "--- starting training for Q Learning agent using eligibility trace --- "
    )
    episode_steps = q_agent.train_lambda()
    print(
        "--- optimal policy for Q Learning agent using eligibility trace ---")
    q_agent.get_optimal_policy()
    q_agent.plot_policy()
    print(
        "--- plotting training for Q Learning agent using eligibility trace ---"
    )
    q_agent.plot(episode_steps)

    # Performing SARSA with standard environment
    sarsa_agent = SARSA(episodes=episodes,
                        lr=learning_rate,
                        discount=discount,
                        epsilon=epsilon,
                        king=False)
    print("--- starting training for SARSA agent --- ")
    episode_steps = sarsa_agent.train()
    print("--- optimal policy for SARSA agent ---")
    sarsa_agent.get_optimal_policy()
    sarsa_agent.plot_policy()
    print("--- plotting training for SARSA agent ---")
    sarsa_agent.plot(episode_steps)

    # Performing SARSA with standard environment using eligibility trace
    sarsa_agent = SARSA(episodes=episodes,
                        lr=learning_rate,
                        discount=discount,
                        epsilon=epsilon,
Code Example #4
# plot rewards
plot_rewards(np.mean(results, axis=0), smoothing=0.1, color='blue')

#### run with SARSA

# define learning settings

for t in range(tries):

    epsilon_decay = 1 - (1 / episodes) * 6
    learning_decay = 1 - (1 / episodes) * 3
    agent = SARSA(env.env,
                  learning_rate=0.5,
                  discount_factor=0.9,
                  exploration_rate=0,
                  epsilon_decay_func=lambda x: x * epsilon_decay,
                  alpha_decay_func=lambda x: x * learning_decay,
                  qtable_default=1)

    # fit and save results
    env.fit(agent, episodes)
    results[t, :] = agent.rewards_per_episode

# plot rewards
plot_rewards(np.mean(results, axis=0), smoothing=0.1, color='green')

#### show plot
plt.show()
"""
There is a component of randomness, but overall, it seems SARSA takes longer to converge
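
The convergence remark above comes down to the difference between the on-policy SARSA target and the off-policy Q-learning target. A minimal sketch of the two one-step updates, using hypothetical names (q as a dict mapping state -> {action: value}, plus alpha and gamma) that are not taken from this project:

def sarsa_update(q, state, action, reward, next_state, next_action, alpha=0.5, gamma=0.9):
    # SARSA bootstraps on the action actually taken by the behaviour policy (on-policy)
    target = reward + gamma * q[next_state][next_action]
    q[state][action] += alpha * (target - q[state][action])

def q_learning_update(q, state, action, reward, next_state, alpha=0.5, gamma=0.9):
    # Q-learning bootstraps on the greedy action (off-policy), which is one reason it can
    # converge to the greedy policy faster than SARSA under the same exploration schedule
    target = reward + gamma * max(q[next_state].values())
    q[state][action] += alpha * (target - q[state][action])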
Code Example #5
def main():
    # Load the map
    tmpMap = ReadMap(input("FilePath (Environment_*.txt) :"))

    # Create the instances
    grid = GridMap(width, hight, tmpMap, SCREEN_SIZE, CELLS_SIZE)
    orga = Agent(stamina_max=STAMINA)
    human = Agent(stamina_max=STAMINA)
    state = State()
    controller = Controller()

    # Select the learning algorithm
    learning = Montecarlo(grid)

    # Reinforcement learning
    controller.gameSet(orga, human, grid, state)
    while learning.get_nowEpisode() <= EPISODE:
        learning.proceedTurn(orga, human, state, grid, controller)
    plotGraph(np.array(learning.rewardHistory), "Montecarlo")

    # Create the instances
    grid = GridMap(width, hight, tmpMap, SCREEN_SIZE, CELLS_SIZE)
    orga = Agent(stamina_max=STAMINA)
    human = Agent(stamina_max=STAMINA)
    state = State()
    controller = Controller()

    # Select the learning algorithm
    learning = ProfitSharing(grid)

    # Reinforcement learning
    controller.gameSet(orga, human, grid, state)
    while learning.get_nowEpisode() <= EPISODE:
        learning.proceedTurn(orga, human, state, grid, controller)
    plotGraph(np.array(learning.rewardHistory), "ProfitSharing")

    # Create the instances
    grid = GridMap(width, hight, tmpMap, SCREEN_SIZE, CELLS_SIZE)
    orga = Agent(stamina_max=STAMINA)
    human = Agent(stamina_max=STAMINA)
    state = State()
    controller = Controller()

    # Select the learning algorithm
    learning = Q_learning(grid)

    # Reinforcement learning
    controller.gameSet(orga, human, grid, state)
    while learning.get_nowEpisode() <= EPISODE:
        learning.proceedTurn(orga, human, state, grid, controller)
    plotGraph(np.array(learning.rewardHistory), "Q-Learning")

    # Create the instances
    grid = GridMap(width, hight, tmpMap, SCREEN_SIZE, CELLS_SIZE)
    orga = Agent(stamina_max=STAMINA)
    human = Agent(stamina_max=STAMINA)
    state = State()
    controller = Controller()

    # Select the learning algorithm
    learning = SARSA(grid)

    # Reinforcement learning
    controller.gameSet(orga, human, grid, state)
    while learning.get_nowEpisode() <= EPISODE:
        learning.proceedTurn(orga, human, state, grid, controller)
    plotGraph(np.array(learning.rewardHistory), "SARSA")

    plt.xlabel("Episode")
    plt.ylabel("reward")
    plt.legend()
    plt.show()
    """
Code Example #6
            container = [sizeArray[0], sizeArray[1], controllers[0], results[0][0], results[1][0],
                         controllers[1], results[0][1], results[1][1]]
            
            if results[1][0] <= 0 and results[1][1] <= 0:
                victor = "t"
            elif container[3]-container[4] > container[6]-container[7]:
                victor = "b"
            elif container[3]-container[4] < container[6]-container[7]:
                victor = "r"
            else:
                victor = "t"
    
            container.append(victor)
            data.append(container)
            # randomly reorder the two controllers for the next game
            c1 = random.randint(0, 1)
            c2 = 1 - c1
            controllers = [controllers[c1], controllers[c2]]
            
            sizeArray = [random.randint(1,3), random.randint(1,3)]
            engine.reset(sizeArray, controllers, allowRandom = False)
        except KeyboardInterrupt:
            break
        
    df = pd.DataFrame(data = data)
    df.to_csv("{}.csv".format(fileName), index = False, header = columns)
    
    evalMethod(data, c_eval)

if __name__ == "__main__":
    compareMethod(minmaxAI(), SARSA(), "walls_minmax_v_sarsa3")
Code Example #7
if __name__ == "__main__":
    from SARSA import SARSA
    size = 400, 400
    discrete_size = 10
    delay = 100
    interval = 50
    action = 0

    pygame.init()
    pygame.key.set_repeat(delay, interval)
    clock = pygame.time.Clock()
    screen = pygame.display.set_mode(size)


    agent = SARSA(0.01, 0.1, 0.9, (-1, 1))
    agentEnemy = SARSA(0.01, 0.1, 0.9, (-1, 1))

    lastWin = False

    while 1:
        env = Environment(size, discrete_size)
        state = env.start(lastWin)
        action = agent.start(state)
        actionEnemy = agentEnemy.start(state)
        while 1:
            clock.tick(60)
            for event in pygame.event.get():
               #action = 0
               if event.type == pygame.QUIT: sys.exit()
               #if event.type==pygame.KEYDOWN:
Code Example #8
File: Environment.py Project: lono175/Pong
if __name__ == "__main__":
    from SARSA import SARSA

    size = 400, 400
    discrete_size = 10
    delay = 100
    interval = 50
    action = 0

    pygame.init()
    pygame.key.set_repeat(delay, interval)
    clock = pygame.time.Clock()
    screen = pygame.display.set_mode(size)

    agent = SARSA(0.1, 0.1, 0.9, (-1, 0, 1))

    while 1:
        env = Environment(size, discrete_size)
        state = env.start()
        action = agent.start(state)
        while 1:
            clock.tick(1000)
            for event in pygame.event.get():
                # action = 0
                if event.type == pygame.QUIT:
                    sys.exit()
                # if event.type==pygame.KEYDOWN:
                # if event.key==pygame.K_LEFT:
                # action = -1
                # if event.key==pygame.K_RIGHT:
Code Example #9
File: Environment.py Project: lono175/Maze

if __name__ == "__main__":
    from SARSA import SARSA
    size = 400, 400
    discrete_size = 10
    delay = 100
    interval = 50
    action = 0

    pygame.init()
    pygame.key.set_repeat(delay, interval)
    clock = pygame.time.Clock()
    screen = pygame.display.set_mode(size)

    agent = SARSA(0.1, 0.1, 0.9, (-1, 0, 1))

    while 1:
        env = Environment(size, discrete_size)
        state = env.start()
        action = agent.start(state)
        while 1:
            clock.tick(1000)
            for event in pygame.event.get():
                #action = 0
                if event.type == pygame.QUIT: sys.exit()
                #if event.type==pygame.KEYDOWN:
                #if event.key==pygame.K_LEFT:
                #action = -1
                #if event.key==pygame.K_RIGHT:
                #action = 1
Code Example #10
            print(i, rewards)
            print(steps)

        avg_steps[l] = np.divide(avg_steps[l], runs)
        avg_rewards[l] = np.divide(avg_rewards[l], runs)

        y1.append(avg_rewards[l][-1])
        y2.append(avg_steps[l][-1])

    fig1.plot(x, y1)
    fig2.plot(x, y2)

    # fig1.title.set_text(algo.__class__.__name__ )
    fig1.set_xlabel(r'$\lambda$')  # Setting the label for x-axis
    fig1.set_ylabel(
        'Average Reward per Episode')  # Setting the label for y-axis

    # fig2.title.set_text(algo.__class__.__name__)
    fig2.set_xlabel(r'$\lambda$')  # Setting the label for x-axis
    fig2.set_ylabel(
        'Average Steps to reach the goal')  # Setting the label for y-axis

    plt.show()


results(SARSA(env, n_episodes, targets['B'], gamma, alpha), n_episodes, n_runs)
# results(QLearning(env, n_episodes, targets['C'], gamma, alpha, epsilon), n_episodes, n_runs)
# results(SARSA_Lambda(env, n_episodes, targets['A'], lambda_val[0], gamma, alpha), n_episodes, n_runs)
# results_sarsa_lambda(SARSA_Lambda(env, 25, targets['C'], gamma, alpha), 25, n_runs)