def drawForLambdaZero():
    montecarlo = MonteCarlo(100)
    print('Training Monte Carlo')
    montecarlo.train(500000)
    print('Training of Monte Carlo Completed')
    lambdaValue = 0
    learningRate = []
    learningRateIndex = []
    sarsa = SARSA(100, lambdaValue)
    print('Training SARSA and plotting graph')
    for i in range(1000):
        learningRateIndex.append(i)
        sarsa.train(1)
        # Mean squared difference from the Monte Carlo Q-table after each episode
        squareMean = np.sum(np.square(sarsa.Q - montecarlo.Q)) / float(1000)
        learningRate.append(squareMean)
    fig = plt.figure("SARSAZERO")
    plt.plot(learningRateIndex, learningRate)
    fig.savefig('lambdaZero.png')
    plt.show()
def drawForAllLambdas():
    montecarlo = MonteCarlo(100)
    print('Training Monte Carlo')
    montecarlo.train(500000)
    print('Training of Monte Carlo Completed')
    lambdas = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    squareMean = []
    # Total Q-table entries; the trailing factor 2 presumably counts the two actions
    numberElements = montecarlo.Q.shape[0] * montecarlo.Q.shape[1] * 2
    for lambdaValue in lambdas:
        sarsa = SARSA(100, lambdaValue)
        print('Training SARSA', lambdaValue)
        sarsa.train(1000)
        print('Training of SARSA Completed')
        squareMeanCalc = np.sum(
            np.square(sarsa.Q - montecarlo.Q)) / float(numberElements)
        squareMean.append(squareMeanCalc)
    fig = plt.figure("SARSA")
    plt.plot(lambdas[1:10], squareMean[1:10])  # plots lambda = 0.1 .. 0.9 only
    fig.savefig('lambdaALL.png')
    plt.show()
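Both drivers above repeat the same mean-squared-error reduction over the two Q-tables inline, with hard-coded element counts. A minimal sketch of a shared helper, assuming both Q arrays are NumPy arrays of equal shape (the name `q_mse` is hypothetical, not from this codebase):

```python
import numpy as np

def q_mse(q_learned, q_reference):
    """Mean squared error between two Q-tables of equal shape.

    Dividing by the element count (diff.size) avoids hard-coding
    constants such as 1000 or shape[0] * shape[1] * 2.
    """
    diff = np.asarray(q_learned) - np.asarray(q_reference)
    return np.sum(np.square(diff)) / diff.size
```

With such a helper, both `drawForLambdaZero` and `drawForAllLambdas` could call `q_mse(sarsa.Q, montecarlo.Q)` instead of repeating the reduction with different divisors.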
"--- starting training for Q Learning agent using eligibility trace --- " ) episode_steps = q_agent.train_lambda() print( "--- optimal policy for Q Learning agent using eligibility trace ---") q_agent.get_optimal_policy() q_agent.plot_policy() print( "--- plotting training for Q Learning agent using eligibility trace ---" ) q_agent.plot(episode_steps) # Performing SARSA with standard environment sarsa_agent = SARSA(episodes=episodes, lr=learning_rate, discount=discount, epsilon=epsilon, king=False) print("--- starting training for SARSA agent --- ") episode_steps = sarsa_agent.train() print("--- optimal policy for SARSA agent ---") sarsa_agent.get_optimal_policy() sarsa_agent.plot_policy() print("--- plotting training for SARSA agent ---") sarsa_agent.plot(episode_steps) # Performing SARSA with standard environment using eligibility trace sarsa_agent = SARSA(episodes=episodes, lr=learning_rate, discount=discount, epsilon=epsilon,
# plot rewards
plot_rewards(np.mean(results, axis=0), smoothing=0.1, color='blue')

#### run with SARSA
# define learning settings
for t in range(tries):
    epsilon_decay = 1 - (1 / episodes) * 6
    learning_decay = 1 - (1 / episodes) * 3
    agent = SARSA(env.env,
                  learning_rate=0.5,
                  discount_factor=0.9,
                  exploration_rate=0,
                  epsilon_decay_func=lambda x: x * epsilon_decay,
                  alpha_decay_func=lambda x: x * learning_decay,
                  qtable_default=1)
    # fit and save results
    env.fit(agent, episodes)
    results[t, :] = agent.rewards_per_episode

# plot rewards
plot_rewards(np.mean(results, axis=0), smoothing=0.1, color='green')

#### show plot
plt.show()

"""
There is a component of randomness, but overall, it seems SARSA takes longer to converge
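The `epsilon_decay_func` and `alpha_decay_func` arguments above apply a multiplicative decay once per episode. A minimal sketch of the resulting schedule, assuming the environment invokes the decay functions after every episode; the episode count and starting values here are illustrative, not from the snippet:

```python
episodes = 200                          # illustrative count, not from the snippet
epsilon_decay = 1 - (1 / episodes) * 6  # same formulas as above
learning_decay = 1 - (1 / episodes) * 3

epsilon, alpha = 1.0, 0.5               # illustrative starting values
for _ in range(episodes):
    epsilon *= epsilon_decay            # what epsilon_decay_func applies each episode
    alpha *= learning_decay             # what alpha_decay_func applies each episode

# After `episodes` steps the factors have shrunk by roughly e^-6 and e^-3,
# which is presumably why the constants 6 and 3 appear in the formulas.
print(f"epsilon ~ {epsilon:.4f}, alpha ~ {alpha:.4f}")
```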
def main():
    # Load the map
    tmpMap = ReadMap(input("FilePath (Environment_*.txt) :"))

    # Create instances
    grid = GridMap(width, hight, tmpMap, SCREEN_SIZE, CELLS_SIZE)
    orga = Agent(stamina_max=STAMINA)
    human = Agent(stamina_max=STAMINA)
    state = State()
    controller = Controller()
    # Select the learning algorithm
    learning = Montecarlo(grid)
    # Reinforcement learning
    controller.gameSet(orga, human, grid, state)
    while learning.get_nowEpisode() <= EPISODE:
        learning.proceedTurn(orga, human, state, grid, controller)
    plotGraph(np.array(learning.rewardHistory), "Montecarlo")

    # Create instances
    grid = GridMap(width, hight, tmpMap, SCREEN_SIZE, CELLS_SIZE)
    orga = Agent(stamina_max=STAMINA)
    human = Agent(stamina_max=STAMINA)
    state = State()
    controller = Controller()
    # Select the learning algorithm
    learning = ProfitSharing(grid)
    # Reinforcement learning
    controller.gameSet(orga, human, grid, state)
    while learning.get_nowEpisode() <= EPISODE:
        learning.proceedTurn(orga, human, state, grid, controller)
    plotGraph(np.array(learning.rewardHistory), "ProfitSharing")

    # Create instances
    grid = GridMap(width, hight, tmpMap, SCREEN_SIZE, CELLS_SIZE)
    orga = Agent(stamina_max=STAMINA)
    human = Agent(stamina_max=STAMINA)
    state = State()
    controller = Controller()
    # Select the learning algorithm
    learning = Q_learning(grid)
    # Reinforcement learning
    controller.gameSet(orga, human, grid, state)
    while learning.get_nowEpisode() <= EPISODE:
        learning.proceedTurn(orga, human, state, grid, controller)
    plotGraph(np.array(learning.rewardHistory), "Q-Learning")

    # Create instances
    grid = GridMap(width, hight, tmpMap, SCREEN_SIZE, CELLS_SIZE)
    orga = Agent(stamina_max=STAMINA)
    human = Agent(stamina_max=STAMINA)
    state = State()
    controller = Controller()
    # Select the learning algorithm
    learning = SARSA(grid)
    # Reinforcement learning
    controller.gameSet(orga, human, grid, state)
    while learning.get_nowEpisode() <= EPISODE:
        learning.proceedTurn(orga, human, state, grid, controller)
    plotGraph(np.array(learning.rewardHistory), "SARSA")

    plt.xlabel("Episode")
    plt.ylabel("reward")
    plt.legend()
    plt.show()
"""
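The four training blocks in `main()` differ only in the algorithm class and the plot label. A minimal refactor sketch under that observation; `run_experiment` is a hypothetical helper and relies on the module-level names from this file (`GridMap`, `Agent`, `EPISODE`, and so on), so it is not standalone:

```python
def run_experiment(algorithm_cls, label, tmpMap):
    # Hypothetical helper: rebuild a fresh environment and agents per algorithm.
    grid = GridMap(width, hight, tmpMap, SCREEN_SIZE, CELLS_SIZE)
    orga = Agent(stamina_max=STAMINA)
    human = Agent(stamina_max=STAMINA)
    state = State()
    controller = Controller()
    learning = algorithm_cls(grid)
    controller.gameSet(orga, human, grid, state)
    while learning.get_nowEpisode() <= EPISODE:
        learning.proceedTurn(orga, human, state, grid, controller)
    plotGraph(np.array(learning.rewardHistory), label)

# main() would then reduce to a loop over the four algorithms:
# for cls, label in [(Montecarlo, "Montecarlo"), (ProfitSharing, "ProfitSharing"),
#                    (Q_learning, "Q-Learning"), (SARSA, "SARSA")]:
#     run_experiment(cls, label, tmpMap)
```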
            container = [sizeArray[0], sizeArray[1],
                         controllers[0], results[0][0], results[1][0],
                         controllers[1], results[0][1], results[1][1]]
            if results[1][0] <= 0 and results[1][1] <= 0:
                victor = "t"
            elif container[3] - container[4] > container[6] - container[7]:
                victor = "b"
            elif container[3] - container[4] < container[6] - container[7]:
                victor = "r"
            else:
                victor = "t"
            container.append(victor)
            data.append(container)
            # Shuffle which controller goes first for the next game
            c1 = random.randint(0, 1)
            c2 = c1 * -1 + 1
            controllers = [controllers[c1], controllers[c2]]
            sizeArray = [random.randint(1, 3), random.randint(1, 3)]
            engine.reset(sizeArray, controllers, allowRandom=False)
        except KeyboardInterrupt:
            break

    df = pd.DataFrame(data=data)
    df.to_csv("{}.csv".format(fileName), index=False, header=columns)
    evalMethod(data, c_eval)


if __name__ == "__main__":
    compareMethod(minmaxAI(), SARSA(), "walls_minmax_v_sarsa3")
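The `victor` decision above indexes into `container` by position, which is easy to misread. A sketch of the same rule with named intermediates, assuming `results[0][i]` and `results[1][i]` are the two score components recorded for controller `i` (inferred from the container layout; `decide_victor` is a hypothetical helper):

```python
def decide_victor(results):
    """Return 'b', 'r', or 't' using the same rule as the inline code.

    Assumes results[0][i] and results[1][i] are the two score components
    for controller i, as the container layout suggests.
    """
    if results[1][0] <= 0 and results[1][1] <= 0:
        return "t"  # both second components non-positive: tie
    margin_first = results[0][0] - results[1][0]
    margin_second = results[0][1] - results[1][1]
    if margin_first > margin_second:
        return "b"
    if margin_first < margin_second:
        return "r"
    return "t"
```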
if __name__ == "__main__": from SARSA import SARSA size = 400, 400 discrete_size = 10 delay = 100 interval = 50 action = 0 pygame.init() pygame.key.set_repeat(delay, interval) clock=pygame.time.Clock() screen = pygame.display.set_mode(size) agent = SARSA(0.01, 0.1, 0.9, (-1, 1)) agentEnemy = SARSA(0.01, 0.1, 0.9, (-1, 1)) lastWin = False while 1: env = Environment(size, discrete_size) state = env.start(lastWin) action = agent.start(state) actionEnemy = agentEnemy.start(state) while 1: clock.tick(60) for event in pygame.event.get(): #action = 0 if event.type == pygame.QUIT: sys.exit() #if event.type==pygame.KEYDOWN:
if __name__ == "__main__": from SARSA import SARSA size = 400, 400 discrete_size = 10 delay = 100 interval = 50 action = 0 pygame.init() pygame.key.set_repeat(delay, interval) clock = pygame.time.Clock() screen = pygame.display.set_mode(size) agent = SARSA(0.1, 0.1, 0.9, (-1, 0, 1)) while 1: env = Environment(size, discrete_size) state = env.start() action = agent.start(state) while 1: clock.tick(1000) for event in pygame.event.get(): # action = 0 if event.type == pygame.QUIT: sys.exit() # if event.type==pygame.KEYDOWN: # if event.key==pygame.K_LEFT: # action = -1 # if event.key==pygame.K_RIGHT:
if __name__ == "__main__": from SARSA import SARSA size = 400, 400 discrete_size = 10 delay = 100 interval = 50 action = 0 pygame.init() pygame.key.set_repeat(delay, interval) clock = pygame.time.Clock() screen = pygame.display.set_mode(size) agent = SARSA(0.1, 0.1, 0.9, (-1, 0, 1)) while 1: env = Environment(size, discrete_size) state = env.start() action = agent.start(state) while 1: clock.tick(1000) for event in pygame.event.get(): #action = 0 if event.type == pygame.QUIT: sys.exit() #if event.type==pygame.KEYDOWN: #if event.key==pygame.K_LEFT: #action = -1 #if event.key==pygame.K_RIGHT: #action = 1
        print(i, rewards)
    print(steps)
    avg_steps[l] = np.divide(avg_steps[l], runs)
    avg_rewards[l] = np.divide(avg_rewards[l], runs)
    y1.append(avg_rewards[l][-1])
    y2.append(avg_steps[l][-1])

fig1.plot(x, y1)
fig2.plot(x, y2)
# fig1.title.set_text(algo.__class__.__name__)
fig1.set_xlabel(r'$\lambda$')  # Setting the label for x-axis
fig1.set_ylabel('Average Reward per Episode')  # Setting the label for y-axis
# fig2.title.set_text(algo.__class__.__name__)
fig2.set_xlabel(r'$\lambda$')  # Setting the label for x-axis
fig2.set_ylabel('Average Steps to reach the goal')  # Setting the label for y-axis
plt.show()

results(SARSA(env, n_episodes, targets['B'], gamma, alpha), n_episodes, n_runs)
# results(QLearning(env, n_episodes, targets['C'], gamma, alpha, epsilon), n_episodes, n_runs)
# results(SARSA_Lambda(env, n_episodes, targets['A'], lambda_val[0], gamma, alpha), n_episodes, n_runs)
# results_sarsa_lambda(SARSA_Lambda(env, 25, targets['C'], gamma, alpha), 25, n_runs)
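Several snippets above refer to eligibility traces (`SARSA_Lambda`, `train_lambda`). For reference, a minimal sketch of the tabular SARSA(λ) backup with accumulating traces; the array shapes and the function name are illustrative, not the repository's own code:

```python
import numpy as np

def sarsa_lambda_update(Q, E, s, a, r, s_next, a_next, alpha, gamma, lam):
    """One SARSA(lambda) backup with accumulating eligibility traces.

    Q and E are (n_states, n_actions) arrays, modified in place. E decays
    by gamma * lam each step, so recently visited state-action pairs
    receive most of the credit for the current TD error.
    """
    delta = r + gamma * Q[s_next, a_next] - Q[s, a]  # TD error
    E[s, a] += 1.0                                   # accumulate trace
    Q += alpha * delta * E                           # update all traced pairs
    E *= gamma * lam                                 # decay traces
```

With λ = 0 this reduces to one-step SARSA, and with λ = 1 it approaches a Monte Carlo return, which is the spectrum the λ-sweep plots above explore.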