def get_agent(name):
    """Create a TpFinalPlayer called *name*, train it and return it.

    The player is trained by running a TpFinalGame 3000 times against a
    RandomPlayer named 'PC'. A PerformanceCounter is attached to both agents
    and its statistics are shown once the training runs are over.
    """
    learner = TpFinalPlayer(name)
    sparring_partner = RandomPlayer('PC')
    game = TpFinalGame([learner, sparring_partner])

    print('Training with a random player, please wait...')
    # The original re-assigns the agent list after construction; kept as is.
    game.agents = [learner, sparring_partner]
    stats = PerformanceCounter(game.agents, ['QLearnerA', 'QLearnerB'])

    for _ in range(3000):
        game.run()

    stats.show_statistics()
    return learner
else: return -1 return 0 if __name__ == '__main__': a = TicTacToePlayer('X') b = RandomPlayer('O') c = HumanPlayer('O') game = TicTacToeGame([a, b]) print ('Training with a random player, please wait...') game.agents = [a, b] for i in range(3000): game.run() a.dump('qlearner_agent') d = TicTacToePlayer.load('qlearner_agent') d.play_with = 'O' game.agents = [a, d] per = PerformanceCounter(game.agents, ['QLearnerA', 'QLearnerD']) for i in range(3000): game.run() per.show_statistics() game.agents = [a, c] print ('Do you like to play?') game.run() print game.state
self.last_agent = None random.shuffle(self.agents) self.run() def reward(self, state, agent): if self.is_completed(state): if self.last_agent is agent: return -1 else: return 1 return 0 def get_agent(name): res = Player(name) rand = RandomPlayer('random') env = TrainingEnvironment([res, rand]) for i in range(3000): env.play() return res if __name__ == '__main__': res = Player('name') rand = RandomPlayer('random') env = TrainingEnvironment([res, rand]) per = PerformanceCounter(env.agents, ['q', 'random']) for i in range(3000): env.play() per.show_statistics()
else: return -1 return 0 if __name__ == '__main__': a = TicTacToePlayer('X') b = RandomPlayer('O') c = HumanPlayer('O') game = TicTacToeGame([a, b]) print('Training with a random player, please wait...') game.agents = [a, b] for i in range(3000): game.run() a.dump('qlearner_agent') d = TicTacToePlayer.load('qlearner_agent') d.play_with = 'O' game.agents = [a, d] per = PerformanceCounter(game.agents, ['QLearnerA', 'QLearnerD']) for i in range(3000): game.run() per.show_statistics() game.agents = [a, c] print('Do you like to play?') game.run() print game.state
# NOTE(review): is_completed and reward take `self` and read `self.rewards`;
# the header of their class is outside this chunk, so re-indent them under
# that class when merging.
def is_completed(self, state):
    """Return True when *state* has an entry in ``self.rewards``."""
    # Direct membership test; `.keys()` builds a throwaway list on Python 2.
    return state in self.rewards


def reward(self, state, agent):
    """Return the reward stored for *state*, or -0.08 when there is none.

    *agent* is not used.
    """
    return self.rewards.get(state, -0.08)


class WumpusProblem(RLProblem):
    """RLProblem that offers the same four action names in every state."""

    def actions(self, state):
        """Return the list of action names; *state* is ignored."""
        # NOTE(review): 'rigth' is spelled this way in the original and is
        # kept byte-for-byte, since code outside this chunk may compare
        # against this exact string. Confirm before renaming.
        actions = ['up', 'down', 'left', 'rigth']
        return actions


if __name__ == '__main__':
    # Train a TDQLearner on the Wumpus environment, show the statistics and
    # then run once more with a viewer attached.
    agent = TDQLearner(WumpusProblem(),
                       temperature_function=make_exponential_temperature(1000, 0.01),
                       discount_factor=0.8)
    game = WumpusEnvironment(agent)
    p = PerformanceCounter([agent], ['Q-learner Agent'])

    # Call form instead of the `print 'Training...'` statement, which is a
    # SyntaxError on Python 3.
    print('Training...')
    for i in range(10000):
        game.run()
    p.show_statistics()

    game.run(viewer=WumpusViewer(game))
if __name__ == '__main__':
    # Script entry point: train a NuevePlayer, run it against a reloaded copy
    # of itself, then play one game against a human and announce the winner.
    a = NuevePlayer('Agente')
    b = RandomPlayer('Random')
    c = HumanPlayer('Human')

    # Phase 1: run 5000 games of `a` against the random player.
    game = NueveGame([a, b])
    print('Training with a random player, please wait...')
    game.agents = [a, b]
    for i in range(5000):
        game.run()

    # Phase 2: dump `a`, load the dump back as `d`, and run 5000 games of
    # `a` against `d` with a PerformanceCounter attached.
    a.dump('qlearner_agent')
    d = NuevePlayer.load('qlearner_agent')
    game.agents = [a, d]
    per = PerformanceCounter(game.agents, ['QLearnerA', 'QLearnerD'])
    for i in range(5000):
        game.run()
    # Statistics output is disabled in the original: per.show_statistics()

    # Phase 3: a single game of `a` against the human player.
    game.agents = [a, c]
    print('Do you like to play?')
    game.run()
    # print(...) calls replace Python 2 print statements (SyntaxError on
    # Python 3); the printed strings are unchanged.
    print(game.state)
    # NOTE(review): the winner is decided by comparing the agent's last action
    # with its last state; presumably this encodes "the agent made the final
    # move". Verify against NueveGame/NuevePlayer.
    if a.last_action == a.last_state and a.last_state != ():
        print('Gana Humano')
    else:
        print('Gana Maquina')
# NOTE(review): is_completed and make_reward take `self` and read
# `self.rewards`; the header of their class is outside this chunk, so
# re-indent them under that class when merging.
def is_completed(self, state):
    """Return True when *state* has an entry in ``self.rewards``."""
    # Direct membership test; `.keys()` builds a throwaway list on Python 2.
    return state in self.rewards


def make_reward(self, state, agent):
    """Return the reward stored for *state*, or -0.08 when there is none.

    *agent* is not used.
    """
    return self.rewards.get(state, -0.08)


class GridProblem(RLProblem):
    """RLProblem that offers the same four action names in every state."""

    def actions(self, state):
        """Return the list of action names; *state* is ignored."""
        # NOTE(review): 'rigth' is spelled this way in the original and is
        # kept byte-for-byte, since code outside this chunk may compare
        # against this exact string. Confirm before renaming.
        actions = ['up', 'down', 'left', 'rigth']
        return actions


if __name__ == '__main__':
    # Train a TDQLearner on the grid game for 3000 runs, show the statistics
    # and then run once more with a viewer attached.
    agent = TDQLearner(GridProblem(),
                       temperature_function=make_exponential_temperature(1000, 0.005),
                       discount_factor=0.9)
    game = GridGame(agent)
    p = PerformanceCounter([agent], ['Q-learner Agent'])

    for i in range(3000):
        game.run()
    p.show_statistics()

    game.run(viewer=Viewer())
return 0 if __name__ == '__main__': theProblem = Fisa9Problem() thePlayer = GenericPlayer("Trained Bot", theProblem) randomPlayer = RandomPlayer( "FisaBot", theProblem ) # Formerly called "MullerBot" but because now it actually trains our # bot, we changed its name theGame = Fisa9Game([randomPlayer, thePlayer], tuple(range(1, 10))) per = PerformanceCounter(theGame.agents, [a.name for a in theGame.agents]) print('Training with a random player, please wait...') contador = 0 for i in range(8000): shuffle(theGame.agents) theGame.run() per.show_statistics() humanPlayer = HumanPlayer("Human Player", theProblem) theGame.agents = [humanPlayer, thePlayer] while True: shuffle(theGame.agents) print('Do you like to play?') theGame.run() print theGame.state