def __init__(self, name):
    """Initialize the parent learner on a new ``NineProblem`` and keep ``name``.

    The parent initializer is called with the problem instance, the result
    of ``make_exponential_temperature(1000000, 0.01)`` as
    ``temperature_function`` and ``discount_factor=0.9``.

    :param name: value stored on the instance as ``self.name``.
    """
    problem = NineProblem()
    temperature = make_exponential_temperature(1000000, 0.01)
    super(Player, self).__init__(problem,
                                 temperature_function=temperature,
                                 discount_factor=0.9)
    self.name = name
def __init__(self, play_with):
    """Initialize the parent learner on a new ``NueveProblem``.

    The parent initializer is called with the problem instance, the result
    of ``make_exponential_temperature(1000000, 0.01)`` as
    ``temperature_function`` and ``discount_factor=0.4``.

    :param play_with: value stored on the instance as ``self.name``
        (note that the attribute name differs from the parameter name).
    """
    # Bind the parent initializer first, then evaluate its arguments.
    parent_init = super(NuevePlayer, self).__init__
    parent_init(
        NueveProblem(),
        temperature_function=make_exponential_temperature(1000000, 0.01),
        discount_factor=0.4)
    self.name = play_with
def __init__(self, name):
    """Initialize the parent learner on a new ``ProblemaPapelitos``.

    The parent initializer is called with the problem instance, the result
    of ``make_exponential_temperature(1000000, 0.01)`` as
    ``temperature_function`` and ``discount_factor=0.4``.

    :param name: value stored on the instance as ``self.name``.
    """
    problem = ProblemaPapelitos()
    temperature = make_exponential_temperature(1000000, 0.01)
    super(PapelitosPlayer, self).__init__(
        problem,
        temperature_function=temperature,
        discount_factor=0.4)
    self.name = name
def __init__(self, agentName, problem):
    """Initialize the parent learner on a caller-supplied problem.

    The parent initializer is called with ``problem``, the result of
    ``make_exponential_temperature(100, 0.0019)`` as
    ``temperature_function`` and ``discount_factor=0.4``.

    :param agentName: value stored on the instance as ``self.name``.
    :param problem: object forwarded unchanged to the parent initializer.
    """
    temperature = make_exponential_temperature(100, 0.0019)
    super(GenericPlayer, self).__init__(problem,
                                        temperature_function=temperature,
                                        discount_factor=0.4)
    self.name = agentName
def __init__(self, play_with):
    """Initialize the parent learner on a new ``TicTacToeProblem``.

    The parent initializer is called with the problem instance, the result
    of ``make_exponential_temperature(1000000, 0.01)`` as
    ``temperature_function`` and ``discount_factor=0.4``.

    :param play_with: mark stored as ``self.play_with``. The opposite mark
        is stored as ``self.other_play_with``: ``'X'`` when ``play_with``
        equals ``'O'``, and ``'O'`` for any other value.
    """
    problem = TicTacToeProblem()
    temperature = make_exponential_temperature(1000000, 0.01)
    super(TicTacToePlayer, self).__init__(problem,
                                          temperature_function=temperature,
                                          discount_factor=0.4)
    self.play_with = play_with
    if play_with == 'O':
        self.other_play_with = 'X'
    else:
        self.other_play_with = 'O'
def __init__(self, play_with):
    """Build the learner for a new ``TicTacToeProblem`` and record both marks.

    The parent initializer is called with the problem instance, the result
    of ``make_exponential_temperature(1000000, 0.01)`` as
    ``temperature_function`` and ``discount_factor=0.4``.

    :param play_with: mark stored as ``self.play_with``; ``self.other_play_with``
        becomes ``'X'`` if this equals ``'O'`` and ``'O'`` otherwise.
    """
    # Bind the parent initializer first, then evaluate its arguments.
    parent_init = super(TicTacToePlayer, self).__init__
    parent_init(
        TicTacToeProblem(),
        temperature_function=make_exponential_temperature(1000000, 0.01),
        discount_factor=0.4)
    self.play_with = play_with

    # Anything other than 'O' is answered with 'O'.
    opponent_mark = 'O'
    if play_with == 'O':
        opponent_mark = 'X'
    self.other_play_with = opponent_mark
# NOTE(review): the next two functions take ``self`` and read
# ``self.rewards``, so they appear to be methods of a class whose header is
# outside this excerpt -- re-indent them under that class when merging.
def is_completed(self, state):
    """Return True when ``state`` has an entry in ``self.rewards``.

    Membership is tested on the mapping itself rather than on ``keys()``,
    which avoids building and scanning a key list on Python 2. ``state``
    must be hashable, as ``reward`` already requires for its ``get`` lookup.
    """
    return state in self.rewards


def reward(self, state, agent):
    """Return the reward stored for ``state``, or ``-0.08`` if there is none.

    :param state: key looked up in ``self.rewards``.
    :param agent: accepted but not used here.
    """
    return self.rewards.get(state, -0.08)


class WumpusProblem(RLProblem):
    """Problem that offers the same four action names for every state."""

    def actions(self, state):
        """Return the list of available action names; ``state`` is ignored."""
        # NOTE(review): 'rigth' is kept exactly as spelled -- it is a runtime
        # value that other code may match on; confirm before correcting it.
        return ['up', 'down', 'left', 'rigth']


if __name__ == '__main__':
    agent = TDQLearner(
        WumpusProblem(),
        temperature_function=make_exponential_temperature(1000, 0.01),
        discount_factor=0.8)
    game = WumpusEnvironment(agent)
    p = PerformanceCounter([agent], ['Q-learner Agent'])

    # Parenthesized form prints the same text on Python 2 and Python 3.
    print('Training...')
    for _ in range(10000):
        game.run()

    # Report once after the training runs, then do one run with a viewer.
    p.show_statistics()
    game.run(viewer=WumpusViewer(game))
# NOTE(review): the next two functions take ``self`` and read
# ``self.rewards``, so they appear to be methods of a class whose header is
# outside this excerpt -- re-indent them under that class when merging.
def is_completed(self, state):
    """Return True when ``state`` has an entry in ``self.rewards``.

    Membership is tested on the mapping itself rather than on ``keys()``,
    which avoids building and scanning a key list on Python 2. ``state``
    must be hashable, as ``make_reward`` already requires for its ``get``
    lookup.
    """
    return state in self.rewards


def make_reward(self, state, agent):
    """Return the reward stored for ``state``, or ``-0.08`` if there is none.

    :param state: key looked up in ``self.rewards``.
    :param agent: accepted but not used here.
    """
    return self.rewards.get(state, -0.08)


class GridProblem(RLProblem):
    """Problem that offers the same four action names for every state."""

    def actions(self, state):
        """Return the list of available action names; ``state`` is ignored."""
        # NOTE(review): 'rigth' is kept exactly as spelled -- it is a runtime
        # value that other code may match on; confirm before correcting it.
        return ['up', 'down', 'left', 'rigth']


if __name__ == '__main__':
    agent = TDQLearner(
        GridProblem(),
        temperature_function=make_exponential_temperature(1000, 0.005),
        discount_factor=0.9)
    game = GridGame(agent)
    p = PerformanceCounter([agent], ['Q-learner Agent'])

    for _ in range(3000):
        game.run()

    # Report once after the training runs, then do one run with a viewer.
    p.show_statistics()
    game.run(viewer=Viewer())
def __init__(self, name):
    """Initialize the parent learner on a new ``TpFinalProblem``.

    The parent initializer is called with the problem instance, the result
    of ``make_exponential_temperature(1000000, 0.01)`` as
    ``temperature_function`` and ``discount_factor=0.4``.

    :param name: value stored on the instance as ``self.name``.
    """
    problem = TpFinalProblem()
    temperature = make_exponential_temperature(1000000, 0.01)
    super(TpFinalPlayer, self).__init__(
        problem,
        temperature_function=temperature,
        discount_factor=0.4)
    self.name = name