def __init__(self):
    # Set up a function-approximation variant of the base Q-learning agent.
    QLearningAgent.__init__(self)
    self.name = "FunctionAgent"
    # Discount factor of 1.0: undiscounted episodic returns.
    self.discount = 1.0
    # Step size for Q-value / weight updates.
    self.learn_rate = 0.1
    # epsilon = 0: no exploration, always act greedily.
    self.epsilon = 0
    # Linear feature weights, one vector per action (flap / no-flap).
    # NOTE(review): assumes the feature vector has 4 components — confirm
    # against stateToFeatures.
    self.theta_flap = np.zeros(4)
    self.theta_noflap = np.zeros(4)
    # Tabular Q-values over a discretized state grid (15x15x15x19) x 2 actions.
    self.Q_table = np.zeros((15,15,15,19,2))
def stateToFeatures(self, gameState, playerHand):
    """Return the base agent's features extended with the clamped deck count.

    The running count reported by the deck is constrained to [-10, 10] so the
    feature space stays bounded.
    """
    clampedCount = max(-10, min(10, gameState.getDeck().count))
    baseFeatures = QLearningAgent.stateToFeatures(self, gameState, playerHand)
    return baseFeatures + (clampedCount,)
def __init__(self):
    """Read cartpole hyperparameters from the ROS parameter server and
    delegate to the base Q-learning agent."""
    ns = '/cartpole_v0/'
    # Parameters are fetched in the same order as before; keyword arguments
    # are evaluated left-to-right, so the rospy lookups happen identically.
    QLearningAgent.__init__(
        self,
        state_size=rospy.get_param(ns + 'state_size'),
        action_size=rospy.get_param(ns + 'n_actions'),
        gamma=rospy.get_param(ns + 'gamma'),
        epsilon=rospy.get_param(ns + 'epsilon'),
        epsilon_decay=rospy.get_param(ns + 'epsilon_decay'),
        epsilon_min=rospy.get_param(ns + 'epsilon_min'),
        batch_size=rospy.get_param(ns + 'batch_size'),
    )
import collections
import copy

# Loaders are shared across every game in the training run.
moves_loader = MovesLoader()
pokemon_loader = PokemonLoader(moves_loader)
type_efficacy_loader = TypeEfficacyLoader()

# Shared Q-table: the player agent learns into Q, while each opponent plays
# from a frozen deep copy of the current table.
Q = collections.defaultdict(float)

counter = {'player': 0, 'opponent': 0, 'timeout': 0}
# Play until 1000 decisive (non-timeout) games have been recorded.
while counter['player'] + counter['opponent'] < 1000:
    print(counter)
    # Alternative player agents:
    # player = MinimaxAgent('player', pokemon_loader.getRandomTeam(), 1)
    # player = MinimaxPruningAgent('player', pokemon_loader.getRandomTeam(), 2)
    # player = BaselineAgent('player', pokemon_loader.getRandomTeam())
    # player = HumanAgent('player', pokemon_loader.getRandomTeam(), moves_loader)
    player = QLearningAgent('player', pokemon_loader.getRandomTeam(), Q, True)
    # player = RandomAgent('player', pokemon_loader.getRandomTeam())
    # Alternative opponent agents:
    # opponent = BaselineAgent('opponent', pokemon_loader.getRandomTeam())
    # opponent = RandomAgent('opponent', pokemon_loader.getRandomTeam())
    # opponent = HumanAgent('opponent', pokemon_loader.getRandomTeam(), moves_loader)
    opponent = QLearningAgent('opponent', pokemon_loader.getRandomTeam(), copy.deepcopy(Q), False)
    game_obj = Game(pokemon_loader, moves_loader, type_efficacy_loader, player, opponent)
    counter[game_obj.run()] += 1
# NOTE(review): the original repeated the counter reset and the `while` header
# here with a body containing only a print and commented-out agent lines —
# that loop never updates `counter` and would spin forever. Removed as an
# apparent copy-paste artifact.
import matplotlib.pyplot as plt
import environment
from q_learning_agent import QLearningAgent
import utils

if __name__ == "__main__":
    env = environment.Env()                         # Initializing Environment
    display = utils.GraphicDisplay(env)             # Initializing Graphic Display
    q_learning_agent = QLearningAgent(env.actions)  # Initializing Q-Learning Agent
    # NOTE(review): the original constructed `env` and `q_learning_agent` a
    # second time here, which left `display` bound to the first, stale Env
    # instance; the duplicate constructions were removed.

    # Run 1000 episodes.
    for episode in range(1000):
        env.reset_env()  # Running a new Environment
        state = env.initialState
        terminated = False
        display.reset_display()
        while not terminated:
            # Choose an action with the agent's e-greedy strategy.
            action = q_learning_agent.get_action(state)
            new_state, reward, terminated = env.step(state, action)
            # Q-learning update from the observed transition.
            q_learning_agent.update_q_function(state, action, reward, new_state)
            # Redraw only when the agent actually moved.
            if not (state == new_state).all():
                display.step(action, q_learning_agent.q_values_table)
            state = new_state
import gym

from q_learning_agent import QLearningAgent

# Build the Taxi environment, train the tabular agent headlessly,
# then evaluate the learned policy.
env = gym.make("Taxi-v2")
agent = QLearningAgent(env)
agent.train(to_render=False)
agent.test()
realRounds = args.realRounds
randomSeed = args.randomSeed
iterations = args.iterations

# A non-positive round count makes no sense; fail fast before playing.
# (print() with a single argument behaves identically under Python 2 and 3.)
if realRounds <= 0:
    print("Number of real rounds must be > 0 but was {0}".format(realRounds))
    sys.exit(1)

# Seed only when explicitly requested, so runs are reproducible on demand.
if randomSeed:
    random.seed(randomSeed)

playerAgentStrings = args.playerAgents
playerAgents = []
# Construct one agent instance per requested agent name.
for playerAgentString in playerAgentStrings:
    if playerAgentString == "QLearningAgent":
        playerAgents.append(QLearningAgent(args.alpha, args.discount, args.epsilon))
    elif playerAgentString == "CountLearningAgent":
        playerAgents.append(CountLearningAgent())
    elif playerAgentString == "AceCountLearningAgent":
        playerAgents.append(AceCountLearningAgent())
    elif playerAgentString == "ReflexAgent":
        playerAgents.append(ReflexAgent())
    elif playerAgentString == "StandingAgent":
        playerAgents.append(StandingAgent())
    elif playerAgentString == "NoBustAgent":
        playerAgents.append(NoBustAgent())
    elif playerAgentString == "HumanAgent":
        playerAgents.append(HumanAgent())
    elif playerAgentString == "ValueIterationAgent":
        # ValueIterationAgent needs an explicit iteration count.
        if not iterations:
            print("Number of iterations must be specified with ValueIterationAgent")
def gameOver(self, gameState, hand, reward):
    """Run the base Q-learning end-of-game update, then fold every hand that
    was visible this game (all player hands plus the dealer's) into the
    running count via updateCount.
    """
    QLearningAgent.gameOver(self, gameState, hand, reward)
    # Iterate the dict directly (keys are the hands) and use a distinct loop
    # variable: the original reused `hand`, shadowing the parameter.
    for playerHand in gameState.playerHands:
        self.updateCount(playerHand)
    self.updateCount(gameState.dealerHand)
def __init__(self, alpha=0.2, discount=0.8, epsilon=0.1):
    """Q-learning agent that additionally maintains a running count.

    Args:
        alpha: learning rate forwarded to the base agent.
        discount: discount factor forwarded to the base agent.
        epsilon: exploration probability forwarded to the base agent.
    """
    QLearningAgent.__init__(self, alpha=alpha, discount=discount, epsilon=epsilon)
    # Running count, starts at zero.
    # NOTE(review): presumably a card count updated by updateCount — confirm.
    self.count = 0
from itertools import cycle
from pygame.locals import *
from q_learning_agent import QLearningAgent
from q_learning_agent_greedy import QLearningAgentGreedy

# Select train/run mode from the command line.
training_mode = None
operation = sys.argv[1]
if operation == 'train':
    training_mode = True
elif operation == 'run':
    training_mode = False
else:
    # Unknown mode: previously execution continued with training_mode=None;
    # fail fast with a usage message instead.
    sys.exit("Usage: %s train|run [greedy]" % sys.argv[0])

# Select the agent variant.
if len(sys.argv) == 2:
    Agent = QLearningAgent(training_mode)
elif sys.argv[2] == 'greedy':
    Agent = QLearningAgentGreedy(training_mode)
else:
    # Unknown variant: previously `Agent` was simply never assigned, causing a
    # NameError later; exit with a clear message instead.
    sys.exit("Unknown agent variant: %s" % sys.argv[2])

# Display / game constants.
FPS = 30
FPS_CLOCK = None
SCREEN_WIDTH = 288
SCREEN_HEIGHT = 512
SCREEN = None
PIPE_GAP_SIZE = 100  # gap between upper and lower pipe
BASE_Y = SCREEN_HEIGHT * 0.79
IMAGES = {}
HITMASKS = {}
# Label and save the policy plot for the current agent.
total_episodes = nb_episodes * nb_iterations
plt.xlabel("Distance to the next pipe")
plt.ylabel("next_pipe_top_y - player_y")
plt.title("Policy after {}{}".format(total_episodes, " episodes for " + agent_name))
plt.savefig("{}/{}_p_{}ep.jpg".format(folder_name, agent_name, total_episodes))
#%%
# Monte-Carlo agent configured for a pipe distance of 164.
agent164 = MonteCarloAgent()
agent164.pipeDist = 164
agent164.name = "MonteCarlo"
agent164.discount = 0.9

# Plain Q-learning agent.
agentQ283 = QLearningAgent()
agentQ283.name = "Q-learning"

# Best-performing Monte-Carlo configuration.
agentBest = MonteCarloAgentBest()
agentBest.name = "MonteCarloBest"

# Training / evaluation schedule.
nb_episodes = 5
nb_episods_eval = 5
nb_iterations = 3

# Train and evaluate each agent under the same schedule.
s164 = train_and_evaluate(nb_episodes, nb_episods_eval, nb_iterations, agent164)
sq283 = train_and_evaluate(nb_episodes, nb_episods_eval, nb_iterations, agentQ283)
sb = train_and_evaluate(nb_episodes, nb_episods_eval, nb_iterations, agentBest)
def __init__(self):
    # Delegate straight to the base Q-learning agent.
    # NOTE(review): the positional arguments are presumably
    # (state_size=4, action_size=2) — confirm against
    # QLearningAgent.__init__'s signature.
    QLearningAgent.__init__(self, 4, 2)