import numpy as np

def __init__(self):
    QLearningAgent.__init__(self)
    self.name = "FunctionAgent"

    self.discount = 1.0    # undiscounted returns
    self.learn_rate = 0.1
    self.epsilon = 0       # purely greedy: no random exploration

    # Linear function-approximation weights, one vector per action
    self.theta_flap = np.zeros(4)
    self.theta_noflap = np.zeros(4)
    self.Q_table = np.zeros((15, 15, 15, 19, 2))
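Here theta_flap and theta_noflap point to a linear model of Q per action. As a rough sketch of how such weights are typically read and updated (the feature vector phi, the update rule, and all names below are assumptions, not this repo's code):

import numpy as np

def q_value(theta, phi):
    # Linear approximation: Q(s, a) = theta_a . phi(s)
    return float(np.dot(theta, phi))

def td_step(theta, phi, reward, q_next_max, learn_rate=0.1, discount=1.0):
    # Semi-gradient TD(0): nudge theta toward the one-step bootstrap target
    td_error = reward + discount * q_next_max - np.dot(theta, phi)
    theta += learn_rate * td_error * phi
    return theta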
Example 2
def stateToFeatures(self, gameState, playerHand):
    # Clamp the running count to [-10, 10] before adding it to the feature tuple
    reportedCount = gameState.getDeck().count
    reportedCount = max(-10, min(reportedCount, 10))
    return QLearningAgent.stateToFeatures(self, gameState, playerHand) + (reportedCount,)
Example 3
import rospy

def __init__(self):
    # Pull all hyperparameters from the ROS parameter server
    state_size = rospy.get_param('/cartpole_v0/state_size')
    action_size = rospy.get_param('/cartpole_v0/n_actions')
    gamma = rospy.get_param('/cartpole_v0/gamma')
    epsilon = rospy.get_param('/cartpole_v0/epsilon')
    epsilon_decay = rospy.get_param('/cartpole_v0/epsilon_decay')
    epsilon_min = rospy.get_param('/cartpole_v0/epsilon_min')
    batch_size = rospy.get_param('/cartpole_v0/batch_size')

    QLearningAgent.__init__(self,
                            state_size=state_size,
                            action_size=action_size,
                            gamma=gamma,
                            epsilon=epsilon,
                            epsilon_decay=epsilon_decay,
                            epsilon_min=epsilon_min,
                            batch_size=batch_size)
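This constructor assumes the /cartpole_v0/* parameters are already on the ROS parameter server, normally loaded from a YAML file by the launch file. A minimal sketch that seeds them by hand, with placeholder values chosen for illustration only:

import rospy

# Placeholder values; real runs load these from the package's YAML config
rospy.set_param('/cartpole_v0/state_size', 4)
rospy.set_param('/cartpole_v0/n_actions', 2)
rospy.set_param('/cartpole_v0/gamma', 0.99)
rospy.set_param('/cartpole_v0/epsilon', 1.0)
rospy.set_param('/cartpole_v0/epsilon_decay', 0.995)
rospy.set_param('/cartpole_v0/epsilon_min', 0.05)
rospy.set_param('/cartpole_v0/batch_size', 64)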
Example 4

import collections, copy

# MovesLoader, PokemonLoader, TypeEfficacyLoader, the agent classes, and Game
# are defined elsewhere in this project.
moves_loader = MovesLoader()
pokemon_loader = PokemonLoader(moves_loader)
type_efficacy_loader = TypeEfficacyLoader()

# Shared Q-table; unseen (state, action) keys default to 0.0
Q = collections.defaultdict(float)

counter = {'player': 0, 'opponent': 0, 'timeout': 0}
while counter['player'] + counter['opponent'] < 1000:
    print(counter)
    # player = MinimaxAgent('player', pokemon_loader.getRandomTeam(), 1)
    # player = MinimaxPruningAgent('player', pokemon_loader.getRandomTeam(), 2)
    # player = BaselineAgent('player', pokemon_loader.getRandomTeam())
    # player = HumanAgent('player', pokemon_loader.getRandomTeam(), moves_loader)
    player = QLearningAgent('player', pokemon_loader.getRandomTeam(), Q, True)
    # player = RandomAgent('player', pokemon_loader.getRandomTeam())
    # opponent = BaselineAgent('opponent', pokemon_loader.getRandomTeam())
    # opponent = RandomAgent('opponent', pokemon_loader.getRandomTeam())
    # opponent = HumanAgent('opponent', pokemon_loader.getRandomTeam(), moves_loader)
    # The opponent plays with an independent snapshot of the current Q-table
    opponent = QLearningAgent('opponent', pokemon_loader.getRandomTeam(),
                              copy.deepcopy(Q), False)
    game_obj = Game(pokemon_loader, moves_loader, type_efficacy_loader, player,
                    opponent)
    counter[game_obj.run()] += 1

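The defaultdict(float) keeps the tabular update trivial, since unseen (state, action) keys read as 0.0. A minimal sketch of the standard Q-learning backup an agent built on this table would perform (the function name and the (state, action) key layout are assumptions):

import collections

Q = collections.defaultdict(float)
alpha, gamma = 0.1, 0.9  # assumed hyperparameters

def q_backup(state, action, reward, next_state, legal_actions):
    # Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
    best_next = max((Q[(next_state, a)] for a in legal_actions), default=0.0)
    Q[(state, action)] += alpha * (reward + gamma * best_next - Q[(state, action)])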
Example 5
import matplotlib.pyplot as plt
import environment
from q_learning_agent import QLearningAgent
import utils

if __name__ == "__main__":
    env = environment.Env()  # initialize the environment
    display = utils.GraphicDisplay(env)  # initialize the graphic display
    q_learning_agent = QLearningAgent(env.actions)  # initialize the Q-learning agent
    # Run 1000 episodes
    for episode in range(1000):
        env.reset_env()  # reset the environment for a new episode
        state = env.initialState
        terminated = False
        display.reset_display()
        while not terminated:
            action = q_learning_agent.get_action(state)  # epsilon-greedy action selection
            new_state, reward, terminated = env.step(state, action)
            q_learning_agent.update_q_function(state, action, reward, new_state)  # one-step Q-learning update

            if not (state == new_state).all():
                display.step(action, q_learning_agent.q_values_table)

            state = new_state
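get_action is imported from q_learning_agent and not shown here; behind such an interface, a typical epsilon-greedy implementation looks roughly like this (the q_values_table indexing is an assumption based on the display call above):

import random
import numpy as np

def get_action(self, state, epsilon=0.1):
    # Explore with probability epsilon, otherwise take the greedy action
    if random.random() < epsilon:
        return random.choice(self.actions)
    q_values = self.q_values_table[tuple(state)]  # assumed table layout
    return self.actions[int(np.argmax(q_values))]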
Example 6
import gym
from q_learning_agent import QLearningAgent

env = gym.make("Taxi-v2")

agent = QLearningAgent(env)
agent.train(to_render=False)

agent.test()
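train and test are defined in q_learning_agent and not shown; a sketch of the classic loop such a train method would wrap (old-style gym step API, as implied by Taxi-v2; choose_action and update are hypothetical helpers):

def train(self, episodes=5000, to_render=False):
    for _ in range(episodes):
        state = self.env.reset()
        done = False
        while not done:
            action = self.choose_action(state)              # hypothetical helper
            next_state, reward, done, _ = self.env.step(action)
            self.update(state, action, reward, next_state)  # hypothetical helper
            if to_render:
                self.env.render()
            state = next_state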
Example 7
    realRounds = args.realRounds
    randomSeed = args.randomSeed
    iterations = args.iterations
    if realRounds <= 0:
        print "Number of real rounds must be > 0 but was {0}".format(realRounds)
        sys.exit(1)

    if randomSeed:
        random.seed(randomSeed)

    playerAgentStrings = args.playerAgents

    playerAgents = []
    for playerAgentString in playerAgentStrings:
        if playerAgentString == "QLearningAgent":
            playerAgents.append(QLearningAgent(args.alpha, args.discount, args.epsilon))
        elif playerAgentString == "CountLearningAgent":
            playerAgents.append(CountLearningAgent())            
        elif playerAgentString == "AceCountLearningAgent":
            playerAgents.append(AceCountLearningAgent())
        elif playerAgentString == "ReflexAgent":
            playerAgents.append(ReflexAgent())
        elif playerAgentString == "StandingAgent":
            playerAgents.append(StandingAgent())
        elif playerAgentString == "NoBustAgent":
            playerAgents.append(NoBustAgent())
        elif playerAgentString == "HumanAgent":
            playerAgents.append(HumanAgent())
        elif playerAgentString == "ValueIterationAgent":
            if not iterations:
                print "Number of iterations must be specified with ValueIterationAgent"
Example 8
def gameOver(self, gameState, hand, reward):
    QLearningAgent.gameOver(self, gameState, hand, reward)
    # Update the running count with every hand revealed this round
    for playerHand in gameState.playerHands:
        self.updateCount(playerHand)
    self.updateCount(gameState.dealerHand)
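updateCount itself is not shown; a sketch of what a Hi-Lo style count update could look like (the card accessors and the exact counting rule are assumptions):

def updateCount(self, hand):
    for card in hand.getCards():   # hypothetical accessor
        value = card.getValue()    # hypothetical accessor
        if 2 <= value <= 6:
            self.count += 1        # low cards raise the count
        elif value >= 10 or value == 1:
            self.count -= 1        # tens, faces, and aces lower it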
Example 9
def __init__(self, alpha=0.2, discount=0.8, epsilon=0.1):
    QLearningAgent.__init__(self, alpha=alpha, discount=discount, epsilon=epsilon)
    self.count = 0  # running card count, updated as hands are revealed
Example 10
import sys
from itertools import cycle
from pygame.locals import *

from q_learning_agent import QLearningAgent
from q_learning_agent_greedy import QLearningAgentGreedy

# Usage: <script> {train|run} [greedy]
training_mode = None
operation = sys.argv[1]

if operation == 'train':
    training_mode = True
elif operation == 'run':
    training_mode = False
else:
    sys.exit("Unknown operation '{0}': expected 'train' or 'run'".format(operation))

if len(sys.argv) == 2:
    Agent = QLearningAgent(training_mode)
elif sys.argv[2] == 'greedy':
    Agent = QLearningAgentGreedy(training_mode)

FPS = 30
FPS_CLOCK = None

SCREEN_WIDTH = 288   # pixels
SCREEN_HEIGHT = 512  # pixels
SCREEN = None

PIPE_GAP_SIZE = 100  # vertical gap between upper and lower pipe, in pixels
BASE_Y = SCREEN_HEIGHT * 0.79  # y-coordinate of the ground line

IMAGES = {}
HITMASKS = {}
Example 11
        plt.xlabel("Distance to the next pipe")
        plt.ylabel("next_pipe_top_y - player_y")
        plt.title("Policy after " + str(nb_episodes * nb_iterations) +
                  " episodes for " + agent_name)
        plt.savefig(folder_name + "/" + agent_name + "_p_" +
                    str(nb_episodes * nb_iterations) + "ep.jpg")


#%%
agent164 = MonteCarloAgent()
agent164.pipeDist = 164

agent164.name = "MonteCarlo"
agent164.discount = 0.9

agentQ283 = QLearningAgent()
agentQ283.name = "Q-learning"

agentBest = MonteCarloAgentBest()
agentBest.name = "MonteCarloBest"

nb_episodes = 5
nb_episodes_eval = 5
nb_iterations = 3

s164 = train_and_evaluate(nb_episodes, nb_episodes_eval, nb_iterations,
                          agent164)
sq283 = train_and_evaluate(nb_episodes, nb_episodes_eval, nb_iterations,
                           agentQ283)
sb = train_and_evaluate(nb_episodes, nb_episodes_eval, nb_iterations, agentBest)
Example 12
def __init__(self):
    QLearningAgent.__init__(self, 4, 2)  # presumably state_size=4 and action_size=2, as in Example 3