Example 1
    def __init__(self,
                 size,
                 ships,
                 nb_samples=1000,
                 player1="human",
                 player2="random"):
        self.board_player1 = Board(size)
        self.board_player2 = Board(size)
        self.size = size
        self.ships = ships

        if player1 == "human":
            self.player1 = HumanAgent()
        elif player1 == "MC":
            self.player1 = MCAgent(ships=ships,
                                   size=size,
                                   nb_samples=nb_samples)
        elif player1 == "MC2":
            self.player1 = MCAgent(ships=ships,
                                   size=size,
                                   nb_samples=nb_samples)
        else:
            self.player1 = RandomAgent(size=size)

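        # Player 2's Monte Carlo agents get a copy of the ship list;
        # player 1's agents use the original list passed in.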
        if player2 == "human":
            self.player2 = HumanAgent()
        elif player2 == "MC":
            self.player2 = MCAgent(ships=ships.copy(),
                                   size=size,
                                   nb_samples=nb_samples)
        elif player2 == "MC2":
            self.player2 = MCAgent(ships=ships.copy(),
                                   size=size,
                                   nb_samples=nb_samples)
        else:
            self.player2 = RandomAgent(size=size)
Example 2
    def play_with_human(self, save_as_file='TD_human_policy.dat'):
        ProgramDriver.load_model(save_as_file)
        env = Env()
        agents = [
            TD_agent(self.epsilon, self.alpha, self.decay_rate),
            HumanAgent(1),
        ]

        while True:
            env.reset()
            td_agent = agents[0]
            td_agent.decay_epsilon()
            env.render()

            while True:
                curr_qttt, mark = env.get_state()

                agent = ProgramDriver.get_agent_by_mark(agents, mark)

                free_qblock_id_lists, collapsed_qttts = env.get_valid_moves()

                collapsed_qttt, agent_move = agent.act(free_qblock_id_lists,
                                                       collapsed_qttts, mark)

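                # A None move from the agent (e.g. the human chose to quit)
                # saves the model and ends the session.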
                if collapsed_qttt is None:
                    ProgramDriver.save_model(save_as_file, 0, self.epsilon,
                                             self.alpha, self.decay_rate)
                    print("Model saved.")
                    sys.exit()

                next_qttt, next_round, reward, done = env.step(
                    collapsed_qttt, agent_move, mark)

                print('')
                env.render()

                td_agent.bellman_backup(curr_qttt, next_qttt, reward, mark)

                if done:
                    GameTree.set_state_value(next_qttt.get_state(), reward)
                    next_qttt.show_result()
                    break
Example 3
def run_game(print_board=False):
    game = Connect4Board()
    next_player = {
        Player.PLAYER_1: Player.PLAYER_2,
        Player.PLAYER_2: Player.PLAYER_1
    }
    curr_player = Player.PLAYER_1

    player_1_agent = HumanAgent("Player 1")
    #  player_2_agent = HumanAgent("Player 2")
    player_2_agent = MinimaxAgent(Player.PLAYER_2)
    player_map = {
        Player.PLAYER_1: player_1_agent,
        Player.PLAYER_2: player_2_agent
    }

    while True:
        if print_board:
            game.print_board()
            print("================================")

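        # add_piece returns the updated board, which is rebound to `game`.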
        game = game.add_piece(curr_player,
                              player_map[curr_player].get_action(game))

        game_state = game.check_game_state(curr_player)
        if game_state == GameState.DRAW:
            print("DRAW!!!")
            game.print_board()
            return
        elif game_state == GameState.PLAYER_1_WIN:
            print("PLAYER 1 WINS!!!")
            game.print_board()
            return
        elif game_state == GameState.PLAYER_2_WIN:
            print("PLAYER 2 WINS!!!")
            game.print_board()
            return
        curr_player = next_player[curr_player]
Example 4
def learnplay(max_episode, epsilon, alpha, model_file, show_number):
    _learn(max_episode, epsilon, alpha, model_file)
    _play(model_file, HumanAgent('O'), show_number)
Example 5
def play(load_file, show_number):
    _play(load_file, HumanAgent('O'), show_number)
Example 6
def execute_experiment(args):
    #### HARD RULES
    if args['parallel'] != 0:
        args['use_gpu'] = 0
    if args['agent_type'] == 'human':
        args['use_gpu'] = 0
        args['render_delay'] = 0
        args['mode'] = 'play'
        args['display_prob'] = 1
#        args['action_repeat'] = 1

    if args['env_name'] == 'key_mdp-v0':
        args['action_repeat'] = 1
    arch_names = [n for n in args.keys() if 'architecture' in n]
    for arch_name in arch_names:
        if args[arch_name] is not None:
            args[arch_name] = args[arch_name].split('-')

    cnf = configuration.Configuration()

    #Global settings
    gl_st = configuration.GlobalSettings(args)

    cnf.set_global_settings(gl_st)

    #Agent settings
    if args['agent_type'] == 'dqn':
        ag_st = configuration.DQNSettings(args['scale'])
    elif args['agent_type'] == 'hdqn':
        ag_st = configuration.hDQNSettings(args['scale'])
    elif args['agent_type'] == 'human':
        ag_st = configuration.HumanSettings()
    else:
        raise ValueError("Wrong agent %s" % args['agent_type'])

    ag_st.update(args)
    cnf.set_agent_settings(ag_st)

    #Environment settings
    utils.insert_dirs(cnf.gl.env_dirs)
    if args['env_name'] == 'SF-v0':
        #Space Fortress
        env_st = configuration.SpaceFortressSettings(new_attrs=args)
    elif args['env_name'] == 'key_mdp-v0':
        #MDP
        env_st = configuration.Key_MDPSettings(new_attrs=args)
    else:
        raise ValueError("Wrong env_name %s, (env_names: s%)"\
                         .format(args['env_name'], ', '.join(CT.env_names)))

    env_st.set_reward_function()
    cnf.set_environment_settings(env_st)
    environment = Environment(cnf)

    tf.set_random_seed(gl_st.random_seed)
    random.seed(gl_st.random_seed)

    if gl_st.gpu_fraction == '':
        raise ValueError("--gpu_fraction should be defined")

    if not gl_st.use_gpu:
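        # Setting CUDA_VISIBLE_DEVICES to -1 hides all GPUs from TensorFlow.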
        os.environ['CUDA_VISIBLE_DEVICES'] = "-1"

    frac = utils.calc_gpu_fraction(gl_st.gpu_fraction)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=frac)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:

        if ag_st.agent_type == 'dqn':
            agent = DQNAgent(cnf, environment, sess)

        elif ag_st.agent_type == 'hdqn':
            agent = HDQNAgent(cnf, environment, sess)

        elif ag_st.agent_type == 'human':
            agent = HumanAgent(cnf, environment)
        else:
            raise ValueError("Wrong agent %s".format())

        if ag_st.mode == 'train':
            agent.train()
        elif ag_st.mode == 'play':
            agent.play()
        elif ag_st.mode == 'graph':
            pass
        else:
            raise ValueError("Wrong mode " + str(ag_st.mode))

        #agent.show_attrs()
    tf.reset_default_graph()
Example 7
from moves_loader import MovesLoader
from pokemon_loader import PokemonLoader  # assumed module path, mirroring the other loader imports
from type_efficacy_loader import TypeEfficacyLoader

from random_agent import RandomAgent
from minimax_agent import MinimaxAgent
from minimax_pruning_agent import MinimaxPruningAgent
from baseline_agent import BaselineAgent
from human_agent import HumanAgent

from game import Game

moves_loader = MovesLoader()
pokemon_loader = PokemonLoader(moves_loader)
type_efficacy_loader = TypeEfficacyLoader()

counter = {'player': 0, 'opponent': 0, 'timeout': 0}
while counter['player'] + counter['opponent'] < 1:
    print counter
    # player = MinimaxAgent('player', pokemon_loader.getRandomTeam(), 1)
    # player = MinimaxPruningAgent('player', pokemon_loader.getRandomTeam(), 2)
    # player = BaselineAgent('player', pokemon_loader.getRandomTeam())
    player = HumanAgent('player', pokemon_loader.getRandomTeam(), moves_loader)
    # player = RandomAgent('player', pokemon_loader.getRandomTeam())
    opponent = BaselineAgent('opponent', pokemon_loader.getRandomTeam())
    # opponent = RandomAgent('opponent', pokemon_loader.getRandomTeam())
    # opponent = HumanAgent('opponent', pokemon_loader.getRandomTeam(), moves_loader)
    game_obj = Game(pokemon_loader, moves_loader, type_efficacy_loader, player, opponent)
    counter[game_obj.run()] += 1

print counter
Example 8
  p1.learn(states_actions_p1, game_reward_p1)
  #p2.learn(states_actions_p2, game_reward_p2)

  if game_reward_p1 == 1:
    win_p1 += 1
  elif game_reward_p1 == 0:
    draw += 1
  
  if game_num % BATCH_SIZE == 0:
    print("Batch: {}".format(game_num / BATCH_SIZE))
    print("P1:   {}\nDRAW: {}\nP2:   {}".format(win_p1, draw, BATCH_SIZE - win_p1 - draw))
    print()
    win_p1 = 0
    draw = 0

p2 = HumanAgent()

# Games to display what has been learnt.
for game_num in range(NUM_OF_DISPLAY_GAMES):
  print("\n## Game ##")

  game = TicTacToe()

  is_terminal = False
  game_reward_p1 = 0
  game_reward_p2 = 0

  # p1 starts.
  turn = 1
  state = game.get_state()
Example 9
    playerAgents = []
    for playerAgentString in playerAgentStrings:
        if playerAgentString == "QLearningAgent":
            playerAgents.append(QLearningAgent(args.alpha, args.discount, args.epsilon))
        elif playerAgentString == "CountLearningAgent":
            playerAgents.append(CountLearningAgent())            
        elif playerAgentString == "AceCountLearningAgent":
            playerAgents.append(AceCountLearningAgent())
        elif playerAgentString == "ReflexAgent":
            playerAgents.append(ReflexAgent())
        elif playerAgentString == "StandingAgent":
            playerAgents.append(StandingAgent())
        elif playerAgentString == "NoBustAgent":
            playerAgents.append(NoBustAgent())
        elif playerAgentString == "HumanAgent":
            playerAgents.append(HumanAgent())
        elif playerAgentString == "ValueIterationAgent":
            if not iterations:
                print "Number of iterations must be specified with ValueIterationAgent"
                sys.exit(1)
            playerAgents.append(ValueIterationAgent(iterations))
        else:
            print "Unrecognized agent {0}".format(playerAgentString)
            sys.exit(1)

    dealerAgent = DealerAgent()

    if trainingRounds > 0:
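        # Only agents that report needsTraining() take part in the training game.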
        trainingAgents = filter(lambda x: x.needsTraining(), playerAgents)
        print "Training ({0} rounds)...".format(trainingRounds)
        game = Game(dealerAgent, trainingAgents)
Example 10
except:
    bestTime = np.inf
    track_length = 500  # 5700
print('bestTime: ', bestTime)
save = False

#if csv with car trajectory does not exist, build empty one with header
"""try:
    car_trajectory_df = pd.read_csv('car_trajectory_monza.csv')
except:
    df = pd.DataFrame(obs_dic)
    df.to_csv(index = False, path_or_buf = 'car_trajectory_monza.csv')"""

# Generate a Torcs environment
env = TorcsEnv(vision=False, throttle=True, gear_change=False, brake=True)
agent = HumanAgent(max_steps, use_logitech_g27=False)
#agent = HumanAgent(max_steps, use_logitech_g27=True)


print("TORCS Experiment Start.")
for i in range(episode_count):
    print("Episode : " + str(i))

    if np.mod(i, 3) == 0:
        # Sometimes you need to relaunch TORCS because of the memory leak error
        ob = env.reset(relaunch=True)
    else:
        ob = env.reset()

    total_reward = 0.
Example 11
    pygame.draw.line(canvas, BLACK, [(WIDTH_SCALE * (1-state.paddle_x)), 0], [(WIDTH_SCALE * (1-state.paddle_x)), HEIGHT_SCALE], 5)
    pygame.draw.line(canvas, BLACK, [0, 0], [WIDTH_SCALE, 0], 5)
    pygame.draw.line(canvas, BLACK, [0, HEIGHT_SCALE], [WIDTH_SCALE, HEIGHT_SCALE], 5)
    # Draw Ball and Paddle
    pygame.draw.circle(canvas, BLACK, [int(state.ball_x * WIDTH_SCALE), int(state.ball_y * HEIGHT_SCALE)], BALL_RADIUS, 0)
    pygame.draw.polygon(canvas, BLACK, [[int(WIDTH_SCALE * state.paddle_x), int(state.paddle_y * HEIGHT_SCALE)], [int((WIDTH_SCALE * state.paddle_x) + PADDLE_WIDTH_SCALE - (state.paddle_x * 2 * PADDLE_WIDTH_SCALE)), int(state.paddle_y * HEIGHT_SCALE)], [int((WIDTH_SCALE * state.paddle_x) + PADDLE_WIDTH_SCALE - (state.paddle_x * 2 * PADDLE_WIDTH_SCALE)), int((state.paddle_y * HEIGHT_SCALE) + PADDLE_HEIGHT_SCALE)], [int(WIDTH_SCALE * state.paddle_x), int((state.paddle_y * HEIGHT_SCALE) + PADDLE_HEIGHT_SCALE)]], 0)
    # Update Scores
    myfont = pygame.font.SysFont("Comic Sans MS", 25)
    label = myfont.render("Score " + str(score), 1, BLACK)
    canvas.blit(label, ((WIDTH_SCALE//2)-50, 20))

"""""""""""""""""""""
SIMULATION CODE BELOW
"""""""""""""""""""""

agent = HumanAgent()
agent.set_paddle_x(PADDLE_ON_RIGHT)
curr_state = get_initial_state(PADDLE_ON_RIGHT)
score = 0

while True:
    draw(window, curr_state, score)
    for event in pygame.event.get():
        if event.type == KEYDOWN:
            if event.key == K_UP:
                agent.up_pressed()
            if event.key == K_DOWN:
                agent.down_pressed()
        else:
            agent.nothing_pressed()
    if curr_state.game_over:
Example 12
#from lstm_agent import NeuralAgent as lstmAgent
import numpy as np
import pandas as pd

episode_count = 1
max_steps = 10000
reward = 0
done = False
#collect_data_mode = False
collect_data_mode = True
step = 0

# Generate a Torcs environment
env = TorcsEnv(vision=False, throttle=True, gear_change=False, brake=True)

agent = HumanAgent(max_steps, use_logitech_g27=False)
#agent = HumanAgent(max_steps, use_logitech_g27=True)

print("TORCS Experiment Start.")
for i in range(episode_count):
    print("Episode : " + str(i))

    if np.mod(i, 3) == 0:
        # Sometimes you need to relaunch TORCS because of the memory leak error
        ob = env.reset(relaunch=True)
    else:
        ob = env.reset()

    total_reward = 0.

    if step == max_steps and not done and collect_data_mode:
Example 13
         ],
         [
             int(WIDTH_SCALE),
             int((state.paddle1_y * HEIGHT_SCALE) + PADDLE_HEIGHT_SCALE)
         ]], 0)
    # Update Scores
    myfont = pygame.font.SysFont("Comic Sans MS", 25)
    label = myfont.render("Total Hits " + str(total_hits), 1, BLACK)
    canvas.blit(label, ((WIDTH_SCALE // 2) - 75, 20))


"""""" """""" """""" """
SIMULATION CODE BELOW
""" """""" """""" """"""

left_agent = HumanAgent()
right_agent = HumanAgent()
curr_state = get_initial_state()
total_hits = 0

while True:
    draw(window, curr_state, total_hits)
    for event in pygame.event.get():
        if event.type == KEYDOWN:
            if event.key == K_w:
                left_agent.up_pressed()
            if event.key == K_s:
                left_agent.down_pressed()
            if event.key == K_UP:
                right_agent.up_pressed()
            if event.key == K_DOWN:
Example 14
import pprint
pp = pprint.PrettyPrinter(indent=4)
p = pp.pprint

# Make environment
env = WhaleEnv(
    config={
        'active_player': 0,
        'seed': datetime.utcnow().microsecond,
        'env_num': 1,
        'num_players': 4
    })
episode_num = 1

# Set up agents
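# agent_0 (the human) occupies seat 0, the index given as 'active_player' in the config above.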
agent_0 = HumanAgent(action_num=env.action_num)
agent_1 = RandomAgent(action_num=env.action_num)
agent_2 = RandomAgent(action_num=env.action_num)
agent_3 = RandomAgent(action_num=env.action_num)
env.set_agents([agent_0, agent_1, agent_2, agent_3])

for episode in range(episode_num):

    # Generate data from the environment
    trajectories = env.run(is_training=False)

    # Print out the trajectories
    print('\nEpisode {}'.format(episode))
    i = 0
    for trajectory in trajectories:
        print('\tPlayer {}'.format(i))