def __init__(self, size, ships, nb_samples=1000, player1="human", player2="random"):
    self.board_player1 = Board(size)
    self.board_player2 = Board(size)
    self.size = size
    self.ships = ships
    # "MC" and "MC2" currently resolve to the same MCAgent.
    if player1 == "human":
        self.player1 = HumanAgent()
    elif player1 in ("MC", "MC2"):
        self.player1 = MCAgent(ships=ships, size=size, nb_samples=nb_samples)
    else:
        self.player1 = RandomAgent(size=size)
    if player2 == "human":
        self.player2 = HumanAgent()
    elif player2 in ("MC", "MC2"):
        # player2 gets a copy so the two agents never share the mutable ship list.
        self.player2 = MCAgent(ships=ships.copy(), size=size, nb_samples=nb_samples)
    else:
        self.player2 = RandomAgent(size=size)
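# A minimal sketch of one agent constructor the __init__ above expects.
# Only the RandomAgent(size=...) signature comes from the snippet; the
# shot-selection method name (choose_shot) and its behavior are
# illustrative assumptions, not the original implementation.
import random

class RandomAgent:
    def __init__(self, size):
        self.size = size
        # Every cell is a candidate shot until it has been tried once.
        self.untried = [(r, c) for r in range(size) for c in range(size)]

    def choose_shot(self):
        # Fire at a uniformly random cell that has not been shot yet.
        shot = random.choice(self.untried)
        self.untried.remove(shot)
        return shot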
def play_with_human(self, save_as_file='TD_human_policy.dat'):
    ProgramDriver.load_model(save_as_file)
    env = Env()
    agents = [
        TD_agent(self.epsilon, self.alpha, self.decay_rate),
        HumanAgent(1),
    ]
    while True:
        env.reset()
        td_agent = agents[0]
        td_agent.decay_epsilon()
        env.render()
        while True:
            curr_qttt, mark = env.get_state()
            agent = ProgramDriver.get_agent_by_mark(agents, mark)
            free_qblock_id_lists, collapsed_qttts = env.get_valid_moves()
            collapsed_qttt, agent_move = agent.act(free_qblock_id_lists,
                                                   collapsed_qttts, mark)
            # A None qttt from the agent is the quit signal: persist and exit.
            if collapsed_qttt is None:
                ProgramDriver.save_model(save_as_file, 0, self.epsilon,
                                         self.alpha, self.decay_rate)
                print("Model saved.")
                sys.exit()
            next_qttt, next_round, reward, done = env.step(
                collapsed_qttt, agent_move, mark)
            print('')
            env.render()
            td_agent.bellman_backup(curr_qttt, next_qttt, reward, mark)
            if done:
                GameTree.set_state_value(next_qttt.get_state(), reward)
                next_qttt.show_result()
                break
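# The loop above relies on two conventions visible in its calls:
# agent.act(...) returns a (collapsed_qttt, agent_move) pair, and a None
# qttt triggers the save-and-exit path. A hedged sketch of a HumanAgent.act
# honoring that contract; the prompt wording, the 'q' convention, and the
# index-based move selection are assumptions:
class HumanAgent:
    def __init__(self, mark):
        self.mark = mark

    def act(self, free_qblock_id_lists, collapsed_qttts, mark):
        choice = input("Pick a collapse option (or 'q' to save and quit): ")
        if choice.lower().startswith('q'):
            return None, None  # quit signal checked by the caller
        idx = int(choice)
        move = free_qblock_id_lists[idx][0]  # assumed: take the first free block
        return collapsed_qttts[idx], move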
def run_game(print_board=False):
    game = Connect4Board()
    next_player = {
        Player.PLAYER_1: Player.PLAYER_2,
        Player.PLAYER_2: Player.PLAYER_1
    }
    curr_player = Player.PLAYER_1
    player_1_agent = HumanAgent("Player 1")
    # player_2_agent = HumanAgent("Player 2")
    player_2_agent = MinimaxAgent(Player.PLAYER_2)
    player_map = {
        Player.PLAYER_1: player_1_agent,
        Player.PLAYER_2: player_2_agent
    }
    while True:
        if print_board:
            game.print_board()
            print("================================")
        game = game.add_piece(curr_player, player_map[curr_player].get_action(game))
        game_state = game.check_game_state(curr_player)
        if game_state == GameState.DRAW:
            print("DRAW!!!")
            game.print_board()
            return
        elif game_state == GameState.PLAYER_1_WIN:
            print("PLAYER 1 WINS!!!")
            game.print_board()
            return
        elif game_state == GameState.PLAYER_2_WIN:
            print("PLAYER 2 WINS!!!")
            game.print_board()
            return
        curr_player = next_player[curr_player]
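# run_game only requires agents to expose get_action(game) returning a
# column index for game.add_piece. A minimal sketch of the human side;
# the prompt text and the 0-based column convention are assumptions:
class HumanAgent:
    def __init__(self, name):
        self.name = name

    def get_action(self, game):
        while True:
            raw = input("{}: choose a column: ".format(self.name))
            if raw.isdigit():
                return int(raw)
            print("Please enter a non-negative column number.")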
def learnplay(max_episode, epsilon, alpha, model_file, show_number):
    _learn(max_episode, epsilon, alpha, model_file)
    _play(model_file, HumanAgent('O'), show_number)
def play(load_file, show_number):
    _play(load_file, HumanAgent('O'), show_number)
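# Hypothetical usage of the two wrappers above; every argument value is
# illustrative, not from the original project:
learnplay(max_episode=10000, epsilon=0.08, alpha=0.4,
          model_file='ttt_model.dat', show_number=True)
play(load_file='ttt_model.dat', show_number=True)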
def execute_experiment(args):
    #### HARD RULES
    if args['parallel'] != 0:
        args['use_gpu'] = 0
    if args['agent_type'] == 'human':
        args['use_gpu'] = 0
        args['render_delay'] = 0
        args['mode'] = 'play'
        args['display_prob'] = 1
        # args['action_repeat'] = 1
    if args['env_name'] == 'key_mdp-v0':
        args['action_repeat'] = 1
    # Architecture arguments arrive as strings like "64-32"; split them into lists.
    arch_names = [n for n in args.keys() if 'architecture' in n]
    for arch_name in arch_names:
        if args[arch_name] is None:
            continue
        else:
            args[arch_name] = args[arch_name].split('-')

    cnf = configuration.Configuration()

    # Global settings
    gl_st = configuration.GlobalSettings(args)
    cnf.set_global_settings(gl_st)

    # Agent settings
    if args['agent_type'] == 'dqn':
        ag_st = configuration.DQNSettings(args['scale'])
    elif args['agent_type'] == 'hdqn':
        ag_st = configuration.hDQNSettings(args['scale'])
    elif args['agent_type'] == 'human':
        ag_st = configuration.HumanSettings()
    else:
        raise ValueError("Wrong agent %s" % args['agent_type'])
    ag_st.update(args)
    cnf.set_agent_settings(ag_st)

    # Environment settings
    utils.insert_dirs(cnf.gl.env_dirs)
    if args['env_name'] == 'SF-v0':
        # Space Fortress
        env_st = configuration.SpaceFortressSettings(new_attrs=args)
    elif args['env_name'] == 'key_mdp-v0':
        # MDP
        env_st = configuration.Key_MDPSettings(new_attrs=args)
    else:
        raise ValueError("Wrong env_name %s (env_names: %s)"
                         % (args['env_name'], ', '.join(CT.env_names)))
    env_st.set_reward_function()
    cnf.set_environment_settings(env_st)
    environment = Environment(cnf)

    tf.set_random_seed(gl_st.random_seed)
    random.seed(gl_st.random_seed)

    if gl_st.gpu_fraction == '':
        raise ValueError("--gpu_fraction should be defined")
    if not gl_st.use_gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = "-1"
    frac = utils.calc_gpu_fraction(gl_st.gpu_fraction)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=frac)

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        if ag_st.agent_type == 'dqn':
            agent = DQNAgent(cnf, environment, sess)
        elif ag_st.agent_type == 'hdqn':
            agent = HDQNAgent(cnf, environment, sess)
        elif ag_st.agent_type == 'human':
            agent = HumanAgent(cnf, environment)
        else:
            raise ValueError("Wrong agent %s" % ag_st.agent_type)
        if ag_st.mode == 'train':
            agent.train()
        elif ag_st.mode == 'play':
            agent.play()
        elif ag_st.mode == 'graph':
            pass
        else:
            raise ValueError("Wrong mode " + str(ag_st.mode))
        # agent.show_attrs()
    tf.reset_default_graph()
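# A hedged example of driving execute_experiment directly. Only the keys
# actually read inside the function are listed; the configuration classes
# may require more, and every value here is illustrative:
args = {
    'parallel': 0,
    'agent_type': 'human',     # forces CPU, play mode, full rendering
    'use_gpu': 0,
    'render_delay': 0,
    'mode': 'play',
    'display_prob': 1,
    'env_name': 'key_mdp-v0',  # or 'SF-v0' for Space Fortress
    'action_repeat': 1,
    'scale': 1,
}
execute_experiment(args)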
from moves_loader import MovesLoader
from pokemon_loader import PokemonLoader  # assumed module name, mirroring the other loader imports
from type_efficacy_loader import TypeEfficacyLoader
from random_agent import RandomAgent
from minimax_agent import MinimaxAgent
from minimax_pruning_agent import MinimaxPruningAgent
from baseline_agent import BaselineAgent
from human_agent import HumanAgent
from game import Game

moves_loader = MovesLoader()
pokemon_loader = PokemonLoader(moves_loader)
type_efficacy_loader = TypeEfficacyLoader()

counter = {'player': 0, 'opponent': 0, 'timeout': 0}
while counter['player'] + counter['opponent'] < 1:
    print counter
    # player = MinimaxAgent('player', pokemon_loader.getRandomTeam(), 1)
    # player = MinimaxPruningAgent('player', pokemon_loader.getRandomTeam(), 2)
    # player = BaselineAgent('player', pokemon_loader.getRandomTeam())
    player = HumanAgent('player', pokemon_loader.getRandomTeam(), moves_loader)
    # player = RandomAgent('player', pokemon_loader.getRandomTeam())
    opponent = BaselineAgent('opponent', pokemon_loader.getRandomTeam())
    # opponent = RandomAgent('opponent', pokemon_loader.getRandomTeam())
    # opponent = HumanAgent('opponent', pokemon_loader.getRandomTeam(), moves_loader)
    game_obj = Game(pokemon_loader, moves_loader, type_efficacy_loader, player, opponent)
    counter[game_obj.run()] += 1
print counter
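# The counter keys above imply the contract of Game.run(): it returns
# 'player', 'opponent', or 'timeout'. A hedged Python 2 sketch of a longer
# agent-vs-agent evaluation (N_GAMES is a hypothetical name):
N_GAMES = 50
results = {'player': 0, 'opponent': 0, 'timeout': 0}
for _ in xrange(N_GAMES):
    g = Game(pokemon_loader, moves_loader, type_efficacy_loader,
             BaselineAgent('player', pokemon_loader.getRandomTeam()),
             RandomAgent('opponent', pokemon_loader.getRandomTeam()))
    results[g.run()] += 1
print results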
    p1.learn(states_actions_p1, game_reward_p1)
    # p2.learn(states_actions_p2, game_reward_p2)
    if game_reward_p1 == 1:
        win_p1 += 1
    elif game_reward_p1 == 0:
        draw += 1
    if game_num % BATCH_SIZE == 0:
        print("Batch: {}".format(game_num / BATCH_SIZE))
        print("P1: {}\nDRAW: {}\nP2: {}".format(win_p1, draw,
                                                BATCH_SIZE - win_p1 - draw))
        print()
        win_p1 = 0
        draw = 0

p2 = HumanAgent()

# Games to display what has been learnt.
for game_num in range(NUM_OF_DISPLAY_GAMES):
    print("\n## Game ##")
    game = TicTacToe()
    is_terminal = False
    game_reward_p1 = 0
    game_reward_p2 = 0
    # p1 starts.
    turn = 1
    state = game.get_state()
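# The training loop above only needs p1 to expose learn(states_actions,
# reward). A minimal sketch of such a learner; the value table, learning
# rate, and update rule are assumed for illustration and are not the
# original agent:
class TabularAgent:
    def __init__(self, lr=0.1):
        self.values = {}  # state -> estimated value
        self.lr = lr

    def learn(self, states_actions, reward):
        # Credit the final game reward back to every visited state.
        for state, _action in states_actions:
            v = self.values.get(state, 0.0)
            self.values[state] = v + self.lr * (reward - v)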
playerAgents = []
for playerAgentString in playerAgentStrings:
    if playerAgentString == "QLearningAgent":
        playerAgents.append(QLearningAgent(args.alpha, args.discount, args.epsilon))
    elif playerAgentString == "CountLearningAgent":
        playerAgents.append(CountLearningAgent())
    elif playerAgentString == "AceCountLearningAgent":
        playerAgents.append(AceCountLearningAgent())
    elif playerAgentString == "ReflexAgent":
        playerAgents.append(ReflexAgent())
    elif playerAgentString == "StandingAgent":
        playerAgents.append(StandingAgent())
    elif playerAgentString == "NoBustAgent":
        playerAgents.append(NoBustAgent())
    elif playerAgentString == "HumanAgent":
        playerAgents.append(HumanAgent())
    elif playerAgentString == "ValueIterationAgent":
        if not iterations:
            print "Number of iterations must be specified with ValueIterationAgent"
            sys.exit(1)
        playerAgents.append(ValueIterationAgent(iterations))
    else:
        print "Unrecognized agent {0}".format(playerAgentString)
        sys.exit(1)

dealerAgent = DealerAgent()
if trainingRounds > 0:
    trainingAgents = filter(lambda x: x.needsTraining(), playerAgents)
    print "Training ({0} rounds)...".format(trainingRounds)
    game = Game(dealerAgent, trainingAgents)
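# The training filter above assumes every agent implements needsTraining().
# A minimal Python 2 sketch of a HumanAgent satisfying that interface; the
# action method name, action strings, and prompt are assumptions:
class HumanAgent:
    def needsTraining(self):
        return False  # a human is never included in training rounds

    def getAction(self, gameState):
        choice = raw_input("(h)it or (s)tand? ").strip().lower()
        return 'hit' if choice == 'h' else 'stand'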
except:
    bestTime = np.inf

track_length = 500  # 5700
print('bestTime: ', bestTime)
save = False

# If the CSV with the car trajectory does not exist, build an empty one with a header.
"""try:
    car_trajectory_df = pd.read_csv('car_trajectory_monza.csv')
except:
    df = pd.DataFrame(obs_dic)
    df.to_csv(index=False, path_or_buf='car_trajectory_monza.csv')"""

# Generate a Torcs environment
env = TorcsEnv(vision=False, throttle=True, gear_change=False, brake=True)
agent = HumanAgent(max_steps, use_logitech_g27=False)
# agent = HumanAgent(max_steps, use_logitech_g27=True)

print("TORCS Experiment Start.")
for i in range(episode_count):
    print("Episode : " + str(i))
    if np.mod(i, 3) == 0:
        # Sometimes you need to relaunch TORCS because of the memory leak error
        ob = env.reset(relaunch=True)
    else:
        ob = env.reset()
    total_reward = 0.
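# The bare `except:` opening this fragment implies a preceding `try` that
# loads a previously saved best lap time. A self-contained sketch of that
# pattern; the file name is an assumption:
import numpy as np

try:
    with open('best_time_monza.txt') as f:
        bestTime = float(f.read())
except (IOError, ValueError):
    bestTime = np.inf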
pygame.draw.line(canvas, BLACK,
                 [(WIDTH_SCALE * (1 - state.paddle_x)), 0],
                 [(WIDTH_SCALE * (1 - state.paddle_x)), HEIGHT_SCALE], 5)
pygame.draw.line(canvas, BLACK, [0, 0], [WIDTH_SCALE, 0], 5)
pygame.draw.line(canvas, BLACK, [0, HEIGHT_SCALE], [WIDTH_SCALE, HEIGHT_SCALE], 5)

# Draw Ball and Paddle
pygame.draw.circle(canvas, BLACK,
                   [int(state.ball_x * WIDTH_SCALE), int(state.ball_y * HEIGHT_SCALE)],
                   BALL_RADIUS, 0)
pygame.draw.polygon(canvas, BLACK, [
    [int(WIDTH_SCALE * state.paddle_x),
     int(state.paddle_y * HEIGHT_SCALE)],
    [int((WIDTH_SCALE * state.paddle_x) + PADDLE_WIDTH_SCALE - (state.paddle_x * 2 * PADDLE_WIDTH_SCALE)),
     int(state.paddle_y * HEIGHT_SCALE)],
    [int((WIDTH_SCALE * state.paddle_x) + PADDLE_WIDTH_SCALE - (state.paddle_x * 2 * PADDLE_WIDTH_SCALE)),
     int((state.paddle_y * HEIGHT_SCALE) + PADDLE_HEIGHT_SCALE)],
    [int(WIDTH_SCALE * state.paddle_x),
     int((state.paddle_y * HEIGHT_SCALE) + PADDLE_HEIGHT_SCALE)]
], 0)

# Update Scores
myfont = pygame.font.SysFont("Comic Sans MS", 25)
label = myfont.render("Score " + str(score), 1, BLACK)
canvas.blit(label, ((WIDTH_SCALE // 2) - 50, 20))

"""
SIMULATION CODE BELOW
"""

agent = HumanAgent()
agent.set_paddle_x(PADDLE_ON_RIGHT)
curr_state = get_initial_state(PADDLE_ON_RIGHT)
score = 0
while True:
    draw(window, curr_state, score)
    for event in pygame.event.get():
        if event.type == KEYDOWN:
            if event.key == K_UP:
                agent.up_pressed()
            if event.key == K_DOWN:
                agent.down_pressed()
        else:
            agent.nothing_pressed()
    if curr_state.game_over:
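# The event loop above defines the whole HumanAgent interface used here
# and in the two-player variant below: set_paddle_x, up_pressed,
# down_pressed, nothing_pressed. A minimal sketch; the velocity values
# and attribute names are assumptions:
class HumanAgent:
    def __init__(self):
        self.paddle_x = 0
        self.dy = 0  # current paddle velocity, set by key events

    def set_paddle_x(self, paddle_x):
        self.paddle_x = paddle_x

    def up_pressed(self):
        self.dy = -0.04

    def down_pressed(self):
        self.dy = 0.04

    def nothing_pressed(self):
        self.dy = 0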
# from lstm_agent import NeuralAgent as lstmAgent
import numpy as np
import pandas as pd

episode_count = 1
max_steps = 10000
reward = 0
done = False
# collect_data_mode = False
collect_data_mode = True
step = 0

# Generate a Torcs environment
env = TorcsEnv(vision=False, throttle=True, gear_change=False, brake=True)
agent = HumanAgent(max_steps, use_logitech_g27=False)
# agent = HumanAgent(max_steps, use_logitech_g27=True)

print("TORCS Experiment Start.")
for i in range(episode_count):
    print("Episode : " + str(i))
    if np.mod(i, 3) == 0:
        # Sometimes you need to relaunch TORCS because of the memory leak error
        ob = env.reset(relaunch=True)
    else:
        ob = env.reset()
    total_reward = 0.
    if step == max_steps and not done and collect_data_mode:
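# The fragment stops before the per-step loop. A hedged sketch of the
# gym-style inner loop the surrounding variables suggest; the exact
# signature of agent.act is an assumption:
for step in range(max_steps):
    action = agent.act(ob, reward, done)    # assumed agent interface
    ob, reward, done, _ = env.step(action)  # gym_torcs follows the gym step API
    total_reward += reward
    if done:
        break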
    ], [
        int(WIDTH_SCALE),
        int((state.paddle1_y * HEIGHT_SCALE) + PADDLE_HEIGHT_SCALE)
    ]], 0)

# Update Scores
myfont = pygame.font.SysFont("Comic Sans MS", 25)
label = myfont.render("Total Hits " + str(total_hits), 1, BLACK)
canvas.blit(label, ((WIDTH_SCALE // 2) - 75, 20))

"""
SIMULATION CODE BELOW
"""

left_agent = HumanAgent()
right_agent = HumanAgent()
curr_state = get_initial_state()
total_hits = 0
while True:
    draw(window, curr_state, total_hits)
    for event in pygame.event.get():
        if event.type == KEYDOWN:
            if event.key == K_w:
                left_agent.up_pressed()
            if event.key == K_s:
                left_agent.down_pressed()
            if event.key == K_UP:
                right_agent.up_pressed()
            if event.key == K_DOWN:
from datetime import datetime  # needed for datetime.utcnow() below
import pprint

pp = pprint.PrettyPrinter(indent=4)
p = pp.pprint

# Make environment
env = WhaleEnv(
    config={
        'active_player': 0,
        'seed': datetime.utcnow().microsecond,
        'env_num': 1,
        'num_players': 4
    })
episode_num = 1

# Set up agents
agent_0 = HumanAgent(action_num=env.action_num)
agent_1 = RandomAgent(action_num=env.action_num)
agent_2 = RandomAgent(action_num=env.action_num)
agent_3 = RandomAgent(action_num=env.action_num)
env.set_agents([agent_0, agent_1, agent_2, agent_3])

for episode in range(episode_num):
    # Generate data from the environment
    trajectories = env.run(is_training=False)

    # Print out the trajectories
    print('\nEpisode {}'.format(episode))
    i = 0
    for trajectory in trajectories:
        print('\tPlayer {}'.format(i))
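# env.run(is_training=False) follows the RLCard-style convention in which
# each agent exposes an evaluation-time action method. A hedged sketch of
# a console HumanAgent; the method name eval_step and the state layout
# ('legal_actions') are assumptions based on that convention:
class HumanAgent:
    def __init__(self, action_num):
        self.action_num = action_num

    def eval_step(self, state):
        legal = list(state.get('legal_actions', range(self.action_num)))
        print('Legal actions: {}'.format(legal))
        while True:
            raw = input('Your action id: ')
            if raw.isdigit() and int(raw) in legal:
                return int(raw)
            print('Invalid action, try again.')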