def __init__(self, architecture, min_episode=50000, train_lvls=None,
             test_lvl=None, description=None, additional_description=None):
    if train_lvls is None:
        description = 'Untrained'
    elif description is None:
        description = self.make_description(train_lvls)
    # putting space at beginning
    additional_description = '' if additional_description is None else ' ' + additional_description
    self.description = description + additional_description

    # use an untrained agent if either the test level or the training levels are not given
    if train_lvls is None or test_lvl is None:
        self.agent = agents.DQNAgent(min_episode, architecture=architecture)
    else:
        self.agent = agents.DQNAgent.load_agent(self.description)

    if test_lvl is not None:
        self.env = golfenv.Env(test_lvl)
    elif train_lvls is not None:
        self.env = golfenv.Env(train_lvls)
    else:
        self.env = golfenv.Env(levels=[1])

    self.current_episode = 1
    self.min_episode = min_episode
    self.episode_rewards = []
    self.loss_table = []
    self.transition_list = []
    self.is_done = False
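To make the branching above easier to follow, here is a small self-contained sketch (resolve_run_mode is a hypothetical helper, not part of the original class) that spells out which agent and environment configuration each argument combination selects:

def resolve_run_mode(train_lvls=None, test_lvl=None):
    # Hypothetical helper mirroring the constructor's branching above.
    if train_lvls is None or test_lvl is None:
        agent_mode = 'fresh DQNAgent (untrained)'
    else:
        agent_mode = 'DQNAgent.load_agent(description)'
    if test_lvl is not None:
        env_levels = test_lvl        # evaluate on the given test level
    elif train_lvls is not None:
        env_levels = train_lvls      # train on the given levels
    else:
        env_levels = [1]             # fall back to level 1
    return agent_mode, env_levels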
# INIT ENV
env, observation_space, action_space = build_env_wrapper(
    params["env_name"], env_type=params["env_type"])

# LOGGING
writer = SummaryWriter(comment="-" + params["run_name"] + "-noisy")

# NETWORK
net = dqn_noisy_net.Network(env.observation_space.shape,
                            env.action_space.n).to(device)
tgt_net = agents.TargetNetwork(net)

# AGENT
selector = actions.ArgmaxActionSelector()
agent = agents.DQNAgent(net, selector, device=device)

# RUNNER
exp_source = runner.RunnerSourceFirstLast(
    env, agent, gamma=params["gamma"])  # increase the number of steps for the runner
buffer = ExperienceReplayBuffer(exp_source, buffer_size=params["replay_size"])
optimizer = optim.Adam(net.parameters(), lr=params["learning_rate"])
frame_idx = 0

# TRAIN
with logger.RewardTracker(writer, params["stop_reward"]) as reward_tracker:
    while True:
        frame_idx += 1
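agents.TargetNetwork above is project-specific. As a rough illustration only (TargetNetSketch is a hypothetical name, assuming a PyTorch model), a DQN target-network wrapper typically keeps a frozen copy of the online net and hard-syncs it on demand:

import copy

import torch.nn as nn


class TargetNetSketch:
    """Hypothetical sketch of a DQN target-network wrapper (not the project's class)."""

    def __init__(self, model: nn.Module):
        self.model = model
        self.target_model = copy.deepcopy(model)  # frozen copy used for TD targets

    def sync(self):
        # Hard update: copy the online weights into the target copy.
        self.target_model.load_state_dict(self.model.state_dict())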
def connect4(path='data/connect4', seed=161831415):
    env = envs.Connect4()
    rand_epochs = 1000
    ai_epochs = 0
    test_games = 500
    mem_size = 200
    log_freq = 100
    # 3 states per position
    depth = 3
    # The state is preprocessed and has this shape now
    dim_state = [depth, *env.n_state]
    log = Logger(log_freq)

    # Simple dqn
    net = dqn.Conn(depth, env.n_action)
    ai = agents.DQNAgent(env.n_state, env.n_action, net, logger=log, lr=1e-3,
                         discount_factor=.98, exploration_decay=.98,
                         exploration_min=.1,
                         state_preprocessor=f_one_hot_state(depth, -1,
                                                            new_size=[1] + dim_state))
    mem = LinearMemory(dim_state, mem_size, ai.learn)

    # Train first against random agent
    rand_act = envs.Connect4.random_act()

    # Loading
    # TODO : ai.load(path)

    # Training
    print('Training vs random')
    train(ai, rand_act, mem, env, rand_epochs, log, False)
    # print('Training vs ai')
    # TODO : train(ai, ai.act, mem, env, ai_epochs, log, True)

    # Saving
    # TODO : ai.save(path)

    # Testing
    ai.exploration_rate = 0
    win, draw = test(ai.act, rand_act, env, games=test_games,
                     state_preprocessor=ai.state_preprocessor)
    print(f'Test on {test_games} games : Victories : {win} Draws : {draw}')
    print(f'Win or draw rate : {(win + draw) / test_games * 100:.1f} %')

    # Playing
    while True:
        print('New Game')
        p1, p2 = play(ai.act, user_act(env.n_action), env,
                      state_preprocessor=ai.state_preprocessor)
        if p1 > 0:
            print('AI won')
        elif p2 > 0:
            print('You won')
        else:
            print('Error / Draw')
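The depth-3 preprocessing above ("3 states per position") is handled by the project's own f_one_hot_state helper. As a rough, self-contained sketch (one_hot_board is a hypothetical function, assuming board cells take the values -1, 0 and 1), such an encoding could look like:

import numpy as np


def one_hot_board(board, values=(-1, 0, 1)):
    """Hypothetical sketch: encode a board whose cells take the given values
    as one binary plane per value (result shape: [len(values), *board.shape])."""
    board = np.asarray(board)
    return np.stack([(board == v).astype(np.float32) for v in values])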
net = models.SimpleFFDQN(env.get_obs_len(), env.get_action_space_size())

# load the network
if RL_options['load_net']:
    with open(os.path.join(RL_options['net_saved_path'],
                           RL_options['net_file']), "rb") as f:
        checkpoint = torch.load(f)
        net = models.SimpleFFDQN(env.get_obs_len(), env.get_action_space_size())
        net.load_state_dict(checkpoint['state_dict'])

net.to(torch.device("cuda"))  # pass into gpu

selector = actions.EpsilonGreedyActionSelector(RL_options['epsilon_start'])
agent = agents.DQNAgent(net, selector)
# agent = agents.Supervised_DQNAgent(net, selector, sample_sheet, assistance_ratio=0.2)

exp_source = experience.ExperienceSourceFirstLast(
    env, agent, RL_options['gamma'], steps_count=RL_options['reward_steps'])

# create buffer
buffer = experience.ExperienceReplayBuffer(exp_source, RL_options['replay_size'])

# create optimizer
optimizer = optim.Adam(net.parameters(), lr=RL_options['lr'])

# create net pre-processor
net_processor = common.netPreprocessor(net, agent.target_model)
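The loading branch above expects a checkpoint dict containing a 'state_dict' key. A minimal sketch of the matching save side (save_checkpoint is a hypothetical helper, not part of the original code) might look like:

import os

import torch


def save_checkpoint(net, path, filename):
    # Hypothetical helper: write a checkpoint in the {'state_dict': ...}
    # format that the loading code above expects.
    os.makedirs(path, exist_ok=True)
    torch.save({'state_dict': net.state_dict()}, os.path.join(path, filename))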
env, observation_space, action_space = build_env_wrapper(
    params["env_name"], env_type=params["env_type"])

# LOGGING
writer = SummaryWriter(comment="-" + params["run_name"] + "-distrib")

# NETWORK
net = dqn_distributional_net.Network(env.observation_space.shape,
                                     env.action_space.n).to(device)
tgt_net = agents.TargetNetwork(net)

# AGENT
selector = actions.EpsilonGreedyActionSelector(
    epsilon=params["epsilon_start"])
epsilon_tracker = logger.EpsilonTracker(selector, params)
agent = agents.DQNAgent(lambda x: net.qvals(x), selector, device=device)

# RUNNER
exp_source = runner.RunnerSourceFirstLast(env, agent, gamma=params["gamma"],
                                          steps_count=1)
buffer = ExperienceReplayBuffer(exp_source,
                                buffer_size=params["replay_size"])
optimizer = optim.Adam(net.parameters(), lr=params["learning_rate"])
frame_idx = 0

# TRAIN
with logger.RewardTracker(writer, params["stop_reward"]) as reward_tracker:
    while True:
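logger.EpsilonTracker above is project-specific. As an illustration only (linear_epsilon is a hypothetical function; the schedule shape and parameter names are assumptions), the epsilon annealing such a tracker typically drives looks like:

def linear_epsilon(frame_idx, eps_start=1.0, eps_final=0.02, eps_frames=100_000):
    """Hypothetical sketch: linearly anneal epsilon from eps_start down to
    eps_final over the first eps_frames frames, then hold it constant."""
    return max(eps_final,
               eps_start - frame_idx * (eps_start - eps_final) / eps_frames)


# Inside the training loop one would then typically set, for example:
#     selector.epsilon = linear_epsilon(frame_idx)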
import agents
import pong_env
import pygame
import numpy as np
import matplotlib.pyplot as plt

player_random = agents.RandomAgent(3)
player_dqn = agents.DQNAgent(5, 3)

'''
# player dqn uses saved model
model = "q_model_4_5"
player_dqn.q_from_load_model(model)
print("Model loaded.")
'''

num_play = 100
clock = pygame.time.Clock()
scores = []
epsilons = []
break_learning = False

for i in range(num_play):
    done = False
    exits = False
    score = 0
    game = pong_env.Pong()
    while not done and not exits:
        # terminate if user quits
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
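The script above collects scores and epsilons for later plotting. A small self-contained sketch (plot_scores is a hypothetical helper, not from the original script) of how such a curve is commonly plotted with matplotlib:

import numpy as np
import matplotlib.pyplot as plt


def plot_scores(scores, window=10):
    # Hypothetical helper: plot raw per-episode scores plus a simple moving average.
    scores = np.asarray(scores, dtype=np.float32)
    plt.plot(scores, alpha=0.4, label='score')
    if len(scores) >= window:
        smoothed = np.convolve(scores, np.ones(window) / window, mode='valid')
        plt.plot(np.arange(window - 1, len(scores)), smoothed,
                 label=f'{window}-episode mean')
    plt.xlabel('episode')
    plt.ylabel('score')
    plt.legend()
    plt.show()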