def assemble_training(seed, weights=None, lr=cfg.LEARNING_RATE, er=cfg.EPS_START):
    """
    Configure everything needed to start training.

    The `weights` argument points to a checkpoint used to resume training
    and restore the network weights. This function wraps the environment
    with all the preprocessing steps and sets up the policy and the
    replay buffer.
    """
    if weights:
        checkpoint = torch.load(weights)
        env = getWrappedEnv(seed=checkpoint["info"]["seed"])
        dqn = DuelingDQN(env, lr=lr)
        eval_net = DuelingDQN(env)
        load_checkpoint(dqn, weights, dqn.device)
        load_checkpoint(eval_net, weights, dqn.device)
        policy = eGreedyPolicyDecay(env, seed, checkpoint["info"]["er"], er,
                                    cfg.EPS_END, cfg.DECAY_STEPS, dqn)
        buffer = ReplayBuffer(seed=seed)
        agent = DDQNAgent(dqn, eval_net, policy, buffer)
        with open(checkpoint["info"]["buffer"], "rb") as f:
            preloaded_buffer = pickle.load(f)
        agent.buffer = preloaded_buffer
        print(
            "Resume training at Episode", checkpoint["info"]["episodes"],
            "after", checkpoint["info"]["frames"], "frames.\n",
            "Learning rate is", checkpoint["info"]["lr"],
            "\nExploration rate is", checkpoint["info"]["er"],
        )
        return env, agent, checkpoint["info"]["episodes"], checkpoint["info"]["frames"]

    env = getWrappedEnv(seed=seed)
    dqn = DuelingDQN(env, lr=lr)
    eval_net = DuelingDQN(env)
    policy = eGreedyPolicyDecay(env, seed, er, er, cfg.EPS_END, cfg.DECAY_STEPS, dqn)
    buffer = ReplayBuffer(seed=seed)
    agent = DDQNAgent(dqn, eval_net, policy, buffer)
    return env, agent, 0, 0
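# Usage sketch (hypothetical checkpoint path; the returned episode/frame
# counters feed the training loop defined elsewhere in the project):
env, agent, start_episode, start_frame = assemble_training(seed=42)
# To resume a previous run instead:
# env, agent, start_episode, start_frame = assemble_training(
#     seed=42, weights="checkpoints/ddqn_checkpoint.pt")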
def main(_):
    if check_path_validity() == -1:
        exit(1)
    FLAGS.logdir = FLAGS.logdir if FLAGS.logdir.endswith('/') else FLAGS.logdir + '/'

    # Make a new directory to store checkpoints and tensorboard summaries;
    # this is only necessary if we are going to train a new model.
    if FLAGS.training:
        os.makedirs(FLAGS.logdir)

    # Set up tensorflow and tensorboard writers
    tf.reset_default_graph()
    session = tf.Session()
    writer = tf.summary.FileWriter(FLAGS.logdir, session.graph) if FLAGS.visualize else None
    summary_ops, summary_placeholders = setup_summary()

    # Initialize key objects: environment, agent and preprocessor
    env = Environment("127.0.0.1", 9090)
    agent = DDQNAgent(session, num_actions, width, height, FLAGS.logdir, writer)
    preprocessor = Preprocessor(width, height)

    if FLAGS.training:
        summarize_func = partial(summarize, session, writer, summary_ops,
                                 summary_placeholders)
        train(agent, env, preprocessor, summarize_func)
    else:
        play(agent, env, preprocessor)
def play(**kwargs):
    env = BananaEnvironment(file_name=kwargs['env_file'],
                            num_stacked_frames=kwargs['num_stacked_frames'])

    agent_name = kwargs['agent_fname']
    is_per = 'PER' in agent_name
    if 'ddqn' in agent_name:
        agent = DDQNAgentPER.load(agent_name) if is_per else DDQNAgent.load(agent_name)
    elif 'dqn' in agent_name:
        agent = DQNAgentPER.load(agent_name) if is_per else DQNAgent.load(agent_name)
    else:
        raise KeyError('Unknown agent type')

    for i in range(kwargs['num_plays']):
        done = False
        score = 0
        state = env.reset(train_mode=False)
        while not done:
            action = agent.act(state, eps=0.)  # greedy: no exploration at play time
            state, reward, done = env.step(action)  # roll out transition
            score += reward
            print("\r play #{}, reward: {} | score: {}".format(i + 1, reward, score),
                  end='')
        print()
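# Usage sketch (hypothetical file names; the agent class and PER variant are
# picked by the 'ddqn'/'dqn' and 'PER' substrings in agent_fname):
play(env_file='Banana_Linux/Banana.x86_64',
     num_stacked_frames=4,
     agent_fname='models/simple/ddqn_agent_PER.pt',
     num_plays=5)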
def main():
    print("Creating model...")
    model = create_model()
    model.summary()

    print("Creating environment...")
    environment = gym.make("CartPole-v0")
    environment._max_episode_steps = 500

    print("Creating agent...")
    if agent_type == "dqn":
        agent = DQNAgent(name="cartpole-dqn",
                         model=model,
                         environment=environment,
                         observation_frames=1,
                         observation_transformation=observation_transformation,
                         reward_transformation=reward_transformation,
                         gamma=0.95,
                         final_epsilon=0.01,
                         initial_epsilon=1.0,
                         number_of_iterations=1000000,
                         replay_memory_size=2000,
                         minibatch_size=32)
    elif agent_type == "ddqn":
        agent = DDQNAgent(name="cartpole-ddqn",
                          model=model,
                          environment=environment,
                          observation_frames=1,
                          observation_transformation=observation_transformation,
                          reward_transformation=reward_transformation,
                          gamma=0.95,
                          final_epsilon=0.01,
                          initial_epsilon=1.0,
                          number_of_iterations=1000000,
                          replay_memory_size=2000,
                          minibatch_size=32,
                          model_copy_interval=100)

    agent.enable_rewards_tracking(rewards_running_means_length=10000)
    agent.enable_episodes_tracking(episodes_running_means_length=10000)
    agent.enable_maxq_tracking(maxq_running_means_length=10000)
    agent.enable_model_saving(model_save_frequency=100000)
    agent.enable_tensorboard_for_tracking()

    print("Training ...")
    agent.fit(verbose=True, headless="render" not in sys.argv)
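# The observation_transformation and reward_transformation helpers are defined
# elsewhere in the project. A hypothetical sketch of what they might look like
# for CartPole (names and signatures are assumptions, not the originals):
def observation_transformation(observation):
    # CartPole observations are already compact float vectors; pass through.
    return observation

def reward_transformation(reward):
    # Clip rewards to [-1, 1] to keep Q-value targets on a stable scale.
    return max(-1.0, min(1.0, reward))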
def main():
    print("Creating environment...")
    environment = gym_tetris.make('Tetris-v0')

    print("Creating model...")
    model = modelutils.create_model(number_of_actions)
    model.summary()

    print("Creating agent...")
    if agent_type == "dqn":
        agent = DQNAgent(
            name="tetris-dqn",
            environment=environment,
            model=model,
            observation_transformation=utils.resize_and_bgr2gray,
            observation_frames=4,
            number_of_iterations=1000000,
            gamma=0.95,
            final_epsilon=0.01,
            initial_epsilon=1.0,
            replay_memory_size=2000,
            minibatch_size=32
        )
    elif agent_type == "ddqn":
        agent = DDQNAgent(
            name="tetris-ddqn",
            environment=environment,
            model=model,
            observation_transformation=utils.resize_and_bgr2gray,
            observation_frames=4,
            number_of_iterations=1000000,
            gamma=0.95,
            final_epsilon=0.01,
            initial_epsilon=1.0,
            replay_memory_size=2000,
            minibatch_size=32,
            model_copy_interval=100
        )

    agent.enable_rewards_tracking(rewards_running_means_length=10000)
    agent.enable_episodes_tracking(episodes_running_means_length=100)
    agent.enable_maxq_tracking(maxq_running_means_length=10000)
    agent.enable_model_saving(model_save_frequency=10000)
    agent.enable_plots_saving(plots_save_frequency=10000)

    print("Training ...")
    agent.fit(verbose=True, headless="headless" in sys.argv, render_states=True)
def main():
    print("Creating model...")
    model = modelutils.create_model(number_of_actions=4)
    model.summary()

    print("Creating agent...")
    if agent_type == "dqn":
        agent = DQNAgent(name="doom-dqn",
                         model=model,
                         number_of_actions=4,
                         gamma=0.99,
                         final_epsilon=0.0001,
                         initial_epsilon=0.1,
                         number_of_iterations=200000,
                         replay_memory_size=10000,
                         minibatch_size=32)
    elif agent_type == "ddqn":
        agent = DDQNAgent(name="doom-ddqn",
                          model=model,
                          number_of_actions=4,
                          gamma=0.99,
                          final_epsilon=0.0001,
                          initial_epsilon=0.1,
                          number_of_iterations=200000,
                          replay_memory_size=10000,
                          minibatch_size=32,
                          model_copy_interval=100)

    agent.enable_rewards_tracking(rewards_running_means_length=1000)
    agent.enable_episodes_tracking(episodes_running_means_length=1000)
    agent.enable_maxq_tracking(maxq_running_means_length=1000)
    agent.enable_model_saving(model_save_frequency=10000)
    agent.enable_plots_saving(plots_save_frequency=10000)

    print("Creating game...")
    # environment = Environment(headless=("headless" in sys.argv))
    # Create an instance of the Doom game.
    environment = DoomGame()
    environment.load_config("scenarios/basic.cfg")
    environment.set_screen_format(ScreenFormat.GRAY8)
    environment.set_window_visible("headless" not in sys.argv)
    environment.init()

    print("Training ...")
    train(agent, environment, verbose="verbose" in sys.argv)
def main():
    print("Creating model...")
    model = modelutils.create_model(number_of_actions)
    model.summary()

    print("Creating agent...")
    if agent_type == "dqn":
        agent = DQNAgent(name="supermario-dqn",
                         model=model,
                         number_of_actions=number_of_actions,
                         gamma=0.95,
                         final_epsilon=0.01,
                         initial_epsilon=1.0,
                         number_of_iterations=1000000,
                         replay_memory_size=2000,
                         minibatch_size=32)
    elif agent_type == "ddqn":
        agent = DDQNAgent(name="supermario-ddqn",
                          model=model,
                          number_of_actions=number_of_actions,
                          gamma=0.95,
                          final_epsilon=0.01,
                          initial_epsilon=1.0,
                          number_of_iterations=1000000,
                          replay_memory_size=2000,
                          minibatch_size=32,
                          model_copy_interval=100)

    agent.enable_rewards_tracking(rewards_running_means_length=10000)
    agent.enable_episodes_tracking(episodes_running_means_length=100)
    agent.enable_maxq_tracking(maxq_running_means_length=10000)
    agent.enable_model_saving(model_save_frequency=10000)
    agent.enable_plots_saving(plots_save_frequency=10000)

    print("Creating game...")
    environment = gym_super_mario_bros.make("SuperMarioBros-v0")
    environment = BinarySpaceToDiscreteSpaceEnv(environment, actions)

    print("Training ...")
    train(agent, environment, verbose="verbose" in sys.argv,
          headless="headless" in sys.argv)
algo = 'DDQNAgent'
render_game = True
load_checkpoint = False
train_model = True
n_games = 500

# Hyperparameters
gamma = 0.99
epsilon = 1.0
lr = 1e-4
eps_min = 1e-5
eps_dec = 1e-5
replace = 1000
mem_size = 20000
batch_size = 32

agent = DDQNAgent(gamma=gamma,
                  epsilon=epsilon,
                  lr=lr,
                  input_dims=input_dims,
                  n_actions=n_actions,
                  mem_size=mem_size,
                  eps_min=eps_min,
                  batch_size=batch_size,
                  replace=replace,
                  eps_dec=eps_dec,
                  chkpt_dir=chkpt_dir,
                  algo=algo,
                  env_name=env_name)

if load_checkpoint:
    agent.load_models()
    agent.epsilon = eps_min

fname = agent.algo + '_' + agent.env_name + '_lr' + str(agent.lr) + \
        '_' + str(n_games) + 'games'
figure_file = 'plots/' + fname + '.png'
scores_file = fname + '_scores.npy'

n_steps = 0
scores, eps_history, steps_array = [], [], []

for i in range(n_games):
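    # The episode loop body is elided in the source. A minimal sketch of a
    # typical DDQN episode loop, assuming a gym-style `env` and that the agent
    # exposes choose_action / store_transition / learn (hypothetical names;
    # adapt to the actual agent API):
    done = False
    observation = env.reset()
    score = 0
    while not done:
        action = agent.choose_action(observation)
        next_observation, reward, done, info = env.step(action)
        score += reward
        if train_model:
            agent.store_transition(observation, action, reward,
                                   next_observation, int(done))
            agent.learn()
        if render_game:
            env.render()
        observation = next_observation
        n_steps += 1
    scores.append(score)
    steps_array.append(n_steps)
    eps_history.append(agent.epsilon)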
def train(**kwargs):
    kwargs['worker_id'] = 0
    if kwargs['env_type'] == 'visual':
        env = VisualBananaEnvironment(file_name=kwargs['env_file'],
                                      num_stacked_frames=kwargs['num_stacked_frames'],
                                      worker_id=kwargs['worker_id'])
    elif kwargs['env_type'] == 'simple':
        env = BananaEnvironment(file_name=kwargs['env_file'],
                                worker_id=kwargs['worker_id'])
    else:
        raise KeyError('unknown env type')

    state_dim = env.get_state_dim()
    action_dim = env.get_action_dim()

    kwargs['device'] = "cuda:0" if torch.cuda.is_available() and kwargs['use_gpu'] else "cpu"

    # Seed all sources of randomness for reproducibility
    torch.manual_seed(0)
    torch.cuda.manual_seed_all(0)
    random.seed(0)
    np.random.seed(0)

    if kwargs['env_type'] == 'visual':
        net = ConvQNetwork(state_dim, action_dim).to(kwargs['device'])
        target_net = ConvQNetwork(state_dim, action_dim).to(kwargs['device'])
    elif kwargs['env_type'] == 'simple':
        net = MlpQNetwork(state_dim, action_dim).to(kwargs['device'])
        target_net = MlpQNetwork(state_dim, action_dim).to(kwargs['device'])
    else:
        raise KeyError('unknown env type')

    kwargs['action_dim'] = action_dim
    if kwargs['agent_type'] == 'ddqn':
        agent = (DDQNAgentPER(net, target_net, **kwargs)
                 if kwargs['use_prioritized_buffer']
                 else DDQNAgent(net, target_net, **kwargs))
    elif kwargs['agent_type'] == 'dqn':
        agent = (DQNAgentPER(net, target_net, **kwargs)
                 if kwargs['use_prioritized_buffer']
                 else DQNAgent(net, target_net, **kwargs))
    else:
        raise KeyError('Unknown agent type')

    dqn = DQN(env=env, agent=agent, **kwargs)
    scores, losses = dqn.train(kwargs['num_episodes'])

    # save agent
    dt = str(datetime.datetime.now().strftime("%m_%d_%Y_%I_%M_%p"))
    per = 'PER' if kwargs['use_prioritized_buffer'] else ''
    model_fname = kwargs['model_dir'] + '/' + kwargs['env_type'] + \
        '/{}_agent_{}_{}.pt'.format(kwargs['agent_type'], per, dt)
    agent.save(model_fname)

    # save scores
    scores_fname = kwargs['reports_dir'] + '/' + kwargs['env_type'] + \
        '/{}_agent_{}_{}'.format(kwargs['agent_type'], per, dt)
    np.save(scores_fname, np.array(scores))

    # save losses
    losses_fname = kwargs['reports_dir'] + '/' + kwargs['env_type'] + \
        '/{}_agent_{}_loss_{}'.format(kwargs['agent_type'], per, dt)
    np.save(losses_fname, np.array(losses))

    env.close()
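# Usage sketch (hypothetical paths and values; train() also forwards **kwargs
# to the agent and DQN constructors, which may require additional keys):
train(env_type='simple',
      env_file='Banana_Linux/Banana.x86_64',
      num_stacked_frames=4,
      agent_type='ddqn',
      use_prioritized_buffer=False,
      use_gpu=False,
      num_episodes=500,
      model_dir='models',
      reports_dir='reports')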
import numpy as np
import torch

from agent import DDQNAgent, DDQNArgs
from trainer import DistributedTrainer
from numeric_env import MultiEnv

torch.set_num_threads(1)

env = MultiEnv(2, 2)
# 6 x 6 = 36 discrete (x, y) actions covering [-5, 5] in steps of 2
action_space = [(-5 + x * 2, -5 + y * 2) for x in range(6) for y in range(6)]
args = DDQNArgs(state_dim=env.STATE_DIM + 1, discrete_action_space=action_space)
agents = [DDQNAgent(args) for _ in range(2)]

trainer = DistributedTrainer(agents,
                             env,
                             parameter_share=True,
                             state_transformer=np.append,
                             log_dir='../logs/ddqn_d_ps')
trainer.train(1000000)
kwargs['device'] = "cuda:0" if torch.cuda.is_available() and kwargs['use_gpu'] else "cpu"
torch.manual_seed(0)
random.seed(0)

if kwargs['env_type'] == 'visual':
    net = ConvQNetwork(state_dim, action_dim).to(kwargs['device'])
    target_net = ConvQNetwork(state_dim, action_dim).to(kwargs['device'])
elif kwargs['env_type'] == 'simple':
    net = MlpQNetwork(state_dim, action_dim).to(kwargs['device'])
    target_net = MlpQNetwork(state_dim, action_dim).to(kwargs['device'])
else:
    raise KeyError('unknown env type')

kwargs['action_dim'] = action_dim
if kwargs['agent_type'] == 'ddqn':
    agent = (DDQNAgentPER(net, target_net, **kwargs)
             if kwargs['use_prioritized_buffer']
             else DDQNAgent(net, target_net, **kwargs))
elif kwargs['agent_type'] == 'dqn':
    agent = (DQNAgentPER(net, target_net, **kwargs)
             if kwargs['use_prioritized_buffer']
             else DQNAgent(net, target_net, **kwargs))
else:
    raise KeyError('Unknown agent type')

train(agent, env)
import torch

from agent import DDQNAgent, DDQNArgs
from trainer import CentralizedTrainer
from numeric_env import MultiEnv

torch.set_num_threads(1)

env = MultiEnv(2, 2)
# Joint action space over both agents: 6 ** 4 = 1296 discrete (x, y, a, b) tuples
action_space = [(-5 + x * 2, -5 + y * 2, -5 + a * 2, -5 + b * 2)
                for x in range(6)
                for y in range(6)
                for a in range(6)
                for b in range(6)]
args = DDQNArgs(state_dim=env.STATE_DIM, discrete_action_space=action_space)
agent = DDQNAgent(args)

trainer = CentralizedTrainer(agent, env, log_dir='../logs/ddqn_c')
trainer.train(1000000)
print(full_data[i].head(5))
print("(rows, columns):", full_data[i].shape)
full_train_data.append(full_data[i].iloc[:data_split, :])

# Training phase
print("\n=========================== Training Mode ======================================")
env = TradingEnv(train_data, args.initial_invest)
state_size = env.observation_space.shape
print("state size", state_size)
action_size = env.action_space.n
print("action size", action_size)
trade_agent = DDQNAgent(state_size, action_size)
scaler = get_scaler(env)

labels = ['episode', 'total_reward']
results = {x: [] for x in labels}
acts = []
portfolio_value = []
train_results = {}

for e in range(args.episode):
    state = env.reset()
    score = 0
    total_profit = 0
    state = scaler.transform([state])