@classmethod
def train(cls, num_frames: int):
    # Assumes module-level imports: ddpg and SingleEnvExperiment from the
    # `all` library, copy_tree (distutils.dir_util), rmtree (shutil), and
    # the project's Settings.
    rl_agent = cls()
    preset = ddpg(device=rl_agent.device,
                  lr_q=Settings.LEARNING_RATE,
                  lr_pi=Settings.LEARNING_RATE)
    experiment = SingleEnvExperiment(preset, rl_agent.env)
    experiment.train(num_frames)
    # Relocate the experiment's logs to the configured directory.
    default_log_dir = experiment._writer.log_dir
    copy_tree(default_log_dir, Settings.FULL_LOG_DIR)
    rmtree(default_log_dir)
    rl_agent.env.close()
@classmethod
def resume_training(cls, path, num_frames: int):
    rl_agent = cls()
    agent = rainbow(device=rl_agent.device, lr=Settings.LEARNING_RATE)
    # Load the saved distributional Q-network and copy its weights into
    # the freshly constructed agent before continuing training.
    q_dist_module = torch.load(os.path.join(path, "q_dist.pt"),
                               map_location='cpu').to(rl_agent.device)
    experiment = SingleEnvExperiment(agent, rl_agent.env)
    agent = experiment._agent
    agent.q_dist.model.load_state_dict(q_dist_module.state_dict())
    experiment.train(frames=num_frames)
    # Relocate the experiment's logs to the configured directory.
    default_log_dir = experiment._writer.log_dir
    copy_tree(default_log_dir, Settings.FULL_LOG_DIR)
    rmtree(default_log_dir)
    rl_agent.env.close()
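# Usage sketch for the two classmethods above (hypothetical: the enclosing
# class is assumed to expose `device` and `env` attributes set up in its
# __init__, and the name `RLAgent` is illustrative, not taken from this
# module):
#
#     RLAgent.train(num_frames=1_000_000)
#     RLAgent.resume_training(path='models', num_frames=500_000)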
def train_dqn_all():
    from all.environments import GymEnvironment
    from all.presets.classic_control import ddqn
    from all.experiments import SingleEnvExperiment

    device = "cuda" if Settings.CUDA else "cpu"
    env = GymEnvironment(Settings.GYM_ENVIRONMENT, device=device)
    preset = ddqn(device=device,
                  lr=Settings.LEARNING_RATE,
                  initial_exploration=Settings.EPS_START,
                  final_exploration=Settings.EPS_END)
    experiment = SingleEnvExperiment(preset, env)
    experiment.train(frames=1_000_000)
    # Relocate the experiment's logs to the configured directory.
    default_log_dir = experiment._writer.log_dir
    copy_tree(default_log_dir, Settings.FULL_LOG_DIR)
    rmtree(default_log_dir)
def resume_dqn_all():
    from all.environments import GymEnvironment
    from all.presets.classic_control import ddqn
    from all.experiments import SingleEnvExperiment

    device = "cuda" if Settings.CUDA else "cpu"
    env = GymEnvironment('sumo-jerk-v0', device=device)
    agent = ddqn(device=device, lr=1e-5)
    # Load the saved Q-network and copy its weights into the freshly
    # constructed agent before continuing training.
    q_module = torch.load(os.path.join('models', "q.pt"),
                          map_location='cpu').to(device)
    experiment = SingleEnvExperiment(agent, env)
    agent = experiment._agent
    agent.q.model.load_state_dict(q_module.state_dict())
    experiment.train(frames=1_000_000)
    # Relocate the experiment's logs to the configured directory.
    default_log_dir = experiment._writer.log_dir
    copy_tree(default_log_dir, Settings.FULL_LOG_DIR)
    rmtree(default_log_dir)
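# The copy_tree/rmtree pair recurs after every run above. A minimal
# consolidation sketch (the helper name and its placement here are
# assumptions, not part of the original module):

def _relocate_logs(experiment):
    """Move an experiment's default log directory to Settings.FULL_LOG_DIR."""
    default_log_dir = experiment._writer.log_dir
    copy_tree(default_log_dir, Settings.FULL_LOG_DIR)
    rmtree(default_log_dir)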
previous_runs = set(get_existing_runs())

env = gym.make('nextro-v0', c_args=args)
env = GymEnvironment(env, args.device)

# Optionally warm-start the agent from previously saved weights.
pretrained_models = None
if args.loc != '':
    pretrained_models = load_weights(args.loc)

agent = sac_minitaur_inspired(device=args.device,
                              last_frame=args.frames,
                              pretrained_models=pretrained_models,
                              train_parallel=args.train_parallel)
exp = SingleEnvExperiment(agent, env, render=args.render)

# The experiment just created a new run directory; identify it by diffing
# the set of existing runs before and after construction.
all_runs = set(get_existing_runs())
current_run_name = list(all_runs - previous_runs)[0]

if args.mode == 'train':
    if args.episodes != 0:
        exp.train(episodes=args.episodes)
    else:
        exp.train(frames=args.frames)
else:
    if args.episodes == 0:
        raise ValueError('You must specify a non-zero number of episodes when using test mode.')
    exp.test(episodes=args.episodes)

exp._env._env.store_settings(current_run_name)
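# For reference, a minimal argparse setup covering every attribute the
# script reads from `args` (flag names and defaults are assumptions
# inferred from the attribute accesses above, not confirmed by the
# original source):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--device', default='cpu',
                    help='torch device for the agent, e.g. cpu or cuda')
parser.add_argument('--frames', type=int, default=0,
                    help='frame budget when training by frames')
parser.add_argument('--episodes', type=int, default=0,
                    help='episode budget; required in test mode')
parser.add_argument('--mode', choices=['train', 'test'], default='train')
parser.add_argument('--loc', default='',
                    help='directory containing pretrained weights')
parser.add_argument('--render', action='store_true')
parser.add_argument('--train_parallel', action='store_true')
args = parser.parse_args()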