Example #1
0
 def train(cls, num_frames: int):
     """Train a fresh DDPG agent for `num_frames` frames, then move the
     experiment's logs into the configured log directory and close the env.
     """
     wrapper = cls()
     ddpg_preset = ddpg(
         device=wrapper.device,
         lr_q=Settings.LEARNING_RATE,
         lr_pi=Settings.LEARNING_RATE,
     )
     run = SingleEnvExperiment(ddpg_preset, wrapper.env)
     run.train(num_frames)
     # Relocate the run's logs to the project directory, then drop the
     # temporary log dir the experiment created.
     tmp_logs = run._writer.log_dir
     copy_tree(tmp_logs, Settings.FULL_LOG_DIR)
     rmtree(tmp_logs)
     wrapper.env.close()
Example #2
0
 def resume_training(cls, path, num_frames: int):
     """Resume Rainbow training from a saved `q_dist.pt` checkpoint under
     `path`, train for `num_frames` frames, then persist logs and close
     the environment.
     """
     wrapper = cls()
     preset = rainbow(device=wrapper.device, lr=Settings.LEARNING_RATE)
     # Load on CPU first, then move to the target device — safe even when
     # the checkpoint was written on a different device.
     checkpoint = torch.load(os.path.join(path, "q_dist.pt"), map_location='cpu')
     checkpoint = checkpoint.to(wrapper.device)
     run = SingleEnvExperiment(preset, wrapper.env)
     # Copy the saved weights into the live agent's q_dist network.
     run._agent.q_dist.model.load_state_dict(checkpoint.state_dict())
     run.train(frames=num_frames)
     # Relocate logs into the configured directory and remove the temp dir.
     tmp_logs = run._writer.log_dir
     copy_tree(tmp_logs, Settings.FULL_LOG_DIR)
     rmtree(tmp_logs)
     wrapper.env.close()
Example #3
0
def train_dqn_all():
    """Train a double-DQN agent on the configured Gym environment for 1M
    frames, then move the experiment's logs into `Settings.FULL_LOG_DIR`.
    """
    from all.environments import GymEnvironment
    from all.experiments import SingleEnvExperiment
    from all.presets.classic_control import ddqn

    device = "cuda" if Settings.CUDA else "cpu"

    env = GymEnvironment(Settings.GYM_ENVIRONMENT, device=device)
    preset = ddqn(device=device,
                  lr=Settings.LEARNING_RATE,
                  initial_exploration=Settings.EPS_START,
                  final_exploration=Settings.EPS_END)
    experiment = SingleEnvExperiment(preset, env)
    # Explicit keyword + integer frame count, consistent with the other
    # training helpers in this file which call train(frames=...).
    experiment.train(frames=int(1e6))
    # Persist logs to the configured directory, then remove the temporary
    # log dir the experiment created.
    default_log_dir = experiment._writer.log_dir
    copy_tree(default_log_dir, Settings.FULL_LOG_DIR)
    rmtree(default_log_dir)
    # Release the environment like the sibling train/resume helpers do.
    env.close()
Example #4
0
def resume_dqn_all():
    """Resume double-DQN training from a saved q network, then run a
    SAC experiment driven by command-line ``args``.

    NOTE(review): everything from ``previous_runs`` onward references
    names that are not defined in the visible file (``get_existing_runs``,
    ``gym``, ``args``, ``load_weights``, ``sac_minitaur_inspired``) — this
    half looks spliced from a separate script; confirm those names are in
    scope before running.
    """
    from all.presets.classic_control import ddqn
    from all.environments import GymEnvironment
    from all.experiments import SingleEnvExperiment

    # Select compute device from project settings.
    if Settings.CUDA:
        device = "cuda"
    else:
        device = "cpu"

    env = GymEnvironment('sumo-jerk-v0', device=device)
    lr = 1e-5
    agent = ddqn(device=device, lr=lr)
    # Load the checkpoint on CPU first, then move to the target device —
    # safe even if it was saved from a different device.
    q_module = torch.load(os.path.join('models', "q.pt"),
                          map_location='cpu').to(device)

    experiment = SingleEnvExperiment(agent, env)
    # Reach into the experiment for the live agent and copy the saved
    # weights into its q network's underlying model.
    agent = experiment._agent
    old_q = agent.q
    old_q.model.load_state_dict(q_module.state_dict())
    experiment.train(frames=1e6)
    # Move this run's logs into the configured directory, drop the temp dir.
    default_log_dir = experiment._writer.log_dir
    copy_tree(default_log_dir, Settings.FULL_LOG_DIR)
    rmtree(default_log_dir)
    # Snapshot existing run names so the new run can be identified below.
    previous_runs = set(get_existing_runs())

    env = gym.make('nextro-v0', c_args=args)

    env = GymEnvironment(env, args.device)

    # Optionally warm-start SAC from weights on disk.
    pretrained_models = None
    if args.loc != '':
        pretrained_models = load_weights(args.loc)

    agent = sac_minitaur_inspired(device=args.device,
                                  last_frame=args.frames,
                                  pretrained_models=pretrained_models,
                                  train_parallel=args.train_parallel)
    exp = SingleEnvExperiment(agent, env, render=args.render)

    # The new run is the one absent from the earlier snapshot.
    all_runs = set(get_existing_runs())
    current_run_name = list(all_runs - previous_runs)[0]

    if args.mode == 'train':
        # Prefer an episode budget when given; otherwise train by frames.
        if args.episodes != 0:
            exp.train(episodes=args.episodes)
        else:
            exp.train(frames=args.frames)
    else:
        if args.episodes == 0:
            raise Exception('You HAVE TO speficy EPISODES when using test!!!')
        exp.test(episodes=args.episodes)

    # Persist this run's settings under its log-dir name.
    exp._env._env.store_settings(current_run_name)