Example #1
def train(env_id, num_timesteps, seed, policy, lrschedule, num_cpu):
    def make_env(rank):
        def _thunk():
            env = gym.make(env_id)
            env.seed(seed + rank)
            env = bench.Monitor(
                env,
                logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
            gym.logger.setLevel(logging.WARN)
            return env

        return _thunk

    set_global_seeds(seed)
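    # run num_cpu copies of the environment in parallel subprocesses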
    env = SubprocVecEnv([make_env(i) for i in range(num_cpu)])

    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = LstmPolicy
    elif policy == 'radlstm':
        policy_fn = RadLstmPolicy

    learn(policy_fn,
          env,
          seed,
          total_timesteps=int(num_timesteps * 1.1),
          lrschedule=lrschedule)
    env.close()
Example #2
def train(env_id, num_timesteps, seed, policy, lrschedule, num_env):
    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = LstmPolicy
    elif policy == 'lnlstm':
        policy_fn = LnLstmPolicy
    env = VecFrameStack(make_atari_env(env_id, num_env, seed), 4)  # stack the last 4 frames
    learn(policy_fn, env, seed, total_timesteps=int(num_timesteps * 1.1), lrschedule=lrschedule)
    env.close()
Example #3
def train(env_id, num_timesteps, seed, policy, lrschedule, num_env):
    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = LstmPolicy
    elif policy == 'lnlstm':
        policy_fn = LnLstmPolicy
    wrapper_kwargs = {'clip_rewards': False}  # keep raw rewards instead of clipping to {-1, 0, +1}
    env = VecFrameStack(make_atari_env(env_id, num_env, seed, wrapper_kwargs=wrapper_kwargs), 4)
    learn(policy_fn, env, seed, total_timesteps=int(num_timesteps * 1.1), lrschedule=lrschedule)
    env.close()
Example #4
def train(map_name, num_timesteps, batch_steps, seed, network, ar, lr,
          lrschedule, screen_size, minimap_size, step_mul, num_cpu, optimizer,
          ent_coef, vl_coef, max_grad_norm):
    maps.get(map_name)  # Assert the map exists.

    log_path = './experiments/%s/' % (time.strftime("%m%d_%H%M_") + map_name)
    make_path(log_path)
    make_path("%sreplay" % log_path)

    def make_env(rank):
        def _thunk():
            agent_interface = features.parse_agent_interface_format(
                feature_screen=screen_size, feature_minimap=minimap_size)
            env = sc2_env.SC2Env(
                map_name=map_name,
                step_mul=step_mul,
                agent_interface_format=agent_interface,
                # screen_size_px=(screen_size, screen_size),
                # minimap_size_px=(minimap_size, minimap_size),
                visualize=False)
            return env

        return _thunk

    set_global_seeds(seed)

    with open("%sconfig.log" % log_path, "a+") as log_file:
        log_file.write("Map Name: %s\n" % map_name)
        log_file.write("Optimizer: %s\n" % optimizer)
        log_file.write("Network: %s\n" % network)
        log_file.write("Learning Rate: %f\n" % lr)
        log_file.write("Entropy Coefficient: %f\n" % ent_coef)
        log_file.write("Value Function Coefficient: %f\n" % vl_coef)
        log_file.write("Maximum Gradient Norm: %f\n" % max_grad_norm)
        log_file.write("Screen Size: %d\n" % screen_size)
        log_file.write("Minimap Size: %d\n" % minimap_size)
        log_file.write("Batch Steps: %d\n" % batch_steps)

    learn(network,
          log_path,
          make_env,
          total_timesteps=num_timesteps,
          nsteps=batch_steps,
          ent_coef=ent_coef,
          max_grad_norm=max_grad_norm,
          optimizer=optimizer,
          vl_coef=vl_coef,
          ar=ar,
          lr=lr,
          num_cpu=num_cpu)
Example #5
def train(env_id, num_timesteps, num_cpu):
    def make_env(rank):
        def _thunk():
            env = make_atari(env_id)
            env.seed(SEED + rank)
            gym.logger.setLevel(logging.WARN)
            env = wrap_deepmind(env)

            # wrap the env one more time for getting total reward
            env = Monitor(env, rank)
            return env
        return _thunk

    env = SubprocVecEnv([make_env(i) for i in range(num_cpu)])
    learn(CNN, env, SEED, total_timesteps=int(num_timesteps * 1.1))
    env.close()
Example #7
def train(env_id, num_timesteps, seed, policy, lrschedule, num_env, args):
    if policy == 'i2a':
        policy_fn = I2A
    elif policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = LstmPolicy
    elif policy == 'lnlstm':
        policy_fn = LnLstmPolicy
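    # the environment is hard-coded to MsPacman here; the env_id argument is not used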
    env = VecFrameStack(
        make_atari_env('MsPacmanNoFrameskip-v0', num_env, seed), 4)
    learn(policy_fn,
          env,
          seed,
          total_timesteps=int(num_timesteps * 1.1),
          lrschedule=lrschedule,
          args=args)
    env.close()
Example #8
def main(env_id, num_timesteps, seed, policy, nstack, nsteps, lrschedule,
         optimizer, num_cpu, model_file, use_static_wrapper,
         use_encoded_imagination, use_decoded_imagination):
    num_timesteps //= 4
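    # the encoded and decoded imagination wrappers are mutually exclusive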
    assert not (use_encoded_imagination and use_decoded_imagination)

    def make_env(rank):
        def _thunk():
            env = gym.make(env_id)
            env.seed(seed + rank)
            if use_static_wrapper:
                env = StaticWrapper(env)
            if policy == 'cnn' or use_encoded_imagination:
                env = RenderWrapper(env, 400, 600)
                env = DownsampleWrapper(env, 4)
            if use_encoded_imagination or use_decoded_imagination:
                env = FrameStack(env, 3)
            if use_encoded_imagination:
                env = EncodedImaginationWrapper(env, model_file, num_cpu)
            if use_decoded_imagination:
                env = DecodedImaginationWrapper(env, model_file, num_cpu)
            gym.logger.setLevel(logging.WARN)
            return env

        return _thunk

    set_global_seeds(seed)
    env = SubprocVecEnv([make_env(i) for i in range(num_cpu)])

    if policy == 'fc':
        policy_fn = FcPolicy
    elif policy == 'cnn':
        policy_fn = CnnPolicy
    learn(policy_fn,
          env,
          seed,
          nsteps=nsteps,
          nstack=nstack,
          total_timesteps=num_timesteps,
          lrschedule=lrschedule,
          optimizer=optimizer,
          max_episode_length=195)
    env.close()
Example #9
def train(config,
          num_frames,
          seed,
          policy,
          lrschedule,
          num_cpu,
          ckpt,
          nsteps,
          start_port=8000,
          dfn=all):
    # divide by 4 due to frame skip, then add a little extra so episodes can finish
    num_timesteps = int(num_frames / 4 * 1.1)

    def make_env(rank):
        def _thunk():
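            # each worker gets its own port, offset from start_port by its rank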
            port = rank + start_port
            gym.logger.setLevel(logging.WARN)
            return wrap_ma_doom(config, NUM_PLAYERS, port)

        return _thunk

    set_global_seeds(seed)
    env = SubprocVecEnv([make_env(i) for i in range(num_cpu)], dfn)
    if policy == 'comm':
        policy_fn = MACommPolicy
    elif policy == 'commsep':
        policy_fn = MACommSepCriticPolicy
    elif policy == 'cnn':
        policy_fn = MACnnPolicy
    elif policy == 'recon':
        policy_fn = MAReconPolicy
    elif policy == 'lnlstm':
        raise NotImplementedError
    time.sleep(num_cpu)
    print("creation complete, start running!")
    return learn(policy_fn,
                 env,
                 seed,
                 nsteps=nsteps,
                 checkpoint=ckpt,
                 total_timesteps=num_timesteps,
                 lrschedule=lrschedule)
Example #10
def train(config, num_frames, seed, policy, lrschedule, num_cpu, ckpt, nsteps, dfn=all):
    num_timesteps = int(num_frames / 4 * 1.1)
    # divide by 4 due to frame skip, then add a little extra so episodes can finish
    def make_env(rank):
        def _thunk():
            gym.logger.setLevel(logging.WARN)
            return wrap_predator_prey(**config)
        return _thunk
    set_global_seeds(seed)
    env = SubprocVecEnv([make_env(i) for i in range(num_cpu)], dfn, nplayers=config["npredator"])
    if policy == 'nmap':
        policy_fn = MANMapPolicy
    elif policy == 'cnn':
        policy_fn = MACnnPolicy
    elif policy == 'lnlstm':
        raise NotImplementedError
    time.sleep(num_cpu * 0.5)
    print("creation complete, start running!")

    logs_path = "log/" + policy_fn.__name__ + "_" + str(config["po_radius"])

    return learn(policy_fn, env, seed, logs_path, nplayers=config["npredator"],
                 nsteps=nsteps, checkpoint=ckpt, total_timesteps=num_timesteps,
                 lrschedule=lrschedule, eval_env_fn=make_env(0))