Example #1
import os
import time

from stable_baselines import PPO2
from stable_baselines.bench import Monitor
from stable_baselines.common.vec_env import DummyVecEnv, SubprocVecEnv

# MODEL_NAME, ENV_COUNT, TIMESTEPS and create_env_headless() are assumed to
# be defined at module level in the original source.
def train():
    if not os.path.isdir("log/"):
        os.mkdir("log")

    if ENV_COUNT == 1:
        # Single environment: wrap it in a Monitor for episode logging, then
        # in a DummyVecEnv, since PPO2 only accepts vectorized environments.
        env = create_env_headless()
        env_id = str(time.time())[-6:]
        env = Monitor(env,
                      "log/" + MODEL_NAME + "-" + env_id,
                      allow_early_resets=False)
        vec_envs = DummyVecEnv([lambda: env])
    else:
        # SubprocVecEnv takes a list of callables; each one is invoked in
        # its own subprocess to build an independent environment.
        def make_env():
            # A timestamp suffix keeps the Monitor log files distinct.
            env_id = str(time.time())[-6:]
            env = create_env_headless()
            return Monitor(env,
                           "log/" + MODEL_NAME + "-" + env_id,
                           allow_early_resets=False)

        vec_envs = SubprocVecEnv([make_env for _ in range(ENV_COUNT)])

    # Small entropy bonus and 256-step rollouts per environment.
    model = PPO2('CnnPolicy',
                 vec_envs,
                 verbose=1,
                 ent_coef=0.0001,
                 n_steps=256)
    model.learn(total_timesteps=TIMESTEPS)
    model.save(MODEL_NAME)
    vec_envs.close()

    print("Learning Done!")
Example #2
import time

import gym
import tensorflow as tf
from stable_baselines import PPO2, TRPO
from stable_baselines.bench import Monitor
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv, SubprocVecEnv

# TorilleWrapper is assumed to be a project-specific wrapper imported from
# the surrounding codebase.
def run_experiment(args):

    # Each setting maps to a (low, high) range; a fixed value repeats the
    # same endpoint on both sides.
    randomization_settings = {
        "engagement_distance": (100, 100),
        "turnframes": (args.turnframes, args.turnframes)
    }

    if args.randomize_engagement:
        randomization_settings["engagement_distance"] = (100, 200)

    if args.num_envs == 1:
        # Create dummyvecenv
        env = gym.make(args.env)
        env = Monitor(
            TorilleWrapper(env, 100, args.experiment_name,
                           randomization_settings), args.experiment_name)
        vecEnv = DummyVecEnv([
            lambda: env
        ])  # The algorithms require a vectorized environment to run
    else:
        # SubprocVecEnv takes a list of callables, one per worker process.
        def make_env():
            env = gym.make(args.env)
            # A timestamp suffix gives each worker its own Monitor log.
            unique_id = str(time.time())[-6:]
            experiment_env_name = args.experiment_name + ("_env%s" % unique_id)
            return Monitor(
                TorilleWrapper(env, 100, experiment_env_name,
                               randomization_settings), experiment_env_name)

        vecEnv = SubprocVecEnv([make_env for _ in range(args.num_envs)])

    # Split the batch evenly across the parallel environments.
    steps_per_env = args.steps_per_batch // args.num_envs

    # Standard 2 x 64 network with sigmoid activations
    policy_kwargs = dict(act_fun=tf.nn.sigmoid, net_arch=[64, 64])
    if args.agent == "ppo":
        model = PPO2(MlpPolicy,
                     vecEnv,
                     policy_kwargs=policy_kwargs,
                     ent_coef=args.ent_coef,
                     n_steps=steps_per_env,
                     verbose=1)
    elif args.agent == "trpo":
        model = TRPO(MlpPolicy,
                     vecEnv,
                     policy_kwargs=policy_kwargs,
                     entcoeff=args.ent_coef,
                     timesteps_per_batch=steps_per_env,
                     verbose=1)

    model.learn(total_timesteps=args.timesteps)
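
The attributes read from args pin down the command-line interface this function expects. The parser below reconstructs those flags: the flag names follow directly from the attribute names (argparse maps dashes to underscores), but every default value here is an illustrative assumption, not the original project's choice.

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("env", type=str)
    parser.add_argument("experiment_name", type=str)
    parser.add_argument("--agent", choices=("ppo", "trpo"), default="ppo")
    parser.add_argument("--num-envs", type=int, default=1)
    parser.add_argument("--steps-per-batch", type=int, default=2048)
    parser.add_argument("--timesteps", type=int, default=1000000)
    parser.add_argument("--ent-coef", type=float, default=0.01)
    parser.add_argument("--turnframes", type=int, default=5)
    parser.add_argument("--randomize-engagement", action="store_true")
    run_experiment(parser.parse_args())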