def train():
    """Train a PPO2 agent with a CNN policy and save the resulting model.

    Reads the module-level constants ENV_COUNT, MODEL_NAME and TIMESTEPS.
    Each worker environment is wrapped in a Monitor that logs episode
    statistics under log/ with a per-environment suffix.
    """
    # makedirs(exist_ok=True) avoids the check-then-create race of the
    # original isdir("log/") / mkdir("log") pair (and the path mismatch).
    os.makedirs("log", exist_ok=True)

    def make_env():
        # Unique-ish log-file suffix taken from the clock at creation
        # time, matching the original naming scheme.
        env_id = str(time.time())[-6:]
        env = create_env_headless()
        return Monitor(env, "log/" + MODEL_NAME + "-" + env_id,
                       allow_early_resets=False)

    if ENV_COUNT == 1:
        # Single environment: run it in-process.
        vec_envs = DummyVecEnv([make_env])
    else:
        # SubprocVecEnv invokes each callable inside its own subprocess,
        # so we pass the factory itself, not a constructed environment.
        vec_envs = SubprocVecEnv([make_env for _ in range(ENV_COUNT)])

    model = PPO2('CnnPolicy', vec_envs, verbose=1, ent_coef=0.0001,
                 n_steps=256)
    model.learn(total_timesteps=TIMESTEPS)
    model.save(MODEL_NAME)
    vec_envs.close()
    print("Learning Done!")
def run_experiment(args):
    """Train a PPO2 or TRPO agent on a ToriLLE gym environment.

    Args:
        args: Namespace-like object; this function reads args.env,
            args.experiment_name, args.num_envs, args.turnframes,
            args.randomize_engagement, args.steps_per_batch, args.agent,
            args.ent_coef and args.timesteps.

    Raises:
        ValueError: if args.agent is neither "ppo" nor "trpo".
    """
    randomization_settings = {
        "engagement_distance": (100, 100),
        "turnframes": (args.turnframes, args.turnframes)
    }
    if args.randomize_engagement:
        randomization_settings["engagement_distance"] = (100, 200)

    if args.num_envs == 1:
        # The algorithms require a vectorized environment to run.
        env = gym.make(args.env)
        env = Monitor(
            TorilleWrapper(env, 100, args.experiment_name,
                           randomization_settings),
            args.experiment_name)
        vecEnv = DummyVecEnv([lambda: env])
    else:
        def make_env():
            env = gym.make(args.env)
            # Unique-ish per-worker suffix so Monitor log files do not
            # clobber each other.
            unique_id = str(time.time())[-6:]
            experiment_env_name = args.experiment_name + ("_env%s" % unique_id)
            return Monitor(
                TorilleWrapper(env, 100, experiment_env_name,
                               randomization_settings),
                experiment_env_name)

        # SubprocVecEnv calls each factory in its own worker process.
        vecEnv = SubprocVecEnv([make_env for _ in range(args.num_envs)])

    steps_per_env = args.steps_per_batch // args.num_envs

    # Standard 2 x 64 network with sigmoid activations.
    policy_kwargs = dict(act_fun=tf.nn.sigmoid, net_arch=[64, 64])

    if args.agent == "ppo":
        model = PPO2(MlpPolicy, vecEnv, policy_kwargs=policy_kwargs,
                     ent_coef=args.ent_coef, n_steps=steps_per_env,
                     verbose=1)
    elif args.agent == "trpo":
        model = TRPO(MlpPolicy, vecEnv, policy_kwargs=policy_kwargs,
                     entcoeff=args.ent_coef,
                     timesteps_per_batch=steps_per_env, verbose=1)
    else:
        # The original left model = None here, which crashed later with
        # an opaque AttributeError on model.learn; fail fast instead.
        raise ValueError("Unknown agent type: %s" % args.agent)

    model.learn(total_timesteps=args.timesteps)