def learn_on_ppo1(env_name, num_timesteps, seed, model_path, lognum, old_model_path=None):
    """Train a CNN policy with PPO on a gym environment and save the model.

    Steps, in order:
      1. Configure the logger to write into ``<cwd>/../../logs/<lognum>``.
      2. Create a single-CPU TensorFlow session and enter it (it is not
         exited inside this function).
      3. Seed via ``set_global_seeds`` and build the monitored environment.
      4. Optionally build a policy named ``'pi'`` and restore a checkpoint.
      5. Run ``my_pposgd_simple.learn`` and save the session to ``model_path``.

    Args:
        env_name: Environment id handed to ``gym.make``.
        num_timesteps: Forwarded to the learner as ``max_timesteps``.
        seed: Value passed to ``set_global_seeds``.
        model_path: Checkpoint path the trained variables are saved to.
        lognum: Name of the sub-directory of ``logs`` used for this run.
        old_model_path: Optional checkpoint to restore before training.
            NOTE(review): whether training really continues from the
            restored weights is an open TODO carried over from the original.
    """
    run_log_dir = str(Path.cwd().parent.parent / 'logs' / str(lognum))
    # Inspect a run with: tensorboard --logdir=<repo>/logs/<lognum>
    # (on macOS: cd into the project, start `pipenv shell` first).
    logger.configure(
        dir=run_log_dir,
        format_strs=['stdout', 'log', 'csv', 'json', 'tensorboard'],
    )

    # Entered manually; nothing in this function exits the session again.
    session = U.make_session(num_cpu=1)
    session.__enter__()
    set_global_seeds(seed)

    monitor_file = os.path.join(logger.get_dir(), "monitor.json")
    env = bench.Monitor(gym.make(env_name), monitor_file)

    pi = None
    if old_model_path is not None:
        # The policy is created first, then the checkpoint is restored into
        # the default session.
        pi = cnn_policy.CnnPolicy('pi', env.observation_space, env.action_space)
        # TODO does that load the model and we can learn on?
        tf.train.Saver().restore(tf.get_default_session(), old_model_path)

    ppo_settings = {
        'policy_fn': policy_fn_cnn,
        'max_timesteps': num_timesteps,
        # 'max_episodes': 1000,
        'timesteps_per_actorbatch': 2048,  # TODO or 4096 when obstacles
        'clip_param': 0.2,
        'entcoeff': 0.0,
        'optim_epochs': 10,
        'optim_stepsize': 3e-4,
        'optim_batchsize': 64,
        'gamma': 0.99,
        'lam': 0.95,
        'schedule': 'linear',
        'callback': callback,
        'pi': pi,
    }
    my_pposgd_simple.learn(env, **ppo_settings)
    # env.close()  # left disabled, as in the original

    tf.train.Saver().save(session, model_path)
    print('saved to ' + model_path)
def policy_fn(name, ob_space, ac_space):  # pylint: disable=W0613
    """Create a ``cnn_policy.CnnPolicy`` for the given name and spaces.

    Args:
        name: Forwarded as the policy's ``name``.
        ob_space: Forwarded as the policy's ``ob_space``.
        ac_space: Forwarded as the policy's ``ac_space``.

    Returns:
        The newly constructed ``CnnPolicy`` instance.
    """
    policy = cnn_policy.CnnPolicy(
        name=name,
        ob_space=ob_space,
        ac_space=ac_space,
    )
    return policy
def policy_fn_cnn(name, ob_space, ac_space):
    """Create a ``cnn_policy.CnnPolicy`` with ``kind='small'``.

    Args:
        name: Forwarded as the policy's ``name``.
        ob_space: Forwarded as the policy's ``ob_space``.
        ac_space: Forwarded as the policy's ``ac_space``.

    Returns:
        The newly constructed ``CnnPolicy`` instance.
    """
    policy_kwargs = {
        'name': name,
        'ob_space': ob_space,
        'ac_space': ac_space,
        'kind': 'small',  # TODO kind='large'
    }
    return cnn_policy.CnnPolicy(**policy_kwargs)
def policy_fn(name, ob_space, ac_space):
    """Return a fresh ``cnn_policy.CnnPolicy`` built from the arguments.

    All three parameters are passed through unchanged as the keyword
    arguments ``name``, ``ob_space`` and ``ac_space``.
    """
    make_policy = cnn_policy.CnnPolicy
    return make_policy(
        name=name,
        ob_space=ob_space,
        ac_space=ac_space,
    )
def policy_fn(name, ob_space, ac_space, sess=None, placeholders=None):  # pylint: disable=W0613
    """Create a ``cnn_policy.CnnPolicy``, forwarding session and placeholders.

    Args:
        name: Forwarded as the policy's ``name``.
        ob_space: Forwarded as the policy's ``ob_space``.
        ac_space: Forwarded as the policy's ``ac_space``.
        sess: Forwarded as ``sess``; defaults to ``None``.
        placeholders: Forwarded as ``placeholders``; defaults to ``None``.

    Returns:
        The newly constructed ``CnnPolicy`` instance.
    """
    policy = cnn_policy.CnnPolicy(
        name=name,
        ob_space=ob_space,
        ac_space=ac_space,
        sess=sess,
        placeholders=placeholders,
    )
    return policy
def policy_fn(name, ob_space, ac_space):
    """Build and return a ``cnn_policy.CnnPolicy`` for the given spaces.

    The original note on this function read "Given an obs, returns an act."
    That presumably describes the returned policy object; this factory
    itself only constructs the policy.
    """
    policy_kwargs = dict(name=name, ob_space=ob_space, ac_space=ac_space)
    return cnn_policy.CnnPolicy(**policy_kwargs)