# NOTE(review): the `def writefile(fname, s):` header was lost when this chunk
# was collapsed onto one line (the `with open(...)` below references fname/s);
# restored to match the identical definition later in this file.
def writefile(fname, s):
    """Write payload *s* to *fname* inside *outdir*.

    Opens the file in binary mode when *s* is bytes so pickled agent
    snapshots are stored intact rather than as the repr of a bytes object.
    """
    mode = 'wb' if isinstance(s, (bytes, bytearray)) else 'w'
    with open(path.join(outdir, fname), mode) as fh:
        fh.write(s)

# Record run metadata so the experiment can be reproduced later.
info = {}
info['params'] = params
info['argv'] = sys.argv
info['env_id'] = env.spec.id
# ------------------------------------------

def noisy_evaluation(theta):
    """Return the episode reward of a linear policy parameterized by *theta*.

    The trajectory length returned by do_rollout is discarded — CEM scores
    candidates by reward only.
    """
    agent = BinaryActionLinearPolicy(theta)
    rew, _ = do_rollout(agent, env, num_steps)
    return rew

# Train the agent, and snapshot each stage.
for (i, iterdata) in enumerate(
        cem(noisy_evaluation, np.zeros(env.observation_space.shape[0] + 1), **params)):
    print('Iteration %2i. Episode mean reward: %7.3f' % (i, iterdata['y_mean']))
    agent = BinaryActionLinearPolicy(iterdata['theta_mean'])
    if args.display:
        do_rollout(agent, env, 200, render=True)
    # Write the raw pickle bytes. The original str(pickle.dumps(agent, -1))
    # stored the repr of a bytes object through a text-mode file, corrupting
    # the snapshot under Python 3.
    writefile('agent-%.4i.pkl' % i, pickle.dumps(agent, -1))

# Write out the env at the end so we store the parameters of this
# environment.
writefile('info.json', json.dumps(info))
env.close()
def noisy_evaluation(theta):
    """Return the episode reward of a linear policy parameterized by *theta*.

    Builds a BinaryActionLinearPolicy from the parameter vector and runs one
    rollout of at most ``num_steps`` in the module-level ``env``. The
    trajectory length returned by do_rollout is discarded — only the reward
    is used as the CEM objective.
    """
    agent = BinaryActionLinearPolicy(theta)
    rew, _ = do_rollout(agent, env, num_steps)  # was `rew, T`; T was unused
    return rew
def writefile(fname, s):
    """Write payload *s* to *fname* inside *outdir* (binary mode for bytes)."""
    mode = "wb" if isinstance(s, (bytes, bytearray)) else "w"
    with open(path.join(outdir, fname), mode) as fh:
        fh.write(s)


# Record run metadata so the experiment can be reproduced later.
info = {}
info["params"] = params
info["argv"] = sys.argv
info["env_id"] = env.spec.id
# ------------------------------------------


def noisy_evaluation(theta):
    """Return the episode reward of a linear policy parameterized by *theta*."""
    agent = BinaryActionLinearPolicy(theta)
    rew, _ = do_rollout(agent, env, num_steps)  # trajectory length unused
    return rew


# Train the agent, and snapshot each stage
for (i, iterdata) in enumerate(
    cem(noisy_evaluation, np.zeros(env.observation_space.shape[0] + 1), **params)
):
    print("Iteration %2i. Episode mean reward: %7.3f" % (i, iterdata["y_mean"]))
    agent = BinaryActionLinearPolicy(iterdata["theta_mean"])
    if args.display:
        do_rollout(agent, env, 200, render=True)
    # Write the raw pickle bytes. str(pickle.dumps(agent, -1)) would store the
    # repr of a bytes object and corrupt the snapshot under Python 3.
    writefile("agent-%.4i.pkl" % i, pickle.dumps(agent, -1))

# Write out the env at the end so we store the parameters of this
# environment.
writefile("info.json", json.dumps(info))
env.close()