def main():
    """
    Run the Uber-GA learner on the PyBullet environment chosen on the command line.

    The environment id and seed come from ``pybullet_arg_parser``. The logger is
    configured with stdout, log and csv outputs, and then
    ``LearningSession.generation`` is called repeatedly until either the
    generation cap or the timestep budget is reached. Once the environment has
    been created it is always closed, even if training raises.
    """
    # Stopping criteria: whichever limit is hit first ends the run.
    max_generations = 10000
    max_timesteps = 5e6

    args = pybullet_arg_parser().parse_args()
    logger.configure(format_strs=['stdout', 'log', 'csv'],
                     log_suffix="Uber-GA-" + args.env + "_seed_" + str(args.seed))
    logger.log("Algorithm:Uber-GA-" + args.env + "_seed_" + str(args.seed))

    with make_session() as sess:
        env = make_pybullet_env(args.env, args.seed)
        try:
            model = simple_mlp(sess, env)
            sess.run(tf.global_variables_initializer())
            learn_sess = LearningSession(sess, model)
            for _ in range(max_generations):
                # Checked before each generation, so the timestep budget can
                # only be overshot by the generation currently in flight.
                if learn_sess.timesteps_so_far >= max_timesteps:
                    break
                # The value returned by generation() is not used here; the
                # call is made for its effect on the learning session.
                learn_sess.generation(env, trials=1, population=POPULATION)
        finally:
            env.close()
def main():
    """
    Entry point for a CMAES training run.

    Reads the command-line options via ``pybullet_arg_parser``, sets up the
    logger with stdout, log and csv outputs under a suffix naming the
    algorithm, environment and seed, logs a banner line, and then calls
    ``train``.
    """
    cli = pybullet_arg_parser().parse_args()

    # Shared tail of both the log suffix and the banner line.
    env_and_seed = cli.env + "_seed_" + str(cli.seed)

    logger.configure(
        format_strs=['stdout', 'log', 'csv'],
        log_suffix="CMAES-" + env_and_seed,
    )
    logger.log("Algorithm: CMAES-" + env_and_seed)

    train(cli.env, num_timesteps=cli.num_timesteps, seed=cli.seed)
def main():
    """
    Entry point for a NAC_Advantage_Fisher training run.

    Configures the logger (stdout, log and csv outputs) with a suffix built
    from the algorithm name and environment id, then offsets the command-line
    seed by a random integer, logs the resulting seed, and calls ``train``
    with it.
    """
    import random

    options = pybullet_arg_parser().parse_args()

    output_formats = ['stdout', 'log', 'csv']
    logger.configure(format_strs=output_formats,
                     log_suffix="NAC_Advantage_Fisher-" + options.env)
    logger.log("Algorithm: NAC_Advantage_Fisher-" + options.env)

    # Offset the requested seed by a random amount in [0, 2**32 - 1]; the
    # value actually used is logged so the run can be identified afterwards.
    random_offset = random.randint(0, 2**32 - 1)
    options.seed = options.seed + random_offset
    logger.log("Algorithm: SEED-" + str(options.seed))

    train(options.env, num_timesteps=options.num_timesteps, seed=options.seed)
def main():
    """
    Entry point for a PES-S2 training run.

    Configures the logger (stdout, log and csv outputs) with a suffix built
    from the algorithm name, environment id and the command-line seed. The
    command-line seed is then offset by a random integer, the resulting seed
    is logged, and ``train`` is called with that same seed.
    """
    import random

    args = pybullet_arg_parser().parse_args()
    logger.configure(format_strs=['stdout', 'log', 'csv'],
                     log_suffix="PES-S2-" + args.env + "_seed_" + str(args.seed))

    # Offset the requested seed by a random amount in [0, 2**32 - 1].
    seed = args.seed + random.randint(0, 2**32 - 1)
    logger.log("Algorithm: PES-S2-" + args.env + "_seed_" + str(seed))

    # Train with the seed that was just logged. Previously args.seed was
    # passed here, so the logged seed never matched the one actually used.
    train(args.env, num_timesteps=args.num_timesteps, seed=seed)
def main():
    """
    Entry point for a PPO training run.

    Parses the command-line options with ``pybullet_arg_parser``, configures
    the logger with stdout, log and csv outputs under a ``PPO-<env>`` suffix,
    and calls ``train`` with the parsed environment id, timestep count and
    seed.
    """
    cli = pybullet_arg_parser().parse_args()

    output_formats = ['stdout', 'log', 'csv']
    logger.configure(format_strs=output_formats, log_suffix="PPO-" + cli.env)

    train(cli.env, num_timesteps=cli.num_timesteps, seed=cli.seed)
def main():
    """
    Entry point for a PPO_Dual_NAC_Fisher training run.

    Parses the command-line options with ``pybullet_arg_parser``, configures
    the logger with stdout, log and csv outputs under a suffix naming the
    algorithm and environment, logs a banner line, and calls ``train``.
    """
    cli = pybullet_arg_parser().parse_args()
    env_name = cli.env

    output_formats = ['stdout', 'log', 'csv']
    logger.configure(format_strs=output_formats,
                     log_suffix="PPO_Dual_NAC_Fisher-" + env_name)
    logger.log("Algorithm: PPO_Dual_NAC_Fisher-" + env_name)

    train(env_name, num_timesteps=cli.num_timesteps, seed=cli.seed)