def main():
    """Parse the command line, configure logging, and start OpenAI-ES training.

    The environment id and seed are taken from the arguments produced by
    ``gym_ctrl_arg_parser``. Both are embedded in the log suffix and in the
    banner line written through ``logger``.
    """
    args = gym_ctrl_arg_parser().parse_args()

    # One label shared by the log-file suffix and the banner message.
    run_label = "OpenAI-ES-" + args.env + "_seed_" + str(args.seed)

    logger.configure(
        format_strs=['stdout', 'log', 'csv'],
        log_suffix=run_label,
    )
    logger.log("Algorithm: " + run_label)

    train(args.env, num_timesteps=args.num_timesteps, seed=args.seed)
def main():
    """
    Run the UberGA loop on the environment selected on the command line.

    A simple MLP model is built inside a fresh session and evolved one
    generation at a time. The loop stops after 10000 generations or as soon
    as the learning session reports at least 5e6 timesteps, whichever comes
    first. The environment is closed on the way out, including on error.
    """
    args = gym_ctrl_arg_parser().parse_args()

    run_label = "UberGA-" + args.env + "_seed_" + str(args.seed)
    logger.configure(format_strs=['stdout', 'log', 'csv'], log_suffix=run_label)
    # The banner deliberately has no space after the colon (kept as-is).
    logger.log("Algorithm:" + run_label)

    max_generations = 10000
    max_timesteps = 5e6

    with make_session() as sess:
        env = make_gym_control_env(args.env, args.seed)
        try:
            model = simple_mlp(sess, env)
            sess.run(tf.global_variables_initializer())
            learn_sess = LearningSession(sess, model)

            for _ in range(max_generations):
                # Timestep budget is checked before each generation is run.
                if learn_sess.timesteps_so_far >= max_timesteps:
                    break
                learn_sess.generation(env, trials=1, population=POPULATION)
        finally:
            env.close()
def main():
    """Parse the command line, configure logging, and start ACKTR training.

    The environment id and the timestep budget come from the arguments
    produced by ``gym_ctrl_arg_parser``. The seed passed to ``train`` is the
    fixed value 2.
    """
    args = gym_ctrl_arg_parser().parse_args()
    logger.configure(
        format_strs=["stdout", 'log', 'csv'],
        log_suffix="ACKTR-" + args.env,
    )

    # NOTE(review): the seed is pinned here and the parsed arguments are not
    # consulted for it -- confirm this is intentional before relying on a
    # command-line seed for this script.
    seed = 2

    logger.log("Algorithm: ACKTR-" + args.env + "_seed_" + str(seed))
    train(args.env, num_timesteps=args.num_timesteps, seed=seed)
def main():
    """Parse the command line, configure logging, and start Dual_RAC training.

    The seed from the command line is shifted by a random offset drawn from
    ``[0, 2**32 - 1]`` before training, and the resulting value is written to
    the log.
    """
    args = gym_ctrl_arg_parser().parse_args()

    run_label = "Dual_RAC-" + args.env
    logger.configure(format_strs=['stdout', 'log', 'csv'], log_suffix=run_label)
    logger.log("Algorithm: " + run_label)

    import random

    # NOTE(review): with a non-zero command-line seed the sum can exceed
    # 2**32 - 1 -- confirm that ``train`` accepts seeds of that size.
    offset = random.randint(0, 2 ** 32 - 1)
    args.seed = args.seed + offset

    logger.log("Algorithm: SEED-" + str(args.seed))
    train(args.env, num_timesteps=args.num_timesteps, seed=args.seed)
def main():
    """Parse the command line, configure logging, and start PES-S2 training.

    The log suffix carries the seed given on the command line, while the
    banner line and the training call use that seed plus a random offset
    drawn from ``[0, 2**32 - 1]``.
    """
    args = gym_ctrl_arg_parser().parse_args()

    prefix = "PES-S2-" + args.env + "_seed_"
    logger.configure(
        format_strs=['stdout', 'log', 'csv'],
        log_suffix=prefix + str(args.seed),
    )

    import random

    # NOTE(review): with a non-zero command-line seed the sum can exceed
    # 2**32 - 1 -- confirm that ``train`` accepts seeds of that size.
    offset = random.randint(0, 2 ** 32 - 1)
    seed = args.seed + offset

    logger.log("Algorithm: " + prefix + str(seed))
    train(args.env, num_timesteps=args.num_timesteps, seed=seed)
def main():
    """Parse the command line, configure logging, and start A2C training.

    Two options are added on top of those provided by
    ``gym_ctrl_arg_parser``: ``--policy`` (default ``mlp``) and
    ``--lrschedule`` (default ``linear``). Training is launched with a single
    environment (``num_env=1``).
    """
    parser = gym_ctrl_arg_parser()

    # (flag, help text, allowed values, default) for each extra option.
    extra_options = (
        ('--policy', 'Policy architecture', ['cnn', 'lstm', 'lnlstm', 'mlp'], 'mlp'),
        ('--lrschedule', 'Learning rate schedule', ['constant', 'linear'], 'linear'),
    )
    for flag, help_text, allowed, fallback in extra_options:
        parser.add_argument(flag, help=help_text, choices=allowed, default=fallback)

    args = parser.parse_args()
    logger.configure(
        format_strs=['stdout', 'log', 'csv'],
        log_suffix="A2C-" + args.env,
    )

    train(
        args.env,
        num_timesteps=args.num_timesteps,
        seed=args.seed,
        policy=args.policy,
        lrschedule=args.lrschedule,
        num_env=1,
        env_name=args.env,
    )
def main():
    """Parse the command line, configure logging, and start PPO_NAC_Advantage training.

    The environment id, timestep budget, and seed all come from the arguments
    produced by ``gym_ctrl_arg_parser``. The environment id is also used in
    the log suffix and the banner line.
    """
    args = gym_ctrl_arg_parser().parse_args()

    # One label shared by the log-file suffix and the banner message.
    run_label = "PPO_NAC_Advantage-" + args.env

    logger.configure(format_strs=['stdout', 'log', 'csv'], log_suffix=run_label)
    logger.log("Algorithm: " + run_label)

    train(args.env, num_timesteps=args.num_timesteps, seed=args.seed)
def main():
    """Parse the command line, configure logging, and start PPO_CMAES training.

    The environment id, timestep budget, and seed all come from the arguments
    produced by ``gym_ctrl_arg_parser``. No banner line is logged here; only
    the logger outputs and suffix are configured.
    """
    args = gym_ctrl_arg_parser().parse_args()

    output_formats = ['stdout', 'log', 'csv']
    suffix = "PPO_CMAES-" + args.env
    logger.configure(format_strs=output_formats, log_suffix=suffix)

    train(args.env, num_timesteps=args.num_timesteps, seed=args.seed)