Esempio n. 1
0
def main():
    """Parse the command line, configure logging, and start OpenAI-ES training.

    A run tag of the form ``OpenAI-ES-<env>_seed_<seed>`` is built from the
    parsed arguments; it is handed to the logger as ``log_suffix`` and echoed
    in the first log line before training starts.
    """
    cli = gym_ctrl_arg_parser().parse_args()
    run_tag = "OpenAI-ES-" + cli.env + "_seed_" + str(cli.seed)

    logger.configure(format_strs=['stdout', 'log', 'csv'], log_suffix=run_tag)
    logger.log("Algorithm: " + run_tag)

    train(cli.env, num_timesteps=cli.num_timesteps, seed=cli.seed)
Esempio n. 2
0
def main():
    """
    Run the UberGA training loop on the environment named on the command line.

    The environment id and seed are read from the parsed arguments
    (``args.env`` / ``args.seed``). Generations are evolved until either the
    generation cap or the timestep budget defined below is reached. The
    environment is closed on exit, including when an exception is raised.
    """
    # Stopping criteria fixed by this script.
    max_generations = 10000
    max_timesteps = 5e6

    args = gym_ctrl_arg_parser().parse_args()
    env_id = args.env
    seed = args.seed

    logger.configure(format_strs=['stdout', 'log', 'csv'],
                     log_suffix="UberGA-" + env_id + "_seed_" + str(seed))
    logger.log("Algorithm:UberGA-" + env_id + "_seed_" + str(seed))

    generation = 0
    with make_session() as sess:
        env = make_gym_control_env(env_id, seed)
        try:
            model = simple_mlp(sess, env)
            sess.run(tf.global_variables_initializer())
            learn_sess = LearningSession(sess, model)
            # The limits are re-checked before every generation, so the loop
            # stops as soon as either one is hit.
            while (generation < max_generations
                   and learn_sess.timesteps_so_far < max_timesteps):
                # The returned population is not needed here; the call is
                # made for its effect on the learning session.
                learn_sess.generation(env, trials=1, population=POPULATION)
                generation += 1
        finally:
            env.close()
def main():
    """Parse the command line, configure logging, and start ACKTR training.

    The seed passed to ``train`` and written to the log is the one parsed
    from the command line (``args.seed``).
    """
    args = gym_ctrl_arg_parser().parse_args()
    logger.configure(format_strs=["stdout", 'log', 'csv'],
                     log_suffix="ACKTR-" + args.env)
    # Use the parsed seed instead of a hard-coded constant so the seed given
    # on the command line actually takes effect, as in the sibling scripts.
    seed = args.seed
    logger.log("Algorithm: ACKTR-" + args.env + "_seed_" + str(seed))
    train(args.env, num_timesteps=args.num_timesteps, seed=seed)
def main():
    """Parse the command line, configure logging, and start Dual_RAC training.

    The parsed seed is offset by a random amount on every invocation, so
    repeated runs use different seeds. The seed actually used is written to
    the log so a run can be identified afterwards.
    """
    import random

    args = gym_ctrl_arg_parser().parse_args()
    logger.configure(format_strs=['stdout', 'log', 'csv'],
                     log_suffix="Dual_RAC-" + args.env)
    logger.log("Algorithm: Dual_RAC-" + args.env)

    # The random offset alone already spans the full unsigned 32-bit range,
    # so adding a non-zero base seed could push the sum past 2**32 - 1.
    # Wrap it back into that range.
    # NOTE(review): presumably train() feeds this to a 32-bit seeding API
    # (e.g. numpy's legacy seeding) -- confirm.
    args.seed = (args.seed + random.randint(0, 2**32 - 1)) % 2**32
    logger.log("Algorithm: SEED-" + str(args.seed))
    train(args.env, num_timesteps=args.num_timesteps, seed=args.seed)
def main():
    """Parse the command line, configure logging, and start PES-S2 training.

    The seed passed to ``train`` is the parsed seed plus a random offset, so
    repeated runs use different seeds. The logger's ``log_suffix`` carries
    the *parsed* seed, while the first log line records the seed actually
    used.
    """
    import random

    args = gym_ctrl_arg_parser().parse_args()
    logger.configure(format_strs=['stdout', 'log', 'csv'],
                     log_suffix="PES-S2-" + args.env + "_seed_" +
                     str(args.seed))

    # The random offset alone already spans the full unsigned 32-bit range,
    # so adding a non-zero base seed could push the sum past 2**32 - 1.
    # Wrap it back into that range.
    # NOTE(review): presumably train() feeds this to a 32-bit seeding API
    # (e.g. numpy's legacy seeding) -- confirm.
    seed = (args.seed + random.randint(0, 2**32 - 1)) % 2**32
    logger.log("Algorithm: PES-S2-" + args.env + "_seed_" + str(seed))
    train(args.env, num_timesteps=args.num_timesteps, seed=seed)
Esempio n. 6
0
def main():
    """Parse the command line, configure logging, and start A2C training.

    Two options are added to the shared control-task parser:

    * ``--policy``: one of ``cnn``, ``lstm``, ``lnlstm``, ``mlp``
      (default ``mlp``).
    * ``--lrschedule``: ``constant`` or ``linear`` (default ``linear``).

    Training is started with a single environment (``num_env=1``); the
    environment id is passed both positionally and as ``env_name``.
    """
    cli_parser = gym_ctrl_arg_parser()
    cli_parser.add_argument(
        '--policy',
        default='mlp',
        choices=['cnn', 'lstm', 'lnlstm', 'mlp'],
        help='Policy architecture',
    )
    cli_parser.add_argument(
        '--lrschedule',
        default='linear',
        choices=['constant', 'linear'],
        help='Learning rate schedule',
    )
    opts = cli_parser.parse_args()

    logger.configure(log_suffix="A2C-" + opts.env,
                     format_strs=['stdout', 'log', 'csv'])

    env_id = opts.env
    train(
        env_id,
        num_timesteps=opts.num_timesteps,
        seed=opts.seed,
        policy=opts.policy,
        lrschedule=opts.lrschedule,
        num_env=1,
        env_name=env_id,
    )
Esempio n. 7
0
def main():
    """Parse the command line, configure logging, and start PPO_NAC_Advantage training.

    The tag ``PPO_NAC_Advantage-<env>`` is handed to the logger as
    ``log_suffix`` and echoed in the first log line.
    """
    opts = gym_ctrl_arg_parser().parse_args()
    algo_tag = "PPO_NAC_Advantage-" + opts.env

    logger.configure(log_suffix=algo_tag,
                     format_strs=['stdout', 'log', 'csv'])
    logger.log("Algorithm: " + algo_tag)

    train(opts.env, num_timesteps=opts.num_timesteps, seed=opts.seed)
def main():
    """Parse the command line, configure logging, and start PPO_CMAES training.

    The logger receives ``PPO_CMAES-<env>`` as its ``log_suffix``; the
    environment id, timestep budget and seed are forwarded to ``train``.
    """
    parsed = gym_ctrl_arg_parser().parse_args()

    suffix = "PPO_CMAES-" + parsed.env
    logger.configure(log_suffix=suffix, format_strs=['stdout', 'log', 'csv'])

    train_kwargs = {
        "num_timesteps": parsed.num_timesteps,
        "seed": parsed.seed,
    }
    train(parsed.env, **train_kwargs)