Esempio n. 1
0
def main():
    """
    Run Uber-GA training on the environment selected on the command line.

    The environment id and seed are read from ``pybullet_arg_parser`` and
    the environment is created with ``make_pybullet_env``. A model from
    ``simple_mlp`` is evolved one generation at a time through a
    ``LearningSession``. Training stops after ``max_generations``
    generations, or as soon as the session reports at least
    ``max_timesteps`` timesteps, whichever comes first. The environment is
    closed on the way out, including when training raises.
    """
    # Stopping criteria for the run.
    max_generations = 10000
    max_timesteps = 5e6

    args = pybullet_arg_parser().parse_args()

    logger.configure(format_strs=['stdout', 'log', 'csv'],
                     log_suffix="Uber-GA-" + args.env + "_seed_" +
                     str(args.seed))
    logger.log("Algorithm:Uber-GA-" + args.env + "_seed_" + str(args.seed))
    env_id = args.env
    seed = args.seed
    with make_session() as sess:
        env = make_pybullet_env(env_id, seed)
        try:
            model = simple_mlp(sess, env)
            sess.run(tf.global_variables_initializer())
            learn_sess = LearningSession(sess, model)
            for _ in range(max_generations):
                # Check the timestep budget before starting a new generation.
                if learn_sess.timesteps_so_far >= max_timesteps:
                    break
                # The returned population is intentionally discarded; nothing
                # here inspects it.
                # NOTE(review): earlier debug code read the reward as
                # element 0 of each entry - confirm before relying on it.
                learn_sess.generation(env,
                                      trials=1,
                                      population=POPULATION)
        finally:
            env.close()
Esempio n. 2
0
def main():
    """
    Configure logging for a CMAES run and start training.

    The environment id, seed and timestep budget come from
    ``pybullet_arg_parser``. The same "CMAES-<env>_seed_<seed>" tag is used
    as the log-file suffix and in the "Algorithm: ..." log line.
    """
    cli = pybullet_arg_parser().parse_args()

    # Build the run tag once so the file suffix and the log line agree.
    run_tag = "CMAES-" + cli.env + "_seed_" + str(cli.seed)
    outputs = ['stdout', 'log', 'csv']

    logger.configure(format_strs=outputs, log_suffix=run_tag)
    logger.log("Algorithm: " + run_tag)

    train(cli.env, num_timesteps=cli.num_timesteps, seed=cli.seed)
Esempio n. 3
0
def main():
    """
    Configure logging for a NAC_Advantage_Fisher run and start training.

    The seed given on the command line is shifted by a random offset drawn
    from ``[0, 2**32 - 1]``. The shifted seed is logged and then passed to
    ``train``, so repeated runs with the same ``--seed`` use different
    effective seeds.
    """
    import random

    args = pybullet_arg_parser().parse_args()

    algo_tag = "NAC_Advantage_Fisher-" + args.env
    logger.configure(format_strs=['stdout', 'log', 'csv'],
                     log_suffix=algo_tag)
    logger.log("Algorithm: " + algo_tag)

    # NOTE(review): with a non-zero command-line seed the sum can exceed
    # 2**32 - 1; confirm that whatever train() seeds accepts such values.
    offset = random.randint(0, 2**32 - 1)
    args.seed = args.seed + offset
    logger.log("Algorithm: SEED-" + str(args.seed))

    train(args.env, num_timesteps=args.num_timesteps, seed=args.seed)
Esempio n. 4
0
def main():
    """
    Configure logging for a PES-S2 run and start training.

    The environment id, seed and timestep budget come from
    ``pybullet_arg_parser``. The log-file suffix, the "Algorithm: ..." log
    line and the ``train`` call all use the same command-line seed, so the
    log reports the seed the run actually used.
    """
    args = pybullet_arg_parser().parse_args()

    run_tag = "PES-S2-" + args.env + "_seed_" + str(args.seed)
    logger.configure(format_strs=['stdout', 'log', 'csv'],
                     log_suffix=run_tag)

    # Previously a randomised seed was computed and logged here while
    # train() still received args.seed, so the log named a seed that was
    # never used. Log the seed that is really passed to train().
    logger.log("Algorithm: " + run_tag)

    train(args.env, num_timesteps=args.num_timesteps, seed=args.seed)
Esempio n. 5
0
def main():
    """
    Configure logging for a PPO run and start training.

    The environment id, seed and timestep budget come from
    ``pybullet_arg_parser``; log files get the suffix "PPO-<env>".
    """
    options = pybullet_arg_parser().parse_args()
    task = options.env

    log_formats = ['stdout', 'log', 'csv']
    logger.configure(format_strs=log_formats, log_suffix="PPO-" + task)

    train(task, num_timesteps=options.num_timesteps, seed=options.seed)
Esempio n. 6
0
def main():
    """
    Configure logging for a PPO_Dual_NAC_Fisher run and start training.

    The environment id, seed and timestep budget come from
    ``pybullet_arg_parser``. The "PPO_Dual_NAC_Fisher-<env>" tag is used as
    the log-file suffix and in the "Algorithm: ..." log line.
    """
    parsed = pybullet_arg_parser().parse_args()

    # One tag shared by the log-file suffix and the announcement line.
    tag = "PPO_Dual_NAC_Fisher-" + parsed.env

    logger.configure(format_strs=['stdout', 'log', 'csv'],
                     log_suffix=tag)
    logger.log("Algorithm: " + tag)

    train(parsed.env,
          num_timesteps=parsed.num_timesteps,
          seed=parsed.seed)