Example #1
def main(args):
    training.utility.set_up_logging()
    # Create an experiment that spawns (or resumes) runs under args.logdir;
    # each run is driven by the project-specific process() callback.
    experiment = training.Experiment(args.logdir,
                                     process_fn=functools.partial(process,
                                                                  args=args),
                                     num_runs=args.num_runs,
                                     ping_every=args.ping_every,
                                     resume_runs=args.resume_runs)
    # Iterating the experiment yields runs; iterating a run drains the scores
    # it produces until that run finishes.
    for run in experiment:
        for unused_score in run:
            pass
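For context, here is a minimal sketch of how the `args` object used above could be produced. The flag names mirror the attributes the example reads (`logdir`, `num_runs`, `ping_every`, `resume_runs`); the types and defaults are illustrative assumptions, not the original project's CLI.

import argparse

def parse_args():
    # Hypothetical parser; only the attribute names are taken from the example.
    parser = argparse.ArgumentParser()
    parser.add_argument('--logdir', required=True)
    parser.add_argument('--num_runs', type=int, default=1)
    parser.add_argument('--ping_every', type=int, default=0)
    parser.add_argument('--resume_runs', action='store_true')
    return parser.parse_args()

if __name__ == '__main__':
    main(parse_args())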
Example #2
def main(args):
    training.utility.set_up_logging()
    experiment = training.Experiment(args.logdir,
                                     process_fn=functools.partial(process,
                                                                  args=args),
                                     start_fn=functools.partial(start,
                                                                args=args),
                                     resume_fn=functools.partial(resume,
                                                                 args=args),
                                     num_runs=args.num_runs,
                                     ping_every=args.ping_every,
                                     resume_runs=args.resume_runs)
    for run in experiment:  # iterates runs via Experiment.__iter__()
        for unused_score in run:  # iterates scores via the run's __iter__()
            pass
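The inline comments above refer to the iterator protocol: both the experiment and each run implement __iter__. The toy classes below only illustrate that protocol; they are not the real training.Experiment implementation, and the class names and constructor arguments are assumptions made for the sketch.

class Run:
    def __init__(self, scores):
        self._scores = scores

    def __iter__(self):
        # Yield one score at a time; the caller drives the run by consuming them.
        for score in self._scores:
            yield score

class Experiment:
    def __init__(self, runs):
        self._runs = runs

    def __iter__(self):
        # Yield run objects one by one; each run is itself iterable.
        for run in self._runs:
            yield run

# Consuming it mirrors the nested loops used in the examples above.
for run in Experiment([Run([1.0, 2.0]), Run([3.0])]):
    for score in run:
        pass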
Example #3
def trainable(config):
    print('begin a trial')
    # `args` comes from the enclosing scope; args.params arrives as a YAML
    # string with ',' encoded as '#', so undo that substitution before parsing.
    args.params = tools.AttrDict(yaml.safe_load(args.params.replace('#', ',')))
    args.logdir = args.logdir and os.path.expanduser(args.logdir)
    print('debug ', config["divergence_scale"], config["reward_loss_scale"])
    # Override the searched hyperparameters with the values Ray Tune proposes.
    with args.params.unlocked:
        args.params.divergence_scale = config["divergence_scale"]
        args.params.reward_loss_scale = config["reward_loss_scale"]
        # args.params.main_learning_rate = config["main_learning_rate"]
        args.params.test_steps = 50
        # args.params.num_units = config['num_units']
        args.params.test_traj = 5
    training.utility.set_up_logging()
    experiment = training.Experiment(
        args.logdir,
        process_fn=functools.partial(process, args=args),
        num_runs=args.num_runs,
        ping_every=args.ping_every,
        resume_runs=args.resume_runs)
    # Report test scores back to Ray Tune; only the first run is used.
    for run in experiment:
        for test_score in run:
            if test_score > 1.0:
                tune.report(mean_score=test_score)
        break
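Example #3 reports scores to Ray Tune via tune.report, which belongs to the older function-based Tune API. Under that assumption, a launch could look like the sketch below; the search values and resource settings are illustrative, not taken from the original project.

from ray import tune

analysis = tune.run(
    trainable,
    config={
        'divergence_scale': tune.grid_search([1.0, 10.0]),    # illustrative values
        'reward_loss_scale': tune.grid_search([1.0, 10.0]),   # illustrative values
    },
    resources_per_trial={'cpu': 4, 'gpu': 1},                 # illustrative
)
print(analysis.get_best_config(metric='mean_score', mode='max'))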
Example #4
def main(args):
    import dowel
    from dowel import logger, tabular
    training.utility.set_up_logging()
    stages = {'500k': 'model.ckpt-2502500'}
    # stages = {'1000k': 'model.ckpt-5005000'}
    num_traj = 10
    # stages = {'100k': 'model.ckpt-500500', '500k': 'model.ckpt-2502500'}
    # stages = {'1M': 'model.ckpt-5005000'}
    # stages = {'final': 'model.ckpt-2652650'}
    # stages = {'100k': 'model.ckpt-500500', '500k': 'model.ckpt-2502500', '1M': 'model.ckpt-5005000'}
    # stages = {'100k': 'model.ckpt-600500', '500k': 'model.ckpt-3002500',
    #           'final':'model.ckpt-3182650'}
    # methods = ['weighted_100']
    # methods = ['aug7']
    methods = ['baseline3']
    # rival_method = 'baseline3'
    rival_method = 'aug7'
    rival_runs = 5
    base_dir = 'benchmark'
    envs = ['finger_spin']
    # envs = ['cartpole_swingup']
    # envs = ['finger_spin', 'cartpole_swingup','cheetah_run', 'cup_catch']
    # envs = ['finger_spin', 'cartpole_swingup', 'reacher_easy', 'cheetah_run']
    # envs = ['cartpole_swingup', 'cheetah_run', 'walker_walk', 'cup_catch']
    # envs = ['finger_spin', 'cartpole_swingup', 'reacher_easy', 'cheetah_run', 'walker_walk', 'cup_catch']
    # check_finish is a project-specific helper defined elsewhere; abort if it
    # returns False.
    if not check_finish(base_dir, stages, methods, envs, args.num_runs):
        exit()

    for pref, chkpt in stages.items():
        print(pref, 'begin')
        logger.add_output(dowel.StdOutput())
        logger.add_output(dowel.CsvOutput('benchmark_{}.csv'.format(pref)))
        for env in envs:
            tabular.record('Env', env)
            for method in methods:
                for run_id in range(rival_runs):
                    means, stds, all_scores = [], [], []
                    with args.params.unlocked:
                        args.params.chkpt = chkpt
                        args.params.tasks = [env]
                        args.params.planner_horizon = 12
                        args.params.eval_ratio = 1 / num_traj
                        # args.params.r_loss = 'contra'
                        # args.params.aug = 'rad'
                        args.params.planner = 'dual2'
                        args.params.rival = '{}/{}/00{}'.format(
                            env, rival_method, run_id + 1)

                    experiment = training.Experiment(
                        os.path.join(base_dir, env, method),
                        process_fn=functools.partial(process, args=args),
                        num_runs=args.num_runs,
                        ping_every=args.ping_every,
                        resume_runs=args.resume_runs,
                        planner=args.params.planner,
                        task_str=env)
                    for i, run in enumerate(experiment):
                        scores = []
                        # Use a separate index so the run index i is not
                        # shadowed; i is later used to build the save path.
                        for step, score in enumerate(run):
                            print('score', score)
                            scores.append(score)
                            if step == num_traj - 1:
                                break
                        means.append(np.mean(scores))
                        stds.append(np.std(scores))
                        all_scores.append(scores)
                        print(means)
                        # if args.params.planner != 'cem':
                        #     exit()
                        if args.params.planner == 'cem_eval':
                            np.save(
                                os.path.join(
                                    args.logdir, env, method,
                                    '00{}/scores_{}_cem.npy'.format(i, pref)),
                                np.array(all_scores))
                    mean, std = np.mean(means), np.std(means)
                    print('{}    {}+/-{}'.format(method, int(mean), int(std)))
                    if mean > 0:
                        tabular.record(method,
                                       '{}+/-{}'.format(int(mean), int(std)))
                    np.save(
                        os.path.join(args.logdir, env, method,
                                     'scores_{}.npy'.format(pref)),
                        np.array(all_scores))
            logger.log(tabular)
            logger.dump_all()
        logger.remove_all()
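The per-run scores that Example #4 writes with np.save can be reloaded later for offline analysis. Below is a small sketch; load_scores is a hypothetical helper that simply mirrors the save path used above, and it assumes every run produced the same number of trajectory scores.

import os
import numpy as np

def load_scores(logdir, env, method, pref):
    # Mirrors the np.save path in Example #4: one row of trajectory scores per run.
    path = os.path.join(logdir, env, method, 'scores_{}.npy'.format(pref))
    scores = np.load(path)
    return scores.mean(axis=1), scores.std(axis=1)

# e.g. means, stds = load_scores(args.logdir, 'finger_spin', 'baseline3', '500k')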