def main(args):
  # Simple variant: run every configured experiment to completion, discarding scores.
  training.utility.set_up_logging()
  experiment = training.Experiment(
      args.logdir,
      process_fn=functools.partial(process, args=args),
      num_runs=args.num_runs,
      ping_every=args.ping_every,
      resume_runs=args.resume_runs)
  for run in experiment:
    for unused_score in run:
      pass
def main(args):
  training.utility.set_up_logging()
  experiment = training.Experiment(
      args.logdir,
      process_fn=functools.partial(process, args=args),
      start_fn=functools.partial(start, args=args),
      resume_fn=functools.partial(resume, args=args),
      num_runs=args.num_runs,
      ping_every=args.ping_every,
      resume_runs=args.resume_runs)
  for run in experiment:  # Uses Experiment.__iter__ to yield runs.
    for unused_score in run:  # Uses Run.__iter__ to yield scores.
      pass
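# A minimal sketch of a command-line entry point for main() above. This is an
# assumption: the flag names mirror the attributes accessed on `args` in this
# file (logdir, params, num_runs, ping_every, resume_runs); the defaults and
# parser layout are illustrative, not taken from the original script.
def _parse_args_sketch():
  import argparse
  boolean = lambda x: bool(['False', 'True'].index(x))
  parser = argparse.ArgumentParser()
  parser.add_argument('--logdir', required=True)
  parser.add_argument('--params', default='{}')
  parser.add_argument('--num_runs', type=int, default=1)
  parser.add_argument('--ping_every', type=int, default=0)
  parser.add_argument('--resume_runs', type=boolean, default=False)
  return parser.parse_args()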
def trainable(config):
  # Ray Tune trainable: overrides selected hyperparameters from the search
  # config, runs a shortened experiment, and reports the test score.
  # Note: relies on a module-level `args` namespace; only the Tune config is
  # passed in as an argument.
  print('begin a trial')
  args.params = tools.AttrDict(yaml.safe_load(args.params.replace('#', ',')))
  args.logdir = args.logdir and os.path.expanduser(args.logdir)
  print('debug ', config['divergence_scale'], config['reward_loss_scale'])
  with args.params.unlocked:
    args.params.divergence_scale = config['divergence_scale']
    args.params.reward_loss_scale = config['reward_loss_scale']
    # args.params.main_learning_rate = config['main_learning_rate']
    args.params.test_steps = 50
    # args.params.num_units = config['num_units']
    args.params.test_traj = 5
  training.utility.set_up_logging()
  experiment = training.Experiment(
      args.logdir,
      process_fn=functools.partial(process, args=args),
      num_runs=args.num_runs,
      ping_every=args.ping_every,
      resume_runs=args.resume_runs)
  for run in experiment:
    for test_score in run:
      if test_score > 1.0:
        tune.report(mean_score=test_score)
        break
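# A minimal sketch of how the trainable above could be launched with Ray Tune
# (assumed from the tune.report call). The search values below are illustrative
# placeholders, not the settings used in this project.
def _tune_sketch():
  from ray import tune
  analysis = tune.run(
      trainable,
      config={
          'divergence_scale': tune.grid_search([1.0, 10.0, 100.0]),
          'reward_loss_scale': tune.grid_search([1.0, 10.0]),
      })
  print(analysis.get_best_config(metric='mean_score', mode='max'))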
def main(args):
  # Benchmark evaluation: restore checkpoints at fixed training stages, roll
  # out num_traj evaluation trajectories per run, and log mean/std returns per
  # environment and method to a CSV via dowel.
  import dowel
  from dowel import logger, tabular
  training.utility.set_up_logging()
  stages = {'500k': 'model.ckpt-2502500'}
  # stages = {'1000k': 'model.ckpt-5005000'}
  num_traj = 10
  # stages = {'100k': 'model.ckpt-500500', '500k': 'model.ckpt-2502500'}
  # stages = {'1M': 'model.ckpt-5005000'}
  # stages = {'final': 'model.ckpt-2652650'}
  # stages = {'100k': 'model.ckpt-500500', '500k': 'model.ckpt-2502500',
  #           '1M': 'model.ckpt-5005000'}
  # stages = {'100k': 'model.ckpt-600500', '500k': 'model.ckpt-3002500',
  #           'final': 'model.ckpt-3182650'}
  # methods = ['weighted_100']
  # methods = ['aug7']
  methods = ['baseline3']
  # rival_method = 'baseline3'
  rival_method = 'aug7'
  rival_runs = 5
  base_dir = 'benchmark'
  envs = ['finger_spin']
  # envs = ['cartpole_swingup']
  # envs = ['finger_spin', 'cartpole_swingup', 'cheetah_run', 'cup_catch']
  # envs = ['finger_spin', 'cartpole_swingup', 'reacher_easy', 'cheetah_run']
  # envs = ['cartpole_swingup', 'cheetah_run', 'walker_walk', 'cup_catch']
  # envs = ['finger_spin', 'cartpole_swingup', 'reacher_easy', 'cheetah_run',
  #         'walker_walk', 'cup_catch']
  if not check_finish(base_dir, stages, methods, envs, args.num_runs):
    exit()
  for pref, chkpt in stages.items():
    print(pref, 'begin')
    logger.add_output(dowel.StdOutput())
    logger.add_output(dowel.CsvOutput('benchmark_{}.csv'.format(pref)))
    for env in envs:
      tabular.record('Env', env)
      for method in methods:
        for id in range(rival_runs):
          means, stds, all_scores = [], [], []
          with args.params.unlocked:
            args.params.chkpt = chkpt
            args.params.tasks = [env]
            args.params.planner_horizon = 12
            args.params.eval_ratio = 1 / num_traj
            # args.params.r_loss = 'contra'
            # args.params.aug = 'rad'
            args.params.planner = 'dual2'
            args.params.rival = '{}/{}/00{}'.format(env, rival_method, id + 1)
          experiment = training.Experiment(
              os.path.join(base_dir, env, method),
              process_fn=functools.partial(process, args=args),
              num_runs=args.num_runs,
              ping_every=args.ping_every,
              resume_runs=args.resume_runs,
              planner=args.params.planner,
              task_str=env)
          for i, run in enumerate(experiment):
            scores = []
            # Note: `i` is reused by the inner loop below.
            for i, unused_score in enumerate(run):
              print('unused', unused_score)
              scores.append(unused_score)
              if i == num_traj - 1:
                break
            means.append(np.mean(scores))
            stds.append(np.std(scores))
            all_scores.append(scores)
            print(means)
            # if args.params.planner != 'cem':
            #   exit()
            if args.params.planner == 'cem_eval':
              np.save(
                  os.path.join(
                      args.logdir, env, method,
                      '00{}/scores_{}_cem.npy'.format(i, pref)),
                  np.array(all_scores))
          mean, std = np.mean(means), np.std(means)
          print('{} {}+/-{}'.format(method, int(mean), int(std)))
          if mean > 0:
            tabular.record(method, '{}+/-{}'.format(int(mean), int(std)))
            np.save(
                os.path.join(args.logdir, env, method,
                             'scores_{}.npy'.format(pref)),
                np.array(all_scores))
      logger.log(tabular)
      logger.dump_all()
    logger.remove_all()
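# Hypothetical sketch of the check_finish() helper called above: verify that
# every expected run directory already contains the requested checkpoints
# before the benchmark starts. The directory layout (base_dir/env/method/00N)
# is inferred from the paths built in main(); the real helper is defined
# elsewhere and may differ.
def check_finish_sketch(base_dir, stages, methods, envs, num_runs):
  import glob
  for env in envs:
    for method in methods:
      for run_id in range(1, num_runs + 1):
        run_dir = os.path.join(base_dir, env, method, '00{}'.format(run_id))
        for _, chkpt in stages.items():
          if not glob.glob(os.path.join(run_dir, chkpt + '*')):
            print('missing checkpoint', chkpt, 'in', run_dir)
            return False
  return True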