def setUp(self):
    self.expName = 'test_experiment_unit_tests'
    self.exp = ro.Experiment(self.expName, {
        'param1': ro.Uniform(low=0.0, high=100.0, dtype='int'),
        'param2': ro.Normal(mean=100, std=10, dtype='int'),
    })
def run_ro(init, num_epochs):
    param = init
    e = ro.Experiment('ro_example', {'alpha': ro.Uniform(low=0.0, high=0.01)})
    e.sample_all_params()
    for epoch in range(num_epochs):
        param = param - e.alpha * dloss(param)
    e.add_result(loss(param))
    return e
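
# Illustrative only: a minimal driver for run_ro, assuming quadratic loss/dloss
# helpers (not shown in the snippet above) and the Experiment.minimum() API used
# in the other examples.
def loss(x):
    return x**2


def dloss(x):
    return 2.0 * x


if __name__ == '__main__':
    e = run_ro(init=3.0, num_epochs=100)
    # Query the best recorded result across all runs of 'ro_example'.
    print('Best result so far:', e.minimum().result)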
def wrapper(*args, **kwargs):
    params = {
        param: ro.Constant(default)
        for param, default in zip(arg_names, arg_defaults)
    }
    for key, value in kwargs.items():
        if key in params:
            params[key] = ro.Constant(value)
    experiment = ro.Experiment(name=name, directory=directory, params=params)
    result = fn(*args, **kwargs)
    if isinstance(result, collections.Iterable):
        if len(result) == 2:
            experiment.add_result(result[0], data=result[1])
        elif len(result) == 3:
            experiment.add_result(result[0], data=result[1], attachment=result[2])
    else:
        experiment.add_result(result)
def test_raise_with_param_named_result(self):
    with self.assertRaises(ValueError):
        ro.Experiment('invalid experiment', {
            'result': ro.Uniform(low=0.0, high=100.0, dtype='int'),
        })
def learn(exp_name, dataset, model=None, optimizer=None, loss=None,
          rng_seed=1234, num_epochs=10, split=(0.7, 0.2, 0.1), bsz=64):
    if model is None:
        in_size = dataset[0][0].numel()
        if isinstance(dataset[0][1], (int, long, float, complex)):
            out_size = 1
        else:
            out_size = dataset[0][1].numel()
        model = get_model(in_size, out_size)
    model = Network(model)
    if loss is None:
        if isinstance(dataset[0][1], (int, long, float, complex)):
            reg = True
        else:
            reg = False
        loss = get_loss(regression=reg)
    if optimizer is None:
        optimizer = get_optimizer(model)
    opt_hyperparams = optimizer.param_groups[0]
    opt_hyperparams = {k: opt_hyperparams[k] for k in opt_hyperparams
                       if not k == 'params'}

    exp = ro.Experiment(exp_name, {
        'model': str(model),
        'optimizer': str(optimizer),
        'opt_hyperparams': opt_hyperparams,
        'loss': str(loss),
        'rng_seed': rng_seed,
        'num_epochs': num_epochs,
        'bsz': bsz,
        'split': split,
    })

    th.manual_seed(rng_seed)
    np.random.seed(rng_seed)
    if args.cuda:
        th.cuda.manual_seed(rng_seed)
        model.cuda()

    print('Splitting dataset in ' + str(split[0]) + ' train, ' +
          str(split[1]) + ' Validation, ' + str(split[2]) + ' Test')
    dataset = split_dataset(dataset, split[0], split[1], split[2])
    kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
    train_loader = th.utils.data.DataLoader(dataset, batch_size=bsz, shuffle=True, **kwargs)
    dataset.use_valid()
    valid_loader = th.utils.data.DataLoader(dataset, batch_size=bsz, shuffle=True, **kwargs)
    dataset.use_test()
    test_loader = th.utils.data.DataLoader(dataset, batch_size=bsz, shuffle=True, **kwargs)

    train_errors = []
    valid_errors = []

    # Start training
    for epoch in range(num_epochs):
        print('\n\n', '-' * 20, ' Epoch ', epoch, ' ', '_' * 20)
        dataset.use_train()
        train_errors.append(train(train_loader, model, loss, optimizer))
        print('Training error: ', train_errors[-1])
        dataset.use_valid()
        valid_errors.append(test(valid_loader, model, loss))
        print('Validation error: ', valid_errors[-1])

    # Benchmark on Test
    dataset.use_test()
    test_error = test(test_loader, model, loss)
    print('Final Test Error: ', test_error)

    # Save experiment result
    exp.add_result(test_error, {
        'train_errors': train_errors,
        'valid_errors': valid_errors,
    })

    # Plot Results
    if not os.path.exists('./results'):
        os.mkdir('./results')
    p = Plot('Convergence')
    x = np.arange(0, len(train_errors), 1)
    p.plot(x, np.array(train_errors), label='Train')
    p.plot(x, np.array(valid_errors), label='Validation')
    p.set_axis('Epoch', 'Loss')
    b = Plot('Final Error')
    b.bar(x=[train_errors[-1], valid_errors[-1], test_error],
          labels=['Train', 'Validation', 'Test'])
    cont = Container(1, 2, title=exp_name)
    cont.set_plot(0, 0, p)
    cont.set_plot(0, 1, b)
    cont.save('./results/' + exp_name + '.pdf')
import randopt as ro
import randopt.objectives as obj

from random import gauss


def converge(curve, mu, sigma):
    return [c + gauss(mu, sigma)**2 for c in curve]


if __name__ == '__main__':
    curve = [10 / x for x in range(1, 36)]
    loss = obj.median_variance
    exp = ro.Experiment('objectives_example', params={
        'mu': ro.Gaussian(3, 1),
        'sigma': ro.Gaussian(1, 1),
    })
    for _ in range(10):
        exp.sample_all_params()
        convergence = converge(curve, exp.mu, exp.sigma)
        exp.add_result(loss(convergence, 0.5, 0.5), data={
            'convergence': convergence,
            'normalized': obj.normalize(convergence),
            'curve': curve,
        })
    evo = ro.Evolutionary(exp, {
        'mu': ro.Gaussian(0.0, 0.1),
def test_experiment4(x=2, y=4):
    exp = ro.Experiment('params_from_def',
                        params=ro.dict_to_constants(locals()))
    exp.add_result(x**2 + y**2, data={'additional': 'as usual.'})
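
# For reference, a hand-written equivalent of the dict_to_constants call above
# (sketch only; 'params_from_def_explicit' is an illustrative experiment name):
def test_experiment4_explicit(x=2, y=4):
    exp = ro.Experiment('params_from_def_explicit', params={
        'x': ro.Constant(x),
        'y': ro.Constant(y),
    })
    exp.add_result(x**2 + y**2, data={'additional': 'as usual.'})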
                    metavar='N', help='model updates per simulator step (default: 5)')
parser.add_argument('--replay_size', type=int, default=1000000, metavar='N',
                    help='size of replay buffer (default: 1000000)')
args = parser.parse_args()

env = NormalizedActions(gym.make(args.env_name))
writer = SummaryWriter()

REWARDS = []
TEST_REWARDS = []
experiment = ro.Experiment(name='baseline-' + args.algo)

env.seed(args.seed)
torch.manual_seed(args.seed)
np.random.seed(args.seed)

if args.algo == "NAF":
    agent = NAF(args.gamma, args.tau, args.hidden_size,
                env.observation_space.shape[0], env.action_space)
else:
    agent = DDPG(args.gamma, args.tau, args.hidden_size,
                 env.observation_space.shape[0], env.action_space)

memory = ReplayMemory(args.replay_size)
ounoise = OUNoise(env.action_space.shape[0]) if args.ou_noise else None
param_noise = AdaptiveParamNoiseSpec(
# env = gym.make('DiscreteOrientation-v0', size_noise=0.0)
env = StochasticDiscreteOrientation(size_noise=0.0)
env = EnvWrapper(env)
env.seed(1234)
model, critic = Baseline(env.state_size, env.action_size,
                         layer_sizes=(2, 2), discrete=True)
policy = DiscretePolicy(model)
agent = Reinforce(policy=policy,
                  critic=critic,
                  update_frequency=args.update_frequency,
                  critic_weight=1.0,
                  entropy_weight=0.0001,
                  # grad_clip=0.5,
                  advantage=DiscountedAdvantage())
                  # advantage=GeneralizedAdvantageEstimation(tau=0.95, gamma=0.99))
opt = optim.Adam(agent.parameters(), lr=7e-4, eps=1e-5)
exp = ro.Experiment('DiscreteOrientation-dev-seq', params={})
train_rewards = train(args, env, agent, opt)
test_rewards = test(args, env, agent)
data = {p: getattr(args, p) for p in vars(args)}
data['train_rewards'] = train_rewards
data['test_rewards'] = test_rewards
data['timestamp'] = time()
exp.add_result(result=sum(test_rewards) / len(test_rewards), data=data)
th.save(agent.state_dict(), './high_level.pth')
import torch as th
import randopt as ro

from time import time

from drl.utils import get_setup
from drl.training import test, train


def seq_update(args, env, agent, opt):
    opt.zero_grad()
    update = agent.get_update()
    opt.step()


def train_update(args, env, agent, opt):
    opt.zero_grad()
    update = agent.get_update()
    opt.step()


if __name__ == '__main__':
    args, env, agent, opt = get_setup()
    exp = ro.Experiment(args.env + '-dev-seq', params={})
    train_rewards = train(args, env, agent, opt, train_update)
    test_rewards = test(args, env, agent)
    data = {p: getattr(args, p) for p in vars(args)}
    data['train_rewards'] = train_rewards
    data['test_rewards'] = test_rewards
    data['timestamp'] = time()
    exp.add_result(result=sum(test_rewards) / len(test_rewards), data=data)
def setUp(self):
    self._clean_up()
    self.experiment = ro.Experiment('ropt_test')
def main():
    ARGUMENTS.update(vars(args))
    torch.set_num_threads(1)
    device = torch.device("cuda:0" if args.cuda else "cpu")

    if args.vis:
        from visdom import Visdom
        viz = Visdom(port=args.port)
        win = None

    envs = make_vec_envs(args.env_name, args.seed, args.num_processes,
                         args.gamma, args.log_dir, args.add_timestep, device, False)

    actor_critic = Policy(envs.observation_space.shape, envs.action_space,
                          base_kwargs={'recurrent': args.recurrent_policy})
    actor_critic.to(device)

    if args.algo == 'a2c':
        agent = algo.A2C_ACKTR(actor_critic, args.value_loss_coef,
                               args.entropy_coef, lr=args.lr,
                               eps=args.eps, alpha=args.alpha,
                               max_grad_norm=args.max_grad_norm)
    elif args.algo == 'ppo':
        agent = algo.PPO(actor_critic, args.clip_param, args.ppo_epoch,
                         args.num_mini_batch, args.value_loss_coef,
                         args.entropy_coef, lr=args.lr, eps=args.eps,
                         max_grad_norm=args.max_grad_norm)
    elif args.algo == 'acktr':
        agent = algo.A2C_ACKTR(actor_critic, args.value_loss_coef,
                               args.entropy_coef, acktr=True)

    rollouts = RolloutStorage(args.num_steps, args.num_processes,
                              envs.observation_space.shape, envs.action_space,
                              actor_critic.recurrent_hidden_state_size)

    obs = envs.reset()
    rollouts.obs[0].copy_(obs)
    rollouts.to(device)

    episode_rewards = deque(maxlen=10)

    start = time.time()
    for j in range(num_updates):

        if args.use_linear_lr_decay:
            # decrease learning rate linearly
            if args.algo == "acktr":
                # use optimizer's learning rate since it's hard-coded in kfac.py
                update_linear_schedule(agent.optimizer, j, num_updates, agent.optimizer.lr)
            else:
                update_linear_schedule(agent.optimizer, j, num_updates, args.lr)

        if args.algo == 'ppo' and args.use_linear_lr_decay:
            agent.clip_param = args.clip_param * (1 - j / float(num_updates))

        for step in range(args.num_steps):
            # Sample actions
            with torch.no_grad():
                value, action, action_log_prob, recurrent_hidden_states = actor_critic.act(
                    rollouts.obs[step],
                    rollouts.recurrent_hidden_states[step],
                    rollouts.masks[step])

            # Observe reward and next obs
            obs, reward, done, infos = envs.step(action)

            for info in infos:
                if 'episode' in info.keys():
                    episode_rewards.append(info['episode']['r'])

            # If done then clean the history of observations.
            masks = torch.FloatTensor([[0.0] if done_ else [1.0] for done_ in done])
            rollouts.insert(obs, recurrent_hidden_states, action,
                            action_log_prob, value, reward, masks)

        with torch.no_grad():
            next_value = actor_critic.get_value(rollouts.obs[-1],
                                                rollouts.recurrent_hidden_states[-1],
                                                rollouts.masks[-1]).detach()

        rollouts.compute_returns(next_value, args.use_gae, args.gamma, args.tau)

        value_loss, action_loss, dist_entropy = agent.update(rollouts)

        rollouts.after_update()

        # save for every interval-th episode or for the last epoch
        if (j % args.save_interval == 0 or j == num_updates - 1) and args.save_dir != "":
            save_path = os.path.join(args.save_dir, args.algo)
            try:
                os.makedirs(save_path)
            except OSError:
                pass

            # A really ugly way to save a model to CPU
            save_model = actor_critic
            if args.cuda:
                save_model = copy.deepcopy(actor_critic).cpu()

            save_model = [save_model,
                          getattr(get_vec_normalize(envs), 'ob_rms', None)]

            torch.save(save_model, os.path.join(save_path, args.env_name + ".pt"))

        total_num_steps = (j + 1) * args.num_processes * args.num_steps

        if j % args.log_interval == 0 and len(episode_rewards) > 1:
            end = time.time()
            print("Updates {}, num timesteps {}, FPS {} \n Last {} training episodes: mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}\n".
                  format(j, total_num_steps,
                         int(total_num_steps / (end - start)),
                         len(episode_rewards),
                         np.mean(episode_rewards),
                         np.median(episode_rewards),
                         np.min(episode_rewards),
                         np.max(episode_rewards),
                         dist_entropy, value_loss, action_loss))
            ALL_UPDATES.append(j)
            ALL_TIMESTEPS.append(total_num_steps)
            ALL_FPS.append(int(total_num_steps / (end - start)))
            ALL_MEAN_REWARDS.append(np.mean(episode_rewards))
            ALL_MEDIAN_REWARDS.append(np.median(episode_rewards))
            ALL_MIN_REWARDS.append(np.min(episode_rewards))
            ALL_MAX_REWARDS.append(np.max(episode_rewards))
            ALL_DIST_ENTROPY.append(dist_entropy)
            ALL_VALUE_LOSS.append(value_loss)
            ALL_ACTION_LOSS.append(action_loss)

        if (args.eval_interval is not None
                and len(episode_rewards) > 1
                and j % args.eval_interval == 0):
            eval_envs = make_vec_envs(args.env_name, args.seed + args.num_processes,
                                      args.num_processes, args.gamma, eval_log_dir,
                                      args.add_timestep, device, True)

            vec_norm = get_vec_normalize(eval_envs)
            if vec_norm is not None:
                vec_norm.eval()
                vec_norm.ob_rms = get_vec_normalize(envs).ob_rms

            eval_episode_rewards = []

            obs = eval_envs.reset()
            eval_recurrent_hidden_states = torch.zeros(args.num_processes,
                                                       actor_critic.recurrent_hidden_state_size,
                                                       device=device)
            eval_masks = torch.zeros(args.num_processes, 1, device=device)

            while len(eval_episode_rewards) < 10:
                with torch.no_grad():
                    _, action, _, eval_recurrent_hidden_states = actor_critic.act(
                        obs, eval_recurrent_hidden_states, eval_masks, deterministic=True)

                # Observe reward and next obs
                obs, reward, done, infos = eval_envs.step(action)

                eval_masks = torch.FloatTensor([[0.0] if done_ else [1.0] for done_ in done])
                for info in infos:
                    if 'episode' in info.keys():
                        eval_episode_rewards.append(info['episode']['r'])

            eval_envs.close()

            print(" Evaluation using {} episodes: mean reward {:.5f}\n".
                  format(len(eval_episode_rewards), np.mean(eval_episode_rewards)))

        if args.vis and j % args.vis_interval == 0:
            try:
                # Sometimes monitor doesn't properly flush the outputs
                win = visdom_plot(viz, win, args.log_dir, args.env_name,
                                  args.algo, args.num_env_steps)
            except IOError:
                pass

    # Save the results
    name = (ARGUMENTS['env_name'] + '-' + ARGUMENTS['algo'] + '-' +
            ARGUMENTS['experiment'] + '-grad_noise' + str(ARGUMENTS['gradient_noise']))
    experiment = ro.Experiment(name, directory='results')
    data = {
        'updates': ALL_UPDATES,
        'timesteps': ALL_TIMESTEPS,
        'fps': ALL_FPS,
        'mean_rewards': ALL_MEAN_REWARDS,
        'median_rewards': ALL_MEDIAN_REWARDS,
        'min_rewards': ALL_MIN_REWARDS,
        'max_rewards': ALL_MAX_REWARDS,
        'dist_entropy': ALL_DIST_ENTROPY,
        'value_loss': ALL_VALUE_LOSS,
        'action_loss': ALL_ACTION_LOSS,
    }
    data.update(ARGUMENTS)
    result = data['mean_rewards'][-1]
    experiment.add_result(result, data)
#!/usr/bin/env python

import randopt as ro


def loss(x, y):
    return x**2 + y**2


if __name__ == '__main__':
    e = ro.Experiment('gs_example', {
        'alpha': ro.Choice([0.1, 0.2, 0.3]),
        'beta': ro.Choice([0.1, 0.2, 0.3]),
    })

    # Add a single result
    e.alpha = 0.1
    e.beta = 0.1
    # e.add_result(loss(0.1, 0.1))

    gs = ro.GridSearch(e)
    gs.sample('alpha')

    # Sampling parameters
    for i in range(9):
        gs.refresh_index()
        gs.sample_all_params()
        res = loss(gs.alpha, gs.beta)
        print('Result: ', res)
        gs.add_result(res)
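
    # After the sweep, the best grid point can be retrieved the same way as in
    # the random-search examples (sketch using Experiment.minimum()):
    opt = e.minimum()
    print('Best grid result: ', opt.result, ' with params: ', opt.params)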
#!/usr/bin/env python

import randopt as ro


def loss(x):
    return x**2


if __name__ == '__main__':
    e = ro.Experiment('evo_example', {
        'alpha': ro.Gaussian(mean=0.0, std=1.5, dtype='float'),
    })

    # Populate with first result
    e.sample('alpha')
    res = loss(e.alpha)
    e.add_result(res)

    # Evolutionary search
    e = ro.Experiment('evo_example', {
        # Evolutionary will use alpha.sample() as perturbation
        'alpha': ro.Gaussian(mean=0.0, std=0.5, dtype='float'),
    })
    evo = ro.Evolutionary(e)
    for i in range(100):
        evo.sample_parent()
        evo.sample_all_params()
#!/usr/bin/env python3

import random
import randopt as ro

if __name__ == '__main__':
    random.seed(1234)

    # Create random JSON summaries
    exp = ro.Experiment('summary_list')
    for i in range(15):
        exp.add_result(random.random(), data={
            'alpha': [random.random() for _ in range(100)],
            'beta': [random.random() for _ in range(1000)],
            'gamma': random.random(),
        })

    # Fetch some results
    results = exp.top(10)

    # Play with the API
    print(len(results))
    assert len(results) == results.count()
    print('slice mean', results[0:3].mean())
    print('mean of top half:', results.filter(lambda r: r.result > results.mean()).mean())

    # Special functions on scalars
    print('min(gamma):', results.min('gamma'))
    print('max(gamma):', results.max('gamma'))
    print('mean(gamma):', results.mean('gamma'))
#!/usr/bin/env python

import randopt as ro


def loss(w, x, y, z):
    return w**2 + x**2 + y**2 + z**2


if __name__ == '__main__':
    e = ro.Experiment('multi_params_example', {
        'dog': ro.Normal(mean=0.0, std=1.0, dtype='float'),
        'cat': ro.Uniform(low=-1.0, high=1.0, dtype='float'),
        'dolphin': ro.LognormVariate(mean=0.0, std=1.0, dtype='float'),
        'any_name': ro.Choice([0.01, 0.05, 0.1, 0.5, 0.7, 0.9], sampler=ro.Uniform()),
    })

    # Seeding will make all of your searches reproducible. (Usually not wanted)
    e.seed(1234)

    # Randomly sampling parameters
    for i in range(100):
        e.sample_all_params()
        res = loss(e.dog, e.cat, e.dolphin, e.any_name)
        print('Result: ', res)
        # Example of using the second parameter
        e.add_result(res, data={
            'sup.data': [e.dog, e.cat, e.dolphin, e.any_name]
        })

    # Save/load the state of the random number generators
    e.save_state('./multi_params_state.pk')
import randopt as ro


def loss(x):
    return x**2


e = ro.Experiment('myexp', {
    'alpha': ro.Gaussian(mean=0.0, std=1.0, dtype='float'),
})

# Sampling parameters
for i in range(100):
    e.sample('alpha')
    res = loss(e.alpha)
    print('Result: ', res)
    e.add_result(res)

# Manually setting parameters
e.alpha = 0.00001
res = loss(e.alpha)
e.add_result(res)

# Search over all experiment results, including ones from previous runs
opt = e.minimum()
print('Best result: ', opt.result, ' with params: ', opt.params)
        param, sampler = arg.split('=')
        command = command + ' ' + param + ' {' + str(len(samplers)) + ':.10f}'
        sampler = parse_sampler(sampler)
        param = param.replace('-', '')
        parameters.append(param)
        samplers.append(sampler)
    else:
        command = command + ' ' + arg

# Generate the right number of commands
if experiment is not None and experiment_name is not None:
    print('Using ', experiment.__name__)
    print('sys: ', sys.argv)
    params = {p: s for p, s in zip(parameters, samplers)}
    experiment = experiment(ro.Experiment(name=experiment_name,
                                          params=params,
                                          directory=experiment_dir))
    command_generator = ExperimentSampler(command, parameters, experiment)
else:
    command_generator = CommandGenerator(command, parameters, samplers)

if n_searches == -1:
    n_searches = float('inf')
    commands = command_generator
else:
    commands = (next(command_generator) for _ in range(n_searches))

# Run until search finishes
for i, command in enumerate(commands):
    print(i, ':', command)
    subprocess.call(command, shell=True)
#!/usr/bin/env python

import randopt as ro
from bonn import Bonn


def loss(x, y, z):
    return x**2 + y**2 + z**2


if __name__ == '__main__':
    e = ro.Experiment('bo_simple', {
        'x': ro.Choice([0.0, 1, 2, 3, 4, 5, 6, 7]),
        'y': ro.Gaussian(0.0, 3.0),
        'z': ro.Uniform(0.0, 1.0),
    })
    bo = Bonn(e)

    e.sample_all_params()
    res = loss(e.x, e.y, e.z)
    e.add_result(res)

    for i in xrange(200):
        bo.fit()
        bo.sample(e)
        res = loss(e.x, e.y, e.z)
        print res
        e.add_result(res, {
            'trial': i
        })
#!/usr/bin/env python3

import argparse
import randopt as ro


def parse():
    parser = argparse.ArgumentParser()
    parser.add_argument('--abcd', type=float)
    parser.add_argument('--qwer', type=float)
    parser.add_argument('--asdf', type=float)
    return parser.parse_args()


def loss(x, y, z):
    return x**2 + y**2 + z**2


if __name__ == '__main__':
    args = parse()
    exp = ro.Experiment('ropt_test', params={
        'abcd': ro.Constant(args.abcd),
        'qwer': ro.Constant(args.qwer),
        'asdf': ro.Constant(args.asdf),
    })
    exp.add_result(loss(args.abcd, args.asdf, args.qwer))
#!/usr/bin/env python

import randopt as ro
import time


def loss(x):
    # time.sleep(1)
    return x**2


if __name__ == '__main__':
    e = ro.Experiment('simple_example', {
        'alpha': ro.Gaussian(mean=0.0, std=1.0, dtype='float'),
    })

    # Sampling parameters
    for i in range(100):
        e.sample('alpha')
        res = loss(e.alpha)
        print('Result: ', res)
        e.add_result(res)

    # Manually setting parameters
    e.alpha = 0.00001
    res = loss(e.alpha)
    e.add_result(res)

    # Search over all experiment results, including ones from previous runs
    opt = e.minimum()
    is_root = (rank == 0)
    train_rewards = train(args, env, agent, opt, train_update, verbose=is_root)
    if is_root:
        for r in train_rewards:
            outputs.put(r)


if __name__ == '__main__':
    args, env, agent, opt = get_setup()
    num_processes = args.n_proc
    processes = []

    # Share parameters of the policy (and opt)
    agent.share_memory()

    exp = ro.Experiment(args.env + '-dev-async', params={})
    train_rewards = Queue()
    for rank in range(num_processes):
        sleep(1.0)
        p = mp.Process(target=async_update, args=(agent, opt, rank, train_rewards))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()

    test_rewards = test(args, env, agent)
    data = {p: getattr(args, p) for p in vars(args)}
    data['train_rewards'] = [train_rewards.get() for _ in range(train_rewards.qsize())]
    data['test_rewards'] = test_rewards
    data['timestamp'] = time()
    convergence = []
    for epoch in range(num_epochs):
        params = params - (lr * df(params))
        convergence.append(f(params))
    # Return final result + convergence array
    return f(params), convergence


if __name__ == '__main__':
    init = 10.0
    num_runs = 100
    exp = ro.Experiment('grad_descent', {
        'learning_rate': ro.Gaussian(mean=0.01, std=0.01),
        'num_epochs': ro.Truncated(ro.Gaussian(mean=50, std=10, dtype='int'),
                                   low=10, high=100)
    })

    # Run the experiment a couple of times
    for _ in range(num_runs):
        exp.sample_all_params()
        result, convergence = grad_descent(loss, dloss, init,
                                           exp.num_epochs, exp.learning_rate)
        exp.add_result(result, data={'convergence': convergence})

    opt = exp.minimum()
    print('Optimal result: ', opt.result,
          ', with convergence: ', opt.params['convergence'])
#!/usr/bin/env python3

import randopt as ro
from randopt_plugins.vislive import Vislive
from time import sleep, time

if __name__ == '__main__':
    exp = ro.Experiment('live_example', params={
        'x': ro.Gaussian(),
        'y': ro.Gaussian()
    })
    live = Vislive(exp, metrics=['square', 'norm', 'xminusy', 'time'])
    live.update({'square': 0.0, 'norm': 0.0, 'xminusy': 0.0, 'time': 0.0})

    start = time()
    for i in range(10):
        live.sample_all_params()
        live.update('square', live.x**2)
        live.update({
            'norm': abs(exp.y),
            'xminusy': exp.x - exp.y,
            'time': time() - start
        })
        print(live.table_metrics())
        live.plot_metrics()
        sleep(1)

    live.add_result(exp.x - exp.y)
    live.add_result(exp.x - exp.y, {'useless': [0, 0, 0, 0]})
    live.add_result(exp.x - exp.y, data={'useless': [0, 0, 0, 0]})
#!/usr/bin/env python3

import randopt as ro
from random import random


def loss(x, y):
    return [(x**2 + y**2 + random()) / i for i in range(1, 51)]


if __name__ == '__main__':
    exp = ro.Experiment('quadratic', params={
        'x': ro.Gaussian(),
        'y': ro.Uniform(-0.5, 0.5)
    })
    for _ in range(20):
        exp.sample_all_params()
        conv = loss(exp.x, exp.y)
        exp.add_result(conv[-1], data={
            'convergence': conv
        })
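
    # Sketch of reading the saved summaries back, assuming the minimum() /
    # params access pattern used in the grad_descent example above.
    best = exp.minimum()
    print('Best final loss:', best.result)
    print('Its convergence curve:', best.params['convergence'])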