Example #1
def setUp(self):
    self.expName = 'test_experiment_unit_tests'
    self.exp = ro.Experiment(
        self.expName, {
            'param1': ro.Uniform(low=0.0, high=100.0, dtype='int'),
            'param2': ro.Normal(mean=100, std=10, dtype='int'),
        })
Example #2
def run_ro(init, num_epochs):
    param = init

    e = ro.Experiment('ro_example', {'alpha': ro.Uniform(low=0.0, high=0.01)})
    e.sample_all_params()

    for epoch in range(num_epochs):
        param = param - e.alpha * dloss(param)
    e.add_result(loss(param))
    return e
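run_ro above performs a single randomly sampled run; a driver has to call it repeatedly to actually search over alpha. Below is a minimal random-search sketch around it; the quadratic loss/dloss, the import, and the trial count are assumptions added for illustration, not part of the original snippet.

import randopt as ro  # run_ro above relies on this import


def loss(x):
    # Illustrative quadratic objective.
    return x ** 2


def dloss(x):
    # Gradient of the quadratic objective.
    return 2.0 * x


if __name__ == '__main__':
    e = None
    for _ in range(10):
        e = run_ro(init=10.0, num_epochs=100)
    # minimum() scans every saved 'ro_example' summary, including past runs.
    best = e.minimum()
    print('Best final loss:', best.result, 'with params:', best.params)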
Example #3
def wrapper(*args, **kwargs):
    params = {
        param: ro.Constant(default)
        for param, default in zip(arg_names, arg_defaults)
    }
    for key, value in kwargs.items():
        if key in params:
            params[key] = ro.Constant(value)
    experiment = ro.Experiment(name=name,
                               directory=directory,
                               params=params)
    result = fn(*args, **kwargs)
    if isinstance(result, collections.abc.Iterable):
        if len(result) == 2:
            experiment.add_result(result[0], data=result[1])
        elif len(result) == 3:
            experiment.add_result(result[0],
                                  data=result[1],
                                  attachment=result[2])
    else:
        experiment.add_result(result)
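The wrapper above is the body of a decorator that records a function's keyword arguments and return value as a randopt experiment. A minimal sketch of how such a decorator could be assembled and applied follows; the factory name ro_experiment, the default directory, and the objective function are illustrative assumptions, and the (result, data, attachment) tuple handling from the original wrapper is omitted for brevity.

import inspect

import randopt as ro


def ro_experiment(name, directory='randopt_results'):
    # Hypothetical decorator factory assembled around the wrapper above.
    def decorator(fn):
        signature = inspect.signature(fn)
        arg_names = list(signature.parameters)
        arg_defaults = [p.default for p in signature.parameters.values()]

        def wrapper(*args, **kwargs):
            # Defaults become ro.Constant params, overridden by any kwargs.
            params = {
                param: ro.Constant(default)
                for param, default in zip(arg_names, arg_defaults)
            }
            for key, value in kwargs.items():
                if key in params:
                    params[key] = ro.Constant(value)
            experiment = ro.Experiment(name=name,
                                       directory=directory,
                                       params=params)
            result = fn(*args, **kwargs)
            experiment.add_result(result)
            return result
        return wrapper
    return decorator


@ro_experiment('decorated_example')
def objective(x=1.0, y=2.0):
    return x ** 2 + y ** 2


objective(y=3.0)  # records params {'x': 1.0, 'y': 3.0} and result 10.0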
Example #4
def test_raise_with_param_named_result(self):
    with self.assertRaises(ValueError):
        ro.Experiment(
            'invalid experiment', {
                'result': ro.Uniform(low=0.0, high=100.0, dtype='int'),
            })
Example #5
def learn(exp_name, dataset, model=None, optimizer=None, loss=None,
          rng_seed=1234, num_epochs=10, split=(0.7, 0.2, 0.1), bsz=64):

    if model is None:
        in_size = dataset[0][0].numel()
        if isinstance(dataset[0][1], (int, long, float, complex)):
            out_size = 1
        else:
            out_size = dataset[0][1].numel()
        model = get_model(in_size, out_size)
        model = Network(model)

    if loss is None:
        if isinstance(dataset[0][1], (int, long, float, complex)):
            reg = True
        else:
            reg = False
        loss = get_loss(regression=reg)

    if optimizer is None:
        optimizer = get_optimizer(model)

    opt_hyperparams = optimizer.param_groups[0]
    opt_hyperparams = {k: opt_hyperparams[k] for k in opt_hyperparams if not k == 'params'}

    exp = ro.Experiment(exp_name, {
        'model': str(model),
        'optimizer': str(optimizer),
        'opt_hyperparams': opt_hyperparams, 
        'loss': str(loss),
        'rng_seed': rng_seed,
        'num_epochs': num_epochs,
        'bsz': bsz,
        'split': split,
    })

    th.manual_seed(rng_seed)
    np.random.seed(rng_seed)
    if args.cuda:
        th.cuda.manual_seed(rng_seed)
        model.cuda()

    print('Splitting dataset in ' + str(split[0]) + ' train, ' + str(split[1]) + ' Validation, ' + str(split[2]) + ' Test')
    dataset = split_dataset(dataset, split[0], split[1], split[2])
    kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
    train_loader = th.utils.data.DataLoader(dataset, batch_size=bsz, shuffle=True, **kwargs)
    dataset.use_valid()
    valid_loader = th.utils.data.DataLoader(dataset, batch_size=bsz, shuffle=True, **kwargs)
    dataset.use_test()
    test_loader = th.utils.data.DataLoader(dataset, batch_size=bsz, shuffle=True, **kwargs)

    train_errors = []
    valid_errors = []

    # Start training
    for epoch in range(num_epochs):
        print('\n\n', '-' * 20, ' Epoch ', epoch, ' ', '_' * 20)
        dataset.use_train()
        train_errors.append(train(train_loader, model, loss, optimizer))
        print('Training error: ', train_errors[-1])
        dataset.use_valid()
        valid_errors.append(test(valid_loader, model, loss))
        print('Validation error: ', valid_errors[-1])

    # Benchmark on Test
    dataset.use_test()
    test_error = test(test_loader, model, loss)
    print('Final Test Error: ', test_error)

    # Save experiment result
    exp.add_result(test_error, {
        'train_errors': train_errors,
        'valid_errors': valid_errors,
    })

    # Plot Results
    if not os.path.exists('./results'):
        os.mkdir('./results')

    p = Plot('Convergence')
    x = np.arange(0, len(train_errors), 1)
    p.plot(x, np.array(train_errors), label='Train')
    p.plot(x, np.array(valid_errors), label='Validation')
    p.set_axis('Epoch', 'Loss')
    
    b = Plot('Final Error')
    b.bar(x=[train_errors[-1], valid_errors[-1], test_error],
          labels=['Train', 'Validation', 'Test'])
    
    cont = Container(1, 2, title=exp_name)
    cont.set_plot(0, 0, p)
    cont.set_plot(0, 1, b)
    cont.save('./results/' + exp_name + '.pdf')
Example #6
from random import gauss

import randopt as ro
import randopt.objectives as obj


def converge(curve, mu, sigma):
    return [c + gauss(mu, sigma)**2 for c in curve]


if __name__ == '__main__':
    curve = [10 / x for x in range(1, 36)]

    loss = obj.median_variance

    exp = ro.Experiment('objectives_example',
                        params={
                            'mu': ro.Gaussian(3, 1),
                            'sigma': ro.Gaussian(1, 1),
                        })

    for _ in range(10):
        exp.sample_all_params()
        convergence = converge(curve, exp.mu, exp.sigma)
        exp.add_result(loss(convergence, 0.5, 0.5),
                       data={
                           'convergence': convergence,
                           'normalized': obj.normalize(convergence),
                           'curve': curve,
                       })

    evo = ro.Evolutionary(exp, {
        'mu': ro.Gaussian(0.0, 0.1),
Example #7
def test_experiment4(x=2, y=4):
    exp = ro.Experiment('params_from_def',
                        params=ro.dict_to_constants(locals()))
    exp.add_result(x**2 + y**2, data={'additional': 'as usual.'})
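For the call test_experiment4(x=2, y=4), ro.dict_to_constants(locals()) presumably expands to the explicit parameter dictionary below (shown only for comparison; not part of the original example).

params = {'x': ro.Constant(2), 'y': ro.Constant(4)}
exp = ro.Experiment('params_from_def', params=params)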
Example #8
                    metavar='N',
                    help='model updates per simulator step (default: 5)')
parser.add_argument('--replay_size',
                    type=int,
                    default=1000000,
                    metavar='N',
                    help='size of replay buffer (default: 1000000)')
args = parser.parse_args()

env = NormalizedActions(gym.make(args.env_name))

writer = SummaryWriter()

REWARDS = []
TEST_REWARDS = []
experiment = ro.Experiment(name='baseline-' + args.algo)

env.seed(args.seed)
torch.manual_seed(args.seed)
np.random.seed(args.seed)
if args.algo == "NAF":
    agent = NAF(args.gamma, args.tau, args.hidden_size,
                env.observation_space.shape[0], env.action_space)
else:
    agent = DDPG(args.gamma, args.tau, args.hidden_size,
                 env.observation_space.shape[0], env.action_space)

memory = ReplayMemory(args.replay_size)

ounoise = OUNoise(env.action_space.shape[0]) if args.ou_noise else None
param_noise = AdaptiveParamNoiseSpec(
Example #9
    #    env = gym.make('DiscreteOrientation-v0', size_noise=0.0)
    env = StochasticDiscreteOrientation(size_noise=0.0)
    env = EnvWrapper(env)
    env.seed(1234)
    model, critic = Baseline(env.state_size,
                             env.action_size,
                             layer_sizes=(2, 2),
                             discrete=True)
    policy = DiscretePolicy(model)
    agent = Reinforce(
        policy=policy,
        critic=critic,
        update_frequency=args.update_frequency,
        critic_weight=1.0,
        entropy_weight=0.0001,
        #                      grad_clip=0.5,
        advantage=DiscountedAdvantage())
    #                      advantage=GeneralizedAdvantageEstimation(tau=0.95, gamma=0.99))
    opt = optim.Adam(agent.parameters(), lr=7e-4, eps=1e-5)

    exp = ro.Experiment('DiscreteOrientation-dev-seq', params={})
    train_rewards = train(args, env, agent, opt)
    test_rewards = test(args, env, agent)
    data = {p: getattr(args, p) for p in vars(args)}
    data['train_rewards'] = train_rewards
    data['test_rewards'] = test_rewards
    data['timestamp'] = time()
    exp.add_result(result=sum(test_rewards) / len(test_rewards), data=data)
    th.save(agent.state_dict(), './high_level.pth')
Example #10
import randopt as ro
import torch as th

from time import time

from drl.utils import get_setup
from drl.training import test, train


def seq_update(args, env, agent, opt):
    opt.zero_grad()
    update = agent.get_update()
    opt.step()


def train_update(args, env, agent, opt):
    opt.zero_grad()
    update = agent.get_update()
    opt.step()


if __name__ == '__main__':
    args, env, agent, opt = get_setup()
    exp = ro.Experiment(args.env + '-dev-seq', params={})
    train_rewards = train(args, env, agent, opt, train_update)
    test_rewards = test(args, env, agent)
    data = {p: getattr(args, p) for p in vars(args)}
    data['train_rewards'] = train_rewards
    data['test_rewards'] = test_rewards
    data['timestamp'] = time()
    exp.add_result(result=sum(test_rewards) / len(test_rewards), data=data)
Example #11
def setUp(self):
    self._clean_up()
    self.experiment = ro.Experiment('ropt_test')
Example #12
def main():
    ARGUMENTS.update(vars(args))
    torch.set_num_threads(1)
    device = torch.device("cuda:0" if args.cuda else "cpu")

    if args.vis:
        from visdom import Visdom
        viz = Visdom(port=args.port)
        win = None

    envs = make_vec_envs(args.env_name, args.seed, args.num_processes,
                        args.gamma, args.log_dir, args.add_timestep, device, False)

    actor_critic = Policy(envs.observation_space.shape, envs.action_space,
        base_kwargs={'recurrent': args.recurrent_policy})
    actor_critic.to(device)

    if args.algo == 'a2c':
        agent = algo.A2C_ACKTR(actor_critic, args.value_loss_coef,
                               args.entropy_coef, lr=args.lr,
                               eps=args.eps, alpha=args.alpha,
                               max_grad_norm=args.max_grad_norm)
    elif args.algo == 'ppo':
        agent = algo.PPO(actor_critic, args.clip_param, args.ppo_epoch, args.num_mini_batch,
                         args.value_loss_coef, args.entropy_coef, lr=args.lr,
                               eps=args.eps,
                               max_grad_norm=args.max_grad_norm)
    elif args.algo == 'acktr':
        agent = algo.A2C_ACKTR(actor_critic, args.value_loss_coef,
                               args.entropy_coef, acktr=True)

    rollouts = RolloutStorage(args.num_steps, args.num_processes,
                        envs.observation_space.shape, envs.action_space,
                        actor_critic.recurrent_hidden_state_size)

    obs = envs.reset()
    rollouts.obs[0].copy_(obs)
    rollouts.to(device)

    episode_rewards = deque(maxlen=10)

    start = time.time()
    for j in range(num_updates):

        if args.use_linear_lr_decay:
            # decrease learning rate linearly
            if args.algo == "acktr":
                # use optimizer's learning rate since it's hard-coded in kfac.py
                update_linear_schedule(agent.optimizer, j, num_updates, agent.optimizer.lr)
            else:
                update_linear_schedule(agent.optimizer, j, num_updates, args.lr)

        if args.algo == 'ppo' and args.use_linear_lr_decay:
            agent.clip_param = args.clip_param  * (1 - j / float(num_updates))

        for step in range(args.num_steps):
            # Sample actions
            with torch.no_grad():
                value, action, action_log_prob, recurrent_hidden_states = actor_critic.act(
                        rollouts.obs[step],
                        rollouts.recurrent_hidden_states[step],
                        rollouts.masks[step])

            # Observe reward and next obs
            obs, reward, done, infos = envs.step(action)

            for info in infos:
                if 'episode' in info.keys():
                    episode_rewards.append(info['episode']['r'])

            # If done then clean the history of observations.
            masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                       for done_ in done])
            rollouts.insert(obs, recurrent_hidden_states, action, action_log_prob, value, reward, masks)

        with torch.no_grad():
            next_value = actor_critic.get_value(rollouts.obs[-1],
                                                rollouts.recurrent_hidden_states[-1],
                                                rollouts.masks[-1]).detach()

        rollouts.compute_returns(next_value, args.use_gae, args.gamma, args.tau)

        value_loss, action_loss, dist_entropy = agent.update(rollouts)

        rollouts.after_update()

        # save for every interval-th episode or for the last epoch
        if (j % args.save_interval == 0 or j == num_updates - 1) and args.save_dir != "":
            save_path = os.path.join(args.save_dir, args.algo)
            try:
                os.makedirs(save_path)
            except OSError:
                pass

            # A really ugly way to save a model to CPU
            save_model = actor_critic
            if args.cuda:
                save_model = copy.deepcopy(actor_critic).cpu()

            save_model = [save_model,
                          getattr(get_vec_normalize(envs), 'ob_rms', None)]

            torch.save(save_model, os.path.join(save_path, args.env_name + ".pt"))

        total_num_steps = (j + 1) * args.num_processes * args.num_steps

        if j % args.log_interval == 0 and len(episode_rewards) > 1:
            end = time.time()
            print("Updates {}, num timesteps {}, FPS {} \n Last {} training episodes: mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}\n".
                format(j, total_num_steps,
                       int(total_num_steps / (end - start)),
                       len(episode_rewards),
                       np.mean(episode_rewards),
                       np.median(episode_rewards),
                       np.min(episode_rewards),
                       np.max(episode_rewards), dist_entropy,
                       value_loss, action_loss))
            ALL_UPDATES.append(j)
            ALL_TIMESTEPS.append(total_num_steps)
            ALL_FPS.append(int(total_num_steps / (end - start)))
            ALL_MEAN_REWARDS.append(np.mean(episode_rewards))
            ALL_MEDIAN_REWARDS.append(np.median(episode_rewards))
            ALL_MIN_REWARDS.append(np.min(episode_rewards))
            ALL_MAX_REWARDS.append(np.max(episode_rewards))
            ALL_DIST_ENTROPY.append(dist_entropy)
            ALL_VALUE_LOSS.append(value_loss)
            ALL_ACTION_LOSS.append(action_loss)

        if (args.eval_interval is not None
                and len(episode_rewards) > 1
                and j % args.eval_interval == 0):
            eval_envs = make_vec_envs(
                args.env_name, args.seed + args.num_processes, args.num_processes,
                args.gamma, eval_log_dir, args.add_timestep, device, True)

            vec_norm = get_vec_normalize(eval_envs)
            if vec_norm is not None:
                vec_norm.eval()
                vec_norm.ob_rms = get_vec_normalize(envs).ob_rms

            eval_episode_rewards = []

            obs = eval_envs.reset()
            eval_recurrent_hidden_states = torch.zeros(args.num_processes,
                            actor_critic.recurrent_hidden_state_size, device=device)
            eval_masks = torch.zeros(args.num_processes, 1, device=device)

            while len(eval_episode_rewards) < 10:
                with torch.no_grad():
                    _, action, _, eval_recurrent_hidden_states = actor_critic.act(
                        obs, eval_recurrent_hidden_states, eval_masks, deterministic=True)

                # Observe reward and next obs
                obs, reward, done, infos = eval_envs.step(action)

                eval_masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                                for done_ in done])
                for info in infos:
                    if 'episode' in info.keys():
                        eval_episode_rewards.append(info['episode']['r'])

            eval_envs.close()

            print(" Evaluation using {} episodes: mean reward {:.5f}\n".
                format(len(eval_episode_rewards),
                       np.mean(eval_episode_rewards)))

        if args.vis and j % args.vis_interval == 0:
            try:
                # Sometimes monitor doesn't properly flush the outputs
                win = visdom_plot(viz, win, args.log_dir, args.env_name,
                                  args.algo, args.num_env_steps)
            except IOError:
                pass
    # Save the results
    name = ARGUMENTS['env_name'] + '-' + ARGUMENTS['algo'] + '-' + ARGUMENTS['experiment'] + '-grad_noise' + str(ARGUMENTS['gradient_noise'])
    experiment = ro.Experiment(name, directory='results')
    data = {
        'updates': ALL_UPDATES,
        'timesteps': ALL_TIMESTEPS,
        'fps': ALL_FPS,
        'mean_rewards': ALL_MEAN_REWARDS,
        'median_rewards': ALL_MEDIAN_REWARDS,
        'min_rewards': ALL_MIN_REWARDS,
        'max_rewards': ALL_MAX_REWARDS,
        'dist_entropy': ALL_DIST_ENTROPY,
        'value_loss': ALL_VALUE_LOSS,
        'action_loss': ALL_ACTION_LOSS,
    }
    data.update(ARGUMENTS)
    result = data['mean_rewards'][-1]
    experiment.add_result(result, data)
Example #13
#!/usr/bin/env python

import randopt as ro


def loss(x, y):
    return x**2 + y**2


if __name__ == '__main__':

    e = ro.Experiment('gs_example', {
        'alpha': ro.Choice([0.1, 0.2, 0.3]),
        'beta': ro.Choice([0.1, 0.2, 0.3]),
    })

    # Add a single result
    e.alpha = 0.1
    e.beta = 0.1
    #e.add_result(loss(0.1, 0.1))

    gs = ro.GridSearch(e)
    gs.sample('alpha')
    # Sampling parameters
    for i in range(9):
        gs.refresh_index()
        gs.sample_all_params()
        res = loss(gs.alpha, gs.beta)
        print('Result: ', res)
        gs.add_result(res)
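Once the 3x3 grid has been covered, the best cell can be read back from the saved summaries. A short follow-up sketch, not part of the original example, appended to the __main__ block above and using e.minimum() as in Example #17:

    best = e.minimum()
    print('Best loss:', best.result, 'at params:', best.params)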
Example #14
#!/usr/bin/env python

import randopt as ro


def loss(x):
    return x**2


if __name__ == '__main__':

    e = ro.Experiment('evo_example', {
        'alpha': ro.Gaussian(mean=0.0, std=1.5, dtype='float'),
    })

    # Populate with first result
    e.sample('alpha')
    res = loss(e.alpha)
    e.add_result(res)

    # Evolutionary search
    e = ro.Experiment(
        'evo_example',
        {
            # Evolutionary will use alpha.sample() as perturbation
            'alpha': ro.Gaussian(mean=0.0, std=0.5, dtype='float'),
        })
    evo = ro.Evolutionary(e)
    for i in range(100):
        evo.sample_parent()
        evo.sample_all_params()
Example #15
#!/usr/bin/env python3

import random
import randopt as ro

if __name__ == '__main__':
    random.seed(1234)
    # Create random JSON Summaries
    exp = ro.Experiment('summary_list')
    for i in range(15):
        exp.add_result(random.random(),
                       data={
                           'alpha': [random.random() for _ in range(100)],
                           'beta': [random.random() for _ in range(1000)],
                           'gamma': random.random(),
                       })

    # Fetch some results
    results = exp.top(10)

    # Play with the API
    print(len(results))
    assert len(results) == results.count()
    print('slice mean', results[0:3].mean())
    print('mean of top half:',
          results.filter(lambda r: r.result > results.mean()).mean())

    # Special functions on scalars
    print('min(gamma):', results.min('gamma'))
    print('max(gamma):', results.max('gamma'))
    print('mean(gamma):', results.mean('gamma'))
Example #16
#!/usr/bin/env python

import randopt as ro

def loss(w, x, y, z):
    return w**2 + x**2 + y**2 + z**2

if __name__ == '__main__':

    e = ro.Experiment('multi_params_example', {
            'dog': ro.Normal(mean=0.0, std=1.0, dtype='float'),
            'cat': ro.Uniform(low=-1.0, high=1.0, dtype='float'),
            'dolphin': ro.LognormVariate(mean=0.0, std=1.0, dtype='float'),
            'any_name': ro.Choice([0.01, 0.05, 0.1, 0.5, 0.7, 0.9], sampler=ro.Uniform()),
        })

    # Seeding will make all of your searches reproducible. (Usually not wanted)
    e.seed(1234)

    # Randomly sampling parameters
    for i in range(100):
        e.sample_all_params()
        res = loss(e.dog, e.cat, e.dolphin, e.any_name)
        print('Result: ', res)
        # Example of using the second parameter
        e.add_result(res, data={
            'sup.data': [e.dog, e.cat, e.dolphin, e.any_name]
        })

    # Save/load the state of the random number generators
    e.save_state('./multi_params_state.pk')
Example #17
import randopt as ro


def loss(x):
    return x**2


e = ro.Experiment('myexp', {
    'alpha': ro.Gaussian(mean=0.0, std=1.0, dtype='float'),
})

# Sampling parameters
for i in range(100):
    e.sample('alpha')
    res = loss(e.alpha)
    print('Result: ', res)
    e.add_result(res)

# Manually setting parameters
e.alpha = 0.00001
res = loss(e.alpha)
e.add_result(res)

# Search over all experiments results, including ones from previous runs
opt = e.minimum()
print('Best result: ', opt.result, ' with params: ', opt.params)
Example #18
            param, sampler = arg.split('=')
            command = command + ' ' + param + ' {' + str(len(samplers)) + ':.10f}'
            sampler = parse_sampler(sampler)
            param = param.replace('-', '')
            parameters.append(param)
            samplers.append(sampler)
        else:
            command = command + ' ' + arg

    # Generate the right number of commands
    if experiment is not None and experiment_name is not None:
        print('Using ', experiment.__name__)
        print('sys: ', sys.argv)
        params = {p: s for p, s in zip(parameters, samplers)}
        experiment = experiment(ro.Experiment(name=experiment_name,
                                              params=params,
                                              directory=experiment_dir))
        command_generator = ExperimentSampler(command, parameters, experiment)
    else:
        command_generator = CommandGenerator(command, parameters, samplers)

    if n_searches == -1:
        n_searches = float('inf')
        commands = command_generator
    else:
        commands = (next(command_generator) for _ in range(n_searches))

    # Run until search finishes
    for i, command in enumerate(commands):
        print(i, ':', command)
        subprocess.call(command, shell=True)
Example #19
#!/usr/bin/env python

import randopt as ro
from bonn import Bonn

def loss(x, y, z):
    return x**2 + y**2 + z**2

if __name__ == '__main__':
    e = ro.Experiment('bo_simple', {
        'x': ro.Choice([0.0, 1, 2, 3, 4, 5, 6, 7]),
        'y': ro.Gaussian(0.0, 3.0),
        'z': ro.Uniform(0.0, 1.0),
        })

    bo = Bonn(e)

    e.sample_all_params()
    res = loss(e.x, e.y, e.z)
    e.add_result(res)

    for i in range(200):
        bo.fit()
        bo.sample(e)
        res = loss(e.x, e.y, e.z)
        print(res)
        e.add_result(res, {
            'trial': i
            })
Example #20
#!/usr/bin/env python3

import argparse
import randopt as ro


def parse():
    parser = argparse.ArgumentParser()
    parser.add_argument('--abcd', type=float)
    parser.add_argument('--qwer', type=float)
    parser.add_argument('--asdf', type=float)
    return parser.parse_args()


def loss(x, y, z):
    return x**2 + y**2 + z**2


if __name__ == '__main__':
    args = parse()
    exp = ro.Experiment('ropt_test',
                        params={
                            'abcd': ro.Constant(args.abcd),
                            'qwer': ro.Constant(args.qwer),
                            'asdf': ro.Constant(args.asdf),
                        })
    exp.add_result(loss(args.abcd, args.asdf, args.qwer))
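The script above only records the constants it receives, so the hyperparameter search has to happen outside it, as with the command launcher of Example #18. A minimal pure-Python driver sketch follows; the file name ropt_test.py, the Gaussian search space, and the run count are assumptions.

import subprocess

import randopt as ro

# Sampler-only experiment; results are written by the child script above.
search = ro.Experiment('ropt_test_search', params={
    'abcd': ro.Gaussian(0.0, 1.0),
    'qwer': ro.Gaussian(0.0, 1.0),
    'asdf': ro.Gaussian(0.0, 1.0),
})

for _ in range(5):
    search.sample_all_params()
    # Each run re-executes the script above with freshly sampled values.
    subprocess.call(['python', 'ropt_test.py',
                     '--abcd', str(search.abcd),
                     '--qwer', str(search.qwer),
                     '--asdf', str(search.asdf)])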
Example #21
#!/usr/bin/env python

import randopt as ro
import time


def loss(x):
    #    time.sleep(1)
    return x**2


if __name__ == '__main__':

    e = ro.Experiment('simple_example', {
        'alpha': ro.Gaussian(mean=0.0, std=1.0, dtype='float'),
    })

    # Sampling parameters
    for i in range(100):
        e.sample('alpha')
        res = loss(e.alpha)
        print('Result: ', res)
        e.add_result(res)

    # Manually setting parameters
    e.alpha = 0.00001
    res = loss(e.alpha)
    e.add_result(res)

    # Search over all experiments results, including ones from previous runs
    opt = e.minimum()
Example #22
    is_root = (rank == 0)
    train_rewards = train(args, env, agent, opt, train_update, verbose=is_root)
    if is_root:
        for r in train_rewards:
            outputs.put(r)


if __name__ == '__main__':
    args, env, agent, opt = get_setup()
    num_processes = args.n_proc
    processes = []

    # Share parameters of the policy (and opt)
    agent.share_memory()

    exp = ro.Experiment(args.env + '-dev-async', params={})
    train_rewards = Queue()
    for rank in range(num_processes):
        sleep(1.0)
        p = mp.Process(target=async_update, args=(agent, opt, rank, train_rewards))
        p.start()
        processes.append(p)

    for p in processes:
        p.join()

    test_rewards = test(args, env, agent)
    data = {p: getattr(args, p) for p in vars(args)}
    data['train_rewards'] = [train_rewards.get() for _ in range(train_rewards.qsize())]
    data['test_rewards'] = test_rewards
    data['timestamp'] = time()
Example #23
    convergence = []
    for epoch in range(num_epochs):
        params = params - (lr * df(params))
        convergence.append(f(params))
    # Return final result + convergence array
    return f(params), convergence


if __name__ == '__main__':
    init = 10.0
    num_runs = 100

    exp = ro.Experiment(
        'grad_descent', {
            'learning_rate':
            ro.Gaussian(mean=0.01, std=0.01),
            'num_epochs':
            ro.Truncated(
                ro.Gaussian(mean=50, std=10, dtype='int'), low=10, high=100)
        })

    # Run the experiment a couple of times
    for _ in range(num_runs):
        exp.sample_all_params()
        result, convergence = grad_descent(loss, dloss, init, exp.num_epochs,
                                           exp.learning_rate)
        exp.add_result(result, data={'convergence': convergence})

    opt = exp.minimum()
    print('Optimal result: ', opt.result, ', with convergence: ',
          opt.params['convergence'])
Example #24
#!/usr/bin/env python3

import randopt as ro
from randopt_plugins.vislive import Vislive
from time import sleep, time

if __name__ == '__main__':
    exp = ro.Experiment('live_example',
                        params={
                            'x': ro.Gaussian(),
                            'y': ro.Gaussian()
                        })
    live = Vislive(exp, metrics=['square', 'norm', 'xminusy', 'time'])
    live.update({'square': 0.0, 'norm': 0.0, 'xminusy': 0.0, 'time': 0.0})

    start = time()
    for i in range(10):
        live.sample_all_params()
        live.update('square', live.x**2)
        live.update({
            'norm': abs(exp.y),
            'xminusy': exp.x - exp.y,
            'time': time() - start
        })
        print(live.table_metrics())
        live.plot_metrics()
        sleep(1)

    live.add_result(exp.x - exp.y)
    live.add_result(exp.x - exp.y, {'useless': [0, 0, 0, 0]})
    live.add_result(exp.x - exp.y, data={'useless': [0, 0, 0, 0]})
Example #25
#!/usr/bin/env python3

import randopt as ro
from random import random

def loss(x, y):
    return [(x**2 + y**2 + random()) / i for i in range(1, 51)]

if __name__ == '__main__':
    exp = ro.Experiment('quadratic', params={
        'x': ro.Gaussian(),
        'y': ro.Uniform(-0.5, 0.5)
    })

    for _ in range(20):
        exp.sample_all_params()
        conv = loss(exp.x, exp.y)
        exp.add_result(conv[-1], data={
            'convergence': conv
        })
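As an illustrative follow-up (not part of the original example), the stored runs can be summarized with the results API shown in Example #15, appended to the __main__ block above:

    results = exp.top(5)
    print('Summaries fetched:', results.count())
    print('Their mean final loss:', results.mean())
    print('Best final loss overall:', exp.minimum().result)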