Example #1
0
def task_deterministic_2(epoch=5):
    """Check that training in 2 steps is the same as training in one step"""
    device = fetch_device()

    state_folder = '/tmp/olympus/tests'
    file_name = f'{state_folder}/93c88038692bf4baf715ca3806d8a46347a646552f08ede113ef68efae6f1579.state'

    state_storage = StateStorage(folder=state_folder)

    # Reference run: train the full 2 * epoch epochs in a single shot
    full_run = run_no_interrupts(epoch * 2, params, device, state_storage)
    remove(file_name)

    # First half: train `epoch` epochs; this must leave a checkpoint behind
    _ = run_no_interrupts(epoch, params, device, state_storage)
    assert os.path.exists(file_name)

    # Second half: ask for 2 * epoch epochs, resuming from the checkpoint
    resumed_run = run_no_interrupts(epoch * 2, params, device, state_storage)
    remove(file_name)

    # Both runs must agree on every tracked metric
    for k in keys:
        diff = abs(full_run[k] - resumed_run[k])
        print(f'{k:>30} => {diff}')
        assert diff < 1e-4
Example #2
0
def test_model_init(seed):
    """Two baselines built and initialized with the same seed must have
    byte-identical parameters."""
    params = {
        'optimizer': {
            'lr': 0.011113680070144951,
            'momentum': 0.04081791544572477,
            'weight_decay': 6.2091793568732874e-06
        },
        'model': {
            'initializer': {
                'gain': 1.0
            }
        }
    }

    device = fetch_device()

    # Build two independent tasks sharing the same init seed
    model2 = classification_baseline(
        'logreg', 'glorot_uniform', 'sgd', 'none', 'test-mnist', 32, device, init_seed=seed, storage=NoStorage())

    model1 = classification_baseline(
        'logreg', 'glorot_uniform', 'sgd', 'none', 'test-mnist', 32, device, init_seed=seed, storage=NoStorage())

    model1.init(**params)
    model2.init(**params)

    # Every parameter tensor must match exactly (zero absolute difference)
    assert all((p1 - p2).abs().sum() == 0
               for p1, p2 in zip(model1.parameters(), model2.parameters()))
Example #3
0
def main_resume(epoch, batch_freq=0):
    """Check that an interrupted-and-resumed run reproduces the metrics of
    an uninterrupted run of the same length (within 1e-4).

    Parameters
    ----------
    epoch: number of epochs each run trains for
    batch_freq: batch-level interruption frequency forwarded to
        run_with_interrupts; 0 presumably disables batch-level interrupts
        -- TODO confirm
    """
    global interruption_counter
    interruption_counter = 0

    state_folder = '/tmp/olympus/tests'

    device = fetch_device()

    # Keep a handle on the real stdout so it can be restored below
    stdout = sys.stdout
    # sys.stdout = open(os.devnull, "w")

    metrics1 = run_no_interrupts(epoch, params, device)
    metrics2 = run_with_interrupts(epoch, batch_freq, state_folder, params,
                                   device)

    # sys.stdout.close()
    sys.stdout = stdout

    print(f'epoch = {epoch}')
    print(f'interrupted = {interruption_counter}')
    # NOTE(review): interruption_counter_batch is defined elsewhere in the
    # module; it is only read here
    print(f'interrupted = {interruption_counter_batch}')
    print(f'{"key":>30} | {"NoInterrupt":>12} | {"Interrupted":>12}')

    # Side-by-side dump of every metric from both runs
    for k, v in metrics1.items():
        print(f'{k:>30} | {v:12.4f} | {metrics2.get(k, float("NaN")):12.4f}')

    # The metrics listed in `keys` must agree between the two runs
    for k in keys:
        diff = abs(metrics1[k] - metrics2[k])
        print(f'{k} => {diff}')
        assert diff < 1e-4, f'diff for {k} should be lower but it is {diff}'
Example #4
0
def create_trained_trial(epochs=5):
    """Create a Task that was trained from scratch without interruption"""
    device = fetch_device()
    # Build, initialize with the module-level params, and train to completion
    trial = make_base_task(device, NoStorage())
    trial.init(**params)
    trial.fit(epochs=epochs)
    return trial
Example #5
0
def main(**kwargs):
    """Entry point: search the task's free hyper-parameters with HPO if any
    are missing, then train a final model with the chosen values.

    kwargs are forwarded to classification_baseline and are also expected to
    provide: verbose, experiment_name, uri, database, min_epochs, epochs
    (read off the Namespace below).
    """
    show_dict(kwargs)

    args = Namespace(**kwargs)
    set_verbose_level(args.verbose)

    device = fetch_device()
    # the experiment name template may reference any kwarg
    experiment_name = args.experiment_name.format(**kwargs)

    # save partial results here
    state_storage = StateStorage(
        folder=option('state.storage', '/tmp/olympus/classification'))

    def main_task():
        # Factory: builds a fresh task (HPO calls it once per trial)
        task = classification_baseline(device=device,
                                       storage=state_storage,
                                       **kwargs)

        if args.uri is not None:
            logger = metric_logger(args.uri, args.database, experiment_name)
            task.metrics.append(logger)

        return task

    space = main_task().get_space()

    # If space is not empty we search the best hyper parameters
    params = {}
    if space:
        show_dict(space)
        hpo = HPOptimizer('hyperband',
                          space=space,
                          fidelity=Fidelity(args.min_epochs,
                                            args.epochs).to_dict())

        hpo_task = HPO(hpo, main_task)
        hpo_task.metrics.append(ElapsedRealTime())

        trial = hpo_task.fit(objective='validation_accuracy')
        print(f'HPO is done, objective: {trial.objective}')
        params = trial.params
    else:
        print('No hyper parameter missing, running the experiment...')
    # ------

    # Run the experiment with the best hyper parameters
    # -------------------------------------------------
    # NOTE(review): `params` is initialized to {} above, so this check is
    # always true; the final training runs whether or not HPO happened
    if params is not None:
        # Train using train + valid for the final result
        final_task = classification_baseline(device=device,
                                             **kwargs,
                                             hpo_done=True)
        final_task.init(**params)
        final_task.fit(epochs=args.epochs)

        print('=' * 40)
        print('Final Trial Results')
        show_dict(flatten(params))
        final_task.report(pprint=True, print_fun=print)
        print('=' * 40)
Example #6
0
def task_deterministic(epoch=5):
    """Two identical uninterrupted runs must produce identical metrics."""
    device = fetch_device()

    state_folder = '/tmp/olympus/tests'
    file_name = f'{state_folder}/93c88038692bf4baf715ca3806d8a46347a646552f08ede113ef68efae6f1579.state'

    # First run; drop its checkpoint so the second run starts from scratch
    first = run_no_interrupts(epoch, params, device)
    remove(file_name)

    second = run_no_interrupts(epoch, params, device)
    remove(file_name)

    print(first)
    # Every tracked metric must match between the two runs
    for k in keys:
        diff = abs(first[k] - second[k])
        print(f'{k:>30} => {diff}')
        assert diff < 1e-4
Example #7
0
def create_resumed_trained_trial(epochs=5):
    """Create a Task that was trained, stopped, and resumed.

    Trains a task to completion, saves its state through its CheckPointer,
    then builds a fresh task that resumes from that saved state.
    """
    device = fetch_device()

    # Saves Task
    old_task = create_trained_trial(epochs)
    checkpointer = old_task.metrics.get('CheckPointer')
    # uid identifies the checkpoint inside the state storage
    uid = checkpointer.uid
    state_storage = StateStorage(folder='/tmp/olympus/tests')
    # redirect the checkpointer to the test folder before saving
    checkpointer.storage = state_storage
    checkpointer.save(old_task)

    # Done
    new_task = make_base_task(device, state_storage)
    # Automatic Resume
    # init(uid=...) locates the saved state and restores from it
    new_task.init(uid=uid, **params)
    assert new_task.resumed()
    return new_task
Example #8
0
import torch.nn.functional as F
from olympus.datasets import Dataset, SplitDataset, DataLoader
from olympus.optimizers import Optimizer, LRSchedule

from olympus.models import Model
from olympus.observers import ObserverList, ProgressView, Speed
from olympus.utils import fetch_device, option

# Script-level setup: model, optimizer, LR schedule and dataloader for a
# small fake-MNIST run.
epochs = 2
device = fetch_device()
base = option('base_path', '/tmp/olympus')

# Model
# resnet18 adapted to 1x28x28 inputs with 10 output classes
model = Model('resnet18', input_size=(1, 28, 28), output_size=(10, ))

# Optimizer
optimizer = Optimizer('sgd',
                      params=model.parameters(),
                      weight_decay=0.001,
                      lr=1e-5,
                      momentum=1e-5)

# Schedule
lr_schedule = LRSchedule('exponential', optimizer=optimizer, gamma=0.99)

data = Dataset('fake_mnist', path=f'{base}/data')

# 'original' presumably keeps the dataset's own train/valid/test split
# -- TODO confirm against SplitDataset
splits = SplitDataset(data, split_method='original')

# Dataloader
loader = DataLoader(splits, sampler_seed=1, batch_size=32)
Example #9
0
def main(bootstrapping_seed=1,
         sampler_seed=1,
         init_seed=1,
         batch_size=16,
         learning_rate=0.001,
         momentum=0.9,
         weight_decay=1e-4,
         epoch=240,
         half=False,
         hpo_done=False,
         uid=None,
         experiment_name=None,
         client=None,
         clean_on_exit=True,
         _interrupt=0):
    """Train an fcn_resnet18 segmentation baseline on voc-segmentation and
    return its validation mean Jaccard distance as a float.

    Parameters
    ----------
    bootstrapping_seed / sampler_seed / init_seed: seeds for the bootstrap
        split, the data sampling order, and the weight initialization
    _interrupt: when non-zero, raise an interrupt every `_interrupt` epochs
        (exercises checkpoint/resume)
    clean_on_exit: remove the checkpoint file once training finishes
    uid: checkpoint identity; None presumably lets the storage derive one
        -- TODO confirm storage._file(None) is valid
    """

    # NOTE(review): sibling scripts use `option(...)` (singular) for this
    # helper -- confirm `options` is the intended name
    base_folder = options('state.storage', '/tmp')
    storage = StateStorage(folder=base_folder)

    split_method = {
        'split_method': 'bootstrap',
        'ratio': 0.25,  # This means 50% training, 25% valid, 25% test
        'seed': bootstrapping_seed,
        'balanced': False
    }

    task = segmentation_baseline('fcn_resnet18',
                                 'self_init',
                                 'SGD',
                                 dataset='voc-segmentation',
                                 batch_size=batch_size,
                                 device=fetch_device(),
                                 split_method=split_method,
                                 sampler_seed=sampler_seed,
                                 init_seed=init_seed,
                                 storage=storage,
                                 half=half,
                                 hpo_done=hpo_done,
                                 verbose=False,
                                 validate=True)

    hyperparameters = {
        'model': {
            'initializer': {
                'gain': 1.0
            }
        },
        'optimizer': {
            'lr': learning_rate,
            'momentum': momentum,
            'weight_decay': weight_decay
        }
    }
    show_dict(hyperparameters)

    # Optional remote metric logging
    if client is not None:
        task.metrics.append(
            metric_logger(client=client, experiment=experiment_name))

    if _interrupt:
        from studies import InterruptingMetric
        # Will raise interrupt every `_interrupt` epochs
        task.metrics.append(InterruptingMetric(frequency_epoch=_interrupt))
        # checkpoint immediately so resume-after-interrupt has state to load
        storage.time_buffer = 0

    task.init(uid=uid, **hyperparameters)
    task.fit(epochs=epoch)

    # Remove checkpoint
    if clean_on_exit:
        file_path = storage._file(uid)
        try:
            os.remove(file_path)
            print('Removed checkpoint at', file_path)
        except FileNotFoundError:
            print('No checkpoint at ', file_path)

    show_dict(task.metrics.value())

    return float(task.metrics.value()['validation_mean_jaccard_distance'])
Example #10
0
def main(task='rte',
         bootstrapping_seed=1,
         sampler_seed=1,
         init_seed=1,
         global_seed=1,
         learning_rate=0.00002,
         beta1=0.9,
         beta2=0.999,
         weight_decay=0.0,
         attention_probs_dropout_prob=0.1,
         hidden_dropout_prob=0.1,
         batch_size=32,
         weight_init='normal',
         warmup=0,
         ratio=0.1,
         init_std=0.2,
         epoch=3,
         half=False,
         hpo_done=False,
         uid=None,
         experiment_name=None,
         client=None,
         clean_on_exit=True,
         _interrupt=0):
    """Fine-tune a BERT classification baseline on a GLUE task and return
    its validation error rate (or None if the metric is absent).

    Parameters
    ----------
    task: GLUE task name, used to select both model ('bert-{task}') and
        dataset ('glue-{task}')
    bootstrapping_seed / sampler_seed / init_seed / global_seed: seeds for
        the bootstrap split, sampling order, weight init, and the global
        RNGs (set after task.init, see below)
    _interrupt: when non-zero, raise an interrupt every `_interrupt` epochs
    clean_on_exit: remove the checkpoint file once training finishes
    """

    print('seeds: init {} / global {} / sampler {} / bootstrapping {}'.format(
        init_seed, global_seed, sampler_seed, bootstrapping_seed))

    # NOTE(review): sibling scripts use `option(...)` (singular) for this
    # helper -- confirm `options` is the intended name
    base_folder = options('state.storage', '/tmp/storage')
    storage = StateStorage(folder=base_folder)

    split_method = {
        'split_method': 'bootstrap',
        'ratio': ratio,
        'seed': bootstrapping_seed,
        'balanced': False
    }

    # shadows the `task` string argument from here on
    task = classification_baseline("bert-{}".format(task),
                                   'normal',
                                   'adam',
                                   schedule='warmup',
                                   dataset="glue-{}".format(task),
                                   split_method=split_method,
                                   sampler_seed=sampler_seed,
                                   init_seed=init_seed,
                                   batch_size=batch_size,
                                   device=fetch_device(),
                                   storage=storage,
                                   half=half,
                                   hpo_done=hpo_done,
                                   verbose=False,
                                   validate=True)

    hyperparameters = dict(model={
        'initializer': {
            'mean': 0.0,
            'std': init_std
        },
        'attention_probs_dropout_prob': attention_probs_dropout_prob,
        'hidden_dropout_prob': hidden_dropout_prob
    },
                           optimizer={
                               'lr': learning_rate,
                               'beta1': beta1,
                               'beta2': beta2,
                               'weight_decay': weight_decay
                           },
                           lr_schedule={
                               'warmup_steps': warmup,
                               # schedule runs in steps, not epochs
                               'max_steps': epoch * len(task.dataloader),
                               'iterations': 'step'
                           })

    show_dict(hyperparameters)

    # Optional remote metric logging
    if client is not None:
        task.metrics.append(
            metric_logger(client=client, experiment=experiment_name))

    if _interrupt:
        from studies import InterruptingMetric
        # Will raise interrupt every `_interrupt` epochs
        task.metrics.append(InterruptingMetric(frequency_epoch=_interrupt))

    task.init(uid=uid, **hyperparameters)

    # NOTE: Seed global once all special inits are done.
    set_seeds(global_seed)

    task.fit(epochs=epoch)

    # Remove checkpoint
    if clean_on_exit:
        file_path = storage._file(uid)
        try:
            os.remove(file_path)
            print('Removed checkpoint at', file_path)
        except FileNotFoundError:
            print('No checkpoint at ', file_path)

    return task.metrics.value().get('validation_error_rate', None)
Example #11
0
def main():
    """Train a finance baseline over a fixed basket of 30 tickers and
    return the final validation loss as a float.

    Reads CLI arguments via arguments(): optimizer, batch_size, window,
    epochs, uri, database, storage.
    """
    from sspace.space import compute_identity

    args = arguments()
    tickers = [
        # 1     2      3     4      5       6     7     8      9    10
        'MO',
        'AEP',
        'BA',
        'BMY',
        'CPB',
        'CAT',
        'CVX',
        'KO',
        'CL',
        'COP',  # 1
        'ED',
        'CVS',
        'DHI',
        'DHR',
        'DRI',
        'DE',
        'D',
        'DTE',
        'ETN',
        'EBAY',  # 2
        'F',
        'BEN',
        'HSY',
        'HBAN',
        'IBM',
        'K',
        'GIS',
        'MSI',
        'NSC',
        'TXN'
    ]
    # historical window used for training data
    start, end = '2000-01-01', '2019-05-10'

    device = fetch_device()

    task = finance_baseline(tickers, start, end, args.optimizer,
                            args.batch_size, device, args.window)

    lr = 1e-8
    # deterministic 16-char uid derived from the experiment configuration,
    # so re-running the same config resumes the same checkpoint
    uid = compute_identity(
        dict(tickers=tickers,
             start=start,
             end=end,
             window=args.window,
             lr=lr,
             epochs=args.epochs), 16)

    # Optional remote metric logging
    if args.uri is not None:
        logger = metric_logger(args.uri, args.database,
                               f'{DEFAULT_EXP_NAME}_{uid}')
        task.metrics.append(logger)

    # Optional checkpointing, keeping the best validation_loss state
    if args.storage is not None:
        storage = StateStorage(
            folder=option('state.storage', '/home/setepenre/zshare/tmp'))
        task.metrics.append(
            CheckPointer(storage=storage,
                         time_buffer=5,
                         keep_best='validation_loss',
                         save_init=True))

    # override the optimizer's default learning rate
    optimizer = task.optimizer.defaults
    optimizer['lr'] = lr

    task.init(optimizer=optimizer, uid=uid)
    task.fit(args.epochs)

    stats = task.metrics.value()
    print(stats)
    return float(stats['validation_loss'])
Example #12
0
def main(bootstrapping_seed=1,
         sampler_seed=1,
         transform_seed=1,
         init_seed=1,
         learning_rate=0.1,
         momentum=0.9,
         weight_decay=5e-4,
         gamma=0.99,
         weight_init='glorot_uniform',
         epoch=120,
         half=False,
         hpo_done=False,
         uid=None,
         experiment_name=None,
         client=None,
         clean_on_exit=True,
         _interrupt=0):
    """Train a vgg11 classification baseline on CIFAR-10 and return its
    validation error rate as a float.

    Parameters
    ----------
    bootstrapping_seed / sampler_seed / transform_seed / init_seed: seeds
        for the bootstrap split, sampling order, data augmentation, and
        weight initialization
    gamma: decay factor of the exponential LR schedule
    _interrupt: when non-zero, raise an interrupt every `_interrupt` epochs
        (exercises checkpoint/resume)
    clean_on_exit: remove the checkpoint file once training finishes
    """

    # NOTE(review): sibling scripts use `option(...)` (singular) for this
    # helper -- confirm `options` is the intended name
    base_folder = options('state.storage', '/tmp')
    # checkpoint at most every 5 minutes
    storage = StateStorage(folder=base_folder, time_buffer=5 * 60)
    print(base_folder)

    sampling_method = {
        'split_method': 'bootstrap',
        'ratio': 0.1666,
        'seed': bootstrapping_seed,
        'balanced': True
    }

    batch_size = 128

    task = classification_baseline('vgg11',
                                   'glorot_uniform',
                                   'sgd',
                                   schedule='exponential',
                                   dataset='cifar10',
                                   batch_size=batch_size,
                                   device=fetch_device(),
                                   data_augment=True,
                                   split_method=sampling_method,
                                   sampler_seed=sampler_seed,
                                   transform_seed=transform_seed,
                                   init_seed=init_seed,
                                   storage=storage,
                                   half=half,
                                   hpo_done=hpo_done,
                                   verbose=False,
                                   validate=True)

    hyperparameters = dict(model={'initializer': {
        'gain': 1.0
    }},
                           optimizer=dict(lr=learning_rate,
                                          momentum=momentum,
                                          weight_decay=weight_decay),
                           lr_schedule=dict(gamma=gamma))

    show_dict(hyperparameters)

    # Optional remote metric logging
    if client is not None:
        task.metrics.append(
            metric_logger(client=client, experiment=experiment_name))

    if _interrupt:
        from studies import InterruptingMetric
        # Will raise interrupt every `_interrupt` epochs
        task.metrics.append(InterruptingMetric(frequency_epoch=_interrupt))
        # checkpoint immediately so resume-after-interrupt has state to load
        storage.time_buffer = 0

    task.init(uid=uid, **hyperparameters)

    task.fit(epochs=epoch)

    # Remove checkpoint
    if clean_on_exit:
        file_path = storage._file(uid)
        try:
            os.remove(file_path)
            print('Removed checkpoint at', file_path)
        except FileNotFoundError:
            print('No checkpoint at ', file_path)

    show_dict(task.metrics.value())

    return float(task.metrics.value()['validation_error_rate'])