Example #1
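All of the examples on this page follow the same pattern: build a config dict (using Ray Tune's sample_from / grid_search for hyperparameters) and wrap it in a Tune Experiment. They appear to share a common set of imports; a minimal sketch of those assumptions (the RayExperiment alias for ray.tune.Experiment is inferred from how it is used, and the Trainable* classes and named_target_matrix come from the surrounding project, not from Ray):

import argparse
import datetime
import math
import random
import subprocess

import numpy as np
from ray.tune import Experiment as RayExperiment, grid_search, sample_from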
def fft_experiment_block(trainable, size, ntrials, nsteps, nepochsvalid,
                         result_dir, nthreads, smoke_test):
    config = {
        'target_matrix': named_target_matrix('dft', size),
        'lr': sample_from(lambda spec: math.exp(
            random.uniform(math.log(1e-4), math.log(5e-1)))),
        'seed': sample_from(lambda spec: random.randint(0, 1 << 16)),
        'n_steps_per_epoch': nsteps,
        'n_epochs_per_validation': nepochsvalid,
        'complex': True,
    }
    experiment = RayExperiment(
        name=f'Fft_factorization_{trainable.__name__}_{size}',
        run=trainable,
        local_dir=result_dir,
        num_samples=ntrials,
        checkpoint_at_end=True,
        resources_per_trial={'cpu': nthreads, 'gpu': 0},
        stop={
            'training_iteration': 1 if smoke_test else 99999,
            'negative_loss': -1e-8
        },
        config=config,
    )
    return experiment
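For context, an experiment built this way would typically be handed to Ray Tune's run_experiments API. A minimal usage sketch, assuming Ray has been initialized and a hypothetical TrainableFft class implements the training loop:

import ray
from ray.tune import run_experiments

ray.init()
# TrainableFft is a placeholder for whichever tune.Trainable subclass
# does the factorization training.
experiment = fft_experiment_block(TrainableFft, size=8, ntrials=20,
                                  nsteps=200, nepochsvalid=5,
                                  result_dir='./results', nthreads=1,
                                  smoke_test=True)
trials = run_experiments(experiment)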
Example #2
def vandermonde_experiment_complex(fixed_order, softmax_fn, size, ntrials, nsteps, result_dir, nthreads, smoke_test):
    assert softmax_fn in ['softmax', 'sparsemax']
    config = {
        'fixed_order': fixed_order,
        'softmax_fn': softmax_fn,
        'size': size,
        'lr': sample_from(lambda spec: math.exp(random.uniform(math.log(1e-4), math.log(5e-1)))),
        'seed': sample_from(lambda spec: random.randint(0, 1 << 16)),
        'perm': sample_from(lambda spec: random.choice(['id', 'br', 'dct'])),
        'n_steps_per_epoch': nsteps,
    }
    if (not fixed_order) and softmax_fn == 'softmax':
        config['semantic_loss_weight'] = sample_from(lambda spec: math.exp(random.uniform(math.log(5e-3), math.log(5e-1))))
    experiment = RayExperiment(
        name=f'VandermondeEval_factorization_complex_{fixed_order}_{softmax_fn}_{size}',
        run=TrainableVandermondeComplex,
        local_dir=result_dir,
        num_samples=ntrials,
        checkpoint_at_end=True,
        resources_per_trial={'cpu': nthreads, 'gpu': 0},
        stop={
            'training_iteration': 1 if smoke_test else 99999,
            'negative_loss': -1e-8
        },
        config=config,
    )
    return experiment
Example #3
def ops_experiment(size, ntrials, nsteps, result_dir, nthreads, smoke_test):
    config = {
        'size': size,
        'lr': sample_from(lambda spec: math.exp(
            random.uniform(math.log(1e-4), math.log(5e-1)))),
        'seed': sample_from(lambda spec: random.randint(0, 1 << 16)),
        'n_steps_per_epoch': nsteps,
    }
    experiment = RayExperiment(
        name=f'Ops_factorization_{size}',
        run=TrainableOps,
        local_dir=result_dir,
        num_samples=ntrials,
        checkpoint_at_end=True,
        resources_per_trial={'cpu': nthreads, 'gpu': 0},
        stop={
            'training_iteration': 1 if smoke_test else 99999,
            'negative_loss': -1e-8
        },
        config=config,
    )
    return experiment
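The 'lr' entry that recurs in these configs is a log-uniform draw: exponentiating a uniform sample of the logarithm spreads trials evenly across orders of magnitude between 1e-4 and 5e-1, rather than clustering near the upper bound. An equivalent standalone helper, as a sketch:

import math
import random

def log_uniform(low, high):
    # Uniform in log-space, so each decade between low and high
    # is sampled with equal probability.
    return math.exp(random.uniform(math.log(low), math.log(high)))

# e.g. 'lr': sample_from(lambda spec: log_uniform(1e-4, 5e-1))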
Example #4
def fft_factorization_sparsemax_perm_front(argv):
    parser = argparse.ArgumentParser(description='Learn to factor Fft matrix')
    parser.add_argument('--size',
                        type=int,
                        default=8,
                        help='Size of matrix to factor, must be power of 2')
    parser.add_argument('--ntrials',
                        type=int,
                        default=20,
                        help='Number of trials for hyperparameter tuning')
    parser.add_argument('--nsteps',
                        type=int,
                        default=200,
                        help='Number of steps per epoch')
    parser.add_argument('--nmaxepochs',
                        type=int,
                        default=200,
                        help='Maximum number of epochs')
    parser.add_argument('--result-dir',
                        type=str,
                        default='./results',
                        help='Directory to store results')
    parser.add_argument('--nthreads',
                        type=int,
                        default=1,
                        help='Number of CPU threads per job')
    parser.add_argument('--smoke-test',
                        action='store_true',
                        help='Finish quickly for testing')
    args = parser.parse_args(argv)
    experiment = RayExperiment(
        name=f'Fft_factorization_sparsemax_perm_front_{args.size}',
        run=TrainableFftFactorSparsemaxPermFront,
        local_dir=args.result_dir,
        num_samples=args.ntrials,
        checkpoint_at_end=True,
        resources_per_trial={'cpu': args.nthreads, 'gpu': 0},
        stop={
            'training_iteration': 1 if args.smoke_test else 99999,
            'negative_loss': -1e-8
        },
        config={
            'size': args.size,
            'lr': sample_from(lambda spec: math.exp(
                random.uniform(math.log(1e-4), math.log(5e-1)))),
            'seed': sample_from(lambda spec: random.randint(0, 1 << 16)),
            'n_steps_per_epoch': args.nsteps,
        },
    )
    return experiment, args
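Unlike the other examples, Example #4 parses its own command line, so it can be driven either from sys.argv or with an explicit argument list. A usage sketch:

# Equivalent to: python script.py --size 16 --ntrials 5 --smoke-test
experiment, args = fft_factorization_sparsemax_perm_front(
    ['--size', '16', '--ntrials', '5', '--smoke-test'])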
Example #5
def distillation_experiment(model_args, objective, optimizer, ntrials,
                            result_dir, cuda, smoke_test, teacher_model,
                            teacher_model_path, input_cov_path, dataset,
                            min_lr, max_lr, momentum, nsteps, nepochsvalid):
    # config={'objective': objective, 'optimizer': optimizer, 'lr': 0.001, 'seed': 42, 'device': 'cuda', 'model_args': dict(model_args), 'teacher_model': teacher_model, 'teacher_model_path': teacher_model_path, 'input_cov_path': input_cov_path, 'dataset': dataset, 'momentum': momentum, 'n_steps_per_epoch': nsteps, 'n_epochs_per_validation': nepochsvalid,}
    assert optimizer in ['Adam', 'SGD'], 'Only Adam and SGD are supported'
    config = {
        'objective': objective,
        'optimizer': optimizer,
        # Adam and SGD draw from the same log-uniform range here
        'lr': sample_from(lambda spec: math.exp(
            random.uniform(math.log(min_lr), math.log(max_lr)))),
        'seed': sample_from(lambda spec: random.randint(0, 1 << 16)),
        'device': 'cuda' if cuda else 'cpu',
        # Need to copy @model_args as sacred created a read-only dict
        'model_args': dict(model_args),
        'teacher_model': teacher_model,
        'teacher_model_path': teacher_model_path,
        'input_cov_path': input_cov_path,
        'dataset': dataset,
        'momentum': momentum,
        'n_steps_per_epoch': nsteps,
        'n_epochs_per_validation': nepochsvalid,
    }
    model_args_print = '_'.join(
        [f'{key}_{value}' for key, value in model_args.items()])
    experiment = RayExperiment(
        name=f'{teacher_model}_{objective}_{model_args_print}_{optimizer}',
        run=TrainableDistillCovModel,
        local_dir=result_dir,
        num_samples=ntrials,
        checkpoint_at_end=True,
        checkpoint_freq=1000,  # Just to enable recovery with @max_failures
        max_failures=-1,
        resources_per_trial={'cpu': 2, 'gpu': 0.5 if cuda else 0},
        stop={"training_iteration": 1 if smoke_test else 9999},
        config=config,
    )
    return experiment
Example #6
def transform_experiment(model, target, size, complex, param, lr_min, lr_max,
                         ntrials, nsteps, nepochsvalid, result_dir, cuda,
                         nthreads, smoke_test, b):
    # assert model in ['B', 'BP', 'PBT', 'BPP', 'BPBP', 'BBT', 'BBB'], f'Model {model} not implemented'
    config = {
        'model': model,
        'target_matrix': target,
        'size': size,
        'complex': complex,
        # 'share_logit': sample_from(lambda spec: np.random.choice((True, False), size=2)),
        'share_logit': True,
        'bfargs': b,
        'param': param,
        # 'lr': sample_from(lambda spec: math.exp(random.uniform(math.log(1e-4), math.log(5e-1)))),
        'lr': sample_from(lambda spec: math.exp(
            random.uniform(math.log(lr_min), math.log(lr_max)))),
        'seed': sample_from(lambda spec: random.randint(0, 1 << 16)),
        'n_steps_per_epoch': nsteps,
        'n_epochs_per_validation': nepochsvalid,
        'device': 'cuda' if cuda else 'cpu',
    }
    b_args = '_'.join([k + ':' + str(v) for k, v in b.items()])
    commit_id = subprocess.check_output(
        ['git', 'rev-parse', '--short', 'HEAD']).strip().decode('utf-8')
    experiment = RayExperiment(
        # name=f'{commit_id}_{target}_factorization_{model}_{complex}_{size}_{param}',
        name=f'{size}_{target}_{model}_{b_args}_c{complex}_{commit_id}',
        run=TrainableBP,
        local_dir=result_dir,
        num_samples=ntrials,
        checkpoint_at_end=True,
        resources_per_trial={'cpu': nthreads, 'gpu': 0.25 if cuda else 0},
        stop={
            'training_iteration': 1 if smoke_test else 99999,
            'negative_loss': -1e-8
        },
        config=config,
    )
    return experiment
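Examples #6, #10, and #11 tag the experiment name with the short git commit hash via subprocess. Note that check_output raises CalledProcessError when run outside a git work tree; a slightly more defensive variant of the same call, as a sketch:

import subprocess

def get_commit_id(default='nogit'):
    # Fall back to a placeholder when not inside a git repository
    # or when the git binary is unavailable.
    try:
        return subprocess.check_output(
            ['git', 'rev-parse', '--short', 'HEAD']).strip().decode('utf-8')
    except (subprocess.CalledProcessError, OSError):
        return default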
Example #7
def distillation_experiment(model, model_args, optimizer, ntrials, result_dir,
                            train_dir, workers, cuda, smoke_test,
                            teacher_model, dataset, min_lr, max_lr, momentum,
                            pretrained):
    assert optimizer in ['Adam', 'SGD'], 'Only Adam and SGD are supported'
    config = {
        'optimizer': optimizer,
        # Adam and SGD draw from the same log-uniform range here
        'lr': sample_from(lambda spec: math.exp(
            random.uniform(math.log(min_lr), math.log(max_lr)))),
        'seed': sample_from(lambda spec: random.randint(0, 1 << 16)),
        'device': 'cuda' if cuda else 'cpu',
        'model': {'name': model, 'args': model_args},
        'teacher_model': teacher_model,
        'train_dir': train_dir,
        'workers': workers,
        'dataset': dataset,
        'momentum': momentum,
        'pretrained': pretrained,
    }
    model_args_print = '_'.join(
        [f'{key}_{value}' for key, value in model_args.items()])
    experiment = RayExperiment(
        name=f'{model}_{model_args_print}_{optimizer}',
        run=TrainableModel,
        local_dir=result_dir,
        num_samples=ntrials,
        checkpoint_at_end=True,
        checkpoint_freq=1000,  # Just to enable recovery with @max_failures
        max_failures=-1,
        resources_per_trial={'cpu': 4, 'gpu': 1 if cuda else 0},
        stop={"training_iteration": 1 if smoke_test else 9999},
        config=config,
    )
    return experiment
Example #8
def dynamic_conv_experiment(model, model_args, encoder, decoder,
                            structure_lr_multiplier, nmaxupdates, ntrials,
                            result_dir, cuda, smoke_test):
    # name=f"{model}_{model_args}_encoder_[{'-'.join(encoder)}]_decoder_[{'-'.join(decoder)}]_structlr_{structure_lr_multiplier}"
    name = f"{model}_{model_args}_encoder_[{'-'.join(encoder)}]_decoder_[{'-'.join(decoder)}]_structlr_grid"
    config = {
        # 'lr': sample_from(lambda spec: math.exp(random.uniform(math.log(1e-4), math.log(1e-3)))),
        # 'lr': grid_search([5e-4, 7e-4, 9e-4, 11e-4]),
        # 'lr': grid_search([1e-4, 2.5e-4, 5e-4, 7.5e-4]),
        'lr': 5e-4,
        # 'weight_decay': sample_from(lambda spec: math.exp(random.uniform(math.log(1e-6), math.log(5e-4)))) if model == 'DynamicConv' else 1e-4,
        'weight_decay': 1e-4,
        # Transformer seems to need dropout 0.3
        # 'dropout': sample_from(lambda spec: random.uniform(0.1, 0.3)) if model == 'DynamicConv' else 0.3,
        'dropout': 0.3,
        'seed': sample_from(lambda spec: random.randint(0, 1 << 16)),
        # Need to copy @encoder as sacred created a read-only list
        'encoder': list(encoder),
        'decoder': list(decoder),
        # 'structure-lr-multiplier': structure_lr_multiplier,
        'structure-lr-multiplier': grid_search([0.25, 0.5, 1.0, 2.0, 4.0]),
        'device': 'cuda' if cuda else 'cpu',
        'model': {'name': model, 'args': model_args},
        'nmaxupdates': nmaxupdates,
        'result_dir': result_dir + '/' + name
    }
    experiment = RayExperiment(
        name=name,
        run=TrainableModel,
        local_dir=result_dir,
        num_samples=ntrials,
        checkpoint_at_end=False,
        checkpoint_freq=1000,  # Just to enable recovery with @max_failures
        max_failures=-1,
        resources_per_trial={'cpu': 2, 'gpu': 1 if cuda else 0},
        stop={"training_iteration": 1},
        config=config,
    )
    return experiment
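One detail worth noting in Example #8: grid_search interacts multiplicatively with num_samples. Ray Tune repeats the full grid num_samples times, so this experiment launches ntrials × 5 trials, one per (repeat, multiplier) pair. A minimal sketch of the pattern:

from ray.tune import grid_search

config = {'structure-lr-multiplier': grid_search([0.25, 0.5, 1.0, 2.0, 4.0])}
# With num_samples=ntrials, Ray Tune expands this grid once per sample,
# producing ntrials * 5 trials in total.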
Example #9
def transform_experiment(model, target, size, complex, ntrials, nsteps,
                         nepochsvalid, result_dir, cuda, nthreads, smoke_test):
    assert model in ['B', 'BP', 'PBT', 'BPP', 'BPBP'], f'Model {model} not implemented'
    config = {
        'model': model,
        'target_matrix': target,
        'size': size,
        'complex': complex,
        'share_logit': sample_from(
            lambda spec: np.random.choice((True, False), size=2)),
        'lr': sample_from(lambda spec: math.exp(
            random.uniform(math.log(1e-4), math.log(5e-1)))),
        'seed': sample_from(lambda spec: random.randint(0, 1 << 16)),
        'n_steps_per_epoch': nsteps,
        'n_epochs_per_validation': nepochsvalid,
        'device': 'cuda' if cuda else 'cpu',
    }
    experiment = RayExperiment(
        name=f'{target}_factorization_{model}_{complex}_{size}',
        run=TrainableBP,
        local_dir=result_dir,
        num_samples=ntrials,
        checkpoint_at_end=True,
        resources_per_trial={'cpu': nthreads, 'gpu': 0.25 if cuda else 0},
        stop={
            'training_iteration': 1 if smoke_test else 99999,
            'negative_loss': -1e-8
        },
        config=config,
    )
    return experiment
Example #10
def cifar10_experiment(dataset, model, args, optimizer, use_hyperband, lr, lr_decay, weight_decay, ntrials, nmaxepochs, batch, resume_pth, result_dir, cuda, smoke_test):
    assert optimizer in ['Adam', 'SGD'], 'Only Adam and SGD are supported'
    if lr_decay is None:
        lr_decay = {'factor': 1.0, 'period': 1000, 'milestones': None}
    config = {
        'optimizer': optimizer,
        'switch_ams': int(0.5 * nmaxepochs) if optimizer == 'Adam' else None,
        'lr': grid_search(lr['grid']) if lr['grid'] is not None else sample_from(
            lambda spec: math.exp(random.uniform(math.log(lr['min']), math.log(lr['max'])))),
        # 'lr_decay_factor': 0.2 if lr_decay else 1.0,
        # 'lr_decay_period': lr_decay_period if lr_decay else 10000,
        # 'decay_milestones': decay_milestones,
        'lr_decay': lr_decay,
        'weight_decay': 5e-4 if weight_decay else 0.0,
        'seed': sample_from(lambda spec: random.randint(0, 1 << 16)),
        'device': 'cuda' if cuda else 'cpu',
        'model': {'name': model, 'args': args},
        'dataset': {'name': dataset, 'batch': batch},
    }
    smoke_str = 'smoke_' if smoke_test else ''  # for easy finding and deleting unimportant logs
    args_str = '_'.join([k + ':' + str(v) for k, v in args.items()])
    timestamp = datetime.datetime.now().replace(microsecond=0).isoformat()
    commit_id = subprocess.check_output(['git', 'rev-parse', '--short', 'HEAD']).strip().decode('utf-8')
    experiment = RayExperiment(
        name=f'{smoke_str}{dataset.lower()}_{model}_{args_str}_{optimizer}_epochs_{nmaxepochs}_{timestamp}_{commit_id}',
        run=TrainableModel,
        local_dir=result_dir,
        num_samples=ntrials if not smoke_test else 1,
        checkpoint_at_end=True,
        checkpoint_freq=1000,  # Just to enable recovery with @max_failures
        max_failures=0,
        resources_per_trial={'cpu': 4, 'gpu': 1 if cuda else 0},
        stop={"training_iteration": 1 if smoke_test else nmaxepochs},
        restore=resume_pth,
        config=config,
    )
    return experiment
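Example #10 chooses its learning-rate search strategy at config-build time: an explicit grid when lr['grid'] is given, otherwise a log-uniform random draw between lr['min'] and lr['max']. The two shapes the lr argument can take, as a sketch (the bounds are illustrative, not from the source):

# Exhaustive: every trial gets one of these exact values.
lr = {'grid': [1e-3, 5e-3, 1e-2], 'min': None, 'max': None}

# Random: each trial draws its own log-uniform value.
lr = {'grid': None, 'min': 1e-4, 'max': 5e-1}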
Example #11
def cifar10_experiment(
        dataset, model, args, optimizer, nmaxepochs, lr_decay, lr_decay_period,
        plr_min, plr_max, weight_decay, pwd, pwd_min, pwd_max, ntrials,
        result_dir, cuda, smoke_test, unsupervised, batch, tv_norm, tv_p,
        tv_sym, temp_min, temp_max, anneal_ent_min, anneal_ent_max,
        anneal_sqrt, entropy_p, restore_perm,
        resume_pth):  # TODO clean up and set min,max to pairs/dicts
    assert optimizer in ['Adam', 'SGD'], 'Only Adam and SGD are supported'
    # If we're fully resuming training from the checkpoint, there is no
    # point in restoring any part of the model
    assert restore_perm is None or resume_pth is None
    if restore_perm is not None:
        restore_perm = '/dfs/scratch1/albertgu/learning-circuits/cnn/saved_perms/' + restore_perm
        print("Restoring permutation from", restore_perm)

    args_rand = args.copy()
    if temp_min is not None and temp_max is not None:
        args_rand['temp'] = sample_from(lambda spec: math.exp(
            random.uniform(math.log(temp_min), math.log(temp_max))))
    # args_rand['samples'] = sample_from(lambda _: np.random.choice((8,16)))
    # args_rand['sig'] = sample_from(lambda _: np.random.choice(('BT1', 'BT4')))

    tv = {'norm': tv_norm, 'p': tv_p}
    if tv_sym == 'true':
        tv['sym'] = sample_from(lambda _: np.random.choice((True,)))
    elif tv_sym == 'false':
        tv['sym'] = sample_from(lambda _: np.random.choice((False,)))
    elif tv_sym == 'random':
        tv['sym'] = sample_from(lambda _: np.random.choice((True, False)))
    else:
        assert tv_sym is None, 'tv_sym must be true, false, or random'
        tv['sym'] = False

    if anneal_ent_max == 0.0:
        anneal_entropy = 0.0
    else:
        anneal_entropy = sample_from(lambda _: math.exp(
            random.uniform(math.log(anneal_ent_min), math.log(anneal_ent_max))))

    name_smoke_test = 'smoke_' if smoke_test else ''  # for easy finding and deleting unimportant logs
    name_args = '_'.join([k + ':' + str(v) for k, v in args.items()])
    config = {
        'optimizer': optimizer,
        # 'lr': sample_from(lambda spec: math.exp(random.uniform(math.log(2e-5), math.log(1e-2)) if optimizer == 'Adam'
        # Note: the SGD learning rate is drawn once when the config is built
        # (there is no sample_from), so all trials share the same value.
        'lr': 2e-4 if optimizer == 'Adam' else math.exp(
            random.uniform(math.log(0.025), math.log(0.2))),
        'plr': sample_from(lambda spec: math.exp(
            random.uniform(math.log(plr_min), math.log(plr_max)))),
        # 'lr_decay_factor': sample_from(lambda spec: random.choice([0.1, 0.2])) if lr_decay else 1.0,
        'lr_decay_factor': 0.2 if lr_decay else 1.0,
        'lr_decay_period': lr_decay_period,
        # 'weight_decay': sample_from(lambda spec: math.exp(random.uniform(math.log(1e-6), math.log(5e-4)))) if weight_decay else 0.0,
        'weight_decay': 5e-4 if weight_decay else 0.0,
        'pwd': sample_from(lambda spec: math.exp(
            random.uniform(math.log(pwd_min), math.log(pwd_max)))) if pwd else 0.0,
        'seed': sample_from(lambda spec: random.randint(0, 1 << 16)),
        'device': 'cuda' if cuda else 'cpu',
        'model': {'name': model, 'args': args_rand},
        # 'model': {'name': model, 'args': args.update({'temp': sample_from(lambda spec: math.exp(random.uniform(math.log(temp_min), math.log(temp_max))))})},
        'dataset': {'name': dataset, 'batch': batch},
        'unsupervised': unsupervised,
        # 'tv': {'norm': tv_norm, 'p': tv_p, 'sym': tv_sym},
        # 'tv': {'norm': tv_norm, 'p': tv_p, 'sym': sample_from(lambda _: np.random.choice((True, False)))},
        'tv': tv if unsupervised else None,
        # 'anneal_entropy': sample_from(lambda _: random.uniform(anneal_ent_min, anneal_ent_max)),
        'anneal_entropy': anneal_entropy,
        'anneal_sqrt': anneal_sqrt,
        'entropy_p': entropy_p,
        'restore_perm': restore_perm,
    }
    timestamp = datetime.datetime.now().replace(microsecond=0).isoformat()
    commit_id = subprocess.check_output(
        ['git', 'rev-parse', '--short', 'HEAD']).strip().decode('utf-8')
    stopping_criteria = {"training_iteration": 1 if smoke_test else nmaxepochs}
    if unsupervised:  # TODO group all the unsupervised casework together
        stopping_criteria.update({'model_ent': 200, 'neg_ent': -5.0})

    experiment = RayExperiment(
        # name=f'pcifar10_{model}_{args}_{optimizer}_lr_decay_{lr_decay}_weight_decay_{weight_decay}',
        name=f'{name_smoke_test}{dataset.lower()}_{model}_{name_args}_{optimizer}'
             f'_epochs_{nmaxepochs}_plr_{plr_min}-{plr_max}_{timestamp}_{commit_id}',
        # name=f'{dataset.lower()}_{model}_{args_orig}_{optimizer}_epochs_{nmaxepochs}_lr_decay_{lr_decay}_plr_{plr_min}-{plr_max}_tvsym_{tv_sym}_{timestamp}_{commit_id}',
        run=TrainableModel,
        local_dir=result_dir,
        num_samples=ntrials,
        checkpoint_at_end=True,
        checkpoint_freq=500,  # Just to enable recovery with @max_failures
        max_failures=0,
        # resources_per_trial={'cpu': 4, 'gpu': 0.5 if cuda else 0},
        resources_per_trial={'cpu': 4, 'gpu': 1 if cuda else 0},
        # stop={"training_iteration": 1 if smoke_test else nmaxepochs, 'model_ent': 200, 'neg_ent': -5.0},
        stop=stopping_criteria,
        restore=resume_pth,
        config=config,
    )
    return experiment