Example #1
File: mlp.py Project: pajkossy/nn
def main():

    args = read_args()
    logging.basicConfig(
        level=logging.INFO, format="%(asctime)s : " +
        "%(module)s (%(lineno)s) - %(levelname)s - %(message)s")

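    # 784 inputs and 10 outputs suggest an MNIST-style digit classifier (28x28 images, 10 classes).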
    network = MLP(784, args.hidden, 10)
    train, train_outs, test, test_outs = get_datasets()
    network.train(train,
                  train_outs,
                  args.iterations,
                  args.batch_size,
                  args.learning_rate,
                  args.reg_lambda,
                  args.lr_decay_rate,
                  args.cost)
    network.evaluate(test, test_outs)
    if args.plot_weights_fn:
        plot_weights(network.W1, args.plot_weights_fn)
        logging.info('Weights plotted to {}'.format(args.plot_weights_fn))
Example #2
(num_workers, batch_size, num_models, devices, lr, percent, n_epoch,
 dataset_name) = (args.num_workers, args.batch_size, args.num_models,
                  args.devices, args.lr, args.percent, args.n_epoch,
                  args.dataset_name)

noise_rate, noise_type = None, None
ID = dataset_name
input_channel, num_classes, size = get_input_info(dataset_name)


#################################################################################
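# Label-lookup callback passed to ImbalancedDatasetSampler below.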
def _get_labels(dataset_obj, idx):
    return dataset_obj.get_label(idx)


##################################################################################

########################################Data and Loader#####################################
train_dataset, val_dataset, test_dataset = get_datasets(
    dataset_name, noise_rate, noise_type)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           num_workers=num_workers,
                                           drop_last=False,
                                           pin_memory=True,
                                           sampler=ImbalancedDatasetSampler(
                                               train_dataset,
                                               callback_get_label=_get_labels))
val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                         batch_size=batch_size,
                                         num_workers=num_workers,
                                         drop_last=False,
                                         shuffle=False,
                                         pin_memory=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
Example #3
    'dropout': adaptor_dropout,
    'spatial': True,
    'temporal': True
}

model = models.create_model(datasets,
                            model_config,
                            adaptor_config,
                            device=device)
model.load_state_dict(saved['model_state_dict'], strict=False)

if frozen_predictor:
    for param in model.predictor.parameters():
        param.requires_grad_(False)

datasets = utils.get_datasets(datasets, 9, 1)
scaler = utils.ZScoreScaler(datasets['train'].mean, datasets['train'].std)
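# Two parameter groups: the adaptor trains at the base learning rate, while the
# pretrained predictor is fine-tuned at a much smaller lr of 1e-5.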
optimizer = optim.Adam([{
    'params': model.adaptor.parameters()
}, {
    'params': model.predictor.parameters(),
    'lr': 1e-5
}],
                       lr=learning_rate)
loss = utils.get_loss('MaskedMAELoss')
trainer = utils.OursTrainer(model, loss, scaler, device, optimizer,
                            weight_decay, 2, 5)

utils.train_model(datasets=datasets,
                  batch_size=64,
                  folder=saved_folder,
Example #4
                coord.request_stop(exc)
            finally:
                coord.request_stop()

            coord.join(threads)
        return average_error


if __name__ == '__main__':
    # CLI arguments
    PARSER = argparse.ArgumentParser(description="Evaluate the model")

    # Required arguments
    PARSER.add_argument("--model", required=True, choices=utils.get_models())
    PARSER.add_argument(
        "--dataset", required=True, choices=utils.get_datasets())
    PARSER.add_argument("--checkpoint_dir", required=True)
    PARSER.add_argument("--test", action="store_true")
    PARSER.add_argument("--device", default="/gpu:0")
    ARGS = PARSER.parse_args()

    # Load the required model and dataset, overriding the defaults
    MODEL = getattr(
        importlib.import_module("models." + ARGS.model), ARGS.model)()
    DATASET = getattr(
        importlib.import_module("inputs." + ARGS.dataset), ARGS.dataset)()

    DATASET.maybe_download_and_extract()
    print('{}: {} error = {:.3f}'.format(
        datetime.now(),
        'test' if ARGS.test else 'validation',
Example #5
            # When done, ask the threads to stop.
            coord.request_stop()
            # Wait for threads to finish.
            coord.join(threads)
    return best_validation_error_value


if __name__ == '__main__':
    # CLI arguments
    PARSER = argparse.ArgumentParser(description="Train the model")

    # Required arguments
    PARSER.add_argument("--model", required=True, choices=utils.get_models())
    PARSER.add_argument("--dataset",
                        required=True,
                        choices=utils.get_datasets())

    # Restart train or continue
    PARSER.add_argument("--restart", action='store_true')

    # Learning rate decay arguments
    PARSER.add_argument("--lr_decay", action="store_true")
    PARSER.add_argument("--lr_decay_epochs", type=int, default=25)
    PARSER.add_argument("--lr_decay_factor", type=float, default=0.1)

    # L2 regularization arguments
    PARSER.add_argument("--l2_penalty", type=float, default=0.0)

    # Optimization arguments
    PARSER.add_argument("--optimizer",
                        choices=utils.get_optimizers(),
Example #6
    parser.add_argument('--batch_size', type=int, help='batch_size')
    parser.add_argument('--dropout', type=float, help='dropout value')
    parser.add_argument('--network', type=str, required=True, help='lstm/gru')
    parser.add_argument('--dynet-autobatch', type=int, help='')
    parser.add_argument('--dynet-mem', type=int, help='')
    parser.add_argument('--include_embeddings', type=int, help='')
    parser.add_argument(
        '--features',
        type=int,
        help='whether or not to represent input as phonological features')

    args = parser.parse_args()
    id = args.running_id
    model = dy.Model()
    ablation_mask = [1, 1, 1, 1, 1, 1]  # ["rm", "fr", "it", "sp", "pt", "lt"]
    train, dev, test, test_missing = utils.get_datasets(id, ablation_mask)
    letters, C2I, I2C = utils.create_voc(id)

    latin_embeddings = LatinEmbeddings()

    encoder = Encoder(model, C2I) if not args.features else FeaturesEncoder(
        model, C2I)
    encoders = []
    for i in range(6):  # 6 language encoders + a separator encoder
        #encoder = Encoder(model, C2I)
        encoders.append(encoder)
    encoders.append(Encoder(model, C2I))
    attention_recorder = AttentionRecorder()
    embedding_collector = Collector(encoders, "voc/voc.txt",
                                    "embeddings/embeddings", args.features)
    network = Network(C2I,
Example #7
            num_const += 1
        else:
            # var = tf.Variable(init(shape=[dim]))
            # embed_list.append(var)
            embed_list.append(embed_list[2])
            num_vars += 1
    print(num_const, num_vars)
    return tf.stack(embed_list, axis=0)


if __name__ == '__main__':
    assert sys.argv[1] in models.keys()
    print('using model', sys.argv[1])

    print('loading data')
    start = time()
    trainset, dev, test, vocab = utils.get_datasets(batch_size=BATCH_SIZE,
                                                    num_words=VOCAB_SIZE,
                                                    seq_len=SEQ_LEN)
    print('took', time() - start, 'seconds')
    start = time()
    print('getting embeddings')
    embeddings = utils.get_embeddings(vocab, './glove.6B/glove.6B.300d.txt')
    print('took', time() - start, 'seconds')
    print('initializing embeddings')
    start = time()
    embeddings = init_embeddings(embeddings, vocab, 300)
    print('took', time() - start, 'seconds')
    print('begin training')
    train(vocab, embeddings, trainset, dev, test)
Example #8
def get_dataloader(dataset_dir: str) -> DataLoader:
    datasets = get_datasets(dataset_dir)

    return DataLoader(datasets["test"], batch_size=32, shuffle=False, num_workers=8)
Example #9
def main():

    global args, best_prec1
    global param_avg, train_loss, train_err, test_loss, test_err, arr_time

    args = parser.parse_args()

    set_seed(args.randomseed)

    # Check the save_dir exists or not
    print(args.save_dir)
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # Define model
    model = torch.nn.DataParallel(get_model(args))
    model.cuda()

    # Optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            print('from ', args.start_epoch)
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.evaluate, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Prepare Dataloader
    train_loader, val_loader = get_datasets(args)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    if args.half:
        model.half()
        criterion.half()

    if args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(),
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=0.001,
                                     weight_decay=args.weight_decay)

    ##################################################################################################

    if args.datasets == 'CIFAR10':
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=[100, 150], last_epoch=args.start_epoch - 1)

    elif args.datasets == 'CIFAR100':
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=[150], last_epoch=args.start_epoch - 1)

    if args.arch in ['resnet1202', 'resnet110']:
        # for resnet1202 the original paper uses lr=0.01 for the first 400 minibatches
        # as warm-up, then switches back; in this setup that corresponds to the first epoch.
        for param_group in optimizer.param_groups:
            param_group['lr'] = args.lr * 0.1

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

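    # Save an initial (epoch 0) checkpoint before training starts.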
    is_best = 0
    save_checkpoint(
        {
            'epoch': 0,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
        },
        is_best,
        filename=os.path.join(args.save_dir,
                              'checkpoint_refine_' + str(0) + '.th'))

    print('Start training: ', args.start_epoch, '->', args.epochs)

    # DLDR sampling
    torch.save(model.state_dict(), os.path.join(args.save_dir, str(0) + '.pt'))

    for epoch in range(args.start_epoch, args.epochs):

        # train for one epoch
        print('current lr {:.5e}'.format(optimizer.param_groups[0]['lr']))
        train(train_loader, model, criterion, optimizer, epoch)
        lr_scheduler.step()

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)

        if epoch > 0 and epoch % args.save_every == 0 or epoch == args.epochs - 1:
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                },
                is_best,
                filename=os.path.join(
                    args.save_dir,
                    'checkpoint_refine_' + str(epoch + 1) + '.th'))

        save_checkpoint(
            {
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            },
            is_best,
            filename=os.path.join(args.save_dir, 'model.th'))

        # DLDR sampling
        torch.save(model.state_dict(),
                   os.path.join(args.save_dir,
                                str(epoch + 1) + '.pt'))

    print('train loss: ', train_loss)
    print('train err: ', train_err)
    print('test loss: ', test_loss)
    print('test err: ', test_err)

    print('time: ', arr_time)
Example #10
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import os

from nn.losses import CrossEntropy
from utils import set_seed, get_datasets, get_optimizer, get_model, get_config

if __name__ == "__main__":
    config = get_config()

    # Set the seed for reproducibility
    set_seed()

    # Get data
    train_dataset, val_dataset = get_datasets(config)

    # MODEL
    model = get_model(config['model'])
    # Define loss
    loss = CrossEntropy()
    # Define optimizer
    optimizer = get_optimizer(config['train']['optimizer'], model, loss)

    # Main loop
    train_loss_hist, val_loss_hist, val_acc_hist = list(), list(), list()
    pbar = tqdm(range(config['train']['epochs']))
    lr_decay_config = config['train']['lr_decay']
    for i in pbar:
        # TRAINING
        model.train()
Example #11
def main(args):
    train_dataset, test_dataset, nclasses = utils.get_datasets(
        args, not args.no_normalization)
    new_test_size = int(0.8 * len(test_dataset))
    val_size = len(test_dataset) - new_test_size
    test_dataset, val_dataset = random_split(test_dataset,
                                             [new_test_size, val_size])

    teacher_model = torch.load(args.teacher_model_file).to(args.device)
    teacher_model = teacher_model.eval()

    teacher_base_metric = evaluate(teacher_model, val_dataset, args)
    teacher_test_metric = evaluate(teacher_model, test_dataset, args)
    if not args.test_only:
        train_logits = get_logits(teacher_model, train_dataset, args)
        teacher_model = teacher_model.cpu()

    if args.student_model_file is None:
        student_model = copy_model(
            teacher_model,
            args.device,
            reinitialize=(not args.retain_teacher_weights))
        if args.predictive_pruning:
            student_model = StudentModelWrapper2(student_model, logger, args)
        else:
            student_model = StudentModelWrapper(student_model, logger, args)
        for param in student_model.parameters():
            param.requires_grad = True
    else:
        student_model = torch.load(args.student_model_file)
    del teacher_model

    student_model.args = args
    student_model = student_model.to(args.device)
    student_base_metric = evaluate(student_model, val_dataset, args)

    if args.test_only:
        student_test_metric = evaluate(student_model, test_dataset, args)
        print('teacher_test_metric = %.4f' % (teacher_test_metric))
        print('teacher_base_metric = %.4f' % (teacher_base_metric))
        # logger.info('base_metric = %.4f' % (teacher_base_metric))
        print('student_base_metric = %.4f' % (student_base_metric))
        # logger.info('student_base_metric = %.4f' % student_base_metric)
        return teacher_base_metric, student_base_metric, teacher_test_metric, student_test_metric

    base_metric = max(args.base_metric, teacher_base_metric)

    print('teacher_base_metric = %.4f' % (teacher_base_metric))
    logger.info('teacher_base_metric = %.4f' % teacher_base_metric)
    print('student_base_metric = %.4f' % (student_base_metric))
    logger.info('student_base_metric = %.4f' % student_base_metric)
    print('base_metric = %.4f' % (base_metric))
    logger.info('base_metric = %.4f' % base_metric)

    shrinkable_layers = student_model.get_shrinkable_layers()
    if args.global_pruning:
        student_model = global_compression(student_model,
                                           train_logits,
                                           val_dataset,
                                           test_dataset,
                                           nclasses,
                                           base_metric,
                                           shrinkable_layers,
                                           args,
                                           mLogger=logger)
    else:
        not_shrinkables = []
        if args.reverse_shrink_order:
            shrinkable_layers = shrinkable_layers[::-1]
        if args.random_shrink_order:
            np.random.shuffle(shrinkable_layers)
        if args.shrink_all or len(args.shrink_layer_idxs) > 0:
            old_num_params = num_params = sum(
                [p.numel() for p in student_model.parameters()])
            # rr_iters = args.round_robin_iters if args.round_robin else 1
            # for rri in range(rr_iters):
            rri = 0

            shrinkable_layers_ = shrinkable_layers

            while rri == 0 or old_num_params != num_params:
                student_model.reset()

                shrinkable_layers = shrinkable_layers_

                if len(args.shrink_layer_idxs) > 0:
                    not_shrinkables = [
                        i for i in shrinkable_layers
                        if i not in args.shrink_layer_idxs
                    ]
                    shrinkable_layers = args.shrink_layer_idxs
                else:
                    not_shrinkables = args.exclude_layers[:]

                if rri == 0 and args.start_layer_idx >= 0 and args.start_layer_idx in shrinkable_layers:
                    if args.reverse_shrink_order:
                        not_shrinkables += [
                            i for i in shrinkable_layers
                            if i > args.start_layer_idx
                        ]
                    else:
                        not_shrinkables += [
                            i for i in shrinkable_layers
                            if i < args.start_layer_idx
                        ]
                    shrinkable_layers = shrinkable_layers[
                        shrinkable_layers.index(args.start_layer_idx):]

                print(rri, not_shrinkables, shrinkable_layers,
                      args.exclude_layers, args.shrink_layer_idxs)
                while len(shrinkable_layers) > 0:
                    student_model = iterative_distillation(
                        student_model,
                        shrinkable_layers[0],
                        train_logits,
                        val_dataset,
                        test_dataset,
                        nclasses,
                        base_metric,
                        args,
                        mLogger=logger)
                    if args.train_on_student:
                        train_logits = get_logits(student_model, train_dataset,
                                                  args)
                    print(student_model)
                    new_shrinkable_layers = student_model.get_shrinkable_layers(
                        not_shrinkables)
                    if args.reverse_shrink_order:
                        new_shrinkable_layers = new_shrinkable_layers[::-1]
                    if args.random_shrink_order:
                        np.random.shuffle(new_shrinkable_layers)
                    if set(shrinkable_layers) == set(new_shrinkable_layers):
                        not_shrinkables.append(shrinkable_layers[0])
                    shrinkable_layers = [
                        x for x in new_shrinkable_layers
                        if x not in not_shrinkables
                    ]
                    print(not_shrinkables, shrinkable_layers)

                if not args.round_robin:
                    break

                old_num_params = num_params
                num_params = sum(
                    [p.numel() for p in student_model.parameters()])
                num_dense = sum([
                    sum([p.numel() for p in m.parameters()])
                    for m in student_model.modules()
                    if isinstance(m, nn.Linear)
                ])
                num_conv = sum([
                    sum([p.numel() for p in m.parameters()])
                    for m in student_model.modules()
                    if isinstance(m, nn.Conv2d)
                ])
                print('change in num_params: %d -> %d' %
                      (old_num_params, num_params))
                logger.info('num params: %d' % num_params)
                logger.info('num dense: %d' % num_dense)
                logger.info('num conv: %d' % num_conv)
                rri += 1

        else:
            student_model = iterative_distillation(
                student_model,
                shrinkable_layers[args.start_layer_idx],
                train_logits,
                val_dataset,
                test_dataset,
                nclasses,
                base_metric,
                args,
                mLogger=logger)

    print(student_model)
    test_metric = evaluate(student_model, test_dataset, args)
    print('test_metric = %.4f' % (test_metric))
    print('teacher_test_metric = %.4f' % (teacher_test_metric))
    logger.info('test_metric = %.4f' % (test_metric))
    logger.info('teacher_test_metric = %.4f' % (teacher_test_metric))
    torch.save(student_model, args.outfile)
Example #12
                         minibatch_size)
    return opt_update(i, grads, opt_state)


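# One optimization step: differentiate the loss w.r.t. the current parameters
# and feed the gradients to opt_update.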
@jit
def update(rng, i, opt_state, batch):
    params = get_params(opt_state)
    grads = grad(loss)(params, batch)
    return opt_update(i, grads, opt_state)


if __name__ == '__main__':
    key = random.PRNGKey(0)

    # Create dataset
    X_full = utils.get_datasets(dataset)

    kfold = model_selection.KFold(pieces, shuffle=True, random_state=0)
    for fold_iter, (idx_train, idx_test) in enumerate(
            utils.take(pieces_to_run, kfold.split(X_full))):
        X, X_test = X_full[idx_train], X_full[idx_test]

        scaler = preprocessing.StandardScaler()
        X = scaler.fit_transform(X)
        X_test = scaler.transform(X_test)

        delta = 1. / (X.shape[0]**1.1)

        print('X: {}'.format(X.shape))
        print('X test: {}'.format(X_test.shape))
        print('Delta: {}'.format(delta))
Example #13
import utils
from names import SplitPartNames, DatasetNames, set_names
import trees_algorithms

from sklearn import model_selection

print('downloading datasets...')
datasets = utils.get_datasets()
set_names()

Xs = []
ys = []

for dataset_index in range(0, len(datasets)):
    X, y = utils.split_dataset(datasets[dataset_index])
    Xs.append(X), ys.append(y)

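# Compare ID3 and CART with stratified k-fold cross-validation on every dataset.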
k = 30
cv = model_selection.StratifiedKFold(n_splits=k)

id3_measures = []
cart_measures = []

for dataset_index in range(0, len(datasets)):
    fold = 0
    id3_fold_measures = []
    cart_fold_measures = []
    for train_indexes, test_indexes in cv.split(Xs[dataset_index],
                                                ys[dataset_index]):
        print('processing fold {} of the {} dataset...'.format(
            fold, utils.get_dataset_name(dataset_index)))
Example #14
from models import test

parser = argparse.ArgumentParser()
parser.add_argument('--saving_folder', type=str)
parser.add_argument('--confusion_matrix', action='store_true', default=False)
parser.add_argument('--read_history', action='store_true', default=False)
parser.add_argument('--f1_mean', action='store_true', default=False)
args = parser.parse_args()

run_info = json.load(
    open(os.path.join(args.saving_folder, 'run_info.json'), 'r'))
args = namedtuple('Struct', run_info.keys())(*run_info.values())

datasets = utils.get_datasets(args.dataset,
                              validation=True,
                              window_size=args.window_size,
                              step=args.window_step,
                              downsample=args.downsample_factor)
testing_loader = DataLoader(dataset=datasets['testing_set'],
                            batch_size=args.batch_size,
                            shuffle=True,
                            num_workers=2,
                            drop_last=True,
                            pin_memory=True)

random_sample = next(iter(testing_loader))[0]
model = resnets.HAR_ResNet1D(
    input_channels=random_sample.shape[1],
    kernel_size=args.kernel_size,
    depth=[int(item) for item in args.architecture_depth.split(',')],
    dilated=args.dilated,
Example #15
def home_page():
    datasets = utils.get_datasets()
    return render_template("home.html", dataset_names=datasets)
Example #16
def run():
    model = torch.hub.load('AdeelH/WideResNet-pytorch:torch_hub',
                           'WideResNet',
                           depth=28,
                           num_classes=NUM_CLASSES,
                           widen_factor=2)
    model = model.cuda()
    ema_model = ModelEMA(model, decay=0.999)

    train_params = {}
    train_params['batch_size'] = 100
    train_params['val_batch_size'] = 256

    train_ds, train_subset_ds, val_ds = get_datasets(subset_size=4000)
    train_dl_l = torch.utils.data.DataLoader(
        train_subset_ds,
        batch_size=train_params['batch_size'],
        pin_memory=True,
        num_workers=4,
        shuffle=True,
        drop_last=False,
        collate_fn=collate_fn)
    train_dl_ul = torch.utils.data.DataLoader(
        train_ds,
        batch_size=train_params['batch_size'],
        pin_memory=True,
        num_workers=4,
        shuffle=True,
        drop_last=False,
        collate_fn=collate_fn)
    val_dl = torch.utils.data.DataLoader(
        val_ds,
        batch_size=train_params['val_batch_size'],
        pin_memory=True,
        num_workers=2,
        drop_last=False,
        collate_fn=collate_fn)

    train_params['epochs'] = 300
    train_params['learning_rate'] = 3e-4

    optimizer = optim.Adam(model.parameters(),
                           lr=train_params['learning_rate'],
                           betas=(0.9, 0.999))
    sched = optim.lr_scheduler.CosineAnnealingLR(
        optimizer,
        train_params['epochs'],
        eta_min=train_params['learning_rate'] / 10)

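    # MixMatch hyperparameters below: T is the label-sharpening temperature and α the
    # Beta(α, α) MixUp parameter; w_scale presumably scales the unlabeled-consistency loss,
    # ramped up over the first 40% of epochs.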
    train_mixmatch(model,
                   ema_model,
                   train_dl_l,
                   train_dl_ul,
                   val_dl,
                   optimizer,
                   sched,
                   train_params,
                   num_augs=2,
                   T=0.5,
                   α=0.75,
                   w_scale=100,
                   rampup_epochs=int(train_params['epochs'] * .4),
                   rampdown_epochs=int(train_params['epochs'] / 6),
                   start_epoch=0)
Example #17
def fwd_pass(x):
    x = conv(unfold_1, x, conv_wts_1, conv_bias_1)
    x = F.relu(x)
    x = conv(unfold_2, x, conv_wts_2, conv_bias_2)
    x = F.relu(x)
    x = x.view(x.size(0), -1)
    x = torch.matmul(x, torch.t(fc_1)) + fc_bias_1.unsqueeze(0)
    x = F.relu(x)
    x = torch.matmul(x, torch.t(fc_2)) + fc_bias_2.unsqueeze(0)
    return x


##############################################################################

#################################Data fetch####################################
train_dataset, val_dataset, test_dataset = get_datasets(".")
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=256,
                                           shuffle=True,
                                           num_workers=8)
val_loader = torch.utils.data.DataLoader(val_dataset,
                                         batch_size=256,
                                         shuffle=True,
                                         num_workers=8)
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=256,
                                          shuffle=True,
                                          num_workers=8)
#################################################################################
# optimizer = optim.SGD([conv_wts_1, conv_wts_2, fc_1, fc_2], lr = 0.01, momentum = 0.9)
# optimizer = optim.SGD(model.parameters(), lr = 0.01, momentum = 0.9)
Example #18
def main(args):
    wall_start = time.time()
    parameters = get_parameters(args)

    print("Candidate generator parameters:", parameters)

    datasets = utils.get_datasets(args.include_aida_train,
                                  args.keep_pregenerated_candidates)

    if args.single_dataset:
        datasets = [datasets[0]]

    mentions = utils.get_list_of_mentions(datasets)

    # NUM_TREADS = multiprocessing.cpu_count()
    NUM_THREADS = args.num_threads
    pool = ThreadPool(NUM_THREADS)

    # Split the data into approximately equal parts and give one block to each thread
    data_per_thread = split(mentions, NUM_THREADS)

    if args.keep_pregenerated_candidates:
        arguments = [{
            "id": idx,
            "data": data_bloc,
            "args": args,
            "candidate_generator": Simple_Candidate_Generator(parameters),
            "pregenereted_cands_data_fetcher":
                Pregenerated_Candidates_Data_Fetcher(parameters),
        } for idx, data_bloc in enumerate(data_per_thread)]
    else:
        arguments = [{
            "id": idx,
            "data": data_bloc,
            "args": args,
            "candidate_generator": Simple_Candidate_Generator(parameters),
        } for idx, data_bloc in enumerate(data_per_thread)]

    results = pool.map(run_thread, arguments)

    # Merge the results
    processed_mentions = []
    for _id, mentions in results:
        processed_mentions = processed_mentions + mentions

    has_gold = 0

    pool.terminate()
    pool.join()
    execution_time = (time.time() - wall_start) / 60
    print("The execution took:", execution_time, " minutes")

    # Evaluate the generation
    evaluator = Evaluator(processed_mentions)
    evaluator.candidate_generation(
        save_gold_pos=True,
        save_pregenerated_gold_pos=args.keep_pregenerated_candidates)

    # Dump the data if the dump_mentions flag was set
    if args.dump_mentions:
        print("Dumping processed mentions")
        # Create the directory for the mention dumps if it does not exist
        dump_folder = args.dump_mentions_folder
        os.makedirs(dump_folder, exist_ok=True)

        dump_object = {}
        dump_object["mentions"] = processed_mentions
        dump_object["total_per_dataset"] = evaluator.total_per_dataset
        dump_object["has_gold_per_dataset"] = evaluator.has_gold_per_dataset
        dump_object["parameters"] = parameters
        dump_object["args"] = args
        dump_object["execution_time"] = execution_time

        pickle.dump(
            dump_object,
            open(os.path.join(dump_folder, args.dump_file_id), "wb"),
            protocol=4,
        )

    # evaluator.candidate_generation(max_rank=100)
    return evaluator.recall
Example #19
'''
    Reinforcement Learning

'''
import os
import torch
import torch.backends.cudnn as cudnn
from utils import get_datasets, Mode, Data
import torch.utils.data as D
import argparse

os.environ["CUDA_VISIBLE_DEVICES"] = '2'
cudnn.benchmark = True
device = 'cuda' if torch.cuda.is_available() else 'cpu'

parser = argparse.ArgumentParser(
    description='Dynamic ResNet Reinforcement Learning')
parser.add_argument('--batch_size', type=int, default=128, help='batch size')
args = parser.parse_args()

trainset, testset = get_datasets(Data.cifar10, Mode.with_policy)
trainloader = D.DataLoader(trainset,
                           batch_size=args.batch_size,
                           shuffle=True,
                           num_workers=4)
testloader = D.DataLoader(testset,
                          batch_size=args.batch_size,
                          shuffle=False,
                          num_workers=4)
Example #20
def gmm_client_specific(features, label_dict):
    runs_eer = []
    runs_hter = []

    for _ in range(5):
        train_set, development_set, test_set, train_dev_set = utils.shuffle_split_data(
            features, label_dict)
        train_x, train_y, development_x, development_y, test_x, test_y, train_dev_x, train_dev_y = utils.get_datasets(
            train_set, development_set, test_set, train_dev_set)
        nb_of_components = 11

        all_gmms = build_GMMs(train_set, nb_of_components, label_dict)
        dist_matrix = compute_dist_matrix(development_x, all_gmms, label_dict)
        cur_eers, cur_thresholds = compute_eer_client_threshold(
            dist_matrix, development_y, label_dict)
        runs_eer.append(np.mean(cur_eers))

        print(f"Client thresholds:{np.array(cur_thresholds)}")

        all_gmms = build_GMMs(train_dev_set, nb_of_components, label_dict)
        dist_matrix = compute_dist_matrix(test_x, all_gmms, label_dict)

        client_hters = []
        for i in range(len(label_dict)):
            cur_dm = dist_matrix[:, i]
            genuine_indexes = (test_y == i)
            client_threshold = cur_thresholds[i]
            cur_frr, cur_far = compute_frr_far_client(cur_dm, genuine_indexes,
                                                      client_threshold)
            client_hters.append((cur_frr + cur_far) / 2)

        cur_hter = np.mean(client_hters)
        runs_hter.append(cur_hter)

        print(f"EERs:{np.array(runs_eer)}, HTERs:{np.array(runs_hter)}")

    print(
        f"Average EER:{np.array(runs_eer).mean():.4f}, std:{np.array(runs_eer).std():.4f}"
    )
    print(
        f"Average HTER:{np.array(runs_hter).mean():.4f}, std:{np.array(runs_hter).std():.4f}"
    )
Example #21
def main():
    EPOCHS = 10
    tasks_nb = 50
    models_nb_per_task = 1
    multi_task_dataset = False
    use_kfac = True
    accumulate_last_kfac = False
    ewc = False
    lmbd = 10**4
    seed = 1234
    dataset_name = 'pMNIST'

    save_models = False

    set_seed(seed)
    train_datasets, test_datasets = get_datasets(
        dataset_name=dataset_name,
        task_number=tasks_nb,
        batch_size_train=128,
        batch_size_test=4096,
        include_prev=multi_task_dataset,
        seed=seed)

    all_models = {}
    models = [Net().cuda() for i in range(models_nb_per_task)]
    optimizers = [
        optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
        for model in models
    ]

    kfacs = []
    train_criterion = [
        create_loss_function(kfacs, model, accumulate_last_kfac, lmbd,
                             use_kfac) for model in models
    ]
    test_criterion = torch.nn.CrossEntropyLoss()
    val_accs = [[0.0] * tasks_nb for _ in range(tasks_nb)]

    for task_id in range(tasks_nb):
        task_kfacs = []

        for model_id, model in enumerate(models):
            print('Task {} Model {}:'.format(task_id + 1, model_id + 1))

            for epoch in range(1, EPOCHS + 1):
                train(model, train_datasets[task_id], optimizers[model_id],
                      train_criterion[model_id], epoch, task_id + 1)
                all_models['{:d}-{:d}'.format(task_id,
                                              model_id)] = deepcopy(model)

            for test_task_id in range(tasks_nb):
                print('Test model {} on task {}'.format(
                    model_id + 1, test_task_id + 1),
                      flush=True)
                val_acc = validate(model, test_datasets[test_task_id],
                                   test_criterion)[0].avg.item()

                prev_acc = val_accs[task_id][test_task_id] * model_id
                val_accs[task_id][test_task_id] = (prev_acc +
                                                   val_acc) / (model_id + 1)

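            # Fit a K-FAC curvature approximation on the current task; its statistics
            # feed create_loss_function, which regularizes training on later tasks.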
            task_kfacs.append(KFAC(model, train_datasets[task_id], ewc))
            task_kfacs[-1].update_stats()

        kfacs.append(task_kfacs)

        if accumulate_last_kfac and len(kfacs) > 1:
            for model_kfac_id in range(len(kfacs[-1])):
                for module_id in range(len(kfacs[-1][model_kfac_id].modules)):
                    kfacs[-1][model_kfac_id].m_aa[module_id] += kfacs[-2][
                        model_kfac_id].m_aa[module_id]
                    kfacs[-1][model_kfac_id].m_gg[module_id] += kfacs[-2][
                        model_kfac_id].m_gg[module_id]

        # kfacs[-1][-1].visualize_attr('images/', task_id, 'gg')
        # kfacs[-1][-1].visualize_attr('images/', task_id, 'aa')

        print(
            '#' * 60, 'Avg acc: {:.2f}'.format(
                np.sum(val_accs[task_id][:task_id + 1]) / (task_id + 1)))

    if save_models:
        for i in range(len(kfacs)):
            kfac = kfacs[i][-1]
            with open('kfacs/{:d}_weights.pkl'.format(i), 'wb') as output:
                pickle.dump(kfac.weights, output, pickle.HIGHEST_PROTOCOL)
            with open('kfacs/{:d}_maa.pkl'.format(i), 'wb') as output:
                pickle.dump(kfac.m_aa, output, pickle.HIGHEST_PROTOCOL)
            with open('kfacs/{:d}_mgg.pkl'.format(i), 'wb') as output:
                pickle.dump(kfac.m_gg, output, pickle.HIGHEST_PROTOCOL)

        for model_name, model in all_models.items():
            torch.save(model.state_dict(), 'models/{:s}.pt'.format(model_name))
Example #22
def gmm_global_threshold(features, label_dict):
    runs_eer = []
    runs_hter = []

    for experiment_i in range(5):
        train_set, development_set, test_set, train_dev_set = utils.shuffle_split_data(
            features, label_dict)
        train_x, train_y, development_x, development_y, test_x, test_y, train_dev_x, train_dev_y = utils.get_datasets(
            train_set, development_set, test_set, train_dev_set)
        nb_of_components = 11

        all_gmms = build_GMMs(train_set, nb_of_components, label_dict)
        dist_matrix = compute_dist_matrix(development_x, all_gmms, label_dict)
        cur_eer, cur_threshold = compute_eer(dist_matrix, development_y,
                                             label_dict)
        runs_eer.append(cur_eer)

        if experiment_i == 0:
            utils.plot_scores(dist_matrix, development_y, "First Section",
                              "e1", label_dict)
            frr_list, far_list, threshold_list = compute_frr_far_list(
                dist_matrix, development_y, label_dict)
            utils.plot_far_frr(frr_list, far_list, threshold_list,
                               "First Section", "e1")

        print(f"Threshold:{cur_threshold}")

        all_gmms = build_GMMs(train_dev_set, nb_of_components, label_dict)
        dist_matrix = compute_dist_matrix(test_x, all_gmms, label_dict)
        cur_frr, cur_far = compute_frr_far(dist_matrix, test_y, cur_threshold,
                                           label_dict)
        cur_hter = (cur_frr + cur_far) / 2
        runs_hter.append(cur_hter)

        print(f"EERs:{np.array(runs_eer)}, HTERs:{np.array(runs_hter)}")

        print(
            f"Average EER:{np.array(runs_eer).mean():.4f}, std:{np.array(runs_eer).std():.4f}"
        )
        print(
            f"Average HTER:{np.array(runs_hter).mean():.4f}, std:{np.array(runs_hter).std():.4f}"
        )
Example #23
device = u.get_backend(args)

# initialize logger
logger = WandBLogger(
    args=args,
    name=args.model,
)

# make experiments reproducible
if args.seed:
    u.set_seed(args.seed)

# load dataset
train_loader, val_loader, (width, height, channels) = u.get_datasets(
    dataset=args.dataset,
    batch_size=args.batch_size,
    test_batch_size=args.test_batch_size,
    cuda=args.cuda,
    verbose=args.verbose)

encoder_params = dict(encoder=args.encoder,
                      device=device,
                      noise=args.noise,
                      std=1.0,
                      scaling=args.scale,
                      leak=args.decay)

decoder_params = dict(decoder=args.decoder,
                      device=device,
                      scaling=args.steps * args.scale)

loss_fn = losses.get_loss_function(
Example #24
def ubm(features, label_dict):
    runs_eer = []
    runs_hter = []

    for experiment_i in range(5):
        train_set, development_set, test_set, train_dev_set = utils.shuffle_split_data(
            features, label_dict)
        train_x, train_y, development_x, development_y, test_x, test_y, train_dev_x, train_dev_y = utils.get_datasets(
            train_set, development_set, test_set, train_dev_set)
        nb_of_components = 11

        nb_of_components_background = 15

        all_gmms = build_GMMs(train_set, nb_of_components, label_dict)
        all_ubms = build_UBMs(train_set, nb_of_components_background,
                              label_dict)
        dist_matrix = compute_dist_matrix_with_ubm(development_x, all_gmms,
                                                   all_ubms, label_dict)
        cur_eers, cur_thresholds = compute_eer_client_threshold(
            dist_matrix, development_y, label_dict)
        runs_eer.append(np.mean(cur_eers))

        if experiment_i == 0:
            utils.plot_scores(dist_matrix, development_y, "Second Section",
                              "e2", label_dict)
            frr_list, far_list, threshold_list = compute_frr_far_list(
                dist_matrix, development_y, label_dict)
            utils.plot_far_frr(frr_list, far_list, threshold_list,
                               "Second Section", "e2")

        print(f"Client thresholds:{np.array(cur_thresholds)}")

        all_gmms = build_GMMs(train_dev_set, nb_of_components, label_dict)
        all_ubms = build_UBMs(train_dev_set, nb_of_components_background,
                              label_dict)
        dist_matrix = compute_dist_matrix_with_ubm(test_x, all_gmms, all_ubms,
                                                   label_dict)

        client_hters = []
        for i in range(len(label_dict)):
            cur_dm = dist_matrix[:, i]
            genuine_indexes = (test_y == i)
            client_threshold = cur_thresholds[i]
            cur_frr, cur_far = compute_frr_far_client(cur_dm, genuine_indexes,
                                                      client_threshold)
            client_hters.append((cur_frr + cur_far) / 2)

        cur_hter = np.mean(client_hters)
        runs_hter.append(cur_hter)

        print(f"EERs:{np.array(runs_eer)}, HTERs:{np.array(runs_hter)}")

    print(
        f"Average EER:{np.array(runs_eer).mean():.4f}, std:{np.array(runs_eer).std():.4f}"
    )
    print(
        f"Average HTER:{np.array(runs_hter).mean():.4f}, std:{np.array(runs_hter).std():.4f}"
    )
Example #25
                        type=float,
                        metavar='A',
                        help='value of spike variable (default: 0.5)')
    args = parser.parse_args()
    print('VSC Baseline Experiments\n')
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    #Set reproducibility seed
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    #Define device for training
    device = torch.device('cuda' if args.cuda else 'cpu')
    print(f'Using {device} device...')

    #Load datasets
    train_loader, test_loader, (width, height, channels) = get_datasets(
        args.dataset, args.batch_size, args.cuda)

    # Tune the learning rate (All training rates used were between 0.001 and 0.01)
    vsc = VariationalSparseCoding(args.dataset, width, height, channels,
                                  args.hidden_size, args.latent_size, args.lr,
                                  args.alpha, device, args.log_interval,
                                  args.normalize)
    vsc.run_training(train_loader,
                     test_loader,
                     args.epochs,
                     args.report_interval,
                     args.sample_size,
                     reload_model=not args.do_not_resume)
Example #26
import tensorflow as tf

from utils import MyModel, get_datasets

if __name__ == '__main__':
    train_ds, test_ds = get_datasets()

    # Create an instance of the model
    model = MyModel()

    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

    optimizer = tf.keras.optimizers.Adam()

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

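    # tf.function traces the training step into a TensorFlow graph for faster execution.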
    @tf.function
    def train_step(images, labels):
        with tf.GradientTape() as tape:
            # training=True is only needed if there are layers with different
            # behavior during training versus inference (e.g. Dropout).
            predictions = model(images, training=True)
            loss = loss_object(labels, predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
Example #27
def main():

    path = 'orders/'
    classes_groups, class_map, map_reverse = utils.get_class_maps_from_files(
        path + 'classgroups1.pickle', path + 'map1.pickle',
        path + 'revmap1.pickle')
    print(classes_groups, class_map, map_reverse)

    net = LwF(0, class_map)
    net.to(DEVICE)

    for i in range(int(100 / CLASSES_BATCH)):

        print('-' * 30)
        print(f'**** ITERATION {i+1} ****')
        print('-' * 30)

        #torch.cuda.empty_cache()

        print('Loading the Datasets ...')
        print('-' * 30)

        train_dataset, val_dataset, test_dataset = utils.get_datasets(
            classes_groups[i])

        print('-' * 30)
        print('Updating representation ...')
        print('-' * 30)

        net.update_representation(dataset=train_dataset,
                                  val_dataset=val_dataset,
                                  class_map=class_map,
                                  map_reverse=map_reverse)
        '''
        print('Reducing exemplar sets ...')
        print('-'*30)
        m = int(math.ceil(MEMORY_SIZE/net.n_classes))
        net.reduce_exemplars_set(m)
        print('Constructing exemplar sets ...')
        print('-'*30)
        for y in classes_groups[i]:
           net.construct_exemplars_set(train_dataset.dataset.get_class_imgs(y), m)
        '''

        net.n_known = net.n_classes

        print('Testing ...')
        print('-' * 30)

        print('New classes')
        net.classify_all(test_dataset, map_reverse)

        if i > 0:

            previous_classes = np.array([])
            for j in range(i):
                previous_classes = np.concatenate(
                    (previous_classes, classes_groups[j]))

            prev_classes_dataset, all_classes_dataset = utils.get_additional_datasets(
                previous_classes,
                np.concatenate((previous_classes, classes_groups[i])))

            print('Old classes')
            net.classify_all(prev_classes_dataset, map_reverse)
            print('All classes')
            net.classify_all(all_classes_dataset, map_reverse)

            print('-' * 30)
Example #28
def main():

    global args, best_prec1, Bk, p0, P

    # Check the save_dir exists or not
    print(args.save_dir)
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # Define model
    model = torch.nn.DataParallel(get_model(args))
    model.cuda()

    # Load sampled model parameters
    print('params: from', args.params_start, 'to', args.params_end)
    W = []
    for i in range(args.params_start, args.params_end):
        ############################################################################
        #if (i % 2 == 1 or i % 6 == 2 and i < 150): continue
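        # Keep only every other sampled checkpoint.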
        if (i % 2 == 1): continue

        model.load_state_dict(
            torch.load(os.path.join(args.save_dir,
                                    str(i) + '.pt')))
        W.append(get_model_param_vec(model))
    W = np.array(W)
    print('W:', W.shape)

    # Obtain base variables through PCA
    pca = PCA(n_components=args.n_components)
    pca.fit_transform(W)
    P = np.array(pca.components_)
    print('ratio:', pca.explained_variance_ratio_)
    print('P:', P.shape)

    P = torch.from_numpy(P).cuda()

    # Resume from params_start
    model.load_state_dict(
        torch.load(os.path.join(args.save_dir,
                                str(args.params_start) + '.pt')))

    # Prepare Dataloader
    train_loader, val_loader = get_datasets(args)

    # Define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    if args.half:
        model.half()
        criterion.half()

    cudnn.benchmark = True

    optimizer = optim.SGD(model.parameters(), lr=1, momentum=0)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    print('Train:', (args.start_epoch, args.epochs))
    end = time.time()
    end1 = end
    p0 = get_model_param_vec(model)
    epoch_time = []
    for epoch in range(args.start_epoch, args.epochs):
        # Train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)
        epoch_time.append(time.time() - end1)
        end1 = time.time()
        # Bk = torch.eye(args.n_components).cuda()

        # Evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # Remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)

    print('total time:', time.time() - end)
    print('train loss: ', train_loss)
    print('train acc: ', train_acc)
    print('test loss: ', test_loss)
    print('test acc: ', test_acc)
    print('best_prec1:', best_prec1)
    print('epoch time:', epoch_time)

    # torch.save(model.state_dict(), 'PBFGS.pt',_use_new_zipfile_serialization=False)
    torch.save(model.state_dict(), 'PBFGS.pt')
Example #29
print(f'Will save to {exp_dir}')
if not os.path.exists(exp_dir):
    os.mkdir(exp_dir)
losses_save_path = os.path.join(exp_dir, 'losses.npy')

with open(os.path.join(exp_dir, 'config.yml'), 'w') as f:
    yaml.dump(args.__dict__, f)

print('Configuration file written')

# ************** CREATE DATASET, MODEL AND OPTIMIZER******************

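# Byte-pair-encoding tokenizer (YouTokenToMe), used by the torchtext Field below.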
bpe = yttm.BPE(model=args.bpe_path)
TEXT = torchtext.data.Field(tokenize=lambda x: utils.bpe_tokenize(x, bpe),
                            lower=True)
train_txt, val_txt, test_txt = utils.get_datasets(args.dataset).splits(TEXT)
print('Dataset fetched')
TEXT.build_vocab(train_txt)
vocab_size = len(TEXT.vocab.stoi)
print(f"Unique tokens in vocabulary: {len(TEXT.vocab)}")

device = torch.device(
    f"cuda:{args.gpu_id}" if torch.cuda.is_available() else "cpu")

train_data = utils.batchify(train_txt, TEXT, args.batch_size, device)
val_data = utils.batchify(val_txt, TEXT, args.batch_size, device)

layernorm = not args.nolayernorm
model = transformer.LMTransformer(vocab_size,
                                  args.dmodel,
                                  args.nheads,
Example #30
    default="./logdir",
    help="where to store Tensorboard summaries",
)
parser.add_argument(
    "--save-dir",
    type=str,
    default="./model.ckpt",
    help="where to store Tensorflow model",
)

args = parser.parse_args()

tf.set_random_seed(args.random_seed)

print("Loading dataset...")
data = get_datasets(args.data_path, args.val_split, args.test_split, 32)
print("Dataset loaded")

print("Building graph...")
model = Model(
    data,
    tf.train.AdamOptimizer(learning_rate=0.0008408132388618728),
    0.003683848079337278,
    0.6275728419832726,
    tf.nn.elu,
    1000,
    100,
    0.10805612575300722,
)
print("Graph built")
Example #31
epochs = args.epochs

if not os.path.isdir(data_dir):
    print('{} is not a valid directory'.format(data_dir))
    exit()
if not utils.is_valid_architecture(architecture):
    print('{} is not a valid architecture'.format(architecture))
    exit()
if not torch.cuda.is_available() and args.gpu:
    print('WARNING: CUDA is not available for training, the CPU will be used')

#Load data
trainloader, validloader, testloader = utils.load_data(data_dir)

# Get torchvision architecture
pre_trained_model = utils.get_torchvision_model(architecture)

# Build network
in_features = utils.get_input_features(architecture)
model = setup_model(pre_trained_model, in_features, hidden_units)
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.classifier.parameters(), learning_rate)

# Train the network
train(model, trainloader, validloader, criterion, optimizer, args.gpu, epochs)

# Save the model checkpoint
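# class_to_idx is taken from the first dataset returned by get_datasets (presumably the training set).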
class_to_idx = utils.get_datasets(data_dir)[0].class_to_idx
save_path = args.save_dir + '/checkpoint.pth'
utils.save_checkpoint(model, optimizer, architecture, hidden_units,
                      learning_rate, epochs, class_to_idx, save_path)
Example #32
def patch_dataset(cfg: GumiConfig) -> GumiConfig:
    if cfg.dataset != "dogs-vs-cats":
        return cfg
    cfg.dataset = get_datasets(cfg.dataset_dir)
    return cfg