Esempio n. 1
0
def main():
    """Train a CNN, evaluate it on the test split, and save the metrics."""
    print()

    # Prefer the first GPU when one is available, otherwise run on CPU.
    if torch.cuda.is_available():
        DEVICE = torch.device("cuda:0")  # other devices: cuda:1, cuda:2, ...
        print("Running on the GPU")
    else:
        DEVICE = torch.device("cpu")
        print("Running on the CPU")

    model = CNN().to(DEVICE)

    train_loader, valid_loader, test_loader = get_data_loaders()

    # Fit on train/validation data.
    model, train_history, _, best_epoch = fit(
        model=model,
        data=(train_loader, valid_loader),
        device=DEVICE,
    )

    # Evaluate on the held-out test set.
    test_loss, test_acc = eval_model(model, test_loader, DEVICE)

    print('\nTest loss: {:.3f}            |'.format(test_loss.item()) +
          ' Test Acc: {:.3f}'.format(test_acc))

    # Persist [loss, accuracy] as a small text file.
    results_test = [test_loss.item(), test_acc]
    np.savetxt('results.txt', results_test, fmt='%.3f', delimiter=',')

    print("\n\nDONE!")
Esempio n. 2
0
def main(args):
    """Train fear extinction CNN.

    Loads model and data, trains for ``args.n_epochs`` epochs while
    appending per-epoch train/test metrics to a CSV file, checkpoints
    every ``SAVE_MODEL_EVERY`` epochs, and saves the final model.
    """
    start_time = time.time()
    print('-' * 80)
    log_args(args)
    print('-' * 80)

    model = load_model(args.model, args.pretrained)
    model = model.to(device)
    print('Model loaded.')

    split_across(args.mouse_num, args.datadir, args.mIDs)
    print('Train / test sets created (if necessary).')

    train_loader, test_loader = get_data_loaders(args.mouse_num,
                                                 args.augment,
                                                 args.model,
                                                 args.batch_size,
                                                 args.num_workers,
                                                 args.pin_memory,
                                                 args.datadir)
    print('Data loaded.')

    print('Training.')
    print('-' * 80)
    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # The output CSV path is loop-invariant: build it once, not every epoch.
    out_file = args.name + '_pretrained' + str(int(args.pretrained == True)) + '.csv'
    out_path = os.path.join(args.directory, out_file)

    for epoch in range(1, args.n_epochs + 1):
        train_loss = train(model, criterion, optimizer, train_loader)
        test_loss, pct_correct, f0, f1, t0, t1 = test(model, criterion, test_loader)

        msg = '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s' % (epoch, round(train_loss, 2), round(test_loss, 2), pct_correct, f0, f1, t0, t1)
        print(msg)

        # Append the same metrics as a CSV row.
        msg2 = ','.join([str(epoch), str(round(train_loss, 2)), str(round(test_loss, 2)),
                        str(pct_correct), str(f0), str(f1), str(t0), str(t1)])
        with open(out_path, 'a+') as outfile:
            outfile.write(msg2 + '\n')

        # Periodic checkpoint.
        if epoch % SAVE_MODEL_EVERY == 0:
            name = args.name + '_epoch%s' % (str(epoch))
            save_model(args.directory, model, name)

    hours = round((time.time() - start_time) / 3600, 1)
    print('Job complete in %s hrs.' % hours)

    save_model(args.directory, model, args.name)
    print('Model saved.')
Esempio n. 3
0
    def __init__(self, config, args):
        """Merge CLI ``args`` into ``config``, build the data loaders, the
        discriminator/generator/encoder networks with their Adam optimizers,
        and open a per-run text logger.

        NOTE(review): this is Python 2 code (``print`` statement at the end;
        the logger is opened in binary mode but written with ``str``).
        """
        self.config = config
        # Copy every parsed command-line argument onto the config object so
        # the rest of the trainer only reads from ``self.config``.
        for k, v in args.__dict__.items():
            setattr(self.config, k, v)
        setattr(self.config, 'save_dir', '{}_log'.format(self.config.dataset))

        # Dump the full configuration (attributes sorted by name length)
        # to stdout and remember the string for the log file below.
        disp_str = ''
        for attr in sorted(dir(self.config), key=lambda x: len(x)):
            if not attr.startswith('__'):
                disp_str += '{} : {}\n'.format(attr,
                                               getattr(self.config, attr))
        sys.stdout.write(disp_str)
        sys.stdout.flush()

        # NOTE(review): ``config`` is used below instead of ``self.config`` —
        # equivalent only because the same object was stored above.
        self.labeled_loader, self.unlabeled_loader, self.dev_loader, self.special_set = \
            data.get_data_loaders(config)

        # The three networks, all moved to the default CUDA device.
        self.dis = model.Discriminative(config).cuda()
        self.gen = model.Generator(image_side=config.image_side,
                                   noise_size=config.noise_size).cuda()
        self.enc = model.Encoder(config.image_side,
                                 noise_size=config.noise_size,
                                 output_params=True).cuda()

        # Separate Adam optimizers; note the discriminator uses beta1=0.5
        # while generator/encoder use beta1=0.0.
        self.dis_optimizer = optim.Adam(self.dis.parameters(),
                                        lr=config.dis_lr,
                                        betas=(0.5, 0.999))
        self.gen_optimizer = optim.Adam(self.gen.parameters(),
                                        lr=config.gen_lr,
                                        betas=(0.0, 0.999))
        self.enc_optimizer = optim.Adam(self.enc.parameters(),
                                        lr=config.enc_lr,
                                        betas=(0.0, 0.999))

        self.d_criterion = nn.CrossEntropyLoss()

        if not os.path.exists(self.config.save_dir):
            os.makedirs(self.config.save_dir)

        # Per-run log file: <dataset>.FM+VI.<suffix>.txt inside save_dir.
        log_path = os.path.join(
            self.config.save_dir,
            '{}.FM+VI.{}.txt'.format(self.config.dataset, self.config.suffix))
        self.logger = open(log_path, 'wb')
        self.logger.write(disp_str)

        print self.dis
Esempio n. 4
0
def test_BachNet():
    """Smoke test: run one training epoch on the debug dataset, then compose
    a score from the resulting checkpoint (any exception fails the test)."""
    data_loaders = data.get_data_loaders(
        batch_size=training.std_config.batch_size,
        num_workers=training.std_config.num_workers,
        time_grid=training.std_config.time_grid,
        context_radius=training.std_config.context_radius,
        split=training.std_config.split,
        debug=True,
        overwrite=True)

    # NOTE(review): mutates the shared std_config — fine in an isolated test
    # process, but confirm no sibling test depends on num_epochs.
    training.std_config.num_epochs = 1
    training.train(training.std_config, data_loaders)

    # Latest checkpoint directory; the epoch-1 weights file lives inside it
    # and is named after the directory itself.
    cp_dirname = sorted(glob('checkpoints/*/'))[-1]
    last_subdir = os.path.basename(os.path.normpath(cp_dirname))
    cp_path = cp_dirname + last_subdir + '_epoch=0001.pt'
    soprano_path = 'data/musicxml/001_soprano.xml'
    # The result was previously bound to an unused local; the call is the test.
    inference.compose_score(cp_path, soprano_path)
Esempio n. 5
0
def main(args):
    """Train with per-epoch validation, then evaluate the best saved model
    on the test split."""
    set_cuda(args)
    set_seed(args)

    train_data, val_data, test_data = get_data_loaders(args)
    criterion = get_loss(args)
    net = get_model(args)
    optimizer = get_optimizer(args, parameters=net.parameters())
    xp = get_xp(args, net, optimizer)

    for epoch in range(args.epochs):
        # One logged epoch: bump the counter, train, then validate.
        xp.Epoch.update(1).log()

        train(net, criterion, optimizer, train_data, xp, args)
        test(net, val_data, xp, args)

        # Decay the learning rate at the scheduled milestones.
        if epoch + 1 in args.T:
            decay_optimizer(optimizer, args.decay_factor)

    # Reload the best checkpoint and report test-set performance.
    load_best_model(net, xp)
    test(net, test_data, xp, args)
Esempio n. 6
0
def main(args):
    """Train with per-epoch validation, then evaluate the best saved model
    on both the validation and test splits."""
    set_cuda(args)
    set_seed(args)

    train_data, val_data, test_data = get_data_loaders(args)
    criterion = get_loss(args)
    net = get_model(args)
    optimizer = get_optimizer(args, net, criterion, parameters=net.parameters())
    xp = setup_xp(args, net, optimizer)

    for epoch in range(args.epochs):
        xp.epoch.update(epoch)

        train(net, criterion, optimizer, train_data, args, xp)
        test(net, optimizer, val_data, args, xp)

        # Decay the learning rate at the scheduled milestones.
        if epoch + 1 in args.T:
            decay_optimizer(optimizer, args.decay_factor)

    # Reload the best checkpoint and evaluate on both held-out splits.
    load_best_model(net, '{}/best_model.pkl'.format(args.xp_name))
    test(net, optimizer, val_data, args, xp)
    test(net, optimizer, test_data, args, xp)
Esempio n. 7
0
def tune(cfg):
    """Fine-tune a ResNet on the configured attribute-prediction data.

    Runs ``cfg["tune"]["num_epoch"]`` epochs; after each epoch saves the
    model and reports accuracy/F1 on the validation loader.
    """
    train_loader, valid_loader = get_data_loaders(cfg["data"])
    model = ResNet(cfg)

    # Hoist loop-invariant config lookups out of the hot loops.
    use_gpu = cfg["GPU"]["enable"]
    gpu_name = cfg["GPU"]["name"]
    batch_size = cfg["data"]["batch_size"]

    for i in range(cfg["tune"]["num_epoch"]):
        for j, (images, attr) in enumerate(train_loader):
            attr = attr.type(torch.float)

            # Move the batch to the configured GPU when enabled.
            if use_gpu:
                images = images.cuda(gpu_name)
                attr = attr.cuda(gpu_name)

            ac, f1, loss = model.step(images, attr)
            # Progress line, overwritten in place with '\r'.
            print("\r Done: {}/{} acc {} f1 {} loss {}".
                format(
                    j * batch_size,
                    len(train_loader) * batch_size,
                    ac, f1, loss),
                end='')
        print()
        model.save(i)

        # Validation pass: collect labels/predictions on CPU, then score.
        val_lbl = []
        val_pred = []
        model.set_mode("eval")
        for images, attr in valid_loader:
            attr = attr.type(torch.float)

            if use_gpu:
                images = images.cuda(gpu_name)
                attr = attr.cuda(gpu_name)

            pred = model.predict(images)

            val_lbl.append(attr.detach().cpu())
            val_pred.append(pred.detach().cpu())

        ac, f1 = model.metrics(torch.cat(val_lbl, dim=0).numpy(), torch.cat(val_pred, dim=0).numpy())
        print("{} test acc {}, f1 {}".format(i + 1, ac, f1))
        model.set_mode("train")
Esempio n. 8
0
 def _get_dataloaders(self):
     """Return the (train, validation) dataloader pair for this run's config."""
     loaders = get_data_loaders(self.config)
     train_dl, val_dl = loaders
     return train_dl, val_dl
Esempio n. 9
0
def run(args):
    """Run encrypted training and/or evaluation with PySyft.

    Modes (chosen from ``args``): train for ``args.epochs`` epochs, run a
    full evaluation, or time inference only. Workers may be remote
    (websockets) or in-process virtual workers.
    """
    if args.train:
        print(f"Training over {args.epochs} epochs")
    elif args.test:
        print("Running a full evaluation")
    else:
        print("Running inference speed test")
    print("model:\t\t", args.model)
    print("dataset:\t", args.dataset)
    print("batch_size:\t", args.batch_size)

    hook = sy.TorchHook(torch)

    # Build the worker topology: two data holders plus a crypto provider.
    if args.websockets:
        # Remote workers reachable over local websocket ports.
        alice = DataCentricFLClient(hook, "ws://localhost:7600")
        bob = DataCentricFLClient(hook, "ws://localhost:7601")
        crypto_provider = DataCentricFLClient(hook, "ws://localhost:7602")
        my_grid = sy.PrivateGridNetwork(alice, bob, crypto_provider)
        sy.local_worker.object_store.garbage_delay = 1

    else:
        # In-process simulation of the same topology.
        bob = sy.VirtualWorker(hook, id="bob")
        alice = sy.VirtualWorker(hook, id="alice")
        crypto_provider = sy.VirtualWorker(hook, id="crypto_provider")

    workers = [alice, bob]
    sy.local_worker.clients = workers

    # kwargs used for every .encrypt()/sharing call below.
    encryption_kwargs = dict(workers=workers,
                             crypto_provider=crypto_provider,
                             protocol=args.protocol)
    kwargs = dict(
        requires_grad=args.requires_grad,
        precision_fractional=args.precision_fractional,
        dtype=args.dtype,
        **encryption_kwargs,
    )

    # Optionally pre-generate cryptographic material before timing anything.
    if args.preprocess:
        build_prepocessing(args.model, args.dataset, args.batch_size, workers,
                           args)

    private_train_loader, private_test_loader = get_data_loaders(args,
                                                                 kwargs,
                                                                 private=True)
    public_train_loader, public_test_loader = get_data_loaders(args,
                                                               kwargs,
                                                               private=False)

    model = get_model(args.model,
                      args.dataset,
                      out_features=get_number_classes(args.dataset))

    # Evaluation-only runs start from pretrained weights.
    if args.test and not args.train:
        load_state_dict(model, args.model, args.dataset)

    model.eval()

    if torch.cuda.is_available():
        sy.cuda_force = True

    if not args.public:
        model.encrypt(**kwargs)
        if args.fp_only:  # Just keep the (Autograd+) Fixed Precision feature
            model.get()

    if args.train:
        for epoch in range(args.epochs):
            # NOTE(review): the optimizer is re-created every epoch — confirm
            # this is intentional (it resets momentum each epoch).
            optimizer = optim.SGD(model.parameters(),
                                  lr=args.lr,
                                  momentum=args.momentum)

            if not args.public:
                optimizer = optimizer.fix_precision(
                    precision_fractional=args.precision_fractional,
                    dtype=args.dtype)
            train_time = train(args, model, private_train_loader, optimizer,
                               epoch)
            test_time, accuracy = test(args, model, private_test_loader)
    else:
        test_time, accuracy = test(args, model, private_test_loader)
        if not args.test:
            # Inference-speed mode: report per-item time.
            print(
                f"{ 'Online' if args.preprocess else 'Total' } time (s):\t",
                round(test_time / args.batch_size, 4),
            )
        else:
            # Compare with clear text accuracy
            print("Clear text accuracy is:")
            model = get_model(args.model,
                              args.dataset,
                              out_features=get_number_classes(args.dataset))
            load_state_dict(model, args.model, args.dataset)
            test(args, model, public_test_loader)

    # Audit: report any preprocessed crypto material that went unused/missing.
    if args.preprocess:
        missing_items = [len(v) for k, v in sy.preprocessed_material.items()]
        if sum(missing_items) > 0:
            print("MISSING preprocessed material")
            for key, value in sy.preprocessed_material.items():
                print(f"'{key}':", value, ",")
Esempio n. 10
0
"""

import os
from options.test_options import TestOptions
from data import get_data_loaders
from models import create_model
from util import html

if __name__ == '__main__':
    opt = TestOptions().parse()  # get test options
    # hard-code some parameters for test
    # NOTE(review): the value set is 0 but the comment says 1 — confirm which
    # is intended.
    opt.num_threads = 0  # test code only supports num_threads = 1
    opt.batch_size = 1  # test code only supports batch_size = 1
    opt.shuffle_data = False  # disable data shuffling;
    opt.display_id = -1  # no visdom display; the test code optionally saves the results to a HTML file.
    dataloader = get_data_loaders(
        opt)  # create a dataset given opt.dataset_mode and other options
    model = create_model(
        opt)  # create a model given opt.model and other options
    model.setup(
        opt)  # regular setup: load and print networks; create schedulers
    # create a website
    web_dir = os.path.join(opt.results_dir, opt.name, '{}'.format(
        opt.epoch))  # define the website directory

    print('creating web directory', web_dir)
    webpage = html.HTML(web_dir,
                        'Experiment = %s, Epoch = %s' % (opt.name, opt.epoch))

    # test with eval mode. This only affects layers like batchnorm and dropout.
    if opt.eval:
        model.eval()
Esempio n. 11
0

if __name__ == '__main__':
    use_cuda = torch.cuda.is_available()

    # Minimal namespace-style parameter object.
    class p:
        pass

    p.seed = 13
    p.batch_size = 100
    p.dset = 'imagenet'

    # get data
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for elapsed-time measurement.
    t = time.perf_counter()
    seed(p)
    train_loader, val_loader = data.get_data_loaders(p)
    print(len(train_loader), len(val_loader), p.batch_size)

    # set up saving
    out_dir = '/accounts/projects/vision/scratch/yu_dl/raaz.rsk/cnns_preds'
    os.makedirs(out_dir, exist_ok=True)

    # save the labels: recreate the HDF5 file from scratch each run.
    out_file_labs = oj(out_dir, 'labs' + '.h5')
    if os.path.exists(out_file_labs):
        os.remove(out_file_labs)
    f2 = h5py.File(out_file_labs, "w")
    f2.create_dataset("labs_train", (len(train_loader) * p.batch_size, ),
                      dtype=np.int32)
    f2.create_dataset("labs_val", (len(val_loader) * p.batch_size, ),
                      dtype=np.int32)
Esempio n. 12
0
def fit_vision(p):
    """Train a vision model per parameter object ``p``, recording losses,
    accuracies, margins, weights, and (optionally) singular-value statistics
    at every iteration; results are periodically saved under ``out_name``.
    """
    out_name = p._str(p)  # generate random fname str before saving
    seed(p)
    use_cuda = torch.cuda.is_available()
    device = 'cuda' if use_cuda else 'cpu'

    # pick dataset and model
    print('loading dset...')
    train_loader, test_loader = data.get_data_loaders(p)
    X_train, Y_train_onehot = data.get_XY(train_loader)
    model = data.get_model(p, X_train, Y_train_onehot)
    init.initialize_weights(p, X_train, Y_train_onehot, model)

    # set up optimizer and freeze appropriate layers
    model, optimizer = optimization.freeze_and_set_lr(p, model, it=0)

    def reg_init(p):
        # Load a pretrained GAN discriminator used as a regularizer
        # (only when lambda_reg is non-zero).
        if p.lambda_reg == 0:
            return None

        # load the gan
        gan_dir = '/accounts/projects/vision/chandan/gan/mnist_dcgan'
        sys.path.insert(1, gan_dir)
        from dcgan import Discriminator
        D = Discriminator(
            ngpu=1 if torch.cuda.is_available() else 0).to(device)
        D.load_state_dict(
            torch.load(oj(gan_dir, 'weights/netD_epoch_99.pth'),
                       map_location=device))
        D = D.eval()
        return D

    def reg(p, it, model, D, device):
        # Regularization term: penalize exemplars the discriminator
        # scores as fake. Returns 0 when regularization is disabled.
        if p.lambda_reg == 0:
            return 0

        exs = model.exs.reshape(model.exs.shape[0], 1, 28,
                                28)  # mnist-specific
        outputs = D(exs)

        # discriminator outputs 1 for real, 0 for fake
        loss = p.lambda_reg * torch.sum(1 - outputs)
        return loss

    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    if 'linear' in p.dset:
        criterion = nn.MSELoss()
    reg_model = reg_init(p)

    # things to record
    s = S(p)
    s.weight_names = models.get_weight_names(model)
    if p.siamese:
        s.exs = model.exs.data.cpu().numpy()

    # run
    print('training...')
    for i, it in enumerate(tqdm(range(0, p.num_iters))):

        # calc stats and record
        s.losses_train[it], s.accs_train[it], s.confidence_unn_train[
            it], s.confidence_norm_train[it], s.margin_unn_train[
                it], s.margin_norm_train[it] = stats.calc_loss_acc_margins(
                    train_loader, p.batch_size, use_cuda, model, criterion,
                    p.dset)
        s.losses_test[it], s.accs_test[it], s.confidence_unn_test[
            it], s.confidence_norm_test[it], s.margin_unn_test[
                it], s.margin_norm_test[it] = stats.calc_loss_acc_margins(
                    test_loader,
                    p.batch_size,
                    use_cuda,
                    model,
                    criterion,
                    p.dset,
                    print_loss=True)

        # record weights
        weight_dict = deepcopy(
            {x[0]: x[1].data.cpu().numpy()
             for x in model.named_parameters()})
        s.weights_first10[p.its[it]] = deepcopy(
            model.state_dict()[s.weight_names[0]][:20].cpu().numpy())
        s.weight_norms[p.its[it]] = stats.layer_norms(model.state_dict())
        if it % p.save_all_weights_freq == 0 or it == p.num_iters - 1 or it == 0 or (
                it < p.num_iters_small
                and it % 2 == 0):  # save first, last, jumps
            s.weights[p.its[it]] = weight_dict
            if not p.use_conv:
                s.mean_max_corrs[p.its[it]] = stats.calc_max_corr_input(
                    X_train, Y_train_onehot, model)

        if p.save_singular_vals:
            # weight singular vals
            s.singular_val_dicts.append(
                get_singular_vals_from_weight_dict(weight_dict))
            s.singular_val_dicts_cosine.append(
                get_singular_vals_kernels(weight_dict, 'cosine'))
            s.singular_val_dicts_rbf.append(
                get_singular_vals_kernels(weight_dict, 'rbf'))
            s.singular_val_dicts_lap.append(
                get_singular_vals_kernels(weight_dict, 'laplacian'))

            # activations singular vals
            act_var_dicts = calc_activation_dims(
                use_cuda,
                model,
                train_loader.dataset,
                test_loader.dataset,
                calc_activations=p.calc_activations)
            s.act_singular_val_dicts_train.append(
                act_var_dicts['train']['pca'])
            s.act_singular_val_dicts_test.append(act_var_dicts['test']['pca'])
            s.act_singular_val_dicts_train_rbf.append(
                act_var_dicts['train']['rbf'])
            s.act_singular_val_dicts_test_rbf.append(
                act_var_dicts['test']['rbf'])

        # reduced model
        if p.save_reduce:
            model_r = reduce_model(model)
            s.losses_train_r[it], s.accs_train_r[
                it] = stats.calc_loss_acc_margins(train_loader, p.batch_size,
                                                  use_cuda, model_r, criterion,
                                                  p.dset)[:2]
            s.losses_test_r[it], s.accs_test_r[
                it] = stats.calc_loss_acc_margins(test_loader, p.batch_size,
                                                  use_cuda, model_r, criterion,
                                                  p.dset)[:2]

        # training
        for batch_idx, (x, target) in enumerate(train_loader):
            optimizer.zero_grad()
            x = x.to(device)
            target = target.to(device)
            x, target = Variable(x), Variable(target)
            out = model(x)
            loss = criterion(out, target) + reg(p, it, model, reg_model,
                                                device)
            loss.backward()
            optimizer.step()

            # don't go through whole dataset
            if batch_idx > len(
                    train_loader
            ) / p.saves_per_iter and it <= p.saves_per_iter * p.saves_per_iter_end + 1:
                break

        # set lr / freeze
        if it - p.num_iters_small in p.lr_ticks:
            model, optimizer = optimization.freeze_and_set_lr(p, model, it)

        if it % p.save_all_freq == 0:
            save(out_name, p, s)

        # check for need to flip dset
        if 'flip' in p.dset and it == p.num_iters // 2:
            print('flipped dset')
            s.flip_iter = p.num_iters // 2  # flip_iter tells when dset flipped
            train_loader, test_loader = data.get_data_loaders(p,
                                                              it=s.flip_iter)
            X_train, Y_train_onehot = data.get_XY(train_loader)
            if p.flip_freeze:
                p.freeze = 'last'
                model, optimizer = optimization.freeze_and_set_lr(p, model, it)
        elif 'permute' in p.dset and it > 0 and p.its[it] % p.change_freq == 0:
            s.permute_rng.append(int(p.its[it]))
            train_loader, test_loader = data.get_data_loaders(
                p, it=s.permute_rng[-1])
            X_train, Y_train_onehot = data.get_XY(train_loader)

    # Final save of all recorded statistics.
    save(out_name, p, s)
Esempio n. 13
0
def train():
    """Fine-tune an OpenAI-GPT / GPT-2 double-heads dialogue model.

    Parses CLI arguments, sets up (optionally distributed / fp16) training,
    and runs an ignite trainer/evaluator pair with LM + multiple-choice
    losses, tensorboard logging and per-epoch checkpointing.
    """
    parser = ArgumentParser()
    parser.add_argument(
        "--dataset_path",
        type=str,
        default="",
        help="Path or url of the dataset. If empty download from S3.")
    parser.add_argument("--dataset_cache",
                        type=str,
                        default='./dataset_cache',
                        help="Path or url of the dataset cache")
    parser.add_argument("--model_checkpoint",
                        type=str,
                        default="openai-gpt",
                        help="Path, url or short name of the model")
    parser.add_argument("--num_candidates",
                        type=int,
                        default=2,
                        help="Number of candidates for training")
    parser.add_argument("--max_history",
                        type=int,
                        default=2,
                        help="Number of previous exchanges to keep in history")
    parser.add_argument("--train_batch_size",
                        type=int,
                        default=4,
                        help="Batch size for training")
    parser.add_argument("--valid_batch_size",
                        type=int,
                        default=4,
                        help="Batch size for validation")
    parser.add_argument("--gradient_accumulation_steps",
                        type=int,
                        default=8,
                        help="Accumulate gradients on several steps")
    parser.add_argument("--lr",
                        type=float,
                        default=6.25e-5,
                        help="Learning rate")
    parser.add_argument("--lm_coef",
                        type=float,
                        default=1.0,
                        help="LM loss coefficient")
    parser.add_argument("--mc_coef",
                        type=float,
                        default=1.0,
                        help="Multiple-choice loss coefficient")
    parser.add_argument("--max_norm",
                        type=float,
                        default=1.0,
                        help="Clipping gradient norm")
    parser.add_argument("--n_epochs",
                        type=int,
                        default=3,
                        help="Number of training epochs")
    parser.add_argument("--personality_permutations",
                        type=int,
                        default=1,
                        help="Number of permutations of personality sentences")
    parser.add_argument(
        "--eval_before_start",
        action='store_true',
        help="If true start with a first evaluation before training")
    parser.add_argument("--device",
                        type=str,
                        default="cuda" if torch.cuda.is_available() else "cpu",
                        help="Device (cuda or cpu)")
    parser.add_argument(
        "--fp16",
        type=str,
        default="",
        help=
        "Set to O0, O1, O2 or O3 for fp16 training (see apex documentation)")
    parser.add_argument(
        "--local_rank",
        type=int,
        default=-1,
        help="Local rank for distributed training (-1: not distributed)")
    args = parser.parse_args()

    # logging is set to INFO (resp. WARN) for main (resp. auxiliary) process. logger.info => log main process only, logger.warning => log all processes
    logging.basicConfig(
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN)
    print(
        "Running process {}".format(args.local_rank)
    )  # This is a logger.warning: it will be printed by all distributed processes
    print("Arguments: {}".format(pformat(args)))

    # Initialize distributed training if needed
    args.distributed = (args.local_rank != -1)
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        args.device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')

    print("Prepare tokenizer, pretrained model and optimizer.")
    tokenizer_class = GPT2Tokenizer if "gpt2" in args.model_checkpoint else OpenAIGPTTokenizer  # cant use Autotokenizer because checkpoint could be a Path
    tokenizer = tokenizer_class.from_pretrained(args.model_checkpoint)

    model_class = GPT2DoubleHeadsModel if "gpt2" in args.model_checkpoint else OpenAIGPTDoubleHeadsModel
    model = model_class.from_pretrained(args.model_checkpoint)
    model.to(args.device)
    # Add special tokens if they are not already added
    add_special_tokens_(model, tokenizer)
    optimizer = AdamW(model.parameters(), lr=args.lr, correct_bias=True)

    # Prepare model for FP16 and distributed training if needed (order is important, distributed should be the last)
    if args.fp16:
        from apex import amp  # Apex is only required if we use fp16 training
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level=args.fp16)
    if args.distributed:
        model = DistributedDataParallel(model,
                                        device_ids=[args.local_rank],
                                        output_device=args.local_rank)

    print("Prepare datasets")
    train_loader, val_loader, train_sampler, valid_sampler = get_data_loaders(
        args, tokenizer)

    # Training function and trainer
    def update(engine, batch):
        # One optimization step: combined LM + multiple-choice loss with
        # gradient accumulation and optional fp16 loss scaling.
        model.train()
        batch = tuple(input_tensor.to(args.device) for input_tensor in batch)
        input_ids, mc_token_ids, lm_labels, mc_labels, token_type_ids = batch
        print('LM:', lm_labels)
        print('MC:', mc_labels)
        (lm_loss), (mc_loss), *_ = model(input_ids,
                                         token_type_ids=token_type_ids,
                                         mc_token_ids=mc_token_ids,
                                         mc_labels=mc_labels,
                                         lm_labels=lm_labels)
        loss = (lm_loss * args.lm_coef +
                mc_loss * args.mc_coef) / args.gradient_accumulation_steps
        if args.fp16:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
            torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer),
                                           args.max_norm)
        else:
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_norm)
        if engine.state.iteration % args.gradient_accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()
        return loss.item()

    trainer = Engine(update)

    # Evaluation function and evaluator (evaluator output is the input of the metrics)
    def inference(engine, batch):
        # Forward pass without labels; returns shifted logits/labels for the
        # NLL metric plus MC logits/labels for accuracy.
        model.eval()
        with torch.no_grad():
            batch = tuple(
                input_tensor.to(args.device) for input_tensor in batch)
            input_ids, mc_token_ids, lm_labels, mc_labels, token_type_ids = batch
            # print(tokenizer.decode(input_ids[0, -1, :].tolist()))
            # if we dont send labels to model, it doesnt return losses
            lm_logits, mc_logits, *_ = model(
                input_ids,
                token_type_ids=token_type_ids,
                mc_token_ids=mc_token_ids,
            )
            lm_logits_flat_shifted = lm_logits[..., :-1, :].contiguous().view(
                -1, lm_logits.size(-1))
            lm_labels_flat_shifted = lm_labels[..., 1:].contiguous().view(-1)
            return (lm_logits_flat_shifted,
                    mc_logits), (lm_labels_flat_shifted, mc_labels)

    evaluator = Engine(inference)

    # Attach evaluation to trainer: we evaluate when we start the training and at the end of each epoch
    trainer.add_event_handler(Events.EPOCH_COMPLETED,
                              lambda _: evaluator.run(val_loader))
    if args.n_epochs < 1:
        trainer.add_event_handler(Events.COMPLETED,
                                  lambda _: evaluator.run(val_loader))
    if args.eval_before_start:
        trainer.add_event_handler(Events.STARTED,
                                  lambda _: evaluator.run(val_loader))

    # Make sure distributed data samplers split the dataset nicely between the distributed processes
    if args.distributed:
        trainer.add_event_handler(
            Events.EPOCH_STARTED,
            lambda engine: train_sampler.set_epoch(engine.state.epoch))
        evaluator.add_event_handler(
            Events.EPOCH_STARTED,
            lambda engine: valid_sampler.set_epoch(engine.state.epoch))

    # Linearly decrease the learning rate from lr to zero
    scheduler = PiecewiseLinear(optimizer, "lr",
                                [(0, args.lr),
                                 (args.n_epochs * len(train_loader), 0.0)])
    trainer.add_event_handler(Events.ITERATION_STARTED, scheduler)

    # Prepare metrics - note how we compute distributed metrics
    RunningAverage(output_transform=lambda x: x).attach(trainer, "loss")
    metrics = {
        "nll":
        Loss(torch.nn.CrossEntropyLoss(ignore_index=-1),
             output_transform=lambda x: (x[0][0], x[1][0])),
        "accuracy":
        Accuracy(output_transform=lambda x: (x[0][1], x[1][1]))
    }
    metrics.update({
        "average_nll":
        MetricsLambda(average_distributed_scalar, metrics["nll"], args),
        "average_accuracy":
        MetricsLambda(average_distributed_scalar, metrics["accuracy"], args)
    })
    metrics["average_ppl"] = MetricsLambda(math.exp, metrics["average_nll"])
    for name, metric in metrics.items():
        metric.attach(evaluator, name)

    # On the main process: add progress bar, tensorboard, checkpoints and save model, configuration and tokenizer before we start to train
    if args.local_rank in [-1, 0]:
        pbar = ProgressBar(persist=True)
        pbar.attach(trainer, metric_names=["loss"])
        evaluator.add_event_handler(
            Events.COMPLETED, lambda _: pbar.log_message(
                "Validation: %s" % pformat(evaluator.state.metrics)))

        log_dir = make_logdir(args.model_checkpoint)
        tb_logger = TensorboardLogger(log_dir)

        tb_logger.attach(trainer,
                         log_handler=OutputHandler(tag="training",
                                                   metric_names=["loss"]),
                         event_name=Events.ITERATION_COMPLETED)
        tb_logger.attach(trainer,
                         log_handler=OptimizerParamsHandler(optimizer),
                         event_name=Events.ITERATION_STARTED)
        tb_logger.attach(evaluator,
                         log_handler=OutputHandler(tag="validation",
                                                   metric_names=list(
                                                       metrics.keys()),
                                                   another_engine=trainer),
                         event_name=Events.EPOCH_COMPLETED)

        checkpoint_handler = ModelCheckpoint(log_dir,
                                             'checkpoint',
                                             save_interval=1,
                                             n_saved=3)
        trainer.add_event_handler(
            Events.EPOCH_COMPLETED, checkpoint_handler,
            {'mymodel': getattr(model, 'module', model)
             })  # "getattr" takes care of distributed encapsulation

        torch.save(args, log_dir + '/model_training_args.bin')
        getattr(model, 'module',
                model).config.to_json_file(os.path.join(log_dir, CONFIG_NAME))
        tokenizer.save_pretrained(log_dir)

    # Run the training
    trainer.run(train_loader, max_epochs=args.n_epochs)

    # On the main process: close tensorboard logger and rename the last checkpoint (for easy re-loading with OpenAIGPTModel.from_pretrained method)
    if args.local_rank in [-1, 0] and args.n_epochs > 0:
        os.rename(
            os.path.join(log_dir, checkpoint_handler._saved[-1][1]),
            os.path.join(log_dir, WEIGHTS_NAME)
        )  # TODO: PR in ignite to have better access to saved file paths (cleaner)
        tb_logger.close()
Esempio n. 14
0
import os
from options.test_options import TestOptions
from data import get_data_loaders
from models import create_model
from util.visualizer import save_images
from util import html

if __name__ == '__main__':
    # Parse test-time options and force deterministic, single-item,
    # non-interactive evaluation settings.
    opt = TestOptions().parse()
    opt.nThreads = 1  # test code only supports nThreads = 1
    opt.batch_size = 1  # test code only supports batch_size = 1
    opt.serial_batches = True  # no shuffle
    opt.no_flip = True  # no flip
    opt.display_id = -1  # no visdom display

    loaders = get_data_loaders(opt)
    test_set = loaders['test']
    test_set_size = len(test_set)

    model = create_model(opt)
    model.setup(opt)

    # Build the HTML results page for this experiment phase/epoch.
    web_dir = os.path.join(opt.results_dir, opt.name,
                           '%s_%s' % (opt.phase, opt.which_epoch))
    webpage = html.HTML(
        web_dir, 'Experiment = %s, Phase = %s, Epoch = %s' %
        (opt.name, opt.phase, opt.which_epoch))

    # Run inference over at most opt.how_many test items.
    for idx, batch in enumerate(test_set):
        if idx >= opt.how_many:
            break
        model.set_input(batch)
        model.test()
Esempio n. 15
0
# Default hyper-parameter configuration for a training run.
# NOTE(review): key order is preserved as-is in case utils.Config iterates it.
std_config = utils.Config({
    'num_epochs': 3000,
    'batch_size': 8192,
    'num_workers': 1,
    'hidden_size': 650,
    'context_radius': 32,
    'time_grid': 0.25,
    'lr': 0.0005,
    'lr_gamma': 0.99,
    'lr_step_size': 30,
    'checkpoint_interval': 1,
    'split': 0.05,

})

if __name__ == '__main__':
    # NOTE(review): ERROR level silences the logging.debug() progress message
    # below; lower the level (e.g. to DEBUG) if that output is wanted.
    logging.basicConfig(level=logging.ERROR)

    logging.debug('Loading datasets...')
    # Build the data loaders from the config; debug=False runs on full data.
    data_loaders = data.get_data_loaders(
        batch_size=std_config.batch_size,
        num_workers=std_config.num_workers,
        time_grid=std_config.time_grid,
        context_radius=std_config.context_radius,
        split=std_config.split,
        debug=False
    )

    train(std_config, data_loaders)
Esempio n. 16
0
def main():
    """Train a model with the options parsed from the command line.

    Builds the data loaders, model and visualizer, then runs the epoch
    loop: train, validate, log losses/metrics, periodically checkpoint,
    and step the learning-rate schedule at the end of every epoch.
    Saves a final checkpoint named ``epoch_<n_epochs>_final`` on exit.
    """

    opt = TrainOptions().parse()  # get training options
    dataloader = get_data_loaders(
        opt)  # create a dataset given opt.dataset_mode and other options
    dataset_size = len(
        dataloader)  # get the number of images in the train set.

    model = create_model(
        opt)  # create a model given opt.model and other options
    model.setup(
        opt)  # regular setup: load and print networks; create schedulers
    model.log_model_info(
        opt.verbose
    )  # log model metadata to log file iff opt.logging is enabled
    # Sizes are reported both to the model's log file and to stdout.
    model.logger.info(
        'The number of training images = {}'.format(dataset_size))
    model.logger.info('Num val images = {}'.format(dataloader.len_val_set))
    print('The number of training images = {}'.format(dataset_size))
    print('Num val images = {}'.format(dataloader.len_val_set))
    visualizer = Visualizer(
        opt)  # create a visualizer that display/save images and plots

    start_time = time.monotonic()
    for epoch in range(opt.epoch_count, opt.n_epochs + 1):

        # reset the visualizer: make sure it saves the results to HTML at least once every epoch
        visualizer.reset()
        # model.update_learning_rate()    # update learning rates in the beginning of every epoch.

        epoch_start_time = time.monotonic()
        model.init_epoch()
        model.train_epoch(dataloader.train_loader)
        model.validate(dataloader)
        epoch_end_time = time.monotonic()

        model.log_parameters(epoch)

        if epoch % opt.log_freq == 0:  # print training losses and save logging information to the disk
            losses = model.get_epoch_losses()
            metrics = model.get_epoch_metrics()
            epoch_time = timedelta(seconds=epoch_end_time - epoch_start_time)
            visualizer.print_current_losses_and_metrics(
                epoch, losses, metrics, epoch_time)
            if opt.display_id > 0:
                # One line plot per named plotting artifact (loss/metric curve).
                for n, y_dict in model.get_plotting_artifacts().items():
                    visualizer.line_plot(n,
                                         epoch,
                                         y_dict,
                                         xlabel='epochs',
                                         ylabel=n)

        if epoch % opt.save_epoch_freq == 0:  # cache model every <save_epoch_freq> epochs
            model.logger.info('saving the model at the end of epoch %d' %
                              epoch)
            model.save_networks('latest')
            model.save_networks('epoch_%d' % epoch)
        # Epoch timing goes to stdout only in verbose mode, but always to the log.
        if opt.verbose:
            print('End of epoch {} / {} \t Time Taken: {} sec'.format(
                epoch, opt.n_epochs,
                timedelta(seconds=epoch_end_time - epoch_start_time)))
        model.logger.info('End of epoch {} / {} \t Time Taken: {} sec'.format(
            epoch, opt.n_epochs,
            timedelta(seconds=epoch_end_time - epoch_start_time)))

        # Step LR schedulers at the end of every epoch (see the commented-out
        # alternative above that would step at the beginning instead).
        model.update_learning_rate()

    model.logger.info('Total training time for {} epochs = {}s'.format(
        opt.n_epochs, timedelta(seconds=time.monotonic() - start_time)))

    model.save_networks('epoch_{}_final'.format(opt.n_epochs))
        # NOTE(review): orphaned fragment -- the enclosing function header and
        # the condition of this if/else were lost when the listing was scraped.
        # The two arms choose how many presentation-attack materials exist for
        # the selected dataset; do not edit without recovering the original.
        NUM_MATERIALS = 3
        TEST_MATERIALS = [0, 1, 2]
    else:
        NUM_MATERIALS = 4
        TEST_MATERIALS = [0, 1, 2, 3]

    results = []

    # Leave-one-material-out evaluation: train and test once per material (PAI).
    for PAI in TEST_MATERIALS:

        # Fresh discriminator/generator pair for each held-out material.
        netD = DISCRIMINATOR().to(DEVICE)
        netG = GENERATOR().to(DEVICE)

        print("[Dataset] - " + DATASET + " -> Material number " + str(PAI))
        
        train_loader, valid_loader, test_loader = get_data_loaders(IMG_PATH, DATASET, test_material = PAI, img_size = IMG_SIZE, batch_size = BATCH_SIZE, croped=True, unseen_attack=UNSEEN_ATTACK)

        #netD, train_history = fit((netD, netG), DATASET, PAI, (train_loader, valid_loader), EPOCHS, EPOCHS_WITH_MATCHER, DEVICE, with_generator = USE_GENERATOR)

        netD, train_history = fit((netD, netG), DATASET, PAI, (train_loader, valid_loader), EPOCHS, EPOCHS_WITH_MATCHER, DEVICE, with_generator = USE_GENERATOR, just_train_classifier = True)

        # Anti-spoofing metrics (APCER/BPCER/EER variants) on the held-out material.
        test_loss, test_acc, test_apcer, test_bpcer, test_eer, test_bpcer_apcer1, test_bpcer_apcer5, test_bpcer_apcer10, test_apcer1, test_apcer5, test_apcer10 = test_model(netD, test_loader, DEVICE)

        results.append((test_loss.item(), test_acc, test_apcer, test_bpcer, test_eer, test_bpcer_apcer1, test_bpcer_apcer5, test_bpcer_apcer10, test_apcer1, test_apcer5, test_apcer10))

    #PRINTS -------------------------------------------------------------------------------------

    # Compute average and std
    acc_array = np.array([i[1] for i in results])
    apcer_array = np.array([i[2] for i in results])
    bpcer_array = np.array([i[3] for i in results])
Esempio n. 18
0
import time
from options.train_options import TrainOptions
from data import get_data_loaders
from models import create_model
from util.visualizer import Visualizer

if __name__ == '__main__':
    opt = TrainOptions().parse()
    visualizer = Visualizer(opt)
    logger = visualizer.logger

    data_loaders = get_data_loaders(opt, modes=['train', 'val'])
    dataset = data_loaders['train']
    dataset_size = len(dataset)
    # One fixed validation batch, reused for consistent visual comparisons.
    fixed_real_imgs = next(iter(data_loaders['val']))

    model = create_model(opt)
    model.setup(opt)

    total_steps = 0

    # Epoch loop: opt.niter epochs at full LR, then opt.niter_decay decaying.
    # NOTE(review): this listing is truncated by the scrape -- the inner loop
    # body continues past model.set_input(data) in the original file.
    for epoch in range(opt.epoch_count, opt.niter + opt.niter_decay + 1):
        epoch_start_time = time.time()
        iter_data_time = time.time()
        epoch_iter = 0

        for i, data in enumerate(dataset):
            iter_start_time = time.time()
            # Step counters advance by batch size, not by iteration count.
            total_steps += opt.batch_size
            epoch_iter += opt.batch_size
            model.set_input(data)
Esempio n. 19
0
# Evaluation script fragment: load tokenizer + double-heads LM from a
# checkpoint, then iterate the validation set computing shifted LM logits.
args.distributed = (args.local_rank != -1)

print("Prepare tokenizer, pretrained model and optimizer.")
tokenizer_class = GPT2Tokenizer if "gpt2" in args.model_checkpoint else OpenAIGPTTokenizer # cant use Autotokenizer because checkpoint could be a Path
tokenizer = tokenizer_class.from_pretrained(args.model_checkpoint)

# Pick the matching double-heads model class by checkpoint name.
model_class = GPT2DoubleHeadsModel if "gpt2" in args.model_checkpoint else OpenAIGPTDoubleHeadsModel
print('Loading model from checkpoint {}'.format(args.model_checkpoint))
model = model_class.from_pretrained(args.model_checkpoint)
model.to(args.device)

# Add special tokens if they are not already added
add_special_tokens_(model, tokenizer)

print("Prepare datasets")
train_loader, val_loader, train_sampler, valid_sampler = get_data_loaders(args, tokenizer)

# Accuracy accumulators; their updates lie past this truncated listing.
num_correct = 0.0
num_examples = 0.0
for i, batch in tqdm(enumerate(val_loader), total=len(val_loader)):
    model.eval()
    with torch.no_grad():
        batch = tuple(input_tensor.to(args.device) for input_tensor in batch)
        input_ids, mc_token_ids, lm_labels, mc_labels, token_type_ids = batch
        # print(tokenizer.decode(input_ids[0, -1, :].tolist()))
        # if we dont send labels to model, it doesnt return losses
        lm_logits, mc_logits, *_ = model(
            input_ids, token_type_ids=token_type_ids, mc_token_ids=mc_token_ids,
        )
        # Shift so position t's logits predict token t+1, flattened for a CE loss.
        lm_logits_flat_shifted = lm_logits[..., :-1, :].contiguous().view(-1, lm_logits.size(-1))
        lm_labels_flat_shifted = lm_labels[..., 1:].contiguous().view(-1)
Esempio n. 20
0
        # NOTE(review): fragment -- the opening parser.add_argument(...) call
        # that this default (presumably the learning rate) belongs to was cut
        # off by the scrape, as was the tail of the train_model(...) call below.
        default=1e-3)
    parser.add_argument(
        '--num-classes',
        type=int,
        default=12)
    parser.add_argument(
        '--notify',
        type=int,
        default=100)
    args = parser.parse_args()

    # Seed RNGs for reproducibility (CUDA-aware when a GPU is present).
    cuda = torch.cuda.is_available()
    set_seed(seed=1, cuda=cuda)

    # Data
    train_loader, train_dataset, test_loader, test_dataset = get_data_loaders(
        batch_size=args.batch_size)

    # Model
    model, loss_fn, optimizer = get_model(
        num_classes=args.num_classes,
        learning_rate=args.learning_rate,
        cuda=cuda)
    # Resume from the last saved checkpoint, if one exists.
    start_epoch, best_accuracy = load_model(model, cuda)

    for epoch in range(start_epoch, args.epochs):
        train_model(model=model,
                    optimizer=optimizer,
                    train_loader=train_loader,
                    train_dataset=train_dataset,
                    loss_fn=loss_fn,
                    num_epochs=args.epochs,
Esempio n. 21
0
def train(args: CommandlineArgs,
          train_dataset,
          valid_dataset,
          test_dataset,
          writer,
          model: CompModel = None):
    """Train a compositional model with independence (HSIC) regularizers and
    early-stop on a primary validation metric.

    Args:
        args: Parsed configuration; provides ``args.train`` (training config),
            ``args.device`` and ``args.model``.
        train_dataset: Training set; also supplies class counts/frequencies.
        valid_dataset: Validation set used for early stopping.
        test_dataset: Test set evaluated at the best validation epoch.
        writer: Metrics writer holding a ``df`` DataFrame of past epochs;
            used both to log scalars and to resume the epoch counter.
        model: Optional pre-built model; when ``None`` one is created from
            ``args`` and ``train_dataset``.

    Returns:
        Tuple ``(model, best_metrics)``: the best (early-stopped) model in
        eval mode and a dict of its metrics, numpy scalars cast to builtins.
    """
    # Init
    train_cfg = args.train

    best_metrics = {}
    epoch = -1
    start_epoch = 0
    device = args.device
    # Resume the epoch counter from previously-written rows, if any.
    if len(writer.df) > 0:
        start_epoch = writer.df.index.max()

    # Get pytorch data loaders
    test_loader, train_loader, valid_loader = get_data_loaders(
        train_dataset,
        valid_dataset,
        test_dataset,
        train_cfg.batch_size,
        train_cfg.num_workers,
        test_batchsize=train_cfg.test_batchsize,
        shuffle_eval_set=train_cfg.shuffle_eval_set)

    if model is None:
        model: CompModel = get_model(args, train_dataset)
    best_model = clone_model(model)

    ## NOTE:
    # y1 refer to object labels
    # y2 refer to attribute labels
    num_classes1 = train_dataset.num_objs
    num_classes2 = train_dataset.num_attrs

    # Named pair of NLL losses, one per label head (y1=objects, y2=attributes).
    class NLLLossFuncs(NamedTuple):
        y1: nn.NLLLoss
        y2: nn.NLLLoss

    nll_loss_funcs = NLLLossFuncs(y1=nn.NLLLoss(), y2=nn.NLLLoss())
    if train_cfg.balanced_loss:
        # Weight each class by its inverse frequency to counter class imbalance.
        nll_loss_funcs = NLLLossFuncs(
            y1=nn.NLLLoss(weight=to_torch(1 / train_dataset.y1_freqs, device)),
            y2=nn.NLLLoss(weight=to_torch(1 / train_dataset.y2_freqs, device)))

    # Convert an iteration budget (n_iter) into a whole number of epochs.
    itr_per_epoch = len(train_loader)
    n_epochs = train_cfg.n_iter // itr_per_epoch

    # +inf when the early-stop metric is minimized, -inf when maximized.
    best_primary_metric = np.inf * (
        2 * (train_cfg.primary_early_stop_metric.polarity == 'min') - 1)

    optimizer = get_optimizer(train_cfg.optimizer_name, train_cfg.lr,
                              train_cfg.weight_decay, model, args)

    epoch_range = range(start_epoch + 1, start_epoch + n_epochs + 1)
    data_iterator = iter(train_loader)

    for epoch in epoch_range:
        with profileblock(label='Epoch train step'):
            # Select which tensors to log. Taking an average on all batches per epoch.
            logger = batch_torch_logger(
                num_batches=len(train_loader),
                cs_str_args='y1_loss, y2_loss, y_loss, '
                'L_rep, '
                'y1_acc, y2_acc, '
                'HSIC_cond1, HSIC_cond2, '
                'pairwise_dist_cond1_repr1, '
                'pairwise_dist_cond1_repr2, '
                'pairwise_dist_cond2_repr1, '
                'pairwise_dist_cond2_repr2, '
                'HSIC_label_cond1, HSIC_label_cond2',
                nanmean_args_cs_str='pairwise_dist_cond1_repr1, '
                'pairwise_dist_cond1_repr2, '
                'pairwise_dist_cond2_repr1, '
                'pairwise_dist_cond2_repr2, '
                'tloss_a, tloss_o, tloss_g_imgfeat, '
                'loss_inv_core, loss_inv_g_hidden, loss_inv_g_imgfeat',
                device=device)

            for batch_cnt in range(len(train_loader)):
                logger.new_batch()

                optimizer.zero_grad()

                # Endless iterator over the train loader: restart on exhaustion.
                with ns_profiling_label('fetch batch'):
                    try:
                        batch = next(data_iterator)
                    except StopIteration:
                        data_iterator = iter(train_loader)
                        batch = next(data_iterator)

                with ns_profiling_label('send to gpu'):
                    X, y2, y1 = batch[0], batch[1], batch[2]
                    neg_attrs, neg_objs = batch[3].to(device), batch[4].to(
                        device)
                    X = X.float().to(device)  # images
                    y1 = y1.long().to(device)  # object labels
                    y2 = y2.long().to(device)  # attribute labels

                with ns_profiling_label('forward pass'):
                    # y1_scores, y2_scores are logits of negative-squared-distances at the embedding space
                    # repr1, repr2 are phi_hat1, phi_hat2 at the paper
                    y1_scores, y2_scores, repr1, repr2, _ = \
                        model(X, freeze_class1=train_cfg.freeze_class1,
                              freeze_class2=train_cfg.freeze_class2)

                # Classification term: convex combination of the two heads'
                # NLL losses, scaled by the cross-entropy coefficient.
                y1_loss = nll_loss_funcs.y1(y1_scores, y1)
                y2_loss = nll_loss_funcs.y2(y2_scores, y2)
                y_loss = y1_loss * train_cfg.Y12_balance_coeff + y2_loss * (
                    1 - train_cfg.Y12_balance_coeff)
                L_data = train_cfg.lambda_CE * y_loss

                L_invert = 0.
                if not args.model.VisProd:
                    # pair embedding losses
                    tloss_g_hidden, tloss_g_imgfeat, loss_inv_core, loss_inv_g_hidden, loss_inv_g_imgfeat = \
                        model.eval_pair_embed_losses(args, X, model.last_feature_common, y2, y1, neg_attrs,
                                                     neg_objs, nll_loss_funcs)

                    # aggregate triplet loss into L_data
                    L_data += train_cfg.lambda_ao_emb * tloss_g_hidden
                    L_data += train_cfg.lambda_feat * tloss_g_imgfeat

                    # aggregate components of L_invert
                    L_invert += train_cfg.lambda_aux_disjoint * loss_inv_core
                    L_invert += train_cfg.lambda_aux * loss_inv_g_hidden
                    L_invert += train_cfg.lambda_aux_img * loss_inv_g_imgfeat

                ys = (y1, y2)
                # Conditional-independence (HSIC) penalty between the two
                # learned representations, conditioned on the labels.
                L_rep, HSIC_rep_loss_terms, HSIC_mean_of_median_pairwise_dist_terms = \
                    conditional_indep_losses(repr1, repr2, ys, train_cfg.HSIC_coeff, indep_coeff2=train_cfg.HSIC_coeff,
                                             num_classes1=num_classes1,
                                             num_classes2=num_classes2, log_median_pairwise_distance=False,
                                             device=device)

                # Independence between each representation and the *other*
                # task's one-hot labels (only one coefficient active per call).
                ohy1 = one_hot(y1, num_classes1)
                ohy2 = one_hot(y2, num_classes2)
                L_oh1, HSIC_oh_loss_terms1, _ = \
                    conditional_indep_losses(ohy2, repr1, ys, train_cfg.alphaH, indep_coeff2=0, num_classes1=num_classes1,
                                             num_classes2=num_classes2, log_median_pairwise_distance=False,
                                             device=device)

                L_oh2, HSIC_oh_loss_terms2, _ = \
                    conditional_indep_losses(ohy1, repr2, ys, 0, indep_coeff2=train_cfg.alphaH, num_classes1=num_classes1,
                                             num_classes2=num_classes2, log_median_pairwise_distance=False,
                                             device=device)

                L_indep = L_rep + L_oh1 + L_oh2

                # Total objective: data fit + independence + inversion terms.
                loss = L_data + L_indep + L_invert

                with ns_profiling_label('loss and update'):
                    loss.backward()
                    optimizer.step()

                # log the metrics
                with ns_profiling_label('log batch'):

                    # extract indep loss terms from lists for logging
                    HSIC_cond1, HSIC_cond2, pairwise_dist_cond1_repr1, pairwise_dist_cond1_repr2, \
                    pairwise_dist_cond2_repr1, pairwise_dist_cond2_repr2 = \
                        HSIC_logging_terms(HSIC_rep_loss_terms, HSIC_mean_of_median_pairwise_dist_terms)

                    HSIC_label_cond1 = HSIC_oh_loss_terms1[0]
                    HSIC_label_cond2 = HSIC_oh_loss_terms2[1]

                    with ns_profiling_label('calc y1 train acc'):
                        y1_acc = acc_from_logits(y1_scores,
                                                 y1,
                                                 return_tensor=True).detach()
                    with ns_profiling_label('calc y2 train acc'):
                        y2_acc = acc_from_logits(y2_scores,
                                                 y2,
                                                 return_tensor=True).detach()

                    # batch_torch_logger selects what to record by variable
                    # name, hence the locals() capture -- keep names in sync
                    # with cs_str_args above.
                    logger.log(locals_dict=locals())

            curr_epoch_metrics = OrderedDict()
            curr_epoch_metrics.update(logger.get_means())
        with profileblock(label='Evaluation step'):

            # Evaluate and update the early-stopping state for this epoch.
            best_primary_metric, is_best = evaluation_step(
                model,
                valid_loader,
                test_loader,
                nll_loss_funcs,
                writer,
                epoch,
                n_epochs,
                curr_epoch_metrics,
                early_stop_metric_name=train_cfg.primary_early_stop_metric,
                best_ES_metric_value=best_primary_metric,
                calc_AUC=train_cfg.metrics.calc_AUC)

            # write current epoch metrics to metrics logger
            with ns_profiling_label('write eval step metrics'):
                for metric_key, value in curr_epoch_metrics.items():
                    writer.add_scalar(f'{metric_key}', value, epoch)
            # dump collected metrics to csv
            writer.dump_to_csv()

            # print all columns
            last_results_as_string = writer.last_results_as_string()
            last_results_as_string = '\n       '.join(
                last_results_as_string.split('\n'))
            if train_cfg.verbose:
                print('\n[%d/%d]' % (epoch, n_epochs), last_results_as_string)

            if is_best:
                # Snapshot the best-so-far model and its metrics row.
                best_model = clone_model(model)
                best_metrics = writer.df.iloc[-1, :].to_dict()
                best_metrics['epoch'] = int(writer.df.iloc[-1, :].name)
                if train_cfg.verbose:
                    print(f'Best! (@epoch {epoch})')

    # Final evaluation uses the early-stopped (best) model.
    model = best_model
    model.eval()

    print('Best epoch was: ', best_metrics['epoch'])
    print(
        f'Primary early stop monitor was {train_cfg.primary_early_stop_metric}'
    )

    val_metrics = eval_model_with_dataloader(model,
                                             valid_loader,
                                             nll_loss_funcs,
                                             phase_name='valid')
    best_metrics.update(val_metrics)
    print('Val metrics on best val epoch :')
    pprint([(k, v) for k, v in val_metrics.items()])

    test_metrics = eval_model_with_dataloader(model,
                                              test_loader,
                                              nll_loss_funcs,
                                              phase_name='test')
    best_metrics.update(test_metrics)
    print('\n\nTest metrics on best val epoch :')
    pprint([(k, v) for k, v in test_metrics.items()])

    # cast numpy items to their original type
    for k, v in best_metrics.items():
        if isinstance(v, np.number):
            best_metrics[k] = v.item()

    #### two redundant calls to align random-number-generator with original training script
    # check: (to delete?)
    _ = eval_model_with_dataloader(model,
                                   valid_loader,
                                   nll_loss_funcs,
                                   phase_name='valid')
    _ = eval_model_with_dataloader(model,
                                   test_loader,
                                   nll_loss_funcs,
                                   phase_name='test')

    return model, best_metrics
Esempio n. 22
0
#     num_epoch=16
#     lr = 0.01
#     using_vae = True
#     vis_mode = 'wandb'
#     dataset = 'tinyImgNet'
#     param_path = 'models/'
#     exp_name = 'vae'

if __name__ == '__main__':
    ############
    ##  Data  ##
    ############
    args = parser.parse_args()
    os.makedirs(args.param_path, exist_ok=True)

    tr_loader, va_loader = get_data_loaders(args.batch_size, args.dataset,
                                            args.img_size)

    #############
    ##  Model  ##
    #############

    model = get_model(using_vae=args.using_vae).to(device)
    # model.decoder.net.backbone.requires_grad = False
    # model.decoder.net.backbone.eval()
    optim = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                   model.parameters()),
                            lr=args.lr,
                            momentum=0.9,
                            weight_decay=1e-4)
    sched = LinearWarmupScheduler(optim, 1000)