def __init__(self, learning_rate=1e-3, batch_size=32, scheduler_stepsize=20):
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.scheduler_stepsize = scheduler_stepsize

        self.dataloaders = get_dataloaders(self.batch_size)
        self.model = Net().model.to(device)
Example #2
def main(config):
    # load data 
    train_dataloader, valid_dataloader, test_dataloader = get_dataloaders(config)

    # define model, loss, optimizer, scheduler, logger 
    model = AtecModel(config)
    criterion = nn.CrossEntropyLoss()
    trainable_params = list(model.encoder.parameters())+list(model.comparator.parameters())
    optimizer = Adam(trainable_params, lr=config.lr)
    scheduler = MultiStepLR(optimizer, milestones=[10, 20, 30], gamma=0.1)
     
    # training iterations 
    total_steps = len(train_dataloader)

    for epoch in range(config.num_epochs):
        for i, (data, labels, indices, lengths) in enumerate(train_dataloader):
            logits = model(data, indices)
            loss = criterion(logits, labels)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # log loss, could visualize in tensorboard if needed 
            if (i+1) % config.log_step == 0:
                print('Epoch [%d/%d], Step [%d/%d], loss: %.4f'
                      % (epoch+1, config.num_epochs, i+1, total_steps, loss.item()))

        # step the LR scheduler once per epoch, after the optimizer updates
        scheduler.step()

        # save the model per epoch, only save parameters 
        if (epoch+1) % config.save_step == 0:
            model_path = os.path.join(config.model_dir, 'model-%d.pkl' %(epoch+1))
            torch.save(model.state_dict(), model_path)
Example #3
def main():
    # Get DataLoaders
    train_fonts = []
    with open('train52_fonts.txt', 'r') as file:
        for font in file:
            train_fonts.append(font.strip())
    val_fonts = []
    with open('val52_fonts.txt', 'r') as file:
        for font in file:
            val_fonts.append(font.strip())
    train_x_loader, train_y_loader, val_loader = get_dataloaders('data/jpg',
                                                                 'data/jpg',
                                                                 train_fonts,
                                                                 val_fonts,
                                                                 BATCH_SIZE,
                                                                 logger=log)
    # Initialize models
    gen = Generator().to(device)
    dis = Discriminator().to(device)
    epoch = 1

    min_eval_loss = np.inf
    while epoch <= MAX_EPOCHS:
        train(gen, dis, train_x_loader, train_y_loader, epoch, lr=LR)
        eval_loss = eval(gen, val_loader, epoch)
        log.info(f'Eval Pixelwise BCE Loss: {eval_loss}')
        if eval_loss < min_eval_loss:
            min_eval_loss = eval_loss
            save(gen, dis)
        epoch += 1
Example #4
def main():

    opt = parse_option()

    opt.n_test_runs = 600
    train_loader, val_loader, meta_testloader, meta_valloader, n_cls = get_dataloaders(
        opt)

    # load model
    model = create_model(opt.model, n_cls, opt.dataset)
    ckpt = torch.load(opt.model_path)
    model.load_state_dict(ckpt["model"])

    if torch.cuda.is_available():
        model = model.cuda()
        cudnn.benchmark = True

    start = time.time()
    test_acc, test_std = meta_test(model, meta_testloader)
    test_time = time.time() - start
    print('test_acc: {:.4f}, test_std: {:.4f}, time: {:.1f}'.format(
        test_acc, test_std, test_time))

    start = time.time()
    test_acc_feat, test_std_feat = meta_test(model,
                                             meta_testloader,
                                             use_logit=False)
    test_time = time.time() - start
    print('test_acc_feat: {:.4f}, test_std: {:.4f}, time: {:.1f}'.format(
        test_acc_feat, test_std_feat, test_time))
Example #5
def run(args, use_cuda, output_dir):

    trial_list = list(range(args.n_trials))
    np.random.shuffle(trial_list)

    for trial_i in trial_list:
        trial_dir = os.path.join(output_dir, 'trial_{}'.format(trial_i))
        os.makedirs(trial_dir, exist_ok=True)

        loaders, params = get_dataloaders(args.batch_size,
                                          trial_i,
                                          args.dataset,
                                          args.augment_data,
                                          early_stop=args.early_stop)

        if args.network_type == 'fc':
            model = DenseModel(input_dim=np.prod(params['input_shape']),
                               output_dim=params['output_dim'],
                               hidden_nodes=args.hidden_nodes,
                               num_modules=args.n_modules,
                               activation=args.activation)
        elif args.network_type == 'conv':
            model = ConvModel(input_shape=params['input_shape'],
                              output_dim=params['output_dim'],
                              num_filters=args.filters,
                              kernel_sizes=args.kernels,
                              strides=args.strides,
                              dilations=args.dilations,
                              num_modules=args.n_modules,
                              activation=args.activation,
                              final_layer=args.conv_final_layer)
        elif args.network_type == 'densenet':
            model = DenseNet(input_shape=params['input_shape'],
                             output_dim=params['output_dim'],
                             growth_rate=args.densenet_k,
                             depth=args.densenet_depth,
                             reduction=args.densenet_reduction,
                             bottleneck=args.densenet_bottleneck,
                             num_modules=args.n_modules)

        logging.debug(args)
        logging.debug('Parameters: {}'.format(model.n_parameters()))

        device = torch.device("cuda" if use_cuda else "cpu")
        model = model.to(device)
        model.reset_parameters()

        weight_path = os.path.join(trial_dir, 'initial_weights.pt')
        torch.save(model.state_dict(), weight_path)

        for lambda_i, (lambda_, learning_rate) in enumerate(
                zip(args.lambda_values, args.learning_rates)):
            model.load_state_dict(torch.load(weight_path))

            lambda_dir = os.path.join(trial_dir, str(lambda_))
            os.makedirs(lambda_dir, exist_ok=True)

            do_lambda_value(model, lambda_, learning_rate, args, loaders,
                            params['distribution'], device, lambda_dir)
Example #6
def set_data_loader(self):
    df = pd.read_csv(self.args.csv_path)

    mapping = get_train_mapping(df)
    train_dataset = BaseDataset(self.args.data_dir, mapping, enable_mixup=True, enable_aug=False)
    test_dataset = BaseDataset(self.args.data_dir, mapping, enable_mixup=False, enable_aug=False)
    train_dataloader, eval_dataloader = get_dataloaders(train_dataset, test_dataset, self.args.batch_size, self.device)
    self.train_dataloader = train_dataloader
    self.eval_dataloader = eval_dataloader

Example #7
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--num_epochs", type=int, default=5)
    parser.add_argument("--lr", type=float, default=0.01)
    parser.add_argument("--vocab_size", type=float, default=10000)
    parser.add_argument("--embed_size", type=int, default=300)
    parser.add_argument("--hidden_size", type=int, default=100)
    parser.add_argument("--num_layers", type=int, default=1)
    parser.add_argument("--batch_size", type=int, default=32)
    parser.add_argument("--win_size", type=int, default=35)
    parser.add_argument("--num_samples", type=int, default=100)
    parser.add_argument("--early_stop", type=int, default=3)
    parser.add_argument("--use_glove", type=bool, default=False)
    args = parser.parse_args()

    # vocab_size = args.vocab_size
    num_epochs = args.num_epochs
    embed_size = args.embed_size
    hidden_size = args.hidden_size
    num_layers = args.num_layers
    batch_size = args.batch_size
    win_size = args.win_size
    num_samples = args.num_samples
    early_stop = args.early_stop
    use_glove = args.use_glove

    train_loader, dev_loader, test_loader, vocab_size, vocab = get_dataloaders(
        batch_size, win_size)

    weight = None
    if use_glove:
        weight = loadGloveModel(vocab, "glove.6B.300d.txt")
    model = RNN_LM(vocab_size, embed_size, hidden_size, num_layers, weight,
                   use_glove)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    model, lowest_perplexity = trainer(train_loader, dev_loader, model,
                                       optimizer, criterion, num_epochs,
                                       early_stop, num_layers, batch_size,
                                       hidden_size)
    print("lowest_perplexity: ", lowest_perplexity)

    test(test_loader, model, num_layers, batch_size, hidden_size, criterion)
    generate_words("I.txt", "I", num_layers, hidden_size, vocab, num_samples,
                   model)  # starting word to generate from
    generate_words("What.txt", "What", num_layers, hidden_size, vocab,
                   num_samples, model)
    generate_words("Anyway.txt", "anyway", num_layers, hidden_size, vocab,
                   num_samples, model)
Example #8
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--lr", type=float, default=0.0001)
    parser.add_argument("--dropout", type=float, default=0.3)
    parser.add_argument("--batch_size", type=int, default=20)
    parser.add_argument("--early_stop", type=int, default=10)
    parser.add_argument("--embed_dim", type=int, default=128)
    parser.add_argument("--dim_size", type=int, default=512)
    parser.add_argument("--num_layers", type=int, default=2)
    parser.add_argument("--window_size", type=int, default=30)
    parser.add_argument("--lr_decay", type=float, default=0.5)
    parser.add_argument("--amount_of_vocab", type=int, default=15000)
    args = parser.parse_args()
    # load data
    train_loader, dev_loader, test_loader, vocab_size, vocab = get_dataloaders(
        args.batch_size, args.window_size, args.amount_of_vocab)
    # build model
    # try to use pretrained embedding here
    model = RNNLM(args, vocab_size, embedding_matrix=None)

    # loss function
    criterion = nn.CrossEntropyLoss()

    # choose optimizer
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        model.parameters()),
                                 lr=args.lr)

    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=args.lr_decay)
    model, best_perp = trainer(train_loader,
                               dev_loader,
                               model,
                               optimizer,
                               criterion,
                               early_stop=args.early_stop)

    print('best_dev_perp:{}'.format(best_perp))
    predict(model, vocab, clean_str("I"))
    predict(model, vocab, clean_str("What"))
    predict(model, vocab, clean_str("Anyway"))
Example #9
def main():

    opt = parse_option()

    opt.n_test_runs = 600
    train_loader, val_loader, meta_testloader, meta_valloader, n_cls, _ = get_dataloaders(
        opt)

    # load model
    model = create_model(opt.model, n_cls, opt.dataset)
    ckpt = torch.load(opt.model_path)["model"]

    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for k, v in ckpt.items():
        name = k.replace("module.", "")
        new_state_dict[name] = v

    model.load_state_dict(new_state_dict)

    # model.load_state_dict(ckpt["model"])

    if torch.cuda.is_available():
        model = model.cuda()
        cudnn.benchmark = True

    start = time.time()
    test_acc, test_std = meta_test(model, meta_testloader)
    test_time = time.time() - start
    print('test_acc: {:.4f}, test_std: {:.4f}, time: {:.1f}'.format(
        test_acc, test_std, test_time))

    start = time.time()
    test_acc_feat, test_std_feat = meta_test(model,
                                             meta_testloader,
                                             use_logit=False)
    test_time = time.time() - start
    print('test_acc_feat: {:.4f}, test_std: {:.4f}, time: {:.1f}'.format(
        test_acc_feat, test_std_feat, test_time))
Example #10
def main(FLAGS):

    "train and validate the Unet model"
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    #data directory
    data_dir = FLAGS.dataset_dir
    #log_directory
    log_dir = FLAGS.log_dir
    # Hyper and other parameters
    train_batch_size = FLAGS.train_batch_size
    val_batch_size = FLAGS.val_batch_size
    aug_flag = FLAGS.aug
    num_epochs = FLAGS.epochs
    num_classes = 2
    # get the train and validation dataloaders
    dataloaders = get_dataloaders(data_dir, train_batch_size, val_batch_size,
                                  aug_flag)
    model = Unet(3, num_classes)

    # Run training on multiple GPUs if more than one is available
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model, device_ids=[0, 1])
    else:
        print("no multiple gpu found")
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),
                          lr=0.02,
                          momentum=0.9,
                          weight_decay=0.0005)
    #optimizer = optim.Adam(model.parameters(),lr = learning_rate)
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
    plotter = VisdomLinePlotter(env_name='Unet Train')
    # uncomment for learning rate scheduler
    train_val(dataloaders, model, criterion, optimizer, num_epochs, log_dir,
              device)
Example #11
import torch
import torch.optim
import torch.nn as nn
import torch.backends.cudnn as cudnn

#torch.manual_seed(args.seed)

if args.gpu:
    os.environ["CUDA_VISIBLE_DEVICES"] = '0'


model = getattr(models, 'RANet')(args)
model = torch.nn.DataParallel(model.cuda())
criterion = nn.CrossEntropyLoss().cuda()

train_loader, val_loader, test_loader = get_dataloaders(args)
#state_dict = torch.load('model_best.pth.tar')['state_dict']

state_dict = torch.load('model_best2.pth.tar')
model.load_state_dict(state_dict)




def validate(val_loader, model, criterion):
    #batch_time = AverageMeter()
    #losses = AverageMeter()
    #data_time = AverageMeter()
    top1, top5 = [], []

Example #12
File: util.py Project: yyht/SKD
def generate_final_report(model, opt, wandb):
    from eval.meta_eval import meta_test

    opt.n_shots = 1
    train_loader, val_loader, meta_testloader, meta_valloader, _ = get_dataloaders(
        opt)

    #validate
    meta_val_acc, meta_val_std = meta_test(model, meta_valloader)

    meta_val_acc_feat, meta_val_std_feat = meta_test(model,
                                                     meta_valloader,
                                                     use_logit=False)

    #evaluate
    meta_test_acc, meta_test_std = meta_test(model, meta_testloader)

    meta_test_acc_feat, meta_test_std_feat = meta_test(model,
                                                       meta_testloader,
                                                       use_logit=False)

    print('Meta Val Acc : {:.4f}, Meta Val std: {:.4f}'.format(
        meta_val_acc, meta_val_std))
    print('Meta Val Acc (feat): {:.4f}, Meta Val std (feat): {:.4f}'.format(
        meta_val_acc_feat, meta_val_std_feat))
    print('Meta Test Acc: {:.4f}, Meta Test std: {:.4f}'.format(
        meta_test_acc, meta_test_std))
    print('Meta Test Acc (feat): {:.4f}, Meta Test std (feat): {:.4f}'.format(
        meta_test_acc_feat, meta_test_std_feat))

    wandb.log({
        'Final Meta Test Acc @1': meta_test_acc,
        'Final Meta Test std @1': meta_test_std,
        'Final Meta Test Acc  (feat) @1': meta_test_acc_feat,
        'Final Meta Test std  (feat) @1': meta_test_std_feat,
        'Final Meta Val Acc @1': meta_val_acc,
        'Final Meta Val std @1': meta_val_std,
        'Final Meta Val Acc   (feat) @1': meta_val_acc_feat,
        'Final Meta Val std   (feat) @1': meta_val_std_feat
    })

    opt.n_shots = 5
    train_loader, val_loader, meta_testloader, meta_valloader, _ = get_dataloaders(
        opt)

    #validate
    meta_val_acc, meta_val_std = meta_test(model, meta_valloader)

    meta_val_acc_feat, meta_val_std_feat = meta_test(model,
                                                     meta_valloader,
                                                     use_logit=False)

    #evaluate
    meta_test_acc, meta_test_std = meta_test(model, meta_testloader)

    meta_test_acc_feat, meta_test_std_feat = meta_test(model,
                                                       meta_testloader,
                                                       use_logit=False)

    print('Meta Val Acc : {:.4f}, Meta Val std: {:.4f}'.format(
        meta_val_acc, meta_val_std))
    print('Meta Val Acc (feat): {:.4f}, Meta Val std (feat): {:.4f}'.format(
        meta_val_acc_feat, meta_val_std_feat))
    print('Meta Test Acc: {:.4f}, Meta Test std: {:.4f}'.format(
        meta_test_acc, meta_test_std))
    print('Meta Test Acc (feat): {:.4f}, Meta Test std (feat): {:.4f}'.format(
        meta_test_acc_feat, meta_test_std_feat))

    wandb.log({
        'Final Meta Test Acc @5': meta_test_acc,
        'Final Meta Test std @5': meta_test_std,
        'Final Meta Test Acc  (feat) @5': meta_test_acc_feat,
        'Final Meta Test std  (feat) @5': meta_test_std_feat,
        'Final Meta Val Acc @5': meta_val_acc,
        'Final Meta Val std @5': meta_val_std,
        'Final Meta Val Acc   (feat) @5': meta_val_acc_feat,
        'Final Meta Val std   (feat) @5': meta_val_std_feat
    })
Example #13
def test(config):

    # Initialize the device which to run the model on
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Load pre-trained model
    file_path = config.model_dir + config.model_file
    if os.path.isfile(file_path):
        print('Loading checkpoint \'{}\''.format(file_path))
        checkpoint = torch.load(file_path)
        config = checkpoint['config']  # Use saved config
        print('Loaded  checkpoint \'{}\' (epoch {})'.format(
            file_path, checkpoint['epoch']))
    else:
        print('No checkpoint found at \'{}\''.format(file_path))
        sys.exit('Please check the filename.')

    # Get torch loaders for training and test data
    train_loader, test_loader = get_dataloaders(config.dataset,
                                                markov_order=2,
                                                batch_size=config.batch_size)
    vocab_size = train_loader.dataset.vocab_size

    # The following steps are to initialize the model, which will be overloaded with the trained model

    encoder = S2SEncoder(vocab_size,
                         config.embedding_dim,
                         config.hidden_size,
                         config.num_layers,
                         dropout=config.dropout)
    decoder = S2SAttnDecoder(vocab_size,
                             config.embedding_dim,
                             config.hidden_size,
                             config.num_layers,
                             dropout=config.dropout)
    model = S2S(encoder, decoder).to(device)

    # Load model from checkpoint and put in evaluation mode
    model.load_state_dict(checkpoint['model'])
    model.eval()
    print('Model loaded from checkpoint, start evaluation.')

    # f, p, r = (f1-score, precision, recall)
    rouge_scores = [
        [0, 0, 0],  # rouge-1
        [0, 0, 0],  # rouge-2
        [0, 0, 0],  # rouge-l
    ]

    num_examples = 0

    rouge_eval = Rouge()

    for batch_idx, (X, Y, xlen, ylen) in enumerate(test_loader):
        X = X.to(device)
        Y = Y.to(device)
        Y_in = Y[:, :-1]
        Y_t = Y[:, 1:]
        xlen = xlen.to(device)
        # ylen -= 1, outputs do not predict start token
        ylen = (ylen - 1).to(device)

        # No teacher forcing
        Y_in = Y_in[:, 0:1]
        ylen = torch.ones_like(ylen).to(device)
        out_length = Y_t.size(1)
        out = model(X,
                    Y_in,
                    xlen,
                    ylen,
                    output_length=out_length,
                    teacher_forcing=False)

        # Calculate avg rouge scores over batch
        batch_correct = []
        batch_test_sentence = []
        for i in range(len(out)):
            test_sentence = torch.argmax(out[i], -1).cpu().numpy()
            test_sentence = [
                test_loader.dataset.i2w[i] if i > 0 else 'PAD'
                for i in test_sentence
            ]
            correct = Y_t.cpu()[i].numpy()
            correct = [test_loader.dataset.i2w[i] for i in correct if i > 0]

            if config.rouge_subwords:
                correct = ''.join(word for word in correct).replace('▁', ' ')
                test_sentence = ''.join(
                    word for word in test_sentence).replace('▁', ' ')
            else:
                test_sentence = ' '.join(word for word in test_sentence)
                correct = ' '.join(word for word in correct)

            batch_test_sentence.append(test_sentence)
            batch_correct.append(correct)
        rouge = rouge_eval.get_scores(batch_test_sentence, batch_correct,
                                      True)  # output format is dict

        # Turn dict into lists and sum all corresponding elements with total
        rouge_scores[0][0] += rouge['rouge-1']['f']
        rouge_scores[0][1] += rouge['rouge-1']['p']
        rouge_scores[0][2] += rouge['rouge-1']['r']
        rouge_scores[1][0] += rouge['rouge-2']['f']
        rouge_scores[1][1] += rouge['rouge-2']['p']
        rouge_scores[1][2] += rouge['rouge-2']['r']
        rouge_scores[2][0] += rouge['rouge-l']['f']
        rouge_scores[2][1] += rouge['rouge-l']['p']
        rouge_scores[2][2] += rouge['rouge-l']['r']

        num_examples += 1

        # Show every 10 batches
        if batch_idx % 10 == 0:
            print("batch_idx:", batch_idx)
            # Current average rouge scores
            temp_rouge_scores = current_rouge_scores(rouge_scores,
                                                     num_examples)

    # Final average rouge scores
    final_rouge_scores = current_rouge_scores(rouge_scores, num_examples)
Example #14
from model_bert import *

BATCH_SIZE = 10
NUM_EPOCH = 2
MARGIN = 768 / 2

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

if __name__ == "__main__":
    model_path = sys.argv[1]
    print(f'loading from [{model_path}]')
    model, existing_results = train_utils.load_model_save(model_path)

    optimizer = AdamW(model.parameters())

    train_loader, dev_loader, test_loader = dataloader.get_dataloaders(
        BATCH_SIZE)

    def train_epoch_fn(e):
        total_train_loss = 0
        total_num_correct_eucl = 0
        total_item = 0
        for i, data in enumerate(tqdm(train_loader, desc='train',
                                      leave=False)):
            if i > 10:
                break

            optimizer.zero_grad()

            ancs, poss, negs = data
            ancs = ancs.to(DEVICE)
            poss = poss.to(DEVICE)
Example #15
def main():

    global args
    best_prec1, best_epoch = 0.0, 0
    
    if not os.path.exists(args.save):
        os.makedirs(args.save)

    if args.data.startswith('cifar'):
        IM_SIZE = 32
    else:
        IM_SIZE = 224
    
    print(args.arch)    
    model = getattr(models, args.arch)(args)
    args.num_exits = len(model.classifier)
    global n_flops

    n_flops, n_params = measure_model(model, IM_SIZE, IM_SIZE)
    
    torch.save(n_flops, os.path.join(args.save, 'flops.pth'))
    del(model)
    
    print(args)
    with open('{}/args.txt'.format(args.save), 'w') as f:
        print(args, file=f)

    model = getattr(models, args.arch)(args)
    model = torch.nn.DataParallel(model.cuda())
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    if args.resume:
        checkpoint = load_checkpoint(args)
        if checkpoint is not None:
            args.start_epoch = checkpoint['epoch'] + 1
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])

    cudnn.benchmark = True

    train_loader, val_loader, test_loader = get_dataloaders(args)

    if args.evalmode is not None:
        state_dict = torch.load(args.evaluate_from)['state_dict']
        model.load_state_dict(state_dict)

        if args.evalmode == 'anytime':
            validate(test_loader, model, criterion)
        elif args.evalmode == 'dynamic':
            dynamic_evaluate(model, test_loader, val_loader, args)
        else:
            validate(test_loader, model, criterion)
            dynamic_evaluate(model, test_loader, val_loader, args)
        return

    scores = ['epoch\tlr\ttrain_loss\tval_loss\ttrain_prec1'
              '\tval_prec1\ttrain_prec5\tval_prec5']

    for epoch in range(args.start_epoch, args.epochs):

        train_loss, train_prec1, train_prec5, lr = train(train_loader, model, criterion, optimizer, epoch)

        val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion)

        scores.append(('{}\t{:.3f}' + '\t{:.4f}' * 6)
                      .format(epoch, lr, train_loss, val_loss,
                              train_prec1, val_prec1, train_prec5, val_prec5))

        is_best = val_prec1 > best_prec1
        if is_best or (epoch == 299):
            best_prec1 = val_prec1
            best_epoch = epoch
            print('Best val_prec1 {}'.format(best_prec1))

        model_filename = 'checkpoint_%03d.pth.tar' % epoch
        save_checkpoint({
            'epoch': epoch,
            'arch': args.arch,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'optimizer': optimizer.state_dict(),
        }, args, is_best, model_filename, scores)

        model_path = '%s/save_models/checkpoint_%03d.pth.tar' % (args.save, epoch-1)
        if os.path.exists(model_path):
            os.remove(model_path)

    print('Best val_prec1: {:.4f} at epoch {}'.format(best_prec1, best_epoch))

    ### Test the final model
    print('********** Final prediction results **********')
    validate(test_loader, model, criterion)

    return
Example #16
def main():
    best_acc = 0

    opt = parse_option()
    wandb.init(project=opt.model_path.split("/")[-1], tags=opt.tags)
    wandb.config.update(opt)
    wandb.save('*.py')
    wandb.run.save()
        
        
    # dataloader
    train_loader, val_loader, meta_testloader, meta_valloader, n_cls, no_sample = get_dataloaders(opt)
    
    # model
    model_t = []
    if("," in opt.path_t):
        for path in opt.path_t.split(","):
            model_t.append(load_teacher(path, opt.model_t, n_cls, opt.dataset, opt.trans, opt.memfeature_size))
    else:
        model_t.append(load_teacher(opt.path_t, opt.model_t, n_cls, opt.dataset, opt.trans, opt.memfeature_size))
    
    model_s = create_model(opt.model_s, n_cls, opt.dataset, n_trans=opt.trans, embd_sz=opt.memfeature_size)
    if torch.cuda.device_count() > 1:
        print("second gpu count:", torch.cuda.device_count())
        model_s = nn.DataParallel(model_s)
    if opt.pretrained_path != "":
        model_s.load_state_dict(torch.load(opt.pretrained_path)['model'])
    wandb.watch(model_s)

    criterion_cls = nn.CrossEntropyLoss()
    criterion_div = DistillKL(opt.kd_T)
    criterion_kd = DistillKL(opt.kd_T)

    optimizer = optim.SGD(model_s.parameters(),
                          lr=opt.learning_rate,
                          momentum=opt.momentum,
                          weight_decay=opt.weight_decay)

    if torch.cuda.is_available():
        for m in model_t: 
            m.cuda()
        model_s.cuda()
        criterion_cls = criterion_cls.cuda()
        criterion_div = criterion_div.cuda()
        criterion_kd = criterion_kd.cuda()
        cudnn.benchmark = True
    
    MemBank = np.random.randn(no_sample, opt.memfeature_size)
    MemBank = torch.tensor(MemBank, dtype=torch.float).cuda()
    MemBankNorm = torch.norm(MemBank, dim=1, keepdim=True)
    MemBank = MemBank / (MemBankNorm + 1e-6)
           
    meta_test_acc = 0 
    meta_test_std = 0
    # routine: supervised model distillation
    for epoch in range(1, opt.epochs + 1):

        if opt.cosine:
            scheduler.step()
        else:
            adjust_learning_rate(epoch, opt, optimizer)
        print("==> training...")

        time1 = time.time()
        train_acc, train_loss, MemBank = train(epoch, train_loader, model_s, model_t , criterion_cls, criterion_div, criterion_kd, optimizer, opt, MemBank)
        time2 = time.time()
        print('epoch {}, total time {:.2f}'.format(epoch, time2 - time1))

        val_acc = 0
        val_loss = 0
        meta_val_acc = 0
        meta_val_std = 0
#         val_acc, val_acc_top5, val_loss = validate(val_loader, model_s, criterion_cls, opt)
        
        
#         #evaluate
#         start = time.time()
#         meta_val_acc, meta_val_std = meta_test(model_s, meta_valloader)
#         test_time = time.time() - start
#         print('Meta Val Acc: {:.4f}, Meta Val std: {:.4f}, Time: {:.1f}'.format(meta_val_acc, meta_val_std, test_time))
        
        #evaluate
        start = time.time()
        meta_test_acc, meta_test_std = 0,0 #meta_test(model_s, meta_testloader, use_logit=False)
        test_time = time.time() - start
        print('Meta Test Acc: {:.4f}, Meta Test std: {:.4f}, Time: {:.1f}'.format(meta_test_acc, meta_test_std, test_time))
        
        
        # regular saving
        if epoch % opt.save_freq == 0 or epoch==opt.epochs:
            print('==> Saving...')
            state = {
                'epoch': epoch,
                'model': model_s.state_dict(),
            }            
            save_file = os.path.join(opt.save_folder, 'model_'+str(wandb.run.name)+'.pth')
            torch.save(state, save_file)
            
            #wandb saving
            torch.save(state, os.path.join(wandb.run.dir, "model.pth"))
        
        wandb.log({'epoch': epoch, 
                   'Train Acc': train_acc,
                   'Train Loss':train_loss,
                   'Val Acc': val_acc,
                   'Val Loss':val_loss,
                   'Meta Test Acc': meta_test_acc,
                   'Meta Test std': meta_test_std,
                   'Meta Val Acc': meta_val_acc,
                   'Meta Val std': meta_val_std
                  })        
        
    #final report
    print("GENERATING FINAL REPORT")
    generate_final_report(model_s, opt, wandb)
    
    #remove output.txt log file 
    output_log_file = os.path.join(wandb.run.dir, "output.log")
    if os.path.isfile(output_log_file):
        os.remove(output_log_file)
    else:    ## Show an error ##
        print("Error: %s file not found" % output_log_file)
Example #17
warnings.filterwarnings("ignore")

if __name__ == "__main__":
    n_epochs = 25
    log_interval = 20

    # set random seed
    random_seed = 42
    torch.manual_seed(random_seed)

    # set torch device
    dev = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

    batch_size_train = 100  # specified in the paper
    train_loader, test_loader = get_dataloaders(500, 100, batch_size_train)

    model_dict = {
        # 4-layer convnets
        "Four_Layer_SG": Four_Layer_SG,
        "Four_layer": Four_Layer,
        #     #8-layer convnets
        "Eight_Layer_SG": Eight_Layer_SG,
        "Eight_Layer": Eight_Layer,
        # VGG Models
        "VGG16_SG": VGG16_SG2,
        "VGG16": VGG16_custom,
    }

    # with open("data.json", "r") as fp:
    #     data = json.load(fp)
Example #18
def start_tuning():
    lr = 0.0001
    dim_size = [128, 256, 512]
    num_layers = [1, 2]

    args = {
        'lr': lr,
        'dim_size': dim_size[0],
        'num_layers': 1,
        'window_size': 30,
        'embed_dim': 128,
        'batch_size': 20,
        'dropout': 0.3,
        'early_stop': 3,
        'amount_of_vocab': 15000,
    }

    args = Struct(**args)
    # print(args)

    best_args = args

    # load data
    train_loader, dev_loader, test_loader, vocab_size, vocab = get_dataloaders(
        args.batch_size, args.window_size, args.amount_of_vocab)

    best_perp = 0
    for size in dim_size:
        temp_args = args
        temp_args.dim_size = size
        print(
            "Current setting: \nHidden Dimension Size: {}\nNum of Hidden Layers: {}"
            .format(temp_args.dim_size, temp_args.num_layers))

        perp = setup(temp_args,
                     vocab_size,
                     embedding_matrix=None,
                     _train_loader=train_loader,
                     _dev_loader=dev_loader)
        if best_perp == 0:
            best_perp = perp
            best_args = temp_args
        elif perp < best_perp:
            best_perp = perp
            best_args = temp_args
        print("Best perplexity: {}, Current Perplexity: {}".format(
            best_perp, perp))
        print("-" * 20)

    for layer in num_layers:
        temp_args = args
        temp_args.num_layers = layer
        print(
            "Current setting: \nHidden Dimension Size: {}\nNum of Hidden Layers: {}"
            .format(temp_args.dim_size, temp_args.num_layers))

        perp = setup(args,
                     vocab_size,
                     embedding_matrix=None,
                     _train_loader=train_loader,
                     _dev_loader=dev_loader)
        if best_perp == 0:
            best_perp = perp
            best_args = temp_args
        elif perp < best_perp:
            best_perp = perp
            best_args = temp_args
        print("Best perplexity: {}, Current Perplexity: {}".format(
            best_perp, perp))
        print("-" * 20)

    #-----------------------------------------------
    # train lr = 0.001
    temp_args = args
    temp_args.lr = 0.001
    print(
        "Current setting: \nHidden Dimension Size: {}\nNum of Hidden Layers: {}"
        .format(temp_args.dim_size, temp_args.num_layers))

    perp = setup(args,
                 vocab_size,
                 embedding_matrix=None,
                 _train_loader=train_loader,
                 _dev_loader=dev_loader)
    if best_perp == 0:
        best_perp = perp
        best_args = temp_args
    elif perp < best_perp:
        best_perp = perp
        best_args = temp_args
    print("Best perplexity: {}, Current Perplexity: {}".format(
        best_perp, perp))
    print("-" * 20)
    #------------------------------------------------

    print("Best Perplexity: {}".format(best_perp))
    print("Best args: \nlr = {}\ndim size = {}\nnum layers = {}".format(
        best_args.lr, best_args.dim_size, best_args.num_layers))

    print(
        "Use the model with the best Hyper-parameters and report the test set perplexity"
    )
    _best_perp = setup(best_args,
                       vocab_size,
                       _train_loader=train_loader,
                       _dev_loader=test_loader)
    print(_best_perp)
Example #19
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        default="alex-net",
                        type=str,
                        help='alex-net/resnet/vgg/custom/ensemble')
    parser.add_argument('--load_from_file',
                        default="",
                        type=str,
                        help='relative path of model file to load')
    parser.add_argument('--epochs',
                        default=3,
                        type=int,
                        help='number of epochs')
    parser.add_argument('--cpu',
                        action='store_true',
                        help='use CPU instead of GPU')
    args = parser.parse_args()
    batch_size = 1
    model = args.model
    epochs = args.epochs
    model_path = args.load_from_file
    use_cpu = args.cpu

    if args.cpu:
        print("Using the CPU")
        device = torch.device("cpu")
    else:
        if torch.cuda.is_available():
            device = torch.device("cuda")
            print("There are %d GPU(s) available." % torch.cuda.device_count())
            print("We will use the GPU: ", torch.cuda.get_device_name(0))
        else:
            print("No GPU available, using the CPU instead")
            device = torch.device("cpu")

    tvmodel = None
    if model == "alex-net":
        tvmodel = models.alexnet(pretrained=True)
    elif model == "vgg":
        tvmodel = models.vgg11_bn(pretrained=True)
    elif model == 'resnet':
        tvmodel = models.resnet18(pretrained=True)
    elif model == "custom":
        tvmodel = custom.NovelNet()
    elif model == "ensemble":
        tvmodel = ensemble.EnsembleModel()
    else:
        print("Incorrect model was passed, exiting!")
        exit()

    print("Loading data...")
    train_dataloader, test_dataloader = get_dataloaders(device)  # torchtensors
    print("Done Loading Data.")

    trainer = Trainer(epochs=epochs,
                      batch_size=batch_size,
                      learning_rate=1e-5,
                      model=tvmodel,
                      model_name=model,
                      device=device)

    if model_path != "":
        print("Loading Model")
        trainer.model = torch.load(model_path)
        trainer.model.eval()
        print("Finished Loading Model")
    else:
        print("Fitting model...")
        trainer.fit(train_dataloader)
        print("Done Fitting Model")

    prediction, probs = trainer.predict(test_dataloader)
    prediction = np.array(prediction)

    probs = torch.cat(probs, dim=0)
    probs = np.array(probs)

    accuracy = accuracy_score(test_dataloader[:][1], prediction)
    print("Test accuracy: %.4f" % accuracy)

    f1 = f1_score(test_dataloader[:][1], prediction)
    print("Test F1: %.4f" % f1)

    auroc = roc_auc_score(test_dataloader[:][1], prediction)
    print("Test AUC_ROC: %.4f" % auroc)

    precision = precision_score(test_dataloader[:][1], prediction)
    print("Test Precision: %.4f" % precision)

    recall = recall_score(test_dataloader[:][1], prediction)
    print("Test Recall: %.4f" % recall)

    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(probs.shape[1]):
        fpr[i], tpr[i], _ = roc_curve(test_dataloader[:][1], probs[:, i])
        roc_auc[i] = roc_auc_score(test_dataloader[:][1], probs[:, i])

    plt.figure()
    #plt.plot(fpr[0], tpr[0], color='red', label='ROC curve (area = %0.4f)' % roc_auc[0])
    plt.plot(fpr[1],
             tpr[1],
             color='darkorange',
             label='ROC curve (area = %0.4f)' % roc_auc[1])
    plt.plot([0, 1], [0, 1], color='navy', linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic for Alex Net')
    plt.legend(loc="lower right")
    plt.savefig("alex-net-roc.png")
Example #20
def main():

    global args
    best_err1, best_epoch = 100., 0

    if args.data.startswith('cifar'): 
        IMAGE_SIZE = 32
    else:
        IMAGE_SIZE = 224

    if not os.path.exists(args.save):
        os.makedirs(args.save)

    model = getattr(models, args.arch)(args)
    print(model)
    n_flops, n_params = measure_model(model, IMAGE_SIZE, IMAGE_SIZE)
    # print("------------------------------")
    print(n_flops, n_params)
    # print("------------------------------")
    torch.save(n_flops, os.path.join(args.save, 'flop.pth'))
    del(model)
    torch.save(args, os.path.join(args.save, 'args.pth'))

    # return 

    model = getattr(models, args.arch)(args)
    # fout = open('model.txt', 'w')
    # print(model, file=fout)

    if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    # define optimizer
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        checkpoint = load_checkpoint(args)
        if checkpoint is not None:
            args.start_epoch = checkpoint['epoch'] + 1
            best_err1 = checkpoint['best_err1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])

    cudnn.benchmark = True

    train_loader, val_loader, test_loader = get_dataloaders(args)
    print("*************************************")
    print(args.use_valid, len(train_loader), len(val_loader), len(test_loader))
    print("*************************************")

    if args.evalmode is not None:
        m = torch.load(args.evaluate_from)
        model.load_state_dict(m['state_dict'])

        if args.evalmode == 'anytime':
            validate(test_loader, model, criterion)
        else:
            dynamic_evaluate(model, test_loader, val_loader, args)
        return

    # set up logging
    global log_print, f_log
    f_log = open(os.path.join(args.save, 'log.txt'), 'w')

    def log_print(*args):
        print(*args)
        print(*args, file=f_log)
    log_print('args:')
    log_print(args)
    print('model:', file=f_log)
    print(model, file=f_log)
    log_print('# of params:',
              str(sum([p.numel() for p in model.parameters()])))

    f_log.flush()

    scores = ['epoch\tlr\ttrain_loss\tval_loss\ttrain_err1'
              '\tval_err1\ttrain_err5\tval_err5']

    for epoch in range(args.start_epoch, args.epochs):

        # train for one epoch
        train_loss, train_err1, train_err5, lr = train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        # val_loss, val_err1, val_err5 = validate(val_loader, model, criterion)
        val_loss, val_err1, val_err5 = validate(test_loader, model, criterion)

        # save scores to a tsv file, rewrite the whole file to prevent
        # accidental deletion
        scores.append(('{}\t{:.3f}' + '\t{:.4f}' * 6)
                      .format(epoch, lr, train_loss, val_loss,
                              train_err1, val_err1, train_err5, val_err5))

        is_best = val_err1 < best_err1
        if is_best:
            best_err1 = val_err1
            best_epoch = epoch
            print('Best val_err1 {}'.format(best_err1))

        model_filename = 'checkpoint_%03d.pth.tar' % epoch
        save_checkpoint({
            'epoch': epoch,
            'arch': args.arch,
            'state_dict': model.state_dict(),
            'best_err1': best_err1,
            'optimizer': optimizer.state_dict(),
        }, args, is_best, model_filename, scores)

    print('Best val_err1: {:.4f} at epoch {}'.format(best_err1, best_epoch))
Example #21
def main(_run):
    args = tupperware(_run.config)
    args.finetune = False
    args.batch_size = 1

    device = args.device

    # Get data
    data = get_dataloaders(args)

    # Model
    G = get_model.model(args).to(device)

    # LPIPS Criterion
    lpips_criterion = PerceptualLoss(
        model="net-lin", net="alex", use_gpu=True, gpu_ids=[device]
    ).to(device)

    # Load Models
    G, _, global_step, start_epoch, loss = load_models(
        G, g_optimizer=None, args=args, tag=args.inference_mode
    )

    # Metric loggers
    val_metrics_dict = {"PSNR": 0.0, "SSIM": 0.0, "LPIPS_01": 0.0, "LPIPS_11": 0.0}
    avg_val_metrics = AvgLoss_with_dict(loss_dict=val_metrics_dict, args=args)

    logging.info(f"Loaded experiment {args.exp_name} trained for {start_epoch} epochs.")

    # Train, val and test paths
    val_path = args.output_dir / f"val_{args.inference_mode}_epoch_{start_epoch}"
    test_path = args.output_dir / f"test_{args.inference_mode}_epoch_{start_epoch}"

    if args.self_ensemble:
        val_path = val_path.parent / f"{val_path.name}_self_ensemble"
        test_path = test_path.parent / f"{test_path.name}_self_ensemble"

    val_path.mkdir(exist_ok=True, parents=True)
    test_path.mkdir(exist_ok=True, parents=True)

    with torch.no_grad():
        G.eval()

        # Run val for an epoch
        avg_val_metrics.reset()
        pbar = tqdm(range(len(data.val_loader) * args.batch_size), dynamic_ncols=True)

        for i, batch in enumerate(data.val_loader):
            metrics_dict = defaultdict(float)

            source, target, filename = batch
            source, target = (source.to(device), target.to(device))

            output = G(source)

            if args.self_ensemble:
                output_ensembled = [output]

                for k in ensemble_ops.keys():
                    # Forward transform
                    source_t = ensemble_ops[k][0](source)
                    output_t = G(source_t)
                    # Inverse transform
                    output_t = ensemble_ops[k][1](output_t)

                    output_ensembled.append(output_t)

                output_ensembled = torch.cat(output_ensembled, dim=0)

                output = torch.mean(output_ensembled, dim=0, keepdim=True)

            # PSNR
            output_255 = (output.mul(0.5).add(0.5) * 255.0).int()
            output_quant = (output_255.float() / 255.0).sub(0.5).mul(2)

            target_255 = (target.mul(0.5).add(0.5) * 255.0).int()
            target_quant = (target_255.float() / 255.0).sub(0.5).mul(2)

            # LPIPS
            metrics_dict["LPIPS_01"] += lpips_criterion(
                output_quant.mul(0.5).add(0.5), target_quant.mul(0.5).add(0.5)
            ).item()

            metrics_dict["LPIPS_11"] += lpips_criterion(
                output_quant, target_quant
            ).item()

            for e in range(args.batch_size):
                # Compute SSIM
                target_numpy = (
                    target[e].mul(0.5).add(0.5).permute(1, 2, 0).cpu().detach().numpy()
                )

                output_numpy = (
                    output[e].mul(0.5).add(0.5).permute(1, 2, 0).cpu().detach().numpy()
                )

                metrics_dict["PSNR"] += PSNR_numpy(target_numpy, output_numpy)
                metrics_dict["SSIM"] += ssim(
                    target_numpy,
                    output_numpy,
                    gaussian_weights=True,
                    use_sample_covariance=False,
                    multichannel=True,
                )

                # Dump to output folder
                path_output = val_path / filename[e]

                cv2.imwrite(
                    str(path_output), (output_numpy[:, :, ::-1] * 255.0).astype(np.uint8)
                )

            metrics_dict["SSIM"] = metrics_dict["SSIM"] / args.batch_size
            metrics_dict["PSNR"] = metrics_dict["PSNR"] / args.batch_size

            avg_val_metrics += metrics_dict

            pbar.update(args.batch_size)
            pbar.set_description(
                f"Val Epoch : {start_epoch} Step: {global_step}| PSNR: {avg_val_metrics.loss_dict['PSNR']:.3f} | SSIM: {avg_val_metrics.loss_dict['SSIM']:.3f} | LPIPS 01: {avg_val_metrics.loss_dict['LPIPS_01']:.3f} | LPIPS 11: {avg_val_metrics.loss_dict['LPIPS_11']:.3f}"
            )

        with open(val_path / "metrics.txt", "w") as f:
            L = [
                f"exp_name:{args.exp_name} trained for {start_epoch} epochs\n",
                "Val Metrics \n\n",
            ]
            L = L + [f"{k}:{v}\n" for k, v in avg_val_metrics.loss_dict.items()]
            f.writelines(L)

        if data.test_loader:
            pbar = tqdm(
                range(len(data.test_loader) * args.batch_size), dynamic_ncols=True
            )

            for i, batch in enumerate(data.test_loader):

                source, filename = batch
                source = source.to(device)

                output = G(source)

                if args.self_ensemble:
                    output_ensembled = [output]

                    for k in ensemble_ops.keys():
                        # Forward transform
                        source_t = ensemble_ops[k][0](source)
                        output_t = G(source_t)
                        # Inverse transform
                        output_t = ensemble_ops[k][1](output_t)

                        output_ensembled.append(output_t)

                    output_ensembled = torch.cat(output_ensembled, dim=0)
                    output = torch.mean(output_ensembled, dim=0, keepdim=True)

                for e in range(args.batch_size):
                    output_numpy = (
                        output[e]
                        .mul(0.5)
                        .add(0.5)
                        .permute(1, 2, 0)
                        .cpu()
                        .detach()
                        .numpy()
                    )

                    # Dump to output folder
                    path_output = test_path / filename[e]

                    cv2.imwrite(
                        str(path_output),
                        (output_numpy[:, :, ::-1] * 255.0).astype(np.uint8),
                    )

                pbar.update(args.batch_size)
                pbar.set_description(f"Test Epoch : {start_epoch} Step: {global_step}")
Example #22
def main():

    global args
    best_prec1, best_epoch = 0.0, 0

    if not os.path.exists(args.save):
        os.makedirs(args.save)

    torch.save(args, os.path.join(args.save, 'args.pth'))

    model = getattr(models, args.arch)(args)

    print(model)

    if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()

    criterion = nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.resume:
        checkpoint = load_checkpoint(args)
        if checkpoint is not None:
            args.start_epoch = checkpoint['epoch'] + 1
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])

    cudnn.benchmark = True

    train_loader, val_loader, test_loader = get_dataloaders(args)

    if args.evalmode is not None:
        state_dict = torch.load(args.evaluate_from)['state_dict']
        model.load_state_dict(state_dict)

        if args.evalmode == 'anytime':
            validate(val_loader, model, criterion)
        else:
            dynamic_evaluate(model, test_loader, val_loader, args)
        return

    scores = [
        'epoch\tlr\ttrain_loss\tval_loss\ttrain_prec1'
        '\tval_prec1\ttrain_prec5\tval_prec5'
    ]

    for epoch in range(args.start_epoch, args.epochs):

        train_loss, train_prec1, train_prec5, lr = train(
            train_loader, model, criterion, optimizer, epoch)

        val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion)

        scores.append(
            ('{}\t{:.3f}' + '\t{:.4f}' * 6).format(epoch, lr, train_loss,
                                                   val_loss, train_prec1,
                                                   val_prec1, train_prec5,
                                                   val_prec5))

        is_best = val_prec1 > best_prec1
        if is_best:
            best_prec1 = val_prec1
            best_epoch = epoch
            print('Best val_prec1 {}'.format(best_prec1))

        model_filename = 'checkpoint_%03d.pth.tar' % epoch
        save_checkpoint(
            {
                'epoch': epoch,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, args, is_best, model_filename, scores)

    print('Best val_prec1: {:.4f} at epoch {}'.format(best_prec1, best_epoch))
Example #23
def train(config):
	# Initialize the device which to run the model on
	device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
	print("device:", device)
	# Get torch loaders for training and test data
	train_loader, test_loader = get_dataloaders(config.dataset, 
												markov_order=config.order+1, batch_size=config.batch_size)
	vocab_size = train_loader.dataset.vocab_size
	
	# Load single test batch for evaluation
	test_X, test_Y, test_xl, test_yl = next(iter(test_loader))
	
	# If we want the continue training and the given filename exists, load all params
	# Otherwise just start training from scratch
	if config.continue_training:
		file_path = config.model_dir+config.continue_training
		if os.path.isfile(file_path):
			print('Loading checkpoint \'{}\''.format(file_path))
			checkpoint = torch.load(file_path)
			config = checkpoint['config'] # Use saved config
			config.start_epoch = checkpoint['epoch']			
			print('Loaded  checkpoint \'{}\' (epoch {})'.format(file_path, checkpoint['epoch']))
			config.continue_training = file_path # To make sure it is no empty string
		else:
			print('No checkpoint found at \'{}\''.format(file_path))
			sys.exit('Please check the filename.')

	teacher_force_ratio = config.teacher_force_ratio

	# Define model
	embedding = nn.Embedding(vocab_size, config.embedding_dim, padding_idx=config.pad_token)

	if config.adasoft:
		output_size = 1024
	else:
		output_size = vocab_size

	# Define encoder
	if config.encoder_type == 'BOW': # Bag of Words
		encoder = BOWEncoder(vocab_size, config.embedding_dim, output_size)
	elif config.encoder_type == 'Conv': # Convolutions
		# 4 layers -> minimal X length = 2^4
		encoder = ConvEncoder(vocab_size, config.embedding_dim, 4, config.hidden_size, output_size)
	elif config.encoder_type == 'Attn': # Attention
		encoder = AttnEncoder(vocab_size, config.embedding_dim, config.order)

	# Define models and optimizer
	nnlm = NNLM(config.order, vocab_size, config.embedding_dim, [config.hidden_size]*3, output_size)
	model = FBModel(embedding, encoder, nnlm).to(device)
	optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)

	# If we want to continue training, load the existing model and optimizer
	if config.continue_training and checkpoint != None:
		print('Model and optimizer are copied from checkpoint.')
		model.load_state_dict(checkpoint['model'])
		optimizer.load_state_dict(checkpoint['optimizer'])
		
	# Define loss
	if config.adasoft:
		criterion = nn.AdaptiveLogSoftmaxWithLoss(1024, vocab_size, [100, 1000, 5000, 10000]).to(device)
	else:
		# EXPERIMENTAL: set UNK weight lower (maybe not needed with better vocab)
		loss_weights = torch.ones(vocab_size).to(device)
		if 'UNK' in train_loader.dataset.w2i:
			loss_weights[train_loader.dataset.w2i['UNK']] = 0.3
		criterion = nn.CrossEntropyLoss(weight=loss_weights, ignore_index=0)
	
	if config.start_epoch >= config.num_epochs:
		sys.exit('Already trained for specified amount of epochs. Consider increasing num_epochs.')
	else:
		print('Start training.')
	losses = []
	for epoch in range(config.start_epoch, config.num_epochs):
		# TRAIN
		num_teacherforce = [0, 0]
		num_batches = len(train_loader)
		starttime = time.time()
		for batch_idx, (X, Y, xlen, ylen) in enumerate(train_loader):
			X = X.to(device)
			Y = Y.to(device)
			xlen = xlen.to(device)
			# Because we have history of size config.order, actual y_length is total y_length - order
			ylen = (ylen-config.order).to(device)
			# Make ngrams and targets
			y_c = torch.stack([Y[:, i:i+config.order] for i in range(0, Y.size(1)-config.order)], 1)
			y_t = Y[:, config.order:]

			# Train step
			model.train()
			optimizer.zero_grad()

			# No teacher forcing
			if np.random.random() > teacher_force_ratio:
				num_teacherforce[0] += 1
				y_c = y_c[:,0:1]
				out_length = y_t.size(1)
				out = model(X, y_c, xlen, ylen, output_length=out_length, teacher_forcing=False)
			else:
				num_teacherforce[1] += 1
				out = model(X, y_c, xlen, ylen, teacher_forcing=True)
			# Loss, optimization step
			out = out.reshape(-1, output_size)
			y_t = y_t.reshape(-1)
			loss = criterion(out, y_t)
			if config.adasoft:
				loss = loss.loss
			losses.append(loss.item())
			loss.backward()
			optimizer.step()
			if not batch_idx%20:
				if config.adasoft:
					pred = criterion.predict(out)
				else:
					pred = torch.argmax(out, -1)
				acc = accuracy(pred, y_t)
				print('[Epoch {}/{}], step {:04d}/{:04d} loss {:.4f} acc {:.4f} time {:.4f}'.format(epoch +1, config.num_epochs, batch_idx, num_batches, loss.item(), acc.item(), time.time() - starttime ))
				starttime = time.time()
			# Save model every final step of each 10 epochs or last epoch
			#if (epoch + 1 % 10 == 0 or epoch + 1 == config.num_epochs) and batch_idx == num_batches - 1:
			#	torch.save(model, config.output_dir + '/test_model_epoch_'+str(epoch+1)+'.pt')
			if batch_idx % 500 == 0:
				state = create_state(config, model, optimizer, criterion, epoch, loss, accuracy)
				is_best_model = check_is_best(config.model_dir, config.encoder_type, config.embedding_dim, config.hidden_size, loss.item())
				save_model(state, is_best_model, config.model_dir, config.encoder_type, config.embedding_dim, config.hidden_size, loss.item())
								
			if has_converged(losses):
				print('Model has converged.')
				return
		# Decay teacherforcing
		teacher_force_ratio *= config.teacher_force_decay

		# EVAL  # TODO: separate script or move to test.py
		model.eval()

		# Choose random sample
		test_idx = np.random.randint(config.batch_size)

		# Load random sample from test batch
		xlen = test_xl[[test_idx]].to(device)
		Y = test_Y[[test_idx],:].to(device)
		X = test_X[[test_idx],:xlen].to(device)
		ylen = torch.Tensor([1]).to(device)

		# Greedy Search
		greedy_sequence = greedy_search(model, X, Y, xlen, ylen, test_loader)

		# Beam Search
		all_sequences = beam_search(config, model, X, Y, xlen, ylen)

		# Target sequence
		y_t = Y[:, config.order:]
		correct = y_t.cpu()[-1].numpy()
		correct = [test_loader.dataset.i2w[i] for i in correct if i > 0]

		# print results
		print("greedy :", greedy_sequence)
		for counter, sequence in enumerate(all_sequences):
			print("number", counter+1, ":", [test_loader.dataset.i2w[i] for i in sequence[0].squeeze().cpu().numpy() if i > 1] )
		print('correct :', correct)
		print()
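
# The training loop above stops early via has_converged(losses), a helper that is not
# shown in this example. A minimal sketch of such a check, assuming convergence is
# declared when the running mean of recent losses stops improving; the window size and
# tolerance below are illustrative assumptions, not values from the original code.
import numpy as np

def has_converged(losses, window=100, tol=1e-4):
	"""Return True when the mean loss over the last `window` steps is within
	`tol` of the mean over the `window` steps before that."""
	if len(losses) < 2 * window:
		return False
	recent = np.mean(losses[-window:])
	previous = np.mean(losses[-2 * window:-window])
	return abs(previous - recent) < tol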
Example #24
def main():

    global args
    best_prec1, best_epoch = 0.0, 0

    if not os.path.exists(args.save):
        os.makedirs(args.save)

    if args.data.startswith('cifar'):
        IM_SIZE = 32
    else:
        IM_SIZE = 224

    model = getattr(models, args.arch)(args)
    # Compute FLOPs and parameter count from the model structure
    n_flops, n_params = measure_model(model, IM_SIZE, IM_SIZE)
    # Save the model's FLOPs count
    torch.save(n_flops, os.path.join(args.save, 'flops.pth'))
    del (model)

    model = getattr(models, args.arch)(args)

    if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()

    criterion = nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Resume from a checkpoint if requested
    if args.resume:
        checkpoint = load_checkpoint(args)
        if checkpoint is not None:
            args.start_epoch = checkpoint['epoch'] + 1
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])

    cudnn.benchmark = True

    train_loader, val_loader, test_loader = get_dataloaders(args)

    # Two evaluation modes: 'anytime' and 'dynamic'
    if args.evalmode is not None:
        # args.evaluate_from is the path to the stored model
        state_dict = torch.load(args.evaluate_from)['state_dict']
        model.load_state_dict(state_dict)
        # Each mode is handled differently
        if args.evalmode == 'anytime':
            validate(test_loader, model, criterion)
        else:
            dynamic_evaluate(model, test_loader, val_loader, args)
        return
    # Training starts here
    scores = [
        'epoch\tlr\ttrain_loss\tval_loss\ttrain_prec1'
        '\tval_prec1\ttrain_prec5\tval_prec5'
    ]

    for epoch in range(args.start_epoch, args.epochs):

        train_loss, train_prec1, train_prec5, lr = train(
            train_loader, model, criterion, optimizer, epoch)

        val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion)

        scores.append(
            ('{}\t{:.3f}' + '\t{:.4f}' * 6).format(epoch, lr, train_loss,
                                                   val_loss, train_prec1,
                                                   val_prec1, train_prec5,
                                                   val_prec5))

        is_best = val_prec1 > best_prec1
        if is_best:
            best_prec1 = val_prec1
            best_epoch = epoch
            print('Best val_prec1 {}'.format(best_prec1))

        model_filename = 'checkpoint_%03d.pth.tar' % epoch
        save_checkpoint(
            {
                'epoch': epoch,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, args, is_best, model_filename, scores)

    print('Best val_prec1: {:.4f} at epoch {}'.format(best_prec1, best_epoch))

    ### Test the final model

    print('********** Final prediction results **********')
    validate(test_loader, model, criterion)

    return
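
# load_checkpoint(args) above is only referenced, not defined. A minimal sketch, assuming
# checkpoints live directly under args.save with the 'checkpoint_%03d.pth.tar' naming used
# in this example and that the newest file should be resumed; the original helper may
# differ (e.g. use a separate 'latest' record).
import glob
import os
import torch

def load_checkpoint(args):
    candidates = sorted(glob.glob(os.path.join(args.save, 'checkpoint_*.pth.tar')))
    if not candidates:
        return None
    latest = candidates[-1]
    print('Resuming from {}'.format(latest))
    # Expected keys: 'epoch', 'best_prec1', 'state_dict', 'optimizer'
    return torch.load(latest)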
Example #25
def main():

    opt = parse_option()
    wandb.init(project=opt.model_path.split("/")[-1], tags=opt.tags)
    wandb.config.update(opt)
    wandb.save('*.py')
    wandb.run.save()

    train_loader, val_loader, meta_testloader, meta_valloader, n_cls, no_sample = get_dataloaders(
        opt)
    # model
    model = create_model(opt.model,
                         n_cls,
                         opt.dataset,
                         n_trans=opt.trans,
                         embd_sz=opt.memfeature_size)
    wandb.watch(model)

    # optimizer
    if opt.adam:
        print("Adam")
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=opt.learning_rate,
                                     weight_decay=0.0005)
    else:
        print("SGD")
        optimizer = optim.SGD(model.parameters(),
                              lr=opt.learning_rate,
                              momentum=opt.momentum,
                              weight_decay=opt.weight_decay)

    criterion = nn.CrossEntropyLoss()

    if torch.cuda.is_available():
        if opt.n_gpu > 1:
            model = nn.DataParallel(model)
        model = model.cuda()
        criterion = criterion.cuda()
        cudnn.benchmark = True

    # set cosine annealing scheduler
    if opt.cosine:
        eta_min = opt.learning_rate * (opt.lr_decay_rate**3)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, opt.epochs, eta_min, -1)

    MemBank = np.random.randn(no_sample, opt.memfeature_size)
    MemBank = torch.tensor(MemBank, dtype=torch.float).cuda()
    MemBankNorm = torch.norm(MemBank, dim=1, keepdim=True)
    MemBank = MemBank / (MemBankNorm + 1e-6)

    # routine: supervised pre-training
    for epoch in range(1, opt.epochs + 1):
        if opt.cosine:
            scheduler.step()
        else:
            adjust_learning_rate(epoch, opt, optimizer)
        print("==> training...")

        time1 = time.time()
        train_acc, train_loss, MemBank = train(epoch, train_loader, model,
                                               criterion, optimizer, opt,
                                               MemBank)
        time2 = time.time()
        print('epoch {}, total time {:.2f}'.format(epoch, time2 - time1))

        val_acc, val_acc_top5, val_loss = 0, 0, 0  #validate(val_loader, model, criterion, opt)

        #validate
        start = time.time()
        meta_val_acc, meta_val_std = 0, 0  #meta_test(model, meta_valloader)
        test_time = time.time() - start
        print(
            'Meta Val Acc : {:.4f}, Meta Val std: {:.4f}, Time: {:.1f}'.format(
                meta_val_acc, meta_val_std, test_time))

        #evaluate
        start = time.time()
        meta_test_acc, meta_test_std = 0, 0  #meta_test(model, meta_testloader)
        test_time = time.time() - start
        print('Meta Test Acc: {:.4f}, Meta Test std: {:.4f}, Time: {:.1f}'.
              format(meta_test_acc, meta_test_std, test_time))

        # regular saving
        if epoch % opt.save_freq == 0 or epoch == opt.epochs:
            print('==> Saving...')
            state = {
                'epoch': epoch,
                'optimizer': optimizer.state_dict(),
                'model': model.state_dict(),
            }
            save_file = os.path.join(opt.save_folder,
                                     'model_' + str(wandb.run.name) + '.pth')
            torch.save(state, save_file)

            #wandb saving
            torch.save(state, os.path.join(wandb.run.dir, "model.pth"))

        wandb.log({
            'epoch': epoch,
            'Train Acc': train_acc,
            'Train Loss': train_loss,
            'Val Acc': val_acc,
            'Val Loss': val_loss,
            'Meta Test Acc': meta_test_acc,
            'Meta Test std': meta_test_std,
            'Meta Val Acc': meta_val_acc,
            'Meta Val std': meta_val_std
        })

    #final report
    print("GENERATING FINAL REPORT")
    generate_final_report(model, opt, wandb)

    #remove output.txt log file
    output_log_file = os.path.join(wandb.run.dir, "output.log")
    if os.path.isfile(output_log_file):
        os.remove(output_log_file)
    else:  ## Show an error ##
        print("Error: %s file not found" % output_log_file)
Example #26
def main(args):
    #######################################################################################
    ##   Note:
    ##   Load the model
    #######################################################################################
    best_prec1, best_epoch = 0.0, 0
    model = getattr(models, args.arch)(args)
    n_flops, n_params = measure_model(model, IM_SIZE, IM_SIZE)
    torch.save(n_flops, os.path.join(args.save, 'flops.pth'))
    del (model)
    model = getattr(models, args.arch)(args)

    if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()

    #######################################################################################
    ##   Note:
    ##   Set up the loss criterion
    #######################################################################################
    criterion = nn.CrossEntropyLoss().cuda()

    #######################################################################################
    ##   Note:
    ##   Set up the optimizer
    #######################################################################################
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    #######################################################################################
    ##   Note:
    ##   Resume interrupted training
    #######################################################################################
    if args.resume:
        checkpoint = load_checkpoint(args)
        if checkpoint is not None:
            args.start_epoch = checkpoint['epoch'] + 1
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])

    cudnn.benchmark = True

    #######################################################################################
    ##   Note:
    ##   Load the datasets
    #######################################################################################
    train_loader, val_loader, test_loader = get_dataloaders(args)

    #######################################################################################
    ##   Note:
    ##   Choose the inference mode (ImageNet: dynamic)
    #######################################################################################
    if args.evalmode is not None:
        state_dict = torch.load(args.evaluate_from)['state_dict']
        model.load_state_dict(state_dict)

        if args.evalmode == 'anytime':
            validate(test_loader, model, criterion)
        else:
            dynamic_evaluate(model, test_loader, val_loader, args)
        return

    scores = [
        'epoch\tlr\ttrain_loss\tval_loss\ttrain_prec1'
        '\tval_prec1\ttrain_prec5\tval_prec5'
    ]

    for epoch in range(args.start_epoch, args.epochs):

        train_loss, train_prec1, train_prec5, lr = train(
            train_loader, model, criterion, optimizer, epoch)

        val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion)

        scores.append(
            ('{}\t{:.3f}' + '\t{:.4f}' * 6).format(epoch, lr, train_loss,
                                                   val_loss, train_prec1,
                                                   val_prec1, train_prec5,
                                                   val_prec5))

        is_best = val_prec1 > best_prec1
        if is_best:
            best_prec1 = val_prec1
            best_epoch = epoch
            print('Best val_prec1 {}'.format(best_prec1))

        model_filename = 'checkpoint_%03d.pth.tar' % epoch
        save_checkpoint(
            {
                'epoch': epoch,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, args, is_best, model_filename, scores)

    print('Best val_prec1: {:.4f} at epoch {}'.format(best_prec1, best_epoch))

    ### Test the final model

    print('********** Final prediction results **********')
    validate(test_loader, model, criterion)

    return
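
# save_checkpoint(state, args, is_best, filename, scores) is used above but not shown.
# A minimal sketch of what such a helper typically does: write the checkpoint, rewrite
# the score log in full, and copy the checkpoint when it is the best so far. The paths
# and file names here are illustrative assumptions.
import os
import shutil
import torch

def save_checkpoint(state, args, is_best, filename, scores):
    os.makedirs(args.save, exist_ok=True)
    model_path = os.path.join(args.save, filename)
    torch.save(state, model_path)
    # Rewrite the whole tsv each time so an interrupted run never leaves a truncated log
    with open(os.path.join(args.save, 'scores.tsv'), 'w') as f:
        f.write('\n'.join(scores) + '\n')
    if is_best:
        shutil.copyfile(model_path, os.path.join(args.save, 'model_best.pth.tar'))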
Example #27
    train_util.set_logger(os.path.join(args.model_dir, 'train.log'))

    torch.manual_seed(0)
    if params.use_gpu:
        logging.info("GPU found")
        torch.cuda.manual_seed(0)
    else:
        logging.info("GPU not found")

    logging.info("Loading data")

    if params.data_type == "glyph_raster":
        data_types = ["image", "semantic"]
        print("Note : using images")
        dataloaders = get_dataloaders(params, ["train", "val"],
                                      data_types,
                                      character=params.character)
    elif params.data_type == "glyph_vector":
        data_types = ["svg", "semantic"]
        dataloaders = get_dataloaders(params, ["train", "val"],
                                      data_types,
                                      character=params.character)
    else:
        raise Exception("Invalid data type requested")

    train_dataloader = dataloaders["train"]
    val_dataloader = dataloaders["val"]

    logging.info("- done")

    Model = None
Example #28
def main(_run):
    args = tupperware(_run.config)

    # Dir init
    dir_init(args, is_local_rank_0=is_local_rank_0)

    # Ignore warnings
    if not is_local_rank_0:
        warnings.filterwarnings("ignore")

    # Multi-GPU setup
    if args.distdataparallel:
        rank = int(os.environ["LOCAL_RANK"])
        torch.cuda.set_device(rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        world_size = dist.get_world_size()
    else:
        rank = args.device
        world_size = 1

    # Get data
    data = get_dataloaders(args, is_local_rank_0=is_local_rank_0)

    # Model
    G = get_model.model(args).to(rank)

    # Optimisers
    g_optimizer, g_lr_scheduler = get_optimisers(G, args)

    # Load Models
    G, g_optimizer, global_step, start_epoch, loss = load_models(
        G, g_optimizer, args, is_local_rank_0=is_local_rank_0)

    if args.distdataparallel:
        # Wrap with Distributed Data Parallel
        G = torch.nn.parallel.DistributedDataParallel(G,
                                                      device_ids=[rank],
                                                      output_device=rank)

    # Log no of GPUs
    if is_local_rank_0:
        world_size = int(
            os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
        logging.info("Using {} GPUs".format(world_size))

        writer = SummaryWriter(log_dir=str(args.run_dir))
        writer.add_text("Args", pprint_args(args))

        # Pbars
        train_pbar = tqdm(range(len(data.train_loader) * args.batch_size),
                          dynamic_ncols=True)

        val_pbar = (tqdm(range(len(data.val_loader) * args.batch_size),
                         dynamic_ncols=True) if data.val_loader else None)

        test_pbar = (tqdm(range(len(data.test_loader) * args.batch_size),
                          dynamic_ncols=True) if data.test_loader else None)

    # Initialise losses
    g_loss = GLoss(args).to(rank)

    # Compatibility with checkpoints without global_step
    if not global_step:
        global_step = start_epoch * len(data.train_loader) * args.batch_size

    start_epoch = global_step // len(data.train_loader.dataset)

    # Exponential averaging of loss
    loss_dict = {
        "total_loss": 0.0,
        "image_loss": 0.0,
        "cobi_rgb_loss": 0.0,
        "train_PSNR": 0.0,
    }

    metric_dict = {"PSNR": 0.0, "total_loss": 0.0}
    avg_metrics = AvgLoss_with_dict(loss_dict=metric_dict, args=args)
    exp_loss = ExpLoss_with_dict(loss_dict=loss_dict, args=args)

    try:
        for epoch in range(start_epoch, args.num_epochs):
            # Train mode
            G.train()

            if is_local_rank_0:
                train_pbar.reset()

            if args.distdataparallel:
                data.train_loader.sampler.set_epoch(epoch)

            for i, batch in enumerate(data.train_loader):
                # allows for interrupted training
                if ((global_step + 1) %
                    (len(data.train_loader) * args.batch_size)
                        == 0) and (epoch == start_epoch):
                    break

                loss_dict = defaultdict(float)

                source, target, filename = batch
                source, target = (source.to(rank), target.to(rank))

                # ------------------------------- #
                # Update Gen
                # ------------------------------- #
                G.zero_grad()
                output = G(source)

                g_loss(output=output, target=target)

                g_loss.total_loss.backward()
                g_optimizer.step()

                # Update lr schedulers
                g_lr_scheduler.step(epoch + i / len(data.train_loader))

                # if is_local_rank_0:
                # Train PSNR
                loss_dict["train_PSNR"] += PSNR(output, target)

                # Accumulate all losses
                loss_dict["total_loss"] += g_loss.total_loss
                loss_dict["image_loss"] += g_loss.image_loss
                loss_dict["cobi_rgb_loss"] += g_loss.cobi_rgb_loss

                exp_loss += reduce_loss_dict(loss_dict, world_size=world_size)

                global_step += args.batch_size * world_size

                if is_local_rank_0:
                    train_pbar.update(args.batch_size)
                    train_pbar.set_description(
                        f"Epoch: {epoch + 1} | Gen loss: {exp_loss.loss_dict['total_loss']:.3f} "
                    )

                # Write lr rates and metrics
                if is_local_rank_0 and i % (args.log_interval) == 0:
                    gen_lr = g_optimizer.param_groups[0]["lr"]
                    writer.add_scalar("lr/gen", gen_lr, global_step)

                    for metric in exp_loss.loss_dict:
                        writer.add_scalar(
                            f"Train_Metrics/{metric}",
                            exp_loss.loss_dict[metric],
                            global_step,
                        )

                    # Display images at end of epoch
                    n = np.min([3, args.batch_size])
                    for e in range(n):
                        source_vis = source[e].mul(0.5).add(0.5)
                        target_vis = target[e].mul(0.5).add(0.5)
                        output_vis = output[e].mul(0.5).add(0.5)

                        writer.add_image(
                            f"Source/Train_{e + 1}",
                            source_vis.cpu().detach(),
                            global_step,
                        )

                        writer.add_image(
                            f"Target/Train_{e + 1}",
                            target_vis.cpu().detach(),
                            global_step,
                        )

                        writer.add_image(
                            f"Output/Train_{e + 1}",
                            output_vis.cpu().detach(),
                            global_step,
                        )

                        writer.add_text(f"Filename/Train_{e + 1}", filename[e],
                                        global_step)

            if is_local_rank_0:
                # Save ckpt at end of epoch
                logging.info(
                    f"Saving weights at epoch {epoch + 1} global step {global_step}"
                )

                # Save weights
                save_weights(
                    epoch=epoch,
                    global_step=global_step,
                    G=G,
                    g_optimizer=g_optimizer,
                    loss=loss,
                    tag="latest",
                    args=args,
                )

                train_pbar.refresh()

            # Run val and test only occasionally
            if epoch % args.val_test_epoch_interval != 0:
                continue

            # Val and test
            with torch.no_grad():
                G.eval()

                if data.val_loader:
                    avg_metrics.reset()
                    if is_local_rank_0:
                        val_pbar.reset()

                    filename_static = []

                    for i, batch in enumerate(data.val_loader):
                        metrics_dict = defaultdict(float)

                        source, target, filename = batch
                        source, target = (source.to(rank), target.to(rank))

                        output = G(source)
                        g_loss(output=output, target=target)

                        # Total loss
                        metrics_dict["total_loss"] += g_loss.total_loss
                        # PSNR
                        metrics_dict["PSNR"] += PSNR(output, target)

                        avg_metrics += reduce_loss_dict(metrics_dict,
                                                        world_size=world_size)

                        # Save image
                        if args.static_val_image in filename:
                            filename_static = filename
                            source_static = source
                            target_static = target
                            output_static = output

                        if is_local_rank_0:
                            val_pbar.update(args.batch_size)
                            val_pbar.set_description(
                                f"Val Epoch : {epoch + 1} Step: {global_step}| PSNR: {avg_metrics.loss_dict['PSNR']:.3f}"
                            )
                    if is_local_rank_0:
                        for metric in avg_metrics.loss_dict:
                            writer.add_scalar(
                                f"Val_Metrics/{metric}",
                                avg_metrics.loss_dict[metric],
                                global_step,
                            )

                        n = np.min([3, args.batch_size])
                        for e in range(n):
                            source_vis = source[e].mul(0.5).add(0.5)
                            target_vis = target[e].mul(0.5).add(0.5)
                            output_vis = output[e].mul(0.5).add(0.5)

                            writer.add_image(
                                f"Source/Val_{e+1}",
                                source_vis.cpu().detach(),
                                global_step,
                            )
                            writer.add_image(
                                f"Target/Val_{e+1}",
                                target_vis.cpu().detach(),
                                global_step,
                            )
                            writer.add_image(
                                f"Output/Val_{e+1}",
                                output_vis.cpu().detach(),
                                global_step,
                            )

                            writer.add_text(f"Filename/Val_{e + 1}",
                                            filename[e], global_step)

                        for e, name in enumerate(filename_static):
                            if name == args.static_val_image:
                                source_vis = source_static[e].mul(0.5).add(0.5)
                                target_vis = target_static[e].mul(0.5).add(0.5)
                                output_vis = output_static[e].mul(0.5).add(0.5)

                                writer.add_image(
                                    f"Source/Val_Static",
                                    source_vis.cpu().detach(),
                                    global_step,
                                )
                                writer.add_image(
                                    f"Target/Val_Static",
                                    target_vis.cpu().detach(),
                                    global_step,
                                )
                                writer.add_image(
                                    f"Output/Val_Static",
                                    output_vis.cpu().detach(),
                                    global_step,
                                )

                                writer.add_text(
                                    f"Filename/Val_Static",
                                    filename_static[e],
                                    global_step,
                                )

                                break

                        logging.info(
                            f"Saving weights at END OF epoch {epoch + 1} global step {global_step}"
                        )

                        # Save weights
                        if avg_metrics.loss_dict["total_loss"] < loss:
                            is_min = True
                            loss = avg_metrics.loss_dict["total_loss"]
                        else:
                            is_min = False

                        # Save weights
                        save_weights(
                            epoch=epoch,
                            global_step=global_step,
                            G=G,
                            g_optimizer=g_optimizer,
                            loss=loss,
                            is_min=is_min,
                            args=args,
                            tag="best",
                        )

                        val_pbar.refresh()

                # Test
                if data.test_loader:
                    filename_static = []

                    if is_local_rank_0:
                        test_pbar.reset()

                    for i, batch in enumerate(data.test_loader):
                        source, filename = batch
                        source = source.to(rank)

                        output = G(source)

                        # Save image
                        if args.static_test_image in filename:
                            filename_static = filename
                            source_static = source
                            output_static = output

                        if is_local_rank_0:
                            test_pbar.update(args.batch_size)
                            test_pbar.set_description(
                                f"Test Epoch : {epoch + 1} Step: {global_step}"
                            )

                    if is_local_rank_0:
                        n = np.min([3, args.batch_size])
                        for e in range(n):
                            source_vis = source[e].mul(0.5).add(0.5)
                            output_vis = output[e].mul(0.5).add(0.5)

                            writer.add_image(
                                f"Source/Test_{e+1}",
                                source_vis.cpu().detach(),
                                global_step,
                            )

                            writer.add_image(
                                f"Output/Test_{e+1}",
                                output_vis.cpu().detach(),
                                global_step,
                            )

                            writer.add_text(f"Filename/Test_{e + 1}",
                                            filename[e], global_step)

                        for e, name in enumerate(filename_static):
                            if name == args.static_test_image:
                                source_vis = source_static[e]
                                output_vis = output_static[e]

                                writer.add_image(
                                    f"Source/Test_Static",
                                    source_vis.cpu().detach(),
                                    global_step,
                                )

                                writer.add_image(
                                    f"Output/Test_Static",
                                    output_vis.cpu().detach(),
                                    global_step,
                                )

                                writer.add_text(
                                    f"Filename/Test_Static",
                                    filename_static[e],
                                    global_step,
                                )

                                break

                        test_pbar.refresh()

    except KeyboardInterrupt:
        if is_local_rank_0:
            logging.info("-" * 89)
            logging.info("Exiting from training early. Saving models")

            for pbar in [train_pbar, val_pbar, test_pbar]:
                if pbar:
                    pbar.refresh()

            save_weights(
                epoch=epoch,
                global_step=global_step,
                G=G,
                g_optimizer=g_optimizer,
                loss=loss,
                is_min=True,
                args=args,
            )
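
# reduce_loss_dict(loss_dict, world_size) above averages per-step metrics across ranks.
# A minimal sketch, assuming torch.distributed is already initialised whenever
# world_size > 1 and that every rank holds the same keys; with a single process the
# values are simply converted to plain floats.
import torch
import torch.distributed as dist

def reduce_loss_dict(loss_dict, world_size=1):
    if world_size < 2:
        return {k: float(v) for k, v in loss_dict.items()}
    with torch.no_grad():
        keys = sorted(loss_dict.keys())
        values = torch.stack(
            [torch.as_tensor(loss_dict[k], dtype=torch.float32).cuda() for k in keys])
        dist.all_reduce(values)
        values /= world_size
        return {k: v.item() for k, v in zip(keys, values)}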
Example #29
def train(config):
    log_fn = os.path.join(
        '../logs', 'S2S_{}_{}.log'.format(config.num_layers,
                                          config.hidden_size))
    logfile = open(log_fn, 'w', 1)
    # Initialize the device which to run the model on
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # get torch loaders for training and test data
    train_loader, test_loader = get_dataloaders(config.dataset,
                                                markov_order=2,
                                                batch_size=config.batch_size)
    vocab_size = train_loader.dataset.vocab_size

    # Load single test batch for evaluation
    test_X, test_Y, test_xl, test_yl = next(iter(test_loader))

    # If we want to continue training and the given filename exists, load all params
    # Otherwise just start training from scratch
    if config.continue_training:
        file_path = config.model_dir + config.continue_training
        if os.path.isfile(file_path):
            print('Loading checkpoint \'{}\''.format(file_path))
            checkpoint = torch.load(file_path)
            config = checkpoint['config']  # Use saved config
            config.start_epoch = checkpoint['epoch']
            print('Loaded checkpoint \'{}\' (epoch {})'.format(
                file_path, checkpoint['epoch']))
            config.continue_training = file_path  # Ensure it is not an empty string
        else:
            print('No checkpoint found at \'{}\''.format(file_path))
            sys.exit('Please check the filename.')

    teacher_force_ratio = config.teacher_force_ratio

    encoder = S2SEncoder(vocab_size,
                         config.embedding_dim,
                         config.hidden_size,
                         config.num_layers,
                         dropout=config.dropout)
    decoder = S2SAttnDecoder(vocab_size,
                             config.embedding_dim,
                             config.hidden_size,
                             config.num_layers,
                             dropout=config.dropout)
    model = S2S(encoder, decoder).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)

    criterion = nn.CrossEntropyLoss(ignore_index=0)

    # If we want to continue training, load the existing model and optimizer
    if config.continue_training and checkpoint is not None:
        print('Model and optimizer are copied from checkpoint.')
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])

    if config.start_epoch >= config.num_epochs:
        sys.exit(
            'Already trained for specified amount of epochs. Consider increasing num_epochs.'
        )
    else:
        print('Start training.')
    losses = []
    for epoch in range(config.start_epoch, config.num_epochs):
        # TRAIN
        num_teacherforce = [0, 0]
        num_batches = len(train_loader)
        for batch_idx, (X, Y, xlen, ylen) in enumerate(train_loader):

            X = X.to(device)
            Y = Y.to(device)
            Y_in = Y[:, :-1]
            Y_t = Y[:, 1:]
            xlen = xlen.to(device)
            # ylen -= 1, outputs do not predict start token
            ylen = (ylen - 1).to(device)

            # Train step
            model.train()
            optimizer.zero_grad()

            # No teacher forcing
            if np.random.random() > teacher_force_ratio:
                num_teacherforce[0] += 1
                Y_in = Y_in[:, 0:1]
                ylen = torch.ones_like(ylen).to(device)
                out_length = Y_t.size(1)
                out = model(X,
                            Y_in,
                            xlen,
                            ylen,
                            output_length=out_length,
                            teacher_forcing=False)
            else:
                num_teacherforce[1] += 1
                out = model(X, Y_in, xlen, ylen, teacher_forcing=True)

            # Loss, optimization step
            loss = criterion(out.reshape(-1, vocab_size), Y_t.reshape(-1))
            loss.backward()
            losses.append(loss.item())
            optimizer.step()

            if not batch_idx % 20:
                pred = torch.argmax(out, -1)
                acc = accuracy(pred, Y_t)
                print(
                    '[Epoch {}/{}], step {:04d}/{:04d} loss {:.4f} acc {:.4f}'.
                    format(epoch + 1, config.num_epochs, batch_idx,
                           num_batches, loss.item(), acc.item()))
                print('{} {} {:.4f} {:.4f}'.format(epoch + 1, batch_idx,
                                                   loss.item(), acc.item()),
                      file=logfile)

            # Save model every final step of each 10 epochs or last epoch
            #if (epoch + 1 % 10 == 0 or epoch + 1 == config.num_epochs) and batch_idx == num_batches - 1:
            #	torch.save(model, config.output_dir + '/test_model_epoch_'+str(epoch+1)+'.pt')
            if batch_idx % 500 == 0:
                state = create_state(config, model, optimizer, criterion,
                                     epoch, loss, accuracy)
                is_best_model = check_is_best(config.model_dir, 'S2SEncoder',
                                              config.embedding_dim,
                                              config.hidden_size, loss.item())
                save_model(state, is_best_model, config.model_dir,
                           'S2SEncoder', config.embedding_dim,
                           config.hidden_size, loss.item())

            if has_converged(losses):
                print('Model converged')
                return

        # EVAL
        # model.eval()
        # print(num_teacherforce)
        # # Load test batch
        # Y = test_Y.to(device)
        # X = test_X.to(device)
        # xlen = test_xl.to(device)
        # ylen = test_yl.to(device)
        # # Make ngrams and targets
        # y_c = torch.stack([Y[:, i:i+config.order] for i in range(0, Y.size(1)-config.order)], 1)
        # y_t = Y[:, config.order:]
        # out = model(X, y_c, xlen, ylen)
        # print(out.size())
        # if config.adasoft:
        #     test_sentence = criterion.predict(out.reshape(-1, output_size)).reshape(out.size(0), out.size(1))
        #     test_sentence = test_sentence.cpu().numpy()
        # else:
        #     test_sentence = torch.argmax(out[-1], -1).cpu().numpy()
        # test_sentence = [test_loader.dataset.i2w[i] if i > 0 else 'PAD' for i in test_sentence]
        # correct = y_t.cpu()[-1].numpy()
        # correct = [test_loader.dataset.i2w[i] for i in correct if i > 0]
        # print(test_sentence)
        # print(correct)
        # print()

        # Decay teacherforcing
        teacher_force_ratio *= config.teacher_force_decay
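
# accuracy(pred, target) is used by both sequence-to-sequence examples but never defined.
# A minimal sketch, assuming index 0 is the padding token (matching ignore_index=0 in the
# loss) so padded positions are excluded from the score; this is an assumption, not the
# original helper.
import torch

def accuracy(pred, target, pad_idx=0):
    """Token-level accuracy over non-padding positions."""
    pred = pred.reshape(-1)
    target = target.reshape(-1)
    mask = target != pad_idx
    return (pred[mask] == target[mask]).float().mean()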
Example #30
def main():

    global args
    best_acc1, best_epoch = 0., 0

    if args.data.startswith('cifar'):
        IMAGE_SIZE = 32
    else:
        IMAGE_SIZE = 224

    if not os.path.exists(args.save):
        os.makedirs(args.save)

    model = getattr(models, args.arch)(args)

    if not os.path.exists(os.path.join(args.save, 'args.pth')):
        torch.save(args, os.path.join(args.save, 'args.pth'))

    if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()

    # Define loss function (criterion) and optimizer
    for param in model.module.net.parameters():
        param.requires_grad = False

    optimizer = torch.optim.SGD(
        [{
            'params': model.module.classifier.parameters()
        }, {
            'params': model.module.isc_modules.parameters()
        }],
        args.lr,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    kd_loss = KDLoss(args)

    # optionally resume from a checkpoint
    if args.resume:
        checkpoint = load_checkpoint(args)
        if checkpoint is not None:
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])

    cudnn.benchmark = True

    train_loader, val_loader, test_loader = get_dataloaders(args)
    print("*************************************")
    print(args.use_valid, len(train_loader), len(val_loader), len(test_loader))
    print("*************************************")

    if args.evalmode is not None:
        m = torch.load(args.evaluate_from)
        model.load_state_dict(m['state_dict'])

        if args.evalmode == 'anytime':
            validate(test_loader, model, kd_loss)
        else:
            dynamic_evaluate(model, test_loader, val_loader, args)
        return

    # set up logging
    global log_print, f_log
    f_log = open(os.path.join(args.save, 'log.txt'), 'w')

    def log_print(*args):
        print(*args)
        print(*args, file=f_log)

    log_print('args:')
    log_print(args)
    print('model:', file=f_log)
    print(model, file=f_log)
    log_print('# of params:',
              str(sum([p.numel() for p in model.parameters()])))

    f_log.flush()

    scores = [
        'epoch\tlr\ttrain_loss\tval_loss\ttrain_acc1'
        '\tval_acc1\ttrain_acc5\tval_acc5'
    ]

    for epoch in range(args.start_epoch, args.epochs):

        # train for one epoch
        train_loss, train_acc1, train_acc5, lr = train(train_loader, model,
                                                       kd_loss, optimizer,
                                                       epoch)

        # evaluate on validation set
        val_loss, val_acc1, val_acc5 = validate(test_loader, model, kd_loss)

        # save scores to a tsv file, rewrite the whole file to prevent
        # accidental deletion
        scores.append(
            ('{}\t{:.3f}' + '\t{:.4f}' * 6).format(epoch, lr, train_loss,
                                                   val_loss, train_acc1,
                                                   val_acc1, train_acc5,
                                                   val_acc5))

        is_best = val_acc1 > best_acc1
        if is_best:
            best_acc1 = val_acc1
            best_epoch = epoch
            print('Best val_acc1 {}'.format(best_acc1))

        model_filename = 'checkpoint_%03d.pth.tar' % epoch
        save_checkpoint(
            {
                'epoch': epoch,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_acc1': best_acc1,
                'optimizer': optimizer.state_dict(),
            }, args, is_best, model_filename, scores)

    print('Best val_acc1: {:.4f} at epoch {}'.format(best_acc1, best_epoch))
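
# KDLoss(args) above is only referenced. As an illustration, here is a standard
# knowledge-distillation loss (hard cross-entropy plus a temperature-scaled KL term);
# the temperature and alpha attribute names are assumptions, and the actual KDLoss in
# the source (which trains a multi-exit network) may combine its exits differently.
import torch.nn as nn
import torch.nn.functional as F

class KDLoss(nn.Module):
    def __init__(self, args):
        super().__init__()
        self.T = getattr(args, 'temperature', 4.0)
        self.alpha = getattr(args, 'alpha', 0.5)
        self.ce = nn.CrossEntropyLoss()

    def forward(self, student_logits, teacher_logits, target):
        # Hard-label cross entropy plus temperature-scaled soft-label KL term
        hard = self.ce(student_logits, target)
        soft = F.kl_div(F.log_softmax(student_logits / self.T, dim=1),
                        F.softmax(teacher_logits / self.T, dim=1),
                        reduction='batchmean') * (self.T ** 2)
        return (1 - self.alpha) * hard + self.alpha * soft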