Code example #1
File: train.py Project: p-kar/pytorch-prototypeDL
def train(opts):

    device = torch.device("cuda" if use_cuda else "cpu")  # use_cuda: assumed to be defined at module level

    if opts.arch == 'small':
        channels = [32, 32, 32, 10]
    elif opts.arch == 'large':
        channels = [256, 128, 64, 32]
    else:
        raise NotImplementedError('Unknown model architecture')

    if opts.mode == 'train_mnist':
        train_loader, valid_loader = get_mnist_loaders(opts.data_dir,
                                                       opts.bsize,
                                                       opts.nworkers,
                                                       opts.sigma, opts.alpha)
        model = CAE(1, 10, 28, opts.n_prototypes, opts.decoder_arch, channels)
    elif opts.mode == 'train_cifar':
        train_loader, valid_loader = get_cifar_loaders(opts.data_dir,
                                                       opts.bsize,
                                                       opts.nworkers,
                                                       opts.sigma, opts.alpha)
        model = CAE(3, 10, 32, opts.n_prototypes, opts.decoder_arch, channels)
    elif opts.mode == 'train_fmnist':
        train_loader, valid_loader = get_fmnist_loaders(
            opts.data_dir, opts.bsize, opts.nworkers, opts.sigma, opts.alpha)
        model = CAE(1, 10, 28, opts.n_prototypes, opts.decoder_arch, channels)
    else:
        raise NotImplementedError('Unknown train mode')

    if opts.optim == 'adam':
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=opts.lr,
                                     weight_decay=opts.wd)
    else:
        raise NotImplementedError("Unknown optim type")
    criterion = nn.CrossEntropyLoss()

    start_n_iter = 0
    # for choosing the best model
    best_val_acc = 0.0

    model_path = os.path.join(opts.save_path, 'model_latest.net')
    if opts.resume and os.path.exists(model_path):
        # restoring training from save_state
        print('====> Resuming training from previous checkpoint')
        save_state = torch.load(model_path, map_location='cpu')
        model.load_state_dict(save_state['state_dict'])
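        # note: the checkpoint also contains the optimizer state ('optimizer'), but it is not restored here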
        start_n_iter = save_state['n_iter']
        best_val_acc = save_state['best_val_acc']
        opts = save_state['opts']  # note: the current opts are replaced by those stored in the checkpoint
        opts.start_epoch = save_state['epoch'] + 1

    model = model.to(device)

    # for logging
    logger = TensorboardXLogger(opts.start_epoch, opts.log_iter, opts.log_dir)
    logger.set(['acc', 'loss', 'loss_class', 'loss_ae', 'loss_r1', 'loss_r2'])
    logger.n_iter = start_n_iter

    for epoch in range(opts.start_epoch, opts.epochs):
        model.train()
        logger.step()
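        # sample 10 random validation images each epoch; used below for the decoded-pairs visualization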
        valid_sample = torch.stack([
            valid_loader.dataset[i][0]
            for i in random.sample(range(len(valid_loader.dataset)), 10)
        ]).to(device)

        for batch_idx, (data, target) in enumerate(train_loader):
            acc, loss, class_error, ae_error, error_1, error_2 = run_iter(
                opts, data, target, model, criterion, device)

            # optimizer step
            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), opts.max_norm)
            optimizer.step()

            logger.update(acc, loss, class_error, ae_error, error_1, error_2)

        val_loss, val_acc, val_class_error, val_ae_error, val_error_1, val_error_2, time_taken = evaluate(
            opts, model, valid_loader, criterion, device)
        # log the validation losses
        logger.log_valid(time_taken, val_acc, val_loss, val_class_error,
                         val_ae_error, val_error_1, val_error_2)
        print('')

        # Save the model to disk
        if val_acc >= best_val_acc:
            best_val_acc = val_acc
            save_state = {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'n_iter': logger.n_iter,
                'opts': opts,
                'val_acc': val_acc,
                'best_val_acc': best_val_acc
            }
            model_path = os.path.join(opts.save_path, 'model_best.net')
            torch.save(save_state, model_path)
            prototypes = model.save_prototypes(opts.save_path,
                                               'prototypes_best.png')
            x = torchvision.utils.make_grid(prototypes, nrow=10, pad_value=1.0)
            logger.writer.add_image('Prototypes (best)', x, epoch)

        save_state = {
            'epoch': epoch,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'n_iter': logger.n_iter,
            'opts': opts,
            'val_acc': val_acc,
            'best_val_acc': best_val_acc
        }
        model_path = os.path.join(opts.save_path, 'model_latest.net')
        torch.save(save_state, model_path)
        prototypes = model.save_prototypes(opts.save_path,
                                           'prototypes_latest.png')
        x = torchvision.utils.make_grid(prototypes, nrow=10, pad_value=1.0)
        logger.writer.add_image('Prototypes (latest)', x, epoch)
        ae_samples = model.get_decoded_pairs_grid(valid_sample)
        logger.writer.add_image('AE_samples_latest', ae_samples, epoch)
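Note: the excerpts on this page use module-level names such as use_cuda and device without defining them. A minimal sketch of the presumed setup, as an assumption rather than code from these projects:

import torch

# presumed one-time setup near the top of the script
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")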
Code example #2
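# excerpt begins mid-statement: the factor/patience arguments below presumably close a torch.optim.lr_scheduler.ReduceLROnPlateau(...) call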
                                                 factor=0.1,
                                                 patience=6)

# TODO: add periodic saving during training, and save accuracy and loss logs

train_loader, val_loader = loader_helper.get_loaders(
    batch_size=batch_size, merge_idda_classes=merge_idda_classes)

since = time.time()

val_acc_history = []

best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0

logger = TensorboardXLogger('tensorboard')

for epoch in range(starting_epoch, num_epochs):
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)

    # Each epoch has a training and validation phase
    for phase in ['train', 'val']:
        if phase == 'train':
            print('\n-- Training epoch %d' % (epoch + 1))
            model.train()  # Set model to training mode

        else:
            print('-- Validating epoch %d' % (epoch + 1))
            model.eval()  # Set model to evaluate mode
Code example #3
def train_sentiment(opts):
    
    device = torch.device("cuda" if use_cuda else "cpu")  # use_cuda: assumed to be defined at module level

    glove_loader = GloveLoader(os.path.join(opts.data_dir, 'glove', opts.glove_emb_file))
    train_loader = DataLoader(RottenTomatoesReviewDataset(opts.data_dir, 'train', glove_loader, opts.maxlen), \
        batch_size=opts.bsize, shuffle=True, num_workers=opts.nworkers)
    valid_loader = DataLoader(RottenTomatoesReviewDataset(opts.data_dir, 'val', glove_loader, opts.maxlen), \
        batch_size=opts.bsize, shuffle=False, num_workers=opts.nworkers)
    model = Classifier(opts.hidden_size, opts.dropout_p, glove_loader, opts.enc_arch)

    if opts.optim == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=opts.lr, weight_decay=opts.wd)
    else:
        raise NotImplementedError("Unknown optim type")

    criterion = nn.CrossEntropyLoss()

    start_n_iter = 0
    # for choosing the best model
    best_val_acc = 0.0

    model_path = os.path.join(opts.save_path, 'model_latest.net')
    if opts.resume and os.path.exists(model_path):
        # restoring training from save_state
        print('====> Resuming training from previous checkpoint')
        save_state = torch.load(model_path, map_location='cpu')
        model.load_state_dict(save_state['state_dict'])
        start_n_iter = save_state['n_iter']
        best_val_acc = save_state['best_val_acc']
        opts = save_state['opts']
        opts.start_epoch = save_state['epoch'] + 1

    model = model.to(device)

    # for logging
    logger = TensorboardXLogger(opts.start_epoch, opts.log_iter, opts.log_dir)
    logger.set(['acc', 'loss'])
    logger.n_iter = start_n_iter

    for epoch in range(opts.start_epoch, opts.epochs):
        model.train()
        logger.step()

        for batch_idx, data in enumerate(train_loader):
            acc, loss = run_iter(opts, data, model, criterion, device)

            # optimizer step
            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), opts.max_norm)
            optimizer.step()

            logger.update(acc, loss)

        val_loss, val_acc, time_taken = evaluate(opts, model, valid_loader, criterion, device)
        # log the validation losses
        logger.log_valid(time_taken, val_acc, val_loss)
        print('')

        # Save the model to disk
        if val_acc >= best_val_acc:
            best_val_acc = val_acc
            save_state = {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'n_iter': logger.n_iter,
                'opts': opts,
                'val_acc': val_acc,
                'best_val_acc': best_val_acc
            }
            model_path = os.path.join(opts.save_path, 'model_best.net')
            torch.save(save_state, model_path)

        save_state = {
            'epoch': epoch,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'n_iter': logger.n_iter,
            'opts': opts,
            'val_acc': val_acc,
            'best_val_acc': best_val_acc
        }
        model_path = os.path.join(opts.save_path, 'model_latest.net')
        torch.save(save_state, model_path)
Code example #4
File: run.py Project: fcdl94/UDA
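# excerpt begins mid-statement: the num_workers=8) below presumably closes the DataLoader for test_loader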
                             num_workers=8)
    target_loader = DataLoader(target,
                               batch_size=batch_size,
                               shuffle=True,
                               num_workers=8)
    source_loader = DataLoader(source,
                               batch_size=batch_size,
                               shuffle=True,
                               num_workers=8)

    return target_loader, source_loader, test_loader, net, EPOCHS, init_lr


if __name__ == '__main__':
    # create the Logger
    log = Log(f'logs/{setting}', method_name)

    # Make the dataset
    target_loader, source_loader, test_loader, net, EPOCHS, init_lr = get_setting()

    if args.epochs is not None:
        EPOCHS = args.epochs

    if args.so:
        loader_length = 'source'
        dl_len = len(source_loader)
        total_steps = EPOCHS * dl_len
        print(f"Number of batches ({loader_length}) is {dl_len}")
        method = SourceOnly(net,
                            init_lr,
Code example #5
def train_rank(opts):
    if opts.constraint in ('DemoParity', 'DispTreat', 'DispImpact'):
        func = lambda x: rank_lp_func(opts.constraint, x)
    else:
        func = rank_collate_func
    glove_loader = GloveLoader(os.path.join(opts.data_dir, 'glove', opts.glove_emb_file))
    train_dataset = RottenTomatoesRankingDataset(opts.data_dir, 'train', glove_loader, opts.maxlen, opts.div_by)
    train_loader = DataLoader(train_dataset, batch_size=opts.bsize, sampler=RankSampler(train_dataset), \
        collate_fn=func, num_workers=opts.nworkers)

    valid_dataset = RottenTomatoesRankingDataset(opts.data_dir, 'val', glove_loader, opts.maxlen, opts.div_by)
    valid_loader = DataLoader(valid_dataset, batch_size=opts.bsize, sampler=RankSampler(valid_dataset), \
        collate_fn=func, num_workers=opts.nworkers)
    model = RankNet(opts.hidden_size, opts.dropout_p, glove_loader, opts.enc_arch, \
        num_genres=len(train_dataset.genres), pretrained_base=opts.pretrained_base, loss_type=opts.loss_type)

    if opts.optim == 'adam':
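        # the (possibly pretrained) encoder is updated at one tenth of the base learning rate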
        optimizer = torch.optim.Adam([
            {'params': model.encoder.parameters(), 'lr': opts.lr / 10.0},
            {'params': model.rank_layer.parameters()}], lr=opts.lr, weight_decay=opts.wd)
    else:
        raise NotImplementedError("Unknown optim type")

    start_n_iter = 0
    # for choosing the best model
    best_val_ndcg = 0.0

    model_path = os.path.join(opts.save_path, 'model_latest.net')
    if opts.resume and os.path.exists(model_path):
        # restoring training from save_state
        print('====> Resuming training from previous checkpoint')
        save_state = torch.load(model_path, map_location='cpu')
        model.load_state_dict(save_state['state_dict'])
        start_n_iter = save_state['n_iter']
        best_val_ndcg = save_state['best_val_ndcg']
        opts = save_state['opts']
        opts.start_epoch = save_state['epoch'] + 1

    model = model.to(device)  # device: assumed to be defined at module level

    # for logging
    logger = TensorboardXLogger(opts.start_epoch, opts.log_iter, opts.log_dir)
    logger.set(['NDCG', opts.metric, 'loss'])
    logger.n_iter = start_n_iter

    for epoch in range(opts.start_epoch, opts.epochs):
        model.train()
        logger.step()

        for batch_idx, data in enumerate(train_loader):
            ndcg, fscore, loss = run_iter(opts, data, model)

            # optimizer step
            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), opts.max_norm)
            optimizer.step()

            logger.update(ndcg, fscore, loss)

        val_loss, val_ndcg, val_fscore, time_taken = evaluate(opts, model, valid_loader)
        # log the validation losses
        logger.log_valid(time_taken, val_ndcg, val_fscore, val_loss)
        print('')

        # Save the model to disk
        if val_ndcg >= best_val_ndcg:
            best_val_ndcg = val_ndcg
            save_state = {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'n_iter': logger.n_iter,
                'opts': opts,
                'val_ndcg': val_ndcg,
                'best_val_ndcg': best_val_ndcg
            }
            model_path = os.path.join(opts.save_path, 'model_best.net')
            torch.save(save_state, model_path)

        save_state = {
            'epoch': epoch,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'n_iter': logger.n_iter,
            'opts': opts,
            'val_ndcg': val_ndcg,
            'best_val_ndcg': best_val_ndcg
        }
        model_path = os.path.join(opts.save_path, 'model_latest.net')
        torch.save(save_state, model_path)
Code example #6
def train(opts):

    glove_loader = GloveLoader(
        os.path.join(opts.data_dir, opts.corpus, 'glove/',
                     opts.glove_emb_file))
    if opts.corpus in ['msvd', 'msvd_vgg']:
        VDDataset = MSVideoDescriptionDataset
    elif opts.corpus == 'msrvtt':
        VDDataset = MSRVideoToTextDataset
    else:
        raise NotImplementedError('Unknown dataset')

    train_loader = DataLoader(VDDataset(opts.data_dir, opts.corpus, 'train', glove_loader, opts.num_frames, opts.max_len), \
        batch_size=opts.bsize, shuffle=True, num_workers=opts.nworkers, collate_fn=collate_fn)
    valid_loader = DataLoader(VDDataset(opts.data_dir, opts.corpus, 'val', glove_loader, opts.num_frames, opts.max_len), \
        batch_size=opts.bsize, shuffle=False, num_workers=opts.nworkers, collate_fn=collate_fn)

    if opts.arch == 's2vt':
        model = S2VTModel(glove_loader, opts.dropout_p, opts.hidden_size,
                          opts.vid_feat_size, opts.max_len)
    elif opts.arch == 's2vt-att':
        model = S2VTAttModel(glove_loader, opts.dropout_p, opts.hidden_size,
                             opts.vid_feat_size, opts.max_len)
    elif opts.arch == 'transformer':
        # the last two arguments are the number of layers and the number of heads
        model = Transformer(glove_loader, opts.dropout_p, opts.hidden_size,
                            opts.vid_feat_size, opts.max_len, 6, 8)
    else:
        raise NotImplementedError('Unknown model architecture')

    if opts.optim == 'adam':
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=opts.lr,
                                     weight_decay=opts.wd)
    else:
        raise NotImplementedError("Unknown optim type")

    if opts.schedule_sample:
        sample_probs = inverse_sigmoid(opts.epochs)
    else:
        sample_probs = np.ones(opts.epochs)

    criterion = nn.CrossEntropyLoss(reduction='none')
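    # note: 'EmbeddingAverageCosineSimilairty' is spelled this way by the nlg-eval package itself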
    metrics_to_omit = ['Bleu_1', 'Bleu_2', 'Bleu_3', 'Bleu_4',
                       'ROUGE_L', 'CIDEr', 'SkipThoughtCS',
                       'EmbeddingAverageCosineSimilairty',
                       'VectorExtremaCosineSimilarity',
                       'GreedyMatchingScore']
    nlg_eval = NLGEval(metrics_to_omit=metrics_to_omit)

    start_n_iter = 0
    # for choosing the best model
    best_val_meteor_score = 0.0

    model_path = os.path.join(opts.save_path, 'model_latest.net')
    if opts.resume and os.path.exists(model_path):
        # restoring training from save_state
        print('====> Resuming training from previous checkpoint')
        save_state = torch.load(model_path, map_location='cpu')
        model.load_state_dict(save_state['state_dict'])
        start_n_iter = save_state['n_iter']
        best_val_meteor_score = save_state['best_val_meteor_score']
        opts = save_state['opts']
        opts.start_epoch = save_state['epoch'] + 1

    model = model.to(device)  # device: assumed to be defined at module level

    # for logging
    logger = TensorboardXLogger(opts.start_epoch, opts.log_iter, opts.log_dir)
    logger.set(['acc', 'loss'])
    logger.n_iter = start_n_iter

    for epoch in range(opts.start_epoch, opts.epochs):
        model.train()
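        # scheduled sampling: the teacher forcing probability follows the per-epoch schedule computed above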
        model.teacher_force_prob = sample_probs[epoch]
        logger.step()

        sampler = StreamSampler(opts.n_sample_sent)
        for batch_idx, data in enumerate(train_loader):
            acc, loss, pred = run_iter(opts,
                                       data,
                                       model,
                                       criterion,
                                       return_pred=True)
            hyps = glove_loader.get_sents_from_indexes(pred.data.cpu().numpy())
            for hyp, ref, vk in zip(hyps, data['refs'], data['vid_key']):
                ref = random.choice(ref)
                sampler.add((hyp, ref, vk))

            # optimizer step
            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), opts.max_norm)
            optimizer.step()

            logger.update(acc, loss)

        meteor_eval_func = lambda pred, refs: calc_meteor_score(
            pred, refs, nlg_eval)
        val_loss, val_acc, val_meteor_score, sample_sent, time_taken = evaluate(
            opts, model, valid_loader, criterion, glove_loader,
            meteor_eval_func)
        print('')
        print(
            '********************************** TRAIN **********************************'
        )
        train_sample_sent = sampler.get()
        print_sample_sents(train_sample_sent)
        print(
            '***************************************************************************'
        )
        print('')
        print(
            '*********************************** VAL ***********************************'
        )
        # log the validation losses
        logger.log_valid(time_taken, val_acc, val_loss)
        logger.writer.add_scalar('val/METEOR', val_meteor_score, logger.n_iter)
        print('Validation METEOR score: {:.5f}'.format(val_meteor_score))
        print_sample_sents(sample_sent)
        print('')

        # Save the model to disk
        if val_meteor_score >= best_val_meteor_score:
            best_val_meteor_score = val_meteor_score
            save_state = {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'n_iter': logger.n_iter,
                'opts': opts,
                'val_meteor_score': val_meteor_score,
                'best_val_meteor_score': best_val_meteor_score
            }
            model_path = os.path.join(opts.save_path, 'model_best.net')
            torch.save(save_state, model_path)

        save_state = {
            'epoch': epoch,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'n_iter': logger.n_iter,
            'opts': opts,
            'val_meteor_score': val_meteor_score,
            'best_val_meteor_score': best_val_meteor_score
        }
        model_path = os.path.join(opts.save_path, 'model_latest.net')
        torch.save(save_state, model_path)
Code example #7
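# excerpt begins mid-statement, as in code example #4: num_workers=8) presumably closes the test_loader DataLoader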
                             num_workers=8)
    target_loader = DataLoader(target,
                               batch_size=batch_size,
                               shuffle=True,
                               num_workers=8)
    source_loader = DataLoader(source,
                               batch_size=batch_size,
                               shuffle=True,
                               num_workers=8)

    return target_loader, source_loader, test_loader


if __name__ == '__main__':
    # create the Logger
    log = Log(f'logs/{setting}', method_name)

    # Make the dataset
    target_loader, source_loader, test_loader = get_setting()

    if args.epochs is not None:
        EPOCHS = args.epochs

    loader_length = 'min'
    dl_len = min(len(source_loader), len(target_loader))
    print(f"Number of batches ({loader_length}) is {dl_len}")
    total_steps = EPOCHS * dl_len
    method = NODA(net, init_lr, total_steps, device, num_classes=n_classes)

    print("Do a validation before starting to check it is ok...")
    val_loss, val_acc = valid(method, valid_loader=test_loader)