Code example #1
def main(FG):
    vis = Visdom(port=10001, env=str(FG.vis_env))
    vis.text(argument_report(FG, end='<br>'), win='config')
    FG.global_step=0

    cae = CAE().cuda()

    print_model_parameters(cae)
    #criterion = nn.BCELoss()
    criterion = nn.MSELoss()
    optimizer = optim.Adam(cae.parameters(), lr=FG.lr, betas=(0.5, 0.999))
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, FG.lr_gamma)
    printers = dict(
        loss=summary.Scalar(vis, 'loss', opts=dict(
            showlegend=True, title='loss', ytickmin=0, ytickmax=2.0)),
        lr=summary.Scalar(vis, 'lr', opts=dict(
            showlegend=True, title='lr', ytickmin=0, ytickmax=2.0)),
        input=summary.Image3D(vis, 'input'),
        output=summary.Image3D(vis, 'output'))

    trainloader, validloader = make_dataloader(FG)

    # leftover GAN-style buffers; none of the tensors below are used by the training loop
    z = 256
    batchSize = FG.batch_size
    imageSize = 64
    input = torch.FloatTensor(batchSize, 1, imageSize, imageSize, imageSize).cuda()
    noise = torch.FloatTensor(batchSize, z).cuda()
    fixed_noise = torch.FloatTensor(batchSize, z).normal_(0, 1).cuda()

    label = torch.FloatTensor(batchSize).cuda()
    real_label = 1
    fake_label = 0

    for epoch in range(FG.num_epoch):
        scheduler.step()
        torch.set_grad_enabled(True)
        pbar = tqdm(total=len(trainloader), desc='Epoch {:>3}'.format(epoch))
        for i, data in enumerate(trainloader):
            real = data[0][0].cuda()

            optimizer.zero_grad()
            output = cae(real)
            loss = criterion(output, real)
            loss.backward()
            optimizer.step()

            FG.global_step += 1
            printers['loss']('loss', FG.global_step/len(trainloader), loss)
            printers['input']('input', real)
            printers['output']('output', output/output.max())
            pbar.update()
        pbar.close()
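
For reference, main() above directly reads vis_env, lr, lr_gamma, batch_size and num_epoch from FG (helpers such as make_dataloader may read more) and manages global_step itself. A minimal, hypothetical launcher under that assumption could look like the following; the flag names mirror those attributes and the default values are illustrative only.

# Hypothetical launcher for Code example #1 (not part of the original source).
import argparse

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--vis_env', type=str, default='cae')
    parser.add_argument('--lr', type=float, default=1e-4)
    parser.add_argument('--lr_gamma', type=float, default=0.95)
    parser.add_argument('--batch_size', type=int, default=4)
    parser.add_argument('--num_epoch', type=int, default=100)
    return parser.parse_args()

if __name__ == '__main__':
    main(parse_args())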
Code example #2
def main():
    # option flags
    FLG = train_args()

    # torch setting
    device = torch.device('cuda:{}'.format(FLG.devices[0]))
    torch.backends.cudnn.benchmark = True
    torch.cuda.set_device(FLG.devices[0])

    # create summary and report the option
    visenv = FLG.model
    summary = Summary(port=39199, env=visenv)
    summary.viz.text(argument_report(FLG, end='<br>'),
                     win='report' + str(FLG.running_fold))
    train_report = ScoreReport()
    valid_report = ScoreReport()
    timer = SimpleTimer()
    fold_str = 'fold' + str(FLG.running_fold)
    best_score = dict(epoch=0, loss=1e+100, accuracy=0)

    #### create dataset ###
    # kfold split
    target_dict = np.load(pjoin(FLG.data_root, 'target_dict.pkl'))
    trainblock, validblock, ratio = fold_split(
        FLG.fold, FLG.running_fold, FLG.labels,
        np.load(pjoin(FLG.data_root, 'subject_indices.npy')), target_dict)

    def _dataset(block, transform):
        return ADNIDataset(FLG.labels,
                           pjoin(FLG.data_root, FLG.modal),
                           block,
                           target_dict,
                           transform=transform)

    # create train set
    trainset = _dataset(trainblock, transform_presets(FLG.augmentation))

    # create normal valid set
    validset = _dataset(
        validblock,
        transform_presets('nine crop' if FLG.augmentation ==
                          'random crop' else 'no augmentation'))

    # each loader
    trainloader = DataLoader(trainset,
                             batch_size=FLG.batch_size,
                             shuffle=True,
                             num_workers=4,
                             pin_memory=True)
    validloader = DataLoader(validset, num_workers=4, pin_memory=True)

    # data check
    # for image, _ in trainloader:
    #     summary.image3d('asdf', image)

    # create model
    def kaiming_init(tensor):
        return kaiming_normal_(tensor, mode='fan_out', nonlinearity='relu')

    if 'plane' in FLG.model:
        model = Plane(len(FLG.labels),
                      name=FLG.model,
                      weights_initializer=kaiming_init)
    elif 'resnet11' in FLG.model:
        model = resnet11(len(FLG.labels),
                         FLG.model,
                         weights_initializer=kaiming_init)
    elif 'resnet19' in FLG.model:
        model = resnet19(len(FLG.labels),
                         FLG.model,
                         weights_initializer=kaiming_init)
    elif 'resnet35' in FLG.model:
        model = resnet35(len(FLG.labels),
                         FLG.model,
                         weights_initializer=kaiming_init)
    elif 'resnet51' in FLG.model:
        model = resnet51(len(FLG.labels),
                         FLG.model,
                         weights_initializer=kaiming_init)
    else:
        raise NotImplementedError(FLG.model)

    print_model_parameters(model)
    model = torch.nn.DataParallel(model, FLG.devices)
    model.to(device)

    # criterion
    train_criterion = torch.nn.CrossEntropyLoss(weight=torch.Tensor(
        list(map(lambda x: x * 2, reversed(ratio))))).to(device)
    valid_criterion = torch.nn.CrossEntropyLoss().to(device)

    # TODO resume
    # optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=FLG.lr,
                                 weight_decay=FLG.l2_decay)
    # scheduler
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, FLG.lr_gamma)

    start_epoch = 0
    global_step = start_epoch * len(trainloader)
    pbar = None
    for epoch in range(1, FLG.max_epoch + 1):
        timer.tic()
        scheduler.step()
        summary.scalar('lr',
                       fold_str,
                       epoch - 1,
                       optimizer.param_groups[0]['lr'],
                       ytickmin=0,
                       ytickmax=FLG.lr)

        # train()
        torch.set_grad_enabled(True)
        model.train(True)
        train_report.clear()
        if pbar is None:
            pbar = tqdm(total=len(trainloader) * FLG.validation_term,
                        desc='Epoch {:<3}-{:>3} train'.format(
                            epoch, epoch + FLG.validation_term - 1))
        for images, targets in trainloader:
            images = images.cuda(device, non_blocking=True)
            targets = targets.cuda(device, non_blocking=True)

            optimizer.zero_grad()

            outputs = model(images)
            loss = train_criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            train_report.update_true(targets)
            train_report.update_score(F.softmax(outputs, dim=1))

            summary.scalar('loss',
                           'train ' + fold_str,
                           global_step / len(trainloader),
                           loss.item(),
                           ytickmin=0,
                           ytickmax=1)

            pbar.update()
            global_step += 1

        if epoch % FLG.validation_term != 0:
            timer.toc()
            continue
        pbar.close()

        # valid()
        torch.set_grad_enabled(False)
        model.eval()
        valid_report.clear()
        pbar = tqdm(total=len(validloader),
                    desc='Epoch {:>3} valid'.format(epoch))
        for images, targets in validloader:
            true = targets
            npatchs = 1
            if len(images.shape) == 6:
                _, npatchs, c, x, y, z = images.shape
                images = images.view(-1, c, x, y, z)
                targets = torch.cat([targets
                                     for _ in range(npatchs)]).squeeze()
            images = images.cuda(device, non_blocking=True)
            targets = targets.cuda(device, non_blocking=True)

            output = model(images)
            loss = valid_criterion(output, targets)

            valid_report.loss += loss.item()

            if npatchs == 1:
                score = F.softmax(output, dim=1)
            else:
                score = torch.mean(F.softmax(output, dim=1),
                                   dim=0,
                                   keepdim=True)
            valid_report.update_true(true)
            valid_report.update_score(score)

            pbar.update()
        pbar.close()

        # report
        vloss = valid_report.loss / len(validloader)
        summary.scalar('accuracy',
                       'train ' + fold_str,
                       epoch,
                       train_report.accuracy,
                       ytickmin=-0.05,
                       ytickmax=1.05)

        summary.scalar('loss',
                       'valid ' + fold_str,
                       epoch,
                       vloss,
                       ytickmin=0,
                       ytickmax=0.8)
        summary.scalar('accuracy',
                       'valid ' + fold_str,
                       epoch,
                       valid_report.accuracy,
                       ytickmin=-0.05,
                       ytickmax=1.05)

        is_best = False
        if best_score['loss'] > vloss:
            best_score['loss'] = vloss
            best_score['epoch'] = epoch
            best_score['accuracy'] = valid_report.accuracy
            is_best = True

        print('Best Epoch {}: validation loss {} accuracy {}'.format(
            best_score['epoch'], best_score['loss'], best_score['accuracy']))

        # save
        if isinstance(model, torch.nn.DataParallel):
            state_dict = model.module.state_dict()
        else:
            state_dict = model.state_dict()

        save_checkpoint(
            dict(epoch=epoch,
                 best_score=best_score,
                 state_dict=state_dict,
                 optimizer_state_dict=optimizer.state_dict()),
            FLG.checkpoint_root, FLG.running_fold, FLG.model, is_best)
        pbar = None
        timer.toc()
        print('Time elapse {}h {}m {}s'.format(*timer.total()))
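
The validation loop above supports multi-crop inputs: a 6-D batch (batch, patches, channel, x, y, z) is flattened so every crop goes through the network, and the per-crop softmax scores are averaged back into one score per subject. A standalone sketch of just that step, with dummy shapes and a stand-in for the network:

# Sketch of the multi-crop score averaging used in the validation loop above.
# Shapes and the stand-in model are illustrative only.
import torch
import torch.nn.functional as F

images = torch.randn(1, 9, 1, 64, 64, 64)    # (batch, npatchs, c, x, y, z)
targets = torch.tensor([1])
model = lambda x: torch.randn(x.size(0), 2)  # stand-in for the real classifier

if len(images.shape) == 6:
    _, npatchs, c, x, y, z = images.shape
    images = images.view(-1, c, x, y, z)     # fold the crops into the batch dimension
    targets = torch.cat([targets for _ in range(npatchs)]).squeeze()

output = model(images)                       # (npatchs, num_classes)
score = torch.mean(F.softmax(output, dim=1), dim=0, keepdim=True)  # one averaged score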
Code example #3
def train():
    model_type = FLAGS.model_type
    run_desc = FLAGS.run_desc
    run_desc_tl = FLAGS.run_desc_tl
    data_dir = Path(FLAGS.data_dir)
    checkpoints_dir = Path(FLAGS.checkpoints_dir) / model_type / run_desc
    models_dir = Path(FLAGS.models_dir) / model_type / run_desc
    results_dir = Path(FLAGS.results_dir) / model_type / run_desc
    checkpoints_dir_tl = Path(FLAGS.checkpoints_dir) / model_type / run_desc_tl
    models_dir_tl = Path(FLAGS.models_dir) / model_type / run_desc_tl
    results_dir_tl = Path(FLAGS.results_dir) / model_type / run_desc_tl
    learning_rate = FLAGS.learning_rate
    batch_size_fn = FLAGS.batch_size
    epoch_no = FLAGS.epoch
    sent_hidden_dim = FLAGS.sent_hidden_dim
    doc_hidden_dim = FLAGS.doc_hidden_dim

    if not data_dir.exists():
        raise ValueError('Data directory does not exist')

    # create other directories if they do not exist
    create_directories(checkpoints_dir_tl, models_dir_tl, results_dir_tl)

    # load the data
    print('Loading the data...')

    # get the glove and elmo embedding
    glove_dim = 0
    elmo_dim = 0
    GloVe_vectors = None
    ELMo = None
    if 'glove' in model_type:
        GloVe_vectors = GloVe()
        glove_dim = WORD_EMBED_DIM
        print('Uploaded GloVe embeddings.')
    if 'elmo' in model_type:
        ELMo = Elmo(options_file=ELMO_OPTIONS_FILE,
                    weight_file=ELMO_WEIGHT_FILE,
                    num_output_representations=1,
                    requires_grad=False,
                    dropout=0).to(DEVICE)
        elmo_dim = ELMO_EMBED_DIM
        print('Uploaded Elmo embeddings.')
    input_dim = glove_dim + elmo_dim
    # get the fnn and snli data
    keys = ['train', 'test', 'val']
    FNN_DL_small = {}
    for i in keys:
        FNN_temp = FNNDataset(data_dir / ('FNN_small_' + i + '.pkl'),
                              GloVe_vectors, ELMo)
        FNN_DL_temp = data.DataLoader(dataset=FNN_temp,
                                      batch_size=batch_size_fn,
                                      num_workers=0,
                                      shuffle=True,
                                      drop_last=True,
                                      collate_fn=PadSortBatchFNN())
        FNN_DL_small[i] = FNN_DL_temp
    print('Uploaded FNN data.')

    # initialize the model, according to the model type
    print('Initializing the model for transfer learning...', end=' ')

    model = HierarchicalAttentionNet(input_dim=input_dim,
                                     sent_hidden_dim=sent_hidden_dim,
                                     doc_hidden_dim=doc_hidden_dim,
                                     num_classes=NUM_CLASSES_FN,
                                     dropout=0).to(DEVICE)
    print('Done!')
    print_model_parameters(model)
    print()
    print('Working on: ', end='')
    print(DEVICE)

    # set the criterion and optimizer
    # (no class weighting is applied in this transfer-learning run)
    loss_func_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

    # load the last checkpoint (if it exists)
    results = {
        'epoch': [],
        'train_loss': [],
        'train_accuracy': [],
        'val_loss': [],
        'val_accuracy': []
    }
    if epoch_no == '0':
        model_path = models_dir / Path('HierarchicalAttentionNet_model.pt')
        _, _, _ = load_latest_checkpoint(model_path, model, optimizer)
    else:
        checkpoint_path = checkpoints_dir / Path(
            'HierarchicalAttentionNet_Adam_checkpoint_' + str(epoch_no) +
            '_.pt')
        _, _, _ = load_checkpoint(checkpoint_path, model, optimizer)
    print(f'Starting transfer learning on the model extracted from {epoch_no}')
    epoch = 0
    for i in range(epoch, MAX_EPOCHS):
        print(f'Epoch {i+1:0{len(str(MAX_EPOCHS))}}/{MAX_EPOCHS}:')
        model.train()
        # one epoch of training
        train_loss_fn, train_acc_fn = train_epoch_fn(FNN_DL_small['train'],
                                                     model, optimizer,
                                                     loss_func_fn)

        # one epoch of eval
        model.eval()
        val_loss_fn, val_acc_fn = eval_epoch_fn(FNN_DL_small['val'], model,
                                                loss_func_fn)

        results['epoch'].append(i)
        results['train_loss'].append(train_loss_fn)
        results['train_accuracy'].append(train_acc_fn)
        results['val_loss'].append(val_loss_fn)
        results['val_accuracy'].append(val_acc_fn)
        #print(results)
        # this stores the current epoch's validation accuracy (not a running best)
        best_accuracy = torch.tensor(val_acc_fn).max().item()
        create_checkpoint(checkpoints_dir_tl, i, model, optimizer, results,
                          best_accuracy)

    # save and plot the results
    save_results(results_dir_tl, results, model)
    save_model(models_dir_tl, model)
Code example #4
def test():
    model_type = FLAGS.model_type
    run_desc = FLAGS.run_desc
    data_dir = Path(FLAGS.data_dir)
    checkpoints_dir = Path(FLAGS.checkpoints_dir) / model_type / run_desc
    models_dir = Path(FLAGS.models_dir) / model_type / run_desc
    results_dir = Path(FLAGS.results_dir) / model_type / run_desc
    learning_rate = LEARNING_RATE
    epoch_no = FLAGS.epoch
    if not data_dir.exists():
        raise ValueError('Data directory does not exist')

    # create other directories if they do not exist
    create_directories(checkpoints_dir, models_dir, results_dir)

    # load the data
    print('Loading the data...')

    # get the glove and elmo embedding
    glove_dim = 0
    elmo_dim = 0
    GloVe_vectors = None
    ELMo = None
    if 'glove' in model_type:
        GloVe_vectors = GloVe()
        glove_dim = WORD_EMBED_DIM
        print('Uploaded GloVe embeddings.')
    if 'elmo' in model_type:
        ELMo = Elmo(options_file=ELMO_OPTIONS_FILE,
                    weight_file=ELMO_WEIGHT_FILE,
                    num_output_representations=1,
                    requires_grad=False,
                    dropout=0).to(DEVICE)
        elmo_dim = ELMO_EMBED_DIM
        print('Uploaded Elmo embeddings.')
    input_dim = glove_dim + elmo_dim
    # get the fnn and snli data
    keys = ['train', 'test', 'val']
    FNN_DL_small = {}
    for i in keys:
        FNN_temp = FNNDataset(data_dir / ('FNN_small_' + i + '.pkl'),
                              GloVe_vectors, ELMo)
        FNN_DL_temp = data.DataLoader(dataset=FNN_temp,
                                      batch_size=BATCH_SIZE_FN,
                                      num_workers=0,
                                      shuffle=True,
                                      drop_last=True,
                                      collate_fn=PadSortBatchFNN())
        FNN_DL_small[i] = FNN_DL_temp
    print('Uploaded FNN data.')

    print('Initializing the model...', end=' ')

    model = initialize_han(input_dim, WORD_HIDDEN_DIM, NUM_CLASSES_FN, DEVICE)

    print('Working on: ', end='')
    print(DEVICE)
    print('Done!')
    print_model_parameters(model)
    print()

    optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

    print('Loading model weights.')
    #model.load_state_dict(torch.load(CHECKPOINTS_DIR_DEFAULT / 'HierarchicalAttentionNet_model.pt'))
    if epoch_no == '0':
        model_path = models_dir / Path('HierarchicalAttentionNet_model.pt')
        model = load_model(model_path, model, checkpoint=False)
        #_, _, _ = load_latest_checkpoint(model_path, model, optimizer)
    else:
        checkpoint_path = checkpoints_dir / Path(
            'HierarchicalAttentionNet_Adam_checkpoint_' + str(epoch_no) +
            '_.pt')
        model = load_model(checkpoint_path, model, checkpoint=True)
        #_, _, _ = load_checkpoint(checkpoint_path, model, optimizer)

    #model.eval()
    loss_func_fn = nn.CrossEntropyLoss()
    #y_pred = []
    #y_true = []
    for split in keys:
        all_embeds = []
        for step, batch in enumerate(FNN_DL_small[split]):
            embeds = get_article_embeddings(model, batch)
            all_embeds.append(embeds[0])
        pkl.dump(
            all_embeds,
            open(
                data_dir /
                ('FNN_small_embeds_' + model_type + '_' + split + '.pkl'),
                'wb'))
Code example #5
def test():
    model_type = FLAGS.model_type
    run_desc = FLAGS.run_desc
    data_dir = Path(FLAGS.data_dir)
    checkpoints_dir = Path(FLAGS.checkpoints_dir) / model_type / run_desc
    models_dir = Path(FLAGS.models_dir) / model_type / run_desc
    results_dir = Path(FLAGS.results_dir) / model_type / run_desc
    learning_rate = LEARNING_RATE
    epoch_no = FLAGS.epoch
    sent_hidden_dim = FLAGS.sent_hidden_dim
    doc_hidden_dim = FLAGS.doc_hidden_dim

    if not data_dir.exists():
        raise ValueError('Data directory does not exist')

    # create other directories if they do not exist
    create_directories(checkpoints_dir, models_dir, results_dir)

    # load the data
    print('Loading the data...')

    # get the glove and elmo embedding
    glove_dim = 0
    elmo_dim = 0
    GloVe_vectors = None
    ELMo = None
    if 'glove' in model_type:
        GloVe_vectors = GloVe()
        glove_dim = WORD_EMBED_DIM
        print('Uploaded GloVe embeddings.')
    if 'elmo' in model_type:
        ELMo = Elmo(options_file=ELMO_OPTIONS_FILE,
                    weight_file=ELMO_WEIGHT_FILE,
                    num_output_representations=1,
                    requires_grad=False,
                    dropout=0).to(DEVICE)
        elmo_dim = ELMO_EMBED_DIM
        print('Uploaded Elmo embeddings.')
    input_dim = glove_dim + elmo_dim
    # get the fnn and snli data

    FNN_small_test = FNNDataset(data_dir / ('FNN_small_test.pkl'),
                                GloVe_vectors, ELMo)
    FNN_DL_small_test = data.DataLoader(dataset=FNN_small_test,
                                        batch_size=BATCH_SIZE_FN,
                                        num_workers=0,
                                        shuffle=True,
                                        drop_last=True,
                                        collate_fn=PadSortBatchFNN())
    print('Uploaded FNN data.')

    print('Initializing the model...', end=' ')

    model = initialize_han(input_dim, sent_hidden_dim, doc_hidden_dim,
                           NUM_CLASSES_FN, DEVICE)

    print('Working on: ', end='')
    print(DEVICE)
    print('Done!')
    print_model_parameters(model)
    print()

    optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

    print('Loading model weights.')
    #model.load_state_dict(torch.load(CHECKPOINTS_DIR_DEFAULT / 'HierarchicalAttentionNet_model.pt'))
    if epoch_no == '0':
        model_path = models_dir / Path('HierarchicalAttentionNet_model.pt')
        model = load_model(model_path, model, checkpoint=False)
        #_, _, _ = load_latest_checkpoint(model_path, model, optimizer)
    else:
        checkpoint_path = checkpoints_dir / Path(
            'HierarchicalAttentionNet_Adam_checkpoint_' + str(epoch_no) +
            '_.pt')
        model = load_model(checkpoint_path, model, checkpoint=True)
        #_, _, _ = load_checkpoint(checkpoint_path, model, optimizer)

    model.eval()
    loss_func_fn = nn.CrossEntropyLoss()
    y_pred = []
    y_true = []
    for step, batch in enumerate(FNN_DL_small_test):
        articles, article_dims, labels = batch
        out = model(batch=articles, batch_dims=article_dims)
        # collect per-article predictions; tolist() also covers batches with several articles
        y_pred.extend(out.argmax(dim=1).tolist())
        y_true.extend(labels.tolist())
        if step % 100 == 0 and step != 0:
            print(
                sum(1 for x, y in zip(y_pred, y_true) if x == y) / len(y_pred))
            #print(sklearn.metrics.precision_recall_fscore_support(y_true, y_pred, average=None))
    print(
        sklearn.metrics.precision_recall_fscore_support(y_true,
                                                        y_pred,
                                                        average='micro'))
    print(
        sklearn.metrics.precision_recall_fscore_support(y_true,
                                                        y_pred,
                                                        average='macro'))
    print(
        sklearn.metrics.precision_recall_fscore_support(y_true,
                                                        y_pred,
                                                        average=None))
Code example #6
def train():
    model_type = FLAGS.model_type
    run_desc = FLAGS.run_desc
    data_dir = Path(FLAGS.data_dir)
    checkpoints_dir = Path(FLAGS.checkpoints_dir) / model_type / run_desc
    models_dir = Path(FLAGS.models_dir) / model_type / run_desc
    results_dir = Path(FLAGS.results_dir) / model_type / run_desc
    learning_rate = LEARNING_RATE
    sent_hidden_dim = FLAGS.sent_hidden_dim
    doc_hidden_dim = FLAGS.doc_hidden_dim

    if not data_dir.exists():
        raise ValueError('Data directory does not exist')

    # create other directories if they do not exist
    create_directories(checkpoints_dir, models_dir, results_dir)

    # load the data
    print('Loading the data...')

    # get the glove and elmo embedding
    glove_dim = 0
    elmo_dim = 0
    GloVe_vectors = None
    ELMo = None
    if 'glove' in model_type:
        GloVe_vectors = GloVe()
        glove_dim = WORD_EMBED_DIM
        print('Uploaded GloVe embeddings.')
    if 'elmo' in model_type:
        ELMo = Elmo(options_file=ELMO_OPTIONS_FILE,
                    weight_file=ELMO_WEIGHT_FILE,
                    num_output_representations=1,
                    requires_grad=False,
                    dropout=0).to(DEVICE)
        elmo_dim = ELMO_EMBED_DIM
        print('Uploaded Elmo embeddings.')
    input_dim = glove_dim + elmo_dim
    # get the fnn and snli data
    FNN = {}
    FNN_DL = {}

    for path in ['train', 'val', 'test']:
        FNN[path] = FNNDataset(data_dir / ('FNN_' + path + '.pkl'),
                               GloVe_vectors, ELMo)
        FNN_DL[path] = data.DataLoader(dataset=FNN[path],
                                       batch_size=BATCH_SIZE_FN,
                                       num_workers=0,
                                       shuffle=True,
                                       drop_last=True,
                                       collate_fn=PadSortBatchFNN())
    print('Uploaded FNN data.')

    fnn_train_sent_no = get_number_sentences(data_dir / 'FNN_train.pkl')
    fnn_train_len = len(FNN['train'])

    # initialize the model, according to the model type
    print('Initializing the model...', end=' ')

    model = HierarchicalAttentionNet(input_dim=input_dim,
                                     sent_hidden_dim=sent_hidden_dim,
                                     doc_hidden_dim=doc_hidden_dim,
                                     num_classes=NUM_CLASSES_FN,
                                     dropout=0).to(DEVICE)
    print('Working on: ', end='')
    print(DEVICE)
    print('Done!')
    print_model_parameters(model)
    print()

    # set the criterion and optimizer
    # we weigh the loss: class [0] is real, class [1] is fake
    #
    real_ratio, fake_ratio = get_class_balance(data_dir / 'FNN_train.pkl')
    weights = [(1.0 - real_ratio), (1.0 - fake_ratio)]
    print(weights)
    class_weights = torch.FloatTensor(weights).to(DEVICE)
    loss_func_fn = nn.CrossEntropyLoss(weight=class_weights)
    optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

    # load the last checkpoint (if it exists)
    results = {
        'epoch': [],
        'train_loss': [],
        'train_accuracy': [],
        'val_loss': [],
        'val_accuracy': []
    }
    epoch, results, best_accuracy = load_latest_checkpoint(
        checkpoints_dir, model, optimizer)
    if epoch == 0:
        print(f'Starting training at epoch {epoch + 1}...')
    else:
        print(f'Resuming training from epoch {epoch + 1}...')

    for i in range(epoch, MAX_EPOCHS):
        print(f'Epoch {i+1:0{len(str(MAX_EPOCHS))}}/{MAX_EPOCHS}:')
        model.train()
        # one epoch of training
        train_loss_fn, train_acc_fn = train_epoch_fn(FNN_DL['train'], model,
                                                     optimizer, loss_func_fn)

        # one epoch of eval
        model.eval()
        val_loss_fn, val_acc_fn = eval_epoch_fn(FNN_DL['val'], model,
                                                loss_func_fn)

        results['epoch'].append(i)
        results['train_loss'].append(train_loss_fn)
        results['train_accuracy'].append(train_acc_fn)
        results['val_loss'].append(val_loss_fn)
        results['val_accuracy'].append(val_acc_fn)
        #print(results)
        # this stores the current epoch's validation accuracy (not a running best)
        best_accuracy = torch.tensor(val_acc_fn).max().item()
        create_checkpoint(checkpoints_dir, i, model, optimizer, results,
                          best_accuracy)
        if (i + 1) % 4 == 0 and i != 0:
            learning_rate = learning_rate / 2
            optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

    # save and plot the results
    save_results(results_dir, results, model)
    save_model(models_dir, model)
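
One note on the schedule at the end of the loop above: re-creating the Adam optimizer every four epochs also discards its running moment estimates. If only the learning rate should change, the same halving can be expressed with a scheduler. A self-contained sketch under that assumption (stand-in model, not the original code's behaviour):

# Sketch: halve the learning rate every 4 epochs without resetting Adam's state.
from torch import nn, optim

model = nn.Linear(10, 2)                  # stand-in model
optimizer = optim.Adam(model.parameters(), lr=1e-3)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.5)
for i in range(12):                       # stand-in for range(epoch, MAX_EPOCHS)
    # ... one epoch of training and evaluation ...
    scheduler.step()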
Code example #7
def train():
    model_type = FLAGS.model_type
    run_desc = FLAGS.run_desc
    data_dir = Path(FLAGS.data_dir)
    checkpoints_dir = Path(FLAGS.checkpoints_dir) / model_type / run_desc
    models_dir = Path(FLAGS.models_dir) / model_type / run_desc
    results_dir = Path(FLAGS.results_dir) / model_type / run_desc
    #data_percentage = FLAGS.data_percentage

    if model_type == 'STL':
        only_fn = True
    else:
        only_fn = False

    # check if data directory exists
    if not data_dir.exists():
        raise ValueError('Data directory does not exist')

    # create other directories if they do not exist
    create_directories(checkpoints_dir, models_dir, results_dir)

    # load the data
    print('Loading the data...')

    # get the glove and elmo embeddings
    GloVe_vectors = GloVe()
    print('Uploaded GloVe embeddings.')
    # ELMo = Elmo(
    #         options_file=ELMO_OPTIONS_FILE,
    #         weight_file=ELMO_WEIGHT_FILE,
    #         num_output_representations=1,
    #         requires_grad=False,
    #         dropout=0).to(DEVICE)
    # print('Uploaded Elmo embeddings.')
    # get the fnn and snli data
    FNN = {}
    FNN_DL = {}

    for path in ['train', 'val', 'test']:
        FNN[path] = FNNDataset(data_dir / ('FNN_' + path + '.pkl'),
                               GloVe_vectors)
        FNN_DL[path] = data.DataLoader(dataset=FNN[path],
                                       batch_size=BATCH_SIZE_FN,
                                       num_workers=0,
                                       shuffle=True,
                                       drop_last=True,
                                       collate_fn=PadSortBatch())
    print('Uploaded FNN data.')
    if not only_fn:
        SNLI = {}
        SNLI_DL = {}
        for path in ['train', 'val', 'test']:
            SNLI[path] = SNLIDataset(data_dir / ('SNLI_' + path + '.pkl'),
                                     GloVe_vectors)
            SNLI_DL[path] = data.DataLoader(dataset=SNLI[path],
                                            batch_size=BATCH_SIZE_NLI,
                                            num_workers=0,
                                            shuffle=True,
                                            drop_last=True,
                                            collate_fn=PadSortBatchSNLI())
        print('Uploaded SNLI data.')
        snli_train_sent_no = len(SNLI['train']) * 2
        snli_train_len = len(SNLI['train'])
    fnn_train_sent_no = get_number_sentences(data_dir / 'FNN_train.pkl')
    fnn_train_len = len(FNN['train'])

    # initialize the model, according to the model type
    print('Initializing the model...', end=' ')
    if model_type == 'MTL':
        NUM_CLASSES_NLI = 3
        print("Loading an MTL HAN model.")
    elif model_type == 'STL':
        NUM_CLASSES_NLI = None
        print("Loading an STL HAN model.")
    elif model_type == 'Transfer':
        # transfer mode is not implemented yet; keep NUM_CLASSES_NLI defined so the
        # model construction below does not fail
        NUM_CLASSES_NLI = None
        print("Nothing for now.")
    if ELMO_EMBED_DIM is not None:
        # input_dim = WORD_EMBED_DIM + ELMO_EMBED_DIM
        input_dim = WORD_EMBED_DIM
    else:
        input_dim = WORD_EMBED_DIM
    model = HierarchicalAttentionNet(input_dim=input_dim,
                                     hidden_dim=WORD_HIDDEN_DIM,
                                     num_classes_task_fn=NUM_CLASSES_FN,
                                     embedding=None,
                                     num_classes_task_nli=NUM_CLASSES_NLI,
                                     dropout=0).to(DEVICE)
    print('Working on: ', end='')
    print(DEVICE)
    print('Done!')
    print_model_parameters(model)
    print()

    # set the criterion and optimizer
    # we weigh the loss: class [0] is real, class [1] is fake
    #
    real_ratio, fake_ratio = get_class_balance(data_dir / 'FNN_train.pkl')
    weights = [(1.0 - real_ratio), (1.0 - fake_ratio)]
    print(weights)
    class_weights = torch.FloatTensor(weights).to(DEVICE)
    loss_func_fn = nn.CrossEntropyLoss(weight=class_weights)
    if not only_fn:
        loss_func_nli = nn.CrossEntropyLoss()
        temperature = 2
    optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

    # load the last checkpoint (if it exists)
    epoch, results, best_accuracy = load_latest_checkpoint(
        checkpoints_dir, model, optimizer)
    results_fn = {
        'epoch': [],
        'train_loss': [],
        'train_accuracy': [],
        'val_loss': [],
        'val_accuracy': []
    }
    results_nli = {
        'epoch': [],
        'train_loss': [],
        'train_accuracy': [],
        'val_loss': [],
        'val_accuracy': []
    }
    results = {'fn': results_fn, 'nli': results_nli}
    if epoch == 0:
        print(f'Starting training at epoch {epoch + 1}...')
    else:
        print(f'Resuming training from epoch {epoch + 1}...')

    for i in range(epoch, MAX_EPOCHS):
        print(f'Epoch {i+1:0{len(str(MAX_EPOCHS))}}/{MAX_EPOCHS}:')
        model.train()
        # one epoch of training
        if only_fn:
            train_loss_fn, train_acc_fn = train_epoch_fn(
                FNN_DL['train'], model, optimizer, loss_func_fn)
        elif model_type == 'MTL':
            model.train()

            train_loss_fn = []
            train_acc_fn = []
            loss_fn_weight_gradnorm = 1

            train_loss_nli = []
            train_acc_nli = []
            loss_nli_weight_gradnorm = 1

            #define by sentence number
            #loss_fn_weight_dataset = 1 - fnn_train_sent_no / (fnn_train_sent_no + snli_train_sent_no)
            #loss_nli_weight_dataset = 1 - snli_train_sent_no / (fnn_train_sent_no + snli_train_sent_no)
            loss_fn_weight_dataset = 1 - fnn_train_len / (fnn_train_len +
                                                          snli_train_len)
            loss_nli_weight_dataset = 1 - snli_train_len / (fnn_train_len +
                                                            snli_train_len)

            chance_fn = 1000 * (fnn_train_len / BATCH_SIZE_FN) / (
                (fnn_train_len / BATCH_SIZE_FN) +
                (snli_train_len / BATCH_SIZE_NLI))
            iterator_fnn = enumerate(FNN_DL['train'])
            iterator_snli = enumerate(SNLI_DL['train'])
            done_fnn, done_snli = False, False
            step_fnn = 0
            step_snli = 0
            print(
                f'Train set length, FNN: {fnn_train_len}. Train set length, SNLI: {snli_train_len}.'
            )
            print(
                f'Training set to batch size ratio for Fake News Detection is {fnn_train_len / BATCH_SIZE_FN}.'
            )
            print(
                f'Training set to batch size ratio for Language Inference is {snli_train_len / BATCH_SIZE_NLI}.'
            )

            while not (done_fnn and done_snli):
                if len(train_loss_fn) > 1 and len(train_loss_nli) > 1:
                    # computes loss weights based on the loss from the previous iterations
                    loss_fn_ratio = train_loss_fn[-1] / train_loss_fn[-2]
                    loss_nli_ratio = train_loss_nli[-1] / train_loss_nli[-2]
                    loss_fn_exp = math.exp(loss_fn_ratio / temperature)
                    loss_nli_exp = math.exp(loss_nli_ratio / temperature)
                    loss_fn_weight_gradnorm = loss_fn_exp / (loss_fn_exp +
                                                             loss_nli_exp)
                    loss_nli_weight_gradnorm = loss_nli_exp / (loss_fn_exp +
                                                               loss_nli_exp)
                    loss_fn_weight = math.exp(
                        loss_fn_weight_dataset * loss_fn_weight_gradnorm) / (
                            math.exp(loss_fn_weight_dataset *
                                     loss_fn_weight_gradnorm) +
                            math.exp(loss_nli_weight_dataset *
                                     loss_nli_weight_gradnorm))
                    loss_nli_weight = math.exp(
                        loss_nli_weight_dataset * loss_nli_weight_gradnorm) / (
                            math.exp(loss_fn_weight_dataset *
                                     loss_fn_weight_gradnorm) +
                            math.exp(loss_nli_weight_dataset *
                                     loss_nli_weight_gradnorm))
                else:
                    loss_fn_weight = loss_fn_weight_dataset
                    loss_nli_weight = loss_nli_weight_dataset

                # define the total loss function
                #loss_func = loss_func_fn + loss_func_nli
                # is this needed?

                if np.random.randint(0, 1000) < chance_fn:
                    try:
                        step_fnn, batch_fnn = next(iterator_fnn)
                    except StopIteration:
                        done_fnn = True
                    else:
                        try:
                            batch_loss_fn, batch_acc_fn = train_batch_fn(
                                batch_fnn, model, optimizer, loss_func_fn,
                                loss_fn_weight)
                            train_loss_fn.append(batch_loss_fn)
                            train_acc_fn.append(batch_acc_fn)
                        except Exception as err:
                            print(f'Error in FNN batch: {err}')
                else:
                    try:
                        step_snli, batch_snli = next(iterator_snli)
                    except StopIteration:
                        done_snli = True
                    else:
                        try:
                            batch_loss_nli, batch_acc_nli = train_batch_nli(
                                batch_snli, model, optimizer, loss_func_nli,
                                loss_nli_weight)
                            train_loss_nli.append(batch_loss_nli)
                            train_acc_nli.append(batch_acc_nli)
                        except Exception as err:
                            print(f'Error in SNLI batch: {err}')
                print(f'FNN batch {step_fnn}')
                print(f'SNLI batch {step_snli}')
                if step_fnn % 50 == 0 and step_fnn != 0:
                    print(f'Processed {step_fnn} FNN batches.')
                    print(f'Accuracy: {train_acc_fn[len(train_acc_fn)-1]}.')
                    print(
                        f'Weight for loss for NLI is {loss_nli_weight}, for loss for FN is {loss_fn_weight}.'
                    )
                if step_snli % 50 == 0 and step_snli != 0:
                    print(f'Processed {step_snli} SNLI batches.')
                    print(f'Accuracy: {train_acc_nli[len(train_acc_nli)-1]}.')
                    print(
                        f'Weight for loss for NLI is {loss_nli_weight}, for loss for FN is {loss_fn_weight}.'
                    )
        # one epoch of eval
        model.eval()
        val_loss_fn, val_acc_fn = eval_epoch_fn(FNN_DL['val'], model,
                                                loss_func_fn)
        tasks = ['fn']
        if model_type == 'MTL':
            val_loss_nli, val_acc_nli = eval_epoch_nli(SNLI_DL['val'], model,
                                                       loss_func_nli)
            tasks.append('nli')

        for task in tasks:
            results[task]['epoch'].append(i)
            if task == 'fn':
                temp_train_loss = train_loss_fn
                temp_val_loss = val_loss_fn
                temp_train_acc = train_acc_fn
                temp_val_acc = val_acc_fn
            elif task == 'nli':
                temp_train_loss = train_loss_nli
                temp_val_loss = val_loss_nli
                temp_train_acc = train_acc_nli
                temp_val_acc = val_acc_nli

            results[task]['train_loss'].append(temp_train_loss)
            results[task]['train_accuracy'].append(temp_train_acc)
            results[task]['val_loss'].append(temp_val_loss)
            results[task]['val_accuracy'].append(temp_val_acc)
            print(results)

        best_accuracy = torch.tensor(temp_val_acc).max().item()
        create_checkpoint(checkpoints_dir, i, model, optimizer, results,
                          best_accuracy)

    # save and plot the results
    save_results(results_dir, results, model)
    save_model(models_dir, model)
    plot_results(results_dir, results, model)
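
The core of the MTL branch in the example above is how the two task losses are weighted: a dataset-size prior per task is combined with the ratio of that task's last two losses, pushed through exp(ratio / temperature), and normalised across the two tasks. The same arithmetic, isolated into a standalone helper (the helper name and the example numbers are mine; the formula follows the code above):

# Standalone restatement of the loss weighting used in the MTL branch above.
import math

def task_loss_weights(loss_hist_fn, loss_hist_nli,
                      w_fn_dataset, w_nli_dataset, temperature=2):
    if len(loss_hist_fn) < 2 or len(loss_hist_nli) < 2:
        # not enough history yet: fall back to the dataset-size priors
        return w_fn_dataset, w_nli_dataset
    # ratio of the last two losses per task, softened by the temperature
    fn_exp = math.exp((loss_hist_fn[-1] / loss_hist_fn[-2]) / temperature)
    nli_exp = math.exp((loss_hist_nli[-1] / loss_hist_nli[-2]) / temperature)
    w_fn_grad = fn_exp / (fn_exp + nli_exp)
    w_nli_grad = nli_exp / (fn_exp + nli_exp)
    # combine with the dataset priors and renormalise
    num_fn = math.exp(w_fn_dataset * w_fn_grad)
    num_nli = math.exp(w_nli_dataset * w_nli_grad)
    return num_fn / (num_fn + num_nli), num_nli / (num_fn + num_nli)

# example call with made-up loss histories and priors
print(task_loss_weights([0.9, 0.8], [1.1, 1.0], w_fn_dataset=0.8, w_nli_dataset=0.2))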