Beispiel #1
0
 def train(self, trainX, trainY, verbose=False):
     """Run one training step on a batch and optionally report metrics.

     The targets are passed through ``np.squeeze`` and then a forward pass,
     a backward pass and a parameter update are executed in that order.

     Args:
         trainX: Batch of inputs handed to ``forwardPropagation``.
         trainY: Batch of targets; singleton dimensions are squeezed away.
         verbose: When true, compute and return the metrics for this batch.

     Returns:
         ``[loss, accuracy]`` when ``verbose`` is true, otherwise ``None``.
     """
     targets = np.squeeze(trainY)

     self.forwardPropagation(trainX)
     self.backPropagation(targets)
     self.updateParameters()

     # Metrics are only evaluated on request; the quiet path returns None.
     if not verbose:
         return None

     batch_loss = Metrics.crossEntropyLoss(targets,
                                           self.outputLayer.predictions)
     batch_accuracy = Metrics.accuracy(targets,
                                       self.outputLayer.predictedLabels)
     return [batch_loss, batch_accuracy]
def _build_doc_classifier(args):
    """Build the document classifier selected by ``args``.

    ``args.pretrained_model`` selects the torchvision backbone together with
    the matching project wrapper module, and ``args.pretrained_holistic``
    selects the stage:

    * ``0`` -- a ``DocClassificationHolistic`` model on the frozen backbone.
    * ``1`` -- a ``DocClassificationRest`` model combining a fresh holistic
      wrapper with a second one whose weights are restored from
      ``./checkpoints/<pretrained_model>.pt``.

    Raises:
        ValueError: If either option has an unsupported value (previously
            this surfaced later as a ``NameError`` on ``model``).
    """
    name = args.pretrained_model
    # An if/elif chain (rather than a dict) keeps name resolution lazy, so
    # only the wrapper module that is actually selected has to be resolvable.
    if name == 'vgg16':
        backbone_factory, wrappers = models.vgg16, model_vgg16
    elif name == 'vgg19':
        backbone_factory, wrappers = models.vgg19, model_vgg19
    elif name == 'resnet50':
        backbone_factory, wrappers = models.resnet50, model_resnet50
    elif name == 'densenet121':
        backbone_factory, wrappers = models.densenet121, model_densenet121
    elif name == 'inceptionv3':
        backbone_factory, wrappers = models.inception_v3, model_inceptionv3
    else:
        raise ValueError('Unsupported pretrained model: %r' % (name,))

    if args.pretrained_holistic not in (0, 1):
        raise ValueError('pretrained_holistic must be 0 or 1, got %r' %
                         (args.pretrained_holistic,))

    backbone = backbone_factory(pretrained=True)

    # Freeze training for all layers of the backbone.
    for child in backbone.children():
        for param in child.parameters():
            param.requires_grad = False

    holistic_base = wrappers.DocClassificationHolistic(args, backbone)
    if args.pretrained_holistic == 0:
        return holistic_base

    holistic_trained = wrappers.DocClassificationHolistic(
        args, holistic_base.pretrained_model)
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    checkpoint = torch.load('./checkpoints/%s.pt' % name)
    holistic_trained.load_state_dict(checkpoint['model'])
    return wrappers.DocClassificationRest(args, holistic_base,
                                          holistic_trained)


def _build_doc_optimizer(args, parameters):
    """Create the optimizer named by ``args.optim`` for ``parameters``.

    Raises:
        ValueError: If ``args.optim`` is not one of the supported names.
    """
    optimizer_classes = {
        'adam': optim.Adam,
        'adagrad': optim.Adagrad,
        'sgd': optim.SGD,
        'adadelta': optim.Adadelta,
    }
    if args.optim not in optimizer_classes:
        raise ValueError('Unsupported optimizer: %r' % (args.optim,))
    return optimizer_classes[args.optim](parameters,
                                         lr=args.lr,
                                         weight_decay=args.wd)


def _chunk_indices(indices, batch_size):
    """Split ``indices`` into consecutive lists of at most ``batch_size``."""
    return [
        indices[start:start + batch_size]
        for start in range(0, len(indices), batch_size)
    ]


def _train_doc_epoch(args, trainer, dataset, batches):
    """Run one optimisation pass over ``batches`` of ``dataset``."""
    for batch in tqdm(batches, desc='Training batches..'):
        holistic, header, footer, left_body, right_body, labels = \
            dataset[batch]
        if args.pretrained_holistic == 0:
            trainer.train_holistic(holistic, labels)
        elif args.pretrained_holistic == 1:
            trainer.train_rest(holistic, header, footer, left_body,
                               right_body, labels)


def _evaluate_doc_split(args, trainer, metrics, dataset, batches, desc):
    """Evaluate ``dataset`` batch by batch.

    Returns:
        ``(total_loss, accuracy)`` where ``total_loss`` is the sum of the
        per-batch losses returned by the trainer and ``accuracy`` comes from
        ``metrics.accuracy`` over the concatenated predictions and labels.
    """
    total_loss = 0.0
    predictions = []
    labels = []
    for batch in tqdm(batches, desc=desc):
        holistic, header, footer, left_body, right_body, batch_labels = \
            dataset[batch]
        if args.pretrained_holistic == 0:
            batch_loss, batch_predictions, batch_labels = \
                trainer.test_holistic(holistic, batch_labels)
        elif args.pretrained_holistic == 1:
            batch_loss, batch_predictions, batch_labels = \
                trainer.test_rest(holistic, header, footer, left_body,
                                  right_body, batch_labels)
        predictions.append(batch_predictions)
        labels.append(batch_labels)
        total_loss = total_loss + batch_loss

    accuracy = metrics.accuracy(np.concatenate(predictions),
                                np.concatenate(labels))
    return total_loss, accuracy


def _print_doc_epoch_summary(split_name, epoch_number, mean_loss, accuracy):
    """Print the per-epoch summary for one split."""
    # The padding reproduces the whitespace the original continuation-line
    # string literals embedded, so the log output stays byte-identical.
    pad = ' ' * 24
    template = ('==> ' + split_name + ' Epoch: %d, ' + pad +
                '\nLoss: %f, ' + pad +
                '\nAccuracy: %f')
    print(template % (epoch_number, mean_loss, accuracy))


def main():
    """Train and evaluate a document classifier from command-line arguments.

    Parses the arguments, seeds the RNGs, loads the train/val/test datasets,
    builds the model, loss and optimizer, then for every epoch trains on the
    training split and evaluates on all three splits. Whenever the summed
    test loss improves, a checkpoint is written to
    ``<args.save>/<args.expname>.pt``. The process exits after 20
    consecutive epochs without an improvement.

    Raises:
        ValueError: For an unsupported model, stage or optimizer name.
        SystemExit: When early stopping triggers.
    """
    global args
    args = parse_args()

    # argument validation
    args.cuda = args.cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)
    random.seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
        torch.backends.cudnn.benchmark = True
    if not os.path.exists(args.save):
        os.makedirs(args.save)

    print(args)
    train_dir = glob.glob(os.path.join(args.data, 'train/holistic/*.pt'))
    dev_dir = glob.glob(os.path.join(args.data, 'val/holistic/*.pt'))
    test_dir = glob.glob(os.path.join(args.data, 'test/holistic/*.pt'))

    train_dataset = Dataset(os.path.join(args.data, 'train'), train_dir)
    dev_dataset = Dataset(os.path.join(args.data, 'val'), dev_dir)
    test_dataset = Dataset(os.path.join(args.data, 'test'), test_dir)

    print('==> Size of train data   : %d ' % len(train_dataset))
    print('==> Size of val data   : %d ' % len(dev_dataset))
    print('==> Size of test data   : %d ' % len(test_dataset))

    # initialize model, criterion/loss_function, optimizer
    model = _build_doc_classifier(args)
    criterion = nn.CrossEntropyLoss(reduction='sum')

    # Only the parameters that were not frozen are handed to the optimizer.
    parameters = filter(lambda p: p.requires_grad, model.parameters())

    if args.cuda:
        model.cuda()
        criterion.cuda()

    optimizer = _build_doc_optimizer(args, parameters)
    metrics = Metrics(args.num_classes)

    # create trainer object for training and testing
    trainer = Trainer(args, model, criterion, optimizer)

    train_idx = list(np.arange(len(train_dataset)))
    dev_idx = list(np.arange(len(dev_dataset)))
    test_idx = list(np.arange(len(test_dataset)))

    best = float('inf')
    # Per-epoch test results; collected here but not consumed in this function.
    columns = ['ExpName', 'ExpNo', 'Epoch', 'Loss', 'Accuracy']
    results = []
    patience = 20
    early_stop_count = 0

    for epoch in range(args.epochs):
        random.shuffle(train_idx)
        random.shuffle(dev_idx)
        random.shuffle(test_idx)

        batch_train_data = _chunk_indices(train_idx, args.batchsize)
        batch_dev_data = _chunk_indices(dev_idx, args.batchsize)
        batch_test_data = _chunk_indices(test_idx, args.batchsize)

        _train_doc_epoch(args, trainer, train_dataset, batch_train_data)

        train_loss, train_accuracy = _evaluate_doc_split(
            args, trainer, metrics, train_dataset, batch_train_data,
            'Training batches..')
        dev_loss, dev_accuracy = _evaluate_doc_split(
            args, trainer, metrics, dev_dataset, batch_dev_data,
            'Dev batches..')
        test_loss, test_accuracy = _evaluate_doc_split(
            args, trainer, metrics, test_dataset, batch_test_data,
            'Test batches..')

        # Normalise by the real number of samples. The previous
        # len(batches) * batchsize over-counted whenever the last batch was
        # partial. Assumes the trainer returns summed batch losses, matching
        # reduction='sum' above -- TODO confirm.
        mean_train_loss = train_loss / len(train_idx)
        mean_dev_loss = dev_loss / len(dev_idx)
        mean_test_loss = test_loss / len(test_idx)

        _print_doc_epoch_summary('Training', epoch + 1, mean_train_loss,
                                 train_accuracy)
        _print_doc_epoch_summary('Dev', epoch + 1, mean_dev_loss,
                                 dev_accuracy)
        _print_doc_epoch_summary('Test', epoch + 1, mean_test_loss,
                                 test_accuracy)

        results.append((args.expname, args.expno, epoch + 1, mean_test_loss,
                        test_accuracy))

        # NOTE(review): model selection uses the test split, as before.
        if best > test_loss:
            best = test_loss
            # An improvement restarts the patience window, so only
            # consecutive non-improving epochs count towards early stopping.
            early_stop_count = 0
            checkpoint = {
                'model': trainer.model.state_dict(),
                'optim': trainer.optimizer,
                'loss': test_loss,
                'accuracy': test_accuracy,
                'args': args,
                'epoch': epoch
            }
            print('==> New optimum found, checkpointing everything now...')
            torch.save(checkpoint,
                       '%s.pt' % os.path.join(args.save, args.expname))
        else:
            early_stop_count = early_stop_count + 1
            if early_stop_count >= patience:
                # Same effect as the interactive-only quit(): exit status 0.
                raise SystemExit
Beispiel #3
0
def _load_or_build_sick_split(cache_file, split_dir, vocab, num_classes):
    """Return the dataset cached at ``cache_file``, building it if missing.

    When the cache file does not exist, a ``SICKDataset`` is created from
    ``split_dir`` and saved to ``cache_file`` before being returned.
    """
    if os.path.isfile(cache_file):
        return torch.load(cache_file)
    dataset = SICKDataset(split_dir, vocab, num_classes)
    torch.save(dataset, cache_file)
    return dataset


def _build_sick_embeddings(args, vocab):
    """Build the embedding matrix for ``vocab`` from the GloVe vectors.

    Every vocabulary entry is tokenised with ``word_tokenize``; its row is
    the sum of the GloVe vectors of the tokens GloVe knows. Entries with no
    known token receive a zero vector.
    """
    # load glove embeddings and vocab
    glove_vocab, glove_emb = load_word_vectors(
        os.path.join(args.glove, 'glove.840B.300d'))
    print('==> GLOVE vocabulary size: %d ' % glove_vocab.size())
    emb = torch.Tensor(vocab.size(), glove_emb.size(1)).normal_(-0.05, 0.05)

    # Zero the rows at the first four indices, one per special word.
    special_words = [
        Constants.PAD_WORD, Constants.UNK_WORD, Constants.BOS_WORD,
        Constants.EOS_WORD
    ]
    for idx, _ in enumerate(special_words):
        emb[idx].zero_()

    for word in vocab.labelToIdx.keys():
        # Byte-string keys (Python 2 ``str``) are decoded; text keys are
        # used as-is, so this also works where ``str`` has no ``decode``.
        text = word.decode('utf8') if isinstance(word, bytes) else word
        glove_ids = [
            glove_vocab.getIndex(token) for token in word_tokenize(text)
        ]
        glove_ids = [i for i in glove_ids if i is not None]

        if glove_ids:
            sum_emb = torch.sum(
                glove_emb.index_select(0, torch.LongTensor(glove_ids)), 0)
        else:
            # No token is known to GloVe: use an all-zero vector.
            sum_emb = glove_emb[1] * 0

        emb[vocab.getIndex(word)] = sum_emb
    return emb


def main():
    """Train and evaluate a SimilarityTreeLSTM on the SICK splits.

    Parses the arguments, seeds the RNGs, builds (or loads cached) vocab,
    datasets and embeddings, then trains for ``args.epochs`` epochs. After
    each epoch all three splits are evaluated and a checkpoint is written
    whenever the test Pearson score improves.

    Raises:
        ValueError: If ``args.optim`` names an unsupported optimizer.
        SystemExit: If sparsity and weight decay are both requested.
    """
    global args
    args = parse_args()
    args.input_dim, args.mem_dim = 300, 150
    args.hidden_dim, args.num_classes = 20, 2
    args.cuda = args.cuda and torch.cuda.is_available()
    if args.sparse and args.wd != 0:
        print('Sparsity and weight decay are incompatible, pick one!')
        # Same effect as the interactive-only exit(): exit status 0.
        raise SystemExit
    print(args)
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    numpy.random.seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
        torch.backends.cudnn.benchmark = True
    if not os.path.exists(args.save):
        os.makedirs(args.save)

    train_dir = os.path.join(args.data, 'train/')
    dev_dir = os.path.join(args.data, 'dev/')
    test_dir = os.path.join(args.data, 'test/')

    # write unique words from all token files
    sick_vocab_file = os.path.join(args.data, 'sick.vocab')
    if not os.path.isfile(sick_vocab_file):
        split_dirs = [train_dir, dev_dir, test_dir]
        token_files_a = [os.path.join(split, 'toks.a') for split in split_dirs]
        token_files_b = [os.path.join(split, 'toks.b') for split in split_dirs]
        build_vocab(token_files_a + token_files_b, sick_vocab_file)

    # get vocab object from vocab file previously written
    vocab = Vocab(filename=sick_vocab_file,
                  data=[
                      Constants.PAD_WORD, Constants.UNK_WORD,
                      Constants.BOS_WORD, Constants.EOS_WORD
                  ])
    print('==> SICK vocabulary size : %d ' % vocab.size())

    # load SICK dataset splits (cached as .pth files next to the data)
    train_dataset = _load_or_build_sick_split(
        os.path.join(args.data, 'sick_train.pth'), train_dir, vocab,
        args.num_classes)
    print('==> Size of train data   : %d ' % len(train_dataset))
    dev_dataset = _load_or_build_sick_split(
        os.path.join(args.data, 'sick_dev.pth'), dev_dir, vocab,
        args.num_classes)
    print('==> Size of dev data     : %d ' % len(dev_dataset))
    test_dataset = _load_or_build_sick_split(
        os.path.join(args.data, 'sick_test.pth'), test_dir, vocab,
        args.num_classes)
    print('==> Size of test data    : %d ' % len(test_dataset))

    # initialize model, criterion/loss_function, optimizer
    model = SimilarityTreeLSTM(args.cuda, vocab.size(), args.input_dim,
                               args.mem_dim, args.hidden_dim, args.num_classes,
                               args.sparse)
    criterion = nn.KLDivLoss()
    if args.cuda:
        model.cuda()
        criterion.cuda()
    if args.optim == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.wd)
    elif args.optim == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=args.lr,
                                  weight_decay=args.wd)
    elif args.optim == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              weight_decay=args.wd)
    else:
        # Previously an unknown name left `optimizer` unbound and failed
        # later with a NameError.
        raise ValueError('Unsupported optimizer: %r' % (args.optim,))
    metrics = Metrics(args.num_classes)

    # Embeddings are cached on disk; they are rebuilt from GloVe only when
    # the cache file is missing.
    emb_file = os.path.join(args.data, 'sick_embed.pth')
    if os.path.isfile(emb_file):
        emb = torch.load(emb_file)
    else:
        emb = _build_sick_embeddings(args, vocab)
        torch.save(emb, emb_file)
    # plug these into embedding matrix inside model
    if args.cuda:
        emb = emb.cuda()
    model.childsumtreelstm.emb.state_dict()['weight'].copy_(emb)

    # create trainer object for training and testing
    trainer = Trainer(args, model, criterion, optimizer)

    best = -float('inf')
    for epoch in range(args.epochs):
        # The value returned by train() was immediately overwritten before,
        # so it is no longer bound.
        trainer.train(train_dataset)
        train_loss, train_pred = trainer.test(train_dataset)
        print(train_pred)
        dev_loss, dev_pred = trainer.test(dev_dataset)
        print(dev_pred)
        test_loss, test_pred = trainer.test(test_dataset)

        # NOTE(review): the *_mse names hold metrics.accuracy() results and
        # are printed under the label "L1" -- confirm the intended metric.
        train_pearson = metrics.pearson(train_pred, train_dataset.labels)
        train_mse = metrics.accuracy(train_pred, train_dataset.labels)
        print('==> Train    Loss: {}\tPearson: {}\tL1: {}'.format(
            train_loss, train_pearson, train_mse))
        dev_pearson = metrics.pearson(dev_pred, dev_dataset.labels)
        dev_mse = metrics.accuracy(dev_pred, dev_dataset.labels)
        print('==> Dev      Loss: {}\tPearson: {}\tL1: {}'.format(
            dev_loss, dev_pearson, dev_mse))
        test_pearson = metrics.pearson(test_pred, test_dataset.labels)
        test_mse = metrics.accuracy(test_pred, test_dataset.labels)
        print('==> Test     Loss: {}\tPearson: {}\tL1: {}'.format(
            test_loss, test_pearson, test_mse))

        if best < test_pearson:
            best = test_pearson
            checkpoint = {
                'model': trainer.model.state_dict(),
                'optim': trainer.optimizer,
                'pearson': test_pearson,
                'mse': test_mse,
                'args': args,
                'epoch': epoch
            }
            print('==> New optimum found, checkpointing everything now...')
            # NOTE(review): this yields "<expname>.pth.pt"; kept unchanged so
            # existing consumers of the checkpoint path keep working.
            torch.save(
                checkpoint,
                '%s.pt' % os.path.join(args.save, args.expname + '.pth'))
Beispiel #4
0
    optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                 model.parameters()), lr=args.lr, weight_decay=args.wd)
# Move the model and the loss to the selected device, then wrap them in a
# Trainer together with the optimizer.
model.to(device), criterion.to(device)
trainer = Trainer(args, model, criterion, optimizer, device)
# NOTE(review): `best` is initialised but never read or updated in the
# visible part of this script.
best = - float("inf")
# NOTE(review): 8 is hard-coded here while args.num_classes is used below --
# confirm the two are meant to agree.
metrics = Metrics(8)
# Per epoch: train once, then evaluate on the train and test datasets and
# log loss, AUC, MSE and accuracy for each.
for epoch in range(args.epochs):
    # The value returned by train() is overwritten by the test() call below.
    train_loss = trainer.train(train_dataset)
    train_loss, train_pred = trainer.test(train_dataset)
    test_loss, test_preds = trainer.test(test_dataset)
    # Keep only column 1 of the predictions (presumably the positive-class
    # score -- TODO confirm against Trainer.test).
    train_pred = train_pred[:, 1]
    test_preds = test_preds[:, 1]
    train_labels = utils.get_labels(train_dataset).squeeze(1)
    # NOTE(review): train_targets is not used in the visible code.
    train_targets = utils.map_labels_to_targets(train_labels, args.num_classes)
    train_mse = metrics.mse(train_pred, train_labels)
    # Accuracy is computed against the labels, not the mapped targets.
    acc = metrics.accuracy(train_pred, train_labels)  # label!!!
    # `threshold` is unused; only fpr/tpr feed the AUC.
    fpr, tpr, threshold = roc_curve(train_labels, train_pred)
    train_auc = auc(fpr, tpr)
    logger.info("==> Epoch {}, Train \t Loss: {}\t Auc: {}\tMSE{} \t Accuracy{}".format(
        epoch, train_loss, train_auc, train_mse, acc
    ))

    # Same metrics for the test dataset ("lables" is the existing spelling
    # of this variable name).
    test_lables = utils.get_labels(test_dataset).squeeze(1)
    # NOTE(review): test_targets is not used in the visible code.
    test_targets = utils.map_labels_to_targets(test_lables, args.num_classes)
    test_mse = metrics.mse(test_preds, test_lables)
    test_acc = metrics.accuracy(test_preds, test_lables)
    # `t` (the thresholds) is unused; only fpr/tpr feed the AUC.
    fpr, tpr, t = roc_curve(test_lables, test_preds)
    test_auc = auc(fpr, tpr)
    logger.info("==> Epoch {}, Test \t Loss: {}\tAuc: {}\tMSE{} \t Accuracy{} \t".format(
        epoch, test_loss, test_auc, test_mse, test_acc
    ))