def main():
    global args
    args = parse_args()
    if args.model_name == 'dependency':
        args.mem_dim = 168
    elif args.model_name == 'constituency':
        args.mem_dim = 200
    if args.fine_grain:
        args.num_classes = 5  # 0 1 2 3 4
    else:
        args.num_classes = 3  # 0 1 2 (1 neutral)
    args.cuda = args.cuda and torch.cuda.is_available()
    # args.cuda = False
    print(args)
    # torch.manual_seed(args.seed)
    # if args.cuda:
    # torch.cuda.manual_seed(args.seed)

    train_dir = os.path.join(args.data, 'train/')
    dev_dir = os.path.join(args.data, 'dev/')
    test_dir = os.path.join(args.data, 'test/')

    # write unique words from all token files
    token_files = [
        os.path.join(split, 'sents.toks')
        for split in [train_dir, dev_dir, test_dir]
    ]
    vocab_file = os.path.join(args.data, 'vocab-cased.txt')  # use vocab-cased
    # build_vocab(token_files, vocab_file) NO, DO NOT BUILD VOCAB,  USE OLD VOCAB

    # get vocab object from vocab file previously written
    vocab = Vocab(filename=vocab_file)
    print('==> SST vocabulary size : %d ' % vocab.size())

    # Load SST dataset splits

    is_preprocessing_data = False  # let program turn off after preprocess data

    # train
    train_file = os.path.join(args.data, 'sst_train.pth')
    if os.path.isfile(train_file):
        train_dataset = torch.load(train_file)
    else:
        train_dataset = SSTDataset(train_dir, vocab, args.num_classes,
                                   args.fine_grain, args.model_name)
        torch.save(train_dataset, train_file)
        is_preprocessing_data = True

    # dev
    dev_file = os.path.join(args.data, 'sst_dev.pth')
    if os.path.isfile(dev_file):
        dev_dataset = torch.load(dev_file)
    else:
        dev_dataset = SSTDataset(dev_dir, vocab, args.num_classes,
                                 args.fine_grain, args.model_name)
        torch.save(dev_dataset, dev_file)
        is_preprocessing_data = True

    # test
    test_file = os.path.join(args.data, 'sst_test.pth')
    if os.path.isfile(test_file):
        test_dataset = torch.load(test_file)
    else:
        test_dataset = SSTDataset(test_dir, vocab, args.num_classes,
                                  args.fine_grain, args.model_name)
        torch.save(test_dataset, test_file)
        is_preprocessing_data = True

    criterion = nn.NLLLoss()
    # initialize model, criterion/loss_function, optimizer
    model = TreeLSTMSentiment(args.cuda, vocab.size(), args.input_dim,
                              args.mem_dim, args.num_classes, args.model_name,
                              args.attention_dim, args.dropout2,
                              args.attention_flag, criterion)

    embedding_model = nn.Embedding(vocab.size(), args.input_dim)

    if args.cuda:
        embedding_model = embedding_model.cuda()

    if args.cuda:
        model.cuda(), criterion.cuda()
    if args.optim == 'adam':
        optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                      model.parameters()),
                               lr=args.lr,
                               weight_decay=args.wd)
    elif args.optim == 'adagrad':
        # optimizer   = optim.Adagrad(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr, weight_decay=args.wd)
        optimizer = optim.Adagrad([{
            'params': model.parameters(),
            'lr': args.lr
        }],
                                  lr=args.lr,
                                  weight_decay=args.wd)
    metrics = Metrics(args.num_classes)

    utils.count_param(model)

    # for words common to dataset vocab and GLOVE, use GLOVE vectors
    # for other words in dataset vocab, use random normal vectors
    emb_file = os.path.join(args.data, 'sst_embed.pth')
    if os.path.isfile(emb_file):
        emb = torch.load(emb_file)
    else:

        # load glove embeddings and vocab
        glove_vocab, glove_emb = load_word_vectors(
            os.path.join(args.glove, 'glove.840B.300d'))
        print('==> GLOVE vocabulary size: %d ' % glove_vocab.size())

        emb = torch.zeros(vocab.size(), glove_emb.size(1))

        for word in vocab.labelToIdx.keys():
            if glove_vocab.getIndex(word):
                emb[vocab.getIndex(word)] = glove_emb[glove_vocab.getIndex(
                    word)]
            else:
                emb[vocab.getIndex(word)] = torch.Tensor(
                    emb[vocab.getIndex(word)].size()).normal_(-0.05, 0.05)
        torch.save(emb, emb_file)
        is_preprocessing_data = True  # flag to quit
        print('done creating emb, quit')

    if is_preprocessing_data:
        print('done preprocessing data, quit program to prevent memory leak')
        print('please run again')
        quit()

    # plug these into embedding matrix inside model
    if args.cuda:
        emb = emb.cuda()

    # model.childsumtreelstm.emb.state_dict()['weight'].copy_(emb)
    embedding_model.state_dict()['weight'].copy_(emb)

    # create trainer object for training and testing
    trainer = SentimentTrainer(args, model, embedding_model, criterion,
                               optimizer)

    mode = 'EXPERIMENT'
    # mode = 'TEST'
    if mode == 'DEBUG':
        for epoch in range(args.epochs):
            dev_loss = trainer.train(dev_dataset)
            dev_loss, dev_pred = trainer.test(dev_dataset)
            test_loss, test_pred = trainer.test(test_dataset)

            dev_acc = metrics.sentiment_accuracy_score(dev_pred,
                                                       dev_dataset.labels)
            test_acc = metrics.sentiment_accuracy_score(
                test_pred, test_dataset.labels)
            print('==> Dev loss   : %f \t' % dev_loss, end="")
            print('Epoch ', epoch, 'dev percentage ', dev_acc)
    elif mode == "PRINT_TREE":
        for i in range(0, 10):
            ttree, tsent, tlabel = dev_dataset[i]
            utils.print_tree(ttree, 0)
            print('_______________')
        print('break')
        quit()
    elif mode == "EXPERIMENT":
        max_dev = 0
        max_dev_epoch = 0
        filename = args.name + '.pth'
        for epoch in range(args.epochs):
            train_loss = trainer.train(train_dataset)
            dev_loss, dev_pred = trainer.test(dev_dataset)
            dev_acc = metrics.sentiment_accuracy_score(dev_pred,
                                                       dev_dataset.labels)
            print('==> Train loss   : %f \t' % train_loss, end="")
            print('Epoch ', epoch, 'dev percentage ', dev_acc)
            torch.save(model, args.saved + str(epoch) + '_model_' + filename)
            torch.save(embedding_model,
                       args.saved + str(epoch) + '_embedding_' + filename)
            if dev_acc > max_dev:
                max_dev = dev_acc
                max_dev_epoch = epoch
            gc.collect()
        print('epoch ' + str(max_dev_epoch) + ' dev score of ' + str(max_dev))
        print('eva on test set ')
        model = torch.load(args.saved + str(max_dev_epoch) + '_model_' +
                           filename)
        embedding_model = torch.load(args.saved + str(max_dev_epoch) +
                                     '_embedding_' + filename)
        trainer = SentimentTrainer(args, model, embedding_model, criterion,
                                   optimizer)
        test_loss, test_pred = trainer.test(test_dataset)
        test_acc = metrics.sentiment_accuracy_score(test_pred,
                                                    test_dataset.labels)
        print('Epoch with max dev:' + str(max_dev_epoch) +
              ' |test percentage ' + str(test_acc))
        print('____________________' + str(args.name) + '___________________')
    elif mode == "TEST":
        filename = args.name + '.pth'
        epoch = args.epochs
        print('eva on test set ')
        model = torch.load(args.saved + str(epoch) + '_model_' + filename)
        embedding_model = torch.load(args.saved + str(epoch) + '_embedding_' +
                                     filename)
        trainer = SentimentTrainer(args, model, embedding_model, criterion,
                                   optimizer)
        test_loss, test_pred = trainer.test(test_dataset)
        test_acc = metrics.sentiment_accuracy_score(test_pred,
                                                    test_dataset.labels)
        print('Epoch with max dev:' + str(epoch) + ' |test percentage ' +
              str(test_acc))
        print('____________________' + str(args.name) + '___________________')

    else:
        for epoch in range(args.epochs):
            train_loss = trainer.train(train_dataset)
            train_loss, train_pred = trainer.test(train_dataset)
            dev_loss, dev_pred = trainer.test(dev_dataset)
            test_loss, test_pred = trainer.test(test_dataset)

            train_acc = metrics.sentiment_accuracy_score(
                train_pred, train_dataset.labels)
            dev_acc = metrics.sentiment_accuracy_score(dev_pred,
                                                       dev_dataset.labels)
            test_acc = metrics.sentiment_accuracy_score(
                test_pred, test_dataset.labels)
            print('==> Train loss   : %f \t' % train_loss, end="")
            print('Epoch ', epoch, 'train percentage ', train_acc)
            print('Epoch ', epoch, 'dev percentage ', dev_acc)
            print('Epoch ', epoch, 'test percentage ', test_acc)
Exemple #2
0
        # column : 3
        X_03 = self.up_concat03(X_12, X_00, X_01, X_02)
        X_13 = self.up_concat13(X_22, X_10, X_11, X_12)
        # column : 4
        X_04 = self.up_concat04(X_13, X_00, X_01, X_02, X_03)

        # final layer
        final_1 = self.final_1(X_01)
        final_2 = self.final_2(X_02)
        final_3 = self.final_3(X_03)
        final_4 = self.final_4(X_04)

        final = (final_1 + final_2 + final_3 + final_4) / 4

        if self.is_ds:
            return final
        else:
            return final_4


if __name__ == '__main__':
    print('#### Test Case ###')
    from torch.autograd import Variable

    x = Variable(torch.rand(2, 1, 64, 64)).cuda()
    model = UNet_Nested().cuda()
    param = count_param(model)
    y = model(x)
    print('Output shape:', y.shape)
    print('UNet++ totoal parameters: %.2fM (%d)' % (param / 1e6, param))
Exemple #3
0
def main():
    global args
    args = parse_args(type=1)
    print(args.name)
    print(args.model_name)

    args.input_dim = 300

    if args.mem_dim == 0:
        if args.model_name == 'dependency':
            args.mem_dim = 168
        elif args.model_name == 'constituency':
            args.mem_dim = 150
        elif args.model_name == 'lstm':
            args.mem_dim = 168
        elif args.model_name == 'bilstm':
            args.mem_dim = 168

    if args.num_classes == 0:
        if args.fine_grain:
            args.num_classes = 5  # 0 1 2 3 4
        else:
            args.num_classes = 3  # 0 1 2 (1 neutral)
    elif args.num_classes == 2:
        # assert False # this will not work
        assert not args.fine_grain

    args.cuda = args.cuda and torch.cuda.is_available()
    # args.cuda = False
    print(args)
    # torch.manual_seed(args.seed)
    # if args.cuda:
    # torch.cuda.manual_seed(args.seed)

    train_dir = os.path.join(args.data, 'train/')
    dev_dir = os.path.join(args.data, 'dev/')
    test_dir = os.path.join(args.data, 'test/')

    # write unique words from all token files
    token_files = [
        os.path.join(split, 'sents.toks')
        for split in [train_dir, dev_dir, test_dir]
    ]
    vocab_file = os.path.join(args.data, 'vocab-cased.txt')  # use vocab-cased
    # build_vocab(token_files, vocab_file) NO, DO NOT BUILD VOCAB,  USE OLD VOCAB

    # get vocab object from vocab file previously written
    vocab = Vocab(filename=vocab_file)
    print('==> SST vocabulary size : %d ' % vocab.size())

    # Load SST dataset splits

    is_preprocessing_data = False  # let program turn off after preprocess data

    # train
    train_file = os.path.join(args.data, 'sst_train.pth')
    if os.path.isfile(train_file):
        train_dataset = torch.load(train_file)
    else:
        train_dataset = SSTDataset(train_dir, vocab, args.num_classes,
                                   args.fine_grain, args.model_name)
        torch.save(train_dataset, train_file)
        is_preprocessing_data = True

    # dev
    dev_file = os.path.join(args.data, 'sst_dev.pth')
    if os.path.isfile(dev_file):
        dev_dataset = torch.load(dev_file)
    else:
        dev_dataset = SSTDataset(dev_dir, vocab, args.num_classes,
                                 args.fine_grain, args.model_name)
        torch.save(dev_dataset, dev_file)
        is_preprocessing_data = True

    # test
    test_file = os.path.join(args.data, 'sst_test.pth')
    if os.path.isfile(test_file):
        test_dataset = torch.load(test_file)
    else:
        test_dataset = SSTDataset(test_dir, vocab, args.num_classes,
                                  args.fine_grain, args.model_name)
        torch.save(test_dataset, test_file)
        is_preprocessing_data = True

    criterion = nn.NLLLoss()
    # initialize model, criterion/loss_function, optimizer

    model = DMNWraper(args.cuda, args.input_dim, args.mem_dim, criterion,
                      args.train_subtrees, args.num_classes, args.embdrop)

    embedding_model = nn.Embedding(vocab.size(), args.input_dim)

    if args.cuda:
        embedding_model = embedding_model.cuda()

    if args.cuda:
        model.cuda(), criterion.cuda()

    # for words common to dataset vocab and GLOVE, use GLOVE vectors
    # for other words in dataset vocab, use random normal vectors
    if args.embedding == 'glove':
        emb_torch = 'sst_embed.pth'
        emb_vector = 'glove.840B.300d'
        emb_vector_path = os.path.join(args.glove, emb_vector)
        assert os.path.isfile(emb_vector_path + '.txt')
    elif args.embedding == 'paragram':
        emb_torch = 'sst_embed_paragram.pth'
        emb_vector = 'paragram_300_sl999'
        emb_vector_path = os.path.join(args.paragram, emb_vector)
        assert os.path.isfile(emb_vector_path + '.txt')
    elif args.embedding == 'paragram_xxl':
        emb_torch = 'sst_embed_paragram_xxl.pth'
        emb_vector = 'paragram-phrase-XXL'
        emb_vector_path = os.path.join(args.paragram, emb_vector)
        assert os.path.isfile(emb_vector_path + '.txt')
    else:
        assert False

    emb_file = os.path.join(args.data, emb_torch)
    if os.path.isfile(emb_file):
        emb = torch.load(emb_file)
    else:

        # load glove embeddings and vocab
        glove_vocab, glove_emb = load_word_vectors(emb_vector_path)
        print('==> Embedding vocabulary size: %d ' % glove_vocab.size())

        emb = torch.zeros(vocab.size(), glove_emb.size(1))

        for word in vocab.labelToIdx.keys():
            if glove_vocab.getIndex(word):
                emb[vocab.getIndex(word)] = glove_emb[glove_vocab.getIndex(
                    word)]
            else:
                emb[vocab.getIndex(word)] = torch.Tensor(
                    emb[vocab.getIndex(word)].size()).normal_(-0.05, 0.05)
        torch.save(emb, emb_file)
        is_preprocessing_data = True  # flag to quit
        print('done creating emb, quit')

    if is_preprocessing_data:
        print('quit program')
        quit()

    # plug these into embedding matrix inside model
    if args.cuda:
        emb = emb.cuda()
    embedding_model.state_dict()['weight'].copy_(emb)

    if args.optim == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.wd)
    elif args.optim == 'adagrad':
        # optimizer   = optim.Adagrad(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr, weight_decay=args.wd)
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=args.lr,
                                  weight_decay=args.wd)
    elif args.optim == 'adam_combine':
        optimizer = optim.Adam([{
            'params': model.parameters(),
            'lr': args.lr,
            'weight_decay': args.wd
        }, {
            'params': embedding_model.parameters(),
            'lr': args.emblr,
            'weight_decay': args.embwd
        }])
        args.manually_emb = 0
    elif args.optim == 'adagrad_combine':
        optimizer = optim.Adagrad([{
            'params': model.parameters(),
            'lr': args.lr,
            'weight_decay': args.wd
        }, {
            'params': embedding_model.parameters(),
            'lr': args.emblr,
            'weight_decay': args.embwd
        }])
        args.manually_emb = 0
    elif args.optim == 'adam_combine_v2':
        model.embedding_model = embedding_model
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.wd)
        args.manually_emb = 0
    metrics = Metrics(args.num_classes)
    utils.count_param(model)

    trainer = SentimentTrainer(args, model, embedding_model, criterion,
                               optimizer)

    trainer.set_initial_emb(emb)
    question_idx = vocab.labelToIdx['sentiment']
    question_idx = torch.Tensor([question_idx])
    trainer.set_question(question_idx)

    # trainer = SentimentTrainer(args, model, embedding_model ,criterion, optimizer)

    mode = args.mode
    if mode == 'DEBUG':
        for epoch in range(args.epochs):
            # print a tree
            tree, sent, label = dev_dataset[3]
            utils.print_span(tree, sent, vocab)
            quit()

            dev_loss = trainer.train(dev_dataset)
            dev_loss, dev_pred, _ = trainer.test(dev_dataset)
            test_loss, test_pred, _ = trainer.test(test_dataset)

            dev_acc = metrics.sentiment_accuracy_score(dev_pred,
                                                       dev_dataset.labels)
            test_acc = metrics.sentiment_accuracy_score(
                test_pred, test_dataset.labels)
            print('==> Dev loss   : %f \t' % dev_loss, end="")
            print('Epoch ', epoch, 'dev percentage ', dev_acc)
    elif mode == "PRINT_TREE":
        for i in range(0, 10):
            ttree, tsent, tlabel = dev_dataset[i]
            utils.print_tree(ttree, 0)
            print('_______________')
        print('break')
        quit()
    elif mode == 'EVALUATE':
        filename = args.name + '.pth'
        epoch = args.epochs
        model_name = str(epoch) + '_model_' + filename
        embedding_name = str(epoch) + '_embedding_' + filename
        model = torch.load(os.path.join(args.saved, model_name))
        embedding_model = torch.load(os.path.join(args.saved, embedding_name))

        trainer = SentimentTrainer(args, model, embedding_model, criterion,
                                   optimizer)
        trainer.set_question(question_idx)
        test_loss, test_pred, subtree_metrics = trainer.test(test_dataset)
        test_acc = metrics.sentiment_accuracy_score(
            test_pred, test_dataset.labels, num_classes=args.num_classes)
        print('Epoch with max dev:' + str(epoch) + ' |test percentage ' +
              str(test_acc))
        print('____________________' + str(args.name) + '___________________')
        print_list = subtree_metrics.print_list
        torch.save(print_list,
                   os.path.join(args.saved, args.name + 'printlist.pth'))
        utils.print_trees_file(args,
                               vocab,
                               test_dataset,
                               print_list,
                               name='tree')
    elif mode == "EXPERIMENT":
        # dev_loss, dev_pred = trainer.test(dev_dataset)
        # dev_acc = metrics.sentiment_accuracy_score(dev_pred, dev_dataset.labels, num_classes=args.num_classes)
        max_dev = 0
        max_dev_epoch = 0
        filename = args.name + '.pth'
        for epoch in range(args.epochs):
            # train_loss, train_pred, _ = trainer.test(train_dataset)
            train_loss_while_training = trainer.train(train_dataset)
            train_loss, train_pred, _ = trainer.test(train_dataset)
            dev_loss, dev_pred, _ = trainer.test(dev_dataset)
            dev_acc = metrics.sentiment_accuracy_score(
                dev_pred, dev_dataset.labels, num_classes=args.num_classes)
            train_acc = metrics.sentiment_accuracy_score(
                train_pred, train_dataset.labels, num_classes=args.num_classes)
            print('==> Train loss   : %f \t' % train_loss_while_training,
                  end="")
            print('Epoch ', epoch, 'dev percentage ', dev_acc)
            print('Epoch %d dev percentage %f ' % (epoch, dev_acc))
            print('Train acc %f ' % (train_acc))
            if dev_acc > max_dev:
                print('update best dev acc %f ' % (dev_acc))
                max_dev = dev_acc
                max_dev_epoch = epoch
                utils.mkdir_p(args.saved)
                torch.save(
                    model,
                    os.path.join(args.saved,
                                 str(epoch) + '_model_' + filename))
                torch.save(
                    embedding_model,
                    os.path.join(args.saved,
                                 str(epoch) + '_embedding_' + filename))
            gc.collect()
        print('epoch ' + str(max_dev_epoch) + ' dev score of ' + str(max_dev))
        print('eva on test set ')
        model = torch.load(
            os.path.join(args.saved,
                         str(max_dev_epoch) + '_model_' + filename))
        embedding_model = torch.load(
            os.path.join(args.saved,
                         str(max_dev_epoch) + '_embedding_' + filename))
        trainer = SentimentTrainer(args, model, embedding_model, criterion,
                                   optimizer)
        trainer.set_question(question_idx)
        test_loss, test_pred, _ = trainer.test(test_dataset)
        test_acc = metrics.sentiment_accuracy_score(
            test_pred, test_dataset.labels, num_classes=args.num_classes)
        print('Epoch with max dev:' + str(max_dev_epoch) +
              ' |test percentage ' + str(test_acc))
        print('____________________' + str(args.name) + '___________________')
    else:
        for epoch in range(args.epochs):
            train_loss = trainer.train(train_dataset)
            train_loss, train_pred, _ = trainer.test(train_dataset)
            dev_loss, dev_pred, _ = trainer.test(dev_dataset)
            test_loss, test_pred, _ = trainer.test(test_dataset)

            train_acc = metrics.sentiment_accuracy_score(
                train_pred, train_dataset.labels)
            dev_acc = metrics.sentiment_accuracy_score(dev_pred,
                                                       dev_dataset.labels)
            test_acc = metrics.sentiment_accuracy_score(
                test_pred, test_dataset.labels)
            print('==> Train loss   : %f \t' % train_loss, end="")
            print('Epoch ', epoch, 'train percentage ', train_acc)
            print('Epoch ', epoch, 'dev percentage ', dev_acc)
            print('Epoch ', epoch, 'test percentage ', test_acc)