import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel
import pandas as pd

from model import SentimentClassifier
from dataset import SSTDataset

# Create the validation set
val_set = SSTDataset(filename='data/dev.tsv', maxlen=30)
# Create the validation dataloader
val_loader = DataLoader(val_set, batch_size=64, num_workers=5)

# Create the network
net = SentimentClassifier()
# CPU or GPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Put the network on the GPU if available
net = net.to(device)
# Load the state dictionary of the network
net.load_state_dict(torch.load('./models/model', map_location=device))

# Takes as input the logits of the positive class and computes the binary cross-entropy
criterion = nn.BCEWithLogitsLoss()


def get_accuracy_from_logits(logits, labels):
    # Get a tensor of shape [B, 1, 1] with probabilities that the sentiment is positive
    probs = torch.sigmoid(logits.unsqueeze(-1))
    # Convert probabilities to predictions, 1 being positive and 0 being negative
    soft_probs = (probs > 0.5).long()
    # Check which predictions match the ground truth and compute the accuracy
    # (the excerpt is cut off here; the lines below are the standard completion)
    acc = (soft_probs.squeeze() == labels).float().mean()
    return acc
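# NOTE: the script above prepares the model, criterion and validation loader but
# stops before the evaluation loop. Below is a minimal sketch of that loop. It
# assumes SSTDataset yields (token ids, attention masks, labels) and that
# SentimentClassifier takes (seq, attn_masks) and returns one logit per example;
# both are assumptions about the surrounding code base, not part of the original file.
def evaluate(net, criterion, dataloader, device):
    net.eval()
    mean_acc, mean_loss, count = 0.0, 0.0, 0
    with torch.no_grad():
        for seq, attn_masks, labels in dataloader:
            seq, attn_masks, labels = seq.to(device), attn_masks.to(device), labels.to(device)
            logits = net(seq, attn_masks)
            mean_loss += criterion(logits.squeeze(-1), labels.float()).item()
            mean_acc += get_accuracy_from_logits(logits, labels).item()
            count += 1
    return mean_acc / count, mean_loss / count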
def main(write_to):
    startTime = time.time()
    global args
    args = parse_args(type=1)
    args.input_dim = 300
    if args.model_name == 'dependency':
        args.mem_dim = 168
    elif args.model_name == 'constituency':
        args.mem_dim = 150
    if args.fine_grain:
        args.num_classes = 5  # 0 1 2 3 4
    else:
        args.num_classes = 3  # 0 1 2 (1 neutral)
    args.cuda = args.cuda and torch.cuda.is_available()
    # args.cuda = False
    print(args)
    # torch.manual_seed(args.seed)
    # if args.cuda:
    #     torch.cuda.manual_seed(args.seed)

    # train_dir = os.path.join(args.data, 'train/')
    train_dir = os.path.join(args.data, 'dev/')  # Fei: wants to train on a smaller data set
    # dev_dir = os.path.join(args.data, 'dev/')
    # test_dir = os.path.join(args.data, 'test/')

    # write unique words from all token files
    token_files = [os.path.join(split, 'sents.toks') for split in [train_dir]]
    vocab_file = os.path.join(args.data, 'vocab-cased.txt')  # use vocab-cased
    # build_vocab(token_files, vocab_file)  NO, DO NOT BUILD VOCAB, USE OLD VOCAB
    # vocab_file = os.path.join(args.data, 'vocab-cased-dev.txt')
    # build_vocab(token_files, vocab_file)

    # get vocab object from vocab file previously written
    vocab = Vocab(filename=vocab_file)
    print('==> SST vocabulary size : %d ' % vocab.size())

    # Load SST dataset splits
    is_preprocessing_data = False  # let program turn off after preprocess data

    # train
    train_file = os.path.join(args.data, 'sst_train.pth')
    if os.path.isfile(train_file):
        train_dataset = torch.load(train_file)
    else:
        train_dataset = SSTDataset(train_dir, vocab, args.num_classes,
                                   args.fine_grain, args.model_name)
        torch.save(train_dataset, train_file)
        is_preprocessing_data = True

    # dev
    # dev_file = os.path.join(args.data, 'sst_dev.pth')
    # if os.path.isfile(dev_file):
    #     dev_dataset = torch.load(dev_file)
    # else:
    #     dev_dataset = SSTDataset(dev_dir, vocab, args.num_classes, args.fine_grain, args.model_name)
    #     torch.save(dev_dataset, dev_file)
    #     is_preprocessing_data = True

    # test
    # test_file = os.path.join(args.data, 'sst_test.pth')
    # if os.path.isfile(test_file):
    #     test_dataset = torch.load(test_file)
    # else:
    #     test_dataset = SSTDataset(test_dir, vocab, args.num_classes, args.fine_grain, args.model_name)
    #     torch.save(test_dataset, test_file)
    #     is_preprocessing_data = True

    criterion = nn.NLLLoss()

    # initialize model, criterion/loss_function, optimizer
    model = TreeLSTMSentiment(args.cuda, vocab.size(), args.input_dim,
                              args.mem_dim, args.num_classes, args.model_name,
                              criterion)
    embedding_model = nn.Embedding(vocab.size(), args.input_dim)
    # Fei: don't optimize embedding
    embedding_model.weight.requires_grad = False
    if args.cuda:
        embedding_model = embedding_model.cuda()
    if args.cuda:
        model.cuda(), criterion.cuda()

    if args.optim == 'adam':
        optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                               lr=args.lr, weight_decay=args.wd)
    elif args.optim == 'adagrad':
        # optimizer = optim.Adagrad(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr, weight_decay=args.wd)
        optimizer = optim.Adagrad(
            [{
                'params': filter(lambda p: p.requires_grad, model.parameters()),
                'lr': args.lr
            }],  # Fei: filter non_trainable
            lr=args.lr,
            weight_decay=args.wd)

    metrics = Metrics(args.num_classes)
    utils.count_param(model)

    # for words common to dataset vocab and GLOVE, use GLOVE vectors
    # for other words in dataset vocab, use random normal vectors
    emb_file = os.path.join(args.data, 'sst_embed.pth')
    if os.path.isfile(emb_file):
        emb = torch.load(emb_file)
    else:
        # load glove embeddings and vocab
        glove_vocab, glove_emb = load_word_vectors(
            os.path.join(args.glove, 'glove.840B.300d'))
        print('==> GLOVE vocabulary size: %d ' % glove_vocab.size())
        emb = torch.zeros(vocab.size(), glove_emb.size(1))
        for word in vocab.labelToIdx.keys():
            if glove_vocab.getIndex(word):
                emb[vocab.getIndex(word)] = glove_emb[glove_vocab.getIndex(word)]
            else:
                emb[vocab.getIndex(word)] = torch.Tensor(
                    emb[vocab.getIndex(word)].size()).normal_(-0.05, 0.05)
        torch.save(emb, emb_file)
        is_preprocessing_data = True  # flag to quit
        print('done creating emb, quit')

    if is_preprocessing_data:
        print('done preprocessing data, quit program to prevent memory leak')
        print('please run again')
        quit()

    # plug these into embedding matrix inside model
    if args.cuda:
        emb = emb.cuda()
    # model.childsumtreelstm.emb.state_dict()['weight'].copy_(emb)
    embedding_model.state_dict()['weight'].copy_(emb)

    # create trainer object for training and testing
    trainer = SentimentTrainer(args, model, embedding_model, criterion, optimizer)

    loopStart = time.time()
    # print('prepare time is %s ' % (loopStart - startTime))
    loss_save = []
    mode = 'EXPERIMENT'
    if mode == 'DEBUG':
        for epoch in range(args.epochs):
            dev_loss = trainer.train(dev_dataset)
            dev_loss, dev_pred = trainer.test(dev_dataset)
            test_loss, test_pred = trainer.test(test_dataset)
            dev_acc = metrics.sentiment_accuracy_score(dev_pred, dev_dataset.labels)
            test_acc = metrics.sentiment_accuracy_score(test_pred, test_dataset.labels)
            print('==> Dev loss : %f \t' % dev_loss, end="")
            print('Epoch ', epoch, 'dev percentage ', dev_acc)
    elif mode == "PRINT_TREE":
        for i in range(0, 10):
            ttree, tsent, tlabel = dev_dataset[i]
            utils.print_tree(ttree, 0)
            print('_______________')
        print('break')
        quit()
    elif mode == "EXPERIMENT":
        max_dev = 0
        max_dev_epoch = 0
        filename = args.name + '.pth'
        for epoch in range(args.epochs):
            train_loss = trainer.train(train_dataset)
            # dev_loss, dev_pred = trainer.test(dev_dataset)
            # dev_acc = metrics.sentiment_accuracy_score(dev_pred, dev_dataset.labels)
            print('==> Train loss : %f \t' % train_loss, end="")
            loss_save.append(train_loss)
            # print('Epoch ', epoch, 'dev percentage ', dev_acc)
            # torch.save(model, args.saved + str(epoch) + '_model_' + filename)
            # torch.save(embedding_model, args.saved + str(epoch) + '_embedding_' + filename)
            # if dev_acc > max_dev:
            #     max_dev = dev_acc
            #     max_dev_epoch = epoch
            # gc.collect()
        print("done")
        # print('epoch ' + str(max_dev_epoch) + ' dev score of ' + str(max_dev))
        # print('eva on test set ')
        # model = torch.load(args.saved + str(max_dev_epoch) + '_model_' + filename)
        # embedding_model = torch.load(args.saved + str(max_dev_epoch) + '_embedding_' + filename)
        # trainer = SentimentTrainer(args, model, embedding_model, criterion, optimizer)
        # test_loss, test_pred = trainer.test(test_dataset)
        # test_acc = metrics.sentiment_accuracy_score(test_pred, test_dataset.labels)
        # print('Epoch with max dev:' + str(max_dev_epoch) + ' |test percentage ' + str(test_acc))
        # print('____________________' + str(args.name) + '___________________')
    else:
        for epoch in range(args.epochs):
            train_loss = trainer.train(train_dataset)
            train_loss, train_pred = trainer.test(train_dataset)
            dev_loss, dev_pred = trainer.test(dev_dataset)
            test_loss, test_pred = trainer.test(test_dataset)
            train_acc = metrics.sentiment_accuracy_score(train_pred, train_dataset.labels)
            dev_acc = metrics.sentiment_accuracy_score(dev_pred, dev_dataset.labels)
            test_acc = metrics.sentiment_accuracy_score(test_pred, test_dataset.labels)
            print('==> Train loss : %f \t' % train_loss, end="")
            print('Epoch ', epoch, 'train percentage ', train_acc)
            print('Epoch ', epoch, 'dev percentage ', dev_acc)
            print('Epoch ', epoch, 'test percentage ', test_acc)

    loopEnd = time.time()
    print('looptime is %s ' % (loopEnd - loopStart))
    prepareTime = loopStart - startTime
    loopTime = loopEnd - loopStart
    timePerEpoch = loopTime / args.epochs

    with open(write_to, "w") as f:
        f.write("unit: " + "1 epoch\n")
        for loss in loss_save:
            f.write(str(loss) + "\n")
        f.write("run time: " + str(prepareTime) + " " + str(timePerEpoch) + "\n")
def main():
    # Device configuration
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    parser = argparse.ArgumentParser()
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--batch_size', type=int, default=16)
    parser.add_argument('--epoch', type=int, default=1)
    parser.add_argument('--kernel_num', type=int, default=100)
    parser.add_argument('--label_num', type=int, default=2)
    parser.add_argument('--log_interval', type=int, default=100)
    parser.add_argument('--wordvec_dim', type=int, default=50)
    parser.add_argument('--model_name', type=str, default='rcnn')
    parser.add_argument('--early-stop', type=int, default=1000,
                        help='iteration numbers to stop without performance increasing')
    parser.add_argument('--test-interval', type=int, default=200,
                        help='how many steps to wait before testing [default: 100]')
    parser.add_argument('--kernel_sizes', type=str, default='3,4,5')
    parser.add_argument('--dataset_path', type=str, default='data/dataset/')
    args = parser.parse_args()
    # torch.manual_seed(args.seed)

    start = time.time()
    wordvec = loadGloveModel('data/glove/glove.6B.' + str(args.wordvec_dim) + 'd.txt')
    args.device = device
    args.weight = torch.tensor(wordvec.values, dtype=torch.float)
    args.kernel_sizes = [int(k) for k in args.kernel_sizes.split(',')]

    # Datasets
    testing_set = SSTDataset(args.dataset_path, 'test', args.label_num,
                             args.wordvec_dim, wordvec)
    testing_iter = DataLoader(dataset=testing_set,
                              batch_size=args.batch_size,
                              num_workers=0,
                              collate_fn=collate_fn,
                              pin_memory=True)
    print(time.time() - start)

    model_name = args.model_name.lower()
    # training_set = SSTDataset(args.dataset_path, 'train', args.label_num, args.wordvec_dim, wordvec)
    models = [
        TextCNN(args).to(device),
        LSTMClassifier(args).to(device),
        RCNN(args).to(device),
        myRNN(args).to(device)
    ]
    models[0].load_state_dict(
        torch.load('model_cnn_{}_{}.ckpt'.format(args.wordvec_dim, args.label_num)))
    models[1].load_state_dict(
        torch.load('model_lstm_{}_{}.ckpt'.format(args.wordvec_dim, args.label_num)))
    models[2].load_state_dict(
        torch.load('model_rcnn_{}_{}.ckpt'.format(args.wordvec_dim, args.label_num)))
    models[3].load_state_dict(
        torch.load('model_rnn_{}_{}.ckpt'.format(args.wordvec_dim, args.label_num)))
    del wordvec  # Save some memory

    print(evaluation(testing_iter, models, args))

    print("Parameters:")
    delattr(args, 'weight')
    for attr, value in sorted(args.__dict__.items()):
        print("\t{}={}".format(attr.upper(), value))
if __name__ == "__main__": #Get the parameters from arguments if used parser = ArgumentParser() parser.add_argument('-freeze_bert', action='store_true') parser.add_argument('-maxlen', type=int, default=25) parser.add_argument('-batch_size', type=int, default=32) parser.add_argument('-lr', type=float, default=2e-5) parser.add_argument('-print_every', type=int, default=100) parser.add_argument('-num_eps', type=int, default=5) args = parser.parse_args() #Instantiate the classifier model net = SentimentClassifier(args.freeze_bert) #CPU or GPU device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") #Put the network to the GPU if available net = net.to(device) #Takes as the input the logits of the positive class and computes the binary cross-entropy criterion = nn.BCEWithLogitsLoss() #Adam optimizer optimizer = optim.Adam(net.parameters(), lr=args.lr) #Create instances of training and validation set train_set = SSTDataset(filename='data/train.tsv', maxlen=args.maxlen) val_set = SSTDataset(filename='data/dev.tsv', maxlen=args.maxlen) #Create intsances of training and validation dataloaders train_loader = DataLoader(train_set, batch_size=args.batch_size, num_workers=5) val_loader = DataLoader(val_set, batch_size=args.batch_size, num_workers=5) #Train the network train(net, criterion, optimizer, train_loader, val_loader, args)
elif config.model_type == 'distilbert':
    model = DistilBertForSentimentClassification.from_pretrained(
        args.model_name_or_path, config=config)
else:
    raise ValueError('This transformer model is not supported yet.')

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Takes as the input the logits of the positive class and computes the binary cross-entropy
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(params=model.parameters(), lr=args.lr)

train_set = SSTDataset(filename='data/train.tsv',
                       maxlen=args.maxlen_train,
                       tokenizer=tokenizer)
val_set = SSTDataset(filename='data/dev.tsv',
                     maxlen=args.maxlen_val,
                     tokenizer=tokenizer)
train_loader = DataLoader(dataset=train_set,
                          batch_size=args.batch_size,
                          num_workers=args.num_threads)
val_loader = DataLoader(dataset=val_set,
                        batch_size=args.batch_size,
                        num_workers=args.num_threads)

train(model=model,
      criterion=criterion,
      optimizer=optimizer,
def main():
    global args
    args = parse_args(type=1)
    args.input_dim, args.mem_dim = 300, 168
    if args.fine_grain:
        args.num_classes = 5  # 0 1 2 3 4
    else:
        args.num_classes = 3  # 0 1 2 (1 neutral)
    args.cuda = args.cuda and torch.cuda.is_available()
    print(args)
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    train_dir = os.path.join(args.data, 'train/')
    dev_dir = os.path.join(args.data, 'dev/')
    test_dir = os.path.join(args.data, 'test/')

    # write unique words from all token files
    token_files = [
        os.path.join(split, 'sents.toks')
        for split in [train_dir, dev_dir, test_dir]
    ]
    vocab_file = os.path.join(args.data, 'vocab.txt')
    build_vocab(token_files, vocab_file)

    # get vocab object from vocab file previously written
    vocab = Vocab(filename=vocab_file,
                  data=[
                      Constants.PAD_WORD, Constants.UNK_WORD,
                      Constants.BOS_WORD, Constants.EOS_WORD
                  ])
    print('==> SST vocabulary size : %d ' % vocab.size())

    # Load SST dataset splits
    is_preprocessing_data = False  # let program turn off after preprocess data

    # train
    train_file = os.path.join(args.data, 'sst_train.pth')
    if os.path.isfile(train_file):
        train_dataset = torch.load(train_file)
    else:
        train_dataset = SSTDataset(train_dir, vocab, args.num_classes, args.fine_grain)
        torch.save(train_dataset, train_file)
        is_preprocessing_data = True

    # dev
    dev_file = os.path.join(args.data, 'sst_dev.pth')
    if os.path.isfile(dev_file):
        dev_dataset = torch.load(dev_file)
    else:
        dev_dataset = SSTDataset(dev_dir, vocab, args.num_classes, args.fine_grain)
        torch.save(dev_dataset, dev_file)
        is_preprocessing_data = True

    # test
    test_file = os.path.join(args.data, 'sst_test.pth')
    if os.path.isfile(test_file):
        test_dataset = torch.load(test_file)
    else:
        test_dataset = SSTDataset(test_dir, vocab, args.num_classes, args.fine_grain)
        torch.save(test_dataset, test_file)
        is_preprocessing_data = True

    # initialize model, criterion/loss_function, optimizer
    model = TreeLSTMSentiment(args.cuda, vocab.size(), args.input_dim,
                              args.mem_dim, args.num_classes)
    criterion = nn.CrossEntropyLoss()
    if args.cuda:
        model.cuda(), criterion.cuda()
    if args.optim == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd)
    elif args.optim == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=args.lr, weight_decay=args.wd)
    metrics = Metrics(args.num_classes)
    utils.count_param(model)

    # for words common to dataset vocab and GLOVE, use GLOVE vectors
    # for other words in dataset vocab, use random normal vectors
    emb_file = os.path.join(args.data, 'sst_embed.pth')
    if os.path.isfile(emb_file):
        emb = torch.load(emb_file)
    else:
        # load glove embeddings and vocab
        glove_vocab, glove_emb = load_word_vectors(
            os.path.join(args.glove, 'glove.840B.300d'))
        print('==> GLOVE vocabulary size: %d ' % glove_vocab.size())
        emb = torch.Tensor(vocab.size(), glove_emb.size(1)).normal_(-0.05, 0.05)
        # zero out the embeddings for padding and other special words if they are absent in vocab
        for idx, item in enumerate([
                Constants.PAD_WORD, Constants.UNK_WORD, Constants.BOS_WORD,
                Constants.EOS_WORD
        ]):
            emb[idx].zero_()
        for word in vocab.labelToIdx.keys():
            if glove_vocab.getIndex(word):
                emb[vocab.getIndex(word)] = glove_emb[glove_vocab.getIndex(word)]
        torch.save(emb, emb_file)
        is_preprocessing_data = True  # flag to quit
        print('done creating emb, quit')

    if is_preprocessing_data:
        print(
            'quit program due to memory leak during preprocess data, please rerun sentiment.py'
        )
        quit()

    # plug these into embedding matrix inside model
    if args.cuda:
        emb = emb.cuda()
    model.childsumtreelstm.emb.state_dict()['weight'].copy_(emb)

    # create trainer object for training and testing
    trainer = SentimentTrainer(args, model, criterion, optimizer)

    for epoch in range(args.epochs):
        train_loss = trainer.train(train_dataset)
        # train_loss, train_pred = trainer.test(dev_dataset)
        dev_loss, dev_pred = trainer.test(dev_dataset)
        test_loss, test_pred = trainer.test(test_dataset)
        # TODO: torch.Tensor(dev_dataset.labels) turn label into tensor
        # done
        dev_acc = metrics.sentiment_accuracy_score(dev_pred, dev_dataset.labels)
        test_acc = metrics.sentiment_accuracy_score(test_pred, test_dataset.labels)
        print('==> Train loss : %f \t' % train_loss, end="")
        print('Epoch ', epoch, 'dev percentage ', dev_acc)
        print('Epoch ', epoch, 'test percentage ', test_acc)
def main():
    global args
    args = parse_args(type=1)
    print(args.name)
    print(args.model_name)
    args.input_dim = 300
    if args.mem_dim == 0:
        if args.model_name == 'dependency':
            args.mem_dim = 168
        elif args.model_name == 'constituency':
            args.mem_dim = 150
        elif args.model_name == 'lstm':
            args.mem_dim = 168
        elif args.model_name == 'bilstm':
            args.mem_dim = 168
    if args.num_classes == 0:
        if args.fine_grain:
            args.num_classes = 5  # 0 1 2 3 4
        else:
            args.num_classes = 3  # 0 1 2 (1 neutral)
    elif args.num_classes == 2:
        # assert False  # this will not work
        assert not args.fine_grain
    args.cuda = args.cuda and torch.cuda.is_available()
    # args.cuda = False
    print(args)
    # torch.manual_seed(args.seed)
    # if args.cuda:
    #     torch.cuda.manual_seed(args.seed)

    train_dir = os.path.join(args.data, 'train/')
    dev_dir = os.path.join(args.data, 'dev/')
    test_dir = os.path.join(args.data, 'test/')

    # write unique words from all token files
    token_files = [
        os.path.join(split, 'sents.toks')
        for split in [train_dir, dev_dir, test_dir]
    ]
    vocab_file = os.path.join(args.data, 'vocab-cased.txt')  # use vocab-cased
    # build_vocab(token_files, vocab_file)  NO, DO NOT BUILD VOCAB, USE OLD VOCAB

    # get vocab object from vocab file previously written
    vocab = Vocab(filename=vocab_file)
    print('==> SST vocabulary size : %d ' % vocab.size())

    # Load SST dataset splits
    is_preprocessing_data = False  # let program turn off after preprocess data

    # train
    train_file = os.path.join(args.data, 'sst_train.pth')
    if os.path.isfile(train_file):
        train_dataset = torch.load(train_file)
    else:
        train_dataset = SSTDataset(train_dir, vocab, args.num_classes,
                                   args.fine_grain, args.model_name)
        torch.save(train_dataset, train_file)
        is_preprocessing_data = True

    # dev
    dev_file = os.path.join(args.data, 'sst_dev.pth')
    if os.path.isfile(dev_file):
        dev_dataset = torch.load(dev_file)
    else:
        dev_dataset = SSTDataset(dev_dir, vocab, args.num_classes,
                                 args.fine_grain, args.model_name)
        torch.save(dev_dataset, dev_file)
        is_preprocessing_data = True

    # test
    test_file = os.path.join(args.data, 'sst_test.pth')
    if os.path.isfile(test_file):
        test_dataset = torch.load(test_file)
    else:
        test_dataset = SSTDataset(test_dir, vocab, args.num_classes,
                                  args.fine_grain, args.model_name)
        torch.save(test_dataset, test_file)
        is_preprocessing_data = True

    criterion = nn.NLLLoss()

    # initialize model, criterion/loss_function, optimizer
    model = DMNWraper(args.cuda, args.input_dim, args.mem_dim, criterion,
                      args.train_subtrees, args.num_classes, args.embdrop)
    embedding_model = nn.Embedding(vocab.size(), args.input_dim)
    if args.cuda:
        embedding_model = embedding_model.cuda()
    if args.cuda:
        model.cuda(), criterion.cuda()

    # for words common to dataset vocab and GLOVE, use GLOVE vectors
    # for other words in dataset vocab, use random normal vectors
    if args.embedding == 'glove':
        emb_torch = 'sst_embed.pth'
        emb_vector = 'glove.840B.300d'
        emb_vector_path = os.path.join(args.glove, emb_vector)
        assert os.path.isfile(emb_vector_path + '.txt')
    elif args.embedding == 'paragram':
        emb_torch = 'sst_embed_paragram.pth'
        emb_vector = 'paragram_300_sl999'
        emb_vector_path = os.path.join(args.paragram, emb_vector)
        assert os.path.isfile(emb_vector_path + '.txt')
    elif args.embedding == 'paragram_xxl':
        emb_torch = 'sst_embed_paragram_xxl.pth'
        emb_vector = 'paragram-phrase-XXL'
        emb_vector_path = os.path.join(args.paragram, emb_vector)
        assert os.path.isfile(emb_vector_path + '.txt')
    else:
        assert False

    emb_file = os.path.join(args.data, emb_torch)
    if os.path.isfile(emb_file):
        emb = torch.load(emb_file)
    else:
        # load glove embeddings and vocab
        glove_vocab, glove_emb = load_word_vectors(emb_vector_path)
        print('==> Embedding vocabulary size: %d ' % glove_vocab.size())
        emb = torch.zeros(vocab.size(), glove_emb.size(1))
        for word in vocab.labelToIdx.keys():
            if glove_vocab.getIndex(word):
                emb[vocab.getIndex(word)] = glove_emb[glove_vocab.getIndex(word)]
            else:
                emb[vocab.getIndex(word)] = torch.Tensor(
                    emb[vocab.getIndex(word)].size()).normal_(-0.05, 0.05)
        torch.save(emb, emb_file)
        is_preprocessing_data = True  # flag to quit
        print('done creating emb, quit')

    if is_preprocessing_data:
        print('quit program')
        quit()

    # plug these into embedding matrix inside model
    if args.cuda:
        emb = emb.cuda()
    embedding_model.state_dict()['weight'].copy_(emb)

    if args.optim == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd)
    elif args.optim == 'adagrad':
        # optimizer = optim.Adagrad(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr, weight_decay=args.wd)
        optimizer = optim.Adagrad(model.parameters(), lr=args.lr, weight_decay=args.wd)
    elif args.optim == 'adam_combine':
        optimizer = optim.Adam([{
            'params': model.parameters(),
            'lr': args.lr,
            'weight_decay': args.wd
        }, {
            'params': embedding_model.parameters(),
            'lr': args.emblr,
            'weight_decay': args.embwd
        }])
        args.manually_emb = 0
    elif args.optim == 'adagrad_combine':
        optimizer = optim.Adagrad([{
            'params': model.parameters(),
            'lr': args.lr,
            'weight_decay': args.wd
        }, {
            'params': embedding_model.parameters(),
            'lr': args.emblr,
            'weight_decay': args.embwd
        }])
        args.manually_emb = 0
    elif args.optim == 'adam_combine_v2':
        model.embedding_model = embedding_model
        optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd)
        args.manually_emb = 0

    metrics = Metrics(args.num_classes)
    utils.count_param(model)

    trainer = SentimentTrainer(args, model, embedding_model, criterion, optimizer)
    trainer.set_initial_emb(emb)
    question_idx = vocab.labelToIdx['sentiment']
    question_idx = torch.Tensor([question_idx])
    trainer.set_question(question_idx)
    # trainer = SentimentTrainer(args, model, embedding_model, criterion, optimizer)

    mode = args.mode
    if mode == 'DEBUG':
        for epoch in range(args.epochs):
            # print a tree
            tree, sent, label = dev_dataset[3]
            utils.print_span(tree, sent, vocab)
            quit()
            dev_loss = trainer.train(dev_dataset)
            dev_loss, dev_pred, _ = trainer.test(dev_dataset)
            test_loss, test_pred, _ = trainer.test(test_dataset)
            dev_acc = metrics.sentiment_accuracy_score(dev_pred, dev_dataset.labels)
            test_acc = metrics.sentiment_accuracy_score(test_pred, test_dataset.labels)
            print('==> Dev loss : %f \t' % dev_loss, end="")
            print('Epoch ', epoch, 'dev percentage ', dev_acc)
    elif mode == "PRINT_TREE":
        for i in range(0, 10):
            ttree, tsent, tlabel = dev_dataset[i]
            utils.print_tree(ttree, 0)
            print('_______________')
        print('break')
        quit()
    elif mode == 'EVALUATE':
        filename = args.name + '.pth'
        epoch = args.epochs
        model_name = str(epoch) + '_model_' + filename
        embedding_name = str(epoch) + '_embedding_' + filename
        model = torch.load(os.path.join(args.saved, model_name))
        embedding_model = torch.load(os.path.join(args.saved, embedding_name))
        trainer = SentimentTrainer(args, model, embedding_model, criterion, optimizer)
        trainer.set_question(question_idx)
        test_loss, test_pred, subtree_metrics = trainer.test(test_dataset)
        test_acc = metrics.sentiment_accuracy_score(test_pred,
                                                    test_dataset.labels,
                                                    num_classes=args.num_classes)
        print('Epoch with max dev:' + str(epoch) + ' |test percentage ' + str(test_acc))
        print('____________________' + str(args.name) + '___________________')
        print_list = subtree_metrics.print_list
        torch.save(print_list, os.path.join(args.saved, args.name + 'printlist.pth'))
        utils.print_trees_file(args, vocab, test_dataset, print_list, name='tree')
    elif mode == "EXPERIMENT":
        # dev_loss, dev_pred = trainer.test(dev_dataset)
        # dev_acc = metrics.sentiment_accuracy_score(dev_pred, dev_dataset.labels, num_classes=args.num_classes)
        max_dev = 0
        max_dev_epoch = 0
        filename = args.name + '.pth'
        for epoch in range(args.epochs):
            # train_loss, train_pred, _ = trainer.test(train_dataset)
            train_loss_while_training = trainer.train(train_dataset)
            train_loss, train_pred, _ = trainer.test(train_dataset)
            dev_loss, dev_pred, _ = trainer.test(dev_dataset)
            dev_acc = metrics.sentiment_accuracy_score(dev_pred,
                                                       dev_dataset.labels,
                                                       num_classes=args.num_classes)
            train_acc = metrics.sentiment_accuracy_score(train_pred,
                                                         train_dataset.labels,
                                                         num_classes=args.num_classes)
            print('==> Train loss : %f \t' % train_loss_while_training, end="")
            print('Epoch ', epoch, 'dev percentage ', dev_acc)
            print('Epoch %d dev percentage %f ' % (epoch, dev_acc))
            print('Train acc %f ' % (train_acc))
            if dev_acc > max_dev:
                print('update best dev acc %f ' % (dev_acc))
                max_dev = dev_acc
                max_dev_epoch = epoch
                utils.mkdir_p(args.saved)
                torch.save(
                    model,
                    os.path.join(args.saved, str(epoch) + '_model_' + filename))
                torch.save(
                    embedding_model,
                    os.path.join(args.saved, str(epoch) + '_embedding_' + filename))
            gc.collect()
        print('epoch ' + str(max_dev_epoch) + ' dev score of ' + str(max_dev))
        print('eva on test set ')
        model = torch.load(
            os.path.join(args.saved, str(max_dev_epoch) + '_model_' + filename))
        embedding_model = torch.load(
            os.path.join(args.saved, str(max_dev_epoch) + '_embedding_' + filename))
        trainer = SentimentTrainer(args, model, embedding_model, criterion, optimizer)
        trainer.set_question(question_idx)
        test_loss, test_pred, _ = trainer.test(test_dataset)
        test_acc = metrics.sentiment_accuracy_score(test_pred,
                                                    test_dataset.labels,
                                                    num_classes=args.num_classes)
        print('Epoch with max dev:' + str(max_dev_epoch) + ' |test percentage ' + str(test_acc))
        print('____________________' + str(args.name) + '___________________')
    else:
        for epoch in range(args.epochs):
            train_loss = trainer.train(train_dataset)
            train_loss, train_pred, _ = trainer.test(train_dataset)
            dev_loss, dev_pred, _ = trainer.test(dev_dataset)
            test_loss, test_pred, _ = trainer.test(test_dataset)
            train_acc = metrics.sentiment_accuracy_score(train_pred, train_dataset.labels)
            dev_acc = metrics.sentiment_accuracy_score(dev_pred, dev_dataset.labels)
            test_acc = metrics.sentiment_accuracy_score(test_pred, test_dataset.labels)
            print('==> Train loss : %f \t' % train_loss, end="")
            print('Epoch ', epoch, 'train percentage ', train_acc)
            print('Epoch ', epoch, 'dev percentage ', dev_acc)
            print('Epoch ', epoch, 'test percentage ', test_acc)
from tqdm import tqdm
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', type=str, default='', help='model path')
    parser.add_argument('--train_data', type=str)
    parser.add_argument('--test_data', type=str)
    parser.add_argument('--normalizer', type=str)
    parser.add_argument('--tstep', type=float)
    opt = parser.parse_args()
    print(opt)

    dset = SSTDataset(opt.train_data, dim_control=1, dim_state=4)
    dset_eval = SSTDataset(opt.test_data, dim_control=1, dim_state=4)
    use_cuda = True

    dl = DataLoader(dset, batch_size=200, num_workers=0, drop_last=True)
    dl_eval = DataLoader(dset_eval, batch_size=200, num_workers=0, drop_last=True)

    G1 = nx.path_graph(2).to_directed()
    G_target = nx.path_graph(2).to_directed()
    # nx.draw(G1)
    # plt.show()

    node_feat_size = 2
    edge_feat_size = 3
    graph_feat_size = 10
def main():
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    parser = argparse.ArgumentParser()
    parser.add_argument('--lr', type=float, default=0.001, help='learning rate')
    parser.add_argument('--batch_size', type=int, default=16)
    parser.add_argument('--epoch', type=int, default=10)
    parser.add_argument('--kernel_num', type=int, default=100,
                        help='Number of kernels of each size used in the CNN')
    parser.add_argument('--label_num', type=int, default=2,
                        help='Number of target labels')
    parser.add_argument('--log_interval', type=int, default=100)
    parser.add_argument('--wordvec_dim', type=int, default=50,
                        help='Dimension of the GloVe vectors')
    parser.add_argument('--model_name', type=str, default='rcnn',
                        help='Which model to use')
    parser.add_argument('--kernel_sizes', type=str, default='3,4,5',
                        help='Sizes of kernels used in the CNN')
    parser.add_argument('--dataset_path', type=str, default='data/dataset/',
                        help='Path to the dataset')
    args = parser.parse_args()
    # torch.manual_seed(args.seed)

    start = time.time()
    wordvec = loadGloveModel('data/glove/glove.6B.' + str(args.wordvec_dim) + 'd.txt')
    args.device = device
    # word embedding matrix for the embedding layer
    args.weight = torch.tensor(wordvec.values, dtype=torch.float)
    args.kernel_sizes = [int(k) for k in args.kernel_sizes.split(',')]

    # Datasets
    training_set = SSTDataset(args.dataset_path, 'train', args.label_num,
                              args.wordvec_dim, wordvec)
    testing_set = SSTDataset(args.dataset_path, 'test', args.label_num,
                             args.wordvec_dim, wordvec)
    validation_set = SSTDataset(args.dataset_path, 'dev', args.label_num,
                                args.wordvec_dim, wordvec)

    training_iter = DataLoader(dataset=training_set,
                               batch_size=args.batch_size,
                               num_workers=0,
                               shuffle=True,
                               collate_fn=collate_fn,
                               pin_memory=True)
    testing_iter = DataLoader(dataset=testing_set,
                              batch_size=args.batch_size,
                              num_workers=0,
                              collate_fn=collate_fn,
                              pin_memory=True)
    validation_iter = DataLoader(dataset=validation_set,
                                 batch_size=args.batch_size,
                                 num_workers=0,
                                 collate_fn=collate_fn,
                                 pin_memory=True)
    print(time.time() - start)

    model_name = args.model_name.lower()
    print(model_name)

    # Select model
    if model_name == 'cnn':
        model = TextCNN(args).to(device)
    elif model_name == 'lstm':
        model = LSTMClassifier(args).to(device)
    elif model_name == 'rcnn':
        model = RCNN(args).to(device)
    elif model_name == 'rnn':
        model = myRNN(args).to(device)
    else:
        print('Unrecognized model name!')
        exit(1)
    del wordvec  # Save some memory

    criterion = nn.CrossEntropyLoss()
    # optimizer = optim.SGD(model.parameters(), lr=config.lr)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)  # Adam optimizer

    step = 0
    loss_sum = 0

    # Train
    test_acc = []
    best_acc = 0
    for epoch in range(1, args.epoch + 1):
        for data, label in training_iter:
            sentences = data.to(device, non_blocking=True)  # Asynchronous loading
            # sentences = data.flip(dims=(-1,)).to(device, dtype=torch.long)
            labels = label.to(device, non_blocking=True)

            optimizer.zero_grad()
            logits = model(sentences)         # forward pass
            loss = criterion(logits, labels)  # loss
            loss_sum += loss.data             # accumulate the loss
            step += 1
            if step % args.log_interval == 0:
                print("epoch", epoch, end=' ')
                print("avg loss: %.5f" % (loss_sum / args.log_interval))
                loss_sum = 0
                step = 0
            loss.backward()
            optimizer.step()

        # test
        acc = evaluation(testing_iter, model, args)
        if acc > best_acc:
            best_acc = acc
            # torch.save(model.state_dict(), 'model_{}_{}_{}.ckpt'.format(args.model_name, args.wordvec_dim, args.label_num))
        test_acc.append(acc)
        print('test acc {:.4f}'.format(acc))
        print('train acc {:.4f}'.format(evaluation(training_iter, model, args)))

    best = 0
    best_acc = 0
    for i, a in enumerate(test_acc):
        if a > best_acc:
            best_acc = a
            best = i + 1
    print('best: epoch {}, acc {:.4f}'.format(best, best_acc))

    print("Parameters:")
    delattr(args, 'weight')
    for attr, value in sorted(args.__dict__.items()):
        print("\t{}={}".format(attr.upper(), value))
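# NOTE: evaluation(...) is imported from elsewhere in the repository and is not
# shown in this excerpt. The sketch below illustrates what such a helper could
# look like for a single model, assuming it returns plain accuracy over an
# iterator of (data, label) batches; the actual signature and return value in
# the original code base may differ (the script above also calls it with a list
# of models for ensembling).
def evaluation(data_iter, model, args):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for data, label in data_iter:
            sentences = data.to(args.device)
            labels = label.to(args.device)
            logits = model(sentences)
            correct += (logits.argmax(dim=-1) == labels).sum().item()
            total += labels.size(0)
    model.train()
    return correct / total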
import rntn
from sentiment_tensor import SentimentTree
import pickle

import torch
from dataset import SSTDataset
from torch.utils.data import DataLoader

stoi = pickle.load(open('./assets/stoi.pkl', 'rb'))
lexis_size = len(stoi)

BATCH_SIZE = 128
PARAMETERS = "./assets/batch_parameters/net_parameters_6.pth"

test = SSTDataset("./sst/test.txt", stoi)
testloader = DataLoader(test, batch_size=BATCH_SIZE)
N = len(test)

# Since SentimentTree objects have no support for GPU allocation, they can't be
# fed to the model on a CUDA device. Training is done on the CPU with a subset
# of the training set.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

net = rntn.RNTensorN(lexis_size)
net.load_state_dict(torch.load(PARAMETERS))

test_loss = acc = 0
with torch.no_grad():
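    # NOTE: the original excerpt ends here, inside the torch.no_grad() block.
    # A minimal sketch of the remaining evaluation loop follows. It assumes the
    # loader yields (inputs, labels) batches and that net(inputs) returns class
    # logits of shape [batch, num_classes]; both the batch format and the
    # net(...) call signature are assumptions, not part of the original script.
    for inputs, labels in testloader:
        logits = net(inputs)  # assumed forward signature
        test_loss += torch.nn.functional.cross_entropy(
            logits, labels, reduction='sum').item()
        acc += (logits.argmax(dim=-1) == labels).sum().item()
    print('test loss: %.4f | test accuracy: %.4f' % (test_loss / N, acc / N))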