def predict(prices, check_ml):
    # Load the pretrained sentiment RNN and run it in inference mode.
    sent_analysis = SentimentAnalysis()
    model, _, _ = sent_analysis.get_model()
    model.load_state_dict(torch.load('server/models/portfolio/rnn_20.pkl'))
    model.eval()
    preds = model(prices)
    # Feed the raw model outputs through the prediction dataloader.
    predict_loader = TextClassDataLoader(preds, batch_size=1, predict=True,
                                         check_ml=check_ml)
    _, preds = predict_loader.predict_batches
    return preds
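# A minimal usage sketch for predict(). How `prices` is produced is not shown
# in the original snippet; `load_prices` below is a hypothetical helper and the
# check_ml flag value is an assumption:
prices = load_prices('server/models/portfolio/data/prices.csv')  # hypothetical helper
preds = predict(prices, check_ml=True)
print(preds)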
def get_sentence():
    train_loader = TextClassDataLoader('data/input.txt', d_word_index, batch_size=1)
    arr = []
    for i, (seq, target, seq_lengths) in enumerate(train_loader):
        print(seq)
        print(target)
        print(seq_lengths)
        output = model(seq, seq_lengths)
        arr = output[0].data.numpy().tolist()
        print(arr)
        print(arr.index(max(arr)))
    # Return the index of the highest-scoring class for the last batch seen.
    return arr.index(max(arr))
def get_trainer(self):
    print('Creating dataloaders...')
    # NOTE: both loaders currently read the same test.csv file.
    train_loader = TextClassDataLoader('server/models/portfolio/data/test.csv',
                                       batch_size=self.batch_size)
    val_loader = TextClassDataLoader('server/models/portfolio/data/test.csv',
                                     batch_size=self.batch_size)
    return train_loader, val_loader
def run_model(domain):
    # create vocab
    print("===> creating vocabs for domain..." + domain)
    end = time.time()
    domain_d = 'reviews/leave_out_' + domain
    lda_model = models.LdaModel.load(domain_d + '/lda_model/lda_' + domain)
    lda_dict = gensim.corpora.Dictionary.load(domain_d + '/lda_model/dict_' + domain)
    print(domain_d)
    v_builder = VocabBuilder(path_file=domain_d + '/train.csv', min_sample=args.min_samples)
    d_word_index = v_builder.get_word_index()
    vocab_size = len(d_word_index)
    word2id = {v: k for k, v in d_word_index.items()}  # .iteritems() is Python 2 only
    # print(word2id)
    embeddings = load_glove_embeddings(
        '/home/DebanjanChaudhuri/topic_lstm_torch/word_vecs/glove.6B.50d.txt',
        d_word_index)
    if not os.path.exists('gen_' + domain):
        os.mkdir('gen_' + domain)
    joblib.dump(d_word_index, 'gen_' + domain + '/d_word_index.pkl', compress=3)
    print('===> vocab creation: {t:.3f}'.format(t=time.time() - end))

    # create dataloaders
    print("===> creating dataloaders ...")
    end = time.time()
    train_loader = TextClassDataLoader(domain_d + '/train.csv', d_word_index,
                                       batch_size=args.batch_size)
    val_loader = TextClassDataLoader(domain_d + '/val.csv', d_word_index,
                                     batch_size=args.batch_size)
    test_loader = TextClassDataLoader(domain_d + '/test.csv', d_word_index,
                                      batch_size=args.batch_size)
    print('===> dataloader creation: {t:.3f}'.format(t=time.time() - end))

    # create model
    print("===> creating rnn model ...")
    if args.mit_topic:
        print("with topic vectors.")
        model = RNNTopic(vocab_size=vocab_size, embed_size=args.embedding_size,
                         num_output=args.classes, topic_size=50,
                         hidden_size=args.hidden_size, num_layers=args.layers,
                         batch_first=True, use_gpu=args.cuda, embeddings=embeddings,
                         emb_drop=args.emb_drop, fc_size=args.fc_layer)
    else:
        model = RNN(vocab_size=vocab_size, embed_size=args.embedding_size,
                    num_output=args.classes, hidden_size=args.hidden_size,
                    num_layers=args.layers, batch_first=True, use_gpu=args.cuda,
                    embeddings=embeddings, emb_drop=args.emb_drop, fc_size=args.fc_layer)
    print(model)

    # optimizer and loss
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    criterion = nn.CrossEntropyLoss()
    print(optimizer)
    print(criterion)

    if args.cuda:
        torch.backends.cudnn.enabled = True
        cudnn.benchmark = True
        model.cuda()
        criterion = criterion.cuda()

    # list for checking early stopping
    val_acc = []
    for epoch in range(1, args.epochs + 1):
        adjust_learning_rate(args.lr, optimizer, epoch)
        train(train_loader, model, criterion, optimizer, epoch, lda_model, lda_dict, word2id)
        print("getting performance on validation set!")
        v_acc = validate(val_loader, model, criterion, lda_model, lda_dict, word2id)
        print(len(val_acc), args.early_stopping)
        # if len(val_acc) > args.early_stopping:
        print("checking early stopping.")
        if earlystop(val_acc, v_acc):
            print("Early stopping!")
            break
        val_acc.append(v_acc)
        # save current model
        if epoch % args.save_freq == 0:
            name_model = 'rnn_{}.pkl'.format(epoch)
            path_save_model = os.path.join('gen_' + domain + '/', name_model)
            joblib.dump(model.float(), path_save_model, compress=2)

    print("Results on test set for leave-out domain: " + domain)
    test_acc = test(test_loader, model, criterion, lda_model, lda_dict, word2id)
    return test_acc
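# The earlystop() helper used above is not defined in this snippet. Below is a
# minimal sketch of what it could look like, assuming it stops once validation
# accuracy fails to improve over the last `args.early_stopping` epochs (the
# commented-out length check in the loop above suggests this criterion, but the
# exact logic in the original code may differ):
def earlystop(val_acc_history, current_acc):
    patience = args.early_stopping  # window length taken from the CLI args
    if len(val_acc_history) < patience:
        # Not enough history yet: never stop early.
        return False
    # Stop when the new score improves on none of the last `patience` scores.
    return all(current_acc <= prev for prev in val_acc_history[-patience:])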
parser.add_argument('--clip', type=float, default=0.25, help='gradient clipping')
args = parser.parse_args()
gen = args.gen + str(args.embedding_size) + 'v'

# load vocab
d_word_index, model = None, None
if os.path.exists(gen + '/d_word_index.pkl'):
    d_word_index = joblib.load(gen + '/d_word_index.pkl')

# create tester
print("===> creating dataloaders ...")
val_loader = TextClassDataLoader('data/test_pdtb.tsv', d_word_index, batch_size=args.batch_size)

# load model
if os.path.exists(gen + '/rnn_50.pkl'):
    model = joblib.load(gen + '/rnn_50.pkl')

# optimizer and loss
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                             lr=args.lr, weight_decay=args.weight_decay)
criterion = nn.CrossEntropyLoss()
print(optimizer)
print(criterion)
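# The snippet above builds the loader, model, and loss but stops before the
# evaluation itself. A minimal sketch of such a loop, assuming val_loader
# yields (seq, target, seq_lengths) batches as in the other snippets here:
import torch

def evaluate(model, val_loader, criterion):
    model.eval()
    total_loss, correct, count = 0.0, 0, 0
    with torch.no_grad():
        for seq, target, seq_lengths in val_loader:
            output = model(seq, seq_lengths)
            # Weight the batch-mean loss by batch size for a per-sample average.
            total_loss += criterion(output, target).item() * target.size(0)
            correct += (output.argmax(dim=1) == target).sum().item()
            count += target.size(0)
    return total_loss / max(count, 1), correct / max(count, 1)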
else:
    v_builder = VocabBuilder(path_file='data/train1.csv')
    d_word_index, embed = v_builder.get_word_index(min_sample=args.min_samples)
    if not os.path.exists('gen'):
        os.mkdir('gen')
    joblib.dump(d_word_index, 'gen/d_word_index.pkl', compress=3)

print('===> vocab creation: {t:.3f}'.format(t=time.time() - end))
print('args: ', args)

# create trainer
print("===> creating dataloaders ...")
end = time.time()
train_loader = TextClassDataLoader('data/train1.csv', d_word_index, batch_size=args.batch_size)
val_loader = TextClassDataLoader('data/test1.csv', d_word_index, batch_size=args.batch_size)
print('===> dataloader creation: {t:.3f}'.format(t=time.time() - end))

# create model
print("===> creating rnn model ...")
vocab_size = len(d_word_index)
model = RNN(vocab_size=vocab_size, embed_size=args.embedding_size,
            num_output=args.classes, rnn_model=args.rnn,
            use_last=(not args.mean_seq), hidden_size=args.hidden_size,
try:
    os.makedirs('models/' + args.name)
except FileExistsError:
    pass
with codecs.open('models/' + args.name + '/classify_stat.pkl', 'wb') as fout:
    pickle.dump(d_word_index, fout)
# joblib.dump(d_word_index, 'models/' + args.name + '/d_word_index.pkl', compress=3)
print('===> vocab creation: {t:.3f}'.format(t=time.time() - end))
print('args: ', args)

# create trainer
print("===> creating dataloaders ...")
end = time.time()
train_loader = TextClassDataLoader(args.train, d_word_index, batch_size=args.batch_size)
val_loader = TextClassDataLoader(args.test, d_word_index, batch_size=args.batch_size)
print('===> dataloader creation: {t:.3f}'.format(t=time.time() - end))

# create model
print("===> creating rnn model ...")
vocab_size = len(d_word_index)
model = RNN(vocab_size=vocab_size, embed_size=args.embedding_size,
            num_output=args.classes, rnn_model=args.rnn,
            use_last=(not args.mean_seq), hidden_size=args.hidden_size,
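# The vocabulary pickled above would be restored the same way at inference
# time. The loading code is not shown in the original snippet, so treat this
# short sketch as an assumption about the intended counterpart:
import codecs
import pickle

with codecs.open('models/' + args.name + '/classify_stat.pkl', 'rb') as fin:
    d_word_index = pickle.load(fin)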
model_dir = os.path.join('checkpoints', args.model)
if not os.path.exists(model_dir):
    os.makedirs(model_dir, exist_ok=True)
joblib.dump(d_word_index, os.path.join(model_dir, 'd_word_index.pkl'), compress=3)
print('===> vocab creation: {t:.3f}'.format(t=time.time() - end))
print('args: ', args)

# create trainer
print("===> creating dataloaders ...")
end = time.time()
if not args.multi_label:
    train_loader = TextClassDataLoader(train_file, d_word_index, batch_size=args.batch_size)
    val_loader = TextClassDataLoader(val_file, d_word_index, batch_size=args.batch_size)
    test_loader = TextClassDataLoader(test_file, d_word_index, batch_size=args.batch_size)
else:
    train_loader = TextClassDataLoader_multi(train_file, d_word_index, batch_size=args.batch_size)
    val_loader = TextClassDataLoader_multi(val_file, d_word_index, batch_size=args.batch_size)
    test_loader = TextClassDataLoader_multi(test_file,
    args.embedding_size = embed.size(1)
else:
    v_builder = VocabBuilder(path_file='data/train.tsv')
    d_word_index, embed = v_builder.get_word_index(min_sample=args.min_samples)
    if not os.path.exists('gen'):
        os.mkdir('gen')
    joblib.dump(d_word_index, 'gen/d_word_index.pkl', compress=3)

print('===> vocab creation: {t:.3f}'.format(t=time.time() - end))
print('args: ', args)

# create trainer
print("===> creating dataloaders ...")
end = time.time()
train_loader = TextClassDataLoader('data/aminer_train.tsv', d_word_index, batch_size=args.batch_size)
val_loader = TextClassDataLoader('data/aminer_test.tsv', d_word_index, batch_size=args.batch_size)
print('===> dataloader creation: {t:.3f}'.format(t=time.time() - end))

# create model
print("===> creating rnn model ...")
vocab_size = len(d_word_index)
model = RNN(vocab_size=vocab_size, embed_size=args.embedding_size,
            num_output=args.classes, rnn_model=args.rnn,
            use_last=(not args.mean_seq), hidden_size=args.hidden_size,