def home():
    """Home page of the app.

    On a POST triggered by the predict button, render the prediction page
    for the submitted review text; otherwise render the review table.
    """
    form = ReusableForm(request.form)

    predict_requested = (request.method == 'POST'
                         and 'predict_button' in request.form)
    if predict_requested:
        user_text = request.form['user_text']
        score = predict_sentiment(model, tokenizer, user_text)
        return render_template('prediction.html', form=form,
                               review=user_text, input=score)

    # Default view: the home page with generated review table data.
    return render_template(
        'home.html', form=form,
        table_data=generate_reviews(model, test_data, test_labels, test_texts))
def content2sentence(table, model_path):
    """Split each review's content into sentences and score each sentence.

    Parameters
    ----------
    table : pandas.DataFrame
        Expected columns: ``id``, ``Content``, ``ReviewData``. Rows with
        empty ``Content`` are dropped from *table* in place (side effect).
        NOTE(review): the empty-content pass indexes positionally with
        ``table.Content[i]`` — assumes a default RangeIndex; confirm at
        call sites.
    model_path : str
        Path to a saved state dict for the CNN sentiment model.

    Returns
    -------
    pandas.DataFrame
        Columns ``['cotent_id', 'sentence', 'emotion label', 'date']``.
        NOTE(review): 'cotent_id' is a typo for 'content_id' but is kept
        verbatim for backward compatibility with downstream consumers.
    """
    # Rebuild the model skeleton and load the trained weights (CUDA only).
    sentiment_model = model.CNN(INPUT_DIM, EMBEDDING_DIM, N_FILTERS,
                                FILTER_SIZES, OUTPUT_DIM, DROPOUT, PAD_IDX)
    sentiment_model = sentiment_model.cuda()
    sentiment_model.load_state_dict(torch.load(model_path))

    # Tokenizer + vocabulary must match what the model was trained with.
    nlp = spacy.load('en_core_web_lg')
    TEXT = data.Field(tokenize='spacy', batch_first=True,
                      tokenizer_language='en_core_web_lg')
    LABEL = data.LabelField(dtype=torch.float)
    vdata, _ = datasets.IMDB.splits(TEXT, LABEL, root='.data/')
    TEXT.build_vocab(vdata, max_size=25002, vectors="glove.840B.300d",
                     vectors_cache='.vector_cache/',
                     unk_init=torch.Tensor.normal_)

    # Drop rows whose content is empty (mutates the caller's DataFrame).
    with tqdm.tqdm(total=len(table), desc='Remove empty content') as pbar:
        for i in range(len(table)):
            if len(table.Content[i]) == 0:
                table.drop(i, inplace=True)
            pbar.update(1)

    # Hoisted loop invariants: compiled sentence delimiter (raw string —
    # the original '\!|\?|\.' relied on unrecognized escapes passing
    # through) and a translation table that strips punctuation in a
    # single C-level pass instead of chained .replace() calls.
    sentence_splitter = re.compile(r'[!?.]')
    strip_punct = str.maketrans('', '', string.punctuation)

    c2s = []
    with tqdm.tqdm(total=len(table), desc='Processing...') as pbar:
        for row in table.itertuples(index=False):
            date = row.ReviewData
            # [:-1] discards the trailing fragment after the last terminator.
            sentences = sentence_splitter.split(
                process_content(row.Content)[0])[:-1]
            for sentence in sentences:
                sentence = sentence.lower().translate(strip_punct)
                # Only score sentences long enough to carry sentiment.
                if len(sentence.split(' ')) >= 10:
                    emotion_label = utils.predict_sentiment(
                        sentiment_model, sentence, nlp, TEXT, min_len=5)
                    c2s.append([row.id, sentence, emotion_label, date])
            pbar.update(1)

    return pd.DataFrame(
        c2s, columns=['cotent_id', 'sentence', 'emotion label', 'date'])
def predict():
    """JSON endpoint: score the sentiment of the review passed as ?value=..."""
    review_text = request.args['value']
    sentiment_score = predict_sentiment(model, tokenizer, review_text)
    print(sentiment_score)
    return jsonify(float(sentiment_score))
from utils import predict_sentiment, load_dataset
from keras.models import load_model

# Load the trained model and the tokenizer fitted on the training corpus.
model = load_model('model.h5')
tokenizer = load_dataset('tokenizer.pkl')

# max_length must match the padding length used at training time, so it is
# recomputed from the training texts rather than hard-coded.
trainX, _ = load_dataset('data/train.pkl')
max_length = max(len(s.split()) for s in trainX)
print(max_length)


def _score_review_file(path):
    """Read one review file, predict its sentiment and print the result."""
    with open(path) as f:
        review = f.read()
    percent, sentiment = predict_sentiment(review, tokenizer, max_length, model)
    print("Review: [{}]\nSentiment: {} {:.3f}%".format(
        review, sentiment, percent * 100))


# Smoke-test on one known-negative and one known-positive review; the
# duplicated read/predict/print block is now a single helper.
_score_review_file('data/txt_sentoken/neg/cv999_14636.txt')
_score_review_file('data/txt_sentoken/pos/cv999_13106.txt')
if oov_count > 0: print("There are %d oov words.") return embed net.embedding.weight.data.copy_(load_pretrained_embedding(vocab.itos, glove)) net.embedding.weight.requires_grad = False # 直接加载预训练好的, 所以不需要更新它 print('训练并评价模型,只训练一轮') lr, num_epochs = 0.01, 1 optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, net.parameters()), lr=lr) loss = nn.CrossEntropyLoss() utils.train(train_iter, test_iter, net, loss, optimizer, device, num_epochs) print('尝试预测') print( utils.predict_sentiment(net, vocab, ['this', 'movie', 'is', 'so', 'great'])) print(utils.predict_sentiment(net, vocab, ['this', 'movie', 'is', 'so', 'bad'])) ''' training on cpu epoch 1, loss 0.6503, train acc 0.593, test acc 0.790, time 1577.5 sec 尝试预测 positive negative '''