Esempio n. 1
0
def home():
    """Render the home page; on a prediction submit, render the result page.

    GET (or a POST without the predict button) shows the review form plus a
    table of sample reviews; a POST from the predict button scores the
    submitted text with the module-level model/tokenizer.
    """
    form = ReusableForm(request.form)
    is_prediction = request.method == 'POST' and 'predict_button' in request.form
    if is_prediction:
        user_text = request.form['user_text']
        return render_template(
            'prediction.html',
            form=form,
            review=user_text,
            input=predict_sentiment(model, tokenizer, user_text),
        )
    return render_template(
        'home.html',
        form=form,
        table_data=generate_reviews(model, test_data, test_labels, test_texts),
    )
Esempio n. 2
0
def content2sentence(table, model_path):
    """Split user comments into sentences and score each one for sentiment.

    Loads a CNN sentiment model from ``model_path``, rebuilds the vocabulary
    from the IMDB training split, then splits every comment in ``table`` into
    sentences and labels each sufficiently long sentence.

    Parameters
    ----------
    table : pandas.DataFrame
        Expected columns: ``id``, ``Content``, ``ReviewData``.
        NOTE(review): the empty-content pass drops rows via positional labels
        0..len-1 — assumes a default RangeIndex; verify against callers.
        Rows with empty content are dropped *in place* (mutates the caller's
        frame).
    model_path : str
        Path to the saved ``state_dict`` of the CNN model.

    Returns
    -------
    pandas.DataFrame
        Columns ``['cotent_id', 'sentence', 'emotion label', 'date']``.
        NOTE(review): 'cotent_id' looks like a typo for 'content_id', but it
        is kept because downstream code may already key on it.
    """
    sentiment_model = model.CNN(INPUT_DIM, EMBEDDING_DIM, N_FILTERS,
                                FILTER_SIZES, OUTPUT_DIM, DROPOUT, PAD_IDX)
    sentiment_model = sentiment_model.cuda()  # GPU assumed by the caller
    sentiment_model.load_state_dict(torch.load(model_path))
    nlp = spacy.load('en_core_web_lg')
    TEXT = data.Field(tokenize='spacy',
                      batch_first=True,
                      tokenizer_language='en_core_web_lg')
    LABEL = data.LabelField(dtype=torch.float)
    vdata, _ = datasets.IMDB.splits(TEXT, LABEL, root='.data/')
    # Vocabulary must match the one the model was trained with.
    TEXT.build_vocab(vdata,
                     max_size=25002,
                     vectors="glove.840B.300d",
                     vectors_cache='.vector_cache/',
                     unk_init=torch.Tensor.normal_)

    with tqdm.tqdm(total=len(table), desc='Remove empty content') as pbar:
        for i in range(len(table)):
            if len(table.Content[i]) == 0:
                table.drop(i, inplace=True)
            pbar.update(1)

    # Raw strings: '\!' / '\?' are invalid escapes and raise a SyntaxWarning
    # on modern Python. One translate table replaces the per-character
    # .replace() loop (single C-level pass, built once outside the loop).
    sentence_splitter = re.compile(r'\!|\?|\.')
    strip_punct = str.maketrans('', '', string.punctuation)

    c2s = []
    with tqdm.tqdm(total=len(table), desc='Processing...') as pbar:
        for row in table.itertuples(index=False):
            date = row.ReviewData
            # Last split element is the trailing fragment after the final
            # terminator, hence the [:-1].
            sentences = sentence_splitter.split(
                process_content(row.Content)[0])[:-1]
            for sentence in sentences:
                sentence = sentence.lower().translate(strip_punct)
                # Only score sentences of at least 10 whitespace-split tokens.
                if len(sentence.split(' ')) >= 10:
                    emotion_label = utils.predict_sentiment(sentiment_model,
                                                            sentence,
                                                            nlp,
                                                            TEXT,
                                                            min_len=5)
                    c2s.append([row.id, sentence, emotion_label, date])
            pbar.update(1)

    return pd.DataFrame(
        c2s, columns=['cotent_id', 'sentence', 'emotion label', 'date'])
Esempio n. 3
0
def predict():
    """Score the review text passed as the 'value' query parameter.

    Returns the sentiment score as a JSON float.
    """
    user_review = request.args['value']
    sentiment_score = predict_sentiment(model, tokenizer, user_review)
    print(sentiment_score)
    return jsonify(float(sentiment_score))
Esempio n. 4
0
from utils import predict_sentiment, load_dataset
from keras.models import load_model

# Load the trained model and the tokenizer fitted on the training data.
model = load_model('model.h5')
tokenizer = load_dataset('tokenizer.pkl')

# Longest training review in whitespace-split tokens; inputs are padded to it.
trainX, _ = load_dataset('data/train.pkl')
max_length = max(len(s.split()) for s in trainX)
print(max_length)

# Smoke-test: score one held-out negative and one held-out positive review.
# (The duplicated read/predict/print block is folded into a single loop.)
for review_path in ('data/txt_sentoken/neg/cv999_14636.txt',
                    'data/txt_sentoken/pos/cv999_13106.txt'):
    # Explicit encoding so the result does not depend on the locale default.
    with open(review_path, encoding='utf-8') as f:
        review = f.read()
    percent, sentiment = predict_sentiment(review, tokenizer, max_length, model)
    print("Review: [{}]\nSentiment: {} {:.3f}%".format(review, sentiment,
                                                       percent * 100))
Esempio n. 5
0
    if oov_count > 0:
        print("There are %d oov words.")
    return embed


# Copy pretrained GloVe vectors into the embedding layer and freeze it.
net.embedding.weight.data.copy_(load_pretrained_embedding(vocab.itos, glove))
net.embedding.weight.requires_grad = False  # loaded pretrained vectors directly, so no need to update them

# (message: "train and evaluate the model, one epoch only")
print('训练并评价模型,只训练一轮')

lr, num_epochs = 0.01, 1
# Optimize only the parameters that still require gradients
# (the frozen embedding above is excluded by the filter).
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                    net.parameters()),
                             lr=lr)
loss = nn.CrossEntropyLoss()
utils.train(train_iter, test_iter, net, loss, optimizer, device, num_epochs)

# (message: "try prediction")
print('尝试预测')
print(
    utils.predict_sentiment(net, vocab,
                            ['this', 'movie', 'is', 'so', 'great']))
print(utils.predict_sentiment(net, vocab,
                              ['this', 'movie', 'is', 'so', 'bad']))
# Sample output from one CPU run, kept for reference:
'''
training on  cpu
epoch 1, loss 0.6503, train acc 0.593, test acc 0.790, time 1577.5 sec
尝试预测
positive
negative
'''