Ejemplo n.º 1
0
def test_train_charlm_load_use_classifier(results_base_path, tasks_base_path):
    """Train a classifier on Flair character-LM embeddings, then reload it.

    Trains for two epochs on the ``imdb`` task data, checks that every
    predicted label has a value and a float score within [0, 1], then loads
    ``final-model.pt`` and runs it on a regular and a whitespace-only
    sentence. The results directory is removed at the end.
    """
    imdb_corpus = flair.datasets.ClassificationCorpus(tasks_base_path / 'imdb')
    labels = imdb_corpus.make_label_dictionary()

    charlm = FlairEmbeddings('news-forward-fast')
    rnn_embeddings = DocumentRNNEmbeddings(
        [charlm], 128, 1, False, 64, False, False)
    classifier = TextClassifier(rnn_embeddings, labels, False)

    trainer = ModelTrainer(classifier, imdb_corpus)
    trainer.train(results_base_path, max_epochs=2, shuffle=False)

    probe = Sentence('Berlin is a really nice city.')
    for scored in classifier.predict(probe):
        for label in scored.labels:
            assert label.value is not None
            assert 0.0 <= label.score <= 1.0
            assert type(label.score) is float

    reloaded = TextClassifier.load(results_base_path / 'final-model.pt')
    filled = Sentence('I love Berlin')
    blank = Sentence('       ')
    # Single sentence, mixed batch, then a batch holding only the blank one.
    for batch in (filled, [filled, blank], [blank]):
        reloaded.predict(batch)

    shutil.rmtree(results_base_path)
Ejemplo n.º 2
0
def test_train_load_use_classifier_flair(results_base_path, tasks_base_path):
    """Train, save and reload a "topic" classifier built on ``flair_embeddings``.

    ``flair_embeddings`` is not defined in this function; it is expected to
    exist at module level.
    """
    topic_corpus = flair.datasets.ClassificationCorpus(
        tasks_base_path / "imdb", label_type="topic")
    topic_labels = topic_corpus.make_label_dictionary(label_type="topic")

    rnn_embeddings: DocumentRNNEmbeddings = DocumentRNNEmbeddings(
        [flair_embeddings], 128, 1, False, 64, False, False)

    classifier: TextClassifier = TextClassifier(
        document_embeddings=rnn_embeddings,
        label_dictionary=topic_labels,
        label_type="topic",
        multi_label=False,
    )

    trainer = ModelTrainer(classifier, topic_corpus)
    trainer.train(results_base_path, max_epochs=2, shuffle=False)

    probe = Sentence("Berlin is a really nice city.")
    classifier.predict(probe)

    for predicted in probe.labels:
        assert predicted.value is not None
        assert 0.0 <= predicted.score <= 1.0
        assert type(predicted.score) is float

    # Drop the training objects before loading the saved model from disk.
    del trainer, classifier, topic_corpus, rnn_embeddings
    reloaded = TextClassifier.load(results_base_path / "final-model.pt")

    filled = Sentence("I love Berlin")
    blank = Sentence("       ")
    # Single sentence, mixed batch, then a batch holding only the blank one.
    for batch in (filled, [filled, blank], [blank]):
        reloaded.predict(batch)

    # clean up results directory
    shutil.rmtree(results_base_path)
    del reloaded
Ejemplo n.º 3
0
def test_train_load_use_classifier_with_sampler(results_base_path,
                                                tasks_base_path):
    """Train with ``ImbalancedClassificationDatasetSampler``, then reload.

    ``document_embeddings`` is not defined in this function; it is expected
    to exist at module level.
    """
    imdb_corpus = flair.datasets.ClassificationCorpus(tasks_base_path / "imdb")
    labels = imdb_corpus.make_label_dictionary()

    classifier: TextClassifier = TextClassifier(
        document_embeddings, labels, multi_label=False)

    trainer = ModelTrainer(classifier, imdb_corpus)
    training_options = dict(
        max_epochs=2,
        shuffle=False,
        sampler=ImbalancedClassificationDatasetSampler,
    )
    trainer.train(results_base_path, **training_options)

    probe = Sentence("Berlin is a really nice city.")
    for scored in classifier.predict(probe):
        for label in scored.labels:
            assert label.value is not None
            assert 0.0 <= label.score <= 1.0
            assert type(label.score) is float

    # Drop the training objects before loading the saved model from disk.
    del trainer, classifier, imdb_corpus
    reloaded = TextClassifier.load(results_base_path / "final-model.pt")

    filled = Sentence("I love Berlin")
    blank = Sentence("       ")
    # Single sentence, mixed batch, then a batch holding only the blank one.
    for batch in (filled, [filled, blank], [blank]):
        reloaded.predict(batch)

    # clean up results directory
    shutil.rmtree(results_base_path)
    del reloaded
Ejemplo n.º 4
0
    def main(FIN, FOUT):
        """Run the sentiment stack over US, English-language tweets from a CSV.

        Args:
            FIN: Path of the input CSV. It must provide the columns
                ``place_country_code``, ``language``, ``created_at``,
                ``place_full_name``, ``mentions``, ``hashtags`` and
                ``clean_text``.
            FOUT: Output directory. It is created if it does not exist; each
                processed tweet is passed to ``dump_tweet`` together with
                ``FOUT + '/sentiment_tweets.json'``.

        Raises:
            OSError: If the output directory cannot be created (previously
                this was silently ignored by a bare ``except``).
        """
        print("Reading in data.")

        df = pd.read_csv(FIN)

        count = 0  # running count, threaded through every run_stack call

        # filter dataframe for only USA tweets; missing values become 'None'
        df = df[df['place_country_code'] == 'US'].fillna('None')

        df = df[df['language'] == 'en']

        # load classifier
        # NOTE(review): the local is never passed to run_stack below; the call
        # is kept because loading may be relied on for its side effects.
        classifier = TextClassifier.load('sentiment')

        print('Model has been loaded from flair.')

        # An already-existing directory is fine; any other failure (for
        # example missing permissions) now surfaces here instead of later.
        os.makedirs(FOUT, exist_ok=True)

        print('Running Script.')

        out_path = FOUT + '/sentiment_tweets.json'
        columns = [
            'created_at', 'place_full_name', 'language', 'mentions',
            'hashtags', 'clean_text'
        ]
        for _, row in df[columns].iterrows():
            tweet, count = run_stack(count, *(row[name] for name in columns))
            dump_tweet(out_path, tweet)

        print('Script has finished.')
Ejemplo n.º 5
0
def test_train_load_use_classifier_with_prob(results_base_path,
                                             tasks_base_path):
    """Train on "turian" word embeddings and predict with ``multi_class_prob``.

    Every prediction call, before and after reloading ``final-model.pt``,
    passes ``multi_class_prob=True``.
    """
    imdb_corpus = flair.datasets.ClassificationCorpus(tasks_base_path / "imdb")
    labels = imdb_corpus.make_label_dictionary()

    turian: WordEmbeddings = WordEmbeddings("turian")
    rnn_embeddings: DocumentRNNEmbeddings = DocumentRNNEmbeddings(
        [turian], 128, 1, False, 64, False, False)

    classifier = TextClassifier(rnn_embeddings, labels, False)

    trainer = ModelTrainer(classifier, imdb_corpus)
    trainer.train(
        results_base_path,
        EvaluationMetric.MICRO_F1_SCORE,
        max_epochs=2,
        shuffle=False,
    )

    probe = Sentence("Berlin is a really nice city.")
    for scored in classifier.predict(probe, multi_class_prob=True):
        for label in scored.labels:
            assert label.value is not None
            assert 0.0 <= label.score <= 1.0
            assert type(label.score) is float

    reloaded = TextClassifier.load(results_base_path / "final-model.pt")

    filled = Sentence("I love Berlin")
    blank = Sentence("       ")
    # Single sentence, mixed batch, then a batch holding only the blank one.
    for batch in (filled, [filled, blank], [blank]):
        reloaded.predict(batch, multi_class_prob=True)

    # clean up results directory
    shutil.rmtree(results_base_path)
def main():
    """Print positive/negative sentiment counts per restaurant and year.

    For each restaurant and each year from 2010 to 2020 the scores returned
    by ``calcualte_sentiment_for_given_restaurant_year`` are tallied: a score
    equal to 1 counts as positive, anything else as negative. The negative
    count is printed negated.
    """
    db_name = 'restaurants.db'
    years = [str(year) for year in range(2010, 2021)]
    classifier = TextClassifier.load('sentiment')

    for restaurant in ('Tibits', 'Mildreds Soho', 'By Chloe'):
        print('restaurant', restaurant)
        for year in years:
            print('year', year)
            scores = calcualte_sentiment_for_given_restaurant_year(
                db_name, classifier, restaurant, year)
            verdicts = [score == 1 for score in scores]
            positives = sum(1 for verdict in verdicts if verdict)
            negatives = len(verdicts) - positives
            print('positive', positives)
            print('negative', -negatives)
Ejemplo n.º 7
0
import pandas as pd
import streamlit as st
from flair.data import Sentence
from flair.models import TextClassifier
from twitterscraper import query_tweets
import sys

# NOTE(review): hard-coded, machine-specific site-packages path; it is appended
# after the imports above have already run.
sys.path.append('/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages')

# Set page title
st.title('Twitter Sentiment Analysis')

# Load classification model (a spinner is shown while the model file loads)
with st.spinner('Loading classification model...'):
    classifier = TextClassifier.load('models/best-model.pt')

# Constants read by preprocess():
#   allowed_chars - characters that are kept; everything else is dropped
#   punct         - characters that get a space added on both sides
#   maxlen        - defined here but not read by preprocess() as written
allowed_chars = ' AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz0123456789~`!@#$%^&*()-=_+[]{}|;:",./<>?'
punct = '!?,.@#'
maxlen = 280

def preprocess(text):
    """Normalise tweet text before it is handed to the classifier.

    Every ``http...`` run is replaced by the literal ``http``, characters not
    in ``allowed_chars`` are dropped, and each character in ``punct`` is
    padded with one space on both sides. ``maxlen`` is not applied here.
    """
    without_urls = re.sub(r'http\S+', 'http', text, flags=re.MULTILINE)
    pieces = []
    for ch in without_urls:
        if ch not in allowed_chars:
            continue
        pieces.append(' ' + ch + ' ' if ch in punct else ch)
    return ''.join(pieces)

### SINGLE TWEET CLASSIFICATION ###
st.subheader('Single tweet classification')

# Read the raw tweet text from a Streamlit text box. Preprocessing and the
# conversion to flair.data.Sentence are not part of this excerpt.
tweet_input = st.text_input('Tweet:')
Ejemplo n.º 8
0
                                                                     )

# 5. create the text classifier
# (document_embeddings, label_dict and corpus are defined outside this excerpt)
classifier = TextClassifier(document_embeddings, label_dictionary=label_dict)

# 6. initialize the text classifier trainer
trainer = ModelTrainer(classifier, corpus)

# 7. start the training; output is written below 'resources/taggers/ag_news'
trainer.train('resources/taggers/ag_news',
              learning_rate=0.1,
              mini_batch_size=32,
              anneal_factor=0.5,
              patience=5,
              max_epochs=150)

# 8. plot training curves (optional), reading files from the training folder
from flair.visual.training_curves import Plotter
plotter = Plotter()
plotter.plot_training_curves('resources/taggers/ag_news/loss.tsv')
plotter.plot_weights('resources/taggers/ag_news/weights.txt')

# reload the final model from disk, replacing the in-memory classifier
classifier = TextClassifier.load('resources/taggers/ag_news/final-model.pt')

# create example sentence
sentence = Sentence('France is the current world cup winner.')

# predict the class label(s); they are attached to the sentence object
classifier.predict(sentence)

print(sentence.labels)
Ejemplo n.º 9
0
    reproject_words_dimension=256,
)

# 5. create the text classifier
# (document_embeddings, label_dict and corpus are defined outside this excerpt)
classifier = TextClassifier(document_embeddings, label_dictionary=label_dict)

# 6. initialize the text classifier trainer
trainer = ModelTrainer(classifier, corpus)

# 7. start the training; output is written below 'resources/classifiers'
trainer.train('resources/classifiers',
              learning_rate=0.1,
              mini_batch_size=32,
              anneal_factor=0.5,
              patience=5,
              max_epochs=150)

# From here on: load the trained model and try running it
classifier = TextClassifier.load('resources/classifiers/best-model.pt')

# create example sentence, tokenized with the custom japanese_tokenizer
sentence = Sentence("現実を受け入れて生きるしかない", use_tokenizer=japanese_tokenizer)
print(sentence.to_tokenized_string())

# predict class and print
classifier.predict(sentence)

# NOTE: label_dict is rebound here from the label dictionary used above to the
# first predicted label, in dict form.
label_dict = sentence.to_dict()["labels"][0]

# Bare expression: its value is discarded when run as a script (presumably
# meant for display in a notebook cell). Yields the confidence when the
# predicted value is "__label__O", otherwise 0.
label_dict["confidence"] if label_dict["value"] == "__label__O" else 0
# Apply a document RNN on top of the stacked word embeddings
# (word_embeddings, corpus and PATH are defined outside this excerpt).
document_embeddings = DocumentRNNEmbeddings(
        word_embeddings,
#         hidden_size=512,
#         reproject_words=True,
#         reproject_words_dimension=256,
    )

# Build the single-label classifier and its trainer
classifier = TextClassifier(document_embeddings, label_dictionary=corpus.make_label_dictionary(), multi_label=False)
trainer = ModelTrainer(classifier, corpus)

# Specify parameters and train the model; output goes to PATH/'models/'
trainer.train(PATH/'models/', max_epochs=3,checkpoint=True, learning_rate=1e-1) 

# NOTE(review): the model is loaded from an absolute Drive path, not from the
# PATH/'models/' folder used for training above - confirm both are the same.
classifier = TextClassifier.load('/content/drive/My Drive/emnlp/models/best-model.pt')



"""## Dev Set Prediction"""

# Empty placeholders; they must be filled in before the dev-set code is run.
dev_folder = ""     # if not adjust these variables accordingly
dev_template_labels_file = ""
task_SLC_output_file = ""



def read_articles_from_file_list(folder_name, file_pattern="*.txt"):
    
    file_list = glob.glob(os.path.join(folder_name, file_pattern))
    article_id_list, sentence_id_list, sentence_list = ([], [], [])
Ejemplo n.º 11
0
import datetime as dt
import re

import pandas as pd
import streamlit as st
from flair.data import Sentence
from flair.models import TextClassifier

# Set page title
st.title('Sentiment Analysis')

# Load classification model (a spinner is shown while the model file loads)
# NOTE(review): absolute, user-specific path; it will not resolve elsewhere.
with st.spinner('Loading classification model...'):
    classifier = TextClassifier.load(
        '/Users/mengyu/Desktop/engineering/models/best-model.pt')

# Constants read by preprocess():
#   allowed_chars - characters that are kept; everything else is dropped
#   punct         - characters that get a space added on both sides
#   maxlen        - defined here but not read by preprocess() as written
allowed_chars = ' AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz0123456789~`!@#$%^&*()-=_+[]{}|;:",./<>?'
punct = '!?,.@#'
maxlen = 280


def preprocess(text):
    """Return *text* cleaned up for classification.

    Steps, in order: collapse every ``http...`` run to the literal ``http``;
    keep only characters found in ``allowed_chars``; surround each character
    found in ``punct`` with single spaces. ``maxlen`` is not applied here.
    """

    def spaced(ch):
        # Punctuation is padded with a space on each side; the rest is kept.
        if ch in punct:
            return ' ' + ch + ' '
        return ch

    url_free = re.sub(r'http\S+', 'http', text, flags=re.MULTILINE)
    kept = (ch for ch in url_free if ch in allowed_chars)
    return ''.join(spaced(ch) for ch in kept)
        return thevalence
    #Surely I missed the builtin method that just returns the value?
    except Exception:
        print("An exception occurred. Text was not passed to get_valence")
        return 'n/a'


def plotone_df(df):
    """Show a histogram of the ``valence`` column of *df* (blue, black edges)."""
    bar_style = {'color': 'blue', 'edgecolor': 'black'}
    valence = df['valence']
    plt.hist(valence, **bar_style)
    plt.show()


print_intro()

# Module-level sentiment model (get_valence is defined outside this excerpt)
classifier = TextClassifier.load('en-sentiment')

# Add a 'valence' column to every dataframe in dfs by applying get_valence to
# its 'text' column; num only numbers the progress message.
num = 0
for df in dfs:
    threelines()
    print("Preparing valence for df %d - this may take a second" % num)
    #print("Preparing valence for df %s "%str(dfs.index(df)))
    df['valence'] = df['text'].apply(get_valence)
    #df.reset_index(drop=True)
    #plt.hist(df['valence'], color='blue', edgecolor='black')
    # plt.tight_layout()
    #plt.show()
    num += 1

    #plotone_df(df)
    #print(df['valence'].dtypes)
Ejemplo n.º 13
0
from flask import Flask, jsonify, request
from fastai.text import *
from flair.models import TextClassifier
from flair.data import Sentence
import json
import heapq

# Flask application object; the route decorators below attach to it.
application = Flask(__name__)

# Flair model loaded once at import time.
# NOTE(review): not referenced by the routes visible in this excerpt.
classifier_sentiment = TextClassifier.load('./models/best-model.pt')


@application.route("/")
def hello():
    """Serve a fixed greeting at the root URL."""
    greeting = "Hello World!"
    return greeting


@application.route('/classify', methods=['POST'])
def post_tasks():
    """Classify the posted samples and return them with a "categories" field.

    The request body is parsed as JSON and loaded into a DataFrame; its
    ``sample`` column is used as the test set of the learner exported at
    ``./models/export.pkl``. Each prediction is mapped through
    ``get_classes`` and stored on the matching input item, which is also
    printed. The annotated items are returned as JSON.
    """
    payload = json.loads(request.data)
    frame = pd.DataFrame(payload)
    test_items = TextList.from_df(frame[['sample']])
    # The learner is re-loaded on every request with this request's items.
    learner = load_learner('./models', 'export.pkl', test=test_items)
    raw_preds = learner.get_preds(ds_type=DatasetType.Test)[0].tolist()
    categories = [get_classes(pred) for pred in raw_preds]

    annotated = []
    for position, sample in enumerate(payload):
        sample["categories"] = categories[position]
        print(sample)
        annotated.append(sample)
    return jsonify(annotated)
Ejemplo n.º 14
0
import pandas as pd
from flair.models import TextClassifier
from flair.data import Sentence
import sys
from flair.models import SequenceTagger
import webbrowser
import re
import os

# Module-level flag read by debug_print(); set to True to enable its output.
debug = False
# Keyword lists (presumably matched against user replies - usage is outside
# this excerpt).
play_tags = ['ok','play','yes','sure','like','love','awesome','nice','yep','yeah','good']
retry_tags = ['no','next','shuffle','hate','dislike','another','nope','nay','jeez','nah','ugh','not']

# Model loading: a POS tagger, the stock English sentiment model, and a
# classifier whose path is taken from the first command-line argument.
tagger = SequenceTagger.load('pos')
mood = TextClassifier.load('en-sentiment')
classifier = TextClassifier.load(sys.argv[1])
# Music table; rows whose 'valence' equals the string '0.0' are dropped.
df = pd.read_pickle('./data/music.pkl')
df = df.loc[df['valence'] != '0.0']

# Distinct values of the 'genre' column
genres = set(df['genre'])

mood_history = []
current_genre = False

def debug_print(*objects):
    """Print the arguments as a single tuple when the ``debug`` flag is set.

    ``debug`` is the module-level flag; nothing is printed when it is falsy.
    """
    if not debug:
        return
    print(objects)

Ejemplo n.º 15
0
import streamlit as st

import pandas as pd
import numpy as np
from flair.models import TextClassifier
from flair.data import Sentence

##checkpt: Jun6 8:31pm done prediction and reading dataframe!

# Folder holding the fine-tuned model; the model path below is built by plain
# string concatenation, so the trailing slashes are part of the final path.
new_data_folder = './gdp_benchmark_classifier//'
finetuned_classifier = TextClassifier.load(new_data_folder + 'best-model.pt')


def finetuned_model_predictions(input_file_path, col_text,
                                finetuned_classifier, output_file_path):
    '''Makes Sentiment Predictions on unannotated data points contained in the input csvfile by loading the user-defined classifier.
     Exports the csvfile by adding new columns and filling in results from model predictions.
  '''

    if col_text.isdigit():  # if no text header
        unannotated_df = pd.read_csv(input_file_path, header=None)
        col_text = int(
            col_text
        )  ## indexing works after turning input string into integer !!
    else:
        unannotated_df = pd.read_csv(input_file_path)
    ## drop some duplicated rows
    #unannotated_df = unannotated_df.drop_duplicates(col_text)

    ## add new columns to export predictions
    ## modified on May28 to export predict_prob for less likely labels as well
Ejemplo n.º 16
0
 def __init__(self, ckpt=CKPT, name2nats=DICT):
     """Load the classifier from *ckpt* and build the ``name2nats`` lookup.

     Args:
         ckpt: Passed to ``TextClassifier.load``; defaults to ``CKPT``.
         name2nats: Passed to ``self.construct``, whose result is stored as
             ``self.name2nats``; defaults to ``DICT``.
     """
     self.classifier = TextClassifier.load(ckpt)
     self.name2nats = self.construct(name2nats)
Ejemplo n.º 17
0
# Twitter API credentials. The 'XXXX' values are placeholders that must be
# replaced before the API can authenticate.
consumer_key = 'XXXX'
consumer_secret = 'XXXX'
access_token = 'XXXX'
access_token_secret = 'XXXX'

# Build the authenticated API client; wait_on_rate_limit=True is passed so the
# client waits instead of failing when the rate limit is hit.
auth = tw.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tw.API(auth, wait_on_rate_limit=True)

# Set page title
st.title('Twitter Sentiment Analysis')

# Load classification model (a spinner is shown while the model file loads)
with st.spinner('Loading classification model...'):

    classifier = TextClassifier.load('model-saves/my_fine_tuned_bert1.pt')

# Constants for preprocess(): characters to keep, characters to pad with
# spaces, and a length limit.
allowed_chars = ' AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz0123456789~`!@#$%^&*()-=_+[]{}|;:",./<>?'
punct = '!?,.@#'
maxlen = 280


def preprocess(text):
    # Delete URLs, cut to maxlen, space out punctuation with spaces, and remove unallowed chars
    return ''.join([
        ' ' + char + ' ' if char in punct else char for char in [
            char
            for char in re.sub(r'http\S+', 'http', text, flags=re.MULTILINE)
            if char in allowed_chars
        ]
Ejemplo n.º 18
0
def predict_labels(data, pretrained_model):
    """Classify every tagged span in a token-per-line file and print a table.

    The file at *data* is read line by line. Each non-blank line is split on
    whitespace; element 0 is used as the token and element 1 as its tag.
    A line that is exactly a newline ends the current sentence. Within a
    sentence, a token whose tag starts with ``B`` or ``I`` opens a span that
    is extended by following tokens whose tag neither starts with ``B`` nor
    equals ``O``. Each span text (unless it is the string ``nan``) is
    classified and recorded as ``"<span>:<label>"``.

    Finally a DataFrame with one row per sentence (column ``Abstract`` plus
    ``Outcome 1..k``) is printed with ``tabulate``. Nothing is returned.

    Args:
        data: Path of the input file.
        pretrained_model: Passed to ``TextClassifier.load``.

    NOTE(review): a sentence is only processed when a blank line follows it,
    so a last sentence without a trailing blank line is dropped, and
    ``max()`` below raises ``ValueError`` if the file has no blank line.
    """
    classifier = TextClassifier.load(pretrained_model)
    data_to_classify, sub_outcomes = [], []

    with open(data, 'r') as d:
        instances = []  # split lines of the sentence currently being read
        for i in d.readlines():
            if i != '\n':
                i = i.split()
                instances.append(i)
            else:
                # Blank line: the sentence is complete, process it.
                # if instances:
                instances_copy = instances.copy()
                data_to_classify.append(instances_copy)
                # l is the index of the next token to inspect; it is moved
                # past tokens that were already absorbed into a span.
                outcome, sub_instances, l = '', [], 0
                _outcomes_ = ()  # never read afterwards
                #print(instances_copy)
                for x in range(len(instances_copy)):
                    if x == l:
                        x_str = instances_copy[x][1]
                        #     if x_str != 'O':
                        if x_str.startswith('B') or x_str.startswith('I'):
                            outcome = instances_copy[x][0]
                            if x == len(instances_copy) - 1:
                                # Span opens on the last token: classify it
                                # on its own.
                                if str(outcome.strip()) != 'nan':
                                    sent = Sentence(outcome)
                                    classifier.predict(sent)
                                    sub_instances.append('{}:{}'.format(
                                        outcome.strip(), sent.labels[0].value))
                            else:
                                # Look ahead and extend the span token by
                                # token.
                                for y in range(x + 1, len(instances_copy)):
                                    if not instances_copy[y][1].startswith(
                                            'B'
                                    ) and instances_copy[y][1] != 'O':
                                        outcome += ' {}'.format(
                                            instances_copy[y][0])
                                        outcome = outcome.strip()
                                        if y == len(instances_copy) - 1:
                                            # Span runs to the end of the
                                            # sentence: classify and reset.
                                            outcome_copy = outcome
                                            if str(outcome_copy.strip()
                                                   ) != 'nan':
                                                sent = Sentence(outcome_copy)
                                                classifier.predict(sent)
                                                sub_instances.append(
                                                    '{}:{}'.format(
                                                        outcome_copy.strip(),
                                                        sent.labels[0].value))
                                            outcome = ''
                                        # Mark token y as consumed.
                                        l = y
                                    else:
                                        # A 'B...' or 'O' tag ends the span:
                                        # classify what was collected.
                                        if outcome:
                                            outcome_copy = outcome
                                            if str(outcome_copy.strip()
                                                   ) != 'nan':
                                                sent = Sentence(outcome_copy)
                                                classifier.predict(sent)
                                                sub_instances.append(
                                                    '{}:{}'.format(
                                                        outcome_copy.strip(),
                                                        sent.labels[0].value))
                                            outcome = ''
                                        break
                        # Continue with the token after the last consumed one.
                        l += 1
                sub_outcomes.append(tuple(sub_instances))
                instances.clear()
    # Rebuild each sentence's text from its tokens (element 0 of every line).
    data_to_classify = [' '.join(j[0] for j in i) for i in data_to_classify]
    data_to_classify_frame = pd.DataFrame(data_to_classify,
                                          columns=['Abstract'])
    # One 'Outcome N' column per span, sized by the sentence with most spans.
    max_outcomes_per_sentence = max([len(i) for i in sub_outcomes])
    columns_ = [
        'Outcome {}'.format(i + 1) for i in range(max_outcomes_per_sentence)
    ]
    sub_outcomes_frame = pd.DataFrame(sub_outcomes, columns=columns_)
    data_to_classify_frame = pd.concat(
        [data_to_classify_frame, sub_outcomes_frame], axis=1)
    print(tabulate(data_to_classify_frame, headers='keys', tablefmt='psql'))
Ejemplo n.º 19
0
def clean(raw):
    """Strip hyperlinks, a handful of HTML entities/tags and newlines.

    Whole ``<a ...>...</a>`` elements become the placeholder ``Link.``; the
    remaining substitutions are literal and applied in a fixed order.
    """
    text = re.sub("<[a][^>]*>(.+?)</[a]>", 'Link.', raw)

    # (needle, substitute) pairs; the order is significant and must be kept.
    literal_swaps = (
        ('&gt;', ""),
        ('&#x27;', "'"),
        ('&quot;', '"'),
        ('&#x2F;', ' '),
        ('<p>', ' '),
        ('</i>', ''),
        ('&#62;', ''),
        ('<i>', ' '),
        ("\n", ''),
    )
    for needle, substitute in literal_swaps:
        text = text.replace(needle, substitute)
    return text


# Load the trained model from the local results folder.
classifier = TextClassifier.load('./model_result/final-model.pt')

# NOTE(review): df is not assigned anywhere in this excerpt (the assignment
# below is commented out); it must already exist when this code runs.
#df = review_dataframe
#df = df.head(10)

# Replace missing texts with '' and make sure every entry is a str.
df['text'] = df['text'].fillna('').apply(str)

d = []  # never filled in this excerpt

# Clean each review, classify it, and print the text followed by its labels.
for i, row in df.iterrows():
    document = row['text']
    document = clean(document)
    sentence = Sentence(document)
    classifier.predict(sentence)
    print(document + "\n\n")
    print(sentence.labels)
Ejemplo n.º 20
0
def get_classifier_model(model_name) -> TextClassifier:
    """Load and return the ``TextClassifier`` identified by *model_name*."""
    loaded = TextClassifier.load(model_name)
    return loaded
    reproject_words_dimension=256,
)

# 5. create the text classifier
# (document_embeddings, label_dict and corpus are defined outside this excerpt)
classifier = TextClassifier(document_embeddings, label_dictionary=label_dict)

# 6. initialize the text classifier trainer
trainer = ModelTrainer(classifier, corpus)

# 7. start the training
# NOTE(review): absolute, user-specific output folder.
trainer.train('/home/anna/Desktop/markup/8',
              learning_rate=0.1,
              mini_batch_size=32,
              anneal_factor=0.5,
              patience=5,
              max_epochs=150)

# 8. plot weight traces (optional), reading weights.txt from the output folder
from flair.visual.training_curves import Plotter
plotter = Plotter()
plotter.plot_weights('/home/anna/Desktop/markup/8/weights.txt')

# reload the final model from disk, replacing the in-memory classifier
classifier = TextClassifier.load('/home/anna/Desktop/markup/8/final-model.pt')

# create example sentences
sentence = Sentence('France is the current world cup winner.')

# predict class and print; the labels are attached to the sentence object
classifier.predict(sentence)

print(sentence.labels)
Ejemplo n.º 22
0
def load_flair():
    """Load and return the Flair text classifier named ``en-sentiment``."""
    sentiment_model = TextClassifier.load('en-sentiment')
    return sentiment_model
Ejemplo n.º 23
0
 def __init__(self, model_name_or_path: str):
     """Load the classifier once and keep it on the instance.

     Args:
         model_name_or_path: Passed unchanged to ``TextClassifier.load``.
     """
     self.classifier = TextClassifier.load(model_name_or_path)
# Create model
# (document_embeddings, label_dict, corpus and preprocess are defined outside
# this excerpt)
from flair.models import TextClassifier

classifier = TextClassifier(document_embeddings, label_dictionary=label_dict)

# Create model trainer
from flair.trainers import ModelTrainer

trainer = ModelTrainer(classifier, corpus)

# Train the model; output is written below 'model-saves'
trainer.train('model-saves',
              learning_rate=0.1,
              mini_batch_size=32,
              anneal_factor=0.5,
              patience=8,
              max_epochs=200)

# Load the model and make predictions
from flair.data import Sentence

classifier = TextClassifier.load('model-saves/final-model.pt')

# Both example texts go through preprocess() before prediction.
pos_sentence = Sentence(preprocess('I love Python!'))
neg_sentence = Sentence(preprocess('Python is the worst!'))

# predict() attaches the labels to the Sentence objects.
classifier.predict(pos_sentence)
classifier.predict(neg_sentence)

print(pos_sentence.labels, neg_sentence.labels)
Ejemplo n.º 25
0
 def __init__(self, path_to_model: str) -> None:
     """Load a trained Flair sentiment model.

     Args:
         path_to_model: Passed unchanged to ``TextClassifier.load``.
     """
     # Imported here rather than at module level, so flair is only imported
     # when an instance is created.
     from flair.models import TextClassifier
     self.classifier = TextClassifier.load(path_to_model)
Ejemplo n.º 26
0
def test_train_load_use_classifier_multi_label(results_base_path,
                                               tasks_base_path):
    """Train a multi-label "topic" classifier and verify it before/after reload.

    After training on the ``multi_class`` task data (train, dev and test
    together), the sentence "apple tv" must receive both the "apple" and the
    "tv" label, from the in-memory model as well as from the reloaded
    ``final-model.pt``. ``document_embeddings`` is not defined in this
    function; it is expected to exist at module level.
    """
    topic_corpus = flair.datasets.ClassificationCorpus(
        tasks_base_path / "multi_class", label_type="topic")
    topic_labels = topic_corpus.make_label_dictionary(label_type="topic")

    classifier: TextClassifier = TextClassifier(
        document_embeddings=document_embeddings,
        label_dictionary=topic_labels,
        label_type="topic",
        multi_label=True,
    )

    trainer = ModelTrainer(classifier, topic_corpus)
    training_options = dict(
        mini_batch_size=1,
        max_epochs=20,
        shuffle=False,
        checkpoint=False,
        train_with_test=True,
        train_with_dev=True,
    )
    trainer.train(results_base_path, **training_options)

    probe = Sentence("apple tv")
    classifier.predict(probe)

    assert "apple" in [label.value for label in probe.labels]
    assert "tv" in [label.value for label in probe.labels]

    for predicted in probe.labels:
        print(predicted)
        assert predicted.value is not None
        assert 0.0 <= predicted.score <= 1.0
        assert type(predicted.score) is float

    # Drop the training objects before loading the saved model from disk.
    del trainer, classifier, topic_corpus
    reloaded = TextClassifier.load(results_base_path / "final-model.pt")

    probe = Sentence("apple tv")
    reloaded.predict(probe)

    assert "apple" in [label.value for label in probe.labels]
    assert "tv" in [label.value for label in probe.labels]

    for predicted in probe.labels:
        assert predicted.value is not None
        assert 0.0 <= predicted.score <= 1.0
        assert type(predicted.score) is float

    filled = Sentence("I love Berlin")
    blank = Sentence("       ")
    # Single sentence, mixed batch, then a batch holding only the blank one.
    for batch in (filled, [filled, blank], [blank]):
        reloaded.predict(batch)

    del reloaded
Ejemplo n.º 27
0
from flask import Flask, request, jsonify
from flask_cors import CORS
from flair.models import TextClassifier
from flair.data import Sentence
# Model loaded once at import time and shared by all requests.
classifier = TextClassifier.load('./model/best-model.pt')

# Label name -> HTML character reference returned to the client.
# NOTE(review): only the 'heart' and 'baseball' entries end with ';'.
mapping = {
    'sad': '&#x1F61E',
    'smile': '&#x1F600',
    'food': '&#x1F37D',
    'heart': '&#10084;',
    'baseball': '&#x26be;'
}
# Flask app with CORS applied through flask_cors.
app = Flask(__name__)
CORS(app)


@app.route('/emojify', methods=['POST'])
def emoji():
    """Return the emoji character reference matching the posted text.

    Reads the ``text`` form field, classifies it, and returns the ``mapping``
    entry of the first of 'sad', 'smile', 'food' that occurs in the string
    form of the predicted labels.

    Returns:
        The mapped character reference, or ``''`` when the field is missing
        or blank, or when none of the three label names matches.
    """
    data = request.form.get('text')
    # A request without a 'text' field yields None; treat it like blank input
    # instead of failing on None.strip().
    if data is None or not data.strip():
        return ''
    sentence = Sentence(data)
    classifier.predict(sentence)
    predicted = str(sentence.labels)
    print(predicted)
    # Checked in this order; the first label name found wins.
    # NOTE(review): 'heart' and 'baseball' exist in mapping but are not
    # checked here - confirm whether they should be.
    for name in ('sad', 'smile', 'food'):
        if name in predicted:
            return mapping[name]
    # Return an empty body rather than None, which is not a valid response.
    return ''
Ejemplo n.º 28
0
class SentimentAnalysisAPI(object):
    """Static sentiment helpers backed by Flair, TextBlob and an NLTK analyzer.

    All methods are static. The three ``get_*_sentiment_analysis`` entry
    points are best-effort: any exception is logged through ``logger`` and a
    default-constructed output object is returned instead.
    """

    # Stand-in sentence analysed when the input yields no usable sentence.
    default_sentence: str = "N/A"
    # Both models are created once, when the class body is executed.
    flair_sent_model: TextClassifier = TextClassifier.load("sentiment")
    nltk_sent_model: SentimentIntensityAnalyzer = SentimentIntensityAnalyzer()

    @staticmethod
    def get_document_language(doc: str) -> str:
        """Return the result of ``detect(doc)``, or ``""`` if it raises."""
        try:
            return detect(doc)
        except Exception as e:
            logger.error(e)
            return ""

    @staticmethod
    def translate_to_english(src_doc: str, src_lang: str) -> str:
        """Translate *src_doc* from *src_lang* to English.

        Returns:
            The translated text, or ``""`` if the translation raises.
        """
        try:
            translated = Translator().translate(src_doc,
                                                src=src_lang,
                                                dest="en")
            return translated.text
        except Exception as e:
            logger.error(e)
            return ""

    @staticmethod
    def make_sentences(text: str, min_char: int = 3) -> list:
        """ Break apart text into a list of sentences

        Sentences of at most *min_char* characters are discarded. When
        nothing remains (or *text* itself is that short) a warning is logged
        and ``[default_sentence]`` is returned.
        """
        sentences: list = []
        if len(text) > min_char:
            sentences = [
                sent for sent in split_single(text) if len(sent) > min_char
            ]

        if not sentences:
            logger.warning("Default sentence was added")
            sentences = [SentimentAnalysisAPI.default_sentence]
        return sentences

    @staticmethod
    def get_label_decision(score: float,
                           lower_boundary: float = .4,
                           upper_boundary: float = .6) -> str:
        """Map *score* to "Negative", "Neutral" or "Positive".

        Scores below *lower_boundary* are negative, scores in
        [*lower_boundary*, *upper_boundary*) are neutral, everything else is
        positive. Returns ``""`` if the comparison raises.
        """
        try:
            if score < lower_boundary:
                return "Negative"
            if lower_boundary <= score < upper_boundary:
                return "Neutral"
            return "Positive"
        except Exception as e:
            logger.error(e)
            return ""

    @staticmethod
    def _to_english_sentences(doc: str) -> tuple:
        """Detect the language, translate if needed, split into sentences.

        Any detected language other than "en" (including the empty string
        returned on detection failure) triggers a translation attempt.

        Returns:
            ``(sentences, was_translated)``.
        """
        was_translated: bool = False
        lang: str = SentimentAnalysisAPI.get_document_language(doc=doc)
        if lang != "en":
            doc = SentimentAnalysisAPI.translate_to_english(src_doc=doc,
                                                            src_lang=lang)
            was_translated = True
        sentences: list = SentimentAnalysisAPI.make_sentences(text=doc)
        return sentences, was_translated

    @staticmethod
    def get_flair_sentiment_analysis(doc: str) -> FlairSentOutput:
        """Score *doc* with the Flair model, averaged over its sentences.

        Per sentence the first label is used: its score when the label is
        "POSITIVE", otherwise ``1 - score``. The mean, rounded to 3 decimals,
        is reported as confidence and mapped to a label with
        ``get_label_decision``.
        """
        output: FlairSentOutput = FlairSentOutput()
        try:
            sentences, was_translated = (
                SentimentAnalysisAPI._to_english_sentences(doc))

            scores: list = []
            for sent in sentences:
                sentence: Sentence = Sentence(sent)
                SentimentAnalysisAPI.flair_sent_model.predict(sentence)
                top_label: Label = sentence.labels[0]
                if top_label.value == "POSITIVE":
                    scores.append(top_label.score)
                else:
                    scores.append(1 - top_label.score)

            final_score: float = round(float(np.mean(scores)), 3)
            final_label: str = SentimentAnalysisAPI.get_label_decision(
                score=final_score)

            output = FlairSentOutput(label=final_label,
                                     confidence=final_score,
                                     translated=was_translated,
                                     analysed=True)
        except Exception as e:
            logger.error(e)
        return output

    @staticmethod
    def get_textblob_sentiment_analysis(doc: str) -> TextBlobSentOutput:
        """Return mean TextBlob polarity and subjectivity over the sentences.

        Both means are rounded to 3 decimals.
        """
        output: TextBlobSentOutput = TextBlobSentOutput()
        try:
            sentences, was_translated = (
                SentimentAnalysisAPI._to_english_sentences(doc))

            polarity_scores: list = []
            subjectivity_scores: list = []
            for sent in sentences:
                # Build the TextBlob once and read both values from it.
                sentiment = TextBlob(sent).sentiment
                polarity_scores.append(sentiment.polarity)
                subjectivity_scores.append(sentiment.subjectivity)

            final_subjectivity: float = round(
                float(np.mean(subjectivity_scores)), 3)
            final_polarity: float = round(float(np.mean(polarity_scores)), 3)

            output = TextBlobSentOutput(analysed=True,
                                        polarity=final_polarity,
                                        subjectivity=final_subjectivity,
                                        translated=was_translated)

        except Exception as e:
            logger.error(e)
        return output

    @staticmethod
    def get_nltk_sentiment_analysis(doc: str) -> NLTKSentOutput:
        """Return mean NLTK "neg"/"neu"/"pos"/"compound" scores.

        The per-sentence results of ``nltk_sent_model.polarity_scores`` are
        averaged per key with ``get_nltk_scores``.
        """
        output: NLTKSentOutput = NLTKSentOutput()

        try:
            sentences, was_translated = (
                SentimentAnalysisAPI._to_english_sentences(doc))

            polarity_scores: list = [
                SentimentAnalysisAPI.nltk_sent_model.polarity_scores(text=sent)
                for sent in sentences
            ]

            neg_prob: float = SentimentAnalysisAPI.get_nltk_scores(
                scores=polarity_scores, key="neg")
            neu_prob: float = SentimentAnalysisAPI.get_nltk_scores(
                scores=polarity_scores, key="neu")
            pos_prob: float = SentimentAnalysisAPI.get_nltk_scores(
                scores=polarity_scores, key="pos")
            compound_prob: float = SentimentAnalysisAPI.get_nltk_scores(
                scores=polarity_scores, key="compound")

            output = NLTKSentOutput(analysed=True,
                                    negative_prob=neg_prob,
                                    neutral_prob=neu_prob,
                                    positive_prob=pos_prob,
                                    compound_prob=compound_prob,
                                    translated=was_translated)
        except Exception as e:
            logger.error(e)
        return output

    @staticmethod
    def get_nltk_scores(scores: list, key: str) -> float:
        """Return the mean of ``item.get(key)`` over *scores*, rounded to 3.

        Returns ``0.0`` if computing the mean raises.
        """
        try:
            return round(float(np.mean([i.get(key) for i in scores])), 3)
        except Exception as e:
            logger.error(e)
            return 0.0
Ejemplo n.º 29
0
def get_lyrics_df(folder, artist):
    """Build, enrich and persist the lyrics table for one artist.

    Loads the artist's lyrics through ``concat_lyrics_df``, then adds
    authorship flags, structure tags, cleaned and lemmatized lyrics with word
    counts, and flair sentiment columns. The result is written to
    ``lyrics_final.csv`` inside ``folder / artist``.

    Args:
        folder: Base directory; must support ``/`` joining with a string
            (e.g. a ``pathlib.Path``).
        artist: Artist name; stored in the ``artist`` column and used as the
            sub-directory name for the CSV.

    Returns:
        The enriched DataFrame that was written to disk.

    Note:
        The process working directory is changed to ``folder / artist`` as a
        side effect of saving the file.
    """
    lyrics_df = concat_lyrics_df(folder, artist)
    lyrics_df.loc[:, 'artist'] = artist

    # Create columns
    lyrics_df.loc[:, 'artist_wrote_song'] = lyrics_df.apply(
        lambda row: 1 if row['artist'] in row['writers'] else 0, axis=1)
    lyrics_df.loc[:, 'artist_produced_song'] = lyrics_df.apply(
        lambda row: 1 if row['artist'] in row['producers'] else 0, axis=1)
    lyrics_df.loc[:, 'structure_tags'] = lyrics_df['lyrics'].apply(
        lambda text: re.findall(r"(\[.*\])", text))
    lyrics_df.loc[:, 'lyrics_clean'] = lyrics_df.apply(
        lambda row: clean_lyrics(row['lyrics'], row['structure_tags']),
        axis=1)
    lyrics_df.loc[:, 'structure_tags_clean'] = (
        lyrics_df['structure_tags'].apply(clean_structure_tags))
    lyrics_df.loc[:, 'song_structure'] = (
        lyrics_df['structure_tags_clean'].apply(get_song_structure))

    # Analyze tokens and text. Each lyric is parsed once and both the token
    # count and the lemmatized text are derived from that single parse.
    nlp = spacy.load('en', disable=['parser', 'ner'])
    word_counts = []
    lemmatized = []
    for text in lyrics_df['lyrics_clean']:
        doc = nlp(text)
        word_counts.append(len(doc))
        lemmatized.append(' '.join(
            token.lemma_.lower() for token in doc
            if token.is_alpha and not token.is_stop))
    lyrics_df.loc[:, 'total_word_count'] = word_counts
    lyrics_df.loc[:, 'lyrics_lemmatized'] = lemmatized
    lyrics_df['lemma_count'] = lyrics_df['lyrics_lemmatized'].apply(
        lambda lemmas: len(lemmas.split(' ')))
    lyrics_df['unique_lemmas_on_song'] = lyrics_df['lyrics_lemmatized'].apply(
        lambda lemmas: len(set(lemmas.split(' '))))

    # Sentiment analysis
    classifier = TextClassifier.load('sentiment')

    sents = []
    for text in lyrics_df['lyrics_clean']:
        if not text:
            # Nothing to classify; keep the row aligned with an empty entry.
            sents.append('')
            continue
        sentence = Sentence(text)
        classifier.predict(sentence)
        sents.append(str(sentence.labels))

    lyrics_df.loc[:, 'flair_sentiment'] = sents

    # The label and its probability are parsed back out of the stringified
    # label list: strip the outer brackets, split on '(' and drop the
    # trailing ')' (presumably the text looks like "[LABEL (score)]" -
    # TODO confirm against the installed flair version).
    label_parts = lyrics_df['flair_sentiment'].str[1:-1].str.split(
        '(', expand=True)
    lyrics_df.loc[:, 'sentiment_label'] = (
        label_parts.iloc[:, 0].fillna('').str.strip())
    if label_parts.shape[1] > 1:
        lyrics_df.loc[:, 'sentiment_probability'] = (
            label_parts.iloc[:, 1].str[:-1].fillna(0))
    else:
        # No row contained a "(score)" part (e.g. every lyric was empty), so
        # the split produced a single column; fall back to the same 0 that
        # missing probabilities get instead of raising IndexError.
        lyrics_df.loc[:, 'sentiment_probability'] = 0

    lyrics_df.loc[:, 'sentiment'] = lyrics_df['sentiment_label'].apply(
        lambda label: 1 if label == 'POSITIVE'
        else -1 if label == 'NEGATIVE' else 0)

    # Signed score: distance of the probability from 0.5, with the sign taken
    # from the label (0 when the label is neither POSITIVE nor NEGATIVE).
    lyrics_df.loc[:, 'sentiment_score'] = (
        lyrics_df['sentiment_probability'].astype(float) - 0.5)
    lyrics_df.loc[:, 'sentiment_score'] = lyrics_df[
        'sentiment_score'] * lyrics_df['sentiment']

    # NOTE(review): chdir changes the working directory for the whole
    # process; kept because callers may rely on it, but writing to an
    # explicit path would be cleaner.
    os.chdir(folder / '{}'.format(artist))
    lyrics_df.to_csv(
        'lyrics_final.csv',
        index=False,
    )

    return lyrics_df
from pymongo import MongoClient
import pandas as pd
import numpy as np
from flair.models import TextClassifier
from flair.data import Sentence 
import logging
# Configure root logging so that only ERROR-level (and above) records are shown.
logging.basicConfig(level=logging.ERROR)

# ----- MongoDB connection -----
# Client for the MongoDB server at localhost:27017; `db` is the `covML`
# database and `data_col` its `scrapedData` collection.
client = MongoClient('localhost', 27017)
db = client.covML
data_col = db.scrapedData
# ------------------------------

# flair text classifier loaded once at import time under the 'en-sentiment'
# model name and reused for every prediction in this module.
flair_sentiment = TextClassifier.load('en-sentiment')

## 
def analyze_sentiment(headline):
    """Classify ``headline`` with the module-level flair model.

    Args:
        headline: Text to wrap in a flair ``Sentence`` and classify.

    Returns:
        The first predicted label converted with ``to_dict()``; callers in
        this module read its ``'value'`` and ``'confidence'`` entries.
    """
    sentence = Sentence(headline)
    flair_sentiment.predict(sentence)
    top_label = sentence.labels[0]
    return top_label.to_dict()

##
def analyseSentiments():
    myNews=data_col.find()
    df =  pd.DataFrame(myNews)
    del df['_id']
    df['Result'] = np.array([analyze_sentiment(headline)['value'] for headline in df['headline']])
    df['confidence'] = np.array([analyze_sentiment(headline)['confidence'] for headline in df['headline']])
    data_col.delete_many({})