Code Example #1
def spacy_sent_benchmark(datasets):

    nlpS = load_spacy_model(textcat='sentiment', vectorError=True)

    for dataset in datasets:
        if dataset == 'euparlsent':
            data = EuroparlSentiment1()
        elif dataset == 'lccsent':
            data = LccSentiment()

        df = data.load_with_pandas()

        df['valence'] = df['valence'].map(to_label)

        # predict with spacy sentiment
        def predict(x):
            doc = nlpS(x)
            pred = max(doc.cats.items(), key=operator.itemgetter(1))[0]
            # match the labels
            labels = {
                'positiv': 'positive',
                'neutral': 'neutral',
                'negativ': 'negative'
            }
            return labels[pred]

        df['pred'] = df.text.map(predict)

        report(df['valence'], df['pred'], 'Spacy sentiment (polarity)',
               dataset)
Code Example #2
def spacy_sent_benchmark(datasets):
    
    nlpS = load_spacy_model(textcat='sentiment', vectorError=True)
   
    for dataset in datasets:
        if dataset == 'euparlsent':
            data = EuroparlSentiment1()
        elif dataset == 'lccsent':
            data = LccSentiment()

        df = data.load_with_pandas()
        
        df['valence'] = df['valence'].map(sentiment_score_to_label)
        
        # predict with spacy sentiment 
        def predict(x):
            doc = nlpS(x)
            pred = max(doc.cats.items(), key=operator.itemgetter(1))[0]
            # match the labels
            labels = {'positiv': 'positive', 'neutral': 'neutral', 'negativ': 'negative'}
            return labels[pred]

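        # map predictions back to the Danish label spellings used in the gold data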
        spellings_map = {
            'subjective': 'subjektivt', 'objective': 'objektivt',
            'positive': 'positiv', 'negative': 'negativ', 'neutral': 'neutral'
        }
        start = time.time()
        df['pred'] = df.text.map(lambda x: spellings_map[predict(x)])
        print_speed_performance(start, len(df))

        f1_report(df['valence'], df['pred'], 'Spacy sentiment (polarity)', dataset)
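Both snippets above rely on a score-to-label helper (to_label in #1, sentiment_score_to_label here) that discretizes the numeric valence column into polarity labels; it is defined elsewhere in the benchmark module. For reproducing the example standalone, a minimal sketch could look like this (the thresholds and the Danish label spellings are assumptions, not the exact danlp definition):

def sentiment_score_to_label(score):
    # illustrative thresholds: positive above zero, negative below zero, neutral otherwise
    if score > 0:
        return 'positiv'
    if score < 0:
        return 'negativ'
    return 'neutral'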
Code Example #3
def benchmark_spacy_mdl():
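    # spaCy marks the root token as its own head; convert to CoNLL-style
    # indexing, where 0 denotes the root and other heads are 1-based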
    def normalize_spacy_head(i, hd):
        return 0 if i == hd else hd + 1

    nlp = load_spacy_model()
    parser = nlp.parser

    start = time.time()

    deps_pred = []
    for sent in sentences_tokens:
        doc = nlp.tokenizer.tokens_from_list(sent)
        doc = parser(doc)

        deprels = []
        depheads = []
        for i, tok in enumerate(doc):
            deprels.append(tok.dep_.lower())
            depheads.append(normalize_spacy_head(i, tok.head.i))
        deps_pred.append([(r, h) for r, h in zip(deprels, depheads)])

    print('**Spacy model**')
    print_speed_performance(start, num_sentences, num_tokens)

    assert len(deps_pred) == num_sentences
    assert sum([len(s) for s in deps_pred]) == num_tokens

    print(dependency_report(deps_true, deps_pred))
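nlp.tokenizer.tokens_from_list is an older spaCy API (deprecated in v2 and removed in v3). If you run this benchmark against spaCy v3, the usual replacement is to construct the Doc from the pre-tokenized words directly; a minimal sketch, reusing nlp, parser and sent from the function above:

from spacy.tokens import Doc

# build a Doc from an already-tokenized sentence (the v3 equivalent of
# tokenizer.tokens_from_list), then run the dependency parser on it
doc = parser(Doc(nlp.vocab, words=sent))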
Code Example #4
def benchmark_spacy_mdl():
    nlp = load_spacy_model()
    ner = nlp.entity

    predictions = []
    start = time.time()
    for sent in sentences_tokens:
        doc = nlp.tokenizer.tokens_from_list(sent)
        ner(doc)
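        # rebuild BIO tags from spaCy's token-level IOB and entity-type fields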
        ents = []
        for t in doc:
            if t.ent_iob_ == 'O':
                ents.append(t.ent_iob_)
            else:
                ents.append(t.ent_iob_ + "-" + t.ent_type_)

        predictions.append(ents)

    print("Made predictions on {} sentences and {} tokens in {}s".format(
        num_sentences, num_tokens, time.time() - start)
    )

    assert len(predictions) == num_sentences

    print(classification_report(sentences_entities, remove_miscs(predictions),
                                digits=4))
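The remove_miscs helper is not shown in this excerpt; judging by its use, it strips MISC entities from the predictions before scoring. A hypothetical sketch (the exact danlp implementation may differ):

def remove_miscs(sentences):
    # hypothetical: replace MISC tags with 'O' so that only
    # PER/LOC/ORG predictions are evaluated
    return [['O' if 'MISC' in tag else tag for tag in sent]
            for sent in sentences]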
Code Example #5
File: pos_benchmarks.py Project: bplank/danlp
def benchmark_spacy_mdl():
    nlp = load_spacy_model()
    tagger = nlp.tagger

    start = time.time()

    tags_pred = []
    for sent in sentences_tokens:
        doc = nlp.tokenizer.tokens_from_list(sent)
        doc = tagger(doc)

        tags = []
        for tok in doc:
            tags.append(tok.pos_)

        tags_pred.append(tags)
    print('**Spacy model**')
    print("Made predictions on {} sentences and {} tokens in {}s".format(
        num_sentences, num_tokens,
        time.time() - start))

    assert len(tags_pred) == num_sentences
    assert sum([len(s) for s in tags_pred]) == num_tokens

    print(classification_report(tags_true, tags_pred, digits=4))
Code Example #6
File: test_spacy_model.py Project: bplank/danlp
    def test_predictions(self):

        nlp = load_spacy_model()
        some_text = "Jeg gik en tur med Lars"
        doc = nlp(some_text)
        self.assertTrue(doc.is_parsed)
        self.assertTrue(doc.is_nered)
        self.assertTrue(doc.is_tagged)
Code Example #7
    def test_predictions(self):

        nlp = load_spacy_model(textcat='sentiment')
        some_text = "Vi er glade for spacy!"
        doc = nlp(some_text)
        self.assertTrue(doc.is_parsed)
        self.assertTrue(doc.is_nered)
        self.assertTrue(doc.is_tagged)
        self.assertEqual(max(doc.cats.items(), key=operator.itemgetter(1))[0], 'positiv')
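The operator.itemgetter argmax over doc.cats can also be written with the dict itself, which is equivalent and slightly more compact:

# pick the textcat label with the highest score
predicted = max(doc.cats, key=doc.cats.get)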
Code Example #8
def spacy_benchmark():
    nlpS = load_spacy_model(textcat='sentiment', vectorError=True)

    # predict with spacy sentiment
    def predict(x):
        doc = nlpS(x)
        return max(doc.cats.items(), key=operator.itemgetter(1))[0]

    df_val['spacy'] = df_val.text.map(predict)

    report(df_val['polarity'], df_val['spacy'], 'Spacy',
           "twitter_sentiment(val)")
Code Example #9
    def test_predictions(self):
        nlp = load_spacy_model()
        some_text = "Jeg gik en tur med Lars Bo Jensen i går"
        doc = nlp(some_text)
        self.assertTrue(doc.is_parsed)
        self.assertTrue(doc.is_nered)
        self.assertTrue(doc.is_tagged)

        chunker = load_spacy_chunking_model(spacy_model=nlp)
        chunks_from_text = chunker.predict(some_text)
        chunks_from_tokens = chunker.predict([t.text for t in doc])
        self.assertEqual(chunks_from_text, chunks_from_tokens)
        self.assertEqual(len(chunks_from_text), len(doc))
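The chunker returns exactly one BIO noun-phrase tag per token, which is what the final length assertion checks. Pairing tokens with their chunk tags is then straightforward; a small usage sketch:

# print each token together with its predicted NP-chunk tag
for token, tag in zip(doc, chunks_from_text):
    print(token.text, tag)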
Code Example #10
File: danish_sent.py Project: snorre87/wordviz
    def __init__(self, hisia=True):
        try:
            from afinn import Afinn
            self.afinn = Afinn(language='da')
        except Exception:
            print('afinn not installed')
            self.afinn = False
        try:
            from sentida import Sentida
            self.sent = Sentida()
        except Exception:
            print('sentida not loading')
            self.sent = False
        try:
            from danlp.models import load_bert_emotion_model
            self.classifier = load_bert_emotion_model()
        except Exception:
            self.classifier = False
            print('bert emotion not loading')

        try:
            from danlp.models import load_bert_tone_model
            self.classifier_tone = load_bert_tone_model()
        except Exception:
            print('bert tone not working')
            self.classifier_tone = False
        try:
            from danlp.models import load_spacy_model
            self.nlp = load_spacy_model(
                textcat='sentiment', vectorError=True
            )  # if you get an error saying da.vectors was not found, setting vectorError=True is a temporary fix
        except Exception:
            print('spacy sentiment not working')
            self.nlp = False
        if hisia:
            try:
                from hisia import Hisia
                self.hisia = Hisia
            except Exception:
                self.hisia = False
                print('hisia not working')
        else:
            self.hisia = False
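A hypothetical usage of this wrapper: the excerpt does not show the class name or its scoring methods, so SentimentEnsemble is an assumed stand-in and only construction plus the AFINN backend are demonstrated.

# SentimentEnsemble is a placeholder name for the class defined above
analyzer = SentimentEnsemble(hisia=False)
if analyzer.afinn:
    # Afinn.score returns the summed valence score of the text
    print(analyzer.afinn.score('Jeg er glad'))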
Code Example #11
def benchmark_spacy_mdl():
    nlp = load_spacy_model()
    ner = nlp.entity

    predictions = []
    start = time.time()
    for sent in sentences_tokens:
        doc = nlp.tokenizer.tokens_from_list(sent)
        ner(doc)
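        # rebuild BIO tags from spaCy's token-level IOB and entity-type fields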
        ents = []
        for t in doc:
            if t.ent_iob_ == 'O':
                ents.append(t.ent_iob_)
            else:
                ents.append(t.ent_iob_ + "-" + t.ent_type_)

        predictions.append(ents)
    print('spaCy:')
    print_speed_performance(start, num_sentences, num_tokens)

    assert len(predictions) == num_sentences

    print(f1_report(sentences_entities, remove_miscs(predictions), bio=True))
Code Example #12
def benchmark_spacy_mdl():
    nlp = load_spacy_model()
    tagger = nlp.tagger

    start = time.time()

    tags_pred = []
    for sent in sentences_tokens:
        doc = nlp.tokenizer.tokens_from_list(sent)
        doc = tagger(doc)

        tags = []
        for tok in doc:
            tags.append(tok.pos_)

        tags_pred.append(tags)
    print('**Spacy model**')
    print_speed_performance(start, num_sentences, num_tokens)

    assert len(tags_pred) == num_sentences
    assert sum([len(s) for s in tags_pred]) == num_tokens

    print(accuracy_report(tags_true, tags_pred), end="\n\n")
Code Example #13
from utils.lipht_visualization import topic_distribution_barplot
from utils.lipht_lda_utils import PrepareDictionary, lda_predict_df, df_lda_features, get_topics_and_probability, get_lda_topics, df_lda_preprocessing, TrainLDAModel
from utils.lipht_data import getEngine, save_model, list_to_stringlist, stringlist_to_list

import sys
import warnings

import logging.config

if not sys.warnoptions:
    warnings.simplefilter("ignore")

# Load the Danish spaCy model from danlp
import spacy
from danlp.models import load_spacy_model
nlp = load_spacy_model()

# Load models to train
from train_pipeline import pipe as train_pipeline


###############
#### Define global variables
###############

# establish a connection to the SQL server
engine = getEngine('THN-P53','GRE')


###############
#### Start main loop
###############