예제 #1
0
from pathlib import Path
import os

if __name__ == '__main__':
    # Parse command-line options from the module docstring.
    opts = docopt(__doc__)

    # Load the pickled model. The context manager closes the file even when
    # unpickling raises, which a bare open()/close() pair would not.
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # only pass model files that come from a trusted source.
    filename = opts['-i']
    with open(filename, 'rb') as f:
        model = pickle.load(f)

    # Load the evaluation corpus. With --final the reader is additionally
    # given a hard-coded results file through res_filename.
    corpus = opts['-c']
    if opts['--final']:
        reader = InterTASSReader(
            corpus, res_filename="InterTASS/ES/TASS2017_T1_test_res.qrel")
    else:
        reader = InterTASSReader(corpus)

    # Materialize inputs and gold labels as lists.
    X, y_true = list(reader.X()), list(reader.y())

    # Normalization is currently disabled; the call is kept for reference.
    #X = model.normalize(X)

    # classify
    y_pred = model.predict(X)

    # evaluate and print
    evaluator = Evaluator()
    evaluator.evaluate(y_true, y_pred)
    evaluator.print_results()
예제 #2
0
  stats.py [options] -f <folder>
  stats.py -h | --help

Options:
  -f <folder>   Folder containing TASS corpora.
  -h --help     Show this screen.
"""
from docopt import docopt
from os import path
from sentiment.tass import InterTASSReader
from collections import Counter

if __name__ == '__main__':
    # Parse command-line options from the module docstring.
    opts = docopt(__doc__)
    folder = opts['-f']

    # One tagged training corpus per location, reported in this order.
    for location in ('ES', 'CR', 'PE'):
        filename = f'intertass-{location}-train-tagged.xml'
        corpus_reader = InterTASSReader(path.join(folder, filename))
        labels = list(corpus_reader.y())

        # Print the file name, the number of labels read and how often
        # each label occurs.
        print('***')
        print(filename)
        print('tweets:', len(labels))
        print(Counter(labels))
예제 #3
0
from sentiment.evaluator import Evaluator
from sentiment.tass import InterTASSReader

if __name__ == '__main__':
    # Parse command-line options from the module docstring.
    opts = docopt(__doc__)

    # Load the pickled model. The context manager closes the file even when
    # unpickling raises, which a bare open()/close() pair would not.
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # only pass model files that come from a trusted source.
    filename = opts['-i']
    with open(filename, 'rb') as f:
        model = pickle.load(f)

    # Load the corpus: the development file by default, or the test file
    # together with its .qrel results file when --final is given.
    if not opts['--final']:
        reader = InterTASSReader('TASS/InterTASS/TASS2017_T1_development.xml')
    else:
        reader = InterTASSReader('TASS/InterTASS/TASS2017_T1_test.xml',
                                 'TASS/InterTASS/TASS2017_T1_test_res.qrel')
    # Materialize inputs and gold labels as lists.
    X, y_true = list(reader.X()), list(reader.y())

    # classify
    y_pred = model.predict(X)

    # evaluate and print
    evaluator = Evaluator()
    evaluator.evaluate(y_true, y_pred)
    evaluator.print_results()
    evaluator.print_confusion_matrix()

    # detailed confusion matrix, for result analysis
예제 #4
0
from sentiment.evaluator import Evaluator
from sentiment.tass import InterTASSReader

if __name__ == '__main__':
    # Parse command-line options from the module docstring.
    opts = docopt(__doc__)

    # load model
    # NOTE(review): the file is closed manually; if pickle.load raises, the
    # handle is never closed. A `with open(...)` block would avoid that.
    filename = opts['-i']
    f = open(filename, 'rb')
    model = pickle.load(f)
    f.close()

    # load evaluation corpus
    corpus = opts['-c']
    # '-f' is looked up with a None fallback, so the reader receives None as
    # its second positional argument when the key is missing.
    final = opts.get('-f', None)
    reader = InterTASSReader(corpus, final)

    # Materialize inputs and gold labels as lists.
    X, y_true = list(reader.X()), list(reader.y())

    # classify
    y_pred = model.predict(X)

    # evaluate and print
    evaluator = Evaluator()
    evaluator.evaluate(y_true, y_pred)
    evaluator.print_results()
    evaluator.print_confusion_matrix()

    # detailed confusion matrix, for result analysis
    cm_items = defaultdict(list)
    for i, (true, pred) in enumerate(zip(y_true, y_pred)):
예제 #5
0
파일: stats.py 프로젝트: agusmdev/PLN-2019
def count_tweets(path):
    """Count the labels yielded by an InterTASS reader for ``path``.

    Returns a 2-tuple: a plain dict mapping each label to its number of
    occurrences, and the total number of labels counted.
    """
    labels = InterTASSReader(path).y()
    tally = Counter(labels)
    total = sum(tally.values())
    return dict(tally), total
예제 #6
0
from sentiment.tass import InterTASSReader
from sentiment.baselines import MostFrequent
from sentiment.classifier import SentimentClassifier

# Maps the value of the '-m' option to the model class to instantiate.
models = {'basemf': MostFrequent, 'clf': SentimentClassifier}

if __name__ == '__main__':
    # Parse command-line options from the module docstring.
    opts = docopt(__doc__)

    # Read the training corpus ('-i') first, then the development
    # corpus ('-d').
    train_reader = InterTASSReader(opts['-i'])
    X = list(train_reader.X())
    y = list(train_reader.y())
    dev_reader = InterTASSReader(opts['-d'])
    X_dev = list(dev_reader.X())
    y_dev = list(dev_reader.y())

    # Build the model: only 'clf' receives the '-c' option, every other
    # entry (the baseline) is constructed without arguments.
    model_type = opts['-m']
    model_class = models[model_type]
    if model_type != 'clf':
        model = model_class()
    else:
        model = model_class(clf=opts['-c'])

    # Fit on the training data, passing the development data along.
    model.fit(X, y, X_dev, y_dev)

    # save model
예제 #7
0
파일: train.py 프로젝트: mrcmoresi/PLN-2019
from sentiment.baselines import MostFrequent
from sentiment.classifier import SentimentClassifier


# Maps the value of the '-m' option to the model class to instantiate.
models = {'basemf': MostFrequent, 'clf': SentimentClassifier}


if __name__ == '__main__':
    # Parse command-line options from the module docstring.
    opts = docopt(__doc__)

    # Read inputs and labels from the corpus given with '-i'.
    corpus_reader = InterTASSReader(opts['-i'])
    X = list(corpus_reader.X())
    y = list(corpus_reader.y())

    # Build the model: only 'clf' receives the '-c' option, every other
    # entry (the baseline) is constructed without arguments.
    model_type = opts['-m']
    model_class = models[model_type]
    if model_type != 'clf':
        model = model_class()
    else:
        model = model_class(clf=opts['-c'])

    # --train fits the model on the corpus; otherwise the model's
    # cross_validation method is run on the same data.
    action = model.fit if opts['--train'] else model.cross_validation
    action(X, y)
예제 #8
0
import pickle

from sentiment.tass import InterTASSReader, GeneralTASSReader
from sentiment.baselines import MostFrequent
from sentiment.classifier import SentimentClassifier

# Maps the value of the '-m' option to the model class to instantiate.
models = {'basemf': MostFrequent, 'clf': SentimentClassifier}

if __name__ == '__main__':
    # Parse command-line options from the module docstring.
    opts = docopt(__doc__)

    # Start with the InterTASS data ...
    inter_reader = InterTASSReader(
        'TASS/InterTASS/tw_faces4tassTrain1000rc.xml')
    X = list(inter_reader.X())
    y = list(inter_reader.y())

    # ... and append the GeneralTASS data (read with simple=True) after it.
    general_reader = GeneralTASSReader(
        'TASS/GeneralTASS/general-tweets-train-tagged.xml', simple=True)
    X.extend(general_reader.X())
    y.extend(general_reader.y())

    # Build the model: only 'clf' receives the '-c' option, every other
    # entry (the baseline) is constructed without arguments.
    model_type = opts['-m']
    model_class = models[model_type]
    if model_type != 'clf':
        model = model_class()
    else:
        model = model_class(clf=opts['-c'])

    model.fit(X, y)
예제 #9
0
    matplotlib.use('TkAgg')

import matplotlib.pyplot as plt


# Maps a model name to the class that implements it.
models = {
    'basemf': MostFrequent,
    'clf': SentimentClassifier,
}


if __name__ == '__main__':
    # Parse command-line options from the module docstring.
    opts = docopt(__doc__)

    # load training corpus
    # The training set is the InterTASS data followed by the GeneralTASS
    # data (read with simple=True); both paths come from cfg.tweets.
    reader1 = InterTASSReader(cfg.tweets['InterTASS']['train']['path'])
    X1, y1 = list(reader1.X()), list(reader1.y())
    reader2 = GeneralTASSReader(
        cfg.tweets['GeneralTASS']['train']['path'], simple=True)
    X2, y2 = list(reader2.X()), list(reader2.y())
    X, y = X1 + X2, y1 + y2

    # load development corpus (for evaluation)
    reader = InterTASSReader(cfg.tweets['InterTASS']['development']['path'])
    Xdev, y_true = list(reader.X()), list(reader.y())


    # load model if given
    # NOTE(review): the file is opened without a context manager; the rest
    # of this branch is not visible here, so confirm it is closed on every
    # path.
    if opts['-i']:
      filename = opts['-i']
      f = open(filename, 'rb')