from pathlib import Path
import os

if __name__ == '__main__':
    # Evaluate a pickled model on a corpus read with InterTASSReader.
    opts = docopt(__doc__)

    # load model
    # NOTE(security): pickle.load can execute arbitrary code contained in the
    # file, so only model files from a trusted source should be passed to -i.
    filename = opts['-i']
    # The context manager closes the file even if unpickling raises.
    with open(filename, 'rb') as f:
        model = pickle.load(f)

    # load evaluation corpus
    corpus = opts['-c']
    if opts['--final']:
        # The final run additionally passes a results (.qrel) file to the
        # reader.
        reader = InterTASSReader(
            corpus, res_filename="InterTASS/ES/TASS2017_T1_test_res.qrel")
    else:
        reader = InterTASSReader(corpus)
    X, y_true = list(reader.X()), list(reader.y())

    # normalize (step currently disabled)
    # X = model.normalize(X)

    # classify
    y_pred = model.predict(X)

    # evaluate and print
    evaluator = Evaluator()
    evaluator.evaluate(y_true, y_pred)
    evaluator.print_results()
stats.py [options] -f <folder> stats.py -h | --help Options: -f <folder> Folder containing TASS corpora. -h --help Show this screen. """ from docopt import docopt from os import path from sentiment.tass import InterTASSReader from collections import Counter if __name__ == '__main__': opts = docopt(__doc__) # load corpora filenames = [ f'intertass-{location}-train-tagged.xml' for location in ['ES', 'CR', 'PE'] ] folder = opts['-f'] for filename in filenames: filepath = path.join(folder, filename) reader = InterTASSReader(filepath) y = list(reader.y()) print('***') print(filename) print('tweets:', len(y)) print(Counter(y))
from sentiment.evaluator import Evaluator
from sentiment.tass import InterTASSReader

if __name__ == '__main__':
    # Evaluate a pickled model on the InterTASS development or test corpus.
    opts = docopt(__doc__)

    # load model
    # NOTE(security): pickle.load can execute arbitrary code contained in the
    # file, so only model files from a trusted source should be passed to -i.
    filename = opts['-i']
    # The context manager closes the file even if unpickling raises.
    with open(filename, 'rb') as f:
        model = pickle.load(f)

    # load corpus: the test set (with its .qrel results file) when --final
    # is given, the development set otherwise
    if opts['--final']:
        reader = InterTASSReader(
            'TASS/InterTASS/TASS2017_T1_test.xml',
            'TASS/InterTASS/TASS2017_T1_test_res.qrel')
    else:
        reader = InterTASSReader(
            'TASS/InterTASS/TASS2017_T1_development.xml')
    X, y_true = list(reader.X()), list(reader.y())

    # classify
    y_pred = model.predict(X)

    # evaluate and print
    evaluator = Evaluator()
    evaluator.evaluate(y_true, y_pred)
    evaluator.print_results()
    evaluator.print_confusion_matrix()

    # detailed confusion matrix, for result analysis
from sentiment.evaluator import Evaluator from sentiment.tass import InterTASSReader if __name__ == '__main__': opts = docopt(__doc__) # load model filename = opts['-i'] f = open(filename, 'rb') model = pickle.load(f) f.close() # load evaluation corpus corpus = opts['-c'] final = opts.get('-f', None) reader = InterTASSReader(corpus, final) X, y_true = list(reader.X()), list(reader.y()) # classify y_pred = model.predict(X) # evaluate and print evaluator = Evaluator() evaluator.evaluate(y_true, y_pred) evaluator.print_results() evaluator.print_confusion_matrix() # detailed confusion matrix, for result analysis cm_items = defaultdict(list) for i, (true, pred) in enumerate(zip(y_true, y_pred)):
def count_tweets(path):
    """Count how often each label occurs in the corpus at *path*.

    The corpus is read with ``InterTASSReader`` and the values yielded by
    its ``y()`` method are tallied.

    Returns a ``(distribution, total)`` tuple: a plain dict mapping each
    label to its number of occurrences, and the sum of all those counts.
    """
    label_counts = Counter(InterTASSReader(path).y())
    total = sum(label_counts.values())
    return dict(label_counts), total
from sentiment.tass import InterTASSReader
from sentiment.baselines import MostFrequent
from sentiment.classifier import SentimentClassifier

# Maps the value of the -m option to the model class it selects.
models = {
    'basemf': MostFrequent,
    'clf': SentimentClassifier,
}

if __name__ == '__main__':
    opts = docopt(__doc__)

    # Training corpus, taken from -i.
    corpus = opts['-i']
    reader = InterTASSReader(corpus)
    X = list(reader.X())
    y = list(reader.y())

    # Development corpus, taken from -d.
    development = opts['-d']
    reader = InterTASSReader(development)
    X_dev = list(reader.X())
    y_dev = list(reader.y())

    # Instantiate the requested model; only 'clf' receives the -c value,
    # every other entry (the baseline) is built without arguments.
    model_type = opts['-m']
    model_cls = models[model_type]
    model = model_cls(clf=opts['-c']) if model_type == 'clf' else model_cls()

    # Fit on the training data, handing the development data to fit() too.
    model.fit(X, y, X_dev, y_dev)

    # save model
from sentiment.baselines import MostFrequent
from sentiment.classifier import SentimentClassifier

# Maps the value of the -m option to the model class it selects.
models = {
    'basemf': MostFrequent,
    'clf': SentimentClassifier,
}

if __name__ == '__main__':
    opts = docopt(__doc__)

    # Read the corpus named by -i.
    corpus = opts['-i']
    reader = InterTASSReader(corpus)
    X = list(reader.X())
    y = list(reader.y())

    # Instantiate the requested model; only 'clf' receives the -c value,
    # every other entry (the baseline) is built without arguments.
    model_type = opts['-m']
    model_cls = models[model_type]
    model = model_cls(clf=opts['-c']) if model_type == 'clf' else model_cls()

    # With --train the model is fitted on the corpus; without it the model's
    # cross_validation() is run on the same data instead.
    run = model.fit if opts['--train'] else model.cross_validation
    run(X, y)
import pickle

from sentiment.tass import InterTASSReader, GeneralTASSReader
from sentiment.baselines import MostFrequent
from sentiment.classifier import SentimentClassifier

# Maps the value of the -m option to the model class it selects.
models = {
    'basemf': MostFrequent,
    'clf': SentimentClassifier,
}

if __name__ == '__main__':
    opts = docopt(__doc__)

    # First training corpus, read with the InterTASS reader.
    reader1 = InterTASSReader('TASS/InterTASS/tw_faces4tassTrain1000rc.xml')
    X1 = list(reader1.X())
    y1 = list(reader1.y())

    # Second training corpus, read with the GeneralTASS reader.
    reader2 = GeneralTASSReader(
        'TASS/GeneralTASS/general-tweets-train-tagged.xml',
        simple=True,
    )
    X2 = list(reader2.X())
    y2 = list(reader2.y())

    # Train on both corpora concatenated, first corpus first.
    X = X1 + X2
    y = y1 + y2

    # Instantiate the requested model; only 'clf' receives the -c value,
    # every other entry (the baseline) is built without arguments.
    model_type = opts['-m']
    model_cls = models[model_type]
    model = model_cls(clf=opts['-c']) if model_type == 'clf' else model_cls()

    model.fit(X, y)
matplotlib.use('TkAgg') import matplotlib.pyplot as plt models = { 'basemf': MostFrequent, 'clf': SentimentClassifier, } if __name__ == '__main__': opts = docopt(__doc__) # load training corpus reader1 = InterTASSReader(cfg.tweets['InterTASS']['train']['path']) X1, y1 = list(reader1.X()), list(reader1.y()) reader2 = GeneralTASSReader( cfg.tweets['GeneralTASS']['train']['path'], simple=True) X2, y2 = list(reader2.X()), list(reader2.y()) X, y = X1 + X2, y1 + y2 # load development corpus (for evaluation) reader = InterTASSReader(cfg.tweets['InterTASS']['development']['path']) Xdev, y_true = list(reader.X()), list(reader.y()) # load model if given if opts['-i']: filename = opts['-i'] f = open(filename, 'rb')