def search(clf, params_list):
    """Evaluate one classifier under several pipeline parameter settings.

    For each parameter dict in ``params_list`` a new ``SentimentClassifier``
    is built with ``clf``, the parameters are applied to its ``_pipeline``
    via ``set_params``, the model is fitted on ``X_train``/``y_train`` and
    its predictions for ``X_dev`` are evaluated against ``y_dev`` (these four
    names are read from the enclosing scope). A table with one row per
    setting is printed, holding ``ev.accuracy()`` under ``'acc'``,
    ``ev.macro_f1()`` under ``'f1'`` and the parameters themselves, sorted by
    ``'acc'`` then ``'f1'`` in descending order.

    Args:
        clf: Value forwarded as the ``clf`` argument of
            ``SentimentClassifier``.
        params_list: Iterable of dicts; each is unpacked as keyword
            arguments to the pipeline's ``set_params``.

    Returns:
        None. The table is only printed.
    """
    results = []
    for params in params_list:
        # A new classifier and evaluator are created for each setting.
        sc = SentimentClassifier(clf=clf)
        ev = Evaluator()
        sc._pipeline.set_params(**params)
        sc.fit(X_train, y_train)
        y_pred = sc.predict(X_dev)
        ev.evaluate(y_dev, y_pred)
        # Keys coming from params are written last, so a parameter named
        # 'acc' or 'f1' would replace the score stored under that key.
        results.append({'acc': ev.accuracy(), 'f1': ev.macro_f1(), **params})

    results_df = pd.DataFrame(results)
    # With no settings evaluated the frame has no 'acc'/'f1' columns and
    # sort_values would raise KeyError, so only sort when there are rows.
    if not results_df.empty:
        results_df = results_df.sort_values(['acc', 'f1'], ascending=False)
    print(results_df)
# Predict a label for every instance in X.
y_pred = model.predict(X)
if opts['-r']:
    # FIXME: broken
    # Per-instance scores used by the ranking metrics below: models whose
    # _clf name starts with 'svm' go through decision_function, every other
    # model through predict_proba.
    if model._clf.startswith('svm'):
        Y_pred = model.decision_function(X)
    else:
        Y_pred = model.predict_proba(X)

out_filename = opts['-o']
if out_filename:
    # FIXME: broken
    # Write one "<tweetid>\t<prediction>" line per tweet. The context
    # manager closes the file even if a write raises part-way through.
    with open(out_filename, 'w') as f:
        # X stays in the zip so the number of rows written is still bounded
        # by the shortest of the three sequences; its items are not used.
        for t, _, pred in zip(reader.tweets(), X, y_pred):
            f.write('{}\t{}\n'.format(t['tweetid'], pred))

# evaluate and print
labels = ['0', '1']
evaluator = Evaluator(labels)
evaluator.evaluate(y_true, y_pred)
if opts['-r']:
    # FIXME: broken
    # `classes` is passed by keyword: recent scikit-learn releases declare
    # it keyword-only, and older releases accept the keyword form as well.
    # NOTE(review): it is not clear from here whether that was the whole
    # breakage the FIXME refers to -- confirm before removing the marker.
    Y_true = label_binarize(y_true, classes=model._pipeline.classes_)
    evaluator.roc_auc(Y_true, Y_pred)
    evaluator.rank_error(y_true, Y_pred)
if opts['--short']:
    evaluator.print_short_results()
else:
    evaluator.print_results()
    evaluator.print_confusion_matrix()
'TASS/GeneralTASS/general-tweets-train-tagged.xml', simple=True) X2, y2 = list(reader2.X()), list(reader2.y()) X, y = X1 + X2, y1 + y2 # load development corpus (for evaluation) reader = InterTASSReader('TASS/InterTASS/TASS2017_T1_development.xml') Xdev, y_true = list(reader.X()), list(reader.y()) # create model and evaluator instances # train model model_type = opts['-m'] if model_type == 'clf': model = models[model_type](clf=opts['-c']) else: model = models[model_type]() # baseline evaluator = Evaluator() N = len(X) for i in reversed(range(8)): n = int(N / 2**i) this_X = X[:n] this_y = y[:n] # train, test and evaluate model.fit(this_X, this_y) y_pred = model.predict(Xdev) evaluator.evaluate(y_true, y_pred) # print this data point: acc = evaluator.accuracy() f1 = evaluator.macro_f1()
if opts['--final']: reader = InterTASSReader( corpus, res_filename="InterTASS/ES/TASS2017_T1_test_res.qrel") else: reader = InterTASSReader(corpus) X, y_true = list(reader.X()), list(reader.y()) # normalize #X = model.normalize(X) # classify y_pred = model.predict(X) # evaluate and print evaluator = Evaluator() evaluator.evaluate(y_true, y_pred) evaluator.print_results() evaluator.print_confusion_matrix() # detailed confusion matrix, for result analysis cm_items = defaultdict(list) for i, (true, pred) in enumerate(zip(y_true, y_pred)): cm_items[true, pred] += [i] # Save results to file my_file = Path("results.csv") f_exists = my_file.is_file() res = evaluator.get_results() if "ES" in opts['-c']: