Example no. 1
0
def search(clf, params_list):
    """Run a small grid search over pipeline parameter settings.

    For each parameter dict in *params_list*, trains a fresh
    ``SentimentClassifier`` on the module-level ``(X_train, y_train)``
    split, evaluates it on ``(X_dev, y_dev)``, and records accuracy and
    macro-F1 together with the parameters used.

    Parameters
    ----------
    clf : str
        Classifier identifier forwarded to ``SentimentClassifier(clf=...)``.
    params_list : iterable of dict
        Parameter settings applied to the classifier's pipeline via
        ``set_params``.

    Returns
    -------
    pandas.DataFrame
        One row per setting ('acc', 'f1', plus the parameters), sorted
        best-first by accuracy then macro-F1.  The table is also printed.
    """
    results = []
    for params in params_list:
        sc = SentimentClassifier(clf=clf)
        ev = Evaluator()
        # NOTE(review): reaches into the private _pipeline attribute; a
        # public set_params wrapper on SentimentClassifier would be cleaner.
        sc._pipeline.set_params(**params)
        sc.fit(X_train, y_train)
        y_pred = sc.predict(X_dev)
        ev.evaluate(y_dev, y_pred)

        results.append({'acc': ev.accuracy(), 'f1': ev.macro_f1(), **params})

    # Fix: the original built and printed the table but discarded it;
    # return the sorted DataFrame so callers can use the results.
    results_df = pd.DataFrame(results).sort_values(
        ['acc', 'f1'], ascending=False)
    print(results_df)
    return results_df
Example no. 2
0
    # Classify every input example; y_pred holds the discrete label predictions.
    y_pred = model.predict(X)
    if opts['-r']:
        # FIXME: broken
        # Ranking mode: collect real-valued scores (needed for ROC/rank
        # metrics below), not just hard labels.
        # NOTE(review): assumes model._clf is a string id like 'svm...' —
        # TODO confirm against SentimentClassifier.
        if model._clf.startswith('svm'):
            # SVMs expose a decision margin rather than probabilities.
            Y_pred = model.decision_function(X)
        else:
            Y_pred = model.predict_proba(X)

    out_filename = opts['-o']
    if out_filename:
        # FIXME: broken
        # NOTE(review): prefer a 'with open(...)' context manager — the file
        # is left open if a write raises.
        f = open(out_filename, 'w')
        # One tab-separated "<tweetid>\t<label>" line per tweet.
        for t, x, pred in zip(reader.tweets(), X, y_pred):
            f.write('{}\t{}\n'.format(t['tweetid'], pred))
        f.close()

    # evaluate and print
    labels = ['0', '1']
    evaluator = Evaluator(labels)
    evaluator.evaluate(y_true, y_pred)
    if opts['-r']:
        # FIXME: broken
        # Ranking metrics need binarized gold labels plus the real-valued
        # scores computed above (Y_pred is only bound when '-r' is set).
        # NOTE(review): recent scikit-learn requires classes as a keyword:
        # label_binarize(y_true, classes=...) — confirm the sklearn version.
        Y_true = label_binarize(y_true, model._pipeline.classes_)
        evaluator.roc_auc(Y_true, Y_pred)
        evaluator.rank_error(y_true, Y_pred)
    if opts['--short']:
        evaluator.print_short_results()
    else:
        evaluator.print_results()
        evaluator.print_confusion_matrix()
Example no. 3
0
    'TASS/GeneralTASS/general-tweets-train-tagged.xml', simple=True)
    # Merge both training corpora (InterTASS + GeneralTASS) into one set.
    X2, y2 = list(reader2.X()), list(reader2.y())
    X, y = X1 + X2, y1 + y2

    # load development corpus (for evaluation)
    reader = InterTASSReader('TASS/InterTASS/TASS2017_T1_development.xml')
    Xdev, y_true = list(reader.X()), list(reader.y())

    # create model and evaluator instances
    # train model
    model_type = opts['-m']
    if model_type == 'clf':
        # Configurable classifier: '-c' selects the underlying algorithm.
        model = models[model_type](clf=opts['-c'])
    else:
        model = models[model_type]()  # baseline
    evaluator = Evaluator()

    # Learning-curve loop: train on geometrically growing prefixes of the
    # data (N/2^7, N/2^6, ..., N) and evaluate each model on the dev set.
    N = len(X)
    for i in reversed(range(8)):
        n = int(N / 2**i)
        this_X = X[:n]
        this_y = y[:n]

        # train, test and evaluate
        model.fit(this_X, this_y)
        y_pred = model.predict(Xdev)
        evaluator.evaluate(y_true, y_pred)

        # print this data point:
        acc = evaluator.accuracy()
        f1 = evaluator.macro_f1()
Example no. 4
0
    if opts['--final']:
        # Final-run mode: gold labels come from the separate qrel file
        # rather than from the corpus XML itself.
        reader = InterTASSReader(
            corpus, res_filename="InterTASS/ES/TASS2017_T1_test_res.qrel")
    else:
        reader = InterTASSReader(corpus)

    X, y_true = list(reader.X()), list(reader.y())

    # normalize
    #X = model.normalize(X)

    # classify
    y_pred = model.predict(X)

    # evaluate and print
    evaluator = Evaluator()
    evaluator.evaluate(y_true, y_pred)
    evaluator.print_results()
    evaluator.print_confusion_matrix()

    # detailed confusion matrix, for result analysis
    # cm_items[(true, pred)] collects the indices of the examples falling
    # into that confusion-matrix cell.
    cm_items = defaultdict(list)
    for i, (true, pred) in enumerate(zip(y_true, y_pred)):
        cm_items[true, pred] += [i]

    # Save results to file
    my_file = Path("results.csv")
    # presumably f_exists decides whether to write a CSV header first —
    # continuation not shown here; TODO confirm.
    f_exists = my_file.is_file()
    res = evaluator.get_results()

    if "ES" in opts['-c']: