# Example 1
def search(clf, params_list):
    """Grid-search hyper-parameter settings for a sentiment classifier.

    For each parameter dict in *params_list*, trains a fresh
    SentimentClassifier on the module-level training split, evaluates it
    on the dev split, and records accuracy and macro-F1.

    Parameters:
        clf: classifier identifier forwarded to SentimentClassifier.
        params_list: iterable of dicts of pipeline parameters, as accepted
            by sklearn-style ``set_params``.

    Returns:
        pd.DataFrame: one row per setting (acc, f1, plus the params),
        sorted best-first by accuracy then macro-F1. Also printed.

    NOTE(review): relies on module-level X_train, y_train, X_dev, y_dev,
    and reaches into the private ``_pipeline`` attribute of
    SentimentClassifier — confirm whether a public setter exists.
    """
    results = []
    for params in params_list:
        # Fresh model and evaluator per setting so runs are independent.
        sc = SentimentClassifier(clf=clf)
        ev = Evaluator()
        sc._pipeline.set_params(**params)

        sc.fit(X_train, y_train)
        y_pred = sc.predict(X_dev)
        ev.evaluate(y_dev, y_pred)

        results.append({'acc': ev.accuracy(), 'f1': ev.macro_f1(), **params})

    results_df = pd.DataFrame(results)
    # Sort best-first so the top row is the winning configuration.
    results_df = results_df.sort_values(['acc', 'f1'], ascending=False)
    print(results_df)
    # BUGFIX: original built the DataFrame but never returned it, so the
    # search results were unusable programmatically.
    return results_df
# Example 2
    # Concatenate the two training corpora into one dataset.
    # NOTE(review): X1/X2, y1/y2 are loaded above this fragment — not
    # visible here; presumably lists of texts and labels.
    X, y = X1 + X2, y1 + y2

    # load development corpus (for evaluation)
    reader = InterTASSReader('TASS/InterTASS/TASS2017_T1_development.xml')
    Xdev, y_true = list(reader.X()), list(reader.y())

    # create model and evaluator instances
    # train model
    model_type = opts['-m']
    if model_type == 'clf':
        # 'clf' models additionally take the classifier name from option -c.
        model = models[model_type](clf=opts['-c'])
    else:
        model = models[model_type]()  # baseline
    evaluator = Evaluator()

    # Learning curve: train on geometrically growing prefixes of the data
    # (N/128, N/64, ..., N/2, N) and report dev-set metrics at each size.
    N = len(X)
    for i in reversed(range(8)):
        n = int(N / 2**i)  # prefix size; i=7 -> N/128, i=0 -> full N
        this_X = X[:n]
        this_y = y[:n]

        # train, test and evaluate
        # NOTE(review): the same model instance is re-fit each iteration —
        # assumes fit() resets state; confirm against the model class.
        model.fit(this_X, this_y)
        y_pred = model.predict(Xdev)
        evaluator.evaluate(y_true, y_pred)

        # print this data point:
        acc = evaluator.accuracy()
        f1 = evaluator.macro_f1()
        print('n={}, acc={:2.2f}, f1={:2.2f}'.format(n, acc, f1))