예제 #1
0
        # Incremental mode: grow the feature set one transform at a time and
        # record the accuracy obtained after each addition.
        print('Incremental test')
        # One row per feature name; every cell starts as NaN until it is filled.
        df_out = pd.DataFrame(index=inc_transforms,
                              columns=['accuracy-cv', 'accuracy-test'],
                              data=np.nan)
        inc_transforms_cls = []
        for i, k in enumerate(inc_transforms):
            inc_transforms_cls.append(transforms[k])
            # A single %-formatted argument prints the same text whether
            # print is a statement (Python 2) or a function (Python 3).
            print('Using features: %s' % (inc_transforms[:i + 1],))

            p = predictor(inc_transforms_cls)
            cv_score = RunCV(X, y, p, display=False).run_cv()
            # .loc is the label-based indexer; .ix was deprecated and later
            # removed from pandas.
            df_out.loc[k, 'accuracy-cv'] = cv_score.accuracy

            if args.t:
                # calculate test set accuracy score
                test_score = run_test(X, y, test_data, p, display=True)
                df_out.loc[k, 'accuracy-test'] = test_score.accuracy
        print(df_out)
    elif args.a:
        # Ablation mode: prepare a results table with one row per feature.
        print 'Ablation test'

        # The feature list comes from args.f when it is set, otherwise from
        # the default comma-separated string below.
        if args.f:
            features = args.f
        else:
            features = "BoW-S,Q,BoUg,BoBg,PPDB,RootDep,NegAlgn,SVO"
        # Wrap each comma-separated name in its own single-element list.
        ablations = [[x] for x in features.split(',')]
        # Row labels are '-' followed by str() of each list (e.g. "-['Q']");
        # every cell starts as NaN.
        df_out = pd.DataFrame(index=['-' + str(a) for a in ablations],
                              columns=['accuracy-cv', 'accuracy-test'],
                              data=np.nan)
        # Look up the `transforms` entry for every name in inc_transforms.
        inc_transforms_cls = [transforms[t] for t in inc_transforms]
예제 #2
0
import sys
import os

sys.path.append(os.path.join('..', 'src'))

from model.utils import get_dataset, split_data, run_test
from model.baseline.baseline_predictors import ProbabilityPredictor, ChancePredictor, \
    MajorityPredictor, WordOverlapBaselinePredictor


if __name__ == '__main__':
    # Load the training CSV and split it into X and y, then load the test CSV.
    train_data = get_dataset('url-versions-2015-06-14-clean-train.csv')
    X, y = split_data(train_data)
    test_data = get_dataset('url-versions-2015-06-14-clean-test.csv')

    # Each baseline is paired with the banner printed before its result.
    # Predictors are instantiated lazily, right before they are passed to
    # run_test, in the order listed here.
    baselines = (
        ('\n>> Chance predictor <<\n', ChancePredictor),
        ('\n>> Majority predictor <<\n', MajorityPredictor),
        ('\n>> Probability predictor <<\n', ProbabilityPredictor),
        ('\n>> Word overlap predictor <<\n', WordOverlapBaselinePredictor),
    )
    for banner, predictor_cls in baselines:
        print(banner)
        print(run_test(X, y, test_data, predictor_cls()))
예제 #3
0
        # Print the contents of diff under an 'Unrecognised features:' label
        # and terminate with exit status 1. (diff is computed above this
        # view; presumably it holds the unknown feature names.)
        print 'Unrecognised features:', diff
        sys.exit(1)
    # Print the contents of inc_transforms under a 'Feature set:' label.
    print 'Feature set:', inc_transforms
    if args.i:
        # Incremental mode: grow the feature set one transform at a time and
        # record cross-validation and test accuracy after each addition.
        print('Incremental test')
        # One row per feature name; every cell starts as NaN until it is filled.
        df_out = pd.DataFrame(index=inc_transforms,
                              columns=['accuracy-cv', 'accuracy-test'], data=np.nan)
        inc_transforms_cls = []
        for i, k in enumerate(inc_transforms):
            inc_transforms_cls.append(transforms[k])
            # A single %-formatted argument prints the same text whether
            # print is a statement (Python 2) or a function (Python 3).
            print('Using features: %s' % (inc_transforms[:i + 1],))

            p = predictor(inc_transforms_cls)
            cv_score = RunCV(X, y, p, display=True).run_cv()
            test_score = run_test(X, y, test_data, p, display=True)

            # .loc is the label-based indexer; .ix was deprecated and later
            # removed from pandas.
            df_out.loc[k, 'accuracy-cv'] = cv_score.accuracy
            df_out.loc[k, 'accuracy-test'] = test_score.accuracy
        print(df_out)
    elif args.a:
        # Ablation mode: prepare a results table with one row per feature.
        print 'Ablation test'

        # The feature list comes from args.f when it is set, otherwise from
        # the default comma-separated string below.
        if args.f:
            features = args.f
        else:
            features = "Q,BoW,AlgnW2V,AlgnPPDB,RootDist,NegAlgn,SVO"
        # Wrap each comma-separated name in its own single-element list.
        ablations = [[x] for x in features.split(',')]
        # Row labels are '-' followed by str() of each list (e.g. "-['Q']");
        # every cell starts as NaN.
        df_out = pd.DataFrame(index=['-' + str(a) for a in ablations],
                              columns=['accuracy-cv', 'accuracy-test'], data=np.nan)