# NOTE(review): the line below is a whitespace-collapsed fragment of a
# Python 2 script (it uses print statements). Its line breaks were lost, so as
# written everything after the first '#' is parsed as a comment. It begins
# inside a branch whose header is not visible here and stops partway through
# the `elif args.a` branch, so the original indentation must be restored from
# the upstream file before it can run.
#
# Incremental part: for each name k in inc_transforms, transforms[k] is
# appended to inc_transforms_cls, a predictor is built from the list so far,
# and the accuracy returned by RunCV(...).run_cv() is stored in
# df_out[k, 'accuracy-cv']. The run_test() accuracy is written to
# 'accuracy-test' under the `if args.t` check; cells never written keep the
# initial NaN.
#
# Ablation part: the feature names come from args.f when it is truthy,
# otherwise from the built-in comma-separated default. Each name becomes a
# one-element list, and df_out is indexed by '-' + str(list) for each of them.
#
# NOTE(review): DataFrame.ix has been deprecated and later removed in newer
# pandas releases; .loc is the label-based replacement -- confirm the pandas
# version this script targets before changing it.
print 'Incremental test' df_out = pd.DataFrame(index=inc_transforms, columns=['accuracy-cv', 'accuracy-test'], data=np.nan) inc_transforms_cls = [] for i, k in enumerate(inc_transforms): inc_transforms_cls.append(transforms[k]) print 'Using features:', inc_transforms[:i + 1] p = predictor(inc_transforms_cls) cv_score = RunCV(X, y, p, display=False).run_cv() df_out.ix[k, 'accuracy-cv'] = cv_score.accuracy if args.t: # calculate test set accuracy score test_score = run_test(X, y, test_data, p, display=True) df_out.ix[k, 'accuracy-test'] = test_score.accuracy print(df_out) elif args.a: # ablation print 'Ablation test' if args.f: features = args.f else: features = "BoW-S,Q,BoUg,BoBg,PPDB,RootDep,NegAlgn,SVO" ablations = [[x] for x in features.split(',')] df_out = pd.DataFrame(index=['-' + str(a) for a in ablations], columns=['accuracy-cv', 'accuracy-test'], data=np.nan) inc_transforms_cls = [transforms[t] for t in inc_transforms]
import sys
import os

# Extend the import path with ../src before the `model` imports below are
# resolved; this statement has to stay ahead of them.
sys.path.append(os.path.join('..', 'src'))

from model.utils import get_dataset, split_data, run_test
from model.baseline.baseline_predictors import ProbabilityPredictor, ChancePredictor, \
    MajorityPredictor, WordOverlapBaselinePredictor

if __name__ == '__main__':
    # Load the training split and separate it into X and y, then load the
    # held-out test split that every baseline is scored against.
    train_data = get_dataset('url-versions-2015-06-14-clean-train.csv')
    X, y = split_data(train_data)
    test_data = get_dataset('url-versions-2015-06-14-clean-test.csv')

    # (banner, predictor class) pairs, listed in the order they are reported.
    # Each predictor is instantiated only when its turn comes.
    baselines = (
        ('\n>> Chance predictor <<\n', ChancePredictor),
        ('\n>> Majority predictor <<\n', MajorityPredictor),
        ('\n>> Probability predictor <<\n', ProbabilityPredictor),
        ('\n>> Word overlap predictor <<\n', WordOverlapBaselinePredictor),
    )

    for banner, predictor_cls in baselines:
        print(banner)
        print(run_test(X, y, test_data, predictor_cls()))
# NOTE(review): the line below is a whitespace-collapsed fragment of a
# Python 2 script (it uses print statements). Its line breaks were lost, so as
# written everything after the first '#' is parsed as a comment. It begins
# inside a check whose header is not visible here and stops partway through
# the `elif args.a` branch, so the original indentation must be restored from
# the upstream file before it can run.
#
# Opening statements: print the unrecognised feature names held in `diff` and
# call sys.exit(1); after that the selected feature set (inc_transforms) is
# printed.
#
# Incremental branch (`if args.i`): for each name k in inc_transforms,
# transforms[k] is appended to inc_transforms_cls and a predictor is built
# from the list so far. Both RunCV(...).run_cv() and run_test() are called,
# and their accuracies are stored in the 'accuracy-cv' and 'accuracy-test'
# columns of df_out.
#
# Ablation branch (`elif args.a`): the feature names come from args.f when it
# is truthy, otherwise from the built-in comma-separated default. Each name
# becomes a one-element list, and df_out is indexed by '-' + str(list) for
# each of them.
#
# NOTE(review): DataFrame.ix has been deprecated and later removed in newer
# pandas releases; .loc is the label-based replacement -- confirm the pandas
# version this script targets before changing it.
print 'Unrecognised features:', diff sys.exit(1) print 'Feature set:', inc_transforms if args.i: # incremental print 'Incremental test' df_out = pd.DataFrame(index=inc_transforms, columns=['accuracy-cv', 'accuracy-test'], data=np.nan) inc_transforms_cls = [] for i, k in enumerate(inc_transforms): inc_transforms_cls.append(transforms[k]) print 'Using features:', inc_transforms[:i+1] p = predictor(inc_transforms_cls) cv_score = RunCV(X, y, p, display=True).run_cv() test_score = run_test(X, y, test_data, p, display=True) df_out.ix[k, 'accuracy-cv'] = cv_score.accuracy df_out.ix[k, 'accuracy-test'] = test_score.accuracy print(df_out) elif args.a: # ablation print 'Ablation test' if args.f: features = args.f else: features = "Q,BoW,AlgnW2V,AlgnPPDB,RootDist,NegAlgn,SVO" ablations = [[x] for x in features.split(',')] df_out = pd.DataFrame(index=['-' + str(a) for a in ablations], columns=['accuracy-cv', 'accuracy-test'], data=np.nan)