max_iterations=100 ) y_true = labels[target_label] gs = GridSearchCV(crf, params_space, cv=folds, verbose=1, n_jobs=n_jobs, scoring=f1_scorer, refit=False) gs.fit(X, y_true) pprint(gs.cv_results_) pkl_fname = 'grid_exp_2_{}.pkl'.format(target_label) pickle.dump(gs, open(pkl_fname, 'wb')) best_crf = PruneCRF() best_crf.set_params(**gs.best_params_) print('\nBest CRF:\n') pprint(best_crf) y_pred = cross_val_predict(best_crf, X, y_true, cv=folds, verbose=2, n_jobs=n_jobs) print(flat_classification_report(y_true, y_pred, digits=3, labels=('B', 'I'))) preds[target_label] = y_pred eval_exp_train(preds)
# To reuse previously generated train feats (and save time), comment out
# the call below:
generate_feats(train_spacy_dir, train_base_feats_dir, features1)

# Locations of the spaCy input and the generated feats for dev and test.
dev_spacy_dir = join(LOCAL_DIR, 'dev', 'spacy')
dev_base_feats_dir = join('_dev', 'features1')
test_spacy_dir = join(LOCAL_DIR, 'test', 'spacy')
test_base_feats_dir = join('_test', 'features1')

# To reuse previously generated dev feats (and save time), comment out
# the call below:
generate_feats(dev_spacy_dir, dev_base_feats_dir, features1)
generate_feats(test_spacy_dir, test_base_feats_dir, features1)

# Step 2: Run experiments
# The same CRF configuration is passed to every per-label run.
crf = CRF(c1=0.1, c2=0.1, all_possible_transitions=True)
train_feat_dirs = [train_base_feats_dir]
dev_feat_dirs = [dev_base_feats_dir]
test_feat_dirs = [test_base_feats_dir]

preds = {}
for label in ENTITIES:
    preds[label] = run_exp_test(
        crf,
        train_feat_dirs,
        dev_feat_dirs,
        test_feat_dirs,
        label,
    )

# Step 3: Evaluate
# The test data is unlabeled, but this step still generates the Brat files
# to submit.
eval_exp_train(preds, 'test')
# Predictions for the label configured just before this point.
preds[label] = run_exp_dev(crf, train_feat_dirs, dev_feat_dirs, label)

# ----------------------------------------------------------------------------
# Process, then Task
# ----------------------------------------------------------------------------
# Both labels follow identical steps: a new PruneCRF, label-specific train
# and dev feats, then a dev run whose predictions are stored under the label.
for label in ('Process', 'Task'):
    crf = PruneCRF()
    train_feat_dirs = make_feats('train', label)
    dev_feat_dirs = make_feats('dev', label)
    preds[label] = run_exp_dev(crf, train_feat_dirs, dev_feat_dirs, label)

# ----------------------------------------------------------------------------
# Evaluate
# ----------------------------------------------------------------------------
eval_exp_train(
    preds,
    'dev',
    postproc=postproc_labels,
    zip_fname='best_exp_dev_1.zip',
)
from sie import ENTITIES, LOCAL_DIR
from sie.feats import generate_feats, features1
from sie.exp import run_exp_dev, eval_exp_train

# Step 1: Generate features
# Locations of the spaCy input and the generated feats for train and dev.
train_spacy_dir = join(LOCAL_DIR, 'train', 'spacy')
train_base_feats_dir = join('_train', 'features1')
dev_spacy_dir = join(LOCAL_DIR, 'dev', 'spacy')
dev_base_feats_dir = join('_dev', 'features1')

# To reuse previously generated train feats (and save time), comment out
# the call below:
generate_feats(train_spacy_dir, train_base_feats_dir, features1)
generate_feats(dev_spacy_dir, dev_base_feats_dir, features1)

# Step 2: Run experiments
# The same CRF configuration is passed to every per-label run.
crf = CRF(c1=0.1, c2=0.1, all_possible_transitions=True)
train_feat_dirs = [train_base_feats_dir]
dev_feat_dirs = [dev_base_feats_dir]

preds = {}
for label in ENTITIES:
    preds[label] = run_exp_dev(
        crf,
        train_feat_dirs,
        dev_feat_dirs,
        label,
    )

# Step 3: Evaluate
eval_exp_train(preds, 'dev')
# ----------------------------------------------------------------------------
# Process and Task follow identical steps, so one loop handles both
# (Process first, then Task): a new PruneCRF, label-specific train/dev/test
# feats, then a test run whose predictions are stored under the label.
for label in ('Process', 'Task'):
    crf = PruneCRF()
    train_feat_dirs = make_feats('train', label)
    dev_feat_dirs = make_feats('dev', label)
    test_feat_dirs = make_feats('test', label)
    preds[label] = run_exp_test(
        crf,
        train_feat_dirs,
        dev_feat_dirs,
        test_feat_dirs,
        label,
    )

# ----------------------------------------------------------------------------
# Evaluate
# ----------------------------------------------------------------------------
eval_exp_train(
    preds,
    'test',
    postproc=postproc_labels,
    zip_fname='best_exp_test_1.zip',
)
# ----------------------------------------------------------------------------
# Material, Process and Task follow identical steps, so one loop handles all
# three in that order: a new PruneCRF, label-specific train feats, then a
# 5-fold cross-validation run whose predictions are stored under the label.
for label in ('Material', 'Process', 'Task'):
    crf = PruneCRF()
    feat_dirs = make_feats('train', label)
    preds[label] = run_exp_train_cv(
        crf,
        feat_dirs,
        label,
        n_folds=5,
        n_jobs=-1,
    )

# ----------------------------------------------------------------------------
# Evaluate
# ----------------------------------------------------------------------------
eval_exp_train(preds, postproc=postproc_labels)