Ejemplo n.º 1
0
                   max_iterations=100
                   )

    y_true = labels[target_label]

    gs = GridSearchCV(crf,
                      params_space,
                      cv=folds,
                      verbose=1,
                      n_jobs=n_jobs,
                      scoring=f1_scorer,
                      refit=False)
    gs.fit(X, y_true)

    pprint(gs.cv_results_)

    pkl_fname = 'grid_exp_2_{}.pkl'.format(target_label)
    pickle.dump(gs, open(pkl_fname, 'wb'))

    best_crf = PruneCRF()
    best_crf.set_params(**gs.best_params_)
    print('\nBest CRF:\n')
    pprint(best_crf)

    y_pred = cross_val_predict(best_crf, X, y_true, cv=folds, verbose=2, n_jobs=n_jobs)
    print(flat_classification_report(y_true, y_pred, digits=3, labels=('B', 'I')))

    preds[target_label] = y_pred

eval_exp_train(preds)
Ejemplo n.º 2
0
# Generate features for the training split.
# If you want to save time by reusing existing feats, comment out the line below:
generate_feats(train_spacy_dir, train_base_feats_dir, features1)

# Dev split: first directory is passed to generate_feats as input, second as
# the feature directory (presumably output — confirm in sie.feats).
dev_spacy_dir = join(LOCAL_DIR, 'dev', 'spacy')
dev_base_feats_dir = join('_dev', 'features1')

# If you want to save time by reusing existing feats, comment out the line below:
generate_feats(dev_spacy_dir, dev_base_feats_dir, features1)

# Test split: same directory layout as for dev.
test_spacy_dir = join(LOCAL_DIR, 'test', 'spacy')
test_base_feats_dir = join('_test', 'features1')

generate_feats(test_spacy_dir, test_base_feats_dir, features1)

# Step 2: Run experiments

# A single CRF instance is passed to every per-label run below.
crf = CRF(c1=0.1, c2=0.1, all_possible_transitions=True)
# Each split uses only its base feature directory.
train_feat_dirs = [train_base_feats_dir]
dev_feat_dirs = [dev_base_feats_dir]
test_feat_dirs = [test_base_feats_dir]
# Maps each entity label to the result of its test run.
preds = {}

for label in ENTITIES:
    preds[label] = run_exp_test(crf, train_feat_dirs, dev_feat_dirs,
                                test_feat_dirs, label)

# Step 3: Evaluate

# Even though the test data is unlabeled, this call still generates the Brat
# files to submit.
eval_exp_train(preds, 'test')
Ejemplo n.º 3
0
preds[label] = run_exp_dev(crf, train_feat_dirs, dev_feat_dirs, label)

# ----------------------------------------------------------------------------
# Process and Task
# ----------------------------------------------------------------------------

# Each label gets a fresh PruneCRF and its own train/dev feature directories;
# the result of run_exp_dev is stored under that label.
for label in ('Process', 'Task'):
    crf = PruneCRF()
    train_feat_dirs = make_feats('train', label)
    dev_feat_dirs = make_feats('dev', label)
    preds[label] = run_exp_dev(crf, train_feat_dirs, dev_feat_dirs, label)

# ----------------------------------------------------------------------------
# Evaluate
# ----------------------------------------------------------------------------

# Evaluate all collected predictions on the 'dev' split.
eval_exp_train(preds, 'dev', postproc=postproc_labels,
               zip_fname='best_exp_dev_1.zip')
Ejemplo n.º 4
0
from sie import ENTITIES, LOCAL_DIR
from sie.feats import generate_feats, features1
from sie.exp import run_exp_dev, eval_exp_train

# Step 1: Generate features

# Training split: first directory is passed to generate_feats as input, second
# as the feature directory (presumably output — confirm in sie.feats).
train_spacy_dir = join(LOCAL_DIR, 'train', 'spacy')
train_base_feats_dir = join('_train', 'features1')

# If you want to save time by reusing existing feats, comment out the line below:
generate_feats(train_spacy_dir, train_base_feats_dir, features1)

# Dev split: same directory layout as for train.
dev_spacy_dir = join(LOCAL_DIR, 'dev', 'spacy')
dev_base_feats_dir = join('_dev', 'features1')

generate_feats(dev_spacy_dir, dev_base_feats_dir, features1)

# Step 2: Run experiments

# A single CRF instance is passed to every per-label run below.
crf = CRF(c1=0.1, c2=0.1, all_possible_transitions=True)
# Each split uses only its base feature directory.
train_feat_dirs = [train_base_feats_dir]
dev_feat_dirs = [dev_base_feats_dir]
# Maps each entity label to the result of its dev run.
preds = {}

for label in ENTITIES:
    preds[label] = run_exp_dev(crf, train_feat_dirs, dev_feat_dirs, label)

# Step 3: Evaluate

# Evaluate the per-label predictions on the 'dev' split.
eval_exp_train(preds, 'dev')
Ejemplo n.º 5
0
# ----------------------------------------------------------------------------

# Process and Task follow the same steps: a fresh PruneCRF, per-label feature
# directories for every split, and the result of run_exp_test stored under
# that label.
for label in ('Process', 'Task'):
    crf = PruneCRF()
    train_feat_dirs = make_feats('train', label)
    dev_feat_dirs = make_feats('dev', label)
    test_feat_dirs = make_feats('test', label)
    preds[label] = run_exp_test(
        crf, train_feat_dirs, dev_feat_dirs, test_feat_dirs, label)

# ----------------------------------------------------------------------------
# Evaluate
# ----------------------------------------------------------------------------

# Evaluate all collected predictions for the 'test' split.
eval_exp_train(preds, 'test', postproc=postproc_labels,
               zip_fname='best_exp_test_1.zip')
Ejemplo n.º 6
0
# ----------------------------------------------------------------------------

# Material, Process and Task follow the same steps, in that order: a fresh
# PruneCRF, the label's training feature directories, and the result of a
# 5-fold run_exp_train_cv call stored under that label.
for label in ('Material', 'Process', 'Task'):
    crf = PruneCRF()
    feat_dirs = make_feats('train', label)
    preds[label] = run_exp_train_cv(
        crf, feat_dirs, label, n_folds=5, n_jobs=-1)

# ----------------------------------------------------------------------------
# Evaluate
# ----------------------------------------------------------------------------

# Evaluate all collected predictions with label post-processing applied.
eval_exp_train(preds, postproc=postproc_labels)