Exemple #1
0
def make_material_feats(part, force):
    """Generate the Material feature directories for one data part.

    Four feature sets (lempos, word, wordnet, dependency) are produced with
    ``generate_feats`` from the directory ``LOCAL_DIR/<part>/spacy``. A set
    is only (re)generated when ``force`` is true or when its output
    directory does not exist yet.

    Args:
        part: name of the data part; used in the input path and as the
            ``_<part>`` prefix of every output directory.
        force: when true, regenerate features even if the output directory
            already exists.

    Returns:
        A list with the lempos, word and wordnet feature directories, in
        that order. The dependency feature directory is generated as well
        but is not included in the returned list.
    """
    parses_dir = join(LOCAL_DIR, part, 'spacy')
    out_root = '_{}/Material'.format(part)

    def outdated(feats_dir):
        # A feature set has to be built when forced or when it is missing.
        return force or not exists(feats_dir)

    lempos_feats_dir = out_root + '/lempos_feats'
    if outdated(lempos_feats_dir):
        generate_feats(parses_dir, lempos_feats_dir, lemma_pos_feats, nlp=nlp)

    word_feats_dir = out_root + '/word_feats'
    if outdated(word_feats_dir):
        generate_feats(
            parses_dir,
            word_feats_dir,
            lambda sent: word_feats(sent, context_size=1),
            nlp=nlp,
        )

    wn_feats_dir = out_root + '/wordnet_feats'
    if outdated(wn_feats_dir):
        generate_feats(
            parses_dir,
            wn_feats_dir,
            lambda s: wordnet_feats(s, context_size=2),
            nlp=nlp,
        )

    dep_feats_dir = out_root + '/dep_feats'
    if outdated(dep_feats_dir):
        generate_feats(
            parses_dir,
            dep_feats_dir,
            lambda s: dep_feats(s, context_size=2),
            nlp=nlp,
        )

    # Only these three directories are handed back to the caller.
    return [lempos_feats_dir, word_feats_dir, wn_feats_dir]
Exemple #2
0
def make_task_feats(part, force):
    """Generate the Task feature directories for one data part.

    Three feature sets (lempos, word, dependency) are produced with
    ``generate_feats`` from the directory ``LOCAL_DIR/<part>/spacy``. A set
    is only (re)generated when ``force`` is true or when its output
    directory does not exist yet.

    Args:
        part: name of the data part; used in the input path and as the
            ``_<part>`` prefix of every output directory.
        force: when true, regenerate features even if the output directory
            already exists.

    Returns:
        A list with the lempos and word feature directories, in that order.
        The dependency feature directory is generated as well but is not
        included in the returned list.
    """
    parses_dir = join(LOCAL_DIR, part, 'spacy')
    out_root = '_{}/Task'.format(part)

    def outdated(feats_dir):
        # A feature set has to be built when forced or when it is missing.
        return force or not exists(feats_dir)

    lempos_feats_dir = out_root + '/lempos_feats'
    if outdated(lempos_feats_dir):
        generate_feats(
            parses_dir,
            lempos_feats_dir,
            lambda s: lemma_pos_feats(s, context_size=1),
            nlp=nlp,
        )

    word_feats_dir = out_root + '/word_feats'
    if outdated(word_feats_dir):
        generate_feats(
            parses_dir,
            word_feats_dir,
            lambda sent: word_feats(sent, context_size=0),
            nlp=nlp,
        )

    dep_feats_dir = out_root + '/dep_feats'
    if outdated(dep_feats_dir):
        generate_feats(
            parses_dir,
            dep_feats_dir,
            lambda sent: dep_feats(sent, context_size=1),
            nlp=nlp,
        )

    # Only these two directories are handed back to the caller.
    return [lempos_feats_dir, word_feats_dir]
Exemple #3
0
            token_feats['{}:wnhypernym3'.format(j)] = wnhypernym3

        sent_feats.append(token_feats)

    return sent_feats



# Step 1: Generate features
# Input is read from LOCAL_DIR/train/spacy; the features2 output is written
# to _train/features2 (relative to the current working directory).

spacy_dir = join(LOCAL_DIR, 'train', 'spacy')
feats_dir = join('_train', 'features2')

# If you want to save time by reusing feats from crf1-exp/py,
# comment out the line below:
generate_feats(spacy_dir, feats_dir, features2)

# Step 2: Collect data for running CRF classifier
# The IOB directory and the feature directory generated above are passed
# together to collect_crf_data.

true_iob_dir = join(LOCAL_DIR, 'train', 'iob')

data = collect_crf_data(true_iob_dir, feats_dir)

# Step 3: Create folds

# create folds from complete texts only (i.e. instances of the same text
# are never in different folds)
# TODO How to set seed for random generator?
# NOTE(review): no seed is passed here; presumably this is scikit-learn's
# GroupKFold -- confirm against the installed version whether the split is
# already deterministic or whether a random_state argument is available.
group_k_fold = GroupKFold(n_splits=5)

# use same split for all three entities
Exemple #4
0
                for synset in synsets:
                    try:
                        token_feats['{}:{}'.format(j, synset.hypernyms()[0].hypernyms()[0].hypernyms()[0].name())] = 1
                    except:
                        pass

            #token_feats['{}:lemma'.format(j)] = lemma
            #token_feats['{}:pos'.format(j)] = pos

        sent_feats.append(token_feats)

    return sent_feats



# Generate features with the wnfeats1 extractor; spacy_dir and wn_feats_dir
# are defined earlier in the script (outside this excerpt).
generate_feats(spacy_dir, wn_feats_dir, wnfeats1)


# Step 2: Run experiments

# One CRF configuration is shared by all entity labels.
crf = PruneCRF(c1=0.1, c2=0.1, all_possible_transitions=True)
# Three feature directories are passed together to every run.
feat_dirs = [base_feats_dir, wn_feats_dir, word_feats_dir]
# Maps each entity label to the result of its cross-validation run.
preds = {}

for label in ENTITIES:
    preds[label] = run_exp_train_cv(crf, feat_dirs, label, n_folds=5)


# Step 3: Evaluate

eval_exp_train(preds)
Exemple #5
0
from os.path import join

from sklearn_crfsuite import CRF

from sie import ENTITIES, LOCAL_DIR
from sie.feats import generate_feats, features1
from sie.exp import run_exp_test, eval_exp_train

# Step 1: Generate features
# The same extractor (features1) is applied to the train, dev and test
# parts; each part is read from LOCAL_DIR/<part>/spacy and written to
# _<part>/features1 (relative to the current working directory).

train_spacy_dir = join(LOCAL_DIR, 'train', 'spacy')
train_base_feats_dir = join('_train', 'features1')

# If you want to save time by reusing existing feats, comment out the line below:
generate_feats(train_spacy_dir, train_base_feats_dir, features1)

dev_spacy_dir = join(LOCAL_DIR, 'dev', 'spacy')
dev_base_feats_dir = join('_dev', 'features1')

# If you want to save time by reusing existing feats, comment out the line below:
generate_feats(dev_spacy_dir, dev_base_feats_dir, features1)

test_spacy_dir = join(LOCAL_DIR, 'test', 'spacy')
test_base_feats_dir = join('_test', 'features1')

# Unlike train and dev, the original carries no "comment out" hint here.
generate_feats(test_spacy_dir, test_base_feats_dir, features1)

# Step 2: Run experiments

# CRF from sklearn_crfsuite with explicit c1/c2 values and
# all_possible_transitions switched on.
crf = CRF(c1=0.1, c2=0.1, all_possible_transitions=True)
Exemple #6
0
from os.path import join

from sie import ENTITIES, LOCAL_DIR, EXPS_DIR
from sie.crf import PruneCRF
from sie.exp import run_exp_train_cv, eval_exp_train
from sie.feats import generate_feats, wordnet_feats

# Step 1: Generate features

# Feature directories located under EXPS_DIR; they are not generated by this
# script (presumably written by the crf1 and wordfeats experiments --
# confirm they exist before running).
base_feats_dir = join(EXPS_DIR, 'crf1/_train/features1')
word_feats_dir = join(EXPS_DIR, 'wordfeats/_train/wordfeats1')

# The wordnet features are the only ones generated here.
spacy_dir = join(LOCAL_DIR, 'train', 'spacy')
wn_feats_dir = join('_train', 'wnfeats1')
generate_feats(spacy_dir, wn_feats_dir, wordnet_feats)

# Step 2: Run experiments

# PruneCRF is built with its default arguments; the trailing comment keeps
# previously tried settings.
crf = PruneCRF()  #c1=0.1, c2=0.1, min_freq=5)#, all_possible_transitions=True)
feat_dirs = [base_feats_dir, wn_feats_dir, word_feats_dir]
# Maps each entity label to the result of its cross-validation run.
preds = {}

# NOTE(review): the slice restricts the run to the first entry of ENTITIES;
# this looks like a quick-test leftover -- confirm before relying on results.
for label in ENTITIES[:1]:
    preds[label] = run_exp_train_cv(crf,
                                    feat_dirs,
                                    label,
                                    n_folds=5,
                                    n_jobs=-1)

# Step 3: Evaluate
# NOTE(review): no evaluation call is visible in this excerpt, so preds is
# not consumed here.
Exemple #7
0
with word features
"""

from os.path import join

from sie.crf import PruneCRF
from sie import ENTITIES, LOCAL_DIR, EXPS_DIR
from sie.feats import generate_feats, word_feats
from sie.exp import run_exp_train_cv, eval_exp_train

# Step 1: Generate features
# Word features are generated from LOCAL_DIR/train/spacy into
# _train/wordfeats1 (relative to the current working directory).

spacy_dir = join(LOCAL_DIR, 'train', 'spacy')
word_feats_dir = join('_train', 'wordfeats1')

# The lambda fixes context_size=1 so generate_feats receives a
# single-argument callable.
generate_feats(spacy_dir, word_feats_dir,
               lambda sent: word_feats(sent, context_size=1))

# Step 2: Run experiments

# PruneCRF is built with its default arguments; the trailing comment keeps
# previously tried settings.
crf = PruneCRF()  #c1=0.1, c2=0.1, all_possible_transitions=True)

# Base features are read from EXPS_DIR and are not generated by this script
# (presumably produced by the crf1 experiment -- confirm they exist).
base_feats_dir = join(EXPS_DIR, 'crf1/_train/features1')
feat_dirs = [base_feats_dir, word_feats_dir]
# Maps each entity label to the result of its cross-validation run.
preds = {}

for label in ENTITIES:
    preds[label] = run_exp_train_cv(crf, feat_dirs, label)

# Step 3: Evaluate

eval_exp_train(preds)
Exemple #8
0
from os.path import join

from sie import ENTITIES, LOCAL_DIR, EXPS_DIR
from sie.crf import PruneCRF
from sie.exp import run_exp_train_cv, eval_exp_train
from sie.feats import generate_feats, brown_feats

# Step 1: Generate features
# Brown features are generated from LOCAL_DIR/train/spacy into
# _train/brown_feats (relative to the current working directory).

spacy_dir = join(LOCAL_DIR, 'train', 'spacy')
brown_feats_dir = join('_train', 'brown_feats')
# Feature directories under EXPS_DIR; they are not generated by this script
# (presumably written by the crf1 and wordfeats experiments -- confirm they
# exist before running).
base_feats_dir = join(EXPS_DIR, 'crf1/_train/features1')
word_feats_dir = join(EXPS_DIR, 'wordfeats/_train/wordfeats1')

generate_feats(spacy_dir, brown_feats_dir, brown_feats)

# Step 2: Run experiments

# PruneCRF is built with its default arguments; the trailing comment keeps
# previously tried settings.
crf = PruneCRF()  #c1=0.1, c2=0.1, all_possible_transitions=True)

# base_feats_dir is defined once in Step 1; the identical second assignment
# that used to sit here was redundant and has been dropped.
feat_dirs = [base_feats_dir, brown_feats_dir, word_feats_dir]
# Maps each entity label to the result of its cross-validation run.
preds = {}

for label in ENTITIES:
    preds[label] = run_exp_train_cv(crf, feat_dirs, label)

# Step 3: Evaluate

eval_exp_train(preds)
Exemple #9
0
from os.path import join

from sie import ENTITIES, LOCAL_DIR, EXPS_DIR
from sie.crf import PruneCRF
from sie.exp import run_exp_train_cv, eval_exp_train
from sie.feats import generate_feats, dep_feats

# Step 1: Generate features

# Feature directories under EXPS_DIR; they are defined here but currently
# commented out of feat_dirs below, so they are not used by this excerpt.
base_feats_dir = join(EXPS_DIR, 'best/_train/Material/lempos_feats')
word_feats_dir = join(EXPS_DIR, 'best/_train/Material/word_feats')
wordnet_feats_dir = join(EXPS_DIR, 'best/_train/Material/wordnet_feats')

# Dependency features are the only ones generated here; the lambda fixes
# context_size=1 so generate_feats receives a single-argument callable.
spacy_dir = join(LOCAL_DIR, 'train', 'spacy')
dep_feats_dir = join('_train', 'dep_feats')
generate_feats(spacy_dir, dep_feats_dir,
               lambda sent: dep_feats(sent, context_size=1))

# Step 2: Run experiments

# PruneCRF is built with its default arguments; the trailing comment keeps
# previously tried settings.
crf = PruneCRF()  # c1=0.1, c2=0.1, all_possible_transitions=True)

# Only the dependency features are active; uncomment entries to combine
# them with the other feature sets.
feat_dirs = [
    #base_feats_dir,
    #word_feats_dir,
    #wordnet_feats_dir,
    dep_feats_dir
]

# Filled per entity label by the loop that follows.
preds = {}

for label in ENTITIES:
"""

from os.path import join

from sklearn_crfsuite import CRF

from sie import ENTITIES, LOCAL_DIR
from sie.feats import generate_feats, features1
from sie.exp import run_exp_train, eval_exp_train

# Step 1: Generate features
# Features are generated from LOCAL_DIR/train/spacy into _train/features1
# (relative to the current working directory).

spacy_dir = join(LOCAL_DIR, 'train', 'spacy')
base_feats_dir = join('_train', 'features1')

# If you want to save time by reusing existing feats, comment out the line below:
generate_feats(spacy_dir, base_feats_dir, features1)

# Step 2: Run experiments

# CRF from sklearn_crfsuite with explicit c1/c2 values and
# all_possible_transitions switched on; one instance is shared by all labels.
crf = CRF(c1=0.1, c2=0.1, all_possible_transitions=True)
feat_dirs = [base_feats_dir]
# Maps each entity label to the result of its run_exp_train call.
preds = {}

for label in ENTITIES:
    preds[label] = run_exp_train(crf, feat_dirs, label)

# Step 3: Evaluate

eval_exp_train(preds)