Exemple #1
0
def corpus():
    """Build a ``rel_ext.Corpus`` from the bundled ``corpus.tsv.gz`` file.

    The file is looked up at ``../data/rel_ext_data/corpus.tsv.gz`` relative
    to the directory containing this source file, so the result does not
    depend on the current working directory.
    """
    # Anchor on the symlink-resolved location of this file.
    here = os.path.dirname(os.path.realpath(__file__))
    data_subdir = os.path.join('data', 'rel_ext_data')
    archive_path = os.path.join(here, '..', data_subdir, 'corpus.tsv.gz')
    return rel_ext.Corpus(archive_path)
Exemple #2
0
import numpy as np
import os
import rel_ext
from sklearn.linear_model import LogisticRegression
import utils

# As usual, we unite our corpus and KB into a dataset, and create some splits for experimentation:

# In[3]:

# Relative path of the directory holding the relation-extraction data files
# ('data/rel_ext_data' on POSIX). Because it is relative, the files below are
# looked up starting from the current working directory.
rel_ext_data_home = os.path.join('data', 'rel_ext_data')

# In[4]:

# Corpus object built from 'corpus.tsv.gz' in the data directory.
corpus = rel_ext.Corpus(os.path.join(rel_ext_data_home, 'corpus.tsv.gz'))

# In[5]:

# Knowledge-base object built from 'kb.tsv.gz' in the same directory.
kb = rel_ext.KB(os.path.join(rel_ext_data_home, 'kb.tsv.gz'))

# In[6]:

# Combine the corpus and the KB into a single dataset; the cells that follow
# call methods on this object (e.g. `build_splits`).
dataset = rel_ext.Dataset(corpus, kb)

# You are not wedded to this setup for splits. The bake-off will be conducted on a previously unseen test set, so all of the data in `dataset` is fair game:

# In[7]:

splits = dataset.build_splits(split_names=['tiny', 'train', 'dev'],
                              split_fracs=[0.01, 0.79, 0.20],