def corpus():
    """Build a ``rel_ext.Corpus`` from the bundled ``corpus.tsv.gz`` file.

    The file path is resolved relative to the real (symlink-resolved)
    directory containing this source file: one level up, then into
    ``data/rel_ext_data``.

    Returns
    -------
    The object returned by ``rel_ext.Corpus`` for that path.
    """
    # Anchor on this file's location rather than the current working
    # directory, so the result does not depend on where the code is run from.
    here = os.path.dirname(os.path.realpath(__file__))
    data_subdir = os.path.join('data', 'rel_ext_data')
    corpus_path = os.path.join(here, '..', data_subdir, 'corpus.tsv.gz')
    return rel_ext.Corpus(corpus_path)
import numpy as np import os import rel_ext from sklearn.linear_model import LogisticRegression import utils # As usual, we unite our corpus and KB into a dataset, and create some splits for experimentation: # In[3]: rel_ext_data_home = os.path.join('data', 'rel_ext_data') # In[4]: corpus = rel_ext.Corpus(os.path.join(rel_ext_data_home, 'corpus.tsv.gz')) # In[5]: kb = rel_ext.KB(os.path.join(rel_ext_data_home, 'kb.tsv.gz')) # In[6]: dataset = rel_ext.Dataset(corpus, kb) # You are not wedded to this set-up for splits. The bake-off will be conducted on a previously unseen test-set, so all of the data in `dataset` is fair game: # In[7]: splits = dataset.build_splits(split_names=['tiny', 'train', 'dev'], split_fracs=[0.01, 0.79, 0.20],