def test_examine_model_weights(featurizer, vectorize, corpus, kb):
    """Smoke test: train on tiny splits and inspect the learned weights."""
    dataset = rel_ext.Dataset(corpus, kb)
    splits = dataset.build_splits(
        split_names=['tiny_train', 'tiny_dev', 'rest'],
        split_fracs=[0.05, 0.05, 0.90],
        seed=1)
    results = rel_ext.experiment(
        splits,
        train_split='tiny_train',
        test_split='tiny_dev',
        featurizers=[featurizer],
        vectorize=vectorize,
        verbose=False)
    rel_ext.examine_model_weights(results)
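
# A hedged usage sketch: `simple_bag_of_words_featurizer` is an illustrative
# name for any featurizer defined earlier in the notebook; `corpus` and `kb`
# are the dataset objects loaded above.

test_examine_model_weights(
    simple_bag_of_words_featurizer, vectorize=True, corpus=corpus, kb=kb)
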
model_factory = lambda: LogisticRegression(
    solver='liblinear')


# In[12]:


baseline_results = rel_ext.experiment(
    splits,
    train_split='train',
    test_split='dev',
    featurizers=featurizers,
    model_factory=model_factory,
    verbose=True)


# Studying model weights might yield insights:

# In[13]:


rel_ext.examine_model_weights(baseline_results)


# ### Distributed representations
#
# This simple baseline sums the GloVe vector representations for all of the words in the "middle" span and feeds those representations into the standard `LogisticRegression`-based `model_factory`. The crucial parameter that enables this is `vectorize=False`. This essentially says to `rel_ext.experiment` that your featurizer or your model will do the work of turning examples into vectors; in that case, `rel_ext.experiment` just organizes these representations by relation type.

# In[14]:


GLOVE_HOME = os.path.join('data', 'glove.6B')


# In[15]:


glove_lookup = utils.glove2dict(
    os.path.join(GLOVE_HOME, 'glove.6B.300d.txt'))


# In[16]:
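
# A minimal sketch of such a featurizer (an assumption, not necessarily the
# notebook's reference implementation): it sums GloVe vectors over the words
# in each "middle" span, falling back to a random vector when no word is in
# the GloVe vocabulary. `corpus.get_examples_for_entities`, `kbt.sbj`,
# `kbt.obj`, and `utils.randvec` are assumed from the course's `rel_ext` and
# `utils` modules.

import numpy as np

def glove_middle_featurizer(kbt, corpus, np_func=np.sum):
    reps = []
    for ex in corpus.get_examples_for_entities(kbt.sbj, kbt.obj):
        for word in ex.middle.split():
            rep = glove_lookup.get(word)
            if rep is not None:
                reps.append(rep)
    if len(reps) == 0:
        # No overlap with the GloVe vocabulary: return a random vector of
        # the right dimensionality so all examples have consistent shape.
        dim = len(next(iter(glove_lookup.values())))
        return utils.randvec(n=dim)
    return np_func(reps, axis=0)


# Because the featurizer already returns dense vectors, the experiment is
# run with `vectorize=False`, as described above:

glove_results = rel_ext.experiment(
    splits,
    train_split='train',
    test_split='dev',
    featurizers=[glove_middle_featurizer],
    vectorize=False,  # featurizer output is used directly as the input vectors
    verbose=True)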