def test_build_dataset():
    """Smoke-test ``nli.build_dataset`` with an SNLI train reader.

    The reader is created with ``samp_percentage=0.01`` and the feature
    function maps every (x, y) pair to the same single-feature dict.
    No assertions are made: the test passes if the call does not raise.
    """
    def constant_phi(x, y):
        # Same feature dict regardless of the inputs.
        return {"$UNK": 1}

    sampled_reader = nli.SNLITrainReader(snli_home, samp_percentage=0.01)
    nli.build_dataset(
        reader=sampled_reader,
        phi=constant_phi,
        vectorizer=None,
        vectorize=True,
    )
# Because SNLI and MultiNLI are huge, we can't afford to do experiments on
# the full datasets all the time. Thus, we will mainly work within the
# training sets, using the train readers to sample smaller datasets that can
# then be divided for training and assessment.
#
# Here, we sample 10% of the training examples. I set the random seed
# (`random_state=42`) so that we get consistency across the samples; setting
# `random_state=None` will give new random samples each time.
#
# In[18]:
# train_reader = nli.SNLITrainReader(SNLI_HOME, samp_percentage=0.10, random_state=42)
#
# An experimental dataset can be built directly from the reader and a
# feature function:
#
# In[19]:
# dataset = nli.build_dataset(train_reader, word_overlap_phi)
#
# In[20]:
# dataset.keys()
#
# However, it's more efficient to use `nli.experiment` to bring all these
# pieces together. This wrapper will work for all the models we consider.
#
# In[21]:
# _ = nli.experiment(train_reader=nli.SNLITrainReader(SNLI_HOME, samp_percentage=0.10, random_state=42), phi=word_overlap_phi, train_func=fit_softmax_with_crossvalidation, assess_reader=None,
def test_build_dataset():
    """Smoke-test ``nli.build_dataset`` with an ``nli.NLIReader``.

    The reader wraps ``snli['train']`` with ``samp_percentage=0.01`` and the
    feature function maps every example to the same single-feature dict.
    No assertions are made: the test passes if the call does not raise.
    """
    def constant_phi(ex):
        # Same feature dict regardless of the example.
        return {"$UNK": 1}

    sampled_reader = nli.NLIReader(snli['train'], samp_percentage=0.01)
    nli.build_dataset(
        reader=sampled_reader,
        phi=constant_phi,
        vectorizer=None,
        vectorize=True,
    )