Code Example #1
File: train.py  Project: Zhang-Mengfan/CS224u
def main():
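    # Note: vectorize=False here, presumably because the RNN feature
    # function returns token sequences rather than count dictionaries.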
    exp = nli.experiment(train_reader=nli.SNLITrainReader(SNLI_HOME,
                                                          samp_percentage=1.0),
                         assess_reader=nli.SNLIDevReader(SNLI_HOME,
                                                         samp_percentage=1.0),
                         phi=sentence_encoding_rnn_phi,
                         train_func=fit_bilstm_attention,
                         random_state=None,
                         vectorize=False)
    print(exp)
Code Example #2
def test_experiment(assess_reader):
    def fit_maxent(X, y):
        mod = LogisticRegression(solver='liblinear', multi_class='auto')
        mod.fit(X, y)
        return mod
    nli.experiment(
        train_reader=nli.SNLITrainReader(snli_home, samp_percentage=0.01),
        phi=lambda x, y: {"$UNK": 1},
        train_func=fit_maxent,
        assess_reader=assess_reader,
        random_state=42)
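Here `assess_reader` comes in as a test parameter. A hypothetical pytest parametrization (not shown on this page; `snli_home` is assumed to be defined in the test module) might supply it like this:

import pytest

@pytest.mark.parametrize("assess_reader", [
    None,
    nli.SNLIDevReader(snli_home, samp_percentage=0.01)])
def test_experiment(assess_reader):
    ...  # body as in Code Example #2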
Code Example #3
def test_experiment():
    def fit_maxent(X, y):
        mod = LogisticRegression()
        mod.fit(X, y)
        return mod

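    # With assess_reader=None, nli.experiment divides the sampled training
    # data into its own training and assessment portions.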
    nli.experiment(train_reader=nli.SNLITrainReader(samp_percentage=0.01),
                   phi=lambda x, y: {"$UNK": 1},
                   train_func=fit_maxent,
                   assess_reader=None,
                   random_state=42)
Code Example #4
def fit_softmax_with_crossvalidation(X, y):
    # Hypothetical completion: the function header, `basemod`, and `cv` are truncated on this page.
    basemod = LogisticRegression(solver='liblinear', multi_class='auto')
    cv = 3
    param_grid = {'C': [0.4, 0.6, 0.8, 1.0], 'penalty': ['l1', 'l2']}
    best_mod = utils.fit_classifier_with_crossvalidation(
        X, y, basemod, cv, param_grid)
    return best_mod


# ### Assessment
#
# Because SNLI and MultiNLI are huge, we can't afford to do experiments on the full datasets all the time. Thus, we will mainly work within the training sets, using the train readers to sample smaller datasets that can then be divided for training and assessment.
#
# Here, we sample 10% of the training examples. I set the random seed (`random_state=42`) so that we get consistency across the samples; setting `random_state=None` will give new random samples each time.

# In[18]:

train_reader = nli.SNLITrainReader(SNLI_HOME,
                                   samp_percentage=0.10,
                                   random_state=42)

# An experimental dataset can be built directly from the reader and a feature function:

# In[19]:

dataset = nli.build_dataset(train_reader, word_overlap_phi)

# In[20]:

dataset.keys()
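# `build_dataset` returns a dict; its keys here are typically
# 'X', 'y', 'vectorizer', and 'raw_examples'.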

# However, it's more efficient to use `nli.experiment` to bring all these pieces together. This wrapper will work for all the models we consider.

# In[21]:
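# The code for In[21] is not reproduced on this page. A minimal sketch of the
# call, assuming the `word_overlap_phi` feature function from above and a
# cross-validated softmax train function like the one completed in Code
# Example #4, would follow the same pattern as the snippets above:

_ = nli.experiment(train_reader=nli.SNLITrainReader(SNLI_HOME,
                                                    samp_percentage=0.10,
                                                    random_state=42),
                   phi=word_overlap_phi,
                   train_func=fit_softmax_with_crossvalidation,
                   assess_reader=None,
                   random_state=42)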
Code Example #5
def test_build_dataset():
    nli.build_dataset(reader=nli.SNLITrainReader(snli_home,
                                                 samp_percentage=0.01),
                      phi=lambda x, y: {"$UNK": 1},
                      vectorizer=None,
                      vectorize=True)