# NOTE: the import path below is an assumption; adjust to wherever the library
# under test exposes SemanticEquivalenceMetaClassifier.
from wellcomeml.ml.bert_semantic_equivalence import SemanticEquivalenceMetaClassifier


def test_save_and_load_meta(tmp_path):
    classifier = SemanticEquivalenceMetaClassifier(n_numerical_features=1,
                                                   pretrained="bert",
                                                   batch_size=2,
                                                   eval_batch_size=1)

    # Save and load for Meta Models only accept strings (not PosixPath)
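    # Initialise the underlying models first so there is something to save,
    # since fit() is never called in this test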
    classifier._initialise_models()
    classifier.save(str(tmp_path.absolute()) + '.h5')
    config_1 = classifier.config

    classifier = SemanticEquivalenceMetaClassifier(n_numerical_features=1)
    classifier.load(str(tmp_path.absolute()) + '.h5')

    config_2 = classifier.config

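    # The configuration should survive the save/load round trip unchanged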
    assert config_1 == config_2
Example 2
def test_semantic_meta_fit():
    classifier = SemanticEquivalenceMetaClassifier(n_numerical_features=2,
                                                   pretrained="scibert",
                                                   batch_size=6,
                                                   eval_batch_size=4,
                                                   dropout_rate=0.1,
                                                   batch_norm=True)

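    # Each row of X: two sentences followed by n_numerical_features (=2) numerical features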
    X = [
        ['This sentence has context_1', 'This one also has context_1', 0.1, 0.2],
        ['This sentence has context_2', 'This one also has context_2', 0.2, 0.2],
        ['This sentence is about something else', 'God save the queen', -0.5, -0.5],
    ] * 5

    y = [1, 1, 0] * 5

    classifier.fit(X, y, epochs=5)

    # loss_initial = classifier.history['loss'][0]
    scores = classifier.predict_proba(X)

    # Assert predict returns one label per training point (15) and predict_proba
    # returns 15 * 2 probabilities, all strictly between 0 and 1
    assert len(classifier.predict(X)) == 15
    assert (scores > 0).sum() == 15 * 2
    assert (scores < 1).sum() == 15 * 2
Example 3
def test_semantic_meta_fit():
    classifier = SemanticEquivalenceMetaClassifier(n_numerical_features=2,
                                                   pretrained="scibert",
                                                   batch_size=2,
                                                   eval_batch_size=1,
                                                   dropout_rate=0.1,
                                                   batch_norm=True)

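    # Each row of X: two sentences followed by n_numerical_features (=2) numerical features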
    X = [
        ['This sentence has context_1', 'This one also has context_1', 0.1, 0.2],
        ['This sentence has context_2', 'This one also has context_2', 0.2, 0.2],
        ['This sentence is about something else', 'God save the queen', -0.5, -0.5],
    ]

    y = [1, 1, 0]

    classifier.fit(X, y, epochs=3)

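    # Keep the first-epoch loss so we can later check that further training reduces it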
    loss_initial = classifier.history['loss'][0]
    scores = classifier.score(X)

    assert len(classifier.predict(X)) == 3
    assert (scores > 0).sum() == 6
    assert (scores < 1).sum() == 6

    # Fits two extra epochs

    classifier.fit(X, y, epochs=2)

    # Assert that the classifier keeps appending to its training history
    # (i.e. it does not re-train from scratch)

    assert len(classifier.history['loss']) == 5

    loss_final = classifier.history['loss'][4]

    # Assert that the loss decreased over training
    assert loss_final < loss_initial