def test_semeval_all_roberta(
    roberta_subst_generator, semeval_all_dataset_reader
):
    """
    Compare the "mean_metrics" of a LexSubEvaluation run, built from the
    ``roberta_subst_generator`` and ``semeval_all_dataset_reader`` fixtures,
    against pinned reference values.

    Reproduction command:
    python lexsubgen/evaluations/lexsub.py solve
        --substgen-config-path configs/subst_generators/lexsub/roberta.jsonnet
        --dataset-config-path configs/dataset_readers/lexsub/semeval_all.jsonnet
        --run-dir='debug/lexsub-all-models/semeval_all_roberta'
        --force
        --experiment-name='lexsub-all-models'
        --run-name='semeval_all_roberta'
    """
    evaluation = LexSubEvaluation(
        substitute_generator=roberta_subst_generator,
        dataset_reader=semeval_all_dataset_reader,
    )
    scores = evaluation.evaluate()["mean_metrics"]

    # (metric, reference value, relative tolerance). A tolerance of ``None``
    # leaves pytest.approx at its default tolerance.
    expectations = (
        ("gap", 50.14, 0.01),
        ("gap_normalized", 56.74, 0.02),
        ("gap_vocab_normalized", 62.22, 0.01),
        ("prec@1", 32.25, None),
        ("prec@3", 24.26, None),
        ("rec@10", 36.65, None),
        ("precision", 0.01, None),
        ("recall", 79.24, None),
        ("f1_score", 0.02, None),
    )
    for metric, reference, rel_tol in expectations:
        # On failure, report the full metrics dict for easier diagnosis.
        assert scores[metric] == pytest.approx(reference, rel=rel_tol), str(scores)
# Example #2
# 0
def test_coinco_c2v_embs(c2v_embs_subst_generator, coinco_dataset_reader):
    """
    Compare the "mean_metrics" of a LexSubEvaluation run, built from the
    ``c2v_embs_subst_generator`` and ``coinco_dataset_reader`` fixtures,
    against pinned reference values.

    Reproduction command:
    python lexsubgen/evaluations/lexsub.py solve
        --substgen-config-path configs/subst_generators/lexsub/c2v_embs.jsonnet
        --dataset-config-path configs/dataset_readers/lexsub/coinco.jsonnet
        --run-dir='debug/lexsub-all-models/coinco_c2v_embs'
        --force
        --experiment-name='lexsub-all-models'
        --run-name='coinco_c2v_embs'
    """
    evaluation = LexSubEvaluation(
        substitute_generator=c2v_embs_subst_generator,
        dataset_reader=coinco_dataset_reader,
    )
    scores = evaluation.evaluate()["mean_metrics"]

    # (metric, reference value, relative tolerance). A tolerance of ``None``
    # leaves pytest.approx at its default tolerance.
    expectations = (
        ("gap_normalized", 50.73, 0.02),
        ("prec@1", 29.64, None),
        ("prec@3", 24.0, None),
        ("rec@10", 21.97, None),
    )
    for metric, reference, rel_tol in expectations:
        # On failure, report the full metrics dict for easier diagnosis.
        assert scores[metric] == pytest.approx(reference, rel=rel_tol), str(scores)
# Example #3
# 0
def test_semeval_all_c2v(c2v_subst_generator, semeval_all_dataset_reader):
    """
    Compare the "mean_metrics" of a LexSubEvaluation run, built from the
    ``c2v_subst_generator`` and ``semeval_all_dataset_reader`` fixtures,
    against pinned reference values.

    Reproduction command:
    python lexsubgen/evaluations/lexsub.py solve
        --substgen-config-path configs/subst_generators/lexsub/c2v.jsonnet
        --dataset-config-path configs/dataset_readers/lexsub/semeval_all.jsonnet
        --run-dir='debug/lexsub-all-models/semeval_all_c2v'
        --force
        --experiment-name='lexsub-all-models'
        --run-name='semeval_all_c2v'
    """
    evaluation = LexSubEvaluation(
        substitute_generator=c2v_subst_generator,
        dataset_reader=semeval_all_dataset_reader,
    )
    scores = evaluation.evaluate()["mean_metrics"]

    # (metric, reference value, relative tolerance). A tolerance of ``None``
    # leaves pytest.approx at its default tolerance.
    expectations = (
        ("gap_normalized", 55.82, 0.02),
        ("prec@1", 7.79, None),
        ("prec@3", 5.92, None),
        ("rec@10", 11.03, None),
    )
    for metric, reference, rel_tol in expectations:
        # On failure, report the full metrics dict for easier diagnosis.
        assert scores[metric] == pytest.approx(reference, rel=rel_tol), str(scores)
# Example #4
# 0
def test_semeval_all_bert_embs(
    bert_embs_subst_generator, semeval_all_dataset_reader
):
    """
    Compare the "mean_metrics" of a LexSubEvaluation run, built from the
    ``bert_embs_subst_generator`` and ``semeval_all_dataset_reader`` fixtures,
    against pinned reference values.

    Reproduction command:
    python lexsubgen/evaluations/lexsub.py solve
        --substgen-config-path configs/subst_generators/lexsub/bert_embs.jsonnet
        --dataset-config-path configs/dataset_readers/lexsub/semeval_all.jsonnet
        --run-dir='debug/lexsub-all-models/semeval_all_bert_embs'
        --force
        --experiment-name='lexsub-all-models'
        --run-name='semeval_all_bert_embs'
    """
    evaluation = LexSubEvaluation(
        substitute_generator=bert_embs_subst_generator,
        dataset_reader=semeval_all_dataset_reader,
    )
    scores = evaluation.evaluate()["mean_metrics"]

    # (metric, reference value, relative tolerance). A tolerance of ``None``
    # leaves pytest.approx at its default tolerance.
    expectations = (
        ("gap_normalized", 53.87, 0.02),
        ("prec@1", 41.64, None),
        ("prec@3", 30.59, None),
        ("rec@10", 43.88, None),
    )
    for metric, reference, rel_tol in expectations:
        # On failure, report the full metrics dict for easier diagnosis.
        assert scores[metric] == pytest.approx(reference, rel=rel_tol), str(scores)
# Example #5
# 0
def test_coinco_ooc(ooc_subst_generator, coinco_dataset_reader):
    """
    Compare the "mean_metrics" of a LexSubEvaluation run, built from the
    ``ooc_subst_generator`` and ``coinco_dataset_reader`` fixtures,
    against pinned reference values.

    Reproduction command:
    python lexsubgen/evaluations/lexsub.py solve
        --substgen-config-path configs/subst_generators/lexsub/ooc.jsonnet
        --dataset-config-path configs/dataset_readers/lexsub/coinco.jsonnet
        --run-dir='debug/lexsub-all-models/coinco_ooc'
        --force
        --experiment-name='lexsub-all-models'
        --run-name='coinco_ooc'
    """
    evaluation = LexSubEvaluation(
        substitute_generator=ooc_subst_generator,
        dataset_reader=coinco_dataset_reader,
    )
    scores = evaluation.evaluate()["mean_metrics"]

    # (metric, reference value, relative tolerance). A tolerance of ``None``
    # leaves pytest.approx at its default tolerance.
    expectations = (
        ("gap_normalized", 46.3, 0.02),
        ("prec@1", 19.58, None),
        ("prec@3", 15.03, None),
        ("rec@10", 12.99, None),
    )
    for metric, reference, rel_tol in expectations:
        # On failure, report the full metrics dict for easier diagnosis.
        assert scores[metric] == pytest.approx(reference, rel=rel_tol), str(scores)
def test_semeval_all_roberta_embs(
    roberta_embs_subst_generator, semeval_all_dataset_reader
):
    """
    Compare the "mean_metrics" of a LexSubEvaluation run, built from the
    ``roberta_embs_subst_generator`` and ``semeval_all_dataset_reader``
    fixtures, against pinned reference values.

    Reproduction command:
    python lexsubgen/evaluations/lexsub.py solve
        --substgen-config-path configs/subst_generators/lexsub/roberta_embs.jsonnet
        --dataset-config-path configs/dataset_readers/lexsub/semeval_all.jsonnet
        --run-dir='debug/lexsub-all-models/semeval_all_roberta_embs'
        --force
        --experiment-name='lexsub-all-models'
        --run-name='semeval_all_roberta_embs'
    """
    evaluation = LexSubEvaluation(
        substitute_generator=roberta_embs_subst_generator,
        dataset_reader=semeval_all_dataset_reader,
    )
    scores = evaluation.evaluate()["mean_metrics"]

    # (metric, reference value, relative tolerance). A tolerance of ``None``
    # leaves pytest.approx at its default tolerance.
    expectations = (
        ("gap", 52.17, 0.01),
        ("gap_normalized", 58.74, 0.02),
        ("gap_vocab_normalized", 64.42, 0.01),
        ("prec@1", 43.19, None),
        ("prec@3", 31.19, None),
        ("rec@10", 44.61, None),
        ("prec@10", 16.31, None),
    )
    for metric, reference, rel_tol in expectations:
        # On failure, report the full metrics dict for easier diagnosis.
        assert scores[metric] == pytest.approx(reference, rel=rel_tol), str(scores)