def test_semeval_all_roberta(roberta_subst_generator, semeval_all_dataset_reader):
    """Check RoBERTa substitute-generator metrics on the semeval_all dataset.

    Reproduction command:
    python lexsubgen/evaluations/lexsub.py solve
        --substgen-config-path configs/subst_generators/lexsub/roberta.jsonnet
        --dataset-config-path configs/dataset_readers/lexsub/semeval_all.jsonnet
        --run-dir='debug/lexsub-all-models/semeval_all_roberta'
        --force
        --experiment-name='lexsub-all-models'
        --run-name='semeval_all_roberta'
    """
    metrics = LexSubEvaluation(
        substitute_generator=roberta_subst_generator,
        dataset_reader=semeval_all_dataset_reader,
    ).evaluate()["mean_metrics"]

    # metric name -> (expected value, relative tolerance or None for pytest default)
    expected = {
        "gap": (50.14, 0.01),
        "gap_normalized": (56.74, 0.02),
        "gap_vocab_normalized": (62.22, 0.01),
        "prec@1": (32.25, None),
        "prec@3": (24.26, None),
        "rec@10": (36.65, None),
        "precision": (0.01, None),
        "recall": (79.24, None),
        "f1_score": (0.02, None),
    }
    for name, (value, rel) in expected.items():
        target = pytest.approx(value) if rel is None else pytest.approx(value, rel)
        assert metrics[name] == target, str(metrics)
def test_coinco_c2v_embs(c2v_embs_subst_generator, coinco_dataset_reader):
    """Check context2vec-embeddings substitute-generator metrics on CoInCo.

    Reproduction command:
    python lexsubgen/evaluations/lexsub.py solve
        --substgen-config-path configs/subst_generators/lexsub/c2v_embs.jsonnet
        --dataset-config-path configs/dataset_readers/lexsub/coinco.jsonnet
        --run-dir='debug/lexsub-all-models/coinco_c2v_embs'
        --force
        --experiment-name='lexsub-all-models'
        --run-name='coinco_c2v_embs'
    """
    metrics = LexSubEvaluation(
        substitute_generator=c2v_embs_subst_generator,
        dataset_reader=coinco_dataset_reader,
    ).evaluate()["mean_metrics"]

    # GAP gets an explicit relative tolerance; precision/recall use pytest's default.
    assert metrics["gap_normalized"] == pytest.approx(50.73, 0.02), str(metrics)
    for name, value in (("prec@1", 29.64), ("prec@3", 24.0), ("rec@10", 21.97)):
        assert metrics[name] == pytest.approx(value), str(metrics)
def test_semeval_all_c2v(c2v_subst_generator, semeval_all_dataset_reader):
    """Check context2vec substitute-generator metrics on the semeval_all dataset.

    Reproduction command:
    python lexsubgen/evaluations/lexsub.py solve
        --substgen-config-path configs/subst_generators/lexsub/c2v.jsonnet
        --dataset-config-path configs/dataset_readers/lexsub/semeval_all.jsonnet
        --run-dir='debug/lexsub-all-models/semeval_all_c2v'
        --force
        --experiment-name='lexsub-all-models'
        --run-name='semeval_all_c2v'
    """
    metrics = LexSubEvaluation(
        substitute_generator=c2v_subst_generator,
        dataset_reader=semeval_all_dataset_reader,
    ).evaluate()["mean_metrics"]

    # GAP gets an explicit relative tolerance; precision/recall use pytest's default.
    assert metrics["gap_normalized"] == pytest.approx(55.82, 0.02), str(metrics)
    for name, value in (("prec@1", 7.79), ("prec@3", 5.92), ("rec@10", 11.03)):
        assert metrics[name] == pytest.approx(value), str(metrics)
def test_semeval_all_bert_embs(bert_embs_subst_generator, semeval_all_dataset_reader):
    """Check BERT-embeddings substitute-generator metrics on the semeval_all dataset.

    Reproduction command:
    python lexsubgen/evaluations/lexsub.py solve
        --substgen-config-path configs/subst_generators/lexsub/bert_embs.jsonnet
        --dataset-config-path configs/dataset_readers/lexsub/semeval_all.jsonnet
        --run-dir='debug/lexsub-all-models/semeval_all_bert_embs'
        --force
        --experiment-name='lexsub-all-models'
        --run-name='semeval_all_bert_embs'
    """
    metrics = LexSubEvaluation(
        substitute_generator=bert_embs_subst_generator,
        dataset_reader=semeval_all_dataset_reader,
    ).evaluate()["mean_metrics"]

    # GAP gets an explicit relative tolerance; precision/recall use pytest's default.
    assert metrics["gap_normalized"] == pytest.approx(53.87, 0.02), str(metrics)
    for name, value in (("prec@1", 41.64), ("prec@3", 30.59), ("rec@10", 43.88)):
        assert metrics[name] == pytest.approx(value), str(metrics)
def test_coinco_ooc(ooc_subst_generator, coinco_dataset_reader):
    """Check OOC substitute-generator metrics on the CoInCo dataset.

    Reproduction command:
    python lexsubgen/evaluations/lexsub.py solve
        --substgen-config-path configs/subst_generators/lexsub/ooc.jsonnet
        --dataset-config-path configs/dataset_readers/lexsub/coinco.jsonnet
        --run-dir='debug/lexsub-all-models/coinco_ooc'
        --force
        --experiment-name='lexsub-all-models'
        --run-name='coinco_ooc'
    """
    metrics = LexSubEvaluation(
        substitute_generator=ooc_subst_generator,
        dataset_reader=coinco_dataset_reader,
    ).evaluate()["mean_metrics"]

    # GAP gets an explicit relative tolerance; precision/recall use pytest's default.
    assert metrics["gap_normalized"] == pytest.approx(46.3, 0.02), str(metrics)
    for name, value in (("prec@1", 19.58), ("prec@3", 15.03), ("rec@10", 12.99)):
        assert metrics[name] == pytest.approx(value), str(metrics)
def test_semeval_all_roberta_embs(roberta_embs_subst_generator, semeval_all_dataset_reader):
    """Check RoBERTa-embeddings substitute-generator metrics on the semeval_all dataset.

    Reproduction command:
    python lexsubgen/evaluations/lexsub.py solve
        --substgen-config-path configs/subst_generators/lexsub/roberta_embs.jsonnet
        --dataset-config-path configs/dataset_readers/lexsub/semeval_all.jsonnet
        --run-dir='debug/lexsub-all-models/semeval_all_roberta_embs'
        --force
        --experiment-name='lexsub-all-models'
        --run-name='semeval_all_roberta_embs'
    """
    metrics = LexSubEvaluation(
        substitute_generator=roberta_embs_subst_generator,
        dataset_reader=semeval_all_dataset_reader,
    ).evaluate()["mean_metrics"]

    # metric name -> (expected value, relative tolerance or None for pytest default)
    expected = {
        "gap": (52.17, 0.01),
        "gap_normalized": (58.74, 0.02),
        "gap_vocab_normalized": (64.42, 0.01),
        "prec@1": (43.19, None),
        "prec@3": (31.19, None),
        "rec@10": (44.61, None),
        "prec@10": (16.31, None),
    }
    for name, (value, rel) in expected.items():
        target = pytest.approx(value) if rel is None else pytest.approx(value, rel)
        assert metrics[name] == target, str(metrics)