def test_compare_cross_validation(
    training_configuration: _MentorTrainAndTestConfiguration,
    compare_configuration: _MentorTrainAndTestConfiguration,
    tmpdir,
    shared_root: str,
):
    """Train both configured archs on the same mentor data and check that each
    reaches its own expected training accuracy."""
    mentor = load_mentor_csv(
        fixture_mentor_data(training_configuration.mentor_id, "data.csv")
    )
    # mock the graphql endpoint with the mentor fixture data
    responses.add(
        responses.POST,
        "http://graphql/graphql",
        json={"data": {"mentor": mentor.to_dict()}},
        status=200,
    )
    # train each configuration in turn and assert its accuracy threshold
    for config in (training_configuration, compare_configuration):
        train_result = (
            ClassifierFactory()
            .new_training(
                mentor=config.mentor_id,
                shared_root=shared_root,
                data_path=tmpdir,
                arch=config.arch,
            )
            .train(shared_root)
        )
        assert train_result.accuracy >= config.expected_training_accuracy
def _test_gets_off_topic(
    monkeypatch,
    data_root: str,
    shared_root: str,
    mentor_id: str,
    question: str,
    expected_answer_id: str,
    expected_answer: str,
    expected_media: List[Media],
):
    """Force the off-topic threshold to its maximum and verify the classifier
    returns the expected off-topic fallback answer."""
    # with a threshold of 1.0 every question is classified as off topic
    monkeypatch.setenv("OFF_TOPIC_THRESHOLD", "1.0")
    with open(fixture_path("graphql/{}.json".format(mentor_id))) as f:
        graphql_fixture = json.load(f)
    responses.add(
        responses.POST, "http://graphql/graphql", json=graphql_fixture, status=200
    )
    _ensure_trained(mentor_id, shared_root, data_root)
    result = (
        ClassifierFactory()
        .new_prediction(
            mentor=mentor_id, shared_root=shared_root, data_path=data_root
        )
        .evaluate(question, shared_root)
    )
    assert result.highest_confidence < get_off_topic_threshold()
    assert result.answer_id == expected_answer_id
    assert result.answer_text == expected_answer
    assert result.answer_media == expected_media
    assert result.feedback_id is not None
def _ensure_trained(mentor_id: str, shared_root: str, output_dir: str) -> None:
    """
    NOTE: we don't want this test to do any training.
    But for the case that there's no trained model, it is more convenient
    to just train it here. Once it has been trained, it should be committed
    and then subsequent runs of the test will use the fixture/trained model.
    """
    model_dir = path.join(output_dir, mentor_id)
    # skip training entirely when a trained model already exists on disk
    if not path.isdir(model_dir):
        ClassifierFactory().new_training(mentor_id, shared_root, output_dir).train(
            shared_root
        )
def test_compare_test_accuracy(
    training_configuration: _MentorTrainAndTestConfiguration,
    compare_configuration: _MentorTrainAndTestConfiguration,
    tmpdir,
    shared_root: str,
    example: str,
    test_set_file: str,
):
    """Train both archs and check the compare arch (hf) beats the baseline (lr)
    on training accuracy, test-set accuracy, and confidence on one example."""
    mentor = load_mentor_csv(
        fixture_mentor_data(training_configuration.mentor_id, "data.csv")
    )
    test_set = load_test_csv(
        fixture_mentor_data(
            training_configuration.mentor_id, test_set_file or "test.csv"
        )
    )
    # mock the graphql endpoint with the mentor fixture data
    responses.add(
        responses.POST,
        "http://graphql/graphql",
        json={"data": {"mentor": mentor.to_dict()}},
        status=200,
    )

    def _train(config):
        # train a model for the given configuration and return the result
        return (
            ClassifierFactory()
            .new_training(
                mentor=config.mentor_id,
                shared_root=shared_root,
                data_path=tmpdir,
                arch=config.arch,
            )
            .train(shared_root)
        )

    def _predictor(config):
        # build a prediction classifier for the given configuration
        return ClassifierFactory().new_prediction(
            mentor=config.mentor_id,
            shared_root=shared_root,
            data_path=tmpdir,
            arch=config.arch,
        )

    def _test_accuracy(classifier):
        # fraction of the test set answered correctly, ignoring confidence
        test_results = run_model_against_testset_ignore_confidence(
            classifier, test_set, shared_root
        )
        return test_results.passing_tests / len(test_results.results)

    lr_train = _train(training_configuration)
    hf_train = _train(compare_configuration)
    assert hf_train.accuracy >= lr_train.accuracy
    hf_classifier = _predictor(compare_configuration)
    hf_test_accuracy = _test_accuracy(hf_classifier)
    lr_classifier = _predictor(training_configuration)
    lr_test_accuracy = _test_accuracy(lr_classifier)
    assert lr_test_accuracy <= hf_test_accuracy
    hf_result = hf_classifier.evaluate(example, shared_root)
    lr_result = lr_classifier.evaluate(example, shared_root)
    assert hf_result.highest_confidence >= lr_result.highest_confidence
def train_task(mentor: str, arch: str = "") -> float:
    """Train a classifier for *mentor* and return its training accuracy.

    Args:
        mentor: id of the mentor whose model should be trained
        arch: classifier architecture; empty string selects the factory default

    Raises:
        Exception: any training failure is logged (with traceback) and re-raised
            so the caller still sees the original error.
    """
    try:
        result = (
            ClassifierFactory()
            .new_training(
                mentor=mentor,
                shared_root=SHARED_ROOT,
                data_path=OUTPUT_ROOT,
                arch=arch,
            )
            .train(SHARED_ROOT)
        )
        return result.accuracy
    except Exception as err:
        logging.exception(err)
        # fix: bare `raise` (not `raise (err)`) re-raises the active exception
        # with its original traceback intact
        raise
def test_trains_and_outputs_models(data_root: str, shared_root: str, mentor_id: str):
    """Training should report the expected model path and write the model
    artifacts (model.pkl, w2v.txt) into it."""
    with open(fixture_path("graphql/{}.json".format(mentor_id))) as f:
        graphql_fixture = json.load(f)
    responses.add(
        responses.POST, "http://graphql/graphql", json=graphql_fixture, status=200
    )
    result = (
        ClassifierFactory()
        .new_training(mentor_id, shared_root, data_root)
        .train(shared_root)
    )
    assert result.model_path == path.join(data_root, mentor_id, ARCH_DEFAULT)
    # both model artifacts must exist in the reported model directory
    for artifact in ("model.pkl", "w2v.txt"):
        assert path.exists(path.join(result.model_path, artifact))
def find_classifier(
    self, mentor_id: str, arch: str = ARCH_DEFAULT
) -> QuestionClassifierPrediction:
    """Return a prediction classifier for *mentor_id*, reusing a cached one.

    A cached classifier is reused only while it is at least as recent as the
    most recently trained model; otherwise a fresh classifier is built and
    cached.

    NOTE(review): the cache is keyed by mentor_id only, so two calls with
    different arch values share one cache entry — confirm that is intended.

    Args:
        mentor_id: id of the mentor to classify questions for
        arch: classifier architecture (defaults to ARCH_DEFAULT)
    """
    if mentor_id in self.cache:
        entry = self.cache[mentor_id]
        if entry and entry.last_trained_at >= entry.classifier.get_last_trained_at():
            return entry.classifier
    # fix: the arch parameter was previously accepted but silently ignored;
    # pass it through like every other new_prediction call site does
    classifier = ClassifierFactory().new_prediction(
        mentor=mentor_id,
        shared_root=self.shared_root,
        data_path=self.data_root,
        arch=arch,
    )
    self.cache[mentor_id] = Entry(classifier)
    return classifier
def test_train_and_predict_transformers(
    training_configuration: _MentorTrainAndTestConfiguration,
    tmpdir,
    shared_root: str,
):
    """Train the configured arch, check training accuracy, then run the mentor
    test set and require zero errors."""
    config = training_configuration
    mentor = load_mentor_csv(fixture_mentor_data(config.mentor_id, "data.csv"))
    test_set = load_test_csv(fixture_mentor_data(config.mentor_id, "test.csv"))
    # mock the graphql endpoint with the mentor fixture data
    responses.add(
        responses.POST,
        "http://graphql/graphql",
        json={"data": {"mentor": mentor.to_dict()}},
        status=200,
    )
    train_result = (
        ClassifierFactory()
        .new_training(
            mentor=config.mentor_id,
            shared_root=shared_root,
            data_path=tmpdir,
            arch=config.arch,
        )
        .train(shared_root)
    )
    assert train_result.accuracy >= config.expected_training_accuracy
    classifier = ClassifierFactory().new_prediction(
        mentor=config.mentor_id,
        shared_root=shared_root,
        data_path=tmpdir,
        arch=config.arch,
    )
    test_results = run_model_against_testset(classifier, test_set, shared_root)
    logging.warning(test_results.errors)
    logging.warning(
        f"percentage passed = {test_results.passing_tests}/{len(test_results.results)}"
    )
    assert len(test_results.errors) == 0
def test_gets_answer_for_exact_match_and_paraphrases(
    data_root: str,
    shared_root: str,
    mentor_id: str,
    question: str,
    expected_answer_id: str,
    expected_answer: str,
    expected_media: List[Media],
):
    """An exact-match or paraphrased question should return its recorded answer
    with full (1.0) confidence."""
    with open(fixture_path("graphql/{}.json".format(mentor_id))) as f:
        graphql_fixture = json.load(f)
    responses.add(
        responses.POST, "http://graphql/graphql", json=graphql_fixture, status=200
    )
    _ensure_trained(mentor_id, shared_root, data_root)
    result = (
        ClassifierFactory()
        .new_prediction(mentor_id, shared_root, data_root)
        .evaluate(question, shared_root)
    )
    assert result.answer_id == expected_answer_id
    assert result.answer_text == expected_answer
    assert result.answer_media == expected_media
    assert result.highest_confidence == 1
    assert result.feedback_id is not None