def test_train_pickle_save(tmp_path, data_path, label_binarizer_path):
    """Check that training ``tfidf-svm`` leaves a file at the requested model path.

    The model is requested at ``<tmp_path>/model.pkl`` and the test only
    asserts that this path exists once ``train`` has returned.
    """
    pickle_path = os.path.join(tmp_path, "model.pkl")
    train(
        data_path,
        label_binarizer_path,
        approach="tfidf-svm",
        model_path=pickle_path,
        parameters="{'tfidf__min_df': 1, 'tfidf__stop_words': None}",
    )
    assert os.path.exists(pickle_path)
def test_train_model_save(tmp_path, data_path, label_binarizer_path):
    """Check that training ``mesh-cnn`` populates the model directory.

    ``tmp_path`` itself is passed as the model path; afterwards the entries
    ``vectorizer.pkl``, ``variables`` and ``assets`` must exist inside it.
    """
    train(
        data_path,
        label_binarizer_path,
        "mesh-cnn",
        model_path=tmp_path,
        sparse_labels=True,
    )

    # Same three artifacts, checked in the same order as before.
    for artifact in ("vectorizer.pkl", "variables", "assets"):
        assert os.path.exists(os.path.join(tmp_path, artifact))
def mesh_cnn_path(tmp_path):
    """Train a ``mesh-cnn`` model under ``tmp_path`` and return its model path.

    ``create_data`` is called with the module-level ``X`` and ``Y_mesh``
    targeting ``mesh_data.jsonl``; ``train`` is then run against that file
    with sparse labels and ``verbose=False``.

    Returns:
        The ``<tmp_path>/mesh_cnn`` path that was handed to ``train``.
    """
    data_file = os.path.join(tmp_path, "mesh_data.jsonl")
    create_data(X, Y_mesh, data_file)

    binarizer_file = os.path.join(tmp_path, "label_binarizer.pkl")
    trained_model_dir = os.path.join(tmp_path, "mesh_cnn")
    train(
        data_file,
        binarizer_file,
        approach="mesh-cnn",
        model_path=trained_model_dir,
        sparse_labels=True,
        verbose=False,
    )
    return trained_model_dir
def scibert_path(tmp_path):
    """Train a ``scibert`` model for one epoch under ``tmp_path``.

    Data is generated from the module-level ``X`` and ``Y`` into
    ``data.jsonl``, a label binarizer is created for it, and ``train`` is run
    with ``{"epochs": 1}`` (passed as its ``str`` form) and ``verbose=False``.

    Returns:
        The ``<tmp_path>/scibert`` path that was handed to ``train``.
    """
    data_path = os.path.join(tmp_path, "data.jsonl")
    create_data(X, Y, data_path)

    label_binarizer_path = os.path.join(tmp_path, "label_binarizer.pkl")
    # The returned binarizer object is not needed here; the call is kept for
    # its effect (presumably persisting to label_binarizer_path - confirm).
    create_label_binarizer(data_path, label_binarizer_path)

    # Named model_path rather than scibert_path so the local does not shadow
    # this function's own name.
    model_path = os.path.join(tmp_path, "scibert")
    parameters = {"epochs": 1}
    train(
        data_path,
        label_binarizer_path,
        approach="scibert",
        model_path=model_path,
        parameters=str(parameters),
        verbose=False,
    )
    return model_path
def tfidf_svm_path(tmp_path):
    """Train a ``tfidf-svm`` model under ``tmp_path`` and return its pickle path.

    Data is generated from the module-level ``X`` and ``Y`` into
    ``data.jsonl``, a label binarizer is created for it, and ``train`` is run
    with the tfidf parameters below (passed as their ``str`` form) and
    ``verbose=False``.

    Returns:
        The ``<tmp_path>/tfidf-svm.pkl`` path that was handed to ``train``.
    """
    data_path = os.path.join(tmp_path, "data.jsonl")
    create_data(X, Y, data_path)

    label_binarizer_path = os.path.join(tmp_path, "label_binarizer.pkl")
    # The returned binarizer object is not needed here; the call is kept for
    # its effect (presumably persisting to label_binarizer_path - confirm).
    create_label_binarizer(data_path, label_binarizer_path)

    # TODO: Replace approach with science-ensemble when fit implemented
    # Named model_path rather than tfidf_svm_path so the local does not
    # shadow this function's own name.
    model_path = os.path.join(tmp_path, "tfidf-svm.pkl")
    parameters = {'tfidf__min_df': 1, 'tfidf__stop_words': None}
    train(
        data_path,
        label_binarizer_path,
        approach="tfidf-svm",
        model_path=model_path,
        parameters=str(parameters),
        verbose=False,
    )
    return model_path
def mesh_xlinear_path(tmp_path):
    """Train a ``mesh-xlinear`` model under ``tmp_path`` and return its model path.

    ``create_data`` is called with the module-level ``X`` and ``Y_mesh``
    targeting ``mesh_data.jsonl``; ``train`` is then run with sparse labels,
    ``verbose=False`` and the vectorizer settings below in ``str`` form.

    Returns:
        The ``<tmp_path>/mesh_xlinear`` path that was handed to ``train``.
    """
    data_file = os.path.join(tmp_path, "mesh_data.jsonl")
    create_data(X, Y_mesh, data_file)

    binarizer_file = os.path.join(tmp_path, "label_binarizer.pkl")
    trained_model_dir = os.path.join(tmp_path, "mesh_xlinear")

    # Key order is kept as-is because the dict is serialized with str().
    train_params = {
        'min_df': 1,
        'stop_words': None,
        'vectorizer_library': 'sklearn',
    }
    train(
        data_file,
        binarizer_file,
        approach="mesh-xlinear",
        model_path=trained_model_dir,
        sparse_labels=True,
        verbose=False,
        parameters=str(train_params),
    )
    return trained_model_dir
def mesh_tfidf_svm_path(tmp_path):
    """Train a ``mesh-tfidf-svm`` model under ``tmp_path`` and return its model path.

    ``create_data`` is called with the module-level ``X`` and ``Y_mesh``
    targeting ``mesh_data.jsonl``; ``train`` is then run with sparse labels,
    ``verbose=False`` and the parameters below in ``str`` form. The model
    path is supplied both as a ``train`` argument and inside the parameters.

    Returns:
        The ``<tmp_path>/mesh_tfidf_svm`` path that was handed to ``train``.
    """
    data_file = os.path.join(tmp_path, "mesh_data.jsonl")
    create_data(X, Y_mesh, data_file)

    binarizer_file = os.path.join(tmp_path, "label_binarizer.pkl")
    trained_model_dir = os.path.join(tmp_path, "mesh_tfidf_svm")

    # Key order is kept as-is because the dict is serialized with str().
    # NOTE(review): if 'svm__estimator__loss' reaches scikit-learn's
    # SGDClassifier, newer releases spell this loss 'log_loss' - confirm
    # against the pinned scikit-learn version.
    train_params = {
        'tfidf__min_df': 1,
        'tfidf__stop_words': None,
        'svm__estimator__loss': 'log',
        'model_path': trained_model_dir,
    }
    train(
        data_file,
        binarizer_file,
        approach="mesh-tfidf-svm",
        model_path=trained_model_dir,
        parameters=str(train_params),
        sparse_labels=True,
        verbose=False,
    )
    return trained_model_dir
def test_train_and_evaluate_generator(data_path, label_binarizer_path):
    """Smoke-test ``mesh-cnn`` training with ``data_format="generator"``.

    There are no assertions: the test passes when ``train`` returns without
    raising.
    """
    train(
        data_path,
        label_binarizer_path,
        "mesh-cnn",
        data_format="generator",
        sparse_labels=True,
    )
def test_train_and_evaluate(data_path, label_binarizer_path):
    """Smoke-test ``tfidf-svm`` training with minimal tfidf settings.

    There are no assertions: the test passes when ``train`` returns without
    raising.
    """
    train(
        data_path,
        label_binarizer_path,
        "tfidf-svm",
        parameters="{'tfidf__min_df': 1, 'tfidf__stop_words': None}",
    )