def test_train_pickle_save(tmp_path, data_path, label_binarizer_path):
    """Check that training "tfidf-svm" leaves a file at the requested model path.

    Args:
        tmp_path: Directory in which ``model.pkl`` is expected to appear.
        data_path: Training data location, forwarded to ``train`` unchanged.
        label_binarizer_path: Label binarizer location, forwarded to
            ``train`` unchanged.
    """
    pickle_target = os.path.join(tmp_path, "model.pkl")
    # Parameters are handed to train as a string, not as a dict.
    tfidf_overrides = "{'tfidf__min_df': 1, 'tfidf__stop_words': None}"

    train(
        data_path,
        label_binarizer_path,
        approach="tfidf-svm",
        model_path=pickle_target,
        parameters=tfidf_overrides,
    )

    # Only existence is verified; the saved content is not inspected.
    assert os.path.exists(pickle_target)
def test_train_model_save(tmp_path, data_path, label_binarizer_path):
    """Check the entries expected in ``tmp_path`` after training "mesh-cnn".

    ``tmp_path`` itself is passed as ``model_path``; afterwards
    ``vectorizer.pkl``, ``variables`` and ``assets`` must all exist
    directly beneath it.

    Args:
        tmp_path: Directory used as the model path.
        data_path: Training data location, forwarded to ``train`` unchanged.
        label_binarizer_path: Label binarizer location, forwarded to
            ``train`` unchanged.
    """
    train(data_path, label_binarizer_path, "mesh-cnn",
          model_path=tmp_path, sparse_labels=True)

    # Checked in the same order as before: vectorizer, variables, assets.
    for artifact_name in ("vectorizer.pkl", "variables", "assets"):
        assert os.path.exists(os.path.join(tmp_path, artifact_name))
# Example #3
# 0
def mesh_cnn_path(tmp_path):
    """Write sample MeSH data, train a "mesh-cnn" model and return its path.

    Presumably registered as a pytest fixture; the decorator is not visible
    in this part of the file.

    Args:
        tmp_path: Directory under which the data file, the label binarizer
            path and the model path are placed.

    Returns:
        The ``model_path`` value given to ``train`` (``<tmp_path>/mesh_cnn``).
    """
    def under_tmp(name):
        # Every path used here lives directly below tmp_path.
        return os.path.join(tmp_path, name)

    data_file = under_tmp("mesh_data.jsonl")
    create_data(X, Y_mesh, data_file)

    model_target = under_tmp("mesh_cnn")
    # This function does not create the label binarizer itself; only its
    # path is handed to train.
    train(
        data_file,
        under_tmp("label_binarizer.pkl"),
        approach="mesh-cnn",
        model_path=model_target,
        sparse_labels=True,
        verbose=False,
    )
    return model_target
# Example #4
# 0
def scibert_path(tmp_path):
    """Write sample data, create a label binarizer and train a "scibert" model.

    Presumably registered as a pytest fixture; the decorator is not visible
    in this part of the file.

    Args:
        tmp_path: Directory under which the data file, the label binarizer
            and the model path are all placed.

    Returns:
        The ``model_path`` value given to ``train`` (``<tmp_path>/scibert``).
    """
    data_path = os.path.join(tmp_path, "data.jsonl")
    create_data(X, Y, data_path)

    label_binarizer_path = os.path.join(tmp_path, "label_binarizer.pkl")
    # The return value is deliberately discarded: train only receives the path.
    create_label_binarizer(data_path, label_binarizer_path)

    # Named model_path so the local does not shadow this function's own name.
    model_path = os.path.join(tmp_path, "scibert")
    # One epoch only; train takes its parameters as a string, hence str().
    parameters = {"epochs": 1}
    train(data_path,
          label_binarizer_path,
          approach="scibert",
          model_path=model_path,
          parameters=str(parameters),
          verbose=False)

    return model_path
# Example #5
# 0
def tfidf_svm_path(tmp_path):
    """Write sample data, create a label binarizer and train "tfidf-svm".

    Presumably registered as a pytest fixture; the decorator is not visible
    in this part of the file.

    Args:
        tmp_path: Directory under which the data file, the label binarizer
            and the pickled model path are all placed.

    Returns:
        The ``model_path`` value given to ``train``
        (``<tmp_path>/tfidf-svm.pkl``).
    """
    data_path = os.path.join(tmp_path, "data.jsonl")
    create_data(X, Y, data_path)

    label_binarizer_path = os.path.join(tmp_path, "label_binarizer.pkl")
    # The return value is deliberately discarded: train only receives the path.
    create_label_binarizer(data_path, label_binarizer_path)

    # TODO: Replace approach with science-ensemble when fit implemented
    # Named model_path so the local does not shadow this function's own name.
    model_path = os.path.join(tmp_path, "tfidf-svm.pkl")
    # train takes its parameters as a string, hence str() below.
    parameters = {'tfidf__min_df': 1, 'tfidf__stop_words': None}
    train(data_path,
          label_binarizer_path,
          approach="tfidf-svm",
          model_path=model_path,
          parameters=str(parameters),
          verbose=False)
    return model_path
# Example #6
# 0
def mesh_xlinear_path(tmp_path):
    """Write sample MeSH data, train a "mesh-xlinear" model and return its path.

    Presumably registered as a pytest fixture; the decorator is not visible
    in this part of the file.

    Args:
        tmp_path: Directory under which the data file, the label binarizer
            path and the model path are placed.

    Returns:
        The ``model_path`` value given to ``train``
        (``<tmp_path>/mesh_xlinear``).
    """
    data_file = os.path.join(tmp_path, "mesh_data.jsonl")
    binarizer_file = os.path.join(tmp_path, "label_binarizer.pkl")
    model_target = os.path.join(tmp_path, "mesh_xlinear")

    create_data(X, Y_mesh, data_file)

    # Key order is kept as-is because the dict is serialised with str().
    overrides = {
        'min_df': 1,
        'stop_words': None,
        'vectorizer_library': 'sklearn'
    }
    serialized_overrides = str(overrides)

    train(
        data_file,
        binarizer_file,
        approach="mesh-xlinear",
        model_path=model_target,
        sparse_labels=True,
        verbose=False,
        parameters=serialized_overrides,
    )
    return model_target
# Example #7
# 0
def mesh_tfidf_svm_path(tmp_path):
    """Write sample MeSH data, train "mesh-tfidf-svm" and return its path.

    Presumably registered as a pytest fixture; the decorator is not visible
    in this part of the file.

    Args:
        tmp_path: Directory under which the data file, the label binarizer
            path and the model path are placed.

    Returns:
        The ``model_path`` value given to ``train``
        (``<tmp_path>/mesh_tfidf_svm``).
    """
    def under_tmp(name):
        # Every path used here lives directly below tmp_path.
        return os.path.join(tmp_path, name)

    data_file = under_tmp("mesh_data.jsonl")
    create_data(X, Y_mesh, data_file)

    binarizer_file = under_tmp("label_binarizer.pkl")
    model_target = under_tmp("mesh_tfidf_svm")

    # The model path is supplied twice: inside the parameters and as the
    # model_path keyword. Key order is kept because of the str() below.
    # NOTE(review): if the wrapped estimator is scikit-learn's SGDClassifier,
    # newer releases spell this loss 'log_loss' - confirm the pinned version.
    overrides = {
        'tfidf__min_df': 1,
        'tfidf__stop_words': None,
        'svm__estimator__loss': 'log',
        'model_path': model_target
    }

    train(
        data_file,
        binarizer_file,
        approach="mesh-tfidf-svm",
        model_path=model_target,
        parameters=str(overrides),
        sparse_labels=True,
        verbose=False,
    )
    return model_target
def test_train_and_evaluate_generator(data_path, label_binarizer_path):
    """Run "mesh-cnn" training with ``data_format="generator"``.

    There are no assertions: the test passes when ``train`` returns
    without raising.

    Args:
        data_path: Training data location, forwarded to ``train`` unchanged.
        label_binarizer_path: Label binarizer location, forwarded to
            ``train`` unchanged.
    """
    generator_options = {"data_format": "generator", "sparse_labels": True}
    train(data_path, label_binarizer_path, "mesh-cnn", **generator_options)
def test_train_and_evaluate(data_path, label_binarizer_path):
    """Run "tfidf-svm" training with relaxed TF-IDF settings.

    There are no assertions: the test passes when ``train`` returns
    without raising.

    Args:
        data_path: Training data location, forwarded to ``train`` unchanged.
        label_binarizer_path: Label binarizer location, forwarded to
            ``train`` unchanged.
    """
    # Parameters are handed to train as a string, not as a dict.
    tfidf_overrides = "{'tfidf__min_df': 1, 'tfidf__stop_words': None}"
    train(data_path, label_binarizer_path, "tfidf-svm",
          parameters=tfidf_overrides)