Python PLT.fit 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: napkinxc.models

클래스/타입: PLT

메소드/함수: fit

hotexamples.com에서의 예제들: 7

Python PLT.fit - 예제 7개를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python napkinxc.models.PLT.fit의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

PLT(10)

fit(7)

predict(6)

get_tree_structure(3)

build_tree(2)

predict_proba(2)

get_nodes_to_update(1)

get_nodes_updates(1)

load(1)

ofo(1)

set_tree_structure(1)

unload(1)

예제 #1

파일 보기

파일: test_X_Y_inputs.py 프로젝트: mwydmuch/napkinXC

def test_numpy_3d_input():
    """Expect ``ValueError`` when a PLT is built and fitted on a 3-D feature array.

    The features are a ``(100, 100, 100)`` array of ones and the labels a
    ``(100,)`` array of ones; both are described on stdout before the check.
    """
    dim = 100
    features = np.ones((dim, dim, dim))
    labels = np.ones(dim)

    # Report the type, shape and dtype of both inputs.
    for data in (features, labels):
        print(f"Type (shape, dtype): {type(data)} ({data.shape}, {data.dtype})")

    # Construction and fitting both happen inside the expectation block.
    with pytest.raises(ValueError):
        PLT(MODEL_PATH, optimizer="adagrad", epochs=1).fit(features, labels)

예제 #2

파일 보기

파일: test_tree_structure.py 프로젝트: mwydmuch/napkinXC

def test_set_get_tree_structure():
    """Check the tree-structure accessors of PLT before and after fitting.

    Steps:
      1. Build a tree from the training split, read its structure, set it back
         and read it again; every entry of the first structure must be present
         in the second one.
      2. ``get_nodes_to_update(Y)`` must return one entry per row of ``X``.
      3. ``get_nodes_updates(Y)`` must return one entry per tree-structure entry.
      4. After ``fit``, every entry of the original structure must still be
         present in the structure reported by the model.

    The model directory is removed in a ``finally`` clause so that a failing
    assertion does not leave files behind for tests sharing ``MODEL_PATH``.
    """
    X, Y = load_dataset(TEST_DATASET, "train", root=TEST_DATA_PATH)
    try:
        plt = PLT(MODEL_PATH)
        plt.build_tree(X, Y)
        tree_structure = plt.get_tree_structure()
        plt.set_tree_structure(tree_structure)
        tree_structure2 = plt.get_tree_structure()
        # One-directional check: nothing from the original structure is missing.
        assert len(set(tree_structure) - set(tree_structure2)) == 0

        nodes_to_update = plt.get_nodes_to_update(Y)
        assert len(nodes_to_update) == X.shape[0]

        nodes_updates = plt.get_nodes_updates(Y)
        assert len(nodes_updates) == len(tree_structure)

        plt.fit(X, Y)
        tree_structure3 = plt.get_tree_structure()
        assert len(set(tree_structure) - set(tree_structure3)) == 0
    finally:
        # Always clean up, even when an assertion above fails.
        shutil.rmtree(MODEL_PATH, ignore_errors=True)

예제 #3

파일 보기

def test_plt_exact_prediction_reproducibility():
    """Check that predictions are reproduced exactly across prediction configs.

    For every entry of ``model_configs`` a PLT is fitted on the training split
    and its precision@1 on the test split is recorded. Then, for every entry of
    ``representation_configs``, a new PLT object is created ``repeat`` times on
    the same ``MODEL_PATH`` (without calling ``fit`` again) and its precision@1
    must be exactly equal to the recorded value.

    The model directory is removed in a ``finally`` clause after each model
    config, so a failed assertion does not leave a stale directory behind for
    the next config or for other tests sharing ``MODEL_PATH``.
    """
    X_train, Y_train = load_dataset(TEST_DATASET, "train", root=TEST_DATA_PATH)
    X_test, Y_test = load_dataset(TEST_DATASET, "test", root=TEST_DATA_PATH)

    print("\n")
    for mc in model_configs:
        print("model config: ", mc)
        try:
            plt = PLT(MODEL_PATH, **mc)
            plt.fit(X_train, Y_train)
            Y_pred = plt.predict(X_test, top_k=1)
            p_at_1 = precision_at_k(Y_test, Y_pred, k=1)

            for rc in representation_configs:
                print("  prediction config: ", rc)
                for _ in range(repeat):
                    # New object on the same directory; no fit() call here.
                    plt = PLT(MODEL_PATH, **mc, **rc)
                    Y_pred = plt.predict(X_test, top_k=1)
                    # Exact equality is intended: results must be reproducible.
                    assert p_at_1 == precision_at_k(Y_test, Y_pred, k=1)
        finally:
            # Always clean up, even when an assertion above fails.
            shutil.rmtree(MODEL_PATH, ignore_errors=True)

예제 #4

파일 보기

def test_seed_reproducibility():
    """Check that two PLT models trained with the same seed behave identically.

    For each seed in ``range(repeat)`` two models are trained in separate
    directories (``MODEL_PATH + "-1"`` and ``MODEL_PATH + "-2"``) with identical
    parameters. Every entry of the first model's tree structure must be present
    in the second model's, and their precision@1 on the test split must be
    exactly equal.

    Both model directories are removed in a ``finally`` clause so that a failing
    assertion does not leave them behind.
    """
    X_train, Y_train = load_dataset(TEST_DATASET, "train", root=TEST_DATA_PATH)
    X_test, Y_test = load_dataset(TEST_DATASET, "test", root=TEST_DATA_PATH)

    model_paths = (MODEL_PATH + "-1", MODEL_PATH + "-2")

    for i in range(repeat):
        try:
            # Train and evaluate both models the same way, first "-1" then "-2".
            results = []
            for path in model_paths:
                plt = PLT(path, optimizer="adagrad", epochs=1, loss="log", seed=i)
                plt.fit(X_train, Y_train)
                Y_pred = plt.predict(X_test, top_k=1)
                p_at_1 = precision_at_k(Y_test, Y_pred, k=1)
                results.append((p_at_1, plt.get_tree_structure()))

            (p_at_1_1, tree_structure_1), (p_at_1_2, tree_structure_2) = results

            assert len(set(tree_structure_1) - set(tree_structure_2)) == 0
            assert p_at_1_1 == p_at_1_2
        finally:
            # Always clean up, even when an assertion above fails.
            for path in model_paths:
                shutil.rmtree(path, ignore_errors=True)

예제 #5

파일 보기

파일: test_compare_with_xclib_measures.py 프로젝트: xiaohan2012/napkinXC

def test_compare_napkinxc_with_xclib():
    """Compare napkinXC measures with their xclib counterparts on "eurlex-4k".

    A PLT model stored in the "eurlex-model" directory next to this file is
    fitted only when that directory does not exist yet, and is not removed
    afterwards. Predictions come from ``predict_proba`` with ``top_k=5``.

    Inverse propensities are computed three ways (napkinXC on lists, napkinXC
    on CSR matrices, xclib on CSR matrices) and must agree. Each measure is then
    computed and timed three ways -- xclib on CSR matrices, napkinXC on lists,
    napkinXC on CSR matrices -- and the results must agree via ``np.allclose``.

    The list-based napkinXC measure uses the list-based napkinXC propensities
    (``nxc_inv_ps``), so each variant is evaluated with the propensities
    computed from its own input representation.
    """
    k = 5

    # Train model and predict
    model_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                              "eurlex-model")
    X_train, Y_train = load_dataset("eurlex-4k", "train")
    X_test, Y_test = load_dataset("eurlex-4k", "test")
    plt = PLT(model_path)
    if not os.path.exists(model_path):
        plt.fit(X_train, Y_train)
    Y_pred = plt.predict_proba(X_test, top_k=k)

    # Prepare dataset
    csr_Y_train = to_csr_matrix(Y_train)
    csr_Y_test = to_csr_matrix(Y_test)
    csr_Y_pred = to_csr_matrix(Y_pred, shape=csr_Y_test.shape)

    # Calculate propensities
    nxc_inv_ps = inverse_propensity(Y_train, A=0.55, B=1.5)
    csr_nxc_inv_ps = inverse_propensity(csr_Y_train, A=0.55, B=1.5)
    xcl_inv_ps = compute_inv_propesity(csr_Y_train, A=0.55, B=1.5)
    assert np.allclose(nxc_inv_ps, csr_nxc_inv_ps)
    assert np.allclose(nxc_inv_ps, xcl_inv_ps)

    # Compare results
    # "inv_ps" marks measures that take inverse propensities as an extra
    # positional argument.
    measures = {
        "P@k": {
            "nxc": precision_at_k,
            "xclib": precision,
            "inv_ps": False
        },
        "R@k": {
            "nxc": recall_at_k,
            "xclib": recall,
            "inv_ps": False
        },
        "nDCG@k": {
            "nxc": ndcg_at_k,
            "xclib": ndcg,
            "inv_ps": False
        },
        "PSP@k": {
            "nxc": psprecision_at_k,
            "xclib": psprecision,
            "inv_ps": True
        },
        "PSR@k": {
            "nxc": psrecall_at_k,
            "xclib": psrecall,
            "inv_ps": True
        },
        "PSnDCG@k": {
            "nxc": psndcg_at_k,
            "xclib": psndcg,
            "inv_ps": True
        }
    }

    for m, v in measures.items():
        nxc_measure = v["nxc"]
        xclib_measure = v["xclib"]
        uses_inv_ps = v["inv_ps"]

        print("\n{} time comparison:".format(m))

        # xclib takes (predictions, ground truth), napkinXC the reverse order.
        t_start = time()
        if uses_inv_ps:
            xclib_r = xclib_measure(csr_Y_pred, csr_Y_test, xcl_inv_ps, k=k)
        else:
            xclib_r = xclib_measure(csr_Y_pred, csr_Y_test, k=k)
        print("\txclib.evaluation.xc_metrics.{} with csr_matrices: {}s".format(
            xclib_measure.__name__,
            time() - t_start))

        t_start = time()
        if uses_inv_ps:
            # Use the propensities computed from the list representation.
            nxc_r = nxc_measure(Y_test, Y_pred, nxc_inv_ps, k=k)
        else:
            nxc_r = nxc_measure(Y_test, Y_pred, k=k)
        print("\tnapkinXC.measures.{} with lists: {}s".format(
            nxc_measure.__name__,
            time() - t_start))

        t_start = time()
        if uses_inv_ps:
            csr_nxc_r = nxc_measure(csr_Y_test, csr_Y_pred, csr_nxc_inv_ps, k=k)
        else:
            csr_nxc_r = nxc_measure(csr_Y_test, csr_Y_pred, k=k)
        print("\tnapkinXC.measures.{} with csr_matrices: {}s".format(
            nxc_measure.__name__,
            time() - t_start))

        assert np.allclose(nxc_r, csr_nxc_r)
        assert np.allclose(nxc_r, xclib_r)

예제 #6

파일 보기

파일: basic.py 프로젝트: xiaohan2012/napkinXC

# Use the load_dataset function to load one of the benchmark datasets
# from the XML Repository (http://manikvarma.org/downloads/XC/XMLRepository.html).
X_train, Y_train = load_dataset("eurlex-4k", "train")
X_test, Y_test = load_dataset("eurlex-4k", "test")

# Create a Probabilistic Labels Tree model.
# The directory "eurlex-model" will be created and used during model training.
# napkinXC stores already trained parts of the model there to save RAM.
# The model directory is the only required argument of the model constructors.
plt = PLT("eurlex-model")

# Fit the model on the training dataset.
# The model weights and additional data will be stored in the "eurlex-model" directory.
# The feature matrix X must be a SciPy csr_matrix, a NumPy array, or a list of tuples of (idx, value),
# while the label matrix Y should be a list of lists or tuples containing the positive labels.
plt.fit(X_train, Y_train)

# After training, the model is not loaded into RAM.
# You can preload the model into RAM before performing prediction.
plt.load()

# Predict only the top five labels for each data point in the test dataset.
# This will also load the model if it is not loaded yet.
Y_pred = plt.predict(X_test, top_k=5)

# Evaluate the prediction with the precision at 5 measure.
print("Precision at k:", precision_at_k(Y_test, Y_pred, k=5))

# Unload the model from RAM.
# You can also just delete the object if you do not need it anymore.
plt.unload()

예제 #7

파일 보기

def test_compare_napkinxc_with_xclib():
    """Compare napkinXC measures with their xclib counterparts on the test dataset.

    A PLT model is fitted on the training split of ``TEST_DATASET`` and queried
    with ``predict_proba`` using ``top_k=k``. The model directory is removed
    right after prediction, in a ``finally`` clause, so it is cleaned up even if
    fitting or predicting raises.

    Inverse propensities are computed three ways (napkinXC on lists, napkinXC
    on CSR matrices, xclib on CSR matrices) and must agree. Each measure is then
    computed and timed three ways -- xclib on CSR matrices, napkinXC on lists,
    napkinXC on CSR matrices -- and the results must agree via ``np.allclose``.

    The list-based napkinXC measure uses the list-based napkinXC propensities
    (``nxc_inv_ps``), so each variant is evaluated with the propensities
    computed from its own input representation.
    """
    k = 5

    # Train model and predict
    X_train, Y_train = load_dataset(TEST_DATASET, "train", root=TEST_DATA_PATH)
    X_test, Y_test = load_dataset(TEST_DATASET, "test", root=TEST_DATA_PATH)
    try:
        plt = PLT(MODEL_PATH)
        plt.fit(X_train, Y_train)
        Y_pred = plt.predict_proba(X_test, top_k=k)
    finally:
        # The model files are not needed once predictions are available.
        shutil.rmtree(MODEL_PATH, ignore_errors=True)

    # Prepare dataset
    csr_Y_train = to_csr_matrix(Y_train)
    csr_Y_test = to_csr_matrix(Y_test)
    csr_Y_pred = to_csr_matrix(Y_pred, shape=csr_Y_test.shape)

    # Calculate propensities
    nxc_inv_ps = inverse_propensity(Y_train, A=0.55, B=1.5)
    csr_nxc_inv_ps = inverse_propensity(csr_Y_train, A=0.55, B=1.5)
    xcl_inv_ps = compute_inv_propesity(csr_Y_train, A=0.55, B=1.5)
    assert np.allclose(nxc_inv_ps, csr_nxc_inv_ps)
    assert np.allclose(nxc_inv_ps, xcl_inv_ps)

    # Compare results
    # "inv_ps" marks measures that take inverse propensities as an extra
    # positional argument.
    measures = {
        "P@k": {
            "nxc": precision_at_k,
            "xclib": precision,
            "inv_ps": False
        },
        "R@k": {
            "nxc": recall_at_k,
            "xclib": recall,
            "inv_ps": False
        },
        "nDCG@k": {
            "nxc": ndcg_at_k,
            "xclib": ndcg,
            "inv_ps": False
        },
        "PSP@k": {
            "nxc": psprecision_at_k,
            "xclib": psprecision,
            "inv_ps": True
        },
        "PSR@k": {
            "nxc": psrecall_at_k,
            "xclib": psrecall,
            "inv_ps": True
        },
        "PSnDCG@k": {
            "nxc": psndcg_at_k,
            "xclib": psndcg,
            "inv_ps": True
        }
    }

    print("\n")
    for m, v in measures.items():
        nxc_measure = v["nxc"]
        xclib_measure = v["xclib"]
        uses_inv_ps = v["inv_ps"]

        print("\n{} time comparison:".format(m))

        # xclib takes (predictions, ground truth), napkinXC the reverse order.
        t_start = time()
        if uses_inv_ps:
            xclib_r = xclib_measure(csr_Y_pred, csr_Y_test, xcl_inv_ps, k=k)
        else:
            xclib_r = xclib_measure(csr_Y_pred, csr_Y_test, k=k)
        print("\txclib.evaluation.xc_metrics.{} with csr_matrices: {}s".format(
            xclib_measure.__name__,
            time() - t_start))

        t_start = time()
        if uses_inv_ps:
            # Use the propensities computed from the list representation.
            nxc_r = nxc_measure(Y_test, Y_pred, nxc_inv_ps, k=k)
        else:
            nxc_r = nxc_measure(Y_test, Y_pred, k=k)
        print("\tnapkinXC.measures.{} with lists: {}s".format(
            nxc_measure.__name__,
            time() - t_start))

        t_start = time()
        if uses_inv_ps:
            csr_nxc_r = nxc_measure(csr_Y_test, csr_Y_pred, csr_nxc_inv_ps, k=k)
        else:
            csr_nxc_r = nxc_measure(csr_Y_test, csr_Y_pred, k=k)
        print("\tnapkinXC.measures.{} with csr_matrices: {}s".format(
            nxc_measure.__name__,
            time() - t_start))

        assert np.allclose(nxc_r, csr_nxc_r)
        assert np.allclose(nxc_r, xclib_r)