def test_numpy_3d_input():
    """Check that fitting PLT on a 3-dimensional NumPy array raises ValueError.

    The features are an all-ones array of shape (size, size, size) and the
    labels an all-ones array of shape (size,). Their types, shapes and dtypes
    are printed to make a failure easier to diagnose.
    """
    size = 100
    X_train = np.ones((size, size, size))
    Y_train = np.ones(size)

    print(f"Type (shape, dtype): {type(X_train)} ({X_train.shape}, {X_train.dtype})")
    print(f"Type (shape, dtype): {type(Y_train)} ({Y_train.shape}, {Y_train.dtype})")

    # Build the model outside the ``raises`` context: only ``fit`` is expected
    # to reject the input, and a ValueError coming from the constructor must
    # not make this test pass by accident.
    model = PLT(MODEL_PATH, optimizer="adagrad", epochs=1)
    with pytest.raises(ValueError):
        model.fit(X_train, Y_train)
def test_set_get_tree_structure():
    """Check that a PLT tree structure survives a set/get round trip and a fit.

    The test builds a tree, reads its structure, writes it back, and reads it
    again. It then checks the sizes of ``get_nodes_to_update`` (one entry per
    row of ``X``) and ``get_nodes_updates`` (one entry per tree node), and
    finally verifies the structure after ``fit``.

    Note: the structure comparisons are one-directional. They assert that
    every node of the first structure is present in the later one; extra
    nodes in the later structure are not detected.
    """
    X, Y = load_dataset(TEST_DATASET, "train", root=TEST_DATA_PATH)

    # try/finally guarantees the model directory is removed even when an
    # assertion fails, so a failure here cannot leak state into other tests.
    try:
        plt = PLT(MODEL_PATH)
        plt.build_tree(X, Y)

        # Round trip: get -> set -> get.
        tree_structure = plt.get_tree_structure()
        plt.set_tree_structure(tree_structure)
        tree_structure2 = plt.get_tree_structure()
        assert not set(tree_structure) - set(tree_structure2)

        nodes_to_update = plt.get_nodes_to_update(Y)
        assert len(nodes_to_update) == X.shape[0]

        nodes_updates = plt.get_nodes_updates(Y)
        assert len(nodes_updates) == len(tree_structure)

        # Training must keep the previously built tree.
        plt.fit(X, Y)
        tree_structure3 = plt.get_tree_structure()
        assert not set(tree_structure) - set(tree_structure3)
    finally:
        shutil.rmtree(MODEL_PATH, ignore_errors=True)
def test_plt_exact_prediction_reproducibility():
    """Check that re-created PLT models give exactly the same precision@1.

    For every entry of ``model_configs`` a model is trained once and its
    precision@1 on the test split is recorded. Then, for every entry of
    ``representation_configs``, a fresh ``PLT`` object is created on the same
    model directory ``repeat`` times and its precision@1 must be exactly equal
    to the recorded value.
    """
    X_train, Y_train = load_dataset(TEST_DATASET, "train", root=TEST_DATA_PATH)
    X_test, Y_test = load_dataset(TEST_DATASET, "test", root=TEST_DATA_PATH)

    print("\n")
    for mc in model_configs:
        print("model config: ", mc)

        # The model directory is removed after each model config, also when
        # an assertion fails, so the next config (or test) starts clean.
        try:
            plt = PLT(MODEL_PATH, **mc)
            plt.fit(X_train, Y_train)
            Y_pred = plt.predict(X_test, top_k=1)
            p_at_1 = precision_at_k(Y_test, Y_pred, k=1)

            for rc in representation_configs:
                print(" prediction config: ", rc)
                for _ in range(repeat):
                    # A new object on the same directory: no training, only
                    # prediction with the stored model.
                    plt = PLT(MODEL_PATH, **mc, **rc)
                    Y_pred = plt.predict(X_test, top_k=1)
                    # Exact equality is intentional: results must be
                    # reproducible, not merely close.
                    assert p_at_1 == precision_at_k(Y_test, Y_pred, k=1)
        finally:
            shutil.rmtree(MODEL_PATH, ignore_errors=True)
def test_seed_reproducibility():
    """Check that two PLT models trained with the same seed behave identically.

    For each seed in ``range(repeat)`` two models are trained in separate
    directories (``MODEL_PATH + "-1"`` and ``MODEL_PATH + "-2"``) with the same
    hyper-parameters. Their precision@1 on the test split must be exactly
    equal, and every node of the first tree structure must be present in the
    second one (the comparison is one-directional).
    """
    X_train, Y_train = load_dataset(TEST_DATASET, "train", root=TEST_DATA_PATH)
    X_test, Y_test = load_dataset(TEST_DATASET, "test", root=TEST_DATA_PATH)

    model_paths = (MODEL_PATH + "-1", MODEL_PATH + "-2")

    def fit_and_evaluate(path, seed):
        """Train a model at *path* with *seed*; return (precision@1, tree structure)."""
        model = PLT(path, optimizer="adagrad", epochs=1, loss="log", seed=seed)
        model.fit(X_train, Y_train)
        Y_pred = model.predict(X_test, top_k=1)
        p_at_1 = precision_at_k(Y_test, Y_pred, k=1)
        return p_at_1, model.get_tree_structure()

    for i in range(repeat):
        # Both directories are removed after every seed, also on failure,
        # so a failing assertion does not leave trained models behind.
        try:
            p_at_1_1, tree_structure_1 = fit_and_evaluate(model_paths[0], i)
            p_at_1_2, tree_structure_2 = fit_and_evaluate(model_paths[1], i)

            assert not set(tree_structure_1) - set(tree_structure_2)
            assert p_at_1_1 == p_at_1_2
        finally:
            for path in model_paths:
                shutil.rmtree(path, ignore_errors=True)
def test_compare_napkinxc_with_xclib():
    """Compare napkinXC measures with their xclib counterparts on eurlex-4k.

    A PLT model stored next to this file in ``eurlex-model`` is trained only
    when that directory does not exist yet. Top-``k`` probability predictions
    on the test split are then scored with every measure in three ways:
    xclib on CSR matrices, napkinXC on lists, and napkinXC on CSR matrices.
    All three results must agree (``np.allclose``), as must the inverse
    propensities computed by both libraries. Timings are printed for
    information only.
    """
    k = 5

    # Train model and predict
    model_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "eurlex-model")
    X_train, Y_train = load_dataset("eurlex-4k", "train")
    X_test, Y_test = load_dataset("eurlex-4k", "test")

    # Look for a cached model before constructing PLT, so the decision does
    # not depend on whether the constructor touches the directory.
    needs_training = not os.path.exists(model_path)
    plt = PLT(model_path)
    if needs_training:
        plt.fit(X_train, Y_train)
    Y_pred = plt.predict_proba(X_test, top_k=k)

    # Prepare dataset
    csr_Y_train = to_csr_matrix(Y_train)
    csr_Y_test = to_csr_matrix(Y_test)
    csr_Y_pred = to_csr_matrix(Y_pred, shape=csr_Y_test.shape)

    # Calculate propensities
    nxc_inv_ps = inverse_propensity(Y_train, A=0.55, B=1.5)
    csr_nxc_inv_ps = inverse_propensity(csr_Y_train, A=0.55, B=1.5)
    xcl_inv_ps = compute_inv_propesity(csr_Y_train, A=0.55, B=1.5)
    assert np.allclose(nxc_inv_ps, csr_nxc_inv_ps)
    assert np.allclose(nxc_inv_ps, xcl_inv_ps)

    # Compare results
    # "inv_ps" marks propensity-scored measures, which take the inverse
    # propensities as an extra positional argument.
    measures = {
        "P@k": {"nxc": precision_at_k, "xclib": precision, "inv_ps": False},
        "R@k": {"nxc": recall_at_k, "xclib": recall, "inv_ps": False},
        "nDCG@k": {"nxc": ndcg_at_k, "xclib": ndcg, "inv_ps": False},
        "PSP@k": {"nxc": psprecision_at_k, "xclib": psprecision, "inv_ps": True},
        "PSR@k": {"nxc": psrecall_at_k, "xclib": psrecall, "inv_ps": True},
        "PSnDCG@k": {"nxc": psndcg_at_k, "xclib": psndcg, "inv_ps": True},
    }

    for m, v in measures.items():
        print("\n{} time comparison:".format(m))
        xclib_measure = v["xclib"]
        nxc_measure = v["nxc"]
        use_inv_ps = v["inv_ps"]

        # xclib takes (predictions, ground truth), napkinXC the reverse.
        t_start = time()
        if use_inv_ps:
            xclib_r = xclib_measure(csr_Y_pred, csr_Y_test, xcl_inv_ps, k=k)
        else:
            xclib_r = xclib_measure(csr_Y_pred, csr_Y_test, k=k)
        print("\txclib.evaluation.xc_metrics.{} with csr_matrices: {}s".format(
            xclib_measure.__name__, time() - t_start))

        # NOTE(review): the list-based call receives xclib's propensities
        # (xcl_inv_ps), not nxc_inv_ps; the two are asserted close above.
        # Confirm this is intended.
        t_start = time()
        if use_inv_ps:
            nxc_r = nxc_measure(Y_test, Y_pred, xcl_inv_ps, k=k)
        else:
            nxc_r = nxc_measure(Y_test, Y_pred, k=k)
        print("\tnapkinXC.measures.{} with lists: {}s".format(
            nxc_measure.__name__, time() - t_start))

        t_start = time()
        if use_inv_ps:
            csr_nxc_r = nxc_measure(csr_Y_test, csr_Y_pred, csr_nxc_inv_ps, k=k)
        else:
            csr_nxc_r = nxc_measure(csr_Y_test, csr_Y_pred, k=k)
        print("\tnapkinXC.measures.{} with csr_matrices: {}s".format(
            nxc_measure.__name__, time() - t_start))

        assert np.allclose(nxc_r, csr_nxc_r)
        assert np.allclose(nxc_r, xclib_r)
# Load one of the benchmark datasets from the XML Repository
# (http://manikvarma.org/downloads/XC/XMLRepository.html)
# with the load_dataset function.
X_train, Y_train = load_dataset("eurlex-4k", "train")
X_test, Y_test = load_dataset("eurlex-4k", "test")

# Create a Probabilistic Labels Tree model.
# The model directory is the only required argument of a model constructor;
# the directory "eurlex-model" will be created and used during training.
# napkinXC stores already trained parts of the model there to save RAM.
plt = PLT("eurlex-model")

# Fit the model on the training dataset.
# The model weights and additional data are stored in the "eurlex-model" directory.
# The features matrix X must be a SciPy csr_matrix, a NumPy array, or a list of
# tuples of (idx, value), while the labels matrix Y should be a list of lists
# or tuples containing positive labels.
plt.fit(X_train, Y_train)

# After training, the model is not loaded into RAM.
# It can be preloaded explicitly before prediction.
plt.load()

# Predict only the top five labels for each data point in the test dataset.
# This would also load the model if it were not loaded yet.
Y_pred = plt.predict(X_test, top_k=5)

# Evaluate the prediction with the precision at 5 measure.
p_at_5 = precision_at_k(Y_test, Y_pred, k=5)
print("Precision at k:", p_at_5)

# Unload the model from RAM.
# Alternatively, just delete the object when it is no longer needed.
plt.unload()
def test_compare_napkinxc_with_xclib():
    """Compare napkinXC measures with their xclib counterparts on the test dataset.

    A PLT model is trained in ``MODEL_PATH``, used to predict top-``k``
    probabilities for the test split, and removed again. The predictions are
    then scored with every measure in three ways: xclib on CSR matrices,
    napkinXC on lists, and napkinXC on CSR matrices. All three results must
    agree (``np.allclose``), as must the inverse propensities computed by both
    libraries. Timings are printed for information only.
    """
    k = 5

    # Train model and predict
    X_train, Y_train = load_dataset(TEST_DATASET, "train", root=TEST_DATA_PATH)
    X_test, Y_test = load_dataset(TEST_DATASET, "test", root=TEST_DATA_PATH)
    plt = PLT(MODEL_PATH)
    # The model is only needed for the predictions; remove its directory even
    # when training or prediction fails so no state leaks into other tests.
    try:
        plt.fit(X_train, Y_train)
        Y_pred = plt.predict_proba(X_test, top_k=k)
    finally:
        shutil.rmtree(MODEL_PATH, ignore_errors=True)

    # Prepare dataset
    csr_Y_train = to_csr_matrix(Y_train)
    csr_Y_test = to_csr_matrix(Y_test)
    csr_Y_pred = to_csr_matrix(Y_pred, shape=csr_Y_test.shape)

    # Calculate propensities
    nxc_inv_ps = inverse_propensity(Y_train, A=0.55, B=1.5)
    csr_nxc_inv_ps = inverse_propensity(csr_Y_train, A=0.55, B=1.5)
    xcl_inv_ps = compute_inv_propesity(csr_Y_train, A=0.55, B=1.5)
    assert np.allclose(nxc_inv_ps, csr_nxc_inv_ps)
    assert np.allclose(nxc_inv_ps, xcl_inv_ps)

    # Compare results
    # "inv_ps" marks propensity-scored measures, which take the inverse
    # propensities as an extra positional argument.
    measures = {
        "P@k": {"nxc": precision_at_k, "xclib": precision, "inv_ps": False},
        "R@k": {"nxc": recall_at_k, "xclib": recall, "inv_ps": False},
        "nDCG@k": {"nxc": ndcg_at_k, "xclib": ndcg, "inv_ps": False},
        "PSP@k": {"nxc": psprecision_at_k, "xclib": psprecision, "inv_ps": True},
        "PSR@k": {"nxc": psrecall_at_k, "xclib": psrecall, "inv_ps": True},
        "PSnDCG@k": {"nxc": psndcg_at_k, "xclib": psndcg, "inv_ps": True},
    }

    print("\n")
    for m, v in measures.items():
        print("\n{} time comparison:".format(m))
        xclib_measure = v["xclib"]
        nxc_measure = v["nxc"]
        use_inv_ps = v["inv_ps"]

        # xclib takes (predictions, ground truth), napkinXC the reverse.
        t_start = time()
        if use_inv_ps:
            xclib_r = xclib_measure(csr_Y_pred, csr_Y_test, xcl_inv_ps, k=k)
        else:
            xclib_r = xclib_measure(csr_Y_pred, csr_Y_test, k=k)
        print("\txclib.evaluation.xc_metrics.{} with csr_matrices: {}s".format(
            xclib_measure.__name__, time() - t_start))

        # NOTE(review): the list-based call receives xclib's propensities
        # (xcl_inv_ps), not nxc_inv_ps; the two are asserted close above.
        # Confirm this is intended.
        t_start = time()
        if use_inv_ps:
            nxc_r = nxc_measure(Y_test, Y_pred, xcl_inv_ps, k=k)
        else:
            nxc_r = nxc_measure(Y_test, Y_pred, k=k)
        print("\tnapkinXC.measures.{} with lists: {}s".format(
            nxc_measure.__name__, time() - t_start))

        t_start = time()
        if use_inv_ps:
            csr_nxc_r = nxc_measure(csr_Y_test, csr_Y_pred, csr_nxc_inv_ps, k=k)
        else:
            csr_nxc_r = nxc_measure(csr_Y_test, csr_Y_pred, k=k)
        print("\tnapkinXC.measures.{} with csr_matrices: {}s".format(
            nxc_measure.__name__, time() - t_start))

        assert np.allclose(nxc_r, csr_nxc_r)
        assert np.allclose(nxc_r, xclib_r)