def test_predict_ranks():
    """Ranks over all items form a permutation of 0..n_items-1.

    Also checks that all-tied predictions produce rank 0 everywhere,
    and that a wrongly shaped input matrix raises ValueError.
    """
    n_users, n_items = 10, 100

    interactions = sp.coo_matrix((n_users, n_items), dtype=np.float32)
    model = LightFM()
    model.fit_partial(interactions)

    # Request ranks for every (user, item) pair.
    all_pairs = sp.csr_matrix(np.ones((n_users, n_items)))
    ranks = model.predict_rank(all_pairs, num_threads=2).todense()

    assert np.all(ranks.min(axis=1) == 0)
    assert np.all(ranks.max(axis=1) == n_items - 1)

    expected = np.arange(n_items)
    for uid in range(n_users):
        # Each user's ranks, sorted, must be exactly 0..n_items-1.
        assert np.all(np.sort(ranks[uid]) == expected)

    # Zero out the model so every prediction is tied.
    model.user_embeddings = np.zeros_like(model.user_embeddings)
    model.item_embeddings = np.zeros_like(model.item_embeddings)
    model.user_biases = np.zeros_like(model.user_biases)
    model.item_biases = np.zeros_like(model.item_biases)

    ranks = model.predict_rank(all_pairs, num_threads=2).todense()

    # With all scores equal, every rank collapses to zero.
    assert np.all(ranks.min(axis=1) == 0)
    assert np.all(ranks.max(axis=1) == 0)

    # A matrix with the wrong dimensions must be rejected.
    with pytest.raises(ValueError):
        model.predict_rank(sp.csr_matrix((5, 5)), num_threads=2)
def test_predict_ranks():
    """Check predict_rank: permutation property, train-set exclusion, ties.

    This variant expects pessimistic tie handling: when all predictions
    are equal, every item receives the maximum possible rank.
    """
    no_users, no_items = (10, 100)

    # Seeded for reproducibility; the dead all-zeros coo_matrix that was
    # immediately overwritten here has been removed.
    train = sp.rand(no_users, no_items, format='csr', random_state=42)

    model = LightFM()
    model.fit_partial(train)

    # Compute ranks for all items.
    rank_input = sp.csr_matrix(np.ones((no_users, no_items)))
    ranks = model.predict_rank(rank_input, num_threads=2).todense()

    # Every row must be a permutation of 0..no_items-1.
    assert np.all(ranks.min(axis=1) == 0)
    assert np.all(ranks.max(axis=1) == no_items - 1)

    for row in range(no_users):
        assert np.all(np.sort(ranks[row]) == np.arange(no_items))

    # Train set exclusions. All ranks should be zero
    # if train interactions is dense.
    ranks = model.predict_rank(rank_input,
                               train_interactions=rank_input).todense()
    assert np.all(ranks == 0)

    # Max rank should be num_items - 1 - number of positives
    # in train in that row.
    ranks = model.predict_rank(rank_input,
                               train_interactions=train).todense()
    assert np.all(np.squeeze(np.array(ranks.max(axis=1))) ==
                  no_items - 1 - np.squeeze(np.array(train.getnnz(axis=1))))

    # Make sure ranks are computed pessimistically when
    # there are ties (that is, equal predictions for every
    # item will assign maximum rank to each).
    model.user_embeddings = np.zeros_like(model.user_embeddings)
    model.item_embeddings = np.zeros_like(model.item_embeddings)
    model.user_biases = np.zeros_like(model.user_biases)
    model.item_biases = np.zeros_like(model.item_biases)

    ranks = model.predict_rank(rank_input, num_threads=2).todense()

    # no_items - 1 instead of the hard-coded 99: the expected rank is the
    # maximum possible rank, whatever the item count.
    assert np.all(ranks.min(axis=1) == no_items - 1)
    assert np.all(ranks.max(axis=1) == no_items - 1)

    # Wrong input dimensions must be rejected.
    with pytest.raises(ValueError):
        model.predict_rank(sp.csr_matrix((5, 5)), num_threads=2)
def test_predict_ranks():
    """Check predict_rank: permutation property, train-set exclusion, ties.

    This variant expects optimistic tie handling: when all predictions
    are equal, every item receives rank 0.
    """
    no_users, no_items = (10, 100)

    # random_state added so the test is deterministic across runs; the
    # dead all-zeros coo_matrix that was immediately overwritten here
    # has been removed.
    train = sp.rand(no_users, no_items, format='csr', random_state=42)

    model = LightFM()
    model.fit_partial(train)

    # Compute ranks for all items.
    rank_input = sp.csr_matrix(np.ones((no_users, no_items)))
    ranks = model.predict_rank(rank_input, num_threads=2).todense()

    # Every row must be a permutation of 0..no_items-1.
    assert np.all(ranks.min(axis=1) == 0)
    assert np.all(ranks.max(axis=1) == no_items - 1)

    for row in range(no_users):
        assert np.all(np.sort(ranks[row]) == np.arange(no_items))

    # Train set exclusions. All ranks should be zero
    # if train interactions is dense.
    ranks = model.predict_rank(rank_input,
                               train_interactions=rank_input).todense()
    assert np.all(ranks == 0)

    # Max rank should be num_items - 1 - number of positives
    # in train in that row.
    ranks = model.predict_rank(rank_input,
                               train_interactions=train).todense()
    assert np.all(np.squeeze(np.array(ranks.max(axis=1))) ==
                  no_items - 1 - np.squeeze(np.array(train.getnnz(axis=1))))

    # Make sure invariants hold when there are ties.
    model.user_embeddings = np.zeros_like(model.user_embeddings)
    model.item_embeddings = np.zeros_like(model.item_embeddings)
    model.user_biases = np.zeros_like(model.user_biases)
    model.item_biases = np.zeros_like(model.item_biases)

    ranks = model.predict_rank(rank_input, num_threads=2).todense()

    assert np.all(ranks.min(axis=1) == 0)
    assert np.all(ranks.max(axis=1) == 0)

    # Wrong input dimensions must be rejected.
    with pytest.raises(ValueError):
        model.predict_rank(sp.csr_matrix((5, 5)), num_threads=2)
def test_precision_at_k_with_ties():
    """With an all-zero model every score is tied; pessimistic ranking
    should then drive precision@k to zero."""
    n_users, n_items = 10, 100
    train, test = _generate_data(n_users, n_items)

    model = LightFM(loss="bpr")
    model.fit_partial(train)

    # Flatten the model: all embeddings and biases become zero, so every
    # item gets an identical prediction.
    for attr in ("user_embeddings", "item_embeddings",
                 "user_biases", "item_biases"):
        setattr(model, attr, np.zeros_like(getattr(model, attr)))

    precision = evaluation.precision_at_k(model, test, k=10)

    # Pessimistic precision with all ties.
    assert precision.mean() == 0.0
def test_predict_scores(num_threads=2):
    """Check predict_score against predict and predict_rank.

    Covers: agreement with model.predict, invariance under threading and
    representation precomputation, rank reconstruction from scores,
    train-set exclusion, ties, and wrong input dimensions.
    """
    no_users, no_items = (10, 100)

    # Seeded for reproducibility; the dead all-zeros coo_matrix that was
    # immediately overwritten here has been removed.
    train = sp.rand(no_users, no_items, format='csr', random_state=42)

    model = LightFM()
    model.fit_partial(train)

    # Compute scores and check that the results equal model.predict.
    predict_input = sp.csr_matrix(np.ones((no_users, no_items)))
    scores = model.predict_score(predict_input,
                                 num_threads=num_threads).todense()

    for uid in range(no_users):
        scores_arr = model.predict(np.repeat(uid, no_items),
                                   np.arange(no_items))
        score_slice = np.array(scores)[uid, :]
        assert np.array_equal(score_slice, scores_arr)

    # Check that precomputation and parallelisation do not
    # change the results.
    scores_serial = model.predict_score(predict_input,
                                        num_threads=1).todense()
    scores_no_prec = model.predict_score(
        predict_input,
        num_threads=num_threads,
        precompute_representations=False).todense()
    scores_ser_no_prec = model.predict_score(
        predict_input,
        num_threads=1,
        precompute_representations=False).todense()

    assert np.array_equal(scores, scores_serial)
    assert np.array_equal(scores, scores_no_prec)
    assert np.array_equal(scores, scores_ser_no_prec)

    # Compute ranks and compare with ranks computed from scores.
    ranks = model.predict_rank(predict_input,
                               num_threads=num_threads).todense()

    def rank_scores(s):
        # Ranks from scores as in http://stackoverflow.com/a/14672797/5251962
        # (unused first return value of np.unique dropped).
        _, inverse = np.unique(s, return_inverse=True)
        return len(s) - 1 - (np.cumsum(np.bincount(inverse)) - 1)[inverse]

    check_ranks = np.apply_along_axis(rank_scores, 1, scores)
    assert np.array_equal(ranks, check_ranks)

    # Train set exclusions. All scores should be zero
    # if train interactions is dense.
    scores = model.predict_score(predict_input,
                                 train_interactions=predict_input).todense()
    assert np.all(scores == 0)

    # Make sure invariants hold when there are ties.
    model.user_embeddings = np.zeros_like(model.user_embeddings)
    model.item_embeddings = np.zeros_like(model.item_embeddings)
    model.user_biases = np.zeros_like(model.user_biases)
    model.item_biases = np.zeros_like(model.item_biases)

    scores = model.predict_score(predict_input,
                                 num_threads=num_threads).todense()
    assert np.all(scores.min(axis=1) == 0)
    assert np.all(scores.max(axis=1) == 0)

    # Wrong input dimensions must be rejected.
    with pytest.raises(ValueError):
        model.predict_score(sp.csr_matrix((5, 5)), num_threads=num_threads)
def test_predict_scores(num_threads=2):
    """Check predict_score against predict and predict_rank.

    Covers: agreement with model.predict, invariance under threading and
    representation precomputation, rank reconstruction from scores,
    train-set exclusion, ties, and wrong input dimensions.
    """
    no_users, no_items = (10, 100)

    # Seeded for reproducibility; the dead all-zeros coo_matrix that was
    # immediately overwritten here has been removed.
    train = sp.rand(no_users, no_items, format='csr', random_state=42)

    model = LightFM()
    model.fit_partial(train)

    # Compute scores and check that the results equal model.predict.
    predict_input = sp.csr_matrix(np.ones((no_users, no_items)))
    scores = model.predict_score(predict_input,
                                 num_threads=num_threads).todense()

    for uid in range(no_users):
        scores_arr = model.predict(np.repeat(uid, no_items),
                                   np.arange(no_items))
        score_slice = np.array(scores)[uid, :]
        assert np.array_equal(score_slice, scores_arr)

    # Check that precomputation and parallelisation do not
    # change the results.
    scores_serial = model.predict_score(predict_input,
                                        num_threads=1).todense()
    scores_no_prec = model.predict_score(
        predict_input,
        num_threads=num_threads,
        precompute_representations=False).todense()
    scores_ser_no_prec = model.predict_score(
        predict_input,
        num_threads=1,
        precompute_representations=False).todense()

    assert np.array_equal(scores, scores_serial)
    assert np.array_equal(scores, scores_no_prec)
    assert np.array_equal(scores, scores_ser_no_prec)

    # Compute ranks and compare with ranks computed from scores.
    ranks = model.predict_rank(predict_input,
                               num_threads=num_threads).todense()

    def rank_scores(s):
        # Ranks from scores as in http://stackoverflow.com/a/14672797/5251962
        # (unused first return value of np.unique dropped).
        _, inverse = np.unique(s, return_inverse=True)
        return len(s) - 1 - (np.cumsum(np.bincount(inverse)) - 1)[inverse]

    check_ranks = np.apply_along_axis(rank_scores, 1, scores)
    assert np.array_equal(ranks, check_ranks)

    # Train set exclusions. All scores should be zero
    # if train interactions is dense.
    scores = model.predict_score(predict_input,
                                 train_interactions=predict_input).todense()
    assert np.all(scores == 0)

    # Make sure invariants hold when there are ties.
    model.user_embeddings = np.zeros_like(model.user_embeddings)
    model.item_embeddings = np.zeros_like(model.item_embeddings)
    model.user_biases = np.zeros_like(model.user_biases)
    model.item_biases = np.zeros_like(model.item_biases)

    scores = model.predict_score(predict_input,
                                 num_threads=num_threads).todense()
    assert np.all(scores.min(axis=1) == 0)
    assert np.all(scores.max(axis=1) == 0)

    # Wrong input dimensions must be rejected.
    with pytest.raises(ValueError):
        model.predict_score(sp.csr_matrix((5, 5)), num_threads=num_threads)
### Convert data to sparse matrix and split for cv### #Known jobID's (["e3625ad", "39ee3f", "45de815", "40a2c38","63146c6"]) _str,_spr = ID_selector("dfasdfadfdsd") _train, _test = random_train_test_split(_spr, test_percentage=0.25, random_state = None) ### create and train LightFM model ### NUM_THREADS = 4 NUM_COMPONENTS = 30 NUM_EPOCHS = 300 ITEM_ALPHA = 1e-6 _model = LightFM(loss='warp' , item_alpha=ITEM_ALPHA , no_components=NUM_COMPONENTS) _model.item_biases = 0.0 %time _model_fit = _model.fit(_train, epochs=NUM_EPOCHS, num_threads=NUM_THREADS) #%time pos1_modelTest = pos1_model.fit(pos1_test, epochs=NUM_EPOCHS, num_threads=NUM_THREADS) train_precision = precision_at_k(_model, _train, k=10).mean() print('train precision at k: %s' %train_precision) test_precision = precision_at_k(_model, _test, k=10).mean() print('test precision at k: %s' %test_precision) train_auc = auc_score(_model, _train, num_threads=NUM_THREADS).mean() print('train AUC: %s' %train_auc) test_auc = auc_score(_model, _test, num_threads=NUM_THREADS).mean() print('test AUC: %s' %test_auc)