def recommend_batch(self, userids, N=10, urm=None, filter_already_liked=True,
                    with_scores=False, items_to_exclude=[], verbose=False):
    if not self._has_fit():
        return None
    R = data.get_urm() if urm is None else urm
    if userids is None or len(userids) == 0:
        print('Recommending for all users...')
        userids = None  # target_rows=None computes all rows

    # compute R^ by multiplying: R•S or S•R
    if self._matrix_mul_order == 'inverse':
        R_hat = sim.dot_product(self._sim_matrix, R, target_rows=userids, k=R.shape[0],
                                format_output='csr', verbose=verbose)
    else:
        R_hat = sim.dot_product(R, self._sim_matrix, target_rows=userids, k=R.shape[0],
                                format_output='csr', verbose=verbose)

    if filter_already_liked:
        # remove from R^ the items already present in R
        R_hat[R.nonzero()] = -np.inf
    if len(items_to_exclude) > 0:
        # TO-DO: test this part because it does not work!
        R_hat = R_hat.T
        R_hat[items_to_exclude] = -np.inf
        R_hat = R_hat.T

    # keep only the target rows
    if userids is not None:
        R_hat = R_hat[userids]
    else:
        userids = list(range(R_hat.shape[0]))

    recommendations = self._extract_top_items(R_hat, N=N)
    return self._insert_userids_as_first_col(userids, recommendations).tolist()
def recommend_batch(self, userids, urm=None, N=10, filter_already_liked=True,
                    with_scores=False, items_to_exclude=[], verbose=False):
    if not self._has_fit():
        return None
    if userids is not None:
        if len(userids) == 0:
            return []
        matrix = urm[userids] if urm is not None else data.get_urm()[userids]
    else:
        print('Recommending for all users...')
        matrix = urm if urm is not None else data.get_urm()
        userids = list(range(matrix.shape[0]))

    # compute R^ by multiplying R•S
    self.r_hat = sim.dot_product(matrix, self._sim_matrix, target_rows=None,
                                 k=data.N_TRACKS, format_output='csr', verbose=verbose)

    if filter_already_liked:
        # remove from R^ the items already present in each user profile
        user_profile_batch = matrix
        self.r_hat[user_profile_batch.nonzero()] = -np.inf
    if len(items_to_exclude) > 0:
        # TO-DO: test this part because it does not work!
        self.r_hat = self.r_hat.T
        self.r_hat[items_to_exclude] = -np.inf
        self.r_hat = self.r_hat.T

    recommendations = self._extract_top_items(self.r_hat, N=N)
    return self._insert_userids_as_first_col(userids, recommendations).tolist()
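
# Neither `_extract_top_items` nor `_insert_userids_as_first_col` is defined in
# this file. A minimal sketch of what `_extract_top_items` could look like,
# assuming R_hat is a scipy CSR matrix; the function name and exact semantics
# are assumptions, not the original code:
import numpy as np

def extract_top_items_sketch(r_hat, N):
    top_items = np.zeros((r_hat.shape[0], N), dtype=int)
    for row in range(r_hat.shape[0]):
        scores = r_hat[row].toarray().ravel()
        # argpartition finds the N best in O(n); only the top slice is sorted
        top_n = np.argpartition(scores, -N)[-N:]
        top_items[row] = top_n[np.argsort(-scores[top_n])]
    return top_items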
def get_r_hat(self, verbose=False):
    """
    Return the R^ matrix as R^ = R•S or R^ = S•R, depending on the
    configured multiplication order.
    """
    R = self.urm
    targetids = data.get_target_playlists()
    if self._matrix_mul_order == 'inverse':
        r_hat = sim.dot_product(self._sim_matrix, R, target_rows=targetids, k=R.shape[0],
                                format_output='csr', verbose=verbose)
    else:
        r_hat = sim.dot_product(R, self._sim_matrix, target_rows=targetids, k=R.shape[0],
                                format_output='csr', verbose=verbose)
    return r_hat[targetids]
def ipcf(df_train, UWP_sparse, n_items, alpha=0.25, q=5, k=10):
    # Construct the item-basket sparse matrix
    idMax_basket = df_train.BID.max() + 1
    item_basket_mat = sparse.coo_matrix(
        (np.ones(df_train.shape[0], dtype=int),
         (df_train.PID.values, df_train.BID.values)),
        shape=(n_items, idMax_basket))
    # Convert it to Compressed Sparse Row format to exploit its efficiency in arithmetic operations
    sparse_mat = sparse.csr_matrix(item_basket_mat)
    # Calculate the asymmetric cosine similarity matrix
    itemSimMat = sim.asymmetric_cosine(sparse_mat, None, alpha=alpha, k=k)
    # Recommend k items to users, raising the similarities elementwise to the power q
    user_recommendations = sim.dot_product(UWP_sparse, itemSimMat.power(q), k=k)
    return user_recommendations
def upcf(df_train, UWP_sparse, n_items, alpha=0.25, q=5, k=10):
    n_users = df_train['UID'].unique().shape[0]
    df_user_item = df_train.groupby(['UID', 'PID']).size().reset_index(name="bool")[['UID', 'PID']]
    # Generate the user-item matrix using the sparse COOrdinate format
    userItem_mat = sparse.coo_matrix(
        (np.ones(df_user_item.shape[0]),
         (df_user_item.UID.values, df_user_item.PID.values)),
        shape=(n_users, n_items))
    # Calculate the asymmetric cosine similarity matrix
    # (use the function arguments instead of hard-coded values)
    userSim = sim.asymmetric_cosine(sparse.csr_matrix(userItem_mat), alpha=alpha, k=k)
    # Recommend k items to users
    user_recommendations = sim.dot_product(userSim.power(q), UWP_sparse, k=k)
    return user_recommendations
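
# A toy usage sketch for ipcf/upcf above. The column names (UID, PID, BID) and
# the shape of UWP_sparse (users x products) are assumptions inferred from how
# the two functions index their inputs; the data below is made up.
import numpy as np
import pandas as pd
import scipy.sparse as sparse

df_train = pd.DataFrame({
    'UID': [0, 0, 1, 1, 2],  # user ids
    'PID': [0, 1, 1, 2, 0],  # product ids
    'BID': [0, 0, 1, 1, 2],  # basket ids
})
n_users, n_items = 3, 3

# user x product interaction matrix
UWP_sparse = sparse.csr_matrix(
    (np.ones(df_train.shape[0]),
     (df_train.UID.values, df_train.PID.values)),
    shape=(n_users, n_items))

item_based_recs = ipcf(df_train, UWP_sparse, n_items, alpha=0.25, q=5, k=2)
user_based_recs = upcf(df_train, UWP_sparse, n_items, alpha=0.25, q=5, k=2)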
def test_readme_code():
    import similaripy as sim
    import scipy.sparse as sps

    # create a random user-rating matrix (URM)
    urm = sps.random(1000, 2000, density=0.025)

    # normalize the matrix with BM25
    urm = sim.normalization.bm25(urm)

    # train the model with 50 knn per item
    model = sim.cosine(urm.T, k=50)

    # recommend 100 items to users 1, 14 and 8, filtering the items already seen by each user
    user_recommendations = sim.dot_product(urm, model.T, k=100,
                                           target_rows=[1, 14, 8], filter_cols=urm)

    print('Test README.md code passed!!!')
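
# sim.dot_product returns a sparse users x items matrix that keeps only the
# top-k scores per row. A small sketch (not part of the similaripy API) for
# turning one row of that output into a ranked list of item ids:
import numpy as np

def ranked_items(user_recommendations, user_id):
    row = user_recommendations.tocsr().getrow(user_id)
    order = np.argsort(-row.data)  # sort the stored scores, best first
    return row.indices[order]      # item ids in descending score order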
def check_similarity(m, k, rtol=0.0001, full=False):
    # cython implementations
    dot = sim.dot_product(m, k=k)
    cosine = sim.cosine(m, k=k)
    asy_cosine = sim.asymmetric_cosine(m, alpha=0.2, k=k)
    jaccard = sim.jaccard(m, k=k)
    dice = sim.dice(m, k=k)
    tversky = sim.tversky(m, alpha=0.8, beta=0.4, k=k)
    p3alpha = sim.p3alpha(m, alpha=0.8, k=k)
    rp3beta = sim.rp3beta(m, alpha=0.8, beta=0.4, k=k)

    # pure-python reference implementations
    dot2 = py_dot(m, k)
    cosine2 = py_cosine(m, k).tocsr()
    asy_cosine2 = py_asy_cosine(m, 0.2, k=k)
    jaccard2 = py_jaccard(m, k)
    dice2 = py_dice(m, k)
    tversky2 = py_tversky(m, alpha=0.8, beta=0.4, k=k)
    p3alpha2 = py_p3alpha(m, alpha=0.8, k=k)
    rp3beta2 = py_rp3beta(m, alpha=0.8, beta=0.4, k=k)

    # compare the checksums of each pair
    np.testing.assert_allclose(check_sum(dot), check_sum(dot2), rtol=rtol, err_msg='dot error')
    np.testing.assert_allclose(check_sum(cosine), check_sum(cosine2), rtol=rtol, err_msg='cosine error')
    np.testing.assert_allclose(check_sum(asy_cosine), check_sum(asy_cosine2), rtol=rtol, err_msg='asy_cosine error')
    np.testing.assert_allclose(check_sum(jaccard), check_sum(jaccard2), rtol=rtol, err_msg='jaccard error')
    np.testing.assert_allclose(check_sum(dice), check_sum(dice2), rtol=rtol, err_msg='dice error')
    np.testing.assert_allclose(check_sum(tversky), check_sum(tversky2), rtol=rtol, err_msg='tversky error')
    np.testing.assert_allclose(check_sum(p3alpha), check_sum(p3alpha2), rtol=rtol, err_msg='p3alpha error')
    np.testing.assert_allclose(check_sum(rp3beta), check_sum(rp3beta2), rtol=rtol, err_msg='rp3beta error')

    # optionally compare full rows element by element
    if full:
        np.testing.assert_(check_full(dot, dot2, rtol) == 0, msg='dot error')
        np.testing.assert_(check_full(cosine, cosine2, rtol) == 0, msg='cosine error')
        np.testing.assert_(check_full(asy_cosine, asy_cosine2, rtol) == 0, msg='asy_cosine error')
        np.testing.assert_(check_full(jaccard, jaccard2, rtol) == 0, msg='jaccard error')
        np.testing.assert_(check_full(dice, dice2, rtol) == 0, msg='dice error')
        np.testing.assert_(check_full(tversky, tversky2, rtol) == 0, msg='tversky error')
        np.testing.assert_(check_full(p3alpha, p3alpha2, rtol) == 0, msg='p3alpha error')
        np.testing.assert_(check_full(rp3beta, rp3beta2, rtol) == 0, msg='rp3beta error')
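
# The py_* reference implementations compared against above are not defined in
# this file. A plausible sketch of py_cosine, assuming it computes the full
# row-row cosine similarity with scipy and then keeps the k largest entries
# per row (the exact tie-breaking and self-similarity handling are guesses):
import numpy as np
import scipy.sparse as sps

def py_cosine_sketch(m, k):
    m = sps.csr_matrix(m, dtype=np.float64)
    # L2-normalize the rows, guarding against empty rows
    norms = np.sqrt(np.asarray(m.multiply(m).sum(axis=1))).ravel()
    inv_norms = sps.diags(1.0 / np.maximum(norms, 1e-12))
    normalized = inv_norms @ m
    s = (normalized @ normalized.T).tocsr()  # full cosine similarity matrix
    # zero out everything except the k largest stored entries in each row
    for row in range(s.shape[0]):
        start, end = s.indptr[row], s.indptr[row + 1]
        data = s.data[start:end]
        if data.size > k:
            data[np.argsort(-data)[k:]] = 0.0
    s.eliminate_zeros()
    return s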
def get_r_hat(self, verbose=False):
    """
    Return the R^ matrix as R^ = S•H, computed ONLY for the last sequences
    of the target playlists.
    """
    return sim.dot_product(self._sim_matrix, self.H, target_rows=self.target_indices,
                           k=self.H.shape[0], format_output='csr', verbose=verbose)
def get_r_hat(self):
    targetids = data.get_target_playlists()
    r_hat = sim.dot_product(self.urm, self._sim_matrix, target_rows=targetids,
                            k=data.N_TRACKS, format_output='csr')
    return r_hat[targetids]
def instance_selection(model, X_train, y_train, X_valid, y_valid, criterion,
                       sparse=True, add_channel=True, flatten=False,
                       treshold=False, return_influences=False):
    hessian_matrix = compute_hessian(model, X_train, y_train, criterion, sparse=sparse,
                                     add_channel=add_channel, flatten=flatten,
                                     treshold=treshold)
    if sparse:
        # nnz / n^2 is the fraction of stored entries, i.e. the density
        print('Hessian matrix density: {:.2f}%'.format(
            hessian_matrix.nnz / (hessian_matrix.shape[0] ** 2) * 100))
        hessian_matrix_inv = inv(hessian_matrix)
    else:
        hessian_matrix_inv = torch.inverse(hessian_matrix)

    selected_indices = []
    influences = []
    for i, train_sample in enumerate(tqdm(X_train, desc='Instance selection')):
        # gradient of the loss at training sample i
        model.zero_grad()
        train_sample = torch.Tensor(np.expand_dims(train_sample, axis=0))
        label = torch.LongTensor([y_train[i]])
        train_sample, model, label = train_sample.to(device), model.to(device), label.to(device)
        if flatten:
            train_sample = train_sample.view(train_sample.shape[0], -1)
        output = model(train_sample)
        loss = criterion(output, label)
        loss.backward()
        jacobian_i = torch.cat([param.grad.view(-1) for param in model.parameters()])

        # intermediate = H^-1 @ grad_i
        if sparse:
            jacobian_i = sps.csc_matrix(jacobian_i.tolist()).transpose(copy=False)
            intermediate = sim.dot_product(hessian_matrix_inv, jacobian_i, verbose=False)
        else:
            jacobian_i = jacobian_i.to(device)
            intermediate = torch.matmul(hessian_matrix_inv, jacobian_i)

        # accumulate -grad_j^T @ H^-1 @ grad_i over the validation set
        j_loss = 0
        for j, valid_sample in enumerate(X_valid):
            model.zero_grad()
            valid_sample = torch.Tensor(np.expand_dims(valid_sample, axis=0))
            label = torch.LongTensor([y_valid[j]])
            valid_sample, model, label = valid_sample.to(device), model.to(device), label.to(device)
            if flatten:
                valid_sample = valid_sample.view(valid_sample.shape[0], -1)
            output = model(valid_sample)
            loss = criterion(output, label)
            loss.backward()
            jacobian_j = torch.cat([param.grad.view(-1) for param in model.parameters()])
            if sparse:
                jacobian_j = sps.csc_matrix(jacobian_j.tolist())
                j_loss += sim.dot_product(jacobian_j * (-1), intermediate, verbose=False).data[0]
            else:
                jacobian_j = jacobian_j.to(device)
                j_loss += torch.matmul(jacobian_j * (-1), intermediate)

        influences.append(j_loss)
        # a non-positive influence means upweighting sample i does not increase
        # the validation loss, so the sample is kept
        if j_loss <= 0:
            selected_indices.append(i)

    return influences if return_influences else selected_indices
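
# What the nested loops in instance_selection compute, written out on dense toy
# data: the influence of training point i on the total validation loss is
# sum_j( -grad_j^T @ H^{-1} @ grad_i ), in the spirit of influence functions
# (Koh & Liang, 2017). The gradients and Hessian below are made up.
import numpy as np

rng = np.random.default_rng(0)
n_params = 5
hessian = np.eye(n_params) * 2.0             # stand-in for compute_hessian output
grad_train_i = rng.normal(size=n_params)     # loss gradient at training point i
grad_valid = rng.normal(size=(3, n_params))  # loss gradients at 3 validation points

intermediate = np.linalg.inv(hessian) @ grad_train_i
influence_i = sum(-g_j @ intermediate for g_j in grad_valid)
# influence_i <= 0 means upweighting point i would not increase the validation
# loss, which is the condition instance_selection uses to keep the point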