def test_likelihood_kernel_L_gram_factor(self):
    """Check sample cardinality for a likelihood DPP built from a Gram factor.

    The likelihood kernel is L = phi.T phi with phi a (rank, N) factor.
    """
    phi = rndm.randn(self.rank, self.N)
    dpp = FiniteDPP(kernel_type='likelihood',
                    projection=False,
                    L_gram_factor=phi)

    for size in self.sizes:
        # Exact k-DPP samplers.
        for exact_mode in ('GS', 'GS_bis', 'KuTa12'):
            dpp.flush_samples()
            for _ in range(self.nb_samples):
                dpp.sample_exact_k_dpp(size, exact_mode)
            self.check_right_cardinality(dpp, dpp.list_of_samples)

        # MCMC k-DPP sampler. NOTE(review): the original loop tag
        # ('AED', 'AD') was never passed to sample_mcmc_k_dpp, so this
        # simply runs the default chain twice — preserved as-is.
        for _ in ('AED', 'AD'):
            dpp.flush_samples()
            dpp.sample_mcmc_k_dpp(size, nb_iter=self.nb_samples)
            self.check_right_cardinality(dpp, dpp.list_of_samples[0])
def test_likelihood_kernel(self):
    """Check sample cardinality for a likelihood DPP given the full matrix L.

    L is built from random positive eigenvalues (1 + geometric) and an
    orthonormal eigenbasis obtained via a thin QR factorization.
    """
    eig_vals = 1 + rndm.geometric(p=0.5, size=self.rank)
    eig_vecs, _ = qr(rndm.randn(self.N, self.rank), mode='economic')
    likelihood_matrix = (eig_vecs * eig_vals).dot(eig_vecs.T)
    dpp = FiniteDPP(kernel_type='likelihood',
                    projection=False,
                    L=likelihood_matrix)

    for size in self.sizes:
        # Exact k-DPP samplers.
        for exact_mode in ('GS', 'GS_bis', 'KuTa12'):
            dpp.flush_samples()
            for _ in range(self.nb_samples):
                dpp.sample_exact_k_dpp(size, exact_mode)
            self.check_right_cardinality(dpp, dpp.list_of_samples)

        # MCMC k-DPP sampler. NOTE(review): the original loop tag
        # ('AED', 'AD') was never passed on, so this runs the default
        # chain twice — preserved as-is.
        for _ in ('AED', 'AD'):
            dpp.flush_samples()
            dpp.sample_mcmc_k_dpp(size, nb_iter=self.nb_samples)
            self.check_right_cardinality(dpp, dpp.list_of_samples[0])
def test_kernel_eig(self):
    """Check sample cardinality for a correlation DPP defined by its
    eigendecomposition (eigenvalues in (0, 1), orthonormal eigenvectors).
    """
    eig_vals = rndm.rand(self.rank)
    eig_vecs, _ = qr(rndm.randn(self.N, self.rank), mode='economic')
    dpp = FiniteDPP(kernel_type='correlation',
                    projection=False,
                    K_eig_dec=(eig_vals, eig_vecs))

    for size in self.sizes:
        # Exact k-DPP samplers.
        for exact_mode in ('GS', 'GS_bis', 'KuTa12'):
            dpp.flush_samples()
            for _ in range(self.nb_samples):
                dpp.sample_exact_k_dpp(size, exact_mode)
            self.check_right_cardinality(dpp, dpp.list_of_samples)

        # MCMC k-DPP sampler. NOTE(review): the original loop tag
        # ('AED', 'AD') was never passed on, so this runs the default
        # chain twice — preserved as-is.
        for _ in ('AED', 'AD'):
            dpp.flush_samples()
            dpp.sample_mcmc_k_dpp(size, nb_iter=self.nb_samples)
            self.check_right_cardinality(dpp, dpp.list_of_samples[0])
def test_proj_dpp_sampler_as_kDPP_with_correlation_kernel_A_zono(self):
    """Sample a projection DPP as a k-DPP with k = rank(K).

    The DPP is defined by the orthogonal projection correlation kernel
    K = A.T (A A.T)^-1 A built from A_zono; the samples must reproduce
    the correct 1- and 2-point inclusion probabilities.
    """
    A = rndm.randn(self.rank, self.N)
    dpp = FiniteDPP(kernel_type='correlation', projection=True, A_zono=A)

    dpp.flush_samples()
    for _ in range(self.nb_samples):
        dpp.sample_exact_k_dpp(self.rank)

    samples = dpp.list_of_samples
    self.assertTrue(self.singleton_adequation(dpp, samples))
    self.assertTrue(self.doubleton_adequation(dpp, samples))
def test_proj_dpp_sampler_as_kDPP_with_correlation_kernel(self):
    """Sample a projection DPP as a k-DPP with k = rank(K).

    The DPP is defined by an explicit orthogonal projection correlation
    kernel K; the samples must reproduce the correct 1- and 2-point
    inclusion probabilities.
    """
    eig_vals = np.ones(self.rank)
    eig_vecs, _ = qr(rndm.randn(self.N, self.rank), mode='economic')
    projection_kernel = (eig_vecs * eig_vals).dot(eig_vecs.T)
    dpp = FiniteDPP(kernel_type='correlation',
                    projection=True,
                    K=projection_kernel)

    dpp.flush_samples()
    for _ in range(self.nb_samples):
        dpp.sample_exact_k_dpp(self.rank)

    samples = dpp.list_of_samples
    self.assertTrue(self.singleton_adequation(dpp, samples))
    self.assertTrue(self.doubleton_adequation(dpp, samples))
def test_proj_dpp_sampler_from_kernel_mode_Schur(self):
    """Validate the 'Schur' exact sampling mode.

    The DPP is defined by an orthogonal projection correlation kernel K;
    conditionals are evaluated via Schur complement updates. Samples must
    reproduce the correct 1- and 2-point inclusion probabilities.
    """
    eig_vals = np.ones(self.rank)
    eig_vecs, _ = qr(rndm.randn(self.N, self.rank), mode='economic')
    projection_kernel = (eig_vecs * eig_vals).dot(eig_vecs.T)
    dpp = FiniteDPP(kernel_type='correlation',
                    projection=True,
                    K=projection_kernel)

    dpp.flush_samples()
    for _ in range(self.nb_samples):
        dpp.sample_exact(mode='Schur')

    samples = dpp.list_of_samples
    self.assertTrue(self.singleton_adequation(dpp, samples))
    self.assertTrue(self.doubleton_adequation(dpp, samples))
def test_proj_dpp_sampler_from_eigdec_mode_KuTa12(self):
    """Validate the 'KuTa12' exact sampling mode.

    The DPP is defined by the eigendecomposition of an orthogonal
    projection correlation kernel K. Complexity
    :math:`\\mathcal{O}(N rank^3)`. Samples must reproduce the correct
    1- and 2-point inclusion probabilities.
    """
    eig_vals = np.ones(self.rank)
    eig_vecs, _ = qr(rndm.randn(self.N, self.rank), mode='economic')
    dpp = FiniteDPP(kernel_type='correlation',
                    projection=True,
                    K_eig_dec=(eig_vals, eig_vecs))

    dpp.flush_samples()
    for _ in range(self.nb_samples):
        dpp.sample_exact(mode='KuTa12')

    samples = dpp.list_of_samples
    self.assertTrue(self.singleton_adequation(dpp, samples))
    self.assertTrue(self.doubleton_adequation(dpp, samples))
def test_proj_dpp_sampler_generic_kernel(self):
    """Validate the 'Chol' exact sampling mode through the generic path.

    The DPP is defined by an orthogonal projection correlation kernel K,
    but ``projection=False`` forces the generic
    :py:func:`~dppy.exact_sampling.dpp_sampler_generic_kernel` to be
    used. Samples must reproduce the correct 1- and 2-point inclusion
    probabilities.
    """
    eig_vals = np.ones(self.rank)
    eig_vecs, _ = qr(rndm.randn(self.N, self.rank), mode='economic')
    projection_kernel = (eig_vecs * eig_vals).dot(eig_vecs.T)
    dpp = FiniteDPP(kernel_type='correlation',
                    projection=False,
                    K=projection_kernel)

    dpp.flush_samples()
    for _ in range(self.nb_samples):
        dpp.sample_exact(mode='Chol')

    samples = dpp.list_of_samples
    self.assertTrue(self.singleton_adequation(dpp, samples))
    self.assertTrue(self.doubleton_adequation(dpp, samples))
def run_adequation_tests(self, kernel_type, list_dpp_params,
                         dict_sampler_mode_param, adequation_to_check):
    """Run every adequation check over each (sampler, mode, DPP) combination.

    For each sampler and sampling mode, each DPP parametrization in
    ``list_dpp_params`` is instantiated, sampled via ``self.get_samples``,
    and the resulting samples are validated against every adequation type
    in ``adequation_to_check``. Each combination runs in its own subTest.
    """
    for sampler, mode_dict in dict_sampler_mode_param.items():
        for mode, mode_params in mode_dict.items():
            for idx, (projection, params) in enumerate(list_dpp_params):
                dpp = FiniteDPP(kernel_type, projection=projection, **params)
                dpp_tag = (dpp.kernel_type, dpp.projection, dpp.params_keys)
                sampler_tag = (sampler, mode, mode_params)
                with self.subTest(idx=idx, dpp=dpp_tag, sampler=sampler_tag):
                    dpp.flush_samples()
                    samples = self.get_samples(dpp, sampler, mode,
                                               **mode_params)
                    for adeq_typ in adequation_to_check:
                        with self.subTest(adequation=adeq_typ):
                            ok, msg = self.adequation(adeq_typ, samples, dpp)
                            self.assertTrue(ok, msg)
def test_proj_dpp_sampler_as_kDPP_with_likelihood_kernel_eig_proj_true(
        self):
    """Sample a projection DPP as a k-DPP with k = rank(K).

    The DPP is defined by an orthogonal projection likelihood kernel L
    given via its eigendecomposition (projection=True).
    """
    eig_vals = np.ones(self.rank)
    eig_vecs, _ = qr(rndm.randn(self.N, self.rank), mode='economic')
    dpp = FiniteDPP(kernel_type='likelihood',
                    projection=True,
                    L_eig_dec=(eig_vals, eig_vecs))

    dpp.flush_samples()
    for _ in range(self.nb_samples):
        dpp.sample_exact_k_dpp(self.rank)

    # For a projection kernel K = L, so the K-based adequation checks
    # can be reused after aliasing L to K.
    dpp.compute_L()
    dpp.K = dpp.L

    samples = dpp.list_of_samples
    self.assertTrue(self.singleton_adequation(dpp, samples))
    self.assertTrue(self.doubleton_adequation(dpp, samples))
def test_proj_dpp_sampler_from_kernel_mode_Chol(self):
    """Validate the 'Chol' exact sampling mode.

    The DPP is defined by an orthogonal projection correlation kernel K.
    Complexity :math:`\\mathcal{O}(N rank^2)`. Samples must reproduce
    the correct 1- and 2-point inclusion probabilities.

    .. seealso::

        - :cite:`Pou19` Algorithm 1
    """
    eig_vals = np.ones(self.rank)
    eig_vecs, _ = qr(rndm.randn(self.N, self.rank), mode='economic')
    projection_kernel = (eig_vecs * eig_vals).dot(eig_vecs.T)
    dpp = FiniteDPP(kernel_type='correlation',
                    projection=True,
                    K=projection_kernel)

    dpp.flush_samples()
    for _ in range(self.nb_samples):
        dpp.sample_exact(mode='Chol')

    samples = dpp.list_of_samples
    self.assertTrue(self.singleton_adequation(dpp, samples))
    self.assertTrue(self.doubleton_adequation(dpp, samples))
def test_proj_dpp_sampler_from_kernel_mode_GS(self):
    """Validate the 'GS' (Gram-Schmidt) exact sampling mode.

    The DPP is defined by an orthogonal projection correlation kernel K.
    Complexity :math:`\\mathcal{O}(N rank^2)`. This is the default
    sampler when calling `.sample_exact()`. Samples must reproduce the
    correct 1- and 2-point inclusion probabilities.
    """
    eig_vals = np.ones(self.rank)
    eig_vecs, _ = qr(rndm.randn(self.N, self.rank), mode='economic')
    projection_kernel = (eig_vecs * eig_vals).dot(eig_vecs.T)
    dpp = FiniteDPP(kernel_type='correlation',
                    projection=True,
                    K=projection_kernel)

    dpp.flush_samples()
    for _ in range(self.nb_samples):
        dpp.sample_exact(mode='GS')

    samples = dpp.list_of_samples
    self.assertTrue(self.singleton_adequation(dpp, samples))
    self.assertTrue(self.doubleton_adequation(dpp, samples))
def test_proj_dpp_sampler_as_kDPP_with_likelihood_kernel_eig_proj_false(
        self):
    """Sample a projection DPP as a k-DPP with k = rank(K).

    The DPP is defined by an orthogonal projection likelihood kernel L
    via its full eigendecomposition, with ``projection=False`` so the
    sampler goes through the computation of elementary symmetric
    polynomials, etc.
    """
    # Rank-deficient spectrum: the first `rank` eigenvalues are 1, the
    # rest are 0, with a full N x N orthonormal eigenbasis.
    eig_vals = np.zeros(self.N)
    eig_vals[:self.rank] = 1.0
    eig_vecs, _ = qr(rndm.randn(self.N, self.N), mode='economic')
    dpp = FiniteDPP(kernel_type='likelihood',
                    projection=False,
                    L_eig_dec=(eig_vals, eig_vecs))

    dpp.flush_samples()
    for _ in range(self.nb_samples):
        dpp.sample_exact_k_dpp(self.rank)

    # For a projection kernel K = L, so the K-based adequation checks
    # can be reused after aliasing L to K.
    dpp.compute_L()
    dpp.K = dpp.L

    samples = dpp.list_of_samples
    self.assertTrue(self.singleton_adequation(dpp, samples))
    self.assertTrue(self.doubleton_adequation(dpp, samples))
def get_diverse(X, y, size=10):
    """Return a diverse subset of (X, y) sampled with a likelihood k-DPP.

    Small datasets (10 rows or fewer) are returned unchanged, as is the
    full data when the k-DPP sampler raises ValueError — a heuristic for
    an ill-conditioned covariance matrix.
    """
    # Guard: nothing to subsample for small datasets.
    if not X.shape[0] > 10:
        return (X, y)

    # Heuristic: keep half of the rows (overrides the `size` argument).
    size = int(X.shape[0] / 2)

    K = rbf_kernel(X, X)
    # Likelihood kernel L = K (I - K)^-1.
    L = np.matmul(K, np.linalg.inv(np.eye(K.shape[0]) - K))

    DPP = FiniteDPP('likelihood', L=L)
    DPP.flush_samples()
    try:
        DPP.sample_exact_k_dpp(size=size)
    except ValueError:
        # Sampling failed (e.g. ill-conditioned kernel): fall back.
        return (X, y)

    chosen = DPP.list_of_samples[0]
    return (X[chosen, :], y[chosen, :])
def test_correlation_kernel_projection_A_zono(self):
    """Check sample cardinality for a projection correlation DPP defined
    via A_zono, with both exact and MCMC samplers.
    """
    A = rndm.randn(self.rank, self.N)
    dpp = FiniteDPP(kernel_type='correlation', projection=True, A_zono=A)

    # Exact samplers.
    for exact_mode in ('GS', 'GS_bis', 'KuTa12'):
        dpp.flush_samples()
        for _ in range(self.nb_samples):
            dpp.sample_exact(exact_mode)
        self.check_right_cardinality(dpp, dpp.list_of_samples)

    # MCMC samplers; the whole chain is stored as a single entry.
    for mcmc_mode in ('zonotope', 'E'):
        dpp.flush_samples()
        dpp.sample_mcmc(mcmc_mode, size=self.rank, nb_iter=self.nb_samples)
        self.check_right_cardinality(dpp, dpp.list_of_samples[0])
def test_correlation_kernel_projection_kernel_eig(self):
    """Check sample cardinality for a projection correlation DPP defined
    by its eigendecomposition, with both exact and MCMC samplers.
    """
    eig_vals = np.ones(self.rank)
    eig_vecs, _ = qr(rndm.randn(self.N, self.rank), mode='economic')
    dpp = FiniteDPP(kernel_type='correlation',
                    projection=True,
                    **{'K_eig_dec': (eig_vals, eig_vecs)})

    # Exact samplers.
    for mode in ('GS', 'GS_bis', 'KuTa12'):
        dpp.flush_samples()
        for _ in range(self.nb_samples):
            dpp.sample_exact(mode)
        self.check_right_cardinality(dpp, dpp.list_of_samples)

    # MCMC sampler. BUG FIX: the original wrote `for mode in ('E')`,
    # which iterates the *string* 'E' character by character — it only
    # worked because the mode name is a single letter and would silently
    # break for any multi-letter mode. Use a 1-tuple instead.
    for mode in ('E',):
        dpp.flush_samples()
        dpp.sample_mcmc(mode, **{'size': self.rank,
                                 'nb_iter': self.nb_samples})
        self.check_right_cardinality(dpp, dpp.list_of_samples[0])
def get_recommendations(self, unseen_items, liked_items, indices, sim_matrix,
                        user_id):
    """Build a recommendation DataFrame for a user from item similarities.

    Odd user ids get a plain top-10 most-similar list (control group);
    even user ids get a diversity-aware list selected with a k-DPP plus
    German-language textual explanations in an 'explanations' column.
    Returns the recommendation DataFrame, or 0 when there are not enough
    candidate items.

    NOTE(review): assumes `unseen_items` / `liked_items` are pandas
    DataFrames with 'final_vectors', 'final_nouns' and 'title' columns,
    and that `indices` maps DataFrame index values to row positions of
    `sim_matrix` — confirm against callers.
    """
    # Random draw used to pick one of four explanation phrasings below.
    zufall = random.random()
    liked_items_ind = liked_items.index
    unseen_items_ind = unseen_items.index
    # Positions of liked/unseen items inside the similarity matrix.
    liked_iloc = np.asarray([indices.index(v) for v in liked_items_ind])
    not_seen_iloc = np.asarray(
        [indices.index(v) for v in unseen_items_ind])
    # For each unseen item: its maximal similarity to any liked item.
    result = sim_matrix[liked_iloc.ravel()]
    max_sim = np.amax(np.asarray(result), axis=0)
    max_not_seen = max_sim[not_seen_iloc]
    print(max_not_seen.shape)
    # Candidates = unseen items with similarity >= 0.5 to a liked item.
    max_not_seen_lower = max_not_seen[(max_not_seen >= 0.5)]
    candidates = max_not_seen_lower.shape[0]
    print('Candidates: ', candidates)

    # Odd user ids: plain top-10 similarity recommendations, no
    # explanations (empty 'explanations' column).
    if (user_id % 2 == 1 and candidates >= 5):
        max_not_seen_ind = np.argpartition(max_not_seen, -10)[-10:]
        final_indices = unseen_items_ind[max_not_seen_ind]
        # time --> 0.003999233245849609
        # final_recommendations are the correct ids for the posts!
        recommendation_df = unseen_items.loc[unseen_items.index.isin(
            final_indices)]
        recommendation_df['explanations'] = ''
        return recommendation_df
    if (user_id % 2 == 1 and candidates <= 5):
        return 0

    # Even user ids: diversity-aware recommendations via a k-DPP.
    if (user_id % 2 == 0 and candidates > 15):
        print('Kandidaten 1: ', candidates)
        print('Kandidaten: ', candidates)
        # The length of the produced list can be changed here!
        if (candidates >= 30):
            max_not_seen_ind = np.argpartition(max_not_seen, -30)[-30:]
        else:
            max_not_seen_ind = np.argpartition(max_not_seen,
                                               -candidates)[-candidates:]
        final_indices = unseen_items_ind[max_not_seen_ind]
        print(final_indices.shape)
        #time --> 0.003999233245849609
        #final_recommendations are the correct ids for the posts!
        recommendation_df = unseen_items.loc[unseen_items.index.isin(
            final_indices)]
        vectors = recommendation_df['final_vectors'].tolist()
        final_recommendations = list(recommendation_df.index)
        #Phi = np.array(vectors)
        #L = Phi.dot(Phi.T)
        # Likelihood kernel = cosine similarity between candidate vectors.
        L = self.cosine_similarity(np.asarray(vectors))
        #print(dist_out_diversity)
        DPP = FiniteDPP('likelihood', **{'L': L})
        if (candidates >= 30):
            k = 10
        else:
            k = round(candidates / 3)
        DPP.flush_samples()
        #DPP.sample_exact_k_dpp(size=k)
        #for _ in range(2000):
        DPP.sample_mcmc_k_dpp(size=k, **{'nb_iter': 200})
        #print(DPP.projection)
        # Among all MCMC samples, keep the one with the largest
        # determinant of its cosine-similarity submatrix (most diverse).
        list_of_samples = DPP.list_of_samples[0]
        det_sim_list = []
        for values in list_of_samples:
            final_diversity_list_indices = [
                final_recommendations[i] for i in values
            ]
            recommendation_diversity_df = unseen_items.loc[
                final_diversity_list_indices, :]
            vectors = recommendation_diversity_df['final_vectors'].tolist()
            dist_out_diversity = self.cosine_similarity(
                np.asarray(vectors))
            det_sim_list.append(np.linalg.det(dist_out_diversity))
        final_diversity_list = DPP.list_of_samples[0][np.argmax(
            det_sim_list)]
        # Indices of the final diversity recommendations.
        final_diversity_list_indices = [
            final_recommendations[i] for i in final_diversity_list
        ]
        recommendation_diversity_df = unseen_items.loc[
            final_diversity_list_indices, :]
        # time --> 0.03799939155578613
        # Get most similar documents: for every recommended item, cosine
        # similarity against every liked item.
        sim_list = []
        for values in recommendation_diversity_df.final_vectors:
            most_similar_document = []
            for vals in liked_items.final_vectors:
                result = 1 - spatial.distance.cosine(values, vals)
                most_similar_document.append(result)
            sim_list.append(most_similar_document)

        # One German explanation sentence per recommendation, phrased by
        # the random draw `zufall` (same phrasing for the whole call).
        sim_doc_explanations = []
        sim_doc_index = []
        for values in range(0, len(sim_list)):
            max_doc = np.argmax(sim_list[values])
            titles = liked_items['title'].tolist()
            sim_doc_index.append(liked_items.index[max_doc])
            if (zufall <= 0.2):
                sim_doc_explanations.append(
                    'Das ähnlichste, von Ihnen gemochte Dokument zu diesem vorgeschlagenen Beitrag hat den Titel: '
                    + str(titles[max_doc]) + ' und die Audio_id: ' +
                    str(liked_items.index[max_doc]) + '.')
            elif (zufall >= 0.2 and zufall <= 0.4):
                sim_doc_explanations.append(
                    'Dieser Beitrag wird Ihnen empfohlen, da Sie den Beitrag "'
                    + str(titles[max_doc]) + '" mit der Audio ID: ' +
                    str(liked_items.index[max_doc]) + ' mögen.')
            elif (zufall >= 0.4 and zufall <= 0.6):
                sim_doc_explanations.append(
                    'Der gerade empfohlene Beitrag wurde für Sie aufgrund des gemochten Beitrags: "'
                    + str(titles[max_doc]) + '" mit der ID : ' +
                    str(liked_items.index[max_doc]) + ' ausgewählt.')
            else:
                sim_doc_explanations.append(
                    'Sie könnten diesen Titel mögen, da er eine gewisse Ähnlichkeit zu dem Beitrag "'
                    + str(titles[max_doc]) +
                    '" besitzt, den Sie mögen. Dieser besitzt die ID: ' +
                    str(liked_items.index[max_doc]) + '.')

        # Enrich each explanation with similar noun pairs between the
        # recommended and the liked document, looked up in the Vectors DB.
        for values in range(0, len(recommendation_diversity_df)):
            words_1 = recommendation_diversity_df.final_nouns.values[
                values]
            # NOTE(review): `id` shadows the builtin — left unchanged.
            id = int(sim_doc_index[values])
            words_2 = liked_items.final_nouns[id]
            sim_dict = {}
            for v in set(words_1):
                for n in set(words_2):
                    word = str(v + ' ' + n)
                    # Cosine similarity of the two word embeddings.
                    v_result = Vectors.query.filter(
                        Vectors.word == v).first()
                    v_vector = json.loads(v_result.vector)
                    n_result = Vectors.query.filter(
                        Vectors.word == n).first()
                    n_vector = json.loads(n_result.vector)
                    sim_dict[word] = dot(v_vector, n_vector) / (
                        norm(v_vector) * norm(n_vector))
            # Keep only the 3 most similar word pairs.
            top_3_sim_dict = ({
                key: value
                for key, value in sim_dict.items()
                if value in heapq.nlargest(3, sim_dict.values())
            })
            sorted_dict = collections.OrderedDict(top_3_sim_dict)
            zufall_2 = random.random()
            for k, v in sorted_dict.items():
                words = k.split()
                # NOTE(review): the third branch below is a separate
                # `if`, not an `elif`, so its `else` (the identical-word
                # sentence) also fires when 0.5 <= v < 1 and
                # zufall_2 < 0.66 — looks unintended, preserved as-is.
                if v < 1 and v >= 0.5 and zufall_2 <= 0.33:
                    sim_doc_explanations[values] = sim_doc_explanations[
                        values] + ' Aus dem gemochten Beitrag ist das Wort ' + (
                            str(words[0].capitalize()) +
                            ' ähnlich zu dem Wort aus dem empfohlenen Beitrag '
                            + str(words[1].capitalize()) + '.')
                elif v < 1 and v >= 0.5 and zufall_2 >= 0.33 and zufall_2 <= 0.66:
                    sim_doc_explanations[values] = sim_doc_explanations[
                        values] + ' Die zwei Worte ' + (
                            str(words[0].capitalize()) + ' und ' +
                            str(words[1].capitalize()) +
                            ' werden in den Beiträgen als ähnlich angesehen.'
                        )
                if v < 1 and v >= 0.5 and zufall_2 >= 0.66 and zufall_2 <= 1:
                    sim_doc_explanations[values] = sim_doc_explanations[
                        values] + ' Der gemochte und der vorgeschlagene Beitrag werden sich durch die in Ihnen vorkommenden Worte ' + (
                            str(words[0].capitalize()) + ' und ' +
                            str(words[1].capitalize()) +
                            ' als ähnlich angesehen.')
                else:
                    # Avoid appending the same identical-word sentence twice.
                    if (' Die beiden Beiträge beinhalten das identische Wort: '
                            + (str(words[0].capitalize()) + '.')
                            not in sim_doc_explanations[values]):
                        sim_doc_explanations[values] = sim_doc_explanations[
                            values] + ' Die beiden Beiträge beinhalten das identische Wort: ' + (
                                str(words[0].capitalize()) + '.')
        #time --> 0.0010020732879638672
        recommendation_diversity_df['explanations'] = sim_doc_explanations
        return recommendation_diversity_df
    else:
        return 0
for i in range(num_trials): gaps = [] t_gap = [] avgs = [] # Generate a dataset of size Max N, Y is sampled from the prior X = np.random.randn(N_sequence[-1], 1) Kff = k.compute_K_symm(X) Y = np.random.multivariate_normal( mean=np.zeros(N_sequence[-1]), cov=Kff + np.square(sn) * np.eye(N_sequence[-1]))[:, None] for N, M in zip(N_sequence, M_sequence): X_cur = X[:N, :] kff = k.compute_K_symm(X_cur) DPP = FiniteDPP('likelihood', **{'L': kff + np.eye(N) * 1e-6}) DPP.flush_samples() DPP.sample_exact_k_dpp(size=M) ind = DPP.list_of_samples[0] Z_cur = X_cur[ind] Y_cur = Y[:N, :] with gpflow.settings.temp_settings(low_jitter): # bound from theorem 4 avg_kl = KL_bound2(k_var=k.variance.value, k_ls=lengthscale, sigma_n=sn, N=N, p_sd=1, p_success=0.5, M=M) # We set the GP to have the parameters used in generating data full_m = gpflow.models.GPR(X_cur, Y_cur, k)