def exploitation(self):
    """Exploit: recommend the least-recommended recommendable item of V.

    Runs the hypothesis test for the previous item first. If no item of V
    is recommendable to the current user, picks the globally
    least-recommended unseen item instead and adds it to V. Always
    increments the recommendation counter of the selected item.
    """
    # hypothesis test for the previously recommended item
    self.hypothesis_test()
    xi = np.array(self.Xi_u)
    in_V = np.array(self.V) == 1
    # recommendable = in V and not yet shown to the current user
    recommendable = np.multiply(xi == 0, in_V)
    counts = np.array(self.item_cnt)
    if sum(recommendable) > 0:
        # push non-recommendable / non-V items above M so argmin skips them
        masked = counts + self.M * (np.array(recommendable) == 0) + self.M * (in_V == 0)
        self.item_selected = rand_argmin(masked)
    else:
        # nothing in V works: take the least-recommended unseen item and add it to V
        masked = counts + self.M * (xi == 1)
        self.item_selected = rand_argmin(masked)
        self.V[self.item_selected] = 1
    # update the counter for the selected item
    self.item_cnt[self.item_selected] = self.item_cnt[self.item_selected] + 1
def exploitation(self):
    """Exploit: recommend an item from the cluster chosen by the UCB index.

    If the UCB index does not name a valid cluster (1 or 2), fall back to a
    per-user round robin that alternates between clusters 1 and 2 (tracked
    in ``self.k_prev``). Increments the recommendation counter and asserts
    the selected item was not previously shown to the current user.
    """
    k = self.compute_UCB_ind()
    if k != 1 and k != 2:
        # UCB undecided: alternate clusters per user (round robin)
        k = 2 if self.k_prev[self.u_current] == 1 else 1
        self.k_prev[self.u_current] = k
    # The original repeated the selection logic in three branches; it is
    # identical in all of them, so it lives in one helper now.
    self._select_from_cluster(k)
    # update the counter
    self.item_cnt[self.item_selected] = self.item_cnt[self.item_selected] + 1
    assert self.Xi_current[self.item_selected, self.u_current] == 0

def _select_from_cluster(self, k):
    # Pick a not-yet-recommended item from cluster k for the current user;
    # if none exists, fall back to a uniformly random unseen item.
    recommendable_from_I_k = np.multiply(
        np.array(self.Xi_u) == 0, np.array(self.I) == k)
    if sum(recommendable_from_I_k) > 0:
        self.item_selected = rand_argmax(recommendable_from_I_k)
    else:
        self.item_selected = rand_argmin(self.Xi_u)
def choose_item(self, state):
    """Choose an item the current user has not been recommended yet,
    uniformly at random, and return its index."""
    user, history, _ = state
    seen = history[:, user]
    assert len(seen) == self.N
    item = rand_argmin(seen)
    assert history[item, user] == 0
    return item
def explorations(self):
    """Explore: recommend the least-recommended unseen item of I_0.

    When every unseen item of I_0 is exhausted, samples a uniformly random
    unseen item instead — in that case the counter is NOT updated and the
    reward is flagged not to be recorded (``reward_rec_flag = 0``).
    """
    shown = np.array(self.Xi_u == 1)
    outside_I_0 = np.array(np.array(self.I_0) == 0)
    # Items already shown to this user or outside I_0 are saturated at M so
    # that argmin can never select them.
    masked_counts = np.minimum(
        shown * self.M + outside_I_0 * self.M + self.item_cnt, self.M)
    if sum(masked_counts) < self.M * self.N:
        # at least one entry below M: some item of I_0 is still recommendable
        self.item_selected = rand_argmin(masked_counts)
        assert self.Xi_current[self.item_selected, self.u_current] == 0
        # update the counter
        self.item_cnt[self.item_selected] = self.item_cnt[self.item_selected] + 1
        # drop items that reached their exploration budget T_0 from I_0_minus
        self.I_0_minus = np.multiply(self.I_0,
                                     (np.array(self.item_cnt) < self.T_0))
        # record the reward for this recommendation
        self.reward_rec_flag = 1
    else:
        # I_0 exhausted for this user: random unseen item, counter untouched
        self.item_selected = rand_argmin(self.Xi_u)
        assert self.Xi_current[self.item_selected, self.u_current] == 0
        print('random sampling')
        # do not record the reward
        self.reward_rec_flag = 0
def explorations(self):
    """Explore: recommend the least-recommended unseen item of S.

    When every unseen item of S is exhausted, falls back to a uniformly
    random unseen item. The recommendation counter of the selected item is
    incremented in both cases.
    """
    shown = np.array(self.Xi_u == 1)
    not_in_S = np.array(np.array(self.S) == 0)
    # Saturate items shown to this user or outside S at M so argmin skips them.
    masked_counts = np.minimum(
        shown * self.M + not_in_S * self.M + self.item_cnt, self.M)
    if sum(masked_counts) < self.M * self.N:
        # some unseen item of S remains: take the least-recommended one
        self.item_selected = rand_argmin(masked_counts)
        assert self.Xi_current[self.item_selected, self.u_current] == 0
    else:
        # S exhausted for this user: random unseen item from outside S
        self.item_selected = rand_argmin(self.Xi_u)
        assert self.Xi_current[self.item_selected, self.u_current] == 0
        print('random sampling')
    # update the counter for the selected item
    self.item_cnt[self.item_selected] = self.item_cnt[self.item_selected] + 1
def exploitation(self):
    """Exploit: recommend the unseen item with the largest estimated score.

    Scores come from column ``u_current`` of ``self.hatA``; items already
    shown to the user are given a large negative penalty so the argmax can
    only pick an unseen item. Falls back to a uniformly random unseen item
    if a seen item is somehow still selected. Increments the counter.
    """
    seen = np.array(self.Xi_u) == 1
    # BUGFIX: the penalty previously read
    #   np.array(self.LARGE_CONSTANT * self.maxhatA * np.array(self.Xi_u) == 1)
    # where `==` binds looser than `*`, so it compared the whole product to 1
    # and subtracted a 0/1 boolean instead of a large constant — seen items
    # could still win the argmax. The intended penalty is applied here.
    A_u = np.multiply(seen == 0, np.array(self.hatA[:, self.u_current])) \
        - self.LARGE_CONSTANT * self.maxhatA * seen
    self.item_selected = rand_argmax(A_u)
    # safety net: if a seen item was still selected, fall back to random sampling
    if self.Xi_current[self.item_selected, self.u_current] == 1:
        self.item_selected = rand_argmin(self.Xi_u)
    # update the counter
    self.item_cnt[self.item_selected] = self.item_cnt[self.item_selected] + 1
    assert self.Xi_current[self.item_selected, self.u_current] == 0
def choose_item(self, state):
    """Select an item for the current user by maximizing the KLUCB index.

    Unpacks ``state`` into the current user, the recommendation history
    matrix and the previous reward, refreshes the reward sums, empirical
    averages and KLUCB indexes, masks out items already shown to the user,
    and picks the item with the largest remaining index (falling back to a
    uniformly random unseen item when all indexes are zero). Updates the
    counter, records the selected item, advances ``self.t`` and returns
    the selected item index.
    """
    (self.u_current, self.Xi_current, self.prev_reward) = state
    Xi_u = self.Xi_current[:, self.u_current]
    assert len(Xi_u) == self.N
    if self.t == 1:
        # first round: select the T/m log T candidate items I_0
        self.random_I_0_selection()
    # update the reward sum
    self.update_reward_sum()
    # update empirical average
    self.update_emprical_average()
    # update KLUCB index
    self.update_KLUCB_index()
    # BUGFIX: take a copy of the indexes. The original bound KLUCB_index_u
    # directly to self.KLUCB_indexes, so the per-user zeroing below mutated
    # the shared index vector and corrupted it for every later user.
    KLUCB_index_u = np.array(self.KLUCB_indexes)
    # items already recommended to this user are not recommendable
    for i in range(self.N):
        if Xi_u[i] == 1:
            KLUCB_index_u[i] = 0
    # choose the item based on the masked KLUCB indexes
    if sum(KLUCB_index_u) > 0:
        # select the item in I_0 with the largest KLUCB index
        self.item_selected = rand_argmax(KLUCB_index_u)
    else:
        # all masked indexes are zero: random unseen item
        self.item_selected = rand_argmin(Xi_u)
    # update the counter
    self.item_cnt[self.item_selected] = self.item_cnt[self.item_selected] + 1
    # record previously used item
    self.item_prev = self.item_selected
    self.t = self.t + 1
    return self.item_selected
def exploitation(self):
    """Exploit: recommend the empirically-best recommendable item of V.

    If nothing in V is recommendable to the current user, draw an item
    uniformly at random from outside V_0 (or any unseen item if none) and
    add it to both V and V_0. Always increments the recommendation
    counter of the selected item; drops into the IPython debugger if the
    selected item was already shown to the user.
    """
    # update V (comment said "I_1" but the call refreshes V)
    self.update_V()
    # recommendable = in V and not yet shown to the current user
    activeitems = np.multiply(self.Xi_u == 0, np.array(self.V) == 1)
    if sum(activeitems) > 0:
        # empirical averages of observed rewards per item
        averages = self.emprical_average()
        # force non-active items to -1 so they cannot win the argmax
        # NOTE(review): assumes emprical_average() returns a fresh mutable
        # array and that real averages are >= 0 — confirm.
        for i in range(self.N):
            if activeitems[i] == 0:
                averages[i] = -1
        # choose the item in V with the best empirical average
        self.item_selected = rand_argmax(averages)
        assert self.Xi_current[self.item_selected, self.u_current] == 0
    else:
        # no item of V is recommendable: sample from V_0^c instead
        # recompute activeitems: recommendable and in V_0^c
        activeitems = np.multiply(self.Xi_u == 0, np.array(self.V_0) == 0)
        if sum(activeitems) == 0:
            # nothing outside V_0 either: any unseen item
            self.item_selected = rand_argmin(self.Xi_u)
        else:
            # select an item uniformly at random from activeitems
            self.item_selected = rand_argmax(activeitems)
        # add the newly drawn item to V and V_0
        self.V[self.item_selected] = 1
        self.V_0[self.item_selected] = 1
    # debug trap: the selected item must not have been recommended already
    if self.Xi_current[self.item_selected, self.u_current] == 1:
        from IPython.core.debugger import Pdb
        Pdb().set_trace()
    # update the counter for the selected item
    self.item_cnt[self.item_selected] = self.item_cnt[self.item_selected] + 1
def explorations(self):
    """Explore: recommend an unseen item of S; otherwise any unseen item.

    Increments the recommendation counter of the selected item.
    """
    # eligible[i] is truthy iff item i is in S and not yet shown to the user
    eligible = np.array(self.Xi_u == 0) * np.array(np.array(self.S) == 1)
    if sum(eligible) > 0:
        # pick uniformly among eligible items of S
        self.item_selected = rand_argmax(eligible)
        assert self.Xi_current[self.item_selected, self.u_current] == 0
    else:
        # S exhausted for this user: sample a fresh unseen item
        self.item_selected = rand_argmin(self.Xi_u)
        assert self.Xi_current[self.item_selected, self.u_current] == 0
        print('random sampling')
    # update the counter for the selected item
    self.item_cnt[self.item_selected] = self.item_cnt[self.item_selected] + 1
def do_clustering(self):
    """Cluster the s sampled items into K groups.

    Pipeline:
      1. Build the adjacency matrix A over the s sampled items and its
         rank-K approximation A_low.
      2. Neighborhood-based spectral clustering for floor(log s) candidate
         radii; keep the radius with the smallest error r_t.
      3. Re-run the clustering with the best radius and write labels into
         self.I_S / self.I.
      4. Estimate the inter-cluster probabilities self.P_kl.
      5. floor(log s) rounds of maximum-likelihood local improvement,
         then rewrite the labels.

    Prints debug error rates, assuming the true clusters are the two
    halves of the item index range (K is expected to be 2 here).
    """
    A = self.generate_A()  # adj matrix (s times s)
    # empirical edge density of A
    p_tilde = 2 * np.sum(A) / self.s / (self.s - 1)
    A_low = lowrank_approx(A, self.K)  # rank-2 approximation
    r_t = [0 for ind in range(int(np.floor(np.log(self.s))))]
    for ind in range(int(np.floor(np.log(self.s)))):
        # find neighborhoods: Q[i] = items within radius (ind+1)*p_tilde*eps
        Q = [0 for i in range(self.s)]
        for i in range(self.s):
            Q[i] = set()
            for j in range(self.s):
                if np.linalg.norm(A_low[i] - A_low[j])**2 <= (
                        ind + 1) * p_tilde * self.epsilon_alg:
                    Q[i].add(j)
        T = [0 for i in range(self.K)]
        xi = np.zeros((self.K, self.s))  # cluster centers in low-rank space
        Qprev = set()
        for k in range(self.K):
            cardinalities = [0 for i in range(self.s)]
            for i in range(self.s):
                cardinalities[i] = len(Q[i] - Qprev)
            # compute the index v_k^\star (densest uncovered neighborhood)
            v_k = rand_argmax(np.array(cardinalities))
            T[k] = Q[v_k] - Qprev
            Qprev = Qprev.union(Q[v_k])
            # center = mean of the low-rank rows assigned to cluster k
            for i in range(self.s):
                if i in T[k]:
                    xi[k] = xi[k] + A_low[i] / len(T[k])
        # remaining items assignment
        if len(Qprev) != self.s:
            for v in set(range(self.s)) - Qprev:
                distances = np.zeros(self.K)
                for k in range(self.K):
                    distances[k] = np.linalg.norm(A_low[v] - xi[k])**2
                # NOTE(review): argmax assigns v to the FARTHEST center,
                # while the second pass below uses rand_argmin — confirm
                # which is intended.
                k_star = rand_argmax(distances)
                T[k_star].add(v)
        # compute r_t: within-cluster squared error for this radius
        for k in range(self.K):
            for i in range(self.s):
                if i in T[k]:
                    # NOTE(review): uses A_low[v] — v is a leftover loop
                    # variable from the remaining-assignment block above
                    # (and undefined if that block was skipped); this
                    # likely should be A_low[i]. Confirm.
                    r_t[ind] = r_t[ind] + np.linalg.norm(A_low[v] - xi[k])**2
    # end for ind
    minind = rand_argmin(np.array(r_t))
    ind = minind  # do a clustering with a smallest error
    # do a clustering again with minind
    # find neighborhoods
    Q = [0 for i in range(self.s)]
    for i in range(self.s):
        Q[i] = set()
        for j in range(self.s):
            if np.linalg.norm(A_low[i] - A_low[j])**2 <= (
                    ind + 1) * p_tilde * self.epsilon_alg:
                Q[i].add(j)
    T = [0 for i in range(self.K)]
    xi = np.zeros((self.K, self.s))
    Qprev = set()
    for k in range(self.K):
        cardinalities = [0 for i in range(self.s)]
        for i in range(self.s):
            cardinalities[i] = len(Q[i] - Qprev)
        # compute the index v_k^\star
        v_k = rand_argmax(np.array(cardinalities))
        T[k] = Q[v_k] - Qprev
        Qprev = Qprev.union(Q[v_k])
        for i in range(self.s):
            if i in T[k]:
                xi[k] = xi[k] + A_low[i] / len(T[k])
    # remaining items assignment (this pass assigns to the NEAREST center)
    if len(Qprev) != self.s:
        #from IPython.core.debugger import Pdb; Pdb().set_trace()
        for v in set(range(self.s)) - Qprev:
            distances = np.zeros(self.K)
            for k in range(self.K):
                distances[k] = np.linalg.norm(A_low[v] - xi[k])**2
            k_star = rand_argmin(distances)
            T[k_star].add(v)
    # write labels: cluster ids are 1-based in self.I_S / self.I
    for k in range(self.K):
        for i in T[k]:
            self.I_S[i] = k + 1
    for i in range(self.N):
        if self.S[i] == 1:
            self.I[i] = self.I_S[self.N_to_S[i]]
    # for the debug: error rate vs. ground truth (first half = cluster 1,
    # second half = cluster 2)
    err_num = 0
    for i in range(self.N):
        if i <= int(self.N / 2 - 1):
            if self.I[i] == 2:
                err_num = err_num + 1
        if i > int(self.N / 2 - 1):
            if self.I[i] == 1:
                err_num = err_num + 1
    err_rate = min(err_num / self.s, 1 - err_num / self.s)
    print('err_rate after SC=', end="")
    print(err_rate)
    # estimation of \hat{p}(i, j): mean edge weight between clusters i and j
    for i in range(self.K):
        for j in range(self.K):
            numerator = 0
            for v in T[i]:
                for u in T[j]:
                    numerator = numerator + A[v, u]
            # NOTE(review): denominator uses self.s, not len(T[j]) —
            # confirm this normalization matches the estimator in the paper.
            denominator = len(T[i]) * self.s
            self.P_kl[i, j] = numerator / denominator
    # local improvement: likelihood-based reassignment rounds
    S = [0 for i in range(self.K)]
    Sprev = [0 for i in range(self.K)]
    for k in range(self.K):
        Sprev[k] = T[k]
    for ind in range(int(np.floor(np.log(self.s)))):
        for k in range(self.K):
            S[k] = set()
        for v in range(self.s):
            # computation of likelihood of v belonging to each cluster
            likelihoods = np.zeros(self.K)
            for i in range(self.K):
                # sum up over all k
                wegihtsum = 0
                psum = 0
                for k in range(self.K):
                    weight_by_Avw = 0
                    # NOTE(review): sums over Sprev[i] regardless of k, so
                    # the same weight is recomputed K times — possibly
                    # Sprev[k] was intended. Confirm against the paper.
                    for w in Sprev[i]:
                        weight_by_Avw = weight_by_Avw + A[v, w]
                    wegihtsum = wegihtsum + weight_by_Avw
                    psum = psum + self.P_kl[i, k]
                    likelihoods[i] = likelihoods[
                        i] + weight_by_Avw * np.log(self.P_kl[i, k])
                # add the case of k = 0 (in the paper's notations)
                # NOTE(review): no np.log around (1 - psum) — confirm
                # whether log(1 - psum) was intended.
                likelihoods[i] = likelihoods[i] + (self.s -
                                                   wegihtsum) * (1 - psum)
            # maximum likelihood assignment
            i_star = rand_argmax(likelihoods)
            S[i_star].add(v)
        # update Sprev
        for k in range(self.K):
            Sprev[k] = S[k]
    # (end for ind loop)
    # rewrite labels with the improved assignment
    for k in range(self.K):
        for i in S[k]:
            self.I_S[i] = k + 1
    for i in range(self.N):
        if self.S[i] == 1:
            self.I[i] = self.I_S[self.N_to_S[i]]
    # for the debug (computation of err rate)
    err_num = 0
    for i in range(self.N):
        if i <= int(self.N / 2 - 1):
            if self.I[i] == 2:
                err_num = err_num + 1
        if i > int(self.N / 2 - 1):
            if self.I[i] == 1:
                err_num = err_num + 1
    err_rate2 = min(err_num / self.s, 1 - err_num / self.s)
    print('err_rate after SP=', end="")
    print(err_rate2)
    print('err_rate improvement = ', end="")
    print(err_rate - err_rate2)
def exploitation(self):
    """Exploit: recommend an item using round robin or the L-based rule.

    Early rounds (user in U_0 and t <= T_1): per-user round robin that
    alternates between clusters 1 and 2 via ``self.k_prev``. Afterwards:
    build the deviation statistics x_kl, collect the candidate set L_ind
    of user-cluster hypotheses that pass the threshold test, sample a
    cluster accordingly, and recommend an unseen item of that cluster
    (falling back to round robin when the sampled cluster id is not 1
    or 2). Increments the counter and asserts the selected item is unseen.
    """
    # round robin recommendations
    if self.U_0[self.u_current] == 1 and self.t <= self.T_1:
        # do the recommendations in a round robin manner
        if self.k_prev[self.u_current] == 1:
            # recommend from cluster 2
            k = 2
            self.k_prev[self.u_current] = 2
            recommendable_from_I_k = np.multiply(
                np.array(self.Xi_u) == 0, np.array(self.I) == k)
            if sum(recommendable_from_I_k) > 0:
                self.item_selected = rand_argmax(recommendable_from_I_k)
            else:
                self.item_selected = rand_argmin(self.Xi_u)
                #print('random sampling (exploi)')
        else:
            # recommend from cluster 1
            k = 1
            self.k_prev[self.u_current] = 1
            recommendable_from_I_k = np.multiply(
                np.array(self.Xi_u) == 0, np.array(self.I) == k)
            if sum(recommendable_from_I_k) > 0:
                self.item_selected = rand_argmax(recommendable_from_I_k)
            else:
                self.item_selected = rand_argmin(self.Xi_u)
    # end round robin
    else:
        # exploitation using L
        # x_kl[k, l]: deviation of the user's empirical rate from the
        # cluster-l hypothesis, clipped at 0 after the eps_users slack
        x_kl = np.zeros((self.K, 2))
        for k in range(self.K):
            for l in range(2):
                x_kl[k, l] = np.max([
                    np.abs(self.P_kl_user[k, l] -
                           self.rho_users[self.u_current, k]) -
                    self.eps_users, 0
                ])
        # L_ind: user-cluster hypotheses still consistent with the data
        L_ind = set()
        for l in range(2):
            term = 0
            for k in range(self.K):
                # number of this user's past recommendations from cluster k+1
                cnt_k = np.sum(
                    np.multiply(np.array(self.Xi_u),
                                np.array(self.I) == k + 1))
                term = term + cnt_k * x_kl[k, l]**2
            cnt_user = np.sum(np.array(self.Xi_u))
            if term < 0.01 * np.log(cnt_user):
                L_ind.add(l)
        recom_k = 0
        if len(L_ind) != 0:
            # best item-cluster for each surviving hypothesis; pick one at random
            setbestk = set()
            for l in L_ind:
                setbestk.add(self.argmax_k[l])
            # NOTE(review): random.sample on a set is deprecated since
            # Python 3.9 and removed in 3.11 — would need list(setbestk).
            recom_k = random.sample(setbestk, 1)
        else:
            recom_k = random.sample(range(self.K), 1)
        # increment k so that it aligns with the 1-based cluster index
        recom_k = recom_k[0] + 1
        if recom_k == 1 or recom_k == 2:
            # compute recommendable items from cluster recom_k for the user
            recommendable_from_I_k = np.multiply(
                np.array(self.Xi_u) == 0, np.array(self.I) == recom_k)
            if sum(recommendable_from_I_k) > 0:
                self.item_selected = rand_argmax(recommendable_from_I_k)
            else:
                self.item_selected = rand_argmin(self.Xi_u)
        else:
            # invalid cluster id: fall back to the round robin rule
            if self.k_prev[self.u_current] == 1:
                # recommend from cluster 2
                recom_k = 2
                self.k_prev[self.u_current] = 2
                recommendable_from_I_k = np.multiply(
                    np.array(self.Xi_u) == 0, np.array(self.I) == recom_k)
                if sum(recommendable_from_I_k) > 0:
                    self.item_selected = rand_argmax(
                        recommendable_from_I_k)
                else:
                    self.item_selected = rand_argmin(self.Xi_u)
            else:
                # recommend from cluster 1
                recom_k = 1
                self.k_prev[self.u_current] = 1
                recommendable_from_I_k = np.multiply(
                    np.array(self.Xi_u) == 0, np.array(self.I) == recom_k)
                if sum(recommendable_from_I_k) > 0:
                    self.item_selected = rand_argmax(
                        recommendable_from_I_k)
                else:
                    self.item_selected = rand_argmin(self.Xi_u)
    # update the counter
    self.item_cnt[
        self.item_selected] = self.item_cnt[self.item_selected] + 1
    assert self.Xi_current[self.item_selected, self.u_current] == 0
def explorations(self):
    """Explore: recommend a uniformly random item not yet shown to the
    current user, and bump its recommendation counter."""
    chosen = rand_argmin(self.Xi_u)
    self.item_selected = chosen
    self.item_cnt[chosen] = self.item_cnt[chosen] + 1