def getML(S, N=1, testmode=False):
    # L: product of the last N entries of S; M: product of the N largest entries.
    if testmode:
        reglast, regmax = S[-N:], -bn.partsort(-np.asarray(S, int), N)[:N]
        M, L = prod(regmax), prod(reglast)
        print("reglast, regmax : ", reglast, regmax)
    else:
        L = prod(S[-N:])
        M = prod(-bn.partsort(-np.asarray(S, int), N)[:N])
    return M, L
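# Hedged usage sketch (not in the original source). It assumes getML's module
# already has `import numpy as np`, `from numpy import prod`, and imports a
# pre-1.0 bottleneck as `bn` (newer releases replaced partsort with partition).
# With S = [5, 2, 3, 4] and N = 2:
#   L = prod(S[-2:])           = 3 * 4 = 12
#   M = prod(two largest dims) = 5 * 4 = 20
M, L = getML([5, 2, 3, 4], N=2)
print(M, L)  # -> 20 12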
def mean_rrank_at_k_batch(train_data, heldout_data, Et, Eb, user_idx,
                          k=5, mu=None, vad_data=None):
    '''
    mean reciprocal rank@k: For each user, make predictions and rank for
    all the items. Then calculate the mean reciprocal rank for the top K
    that are in the held-out set.
    '''
    batch_users = user_idx.stop - user_idx.start
    X_pred = _make_prediction(train_data, Et, Eb, user_idx, batch_users,
                              mu=mu, vad_data=vad_data)
    all_rrank = 1. / (np.argsort(np.argsort(-X_pred, axis=1), axis=1) + 1)
    X_true_binary = (heldout_data[user_idx] > 0).toarray()
    heldout_rrank = X_true_binary * all_rrank
    top_k = bn.partsort(-heldout_rrank, k, axis=1)
    return -top_k[:, :k].mean(axis=1)
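# Illustrative sketch (added, not from the original code) of the double-argsort
# trick above: argsort of argsort turns each row of scores into 0-based ranks,
# so adding 1 and taking the reciprocal gives every item's reciprocal rank.
import numpy as np

scores = np.array([[0.1, 0.9, 0.4]])
ranks = np.argsort(np.argsort(-scores, axis=1), axis=1) + 1
print(ranks)       # [[3 1 2]] -> the 0.9 item is ranked first
print(1. / ranks)  # reciprocal ranks: 1/3, 1, 1/2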
def _phase2(self):
    """
    Execute phase 2 of the SP region. This phase is used to compute the
    active columns.

    Note - This should only be called after phase 1 has been called and
    after the inhibition radius and neighborhood have been updated.
    """
    # Shift the outputs
    self.y[:, 1:] = self.y[:, :-1]
    self.y[:, 0] = 0

    # Calculate k
    #   - For a column to be active its overlap must be above the overlap
    #     value of the k-th largest column in its neighborhood.
    k = self._get_num_cols()

    if self.global_inhibition:
        # The neighborhood is all columns, thus the set of active columns
        # is simply columns that have an overlap above the k-th largest
        # in the entire region.

        # Compute the winning column indexes
        if self.learn:
            # Randomly break ties
            ix = bn.argpartsort(-self.overlap[:, 0] -
                                self.prng.uniform(.1, .2, self.ncolumns),
                                k)[:k]
        else:
            # Choose the same set of columns each time
            ix = bn.argpartsort(-self.overlap[:, 0], k)[:k]

        # Set the active columns
        self.y[ix, 0] = self.overlap[ix, 0] > 0
    else:
        # The neighborhood is bounded by the inhibition radius, therefore
        # each column's neighborhood must be considered.
        for i in xrange(self.ncolumns):
            # Get the neighbors
            ix = np.where(self.neighbors[i])[0]

            # Compute the minimum top overlap
            if ix.shape[0] <= k:
                # Desired number of candidates is at or below the desired
                # activity level, so find the overall max
                m = max(bn.nanmax(self.overlap[ix, 0]), 1)
            else:
                # Desired number of candidates is above the desired
                # activity level, so find the k-th largest
                m = max(-bn.partsort(-self.overlap[ix, 0], k + 1)[k], 1)

            # Set the column activity
            if self.overlap[i, 0] >= m:
                self.y[i, 0] = True
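# Standalone sketch (not part of the class above) of the global-inhibition
# selection: negating the overlaps and asking bn.argpartsort for the k smallest
# yields the indexes of the k largest overlaps. Assumes a pre-1.0 bottleneck
# that still ships bn.argpartsort.
import numpy as np
import bottleneck as bn

overlap = np.array([3., 7., 1., 9., 5.])
k = 2
ix = bn.argpartsort(-overlap, k)[:k]
print(np.sort(ix))      # [1 3] -> the columns with overlaps 7 and 9
print(overlap[ix] > 0)  # [ True  True] -> both would be set active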
def bn_topn(arr, N, ascending=None):
    """
    Return the top N results. Negative N will give the N lowest results.

    Parameters
    ----------
    arr : Series
        one-dimensional array
    N : int
        number of elements to return. Negative numbers will return the smallest
    ascending : bool
        Ordering of the return values. Default behavior is greatest
        absolute magnitude.

    Note
    ----
    Default ascending order depends on N and whether you are looking for the
    top or bottom results. If you are looking for the top results, the most
    positive results come first. If you are looking for the bottom results,
    the most negative results come first.
    """
    if arr.ndim > 1:
        raise Exception("Only works on ndim=1")
    if ascending is None:
        ascending = not N > 0

    arr = arr[~np.isnan(arr)]

    if N > 0:  # nlargest
        N = min(abs(N), len(arr))
        N = len(arr) - abs(N)
        sl = slice(N, None)
    else:  # nsmallest
        N = min(abs(N), len(arr))
        sl = slice(None, N)

    if N == 0:
        bn_res = arr
    else:
        out = bn.partsort(arr, N)
        bn_res = out[sl]

    bn_res = np.sort(bn_res)  # sort output
    if not ascending:
        bn_res = bn_res[::-1]
    return bn_res
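# Hedged usage sketch (not in the original source); assumes bn_topn's module
# imports numpy as np and a pre-1.0 bottleneck as bn so that bn.partsort exists.
vals = np.array([3., -1., np.nan, 7., 2., -5.])
print(bn_topn(vals, 2))   # [7. 3.]   -> two largest, most positive first
print(bn_topn(vals, -2))  # [-5. -1.] -> two smallest, most negative first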
def test_transpose():
    "partsort transpose test"
    a = np.arange(12).reshape(4, 3)
    actual = bn.partsort(a.T, 2, -1).T
    desired = bn.slow.partsort(a.T, 2, -1).T
    assert_equal(actual, desired, 'partsort transpose test')
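# Small added check (not from the test suite) of the property the test relies
# on: along the chosen axis, the first n slots of bn.partsort's output hold the
# n smallest values of each slice, even for a non-contiguous (transposed) input.
import numpy as np
import bottleneck as bn

a = np.arange(12).reshape(4, 3)
part = bn.partsort(a.T, 2, -1)  # partial sort of each length-4 row of a.T
full = np.sort(a.T, axis=-1)
print(np.array_equal(np.sort(part[:, :2], axis=-1), full[:, :2]))  # True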
def n_smallest(arr, n):
    # Flatten, then partially sort so the n smallest values land in the first n slots.
    fin = np_ravel(arr)
    fin = partsort(fin, n)
    return fin[:n]
def n_largest(arr, n):
    # Invert 8-bit values so the largest become the smallest, partially sort,
    # then invert back and keep the first n slots (the n largest values).
    fin = np_ravel(arr)
    fin = 255 - fin
    fin = partsort(fin, n)
    fin = 255 - fin
    return fin[:n]
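# Hedged usage sketch (not in the original source). Both helpers assume 8-bit
# pixel-style data in 0..255, an alias such as `from numpy import ravel as
# np_ravel`, and `from bottleneck import partsort` from a pre-1.0 release.
import numpy as np

img = np.array([[10, 250], [40, 200]])
print(np.sort(n_smallest(img, 2)))  # [10 40]
print(np.sort(n_largest(img, 2)))   # [200 250]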
def random_epstein_exact_weird_correction(values_input, nb_to_keep):
    nb_to_remove = len(values_input) - nb_to_keep
    k = nb_to_remove
    values = values_input[:]
    S = set(range(len(values)))
    n = len(S)
    indice_from_which_to_start = len(values)
    Aleft = weighted_average_weird_correction(values)
    Aright = float('inf')  # max([x[0] for x in values])
    while len(S) > 1:
        sampled = sample(S, 1)[0]
        vi = values[sampled][0]
        wi = values[sampled][1]
        X, Y, Z, E = COMPUTE_X_Y_Z_E(S, values, Aleft, Aright, vi, wi)
        while True:
            if len(Z) > 0:
                A = median([(vi - values[j][0]) / (wi - values[j][1]) for j in Z])
                # A = [(vi - values[j][0]) / (wi - values[j][1]) for j in Z][len(Z) / 2]
                l2 = partsort([A * values[j][1] - values[j][0] for j in S],
                              len(S) - k)[:len(S) - k]
                F_A = -sum(l2)
                if F_A == 0:
                    return A
                elif F_A > 0:
                    Aleft = A
                else:
                    Aright = A
                X, Y, Z = UPDATE_X_Y_Z(S, values, Aleft, Aright, vi, wi, X, Y, Z)
            if ((len(X) + len(E)) >= (len(S) - k)) and k > 0:
                nb_to_remove = min(len(E), len(X) + len(E) - (len(S) - k))
                to_remove_E = set(sample(E, nb_to_remove))
                S = S - to_remove_E
                E = E - to_remove_E
                S = S - Y
                k = k - (len(Y) + nb_to_remove)
                Y = set()
            elif (len(Y) + len(E)) >= k:
                nb_to_collapse = min(len(E), len(Y) + len(E) - k)
                values_to_collapse_E = set(sample(E, nb_to_collapse))
                E = E - values_to_collapse_E
                values_to_collapse = values_to_collapse_E | X
                S = S - values_to_collapse
                collapsed_v = 0.
                collapsed_w = 1.
                for x in values_to_collapse:
                    vx, wx = values[x]
                    collapsed_v += vx
                    collapsed_w += wx
                collapsed = (collapsed_v, collapsed_w)
                values.append(collapsed)
                X = {indice_from_which_to_start}
                S.add(indice_from_which_to_start)
                indice_from_which_to_start += 1
            if len(Z) <= len(S) / 32.:
                break
    spop = S.pop()
    return values[spop][0] / values[spop][1]
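# Illustrative sketch (added, not from the original source) of the inner
# partsort step: for a candidate ratio A it keeps the (len(S) - k) smallest
# values of A*w_j - v_j, so F_A = -sum(kept) is the largest achievable
# sum(v_j - A*w_j) after dropping k items; F_A > 0 moves Aleft up, as in the
# branch above. Assumes `from bottleneck import partsort` (pre-1.0 release).
import numpy as np
from bottleneck import partsort

values = [(4., 1.), (9., 2.), (1., 1.), (6., 3.)]  # (v_j, w_j) pairs
k = 1                                              # number of items to drop
A = 2.5
costs = np.array([A * w - v for v, w in values])   # [-1.5, -4.0, 1.5, 1.5]
kept = partsort(costs, len(values) - k)[:len(values) - k]
F_A = -kept.sum()
print(F_A)  # 4.0 -> positive, so the search interval's left end moves to A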