def _get_nns(self, x):
    """Collect the nearest-neighbour indices of ``x`` for every hidden representation.

    ``self._get_hidden_repr(x)`` is expected to return a pair whose first
    element is an iterable of representations; the second element is
    ignored.  Each representation is paired positionally with an entry of
    ``self._nns`` and queried through that entry's ``kneighbors`` method
    with ``return_distance=False``.

    Returns:
        The per-representation index arrays joined along axis 1, so each
        row holds the neighbours found for one input row.
        (Assumes every ``kneighbors`` result has the same number of rows
        -- TODO confirm against the fitted indexes.)
    """
    representations, _ = self._get_hidden_repr(x)

    per_representation = []
    for representation, index in zip(representations, self._nns):
        neighbours = index.kneighbors(representation, return_distance=False)
        per_representation.append(neighbours)

    return np.concatenate(per_representation, axis=1)
def EpsDBSCAN(D, k):
    """Plot the sorted mean k-NN distance curve and return candidate Eps levels.

    For every row of ``D`` the distances to its ``k + 1`` nearest neighbours
    are queried and the first column is dropped (the query set is the
    training set itself, so that column is normally the zero self-distance).
    The per-point mean of the remaining ``k`` distances is sorted ascending,
    plotted in blue, min-max normalised to [0, 1] and binned with 10 evenly
    spaced edges.  For each bin index 0..9 that holds at least ``k`` points,
    the mean of the (un-normalised) distances in that bin becomes a
    candidate and is drawn as a red horizontal line.

    Side effects:
        Draws on the current ``plt`` figure (presumably matplotlib.pyplot --
        confirm against the file's imports); it does not call ``show``.

    Args:
        D: Data accepted by ``NearestNeighbors.fit`` / ``kneighbors``.
        k: Number of neighbours to average over; also the minimum number of
            points a bin needs in order to produce a candidate.

    Returns:
        list: Candidate values in ascending bin order.  Empty when no bin
        qualifies or when all mean distances are identical.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        # Previously this only failed indirectly (zero-size reduction or
        # inside the neighbour search); fail early with a clear message.
        raise ValueError("k must be a positive integer, got %r" % (k,))

    nn = NearestNeighbors(n_neighbors=k + 1)
    nn.fit(D)
    distances, _ = nn.kneighbors(D)
    distances = distances[:, 1:]  # drop the first (self) neighbour column

    avg_sorted = np.sort(distances.sum(axis=1) / k)
    num = len(avg_sorted)
    plt.plot(avg_sorted, 'b')

    lowest, highest = avg_sorted[0], avg_sorted[-1]
    span = highest - lowest
    if span == 0:
        # All mean distances are equal: normalising would divide 0 by 0.
        # The historical outcome of that case was an empty candidate list.
        return []

    normalised = (avg_sorted - lowest) / span
    bin_of = np.digitize(normalised, np.linspace(0, 1, 10))

    Eps = []
    # NOTE: with 10 edges np.digitize yields 1..9 for values in [0, 1) and
    # 10 for exactly 1.0, so iterating 0..9 never considers the bin that
    # holds the maximum.  Kept as-is to preserve the returned values.
    for bin_id in range(10):
        members = avg_sorted[bin_of == bin_id]
        if len(members) >= k:
            level = members.sum() / len(members)
            plt.hlines(level, xmin=0, xmax=num, colors='r')
            Eps.append(level)
    return Eps
def EpsValue(D, k):
    """Derive a starting Eps and an increment from k-nearest-neighbour distances.

    The ``k + 1`` nearest neighbours of every row of ``D`` are looked up
    within ``D`` itself and the first distance column is discarded
    (normally the zero distance of a point to itself).

    Args:
        D: Data accepted by ``NearestNeighbors.fit`` / ``kneighbors``.
        k: Number of neighbours per point used for the statistics.

    Returns:
        tuple: ``(start, step)`` where ``start`` is the smallest per-point
        mean k-NN distance and ``step`` is one hundredth of the gap between
        the largest per-point maximum k-NN distance and ``start``.
    """
    model = NearestNeighbors(n_neighbors=k + 1)
    model.fit(D)
    neighbour_dist, _ = model.kneighbors(D)
    neighbour_dist = np.delete(neighbour_dist, 0, axis=1)

    farthest_per_point = neighbour_dist.max(axis=1)
    mean_per_point = neighbour_dist.sum(axis=1) / k

    start = min(mean_per_point)
    step = (max(farthest_per_point) - start) / 100
    return start, step
def uniformly_random_subsample(pairs_file, n_samples, out_file):
    """Subsample pairs so they cover the similarity feature space roughly uniformly.

    Reads a tab-separated table of pairs, draws ``n_samples`` points
    uniformly from the unit hypercube spanned by the four similarity
    columns, and keeps, for every drawn point, the pair whose features are
    closest to it.  Several points can select the same pair, so duplicates
    are removed and the output may hold fewer than ``n_samples`` rows.
    (Assumes the similarity columns lie in [0, 1] -- TODO confirm.)

    Args:
        pairs_file: Path of the tab-separated input; it must contain the
            columns ``vec_sim``, ``jac_sim``, ``len_sim`` and ``top_sim``.
        n_samples: Number of uniform points to draw.
        out_file: Path of the tab-separated output.  The original row
            number of each kept pair is written as the ``index`` column.
    """
    feature_columns = ['vec_sim', 'jac_sim', 'len_sim', 'top_sim']

    pairs = pd.read_csv(pairs_file, sep='\t')

    # The sample dimensionality must match the fitted features; deriving it
    # from the column list (not from "all columns minus two") keeps this
    # working for files that carry a different number of extra columns.
    samples = pd.DataFrame(
        np.random.uniform(size=(n_samples, len(feature_columns))),
        columns=feature_columns,
    )

    # n_neighbors has to be passed by keyword: current scikit-learn
    # releases reject positional constructor arguments.
    nn = NearestNeighbors(n_neighbors=1, n_jobs=-1)
    nn.fit(pairs[feature_columns])
    nearest = nn.kneighbors(samples, return_distance=False)

    index = pd.DataFrame(nearest, columns=['index'])
    df = pairs.reset_index().merge(index, on='index').drop_duplicates()
    df.to_csv(out_file, sep='\t', index=False)
def _eval(feats_labels_sk, feats_labels_im, n=200):
    """Score sketch-to-image retrieval by ranking images under Hamming distance.

    Every sketch feature vector is used as a query against all image
    feature vectors (brute-force search, ``metric='hamming'``), which
    yields a full ranking of the images per sketch.  A ranked image counts
    as a match when its label equals the label of the querying sketch.

    :param feats_labels_sk: two-element sequence
        [features_of_sketches, labels_of_sketches]; labels are class ids.
    :param feats_labels_im: two-element sequence
        [features_of_images, labels_of_images]; labels are class ids.
    :param n: number of top-ranked images used for the precision figure.
    :return: tuple (precision@n, mAP over the full ranking), as computed by
        ``_get_pre_from_matches`` and ``_get_map_from_matches``.
    """
    sketch_feats = feats_labels_sk[0]
    sketch_labels = feats_labels_sk[1]
    image_feats = feats_labels_im[0]
    image_labels = feats_labels_im[1]

    # Ask for as many neighbours as there are images: a complete ranking.
    searcher = NN(
        n_neighbors=image_feats.shape[0],
        metric='hamming',
        algorithm='brute',
    ).fit(image_feats)
    _, ranking = searcher.kneighbors(sketch_feats)

    ranked_labels = np.array(image_labels)[ranking]
    hit_rows = [
        row == sketch_labels[i] for i, row in enumerate(ranked_labels)
    ]
    matches = np.vstack(hit_rows).astype(np.uint16)

    precision_at_n = _get_pre_from_matches(matches[:, :n])
    mean_ap = _get_map_from_matches(matches)
    return precision_at_n, mean_ap
def EpsDBSCAN(D, k):
    """Pick an Eps value from the binned mean k-nearest-neighbour distances.

    For every row of ``D`` the distances to its ``k + 1`` nearest neighbours
    are queried and the first column is dropped (the query set is the
    training set itself, so that column is normally the zero self-distance).
    The per-point mean of the remaining ``k`` distances is sorted, min-max
    normalised to [0, 1] and binned with 10 evenly spaced edges.  Each bin
    index 0..9 holding at least ``k`` points contributes the mean of its
    (un-normalised) distances as a candidate, in ascending order.

    The second candidate is returned.  Earlier revisions also computed
    slopes between candidates, but that result was always overwritten by
    this choice, so the slope code has been dropped.

    Args:
        D: Data accepted by ``NearestNeighbors.fit`` / ``kneighbors``.
        k: Number of neighbours to average over; also the minimum number of
            points a bin needs in order to produce a candidate.

    Returns:
        The second-smallest candidate (a numpy float).

    Raises:
        ValueError: If ``k`` is smaller than 1, or if fewer than two bins
            qualify (which includes the case where all mean distances are
            identical).  The latter previously surfaced as a
            ``ZeroDivisionError`` from the unused slope code.
    """
    if k < 1:
        raise ValueError("k must be a positive integer, got %r" % (k,))

    nn = NearestNeighbors(n_neighbors=k + 1)
    nn.fit(D)
    distances, _ = nn.kneighbors(D)
    distances = distances[:, 1:]  # drop the first (self) neighbour column

    avg_sorted = np.sort(distances.sum(axis=1) / k)
    lowest, highest = avg_sorted[0], avg_sorted[-1]
    span = highest - lowest

    Eps = []
    if span > 0:  # a zero span would make the normalisation 0 / 0
        normalised = (avg_sorted - lowest) / span
        bin_of = np.digitize(normalised, np.linspace(0, 1, 10))
        # NOTE: with 10 edges np.digitize yields 1..9 for values in [0, 1)
        # and 10 for exactly 1.0, so iterating 0..9 never considers the bin
        # that holds the maximum.  Kept to preserve the returned value.
        for bin_id in range(10):
            members = avg_sorted[bin_of == bin_id]
            if len(members) >= k:
                Eps.append(members.sum() / len(members))

    if len(Eps) < 2:
        raise ValueError(
            "need at least two distance bins with >= %d points to choose "
            "Eps, found %d" % (k, len(Eps))
        )
    return Eps[1]