def doKNN(k):
    dm = cdist(teXf, trXf, 'euclidean')
    cfm = np.zeros((10, 10), dtype=int)
    for a in range(0, len(dm)):
        knn = np.argpartition(dm[a], k)[:k]
        preds = trY[knn]
        counts = np.bincount(preds)
        pred = -1
        if len(counts) >= 2:
            top2 = np.argpartition(-counts, 1)
            if counts[top2[0]] == counts[top2[1]]:
                # Tie between the two most frequent labels: fall back to the closest neighbour
                d = 99999
                for i in xrange(0, len(knn)):
                    val = dm[a][knn[i]]  # distance to the i-th nearest neighbour
                    if val < d:
                        d = dm[a][knn[i]]
                        pred = trY[knn[i]]
            else:
                pred = top2[0]
        else:
            # Only label 0 occurred among the neighbours
            pred = 0
        #print pred
        #mnist.visualize(teX[a])
        cfm[teY[a]][pred] += 1
    #print cfm
    #print "ER: ", 1 - np.sum(np.diagonal(cfm))/np.sum(cfm)
    return cfm
def similarity_matrix(self):
    """
    Calculate the similarity matrix given all samples used for GTM map training

    :return: similarity_matrix: Matrix assessing the similarity between samples used for GTM map training
    """
    print "Calculating similarity matrix..."
    # Find one tenth of the highest and lowest probability distribution values for each sample in the latent space
    sim_size = int(round(self.latent_space_size/10))
    responsibility_indexes = np.zeros((sim_size * 2, self.input_data.shape[0]))
    corr_input = np.zeros((sim_size * 2, self.input_data.shape[0]))
    for i in xrange(0, self.input_data.shape[0]):
        responsibility_indexes[0:sim_size, i] = np.argpartition(self.gtm_responsibility[:, i],
                                                                -sim_size)[-sim_size:]
        responsibility_indexes[sim_size:, i] = np.argpartition(self.gtm_responsibility[:, i],
                                                               sim_size)[0:sim_size]
    responsibility_indexes = responsibility_indexes.astype(int)

    # Create correlation input matrix for similarity assessment
    for i in xrange(0, self.input_data.shape[0]):
        corr_input[:, i] = self.gtm_responsibility[responsibility_indexes[:, i], i]

    # Calculate correlation between all samples and build similarity matrix
    similarity_matrix = np.corrcoef(np.transpose(corr_input))

    # Plot heat map of the similarity matrix accordingly
    [x, y] = np.meshgrid(np.linspace(1, self.input_data.shape[0], self.input_data.shape[0]),
                         np.linspace(1, self.input_data.shape[0], self.input_data.shape[0]))
    x = np.ravel(x)
    y = np.ravel(y)
    sim_lat = np.array([x, y])
    print "Plotting color mesh image..."
    plt.pcolormesh(np.reshape(sim_lat[0, :], (self.input_data.shape[0], self.input_data.shape[0])),
                   np.reshape(sim_lat[1, :], (self.input_data.shape[0], self.input_data.shape[0])),
                   similarity_matrix, cmap='magma', vmin=0, vmax=1)
    plt.colorbar()
    plt.axis([x.min(), x.max(), y.min(), y.max()])
    plt.gca().invert_yaxis()
    return similarity_matrix
def predict_variance_inf_phase1(budget, hum_train_means, temp_train_means, hum_train_vars, temp_train_vars):
    """Method to make predictions based on max-variance active inference."""
    start_hum = 0
    window_hum = None
    window_temp = None
    i = 0
    hum_preds = np.ones((50, 96))
    temp_preds = np.ones((50, 96))
    for t in global_times:
        if budget > 0:
            window_hum = np.argpartition(hum_train_vars[t], -budget)[-budget:]
            window_temp = np.argpartition(temp_train_vars[t], -budget)[-budget:]
        else:
            window_hum = np.array([])
            window_temp = np.array([])
        hum_pred, temp_pred = makePreds_phase1(window_hum, window_temp, hum_train_means, temp_train_means, i, t)
        hum_preds[:, i] = copy.deepcopy(hum_pred)
        temp_preds[:, i] = copy.deepcopy(temp_pred)
        i += 1
    hum_mean_err = mean_absolute_error(hum_test, hum_preds)
    temp_mean_err = mean_absolute_error(temp_test, temp_preds)
    return hum_preds, temp_preds, hum_mean_err, temp_mean_err
def precision_test_function(theano_inputs):
    k = 10
    scores1, scores2, c_select, n_used_items = theano_test_function(*theano_inputs)
    ids1 = np.argpartition(-scores1, range(k), axis=-1)[0, :k]
    ids2 = np.argpartition(-scores2, range(k), axis=-1)[0, :k]
    return ids1, ids2, c_select, n_used_items
def branch_to_nodes(self, wt, completion): """ Decide which nodes to branch to next """ missing_edges = HGT.get_missing_edges(completion) # Obtain the missing edge sparse list nb = self.strat.node_brancher # Determine if there is a maximum count count_max = min(self.strat.max_node_branch, self.num_nodes) if nb is None or not 'name' in nb: # Default # Gets nodes that contribute to missing edge edge = missing_edges.indices[0] # Grab any next edge node_indices = self.H[:, edge].indices elif nb['name'] == 'greedy' or nb['name'] == 'long': # Gets the nodes that overlap the most(least) with what's missing overlap = self.H.dot(missing_edges.T) # k = min(count_max + wt.nnz, overlap.nnz) k = min(count_max, overlap.nnz) if k >= self.num_nodes or k == overlap.nnz: if nb['name'] == 'greedy': alg_slice = np.argsort(overlap.data)[::-1] else: # long alg_slice = np.argsort(overlap.data) else: # Else be smart, don't perform O(nlogn) operations, perform O(k) operations if nb['name'] == 'greedy': alg_slice = np.argpartition(overlap.data, -k)[-k:] else: #long alg_slice = np.argpartition(overlap.data, k)[:k] node_indices = overlap.indices[alg_slice] elif nb['name'] == 'random': # Gets nodes that contribute to random missing edge edge = np.random.choice(missing_edges.indices) # Grab any next edge node_indices = self.H[:, edge].indices elif nb['name'] == 'diverse': # Diversify the kinds of transversals that have been found if wt.nnz == 0: # Just starting out node_indices = np.arange(self.num_nodes) # Branch to everything else: # Otherwise be greedy up to one # edge = missing_edges.indices[0] # Grab any next edge # node_indices = [self.H[:, edge].indices[0]] # overlap = self.H.dot(missing_edges.T) # node_indices = [overlap.indices[np.argmax(overlap.data)]] scaled_overlap = overlap.data / (self.node_weights[overlap.indices]**2) node_indices = overlap.indices[np.where(np.max(scaled_overlap) == scaled_overlap)] else: raise ValueError("Invalid strat.node_brancher: {0}".format(self.strat.node_brancher)) if nb is not None and bool(nb.get('shuffle', False)): np.random.shuffle(node_indices) count = 0 for i in node_indices: if count >= count_max: break if not wt[i, 0] > 0: # not already part of working transversal self.log('Branching to node:', i) count += 1 yield i
def local_kmeans_class(I, L, x, k):
    from scipy.spatial.distance import cdist
    sizex = len(np.atleast_2d(x))
    label = np.zeros((sizex, k))
    for rowsx in range(0, sizex):
        tic()
        dists = cdist(I, np.atleast_2d(x[rowsx]), metric='euclidean')
        toc()
        center = np.zeros((10, k, 28*28))
        label_order = np.unique(L)
        l = 0
        tic()
        thing = np.zeros((k, 28*28))
        for labs in np.unique(L):
            indices = L == labs
            k_smallest = np.argpartition(dists[indices], tuple(range(1, k)), axis=None)
            for i in range(0, k):
                M = I[indices]
                #center[l,i,:] = np.average(M[k_smallest[:i+1]],axis = 0)
                if i == 0:
                    thing[i] = M[k_smallest[i+1]]
                else:
                    thing[i] = thing[i-1] + M[k_smallest[i+1]]
            center[l, :, :] = np.divide(thing, np.repeat(np.arange(1, 11).reshape(10, 1), 28*28, axis=1))
            l += 1
        toc()
        for i in range(k):
            #print(cdist(center[:,i,:], np.atleast_2d(x[rowsx]), metric='euclidean'))
            dists2center = cdist(center[:, i, :], np.atleast_2d(x[rowsx]), metric='euclidean')
            k_smallest = np.argpartition(dists2center, tuple(range(1)), axis=None)
            label[rowsx, i] = label_order[k_smallest[0]]
    return label
def construct_initial_solution(self):
    ind = np.argpartition(self.collaboration_coo.data, -len(self.villains_team))[-len(self.villains_team):]
    inc = 1
    while len(np.unique(self.collaboration_coo.row[ind])) < len(self.villains_team):
        ind = np.argpartition(self.collaboration_coo.data,
                              -(len(self.villains_team) + inc))[-(len(self.villains_team) + inc):]
        inc += 1
    heroes_team = self.heroes.loc[self.heroes[CHARACTER_ID].isin(self.collaboration_coo.row[ind])]
    return heroes_team
def similarityPlot(): import matplotlib.pyplot as plt from matplotlib import rcParams tfidf_vectorizer = TfidfVectorizer(min_df=1) names = friendsAboveMinNumMessages(200) + [me] data = [] words = [] #ordering of words in tf_idf matrix wordsSet = set() #for faster lookup nameSet = set() for person in personDict: for name in person.split(): nameSet.add(name) nameSet.add(name.lower()) for i in range(len(names)): data.append(getAllMessagesAsString(names[i], False)) tfidf_matrix = tfidf_vectorizer.fit_transform(data) featureNames = tfidf_vectorizer.get_feature_names() tfidf_arr = tfidf_matrix.toarray() for j in range(len(tfidf_arr[0])): word = tfidf_arr[0][j] if word not in wordsSet: words.append(word) wordsSet.add(j) #nmds = manifold.MDS(metric = True, n_components = N_DISTINGUISHING_FEATURES) #npos = nmds.fit_transform(tfidf_matrix.toarray()) clf = PCA(n_components=2) npos = clf.fit_transform(tfidf_arr) plt.scatter(npos[:, 0], npos[:, 1], marker = 'o', c = 'b', cmap = plt.get_cmap('Spectral')) #change colors for name, x, y in zip(names, npos[:, 0], npos[:, 1]): plt.annotate( name, xy = (x, y), xytext = (-20, 20), textcoords = 'offset points', ha = 'right', va = 'bottom', bbox = dict(boxstyle = 'round,pad=0.5', fc = 'yellow', alpha = 0.5), arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3,rad=0')) fig, ax = plt.subplots() ax2 = ax.twinx() xAxisP = [featureNames[i] for i in np.argpartition(clf.components_[0], -50)[-50:] if featureNames[i] not in nameSet] yAxisP = [featureNames[i] for i in np.argpartition(clf.components_[1], -50)[-50:] if featureNames[i] not in nameSet] xAxisN = [featureNames[i] for i in np.argpartition(-clf.components_[0], -50)[-50:] if featureNames[i] not in nameSet] yAxisN = [featureNames[i] for i in np.argpartition(-clf.components_[1], -50)[-50:] if featureNames[i] not in nameSet] ax.set_xlabel("Most Postively influential words along x axis:\n" + ", ".join(xAxisP), fontsize=18) ax.set_ylabel("Most Postively influential words along y axis:\n" + ", ".join(yAxisP), fontsize=18) ax2.set_xlabel("Most Negatively influential words along x axis:\n" + ", ".join(xAxisN), fontsize=18) ax2.set_ylabel("Most Negatively influential words along y axis:\n" + ", ".join(yAxisN), fontsize=18) # xAxis = [featureNames[i] for i in np.argpartition(np.absolute(clf.components_[0]), -50)[-50:] if featureNames[i] not in nameSet] # yAxis = [featureNames[i] for i in np.argpartition(np.absolute(clf.components_[1]), -50)[-50:] if featureNames[i] not in nameSet] # for i in range(1, max(len(xAxis), len(yAxis)) ): # if i % 20 == 0 and i < len(xAxis): # xAxis[i] += "\n" # if i % 15 == 0 and i < len(yAxis): # yAxis[i] += "\n" # plt.xlabel("Most influential words along x axis:\n" + ", ".join(xAxis), fontsize=18) # plt.ylabel("Most influential words along y axis:\n" + ", ".join(yAxis), fontsize=18) rcParams.update({'figure.autolayout': True}) plt.suptitle("Word-Usage Similarity Scatterplot", fontsize = 24, fontweight = 'bold') plt.show()
def _get_k_max_elements_indices_and_scores(vec, k, mask=None):
    if mask is None:
        # We use argpartition here instead of argsort to achieve linear-time performance.
        max_elements_indices = np.argpartition(-vec, k - 1)[:k]
    else:
        masked_vec = vec.copy()  # To avoid side-effects
        masked_vec[~mask] = -np.inf
        max_elements_indices = np.argpartition(-masked_vec, k - 1)[:k]
    return max_elements_indices, vec[max_elements_indices]
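A minimal usage sketch for the helper above, assuming `vec` is a 1-D NumPy array and `mask` marks the entries allowed to be selected; note the returned indices come back in arbitrary order, not sorted by score.

import numpy as np

vec = np.array([0.2, 0.9, 0.1, 0.7, 0.5])
mask = np.array([True, False, True, True, True])  # exclude index 1 from selection

idx, scores = _get_k_max_elements_indices_and_scores(vec, k=2, mask=mask)
# idx contains {3, 4} in arbitrary order; scores are the corresponding vec[idx]
print(idx, scores)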
def _phase2(self): """ Execute phase 2 of the SP region. This phase is used to compute the active columns. Note - This should only be called after phase 1 has been called and after the inhibition radius and neighborhood have been updated. """ # Shift the outputs self.y[:, 1:] = self.y[:, :-1] self.y[:, 0] = 0 # Calculate k # - For a column to be active its overlap must be at least as large # as the overlap of the k-th largest column in its neighborhood. k = self._get_num_cols() if self.global_inhibition: # The neighborhood is all columns, thus the set of active columns # is simply columns that have an overlap >= the k-th largest in the # entire region # Compute the winning column indexes if self.learn: # Randomly break ties ix = np.argpartition(-self.overlap[:, 0] - self.prng.uniform(.1, .2, self.ncolumns), k - 1)[:k] else: # Choose the same set of columns each time ix = np.argpartition(-self.overlap[:, 0], k - 1)[:k] # Set the active columns self.y[ix, 0] = self.overlap[ix, 0] > 0 else: # The neighborhood is bounded by the inhibition radius, therefore # each column's neighborhood must be considered for i in xrange(self.ncolumns): # Get the neighbors ix = np.where(self.neighbors[i])[0] # Compute the minimum top overlap if ix.shape[0] <= k: # Desired number of candidates is at or below the desired # activity level, so find the overall min m = max(bn.nanmin(self.overlap[ix, 0]), 1) else: # Desired number of candidates is above the desired # activity level, so find the k-th largest m = max(-np.partition(-self.overlap[ix, 0], k - 1)[k - 1], 1) # Set the column activity if self.overlap[i, 0] >= m: self.y[i, 0] = True
def _build_recursive(indices, level=0, split_index=0):
    """
    Descend recursively into tree to build it, setting splits and
    returning indices for leaves

    :param indices: The current set of indices before partitioning
    :param level: The level in the tree
    :param split_index: The index of the split to set
    :return: A list of arrays representing leaf membership
    :rtype: list[np.ndarray]
    """
    # If we're at the bottom, no split, just return the set
    if level == self._depth:
        return [indices]

    n = indices.size
    # If we literally don't have enough to populate the leaf, make it empty
    if n < 1:
        return []

    # Get the random projections for these indices at this level
    # NB: Recall that the projection matrix has shape (levels, N)
    level_proj = proj[indices, level]

    # Split at the median if even, put median in upper half if not
    n_split = n // 2
    if n % 2 == 0:
        part_indices = np.argpartition(
            level_proj, (n_split - 1, n_split))
        split_val = level_proj[part_indices[n_split - 1]]
        split_val += level_proj[part_indices[n_split]]
        split_val /= 2.0
    else:
        part_indices = np.argpartition(level_proj, n_split)
        split_val = level_proj[part_indices[n_split]]

    splits[split_index] = split_val

    # part_indices is relative to this block of values, recover main indices
    left_indices = indices[part_indices[:n_split]]
    right_indices = indices[part_indices[n_split:]]

    # Descend into each split and get sub-splits
    left_out = _build_recursive(left_indices, level=level + 1,
                                split_index=2 * split_index + 1)
    right_out = _build_recursive(right_indices, level=level + 1,
                                 split_index=2 * split_index + 2)

    # Assemble index set
    left_out.extend(right_out)
    return left_out
def fitOneLinearRegression(thetaLinear, IntensityLinear, tiltanglesArray, options): if (len(tiltanglesArray)%2 == 1): halfN = int(len(tiltanglesArray)/2) + 1 xLeft, yLeft = thetaLinear[0:halfN], IntensityLinear[0:halfN] xRight, yRight = thetaLinear[halfN-1:], IntensityLinear[halfN-1:] else: halfN = int(len(tiltanglesArray)/2) xLeft, yLeft = thetaLinear[0:halfN], IntensityLinear[0:halfN] xRight, yRight = thetaLinear[halfN:], IntensityLinear[halfN:] slopeLeft, interceptLeft, r2Left = linearRegression(xLeft, yLeft) slopeRight, interceptRight, r2Right = linearRegression(xRight, yRight) assert(len(xLeft)==len(xRight)) fitLeft = slopeLeft*xLeft + interceptLeft fitRight = slopeRight*xRight + interceptRight #the sum of squared residuals resLeft = yLeft - fitLeft resLeft = resLeft / fitLeft #print "resLeft", resLeft resRight = yRight - fitRight resRight = resRight / fitRight #print "resRight", resRight fresLeft = sum(resLeft**2) fresRight = sum(resRight**2) fres = [fresLeft*1000000, fresRight*1000000] #find the points with the largest 3 residuals in left and right branches, use numpy.argpartition #N = options.largestNRes N=3 negN = (-1)*N indexLargeLeft = np.argpartition(resLeft**2, negN)[negN:] indexLargeRight = np.argpartition(resRight**2, negN)[negN:] M=3 #M = options.smallestNRes posM = M indexSmallLeft = np.argpartition(resLeft**2, posM)[:posM] indexSmallRight = np.argpartition(resRight**2, posM)[:posM] #MSE, under the assumption that the population error term has a constant variance, the estimate of that variance is given by MSE, mean square error #The denominator is the sample size reduced by the number of model parameters estimated from the same data, (n-p) for p regressors or (n-p-1) if an intercept is used. #In this case, p=1 so the denominator is n-2. stdResLeft = np.std(resLeft, ddof=2) stdResRight = np.std(resRight, ddof=2) stdRes = [stdResLeft*1000, stdResRight*1000] ret = fres, stdRes, xLeft, yLeft, fitLeft, xRight, yRight, fitRight, indexLargeLeft, indexLargeRight, indexSmallLeft, indexSmallRight, resLeft, resRight, slopeLeft, interceptLeft, slopeRight, interceptRight return ret
def define_toplogy(self, num_input, num_hidden, num_output, density): """ Defines the topology of the OpenBrain network. :param num_input: :param num_hidden: :param num_output: :param density: :return: """ topo = networkx.DiGraph(networkx.watts_strogatz_graph(self.num_neurons, 5, density, seed=None)).to_directed() adjacency_list = topo.adjacency_list() # Pick the output neurons to be those with highest in degree in_deg = np.array([topo.in_degree(x) for x,_ in enumerate(adjacency_list)]) self.output_neurons = np.argpartition(in_deg, -num_output)[-num_output:] print(self.output_neurons) print([topo.in_degree(x) for x in self.output_neurons]) # Pick the input neurons to be those with highest out degree out_deg = np.array([topo.out_degree(x) if x not in self.output_neurons else -1 for x,_ in enumerate(adjacency_list)]) self.input_neurons = np.argpartition(out_deg, -num_input)[-num_input:] # Output neurons do not fire out. for adjacent_neurons in adjacency_list: for out_neuron in self.output_neurons: if out_neuron in adjacent_neurons: adjacent_neurons.remove(out_neuron) # Disconnect input -> output for out in self.output_neurons: for inp in self.input_neurons: if out in adjacency_list[inp]: adjacency_list[inp].remove(out) if inp in adjacency_list[out]: adjacency_list[out].remove(inp) for i, adjacent in enumerate(adjacency_list): if i not in self.input_neurons and i not in self.output_neurons: for n in adjacent: if i in adjacency_list[n]: if np.random.rand(1)>0.5: adjacent.remove(n) else: adjacency_list[n].remove(i) # Let nothing enter the input neurons for inp in self.input_neurons: adjacency_list[inp] = [] return adjacency_list
def sort_by_relative_entropy(corpus, topicct, stemmer): # get the right file names for the corpus and count stemmed_weights = ['wordweights/' + fname for fname in os.listdir('wordweights') if fname.startswith('{}-{}-{}'.format(corpus, stemmer, topicct))] unstemmed_weights = ['wordweights/' + fname for fname in os.listdir('wordweights') if fname.startswith('{}-{}-{}'.format(corpus, UNSTEMMED_NAME, topicct))] stemmed_corpus_file = 'corpora/{}-train-{}-stopped.txt'.format(corpus, stemmer) unstemmed_corpus_file = 'corpora/{}-train-{}-stopped.txt'.format(corpus, UNSTEMMED_NAME) # get the mapping from unstemmed to stemmed words stemmed_to_unstemmed = defaultdict(set) unstemmed_counts = Counter() with open(stemmed_corpus_file) as f, open(unstemmed_corpus_file) as g: for stemmed_line in f: stemmed_words = stemmed_line.split()[3:] unstemmed_words = g.readline().split()[3:] assert(len(stemmed_words) == len(unstemmed_words)) for uword, sword in zip(unstemmed_words, stemmed_words): stemmed_to_unstemmed[sword].add(uword) unstemmed_counts[uword] += 1 # for each file; for each word; get the entropy stemmed_entropies = defaultdict(list) unstemmed_entropies = defaultdict(list) for file in stemmed_weights: entropy_dict = get_stemmed_entropy_per_word(file) for k, v in entropy_dict.iteritems(): stemmed_entropies[k].append(v) for file in unstemmed_weights: entropy_dict = get_unstemmed_entropy_per_word(file, stemmed_to_unstemmed, int(topicct)) for k, v in entropy_dict.iteritems(): unstemmed_entropies[k].append(v) # compute difference of average entropies stemmed_vocab = [sword for sword, uwords in stemmed_to_unstemmed.iteritems() if len(uwords) > 1] entropy_diffs = np.zeros(len(stemmed_vocab)) for i, sword in enumerate(stemmed_vocab): entropy_diffs[i] = np.mean(stemmed_entropies[sword]) - np.mean(unstemmed_entropies[sword]) # find top 50 maximum and minimum entropies min_indices = np.argpartition(entropy_diffs, 50)[:50] max_indices = np.argpartition(entropy_diffs, -50)[-50:] with open('wordlists/{}-{}-{}.txt'.format(corpus, stemmer, topicct), 'w') as wf: wf.write('Lowest entropy differences (stemmed is better)\n') for i in min_indices: wf.write('{}\t{}\t{}\n'.format(entropy_diffs[i], stemmed_vocab[i], ' '.join(stemmed_to_unstemmed[stemmed_vocab[i]]))) wf.write('Highest entropy differences (unstemmed is better)\n') for i in max_indices: wf.write('{}\t{}\t{}\n'.format(entropy_diffs[i], stemmed_vocab[i], ' '.join(stemmed_to_unstemmed[stemmed_vocab[i]])))
def disp_results(fig, ax1, ax2, loss_iterations, losses, accuracy_iterations, accuracies, accuracies_iteration_checkpoints_ind, fileName, color_ind=0): modula = len(plt.rcParams['axes.color_cycle']) acrIterations =[] top_acrs={} if accuracies.size: if accuracies.size>4: top_n = 4 else: top_n = accuracies.size -1 temp = np.argpartition(-accuracies, top_n) result_indexces = temp[:top_n] temp = np.partition(-accuracies, top_n) result = -temp[:top_n] for acr in result_indexces: acrIterations.append(accuracy_iterations[acr]) top_acrs[str(accuracy_iterations[acr])]=str(accuracies[acr]) sorted_top4 = sorted(top_acrs.items(), key=operator.itemgetter(1)) maxAcc = np.amax(accuracies, axis=0) iterIndx = np.argmax(accuracies) maxAccIter = accuracy_iterations[iterIndx] maxIter = accuracy_iterations[-1] consoleInfo = format('\n[%s]:maximum accuracy [from 0 to %s ] = [Iteration %s]: %s ' %(fileName,maxIter,maxAccIter ,maxAcc)) plotTitle = format('max accuracy(%s) [Iteration %s]: %s ' % (fileName,maxAccIter, maxAcc)) print (consoleInfo) #print (str(result)) #print(acrIterations) # print 'Top 4 accuracies:' print ('Top 4 accuracies:'+str(sorted_top4)) plt.title(plotTitle) ax1.plot(loss_iterations, losses, color=plt.rcParams['axes.color_cycle'][(color_ind * 2 + 0) % modula]) ax2.plot(accuracy_iterations, accuracies, plt.rcParams['axes.color_cycle'][(color_ind * 2 + 1) % modula], label=str(fileName)) ax2.plot(accuracy_iterations[accuracies_iteration_checkpoints_ind], accuracies[accuracies_iteration_checkpoints_ind], 'o', color=plt.rcParams['axes.color_cycle'][(color_ind * 2 + 1) % modula]) plt.legend(loc='lower right')
def get_features(self, _input):
    d = self.prototypes - _input
    d = np.sqrt(sum(d.T ** 2))  # get Euclidean distance
    indexes = np.argpartition(d, self.c, axis=0)[:self.c]
    phi = np.zeros(self.numPrototypes)
    phi[indexes] = 1
    return phi
def _kneighbors_reduce_func(self, dist, start, n_neighbors, return_distance):
    """Reduce a chunk of distances to the nearest neighbors

    Callback to :func:`sklearn.metrics.pairwise.pairwise_distances_chunked`

    Parameters
    ----------
    dist : array of shape (n_samples_chunk, n_samples)
    start : int
        The index in X which the first row of dist corresponds to.
    n_neighbors : int
    return_distance : bool

    Returns
    -------
    dist : array of shape (n_samples_chunk, n_neighbors), optional
        Returned only if return_distance
    neigh : array of shape (n_samples_chunk, n_neighbors)
    """
    sample_range = np.arange(dist.shape[0])[:, None]
    neigh_ind = np.argpartition(dist, n_neighbors - 1, axis=1)
    neigh_ind = neigh_ind[:, :n_neighbors]
    # argpartition doesn't guarantee sorted order, so we sort again
    neigh_ind = neigh_ind[
        sample_range, np.argsort(dist[sample_range, neigh_ind])]
    if return_distance:
        if self.effective_metric_ == 'euclidean':
            result = np.sqrt(dist[sample_range, neigh_ind]), neigh_ind
        else:
            result = dist[sample_range, neigh_ind], neigh_ind
    else:
        result = neigh_ind
    return result
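The partition-then-sort pattern used above can be exercised on its own; a small sketch with a toy distance matrix (shapes and values are illustrative only).

import numpy as np

rng = np.random.default_rng(0)
dist = rng.random((4, 10))          # 4 query rows, 10 candidate columns
k = 3
rows = np.arange(dist.shape[0])[:, None]

# O(n) selection of the k smallest per row, then an O(k log k) sort of just those k
ind = np.argpartition(dist, k - 1, axis=1)[:, :k]
ind = ind[rows, np.argsort(dist[rows, ind], axis=1)]
print(ind)                          # per-row indices of the 3 nearest candidates, sorted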
def argsort(x, topn=None, reverse=False):
    """Get indices of the `topn` smallest elements in array `x`.

    Parameters
    ----------
    x : array_like
        Array to sort.
    topn : int, optional
        Number of indices of the smallest (greatest) elements to be returned if given,
        otherwise - indices of all elements will be returned in ascending (descending) order.
    reverse : bool, optional
        If True - return the `topn` greatest elements, in descending order.

    Returns
    -------
    numpy.ndarray
        Array of `topn` indices that sort the array in the required order.

    """
    x = np.asarray(x)  # unify code path for when `x` is not a np array (list, tuple...)
    if topn is None:
        topn = x.size
    if topn <= 0:
        return []
    if reverse:
        x = -x
    if topn >= x.size or not hasattr(np, 'argpartition'):
        return np.argsort(x)[:topn]
    # np >= 1.8 has a fast partial argsort, use that!
    most_extreme = np.argpartition(x, topn)[:topn]
    return most_extreme.take(np.argsort(x.take(most_extreme)))  # resort topn into order
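A quick hedged usage sketch of `argsort` as defined above, showing both directions:

import numpy as np

x = [5, 1, 4, 2, 3]
print(argsort(x, topn=2))                 # indices of the 2 smallest: [1, 3]
print(argsort(x, topn=2, reverse=True))   # indices of the 2 largest:  [0, 2]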
def computeRanks(composedSpace, observedSpace):
    """Ranks all the representations in the composed space with respect to the
    representations in the observed space. Cut-off value 1000.
    """
    ranks = {}
    rankList = []
    composedWords = set(composedSpace.get_id2row())
    observedWords = observedSpace.get_id2row()
    neighbours = 1000

    for w_idx, word in enumerate(composedWords):
        vector = composedSpace.get_row(word)
        Y = 1 - cdist(vector.mat, observedSpace.get_cooccurrence_matrix().mat, 'cosine')
        nearest = Y.argmax()
        nearest_k_indices = np.argpartition(Y, tuple([-p for p in range(neighbours)]), axis=None)[-neighbours:]
        # pp([(observedWords[idx], Y[0][idx]) for idx in reversed(nearest_k_indices)])
        words = [observedWords[idx] for idx in reversed(nearest_k_indices)]
        wordRanks = {word: index + 1 for index, word in enumerate(words)}
        # print(wordRanks)
        if (word in wordRanks):
            r = wordRanks[word]
            ranks[word] = r
            rankList.append(r)
        else:
            ranks[word] = 1000
            rankList.append(1000)
        if ((w_idx > 0) and (w_idx % 100 == 0)):
            print(w_idx)

    return rankList, ranks
def similar_movies(self, weights, base_movie, movies=None, n=6):
    """ Gets the n similar movies to a base movie. """
    fv = self.features(base_movie, movies=movies)
    wv = weights.reshape((weights.shape[1], 1))
    scores = fv.dot(wv)
    inds = np.argpartition(scores, -n, axis=0)[-n:].reshape(n)
    return [self.movie_indices[i] for i in inds]
def pspace(h1e, eri, norb, nelec, hdiag, np=400):
    '''pspace Hamiltonian to improve Davidson preconditioner. See, CPL, 169, 463
    '''
    neleca, nelecb = _unpack_nelec(nelec)
    h1e = numpy.ascontiguousarray(h1e)
    eri = ao2mo.restore(1, eri, norb)
    nb = cistring.num_strings(norb, nelecb)
    if hdiag.size < np:
        addr = numpy.arange(hdiag.size)
    else:
        try:
            addr = numpy.argpartition(hdiag, np-1)[:np]
        except AttributeError:
            addr = numpy.argsort(hdiag)[:np]
    addra, addrb = divmod(addr, nb)
    stra = numpy.array([cistring.addr2str(norb, neleca, ia) for ia in addra],
                       dtype=numpy.uint64)
    strb = numpy.array([cistring.addr2str(norb, nelecb, ib) for ib in addrb],
                       dtype=numpy.uint64)
    np = len(addr)
    h0 = numpy.zeros((np, np))
    libfci.FCIpspace_h0tril(h0.ctypes.data_as(ctypes.c_void_p),
                            h1e.ctypes.data_as(ctypes.c_void_p),
                            eri.ctypes.data_as(ctypes.c_void_p),
                            stra.ctypes.data_as(ctypes.c_void_p),
                            strb.ctypes.data_as(ctypes.c_void_p),
                            ctypes.c_int(norb), ctypes.c_int(np))
    for i in range(np):
        h0[i, i] = hdiag[addr[i]]
    h0 = lib.hermi_triu(h0)
    return addr, h0
def splitBimodal(self, x, y, largepoly=30):
    p = np.polyfit(x, y, largepoly)  # polynomial coefficients for fit

    extrema = np.roots(np.polyder(p))
    extrema = extrema[np.isreal(extrema)]
    extrema = extrema[(extrema - x[1]) * (x[-2] - extrema) > 0]  # exclude the endpoints due false maxima during fitting

    try:
        root_vals = [sum([p[::-1][i]*(root**i) for i in range(len(p))]) for root in extrema]
        peaks = extrema[np.argpartition(root_vals, -2)][-2:]  # find two peaks of bimodal distribution

        mid, = np.where((x - peaks[0]) * (peaks[1] - x) > 0)  # want data points between the peaks
    except:
        warnings.warn("Peak finding failed!")
        return None

    try:
        p_mid = np.polyfit(x[mid], y[mid], 2)  # fit middle section to a parabola
        midpoint = np.roots(np.polyder(p_mid))[0]
    except:
        warnings.warn("Polynomial fit between peaks of distribution poorly conditioned. "
                      "Falling back on using the minimum! May result in inaccurate split determination.")
        if len(mid) == 0:
            return None

        midx = np.argmin(y[mid])
        midpoint = x[mid][midx]

    return midpoint
def nearest(self, wrd, N=10):
    wrd_vec_norm = self.w_to_normv(wrd)
    if wrd_vec_norm is None:
        return
    sims = self.vectors.dot(wrd_vec_norm)/self.norm_constants  # cosine similarity to all other vecs
    # http://stackoverflow.com/questions/6910641/how-to-get-indices-of-n-maximum-values-in-a-numpy-array
    return sorted(((sims[idx], self.words[idx]) for idx in numpy.argpartition(sims, -N-1)[-N-1:]),
                  reverse=True)[1:]
def cluster_newsgroups():
    """ Cluster newsgroup categories. """
    from kmeans import KMeans
    from similarity import simMatrix

    corpus, dictionary = build_dictionary(bigram=True)
    tfidf = TFIDF(dictionary)
    newsgroups = tfidf.vectorize(corpus)
    dictionary = tfidf.dictionary
    categories = sorted(corpus.keys())

    N = 6
    print "\n{}-Most Common Words".format(N)
    for index, category in enumerate(categories):
        nlargest = np.argpartition(newsgroups[index, :], -N)[-N:]
        nlargest = nlargest[np.argsort(newsgroups[index, nlargest])][::-1]
        print "{:>24} {}".format(category, dictionary[nlargest])
    print

    K = 3
    km = KMeans(n_clusters=K)
    km.fit(newsgroups)
    labels = km.labels_

    print "\nKMeans Label Assignment, K = {}".format(K)
    for category, label, in zip(categories, labels):
        print int(label), category

    simMatrix(newsgroups).plot().show()
def predict(clf):
    import numpy as np
    X = h5read('testX_sample_kmeans_3.h5', 'lid/test/X/sample_kmeans_3')
    #print("Data read.")
    yprob = clf.predict_proba(X)
    mp3db = h5read('testXmp3.h5', 'lid/test/X/mp3')
    ylabels = h5read('ydict.h5', 'lid/data/y/labels')
    ylang = h5read('ydict.h5', 'lid/data/y/lang')
    ydict = {k: v for k, v in zip(ylabels, ylang)}
    top_labels = np.zeros((yprob.shape[0], 3))
    for isamp in range(0, yprob.shape[0]):
        #best = np.argmax(yprob[isamp])
        #print best
        NTOP = 3
        top_indices = np.argpartition(yprob[isamp], -NTOP)[-NTOP:]
        top_probs = yprob[isamp][top_indices]
        order = np.argsort(top_probs)
        #print(top_indices)
        #print(top_probs)
        #print(order)
        print(mp3db[isamp] + ',' + ydict[top_indices[order[2]]] + ',1')
        print(mp3db[isamp] + ',' + ydict[top_indices[order[1]]] + ',2')
        print(mp3db[isamp] + ',' + ydict[top_indices[order[0]]] + ',3')
def query_with_distances(self, v, n):
    """Find indices of `n` most similar vectors from the index to query vector `v`."""
    if self._metric == 'hamming':
        v = numpy.packbits(v)

    if self._metric != 'jaccard':
        # use same precision for query as for index
        v = numpy.ascontiguousarray(v, dtype=self.index.dtype)

    # HACK we ignore query length as that's a constant not affecting the final ordering
    if self._metric == 'angular':
        # argmax_a cossim(a, b) = argmax_a dot(a, b) / |a||b| = argmin_a -dot(a, b)
        dists = -numpy.dot(self.index, v)
    elif self._metric == 'euclidean':
        # argmin_a (a - b)^2 = argmin_a a^2 - 2ab + b^2 = argmin_a a^2 - 2ab
        dists = self.lengths - 2 * numpy.dot(self.index, v)
    elif self._metric == 'hamming':
        diff = numpy.bitwise_xor(v, self.index)
        pc = BruteForceBLAS.popcount
        den = float(len(v) * 8)
        dists = [sum([pc[part] for part in point]) / den for point in diff]
    elif self._metric == 'jaccard':
        dists = [pd[self._metric]['distance'](v, e) for e in self.index]
    else:
        assert False, "invalid metric"  # shouldn't get past the constructor!

    # partition-sort by distance, get `n` closest
    nearest_indices = numpy.argpartition(dists, n)[:n]
    indices = [idx for idx in nearest_indices
               if pd[self._metric]["distance_valid"](dists[idx])]

    def fix(index):
        ep = self.index[index]
        ev = v
        if self._metric == "hamming":
            ep = numpy.unpackbits(ep)
            ev = numpy.unpackbits(ev)
        return (index, pd[self._metric]['distance'](ep, ev))

    return map(fix, indices)
def process_chunk(chunk, data, k, metric):
    d = cdist(chunk, data, metric=metric).astype('float32')
    p = np.argpartition(d, k).astype('int32')[:, :k]
    rows = np.arange(chunk.shape[0])[:, None]
    d = d[rows, p]
    i = np.argsort(d)
    return d[rows, i], p[rows, i]
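A minimal driver for the chunked helper above, assuming `scipy.spatial.distance.cdist` and NumPy are imported as in the surrounding snippets; the data here is purely illustrative.

import numpy as np
from scipy.spatial.distance import cdist

rng = np.random.default_rng(1)
data = rng.random((100, 8)).astype('float32')    # reference points
chunk = rng.random((5, 8)).astype('float32')     # query chunk

dists, idx = process_chunk(chunk, data, k=4, metric='euclidean')
# dists[i] holds the 4 smallest distances for query i, in ascending order;
# idx[i] holds the matching row indices into `data`
print(dists.shape, idx.shape)                    # (5, 4) (5, 4)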
def argtopk(a_plus_idx, k, axis, keepdims):
    """ Chunk and combine function of argtopk

    Extract the indices of the k largest elements from a on the given axis.
    If k is negative, extract the indices of the -k smallest elements instead.
    Note that, unlike in the parent function, the returned elements
    are not sorted internally.
    """
    assert keepdims is True
    axis = axis[0]

    if isinstance(a_plus_idx, list):
        a_plus_idx = list(flatten(a_plus_idx))
        a = np.concatenate([ai for ai, _ in a_plus_idx], axis)
        idx = np.concatenate([broadcast_to(idxi, ai.shape)
                              for ai, idxi in a_plus_idx], axis)
    else:
        a, idx = a_plus_idx

    if abs(k) >= a.shape[axis]:
        return a_plus_idx

    idx2 = np.argpartition(a, -k, axis=axis)
    k_slice = slice(-k, None) if k > 0 else slice(-k)
    idx2 = idx2[tuple(k_slice if i == axis else slice(None)
                      for i in range(a.ndim))]
    return take_along_axis(a, idx2, axis), take_along_axis(idx, idx2, axis)
def get_largest(row, N=10):
    if N >= row.nnz:
        best = zip(row.data, row.indices)
    else:
        ind = numpy.argpartition(row.data, -N)[-N:]
        best = zip(row.data[ind], row.indices[ind])
    return sorted(best, reverse=True)
def get_candidate_dynamic(texts, trainingset, cluster_size, file_name):
    vectorizer = cst_vectorizer.StemmedTfidfVectorizer(**param)
    texts_vec = vectorizer.fit_transform(texts)
    training_vec = vectorizer.transform(trainingset)

    from sklearn.metrics.pairwise import pairwise_distances
    # sim_matrix(i, j) is the distance between the ith array from X and the jth array from Y.
    # From scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan']. These metrics support sparse matrix inputs.
    sim_matrix = 1 - pairwise_distances(texts_vec, training_vec, metric="cosine")  # euclidean as well

    num_texts = texts_vec.shape[0]
    # Subtract 1 because the text itself is appended at the end, so only cluster_size-1 texts need to be selected
    cluster_size = cluster_size - 1
    ind_clustered_tweets = np.zeros([num_texts, cluster_size], dtype=int)
    for i in range(0, num_texts):
        indx = np.argpartition(sim_matrix[i], -cluster_size)[-cluster_size:]
        ind_clustered_tweets[i] = indx

    trainingset = np.array(trainingset)
    clustered_texts = []
    extantion_content = []
    for i in range(0, num_texts):
        ind = ind_clustered_tweets[i]
        clustered_texts.append(texts[i] + ' ' + ' '.join(trainingset[ind]))
        extantion_content.append(' '.join(trainingset[ind]))

    import pickle
    # Recommended values for file_name are "neg" and "pos"
    print('Test data aggregated with training_data is saved as *.p files in the ./data/extended_test/ folder')
    pickle.dump(clustered_texts, open("./data/extended_test_data/" + file_name + "_clustered_texts.p", "wb"))
    pickle.dump(extantion_content, open("./data/extended_test_data/" + file_name + "_extantion_content.p", "wb"))
    # Three kinds of variables are needed when calling the function above
def argmedian(x):
    return np.argpartition(x, len(x) // 2, axis=0)[len(x) // 2]
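A small check of the helper above; for an odd-length 1-D array it returns the index of the median element (for even lengths it picks the upper of the two middle values).

import numpy as np

x = np.array([7, 1, 5, 3, 9])
i = argmedian(x)
print(i, x[i])   # 2 5 -> index of the median value 5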
img = img.convert('RGB')
img = img.resize((IMG_SIZE, IMG_SIZE), Image.ANTIALIAS)
img = np.array(list(img.getdata()), dtype='uint8')
img = np.reshape(img, (IMG_SIZE, IMG_SIZE, 3))
imgs.append(img)

pred = sess.run(predictions, feed_dict={x: imgs})
log_soft = sess.run(logits_soft, feed_dict={x: imgs})
outputFeatureMap(imgs, conv2)

# build the label map
label_map = {}
with open('signnames.csv') as f:
    r = csv.reader(f)
    next(r)
    for row in r:
        label, label_descrip = int(row[0]), row[1]
        label_map[label] = label_descrip

# print out top 5 softmax probabilities with corresponding sign category
final_pred = [label_map[i] for i in pred]
for i in range(len(imgs)):
    index = np.argpartition(log_soft[i], -5)[-5:]
    ind_sort = index[np.argsort(log_soft[i][index])]
    ind_sort = ind_sort[::-1]
    top5_labels = [label_map[j] for j in ind_sort]
    print('%s --> %s --> %s -->%s' % (images[i], final_pred[i], log_soft[i][ind_sort], top5_labels))
    print('\n')
def disp_topics(vocab, topics, n_words=10):
    import numpy as np
    for t in topics:
        idxs = np.argpartition(-t, range(n_words))[:n_words]
        print(' '.join([vocab[i] for i in idxs]))
def find_critical(learner, data, n=5, k=5, random_state=0): """ :param learner: argument-based learner to be tested :param data: learning data :param n: number of critical examples :param k: folds in cross-validation :param random_state: random state to be used in StratifiedKFold function :return: n most critical examples (with estimation of 'criticality') """ # first get how problematic is each example (cross-validation) # E ... the difference between probability of predicted most probable class # and the probability of the example's class. # if example is correctly predicted or if example is already covered # by an argumented rule, E equals 0. # CV skf = StratifiedKFold(n_splits=k, shuffle=True, random_state=random_state) problematic = np.zeros(len(data)) problematic_rules = [[] for d in data] for learn_ind, test_ind in skf.split(data.X, data.Y): # move test_ind with arguments to learn_ind arg_ind = [] if ARGUMENTS in data.domain: for t in test_ind: if data[t][ARGUMENTS] not in ("", "?"): arg_ind.append(t) learn_ind = np.array(sorted(list(learn_ind) + arg_ind), dtype=int) test_ind = np.array([t for t in test_ind if t not in arg_ind], dtype=int) learn = Table(data.domain, data[learn_ind]) test = Table(data.domain, data[test_ind]) classifier = learner(learn) rules = classifier.rule_list # eval rules on test data cov = coverage(rules, test) # for each test instance find out best covering rule from the same class best_covered = np.zeros(len(test)) for ri, r in enumerate(rules): target = r.target_class == test.Y best_covered = np.maximum(best_covered, (cov[:, ri] & target) * r.quality) # compute how problematic each instance is ... probs = classifier(test, 1) for ti, t in enumerate(test_ind): # first check best rule, if same class, it can not be problematic d, p = test[ti], probs[ti] c = int(d.get_class()) # find best rule covering this example (best_rule * prediction) problematic[t] = (1 - best_covered[ti]) * (1 - p[c]) problematic_rules[t] = [ r for ri, r in enumerate(rules) if cov[ti, ri] ] # compute Mahalanobis distance between instances dist_matrix = squareform(pdist(data.X, metric="seuclidean")) # criticality is a combination of how much is the instance problematic # and its distance to other problematic examples of the same class # for loop over classes vals = np.unique(data.Y.astype(dtype=int)) k = int(np.ceil(n / len(vals))) crit_ind = [] for i in vals: inst = (data.Y == i) & (problematic > 1e-6) inst_pos = np.where(inst)[0] wdist = dist_matrix[np.ix_(inst, inst)] # select k most problematic instances prob = problematic[inst] ind = np.argpartition(prob, -k)[-k:] centers = kmeans(wdist, prob, ind) for c in centers: crit_ind.append(inst_pos[c]) # sort critical indices given problematicness crit_ind = sorted(crit_ind, key=lambda x: -problematic[x]) return (crit_ind, problematic[crit_ind], [problematic_rules[i] for i in crit_ind])
def plot_pca(self, plot_filename=None, PCs=[1, 2], plot_title='', image_format=None, log1p=False, plotWidth=5, plotHeight=10, cols=None, marks=None): """ Plot the PCA of a matrix Returns the matrix of plotted values. """ fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(plotWidth, plotHeight)) # Filter m = self.matrix rvs = m.var(axis=1) if self.transpose: m = m[np.nonzero(rvs)[0], :] rvs = rvs[np.nonzero(rvs)[0]] if self.ntop > 0 and m.shape[0] > self.ntop: m = m[np.argpartition(rvs, -self.ntop)[-self.ntop:], :] rvs = rvs[np.argpartition(rvs, -self.ntop)[-self.ntop:]] # log2 (if requested) if self.log2: self.matrix = np.log2(self.matrix + 0.01) # Row center / transpose if self.rowCenter and not self.transpose: _ = self.matrix.mean(axis=1) self.matrix -= _[:, None] if self.transpose: m = m.T # Center and scale m2 = (m - np.mean(m, axis=0)) m2 /= np.std(m2, axis=0, ddof=1) # Use the unbiased std. dev. # SVD U, s, Vh = np.linalg.svd( m2, full_matrices=False, compute_uv=True) # Is full_matrices ever needed? # % variance, eigenvalues eigenvalues = s**2 variance = eigenvalues / float(np.max([1, m2.shape[1] - 1])) pvar = variance / variance.sum() # Weights/projections Wt = Vh if self.transpose: # Use the projected coordinates for the transposed matrix Wt = np.dot(m2, Vh.T).T if plot_filename is not None: n = n_bars = len(self.labels) if eigenvalues.size < n: n_bars = eigenvalues.size markers = itertools.cycle( matplotlib.markers.MarkerStyle.filled_markers) if cols is not None: colors = itertools.cycle(cols) else: colors = itertools.cycle( plt.cm.gist_rainbow(np.linspace(0, 1, n))) if marks is not None: markers = itertools.cycle(marks) if image_format == 'plotly': self.plotly_pca(plot_filename, Wt, pvar, PCs, eigenvalues, cols, plot_title) else: ax1.axhline(y=0, color="black", linestyle="dotted", zorder=1) ax1.axvline(x=0, color="black", linestyle="dotted", zorder=2) for i in range(n): color = next(colors) marker = next(markers) if isinstance(color, np.ndarray): color = pltcolors.to_hex(color, keep_alpha=True) ax1.scatter(Wt[PCs[0] - 1, i], Wt[PCs[1] - 1, i], marker=marker, color=color, s=150, label=self.labels[i], zorder=i + 3) if plot_title == '': ax1.set_title('PCA') else: ax1.set_title(plot_title) ax1.set_xlabel('PC{} ({:4.1f}% of var. explained)'.format( PCs[0], 100.0 * pvar[PCs[0] - 1])) ax1.set_ylabel('PC{} ({:4.1f}% of var. explained)'.format( PCs[1], 100.0 * pvar[PCs[1] - 1])) lgd = ax1.legend(scatterpoints=1, loc='center left', borderaxespad=0.5, bbox_to_anchor=(1, 0.5), prop={'size': 12}, markerscale=0.9) # Scree plot ind = np.arange(n_bars) # the x locations for the groups width = 0.35 # the width of the bars if mpl.__version__ >= "2.0.0": ax2.bar(2 * width + ind, eigenvalues[:n_bars], width * 2) else: ax2.bar(width + ind, eigenvalues[:n_bars], width * 2) ax2.set_ylabel('Eigenvalue') ax2.set_xlabel('Principal Component') ax2.set_title('Scree plot') ax2.set_xticks(ind + width * 2) ax2.set_xticklabels(ind + 1) ax3 = ax2.twinx() ax3.axhline(y=1, color="black", linestyle="dotted") ax3.plot(width * 2 + ind, pvar.cumsum()[:n], "r-") ax3.plot(width * 2 + ind, pvar.cumsum()[:n], "wo", markeredgecolor="black") ax3.set_ylim([0, 1.05]) ax3.set_ylabel('Cumulative variability') plt.subplots_adjust(top=3.85) plt.tight_layout() plt.savefig(plot_filename, format=image_format, bbox_extra_artists=(lgd, ), bbox_inches='tight') plt.close() return Wt, eigenvalues
def path_matching_local(path: np.ndarray, ego_position: np.ndarray, consider_as_closed: bool = False, s_tot: Union[float, None] = None, no_interp_values: int = 11) -> tuple: """ author: Alexander Heilmeier .. description:: Get the corresponding s coordinate and the displacement of the own vehicle in relation to a local path. .. inputs:: :param path: Unclosed path used to match ego position ([s, x, y]). :type path: np.ndarray :param ego_position: Ego position of the vehicle ([x, y]). :type ego_position: np.ndarray :param consider_as_closed: If the path is closed in reality we can interpolate between last and first point. This can be enforced by setting consider_as_closed = True. :type consider_as_closed: bool :param s_tot: Total length of path in m. :type s_tot: Union[float, None] :param no_interp_values: Number of interpolation points that are created between the two closest points on the path to obtain a more accurate result. :type no_interp_values: int .. outputs:: :return s_interp: Interpolated s position of the vehicle in m. :rtype s_interp: np.ndarray :return d_displ: Estimated displacement from the trajectory in m. :rtype d_displ: np.ndarray """ # ------------------------------------------------------------------------------------------------------------------ # CHECK INPUT ------------------------------------------------------------------------------------------------------ # ------------------------------------------------------------------------------------------------------------------ if path.shape[1] != 3: raise RuntimeError("Inserted path must have 3 columns [s, x, y]!") if consider_as_closed and s_tot is None: print("WARNING: s_tot is not handed into path_matching_local function! Estimating s_tot on the basis of equal" "stepsizes") s_tot = path[-1, 0] + path[1, 0] - path[0, 0] # assume equal stepsize # ------------------------------------------------------------------------------------------------------------------ # SELF LOCALIZATION ON RACELINE ------------------------------------------------------------------------------------ # ------------------------------------------------------------------------------------------------------------------ # get the nearest path point to ego position dists_to_cg = np.hypot(path[:, 1] - ego_position[0], path[:, 2] - ego_position[1]) ind_min = np.argpartition(dists_to_cg, 1)[0] # get previous and following point on path if consider_as_closed: if ind_min == 0: ind_prev = dists_to_cg.shape[0] - 1 ind_follow = 1 elif ind_min == dists_to_cg.shape[0] - 1: ind_prev = ind_min - 1 ind_follow = 0 else: ind_prev = ind_min - 1 ind_follow = ind_min + 1 else: ind_prev = max(ind_min - 1, 0) ind_follow = min(ind_min + 1, dists_to_cg.shape[0] - 1) # get angle between selected point and neighbours: ang1 to previous point, ang2 to following point on path ang_prev = np.abs(trajectory_planning_helpers.angle3pt.angle3pt(path[ind_min, 1:3], ego_position, path[ind_prev, 1:3])) ang_follow = np.abs(trajectory_planning_helpers.angle3pt.angle3pt(path[ind_min, 1:3], ego_position, path[ind_follow, 1:3])) # extract neighboring points -> closest point and the point resulting in the larger angle if ang_prev > ang_follow: a_pos = path[ind_prev, 1:3] b_pos = path[ind_min, 1:3] s_curs = np.append(path[ind_prev, 0], path[ind_min, 0]) else: a_pos = path[ind_min, 1:3] b_pos = path[ind_follow, 1:3] s_curs = np.append(path[ind_min, 0], path[ind_follow, 0]) # adjust s if closed path shell be considered and we have the case of interpolation between last and first point if 
consider_as_closed: if ind_min == 0 and ang_prev > ang_follow: s_curs[1] = s_tot elif ind_min == dists_to_cg.shape[0] - 1 and ang_prev <= ang_follow: s_curs[1] = s_tot # interpolate between those points (linear) for better positioning t_lin = np.linspace(0.0, 1.0, no_interp_values) # set relative lengths that are evaluated for interpolation x_cg_interp = np.linspace(a_pos[0], b_pos[0], no_interp_values) y_cg_interp = np.linspace(a_pos[1], b_pos[1], no_interp_values) # get nearest of those interpolated points relative to ego position dists_to_cg = np.hypot(x_cg_interp - ego_position[0], y_cg_interp - ego_position[1]) ind_min_interp = np.argpartition(dists_to_cg, 1)[0] t_lin_used = t_lin[ind_min_interp] # ------------------------------------------------------------------------------------------------------------------ # CALCULATE REQUIRED INFORMATION ----------------------------------------------------------------------------------- # ------------------------------------------------------------------------------------------------------------------ # calculate current path length s_interp = np.interp(t_lin_used, (0.0, 1.0), s_curs) # get displacement between ego position and path (needed for lookahead distance) d_displ = dists_to_cg[ind_min_interp] return s_interp, d_displ
print(ex_answer)

while True:
    # ask user for story and question
    story_lines = []
    line = input("\nPlease enter a story:\n")
    while line != "":
        story_lines.append(line)
        line = input()
    story = ("\n".join(story_lines)).strip()

    question = input("Please enter a question:\n")

    # convert user input into a suitable network input
    s = vectorize(story, babi.story_maxlen)
    q = vectorize(question, babi.query_maxlen)

    # get prediction probabilities with forward propagation
    probs = model_inference.fprop(x=(s, q), inference=True).get()

    # get top k answers
    top_k = -min(5, babi.vocab_size)
    max_indices = np.argpartition(probs, top_k, axis=0)[top_k:]
    max_probs = probs[max_indices]
    sorted_idx = max_indices[np.argsort(max_probs, axis=0)]

    print("\nAnswer:")
    for idx in reversed(sorted_idx):
        idx = int(idx)
        print(babi.index_to_word[idx], float(probs[idx]))
X_train.shape, y_train.shape

param = [{
    'kernel': ['linear'],
    'C': [10., 30., 100., 300., 1000., 3000., 10000., 30000.0]
}, {
    'kernel': ['rbf'],
    'C': [10., 30., 100., 300., 1000., 3000., 10000., 30000.0],
    'gamma': [0.001, 0.01, 0.1, 1]
}]

grid = GridSearchCV(SVR(), param, scoring='neg_mean_squared_error', verbose=2, n_jobs=6, cv=5)
grid.fit(X_train, y_train)

grid.best_params_

negative_mse = grid.best_score_
rmse = np.sqrt(-negative_mse)
rmse

grid.best_estimator_.coef_

a = np.array([3, 10, 5, 6, 4, 5])
a
np.sort(np.argpartition(a, -5)[-5:])
def argsort_k_largest(x, k):
    if k >= len(x):
        return np.argsort(x)[::-1]
    indices = np.argpartition(x, -k)[-k:]
    values = x[indices]
    return indices[np.argsort(-values)]
def pq_knn(dist, topk):
    ids = np.argpartition(dist, topk - 1)[:topk]
    ids = ids[dist[ids].argsort()]
    return ids
def kneighbors(self, X=None, n_neighbors=None, return_distance=True): """Finds the K-neighbors of a point. Returns indices of and distances to the neighbors of each point. Parameters ---------- X : array-like, shape (n_ts, sz, d) The query time series. If not provided, neighbors of each indexed point are returned. In this case, the query point is not considered its own neighbor. n_neighbors : int Number of neighbors to get (default is the value passed to the constructor). return_distance : boolean, optional. Defaults to True. If False, distances will not be returned Returns ------- dist : array Array representing the distance to points, only present if return_distance=True ind : array Indices of the nearest points in the population matrix. """ self_neighbors = False if n_neighbors is None: n_neighbors = self.n_neighbors if X is None: X = self._X_fit self_neighbors = True if self.metric == "precomputed": full_dist_matrix = X else: if X.ndim == 2: # sklearn-format case X = X.reshape((X.shape[0], -1, self._d)) fit_X = self._X_fit.reshape( (self._X_fit.shape[0], -1, self._d)) elif hasattr(self, '_ts_fit') and self._ts_fit is not None: fit_X = self._ts_fit else: fit_X = self._X_fit if (self.metric in TSLEARN_VALID_METRICS or self.metric in [cdist_dtw, cdist_soft_dtw, cdist_sax]): full_dist_matrix = self._precompute_cross_dist(X, other_X=fit_X) elif self.metric in ["euclidean", "sqeuclidean", "cityblock"]: full_dist_matrix = scipy_cdist(X.reshape((X.shape[0], -1)), fit_X.reshape( (fit_X.shape[0], -1)), metric=self.metric) else: raise ValueError("Unrecognized time series metric string: %s " "(should be one of 'dtw', 'softdtw', " "'sax', 'euclidean', 'sqeuclidean' " "or 'cityblock')" % self.metric) # Code similar to sklearn (sklearn/neighbors/base.py), to make sure # that TimeSeriesKNeighbor~(metric='euclidean') has the same results as # feeding a distance matrix to sklearn.KNeighbors~(metric='euclidean') kbin = min(n_neighbors - 1, full_dist_matrix.shape[1] - 1) # argpartition will make sure the first `kbin` entries are the # `kbin` smallest ones (but in arbitrary order) --> complexity: O(n) ind = numpy.argpartition(full_dist_matrix, kbin, axis=1) if self_neighbors: ind = ind[:, 1:] if n_neighbors > full_dist_matrix.shape[1]: n_neighbors = full_dist_matrix.shape[1] ind = ind[:, :n_neighbors] n_ts = X.shape[0] sample_range = numpy.arange(n_ts)[:, None] # Sort the `kbin` nearest neighbors according to distance ind = ind[sample_range, numpy.argsort(full_dist_matrix[sample_range, ind])] dist = full_dist_matrix[sample_range, ind] if hasattr(self, '_ts_metric'): self.metric = self._ts_metric if return_distance: return dist, ind else: return ind
def indices_of_top_k(arr, k):
    return np.sort(np.argpartition(np.array(arr), -k)[-k:])
def getMinOfNum(a, K):
    a = np.array(a)
    # NOTE: despite the name, this returns the indices of the K largest values (in arbitrary order)
    return np.argpartition(a, -K)[-K:]
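A quick check of the helper above (illustrative values only), which makes the naming caveat visible:

import numpy as np

a = [3, 10, 5, 6, 4, 5]
idx = getMinOfNum(a, 2)
print(sorted(idx.tolist()))   # [1, 3] -> positions of 10 and 6, the two largest values
print(np.array(a)[idx])       # the values 10 and 6, in arbitrary order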
    training: False})
los += r_l.sum()
s += res

los = los / x_test.shape[0]
s = s / x_test.shape[0]
print(" Test Accuracy: ", s)
summ4 = sess.run(loss_test_summary, feed_dict={read_loss_test: los})
summ_writer.add_summary(summ4, ep)
summ5 = sess.run(val_acc_summary, feed_dict={read_val_acc: s})
summ_writer.add_summary(summ5, ep)

# Swapping samples
if swapped != 0:
    print("Swapping ", swapped, " samples.")
    ind_batch_low = np.argpartition(full_batch_losses, swapped)
    ind_ma_high = np.argpartition(EMA_batch_losses, N - swapped)
    batch_low_swap = ind_batch_low[swapped:]
    ma_high_swap = ind_ma_high[-swapped:]
    indices = np.concatenate((batch_low_swap, ma_high_swap))

# Optional if you want to change the number of swapped samples during training
# swapped += int(9000 / epochs)

np.random.shuffle(indices)
def translate_sequence_beam(self, input_seq, beam_size=1): # https://machinelearningmastery.com/beam-search-decoder-natural-language-processing/ # Encode the input as state vectors. states_value = self.encoder_model.predict(input_seq) # Generate empty target sequence of length 1. target_seq = np.zeros((1, 1)) # only one candidate at the begining candidates = [ Candidate(target_seq=target_seq, last_prediction=SpecialSymbols.GO_IX, states_value=states_value, score=0, decoded_sentence="") ] while True: should_stop = True new_candidates = [] for candidate in candidates: if not candidate.finalised: outputs = self.decoder_model.predict( [candidate.target_seq] + candidate.states_value) should_stop = False output_tokens = outputs[0][-1] # find n (beam_size) best predictions indices = np.argpartition(output_tokens, -beam_size)[-beam_size:] for sampled_token_index in indices: score = -math.log(output_tokens[sampled_token_index]) # how long is the sentence, to compute average score step = candidate.get_sentence_length() + 1 # i believe scores should be summed together because log prob is used https://stats.stackexchange.com/questions/121257/log-probability-vs-product-of-probabilities # score is average of all probabilities (normalization so that longer sequences are not penalized) # incremental average https://math.stackexchange.com/questions/106700/incremental-averageing avg_score = utils.incremental_average(candidate.score, score, step) sampled_word = self.target_vocab.ix_to_word[sampled_token_index] new_candidate = Candidate(target_seq=candidate.target_seq, states_value=states_value, decoded_sentence=candidate.decoded_sentence, score=avg_score, sampled_word=sampled_word, last_prediction=sampled_token_index) new_candidates.append(new_candidate) # Exit condition: either hit max length # or find stop character. if sampled_word == SpecialSymbols.EOS: continue decoded_len = new_candidate.get_sentence_length() if decoded_len > self.training_dataset.y_max_seq_len \ and decoded_len > self.test_dataset.y_max_seq_len: new_candidate.finalise() continue # finished candidates are transfered to new_candidates automatically else: new_candidates.append(candidate) # take n (beam_size) best candidates candidates = sorted(new_candidates, key=lambda can: can.score)[:beam_size] if should_stop: break return candidates[0].decoded_sentence
def top_elements(array, k):
    ind = np.argpartition(array, -k)[-k:]
    return ind[np.argsort(array[ind])][::-1]
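A short hedged usage sketch of `top_elements`, which returns the indices of the k largest values ordered from largest to smallest:

import numpy as np

scores = np.array([0.1, 0.8, 0.3, 0.9, 0.5])
print(top_elements(scores, 3))   # [3 1 4] -> values 0.9, 0.8, 0.5 in descending order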
print('Recall: ', recall, '\n')

'''
Part 3
'''
print('Part 3 - k-NN Classifier \n')

# Variables to keep track of result from each step
confusion_steps = []
accuracy_steps = []

# Perform knn classifier
print('K Value - Accuracy')
for i in range(1, 25):
    confusion = [[0, 0], [0, 0]]
    for j in range(len(test_data)):
        distances = np.array(np.power(abs(train_data.sub(np.array(np.array(test_data.loc[[j], :])[0]))), 2).sum(axis=1))
        closest_neighbours = np.array([train_output[j] for j in np.argpartition(distances, i)[:i]])
        pred_value = 1 if closest_neighbours.mean() > 0.5 else 0
        true_value = test_output[j]
        if pred_value == 1:
            if pred_value == true_value:
                confusion[0][0] += 1
            else:
                confusion[0][1] += 1
        if pred_value == 0:
            if pred_value == true_value:
                confusion[1][1] += 1
            else:
                confusion[1][0] += 1

    tp = confusion[0][0]
    fp = confusion[0][1]
    fn = confusion[1][0]
img1 = cv2.imread(i1)
img2 = cv2.imread(i2)

# Calculate Optical Flow
h_oflow, v_oflow = calc_optical_flow(img1, img2)

# Make copies of the optical flow to play with
pof_h = np.copy(h_oflow)
pof_v = np.copy(v_oflow)

# Find the magnitudes of movement given the h and v oflows
# mag(x,y) = sqrt( (h_oflow^2) + (v_oflow^2) )
magnitudes = np.sqrt((h_oflow)**2 + (v_oflow)**2)

# Find the top N locations of magnitude
N = 20
indices = np.argpartition(magnitudes.flatten(), -N)[-N:]
locs = np.vstack(np.unravel_index(indices, magnitudes.shape)).T
print "Perturbing at: ", locs

# Apply the N perturbations to optical flow field
for loc in locs:
    row = loc[0]
    col = loc[1]
    pof_h[row, col] *= -1
    pof_v[row, col] *= -1

# Reverse the optical flow perturbations onto two adversarial spatial images
pimg1, pimg2 = perturbed_oflow_to_images(img1, img2, pof_h, pof_v, locs)

# Recalculate optical flow on adversarial spatial images
end_idx = min((batch_idx + 1) * args.ts_batch_size, nr_tst_num)
X = X_tst[start_idx:end_idx]
Y = Y_tst_o[start_idx:end_idx]
data = Variable(torch.from_numpy(X).long()).cuda()
candidates = baseline(data)
candidates = candidates.data.cpu().numpy()

Y_pred = np.zeros([candidates.shape[0], args.num_classes])
for i in range(candidates.shape[0]):
    candidate_labels = candidates[i, :].argsort()[-args.re_ranking:][::-1].tolist()
    _, activations_2nd = capsule_net(data[i, :].unsqueeze(0), candidate_labels)
    Y_pred[i, candidate_labels] = activations_2nd.squeeze(2).data.cpu().numpy()

for i in range(Y_pred.shape[0]):
    sorted_idx = np.argpartition(-Y_pred[i, :], top_k)[:top_k]
    row_idx_list += [i + start_idx] * top_k
    col_idx_list += (sorted_idx).tolist()
    val_idx_list += Y_pred[i, sorted_idx].tolist()

done = time.time()
elapsed = done - start
print("\r Epoch: {} Reranking: {} Iteration: {}/{} ({:.1f}%) Loss: {:.5f} {:.5f}".format(
    (epoch + 1), args.re_ranking, batch_idx, nr_batches, batch_idx * 100 / nr_batches, 0, elapsed), end="")

m = max(row_idx_list) + 1
n = max(k_trn, k_tst)
def fun_idxs_of_max_n_score(user_scores_to_all_items, top_k):
    # find the indices of the top_k largest values in a vector
    return np.argpartition(user_scores_to_all_items, -top_k)[-top_k:]
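A small usage sketch (assumes numpy imported as np and the function above in scope; scores are illustrative). Note the returned indices are not sorted by score:

user_scores = np.array([0.1, 0.8, 0.3, 0.9, 0.2])
print(fun_idxs_of_max_n_score(user_scores, 2))  # indices 1 and 3, in arbitrary order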
def find_max_ind(auc_ind):
    auc_ind = np.array(auc_ind)
    ind = np.argpartition(auc_ind, -3)[-3:]
    return ind, auc_ind[ind]
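A quick usage sketch for the helper above (assumes numpy imported as np; the AUC values are illustrative). The count of three is hardcoded in the function, and the returned indices come back unordered:

aucs = [0.61, 0.75, 0.58, 0.80, 0.66]
idx, vals = find_max_ind(aucs)
print(idx, vals)  # indices and values of the three highest AUCs (0.80, 0.75, 0.66), unordered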
def embed_out_of_sample(X_train, X_manifold, X_out, K, beta, neighbor_measure):
    """
    ******************************************************************
    *
    *  Func:  embed_out_of_sample(X_train, X_manifold, X_out, K, beta, neighbor_measure)
    *
    *  Desc:  Embeds out-of-sample points into lower-dimensional space.
    *         Uses a k-nearest neighbor, constrained least square reconstruction.
    *
    *  Inputs:
    *         X_train - NxD matrix of training data coordinates
    *
    *         X_manifold - NxK matrix of low-dimensional training data coordinates
    *
    *         X_out - MxD data matrix of out-of-sample points
    *
    *         K - dimensionality of embedding space
    *
    *         beta - bandwidth of RBF affinity function
    *
    *         neighbor_measure - number of neighbors to consider in k-NN graph
    *
    *  Outputs:
    *         Z_out - MxK data matrix of embedded out-of-sample points
    *
    ******************************************************************
    """

    print("\nEmbedding out of sample data...")

    ## Extract constants
    num_total = np.shape(X_train)[0]       ## Number of training data points
    num_out_sample = np.shape(X_out)[0]    ## Number of out-of-sample data points
    input_dim = np.shape(X_out)[1]         ## Dimensionality of input space
    Z_out = np.zeros((num_out_sample, K))  ## Initialize out-of-sample embedded coordinate matrix

    ##### Affinity of out-of-sample with training set #####
    print("Computing affinity matrices...")

    ## Define K-nearest neighbor graph
    W_L2 = distance_matrix(X_out, X_train, p=2)
    W_neighbors = W_L2

    ## Square L2 distances, divide by negative bandwidth and exponentiate
    W_total = np.exp((-1 / beta) * (W_L2 ** 2))

    print("Embedding out-of-sample points...")
    for idx in range(0, num_out_sample):
        temp_row = W_neighbors[idx, :]

        ## Indices of nearest neighbors according to L2 distance
        valid_ind = np.argpartition(temp_row, neighbor_measure)

        ##### Find reconstruction weights of current out-of-sample point, NO bias #####
        X_recon = X_train[valid_ind[0:neighbor_measure], :].T
        x_current = X_out[idx, :]
        x_current = x_current.astype(np.double)
        X_recon = X_recon.astype(np.double)
        w_recon = unmix_cvxopt(np.expand_dims(x_current, axis=1), X_recon, gammaConst=0, P=None)
        w_recon = np.squeeze(w_recon)

        ## Embed sample as reconstruction of low-dimensional training data embeddings
        Z_recon = X_manifold[valid_ind[0:neighbor_measure], :].T
        z = np.dot(Z_recon, w_recon)
        Z_out[idx, :] = z

    print('Done!')

    return Z_out
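The neighbor selection above relies on np.argpartition(temp_row, neighbor_measure)[:neighbor_measure] returning the indices of the smallest distances. A self-contained sketch of that behaviour (the distance row is made up):

import numpy as np

dists = np.array([0.9, 0.1, 0.5, 0.3, 0.7])  # distances from one out-of-sample point to 5 training points
k = 3
nearest = np.argpartition(dists, k)[:k]      # indices of the 3 smallest distances, in arbitrary order
print(nearest)                               # some ordering of {1, 3, 2}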
def fitPlanesPiecewise(image, depth, normal, info, numOutputPlanes=20, imageIndex=1, parameters={}): if 'meanshift' in parameters and parameters['meanshift'] > 0: import sklearn.cluster meanshift = sklearn.cluster.MeanShift(parameters['meanshift']) pass from pylsd import lsd height = depth.shape[0] width = depth.shape[1] camera = getCameraFromInfo(info) urange = (np.arange(width, dtype=np.float32) / (width) * (camera['width']) - camera['cx']) / camera['fx'] urange = urange.reshape(1, -1).repeat(height, 0) vrange = (np.arange(height, dtype=np.float32) / (height) * (camera['height']) - camera['cy']) / camera['fy'] vrange = vrange.reshape(-1, 1).repeat(width, 1) X = depth * urange Y = depth Z = -depth * vrange normals = normal.reshape((-1, 3)) normals = normals / np.maximum(np.linalg.norm(normals, axis=-1, keepdims=True), 1e-4) validMask = np.logical_and(np.linalg.norm(normals, axis=-1) > 1e-4, depth.reshape(-1) > 1e-4) points = np.stack([X, Y, Z], axis=2).reshape(-1, 3) valid_points = points[validMask] lines = lsd(image.mean(2)) lineImage = image.copy() for line in lines: cv2.line(lineImage, (int(line[0]), int(line[1])), (int(line[2]), int(line[3])), (0, 0, 255), int(np.ceil(line[4] / 2))) continue cv2.imwrite('test/lines.png', lineImage) numVPs = 3 VPs, VPLines, remainingLines = calcVanishingPoints(lines, numVPs=numVPs) lineImage = image.copy() for VPIndex, lines in enumerate(VPLines): for line in lines: cv2.line(lineImage, (int(line[0]), int(line[1])), (int(line[2]), int(line[3])), ((VPIndex == 0) * 255, (VPIndex == 1) * 255, (VPIndex == 2) * 255), int(np.ceil(line[4] / 2))) continue continue cv2.imwrite('test/lines_vp.png', lineImage) dominantNormals = np.stack([(VPs[:, 0] * info[16] / width - info[2]) / info[0], np.ones(numVPs), -(VPs[:, 1] * info[17] / height - info[6]) / info[5]], axis=1) dominantNormals /= np.maximum(np.linalg.norm(dominantNormals, axis=1, keepdims=True), 1e-4) dotThreshold = np.cos(np.deg2rad(20)) for normalIndex, crossNormals in enumerate([[1, 2], [2, 0], [0, 1]]): normal = np.cross(dominantNormals[crossNormals[0]], dominantNormals[crossNormals[1]]) normal = normalize(normal) if np.dot(normal, dominantNormals[normalIndex]) < dotThreshold: dominantNormals = np.concatenate([dominantNormals, np.expand_dims(normal, 0)], axis=0) pass continue print(VPs) print(dominantNormals) dominantNormalImage = np.abs(np.matmul(normal, dominantNormals.transpose())) cv2.imwrite('test/dominant_normal.png', drawMaskImage(dominantNormalImage)) planeHypothesisAreaThreshold = width * height * 0.01 planes = [] vpPlaneIndices = [] if 'offsetGap' in parameters: offsetGap = parameters['offsetGap'] else: offsetGap = 0.1 pass planeIndexOffset = 0 for dominantNormal in dominantNormals: if np.linalg.norm(dominantNormal) < 1e-4: continue offsets = np.tensordot(valid_points, dominantNormal, axes=([1], [0])) if 'meanshift' in parameters and parameters['meanshift'] > 0: sampleInds = np.arange(offsets.shape[0]) np.random.shuffle(sampleInds) meanshift.fit(np.expand_dims(offsets[sampleInds[:int(offsets.shape[0] * 0.02)]], -1)) for offset in meanshift.cluster_centers_: planes.append(dominantNormal * offset) continue else: offset = offsets.min() maxOffset = offsets.max() while offset < maxOffset: planeMask = np.logical_and(offsets >= offset, offsets < offset + offsetGap) segmentOffsets = offsets[np.logical_and(offsets >= offset, offsets < offset + offsetGap)] if segmentOffsets.shape[0] < planeHypothesisAreaThreshold: offset += offsetGap continue planeD = segmentOffsets.mean() planes.append(dominantNormal * 
planeD) offset = planeD + offsetGap continue pass vpPlaneIndices.append(np.arange(planeIndexOffset, len(planes))) planeIndexOffset = len(planes) continue if len(planes) == 0: return np.array([]), np.zeros(segmentation.shape).astype(np.int32) planes = np.array(planes) planesD = np.linalg.norm(planes, axis=1, keepdims=True) planeNormals = planes / np.maximum(planesD, 1e-4) if 'distanceCostThreshold' in parameters: distanceCostThreshold = parameters['distanceCostThreshold'] else: distanceCostThreshold = 0.05 pass distanceCost = np.abs(np.tensordot(points, planeNormals, axes=([1, 1])) - np.reshape(planesD, [1, -1])) / distanceCostThreshold normalCostThreshold = 1 - np.cos(np.deg2rad(30)) normalCost = (1 - np.abs(np.tensordot(normals, planeNormals, axes=([1, 1])))) / normalCostThreshold if 'normalWeight' in parameters: normalWeight = parameters['normalWeight'] else: normalWeight = 1 pass unaryCost = distanceCost + normalCost * normalWeight unaryCost *= np.expand_dims(validMask.astype(np.float32), -1) unaries = unaryCost.reshape((width * height, -1)) print('number of planes ', planes.shape[0]) cv2.imwrite('test/distance_cost.png', drawSegmentationImage(-distanceCost.reshape((height, width, -1)), unaryCost.shape[-1] - 1)) cv2.imwrite('test/normal_cost.png', drawSegmentationImage(-normalCost.reshape((height, width, -1)), unaryCost.shape[-1] - 1)) cv2.imwrite('test/unary_cost.png', drawSegmentationImage(-unaryCost.reshape((height, width, -1)), blackIndex=unaryCost.shape[-1] - 1)) cv2.imwrite('test/segmentation.png', drawSegmentationImage(-unaries.reshape((height, width, -1)), blackIndex=unaries.shape[-1])) if 'numProposals' in parameters: numProposals = parameters['numProposals'] else: numProposals = 3 pass numProposals = min(numProposals, unaries.shape[-1] - 1) proposals = np.argpartition(unaries, numProposals)[:, :numProposals] unaries = -readProposalInfo(unaries, proposals).reshape((-1, numProposals)) nodes = np.arange(height * width).reshape((height, width)) deltas = [(0, 1), (1, 0)] edges = [] edges_features = [] for delta in deltas: deltaX = delta[0] deltaY = delta[1] partial_nodes = nodes[max(-deltaY, 0):min(height - deltaY, height), max(-deltaX, 0):min(width - deltaX, width)].reshape(-1) edges.append(np.stack([partial_nodes, partial_nodes + (deltaY * width + deltaX)], axis=1)) labelDiff = (np.expand_dims(proposals[partial_nodes], -1) != np.expand_dims(proposals[partial_nodes + (deltaY * width + deltaX)], 1)).astype(np.float32) edges_features.append(labelDiff) continue edges = np.concatenate(edges, axis=0) edges_features = np.concatenate(edges_features, axis=0) if 'edgeWeights' in parameters: edgeWeights = parameters['edgeWeights'] else: edgeWeights = [0.5, 0.6, 0.6] pass lineSets = np.zeros((height * width, 3)) creaseLines = np.expand_dims(np.stack([planeNormals[:, 0] / info[0], planeNormals[:, 1], -planeNormals[:, 2] / info[5]], axis=1), 1) * planesD.reshape((1, -1, 1)) creaseLines = creaseLines - np.transpose(creaseLines, [1, 0, 2]) for planeIndex_1 in xrange(planes.shape[0]): for planeIndex_2 in xrange(planeIndex_1 + 1, planes.shape[0]): creaseLine = creaseLines[planeIndex_1, planeIndex_2] if abs(creaseLine[0]) > abs(creaseLine[2]): vs = np.arange(height) us = -(creaseLine[1] + (vs - info[6]) * creaseLine[2]) / creaseLine[0] + info[2] minUs = np.floor(us).astype(np.int32) maxUs = minUs + 1 validIndicesMask = np.logical_and(minUs >= 0, maxUs < width) if validIndicesMask.sum() == 0: continue vs = vs[validIndicesMask] minUs = minUs[validIndicesMask] maxUs = maxUs[validIndicesMask] 
edgeIndices = (height - 1) * width + (vs * (width - 1) + minUs) for index, edgeIndex in enumerate(edgeIndices): pixel_1 = vs[index] * width + minUs[index] pixel_2 = vs[index] * width + maxUs[index] proposals_1 = proposals[pixel_1] proposals_2 = proposals[pixel_2] if planeIndex_1 in proposals_1 and planeIndex_2 in proposals_2: proposalIndex_1 = np.where(proposals_1 == planeIndex_1)[0][0] proposalIndex_2 = np.where(proposals_2 == planeIndex_2)[0][0] edges_features[edgeIndex, proposalIndex_1, proposalIndex_2] *= edgeWeights[0] pass if planeIndex_2 in proposals_1 and planeIndex_1 in proposals_2: proposalIndex_1 = np.where(proposals_1 == planeIndex_2)[0][0] proposalIndex_2 = np.where(proposals_2 == planeIndex_1)[0][0] edges_features[edgeIndex, proposalIndex_1, proposalIndex_2] *= edgeWeights[0] pass continue lineSets[vs * width + minUs, 0] = 1 lineSets[vs * width + maxUs, 0] = 1 else: us = np.arange(width) vs = -(creaseLine[1] + (us - info[2]) * creaseLine[0]) / creaseLine[2] + info[6] minVs = np.floor(vs).astype(np.int32) maxVs = minVs + 1 validIndicesMask = np.logical_and(minVs >= 0, maxVs < height) if validIndicesMask.sum() == 0: continue us = us[validIndicesMask] minVs = minVs[validIndicesMask] maxVs = maxVs[validIndicesMask] edgeIndices = (minVs * width + us) for index, edgeIndex in enumerate(edgeIndices): pixel_1 = minVs[index] * width + us[index] pixel_2 = maxVs[index] * width + us[index] proposals_1 = proposals[pixel_1] proposals_2 = proposals[pixel_2] if planeIndex_1 in proposals_1 and planeIndex_2 in proposals_2: proposalIndex_1 = np.where(proposals_1 == planeIndex_1)[0][0] proposalIndex_2 = np.where(proposals_2 == planeIndex_2)[0][0] edges_features[edgeIndex, proposalIndex_1, proposalIndex_2] *= edgeWeights[0] pass if planeIndex_2 in proposals_1 and planeIndex_1 in proposals_2: proposalIndex_1 = np.where(proposals_1 == planeIndex_2)[0][0] proposalIndex_2 = np.where(proposals_2 == planeIndex_1)[0][0] edges_features[edgeIndex, proposalIndex_1, proposalIndex_2] *= edgeWeights[0] pass continue lineSets[minVs * width + us, 0] = 1 lineSets[maxVs * width + us, 0] = 1 pass continue continue planeDepths = calcPlaneDepths(planes, width, height, np.array([info[0], info[5], info[2], info[6], info[16], info[17], 0, 0, 0, 0])).reshape((height * width, -1)) planeDepths = readProposalInfo(planeDepths, proposals).reshape((-1, numProposals)) planeHorizontalVPMask = np.ones((planes.shape[0], 3), dtype=np.bool) for VPIndex, planeIndices in enumerate(vpPlaneIndices): planeHorizontalVPMask[planeIndices] = False continue for VPIndex, lines in enumerate(VPLines): lp = lines[:, :2] ln = lines[:, 2:4] - lines[:, :2] ln /= np.maximum(np.linalg.norm(ln, axis=-1, keepdims=True), 1e-4) ln = np.stack([ln[:, 1], -ln[:, 0]], axis=1) lnp = (ln * lp).sum(1, keepdims=True) occlusionLines = np.concatenate([ln, lnp], axis=1) for occlusionLine in occlusionLines: if abs(occlusionLine[0]) > abs(occlusionLine[1]): vs = np.arange(height) us = (occlusionLine[2] - vs * occlusionLine[1]) / occlusionLine[0] minUs = np.floor(us).astype(np.int32) maxUs = minUs + 1 validIndicesMask = np.logical_and(minUs >= 0, maxUs < width) vs = vs[validIndicesMask] minUs = minUs[validIndicesMask] maxUs = maxUs[validIndicesMask] edgeIndices = (height - 1) * width + (vs * (width - 1) + minUs) for index, edgeIndex in enumerate(edgeIndices): pixel_1 = vs[index] * width + minUs[index] pixel_2 = vs[index] * width + maxUs[index] proposals_1 = proposals[pixel_1] proposals_2 = proposals[pixel_2] for proposalIndex_1, planeIndex_1 in enumerate(proposals_1): 
if not planeHorizontalVPMask[planeIndex_1][VPIndex]: continue planeDepth_1 = planeDepths[pixel_1][proposalIndex_1] for proposalIndex_2, planeIndex_2 in enumerate(proposals_2): if planeDepths[pixel_2][proposalIndex_2] > planeDepth_1: edges_features[edgeIndex, proposalIndex_1, proposalIndex_2] *= edgeWeights[1] pass continue continue continue lineSets[vs * width + minUs, 1] = 1 lineSets[vs * width + maxUs, 1] = 1 else: us = np.arange(width) vs = (occlusionLine[2] - us * occlusionLine[0]) / occlusionLine[1] minVs = np.floor(vs).astype(np.int32) maxVs = minVs + 1 validIndicesMask = np.logical_and(minVs >= 0, maxVs < height) us = us[validIndicesMask] minVs = minVs[validIndicesMask] maxVs = maxVs[validIndicesMask] edgeIndices = (minVs * width + us) for index, edgeIndex in enumerate(edgeIndices): pixel_1 = minVs[index] * width + us[index] pixel_2 = maxVs[index] * width + us[index] proposals_1 = proposals[pixel_1] proposals_2 = proposals[pixel_2] for proposalIndex_1, planeIndex_1 in enumerate(proposals_1): if not planeHorizontalVPMask[planeIndex_1][VPIndex]: continue planeDepth_1 = planeDepths[pixel_1][proposalIndex_1] for proposalIndex_2, planeIndex_2 in enumerate(proposals_2): if planeDepths[pixel_2][proposalIndex_2] > planeDepth_1: edges_features[edgeIndex, proposalIndex_1, proposalIndex_2] *= edgeWeights[1] pass continue continue continue lineSets[minVs * width + us, 1] = 1 lineSets[maxVs * width + us, 1] = 1 pass continue continue for line in remainingLines: if abs(line[3] - line[1]) > abs(line[2] - line[0]): if line[3] < line[1]: line = np.array([line[2], line[3], line[0], line[1]]) pass vs = np.arange(line[1], line[3] + 1, dtype=np.int32) us = line[0] + (vs - line[1]) / (line[3] - line[1]) * (line[2] - line[0]) minUs = np.floor(us).astype(np.int32) maxUs = minUs + 1 validIndicesMask = np.logical_and(minUs >= 0, maxUs < width) vs = vs[validIndicesMask] minUs = minUs[validIndicesMask] maxUs = maxUs[validIndicesMask] edgeIndices = (height - 1) * width + (vs * (width - 1) + minUs) for edgeIndex in edgeIndices: edges_features[edgeIndex] *= edgeWeights[2] continue lineSets[(vs * width + minUs), 2] = 1 lineSets[(vs * width + maxUs), 2] = 1 else: if line[2] < line[0]: line = np.array([line[2], line[3], line[0], line[1]]) pass us = np.arange(line[0], line[2] + 1, dtype=np.int32) vs = line[1] + (us - line[0]) / (line[2] - line[0]) * (line[3] - line[1]) minVs = np.floor(vs).astype(np.int32) maxVs = minVs + 1 validIndicesMask = np.logical_and(minVs >= 0, maxVs < height) us = us[validIndicesMask] minVs = minVs[validIndicesMask] maxVs = maxVs[validIndicesMask] edgeIndices = (minVs * width + us) for edgeIndex in edgeIndices: edges_features[edgeIndex] *= edgeWeights[2] continue lineSets[minVs * width + us, 2] = 1 lineSets[maxVs * width + us, 2] = 1 continue continue cv2.imwrite('test/line_sets.png', drawMaskImage(lineSets.reshape((height, width, 3)))) if 'smoothnessWeight' in parameters: smoothnessWeight = parameters['smoothnessWeight'] else: smoothnessWeight = 4 pass print('start') refined_segmentation = inference_ogm(unaries, -edges_features * smoothnessWeight, edges, return_energy=False, alg='trw') print('done') refined_segmentation = refined_segmentation.reshape([height, width, 1]) refined_segmentation = readProposalInfo(proposals, refined_segmentation) planeSegmentation = refined_segmentation.reshape([height, width]) planeSegmentation[np.logical_not(validMask.reshape((height, width)))] = planes.shape[0] cv2.imwrite('test/segmentation_refined.png', drawSegmentationImage(planeSegmentation)) return 
planes, planeSegmentation
def find_characters(vocab_filename, training_feats, train_labels, test_feats): window = 64 f = open('datasets/ImageSets/val.txt') wa = open('svm_test/waldo.txt', 'w+') we = open('svm_test/wenda.txt', 'w+') wi = open('svm_test/wizard.txt', 'w+') image_id = f.readline().rstrip() while image_id: print(image_id) print("processing") image = np.asarray( plt.imread('datasets/JPEGImages/' + image_id + '.jpg')) H, W, chan = image.shape img_gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) test_feats = [] orb = cv2.ORB_create() # orb = cv2.ORB_create(nfeatures=1000, scoreType=cv2.ORB_FAST_SCORE) kp, des = orb.detectAndCompute(img_gray, None) # # minHessian = 400 # # detector = cv2.xfeatures2d_SURF.create(hessianThreshold=minHessian) # # kp = detector.detect(img_gray) # fast = cv2.FastFeatureDetector_create() # # find and draw the keypoints # kp = fast.detect(img_gray,None) # img_kp = cv2.drawKeypoints(img_gray, kp, None, color=(0,0,255), flags=cv2.DrawMatchesFlags_DEFAULT) for idx in range(len(kp)): j, i = kp[idx].pt i = int(np.round(i)) j = int(np.round(j)) i_end = i + window j_end = j + window i_end = min(i_end, H - 1) j_end = min(j_end, W - 1) img = img_gray[i:i_end, j:j_end] feats = bags_of_sifts_image(img_gray, vocab_filename) test_feats.extend(feats) numOfMax = 5 probability = svm_probability(training_feats, train_labels, test_feats) locations = np.argpartition(-probability, numOfMax, axis=0)[:numOfMax] for k in range(len(locations[0])): for l in range(numOfMax): y, x = kp[locations[l][k]].pt x = int(np.round(x)) y = int(np.round(y)) y_end = y + window x_end = x + window x_end = min(x_end, H - 1) y_end = min(y_end, W - 1) patch = img_gray[x:x_end, y:y_end] if (probability[locations[l][k]][k] > 0.4): if k == 0: res = image_id + ' ' + str(probability[ locations[l][k]][k]) + ' ' + str(x) + ' ' + str( y) + ' ' + str(x_end) + ' ' + str(y_end) + '\n' wa.write(res) if k == 1: res = image_id + ' ' + str( np.max(probability[locations[l][k]][k]) ) + ' ' + str(x) + ' ' + str(y) + ' ' + str( x_end) + ' ' + str(y_end) + '\n' we.write(res) if k == 2: res = image_id + ' ' + str( np.max(probability[locations[l][k]][k]) ) + ' ' + str(x) + ' ' + str(y) + ' ' + str( x_end) + ' ' + str(y_end) + '\n' wi.write(res) image_id = f.readline().rstrip()
def lowest_indices(ary, n):
    """Return the (multi-dimensional) indices of the n lowest values in a numpy array, smallest first."""
    flat = ary.flatten()
    indices = np.argpartition(flat, n)[:n]
    indices = indices[np.argsort(flat[indices])]
    return np.unravel_index(indices, ary.shape)
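A short usage sketch for the helper above (assumes numpy imported as np; the array is illustrative):

a = np.array([[5.0, 1.0, 4.0],
              [2.0, 6.0, 3.0]])
rows, cols = lowest_indices(a, 2)
print(rows, cols)  # [0 1] [1 0]: positions of 1.0 and 2.0, smallest first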
def fitPlanesManhattan(image, depth, normal, info, numOutputPlanes=20, imageIndex=-1, parameters={}): if 'meanshift' in parameters and parameters['meanshift'] > 0: import sklearn.cluster meanshift = sklearn.cluster.MeanShift(parameters['meanshift']) pass height = depth.shape[0] width = depth.shape[1] camera = getCameraFromInfo(info) urange = (np.arange(width, dtype=np.float32) / (width) * (camera['width']) - camera['cx']) / camera['fx'] urange = urange.reshape(1, -1).repeat(height, 0) vrange = (np.arange(height, dtype=np.float32) / (height) * (camera['height']) - camera['cy']) / camera['fy'] vrange = vrange.reshape(-1, 1).repeat(width, 1) X = depth * urange Y = depth Z = -depth * vrange normals = normal.reshape((-1, 3)) normals = normals / np.maximum(np.linalg.norm(normals, axis=-1, keepdims=True), 1e-4) validMask = np.logical_and(np.linalg.norm(normals, axis=-1) > 1e-4, depth.reshape(-1) > 1e-4) valid_normals = normals[validMask] points = np.stack([X, Y, Z], axis=2).reshape(-1, 3) valid_points = points[validMask] polarAngles = np.arange(16) * np.pi / 2 / 16 azimuthalAngles = np.arange(64) * np.pi * 2 / 64 polarAngles = np.expand_dims(polarAngles, -1) azimuthalAngles = np.expand_dims(azimuthalAngles, 0) normalBins = np.stack([np.sin(polarAngles) * np.cos(azimuthalAngles), np.tile(np.cos(polarAngles), [1, azimuthalAngles.shape[1]]), -np.sin(polarAngles) * np.sin(azimuthalAngles)], axis=2) normalBins = np.reshape(normalBins, [-1, 3]) numBins = normalBins.shape[0] normalDiff = np.tensordot(valid_normals, normalBins, axes=([1], [1])) normalDiffSign = np.sign(normalDiff) normalDiff = np.maximum(normalDiff, -normalDiff) normalMask = one_hot(np.argmax(normalDiff, axis=-1), numBins) bins = normalMask.sum(0) np.expand_dims(valid_normals, 1) * np.expand_dims(normalMask, -1) maxNormals = np.expand_dims(valid_normals, 1) * np.expand_dims(normalMask, -1) maxNormals *= np.expand_dims(normalDiffSign, -1) averageNormals = maxNormals.sum(0) / np.maximum(np.expand_dims(bins, -1), 1e-4) averageNormals /= np.maximum(np.linalg.norm(averageNormals, axis=-1, keepdims=True), 1e-4) dominantNormal_1 = averageNormals[np.argmax(bins)] dotThreshold_1 = np.cos(np.deg2rad(100)) dotThreshold_2 = np.cos(np.deg2rad(80)) dot_1 = np.tensordot(normalBins, dominantNormal_1, axes=([1], [0])) bins[np.logical_or(dot_1 < dotThreshold_1, dot_1 > dotThreshold_2)] = 0 dominantNormal_2 = averageNormals[np.argmax(bins)] dot_2 = np.tensordot(normalBins, dominantNormal_2, axes=([1], [0])) bins[np.logical_or(dot_2 < dotThreshold_1, dot_2 > dotThreshold_2)] = 0 dominantNormal_3 = averageNormals[np.argmax(bins)] dominantNormals = np.stack([dominantNormal_1, dominantNormal_2, dominantNormal_3], axis=0) dominantNormalImage = np.abs(np.matmul(normal, dominantNormals.transpose())) planeHypothesisAreaThreshold = width * height * 0.01 planes = [] if 'offsetGap' in parameters: offsetGap = parameters['offsetGap'] else: offsetGap = 0.1 pass for dominantNormal in dominantNormals: offsets = np.tensordot(valid_points, dominantNormal, axes=([1], [0])) if 'meanshift' in parameters and parameters['meanshift'] > 0: sampleInds = np.arange(offsets.shape[0]) np.random.shuffle(sampleInds) meanshift.fit(np.expand_dims(offsets[sampleInds[:int(offsets.shape[0] * 0.02)]], -1)) for offset in meanshift.cluster_centers_: planes.append(dominantNormal * offset) continue offset = offsets.min() maxOffset = offsets.max() while offset < maxOffset: planeMask = np.logical_and(offsets >= offset, offsets < offset + offsetGap) segmentOffsets = offsets[np.logical_and(offsets >= 
offset, offsets < offset + offsetGap)] if segmentOffsets.shape[0] < planeHypothesisAreaThreshold: offset += offsetGap continue planeD = segmentOffsets.mean() planes.append(dominantNormal * planeD) offset = planeD + offsetGap continue continue if len(planes) == 0: return np.array([]), np.zeros(segmentation.shape).astype(np.int32) planes = np.array(planes) print('number of planes ', planes.shape[0]) vanishingPoints = np.stack([dominantNormals[:, 0] / np.maximum(dominantNormals[:, 1], 1e-4) * info[0] + info[2], -dominantNormals[:, 2] / np.maximum(dominantNormals[:, 1], 1e-4) * info[5] + info[6]], axis=1) vanishingPoints[:, 0] *= width / info[16] vanishingPoints[:, 1] *= height / info[17] indices = np.arange(width * height, dtype=np.int32) uv = np.stack([indices % width, indices // width], axis=1) colors = image.reshape((-1, 3)) windowW = 9 windowH = 3 dominantLineMaps = [] for vanishingPointIndex, vanishingPoint in enumerate(vanishingPoints): horizontalDirection = uv - np.expand_dims(vanishingPoint, 0) horizontalDirection = horizontalDirection / np.maximum(np.linalg.norm(horizontalDirection, axis=1, keepdims=True), 1e-4) verticalDirection = np.stack([horizontalDirection[:, 1], -horizontalDirection[:, 0]], axis=1) colorDiffs = [] for directionIndex, direction in enumerate([horizontalDirection, verticalDirection]): neighbors = uv + direction neighborsX = neighbors[:, 0] neighborsY = neighbors[:, 1] neighborsMinX = np.maximum(np.minimum(np.floor(neighborsX).astype(np.int32), width - 1), 0) neighborsMaxX = np.maximum(np.minimum(np.ceil(neighborsX).astype(np.int32), width - 1), 0) neighborsMinY = np.maximum(np.minimum(np.floor(neighborsY).astype(np.int32), height - 1), 0) neighborsMaxY = np.maximum(np.minimum(np.ceil(neighborsY).astype(np.int32), height - 1), 0) indices_1 = neighborsMinY * width + neighborsMinX indices_2 = neighborsMaxY * width + neighborsMinX indices_3 = neighborsMinY * width + neighborsMaxX indices_4 = neighborsMaxY * width + neighborsMaxX areas_1 = (neighborsMaxX - neighborsX) * (neighborsMaxY - neighborsY) areas_2 = (neighborsMaxX - neighborsX) * (neighborsY - neighborsMinY) areas_3 = (neighborsX - neighborsMinX) * (neighborsMaxY - neighborsY) areas_4 = (neighborsX - neighborsMinX) * (neighborsY - neighborsMinY) neighborsColor = colors[indices_1] * np.expand_dims(areas_1, -1) + colors[indices_2] * np.expand_dims(areas_2, -1) + colors[indices_3] * np.expand_dims(areas_3, -1) + colors[indices_4] * np.expand_dims(areas_4, -1) colorDiff = np.linalg.norm(neighborsColor - colors, axis=-1) colorDiffs.append(colorDiff) continue colorDiffs = np.stack(colorDiffs, 1) deltaUs, deltaVs = np.meshgrid(np.arange(windowW) - (windowW - 1) / 2, np.arange(windowH) - (windowH - 1) / 2) deltas = deltaUs.reshape((1, -1, 1)) * np.expand_dims(horizontalDirection, axis=1) + deltaVs.reshape((1, -1, 1)) * np.expand_dims(verticalDirection, axis=1) windowIndices = np.expand_dims(uv, 1) - deltas windowIndices = (np.minimum(np.maximum(np.round(windowIndices[:, :, 1]), 0), height - 1) * width + np.minimum(np.maximum(np.round(windowIndices[:, :, 0]), 0), width - 1)).astype(np.int32) dominantLineMap = [] for pixels in windowIndices: gradientSums = colorDiffs[pixels].sum(0) dominantLineMap.append(gradientSums[1] / max(gradientSums[0], 1e-4)) continue dominantLineMaps.append(np.array(dominantLineMap).reshape((height, width))) continue dominantLineMaps = np.stack(dominantLineMaps, axis=2) if 'dominantLineThreshold' in parameters: dominantLineThreshold = parameters['dominantLineThreshold'] else: 
dominantLineThreshold = 3 pass smoothnessWeightMask = dominantLineMaps.max(2) > dominantLineThreshold planesD = np.linalg.norm(planes, axis=1, keepdims=True) planeNormals = planes / np.maximum(planesD, 1e-4) if 'distanceCostThreshold' in parameters: distanceCostThreshold = parameters['distanceCostThreshold'] else: distanceCostThreshold = 0.05 pass distanceCost = np.abs(np.tensordot(points, planeNormals, axes=([1, 1])) - np.reshape(planesD, [1, -1])) / distanceCostThreshold normalCost = 0 normalCostThreshold = 1 - np.cos(np.deg2rad(30)) normalCost = (1 - np.abs(np.tensordot(normals, planeNormals, axes=([1, 1])))) / normalCostThreshold unaryCost = distanceCost + normalCost unaryCost *= np.expand_dims(validMask.astype(np.float32), -1) unaries = unaryCost.reshape((width * height, -1)) if False: cv2.imwrite('test/dominant_normal.png', drawMaskImage(dominantNormalImage)) if imageIndex >= 0: cv2.imwrite('test/' + str(imageIndex) + '_dominant_lines.png', drawMaskImage(dominantLineMaps / dominantLineThreshold)) else: cv2.imwrite('test/dominant_lines.png', drawMaskImage(dominantLineMaps / dominantLineThreshold)) pass cv2.imwrite('test/dominant_lines_mask.png', drawMaskImage(smoothnessWeightMask)) cv2.imwrite('test/distance_cost.png', drawSegmentationImage(-distanceCost.reshape((height, width, -1)), unaryCost.shape[-1] - 1)) cv2.imwrite('test/normal_cost.png', drawSegmentationImage(-normalCost.reshape((height, width, -1)), unaryCost.shape[-1] - 1)) cv2.imwrite('test/unary_cost.png', drawSegmentationImage(-unaryCost.reshape((height, width, -1)), blackIndex=unaryCost.shape[-1] - 1)) cv2.imwrite('test/segmentation.png', drawSegmentationImage(-unaries.reshape((height, width, -1)), blackIndex=unaries.shape[-1])) pass if 'numProposals' in parameters: numProposals = parameters['numProposals'] else: numProposals = 3 pass numProposals = min(numProposals, unaries.shape[-1] - 1) proposals = np.argpartition(unaries, numProposals)[:, :numProposals] proposals[np.logical_not(validMask)] = 0 unaries = -readProposalInfo(unaries, proposals).reshape((-1, numProposals)) nodes = np.arange(height * width).reshape((height, width)) deltas = [(0, 1), (1, 0)] edges = [] edges_features = [] smoothnessWeights = 1 - 0.99 * smoothnessWeightMask.astype(np.float32) for delta in deltas: deltaX = delta[0] deltaY = delta[1] partial_nodes = nodes[max(-deltaY, 0):min(height - deltaY, height), max(-deltaX, 0):min(width - deltaX, width)].reshape(-1) edges.append(np.stack([partial_nodes, partial_nodes + (deltaY * width + deltaX)], axis=1)) labelDiff = (np.expand_dims(proposals[partial_nodes], -1) != np.expand_dims(proposals[partial_nodes + (deltaY * width + deltaX)], 1)).astype(np.float32) edges_features.append(labelDiff * smoothnessWeights.reshape((width * height, -1))[partial_nodes].reshape(-1, 1, 1)) continue edges = np.concatenate(edges, axis=0) edges_features = np.concatenate(edges_features, axis=0) if 'smoothnessWeight' in parameters: smoothnessWeight = parameters['smoothnessWeight'] else: smoothnessWeight = 40 pass print('start') refined_segmentation = inference_ogm(unaries, -edges_features * smoothnessWeight, edges, return_energy=False, alg='trw') print('done') refined_segmentation = refined_segmentation.reshape([height, width, 1]) refined_segmentation = readProposalInfo(proposals, refined_segmentation) planeSegmentation = refined_segmentation.reshape([height, width]) planeSegmentation[np.logical_not(validMask.reshape((height, width)))] = planes.shape[0] cv2.imwrite('test/segmentation_refined.png', 
drawSegmentationImage(planeSegmentation)) return planes, planeSegmentation
with tf.Session() as sess:
    saver.restore(sess, INCEPTION_V4_CHECKPOINT_PATH)
    predictions_val = predictions.eval(feed_dict={X: X_test})


# In[60]:

most_likely_class_index = np.argmax(predictions_val[0])
most_likely_class_index


# In[61]:

class_names[most_likely_class_index]


# In[62]:

top_5 = np.argpartition(predictions_val[0], -5)[-5:]
top_5 = reversed(top_5[np.argsort(predictions_val[0][top_5])])
for i in top_5:
    print("{0}: {1:.2f}%".format(class_names[i], 100 * predictions_val[0][i]))


# # Transfer Learning for Large Image Classification

# **Exercise:** Create a training set containing at least 100 images per class. For example, you could classify your own pictures based on the location (beach, mountain, city, etc.), or alternatively you can just use an existing dataset, such as the flowers dataset or MIT's places dataset (requires registration, and it is huge).

# In[ ]:

import sys
import tarfile
from six.moves import urllib

FLOWERS_URL = "http://download.tensorflow.org/example_images/flower_photos.tgz"
filled=True)
(graph, ) = pydot.graph_from_dot_file('forest.dot')
graph.write_png('somefile.png')

svm = SVC(kernel='linear', gamma=1, C=100)
svm.fit(X_train, y_train)
svm_prediction = svm.predict(X_test)
print(accuracy_score(y_test, svm_prediction))

coefs = np.ravel(svm.coef_)
k = 10
test = np.argpartition(coefs, len(coefs) - k)[-k:]
test = np.ravel(np.flip(test))
values = x.columns.values

forest = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=0)
forest.fit(X_train, y_train)
forest_prediction = forest.predict(X_test)
print(accuracy_score(y_test, forest_prediction))
result = forest.feature_importances_
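One caveat worth illustrating for the coefficient selection above: np.argpartition(coefs, len(coefs) - k)[-k:] picks the k largest coefficients but in arbitrary order, so flipping that array does not yield a ranking. A self-contained sketch of an explicitly sorted version (the coefficient and feature-name arrays below are illustrative stand-ins for svm.coef_ and x.columns.values):

import numpy as np

coefs = np.array([0.2, -1.5, 3.1, 0.7, 2.4])
feature_names = np.array(['f0', 'f1', 'f2', 'f3', 'f4'])
k = 3

top_k = np.argpartition(coefs, -k)[-k:]     # k largest coefficients, in arbitrary order
top_k = top_k[np.argsort(-coefs[top_k])]    # explicit sort: largest coefficient first
for idx in top_k:
    print(feature_names[idx], coefs[idx])   # f2 3.1, f4 2.4, f3 0.7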
def re_ranking(q_g_dist, q_q_dist, g_g_dist, k1=20, k2=6, lambda_value=0.3):
    # The following naming, e.g. gallery_num, is different from outer scope.
    # Don't care about it.
    original_dist = np.concatenate(
        [np.concatenate([q_q_dist, q_g_dist], axis=1),
         np.concatenate([q_g_dist.T, g_g_dist], axis=1)],
        axis=0)
    original_dist = 2. - 2 * original_dist  # change the cosine similarity metric to euclidean similarity metric
    original_dist = np.power(original_dist, 2).astype(np.float32)
    original_dist = np.transpose(1. * original_dist / np.max(original_dist, axis=0))
    V = np.zeros_like(original_dist).astype(np.float32)
    # initial_rank = np.argsort(original_dist).astype(np.int32)
    # top K1+1
    initial_rank = np.argpartition(original_dist, range(1, k1 + 1))

    query_num = q_g_dist.shape[0]
    all_num = original_dist.shape[0]

    for i in range(all_num):
        # k-reciprocal neighbors
        k_reciprocal_index = k_reciprocal_neigh(initial_rank, i, k1)
        k_reciprocal_expansion_index = k_reciprocal_index
        for j in range(len(k_reciprocal_index)):
            candidate = k_reciprocal_index[j]
            candidate_k_reciprocal_index = k_reciprocal_neigh(
                initial_rank, candidate, int(np.around(k1 / 2)))
            if len(np.intersect1d(candidate_k_reciprocal_index, k_reciprocal_index)) > 2. / 3 * len(candidate_k_reciprocal_index):
                k_reciprocal_expansion_index = np.append(
                    k_reciprocal_expansion_index, candidate_k_reciprocal_index)

        k_reciprocal_expansion_index = np.unique(k_reciprocal_expansion_index)
        weight = np.exp(-original_dist[i, k_reciprocal_expansion_index])
        V[i, k_reciprocal_expansion_index] = 1. * weight / np.sum(weight)

    original_dist = original_dist[:query_num, ]
    if k2 != 1:
        V_qe = np.zeros_like(V, dtype=np.float32)
        for i in range(all_num):
            V_qe[i, :] = np.mean(V[initial_rank[i, :k2], :], axis=0)
        V = V_qe
        del V_qe
    del initial_rank

    invIndex = []
    for i in range(all_num):
        invIndex.append(np.where(V[:, i] != 0)[0])

    jaccard_dist = np.zeros_like(original_dist, dtype=np.float32)
    for i in range(query_num):
        temp_min = np.zeros(shape=[1, all_num], dtype=np.float32)
        indNonZero = np.where(V[i, :] != 0)[0]
        indImages = [invIndex[ind] for ind in indNonZero]
        for j in range(len(indNonZero)):
            temp_min[0, indImages[j]] = temp_min[0, indImages[j]] + np.minimum(
                V[i, indNonZero[j]], V[indImages[j], indNonZero[j]])
        jaccard_dist[i] = 1 - temp_min / (2. - temp_min)

    final_dist = jaccard_dist * (1 - lambda_value) + original_dist * lambda_value
    del original_dist
    del V
    del jaccard_dist
    final_dist = final_dist[:query_num, query_num:]
    return final_dist
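The k_reciprocal_neigh helper is referenced above but not shown in this fragment. The sketch below is the implementation that typically accompanies this k-reciprocal re-ranking code (an assumption, not taken from this document): a neighbor j of i is kept only if i also appears among j's top-k1 neighbors.

def k_reciprocal_neigh(initial_rank, i, k1):
    # assumed implementation, matching the usual k-reciprocal re-ranking code (numpy imported as np)
    forward_k_neigh_index = initial_rank[i, :k1 + 1]                       # i's k1 nearest neighbors (plus itself)
    backward_k_neigh_index = initial_rank[forward_k_neigh_index, :k1 + 1]  # their own k1 nearest neighbors
    fi = np.where(backward_k_neigh_index == i)[0]                          # rows whose neighbor lists contain i
    return forward_k_neigh_index[fi]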
def cL(s, x):
    '''Return the indices of the len(x) - s smallest-magnitude entries of vector x
    (i.e. the complement of the s largest in absolute value).'''
    ns = len(x) - s
    return np.argpartition(abs(x), ns)[:ns]
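A small usage sketch for the helper above (assumes numpy imported as np; the vector is illustrative):

x = np.array([-5.0, 0.5, 3.0, -0.2, 1.0])
print(cL(2, x))  # indices of the 3 smallest-magnitude entries: some ordering of {3, 1, 4}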