def set_reach_dist(SetOfObjects, point_index, epsilon):
    """Update reachability distances from one point and pick the next point.

    Row ``point_index`` of ``SetOfObjects.data`` is used as the distances
    from that point to every object.  For every object that is not yet
    marked in ``SetOfObjects._processed`` the stored reachability is lowered
    to ``max(core distance of point_index, distance)`` whenever that value
    is smaller than the current one.

    Parameters
    ----------
    SetOfObjects : object
        Instantiated and prepped instance of the 'setOfObjects' class.
        Reads ``data``, ``_processed`` and ``_core_dist``; updates
        ``_reachability`` in place.
    point_index : int
        Index of the point currently being expanded.
    epsilon : float
        Not used by this implementation; kept so the signature stays
        compatible with the other variants.

    Returns
    -------
    int
        Index of the unprocessed object with the smallest reachability
        (ties go to the object closest to ``point_index``), or
        ``point_index`` itself when every object is already processed.
    """
    row = np.asarray(SetOfObjects.data[point_index, :]).ravel()

    # One stable argsort gives both the ordering and the sorted distances,
    # and makes the order of equidistant points reproducible.
    order = np.argsort(row, kind="stable")
    distances = row[order]

    # Build the "not processed" mask once; ravel() accepts a flat or an
    # (n, 1) shaped ``_processed`` array.
    not_processed = (np.asarray(SetOfObjects._processed)[order] < 1).ravel()
    unprocessed = order[not_processed]
    if unprocessed.size == 0:
        # Nothing left to visit: hand control back to the main loop.
        return point_index

    rdistances = np.maximum(distances[not_processed],
                            SetOfObjects._core_dist[point_index])
    SetOfObjects._reachability[unprocessed] = np.minimum(
        SetOfObjects._reachability[unprocessed], rdistances)

    # argmin returns the first minimum, i.e. the nearest point among ties.
    return unprocessed[np.argmin(SetOfObjects._reachability[unprocessed])]
def set_reach_dist(SetOfObjects, point_index, epsilon):
    """Update reachability distances of a point's neighbours; pick the next.

    The neighbours come from ``SetOfObjects.query(point, k)`` with
    ``k = SetOfObjects._nneighbors[point_index]``.  Each neighbour not yet
    marked in ``SetOfObjects._processed`` gets its reachability lowered to
    ``max(core distance of point_index, distance)`` when that is smaller
    than the stored value.

    Parameters
    ----------
    SetOfObjects : object
        Prepared object exposing ``query``, ``data``, ``_nneighbors``,
        ``_processed``, ``_core_dist`` and ``_reachability`` (the latter is
        updated in place).
    point_index : int
        Index of the point currently being expanded.
    epsilon : float
        Not used by this implementation; kept for interface compatibility.

    Returns
    -------
    int
        The unprocessed neighbour with the smallest reachability, or
        ``point_index`` when there is no unprocessed neighbour.

    Notes
    -----
    Assumes that the query returns ordered (smallest distance first)
    entries.  This is the case for the balltree query; switching to a query
    structure that does not do this will break things in a non-obvious way.
    When several entries are tied in reachability distance, the next point
    would then be chosen in arbitrary order instead of being the closest
    one.  This may manifest in edge cases where different runs of OPTICS
    give different ordered lists and hence a different clustering
    structure, removing reproducibility.
    """
    # Local import: the deprecated ``scipy.*`` NumPy aliases are replaced by
    # the NumPy functions they pointed at.
    import numpy as np

    distances, indices = SetOfObjects.query(
        SetOfObjects.data[point_index],
        SetOfObjects._nneighbors[point_index])

    # Checks to see if there is more than one member in the neighbourhood:
    # a non-iterable result leaves nothing to scan.
    if not np.iterable(distances):
        return point_index

    # Flatten so both (1, k) and (k,) shaped query output are handled.
    distances = np.ravel(distances)
    indices = np.ravel(indices)

    # Mask out processed values (computed once, used for both arrays).
    not_processed = (np.asarray(SetOfObjects._processed)[indices] < 1).ravel()
    unprocessed = indices[not_processed]
    if unprocessed.size == 0:
        # Everything is already processed: return control to the main loop.
        return point_index

    rdistances = np.maximum(distances[not_processed],
                            SetOfObjects._core_dist[point_index])
    SetOfObjects._reachability[unprocessed] = np.minimum(
        SetOfObjects._reachability[unprocessed], rdistances)

    # First minimum wins, so ties go to the earliest (closest) query entry.
    return unprocessed[np.argmin(SetOfObjects._reachability[unprocessed])]
def _set_reach_dist(setofobjects, point_index, epsilon): # Assumes that the query returns ordered (smallest distance first) # entries. This is the case for the balltree query... dists, indices = setofobjects.query(setofobjects.data[point_index], setofobjects._nneighbors[point_index]) # Checks to see if there more than one member in the neighborhood ## if sp.iterable(dists): # Masking processed values ## # n_pr is 'not processed' n_pr = indices[(setofobjects._processed[indices] < 1)[0].T] rdists = sp.maximum(dists[(setofobjects._processed[indices] < 1)[0].T], setofobjects.core_dists_[point_index]) new_reach = sp.minimum(setofobjects.reachability_[n_pr], rdists) setofobjects.reachability_[n_pr] = new_reach # Checks to see if everything is already processed; # if so, return control to main loop ## if n_pr.size > 0: # Define return order based on reachability distance ### return n_pr[sp.argmin(setofobjects.reachability_[n_pr])] else: return point_index
def _set_reach_dist(SetOfObjects, point_index, epsilon): # Assumes that the query returns ordered (smallest distance first) # entries. This is the case for the balltree query... distances, indices = SetOfObjects.query( SetOfObjects.data[point_index], SetOfObjects._nneighbors[point_index]) # Checks to see if there more than one member in the neighborhood ## if scipy.iterable(distances): # Masking processed values ## unprocessed = indices[(SetOfObjects._processed[indices] < 1)[0].T] rdistances = scipy.maximum( distances[(SetOfObjects._processed[indices] < 1)[0].T], SetOfObjects._core_dist[point_index]) SetOfObjects._reachability[unprocessed] = scipy.minimum( SetOfObjects._reachability[unprocessed], rdistances) # Checks to see if everything is already processed; # if so, return control to main loop ## if unprocessed.size > 0: # Define return order based on reachability distance ### return sorted(zip(SetOfObjects._reachability[unprocessed], unprocessed), key=lambda reachability: reachability[0])[0][1] else: return point_index else: # Not sure if this else statement is actually needed... ## return point_index
def set_reach_dist(SetOfObjects, point_index, epsilon):
    """Set reachability distances and determine the next point to visit.

    The distances from ``point_index`` to all objects are read from row
    ``point_index`` of ``SetOfObjects.data``.  Objects not yet marked in
    ``SetOfObjects._processed`` have their reachability reduced to
    ``max(core distance of point_index, distance)`` when that improves on
    the stored value.

    Parameters
    ----------
    SetOfObjects : object
        Instantiated and prepped instance of the 'setOfObjects' class.
        Reads ``data``, ``_processed`` and ``_core_dist``; writes
        ``_reachability``.
    point_index : int
        Index of the point currently being expanded.
    epsilon : float
        Not used by this implementation; kept for interface compatibility.

    Returns
    -------
    int
        The unprocessed object with the lowest reachability (the closest
        one on ties), or ``point_index`` if all objects are processed.
    """
    row = np.asarray(SetOfObjects.data[point_index, :]).ravel()

    # A single stable argsort replaces the separate sort/argsort passes and
    # keeps equidistant points in a reproducible order.
    by_distance = np.argsort(row, kind="stable")
    sorted_dist = row[by_distance]

    # ravel() lets the mask work for flat and (n, 1) ``_processed`` arrays.
    pending = (np.asarray(SetOfObjects._processed)[by_distance] < 1).ravel()
    unprocessed = by_distance[pending]
    if unprocessed.size == 0:
        # All objects processed: give control back to the caller's loop.
        return point_index

    core = SetOfObjects._core_dist[point_index]
    rdistances = np.maximum(sorted_dist[pending], core)
    SetOfObjects._reachability[unprocessed] = np.minimum(
        SetOfObjects._reachability[unprocessed], rdistances)

    # argmin picks the first minimum, so ties resolve to the nearest point
    # instead of depending on an unstable sort.
    winner = np.argmin(SetOfObjects._reachability[unprocessed])
    return unprocessed[winner]
def set_reach_dist(SetOfObjects, point_index, epsilon):
    """Update reachability distances from one point; return the next point.

    Row ``point_index`` of ``SetOfObjects.data`` is sorted and used as the
    distances from that point to every object, in place of a spatial-tree
    neighbour query.  Objects not marked in ``SetOfObjects._processed`` get
    their reachability lowered to
    ``max(core distance of point_index, distance)`` when that is smaller
    than the value already stored.

    Parameters
    ----------
    SetOfObjects : object
        Object exposing ``data``, ``_processed``, ``_core_dist`` and
        ``_reachability`` (updated in place).
    point_index : int
        Index of the point currently being expanded.
    epsilon : float
        Not used by this implementation; kept for interface compatibility.

    Returns
    -------
    int
        Unprocessed object with the smallest reachability (closest one on
        ties), or ``point_index`` when everything is processed.
    """
    row = np.asarray(SetOfObjects.data[point_index, :]).ravel()

    # The selection below relies on smallest-distance-first ordering; a
    # stable sort makes that ordering reproducible for equal distances.
    indices = np.argsort(row, kind="stable")
    distances = row[indices]

    # Masking processed values (one mask, reused for indices and distances).
    open_mask = (np.asarray(SetOfObjects._processed)[indices] < 1).ravel()
    unprocessed = indices[open_mask]

    # Checks to see if everything is already processed;
    # if so, return control to main loop.
    if unprocessed.size == 0:
        return point_index

    rdistances = np.maximum(distances[open_mask],
                            SetOfObjects._core_dist[point_index])
    SetOfObjects._reachability[unprocessed] = np.minimum(
        SetOfObjects._reachability[unprocessed], rdistances)

    # Define return order based on reachability distance.  argmin takes the
    # first minimum, the same element a stable sort on reachability yields.
    return unprocessed[np.argmin(SetOfObjects._reachability[unprocessed])]
def set_reach_dist(self, point_index, epsilon, dtype):
    """Update reachability of a point's neighbours and pick the next point.

    Neighbour indices and distances come from
    ``self.get_neighbors_dist(point_index, epsilon, dtype)``.  Neighbours
    not flagged in ``self._processed`` have ``self._reachability`` lowered
    to ``max(self._core_dist[point_index], distance)`` when that is smaller
    than the stored value.

    Parameters
    ----------
    point_index : int
        Index of the point currently being expanded.
    epsilon, dtype
        Forwarded unchanged to ``get_neighbors_dist``.

    Returns
    -------
    int
        Unprocessed neighbour with the smallest reachability, or
        ``point_index`` when there is none or the distances are not
        iterable.
    """
    # Local import: NumPy functions replace the deprecated ``scipy.*`` aliases.
    import numpy as np

    indices, distances = self.get_neighbors_dist(point_index, epsilon, dtype)
    if not np.iterable(distances):
        return point_index

    # Keep the boolean mask under its own name.  It used to be overwritten
    # by the filtered indices and then reused as the mask for ``distances``.
    # NOTE(review): assumes filter_list(values, mask) keeps the entries whose
    # mask element is truthy -- confirm against its definition.
    not_processed = ((self._processed[indices] < 1).T)[0].tolist()
    unprocessed = filter_list(indices, not_processed)
    if len(unprocessed) == 0:
        # Every neighbour is processed: hand control back to the caller.
        return point_index

    rdistances = np.maximum(filter_list(distances, not_processed),
                            self._core_dist[point_index])
    self._reachability[unprocessed] = np.minimum(
        self._reachability[unprocessed], rdistances)

    # First minimum wins, like the previous stable sort on reachability.
    return unprocessed[int(np.argmin(self._reachability[unprocessed]))]
def set_reach_dist(SetOfObjects, point_index, epsilon):
    """Cap the reachability of all unprocessed objects; return the next one.

    Queries ``SetOfObjects._nneighbors[point_index]`` neighbours of the
    point with ``distance_upper_bound=epsilon``.  When the last returned
    distance is finite it is used as the cap: every object not flagged in
    ``SetOfObjects._processed`` has its reachability lowered to that value
    if it is smaller than the stored one.

    Parameters
    ----------
    SetOfObjects : object
        Object exposing ``query``, ``data``, ``_nneighbors``, ``_index``,
        ``_processed`` and ``_reachability`` (updated in place).
    point_index : int
        Index of the point currently being expanded.
    epsilon : float
        Passed to the query as ``distance_upper_bound``.

    Returns
    -------
    int
        The first unprocessed entry of ``SetOfObjects._index``, or
        ``point_index`` when the query result is not iterable, its last
        distance is not finite, or nothing is left unprocessed.
    """
    # Local import: NumPy functions replace the deprecated ``scipy.*`` aliases.
    import numpy as np

    distances, indices = SetOfObjects.query(
        SetOfObjects.data[point_index],
        SetOfObjects._nneighbors[point_index],
        distance_upper_bound=epsilon)

    if not np.iterable(distances):
        return point_index

    # NOTE(review): presumably the query pads missing neighbours with inf
    # (as scipy's KD-tree does with distance_upper_bound) -- confirm.
    if not np.isfinite(distances[-1]):
        return point_index
    c_dist = distances[-1]

    # Use the object passed in; this previously read an unrelated global
    # named ``test_set``.
    unprocessed = SetOfObjects._index[
        np.where(SetOfObjects._processed < 1)[0]]
    if len(unprocessed) == 0:
        # Guard: indexing [0] on an empty selection would raise IndexError.
        return point_index

    SetOfObjects._reachability[unprocessed] = np.minimum(
        SetOfObjects._reachability[unprocessed], c_dist)
    return unprocessed[0]
def set_reach_dist(self, point_index, epsilon, dtype):
    """Lower neighbour reachability for one point and choose the next point.

    ``self.get_neighbors_dist(point_index, epsilon, dtype)`` supplies the
    neighbour indices and distances.  For neighbours not flagged in
    ``self._processed`` the stored reachability becomes
    ``min(current, max(self._core_dist[point_index], distance))``.

    Parameters
    ----------
    point_index : int
        Index of the point currently being expanded.
    epsilon, dtype
        Forwarded unchanged to ``get_neighbors_dist``.

    Returns
    -------
    int
        The unprocessed neighbour with the lowest reachability, or
        ``point_index`` if no neighbour is left or the distances are not
        iterable.
    """
    # Local import: NumPy functions replace the deprecated ``scipy.*`` aliases.
    import numpy as np

    indices, distances = self.get_neighbors_dist(point_index, epsilon, dtype)
    if not np.iterable(distances):
        return point_index

    # The mask and the filtered indices need separate names: the same
    # variable was previously rebound to the indices and then passed as the
    # mask when filtering ``distances``.
    # NOTE(review): assumes filter_list(values, mask) selects by a boolean
    # mask -- verify against the helper's definition.
    keep = ((self._processed[indices] < 1).T)[0].tolist()
    unprocessed = filter_list(indices, keep)
    if len(unprocessed) == 0:
        # Nothing left among the neighbours: return control to the caller.
        return point_index

    rdistances = np.maximum(filter_list(distances, keep),
                            self._core_dist[point_index])
    self._reachability[unprocessed] = np.minimum(
        self._reachability[unprocessed], rdistances)

    # argmin yields the first minimum, matching a stable sort on
    # reachability.
    best = int(np.argmin(self._reachability[unprocessed]))
    return unprocessed[best]
def _set_reach_dist(setofobjects, point_index, epsilon): X = np.array(setofobjects.data[point_index]).reshape(1, -1) indices = setofobjects.query_radius(X, r=epsilon, return_distance=False, count_only=False, sort_results=False)[0] # Checks to see if there more than one member in the neighborhood if sp.iterable(indices): # Masking processed values; n_pr is 'not processed' n_pr = np.compress( (np.take(setofobjects._processed, indices, axis=0) < 1).ravel(), indices, axis=0) # n_pr = indices[(setofobjects._processed[indices] < 1).ravel()] if len(n_pr) > 0: dists = pairwise_distances(X, np.take(setofobjects.get_arrays()[0], n_pr, axis=0), setofobjects.metric, n_jobs=1).ravel() rdists = sp.maximum(dists, setofobjects.core_dists_[point_index]) new_reach = sp.minimum( np.take(setofobjects.reachability_, n_pr, axis=0), rdists) setofobjects.reachability_[n_pr] = new_reach # Checks to see if everything is already processed; # if so, return control to main loop if n_pr.size > 0: # Define return order based on reachability distance return (n_pr[quick_scan( np.take(setofobjects.reachability_, n_pr, axis=0), dists)]) else: return point_index