def run(obstacles, start, goal, max_size, plotter):
    step_size = 50
    final_pos = np.array(goal[:2])
    # Pre-compute for generating new random points in Q_free
    minp, maxp = obstacles.vertices.min(0), obstacles.vertices.max(0)
    span, offset = (maxp - minp) * 1.1, minp - (maxp - minp) * 0.05

    def gen_valid_rand(valid_function):
        """Generates a valid q_rand in Q_free given a validity function"""
        tmp = np.random.random(2) * span + offset
        while not valid_function(*tmp):
            tmp = np.random.random(2) * span + offset
        return tmp

    KD = KDTree(start[:2], 0)
    circ1 = plotter.draw_circle(start, 1, time=1, zorder=5)
    obstacles = Obstacles(obstacles.to_polygons())
    trials = 0
    while KD.length < max_size:
        trials += 1
        circ1.remove()
        # Select a random point q_rand \in Q_free
        q_rand = gen_valid_rand(obstacles.point_is_valid) if np.random.randint(0, 100) > 5 else final_pos
        circ1 = plotter.draw_circle(q_rand, 5, time=0.01, zorder=5)
        # Find the nearest node and distance to it
        q_near, dist = KD.nearestNode(q_rand, return_node=True)
        # Generate the next node in the direction of q_rand
        if dist < 0.5:
            continue
        if dist < step_size:
            if trials < 10:
                continue
            q_next = tuple(q_rand)
        else:
            q_next = gen_next(tuple(q_near.node), q_rand, step_size)
        if not obstacles.point_is_valid(*q_next):
            continue
        dist = math.hypot(q_next[0] - q_near[0], q_next[1] - q_near[1])
        # Check validity and update tree
        for i in range(10):
            alpha_new = np.random.random() * 2 * math.pi  # ( + q_near.alpha) - math.pi
            collides = check_collision(obstacles, (*q_near.node, q_near.alpha),
                                       (*q_next, alpha_new), dist)
            if not collides:
                break
        else:
            continue
        KD.addNode(q_next, alpha_new)
        plot_steps((*q_near.node, q_near.alpha), (*q_next, alpha_new), dist, plotter)
        goal_distance = math.hypot(q_next[0] - goal[0], q_next[1] - goal[1])
        collides = check_collision(obstacles, (*q_next, alpha_new), goal, goal_distance)
        if not collides:
            plot_steps((*q_next, alpha_new), goal, goal_distance, plotter)
            plotter.draw_rectangle(gen_rect_pts(*goal), facecolor='red', edgecolor='k')
            break
        trials = 0
    print("n =", KD.length)

def test_get_chosen_and_unchosen_branches_value_is_greater_than(self):
    left_tree = KDTree()
    right_tree = KDTree()
    kd_tree = KDTree(self._patient3, 0, left_tree, right_tree)
    chosen_tree, unchosen_tree = kd_tree._get_chosen_and_unchosen_branches(self._patient)
    self.assertEqual(chosen_tree, right_tree)
    self.assertEqual(unchosen_tree, left_tree)

def find_nearest_point(self, point):
    """
    C.find_nearest_point([x,y,z]) -> [x,y,z]

    Find the closest point on the curve to the one given.
    """
    if not self.kdtree:  # only generate if nearest point is wanted
        self.kdtree = KDTree(self.points)
    return self.kdtree.nearest_neighbour(point).datum

def load(cls, fname, index):
    '''
    Load a pickled search component from file fname, using search index index
    '''
    comp = cls.__new__(cls)
    super(SearchComponent, comp).__init__()
    comp.logger = logging.getLogger('similar_item_service.search.SearchComponent')
    comp.logger.info(f"Loading search component from {fname}")
    comp.index = index
    with open(fname, 'rb') as f:
        comp.datadir, savedtrees = pickle.load(f)
    comp.cache = True
    comp.trees = {}
    for category, savedtree in savedtrees.items():
        catFile = os.path.join(comp.datadir, str(category) + '_data.array')
        if not os.path.exists(catFile):
            comp.logger.error(
                f"Missing data file required for loading search component: {catFile}")
            raise FileNotFoundError
        catEmbeddings = np.memmap(catFile, dtype='float32', mode='r',
                                  shape=(savedtree["n"], savedtree["m"]))
        comp.trees[category] = KDTree.deserialize(savedtree, catEmbeddings)
    return comp

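# Hedged note on the serialized layout that load() above appears to expect.
# This is inferred from the code, not a documented format:
#   pickle file -> (datadir, savedtrees)
#   savedtrees  -> {category: savedtree, ...}, each savedtree carrying at least
#                  "n" (row count) and "m" (embedding width) for its memmap
#   datadir     -> one '<category>_data.array' float32 memmap per category,
#                  shaped (n, m)
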
def __init__(me, cons, forward, examplePose, phi, psi, task_queue, result_queue):
    multiprocessing.Process.__init__(me)
    try:
        me.target = PDBTools.readPDBFile("out.pdb")[0]
    except:
        me.target = PDBTools.readPDBFile("%s/out.pdb" % cons["commonFolder"])[0]
    me.targetTree = KDTree.loadAtomArray(me.target.atoms)
    me.clashLimit = cons["clashLimit"]
    me.forward = forward
    me.task_queue = task_queue
    me.result_queue = result_queue
    me.phi = phi
    me.psi = psi
    me.examplePose = examplePose
    me.iN = None
    me.iCA = None
    me.iC = None
    highestRes = max(examplePose.atoms, key=lambda x: x.residueNumber).residueNumber
    for i in range(len(examplePose.atoms)):
        atm = examplePose.atoms[i]
        if (forward and (atm.residueNumber == highestRes)) or \
                ((not forward) and (atm.residueNumber == 1)):
            if atm.atomType == "N":
                me.iN = i
            if atm.atomType == "C":
                me.iC = i
            if atm.atomType == "CA":
                me.iCA = i
            if atm.atomType == "O":
                me.iO = i

def find_nearest_id(retailer_info, k_nn=12, leaf_size=10):
    k_nn = min(k_nn, len(retailer_info) - 1)
    now_retailer_gps = copy.deepcopy(retailer_info)
    if not isinstance(now_retailer_gps, pd.DataFrame):
        raise Exception("Input data is not a DataFrame")
    now_retailer_gps = now_retailer_gps.reset_index(drop=True)
    now_retailer_gps['x'], now_retailer_gps['y'], now_retailer_gps['z'] = zip(
        *map(to_Cartesian, now_retailer_gps['end_loc_latitude'],
             now_retailer_gps['end_loc_longitude']))
    now_retailer_xyz = list(zip(now_retailer_gps['x'], now_retailer_gps['y'],
                                now_retailer_gps['z']))
    RetlTree = KDTree(now_retailer_xyz, leafsize=leaf_size)
    dist_k, ind_k = RetlTree.query(now_retailer_xyz, k=k_nn + 1)
    # B_retailer_list = now_retailer_gps.iloc[ind_k[:, 1:].flatten(), :]
    # B_retailer_dist = now_retailer_gps.iloc[dist_k[:, 1:].flatten(), :]
    pinche_retailer = []
    for i in now_retailer_gps.index:
        temp = []
        start_dealer = now_retailer_gps.loc[i, 'dealer_code']
        temp.append(start_dealer)
        for j in range(1, k_nn + 1):
            if dist_k[i, j] > 625:
                temp.append(None)
            else:
                temp.append(now_retailer_gps.loc[ind_k[i, j], 'dealer_code'])
        pinche_retailer.append(temp)
    retailerAB_IDlist = pd.DataFrame(data=pinche_retailer)
    # Requirements unclear; undecided when distances should be returned
    # retailerAB_dist = pd.DataFrame(data=dist_k, index=now_retailer_gps.dealer_code).drop(0, 1)
    # return retailerAB_IDlist.reset_index()
    return retailerAB_IDlist

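# Hedged usage sketch for find_nearest_id above. It assumes scipy.spatial-style
# KDTree(..., leafsize=...) / query(...) semantics and a to_Cartesian helper
# (defined elsewhere in this project) mapping (lat, lon) to x/y/z; the toy
# DataFrame is illustrative only.
import pandas as pd

retailers = pd.DataFrame({
    'dealer_code': ['A01', 'B02', 'C03'],
    'end_loc_latitude': [31.23, 31.24, 30.90],
    'end_loc_longitude': [121.47, 121.48, 121.10],
})
nearest_ids = find_nearest_id(retailers, k_nn=2)
print(nearest_ids)  # column 0: dealer, columns 1..k: nearby dealers or None
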
def __init__(me, step, cons, targetAll, task_queue, result_queue):
    multiprocessing.Process.__init__(me)
    me.cons = cons
    me.step = step
    me.targetAll = targetAll
    me.targetAll.makeDictionary()
    me.targetTree = KDTree.loadAtomArray(
        list(filter(lambda x: not x.isHydrogen(), targetAll.atoms)))
    me.centerResNum = step["centerResidue"]
    me.referenceResNums = step["referenceResidues"]
    if me.centerResNum not in me.referenceResNums:
        me.referenceResNums.append(me.centerResNum)
    for rNum in me.referenceResNums:
        # print(targetAll.getSpecificAtom(rNum, "N").toPDBLine())
        me.targetAll.implyHydrogen(rNum)
    targetByChain = splitAtomsIntoChains(me.targetAll.atoms)
    me.tRes = getTargetDict(targetByChain, True)
    acceptorList, donorList = getHBDandHBA(cons)
    me.donorTree = KDTree.loadAtomArray(donorList)
    me.acceptorTree = KDTree.loadAtomArray(acceptorList)
    me.tpChain = step.get("chain", None)
    if me.tpChain is None:
        if len(cons["targetProteinChain"]) == 1:
            me.tpChain = cons["targetProteinChain"][0]
        else:
            raise Exception("No chain given for initiator routine and none can be assumed")
    me.tpDist = targetAll.getSpecificAtom(me.centerResNum, "N")
    me.tpAng = targetAll.getSpecificAtom(me.centerResNum, "CA")
    me.tpTor = targetAll.getSpecificAtom(me.centerResNum, "C")
    me.zmatObj = zmatObj(step)
    me.task_queue = task_queue
    me.result_queue = result_queue

def __iter__(self):
    while True:
        idx = self.rng.randint(len(self))
        points = self.points[idx]
        label = self.label[idx]
        if self.augment:
            rot = rnd_rot()
            points = np.einsum('ij,nj->ni', rot, points)
            points += np.random.rand(3)[None, :] * 0.05
            points = np.einsum('ij,nj->ni', rot.T, points)
        rand_rot = rnd_rot() if self.rnd_rot else np.eye(3)
        points = points @ rand_rot
        order, split_axis = KDTree(points, 11)
        yield (points, np.asarray(order), np.asarray(label[0]), *split_axis)

def setUp(self):
    self._patient = Patient(100, 1, 100, 100, 48, 0, 1.51, 28.1, 2, 2, 3, 1010, 37.99, 2, 4)
    self._patient1 = Patient(100, 0, 100, 100, 50.33, 0, 1.75, 27.3, 3, 3, 2, 6152, 38.81, 3, 4)
    self._patient2 = Patient(100, 1, 100, 100, 48.63, 1, 1.33, 23.3, 2, 1, 2, 2125, 35.75, 3, 4)
    self._patient3 = Patient(100, 1, 100, 100, 38.23, 1, 2.31, 30.1, 1, 3, 1, 1231, 33.75, 2, 4)
    self._patient4 = Patient(100, 1, 100, 100, 48.63, 1, 1.89, 29.8, 1, 1, 2, 4533, 39.11, 1, 4)
    patients = [self._patient1, self._patient2, self._patient3, self._patient4]
    self._kd_tree = KDTree.build(patients)

def _build_(self, n_jobs=0):
    '''
    Build a search component using n_jobs cpu cores.
    If n_jobs is 0, all cores are used.
    '''
    n_jobs = mp.cpu_count() if n_jobs == 0 else max(1, n_jobs)
    self.logger.info(f"Building search component with {n_jobs} CPU cores")
    catEmbeddings = {}
    for category, items in self.index.itemsByCategory.items():
        catFile = os.path.join(self.datadir, str(category) + "_data.array")
        catEmbeddings[category] = np.memmap(catFile, dtype='float32', mode='r')
        catEmbeddings[category].shape = (len(items), -1)
    if n_jobs == 1:
        for i, (category, embeddings) in enumerate(catEmbeddings.items(), 1):
            self.logger.info(
                f"Building tree for category {category}, {i} of {len(catEmbeddings)}")
            self.trees[category] = KDTree(embeddings)
    else:
        with mp.Pool(processes=n_jobs) as pool:
            results = pool.map(KDTree, catEmbeddings.values())
        self.trees = dict(zip(catEmbeddings.keys(), results))

from Patient import Patient
from KDTree import KDTree

if __name__ == '__main__':
    patient = Patient.parse_patients_from_file('test_patients.csv')[0]
    patients = Patient.parse_patients_from_file('patient_data.csv')
    kd_tree = KDTree.build(patients)
    print(kd_tree.predict_outcome(patient, 3, 7))
    print(kd_tree.create_grouping(patient, 3, 7))

class CoordinateNeighborSearch(object):
    """
    This class can be used for two related purposes:

    1. To find all indices of a coordinate list within radius of a given
       query position.
    2. To find all indices of a coordinate list that are within a fixed
       radius of each other.

    CoordinateNeighborSearch makes use of the KDTree C++ module, so it's fast.
    """

    def __init__(self, coordinates, bucket_size=10):  # , copy=True):
        """
        :Arguments:
          *coordinates*
            list of N coordinates (Nx3 numpy array)
          *bucket_size*
            bucket size of KD tree. You can play around with this to
            optimize speed if you feel like it.
        """
        # to Nx3 array of type float (required for the C++ code)
        ## (also force a copy by default and make sure that the array order is
        ## compatible with the C++ code)
        ##self.coords=numpy.array(coordinates,dtype=numpy.float32,copy=copy,order='C')
        self.coords = numpy.asarray(coordinates, dtype=numpy.float32, order='C')
        assert self.coords.dtype == numpy.float32
        assert bucket_size > 1
        assert self.coords.shape[1] == 3
        self.kdt = KDTree(3, bucket_size)
        self.kdt.set_coords(self.coords)

    def search(self, center, radius, distances=False):
        """Neighbor search.

        Return all indices in the coordinates list that have at least one
        atom within *radius* of *center*.

        :Arguments:
          *center*
            numpy array
          *radius*
            float
          *distances*
            bool; ``True``: return (indices, distances); ``False``: return
            indices only
        """
        self.kdt.search(center, radius)
        if distances:
            return self.kdt.get_indices(), self.kdt.get_radii()
        else:
            return self.kdt.get_indices()

    def search_list(self, centers, radius):
        """Search neighbours near all centers.

        Returns all indices that are within *radius* of any center listed in
        *centers*, i.e. "find all A within R of B" where A are the
        coordinates used for setting up the CoordinateNeighborSearch and B
        are the centers.

        :Arguments:
          *centers*
            Mx3 numpy array of M centers
          *radius*
            float
        """
        self.kdt.list_search(centers, radius)
        return self.kdt.list_get_indices()

    def search_all(self, radius, distances=False):
        """All neighbor search.

        Return all index pairs corresponding to coordinates within the
        *radius*.

        :Arguments:
          *radius*
            float
          *distances*
            bool; ``True``: return (indices, distances); ``False``: return
            indices only [``False``]
        """
        self.kdt.all_search(radius)
        if distances:
            return self.kdt.all_get_indices(), self.kdt.all_get_radii()
        else:
            return self.kdt.all_get_indices()

    def _distances(self):
        """Return all distances after search()."""
        return self.kdt.get_radii()

    def _distances_all(self):
        """Return all distances after search_all()."""
        return self.kdt.all_get_radii()

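# Hedged usage sketch for CoordinateNeighborSearch above, assuming the KDTree
# C++ module it wraps is importable; the coordinates are illustrative.
import numpy

coords = numpy.array([[0.0, 0.0, 0.0],
                      [1.0, 0.0, 0.0],
                      [5.0, 5.0, 5.0]])
cns = CoordinateNeighborSearch(coords)
print(cns.search(numpy.array([0.5, 0.0, 0.0]), 2.0))  # indices within 2.0 of the query
print(cns.search_all(2.0))                            # all index pairs within 2.0
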
from CvsHandler import DataHandler
from KDTree import KDTree
import os
import numpy as np
import pandas as pd
from Node import Node
from List import listeishon


# Clear the screen
def cls():
    os.system('cls' if os.name == 'nt' else 'clear')


tree = KDTree()
datos = DataHandler()
points = datos.get_points()
listeishon = listeishon()
tipo = ''

# First, ask which data structure the user wants to use
print('Welcome!\nFirst, I need to know whether you want to use a list or a KDTree.')
while tipo != "a" and tipo != "b":
    print('a : List\nb : KDTree')
    tipo = input()
print('Using List...')
print('Loading Dataset...')

class Curve(object):
    """
    A representation of a curve which is defined by points which are then
    linearly interpolated
    """

    def __init__(self, points_ndarray):
        """
        C.__init__(numpy.ndarray)

        Takes a numpy array with each entry defining a point in three
        dimensional space and creates a new curve object to represent it,
        using linear interpolation between the points.
        """
        self.set_points(points_ndarray)

    def val_at_arclength(self, t):
        """
        C.val_at_arclength(float) -> [float,float,float]

        Give the point corresponding to the given arclength for the curve.
        To get this, linear interpolation on the given points is used.
        """
        if t == 0:
            return self.points[0]
        if t == self.arc_length:
            return self.points[-1]
        if not (0 < t < self.arc_length):
            raise ValueError("Input needs to be between 0 and the arclength of the curve")
        # find which two points this arclength t lies between
        top_limit = len(self.distances) - 1
        bottom_limit = 0
        while top_limit != bottom_limit:
            middle = bottom_limit + (top_limit - bottom_limit) // 2
            bottom_dist, top_dist = self.distances[middle]
            if bottom_dist <= t < top_dist:
                top_limit = bottom_limit = middle
            elif t < bottom_dist:
                top_limit = middle - 1
            elif t >= top_dist:
                bottom_limit = middle + 1
        a, b = self.points[top_limit], self.points[top_limit + 1]
        # construct a function to linearly interpolate between these points
        dist_a = self.distances[top_limit][0]
        dist_b = self.distances[top_limit][1]
        f = lambda t: (b - a) / (dist_b - dist_a) * (t - dist_a) + a
        # return the interpolated value
        return f(t)

    def gen_num_points(self, total, loose_detail=False):
        """
        C.gen_num_points(int)

        Using the arclength parameterisation of this curve, create a new
        curve with the new number of points.

        Note: to avoid losing information, this will default to only allowing
        a larger number of points to be chosen. If losing detail is what you
        desire, set loose_detail to True.
        """
        if total < self.points.shape[0] and not loose_detail:
            raise ValueError((
                "Total ({0}) needs to be larger than number of points"
                " present ({1}). If you want to lose detail, invoke this"
                " function with loose_detail=True").format(total, self.points.shape[0]))
        return numpy.array([
            self.val_at_arclength(t)
            for t in numpy.linspace(0, self.arc_length, total)
        ])

    def set_points(self, points_ndarray):
        """
        C.set_points(numpy.ndarray)

        Change the points this curve represents
        """
        if not isinstance(points_ndarray, numpy.ndarray):
            raise TypeError("Points must be in a numpy.ndarray")
        if not (points_ndarray.shape[1] == 3 and points_ndarray.shape[0] > 1):
            raise TypeError("points_ndarray must have more than one point "
                            "and each point must be an array of length three.")
        self.points = points_ndarray
        euc_length = lambda a, b: pow(sum(pow(a - b, 2)), 0.5)
        # self.distances[i] is the arclength range between the ith and (i+1)th
        # points. This is calculated so that lookups based on arc length can
        # simply search this list
        self.distances = [(0, euc_length(self.points[0], self.points[1]))]
        for i in range(1, len(self.points) - 1):
            self.distances.append(
                (self.distances[i - 1][1],
                 self.distances[i - 1][1] + euc_length(self.points[i], self.points[i + 1])))
        self.arc_length = float(self.distances[-1][1])
        self.kdtree = None

    def find_nearest_point(self, point):
        """
        C.find_nearest_point([x,y,z]) -> [x,y,z]

        Find the closest point on the curve to the one given.
        """
        if not self.kdtree:  # only generate if nearest point is wanted
            self.kdtree = KDTree(self.points)
        return self.kdtree.nearest_neighbour(point).datum

    def __str__(self):
        """
        s.__str__()

        Give the string representation of the points this curve currently
        is storing.
        """
        return str(self.points)

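# Hedged usage sketch for the Curve class above, assuming the KDTree used by
# find_nearest_point exposes nearest_neighbour(point).datum as the code expects;
# the points are illustrative.
import numpy

pts = numpy.array([[0.0, 0.0, 0.0],
                   [1.0, 0.0, 0.0],
                   [1.0, 1.0, 0.0]])
curve = Curve(pts)
print(curve.arc_length)                           # 2.0 for this polyline
print(curve.val_at_arclength(0.5))                # [0.5, 0.0, 0.0]
print(curve.find_nearest_point([0.9, 0.1, 0.0]))  # nearest stored point
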
def run(obstacles, start, goal, step_size, max_size, plotter):
    circ_rad = min(step_size / 5, 5)
    final_pos = np.array(goal[:2])
    # Pre-compute for generating new random points in Q_free
    minp, maxp = obstacles.vertices.min(0), obstacles.vertices.max(0)
    span, offset = (maxp - minp) * 1.1, minp - (maxp - minp) * 0.05

    def gen_valid_rand(valid_function):
        """Generates a valid q_rand in Q_free given a validity function"""
        tmp = np.random.random(2) * span + offset
        while not valid_function(*tmp):
            tmp = np.random.random(2) * span + offset
        return tmp

    KD = KDTree(start)
    RRT = PathTree(start)
    circ1 = plotter.draw_circle(start, 1, time=1, zorder=5)
    obstacles = Obstacles(obstacles.to_polygons())
    trials = 0
    while KD.length < max_size:
        trials += 1
        circ1.remove()
        # Select a random point q_rand \in Q_free
        q_rand = gen_valid_rand(obstacles.point_is_valid) if np.random.randint(0, 100) > 5 else final_pos
        circ1 = plotter.draw_circle(q_rand, 5, time=0.01, zorder=5)
        # Find the nearest node and distance to it
        q_near, dist = KD.nearestNode(q_rand)
        # Generate the next node in the direction of q_rand
        if dist < step_size:
            if trials < 10:
                continue  # Prevents step_size too big bug
            q_next = tuple(q_rand)
        else:
            q_next = gen_next(q_near, q_rand, step_size)
        if not obstacles.point_is_valid(*q_next):
            continue
        # Check validity and update tree
        if obstacles.check_collisions((q_near, q_next)):
            continue
        KD.addNode(q_next)
        RRT.addPath(q_near, q_next)
        plotter.draw_line(q_near, q_next, color='k', zorder=1, update=False)
        plotter.draw_circle(q_next, circ_rad, edgecolor='k', facecolor='w', zorder=2)
        if not obstacles.check_collisions((q_next, goal)):
            # IF there is a direct line to the goal, then TAKE IT
            goal_distance = math.hypot(q_next[0] - goal[0], q_next[1] - goal[1])
            while goal_distance > 0:
                q_new = gen_next(q_next, goal, min(goal_distance, step_size))
                RRT.addPath(q_next, q_new)
                plotter.draw_line(q_next, q_new, color='k', zorder=1, update=False)
                plotter.draw_circle(q_new, circ_rad, edgecolor='k', facecolor='w', zorder=2)
                q_next = q_new
                goal_distance -= step_size
            break
        trials = 0
    print("n =", KD.length)
    cur = RRT[goal]
    while cur.parent:
        plotter.draw_line(cur, cur.parent, update=False, color='b', zorder=3)
        plotter.draw_circle(cur, circ_rad * 1.5, update=False,
                            facecolor='xkcd:green', edgecolor='k', zorder=4)
        cur = cur.parent
    plotter.update()

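# gen_next is called by the planners above but is not shown in this section.
# A minimal sketch under the assumption that it steps from q_near towards
# q_rand by exactly step_size; the real helper may differ.
import math

def gen_next(q_near, q_rand, step_size):
    """Return the point step_size away from q_near in the direction of q_rand."""
    dx, dy = q_rand[0] - q_near[0], q_rand[1] - q_near[1]
    dist = math.hypot(dx, dy)
    return (q_near[0] + dx / dist * step_size,
            q_near[1] + dy / dist * step_size)
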
import csv

from Observation import Observation
from KDTree import KDTree

if __name__ == '__main__':
    observations = []
    with open('example/observations.csv', 'r') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        headers = next(csv_reader)
        for row in csv_reader:
            classification = int(row[0])
            features = [float(feature) for feature in row[1:]]
            observations.append(Observation(classification, features))
    kd_tree = KDTree.build(observations)
    observation = Observation(None, [
        -1.4096938683781, 0, -0.570643869378607, -0.163979687631107,
        -0.78891028462005, -0.998607403228397, -0.08425584140619,
        0.173225008691556, 0.063331825211717
    ])
    predicted_class = kd_tree.predict_class(observation, 1.5, 20)
    closest_neighbors = kd_tree.create_grouping(observation, 1.5, 20)

def KNN(trainDatas, trainLabels, testDatas, k, distance_method='Euclidean', **kw):
    '''
    KNN classifier; uses Euclidean distance by default.

    trainDatas      : training data
    trainLabels     : labels for the training data
    testDatas       : test data
    k               : hyperparameter
    distance_method : distance metric
    **kw            : may contain a weighting method weight_method (if absent,
                      no weighting is used), and/or usingKDTree, a flag for
                      whether to use a kd-tree (absent or False means no kd-tree)
    '''
    weight_method = None
    usingKDTree = False
    # whether to use distance weighting
    if 'weight_method' in kw:
        weight_method = kw['weight_method']
    # whether to use a kd-tree
    if 'usingKDTree' in kw:
        usingKDTree = kw['usingKDTree']
    kdTree = None
    if usingKDTree:
        from KDTree import KDTree
        # build a kd-tree from the training data
        kdTree = KDTree(trainDatas, np.array(trainLabels))
    trainNum = len(trainDatas)
    testNum = len(testDatas)
    testLabels = []
    for i in range(testNum):
        # the k smallest distances, kept in descending order
        kMinDistances = [float('inf') for _ in range(k)]
        # the labels corresponding to those k distances
        kLabels = ['0' for _ in range(k)]
        testData = testDatas[i, :]
        # search with the kd-tree
        if usingKDTree:
            kMinDistances, kLabels = kdTree.searchKNearest(testData, k, distance_method)
        # without the kd-tree, fall back to a plain linear scan
        else:
            # for each test image, scan the whole training set for the labels
            # of the k nearest images
            for j in range(trainNum):
                trainData = trainDatas[j, :]
                trainLabel = trainLabels[j]
                # Manhattan distance
                if distance_method == 'Manhattan':
                    d = manhattanDistance(testData, trainData)
                # Euclidean distance
                else:
                    d = euclideanDistance(testData, trainData)
                compareDistance(d, kMinDistances, trainLabel, kLabels)
        # pick the most likely label for the current test image
        testLabels.append(mostPossibleLabel(kMinDistances, kLabels, weight_method))
    return testLabels

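# Hedged usage sketch for KNN above. manhattanDistance, euclideanDistance,
# compareDistance and mostPossibleLabel are assumed to be defined elsewhere in
# this module (and the KDTree module only if usingKDTree=True); the toy arrays
# are illustrative.
import numpy as np

trainDatas = np.array([[0.0, 0.0], [0.0, 1.0], [5.0, 5.0], [5.0, 6.0]])
trainLabels = ['a', 'a', 'b', 'b']
testDatas = np.array([[0.2, 0.3], [4.8, 5.5]])
print(KNN(trainDatas, trainLabels, testDatas, k=3))  # ['a', 'b'] under a majority vote
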
def test_build_without_patients(self):
    kd_tree = KDTree.build([])
    self.assertIsNone(kd_tree)

def buildAffinityMatrix(self, atoms, cutoff=4):
    """Build the affinity matrix for given *atoms*.

    Note that if you do not want to incorporate hydrogen and non-protein
    atoms in calculations, make the selection ``"noh and protein"``.

    :arg atoms: atoms for which the affinity matrix will be calculated
    :type atoms: :class:`~prody.atomic.Atomic`

    :arg cutoff: pairwise atomic contact cutoff distance, default is 4 Å
    :type cutoff: float
    """
    if not isinstance(atoms, prody.Atomic):
        raise TypeError('atoms must be an Atomic instance, '
                        '{0} is not valid'.format(type(atoms)))
    cutoff = float(cutoff)
    assert cutoff > 0, 'cutoff distance must be greater than 0'
    from KDTree import KDTree
    start = time.time()
    if not isinstance(atoms, prody.AtomGroup):
        atoms = atoms.getAtomGroup().copy(atoms)
    n_atoms = atoms.numAtoms()
    hv = prody.HierView(atoms)
    n_res = hv.numResidues()
    rids = np.zeros(n_atoms, int)  # residue indices of atoms
    rlen = np.zeros(n_res)         # residue lengths
    resmap = {}                    # used for symmetry purposes
    for i, res in enumerate(hv.iterResidues()):
        rids[res.getIndices()] = i
        rlen[i] = len(res)
        res = (res.getChid(), res.getNumber(), res.getIcode())
        resmap[i] = res
        resmap[res] = i
    self._resmap = resmap
    LOGGER.debug('Atoms were evaluated in {0:.2f}s.'.format(time.time() - start))

    start = time.time()
    kdtree = KDTree(3)
    kdtree.set_coords(atoms.getCoords())
    kdtree.all_search(cutoff)
    LOGGER.debug('KDTree was built in {0:.2f}s.'.format(time.time() - start))

    start = time.time()
    affinity = defaultdict(int)
    for i, j in kdtree.all_get_indices():
        i = rids[i]
        j = rids[j]
        if i == j:
            affinity[(i, j)] += 0.5
        else:
            affinity[(i, j)] += 1
    length = len(affinity)
    i = np.zeros(length, int)
    j = np.zeros(length, int)
    v = np.zeros(length, float)
    k = 0
    for key, value in affinity.items():
        i[k] = key[0]
        j[k] = key[1]
        v[k] = value
        k += 1
    rlen = rlen ** -0.5
    # = Nij * (1/Ni^0.5) * (1/Nj^0.5)
    v = v * rlen[i] * rlen[j]
    affinity = sparse.coo_matrix((v, (i, j)), shape=(n_res, n_res))
    self._affinity = affinity + affinity.T
    LOGGER.debug('Affinity matrix was built in {0:.2f}s.'.format(time.time() - start))
    self._stationary = None
    self._n_nodes = n_res

class Table:
    def __init__(self, target, max_P, max_S, atol, interval_width, prints_path,
                 s_separating_threshold, expanding_time_limit,
                 progress_P_print_rate, interesting_p_transition_threshold, very_verbose):
        self.prints_path = prints_path
        self.P = [()]
        self.S = [(t,) for t in target.internal_alphabet]  # always add to end of S!
        self.target = target
        self.max_P = max_P
        self.max_S = max_S
        self.expanding_time_limit = expanding_time_limit
        self.table_start = process_time()
        self.atol = atol
        self.interval_width = interval_width
        self.prefix_weights_dict = {}  # cache
        self.prefix_rows = {}  # cache
        self.s_separating_threshold = s_separating_threshold
        self.interesting_p_transition_threshold = interesting_p_transition_threshold
        self.number_ignored_suffixes_in_last_expand = 0
        self.compared_log = {}
        self.last_suffix_add_time = process_time()
        self.progress_P_print_rate = progress_P_print_rate
        self.skipped_P_count = 0
        self.very_verbose = very_verbose

    def compared_so_far(self, p1, p2):
        return max(self.compared_log.get((p1, p2), 0), self.compared_log.get((p2, p1), 0))

    def note_compared(self, p1, p2):
        self.compared_log[(p1, p2)] = len(self.S)

    def equal(self, r1, r2):
        return np.allclose(r1, r2, atol=self.atol)

    def prefix_to_nprow(self, prefix):
        r = self.prefix_rows.get(prefix, np.array([]))
        remaining_S = self.S[len(r):]
        if remaining_S:
            remaining = self.target.last_token_probabilities_after_pref(prefix, remaining_S)
            r = np.array(r.tolist() + remaining)
            self.prefix_rows[prefix] = r
        return r

    def get_matching_ps(self, row):
        close = self.prefix_tree.get_all_close(row, self.atol)
        close = [p for p in close if self.equal(row, self.prefix_to_nprow(p))]
        return close

    def prefix_then_suffix_prob(self, prefix, suffix):
        # for now - not bothering to remember states that are probably going to be reused a lot
        s = self.target._state_from_sequence(prefix)
        return self.target.probability_of_sequence_after_state(s, suffix)

    def most_influential_separating_suffix(self, p_main, p_close, all_conts, all_close_conts, suffixes):
        relevant = []
        for r1, r2, t in zip(all_conts, all_close_conts, self.target.input_alphabet):
            if self.equal(r1, r2):
                continue
            for v1, v2, s in zip(r1, r2, suffixes):
                if not self.equal(np.array([v1]), np.array([v2])):
                    main_prob = self.prefix_then_suffix_prob(p_main, (t,) + s)
                    p_close_prob = self.prefix_then_suffix_prob(p_close, (t,) + s)
                    min_prob = min(main_prob, p_close_prob)
                    # i.e. (t,)+s differentiates prefixes p1 and p2, and happens with
                    # probability at least min_prob after each one. (We care about the
                    # minimum probability after p1 and p2 because e.g. if the
                    # probability of (t,)+s happening after p1 is 0, then it is not an
                    # interesting separating suffix, even if its probability after p2 is high.)
                    relevant.append((t, s, min_prob))
        # print("number of potential separating suffixes for", p_main, "and", p_close, ":", len(relevant), file=self.prints_path)
        # print("they are:\n", "\n".join([str(x) for x in sorted(relevant, key=lambda x: x[2], reverse=True)]), file=self.prints_path)
        if not relevant:
            return None, None
        most_relevant = relevant[np.argmax([x[2] for x in relevant])]  # tuple with highest min_prob
        # print("most relevant was:", most_relevant, "with conditional probability:", most_relevant[2], file=self.prints_path)
        return (most_relevant[0],) + most_relevant[1], most_relevant[2]

    def check_consistency(self, prefix):
        # remember for each p1,p2 up to which index in S they've already been checked
        row = self.prefix_to_nprow(prefix)
        close_ps = self.get_matching_ps(row)
        all_conts_full_S = [self.prefix_to_nprow(prefix + (t,)) for t in self.target.input_alphabet]
        close_p_weights = [self.prefix_weight(p) for p in close_ps]
        num_checks = 1
        for _, close_p in sorted(zip(close_p_weights, close_ps), key=lambda x: x[0], reverse=True):
            if close_p == prefix:
                continue  # don't waste time  # TODO: haven't actually run code since adding this
            start = process_time()
            num_checks += 1
            all_close_conts = [self.prefix_to_nprow(close_p + (t,)) for t in self.target.input_alphabet]
            # todo: these should also be sorted by
            # max_(t in alphabet)(min_(p in {main_p, close_p})(likelihood of t appearing after p))
            checked_so_far = self.compared_so_far(prefix, close_p)
            # next-one-token vectors for the prefix that is similar to the current one, on current S
            all_close_conts = [r[checked_so_far:] for r in all_close_conts]
            # prefix vectors
            all_conts = [r[checked_so_far:] for r in all_conts_full_S]
            suffixes = self.S[checked_so_far:]
            new_suffix, new_suffix_relevance = self.most_influential_separating_suffix(
                prefix, close_p, all_conts, all_close_conts, suffixes)
            # will now process the results of the comparison, but jot down that it
            # never has to be done again (on this part of S)
            self.note_compared(prefix, close_p)
            if new_suffix is not None:
                assert new_suffix not in self.S  # else something is very wrong
                if new_suffix_relevance > self.s_separating_threshold:
                    self.S.append(new_suffix)
                    print("added separating suffix:", tup2seq(new_suffix), file=self.prints_path)
                    print("time since last suffix add:", process_time() - self.last_suffix_add_time,
                          file=self.prints_path)
                    self.last_suffix_add_time = process_time()
                    print("overall ignored", self.number_ignored_suffixes_in_last_expand,
                          "suffixes so far in this expand", file=self.prints_path, flush=True)
                    return False
                else:
                    print("best separating suffix", new_suffix, "had minimal probability",
                          new_suffix_relevance,
                          "of being visited from one of the prefs, and was ignored",
                          file=self.prints_path, flush=True)
                    self.number_ignored_suffixes_in_last_expand += 1
        return True

    def last_token_weight(self, prefix):
        if len(prefix) == 0:
            return 1
        # use own functions for prefix weight because they have memory
        return self.prefix_weight(prefix) / self.prefix_weight(prefix[:-1])

    def process(self, prefix):
        # fails if there was an inconsistency
        if prefix not in self.P:
            # check worthiness for addition to P
            row = self.prefix_to_nprow(prefix)
            if len(self.get_matching_ps(row)) > 0:
                # this row is not in P (so only here for the closedness check),
                # and indeed closed: wrap it up
                return True
            if self.last_token_weight(prefix) < self.interesting_p_transition_threshold:
                # this row isn't closed, but we don't care for it anyway
                self.skipped_P_count += 1
                if self.skipped_P_count % 1e4 == 0:
                    print("not expanding prefix:", prefix,
                          "(last token weight is:", clean_val(self.last_token_weight(prefix), 6),
                          "), have ignored:", self.skipped_P_count, "prefixes so far",
                          file=self.prints_path, flush=True)
                return True
            # the prefix was not yet in P, has no matching rows, and comes from a
            # strong transition, so add it (and queue its children)
            self.P.append(prefix)  # unclosed, and not from a worthless transition
            # P-prefixes go in the prefix tree to be used and found in the future.
            # Only ever add to the tree once: all those in the initial P are added
            # on the expansion initiation; new additions to P (which only happen
            # here) are inserted here.
            self.prefix_tree.insert(prefix, row)
            if len(self.P) % self.progress_P_print_rate == 0:
                print("|P|=", len(self.P), ", time since extraction start:",
                      clean_val(process_time() - self.table_start),
                      file=self.prints_path, flush=True)
        # might occasionally get things that have already been accepted into P,
        # e.g. through cexs; then their children have to be processed
        # ('closedness') regardless, so we're out of the if now
        [self.queue_prefix(prefix + (t,)) for t in self.target.input_alphabet]
        if len(self.S) >= self.max_S or len(self.P) >= self.max_P:
            # time to stop, no point in adding more S's, i.e. checking consistency.
            # (if too many Ps then also no point adding more Ss, but if too many Ss,
            # return success and just stop checking consistency; might add a few
            # more Ps for a while)
            return True
        return self.check_consistency(prefix)

    def queue_prefix(self, prefix):
        if prefix in self.been_queued:  # been_queued is empty for every new expansion
            # already got this one, thanks. Will happen often once P has several
            # entries, e.g. a, aa, aab, as children of some go into what P already
            # has, e.g. aa will try to add aab
            return
        prefix_weight = self.prefix_weight(prefix)
        heappush(self.prefix_queue, (-prefix_weight, prefix))
        self.been_queued.add(prefix)

    def prefix_weight(self, prefix):
        res = self.prefix_weights_dict.get(prefix, None)
        if res is None:
            res = self.target.weight(prefix, as_prefix=True)
            self.prefix_weights_dict[prefix] = res
        return res

    def init_expansion(self):
        self.prefix_queue = []
        self.prefix_tree = KDTree(self.atol, interval_width=self.interval_width)
        # to avoid double queueing and inserting things... may happen as P is
        # prefix closed, and we want to queue all extensions of each item in P,
        # as well as all of P
        self.been_queued = set()
        # might be adding some prefixes it isn't interested in to get to the
        # counterexample added to P, so have to use force
        [self.queue_prefix(p) for p in self.P]
        [self.prefix_tree.insert(p, self.prefix_to_nprow(p)) for p in self.P]

    def expand(self):
        self.number_ignored_suffixes_in_last_expand = 0
        restart = True
        while restart:
            restart = False
            self.init_expansion()
            print("beginning expansion: |P|:", len(self.P), "|S|:", len(self.S),
                  flush=True, file=self.prints_path)
            while self.prefix_queue and len(self.P) < self.max_P:
                if (process_time() - self.table_start) > self.expanding_time_limit:
                    print("reached max time, wrapping up", file=self.prints_path)
                    break  # have to start wrapping it up
                neg_w, prefix = heappop(self.prefix_queue)
                process_success = self.process(prefix)
                if not process_success:  # something was added to S
                    restart = True
                    break  # reinit the expansion
        print("finished expanding, |P|:", len(self.P), "|S|:", len(self.S),
              flush=True, file=self.prints_path)

    def add_counterexample(self, cex):
        print("adding counterexample:", cex)
        cex = tuple(cex)  # just in case
        start_P = len(self.P)
        for n in range(len(cex) + 1):
            if cex[:n] not in self.P:
                self.P.append(cex[:n])
        if not len(self.P) > start_P:
            print("cex did not add anything to P - it was all already here?", file=self.prints_path)
            print("cex was:", tup2seq(cex), file=self.prints_path)
            raise OhHeck()

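# Table above constructs its prefix index as KDTree(atol, interval_width=...)
# and uses only insert(key, row) / get_all_close(row, atol); that KDTree is not
# shown here. A brute-force stand-in with the same interface, as an assumption
# sketch rather than the project's actual implementation:
import numpy as np

class BruteForceCloseIndex:
    """Stand-in index: stores (key, row) pairs, returns keys with close rows."""

    def __init__(self, atol, interval_width=None):
        self.atol = atol
        self.entries = []  # (key, row) pairs

    def insert(self, key, row):
        self.entries.append((key, np.asarray(row)))

    def get_all_close(self, row, atol):
        row = np.asarray(row)
        # rows grow as S grows, so compare only the shared leading coordinates;
        # Table re-checks candidates with equal() afterwards anyway
        return [key for key, r in self.entries
                if np.allclose(r[:len(row)], row[:len(r)], atol=atol)]
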
def build_kd_trees(self):
    # Finally, index nodes using KD Trees
    self.region_kd_tree = KDTree(self.nodes, leaf_size=self.region_kd_size)
    self.lookup_kd_tree = KDTree(self.nodes, leaf_size=self.lookup_kd_size)

def benchmark_node_lookup():
    from datetime import datetime
    print("Loading")
    nyc_map = Map("nyc_map4/nodes.csv", "nyc_map4/links.csv")
    max_speed = nyc_map.get_max_speed()
    print("Max speed = " + str(max_speed))
    print("Reading file")
    sample_trips = []
    with open('sample.csv', 'r') as f:
        r = csv.reader(f)
        next(r)  # throw out header
        for line in r:
            [_,  # medallion
             _,  # hack_license
             _,  # vendor_id
             _,  # rate_code
             _,  # store_and_fwd_flag
             _,  # pickup_datetime
             _,  # dropoff_datetime
             _,  # passenger_count
             _,  # trip_time_in_secs
             _,  # trip_distance
             pickup_longitude, pickup_latitude,
             dropoff_longitude, dropoff_latitude] = line
            [pickup_longitude, pickup_latitude,
             dropoff_longitude, dropoff_latitude] = map(float, [
                 pickup_longitude, pickup_latitude,
                 dropoff_longitude, dropoff_latitude])
            sample_trips.append([pickup_longitude, pickup_latitude,
                                 dropoff_longitude, dropoff_latitude])
            if len(sample_trips) >= 10000:
                break
    for leaf_size in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 40, 50]:
        d1 = datetime.now()
        nyc_map.lookup_kd_tree = KDTree(nyc_map.nodes, leaf_size=leaf_size)
        d2 = datetime.now()
        for [pickup_longitude, pickup_latitude,
             dropoff_longitude, dropoff_latitude] in sample_trips:
            orig = nyc_map.get_nearest_node(pickup_latitude, pickup_longitude)
            # print("calls : " + str(nyc_map.lookup_kd_tree.calls))
            dest = nyc_map.get_nearest_node(dropoff_latitude, dropoff_longitude)
            # print("calls : " + str(nyc_map.lookup_kd_tree.calls))
        d3 = datetime.now()
        print("leaf_size=" + str(leaf_size) + " build time: " + str(d2 - d1) +
              " query time: " + str(d3 - d2))

def run(obstacles, start, goal, step_size, max_size, plotter):
    circ_rad = min(step_size / 5, 5)
    final_pos = [np.array(goal), np.array(start)]
    # Pre-compute for generating new random points in Q_free
    minp, maxp = obstacles.vertices.min(0), obstacles.vertices.max(0)
    span, offset = (maxp - minp) * 1.1, minp - (maxp - minp) * 0.05

    def gen_valid_rand(valid_function):
        """Generates a valid q_rand in Q_free given a validity function"""
        tmp = np.random.random(2) * span + offset
        while not valid_function(*tmp):
            tmp = np.random.random(2) * span + offset
        return tmp

    KD = [KDTree(start), KDTree(goal)]
    RRT = [PathTree(start), PathTree(goal)]
    n = 1
    rnd_display = False
    obstacles = Obstacles(obstacles.to_polygons())
    """
    • Expand tree T_1 randomly, add node q_new
    • Expand T_2 towards q_new
    • If tree T_2 connects to q_new, a path is formed; else add a q_new for tree T_2
    • Now expand T_1 to q_new in tree T_2
    • Keep swapping T_1 and T_2 for expansion towards the other tree until they meet
    """
    trials = 0
    q_new, last_expanded = None, -1
    while KD[0].length + KD[1].length < 1000:
        trials += 1
        if rnd_display:
            circ1.remove()
            rnd_display = False
        n = 1 - n
        # If the last expanded node was in the other tree, try expanding towards q_new
        if last_expanded != n and q_new is not None:
            q_near, dist = KD[n].nearestNode(q_new)
            q_next = gen_next(q_near, q_new, step_size) if dist > step_size else q_new
            # Expansion towards q_new is possible. Add to path and goal check
            if obstacles.point_is_valid(*q_next) and \
                    not obstacles.check_collisions((q_near, q_next)):
                RRT[n].addPath(q_near, q_next)
                KD[n].addNode(q_next)
                plotter.draw_circle(q_next, circ_rad, edgecolor='k', facecolor='w', zorder=1)
                plotter.draw_line(q_near, q_next, color='kb'[n], zorder=1)
                if q_next == q_new:
                    break  # Path found
                q_new, last_expanded, trials = q_next, n, 0  # Update for next iteration
                continue
        # If the last expanded node was not in the other tree, or expansion to
        # q_new was not possible, try to expand to q_rand if possible
        q_rand = gen_valid_rand(obstacles.point_is_valid) if np.random.randint(0, 100) > 5 else final_pos[n]
        rnd_display, circ1 = True, plotter.draw_circle(q_rand, 5, zorder=5)
        q_near, dist = KD[n].nearestNode(q_rand)
        if dist < step_size:
            if trials < 10:
                continue
            q_next = tuple(q_rand)
        else:
            q_next = gen_next(q_near, q_rand, step_size)
        if not obstacles.point_is_valid(*q_next):
            continue
        if obstacles.check_collisions((q_near, q_next)):
            continue
        KD[n].addNode(q_next)
        RRT[n].addPath(q_near, q_next)
        plotter.draw_line(q_near, q_next, color='kb'[n], zorder=1)
        plotter.draw_circle(q_next, circ_rad, edgecolor='k', facecolor='w', zorder=1)
        q_new, last_expanded, trials = q_next, n, 0
    print("n =", KD[0].length + KD[1].length, "(%d, %d)" % (KD[0].length, KD[1].length))
    # Plot out goal path
    cur = RRT[0][tuple(q_next)]
    while cur.parent:
        plotter.draw_line(cur, cur.parent, update=False, color='y', zorder=3)
        plotter.draw_circle(cur, circ_rad * 1.5, update=False,
                            facecolor='xkcd:green', edgecolor='k', zorder=4)
        cur = cur.parent
    cur = RRT[1][tuple(q_next)]
    while cur.parent:
        plotter.draw_line(cur, cur.parent, update=False, color='y', zorder=3)
        plotter.draw_circle(cur, circ_rad * 1.5, update=False,
                            facecolor='xkcd:green', edgecolor='k', zorder=4)
        cur = cur.parent
    plotter.update()
