def distances_matrix(vertices):
    """Return pairwise vertex distances within range, sorted by distance.

    A KD-tree is built over ``vertices`` and every pair of distinct vertices
    closer than four times the value returned by
    ``average_spacing(data=vertices, neighbours=5)`` is collected.

    Args:
        vertices: point coordinates accepted by ``KDTree``.

    Returns:
        dict mapping ``(i, j)`` index pairs (``i != j``) to their distance,
        in ascending order of distance.  Pairs at equal distance keep
        ascending ``(i, j)`` order.  Pairs with no stored distance, or with a
        stored distance of exactly 0.0, are omitted.
    """
    avsp = average_spacing(data=vertices, neighbours=5)
    tree = KDTree(vertices)
    dist = tree.sparse_distance_matrix(tree, max_distance=avsp * 4)

    # Visit only the entries the sparse matrix actually stores instead of
    # probing all len(vertices) ** 2 index pairs one by one.
    within_range = {
        (int(i), int(j)): d
        for (i, j), d in dist.items()
        if i != j and d != 0.0
    }

    # Sorting on (distance, key) gives the same order as a stable sort by
    # distance over pairs enumerated row by row.
    return dict(sorted(within_range.items(), key=lambda item: (item[1], item[0])))
class test_sparse_distance_matrix_compiled:
    """Checks of ``cKDTree.sparse_distance_matrix`` on two random point sets."""

    def setUp(self):
        """Build compiled and reference trees over the same seeded data."""
        n_points, n_dims = 50, 4
        np.random.seed(0)
        # The draw order is significant: first sample -> T1, second -> T2.
        first_cloud = np.random.randn(n_points, n_dims)
        second_cloud = np.random.randn(n_points, n_dims)
        self.r = 0.5
        self.T1 = cKDTree(first_cloud, leafsize=2)
        self.T2 = cKDTree(second_cloud, leafsize=2)
        self.ref_T1 = KDTree(first_cloud, leafsize=2)
        self.ref_T2 = KDTree(second_cloud, leafsize=2)

    def test_consistency_with_neighbors(self):
        """Sparse entries must agree with ``query_ball_tree`` neighbours."""
        sparse = self.T1.sparse_distance_matrix(self.T2, self.r)
        ball = self.T1.query_ball_tree(self.T2, self.r)
        # Every neighbour pair carries the true point-to-point distance.
        for row, cols in enumerate(ball):
            for col in cols:
                expected = distance(self.T1.data[row], self.T2.data[col])
                assert_almost_equal(sparse[row, col], expected, decimal=14)
        # Every stored entry corresponds to a neighbour pair.
        for (row, col), _stored in sparse.items():
            assert_(col in ball[row])

    def test_zero_distance(self):
        """A tree compared against itself must not raise."""
        # raises an exception for bug 870 (FIXME: Does it?)
        self.T1.sparse_distance_matrix(self.T1, self.r)

    def test_consistency_with_python(self):
        """Compiled and reference trees must produce the same matrix."""
        compiled = self.T1.sparse_distance_matrix(self.T2, self.r)
        reference = self.ref_T1.sparse_distance_matrix(self.ref_T2, self.r)
        assert_array_almost_equal(compiled.todense(), reference.todense(),
                                  decimal=14)
def probalistic_cross_k_function(y_true, y_pred, minkov_degree=1, max_distance=10):
    """Compute a prediction-weighted cross-K curve and its baseline.

    For every integer radius ``0 .. max_distance`` and every cell where
    ``y_true == 1``, the ``y_pred`` values of the neighbouring cells (those
    with ``y_pred >= 0`` and a stored Minkowski-``minkov_degree`` distance
    within the radius) are summed.  Both inputs are indexed with two
    coordinates, i.e. they are used as 2-D arrays.

    Args:
        y_true: array whose cells equal to 1 are the reference points.
        y_pred: array of predicted values, same indexing as ``y_true``.
        minkov_degree: ``p`` passed to ``sparse_distance_matrix``.
        max_distance: largest integer radius evaluated (inclusive).

    Returns:
        ``(k_value_list, random_list)``: per radius, the mean weighted
        neighbour sum divided by ``mean(y_pred)``, and the mean neighbour
        count (the count is scaled by ``mean(y_pred)`` and divided by it
        again).
    """
    true_coords = np.stack(np.where(y_true == 1)).T
    pred_coords = np.stack(np.where(y_pred >= 0)).T
    true_tree = KDTree(true_coords)
    pred_tree = KDTree(pred_coords)
    dense = np.mean(y_pred)

    k_value_list, random_list = [], []
    for radius in range(max_distance + 1):
        pairs = true_tree.sparse_distance_matrix(
            pred_tree, p=minkov_degree, max_distance=radius)
        weighted_sums = []
        baseline_sums = []
        for row in range(len(true_coords)):
            neighbour_ids = pairs.getrow(row).indices
            # Transpose so the two coordinate axes can index y_pred directly.
            coord_axes = pred_coords[list(neighbour_ids)].T
            weighted_sums.append(sum(y_pred[coord_axes[0], coord_axes[1]]))
            baseline_sums.append(len(neighbour_ids) * dense)
        k_value_list.append(np.mean(weighted_sums) / dense)
        random_list.append(np.mean(baseline_sums) / dense)
    return k_value_list, random_list
class test_sparse_distance_matrix:
    """Checks of ``KDTree.sparse_distance_matrix`` on two random point sets."""

    def setUp(self):
        """Create two unseeded random trees and the query radius."""
        n_points, n_dims = 50, 4
        self.T1 = KDTree(np.random.randn(n_points, n_dims), leafsize=2)
        self.T2 = KDTree(np.random.randn(n_points, n_dims), leafsize=2)
        self.r = 0.3

    def test_consistency_with_neighbors(self):
        """Sparse entries must agree with ``query_ball_tree`` neighbours."""
        sparse = self.T1.sparse_distance_matrix(self.T2, self.r)
        ball = self.T1.query_ball_tree(self.T2, self.r)
        # Every neighbour pair carries exactly the point-to-point distance.
        for row, cols in enumerate(ball):
            for col in cols:
                expected = distance(self.T1.data[row], self.T2.data[col])
                assert_equal(sparse[row, col], expected)
        # Every stored entry corresponds to a neighbour pair.
        for (row, col), _stored in sparse.items():
            assert_(col in ball[row])

    def test_zero_distance(self):
        """A tree compared against itself must not raise."""
        # raises an exception for bug 870
        self.T1.sparse_distance_matrix(self.T1, self.r)
def distances_matrix(vertices, max_distance=10.0):
    """Return pairwise vertex distances within range, sorted by distance.

    Args:
        vertices: point coordinates accepted by ``scipy.spatial.KDTree``.
        max_distance: pairs farther apart than this are not reported.
            Defaults to 10.0, the cutoff that used to be hard-coded.

    Returns:
        dict mapping ``(i, j)`` index pairs (``i != j``) to their distance,
        in ascending order of distance.  Pairs at equal distance keep
        ascending ``(i, j)`` order.  An empty ``vertices`` gives ``{}``.
    """
    from scipy.spatial import KDTree

    if len(vertices) == 0:
        return {}

    tree = KDTree(vertices)
    dist = tree.sparse_distance_matrix(tree, max_distance=max_distance)

    # Only stored entries are real distances.  Looking up an out-of-range
    # pair in the sparse matrix yields 0.0, which previously made the most
    # distant pairs sort first as if they were the closest ones.
    within_range = {
        (int(i), int(j)): d
        for (i, j), d in dist.items()
        if i != j and d != 0.0
    }
    return dict(sorted(within_range.items(), key=lambda item: (item[1], item[0])))
def compute(self, dataset_pool):
    """Compute the distance-weighted job count around every parcel.

    Neighbourhoods (``query_ball_tree``) and pairwise distances
    (``sparse_distance_matrix``) within ``self.radius`` are read from the
    cache when both are available; otherwise both are recomputed from the
    parcel coordinates and written back to the cache.

    Args:
        dataset_pool: pool providing the "job" dataset.

    Returns:
        array built from
        ``self.euclidean_accessibility_for_parcel(results, distances, arr)``.
    """
    with logger.block(
        name="compute variable jobs_within_DDD_of_parcel_weighted with DDD=%s" % self.radius,
        verbose=False
    ):
        results = None
        distances = None
        with logger.block(name="trying to read cache files", verbose=False):
            # The two caches are loaded independently so that a failure of
            # one is reported under its own file name.
            try:
                results = self._load_results()
            except IOError:
                logger.log_warning("Cache file %s could not be loaded" % self.cache_file_name)
            try:
                distances = self._load_distances()
            except IOError:
                logger.log_warning("Cache file %s could not be loaded" % self.cache_distances_file_name)

        with logger.block(name="initialize datasets", verbose=False):
            parcels = self.get_dataset()
            arr = parcels.sum_dataset_over_ids(dataset_pool.get_dataset("job"), constant=1)

        # Compare against None explicitly: a sparse distance matrix has no
        # usable truth value (scipy raises ValueError for bool() of a matrix
        # that is not 1x1), so `not distances` fails once a cache is loaded.
        # NOTE(review): assumes the loaders signal "nothing cached" by
        # raising IOError or returning None - confirm against _load_results
        # and _load_distances.
        if results is None or distances is None:
            with logger.block(name="initialize coords", verbose=False):
                coords = column_stack(
                    (parcels.get_attribute("x_coord_sp"), parcels.get_attribute("y_coord_sp"))
                )
            with logger.block(name="build KDTree", verbose=False):
                kd_tree = KDTree(coords, 100)
            with logger.block(name="compute neighbourhoods"):
                results = kd_tree.query_ball_tree(kd_tree, self.radius)
            with logger.block(name="compute euclidean distances"):
                distances = kd_tree.sparse_distance_matrix(kd_tree, self.radius)
            with logger.block(name="cache neighbourhoods"):
                if not SimulationState().cache_directory_exists():
                    logger.log_warning("Cache does not exist and is created.")
                    SimulationState().create_cache_directory()
                self._cache_results(results)
                self._cache_distances(distances)

        with logger.block(name="Sum weighted jobs in neighbourhood", verbose=False):
            return_values = array(self.euclidean_accessibility_for_parcel(results, distances, arr))
    return return_values
def lj_given_kd_tree(kd, positions, distance=2.5):
    """Lennard-Jones forces from a KD-tree neighbour search (not implemented).

    This function has never worked: it raises unconditionally.  The abandoned
    sketch that used to follow the ``raise`` was unreachable and has been
    removed.  Its intended steps were:

    1. build a ``KDTree`` over ``positions`` and take its
       ``sparse_distance_matrix`` against itself within ``distance``;
    2. convert that matrix to CSC and, for every coordinate dimension, tile
       the coordinate column over the sparsity pattern and subtract the
       transpose to obtain per-dimension displacement matrices;
    3. pass the displacement matrices plus the distance matrix to
       ``lennardJonesForce``.

    TODO: CSC sparse matrices can't do most of the things needed for LJ
    (fancy indexing, division).  Maybe another sparse matrix type can?

    Args:
        kd: unused.
        positions: unused.
        distance: unused; 2.5 was the intended neighbour cutoff.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError(
        "lj_given_kd_tree is not implemented: CSC sparse matrices lack the "
        "fancy indexing and division the Lennard-Jones computation needs"
    )
def point_to_point_distance(point_list1, point_list2, sparse=True):
    """
    Computes the point to point distance matrix for two point lists.

    Args:
        point_list1 (numpy.array): a list of points
        point_list2 (numpy.array): a list of points
        sparse (bool): if True returns a sparse matrix

    Returns:
        (numpy.array or sparse matrix) a matrix with all the distances
        between points; entry ``[i, j]`` is the distance between
        ``point_list1[i]`` and ``point_list2[j]``
    """
    tree1 = KDTree(point_list1)
    tree2 = KDTree(point_list2)
    # An infinite cutoff keeps every pair.  ``np.inf`` is used because the
    # ``np.infty`` alias was removed in NumPy 2.0.
    sdm = tree1.sparse_distance_matrix(tree2, np.inf)
    return sdm if sparse else sdm.toarray()
class test_sparse_distance_matrix_compiled:
    """Checks of ``cKDTree.sparse_distance_matrix`` on two random point sets."""

    def setUp(self):
        """Build compiled and reference trees over the same seeded data."""
        n_points, n_dims = 50, 4
        np.random.seed(0)
        # The draw order is significant: first sample -> T1, second -> T2.
        first_cloud = np.random.randn(n_points, n_dims)
        second_cloud = np.random.randn(n_points, n_dims)
        self.r = 0.5
        self.T1 = cKDTree(first_cloud, leafsize=2)
        self.T2 = cKDTree(second_cloud, leafsize=2)
        self.ref_T1 = KDTree(first_cloud, leafsize=2)
        self.ref_T2 = KDTree(second_cloud, leafsize=2)

    def test_consistency_with_neighbors(self):
        """Sparse entries must agree with ``query_ball_tree`` neighbours."""
        sparse = self.T1.sparse_distance_matrix(self.T2, self.r)
        ball = self.T1.query_ball_tree(self.T2, self.r)
        # Every neighbour pair carries the true point-to-point distance.
        for row, cols in enumerate(ball):
            for col in cols:
                expected = distance(self.T1.data[row], self.T2.data[col])
                assert_almost_equal(sparse[row, col], expected, decimal=14)
        # Every stored entry corresponds to a neighbour pair.
        for (row, col), _stored in sparse.items():
            assert_(col in ball[row])

    def test_zero_distance(self):
        """A tree compared against itself must not raise."""
        # raises an exception for bug 870 (FIXME: Does it?)
        self.T1.sparse_distance_matrix(self.T1, self.r)

    def test_consistency_with_python(self):
        """Compiled and reference trees must produce the same matrix."""
        compiled = self.T1.sparse_distance_matrix(self.T2, self.r)
        reference = self.ref_T1.sparse_distance_matrix(self.ref_T2, self.r)
        assert_array_almost_equal(compiled.todense(), reference.todense(),
                                  decimal=14)

    def test_against_logic_error_regression(self):
        """A tree against itself must give a symmetric distance matrix."""
        # regression test for gh-5077 logic error
        np.random.seed(0)
        samples = np.random.randn(18, 2)
        # Truncating to integers produces many coincident points.
        too_many = np.array(samples, dtype=int)
        tree = cKDTree(too_many, balanced_tree=False, compact_nodes=False)
        dense = tree.sparse_distance_matrix(tree, 3).todense()
        assert_array_almost_equal(dense, dense.T, decimal=14)
class test_sparse_distance_matrix_compiled:
    """Compares ``cKDTree.sparse_distance_matrix`` with ball queries and ``KDTree``."""

    def setUp(self):
        """Seed the RNG and index two 50 x 4 samples with both tree types."""
        shape = (50, 4)
        np.random.seed(0)
        # Keep the sampling order: data for T1 is drawn before data for T2.
        sample_a = np.random.randn(*shape)
        sample_b = np.random.randn(*shape)
        self.T1, self.T2 = (cKDTree(sample_a, leafsize=2),
                            cKDTree(sample_b, leafsize=2))
        self.ref_T1, self.ref_T2 = (KDTree(sample_a, leafsize=2),
                                    KDTree(sample_b, leafsize=2))
        self.r = 0.5

    def test_consistency_with_neighbors(self):
        """Matrix entries and ball-query neighbours must describe the same pairs."""
        matrix = self.T1.sparse_distance_matrix(self.T2, self.r)
        neighbours = self.T1.query_ball_tree(self.T2, self.r)
        for i, close_points in enumerate(neighbours):
            for j in close_points:
                true_distance = distance(self.T1.data[i], self.T2.data[j])
                assert_almost_equal(matrix[i, j], true_distance, decimal=14)
        for (i, j), _value in matrix.items():
            assert_(j in neighbours[i])

    def test_zero_distance(self):
        """Self-comparison (zero distances on the diagonal) must not raise."""
        # raises an exception for bug 870 (FIXME: Does it?)
        self.T1.sparse_distance_matrix(self.T1, self.r)

    def test_consistency_with_python(self):
        """The compiled result must match the reference ``KDTree`` result."""
        from_compiled = self.T1.sparse_distance_matrix(self.T2, self.r)
        from_reference = self.ref_T1.sparse_distance_matrix(self.ref_T2, self.r)
        assert_array_almost_equal(from_compiled.todense(),
                                  from_reference.todense(), decimal=14)

    def test_against_logic_error_regression(self):
        """The self-distance matrix of integer-truncated points is symmetric."""
        # regression test for gh-5077 logic error
        np.random.seed(0)
        raw = np.random.randn(18, 2)
        too_many = np.array(raw, dtype=int)
        tree = cKDTree(too_many, balanced_tree=False, compact_nodes=False)
        full = tree.sparse_distance_matrix(tree, 3).todense()
        assert_array_almost_equal(full, full.T, decimal=14)
def get_contacts(protein, ligand, cutoff):
    """
    Yields the inter-atomic contacts in a protein-ligand complex, grouped by
    ligand atoms.

    This is a generator.  For every non-hydrogen ligand atom that has at
    least one protein atom within ``cutoff`` it yields ``(hetatm, atoms)``,
    where ``atoms`` is a list of ``(protein_atom, distance)`` tuples with
    hydrogens left out.  ``atoms`` can be empty when all atoms in range are
    hydrogens.  Groups come in ascending ligand atom index; contacts inside
    a group in ascending protein atom index.

    ``protein`` and ``ligand`` must expose ``.atoms`` (each atom having
    ``.coords``) and ``.OBMol.GetAtom(idx)``.
    """
    # CREATE KDTREES FOR BOTH THE PROTEIN AND THE LIGAND
    kdprot = KDTree(numpy.array([atom.coords for atom in protein.atoms]), leafsize=10)
    kdlig = KDTree(numpy.array([atom.coords for atom in ligand.atoms]), leafsize=10)

    # CREATE A SCIPY SPARSE DISTANCE MATRIX
    sdm = kdlig.sparse_distance_matrix(kdprot, cutoff, p=2.0)

    # CREATE A CONTACT LIST OF TUPLES IN THE FORM ((HETATM IDX, ATOM IDX), DISTANCE)
    # AND SORT BY HETATM IDX.  items() IS USED BECAUSE iteritems() ONLY EXISTS
    # ON PYTHON 2.
    contacts = sorted(sdm.items(), key=itemgetter(0))

    # GROUP CONTACTS BY LIGAND ATOMS
    for hidx, contactiter in groupby(contacts, key=lambda contact: contact[0][0]):
        # KD-TREE ROWS ARE 0-BASED; GetAtom IS CALLED WITH THE INDEX PLUS ONE
        hetatm = ligand.OBMol.GetAtom(int(hidx + 1))

        # IGNORE HYDROGENS
        if hetatm.IsHydrogen():
            continue

        atoms = []
        for (_, aidx), distance in contactiter:
            # GET THE PROTEIN ATOM
            atom = protein.OBMol.GetAtom(int(aidx + 1))

            # IGNORE THIS ONE AS WELL IF HYDROGEN
            if atom.IsHydrogen():
                continue

            atoms.append((atom, distance))

        yield hetatm, atoms
def get_contacts(protein, ligand, cutoff):
    """
    Yields the inter-atomic contacts in a protein-ligand complex, grouped by
    ligand atoms.

    This is a generator.  For every non-hydrogen ligand atom that has at
    least one protein atom within ``cutoff`` it yields ``(hetatm, atoms)``,
    where ``atoms`` is a list of ``(protein_atom, distance)`` tuples with
    hydrogens left out.  ``atoms`` can be empty when all atoms in range are
    hydrogens.  Groups come in ascending ligand atom index; contacts inside
    a group in ascending protein atom index.

    ``protein`` and ``ligand`` must expose ``.atoms`` (each atom having
    ``.coords``) and ``.OBMol.GetAtom(idx)``.
    """
    # CREATE KDTREES FOR BOTH THE PROTEIN AND THE LIGAND
    protein_coords = numpy.array([atom.coords for atom in protein.atoms])
    ligand_coords = numpy.array([atom.coords for atom in ligand.atoms])
    kdprot = KDTree(protein_coords, leafsize=10)
    kdlig = KDTree(ligand_coords, leafsize=10)

    # CREATE A SCIPY SPARSE DISTANCE MATRIX
    sdm = kdlig.sparse_distance_matrix(kdprot, cutoff, p=2.0)

    # CREATE A CONTACT LIST OF TUPLES IN THE FORM ((HETATM IDX, ATOM IDX), DISTANCE)
    # AND SORT BY HETATM IDX.  items() IS USED BECAUSE iteritems() ONLY EXISTS
    # ON PYTHON 2.
    contacts = sorted(sdm.items(), key=itemgetter(0))

    # GROUP CONTACTS BY LIGAND ATOMS
    for hidx, contactiter in groupby(contacts, key=lambda contact: contact[0][0]):
        # KD-TREE ROWS ARE 0-BASED; GetAtom IS CALLED WITH THE INDEX PLUS ONE
        hetatm = ligand.OBMol.GetAtom(int(hidx + 1))

        # IGNORE HYDROGENS
        if hetatm.IsHydrogen():
            continue

        atoms = []
        for (_, aidx), distance in contactiter:
            # GET THE PROTEIN ATOM
            atom = protein.OBMol.GetAtom(int(aidx + 1))

            # IGNORE THIS ONE AS WELL IF HYDROGEN
            if atom.IsHydrogen():
                continue

            atoms.append((atom, distance))

        yield hetatm, atoms
from itertools import combinations
from itertools import permutations
import time

# Radius used for both the neighbour query and the sparse distance matrix.
d_threshold = 1.5

# First point set: (x[k], y[k]).
x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
y = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
# Materialise the pairs: on Python 3 zip() is a one-shot iterator, and the
# same points are handed to both KDTree and cKDTree below.
points = list(zip(x, y))

# Second point set: (x2[k], y2[k]).  It used to be built from x and y by
# mistake, which left x2/y2 unused and made both sets identical.
x2 = [1, 3, 4, 5, 6, 7, 8, 8, 10, 16]
y2 = [1, 2, 3, 4, 5, 6, 7, 8, 9, 16.1]
points2 = list(zip(x2, y2))

# time.clock() was removed in Python 3.8; keep using it where it exists and
# fall back to time.perf_counter() otherwise.
_clock = getattr(time, "clock", None) or time.perf_counter
start = _clock()

# Pure-Python trees.
A_tree = KDTree(points)
B_tree = KDTree(points2)
neighbors = A_tree.query_ball_tree(B_tree, d_threshold)  # a list of lists
distances = A_tree.sparse_distance_matrix(B_tree, d_threshold)

# Compiled trees over the same data.
A_tree = cKDTree(points)
B_tree = cKDTree(points2)
cneighbors = A_tree.query_ball_tree(B_tree, d_threshold)  # a list of lists
cdistances = A_tree.sparse_distance_matrix(B_tree, d_threshold)

# The two implementations can be compared with `neighbors == cneighbors`;
# the individual distances are available through `distances.items()`.
N, D = data.shape print(data, data.shape) if False: print("plotting corner") figure = corner.corner(data) fn = "{}/data.{}".format(dir, suffix) figure.savefig(fn) print(fn) print("making KD tree") tree = KDTree(data) print(tree) print("making graph data") graph = tree.sparse_distance_matrix(tree, max_distance=0.1) # magic number print(graph.shape, len(graph)) print("finding connected components") cc = sp.csgraph.connected_components(graph, directed=False, return_labels=True) print(cc, cc[1].shape) print("writing pickle") pfn = 'data/cc.pkl' pickle_to_file(pfn, (data, cc)) print(pfn) K, ks = cc for k in range(K): I = (ks == k) if np.sum(I) > 16: # magic number
dx = abs(x2 - x1) dy = abs(y2 - y1) dz = abs(z2 - z1) mindist = r1 + r2 dist = dx + dy + dz if dist > mindist: overlaps_all_of_set = False if distance_from_origin % 100000 == 0: print(distance_from_origin) distance_from_origin += 1 return distance_from_origin strongest_idx = max(nanobots, key=lambda n: nanobots[n][3]) strongest = nanobots[strongest_idx] points = [nb[:-1] for k, nb in nanobots.items()] kdtree = KDTree(points, leafsize=10) print("Part 1") print(in_range(nanobots, strongest_idx)) print(len(in_range(nanobots, strongest_idx))) print("Part 2") distances = kdtree.sparse_distance_matrix(kdtree, np.inf, p=1) print("Distance matrix done") most_overlapping = setup(nanobots, distances) print("Solving...") solve(most_overlapping)
class test_sparse_distance_matrix_compiled(sparse_distance_matrix_consistency):
    """``cKDTree.sparse_distance_matrix`` checks on top of the shared consistency tests."""

    def setUp(self):
        """Build compiled and reference trees over the same seeded data."""
        n, m = 50, 4
        np.random.seed(0)
        # The draw order is significant: first sample -> data1, second -> data2.
        data1 = np.random.randn(n, m)
        data2 = np.random.randn(n, m)
        self.n, self.m = n, m
        self.data1, self.data2 = data1, data2
        self.r = 0.5
        self.p = 2
        self.T1 = cKDTree(data1, leafsize=2)
        self.T2 = cKDTree(data2, leafsize=2)
        self.ref_T1 = KDTree(data1, leafsize=2)
        self.ref_T2 = KDTree(data2, leafsize=2)

    def test_consistency_with_python(self):
        """Compiled and reference trees must produce the same matrix."""
        compiled = self.T1.sparse_distance_matrix(self.T2, self.r)
        reference = self.ref_T1.sparse_distance_matrix(self.ref_T2, self.r)
        assert_array_almost_equal(compiled.todense(), reference.todense(),
                                  decimal=14)

    def test_against_logic_error_regression(self):
        """A tree against itself must give a symmetric distance matrix."""
        # regression test for gh-5077 logic error
        np.random.seed(0)
        samples = np.random.randn(18, 2)
        too_many = np.array(samples, dtype=int)
        tree = cKDTree(too_many, balanced_tree=False, compact_nodes=False)
        dense = tree.sparse_distance_matrix(tree, 3).todense()
        assert_array_almost_equal(dense, dense.T, decimal=14)

    def test_ckdtree_return_types(self):
        """Every ``output_type`` must encode the same distances."""
        size, radius = self.n, self.r

        # brute-force reference: Euclidean distances, zeroed beyond the radius
        expected = np.zeros((size, size))
        for row in range(size):
            for col in range(size):
                delta = self.data1[row, :] - self.data2[col, :]
                expected[row, col] = np.dot(delta, delta)
        expected = np.sqrt(expected)
        expected[expected > radius] = 0.

        # test return type 'dict'
        as_dict = self.T1.sparse_distance_matrix(self.T2, radius,
                                                 output_type='dict')
        filled = np.zeros((size, size))
        for (row, col), value in as_dict.items():
            filled[row, col] = value
        assert_array_almost_equal(expected, filled, decimal=14)

        # test return type 'ndarray' (records with fields 'i', 'j' and 'v')
        records = self.T1.sparse_distance_matrix(self.T2, radius,
                                                 output_type='ndarray')
        filled = np.zeros((size, size))
        for record in records:
            filled[record['i'], record['j']] = record['v']
        assert_array_almost_equal(expected, filled, decimal=14)

        # test return type 'dok_matrix'
        as_dok = self.T1.sparse_distance_matrix(self.T2, radius,
                                                output_type='dok_matrix')
        assert_array_almost_equal(expected, as_dok.todense(), decimal=14)

        # test return type 'coo_matrix'
        as_coo = self.T1.sparse_distance_matrix(self.T2, radius,
                                                output_type='coo_matrix')
        assert_array_almost_equal(expected, as_coo.todense(), decimal=14)
class test_sparse_distance_matrix_compiled(sparse_distance_matrix_consistency):
    """Extends the shared consistency tests with ``cKDTree``-specific checks."""

    def setUp(self):
        """Seed the RNG and index two 50 x 4 samples with both tree types."""
        n = 50
        m = 4
        np.random.seed(0)
        # Keep the sampling order: data1 is drawn before data2.
        data1 = np.random.randn(n, m)
        data2 = np.random.randn(n, m)
        self.T1, self.T2 = (cKDTree(data1, leafsize=2),
                            cKDTree(data2, leafsize=2))
        self.ref_T1, self.ref_T2 = (KDTree(data1, leafsize=2),
                                    KDTree(data2, leafsize=2))
        self.r = 0.5
        self.p = 2
        self.n = n
        self.m = m
        self.data1 = data1
        self.data2 = data2

    def test_consistency_with_python(self):
        """The compiled result must match the reference ``KDTree`` result."""
        from_compiled = self.T1.sparse_distance_matrix(self.T2, self.r)
        from_reference = self.ref_T1.sparse_distance_matrix(self.ref_T2, self.r)
        assert_array_almost_equal(from_compiled.todense(),
                                  from_reference.todense(), decimal=14)

    def test_against_logic_error_regression(self):
        """The self-distance matrix of integer-truncated points is symmetric."""
        # regression test for gh-5077 logic error
        np.random.seed(0)
        raw = np.random.randn(18, 2)
        too_many = np.array(raw, dtype=int)
        tree = cKDTree(too_many, balanced_tree=False, compact_nodes=False)
        full = tree.sparse_distance_matrix(tree, 3).todense()
        assert_array_almost_equal(full, full.T, decimal=14)

    def test_ckdtree_return_types(self):
        """All supported ``output_type`` values must agree with brute force."""
        count = self.n

        # brute-force reference: Euclidean distances, zeroed beyond self.r
        ref = np.zeros((count, count))
        for a in range(count):
            for b in range(count):
                diff = self.data1[a, :] - self.data2[b, :]
                ref[a, b] = np.dot(diff, diff)
        ref = np.sqrt(ref)
        ref[ref > self.r] = 0.

        # test return type 'dict'
        pairs = self.T1.sparse_distance_matrix(self.T2, self.r,
                                               output_type='dict')
        dense = np.zeros((count, count))
        for key, value in pairs.items():
            a, b = key
            dense[a, b] = value
        assert_array_almost_equal(ref, dense, decimal=14)

        # test return type 'ndarray' (fields 'i', 'j' and 'v')
        table = self.T1.sparse_distance_matrix(self.T2, self.r,
                                               output_type='ndarray')
        dense = np.zeros((count, count))
        for a, b, value in zip(table['i'], table['j'], table['v']):
            dense[a, b] = value
        assert_array_almost_equal(ref, dense, decimal=14)

        # test return type 'dok_matrix'
        dok = self.T1.sparse_distance_matrix(self.T2, self.r,
                                             output_type='dok_matrix')
        assert_array_almost_equal(ref, dok.todense(), decimal=14)

        # test return type 'coo_matrix'
        coo = self.T1.sparse_distance_matrix(self.T2, self.r,
                                             output_type='coo_matrix')
        assert_array_almost_equal(ref, coo.todense(), decimal=14)
from itertools import combinations
from itertools import permutations
import time

# Radius used for both the neighbour query and the sparse distance matrix.
d_threshold = 1.5

# First point set: (x[k], y[k]).
x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
y = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
# Materialise the pairs: on Python 3 zip() is a one-shot iterator, and the
# same points are handed to both KDTree and cKDTree below.
points = list(zip(x, y))

# Second point set: (x2[k], y2[k]).  It used to be built from x and y by
# mistake, which left x2/y2 unused and made both sets identical.
x2 = [1, 3, 4, 5, 6, 7, 8, 8, 10, 16]
y2 = [1, 2, 3, 4, 5, 6, 7, 8, 9, 16.1]
points2 = list(zip(x2, y2))

# time.clock() was removed in Python 3.8; keep using it where it exists and
# fall back to time.perf_counter() otherwise.
_clock = getattr(time, "clock", None) or time.perf_counter
start = _clock()

# Pure-Python trees.
A_tree = KDTree(points)
B_tree = KDTree(points2)
neighbors = A_tree.query_ball_tree(B_tree, d_threshold)  # a list of lists
distances = A_tree.sparse_distance_matrix(B_tree, d_threshold)

# Compiled trees over the same data.
A_tree = cKDTree(points)
B_tree = cKDTree(points2)
cneighbors = A_tree.query_ball_tree(B_tree, d_threshold)  # a list of lists
cdistances = A_tree.sparse_distance_matrix(B_tree, d_threshold)

# The two implementations can be compared with `neighbors == cneighbors`;
# the individual distances are available through `distances.items()`.