def compute_geom_brute(self, diffusion_time, n_neighbors):
    """Build a Geometry with brute-force adjacency and a Gaussian affinity.

    The kernel radius is derived from ``diffusion_time`` and ``self.dim``;
    the adjacency radius is three times the kernel radius.

    Parameters
    ----------
    diffusion_time : number
        Diffusion time used to derive the kernel radius.
    n_neighbors : any
        Not used by this method; kept so the signature stays unchanged
        for existing callers.

    Returns
    -------
    geom : Geometry
        Geometry built on ``self.data`` with ``adjacency_matrix``,
        ``affinity_matrix`` and ``laplacian_matrix`` assigned. The
        Laplacian is whatever ``self.get_laplacian(geom, radius)`` returns.
    """
    dim = self.dim

    # Set the radius according to the paper.
    # NOTE(review): the original author was unsure whether the exponent
    # should use `dim` or `d` -- confirm against the paper.
    radius = (diffusion_time * (diffusion_time * np.pi * 4) ** (dim / 2)) ** 0.5

    # Set the adjacency radius large enough that points beyond it have an
    # affinity close to zero.
    bigradius = 3 * radius

    geom = Geometry(
        adjacency_method='brute',
        adjacency_kwds={'radius': bigradius},
        affinity_method='gaussian',
        affinity_kwds={'radius': radius},
        laplacian_method='geometric',
        laplacian_kwds={'scaling_epps': 1},
    )
    geom.set_data_matrix(self.data)
    geom.adjacency_matrix = geom.compute_adjacency_matrix()
    # Fix: the affinity matrix used to be assigned to `laplacian_matrix`
    # and was then immediately overwritten by the line below.
    geom.affinity_matrix = geom.compute_affinity_matrix()
    geom.laplacian_matrix = self.get_laplacian(geom, radius)
    return geom
Maps which are also the most scalable method. A radius that is too small can break some embedding methods, specifically if there are points with no or too few neighbors. You can check the connectedness of your affinity matrix as follows: ''' from scipy.sparse.csgraph import connected_components rad1 = 0.2 # compute an adjacency matrix with a radius geom.adjacency_kwds = {'radius':rad1} adjacency_matrix = geom.compute_adjacency_matrix() # compute the corresponding affinity matrix geom.affinity_kwds = {'radius':rad1} affinity_matrix = geom.compute_affinity_matrix({'radius':rad1}) (number_connected_components, labels) = connected_components(affinity_matrix) print(number_connected_components) ''' Since the number of connected components is large, this indicates that our radius is too small. Let's increase it: ''' rad2 = 0.5 # compute an adjacency matrix with a radius geom.adjacency_kwds = {'radius':rad2} adjacency_matrix = geom.compute_adjacency_matrix() # compute the corresponding affinity matrix geom.affinity_kwds = {'radius':rad2} affinity_matrix = geom.compute_affinity_matrix({'radius':rad2}) (number_connected_components, labels) = connected_components(affinity_matrix)
Maps which are also the most scalable method. A radius that is too small can break some embedding methods, specifically if there are points with no or too few neighbors. You can check the connectedness of your affinity matrix as follows: ''' from scipy.sparse.csgraph import connected_components rad1 = 0.2 # compute an adjacency matrix with a radius geom.adjacency_kwds = {'radius': rad1} adjacency_matrix = geom.compute_adjacency_matrix() # compute the corresponding affinity matrix geom.affinity_kwds = {'radius': rad1} affinity_matrix = geom.compute_affinity_matrix({'radius': rad1}) (number_connected_components, labels) = connected_components(affinity_matrix) print(number_connected_components) ''' Since the number of connected components is large, this indicates that our radius is too small. Let's increase it: ''' rad2 = 0.5 # compute an adjacency matrix with a radius geom.adjacency_kwds = {'radius': rad2} adjacency_matrix = geom.compute_adjacency_matrix() # compute the corresponding affinity matrix geom.affinity_kwds = {'radius': rad2} affinity_matrix = geom.compute_affinity_matrix({'radius': rad2}) (number_connected_components, labels) = connected_components(affinity_matrix) print(number_connected_components)