def remove_small_simplices(self, minsizes):
    '''
    Filter the simplicial complex by simplex weight.

    A vertex is kept if its weight is at least C{minsizes[0]}. A simplex of
    dimension C{dim >= 1} is kept if its weight is at least C{minsizes[dim]}
    (or 1 if C{minsizes} has no entry for that dimension) and all of its
    vertices were kept. Processing stops at the first dimension in which no
    simplex survives.

    @param minsizes: minimal weights, indexed by dimension. If this is
    empty, the complex itself (not a copy) is returned.
    @type minsizes: sequence of numbers

    @return: the filtered complex
    @rtype: L{simpl_complex}
    '''
    if not minsizes:
        return self
    S = simpl_complex()

    # vertices
    minsize = minsizes[0]
    vertices = {k: v for k, v in dict_items(self[0]) if v >= minsize}
    if not vertices:
        return S  # empty simplicial complex
    S.simplices = [vertices]

    # Lookup table "is this vertex still present?", indexed by vertex number.
    # Sized from the last entry of self.vertices() - presumably the largest
    # vertex; verify against simpl_complex.vertices.
    # The builtin bool is used as dtype since the np.bool alias is not
    # available in every NumPy release (deprecated in 1.20, removed in 1.24).
    vertex_present = np.zeros(self.vertices()[-1] + 1, dtype=bool)
    vertex_present[S.vertices()] = True

    # higher simplices
    for dim in range(1, self.dimension + 1):
        minsize = 1 if len(minsizes) <= dim else minsizes[dim]
        d = {k: v for k, v in dict_items(self[dim])
             if v >= minsize and np.all(vertex_present[list(k)])}
        if not d:
            # Nothing survives in this dimension: drop all higher ones too.
            break
        S.simplices.append(d)
    return S
def remove_small_simplices(self, minsizes):
    '''
    Filter the simplicial complex by simplex weight.

    A vertex is kept if its weight is at least C{minsizes[0]}. A simplex of
    dimension C{dim >= 1} is kept if its weight is at least C{minsizes[dim]}
    (or 1 if C{minsizes} has no entry for that dimension) and all of its
    vertices were kept. Processing stops at the first dimension in which no
    simplex survives.

    @param minsizes: minimal weights, indexed by dimension. If this is
    empty, the complex itself (not a copy) is returned.
    @type minsizes: sequence of numbers

    @return: the filtered complex
    @rtype: L{simpl_complex}
    '''
    if not minsizes:
        return self
    S = simpl_complex()

    # vertices
    minsize = minsizes[0]
    vertices = {k: v for k, v in dict_items(self[0]) if v >= minsize}
    if not vertices:
        return S  # empty simplicial complex
    S.simplices = [vertices]

    # Lookup table "is this vertex still present?", indexed by vertex number.
    # Sized from the last entry of self.vertices() - presumably the largest
    # vertex; verify against simpl_complex.vertices.
    # The builtin bool is used as dtype since the np.bool alias is not
    # available in every NumPy release (deprecated in 1.20, removed in 1.24).
    vertex_present = np.zeros(self.vertices()[-1] + 1, dtype=bool)
    vertex_present[S.vertices()] = True

    # higher simplices
    for dim in range(1, self.dimension + 1):
        minsize = 1 if len(minsizes) <= dim else minsizes[dim]
        d = {k: v for k, v in dict_items(self[dim])
             if v >= minsize and np.all(vertex_present[list(k)])}
        if not d:
            # Nothing survives in this dimension: drop all higher ones too.
            break
        S.simplices.append(d)
    return S
def adjacency_matrix(self, weighted=False, sparse=True):
    '''
    Return the adjacency matrix of the 1-skeleton of a Mapper output.

    Every edge C{(i, j)} of the complex sets the two symmetric entries
    C{[i, j]} and C{[j, i]}. In the weighted case the entry is the edge
    weight (the number of data points in the intersection of the two
    nodes), otherwise it is C{True}.

    The sparse variant is a C{scipy.sparse.csc_matrix} ("Compressed Sparse
    Column"). There is no deep reason for this format; switch to another
    sparse format if it suits the application better.

    @param weighted: Weighted edges? (Default: False = unweighted)
    @type weighted: bool
    @param sparse: Sparse or dense output matrix? (Default: True = sparse)
    @type sparse: bool

    @rtype: matrix
    '''
    assert isinstance(weighted, bool)
    assert isinstance(sparse, bool)

    # Integer entries hold weights, boolean entries only mark adjacency.
    if weighted:
        entry_type = int
    else:
        entry_type = bool
    if sparse:
        make_matrix = scsp.csc_matrix
    else:
        make_matrix = zeros
    matrix = make_matrix((self.num_nodes, self.num_nodes), dtype=entry_type)

    if not weighted:
        for edge in self.simplices[1]:
            mirrored = edge[::-1]
            matrix[edge] = True
            matrix[mirrored] = True
        return matrix

    for edge, weight in dict_items(self.simplices[1]):
        mirrored = edge[::-1]
        matrix[edge] = weight
        matrix[mirrored] = weight
    return matrix
def graphviz_node_pos(S, nodes):
    '''
    Lay out the Mapper output with Graphviz and collect the node positions.

    The dot output of L{dot_from_mapper_output} is parsed with C{dotparser}.
    For every parsed node, the node name is converted to an integer and the
    comma-separated C{pos} attribute to a tuple of floats.

    @param S: simplicial complex, passed on to L{dot_from_mapper_output}
    @param nodes: passed on to L{dot_from_mapper_output}

    @return: the (node id, position) pairs, transposed with C{zip}: first
    all node ids, then all positions.
    '''
    dot_output = dot_from_mapper_output(S, nodes)
    parser = dotparser(dot_output)
    parser.parse_graph()

    id_and_pos = []
    for name, attributes in dict_items(parser.nodes):
        node_id = int(name)
        coordinates = tuple(map(float, attributes['pos'].split(',')))
        id_and_pos.append((node_id, coordinates))
    return zip(*id_and_pos)
def boundary(self, sanitize=True):
    '''
    B{Unoriented} (mod-2) boundary of the top-dimensional simplices.

    Every codimension-1 face of every top-dimensional simplex is counted;
    the faces which occur an odd number of times form the boundary. This is
    a simple method only. A proper boundary operator on chains would be
    more appropriate, so do not regard this as the last word.

    @param sanitize: Sanitize the result by adding lower-dimensional faces?
    @type sanitize: bool

    @return: boundary
    @rtype: L{simpl_complex}
    '''
    # Step 1: count how often each face occurs in the top-dimensional cells.
    face_count = defaultdict(int)
    for cell in self[-1]:
        ordered_vertices = sorted(cell)
        for face in combinations(ordered_vertices, self.dimension):
            face_count[face] += 1

    # Step 2: mod 2, only the faces with odd multiplicity remain.
    odd_faces = []
    for face, multiplicity in dict_items(face_count):
        if multiplicity % 2 == 1:
            odd_faces.append(face)

    result = simpl_complex(odd_faces)
    if sanitize:
        result.sanitize_faces()
    return result
def __init__(self, data=None):
    '''
    Generate a simplicial complex

    @param data: A dict of simplices and weights. Or an iterable of
    simplices. A C{dionysus.Filtration} is a possible input. If omitted
    (or C{None}), the complex starts out empty.
    @type data: dictionary or iterable

    For input purposes a simplex object can be a tuple of integers
    representing vertices (our internal representation), or a Dionysus
    Simplex object. The latter will be converted into the internal
    representation.

    When input as a dictionary, the keys are the simplex objects and the
    values are weights. When input as a list, the items are the simplex
    objects; in this case the weights default to 1. The simplex objects
    need not be all of the same dimension. They will be sorted into the
    right position.
    '''
    self.simplices = []
    # A None sentinel replaces the former mutable default argument ({}),
    # which would be one dict object shared between all calls.
    if data is None:
        return
    if isinstance(data, dict):
        for simplex, weight in dict_items(data):
            self.add_simplex(simplex, weight)
    else:
        for simplex in data:
            self.add_simplex(simplex)
def boundary(self, sanitize=True):
    '''
    B{Unoriented} (mod-2) boundary of the top-dimensional simplices.

    Every codimension-1 face of every top-dimensional simplex is counted;
    the faces which occur an odd number of times form the boundary. This is
    a simple method only. A proper boundary operator on chains would be
    more appropriate, so do not regard this as the last word.

    @param sanitize: Sanitize the result by adding lower-dimensional faces?
    @type sanitize: bool

    @return: boundary
    @rtype: L{simpl_complex}
    '''
    # Step 1: count how often each face occurs in the top-dimensional cells.
    face_count = defaultdict(int)
    for cell in self[-1]:
        ordered_vertices = sorted(cell)
        for face in combinations(ordered_vertices, self.dimension):
            face_count[face] += 1

    # Step 2: mod 2, only the faces with odd multiplicity remain.
    odd_faces = []
    for face, multiplicity in dict_items(face_count):
        if multiplicity % 2 == 1:
            odd_faces.append(face)

    result = simpl_complex(odd_faces)
    if sanitize:
        result.sanitize_faces()
    return result
def adjacency_matrix(self, weighted=False, sparse=True):
    '''
    Return the adjacency matrix of the 1-skeleton of a Mapper output.

    Every edge C{(i, j)} of the complex sets the two symmetric entries
    C{[i, j]} and C{[j, i]}. In the weighted case the entry is the edge
    weight (the number of data points in the intersection of the two
    nodes), otherwise it is C{True}.

    The sparse variant is a C{scipy.sparse.csc_matrix} ("Compressed Sparse
    Column"). There is no deep reason for this format; switch to another
    sparse format if it suits the application better.

    @param weighted: Weighted edges? (Default: False = unweighted)
    @type weighted: bool
    @param sparse: Sparse or dense output matrix? (Default: True = sparse)
    @type sparse: bool

    @rtype: matrix
    '''
    assert isinstance(weighted, bool)
    assert isinstance(sparse, bool)

    # Integer entries hold weights, boolean entries only mark adjacency.
    if weighted:
        entry_type = int
    else:
        entry_type = bool
    if sparse:
        make_matrix = scsp.csc_matrix
    else:
        make_matrix = zeros
    matrix = make_matrix((self.num_nodes, self.num_nodes), dtype=entry_type)

    if not weighted:
        for edge in self.simplices[1]:
            mirrored = edge[::-1]
            matrix[edge] = True
            matrix[mirrored] = True
        return matrix

    for edge, weight in dict_items(self.simplices[1]):
        mirrored = edge[::-1]
        matrix[edge] = weight
        matrix[mirrored] = weight
    return matrix
def generate_complex(self, cover=None, verbose=False, min_sizes=(), max_dim=-1):
    '''
    Build the simplicial complex from the intersections of the point sets
    of the nodes. Each simplex is weighted by the number of data points in
    the corresponding intersection.

    The algorithm is generic: it works in every case but is not
    necessarily fast. For instance, every pair of nodes is tested for
    intersecting point sets, although the patch arrangement in the cover
    often tells in advance that many patches cannot intersect. Use a
    different scheme when speed is an issue.

    @param cover: not used by this implementation
    @param verbose: print progress messages?
    @type verbose: bool
    @param min_sizes: minimal number of points for a simplex, indexed by
    dimension; dimensions without an entry use 1.
    @type min_sizes: sequence of numbers
    @param max_dim: highest dimension to generate; a negative value means
    no limit.
    @type max_dim: int
    '''
    # Data scheme for the dictionary S:
    #
    # For v1<v2<...<vn, S[(v1,v2,...,v(n-1))][vn] stores the data points in
    # the intersection of the patches U_v1, ..., U_vn if it is nonempty
    # (more precisely: if it reaches the minimal size). This is exactly the
    # condition that (v1,...,vn) forms a simplex.
    #
    # The data is generated iteratively, starting from
    # S[()][i] = (data points for the node i).
    dim = 0
    print("There are {0} nodes.".format(self.num_nodes))

    if len(min_sizes) < 1:
        min_nodesize = 1
    else:
        min_nodesize = min_sizes[0]

    vertex_points = dict()
    for index, node in enumerate(self.nodes):
        if node.points.size >= min_nodesize:
            vertex_points[index] = node.points
            self.add_simplex((index,), len(node.points))
    S = {(): vertex_points}

    if verbose:
        print("Generate the simplicial complex.")
    while S:  # an empty level ends the construction
        dim += 1
        if max_dim >= 0 and dim > max_dim:
            break
        if len(min_sizes) <= dim:
            min_simplexsize = 1
        else:
            min_simplexsize = min_sizes[dim]
        if verbose:
            print ("Collect simplices of dimension {0}:".format(dim))

        next_level = defaultdict(dict)
        for prefix, candidates in dict_items(S):
            for first, second in combinations(candidates, 2):
                common = intersect1d(candidates[first], candidates[second],
                                     assume_unique=True)
                if common.size >= min_simplexsize:
                    # The simplex tuple must be increasing.
                    if first > second:
                        first, second = second, first
                    self.add_simplex(prefix + (first, second),
                                     weight=common.size)
                    next_level[prefix + (first,)][second] = common
        S = next_level
        if verbose:
            num_simplices = sum(map(len, dict_values(S)))
            print("There are {0} simplices of dimension {1}.".
                  format(num_simplices, dim))
def __init__(self, data=None):
    '''
    Generate a simplicial complex

    @param data: A dict of simplices and weights. Or an iterable of
    simplices. A C{dionysus.Filtration} is a possible input. If omitted
    (or C{None}), the complex starts out empty.
    @type data: dictionary or iterable

    For input purposes a simplex object can be a tuple of integers
    representing vertices (our internal representation), or a Dionysus
    Simplex object. The latter will be converted into the internal
    representation.

    When input as a dictionary, the keys are the simplex objects and the
    values are weights. When input as a list, the items are the simplex
    objects; in this case the weights default to 1. The simplex objects
    need not be all of the same dimension. They will be sorted into the
    right position.
    '''
    self.simplices = []
    # A None sentinel replaces the former mutable default argument ({}),
    # which would be one dict object shared between all calls.
    if data is None:
        return
    if isinstance(data, dict):
        for simplex, weight in dict_items(data):
            self.add_simplex(simplex, weight)
    else:
        for simplex in data:
            self.add_simplex(simplex)
def to_simple_Graph(self):
    '''
    Convert the 1-skeleton of a L{mapper_output} to a networkx Graph.

    The nodes are nonnegative integers. No C{info} or C{levelset}
    dictionary, just the graph itself. The edges carry the simplex weights
    as the C{weight} attribute.

    @rtype: C{networkx.Graph}
    '''
    import networkx as nx
    G = nx.Graph()
    # The vertices are stored as 1-tuples (n,). Unpack them so that the
    # graph nodes are the same integers which the edges refer to; otherwise
    # the graph would contain the tuples (n,) as extra nodes.
    G.add_nodes_from(n for n, in self.simplices[0])
    G.add_weighted_edges_from(edge + (weight,) for edge, weight in
                              dict_items(self.simplices[1]))
    return G
def dot_from_mapper_output(S, nodes):
    '''
    Generate a dot file from Mapper output and process it with Graphviz.

    The vertices and edges of the 1-skeleton of C{S} are written as an
    undirected graph in dot syntax and piped through the Graphviz program
    C{neato}.

    @param S: simplicial complex
    @param nodes: currently unused

    @return: the output of C{neato}, or C{None} if the complex is empty
    (negative dimension).
    @rtype: str or None

    @raise OSError: if C{neato} cannot be started (the exception is
    re-raised after a hint has been written to stderr).
    @raise RuntimeError: if C{neato} writes anything to stderr or exits
    with a nonzero return code.
    '''
    if S.dimension < 0:
        return None

    graphvizcommand = 'neato'

    # Assemble the complete dot source before the process is started.
    # Caution: Not all nodes may be vertices!
    vertices = sorted(n for n, in S[0])
    parts = ['graph mapper_output { node [ shape=circle, label="" ];']
    parts.extend('{};'.format(n) for n in vertices)
    if S.dimension > 0:
        parts.extend('{0}--{1};'.format(a, b) for a, b in S[1])
    parts.append('}')
    dot_source = ''.join(parts).encode('ascii')

    # FileNotFoundError does not exist in Python 2.
    try:
        exception_to_catch = FileNotFoundError
    except NameError:
        exception_to_catch = OSError
    try:
        p = subprocess.Popen([graphvizcommand], stdout=subprocess.PIPE,
                             stdin=subprocess.PIPE, stderr=subprocess.PIPE)
    except exception_to_catch:
        sys.stderr.write(
            'Error: Could not call "{0}". '
            'Make sure that graphviz is installed and that {0} is in the search path.\n'
            .format(graphvizcommand))
        raise

    # communicate() feeds stdin, closes it and drains both output pipes,
    # which avoids a deadlock on full pipe buffers.
    out, err = p.communicate(dot_source)
    if err:
        print(err)
        raise RuntimeError(err)
    if p.returncode != 0:
        # The return code is an int and must be formatted, not concatenated.
        raise RuntimeError('Graphviz exited with return code {0}'
                           .format(p.returncode))
    return out.decode('ascii')
def dot_from_mapper_output(S, nodes):
    '''
    Generate a dot file from Mapper output and process it with Graphviz.

    The vertices and edges of the 1-skeleton of C{S} are written as an
    undirected graph in dot syntax and piped through the Graphviz program
    C{neato}.

    @param S: simplicial complex
    @param nodes: currently unused

    @return: the output of C{neato}, or C{None} if the complex is empty
    (negative dimension).
    @rtype: str or None

    @raise OSError: if C{neato} cannot be started (the exception is
    re-raised after a hint has been written to stderr).
    @raise RuntimeError: if C{neato} writes anything to stderr or exits
    with a nonzero return code.
    '''
    if S.dimension < 0:
        return None

    graphvizcommand = 'neato'

    # Assemble the complete dot source before the process is started.
    # Caution: Not all nodes may be vertices!
    vertices = sorted(n for n, in S[0])
    parts = ['graph mapper_output { node [ shape=circle, label="" ];']
    parts.extend('{};'.format(n) for n in vertices)
    if S.dimension > 0:
        parts.extend('{0}--{1};'.format(a, b) for a, b in S[1])
    parts.append('}')
    dot_source = ''.join(parts).encode('ascii')

    # FileNotFoundError does not exist in Python 2.
    try:
        exception_to_catch = FileNotFoundError
    except NameError:
        exception_to_catch = OSError
    try:
        p = subprocess.Popen([graphvizcommand], stdout=subprocess.PIPE,
                             stdin=subprocess.PIPE, stderr=subprocess.PIPE)
    except exception_to_catch:
        sys.stderr.write(
            'Error: Could not call "{0}". '
            'Make sure that graphviz is installed and that {0} is in the search path.\n'
            .format(graphvizcommand))
        raise

    # communicate() feeds stdin, closes it and drains both output pipes,
    # which avoids a deadlock on full pipe buffers.
    out, err = p.communicate(dot_source)
    if err:
        print(err)
        raise RuntimeError(err)
    if p.returncode != 0:
        # The return code is an int and must be formatted, not concatenated.
        raise RuntimeError('Graphviz exited with return code {0}'
                           .format(p.returncode))
    return out.decode('ascii')
def remove_nodes(self, nodes, verbose=False):
    '''
    Remove nodes from the Mapper output.

    The nodes are removed from all level sets and from the node list, and
    the remaining nodes are renumbered consecutively. Every simplex which
    contains a removed node is dropped from the simplicial complex; the
    vertices of the other simplices are renumbered.

    @param nodes: list of nodes (indices into the node list). Duplicates
    are allowed.
    @type nodes: list of integers
    @param verbose: print a progress message?
    @type verbose: bool
    '''
    nodes = list(nodes)
    if verbose:
        print("Cleanup: Remove the nodes {0}.".format(nodes))
    if not nodes:
        return
    # Set for constant-time membership tests; also absorbs duplicates.
    removed = set(nodes)

    # update the level sets
    for ls in dict_values(self.levelsets):
        ls.nodes.difference_update(removed)

    # make a map from old node indices to new indices
    # (None marks a removed node)
    node_map = [None] * self.num_nodes
    new_index = 0
    for i in range(self.num_nodes):
        if i not in removed:
            node_map[i] = new_index
            new_index += 1

    # update the simplicial complex
    D = dict()
    for s, v in dict_items(self.simplices.as_dict()):
        renumbered = tuple(node_map[x] for x in s)
        # Check every vertex, not just the first one: a simplex is gone as
        # soon as any of its vertices has been removed.
        if None not in renumbered:
            D[renumbered] = v
    self.simplices = simpl_complex(D)

    # update the list of nodes
    self.nodes = [self.nodes[i] for i in range(self.num_nodes)
                  if i not in removed]
def remove_nodes(self, nodes, verbose=False):
    '''
    Remove nodes from the Mapper output.

    The nodes are removed from all level sets and from the node list, and
    the remaining nodes are renumbered consecutively. Every simplex which
    contains a removed node is dropped from the simplicial complex; the
    vertices of the other simplices are renumbered.

    @param nodes: list of nodes (indices into the node list). Duplicates
    are allowed.
    @type nodes: list of integers
    @param verbose: print a progress message?
    @type verbose: bool
    '''
    nodes = list(nodes)
    if verbose:
        print("Cleanup: Remove the nodes {0}.".format(nodes))
    if not nodes:
        return
    # Set for constant-time membership tests; also absorbs duplicates.
    removed = set(nodes)

    # update the level sets
    for ls in dict_values(self.levelsets):
        ls.nodes.difference_update(removed)

    # make a map from old node indices to new indices
    # (None marks a removed node)
    node_map = [None] * self.num_nodes
    new_index = 0
    for i in range(self.num_nodes):
        if i not in removed:
            node_map[i] = new_index
            new_index += 1

    # update the simplicial complex
    D = dict()
    for s, v in dict_items(self.simplices.as_dict()):
        renumbered = tuple(node_map[x] for x in s)
        # Check every vertex, not just the first one: a simplex is gone as
        # soon as any of its vertices has been removed.
        if None not in renumbered:
            D[renumbered] = v
    self.simplices = simpl_complex(D)

    # update the list of nodes
    self.nodes = [self.nodes[i] for i in range(self.num_nodes)
                  if i not in removed]
def to_db(self, cursor):
    """
    Given a psycopg2 cursor object that points to a postgres db with the
    mapper_output schema, writes the mapper_output objects information to
    db.

    One row is written to C{mapper_experiments}; the levels, nodes, point
    sets and edges go to C{mapper_levels}, C{mapper_nodes},
    C{mapper_points} and C{mapper_edges}. The transaction is committed at
    the end and the experiment id is recorded with C{add_info}.

    Returns the expr_id, which can be passed to
    (C{mapper_output.from_db}) to retrieve the object.

    Only one-dimensional covers are supported (checked by an assertion).

    @param cursor: db cursor
    @type cursor: psycopg2._psycopg.cursor

    @rtype: int
    """
    # TBD: Check that this function still works after the changes to
    # levelset logic
    assert self.info["cover"]["dim"] == 1

    levels_and_levelsets = list(dict_items(self.levelsets))
    levels_and_levelsets.sort(key=lambda x: x[0][0])

    nodes = self.nodes
    # should we modify db to store weights as well?
    edges = self.simplices[1].keys()

    # Explicit query parameters. (Handing locals() to the driver would
    # also expose self and cursor and would break silently whenever a
    # local variable is renamed.)
    experiment = {
        "dataset_id": self.info["dataset_id"],
        "intervals": int(self.info["cover"]["intervals"]),
        "overlap": int(self.info["cover"]["fract_overlap"] * 100),
        "cover": self.info["cover"]["type"],
        "cutoff": self.info["cutoff"],
        "cluster": self.info["cluster"],
        "filter_min": float(self.info["filter_min"]),
        "filter_max": float(self.info["filter_max"]),
    }

    cursor.execute("""select nextval( 'seq_mapper_experiments' );""")
    expr_id = cursor.fetchall()[0][0]
    experiment["expr_id"] = expr_id

    cursor.execute(
        " insert into mapper_experiments( dataset_id, expr_id, intervals, "
        "overlap, filter_min, filter_max, cover, cutoff, cluster ) "
        "values( %(dataset_id)s, %(expr_id)s, %(intervals)s, %(overlap)s, "
        "%(filter_min)s, %(filter_max)s, %(cover)s, %(cutoff)s, "
        "%(cluster)s ); ",
        experiment)

    # Write levels and filter values
    tmp = [(expr_id, level[0], float(levelset.filter_min),
            float(levelset.filter_max))
           for level, levelset in levels_and_levelsets]
    iterable_to_table(cursor, tmp, "mapper_levels")

    # Write nodes; the node id is the position in the node list
    tmp = [(expr_id, i, n.level[0], float(n.attribute))
           for i, n in enumerate(nodes)]
    iterable_to_table(cursor, tmp, "mapper_nodes")

    # Write point sets
    tmp_seq_of_point_seqs = [[(expr_id, node_id, point)
                              for point in node.points]
                             for node_id, node in enumerate(nodes)]
    tmp = chain(*tmp_seq_of_point_seqs)
    iterable_to_table(cursor, tmp, "mapper_points")

    # Write edges
    if len(edges) > 0:
        tmp = [(expr_id, u, v) for u, v in edges]
        iterable_to_table(cursor, tmp, "mapper_edges")

    cursor.connection.commit()
    self.add_info(expr_id=expr_id)
    return expr_id
def generate_complex(self, cover=None, verbose=False, min_sizes=(), max_dim=-1):
    '''
    Build the simplicial complex from the intersections of the point sets
    of the nodes. Each simplex is weighted by the number of data points in
    the corresponding intersection.

    The algorithm is generic: it works in every case but is not
    necessarily fast. For instance, every pair of nodes is tested for
    intersecting point sets, although the patch arrangement in the cover
    often tells in advance that many patches cannot intersect. Use a
    different scheme when speed is an issue.

    @param cover: not used by this implementation
    @param verbose: print progress messages?
    @type verbose: bool
    @param min_sizes: minimal number of points for a simplex, indexed by
    dimension; dimensions without an entry use 1.
    @type min_sizes: sequence of numbers
    @param max_dim: highest dimension to generate; a negative value means
    no limit.
    @type max_dim: int
    '''
    # Data scheme for the dictionary S:
    #
    # For v1<v2<...<vn, S[(v1,v2,...,v(n-1))][vn] stores the data points in
    # the intersection of the patches U_v1, ..., U_vn if it is nonempty
    # (more precisely: if it reaches the minimal size). This is exactly the
    # condition that (v1,...,vn) forms a simplex.
    #
    # The data is generated iteratively, starting from
    # S[()][i] = (data points for the node i).
    dim = 0
    print("There are {0} nodes.".format(self.num_nodes))

    if len(min_sizes) < 1:
        min_nodesize = 1
    else:
        min_nodesize = min_sizes[0]

    vertex_points = dict()
    for index, node in enumerate(self.nodes):
        if node.points.size >= min_nodesize:
            vertex_points[index] = node.points
            self.add_simplex((index,), len(node.points))
    S = {(): vertex_points}

    if verbose:
        print("Generate the simplicial complex.")
    while S:  # an empty level ends the construction
        dim += 1
        if max_dim >= 0 and dim > max_dim:
            break
        if len(min_sizes) <= dim:
            min_simplexsize = 1
        else:
            min_simplexsize = min_sizes[dim]
        if verbose:
            print("Collect simplices of dimension {0}:".format(dim))

        next_level = defaultdict(dict)
        for prefix, candidates in dict_items(S):
            for first, second in combinations(candidates, 2):
                common = intersect1d(candidates[first], candidates[second],
                                     assume_unique=True)
                if common.size >= min_simplexsize:
                    # The simplex tuple must be increasing.
                    if first > second:
                        first, second = second, first
                    self.add_simplex(prefix + (first, second),
                                     weight=common.size)
                    next_level[prefix + (first,)][second] = common
        S = next_level
        if verbose:
            num_simplices = sum(map(len, dict_values(S)))
            print("There are {0} simplices of dimension {1}.".
                  format(num_simplices, dim))
def to_db(self, cursor):
    """
    Given a psycopg2 cursor object that points to a postgres db with the
    mapper_output schema, writes the mapper_output objects information to
    db.

    One row is written to C{mapper_experiments}; the levels, nodes, point
    sets and edges go to C{mapper_levels}, C{mapper_nodes},
    C{mapper_points} and C{mapper_edges}. The transaction is committed at
    the end and the experiment id is recorded with C{add_info}.

    Returns the expr_id, which can be passed to
    (C{mapper_output.from_db}) to retrieve the object.

    Only one-dimensional covers are supported (checked by an assertion).

    @param cursor: db cursor
    @type cursor: psycopg2._psycopg.cursor

    @rtype: int
    """
    # TBD: Check that this function still works after the changes to
    # levelset logic
    assert self.info["cover"]["dim"] == 1

    levels_and_levelsets = list(dict_items(self.levelsets))
    levels_and_levelsets.sort(key=lambda x: x[0][0])

    nodes = self.nodes
    # should we modify db to store weights as well?
    edges = self.simplices[1].keys()

    # Explicit query parameters. (Handing locals() to the driver would
    # also expose self and cursor and would break silently whenever a
    # local variable is renamed.)
    experiment = {
        "dataset_id": self.info["dataset_id"],
        "intervals": int(self.info["cover"]["intervals"]),
        "overlap": int(self.info["cover"]["fract_overlap"] * 100),
        "cover": self.info["cover"]["type"],
        "cutoff": self.info["cutoff"],
        "cluster": self.info["cluster"],
        "filter_min": float(self.info["filter_min"]),
        "filter_max": float(self.info["filter_max"]),
    }

    cursor.execute("""select nextval( 'seq_mapper_experiments' );""")
    expr_id = cursor.fetchall()[0][0]
    experiment["expr_id"] = expr_id

    cursor.execute(
        " insert into mapper_experiments( dataset_id, expr_id, intervals, "
        "overlap, filter_min, filter_max, cover, cutoff, cluster ) "
        "values( %(dataset_id)s, %(expr_id)s, %(intervals)s, %(overlap)s, "
        "%(filter_min)s, %(filter_max)s, %(cover)s, %(cutoff)s, "
        "%(cluster)s ); ",
        experiment)

    # Write levels and filter values
    tmp = [(expr_id, level[0], float(levelset.filter_min),
            float(levelset.filter_max))
           for level, levelset in levels_and_levelsets]
    iterable_to_table(cursor, tmp, "mapper_levels")

    # Write nodes; the node id is the position in the node list
    tmp = [(expr_id, i, n.level[0], float(n.attribute))
           for i, n in enumerate(nodes)]
    iterable_to_table(cursor, tmp, "mapper_nodes")

    # Write point sets
    tmp_seq_of_point_seqs = [[(expr_id, node_id, point)
                              for point in node.points]
                             for node_id, node in enumerate(nodes)]
    tmp = chain(*tmp_seq_of_point_seqs)
    iterable_to_table(cursor, tmp, "mapper_points")

    # Write edges
    if len(edges) > 0:
        tmp = [(expr_id, u, v) for u, v in edges]
        iterable_to_table(cursor, tmp, "mapper_edges")

    cursor.connection.commit()
    self.add_info(expr_id=expr_id)
    return expr_id