Beispiel #1
0
 def remove_small_simplices(self, minsizes):
     '''
     Build a complex that contains only the sufficiently large simplices.

     A simplex of dimension C{d} is kept if its stored value is at least
     C{minsizes[d]}; dimensions beyond the end of C{minsizes} use the
     threshold 1. A simplex of dimension 1 or higher is additionally
     dropped if one of its vertices was dropped. Processing stops at the
     first dimension in which no simplex survives.

     @param minsizes: thresholds, indexed by dimension
     @type minsizes: sequence of numbers

     @return: C{self} itself if C{minsizes} is empty, otherwise a new
              complex (possibly empty)
     @rtype: L{simpl_complex}
     '''
     if not minsizes:
         return self
     S = simpl_complex()
     # vertices
     vertices = {k: v for k, v in dict_items(self[0]) if v >= minsizes[0]}
     if not vertices:
         return S  # empty simplicial complex
     S.simplices = [vertices]
     # Boolean lookup table "is this vertex still present?". The builtin
     # bool is used as dtype because the alias np.bool was removed in
     # NumPy 1.24.
     # NOTE(review): presumably self.vertices() is sorted, so that its last
     # entry is the largest vertex index -- confirm.
     vertex_present = np.zeros(self.vertices()[-1] + 1, dtype=bool)
     vertex_present[S.vertices()] = True
     # higher simplices
     for dim in range(1, self.dimension + 1):
         minsize = minsizes[dim] if dim < len(minsizes) else 1
         d = {k: v for k, v in dict_items(self[dim])
              if v >= minsize and np.all(vertex_present[list(k)])}
         if not d:
             break
         S.simplices.append(d)
     return S
Beispiel #2
0
    def __init__(self, data=None):
        '''
        Generate a simplicial complex

        @param data: A dict of simplices and weights. Or an iterable of simplices.
                     A C{dionysus.Filtration} is a possible input.
                     C{None} (the default) gives an empty complex.
        @type data: dictionary or iterable or None

        For input purposes a simplex object can be a tuple of integers representing
        vertices( our internal representation ), or a Dionysus Simplex object.
        The latter will be converted into the internal representation.

        When input as a dictionary, the keys are the simplex objects and the values
        are weights. When input as a list, the items are the simplex objects;
        in this case the weights default to 1.

        The simplex objects need not be all of the same dimension. They will be
        sorted into the right position.
        '''
        self.simplices = []
        if data is None:
            # No input: leave the complex empty. A None default avoids
            # sharing one mutable dict object between all calls.
            return
        if isinstance(data, dict):
            for simplex, weight in dict_items(data):
                self.add_simplex(simplex, weight)
        else:
            for simplex in data:
                self.add_simplex(simplex)
Beispiel #3
0
    def adjacency_matrix(self, weighted=False, sparse=True):
        '''
        Build the (weighted or unweighted) adjacency matrix of the 1-skeleton
        of a jusha output.

        The edge weights are the number of data points in the intersections of
        two nodes.

        By default, the adjacency matrix is output as a sparse matrix in
        "Compressed Sparse Column" format (C{scipy.sparse.csc_matrix}). This
        has no deep reason. Change the sparse format if a different one is
        more appropriate.

        @param weighted: Weighted edges? (Default: False = unweighted)
        @type weighted: bool
        @param sparse: Sparse or dense output matrix? (Default: True =
        compressed)
        @type sparse: bool

        @return: symmetric C{num_nodes} x C{num_nodes} matrix; dtype C{int}
        if weighted, C{bool} otherwise
        @rtype: matrix
        '''
        assert isinstance(weighted, bool)
        assert isinstance(sparse, bool)
        dtype = int if weighted else bool
        shape = (self.num_nodes, self.num_nodes)
        if sparse:
            # Fill a LIL matrix and convert once at the end: inserting
            # entries one by one into a CSC matrix changes its sparsity
            # structure on every write, which is expensive.
            A = scsp.lil_matrix(shape, dtype=dtype)
        else:
            A = zeros(shape, dtype=dtype)
        if weighted:
            for edge, weight in dict_items(self.simplices[1]):
                A[edge] = weight
                A[edge[::-1]] = weight  # mirror entry: the graph is undirected
        else:
            for edge in self.simplices[1]:
                A[edge] = True
                A[edge[::-1]] = True
        return A.tocsc() if sparse else A
Beispiel #4
0
    def boundary(self, sanitize=True):
        '''
        Compute the B{unoriented} (mod-2) boundary of the simplicial complex.

        Only the top-dimensional simplices are considered: each of their
        codimension-1 faces is counted, and the faces that occur an odd
        number of times form the result.

        This is a simple stand-in; a proper boundary operator on chains
        would be the better long-term solution.

        @param sanitize: Call C{sanitize_faces()} on the result before
        returning it?
        @type sanitize: bool

        @return: boundary
        @rtype: L{simpl_complex}
        '''
        # Step 1: count how often each face appears in the boundary chain
        # of the top-dimensional simplices.
        occurrences = defaultdict(int)
        for simplex in self[-1]:
            ordered_vertices = sorted(simplex)
            for face in combinations(ordered_vertices, self.dimension):
                occurrences[face] += 1
        # Step 2: modulo 2, only faces with an odd count survive.
        odd_faces = [face for face, count in dict_items(occurrences)
                     if count % 2 == 1]
        result = simpl_complex(odd_faces)
        if sanitize:
            result.sanitize_faces()
        return result
Beispiel #5
0
def graphviz_node_pos(S, nodes):
    '''
    Let Graphviz lay out the complex and return the node positions.

    The dot output of L{dot_from_mapper_output} is parsed with C{dotparser};
    for every parsed node, the comma-separated C{pos} attribute is converted
    to a tuple of floats.

    @return: C{zip} of the transposed (node id, position) pairs, i.e. the
    integer node ids first and the position tuples second
    '''
    dot_source = dot_from_mapper_output(S, nodes)

    parser = dotparser(dot_source)
    parser.parse_graph()
    placed = []
    for name, attributes in dict_items(parser.nodes):
        node_id = int(name)
        coordinates = tuple(map(float, attributes['pos'].split(',')))
        placed.append((node_id, coordinates))
    return zip(*placed)
Beispiel #6
0
 def to_simple_Graph(self):
     '''
     Convert the 1-skeleton of a L{jusha_output} to a networkx Graph. The
     nodes are nonnegative integers.
     No C{info} or C{levelset} dictionary, just the graph itself.

     Every edge carries its simplex weight as the C{weight} edge attribute.

     @rtype: C{networkx.Graph}
     '''
     import networkx as nx
     G = nx.Graph()
     # The 0-simplices are keyed by 1-tuples (v,). Unpack them so that the
     # vertices are the same integer node objects that the edges refer to;
     # otherwise the graph would contain both (v,) and v as separate nodes.
     G.add_nodes_from(v for v, in self.simplices[0])
     G.add_weighted_edges_from(
         edge + (weight,)
         for edge, weight in dict_items(self.simplices[1]))
     return G
Beispiel #7
0
def dot_from_mapper_output(S, nodes):
    '''
    Generate a dot file from jushacore output and process it with Graphviz.

    The vertices and (if present) the edges of C{S} are written as an
    undirected graph to the standard input of the C{neato} program, and
    whatever C{neato} prints on standard output is returned.

    @param S: simplicial complex; C{S[0]} must iterate over 1-tuples of
    vertices and C{S[1]} over vertex pairs
    @param nodes: currently unused

    @return: the Graphviz output, or C{None} if C{S.dimension} is negative
    @rtype: str or None

    @raise RuntimeError: if Graphviz writes anything to standard error or
    exits with a nonzero return code
    '''
    if S.dimension < 0:
        return None

    # Assemble the complete graph description before starting Graphviz, so
    # that bad input cannot leave a half-fed child process behind.
    # Caution: Not all nodes may be vertices!
    vertices = sorted(n for n, in S[0])
    parts = ['graph mapper_output { '
             'node [ shape=circle, label="" ];']
    parts.extend('{};'.format(n) for n in vertices)
    if S.dimension > 0:
        parts.extend('{0}--{1};'.format(a, b) for a, b in S[1])
    parts.append('}')
    dot_input = ''.join(parts).encode('ascii')

    graphvizcommand = 'neato'
    # FileNotFoundError does not exist in Python 2; fall back to OSError.
    try:
        exception_to_catch = FileNotFoundError
    except NameError:
        exception_to_catch = OSError
    try:
        p = subprocess.Popen([graphvizcommand],
                             stdout=subprocess.PIPE,
                             stdin=subprocess.PIPE,
                             stderr=subprocess.PIPE)
    except exception_to_catch:
        sys.stderr.write('Error: Could not call "{0}". '
                         'Make sure that graphviz is installed and that {0} is in the search path.\n'.
                         format(graphvizcommand))
        raise

    # communicate() feeds stdin, drains stdout/stderr concurrently and closes
    # the pipes; writing to p.stdin by hand can block when a pipe fills up.
    out, err = p.communicate(dot_input)
    if err:
        print(err)
        raise RuntimeError(err)
    if p.returncode != 0:
        # returncode is an int, so it must be formatted, not concatenated.
        raise RuntimeError('Graphviz exited with return code {0}'
                           .format(p.returncode))

    return out.decode('ascii')
Beispiel #8
0
    def remove_nodes(self, nodes, verbose=False):
        '''
        Remove nodes from the jusha output.

        The given indices are discarded from every level set, the remaining
        nodes are renumbered consecutively (keeping their order), every
        simplex that contains a removed node is dropped, and the surviving
        simplices are rewritten with the new indices.

        @param nodes: list of nodes.
        @type nodes: list of integers
        @param verbose: print which nodes are removed?
        @type verbose: bool
        '''
        nodes = list(nodes)
        if verbose:
            print("Cleanup: Remove the nodes {0}.".format(nodes))
        if not nodes:
            return
        # A set gives constant-time membership tests and makes duplicate
        # entries in the input harmless.
        removed = set(nodes)
        # update the level sets
        # NOTE(review): the indices that stay in ls.nodes are not renumbered
        # here -- confirm that this is what callers expect.
        for ls in dict_values(self.levelsets):
            ls.nodes.difference_update(removed)
        # make a map from old node indices to new indices (None = removed)
        node_map = [None] * self.num_nodes
        new_index = 0
        for i in range(self.num_nodes):
            if i not in removed:
                node_map[i] = new_index
                new_index += 1
        # update the simplicial complex: keep a simplex only if all of its
        # vertices survive, not just the first one
        D = dict()
        for s, v in dict_items(self.simplices.as_dict()):
            renumbered = tuple(node_map[x] for x in s)
            if all(x is not None for x in renumbered):
                D[renumbered] = v
        self.simplices = simpl_complex(D)

        # update the list of nodes
        self.nodes = [self.nodes[i] for i in range(self.num_nodes)
                      if i not in removed]
Beispiel #9
0
    def to_db(self, cursor):
        """
        Given a psycopg2 cursor object that points to a postgres db with the
        jusha_output schema, writes the jusha_output objects information to
        db.

        One row is inserted into C{jusha_experiments}; levels, nodes, points
        and (if there are any) edges are written through
        C{iterable_to_table}. The transaction is committed and the new
        C{expr_id} is stored via C{add_info}.

        Returns the expr_id, which can be passed to (C{jusha_output.from_db})
        to retrieve the object.

        @param cursor: db cursor
        @type cursor: psycopg2._psycopg.cursor

        @rtype: int
        """

        # TBD: Check that this function still works after the changes to
        # levelset logic

        assert self.info["cover"]["dim"] == 1
        levels_and_levelsets = sorted(dict_items(self.levelsets),
                                      key=lambda item: item[0][0])
        nodes = self.nodes
        edges = self.simplices[1].keys()
        # should we modify db to store weights as well?

        # Explicit parameter mapping for the INSERT below: unlike locals(),
        # it does not break when a local variable is renamed and does not
        # hand unrelated objects to the database driver.
        params = {
            "dataset_id": self.info["dataset_id"],
            "intervals": int(self.info["cover"]["intervals"]),
            # stored as an integer percentage
            "overlap": int(self.info["cover"]["fract_overlap"] * 100),
            "cover": self.info["cover"]["type"],
            "cutoff": self.info["cutoff"],
            "cluster": self.info["cluster"],
            "filter_min": float(self.info["filter_min"]),
            "filter_max": float(self.info["filter_max"]),
        }

        cursor.execute("""select nextval( 'seq_jusha_experiments' );""")
        expr_id = cursor.fetchall()[0][0]
        params["expr_id"] = expr_id
        cursor.execute(
            """
                        insert into jusha_experiments( dataset_id, expr_id, intervals,
                                                        overlap, filter_min, filter_max,
                                                        cover, cutoff, cluster )
                        values( %(dataset_id)s, %(expr_id)s, %(intervals)s,
                                %(overlap)s, %(filter_min)s, %(filter_max)s,
                                %(cover)s, %(cutoff)s, %(cluster)s );
                        """, params)
        # TODO: the flattened info attributes are not written to
        # "jusha_experiments_attributes" yet.

        # Write levels and filter values
        tmp = [(expr_id, level[0], float(levelset.filter_min),
                float(levelset.filter_max))
               for level, levelset in levels_and_levelsets]
        iterable_to_table(cursor, tmp, "jusha_levels")

        # Get node ids
        tmp = [(expr_id, i, n.level[0], float(n.attribute))
               for i, n in enumerate(nodes)]
        iterable_to_table(cursor, tmp, "jusha_nodes")

        # Write point sets: one row per (node, point) pair, generated lazily
        tmp = ((expr_id, node_id, point)
               for node_id, node in enumerate(nodes)
               for point in node.points)
        iterable_to_table(cursor, tmp, "jusha_points")

        # Write edges
        if len(edges) > 0:
            tmp = [(expr_id, u, v) for u, v in edges]
            iterable_to_table(cursor, tmp, "jusha_edges")

        cursor.connection.commit()
        self.add_info(expr_id=expr_id)

        return expr_id
Beispiel #10
0
    def generate_complex(self,
                         cover=None,
                         verbose=False,
                         min_sizes=(),
                         max_dim=-1):
        '''
        Generate the simplicial complex from the intersections of the point
        sets for each node.

        The weight of each simplex is the number of data points in the
        intersection.

        This is a generic algorithm which works in every case but might not be
        fast: every pair of candidate nodes is tested for intersecting point
        sets, although the patch arrangement of the cover often rules out
        most intersections beforehand. Use a specialized scheme when speed
        matters.

        Independently of C{max_dim}, the loop stops once the current
        dimension exceeds 2, so no simplices above dimension 3 are
        generated.

        @param cover: not used by this method
        @param verbose: print progress messages?
        @type verbose: bool
        @param min_sizes: C{min_sizes[d]} is the minimal number of points
        for a C{d}-simplex; missing entries default to 1
        @type min_sizes: sequence of integers
        @param max_dim: highest dimension to generate; a negative value
        (the default) imposes no limit from this parameter
        @type max_dim: int
        '''
        # Data scheme for the dictionary `level`: for v1<v2<...<vn,
        # level[(v1,...,v(n-1))][vn] holds the data points in the
        # intersection of the patches U_v1, ..., U_vn, provided it is large
        # enough. This is exactly the condition that (v1,...,vn) forms a
        # simplex. The data is generated iteratively, starting from
        # level[()][i] = (data points of node i).

        def threshold(d):
            # Minimal size for simplices of dimension d (default: 1).
            return min_sizes[d] if len(min_sizes) > d else 1

        self.simplices = simpl_complex()
        dim = 0
        if verbose:
            print("There are {0} nodes.".format(self.num_nodes))
        min_nodesize = threshold(0)
        point_sets = dict()
        for index, node in enumerate(self.nodes):
            if node.points.size < min_nodesize:
                continue
            point_sets[index] = node.points
            self.add_simplex((index, ), len(node.points))
        level = {(): point_sets}

        if verbose:
            print("Generate the simplicial complex.")
        # The second condition is the hard stop for higher dimensions.
        while level and dim <= 2:
            dim += 1
            if 0 <= max_dim < dim:
                break
            min_simplexsize = threshold(dim)
            if verbose:
                print("Collect simplices of dimension {0}:".format(dim))
            next_level = defaultdict(dict)
            for prefix, members in dict_items(level):
                for lo, hi in combinations(members, 2):
                    common = intersect1d(members[lo],
                                         members[hi],
                                         assume_unique=True)
                    if common.size < min_simplexsize:
                        continue
                    if lo > hi:  # ensure lo<hi
                        lo, hi = hi, lo
                    self.add_simplex(prefix + (lo, hi), weight=common.size)
                    next_level[prefix + (lo, )][hi] = common
            level = next_level
            if verbose:
                count = sum(map(len, dict_values(level)))
                print("There are {0} simplices of dimension {1}.".format(
                    count, dim))