예제 #1
0
def split_alt_nodes(graph: GraphContainer, max_len=300, padding_len=150):
    """
    Split long alternate nodes

    Every ALT node whose sequence is longer than max_len is replaced by two
    ALT nodes with the same chrom / start / end / sequences labels: one that
    keeps the first padding_len bases and one that keeps the last padding_len
    bases. In-edges of the original node are re-attached to the prefix node,
    out-edges to the suffix node, and the original node is deleted. The two
    new nodes are not linked to each other by this function.

    :param graph: graph to work on (modified in place)
    :param max_len: ALT nodes with a sequence longer than this are split
    :param padding_len: number of bases to keep from each end of the sequence
    """
    assert max_len >= 2 * padding_len
    # Materialise the node list first: nodes are added and removed in the loop.
    for node in list(graph.altNodes()):
        sequence = node["sequence"]
        if len(sequence) <= max_len:
            continue
        logging.info(f"Splitting long ALT node: {node['name']}")

        prefix = sequence[:padding_len]
        # sequence[-0:] is the *whole* sequence, so padding_len == 0 needs an
        # explicit empty slice (sequence[:0] keeps the original sequence type).
        suffix = sequence[-padding_len:] if padding_len else sequence[:0]

        n1 = graph.add_altNode(node["chrom"], node["start"], node["end"],
                               prefix,
                               node["sequences"])
        n2 = graph.add_altNode(node["chrom"], node["start"], node["end"],
                               suffix,
                               node["sequences"])

        # Copy the edge lists before re-wiring, since add_edge changes the
        # graph while we would otherwise still be iterating over its edges.
        for e in list(graph.inEdges(node)):
            graph.add_edge(graph.nodes[e["from"]], n1, e["sequences"])
        for e in list(graph.outEdges(node)):
            graph.add_edge(n2, graph.nodes[e["to"]], e["sequences"])
        graph.del_node(node)
예제 #2
0
def add_graph(graph1: GraphContainer, graph2: GraphContainer):
    """
    Merge the content of graph2 into graph1 (graph1 is modified in place).

    Reference nodes are added first, then alternate nodes, then edges; each
    edge is re-attached to the graph1 nodes found under the edge's "from" and
    "to" keys. Finally graph2.paths is added onto graph1.paths with ``+=``.

    :param graph1: graph receiving the nodes, edges and paths
    :param graph2: graph whose nodes, edges and paths are added to graph1
    """
    for ref in graph2.refNodes():
        chrom, start, end = ref["chrom"], ref["start"], ref["end"]
        graph1.add_refNode(chrom, start, end, ref["sequences"])

    for alt in graph2.altNodes():
        chrom, start, end = alt["chrom"], alt["start"], alt["end"]
        graph1.add_altNode(chrom, start, end, alt["sequence"], alt["sequences"])

    for link in graph2.edges.values():
        # Resolve both endpoints inside graph1, not graph2.
        source = graph1.nodes[link["from"]]
        target = graph1.nodes[link["to"]]
        graph1.add_edge(source, target, link["sequences"])

    graph1.paths += graph2.paths
예제 #3
0
    def get_graph(self, allele_graph=False):
        """ Create the paragraph representation of nodes and edges for this graph

        Steps, in order:
          1. add one reference node per reference allele and chain them;
          2. add one alt node per entry in self.alts;
          3. link consecutive, adjacent nodes along every haplotype;
          4. attach alt nodes to the flanking reference nodes (and, in
             allele-graph mode, to every node starting right after them);
          5. for every haplotype, label all in/out edges of a node with that
             haplotype when the node has no in/out edge labelled with it yet.

        :param allele_graph: create edges between any compatible allele pair (rather
                             than just following reference and given haplotypes)
        :return GraphContainer object
        :raises Exception: when two consecutive nodes of a haplotype are
                           inconsistent (see the check in the haplotype loop)
        """
        logging.info("Creating output graph")
        graph = GraphContainer()
        # create ref nodes
        pnode = None
        for ref in self.get_ref_alleles():
            # Node ends are inclusive (see the adjacency assert below), hence
            # ref.end - 1; presumably ref.end is an exclusive bound -- confirm.
            node = graph.add_refNode(self.chrom, ref.begin, ref.end - 1,
                                     ref.data.haplotypes)
            if pnode:
                # Consecutive reference nodes must abut exactly; they are
                # chained with an edge that carries no haplotype label.
                assert pnode["end"] + 1 == node["start"]
                graph.add_edge(pnode, node)
            pnode = node
        # Create alt nodes
        for alt in self.alts.values():
            graph.add_altNode(self.chrom, alt.start, alt.end, alt.sequence,
                              alt.haplotypes)

        # Create edges connecting nodes along a haplotype (or allele in alleleGraph mode)
        for haplo in self.get_haplotypes():
            # presumably nodes_by_haplo yields the nodes in positional order;
            # the pairwise checks below rely on it -- verify in GraphContainer
            nodes = graph.nodes_by_haplo(haplo)
            logging.info(
                f"Linking nodes in sequence {haplo}:\t{', '.join(n['name'] for n in nodes)}"
            )
            pnode = None
            for node in nodes:
                if pnode:
                    # Directly adjacent nodes get an edge labelled with this haplotype.
                    if pnode["end"] == node["start"] - 1:
                        graph.add_edge(pnode, node, [haplo])
                    # A zero-length node (end one before start) without sequence
                    # is exempt from the consistency check below.
                    pnode_is_ref_dummy = pnode[
                        "end"] == pnode["start"] - 1 and not pnode["sequence"]
                    # Otherwise the previous node must both start and end
                    # before the current node starts.
                    pnode_ends_before_node = pnode["end"] < node[
                        "start"] and pnode["start"] < node["start"]
                    if not pnode_is_ref_dummy and not pnode_ends_before_node:
                        raise Exception(
                            f"Inconsistent nodes for haplotype {haplo}: {pnode['name']}, {node['name']}"
                        )
                pnode = node

        # In alleleGraph mode link each alt node to all neighboring nodes
        # In haplotype mode link nodes without in/out edges to reference
        for node in graph.altNodes():
            # Incoming side: edge from the reference node ending right before this node.
            if allele_graph or not any(graph.inEdges(node)):
                graph.add_edge(
                    graph.refNode_ending_at[node["chrom"], node["start"] - 1],
                    node)
            # Outgoing side: edge to the reference node starting right after this node.
            if not any(graph.outEdges(node)):
                graph.add_edge(
                    node, graph.refNode_starting_at[node["chrom"],
                                                    node["end"] + 1])
            if allele_graph:
                # A node whose end lies before its start covers no reference
                # bases, i.e. it is an insertion.
                isInsertion = node["end"] < node["start"]
                # NOTE(review): this lookup is keyed by position only, whereas the
                # refNode_* lookups above are keyed by (chrom, position) -- confirm
                # that this matches how nodes_starting_at is indexed.
                for n in graph.nodes_starting_at[node["end"] + 1]:
                    # Don't loop by connecting multiple insertions at the same position
                    if not (isInsertion and n["end"] < n["start"]):
                        graph.add_edge(node, n)

        # For nodes that do not have determined in/out edges for a given haplotype
        # label all in/out edges as compatible with that haplotype
        # excluding edges that connect to another allele at the same vcfVariant (e.g. insertions)
        # NOTE(review): no such exclusion is visible in this loop; confirm whether
        # it is handled elsewhere (e.g. inside inEdges/outEdges) or is missing.
        for haplo in self.get_haplotypes():
            for node in graph.nodes_by_haplo(haplo):
                if not any(graph.inEdges(node, haplo)):
                    # presumably add_edge on an existing edge just adds the
                    # haplotype label to it -- verify in GraphContainer
                    for e in graph.inEdges(node):
                        graph.add_edge(graph.nodes[e["from"]], node, [haplo])
                # Every node on a haplotype is expected to end up with at least
                # one in-edge for it; there is no matching assert for out-edges.
                assert any(graph.inEdges(node, haplo))
                if not any(graph.outEdges(node, haplo)):
                    for e in graph.outEdges(node):
                        graph.add_edge(node, graph.nodes[e["to"]], [haplo])
        return graph