コード例 #1
0
def get_poagraph(maf: msa.Maf, metadata: Optional[msa.MetadataCSV]) -> \
        Tuple[List[graph.Node], Dict[msa.SequenceID, graph.Sequence]]:
    """Build poagraph nodes and sequences from a MAF multialignment.

    Every column of every alignment block is visited. Each distinct
    non-gap symbol found in a column ('-' is skipped) produces one new
    node, and that node is added to every sequence that has the symbol
    in this column.

    Args:
        maf: Multialignment whose ``filecontent`` is parsed as MAF.
        metadata: Optional metadata, forwarded to ``_init_poagraph``.

    Returns:
        Tuple ``(nodes, sequences)``: the list of created nodes and the
        mapping from sequence ID to its sequence.
    """

    blocks = list(AlignIO.parse(maf.filecontent, "maf"))
    nodes, sequences = _init_poagraph(blocks, metadata)

    # Both counters start at -1 and are incremented right before use,
    # so the first node and the first column get the value 0.
    last_node_id = graph.NodeID(-1)
    column_id = graph.ColumnID(-1)

    for block_id, block in enumerate(blocks):
        global_logger.info(f"Processing block {block_id}...")

        # The block width is taken from the first record of the block.
        for col in range(len(block[0].seq)):
            column_id += 1

            symbol_of = {
                msa.SequenceID(record.id): record[col]
                for record in block
            }

            # Distinct symbols of this column in sorted order, gaps removed.
            column_symbols = sorted(set(symbol_of.values()) - {'-'})

            # IDs that the nodes created for this column will receive.
            column_nodes_ids = [
                graph.NodeID(last_node_id + offset)
                for offset in range(1, len(column_symbols) + 1)
            ]

            for position, symbol in enumerate(column_symbols):
                last_node_id += 1

                base = graph.Base(symbol)
                # The helper receives the position within the column
                # (wrapped as NodeID) together with all IDs of the column.
                aligned_to = _get_next_aligned_node_id(
                    graph.NodeID(position), column_nodes_ids)
                new_node = graph.Node(node_id=last_node_id,
                                      base=base,
                                      aligned_to=aligned_to,
                                      column_id=graph.ColumnID(column_id),
                                      block_id=graph.BlockID(block_id))
                nodes.append(new_node)

                # Attach the new node to each sequence carrying this symbol.
                carriers = [seq_id
                            for seq_id, seq_symbol in symbol_of.items()
                            if seq_symbol == symbol]
                for seq_id in carriers:
                    sequences[seq_id] = _add_node_do_sequence(
                        sequence=sequences[seq_id],
                        node_id=last_node_id)

    return nodes, sequences
コード例 #2
0
def _get_poagraph_paths_and_nodes(po_lines: List[str],
                                  sequences_info: Dict[int, POSequenceInfo],
                                  sequences: Dict[msa.SequenceID, graph.Sequence]) -> \
        Tuple[List[graph.Node], Dict[msa.SequenceID, graph.Sequence]]:
    """Create nodes from the node lines and extend the sequences' paths.

    The node and path counts are read from ``po_lines[3]`` and
    ``po_lines[4]``. Node lines start at index ``5 + 2 * paths_count``,
    and nodes are numbered from 0 in the order of those lines.

    Args:
        po_lines: Input lines; the first character of a node line is
            used (upper-cased) as the node base.
        sequences_info: Maps the sequence ids found in a node line to
            objects whose ``name`` is a key of ``sequences``.
        sequences: Sequences whose ``paths`` are extended in place.

    Returns:
        Tuple ``(nodes, sequences)``: the created nodes and the same
        ``sequences`` mapping that was passed in.
    """
    nodes_count = int(_extract_line_value(po_lines[3]))
    paths_count = int(_extract_line_value(po_lines[4]))

    first_node_line = 5 + paths_count * 2
    node_line_numbers = range(first_node_line, first_node_line + nodes_count)
    nodes: List[graph.Node] = [None] * nodes_count

    for node_id, line_number in enumerate(node_line_numbers):
        node_line = po_lines[line_number]
        base = graph.Base(node_line[0].upper())

        # The first element (incoming nodes) is not needed here.
        _, po_sequences_ids, aligned_to = _extract_node_parameters(node_line)
        sequences_ids = [sequences_info[po_id].name
                         for po_id in po_sequences_ids]

        nodes[node_id] = graph.Node(graph.NodeID(node_id),
                                    base,
                                    graph.NodeID(aligned_to))

        for seq_id in sequences_ids:
            paths = sequences[seq_id].paths
            if len(paths) != 1:
                # Anything other than exactly one path: start a new path.
                paths.append(graph.SeqPath([graph.NodeID(node_id)]))
            else:
                # Exactly one path so far: extend it.
                paths[0].append(graph.NodeID(node_id))

    return nodes, sequences
コード例 #3
0
def nid(x):
    """Wrap *x* in ``graph.NodeID`` (short alias)."""
    node_id = graph.NodeID(x)
    return node_id
コード例 #4
0
def _get_max_node_id(nodes: List[graph.Node]) -> graph.NodeID:
    """Return the last list index of *nodes* wrapped as a ``NodeID``.

    For an empty list ``graph.NodeID`` is called with ``-1``.
    NOTE(review): this equals the maximum node ID only if node IDs
    match list positions - confirm against callers.
    """
    last_index = len(nodes) - 1
    return graph.NodeID(last_index)
コード例 #5
0
def nid(x):
    """Shorthand for ``graph.NodeID(x)``."""
    return graph.NodeID(x)


def bid(x):
    """Shorthand for ``graph.BlockID(x)``."""
    return graph.BlockID(x)
コード例 #6
0
def nid(x):
    """Shorthand for ``graph.NodeID(x)``."""
    return graph.NodeID(x)


def bid(x):
    """Shorthand for ``graph.Base(x)``."""
    return graph.Base(x)