Ejemplo n.º 1
0
def _append_edge_list_chain(graph,
                            text,
                            generic_feature,
                            edge_list,
                            vertices_before,
                            vertices_after,
                            beginning="(",
                            delimiter="->"):
    """
    Add amino acid chains as new vertices and queue the edges linking them in.

    The content returned by ``get_content(text, beginning, delimiter)`` is
    split at "," and every piece (with surrounding whitespace and all spaces
    removed) is treated as one chain. For every chain and every pair taken
    from ``zip(vertices_before, vertices_after)`` one vertex per character is
    added to the graph and consecutive vertices are connected.

    NOTE: This mutates ``graph`` (new vertices and chain edges) and
    ``edge_list`` in place and returns nothing. The edges which connect the
    chain to the rest of the graph are NOT added here, only queued.

    Following Keys are set here:
    Nodes: "aminoacid", "accession", "isoform_accession" (only if the first
           vertex of the "before" list reports this attribute)
    Edges: <None>

    Parameters:
    graph            -- graph to extend (presumably an igraph.Graph, TODO confirm)
    text             -- feature text forwarded to ``get_content``
    generic_feature  -- appended to the qualifiers of every queued edge which
                        leads INTO a new chain
    edge_list        -- list receiving ``((source, target), qualifiers)`` entries
    vertices_before  -- per entry: the vertices whose incoming edges are
                        redirected to the first chain vertex
    vertices_after   -- per entry: the vertices whose outgoing edges are
                        continued from the last chain vertex
    beginning        -- forwarded to ``get_content``
    delimiter        -- forwarded to ``get_content``

    Raises IndexError if a chain is empty while a complete
    before/after pair exists (there is no first/last vertex then).
    """
    # Get the amino acid chains referenced by the feature text
    # NOTE(review): the exact extraction rules live in get_content -- confirm there
    chains = get_content(text, beginning, delimiter)

    # Generalizing: It can reference multiple substitutions
    for raw_chain in chains.split(","):
        chain = raw_chain.strip().replace(" ", "")
        # TODO is such a combination enough?
        for aa_in_list, aa_out_list in zip(vertices_before, vertices_after):
            if len(aa_out_list) == 0 or len(aa_in_list) == 0:
                # Skip this entry, since we do not have complete information
                # -> Not possible to link either start or end or both
                continue

            # These values are identical for every vertex of this chain,
            # so they are looked up once instead of once per amino acid
            accession = graph.vs[1]["accession"]
            anchor = aa_in_list[0]
            copy_isoform = "isoform_accession" in anchor.attributes()
            isoform_accession = (
                anchor["isoform_accession"] if copy_isoform else None)

            # Add each individual amino acid as a node
            chain_idcs = []
            for amino_acid in chain:
                vertex = graph.add_vertex()
                graph.vs[vertex.index]["aminoacid"] = amino_acid
                graph.vs[vertex.index]["accession"] = accession
                if copy_isoform:
                    graph.vs[vertex.index][
                        "isoform_accession"] = isoform_accession
                chain_idcs.append(vertex.index)

            # Add edges between them (if needed). All of them are added with
            # a single call, in the same order as before
            if len(chain_idcs) > 1:
                graph.add_edges(list(zip(chain_idcs, chain_idcs[1:])))

            # Get the first and last node index
            first_node, last_node = chain_idcs[0], chain_idcs[-1]

            # And add them to the edges list (to connect them to the rest of the graph)
            for aa_in in aa_in_list:
                # All incoming edges are redirected to the chain start
                for aa_edge_in in graph.es.select(_target=aa_in):
                    edge_list.append((
                        (aa_edge_in.source, first_node),
                        [*_get_qualifiers(aa_edge_in), generic_feature],
                    ))
            for aa_out in aa_out_list:
                # All outgoing edges are continued from the chain end and
                # keep their qualifiers as they are
                for aa_edge_out in graph.es.select(_source=aa_out):
                    edge_list.append(((last_node, aa_edge_out.target),
                                      _get_qualifiers(aa_edge_out)))
Ejemplo n.º 2
0
def _append_edge_list_missing(graph, generic_feature, edge_list, v_before,
                              v_after):
    """
    Queue edges which skip over a sequence that is marked as missing.

    For every pair taken from ``zip(v_before, v_after)`` each incoming edge of
    a "before" vertex is combined with each outgoing edge of an "after"
    vertex. For each combination an entry
    ``((incoming.source, outgoing.target), qualifiers)`` is appended to
    ``edge_list``, where the qualifiers are those of the incoming edge
    followed by ``generic_feature``.

    A combination which would link the start node directly with the end node
    is not queued (special case e.g. in P49782).

    NOTE: Only ``edge_list`` is modified. The graph itself is left untouched.

    Parameters:
    graph            -- graph to read from (presumably an igraph.Graph, TODO confirm)
    generic_feature  -- appended to the qualifiers of every queued edge
    edge_list        -- list receiving ``((source, target), qualifiers)`` entries
    v_before         -- per entry: vertices whose incoming edges are used
    v_after          -- per entry: vertices whose outgoing edges are used

    Raises ValueError if the graph does not contain exactly one "__start__"
    and exactly one "__end__" vertex (single element unpacking).
    """
    [start_node] = graph.vs.select(aminoacid="__start__")
    [stop_node] = graph.vs.select(aminoacid="__end__")
    start_idx, stop_idx = start_node.index, stop_node.index
    # A sequence is missing! Just append an edge and its information

    # TODO is such a combination enough?
    # Here we iterate over all possibilities over two pairs of nodes and its edges
    for aa_in_list, aa_out_list in zip(v_before, v_after):
        # The outgoing edges do not depend on the incoming edge (the graph is
        # not modified here), so they are selected once per pair instead of
        # once per incoming edge. The order (vertex by vertex) is kept.
        edges_out = [
            aa_edge_out for aa_out in aa_out_list
            for aa_edge_out in graph.es.select(_source=aa_out)
        ]

        for aa_in in aa_in_list:
            for aa_edge_in in graph.es.select(_target=aa_in):
                for aa_edge_out in edges_out:
                    # Add corresponding edges and the qualifiers information
                    # for that edge (At least the generic feature)
                    # Only if they do not point from start to end directly!
                    if (aa_edge_in.source != start_idx
                            or aa_edge_out.target != stop_idx):
                        edge_list.append((
                            (aa_edge_in.source, aa_edge_out.target),
                            [
                                *_get_qualifiers(aa_edge_in),
                                generic_feature
                            ],
                        ))
Ejemplo n.º 3
0
def _create_edges_list_and_feature(start_nodes, end_nodes, start_idx, stop_idx,
                                   feature):
    """
    Build the edges (and their qualifiers) needed to cleave out a peptide.

    The peptide is given by its first nodes (``start_nodes``) and its last
    nodes (``end_nodes``), both as a list of lists of nodes. Nothing is
    applied on a graph here.

    Returns two lists of the same length:
    - the edges as ``(source, target)`` tuples
    - the qualifiers (a list) belonging to the edge at the same position
    """
    edges = []
    qualifiers = []

    def _queue(source, target, edge_qualifiers):
        # Keeps both lists aligned: one edge, one qualifiers entry
        edges.append((source, target))
        qualifiers.append(edge_qualifiers)

    # Cleave the beginning
    for first_nodes in start_nodes:
        for node in first_nodes:
            # The peptide can be entered directly from start
            _queue(start_idx, node.index, [feature])
            for in_edge in node.in_edges():
                if in_edge.source == start_idx:
                    # The predecessor is the start node itself, nothing to end
                    continue
                # Everything in front of the peptide can end here
                _queue(in_edge.source, stop_idx,
                       [*_get_qualifiers(in_edge), feature])

    # Cleave the end
    for last_nodes in end_nodes:
        for node in last_nodes:
            # The peptide can go directly to end
            _queue(node.index, stop_idx, [feature])
            for out_edge in node.out_edges():
                if out_edge.target == stop_idx:
                    # The successor is the end node itself, nothing to begin
                    continue
                # Everything behind the peptide can be entered from start
                _queue(start_idx, out_edge.target,
                       [*_get_qualifiers(out_edge), feature])

    return edges, qualifiers
Ejemplo n.º 4
0
def execute_signal(graph, signal_feature):
    """
    Add the edges which allow to skip the signal peptide. ONLY edges are added.

    NOTE: The graph is modified in place, nothing is returned!

    Following Keys are set here:
    Nodes: <None>
    Edges: "qualifiers" ( -> adds SIGNAL)

    Raises an Exception if more than one end point is found for a start point.
    """
    if isinstance(signal_feature.location.end, UnknownPosition):
        # Without a known end position the entry carries no usable
        # information, so it is simply skipped
        return

    # The end node of the graph
    [stop_vertex] = graph.vs.select(aminoacid="__end__")

    # Positions of the first and last amino acid of the signal peptide
    # NOTE: + 1, since the start node occupies the position 0
    first_position = signal_feature.location.start + 1
    last_position = signal_feature.location.end + 0

    # All nodes at the first position and, for each of them, the node(s) at
    # the last position
    signal_starts = list(graph.vs.select(position=first_position))
    signal_ends = [
        _get_nodes_from_position(graph, start_vertex, last_position)
        for start_vertex in signal_starts
    ]

    # Each start is only allowed to have a single end point
    if any(len(end_vertices) > 1 for end_vertices in signal_ends):
        # TODO custom exception, something which should not happen!
        raise Exception("WARNING, there are multiple defined ENDPOINTS!!")

    # TODO should the signal peptide be exactly the same as in canonical? Or can we leave it as is for isoforms?
    # Should we check this? If so: do this here!!
    # TODO can we associate the end points with the start points, according to its index?

    # Collect the new edges and their qualifiers (same position, same edge)
    new_edges = []
    new_qualifiers = []
    for start_vertex, end_vertices in zip(signal_starts, signal_ends):
        for end_vertex in end_vertices:
            # Edges leading into the signal and edges leaving the signal
            incoming = list(graph.es.select(_target=start_vertex))
            outgoing = list(graph.es.select(_source=end_vertex))

            # Every predecessor gets connected with every successor,
            # which skips the signal
            for edge_in in incoming:
                for edge_out in outgoing:
                    new_edges.append(
                        (edge_in.source_vertex, edge_out.target_vertex))
                    new_qualifiers.append(
                        [*_get_qualifiers(edge_in), signal_feature])

            # Special case the signal end can go directly to the stop node
            new_edges.append((end_vertex, stop_vertex))
            new_qualifiers.append([signal_feature])

    # Bulk adding of edges into the graph. The new edges are placed behind
    # the already existing ones, so they can be addressed via slicing
    first_new_edge = graph.ecount()
    graph.add_edges(new_edges)
    graph.es[first_new_edge:]["qualifiers"] = new_qualifiers