def _append_edge_list_chain(graph, text, generic_feature, edge_list, vertices_before, vertices_after, beginning="(", delimiter="->"):
    """
    Insert replacement amino acid chains into the graph and record the
    edges needed to connect them.

    The replacement text is obtained via ``get_content(text, beginning,
    delimiter)`` and may contain several comma separated substitutions.
    For every substitution and every pair of vertex lists (taken in
    lockstep from ``vertices_before`` and ``vertices_after``) one new
    vertex per character is added to ``graph`` and consecutive vertices
    are linked directly.

    NOTE: The new vertices and the edges inside a chain are written to
    the graph immediately. The edges attaching a chain to the rest of the
    graph are only appended to ``edge_list`` as ``((source, target),
    qualifiers)`` entries.

    Parameters:
        graph: Graph offering ``vs``, ``es``, ``add_vertex`` and
            ``add_edges`` (presumably an igraph graph).
        text: Feature text handed to ``get_content``.
        generic_feature: Feature appended to the qualifiers of every
            edge leading INTO a new chain.
        edge_list: List which is extended in place.
        vertices_before: Vertex lists whose incoming edges are redirected
            onto the first vertex of a chain.
        vertices_after: Vertex lists whose outgoing edges are continued
            from the last vertex of a chain.
        beginning: Forwarded to ``get_content``.
        delimiter: Forwarded to ``get_content``.
    """
    # Retrieve the amino acids which should be inserted
    substitutions = get_content(text, beginning, delimiter)

    # Generalizing: multiple substitutions can be referenced at once
    for raw_substitution in substitutions.split(","):
        residues = raw_substitution.strip().replace(" ", "")

        # TODO is such a combination enough?
        for aa_in_list, aa_out_list in zip(vertices_before, vertices_after):
            # Incomplete information: either the start, the end or both
            # cannot be linked, so this pair is skipped
            if len(aa_in_list) == 0 or len(aa_out_list) == 0:
                continue

            # One new vertex per amino acid of the substitution
            chain_indices = []
            for residue in residues:
                new_index = graph.add_vertex().index
                graph.vs[new_index]["aminoacid"] = residue
                graph.vs[new_index]["accession"] = graph.vs[1]["accession"]
                if "isoform_accession" in aa_in_list[0].attributes():
                    graph.vs[new_index]["isoform_accession"] = aa_in_list[0]["isoform_accession"]
                chain_indices.append(new_index)

            # Link neighbouring vertices of the chain (nothing to do for a single vertex)
            for left, right in zip(chain_indices, chain_indices[1:]):
                graph.add_edges([(left, right)])

            # Entry and exit point of the chain
            first_node = chain_indices[0]
            last_node = chain_indices[-1]

            # Every edge entering a "before" vertex also has to enter the chain
            for aa_in in aa_in_list:
                edge_list.extend(
                    (
                        (incoming.source, first_node),
                        [*_get_qualifiers(incoming), generic_feature],
                    )
                    for incoming in list(graph.es.select(_target=aa_in))
                )

            # Every edge leaving an "after" vertex also has to leave the chain
            # (these keep their qualifiers, the feature is not added again)
            for aa_out in aa_out_list:
                edge_list.extend(
                    ((last_node, outgoing.target), _get_qualifiers(outgoing))
                    for outgoing in list(graph.es.select(_source=aa_out))
                )
def _append_edge_list_missing(graph, generic_feature, edge_list, v_before, v_after): """ TODO DESC! """ [__start_node__] = graph.vs.select(aminoacid="__start__") [__stop_node__] = graph.vs.select(aminoacid="__end__") # A sequence is missing! Just append an edge and its information # TODO is such a combination enough? # Here we iterate over all possiblites over two pairs of nodes and its edges for aa_in_list, aa_out_list in zip(v_before, v_after): for aa_in in aa_in_list: for aa_edge_in in list( graph.es.select(_target=aa_in)): # Get all incoming edges for aa_out in aa_out_list: for aa_edge_out in list(graph.es.select( _source=aa_out)): # Get all outgoing edges # Add corresponding edges and the qualifiers information # for that edge (At least the generic feature) # Only if they not point to start_end directly! (#special case e.g. in P49782) if aa_edge_in.source != __start_node__.index or aa_edge_out.target != __stop_node__.index: edge_list.append(( (aa_edge_in.source, aa_edge_out.target), [ *_get_qualifiers(aa_edge_in), generic_feature ], ))
def _create_edges_list_and_feature(start_nodes, end_nodes, start_idx, stop_idx, feature): """ Cleaves the ingoing edges of start_nodes and the outgoing edges of end_nodes. This function returns two lists containing the edges and the features, which can be later applied on the graph itself """ # Create the edge-list (cleaving the referenced peptide) edge_list = [] edge_feature = [] # Cleave the beginning for sn_iso in start_nodes: for sn in sn_iso: edge_list.append( (start_idx, sn.index)) # Add Ingoing edge from start edge_feature.append([feature]) for ie in sn.in_edges(): if ie.source != start_idx: # Check if ingoing node is start edge_list.append(( ie.source, stop_idx)) # Add outgoing edge from all in-going nodes edge_feature.append([*_get_qualifiers(ie), feature]) # Cleave the end for en_iso in end_nodes: for en in en_iso: edge_list.append((en.index, stop_idx)) # Add Outgoing edge to end edge_feature.append([feature]) for oe in en.out_edges(): if oe.target != stop_idx: # Check if ingoing node is start edge_list.append( (start_idx, oe.target )) # Add outgoing edge from all in-going nodes edge_feature.append([*_get_qualifiers(oe), feature]) return edge_list, edge_feature
def execute_signal(graph, signal_feature):
    """
    Add ONLY edges to the graph which skip the signal peptide.

    NOTE: The graph is transformed in place, nothing is returned!
    A feature whose end position is an ``UnknownPosition`` is ignored.

    Following Keys are set here:
    Nodes: <None>
    Edges: "qualifiers" ( -> adds SIGNAL)

    Raises:
        Exception: If more than one end point is found for a start point.
    """
    location = signal_feature.location
    if isinstance(location.end, UnknownPosition):
        # Without a known end position the entry carries no usable
        # information, so it is simply skipped
        return

    # The stop node is needed for the special case below
    [stop_vertex] = graph.vs.select(aminoacid="__end__")

    # Start and end position of the signal peptide
    # NOTE: + 1, since the start node occupies the position 0
    start_position = location.start + 1
    end_position = location.end + 0

    # All nodes at the start position and, per start node, its end node(s)
    start_vertices = list(graph.vs.select(position=start_position))
    end_vertices_per_start = [
        _get_nodes_from_position(graph, start_vertex, end_position)
        for start_vertex in start_vertices
    ]

    # Each start point is allowed to have at most one end point
    if any(len(end_vertices) > 1 for end_vertices in end_vertices_per_start):
        # TODO custom exception, something which should not happen!
        raise Exception("WARNING, there are multiple defined ENDPOINTS!!")

    # TODO should the signal peptide be exactly the same as in canonical? Or can we leave it as is for isoforms?
    # Should we check this? If so: do this here!!
    # TODO can we associate the end points with the start points, according to its index?

    # Collect ((source, target), qualifiers) for every edge to be added
    skip_edges = []
    for start_vertex, end_vertices in zip(start_vertices, end_vertices_per_start):
        for end_vertex in end_vertices:
            incoming = list(graph.es.select(_target=start_vertex))
            outgoing = list(graph.es.select(_source=end_vertex))

            # Each predecessor of the signal is linked to each successor of it
            skip_edges.extend(
                (
                    (edge_in.source_vertex, edge_out.target_vertex),
                    [*_get_qualifiers(edge_in), signal_feature],
                )
                for edge_in in incoming
                for edge_out in outgoing
            )

            # Special case: the signal end can go directly to the stop node
            skip_edges.append(((end_vertex, stop_vertex), [signal_feature]))

    # Bulk adding of the edges; the new edges start at the previous edge count
    first_new_edge = graph.ecount()
    graph.add_edges([edge for edge, _ in skip_edges])
    graph.es[first_new_edge:]["qualifiers"] = [qualifiers for _, qualifiers in skip_edges]