def group_novel_isoforms(new_isoforms, all_filter_passing_query_isoforms, pred_samfile_path):
    """Label mutually equivalent novel isoforms with a shared ``new_isoform_<i>`` id.

    An undirected graph is built with one node per name in ``new_isoforms``.
    Two query records are connected when ``is_same_isoform_cigar`` holds in
    both directions. Every maximal clique of that graph then receives one
    label, and each query name is mapped to the label of a clique it is in.

    Args:
        new_isoforms: Collection of hashable query names considered novel.
        all_filter_passing_query_isoforms: Iterable of records exposing a
            ``query_name`` attribute (presumably pysam aligned segments --
            confirm against the caller).
        pred_samfile_path: Unused. Kept so existing callers keep working; the
            previous implementation opened this path with pysam but never
            read from, or closed, the resulting handle.

    Returns:
        dict mapping each query name to ``"new_isoform_<i>"``, where ``i`` is
        the position of its clique after sorting the cliques by size
        (ascending). A name that belongs to several maximal cliques keeps the
        label of the last, i.e. largest, one.
    """
    # Materialise once so the names can be both looked up and iterated in order.
    node_names = list(new_isoforms)
    novel_names = set(node_names)  # O(1) membership instead of scanning a list

    query_new_isoforms = [
        q_isoform
        for q_isoform in all_filter_passing_query_isoforms
        if q_isoform.query_name in novel_names
    ]

    G = nx.Graph()
    G.add_nodes_from(node_names)
    print("nr new:", len(query_new_isoforms))

    # The edge condition is symmetric in (i1, i2) and the graph is undirected,
    # so each unordered pair only needs to be examined once.
    for idx, i1 in enumerate(query_new_isoforms):
        for i2 in query_new_isoforms[idx + 1:]:
            if i1.query_name == i2.query_name:
                continue
            if is_same_isoform_cigar(i1, i2) and is_same_isoform_cigar(i2, i1):
                G.add_edge(i1.query_name, i2.query_name)

    print(G.number_of_nodes())
    print(G.number_of_edges())

    maximal_cliques = list(nx.find_cliques(G))
    clique_sizes = [len(cl) for cl in maximal_cliques]
    print(clique_sizes)
    print(sum(clique_sizes))
    print(len(clique_sizes), "unique splice sites isoforms")

    # Smallest cliques first: a name shared by several cliques ends up with
    # the label of the largest clique because later assignments overwrite.
    queries_to_new = {}
    for i, cl in enumerate(sorted(maximal_cliques, key=len)):
        label = "new_isoform_" + str(i)
        for q_acc in cl:
            queries_to_new[q_acc] = label
    return queries_to_new
def get_cliques(netw, node):
    """Return the largest maximal clique of ``netw`` that contains ``node``.

    The search is restricted to the subgraph induced by ``node`` and its
    neighbours, since any clique containing ``node`` lies entirely within
    that neighbourhood.

    Args:
        netw: networkx graph to search.
        node: Node of ``netw`` whose largest clique is wanted.

    Returns:
        list of nodes forming the first largest clique found that contains
        ``node``; an empty list if no such clique is reported.
    """
    # ``neighbors`` returns an iterator on networkx >= 2 (no ``append``), and
    # ``list.append`` returns None in any case, so the node list has to be
    # built explicitly before it is handed to ``subgraph``.
    neighbourhood = list(netw.neighbors(node))
    neighbourhood.append(node)

    # Copy the subgraph view into an independent graph before enumerating.
    local_graph = nx.Graph(netw.subgraph(neighbourhood))

    best = []
    for clique in nx.find_cliques(local_graph):
        if node in clique and len(clique) > len(best):
            best = clique
    return best
def all_large_cliques_upto(G, limit=1000):
    """Yield at most ``limit`` maximal cliques of ``G`` that have 3+ nodes.

    Cliques are enumerated on a copy of ``G`` taken when iteration starts.
    Enumeration stops as soon as ``limit`` cliques have been yielded or the
    underlying clique iterator is exhausted, whichever comes first.

    Args:
        G: networkx graph to enumerate cliques of.
        limit: Maximum number of cliques to yield.

    Yields:
        list of nodes for each maximal clique with at least three members.
    """
    clique_stream = nx.find_cliques(G.copy())
    for _ in range(limit):
        # Pull from the shared iterator until a big-enough clique turns up;
        # None signals that the enumeration ran dry.
        large = next((c for c in clique_stream if len(c) >= 3), None)
        if large is None:
            return
        yield large