# NOTE(review): `node` and `adjs` are bound before this chunk; this assignment
# presumably closes the graph-loading loop -- confirm its indentation against
# the preceding lines. Only the node id (text before ':') of each adjacency
# entry is kept.
graph[node] = {adj.split(':')[0] for adj in adjs.split(',')}

# Load connected components
# Each line is split on ':'; the part after it is a comma-separated node list
CCs = []
with open('../connect_pgraph/out/pconnect1.txt') as file:
    for line in file:
        _, nodes = line.rstrip().split(':')
        CCs.append(set(nodes.split(',')))

OGs = []
# CCtypes[t] maps (number of OGs in a component) -> (number of components of type t)
#   0: component has no OGs
#   1: component and its single OG are equal
#   2: component has a single OG which is a subset of the component
#   3: component has multiple pairwise disjoint OGs
#   4: component has multiple non-disjoint OGs
CCtypes = [{} for _ in range(5)]
for CC in CCs:
    subgraph = {node: graph[node] for node in CC}

    # Cluster by triangle criterion
    subOGs = get_triangle_clusters(subgraph)
    OGs.append(subOGs)

    # Classify CCs
    # Each OG is an iterable of edges; flatten it to the set of nodes it touches
    subnOGs = [{node for edge in subOG for node in edge} for subOG in subOGs]
    if not subnOGs:
        CCtype = 0
    elif len(subnOGs) == 1:
        # Equal sizes are treated as equality (assumes OG nodes are drawn from CC -- TODO confirm)
        CCtype = 1 if len(subnOGs[0]) == len(CC) else 2
    elif any(not nOG1.isdisjoint(nOG2) for nOG1, nOG2 in combinations(subnOGs, 2)):
        # Generator + isdisjoint stops at the first overlapping pair and builds no intersection sets
        CCtype = 4
    else:
        CCtype = 3
    CCtypes[CCtype][len(subnOGs)] = CCtypes[CCtype].get(len(subnOGs), 0) + 1
rpgraph = {} for qppid, sppids in pgraph.items(): for sppid in sppids: try: r = qppid in pgraph[sppid] except KeyError: r = False if r: try: rpgraph[qppid].add(sppid) except KeyError: rpgraph[qppid] = {sppid} # Cluster by triangle criterion OGs = get_triangle_clusters(rpgraph) # Make output directory if not os.path.exists('out/'): os.mkdir('out/') # Write clusters to file with open('out/pclusters.txt', 'w') as outfile: for i, OG in enumerate(OGs): OGid = hex(i)[2:].zfill(4) outfile.write(OGid + ':' + '\t'.join([f'{node1},{node2}' for node1, node2 in OG]) + '\n') """ DEPENDENCIES ../hsps2pgraph/hsps2pgraph.py