import networkx as nx
from networkx.algorithms.approximation import max_clique


def cliqueToDomain(dictline):  # for each gene tree
    dd, value = dictline.split('\t')
    master = {dd: eval(value)}
    G = nx.Graph()
    for ii in master[dd]:  # for each sequence in this gene tree
        for jj in range(len(master[dd][ii])):  # for each annotated domain datum
            if 'Linker' not in master[dd][ii][jj][0]:
                coordinates_and_ids = master[dd][ii][jj]
                G.add_node(
                    ii + '.' + str(jj),
                    coords=coordinates_and_ids[1:3],
                    pid=coordinates_and_ids[3]
                )  # add it to the graph; coords = (start, end) pair, pid = Pfam id
    for kk in G.nodes():
        ens_id = kk.split('.')[0]  # get the ID without the domain number
        start_kk = G.nodes[kk]['coords'][0]  # start of the kth domain
        end_kk = G.nodes[kk]['coords'][1]  # end of the kth domain
        for mm in G.nodes():
            start_mm = G.nodes[mm]['coords'][0]  # start of the mth domain
            end_mm = G.nodes[mm]['coords'][1]  # end of the mth domain
            mid_kk = (start_kk + end_kk) / 2
            mid_mm = (start_mm + end_mm) / 2
            if (mid_kk < end_mm and mid_kk > start_mm) and (mid_mm < end_kk and mid_mm > start_kk):
                G.add_edge(mm, kk)  # connect domains whose midpoints fall inside each other's span
    G_deep = G.copy()
    cliques = []
    # get list of nodes in cliques, then remove them from the graph to solve the problem
    # of a single domain being a member of two cliques
    while True:
        maxclique = list(max_clique(G))
        cliques.append(maxclique)
        G.remove_nodes_from(maxclique)
        if G.number_of_nodes() == 0:
            break
    # Domain_0, Domain_1, etc. are keys, while values are a list of tuples like (ensembl_id, coords)
    domain_families = {}
    cliques.sort(key=lambda x: G_deep.nodes[x[0]]['coords'][0])
    # print([G_deep.nodes[x[0]]['coords'][0] for x in cliques])
    dom_idx = 0
    ddout = {}
    for i in cliques:
        for j in i:
            pid, domain = j.split('.')
            start_out, end_out = G_deep.nodes[j]['coords']
            pfam = G_deep.nodes[j]['pid']
            if pid in ddout:
                ddout[pid].append(['Domain_' + str(dom_idx), start_out, end_out, pfam])
            else:
                ddout[pid] = [['Domain_' + str(dom_idx), start_out, end_out, pfam]]
        dom_idx += 1
    return [dd, str(ddout)]
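# The function above groups Pfam domain annotations into families by linking
# domains whose midpoints fall inside each other's span and then repeatedly
# peeling off maximum cliques. A minimal, self-contained sketch of that grouping
# idea, using the imports added above (toy intervals and node names invented for
# illustration; not part of the original pipeline):
def _overlap_clique_sketch():
    intervals = {'geneA.0': (10, 60), 'geneB.0': (15, 65), 'geneC.0': (200, 260)}
    H = nx.Graph()
    H.add_nodes_from(intervals)
    for k, (ks, ke) in intervals.items():
        for m, (ms, me) in intervals.items():
            if k == m:
                continue
            k_mid, m_mid = (ks + ke) / 2, (ms + me) / 2
            if ms < k_mid < me and ks < m_mid < ke:
                H.add_edge(k, m)
    return max_clique(H)  # expected: the overlapping pair {'geneA.0', 'geneB.0'}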
import numpy as np
import networkx as nx
from networkx.algorithms.approximation import max_clique


def max_clique_filter(self, c_pnts, n_pnts, dist_thrs=0.2, min_points=6):
    num_points = c_pnts.shape[0]
    graph = nx.Graph()
    graph.add_nodes_from(list(range(num_points)))
    if c_pnts.shape[0] < min_points:
        raise Exception('Too few points')
    clique_len = 0
    while clique_len < min_points:
        for i in range(num_points):
            diff_1 = c_pnts[i, :] - c_pnts
            diff_2 = n_pnts[i, :] - n_pnts
            dist_1 = np.linalg.norm(diff_1, axis=1)
            dist_2 = np.linalg.norm(diff_2, axis=1)
            diff = np.abs(dist_2 - dist_1)
            wIdx = np.where(diff < dist_thrs)
            for i_w in wIdx[0]:
                graph.add_edge(i, i_w)
        cliques = nx.algorithms.find_cliques(graph)  # exact clique generator (currently unused)
        _clique = max_clique(graph)
        clique_len = len(_clique)
        dist_thrs *= 2  # relax the threshold for the next pass if the clique is still too small
    idxs = list(_clique)
    return idxs, dist_thrs
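# A small self-contained sketch of the idea behind max_clique_filter above, reusing
# the numpy/networkx imports just added (toy data, not from the original source):
# correspondences that belong to the same rigid motion preserve pairwise distances,
# so mutually consistent matches form a clique in the consistency graph, and the
# (approximate) maximum clique tends to recover the uncorrupted matches.
def _rigid_consistency_demo(dist_thrs=0.05):
    rng = np.random.default_rng(0)
    c_pnts = rng.uniform(size=(10, 3))
    theta = 0.3
    R = np.array([[np.cos(theta), -np.sin(theta), 0.0],
                  [np.sin(theta),  np.cos(theta), 0.0],
                  [0.0, 0.0, 1.0]])
    n_pnts = c_pnts @ R.T + np.array([1.0, -2.0, 0.5])  # rigid motion preserves distances
    n_pnts[:3] += rng.uniform(0.5, 1.0, size=(3, 3))    # corrupt the first three matches
    graph = nx.Graph()
    graph.add_nodes_from(range(len(c_pnts)))
    for i in range(len(c_pnts)):
        d1 = np.linalg.norm(c_pnts[i] - c_pnts, axis=1)
        d2 = np.linalg.norm(n_pnts[i] - n_pnts, axis=1)
        for j in np.where(np.abs(d2 - d1) < dist_thrs)[0]:
            if i != int(j):
                graph.add_edge(i, int(j))
    return sorted(max_clique(graph))  # typically the seven uncorrupted indices 3..9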
def dfa_id_encodings(
        apta: APTA,
        sym_mode: SymMode = None,
        extra_clauses: ExtraClauseGenerator = lambda *_: (),
        bounds: Bounds = (None, None)) -> Encodings:
    """Iterator of codecs and clauses for DFAs of increasing size."""
    cgraph = apta.consistency_graph()
    clique = max_clique(cgraph)
    max_needed = len(apta.nodes)
    low, high = bounds

    # Tighten lower bound.
    if low is None:
        low = 1
    low = max(low, len(clique))

    if (low > max_needed) and ((high is None) or (low < high)):
        high = low  # Will find something at low if one exists.
    elif high is None:
        high = max_needed
    else:
        high = min(high, max_needed)

    if high < low:
        raise ValueError('Empty bound range!')

    for n_colors in range(low, high + 1):
        codec = Codec.from_apta(apta, n_colors, sym_mode=sym_mode)
        clauses = list(encode_dfa_id(apta, codec, cgraph, clique))
        clauses.extend(list(extra_clauses(apta, codec)))
        yield codec, clauses
def extract_max_clique(Gx):
    '''
    Extract the maximum clique from graph Gx.
    Return the max clique as a graph.
    '''
    max_clique_ = clique.max_clique(Gx)
    G_max = nx.subgraph(Gx, max_clique_)
    return G_max
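# A minimal usage sketch for extract_max_clique (not from the original source).
# It assumes the surrounding module imports networkx as nx and
# networkx.algorithms.approximation's clique module, as the function body implies.
# Note that clique.max_clique is an approximation, so the returned subgraph is a
# large clique but not guaranteed to be the true maximum.
def _extract_max_clique_demo():
    G = nx.barbell_graph(5, 2)       # two K5 cliques joined by a short path
    core = extract_max_clique(G)
    return sorted(core.nodes())      # typically the five nodes of one K5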
def main():
    # g = ds.getDataWithCost('/home/ankursarda/Projects/graph-analytics/networkx/data/sample_data1.adj')
    # res = cyc.cycle_basis(g)
    # res = cyc.find_cycle(g)
    g = ds.getData('/home/ankursarda/Projects/graph-analytics/networkx/data/sample_data1.adj')
    # iset2, clique2 = cq.clique_removal(g)
    iset3 = cq.max_clique(g)
    # print(max(clique2), end='\n')
    print(iset3)
def compute_max_clique_size(self):
    """Computes the size of the maximum clique in the network of neurons.

    A maximum clique of a graph G is a clique such that there is no clique
    with more vertices.
    https://en.wikipedia.org/wiki/Clique_(graph_theory)#Definitions
    """
    return len(clique.max_clique(self.network))
def split(X):
    """Finds a split for a given language that contains neither the empty string
    nor words of length 1.
    Input: a set of strings X, with epsilon not in X and X not a subset of Sigma.
    Output: sets A, B, C for which AB + C = X and A, B are nontrivial."""
    G = buildCharacteristicGraph(X)
    K = max_clique(G)
    A = frozenset(u for (u, w) in K)
    B = frozenset(w for (u, w) in K)
    C = frozenset(X - catenate(A, B))
    return (A, B, C)
def Run(self):
    if self.IsCompleted:
        return

    items = sorted(self.Items, reverse=True)

    # Build the conflict graph and compute a maximal clique.
    items, newNumberOfItems = self.RemoveLargeItems(items, self.Bin.Dy, self.Bin.Dx)
    self.DetermineConflicts(items, self.Bin.Dy, self.Bin.Dx)

    conflictGraph = nx.Graph()
    conflictGraph.add_nodes_from([item.Id for i, item in enumerate(items)])
    for i in range(len(items)):
        itemI = items[i]
        for j in range(len(items)):
            itemJ = items[j]
            if frozenset((itemI.Id, itemJ.Id)) in self.IncompatibleItems:
                conflictGraph.add_edge(itemI.Id, itemJ.Id)

    maxClique = clique.max_clique(conflictGraph)
    sortedMaxClique = sorted(maxClique)

    # Renumber items so that the clique members come first
    # (assumes item Ids coincide with positions in `items`).
    newItems = []
    for i, oldIndex in enumerate(sortedMaxClique):
        newItems.append(Item(i, items[oldIndex].Dx, items[oldIndex].Dy))
    for i, item in enumerate(items):
        if item.Id in sortedMaxClique:
            continue
        newItems.append(Item(len(newItems), items[item.Id].Dx, items[item.Id].Dy))

    self.IncompatibleItems.clear()
    self.DetermineConflicts(newItems, self.Bin.Dy, self.Bin.Dx)

    newNumberOfItems = len(newItems)
    self.UpperBoundsBin = newNumberOfItems

    self.FixItemToBin = SymmetryBreaking.DetermineFixedItems(self.IncompatibleItems, newNumberOfItems)
    self.BinDomains = SymmetryBreaking.CreateReducedBinDomains(self.IncompatibleItems, newNumberOfItems, self.UpperBoundsBin, self.FixItemToBin)
    self.ItemPlacementPatternsX, self.ItemPlacementPatternsY, self.GlobalPlacementPatternsX = SymmetryBreaking.CreateBinDependentPlacementPatterns(self.IncompatibleItems, self.FixItemToBin, newItems, self.UpperBoundsBin, self.Bin, self.PlacementPointStrategy)

    self.ProcessedItems = newItems
    self.IsCompleted = True
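# Sketch of why the maximal clique of the conflict graph is useful here (toy data,
# not the original Item/Bin classes): items that pairwise conflict can never share
# a bin, so the clique size is a lower bound on the bin count and its members can
# be assigned to distinct bins up front for symmetry breaking.
import networkx as nx
from networkx.algorithms.approximation import clique

def _conflict_clique_lower_bound(item_ids, incompatible_pairs):
    conflictGraph = nx.Graph()
    conflictGraph.add_nodes_from(item_ids)
    conflictGraph.add_edges_from(incompatible_pairs)
    maxClique = clique.max_clique(conflictGraph)
    return len(maxClique), sorted(maxClique)

# e.g. _conflict_clique_lower_bound(range(5), [(0, 1), (1, 2), (0, 2)]) -> (3, [0, 1, 2])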
def __init__(self, graph, precision=1e-5):
    self.graph = graph
    self.adj_matrix = nx.adjacency_matrix(self.graph)
    self.precision = precision
    self.nodes = self.graph.nodes
    self.ind_sets = []
    self.not_connected = nx.complement(self.graph).edges  # complement of the graph
    # self.current_maximum_clique_len = len(max_clique(self.graph))
    # self.heuristic_max_clique =
    # self.heuristic_max_clique_len =
    # print(self.heuristic_max_clique_len)
    self.current_max_clique = max_clique(self.graph)
    self.current_maximum_clique_len = len(self.current_max_clique)
    self.branch_num = 0
    self.get_ind_sets()
    self.reduced_master_problem = self.construct_reduced_master_problem()
    self.mwis_problem = None
    self.current_obj_values = []
def save_result_for_1_file(filename):
    logger.debug('start save')
    g = data.DataDIMACS.load(filename)
    for key in result.keys():
        result[key].append(None)
    result["name"][-1] = filename
    result["num_vertices"][-1] = len(g.vertices)
    logger.info(f'{filename}')

    g_nx = g.to_nx_graph()
    start_time = time.time()
    max_clique = list(clique.max_clique(g_nx))
    finish_time = time.time()

    res = graph.NxMCP(g_nx, max_clique)
    ans = res.is_right_max_clique()
    is_right, not_neighbor = ans[0], ans[1]

    result["time"][-1] = finish_time - start_time
    result["is_right"][-1] = is_right
    result["max_clique"][-1] = res.max_clique
    result["size_max_clique"][-1] = len(res.max_clique)
    result["wrong_vertices"][-1] = not_neighbor

    df = pd.DataFrame.from_dict(result)
    df.to_csv('DIMACS_all_nx.csv', sep=';')
    logger.info(f'{filename} time-{finish_time - start_time}')
import pandas as pd
import networkx as nx

input_data = pd.read_csv('ITALIAN_GANGS.csv', index_col=0)
G = nx.from_numpy_matrix(input_data.values)
print(input_data)
print(input_data.values)
print(nx.info(G))

degree_sequence = sorted([d for n, d in G.degree()], reverse=True)
dmax = max(degree_sequence)
dmin = min(degree_sequence)
print("Maximum degree: ", dmax)
print("Minimum degree: ", dmin)
print("Average degree: ", sum(degree_sequence) / len(degree_sequence))
print("Average degree centrality: ",
      sum(nx.degree_centrality(G).values()) / len(nx.degree_centrality(G)))

from networkx.algorithms.community import greedy_modularity_communities

c = list(greedy_modularity_communities(G))
print("Number of communities: ", len(c))
print("Transitivity: ", nx.transitivity(G))

from networkx.algorithms.approximation import clique

print("Maximum clique number: ", len(clique.max_clique(G)))
print("Max k-core: ", max(nx.core_number(G).values()))
print("Number of triangles: ", sum(list(nx.triangles(G).values())))
print("Degree of assortativity: ", nx.degree_assortativity_coefficient(G))
      nx.edge_connectivity(G))
print()

# c) Minimum vertex coloring
print("c) Colors:", max(nx.greedy_color(G).values()) + 1,
      ", Z =", nx.greedy_color(G))
print()

# d) Minimum edge coloring
print("d) Colors:", max(nx.greedy_color(nx.line_graph(G)).values()) + 1,
      ", E =", nx.greedy_color(nx.line_graph(G)))
print()

# e) Maximum clique
print("e) Q =", clique.max_clique(G))
print()

# f) Maximum stable set
print("f) S =", independent_set.maximum_independent_set(G))
print()

# g) Maximum matching
print("g) M =", nx.max_weight_matching(G))
print()

# h) Minimum vertex cover
print("h) R =", vertex_cover.min_weighted_vertex_cover(G))
print()

# i) Minimum edge cover
def main():
    if preInput:
        for filename in os.listdir(rootFolder + '/preInput'):
            print(filename)
            with open(rootFolder + '/preInput/' + filename, 'r') as oneStrFile:
                mainRefseq = oneStrFile.read().replace('\n', '')
            blast = initialBlast(filename, mainRefseq)
            initBlastList = parseInitialBlast(blast, qCoverLimit, evalueLimit)
            with open(rootFolder + '/Input/' + filename, 'w') as blastResults:
                blastResults.write('\n'.join(list(dict.fromkeys(initBlastList))))

    if mergeInput:
        mergedSet = set()
        for filename in os.listdir(rootFolder + '/Input'):
            with open(rootFolder + '/Input/' + filename, 'r') as singleFile:
                singleContent = singleFile.read()
                mergedSet = mergedSet | set(singleContent.split('\n'))
        mergedSet.discard('')
        with open(rootFolder + '/Input/merged.txt', 'w') as mergedFile:
            mergedFile.write('\n'.join(mergedSet))

    if doMainAnalysis:
        for filename in os.listdir(rootFolder + '/Input'):
            # proteins = checkPreviousPickle(
            #     os.path.splitext(filename)[0],
            #     '/Previous_Proteins'
            # )
            proteins = False
            if not proteins:
                proteins = getSequences(filename, dict())
                proteins = getIsoforms(proteins)
                proteins = getSpeciesName(proteins)
                toDel = list()
                for r in proteins.keys():
                    if proteins[r].species is None:
                        toDel.append(r)
                        print('SOMETHING BADD!!!')
                for r in toDel:
                    del proteins[r]
                savePickle(os.path.splitext(filename)[0], proteins, '/Previous_Proteins')
            print(str(datetime.datetime.now()) + ': "proteins" ready')

            blastDict = dict()
            chunksForBlast = dict()
            counter = 0
            for p in proteins.values():
                chunksForBlast[p.refseq] = p
                counter += 1
                if counter >= blastChunkSize:
                    blastDict = blastSearch(
                        [seq.refseq for seq in chunksForBlast.values()],
                        [seq.taxid for seq in proteins.values()],
                        filename,
                        blastDict)
                    print(str(datetime.datetime.now()) + ': Blast search completed')
                    counter = 0
                    chunksForBlast = dict()
                    savePickle('part_' + os.path.splitext(filename)[0],
                               {'proteins': proteins, 'blastDict': blastDict},
                               '/For_online')
            blastDict = blastSearch(
                [seq.refseq for seq in chunksForBlast.values()],
                [seq.taxid for seq in proteins.values()],
                filename,
                blastDict)
            print(str(datetime.datetime.now()) + ': Blast search completed')
            savePickle(os.path.splitext(filename)[0],
                       {'proteins': proteins, 'blastDict': blastDict},
                       '/For_online')

            print('Checking Blast dictionary...')
            blastDict = checkBlastDict(proteins, filename, blastDict, 0)
            print(str(datetime.datetime.now()) + ': Blast dictionary checked')
            savePickle(os.path.splitext(filename)[0],
                       {'proteins': proteins, 'blastDict': blastDict},
                       '/For_online')

            # Translate the Blast hits from refseq ids to gene ids.
            transDict = deepcopy(blastDict)
            for q in transDict.keys():
                for s in transDict[q].keys():
                    if transDict[q][s] in proteins:
                        transDict[q][s] = proteins[transDict[q][s]].gene
                    else:
                        transDict[q][s] = 'NA'

            # For every gene, pick the dominant target gene per species.
            geneDict = dict()
            for g in set([p.gene for p in proteins.values()]):
                geneDict[g] = dict()
                isoforms = [p.refseq for p in proteins.values() if p.gene == g]
                for s in set([p.species for p in proteins.values()]):
                    targetGenes = dict()
                    for i in isoforms:
                        if s in transDict[i]:
                            if not transDict[i][s] in geneDict[g]:
                                targetGenes[transDict[i][s]] = 1
                            else:
                                targetGenes[transDict[i][s]] += 1
                    if len(targetGenes) > 0:
                        if max(targetGenes.values()) / sum(targetGenes.values()) >= orthologyThreshold:
                            geneDict[g][s] = list(targetGenes.keys())[
                                list(targetGenes.values()).index(max(targetGenes.values()))]

    if finalAnalysis:
        for filename in os.listdir(rootFolder + '/preInput'):
            print(filename)
            # proteins need to be refreshed each time we do an analysis,
            # else good values are not dropped
            pkl = checkPreviousPickle(filename, '/For_online')
            proteins = pkl['proteins']
            blastDict = pkl['blastDict']

            # Translate the Blast hits from refseq ids to gene ids.
            transDict = deepcopy(blastDict)
            for q in transDict.keys():
                for s in transDict[q].keys():
                    if transDict[q][s] in proteins:
                        transDict[q][s] = proteins[transDict[q][s]].gene
                    else:
                        transDict[q][s] = 'NA'

            # For every gene, pick the dominant target gene per species.
            geneDict = dict()
            for g in set([p.gene for p in proteins.values()]):
                geneDict[g] = dict()
                isoforms = [p.refseq for p in proteins.values() if p.gene == g]
                for s in set([p.species for p in proteins.values()]):
                    targetGenes = dict()
                    for i in isoforms:
                        if s in transDict[i]:
                            if not transDict[i][s] in geneDict[g]:
                                targetGenes[transDict[i][s]] = 1
                            else:
                                targetGenes[transDict[i][s]] += 1
                    if len(targetGenes) > 0:
                        if max(targetGenes.values()) / sum(targetGenes.values()) >= orthologyThreshold:
                            geneDict[g][s] = list(targetGenes.keys())[
                                list(targetGenes.values()).index(max(targetGenes.values()))]

            with open(rootFolder + '/preInput/' + filename, 'r') as oneStrFile:
                mainRefseq = oneStrFile.read().replace('\n', '')
            mainSpecies = proteins[mainRefseq].species
            mainGene = proteins[mainRefseq].gene

            # Add genes whose geneDict assignment points back to the main gene and vice versa...
            graph = networkx.Graph()
            graph.add_node(mainGene)
            for q in geneDict:
                qSpecies = [p.species for p in proteins.values() if p.gene == q][0]
                if (mainSpecies in geneDict[q]) and (qSpecies in geneDict[mainGene]):
                    if (geneDict[q][mainSpecies] == mainGene) and \
                            (geneDict[mainGene][qSpecies] == q):
                        graph.add_node(q)
            # ...then connect every pair of genes that reciprocally point to each other.
            for q in graph.nodes():
                for s in geneDict[q]:
                    for t in graph.nodes():
                        qSpecies = [p.species for p in proteins.values() if p.gene == q][0]
                        tSpecies = [p.species for p in proteins.values() if p.gene == t][0]
                        if (tSpecies in geneDict[q]) and (qSpecies in geneDict[t]):
                            if (q != t) and (geneDict[q][tSpecies] == t) and (geneDict[t][qSpecies] == q):
                                graph.add_edge(q, t)

            maxClique = clique.max_clique(graph)
            # for p in proteins.values():
            #     setattr(p, 'good', False)
            proteins = goodGeneMakesGoodProtein(proteins, maxClique)

            refDict = dict()
            for p in proteins.values():
                if p.good:
                    refDict[p.species] = p.gene
            toDel = set()
            for p in proteins.values():
                if (p.species in refDict.keys()) and (p.gene != refDict[p.species]):
                    toDel.add(p.refseq)
            tempProteins = deepcopy(proteins)
            for refseq in toDel:
                tempProteins.pop(refseq)

            html = analyzeBlastDict(blastDict, tempProteins)
            with open(rootFolder + '/Results/' + os.path.splitext(filename)[0] + '.html', 'w') as out:
                out.write('Original cluster:<br>')
                for gene in maxClique:
                    out.write([p.species for p in proteins.values() if p.gene == gene][0] + ': ' +
                              [p.symbol for p in proteins.values() if p.gene == gene][0] + ' (' +
                              gene + ')<br>')
                out.write('<br>Results:<br>')
                out.write(html)
vgapE = []  # record energy
vszG = []
vszCLQ = []
NV = len(CLQ)
MAXBRANCH = int(NREADS)

# init random seed for reproducibility
while (NV < M - 1 - NV_LIM) and (itr < MAXITR):
    print('===================================================')
    print('>>> FINDING -', NV + 1, 'TH VECTOR OF -', M, 'ITER=', itr, 'of', MAXITR)
    print('===================================================')
    # NTSWEEPS = NV * NSWEEPS
    NTSWEEPS = updateMAXREADS(NSWEEPS, NV)

    # current status
    print('Est-CLQ(G):', len(xclq.max_clique(G)), ', |G|=', len(G.nodes()))

    # -------------------------------------------------------------------------
    # fetch a clique not yet in the trial set
    # -------------------------------------------------------------------------
    TF = True
    mm = 0
    tCLQ = set()
    while TF and (mm < len(OLS_CLQ)):
        # print('Trying:', mm, 'with |CLQ|', len(), ' of', len(LS_CLQ))
        tCLQ = OLS_CLQ[:][mm].copy()
        # print('tCLQ->', tCLQ)
        tpCLQ = tuple(set(tCLQ))  # force ordering of the node IDs
        notInTrial = not (tpCLQ in TRIAL)
        enoughLength = len(tCLQ) > 1
        TF = not (notInTrial and enoughLength and isOrthoSet(set(tCLQ), M))
        mm = mm + 1
plt.clf()

# d
edges_color = []
for (v1, v2) in g2.edges():  # list of edge colors
    val = nx.greedy_color(nx.line_graph(g2)).get((v1, v2))
    if val is None:
        val = nx.greedy_color(nx.line_graph(g2)).get((v2, v1))
    edges_color.append(val)
nx.draw_planar(g2, with_labels=True, edge_color=edges_color)
plt.savefig("edges_color.png")
plt.clf()

# e
print("Maximum clique")
print(clique.max_clique(g2))

# f
print("Maximum stable set")
print(independent_set.maximum_independent_set(g2))

# g
print("Maximum matching")
print(nx.max_weight_matching(g2, maxcardinality=True, weight=0))

# i
print("Minimum edge cover")
print(covering.min_edge_cover(g2))  # find the minimum edge cover

# j
print("Hanging vertices")
        G.node[name]['viz'] = redColor
        G.node[name]['colAtr'] = "Red"
    elif "Green" in generate:
        G.node[name]['viz'] = greenColor
        G.node[name]['colAtr'] = "Green"
    dict[name] = [generate, benefits]
    line += 1

for x in dict:
    for y in dict:
        for b in dict[x][1]:
            l = b.split("&")
            cond_met = True
            for cond in l:
                if cond[0] == "!":
                    cond_met = cond_met and (not (cond[1:] in dict[y][0]))
                else:
                    cond_met = cond_met and (cond in dict[y][0])
            if cond_met:
                G.add_edge(x, y)
                G.edges[x, y]['atr'] = b

# If you run this script, two gexf files will be created in the folder you are
# currently working in. If you don't want this to happen, comment out these lines.
nx.write_gexf(G, "test.gexf")

h = clique.max_clique(G)
H = G.subgraph(h)
print(H.nodes)
nx.write_gexf(H, "clique.gexf")
def cardinalMaxClique():
    """
    Generates a question about the cardinality of the maximum clique of a graph,
    in JSON format:
    - question about the clique cardinality
    - unweighted graph
    - undirected graph
    - correct answer: cardinality of the max clique
    - wrong answers
    """
    question = "{ \"question\": \"Quel est le cardinal de la plus grande clique du graphe ci-dessus ?\", "
    ponderate = "\"ponderate\": \"False\", "
    colorBase = "\"colorbase\": \"None\", "  # nodes to color by default
    colorReponse = "\"colorreponse\": "  # nodes to color when the answer is displayed
    complementReponse = "\"complementreponse\": \"None\", "

    nbrReponse = 0
    while nbrReponse < 3:
        G = nx.fast_gnp_random_graph(6, 0.7, None, False)  # G -> undirected graph
        colorClique = clique.max_clique(G)
        true_answer = len(colorClique)
        nbrReponse = true_answer

    # set of nodes to color for the answer
    colorReponse += "{ \"nodes\": \"" + str(colorClique) + "\", \"edges\": \"None\"}, "
    colorReponse = colorReponse.replace(" ", "")  # strip spaces for easier parsing in JavaScript

    G = addEdgesIds(G)
    graph = graphToJson(G)  # convert to JSON format

    # answer: cardinality of the max clique
    reponse_true = ", \"true_answer\": \""
    reponse_true += str(true_answer) + "\""

    # wrong answers
    wa1 = random.randint(1, true_answer - 2)
    wa2 = random.randint(true_answer + 1, true_answer + 4)
    wa3 = random.randint(wa1, wa2)
    while wa3 == wa1 or wa3 == wa2 or wa3 == true_answer:
        wa1 = random.randint(1, true_answer - 2)
        wa2 = random.randint(true_answer + 1, true_answer + 4)
        wa3 = random.randint(wa1, wa2)
    wrong_answer = str(wa1) + "z" + str(wa2) + "z" + str(wa3) + "z"
    reponse_wrong = ", \"wrong_answer\": \"" + wrong_answer + "\""

    # cut { and } so the question/answer can be appended while keeping a valid JSON format
    graph = graph[1:len(graph) - 1]
    # append question/answers and close the JSON object with }
    graph = question + ponderate + colorBase + colorReponse + complementReponse + str(graph) + str(reponse_true) + reponse_wrong + "}"
    return graph
    shortest = None
    for node in graph[start]:
        if node not in path:
            newpath = find_shortest_path(graph, node, end, path)
            if newpath:
                if not shortest or len(newpath) < len(shortest):
                    shortest = newpath
    return shortest


with open("/Users/harryritchie/Documents/Aeropress16/coffee_17_minedset.csv") as file:
    read_data = csv.reader(file, delimiter=';')
    names = {}
    for row in read_data:
        names[row[0]] = row[1:]
del names['Recipe']

# GRAPH
GRAPH = GraphDict(names)
G = nx.Graph(GRAPH)

# MAX CLIQUE WITH MINED SETTINGS SUP > 0.5
print(clique.max_clique(G))

# PLOT GRAPH
pos = nx.spring_layout(G)
nx.draw_networkx(G, pos)
plt.show()