def analizzaGirvanNewman(pfPaj, pfAINN, pfMod):
    """Detect Girvan-Newman communities in a Pajek graph and save them per author.

    The graph in ``pfPaj`` is loaded as an undirected SNAP graph and split into
    communities with ``snap.CommunityGirvanNewman``. For every distinct author
    number read from ``pfAINN`` one CRLF-terminated row
    ``ID<TAB>name<TAB>community index`` is written to ``pfMod``.

    pfPaj  -- path of the input graph in Pajek format
    pfAINN -- path of a tab-separated file with exactly three fields per line:
              author ID, author number (parsed as int), author name
    pfMod  -- path of the output file (overwritten)

    Returns the number of communities found.

    Raises KeyError when an author number from ``pfAINN`` does not belong to
    any detected community, and ValueError when a line of ``pfAINN`` does not
    have exactly three fields or its second field is not an integer.
    """
    grafo = snap.LoadPajek(snap.PUNGraph, pfPaj)
    gruppi = snap.TCnComV()
    modularita = snap.CommunityGirvanNewman(grafo, gruppi)

    # node id -> index of the community that contains it
    comunitaDiNodo = {}
    nComunita = 0
    for gruppo in gruppi:
        for nodo in gruppo:
            comunitaDiNodo[nodo] = nComunita
        nComunita += 1
    print('Numero di comunita analizzaGirvanNewman: {} modularity: {}'.format(
        nComunita, modularita))

    # author number -> (author ID, author name); author numbers are looked up
    # below as node ids of the graph
    autori = {}
    with open(pfAINN, 'rb') as fAINN:
        for riga in fAINN:
            autID, numTxt, autNome = riga.rstrip().split('\t')
            autori[int(numTxt)] = (autID, autNome)

    with open(pfMod, 'wb') as fMod:
        for autNum, (autID, autNome) in autori.items():
            fMod.write('{}\t{}\t{}\r\n'.format(autID, autNome,
                                               comunitaDiNodo[autNum]))

    return nComunita
# Round-trip check for snap.LoadPajek: write a small three-node Pajek file,
# then load it back as a directed and as an undirected graph.
filename = "example.paj"

# The context manager closes the file even if the write raises.
# Pajek is a line-oriented format: one vertex or arc per line.
# NOTE(review): the header declares 9 vertices but only 3 are listed, and the
# checks below expect 3 nodes -- confirm the loader ignores the declared count.
with open(filename, "w") as output:
    output.write("""*Vertices 9
1 "1" 0.3034 0.7561
2 "2" 0.4565 0.6039
3 "3" 0.4887 0.8188
*Arcs
1 2 1
1 3 1
2 3 1
""")

print("Directed graph")
Graph = snap.LoadPajek(snap.PNGraph, filename)

print("Nodes", Graph.GetNodes())
if Graph.GetNodes() != 3:
    print("*** Error11")
for NI in Graph.Nodes():
    print("Node", NI.GetId())

print("Edges", Graph.GetEdges())
if Graph.GetEdges() != 3:
    print("*** Error12")
for EI in Graph.Edges():
    print("Edge", EI.GetSrcNId(), EI.GetDstNId())

print("Undirected graph")
UGraph = snap.LoadPajek(snap.PUNGraph, filename)
def collassaNodiShortPath(pfAutNum, pfDatiPaj):
    """Exploratory search for same-author nodes that are close in the graph.

    Reads the author table, groups author numbers under a full name using the
    helpers ``creaAbbreviazioni`` and ``checkNome``, and for every pair of
    numbers in a group measures the shortest-path length in the graph. Pairs
    joined by a path of 1..2 hops are recorded as candidates for collapsing.
    The function only prints diagnostics; it writes no file and returns None.

    pfAutNum  -- path of a tab-separated file; the first three fields of each
                 line are author ID, author number (parsed as int), author name
    pfDatiPaj -- path of the Pajek graph (uncollapsed edges and authors),
                 loaded as an undirected graph

    NOTE(review): the block nesting of the trailing debug prints was
    reconstructed from a whitespace-collapsed source -- confirm against the
    original file.
    """
    UGraph = snap.LoadPajek(snap.PUNGraph, pfDatiPaj)

    daID = {}    # {id: [number, name]}
    daNum = {}   # {number: [id, name]}
    daNome = {}  # {name: [[id, id, ...], [num, num, ...]]}
    with open(pfAutNum, 'rb') as fAutNum:
        for line in fAutNum:
            pezzi = line.rstrip().split('\t')
            autID = pezzi[0]
            autNum = int(pezzi[1])
            autNome = pezzi[2]
            daID[autID] = [autNum, autNome]
            daNum[autNum] = [autID, autNome]
            if autNome in daNome:
                # name already seen: the same name owns several ids/numbers
                daNome[autNome][0].append(autID)
                daNome[autNome][1].append(autNum)
            else:
                daNome[autNome] = [[autID], [autNum]]
    print('daID: {}\ndaNum: {}\ndaNome: {}'.format(daID, daNum, daNome))

    # Filled by creaAbbreviazioni. Shape per the original author's note:
    # {con cani cose: [c c cose, con c cose, c cani cose, con cani cose]}
    # but also {con c cose: [c c cose, con c cose]}
    dAbbreviazioni = {}
    for autNome in daNome:
        creaAbbreviazioni(autNome, dAbbreviazioni, daNome)
    print('dAbbreviazioni: {}'.format(dAbbreviazioni))

    # Filled by checkNome; per the original author's note:
    # {fullest name: [abbreviations encountered, ...]}
    daNomeAbb = {}
    for autNome in daNome:
        checkNome(autNome, daNomeAbb, dAbbreviazioni, daNome)
    print('daNomeAbb: {}'.format(daNomeAbb))

    daNomiNum = {}  # {full name: [author numbers tied to that name]}
    for nome in daNomeAbb:
        # This is the same list object as daNome[nome][1], so the appends
        # below also extend the entry in daNome.
        daNomiNum[nome] = daNome[nome][1]
        for abb in daNomeAbb[nome]:
            if abb in daNome:  # expected to be (almost) always true
                for num in daNome[abb][1]:
                    if num not in daNomiNum[nome]:
                        daNomiNum[nome].append(num)
            else:
                print('non trovata {} in daNome'.format(abb))
    print('daNomiNum: {}'.format(daNomiNum))

    lenfreq = {}       # {shortest-path length: number of pairs with it}
    dacollassare = {}  # {name: set of numbers to collapse}
    cdc = {}           # pairs to collapse: {name: [[src, dst], ...]}
    maxhops = 2
    for nome in daNomiNum:
        if len(daNomiNum[nome]) > 1:
            for src, dst in combinations(daNomiNum[nome], 2):
                lenshopa = snap.GetShortPath(UGraph, src, dst)
                lenfreq[lenshopa] = lenfreq.get(lenshopa, 0) + 1
                # GetShortPath reports -1 when no path exists; unreachable
                # pairs must not be collapsed, so require a positive length.
                if 0 < lenshopa <= maxhops:
                    if nome not in cdc:
                        dacollassare[nome] = set([src, dst])
                        cdc[nome] = [ssd(src, dst)]
                    else:
                        dacollassare[nome].add(src)
                        dacollassare[nome].add(dst)
                        cdc[nome].append(ssd(src, dst))
    print('len {} dacollassare: {}'.format(len(dacollassare), dacollassare))
    print('len {} cdc: {}'.format(len(cdc), cdc))
    print('lenfreq {}'.format(lenfreq))

    # ---- debug section: inspect the pairs collected for one probe author ----
    # other probe names used by the author: 'w han', 'carlo ferrari'
    uomo = 'guangyuan liu'
    if uomo not in cdc:
        # The probe name is dataset-specific; without this guard the lookups
        # below raise KeyError on any other dataset.
        print('nessuna coppia da collassare per {}'.format(uomo))
        return
    print('dacollassare[{}]: {}'.format(uomo, dacollassare[uomo]))
    ccf = cdc[uomo]
    print('len(ccf): {} ccf {}'.format(len(ccf), ccf))

    # acf is the same list object as ccf: the rewrite below mutates the pairs
    # stored in cdc[uomo]. It replaces the first element of every pair that
    # equals the current pair's dst with the current pair's src.
    # Relies on ssd() returning mutable pairs -- TODO confirm.
    acf = ccf
    for i in range(len(acf)):
        src = acf[i][0]
        dst = acf[i][1]
        for j in range(len(acf)):
            if acf[j][0] == dst:
                acf[j][0] = src
                print('i:{} j:{} src:{} dst:{} acf:{}'.format(i, j, src, dst, ccf))
    print(acf)

    met = set()  # author numbers already placed in a group
    bcf = ccf    # again an alias of the (already rewritten) pair list
    stot = set()  # all author numbers appearing in the pairs
    for coppia in bcf:
        stot |= set(coppia)
    tot = len(stot)  # how many author numbers there are to place
    print('len(stot) {}'.format(tot))

    # Work in progress: only the first group is ever grown, since i stays 0.
    i = 0
    gruppi = [set(bcf[0])]
    for coppia in bcf:
        if coppia[0] in gruppi[i] or coppia[1] in gruppi[i]:
            print('aggiungo {}'.format(coppia))
            gruppi[i] |= set(coppia)
            met |= set(coppia)
    print(gruppi)
def main():
    """
    Generate snowball samples of a Pajek graph (see usage message in module
    header block).

    Command line: [-d] edgelistFilename num_samples num_seeds num_waves outputdir

    -d selects a directed graph (snap.PNGraph); the default is undirected
    (snap.PUNGraph). For each of the num_samples samples, num_seeds distinct
    random seed nodes are drawn (no node is a seed in two samples),
    snowball_sample() is run for num_waves waves, and the subgraph and zone
    files are written into outputdir. One line per sample is appended to
    outputdir/sampledesc.txt.

    random.sample raises ValueError when the graph has fewer than
    num_samples * num_seeds nodes.
    """
    directed = False
    try:
        opts, args = getopt.getopt(sys.argv[1:], "d")
    except getopt.GetoptError:
        # Only option-parsing failures are a usage error; a bare except would
        # also swallow KeyboardInterrupt and SystemExit.
        # usage() is expected to terminate the program -- TODO confirm.
        usage(sys.argv[0])
    for opt, arg in opts:
        if opt == '-d':
            directed = True
        else:
            usage(sys.argv[0])

    if len(args) != 5:
        usage(sys.argv[0])

    edgelistFilename = args[0]
    num_samples = int(args[1])
    num_seeds = int(args[2])
    num_waves = int(args[3])
    outputdir = args[4]

    print("directed: {}".format(directed))
    print("number of samples: {}".format(num_samples))
    print("number of seeds: {}".format(num_seeds))
    print("number of waves: {}".format(num_waves))
    print("output directory: {}".format(outputdir))

    if not os.path.exists(outputdir):
        os.mkdir(outputdir)

    G = snap.LoadPajek(snap.PNGraph if directed else snap.PUNGraph,
                       edgelistFilename)
    snap.PrintInfo(G)

    # get num_samples * num_seeds distinct random seed nodes (sample without
    # replacement) and convert to list of lists where each list is the seed
    # set for one sample
    allseeds = random.sample([node.GetId() for node in G.Nodes()],
                             num_samples * num_seeds)
    seedsets = [allseeds[i:i+num_seeds]
                for i in range(0, len(allseeds), num_seeds)]

    # format of sampledesc file is:
    # N subzone_filename subgraph_filename subactor_filename
    sampledesc_filename = outputdir + os.path.sep + "sampledesc" + os.path.extsep + "txt"
    # The context manager closes the description file even when a sample fails.
    with open(sampledesc_filename, 'w') as sampledesc_f:
        for i in range(num_samples):
            sys.stdout.write('generating snowball sample ' + str(i+1) + '... ')
            start = time.time()

            # have to convert seedset to TIntV for SNAP
            seedsVec = snap.TIntV()
            for nodeid in seedsets[i]:
                seedsVec.Add(nodeid)
            # Nodes are deliberately NOT renumbered to 0..N-1 (e.g. with
            # snap.ConvertGraph), as that loses the node attributes (zone).
            # Instead build a dictionary mapping nodeid:zone so the zone file
            # can be written in the correct order; the index of a nodeid in
            # nodelist then serves as the sequential node number.
            Gsample = snowball_sample(G, num_waves, seedsVec)
            nodelist = list()  # keep this iteration order for all output files
            zonedict = dict()  # map nodeid : zone
            for node in Gsample.Nodes():
                nodelist.append(node.GetId())
                zonedict[node.GetId()] = Gsample.GetIntAttrDatN(node.GetId(), "zone")
            print(str(time.time() - start) + ' s')

            snap.PrintInfo(Gsample)
            subgraph_filename = outputdir + os.path.sep + "subgraph" + str(i) + os.path.extsep + "txt"
            write_graph_file(subgraph_filename, Gsample, nodelist)
            subzone_filename = outputdir + os.path.sep + "subzone" + str(i) + os.path.extsep + "txt"
            write_zone_file(subzone_filename, Gsample, nodelist, zonedict)
            subactor_filename = outputdir + os.path.sep + "subactor" + str(i) + os.path.extsep + "txt"
            # TODO get actor attributes (currently no subactor file is written
            # here; only its name is recorded in the description file)

            sampledesc_f.write("%d %s %s %s\n" % (Gsample.GetNodes(),
                                                  subzone_filename,
                                                  subgraph_filename,
                                                  subactor_filename))
def _unisciCoppie(coppie):
    """Merge 2-element pairs that share a member into disjoint sets.

    Returns a list of sets such that two values are in the same set when they
    are linked by a chain of pairs. An empty input gives an empty list.
    """
    sdc = []  # sets to collapse, e.g. [set(1, 3, 5), set(2, 7)]
    for coppia in coppie:
        a = coppia[0]
        b = coppia[1]
        posA, posB = -1, -1
        for i in range(len(sdc)):
            if a in sdc[i]:
                posA = i
            if b in sdc[i]:
                posB = i
        if posA == -1 and posB == -1:
            # neither member seen before: start a new set
            sdc.append(set(coppia))
        elif posB == -1:
            # a already belongs to sdc[posA]; b is new
            sdc[posA].add(b)
        elif posA == -1:
            sdc[posB].add(a)
        elif posA != posB:
            # the pair bridges two existing sets: fuse them
            sdc[posA] |= sdc[posB]
            del sdc[posB]
    return sdc


def collassaNodiShortPath(pfAutINN, pfDatiPaj, pfEdgeUnif, pfAutUnif, maxhops):
    """Collapse author nodes with matching names that are close in the graph.

    Author names are sorted with ``swapNomeCog`` and grouped by ``abbUguali``.
    Within a group, every pair of author numbers whose shortest path in the
    graph is between 1 and ``maxhops`` hops is merged; merged numbers are
    replaced by one representative when the edge list is rewritten, and the
    weights of edges that become identical are summed.

    pfAutINN   -- input, tab-separated; the first three fields of each line are
                  author ID, author number (parsed as int), author name
    pfDatiPaj  -- input Pajek file with uncollapsed edges and authors; loaded
                  as an undirected graph, then its ``*Edges`` section is read
                  again as whitespace-separated integer triples ``a b w``
    pfEdgeUnif -- output, one ``idA<TAB>idB<TAB>weight`` row (CRLF) per edge
    pfAutUnif  -- output, one ``id<TAB>name`` row (CRLF) per author ID that
                  appears in the unified edges
    maxhops    -- largest shortest-path length for which a pair is merged

    Raises ValueError when ``pfDatiPaj`` contains no ``*Edges`` line.
    """
    daID = {}    # {id: [number, name]}
    daNum = {}   # {number: [id, name]}
    daNome = {}  # {name: [[id, id, ...], [num, num, ...]]}
    with open(pfAutINN, 'rb') as fAutINN:
        for line in fAutINN:
            pezzi = line.rstrip().split('\t')
            autID = pezzi[0]
            autNum = int(pezzi[1])
            autNome = pezzi[2]
            daID[autID] = [autNum, autNome]
            daNum[autNum] = [autID, autNome]
            if autNome in daNome:
                # name already seen: the same name owns several ids/numbers
                daNome[autNome][0].append(autID)
                daNome[autNome][1].append(autNum)
            else:
                daNome[autNome] = [[autID], [autNum]]

    # Intended order per the original author's note:
    # [..., 'n s cog', 'n sec cog', 'nom sec cog', ...]
    # FIXME: after 'nom sec cog' there may be 'num san cog', ...
    nameSort = sorted(daNome.keys(), key=swapNomeCog)

    UGraph = snap.LoadPajek(snap.PUNGraph, pfDatiPaj)
    lenfreq = {}  # {shortest-path length: number of pairs with it}
    tsdc = []     # all sets of author numbers to collapse, over all groups
    for au in abbUguali(nameSort):
        numeri = []
        for nome in au:
            numeri.extend(daNome[nome][1])
        if len(numeri) > 100:
            # large groups make the pairwise scan slow; report them
            print('au: {} len(numeri): {}'.format(au, len(numeri)))
        coppie = []
        for j, (src, dst) in enumerate(combinations(numeri, 2), 1):
            if j % 10000 == 0:
                print(j)  # progress indicator
            lenshopa = snap.GetShortPath(UGraph, src, dst)
            lenfreq[lenshopa] = lenfreq.get(lenshopa, 0) + 1
            # lenshopa > 0 leaves out non-positive lengths, e.g. the value
            # reported when no path exists
            if lenshopa > 0 and lenshopa <= maxhops:
                coppie.append(ssd(src, dst))
        tsdc.extend(_unisciCoppie(coppie))
    print('lenfreq {}'.format(lenfreq))

    # {leader name: (set of numbers to collapse, leader ID, leader number)}
    # The leader of a set is the member whose name compares greatest.
    # NOTE(review): two different sets with the same leader name overwrite
    # each other here, so only the last one is collapsed -- confirm intent.
    autUniti = {}
    for s in tsdc:
        setleader = max([daNum[x] for x in s], key=itemgetter(1))
        autUniti[setleader[1]] = (s, setleader[0], daID[setleader[0]][0])

    dEdgeUnif = {}  # {ssd(a, b): summed weight}
    with open(pfDatiPaj, 'rb') as fDatiPaj:
        # Skip everything up to and including the '*Edges' header. Stop at
        # end of file instead of looping forever when the header is missing.
        while True:
            raw = fDatiPaj.readline()
            if not raw:
                raise ValueError(
                    "no '*Edges' section found in {}".format(pfDatiPaj))
            if raw.rstrip() == '*Edges':
                break
        # The edge list ends at the first empty line or at end of file.
        line = fDatiPaj.readline().rstrip()
        while line != '':
            pezzi = line.split()
            a = int(pezzi[0])
            b = int(pezzi[1])
            w = int(pezzi[2])
            # replace each endpoint by the leader of the set it belongs to
            for nome in autUniti:
                if a in autUniti[nome][0]:
                    a = autUniti[nome][2]
                if b in autUniti[nome][0]:
                    b = autUniti[nome][2]
            chiave = ssd(a, b)
            dEdgeUnif[chiave] = dEdgeUnif.get(chiave, 0) + w
            line = fDatiPaj.readline().rstrip()

    sAutUnif = set()  # author IDs that survive in the unified edge list
    with open(pfEdgeUnif, 'wb') as fEdgeUnif:
        for edge in dEdgeUnif:
            a = daNum[edge[0]][0]
            b = daNum[edge[1]][0]
            sAutUnif.add(a)
            sAutUnif.add(b)
            w = dEdgeUnif[edge]
            fEdgeUnif.write('{}\t{}\t{}\r\n'.format(a, b, w))

    with open(pfAutUnif, 'wb') as fAutUnif:
        for a in sAutUnif:
            fAutUnif.write('{}\t{}\r\n'.format(a, daID[a][1]))