Esempio n. 1
0
def analizzaGirvanNewman(pfPaj, pfAINN, pfMod):
    """Run Girvan-Newman community detection and save one community per author.

    Args:
        pfPaj: path of the Pajek file, loaded as an undirected SNAP graph.
        pfAINN: path of a tab-separated file with three fields per line
            (author ID, author number, author name); the number is the key
            used to look the author up among the community members.
        pfMod: path of the output file; one CRLF-terminated line
            ``ID<TAB>name<TAB>community index`` is written per author.

    Returns:
        The number of communities found.
    """
    grafo = snap.LoadPajek(snap.PUNGraph, pfPaj)

    comunita = snap.TCnComV()
    modularity = snap.CommunityGirvanNewman(grafo, comunita)

    # {node number: index of the community that contains it}
    dMod = {}
    classe = 0
    for indice, com in enumerate(comunita):
        for nodo in com:
            dMod[nodo] = indice
        classe = indice + 1
    print('Numero di comunita analizzaGirvanNewman: {} modularity: {}'.format(
        classe, modularity))

    # {author number: [author ID, author name]}
    dNum = {}
    with open(pfAINN, 'rb') as fAINN:
        for riga in fAINN:
            autID, numero, autNome = riga.rstrip().split('\t')
            dNum[int(numero)] = [autID, autNome]

    modello = '{}\t{}\t{}\r\n'
    with open(pfMod, 'wb') as fMod:
        for autNum, (autID, autNome) in dNum.items():
            fMod.write(modello.format(autID, autNome, dMod[autNum]))

    return classe  # number of communities found
Esempio n. 2
0
# Write a small Pajek file, then load it back with SNAP as a directed and as
# an undirected graph, printing the nodes and edges that were read.
filename = "example.paj"

# A context manager closes (and flushes) the file even if the write fails,
# so the loads below never see a half-written or still-open file.
# Note: the header declares 9 vertices but only 3 are listed; the checks
# below expect the 3 listed nodes and the 3 arcs.
with open(filename, "w") as output:
    output.write("""*Vertices      9
   1 "1"    0.3034    0.7561
   2 "2"    0.4565    0.6039
   3 "3"    0.4887    0.8188
*Arcs
    1      2       1
    1      3       1
    2      3       1
""")

print("Directed graph")
Graph = snap.LoadPajek(snap.PNGraph, filename)

# Report the node count and flag a mismatch with the expected 3 nodes.
print("Nodes", Graph.GetNodes())
if Graph.GetNodes() != 3:
    print("*** Error11")
for NI in Graph.Nodes():
    print("Node", NI.GetId())

# Report the edge count and flag a mismatch with the expected 3 edges.
print("Edges", Graph.GetEdges())
if Graph.GetEdges() != 3:
    print("*** Error12")
for EI in Graph.Edges():
    print("Edge", EI.GetSrcNId(), EI.GetDstNId())

print("Undirected graph")
UGraph = snap.LoadPajek(snap.PUNGraph, filename)
def collassaNodiShortPath(pfAutNum, pfDatiPaj, maxhops=2, uomo='guangyuan liu'):
  """Explore which author nodes could be collapsed, using shortest paths.

  Authors whose names are abbreviations of the same full name are paired up;
  a pair becomes a candidate for collapsing when the two nodes are connected
  by a path of at most ``maxhops`` hops in the graph.  The function only
  prints its intermediate structures; it writes no file and returns None.

  Args:
    pfAutNum: path of a tab-separated file with (at least) three fields per
      line: author ID, author number, author name.
    pfDatiPaj: path of the Pajek file, loaded as an undirected SNAP graph.
    maxhops: maximum shortest-path length for a pair to be collapsed
      (defaults to the value that used to be hard-coded).
    uomo: full name whose candidate pairs are inspected in detail at the end
      (defaults to the value that used to be hard-coded).
  """
  # edges and authors are not collapsed yet
  UGraph = snap.LoadPajek(snap.PUNGraph, pfDatiPaj)

  # load names, IDs and numbers
  daID = {}    # {id: [number, name]}
  daNum = {}   # {number: [id, name]}
  daNome = {}  # {name: [[id, id, ...], [number, number, ...]]}
  with open(pfAutNum, 'rb') as fAutNum:
    for line in fAutNum:
      pezzi = line.rstrip().split('\t')
      autID = pezzi[0]
      autNum = int(pezzi[1])
      autNome = pezzi[2]
      daID[autID] = [autNum, autNome]
      daNum[autNum] = [autID, autNome]
      if autNome in daNome:
        daNome[autNome][0].append(autID)
        daNome[autNome][1].append(autNum)
      else:
        daNome[autNome] = [[autID], [autNum]]
  print('daID: {}\ndaNum: {}\ndaNome: {}'.format(daID, daNum, daNome))

  # creaAbbreviazioni is expected to fill dAbbreviazioni in place, e.g.
  # {con cani cose: [c c cose, con c cose, c cani cose, con cani cose]}
  # but also {con c cose: [c c cose, con c cose]}
  dAbbreviazioni = {}
  for autNome in daNome:
    creaAbbreviazioni(autNome, dAbbreviazioni, daNome)
  print('dAbbreviazioni: {}'.format(dAbbreviazioni))

  # checkNome is expected to fill daNomeAbb in place:
  # {fullest name: [abbreviations met ...]}
  daNomeAbb = {}
  for autNome in daNome:
    checkNome(autNome, daNomeAbb, dAbbreviazioni, daNome)
  print('daNomeAbb: {}'.format(daNomeAbb))

  daNomiNum = {}  # {full name: [numbers of the IDs tied to the name]}
  for nome in daNomeAbb:
    # Work on a copy: appending to daNome[nome][1] itself would leak the
    # merged numbers into daNome and make the result depend on the order in
    # which the names are visited.
    numeri = list(daNome[nome][1])
    for abb in daNomeAbb[nome]:
      if abb in daNome:  # expected to be almost always the case
        for num in daNome[abb][1]:
          if num not in numeri:
            numeri.append(num)
      else:
        print('non trovata {} in daNome'.format(abb))
    daNomiNum[nome] = numeri
  print('daNomiNum: {}'.format(daNomiNum))

  lenfreq = {}       # {shortest-path length: how many pairs have it}
  dacollassare = {}  # {name: set of numbers to collapse}
  cdc = {}           # pairs to collapse {name: [ssd(src, dst), ...]}
  for nome in daNomiNum:
    # combinations() yields nothing for fewer than two numbers
    for src, dst in combinations(daNomiNum[nome], 2):
      lenshopa = snap.GetShortPath(UGraph, src, dst)
      lenfreq[lenshopa] = lenfreq.get(lenshopa, 0) + 1
      # Non-positive lengths are skipped: an unreachable pair is reported
      # with a negative length (presumably -1 -- confirm against the SNAP
      # docs) and must not be collapsed.
      if 0 < lenshopa <= maxhops:
        dacollassare.setdefault(nome, set()).update((src, dst))
        cdc.setdefault(nome, []).append(ssd(src, dst))
  print('len {} dacollassare: {}'.format(len(dacollassare), dacollassare))
  print('len {} cdc: {}'.format(len(cdc), cdc))
  print('lenfreq {}'.format(lenfreq))

  # Detailed look at a single name; .get() keeps the run alive when the
  # name has no candidate pair (prints None / an empty list instead).
  print('dacollassare[{}]: {}'.format(uomo, dacollassare.get(uomo)))
  ccf = cdc.get(uomo, [])
  print('len(ccf): {} ccf {}'.format(len(ccf), ccf))

  # Rewrite the pairs in place so that a pair starting where another one
  # ends starts from that other pair's source instead.
  # NOTE(review): this assigns to coppia[0], so it assumes ssd() returns a
  # mutable pair (a list) -- TODO confirm.
  for i, coppia in enumerate(ccf):
    src = coppia[0]
    dst = coppia[1]
    for j, altra in enumerate(ccf):
      if altra[0] == dst:
        altra[0] = src
        print('i:{} j:{} src:{} dst:{} acf:{}'.format(i, j, src, dst, ccf))
  print(ccf)

  stot = set()  # every author number that appears in a pair
  for coppia in ccf:
    stot |= set(coppia)
  print('len(stot) {}'.format(len(stot)))

  # Single pass: grow one group, seeded with the first pair, with every pair
  # that touches it.
  gruppi = []
  if ccf:
    gruppo = set(ccf[0])
    for coppia in ccf:
      if coppia[0] in gruppo or coppia[1] in gruppo:
        print('aggiungo {}'.format(coppia))
        gruppo |= set(coppia)
    gruppi.append(gruppo)
  print(gruppi)
def main():
    """
    Draw snowball samples from a Pajek graph and write them to a directory.

    See usage message in module header block. The command line is an
    optional -d flag (load the graph as directed) followed by five
    positional arguments: graph file, number of samples, number of seeds
    per sample, number of waves and output directory.

    For every sample a subgraph file and a subzone file are written, and a
    line describing the sample is added to the sampledesc file.
    """
    directed = False
    try:
        opts, args = getopt.getopt(sys.argv[1:], "d")
    except getopt.GetoptError:
        # Only option-parsing errors are handled here, so SystemExit and
        # KeyboardInterrupt are no longer swallowed.
        # NOTE(review): usage() is expected to terminate the program.
        usage(sys.argv[0])
    for opt, arg in opts:
        if opt == '-d':
            directed = True
        else:
            usage(sys.argv[0])

    if len(args) != 5:
        usage(sys.argv[0])

    edgelistFilename = args[0]
    num_samples = int(args[1])
    num_seeds = int(args[2])
    num_waves = int(args[3])
    outputdir = args[4]

    print("directed: {}".format(directed))
    print("number of samples: {}".format(num_samples))
    print("number of seeds: {}".format(num_seeds))
    print("number of waves: {}".format(num_waves))
    print("output directory: {}".format(outputdir))

    if not os.path.exists(outputdir):
        os.mkdir(outputdir)

    G = snap.LoadPajek(snap.PNGraph if directed else snap.PUNGraph,
                       edgelistFilename)
    snap.PrintInfo(G)

    # get num_samples * num_seeds distinct random seed nodes (sample without replacement)
    # and convert to list of lists where each list is seed set for one sample
    # (random.sample raises ValueError if more seeds are requested than the graph has nodes)
    allseeds = random.sample([node.GetId() for node in G.Nodes()], num_samples * num_seeds)
    seedsets = [allseeds[i:i+num_seeds] for i in range(0, len(allseeds), num_seeds)]

    # every output file is <outputdir><sep><stem>.txt
    prefix = outputdir + os.path.sep
    suffix = os.path.extsep + "txt"

    sampledesc_filename = prefix + "sampledesc" + suffix
    # context manager: the description file is closed even if a sample fails
    with open(sampledesc_filename, 'w') as sampledesc_f:
        for i in range(num_samples):
            sys.stdout.write('generating snowball sample ' + str(i+1) + '... ')
            start = time.time()
            # have to convert seedset to TIntV for SNAP
            seedsVec = snap.TIntV()
            for nodeid in seedsets[i]:
                seedsVec.Add(nodeid)
            # Nodes are not renumbered 0..N-1 because that would lose the
            # node attributes (zone); instead build a dictionary mapping
            # nodeid:zone so that it can be written to the zone file in the
            # correct order. The index in nodelist of a nodeid can then be
            # used as the sequential node number of each node.
            Gsample = snowball_sample(G, num_waves, seedsVec)
            nodelist = list()  # keep this iteration in list so we always use same order in future
            zonedict = dict()  # map nodeid : zone
            for node in Gsample.Nodes():
                nodelist.append(node.GetId())
                zonedict[node.GetId()] = Gsample.GetIntAttrDatN(node.GetId(), "zone")
            print('{} s'.format(time.time() - start))

            snap.PrintInfo(Gsample)
            subgraph_filename = prefix + "subgraph" + str(i) + suffix
            write_graph_file(subgraph_filename, Gsample, nodelist)
            subzone_filename = prefix + "subzone" + str(i) + suffix
            write_zone_file(subzone_filename, Gsample, nodelist, zonedict)
            subactor_filename = prefix + "subactor" + str(i) + suffix
            # TODO get actor attributes (currently the subactor file is only
            # named in sampledesc, it is not written here)

            # format of sampledesc file is:
            # N subzone_filename subgraph_filename subactor_filename
            sampledesc_f.write("%d %s %s %s\n" % (Gsample.GetNodes(), subzone_filename,
                                                  subgraph_filename, subactor_filename))
Esempio n. 5
0
def _unisciCoppie(coppie):
  """Merge pairs that share a member into a list of disjoint sets."""
  gruppi = []  # sets to collapse, e.g. [set(1,3,5), set(2,7)]
  for coppia in coppie:
    a = coppia[0]
    b = coppia[1]
    posA, posB = -1, -1
    for i, gruppo in enumerate(gruppi):
      if a in gruppo: posA = i
      if b in gruppo: posB = i
    if posA == -1 and posB == -1:  # neither member seen before
      gruppi.append(set(coppia))
    elif posB == -1:               # a is in gruppi[posA], b is new
      gruppi[posA].add(b)
    elif posA == -1:               # b is in gruppi[posB], a is new
      gruppi[posB].add(a)
    elif posA != posB:             # the pair bridges two groups: merge them
      gruppi[posA] |= gruppi[posB]
      del gruppi[posB]
  return gruppi


def collassaNodiShortPath(pfAutINN, pfDatiPaj, pfEdgeUnif, pfAutUnif, maxhops):
  """Collapse author nodes with matching names that are close in the graph.

  Names are sorted with swapNomeCog and grouped by abbUguali; inside each
  group every pair of author numbers whose shortest path has between 1 and
  ``maxhops`` hops is merged.  The edges of the Pajek file are then rewritten
  with every merged author replaced by the leader of its group (the member
  whose name compares greatest), summing the weights of edges that coincide.

  Args:
    pfAutINN: path of a tab-separated file with (at least) three fields per
      line: author ID, author number, author name.
    pfDatiPaj: path of the Pajek file; loaded as an undirected SNAP graph and
      also read as text for the lines that follow its ``*Edges`` line.
    pfEdgeUnif: output path; one ``ID<TAB>ID<TAB>weight`` line per edge.
    pfAutUnif: output path; one ``ID<TAB>name`` line per author that appears
      in at least one output edge.
    maxhops: maximum shortest-path length for two nodes to be merged.

  Raises:
    ValueError: if pfDatiPaj has no ``*Edges`` line (reading used to loop
      forever in that case).
  """
  # load names, IDs and numbers (edges and authors are not collapsed yet)
  daID = {}    # {id: [number, name]}
  daNum = {}   # {number: [id, name]}
  daNome = {}  # {name: [[id, id, ...], [number, number, ...]]}
  with open(pfAutINN, 'rb') as fAutINN:
    for line in fAutINN:
      pezzi = line.rstrip().split('\t')
      autID = pezzi[0]
      autNum = int(pezzi[1])
      autNome = pezzi[2]
      daID[autID] = [autNum, autNome]
      daNum[autNum] = [autID, autNome]
      if autNome in daNome:
        daNome[autNome][0].append(autID)
        daNome[autNome][1].append(autNum)
      else:
        daNome[autNome] = [[autID], [autNum]]

  # nameSort = [ ..., 'n s cog', 'n sec cog', 'nom sec cog', ... ]
  # FIXME after 'nom sec cog' there may be 'num san cog', ...
  nameSort = sorted(daNome.keys(), key=swapNomeCog)

  UGraph = snap.LoadPajek(snap.PUNGraph, pfDatiPaj)
  lenfreq = {}  # {shortest-path length: how many pairs have it}
  tsdc = []     # every set of numbers to collapse, over all name groups
  for au in abbUguali(nameSort):
    numeri = []
    for nome in au:
      numeri.extend(daNome[nome][1])
    if len(numeri) > 100: print('au: {} len(numeri): {}'.format(au, len(numeri)))
    coppie = []
    for j, (src, dst) in enumerate(combinations(numeri, 2), 1):
      if j % 10000 == 0: print(j)  # progress for very large groups
      lenshopa = snap.GetShortPath(UGraph, src, dst)
      lenfreq[lenshopa] = lenfreq.get(lenshopa, 0) + 1
      if 0 < lenshopa <= maxhops:
        coppie.append(ssd(src, dst))
    tsdc.extend(_unisciCoppie(coppie))
  print('lenfreq {}'.format(lenfreq))

  # {number: number of the leader of its group}.  Keyed by number rather
  # than by leader name: two disjoint groups may have leaders with the same
  # name, and a name-keyed dict would silently drop one of them.
  numLeader = {}
  for s in tsdc:
    setleader = max([daNum[x] for x in s], key=itemgetter(1))
    leaderNum = daID[setleader[0]][0]
    for num in s:
      numLeader[num] = leaderNum

  dEdgeUnif = {}  # {ssd(a, b): summed weight}
  with open(pfDatiPaj, 'rb') as fDatiPaj:
    # skip everything up to and including the '*Edges' line
    for line in fDatiPaj:
      if line.rstrip() == '*Edges':
        break
    else:
      raise ValueError("'*Edges' line not found in {}".format(pfDatiPaj))
    for line in fDatiPaj:
      pezzi = line.split()
      if not pezzi:  # a blank line ends the edge list
        break
      a = int(pezzi[0])
      b = int(pezzi[1])
      w = int(pezzi[2])
      a = numLeader.get(a, a)
      b = numLeader.get(b, b)
      chiave = ssd(a, b)
      dEdgeUnif[chiave] = dEdgeUnif.get(chiave, 0) + w

  sAutUnif = set()  # IDs of the authors that appear in some output edge
  with open(pfEdgeUnif, 'wb') as fEdgeUnif:
    for edge in dEdgeUnif:
      a = daNum[edge[0]][0]
      b = daNum[edge[1]][0]
      sAutUnif.add(a)
      sAutUnif.add(b)
      fEdgeUnif.write('{}\t{}\t{}\r\n'.format(a, b, dEdgeUnif[edge]))

  with open(pfAutUnif, 'wb') as fAutUnif:
    for a in sAutUnif:
      fAutUnif.write('{}\t{}\r\n'.format(a, daID[a][1]))