def retrieve_multiple_edges(graph, source=-1, target=-1):
    """Return the edges of ``graph`` selected by ``source`` and/or ``target``.

    ``-1`` means "not given" for either endpoint:

    * only ``source``: every edge leaving ``source``;
    * only ``target``: every edge entering ``target``;
    * both: the edges that leave ``source`` and enter ``target``;
    * neither: an edge sequence built from an empty id list.

    The result is always an ``ig.EdgeSeq`` bound to ``graph``.
    """
    has_source = source != -1
    has_target = target != -1

    if not has_source and not has_target:
        return ig.EdgeSeq(graph, list())
    if not has_source:
        return ig.EdgeSeq(graph, graph.incident(target, mode=ig.IN))

    edge_ids = graph.incident(source, mode=ig.OUT)
    if has_target:
        # Keep only the outgoing edges that also arrive at ``target``.
        edge_ids = set(edge_ids).intersection(
            graph.incident(target, mode=ig.IN))
    return ig.EdgeSeq(graph, edge_ids)
Esempio n. 2
0
def gml2svg(gmlfolder, svgfolder):
    """Render every ``.gml`` graph of the input folder to SVG files.

    For each ``*.gml`` file the strongly connected giant component is taken,
    restricted to the edges whose ``retwitype`` equals ``'0'``, and a spanning
    tree weighted by the edge attribute ``time`` is built from that subgraph.
    Vertex/edge counts of the four graphs (original, giant component,
    filtered subgraph, spanning tree) are appended as one tab-separated line
    per file to ``vecount.txt`` in the output folder, and the filtered
    subgraph and its spanning tree are written as SVG.

    A failure while processing one file is reported on stdout and the loop
    continues with the next file.

    NOTE(review): ``gmlfolder`` and ``svgfolder`` are never read. The body
    uses the names ``gmlinpath`` and ``svgoutpath``, which must already be in
    scope (presumably module-level globals) - confirm whether the parameters
    were meant to replace them.
    """
    svgtype = [
        '_weakGaint', '_strongGaint', '_weakGaintSPT', '_strongGaintSPT'
    ]
    # The counts file lives inside the output folder, so the folder has to
    # exist before the file is opened.
    if not os.path.exists(svgoutpath):
        os.mkdir(svgoutpath)

    with open(svgoutpath + 'vecount.txt', 'w') as fw:
        for fname in os.listdir(gmlinpath):
            if os.path.splitext(fname)[1] != '.gml':
                continue
            print('Reading graph from ... %s' % fname)
            # NOTE(review): the '.2svg' suffix never matches the '.svg' files
            # written below - confirm whether this skip is disabled on purpose.
            if os.path.exists(svgoutpath + fname + svgtype[3] + '.2svg'):
                print('%s has existesed' % fname)
                continue
            try:
                # The handle is only needed while parsing; ``with`` closes it
                # even when reading fails.
                with open(gmlinpath + fname) as gmlfile:
                    g = ig.Graph.Read_GML(gmlfile)
                gg = clus.VertexClustering.giant(g.clusters(mode='strong'))
                subg = gg.subgraph_edges(
                    ig.EdgeSeq(gg).select(retwitype_eq='0'))
                timelist = [
                    float(t)
                    for t in ig.EdgeSeq(subg).get_attribute_values('time')
                ]
                gsp = ig.Graph.spanning_tree(subg, timelist)

                counts = [
                    g.vcount(), g.ecount(),
                    gg.vcount(), gg.ecount(),
                    subg.vcount(), subg.ecount(),
                    gsp.vcount(), gsp.ecount(),
                ]
                vecountstr = '\t'.join(str(c) for c in counts)
                fw.write(fname + '\t' + vecountstr + '\n')

                if os.path.exists(svgoutpath + fname + svgtype[3] + '.svg'):
                    print('%s has existesed' % fname)
                else:
                    print('Ploting graph')
                    ig.Graph.write_svg(subg,
                                       svgoutpath + fname + svgtype[1] +
                                       '.svg',
                                       layout='large')
                    ig.Graph.write_svg(gsp,
                                       svgoutpath + fname + svgtype[3] +
                                       '.svg',
                                       layout='large')
                layout = gsp.layout("large")
                fig = plot(gsp, layout=layout)
                # NOTE(review): ``show`` is looked up on ``plot`` itself, not
                # on the returned ``fig`` - confirm which one was intended.
                plot.show()
            except Exception as e:
                # Best effort per file: report and move on to the next graph.
                print('%s  failed %s' % (gmlinpath + fname, e))
Esempio n. 3
0
def analyzeNetStat(g):
    """Summarise the edge attributes of graph ``g`` as a flat list.

    IN:  graph ``g`` whose edges carry the attributes ``createdtimetos``,
         ``followers_count``, ``userid``, ``mentioncnt``,
         ``bi_followers_count``, ``friends_count`` and ``reposts_count``.
    OUT: a 13-element list holding the first entry of each statistic, in this
         order: follower sum, echo-user count, follower average, duration,
         duration average, mention sum, mention average, bi-follower sum,
         bi-follower average, friends sum, friends average, reposts sum,
         reposts average.

    The statistics come from the helpers ``fansum``, ``echouser`` and
    ``lifespan`` of ``analysisStatFromRepostxt``, each called with ``1`` as
    second argument.
    """
    # Edge attribute names noted by the original author:
    #   reposts_count, mentioncnt, city, verified, retweeted_status,
    #   attitudes_count, location, followers_count, created_attos,
    #   verified_type, statuses_count, statuslasttos, friends_count, idstr,
    #   timein, createdtimetos, bi_followers_count, favourites_count,
    #   province, userid, comments_count, gender
    from analysisStatFromRepostxt import fansum
    from analysisStatFromRepostxt import echouser
    from analysisStatFromRepostxt import lifespan

    edge_seq = ig.EdgeSeq(g)
    read = edge_seq.get_attribute_values

    # Read every attribute column before computing anything.
    created_times = read('createdtimetos')
    followers = read('followers_count')
    user_ids = read('userid')
    mentions = read('mentioncnt')
    bi_followers = read('bi_followers_count')
    friends = read('friends_count')
    reposts = read('reposts_count')

    fans_total, fans_mean = fansum(followers, 1)
    echo_users = echouser(user_ids, 1)
    durations, _durations_added, duration_means, _durations_added_means = \
        lifespan(created_times, 1)
    mention_total, mention_mean = fansum(mentions, 1)
    bi_total, bi_mean = fansum(bi_followers, 1)
    friends_total, friends_mean = fansum(friends, 1)
    reposts_total, reposts_mean = fansum(reposts, 1)

    ordered_stats = (
        fans_total, echo_users, fans_mean, durations, duration_means,
        mention_total, mention_mean, bi_total, bi_mean, friends_total,
        friends_mean, reposts_total, reposts_mean,
    )
    return [stat[0] for stat in ordered_stats]
Esempio n. 4
0
def analyze_one(cocfilename, coc_folder,gmlfolder,percentlist=[1],timeseriesfile = r'G:\HFS\WeiboData\HFSWeiboStatNet\Stat\TimeSeries.txt',periodcnt=1,graphAll = None):
    """Analyse time-sliced sub-networks of ``graphAll`` for one coc file.

    IN:  ``cocfilename`` labels every result row; ``percentlist`` holds the
         fractions of the sorted ``createdtimetos`` edge timestamps to cover;
         ``periodcnt`` is forwarded to ``selecTime``; ``graphAll`` is the
         full graph. ``coc_folder``, ``gmlfolder`` and ``timeseriesfile`` are
         accepted but not used.
    OUT: a four-element list of transposed (``zip(*rows)``) tables: whole
         network attributes, core-part attributes, whole-network stats and
         core-part stats. The two stat tables are fed empty attribute lists
         because the ``analyzeNetStat`` calls are disabled.

    For each percent the first ``round(len(timelist) * percent)`` timestamps
    (at least one) are handed to ``selecTime``. For every returned time point
    the edges with ``createdtimetos`` <= that point form the subgraph
    (vertices are kept), which is analysed as a whole and via the core part
    of its weak giant component.

    ``percentlist`` is only iterated, never mutated, so the list default is
    not shared state.
    """
    # Accumulators handed to get_netlist.
    # NOTE(review): get_netlist presumably appends a row and returns the
    # accumulator - confirm against its definition.
    netlist = []
    netlistcore = []
    statlist = []
    statlistcore = []
    es = ig.EdgeSeq(graphAll)

    # The original built an unused path from the undefined name ``cocfolder``
    # (the parameter is ``coc_folder``), which raised NameError unless a
    # global of that name existed; the dead assignment is dropped.
    timelist = sorted(es.get_attribute_values('createdtimetos'))

    # Stat analysis is disabled, so the stat rows carry no attributes.
    netstat_all = []
    netstat_core = []

    # Fall back to the empty accumulators so the return below still works
    # when ``percentlist`` is empty or ``selecTime`` yields no time point.
    netlist_all = netlist
    netlist_core = netlistcore
    netstat_alllist = statlist
    netstat_corelist = statlistcore

    for percent in percentlist:
        lengthNow = max(int(round(len(timelist) * percent)), 1)
        timelistPercentNow = timelist[:lengthNow]
        for timep in selecTime(timelistPercentNow, periodcnt):
            timep = str(timep)
            percentNetAttri = [cocfilename, percent]

            # Sub-network of all edges created up to ``timep``.
            g = graphAll.subgraph_edges(
                es.select(createdtimetos_le=timep), delete_vertices=False)
            netAttribute_all = analysisNet(g)

            gg = clus.VertexClustering.giant(g.clusters(mode='weak'))
            ggcore = getCorePart(gg, 1)
            netAttribute_core = analysisNet(ggcore)

            # Each call gets its own copy of the row prefix.
            netlist_all = get_netlist(
                netAttribute_all, percentNetAttri[0:], netlist)
            netlist_core = get_netlist(
                netAttribute_core, percentNetAttri[0:], netlistcore)
            netstat_alllist = get_netlist(
                netstat_all, percentNetAttri[0:], statlist)
            netstat_corelist = get_netlist(
                netstat_core, percentNetAttri[0:], statlistcore)

    return [zip(*netlist_all),zip(*netlist_core),zip(*netstat_alllist),zip(*netstat_corelist)]
Esempio n. 5
0
def analyzeNet_time(workfolder_att, fname, g):
    """Analyse growing time slices of ``g`` and append the results to files.

    Three files are created through ``gt.createFiles`` under
    ``workfolder_att``: ``<fname[0]>.atts``, ``percent_stat.att`` and
    ``percent_net.att``. The ``createdtimetos`` edge values are sorted after
    dropping entries equal to the literal ``'createdtimetos'``. For each
    fraction 0.1 .. 1.0 the matching prefix of timestamps (at least one) is
    handed to ``selecTime`` with a period count of 1. For every returned time
    point the subgraph of edges with ``createdtimetos`` <= that point is
    analysed with ``grt.analysisNet`` and ``analyzeNetStat``; each result is
    appended (``writype='a+'``) to ``percent_net.att`` and
    ``percent_stat.att`` respectively.

    Every row starts with ``fname[0]``, the percent and the time point.
    Returns ``None``.
    """
    attsfp = workfolder_att + str(fname[0]) + '.atts'
    gt.createFiles(attsfp)
    stat_attsfp_percent = workfolder_att + 'percent_stat.att'
    gt.createFiles(stat_attsfp_percent)
    net_attsfp_percent = workfolder_att + 'percent_net.att'
    gt.createFiles(net_attsfp_percent)
    periodcnt = 1
    percentlist = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

    es = ig.EdgeSeq(g)
    # Drop placeholder entries equal to the attribute name, then sort.
    timelist = sorted(
        t for t in es.get_attribute_values('createdtimetos')
        if t != 'createdtimetos')

    for percent in percentlist:
        lengthNow = max(int(round(len(timelist) * percent)), 1)
        timelistPercentNow = timelist[:lengthNow]
        for timep in selecTime(timelistPercentNow, periodcnt):
            timep = str(timep)
            percentNetAttri = [fname[0], percent, timep]

            subg = g.subgraph_edges(es.select(createdtimetos_le=timep))

            # A fresh accumulator per call: the original reset its list to
            # [] after every save, so each saved batch starts empty.
            netAttribute_all = grt.analysisNet(subg)
            netlist_all = get_netlist(netAttribute_all,
                                      percentNetAttri[0:], [])
            gt.saveList(netlist_all, net_attsfp_percent, writype='a+')

            netstat_all = analyzeNetStat(subg)
            netstat_alllist = get_netlist(netstat_all, percentNetAttri[0:],
                                          [])
            gt.saveList(netstat_alllist, stat_attsfp_percent, writype='a+')
Esempio n. 6
0
def draw_graph_adj(A, seed):
    """Plot the graph described by the adjacency matrix ``A``.

    ``A`` is indexed as ``A[i, j]`` and ``len(A)`` gives the vertex count.
    Only the upper triangle including the diagonal is read: an edge
    ``(i, j)`` is added for every ``j >= i`` with ``A[i, j] > 0``, so a
    positive diagonal entry produces a self-loop. Vertices are labelled
    ``0 .. n-1`` and the graph is drawn with the Fruchterman-Reingold layout.

    ``seed`` seeds Python's ``random`` module before the layout is computed.
    NOTE(review): presumably this makes the layout reproducible - confirm
    that igraph draws its random numbers from ``random`` in this setup.

    The value returned by ``igraph.plot`` is discarded; the function returns
    ``None``.
    """
    random.seed(seed)
    num = len(A)
    g = igraph.Graph()
    g.add_vertices(num)

    # Collect the edges first and add them in a single call instead of
    # calling add_edges once per edge.
    g.add_edges([(i, j)
                 for i in range(num)
                 for j in range(i, num)
                 if A[i, j] > 0])

    g.vs["label"] = np.arange(num)

    layout = g.layout("fruchterman_reingold")
    igraph.plot(g, layout=layout)
def analyze_one(cocfilename, coc_folder,gmlfolder,percentlist=[1],timeseriesfile = r'G:\HFS\WeiboData\HFSWeiboStatNet\Stat\TimeSeries.txt',periodcnt=1,graphAll = None):
    """Analyse the whole graph ``graphAll`` once per entry of ``percentlist``.

    IN:  ``cocfilename`` labels every result row; ``percentlist`` supplies
         the percent value stored in each row; ``graphAll`` is the graph to
         analyse. ``coc_folder``, ``gmlfolder``, ``timeseriesfile`` and
         ``periodcnt`` are accepted but not used.
    OUT: a four-element list of transposed (``zip(*rows)``) tables: whole
         network attributes, core-part attributes, whole-network stats and
         core-part stats. The two stat tables are fed empty attribute lists
         because the ``analyzeNetStat`` calls are disabled.

    No time slicing happens in this variant: every iteration analyses the
    full graph and the core part of its weak giant component; only the
    percent recorded in the row changes.

    ``percentlist`` is only iterated, never mutated, so the list default is
    not shared state.
    """
    # Accumulators handed to get_netlist.
    # NOTE(review): get_netlist presumably appends a row and returns the
    # accumulator - confirm against its definition.
    netlist = []
    netlistcore = []
    statlist = []
    statlistcore = []

    # The original built an unused path from the undefined name ``cocfolder``
    # (the parameter is ``coc_folder``), which raised NameError unless a
    # global of that name existed; the dead assignment is dropped together
    # with an edge sequence that was never read.

    # Stat analysis is disabled, so the stat rows carry no attributes.
    netstat_all = []
    netstat_core = []

    # Fall back to the empty accumulators so the return below still works
    # when ``percentlist`` is empty.
    netlist_all = netlist
    netlist_core = netlistcore
    netstat_alllist = statlist
    netstat_corelist = statlistcore

    for percent in percentlist:
        percentNetAttri = [cocfilename, percent]

        g = graphAll
        netAttribute_all = analysisNet(g)

        gg = clus.VertexClustering.giant(g.clusters(mode='weak'))
        ggcore = getCorePart(gg, 1)
        netAttribute_core = analysisNet(ggcore)

        # Each call gets its own copy of the row prefix.
        netlist_all = get_netlist(
            netAttribute_all, percentNetAttri[0:], netlist)
        netlist_core = get_netlist(
            netAttribute_core, percentNetAttri[0:], netlistcore)
        netstat_alllist = get_netlist(
            netstat_all, percentNetAttri[0:], statlist)
        netstat_corelist = get_netlist(
            netstat_core, percentNetAttri[0:], statlistcore)

    return [zip(*netlist_all),zip(*netlist_core),zip(*netstat_alllist),zip(*netstat_corelist)]
Esempio n. 8
0
def draw_graph_comm(A, community_pool, seed):
    """Plot the graph of adjacency matrix ``A`` coloured by community.

    ``A`` is indexed as ``A[i, j]`` and ``len(A)`` gives the vertex count.
    Only the upper triangle including the diagonal is read: an edge
    ``(i, j)`` is added for every ``j >= i`` with ``A[i, j] > 0``, so a
    positive diagonal entry produces a self-loop. Vertices are labelled
    ``0 .. n-1``.

    Each item of ``community_pool`` must expose ``members`` (vertex indices)
    and ``color``; every member vertex receives that colour. A vertex listed
    in several communities keeps the colour of the last one processed.

    ``seed`` seeds Python's ``random`` module before the layout is computed.
    NOTE(review): presumably this makes the layout reproducible - confirm
    that igraph draws its random numbers from ``random`` in this setup.

    The value returned by ``igraph.plot`` is discarded; the function returns
    ``None``.
    """
    random.seed(seed)
    num = len(A)
    g = igraph.Graph()
    g.add_vertices(num)

    # Collect the edges first and add them in a single call instead of
    # calling add_edges once per edge.
    g.add_edges([(i, j)
                 for i in range(num)
                 for j in range(i, num)
                 if A[i, j] > 0])

    vs = g.vs
    vs["label"] = np.arange(num)

    for c in community_pool:
        for member in c.members:
            vs[member]["color"] = c.color

    layout = g.layout("fruchterman_reingold")
    igraph.plot(g, layout=layout)
Esempio n. 9
0
def getDis(g, firstlabel):
    """Return the distance of every vertex of ``g`` to one labelled vertex.

    The vertex is the first one whose ``label`` attribute equals
    ``firstlabel``; if no vertex matches, vertex ``0`` is used.

    ``g.shortest_paths_dijkstra`` is called with ``source=None``,
    ``target=<that index>``, ``weights=None`` and ``mode='ALL'``; the
    returned nested sequence is flattened row by row into a plain list of
    NumPy scalars.
    """
    sourceid = 0  # fallback when the label is not found
    for index, vertex in enumerate(g.vs):
        if vertex['label'] == firstlabel:
            sourceid = index
            break

    dis = g.shortest_paths_dijkstra(source=None,
                                    target=sourceid,
                                    weights=None,
                                    mode='ALL')

    # ``np.mat`` was removed in NumPy 2.0; a plain array flattens the same
    # way (row-major) and yields the same element types.
    return list(np.asarray(dis).flat)
Esempio n. 10
0
# Read the per-vertex supply/demand table named by the third command-line
# argument. Each row is (1-based vertex id, Qsupply, Qdemand).
# NOTE(review): 'rb' is the Python 2 convention for csv input; on Python 3
# csv.reader needs a text-mode file - confirm the target interpreter.
with open(str(sys.argv[3]), 'rb') as csvfileQ:
    csvreaderQ = csv.reader(csvfileQ)
    mycsvQ = list(csvreaderQ)

# Store the values on the graph vertices and mirror them to text files, one
# value per line. The context manager closes both files even if a row fails
# to parse.
with open('../../data/Qsupply.txt', 'w') as fQs, \
        open('../../data/Qdemand.txt', 'w') as fQd:
    for row in mycsvQ:
        networkGraph.vs.select(int(row[0]) - 1)["Qsupply"] = float(row[1])
        networkGraph.vs.select(int(row[0]) - 1)["Qdemand"] = float(row[2])
        fQs.write(row[1] + '\n')
        fQd.write(row[2] + '\n')

# Dump the edge list as "<source>\t<target>" lines.
with open('../../data/networkInfo.txt', 'w') as fNI:
    es = igraph.EdgeSeq(networkGraph)
    for edge in es:
        fNI.write(str(edge.tuple[0]) + '\t')
        fNI.write(str(edge.tuple[1]))
        fNI.write('\n')

# Dump the SVQ matrix row by row; every cell is followed by a tab, including
# the last one of each row.
with open('../../data/SVQ.txt', 'w') as fSVQ:
    rowN, colN = SVQ.shape
    for x in range(0, rowN):
        for y in range(0, colN):
            fSVQ.write(str(SVQ[x, y]) + '\t')
        fSVQ.write('\n')
Esempio n. 11
0
import igraph

#%%
# Small test network, read as an undirected "ncol" edge list.
test_file = "D:/Assignments/Graduate/EE232E/P2/project_2_data/project_2_data/test_file.txt"
g = igraph.Graph.Read(test_file, format="ncol", directed=False)

#%%
# Edge weights of the test network.
tw = g.es["weight"]

#%%
# Reduced movie network, same format.
mov_edge_file = "D:/Assignments/Graduate/EE232E/P2/project_2_data/project_2_data/red_mov_net_edgelist.txt"

gm = igraph.Graph.Read(mov_edge_file, format="ncol", directed=False)

#%%
# Fast-greedy community detection driven by the edge weights.
com = gm.community_fastgreedy(weights=gm.es["weight"])

#%%
# Metadata files; they are opened here and left open for the code that
# follows.
genrefile = open(
    "D:/Assignments/Graduate/EE232E/P2/project_2_data/project_2_data/movie_genre.txt",
    'r')
ratefile = open(
    "D:/Assignments/Graduate/EE232E/P2/project_2_data/project_2_data/movie_rating.txt",
    'r')
movfile = open(
    "D:/Assignments/Graduate/EE232E/P2/project_2_data/project_2_data/movie_5acts.txt",
    'r')

#%%
movies_dict = {}
Esempio n. 12
0
def temporalCommunityLayout(tempNet,
                            use_weights=True,
                            iterations=None,
                            temperature=1):
    """Returns a special representation of the first-order aggregated
       network which groups temporal communities based on the second-
       order network.

       The layout is a hand-written force-directed iteration: all vertex
       pairs repel each other, while every first-order edge attracts its
       endpoints with a strength scaled by the edge weight and by the
       weights of the two-paths that lead from its source to its target.

       @param tempNet:  The temporal network instance to plot
       @param use_weights: whether or not to use link weights (of the
       first-order model) in the layout algorithm. If the given temporal
       network is not weighted, this will be ignored.
       @param iterations: number of iterations to use for the fruchterman-
       reingold layout algorithm. Falls back to number of vertices in tempNet
       in case of None (default)
       @param temperature: parameter for the fruchterman-reingold layout
       algo; it is lowered by temperature/iterations after every iteration
       @return: an igraph.Layout holding the final (x, y) position of every
       vertex of the first-order network
       """

    Log.add(
        "Layouting first-order aggregate network with temporal communities ..."
    )

    ## get first-order network and two-paths (build them if necessary)
    g1 = tempNet.igraphFirstOrder()
    # tpcount == -1 is treated as "two-paths not extracted yet"
    if tempNet.tpcount == -1:
        tempNet.extractTwoPaths()

    # now calculate the layout based on this information

    # first: assign random positions, uniform in
    # [-sqrt(nodes)/2, sqrt(nodes)/2) on both axes
    nodes = g1.vcount()
    sqrt_nodes = np.sqrt(nodes)
    xpos = sqrt_nodes * np.random.rand(nodes) - sqrt_nodes / 2.
    ypos = sqrt_nodes * np.random.rand(nodes) - sqrt_nodes / 2.

    if iterations is None:
        iterations = nodes
    # NOTE(review): iterations == 0 (explicitly, or via an empty graph)
    # divides by zero here - confirm callers never pass that.
    difftemp = temperature / float(
        iterations)  # enforce true division in python2

    # second: iteration
    for t in range(iterations):
        # clear displacement vectors
        dplx = np.zeros(nodes)
        dply = np.zeros(nodes)

        # repulsive forces (every unordered vertex pair once)
        for i in range(nodes):
            for j in range(i + 1, nodes):
                dx = xpos[i] - xpos[j]
                dy = ypos[i] - ypos[j]
                # dist holds the SQUARED distance here, so dx / dist has
                # magnitude 1 / distance
                dist = dx * dx + dy * dy

                # avoid division by (nearly) zero: replace the offset by a
                # tiny random one
                if (dist < 1e-9):
                    dx = np.random.rand() * 1e-9
                    dy = np.random.rand() * 1e-9
                    dist = float(dx * dx + dy * dy)

                # update displacement vectors (equal and opposite)
                dplx[i] += dx / dist
                dply[i] += dy / dist
                dplx[j] -= dx / dist
                dply[j] -= dy / dist

        # attractive forces
        for e in igraph.EdgeSeq(g1):
            source, target = e.tuple
            tp_factor = 0
            # starts as a bool; becomes the edge weight below when weights
            # are in use, otherwise stays False and adds 0 to the scale
            weight_factor = (use_weights and g1.is_weighted())

            dx = xpos[source] - xpos[target]
            dy = ypos[source] - ypos[target]
            dist = np.sqrt(dx * dx + dy * dy)

            # use also weights to layout the graph
            if use_weights and g1.is_weighted():
                weight_factor *= e["weight"]

            # use information from two-paths to layout the graph
            # is there a two-path s -> ?? -> t ?
            # (this sum reads only tempNet data and vertex names, not the
            # current positions, so it is the same in every iteration)
            src_name = g1.vs[source]["name"]
            trg_name = g1.vs[target]["name"]
            for time, tp in tempNet.twopathsBySource[src_name].items():
                for path in tp:
                    # NOTE: path = tuple( source, mid, target, weight )
                    if path[2] == trg_name:
                        tp_factor += path[3]

            # scale with edge / two-paths / weight factor
            dist *= (1. + tp_factor + weight_factor)

            # pull both endpoints towards each other
            dplx[source] -= dx * dist
            dply[source] -= dy * dist
            dplx[target] += dx * dist
            dply[target] += dy * dist

        # update the positions
        for i in range(nodes):
            # tiny random jitter added to the accumulated displacement
            dx = dplx[i] + np.random.rand() * 1e-9
            dy = dply[i] + np.random.rand() * 1e-9
            dist = float(np.sqrt(dx * dx + dy * dy))

            # NOTE(review): the clamp substitutes +temperature regardless of
            # the sign of dx / dy, and the unclamped step below becomes
            # dx * dx / dist, which is never negative - confirm this is the
            # intended update rule.
            real_dx = dx if np.absolute(dx) < temperature else temperature
            real_dy = dy if np.absolute(dy) < temperature else temperature

            # avoid division by zero
            if dist > 0:
                xpos[i] += (dx / dist) * real_dx
                ypos[i] += (dy / dist) * real_dy

        # linear cooling
        temperature = temperature - difftemp
    # end of iteration loop

    Log.add("finished")

    # finally plot the first-order network with this special layout
    return igraph.Layout(tuple(zip(xpos, ypos)))
Esempio n. 13
0
def analyze_one(
        cocfilename,
        coc_folder,
        gmlfolder,
        percentlist=[1],
        timeseriesfile=r'G:\HFS\WeiboData\HFSWeiboStatNet\Stat\TimeSeries.txt',
        periodcnt=1,
        graphAll=None):
    """Analyse time-sliced sub-networks of ``graphAll`` for one coc file.

    IN:  ``cocfilename`` labels every result row; ``percentlist`` holds the
         fractions of the sorted ``createdtimetos`` edge timestamps to cover;
         ``periodcnt`` is forwarded to ``selecTime``; ``graphAll`` is the
         full graph. ``coc_folder``, ``gmlfolder`` and ``timeseriesfile`` are
         accepted but not used.
    OUT: ``zip(*rows)``, where each row is ``[cocfilename, percent]``
         followed by the values returned by ``analysisNet`` for one
         sub-network.

    For each percent the first ``round(len(timelist) * percent)`` timestamps
    (at least one) are handed to ``selecTime``. For every returned time point
    the edges with ``createdtimetos`` <= that point form the sub-network that
    is analysed.

    ``percentlist`` is only iterated, never mutated, so the list default is
    not shared state.
    """
    netlist = []
    es = ig.EdgeSeq(graphAll)

    # The original built an unused path from the undefined name ``cocfolder``
    # (the parameter is ``coc_folder``), which raised NameError unless a
    # global of that name existed; the dead assignment is dropped.
    timelist = sorted(es.get_attribute_values('createdtimetos'))

    for percent in percentlist:
        lengthNow = max(int(round(len(timelist) * percent)), 1)
        timelistPercentNow = timelist[:lengthNow]
        for timep in selecTime(timelistPercentNow, periodcnt):
            timep = str(timep)

            # Select the sub-network of all edges created up to ``timep``.
            g = graphAll.subgraph_edges(es.select(createdtimetos_le=timep))

            percentNetAttri = [cocfilename, percent]
            percentNetAttri.extend(analysisNet(g))
            netlist.append(percentNetAttri)

    return zip(*netlist)
Esempio n. 14
0
# Every line after the first holds the 1-based parent of vertex i; a parent
# of 0 marks vertex i as the root.
parents = [int(x) for x in lines[1:]]
edges = []
root = 0
for i, p in enumerate(parents):
    if p != 0:
        edges.append([p - 1, i])
    else:
        root = i

g = ig.Graph(n=n, directed=True)
g.add_edges(edges)

# Labels are the 1-based vertex numbers.
g.vs["label"] = [str(x + 1) for x in range(n)]

# The tree layout is computed from the parent edges and reused for the final
# plot even when the edges are replaced below.
layout = g.layout_reingold_tilford(root=root)

f.close()

if len(sys.argv) == 3:
    # A second file was given: drop the tree edges and draw that file's
    # edges on the same layout instead.
    g.delete_edges(ig.EdgeSeq(g))
    # The context manager closes the second file, which was left open before.
    with open(sys.argv[2]) as f:
        line = f.readline().split()
        m = int(line[3])  # edge count is the fourth token of the first line
        for _ in range(m):
            u, v = [int(x) for x in f.readline().split()]
            g.add_edge(u - 1, v - 1)

    g.to_undirected()

ig.plot(g, layout=layout)
Esempio n. 15
0
# Louvain looks very good to me; we still have to see how k-means or
# eb-infomap behave when the dendrogram cut is imposed.
louvain=community.best_partition(nuevo_post2,weight='weight')
lista_com_lou=list(louvain.values())
print('Louvain') 
print('Cantidad de comunidades óptimas: '+str(np.max(lista_com_lou)+1))
fig = plt.figure(figsize=[9,9])
graph_color(nuevo_post2,lista_com_lou,'Louvain',pos,my_dict2,peso2)
Q_l=community.modularity(louvain,nuevo_post2,weight='weight')  # 0.071 noted by the original author
fig = plt.figure(figsize=[9,9])
graph_color_lab(nuevo_post2,lista_com_lou,'Louvain',pos,my_dict2,peso2)



# Rebuild the same graph in igraph and copy the edge weights over from the
# networkx graph.
post_ig = ig.Graph.TupleList(edges=nuevo_post2.edges(),directed=False)
edges=ig.EdgeSeq(post_ig)
vseq = post_ig.vs  # vertex sequence (names in vseq['name'])
# Fetch the name list once; asking for vseq['name'] inside the loop rebuilt
# the whole list twice per edge.
nombres_ig = vseq['name']
pesos_ig=[]

for i in edges:
    # Translate the igraph endpoint indices back to the networkx node names.
    aux=(nombres_ig[i.tuple[0]],nombres_ig[i.tuple[1]])
    pesos_ig.append(nuevo_post2.edges[aux]['weight'])

post_ig.es['weight'] = pesos_ig

def nodos_nx_ig(nodos_nx,nodos_ig,comunidad):#map nodes ordered as in igraph to nodes ordered as in networkx
    """Look up, for each community member, its position in the networkx order.

    ``comunidad`` is indexed by community number and each entry is iterated
    for igraph vertex indices; ``nodos_ig[j]['name']`` gives the node name
    and ``nodos_nx.index(...)`` its position in the networkx node list.

    NOTE(review): as written, ``lista`` is allocated but never filled or
    returned, and ``ind`` is never used. From ``for coord in coordinates``
    onward the body reads names that are not defined in this function
    (``coordinates``, ``coordinate_sum``, ``geolocations``, ``feature_id``,
    ``Geolocation``, ``display_name``). It looks like the tail of this
    function was lost and an unrelated fragment was merged in - confirm
    against the original source before relying on it.
    """
    # One slot per networkx node.
    lista=[None]*len(nodos_nx)
    for i in np.arange(0,len(comunidad)):
        for j in comunidad[i]:
            node_name=nodos_ig[j]['name']
            ind=nodos_nx.index(node_name)
        # Component-wise sum, then average, of the coordinate pairs.
        for coord in coordinates:
            coordinate_sum = (coordinate_sum[0] + coord[0],
                              coordinate_sum[1] + coord[1])
        coordinate_avg = (coordinate_sum[0] / len(coordinates),
                          coordinate_sum[1] / len(coordinates))

        geolocations[feature_id] = Geolocation(display_name, coordinate_avg)


# Question 7
def pretty_print_loc(loc):
    """Return the first two entries of ``loc`` rounded to 3 decimals, as a tuple."""
    first = round(loc[0], 3)
    second = round(loc[1], 3)
    return first, second


# Edge index -> weight for the giant connected component, in edge order.
weighted_edges_gcc = collections.OrderedDict(
    (index, edge["weight"]) for index, edge in enumerate(ig.EdgeSeq(g_gcc)))

# Minimum spanning tree of the component under those weights.
mst_weights = list(weighted_edges_gcc.values())
g_mst = g_gcc.spanning_tree(weights=mst_weights)
print("The Minimum Spanning Tree has {0} vertices and {1} edges.".format(
    g_mst.vcount(), g_mst.ecount()))
g_edge_seq = ig.EdgeSeq(g_mst)
for index, edge in enumerate(g_edge_seq):
    if index % 150 == 1:  # random sampling of edges
        print("One edge in the MST: {0}, with weight {1}".format(
            edge.tuple, edge["weight"]))
        print(
            "\tSource node street address: {0}; location: {1}\n\tTarget node street address: {2}; location: {3}"
            .format(
                geolocations[str(edge.tuple[0])].name,
                pretty_print_loc(geolocations[str(edge.tuple[0])].location),
Esempio n. 17
0
def pamChoice(gmlf):
    '''Experiment with the preference-attachment model on one GML graph.

    Model background (from the original author): the supporters and invitees
    of the initiator connect to the initiator. The supporters and invitees of
    a participator p_i, however, have two choices: join the initiator
    directly, or join their master p_i. Each individual k has its own
    preference; some may like to join the CMO directly, others may not. For a
    member of the CMO, the ability to attract new members is in proportion to
    its mobilizing ability. Specifically, the preference probability of each
    connected member m is:

        p_mk = β_k * MA_m / Σ_{k=0..i} MA_k                              (5)

    where MA is the mobilizing ability and β_k is the preference coefficient
    of m_k.

    What the code does: reads the graph from ``gmlf``, takes the sub-network
    of edges up to the latest ``createdtimetos`` with ``retwitype`` >= '0',
    computes every vertex's distance to the vertex labelled by
    ``sourceLabel`` and prints several diagnostics.

    NOTE(review): the function is in a debugging state. The bare name ``er``
    below raises NameError unless it is defined at module level, so the
    regression part after it is not reached as written; that part also reads
    ``fansr``, which is never assigned in this function.
    '''
    graphAll = ig.Graph.Read_GML(gmlf)
    es = ig.EdgeSeq(graphAll)
    timelist = es.get_attribute_values('createdtimetos')
    timelist.sort()
    sourceLabel = '醉联盟'
    print len(timelist)
    # Latest timestamp: the sub-network below therefore covers every edge
    # that also passes the retwitype filter.
    tp = timelist[-1]
    # Always-true block left over from the commented-out loop over time
    # points.
    if 1:
        #     for tp in timelist[60:]:
        g = graphAll.subgraph_edges(
            es.select(createdtimetos_le=tp, retwitype_ge='0'))
        print g.vcount()
        # The string below is a no-op statement listing attribute names.
        "['reposts_count', 'avatar_large', 'retwitcnt', 'text', 'mid', 'visible', 'statuslast', 'mentioncnt', 'description', 'city', 'verified', 'retweeted_status', 'thumbnail_pic', 'truncated', 'plzftype', 'follow_me', 'verified_reason', 'attitudes_count', 'location', 'followers_count', 'retwitype', 'created_attos', 'verified_type', 'username', 'favorited', 'statuses_count', 'statuslasttos', 'friends_count', 'online_status', 'allow_all_act_msg', 'profile_image_url', 'idstr', 'timein', 'allow_all_comment', 'geo_enabled', 'geo', 'createdtimetos', 'lang', 'bi_followers_count', 'remark', 'favourites_count', 'screen_name', 'url', 'province', 'created_at', 'mlevel', 'userid', 'comments_count', 'profile_url', 'gender', 'following']"
        # NOTE(review): these attributes are read from ``es`` (edges of the
        # full graph) while ``dis`` and the labels come from the sub-network
        # ``g``; the lengths printed below may therefore differ - confirm.
        username, followers_count, friends_count = getAttofNodesFromGraphES(
            es, ['username', 'followers_count', 'friends_count'])
        followers_count = map(int, followers_count)
        dis = getDis(g, sourceLabel)

        vsNetAtt = zip(*(g.vs['label'], dis))
        nodeAtt = zip(*(username, followers_count, friends_count))
        vslabels = g.vs['label']
        #         print vslabels
        atts = gt.connectlist(nodeAtt, vsNetAtt, passcol=0, sameposition_a=2)
        print len(atts)
        #         vslabels = np.unique(vslabels)
        import operator
        # Sorts the labels in place by their first element.
        # NOTE(review): after this the labels are no longer in the same
        # order as ``dis`` and the attribute lists zipped below - confirm.
        vslabels.sort(key=operator.itemgetter(0))

        for v, un, fo, fr, di in zip(*(vslabels, username, followers_count,
                                       friends_count, dis)):
            print v, un, fo, fr, di  #.get_attribute_values('label')
        for v, di in zip(*(vslabels, dis)):
            print v, di
        print len(followers_count)
        print len(dis)
        # NOTE(review): bare name, looks like a deliberate stop for
        # debugging (raises NameError if ``er`` is undefined) - confirm.
        er


#         print np.mat(dis).flat

    # NOTE(review): ``fansr`` is not assigned anywhere in this function.
    fansr = np.array(fansr)  #np.mat(fansr).flat
    dis = np.array(dis)  #

    train_x = [fansr, dis]
    # Distances of every vertex to the hard-coded vertex index 35 are used
    # as the training target.
    train_y = g.shortest_paths_dijkstra(source=None,
                                        target=35,
                                        weights=None,
                                        mode='ALL')
    train_x, train_y = np.mat(train_x), np.mat(train_y).transpose()
    print len(train_x), len(train_y)

    from regression import logisticReg
    opts = {
        'alpha': 0.01,
        'maxIter': 20,
        'optimizeType': 'smoothStocGradDescent'
    }

    lr = logisticReg()
    lr.start(train_x, train_y, opts)
    edge_igraph = [ (node_id_dict[edge_data.iat[row, 0]], node_id_dict[edge_data.iat[row, 1]]) for row in range(0, edge_data.shape[0])]

    print('[process] plot')
    g = igraph.Graph()
    g.add_vertices(node_count)  # 添加顶点
    g.add_edges(edge_igraph)
    lay = g.layout(args.layout)   # 记录了每一个顶点布局后的x/y坐标轴,[(x0,y0), (x1,y1),...] 存储


    # 位置信息提取,便于html绘图

    position = {k: lay[k] for k in range(node_count)}  # 记录每一个顶点的x/y轴坐标, dict存储,key = index
    Y = [lay[k][1] for k in range(node_count)]  # 获取素有Y轴坐标
    M = max(Y)  # 最大的Y轴

    es = igraph.EdgeSeq(g) # sequence of edges
    E = [e.tuple for e in g.es] # list of edges, 建议一这段代码改为  g.get_edgelist(), 返回边信息

    # 取得每一个边两侧顶点对应的X坐标,Y坐标
    L = len(position)
    Xn = [position[k][0] for k in range(L)]
    Yn = [2*M-position[k][1] for k in range(L)] # 重新定义Y坐标轴
    Xe = []
    Ye = []
    for edge in E:
        Xe += [position[edge[0]][0], position[edge[1]][0], None]
        Ye += [2*M-position[edge[0]][1], 2*M-position[edge[1]][1], None]

    labels = node_data.iloc[:,0].to_list()