def retrieve_multiple_edges(graph, source=-1, target=-1):
    """Return an ``ig.EdgeSeq`` of the edges selected by the given endpoints.

    ``-1`` means "endpoint not given".  With only ``source`` the edges
    incident to it in ``ig.OUT`` mode are selected; with only ``target`` the
    edges incident to it in ``ig.IN`` mode; with both, the intersection of
    the two sets; with neither, an empty selection.
    """
    has_source = source != -1
    has_target = target != -1

    if not has_source and not has_target:
        edge_ids = list()
    elif not has_source:
        edge_ids = graph.incident(target, mode=ig.IN)
    else:
        edge_ids = graph.incident(source, mode=ig.OUT)
        if has_target:
            incoming = graph.incident(target, mode=ig.IN)
            edge_ids = set(edge_ids).intersection(incoming)

    return ig.EdgeSeq(graph, edge_ids)
def gml2svg(gmlfolder, svgfolder): fw = open(svgoutpath + 'vecount.txt', 'w') svgtype = [ '_weakGaint', '_strongGaint', '_weakGaintSPT', '_strongGaintSPT' ] if os.path.exists(svgoutpath) == 0: os.mkdir(svgoutpath) for file in os.listdir(gmlinpath): if os.path.splitext(file)[1] == '.gml': print 'Reading graph from ...', file if os.path.exists(svgoutpath + file + svgtype[3] + '.2svg'): print file, 'has existesed' pass else: try: gmlfile = open(gmlinpath + file) g = ig.Graph.Read_GML(gmlfile) gg = clus.VertexClustering.giant(g.clusters(mode='strong')) es = ig.EdgeSeq(gg) subg = gg.subgraph_edges(es.select(retwitype_eq='0')) es = ig.EdgeSeq(subg) timelist = map(float, es.get_attribute_values('time')) gsp = ig.Graph.spanning_tree(subg, timelist) vecountstr = str(g.vcount()) + '\t' + str(g.ecount( )) + '\t' + str(gg.vcount()) + '\t' + str( gg.ecount()) + '\t' + str(subg.vcount()) + '\t' + str( subg.ecount()) + '\t' + str( gsp.vcount()) + '\t' + str(gsp.ecount()) fw.write(file + '\t' + vecountstr + '\n') if os.path.exists(svgoutpath + file + svgtype[3] + '.svg'): print file, 'has existesed' else: print 'Ploting graph' ig.Graph.write_svg(subg, svgoutpath + file + svgtype[1] + '.svg', layout='large') ig.Graph.write_svg(gsp, svgoutpath + file + svgtype[3] + '.svg', layout='large') layout = gsp.layout("large") fig = plot(gsp, layout=layout) plot.show() # ig.Graph.write_svg(gsp, svgoutpath+file+'_w.svg', layout='large') #.save(gmlinpath+file+'.fig') except Exception, e: print gmlinpath + file, ' failed', e pass gmlfile.close()
def analyzeNetStat(g):
    """Collect summary statistics from the edge attributes of graph ``g``.

    Reads the edge attributes ``createdtimetos``, ``followers_count``,
    ``userid``, ``mentioncnt``, ``bi_followers_count``, ``friends_count`` and
    ``reposts_count`` and feeds them to the ``fansum``, ``echouser`` and
    ``lifespan`` helpers of ``analysisStatFromRepostxt``.

    Returns a 13-item list holding element ``[0]`` of, in order: follower
    sum, echo-user count, follower average, duration, duration average,
    mention sum/average, bi-follower sum/average, friends sum/average and
    repost sum/average.
    """
    edge_seq = ig.EdgeSeq(g)

    from analysisStatFromRepostxt import fansum
    from analysisStatFromRepostxt import echouser
    from analysisStatFromRepostxt import lifespan

    column = edge_seq.get_attribute_values
    created = column('createdtimetos')
    followers = column('followers_count')
    user_ids = column('userid')
    mentions = column('mentioncnt')
    bi_followers = column('bi_followers_count')
    friends = column('friends_count')
    reposts = column('reposts_count')

    follower_sum, follower_avg = fansum(followers, 1)
    echo_users = echouser(user_ids, 1)
    # lifespan yields four series; only two of them end up in the result.
    duration, _duration_added, duration_avg, _duration_added_avg = lifespan(
        created, 1)
    mention_sum, mention_avg = fansum(mentions, 1)
    bi_sum, bi_avg = fansum(bi_followers, 1)
    friend_sum, friend_avg = fansum(friends, 1)
    repost_sum, repost_avg = fansum(reposts, 1)

    series_in_order = (
        follower_sum, echo_users, follower_avg,
        duration, duration_avg,
        mention_sum, mention_avg,
        bi_sum, bi_avg,
        friend_sum, friend_avg,
        repost_sum, repost_avg,
    )
    return [series[0] for series in series_in_order]
def analyze_one(cocfilename, coc_folder, gmlfolder, percentlist=[1],
                timeseriesfile=r'G:\HFS\WeiboData\HFSWeiboStatNet\Stat\TimeSeries.txt',
                periodcnt=1, graphAll=None):
    """Compute network attributes of ``graphAll`` over growing time slices.

    The edge attribute ``createdtimetos`` is sorted; for every ``percent`` in
    ``percentlist`` the earliest ``percent`` share of those timestamps (at
    least one) is passed to ``selecTime`` together with ``periodcnt``.  For
    each returned time point the sub-graph of edges created up to that point
    (vertices kept) is analysed with ``analysisNet``, once as a whole and
    once for the core part (``getCorePart``) of its weakly connected giant
    component.

    ``coc_folder``, ``gmlfolder`` and ``timeseriesfile`` are not read.

    Returns a list of four ``zip(*rows)`` results: whole-graph attributes,
    core attributes, whole-graph stats and core stats.  The two stat inputs
    are always empty lists here.
    """
    netlist = []
    netlistcore = []
    statlist = []
    statlistcore = []
    es = ig.EdgeSeq(graphAll)
    # The former ``cocfilepath = cocfolder + ...`` line was dropped: its value
    # was never used and ``cocfolder`` is not a name defined in this function
    # (the parameter is ``coc_folder``).
    timelist = es.get_attribute_values('createdtimetos')
    timelist.sort()

    netstat_all = []
    netstat_core = []

    # Fallbacks so the return below does not hit unbound names when no time
    # point is produced.
    # NOTE(review): assumes get_netlist returns the accumulated row list.
    netlist_all = netlist
    netlist_core = netlistcore
    netstat_alllist = statlist
    netstat_corelist = statlistcore

    for percent in percentlist:
        lengthNow = max(int(round(len(timelist) * percent)), 1)
        timelistPeriodNow = selecTime(timelist[:lengthNow], periodcnt)
        for timep in timelistPeriodNow:
            timep = str(timep)
            percentNetAttri = [cocfilename, percent]

            g = graphAll.subgraph_edges(
                es.select(createdtimetos_le=timep), delete_vertices=False)
            netAttribute_all = analysisNet(g)

            gg = clus.VertexClustering.giant(g.clusters(mode='weak'))
            ggcore = getCorePart(gg, 1)
            netAttribute_core = analysisNet(ggcore)

            netlist_all = get_netlist(
                netAttribute_all, percentNetAttri[0:], netlist)
            netlist_core = get_netlist(
                netAttribute_core, percentNetAttri[0:], netlistcore)
            netstat_alllist = get_netlist(
                netstat_all, percentNetAttri[0:], statlist)
            netstat_corelist = get_netlist(
                netstat_core, percentNetAttri[0:], statlistcore)

    return [zip(*netlist_all), zip(*netlist_core),
            zip(*netstat_alllist), zip(*netstat_corelist)]
def analyzeNet_time(workfolder_att, fname, g):
    """Write per-time-slice network and stat attributes of ``g`` to disk.

    Three files are created under ``workfolder_att`` via ``gt.createFiles``:
    ``<fname[0]>.atts``, ``percent_stat.att`` and ``percent_net.att``.  For
    each share 0.1 .. 1.0 of the sorted ``createdtimetos`` edge values, the
    time points chosen by ``selecTime`` define a sub-graph of edges created
    up to that point; its ``grt.analysisNet`` and ``analyzeNetStat`` rows are
    appended (``a+``) to the net and stat files respectively.
    """
    case_id = fname[0]

    gt.createFiles(workfolder_att + str(case_id) + '.atts')
    stat_path = workfolder_att + 'percent_stat.att'
    gt.createFiles(stat_path)
    net_path = workfolder_att + 'percent_net.att'
    gt.createFiles(net_path)

    period_count = 1
    shares = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

    edge_seq = ig.EdgeSeq(g)
    # Entries equal to the attribute name itself are discarded.
    stamps = [
        stamp for stamp in edge_seq.get_attribute_values('createdtimetos')
        if stamp != 'createdtimetos'
    ]
    stamps.sort()

    for share in shares:
        keep = int(round(len(stamps) * share))
        if keep <= 1:
            keep = 1
        for time_point in selecTime(stamps[:keep], period_count):
            time_point = str(time_point)
            row_prefix = [case_id, share, time_point]
            slice_graph = g.subgraph_edges(
                edge_seq.select(createdtimetos_le=time_point))

            # Each save starts from an empty accumulator.
            net_rows = get_netlist(
                grt.analysisNet(slice_graph), list(row_prefix), [])
            gt.saveList(net_rows, net_path, writype='a+')

            stat_rows = get_netlist(
                analyzeNetStat(slice_graph), list(row_prefix), [])
            gt.saveList(stat_rows, stat_path, writype='a+')
def draw_graph_adj(A, seed):
    """Plot the graph described by adjacency matrix ``A``.

    ``random`` is seeded with ``seed`` first.  One vertex is created per row
    of ``A`` and an edge ``(i, j)`` is added for every ``j >= i`` with
    ``A[i, j] > 0`` (the diagonal is included).  Vertices are labelled with
    their index and drawn with the ``fruchterman_reingold`` layout.
    """
    random.seed(seed)

    size = len(A)
    graph = igraph.Graph()
    graph.add_vertices(size)

    upper_edges = []
    for row in range(size):
        for col in range(row, size):
            if A[row, col] > 0:
                upper_edges.append((row, col))
    graph.add_edges(upper_edges)

    igraph.VertexSeq(graph)["label"] = np.arange(size)

    igraph.plot(graph, layout=graph.layout("fruchterman_reingold"))
def analyze_one(cocfilename, coc_folder, gmlfolder, percentlist=[1],
                timeseriesfile=r'G:\HFS\WeiboData\HFSWeiboStatNet\Stat\TimeSeries.txt',
                periodcnt=1, graphAll=None):
    """Compute network attributes of ``graphAll`` and of its core part.

    For every entry of ``percentlist`` the whole graph is analysed with
    ``analysisNet``, and so is the core part (``getCorePart``) of its weakly
    connected giant component.  ``percent`` only labels the output rows; the
    graph itself is not sliced.

    ``coc_folder``, ``gmlfolder``, ``timeseriesfile`` and ``periodcnt`` are
    not read.

    Returns a list of four ``zip(*rows)`` results: whole-graph attributes,
    core attributes, whole-graph stats and core stats.  The two stat inputs
    are always empty lists here.
    """
    netlist = []
    netlistcore = []
    statlist = []
    statlistcore = []
    # The former ``cocfilepath = cocfolder + ...`` line was dropped: its value
    # was never used and ``cocfolder`` is not a name defined in this function
    # (the parameter is ``coc_folder``).

    netstat_all = []
    netstat_core = []

    # Fallbacks so an empty ``percentlist`` does not hit unbound names below.
    # NOTE(review): assumes get_netlist returns the accumulated row list.
    netlist_all = netlist
    netlist_core = netlistcore
    netstat_alllist = statlist
    netstat_corelist = statlistcore

    for percent in percentlist:
        percentNetAttri = [cocfilename, percent]

        g = graphAll
        netAttribute_all = analysisNet(g)

        gg = clus.VertexClustering.giant(g.clusters(mode='weak'))
        ggcore = getCorePart(gg, 1)
        netAttribute_core = analysisNet(ggcore)

        netlist_all = get_netlist(
            netAttribute_all, percentNetAttri[0:], netlist)
        netlist_core = get_netlist(
            netAttribute_core, percentNetAttri[0:], netlistcore)
        netstat_alllist = get_netlist(
            netstat_all, percentNetAttri[0:], statlist)
        netstat_corelist = get_netlist(
            netstat_core, percentNetAttri[0:], statlistcore)

    return [zip(*netlist_all), zip(*netlist_core),
            zip(*netstat_alllist), zip(*netstat_corelist)]
def draw_graph_comm(A, community_pool, seed):
    """Plot the graph of adjacency matrix ``A`` coloured by community.

    ``random`` is seeded with ``seed`` first.  One vertex is created per row
    of ``A`` and an edge ``(i, j)`` is added for every ``j >= i`` with
    ``A[i, j] > 0``.  Each object in ``community_pool`` supplies ``members``
    (vertex indices) and ``color``; every member vertex gets that colour.
    The graph is drawn with the ``fruchterman_reingold`` layout.
    """
    random.seed(seed)

    size = len(A)
    graph = igraph.Graph()
    graph.add_vertices(size)

    upper_edges = []
    for row in range(size):
        for col in range(row, size):
            if A[row, col] > 0:
                upper_edges.append((row, col))
    graph.add_edges(upper_edges)

    vertices = igraph.VertexSeq(graph)
    vertices["label"] = np.arange(size)

    for comm in community_pool:
        for member in comm.members:
            vertices[member]["color"] = comm.color

    igraph.plot(graph, layout=graph.layout("fruchterman_reingold"))
def getDis(g, firstlabel):
    """Return the path length from every vertex of ``g`` to one vertex.

    The reference vertex is the first one whose ``label`` attribute equals
    ``firstlabel``; when no vertex matches, vertex 0 is used.  Distances are
    computed with ``shortest_paths_dijkstra`` (no weights, mode ``'ALL'``)
    and returned as a flat list, one entry per vertex.
    """
    matches = (
        index for index, vertex in enumerate(g.vs)
        if vertex['label'] == firstlabel
    )
    target_index = next(matches, 0)

    distance_rows = g.shortest_paths_dijkstra(
        source=None, target=target_index, weights=None, mode='ALL')
    flat = np.mat(distance_rows).flat
    return list(flat)
# Read the per-node supply/demand table named by the third command-line
# argument; each row is read as (node number, Qsupply, Qdemand).
with open(str(sys.argv[3]), 'rb') as csvfileQ:
    csvreaderQ = csv.reader(csvfileQ)
    mycsvQ = list(csvreaderQ)

# Store both values on the graph vertices and mirror them to text files.
# ``with`` guarantees the files are closed even if a row fails to parse.
with open('../../data/Qsupply.txt', 'w') as fQs, \
        open('../../data/Qdemand.txt', 'w') as fQd:
    for row in mycsvQ:
        # Node numbers in the CSV are 1-based; vertex indices are 0-based.
        vertex_index = int(row[0]) - 1
        networkGraph.vs.select(vertex_index)["Qsupply"] = float(row[1])
        networkGraph.vs.select(vertex_index)["Qdemand"] = float(row[2])
        fQs.write(row[1] + '\n')
        fQd.write(row[2] + '\n')

# One line per edge: "<source index>\t<target index>".
es = igraph.EdgeSeq(networkGraph)
with open('../../data/networkInfo.txt', 'w') as fNI:
    for edge in es:
        fNI.write(str(edge.tuple[0]) + '\t')
        fNI.write(str(edge.tuple[1]))
        fNI.write('\n')

# Dump the SVQ matrix row by row; every value is followed by a tab.
rowN, colN = SVQ.shape
with open('../../data/SVQ.txt', 'w') as fSVQ:
    for x in range(0, rowN):
        for y in range(0, colN):
            fSVQ.write(str(SVQ[x, y]) + '\t')
        fSVQ.write('\n')
import igraph

#%% Small test graph in "ncol" edge-list format, read as undirected.
test_file = "D:/Assignments/Graduate/EE232E/P2/project_2_data/project_2_data/test_file.txt"
g = igraph.Graph.Read(test_file, format="ncol", directed=False)

#%% Edge weights of the test graph.
tw = g.es["weight"]

#%% Movie network edge list, read as undirected.
mov_edge_file = "D:/Assignments/Graduate/EE232E/P2/project_2_data/project_2_data/red_mov_net_edgelist.txt"
gm = igraph.Graph.Read(mov_edge_file, format="ncol", directed=False)

#%% Fast-greedy community detection using the edge weights.
movie_weights = gm.es["weight"]
com = gm.community_fastgreedy(weights=movie_weights)

#%% Auxiliary movie files (genre, rating, cast), opened for reading.
genrefile = open(
    "D:/Assignments/Graduate/EE232E/P2/project_2_data/project_2_data/movie_genre.txt",
    'r')
ratefile = open(
    "D:/Assignments/Graduate/EE232E/P2/project_2_data/project_2_data/movie_rating.txt",
    'r')
movfile = open(
    "D:/Assignments/Graduate/EE232E/P2/project_2_data/project_2_data/movie_5acts.txt",
    'r')

#%%
movies_dict = {}
def temporalCommunityLayout(tempNet,
                            use_weights=True,
                            iterations=None,
                            temperature=1):
    """Compute a force-directed layout of the first-order aggregate network
    in which attraction along an edge is strengthened by two-path weights.

    Positions start at random, then ``iterations`` rounds of pairwise
    repulsion plus per-edge attraction are applied while ``temperature`` is
    decreased linearly by ``temperature / iterations`` per round.

    @param tempNet: The temporal network instance to lay out. Must provide
        ``igraphFirstOrder()``, ``tpcount``, ``extractTwoPaths()`` and
        ``twopathsBySource``.
    @param use_weights: whether or not to use link weights (of the
        first-order model) in the layout algorithm. If the first-order graph
        is not weighted, this is ignored.
    @param iterations: number of rounds to run. Falls back to the number of
        vertices of the first-order graph in case of None (default).
    @param temperature: initial upper bound applied to each per-axis step.
    @return: an ``igraph.Layout`` built from the final (x, y) positions.
    """
    Log.add(
        "Layouting first-order aggregate network with temporal communities ..."
    )

    ## get first-order network and two-paths (build them if necessary)
    g1 = tempNet.igraphFirstOrder()
    if tempNet.tpcount == -1:
        tempNet.extractTwoPaths()

    # now calculate the layout based on this information
    # first: assign random positions, uniform in
    # [-sqrt(nodes)/2, sqrt(nodes)/2) on each axis
    nodes = g1.vcount()
    sqrt_nodes = np.sqrt(nodes)
    xpos = sqrt_nodes * np.random.rand(nodes) - sqrt_nodes / 2.
    ypos = sqrt_nodes * np.random.rand(nodes) - sqrt_nodes / 2.

    if iterations is None:
        iterations = nodes
    # NOTE(review): an empty graph with iterations=None divides by zero here.
    difftemp = temperature / float(
        iterations)  # enforce true division in python2

    # second: iteration (the loop variable t itself is not used)
    for t in range(iterations):
        # clear displacement vectors
        dplx = np.zeros(nodes)
        dply = np.zeros(nodes)

        # repulsive forces between every unordered vertex pair;
        # note that dist holds the SQUARED distance in this loop
        for i in range(nodes):
            for j in range(i + 1, nodes):
                dx = xpos[i] - xpos[j]
                dy = ypos[i] - ypos[j]
                dist = dx * dx + dy * dy
                # avoid division by (nearly) zero: replace the offset by a
                # tiny random one
                if (dist < 1e-9):
                    dx = np.random.rand() * 1e-9
                    dy = np.random.rand() * 1e-9
                    dist = float(dx * dx + dy * dy)
                # update displacement vectors (equal and opposite)
                dplx[i] += dx / dist
                dply[i] += dy / dist
                dplx[j] -= dx / dist
                dply[j] -= dy / dist

        # attractive forces along every edge
        for e in igraph.EdgeSeq(g1):
            source, target = e.tuple
            tp_factor = 0
            # starts as a boolean; becomes the edge weight below when
            # weights are in use
            weight_factor = (use_weights and g1.is_weighted())
            dx = xpos[source] - xpos[target]
            dy = ypos[source] - ypos[target]
            # here dist is the plain Euclidean distance
            dist = np.sqrt(dx * dx + dy * dy)
            # use also weights to layout the graph
            if use_weights and g1.is_weighted():
                weight_factor *= e["weight"]
            # use information from two-paths to layout the graph
            # is there a two-path s -> ?? -> t ?
            src_name = g1.vs[source]["name"]
            trg_name = g1.vs[target]["name"]
            # NOTE(review): a source name missing from twopathsBySource
            # would raise KeyError here -- confirm every vertex has an entry.
            for time, tp in tempNet.twopathsBySource[src_name].items():
                for path in tp:
                    # NOTE: path = tuple( source, mid, target, weight )
                    if path[2] == trg_name:
                        tp_factor += path[3]
            # scale with edge / two-paths / weight factor
            dist *= (1. + tp_factor + weight_factor)
            dplx[source] -= dx * dist
            dply[source] -= dy * dist
            dplx[target] += dx * dist
            dply[target] += dy * dist

        # update the positions
        for i in range(nodes):
            # tiny random jitter is added to each displacement
            dx = dplx[i] + np.random.rand() * 1e-9
            dy = dply[i] + np.random.rand() * 1e-9
            dist = float(np.sqrt(dx * dx + dy * dy))
            # NOTE(review): when |dx| >= temperature the value becomes
            # +temperature whatever the sign of dx -- confirm intended.
            real_dx = dx if np.absolute(dx) < temperature else temperature
            real_dy = dy if np.absolute(dy) < temperature else temperature
            # avoid division by zero
            if dist > 0:
                xpos[i] += (dx / dist) * real_dx
                ypos[i] += (dy / dist) * real_dy
        # cool down linearly
        temperature = temperature - difftemp
    # end of iteration loop
    Log.add("finished")

    # finally wrap the computed positions in an igraph Layout
    return igraph.Layout(tuple(zip(xpos, ypos)))
def analyze_one(
        cocfilename,
        coc_folder,
        gmlfolder,
        percentlist=[1],
        timeseriesfile=r'G:\HFS\WeiboData\HFSWeiboStatNet\Stat\TimeSeries.txt',
        periodcnt=1,
        graphAll=None):
    """Compute network attributes of ``graphAll`` over growing time slices.

    The edge attribute ``createdtimetos`` is sorted; for every ``percent`` in
    ``percentlist`` the earliest ``percent`` share of those timestamps (at
    least one) is passed to ``selecTime`` together with ``periodcnt``.  For
    each returned time point the sub-graph of edges created up to that point
    is analysed with ``analysisNet``.

    ``coc_folder``, ``gmlfolder`` and ``timeseriesfile`` are not read.

    Returns ``zip(*rows)`` where each row is
    ``[cocfilename, percent] + analysisNet(subgraph)``.
    """
    netlist = []
    es = ig.EdgeSeq(graphAll)
    # The former ``cocfilepath = cocfolder + ...`` line was dropped: its value
    # was never used and ``cocfolder`` is not a name defined in this function
    # (the parameter is ``coc_folder``).
    timelist = es.get_attribute_values('createdtimetos')
    timelist.sort()

    for percent in percentlist:
        # Always keep at least one timestamp.
        lengthNow = max(int(round(len(timelist) * percent)), 1)
        timelistPeriodNow = selecTime(timelist[:lengthNow], periodcnt)
        for timep in timelistPeriodNow:
            timep = str(timep)
            # Select the sub-network of edges created up to this time point.
            g = graphAll.subgraph_edges(es.select(createdtimetos_le=timep))

            percentNetAttri = [cocfilename, percent]
            percentNetAttri.extend(analysisNet(g))
            netlist.append(percentNetAttri)

    return zip(*netlist)
# Build a directed tree from a parent list: entry i holds the 1-based parent
# of vertex i, with 0 marking the root.
parents = [int(x) for x in lines[1:]]
edges = []
root = 0
for i, p in enumerate(parents):
    if p != 0:
        edges.append([p - 1, i])
    else:
        root = i

g = ig.Graph(n=n, directed=True)
g.add_edges(edges)
g.vs["label"] = [str(x + 1) for x in range(n)]
# The vertex positions come from the tree, whatever edges are drawn later.
layout = g.layout_reingold_tilford(root=root)
f.close()

# With a second input file, replace the tree edges by the edges listed there
# while keeping the tree layout.
# NOTE(review): the nesting below was reconstructed from a collapsed line;
# confirm that to_undirected() belongs inside this branch.
if len(sys.argv) == 3:
    g.delete_edges(ig.EdgeSeq(g))
    # ``with`` closes the second file, which was previously left open.
    with open(sys.argv[2]) as f:
        line = f.readline().split()
        m = int(line[3])  # edge count is the fourth token of the header
        new_edges = []
        for _ in range(m):
            u, v = [int(x) for x in f.readline().split()]
            new_edges.append((u - 1, v - 1))
    g.add_edges(new_edges)
    g.to_undirected()

ig.plot(g, layout=layout)
#para mi se ve muy bien con louvain y tenemos que probar qué onda con kmeans, o con eb-infomap #imponiendo el corte del dendograma. louvain=community.best_partition(nuevo_post2,weight='weight') lista_com_lou=list(louvain.values()) print('Louvain') print('Cantidad de comunidades óptimas: '+str(np.max(lista_com_lou)+1)) fig = plt.figure(figsize=[9,9]) graph_color(nuevo_post2,lista_com_lou,'Louvain',pos,my_dict2,peso2) Q_l=community.modularity(louvain,nuevo_post2,weight='weight')#0.071 fig = plt.figure(figsize=[9,9]) graph_color_lab(nuevo_post2,lista_com_lou,'Louvain',pos,my_dict2,peso2) post_ig = ig.Graph.TupleList(edges=nuevo_post2.edges(),directed=False) edges=ig.EdgeSeq(post_ig) vseq = post_ig.vs #lista de nodos (vseq['name']) pesos_ig=[] for i in edges: aux=(vseq['name'][i.tuple[0]],vseq['name'][i.tuple[1]]) pesos_ig.append(nuevo_post2.edges[aux]['weight']) post_ig.es['weight'] = pesos_ig def nodos_nx_ig(nodos_nx,nodos_ig,comunidad):#paso de los nodos ordenados segun ig a los nodos ordenados segun nx lista=[None]*len(nodos_nx) for i in np.arange(0,len(comunidad)): for j in comunidad[i]: node_name=nodos_ig[j]['name'] ind=nodos_nx.index(node_name)
for coord in coordinates: coordinate_sum = (coordinate_sum[0] + coord[0], coordinate_sum[1] + coord[1]) coordinate_avg = (coordinate_sum[0] / len(coordinates), coordinate_sum[1] / len(coordinates)) geolocations[feature_id] = Geolocation(display_name, coordinate_avg) # Question 7 def pretty_print_loc(loc): return (round(loc[0], 3), round(loc[1], 3)) weighted_edges_gcc = collections.OrderedDict() for index, edge in enumerate(ig.EdgeSeq(g_gcc)): weighted_edges_gcc[index] = edge["weight"] g_mst = g_gcc.spanning_tree(weights=list(weighted_edges_gcc.values())) print("The Minimum Spanning Tree has {0} vertices and {1} edges.".format( g_mst.vcount(), g_mst.ecount())) g_edge_seq = ig.EdgeSeq(g_mst) for index, edge in enumerate(g_edge_seq): if index % 150 == 1: # random sampling of edges print("One edge in the MST: {0}, with weight {1}".format( edge.tuple, edge["weight"])) print( "\tSource node street address: {0}; location: {1}\n\tTarget node street address: {2}; location: {3}" .format( geolocations[str(edge.tuple[0])].name, pretty_print_loc(geolocations[str(edge.tuple[0])].location),
def pamChoice(gmlf):
    '''Exploratory analysis of who new members attach to in a repost graph.

    Model being explored: the supporters and invitees of the initiator
    connect to the initiator.  The supporters and invitees of a participator
    p_i have two choices: join the initiator directly, or join their master
    p_i.  Each individual k has its own preference.  For a member of the CMO,
    the ability to attract new members is taken to be proportional to its
    mobilizing ability, so the preference probability of each connected
    member m is

        p_mk = beta_k * MA_m / sum_{k=0..i} MA_k        (5)

    where MA is the mobilizing ability and beta_k is the preference
    coefficient of m_k.

    What the code does: reads the GML file ``gmlf``, takes the sub-graph of
    edges with ``createdtimetos`` <= the latest timestamp and
    ``retwitype`` >= '0', computes the distance of every vertex to the vertex
    labelled by ``sourceLabel`` and prints several attribute tables.

    NOTE(review): the function cannot run to the end as written -- see the
    notes at the bare ``er`` statement and below it.
    '''
    graphAll = ig.Graph.Read_GML(gmlf)
    es = ig.EdgeSeq(graphAll)
    timelist = es.get_attribute_values('createdtimetos')
    timelist.sort()
    sourceLabel = '醉联盟'
    print len(timelist)
    # Only the latest timestamp is analysed.
    tp = timelist[-1]
    if 1:
        # for tp in timelist[60:]:
        g = graphAll.subgraph_edges(
            es.select(createdtimetos_le=tp, retwitype_ge='0'))
        print g.vcount()
        # The bare string below is a no-op statement kept as a reference list
        # of attribute names.
        "['reposts_count', 'avatar_large', 'retwitcnt', 'text', 'mid', 'visible', 'statuslast', 'mentioncnt', 'description', 'city', 'verified', 'retweeted_status', 'thumbnail_pic', 'truncated', 'plzftype', 'follow_me', 'verified_reason', 'attitudes_count', 'location', 'followers_count', 'retwitype', 'created_attos', 'verified_type', 'username', 'favorited', 'statuses_count', 'statuslasttos', 'friends_count', 'online_status', 'allow_all_act_msg', 'profile_image_url', 'idstr', 'timein', 'allow_all_comment', 'geo_enabled', 'geo', 'createdtimetos', 'lang', 'bi_followers_count', 'remark', 'favourites_count', 'screen_name', 'url', 'province', 'created_at', 'mlevel', 'userid', 'comments_count', 'profile_url', 'gender', 'following']"
        # NOTE(review): attributes are read from ``es`` (edges of the full
        # graph) while labels/distances below come from the sub-graph ``g``;
        # confirm the sequences line up.
        username, followers_count, friends_count = getAttofNodesFromGraphES(
            es, ['username', 'followers_count', 'friends_count'])
        followers_count = map(int, followers_count)
        dis = getDis(g, sourceLabel)
        vsNetAtt = zip(*(g.vs['label'], dis))
        nodeAtt = zip(*(username, followers_count, friends_count))
        vslabels = g.vs['label']
        # print vslabels
        atts = gt.connectlist(nodeAtt, vsNetAtt, passcol=0, sameposition_a=2)
        print len(atts)
        # vslabels = np.unique(vslabels)
        import operator
        # Sorts the labels by their first element only.
        # NOTE(review): after this sort ``vslabels`` is no longer in the
        # order of ``dis`` -- confirm the zips below are meaningful.
        vslabels.sort(key=operator.itemgetter(0))
        for v, un, fo, fr, di in zip(*(vslabels, username, followers_count,
                                       friends_count, dis)):
            print v, un, fo, fr, di  #.get_attribute_values('label')
        for v, di in zip(*(vslabels, dis)):
            print v, di
        print len(followers_count)
        print len(dis)
        # NOTE(review): ``er`` is not assigned anywhere in this function;
        # unless a global of that name exists this raises NameError here --
        # presumably a deliberate stop while debugging.
        er
        # print np.mat(dis).flat
        # NOTE(review): ``fansr`` (here) and ``train_x`` (below) are read
        # before any assignment in this function.
        fansr = np.array(fansr)  #np.mat(fansr).flat
        dis = np.array(dis)
        # train_x = [fansr, dis]
        train_y = g.shortest_paths_dijkstra(source=None,
                                            target=35,
                                            weights=None,
                                            mode='ALL')
        train_x, train_y = np.mat(train_x), np.mat(train_y).transpose()
        print len(train_x), len(train_y)
        from regression import logisticReg
        opts = {
            'alpha': 0.01,
            'maxIter': 20,
            'optimizeType': 'smoothStocGradDescent'
        }
        lr = logisticReg()
        lr.start(train_x, train_y, opts)
# Translate every (source, target) row of the edge table into vertex indices.
edge_igraph = [
    (node_id_dict[edge_data.iat[row, 0]], node_id_dict[edge_data.iat[row, 1]])
    for row in range(edge_data.shape[0])
]
print('[process] plot')
g = igraph.Graph()
g.add_vertices(node_count)  # add the vertices
g.add_edges(edge_igraph)
# x/y coordinates of every vertex after layout: [(x0, y0), (x1, y1), ...]
lay = g.layout(args.layout)

# Extract the positions so they can be used for the HTML drawing.
# position: vertex index -> its x/y coordinates
position = dict((k, lay[k]) for k in range(node_count))
Y = [position[k][1] for k in range(node_count)]  # all Y coordinates
M = max(Y)  # largest Y

es = igraph.EdgeSeq(g)  # sequence of edges
E = g.get_edgelist()  # list of edges as (source, target) index pairs

# Vertex coordinates; the Y axis is redefined as 2*M - y.
L = len(position)
Xn = [position[k][0] for k in range(L)]
Yn = [2 * M - position[k][1] for k in range(L)]

# Coordinates of both endpoints of every edge; each edge's pair is followed
# by a None entry.
Xe = []
Ye = []
for edge in E:
    Xe.extend([position[edge[0]][0], position[edge[1]][0], None])
    Ye.extend([2 * M - position[edge[0]][1],
               2 * M - position[edge[1]][1], None])

labels = node_data.iloc[:, 0].to_list()