import snap
from time import time


def get_hits_venues():
    mapping = snap.TStrIntSH()
    t0 = time()
    file_output_1 = open("paper_venues_hits_hub.txt", 'w')
    file_output_2 = open("paper_venues_hits_auth.txt", 'w')
    G0 = snap.LoadEdgeListStr(snap.PNGraph, "paperid_venueid_ref.txt", 0, 1, mapping)
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(G0, NIdHubH, NIdAuthH, 1000)
    print("HITS time:", round(time() - t0, 3), "s")
    # Iterating a snap.TIntFltH yields its integer keys (node ids).
    for item in NIdHubH:
        file_output_1.write(str(mapping.GetKey(item)) + "," + str(NIdHubH[item]) + '\n')
    for item in NIdAuthH:
        file_output_2.write(str(mapping.GetKey(item)) + "," + str(NIdAuthH[item]) + '\n')
    # convert input string to node id
    # NodeId = mapping.GetKeyId("814DF491")
    # convert node id to input string
    # NodeName = mapping.GetKey(NodeId)
    # print("name", NodeName)
    # print("id  ", NodeId)
    print("finish hits!")
    file_output_1.close()
    file_output_2.close()
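A small follow-up sketch, assuming get_hits_venues() above has already been run: read the hub-score file it writes and return the k highest-scoring names. The file name and the "name,score" line format come from the function above; the helper name top_hubs is ours.

def top_hubs(k=10, path="paper_venues_hits_hub.txt"):
    # Each line is "<name>,<hub score>"; split on the last comma so names
    # that themselves contain commas are still handled.
    scores = []
    with open(path) as f:
        for line in f:
            if not line.strip():
                continue
            name, _, score = line.rstrip("\n").rpartition(",")
            scores.append((name, float(score)))
    scores.sort(key=lambda pair: pair[1], reverse=True)
    return scores[:k]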
def load_graph(self, name):
    '''
    Helper function to load graphs.
    Check that the respective .txt files are in the same folder as this
    script; if not, change the paths below as required.
    '''
    # NOTE: the path comes from self.graphName; the name argument is
    # currently unused.
    mapping = snap.TStrIntSH()
    G = snap.LoadEdgeListStr(snap.PNGraph, self.graphName, 0, 1, mapping)
    return G
def hits(graph_filename):
    # create graph
    name_id_map = snap.TStrIntSH()
    graph = snap.LoadEdgeListStr(snap.PNGraph, graph_filename, 0, 1, name_id_map)

    # run HITS algo
    id_hub_map = snap.TIntFltH()
    id_auth_map = snap.TIntFltH()
    snap.GetHits(graph, id_hub_map, id_auth_map, 1000)  # iterate 1000 times

    return name_id_map, id_hub_map, id_auth_map
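A minimal usage sketch for hits() above. The input file name is borrowed from get_hits_venues(); any edge list of string node ids works. Iterating a snap.TIntFltH yields its integer node ids, as in the write-out loops earlier.

name_id_map, id_hub_map, id_auth_map = hits("paperid_venueid_ref.txt")

# Rank node ids by hub score and print the ten highest-scoring names.
hub_items = [(id_hub_map[nid], nid) for nid in id_hub_map]
hub_items.sort(key=lambda pair: pair[0], reverse=True)
for score, nid in hub_items[:10]:
    print(name_id_map.GetKey(nid), score)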
import os.path
import pickle

import snap

# Data_Scraper and Snap_Analytics are project-local modules assumed to be
# importable alongside this script.


def getDataSource(forceReload=False):
    dataSourceFileName = "scoring.pkl"
    if forceReload or not os.path.isfile(dataSourceFileName):
        Data_Scraper.load_data()
        print('before mapping')
        mapping = snap.TStrIntSH()
        G = snap.LoadEdgeListStr(snap.PNGraph, "politics_edge_list.txt", 0, 1, mapping)
        rankedCommentsPos = []
        rankedCommentsNeg = []
        combinedNeg = []
        combinedPos = []
        # 1000000
        commentsToTake = 5000
        for i in range(1, 5):
            rankedCommentData = Snap_Analytics.sort_comments(60, mapping, i)
            rankedCommentsPos.append(rankedCommentData[:commentsToTake])
            rankedCommentsNeg.append(
                rankedCommentData[len(rankedCommentData) - commentsToTake * 4:])
            # use integer division so the slice indices stay ints in Python 3
            combinedPos.extend(rankedCommentData[:commentsToTake // 4])
            combinedNeg.extend(
                rankedCommentData[len(rankedCommentData) - (commentsToTake * 4) // 4:])
        rankedCommentsPos.append(combinedPos)
        rankedCommentsNeg.append(combinedNeg)
        f = open(dataSourceFileName, 'wb')
        pickle.dump((rankedCommentsPos, rankedCommentsNeg), f)
        f.close()
        return (rankedCommentsPos, rankedCommentsNeg)
    else:
        print("loading from file")
        f = open(dataSourceFileName, 'rb')
        classifier = pickle.load(f)
        f.close()
        return (classifier[0], classifier[1])
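A usage note for getDataSource() above: the first call scrapes, ranks, and caches to scoring.pkl; later calls read the pickle unless forceReload=True.

# First call builds and caches the ranked comment lists.
rankedPos, rankedNeg = getDataSource()
# Force a rebuild after re-scraping:
rankedPos, rankedNeg = getDataSource(forceReload=True)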
import snap

H = snap.TStrIntSH()
Graph = snap.LoadConnListStr(snap.PNGraph, "data/example-LoadConnListStr.txt", H)
# get node ID of "A"
print(H.GetDat("A"))

H = snap.TStrIntSH()
UGraph = snap.LoadConnListStr(snap.PUNGraph, "data/example-LoadConnListStr.txt", H)

H = snap.TStrIntSH()
Network = snap.LoadConnListStr(snap.PNEANet, "data/example-LoadConnListStr.txt", H)
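For reference, LoadConnListStr expects a whitespace-separated connection list: the first token on each line is a node name and the remaining tokens are that node's neighbors. A minimal sketch that builds such a file and loads it; the file name tiny-connlist.txt is made up for illustration.

import snap

# Hypothetical three-node example: A -> B, A -> C, B -> C.
with open("tiny-connlist.txt", "w") as f:
    f.write("A B C\n")
    f.write("B C\n")

H = snap.TStrIntSH()
G = snap.LoadConnListStr(snap.PNGraph, "tiny-connlist.txt", H)
print(G.GetNodes(), G.GetEdges())  # expect 3 nodes, 3 edges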
def load_graph(file_path):
    H = snap.TStrIntSH()
    Graph = snap.LoadConnListStr(snap.PNGraph, file_path, H)
    print("-----graph loaded")
    return Graph, H
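A short usage sketch for load_graph() above, assuming the same example file as the previous snippet; "A" is assumed to be a node name in that file.

G, H = load_graph("data/example-LoadConnListStr.txt")
print(G.GetNodes(), G.GetEdges())

# Round-trip between a node name and its internal integer id.
nid = H.GetKeyId("A")
print(nid, H.GetKey(nid))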
def main():
    print('begin main')
    Data_Scraper.load_data()
    # measure_comment_lengths()
    # FK_histogram()
    print('before mapping')
    # for i in xrange(25):
    #     sample_random_comment(G, mapping)
    mapping = snap.TStrIntSH()
    G = snap.LoadEdgeListStr(snap.PNGraph, "politics_edge_list.txt", 0, 1, mapping)
    root = getRootNode(mapping, G)
    # FK_scores = getFKScores()

    # """ TRYING TO CREATE A SMALL GRAPH SAMPLE FOR VISUALIZATION """
    # f = open("Mini_graph.csv", "w")
    # thread_id = random.sample(Data_Scraper.thread_ids, 1)
    # newRoot = G.GetNI(mapping.GetKeyId(thread_id)).GetId()
    # # newRoot = mapping.GetKeyId(thread_id)
    # f.write(str(root.GetId()) + "," + str(newRoot) + "\n")
    # sg = makeSubGraph(G, mapping)
    # for edge in sg.Edges():
    #     tup = edge.GetId()
    #     f.write(str(tup[0]) + "," + str(tup[1]) + "\n")
    # f.close()
    # print("File written")

    # for key, value in FK_scores.iteritems():
    #     f.write(str(key) + "\t" + str(value) + "\n")
    # f.close()
    # print("File written")

    # stats_vec = comment_statistics(mapping, G)
    # print(stats_vec[4])
    # getCommentHistogram([G.GetNI(n) for n in root.GetOutEdges()], G)
    # output = open('comment_stats.pkl', 'wb')
    # pickle.dump(stats_vec, output)
    # output.close()
    # sort_comments(200, mapping, 1)

    # print("Nodes: " + str(Data_Scraper.all_comments))
    # print("Threads: " + str(len(Data_Scraper.thread_ids)))
    # print("Root comments: " + str(len(Data_Scraper.root_comments)))
    # print("delay: " + estimated_delay(144834, mapping, G))
    # record_comment_lengths()
    assemble_sequence_dict(mapping)
    print(G.GetNodes())
    print(G.GetEdges())
    # (continues a loop above that builds the "nodes" list for the JSON export)
    nodes.append(node)

    links = []
    print("looping thru edges")
    for i in G.Edges():
        edge = {}
        edge["source"] = mapping.GetKey(i.GetSrcNId())
        edge["target"] = mapping.GetKey(i.GetDstNId())
        edge["value"] = 1
        links.append(edge)

    f = open("graph.json", "w")
    f.write(json.dumps({"nodes": nodes, "links": links}))
    f.close()

# TODO: mean and standard deviation of FK scores and word lengths

if __name__ == "__main__":
    main()

# NOTE: the code below sits outside the __main__ guard, so it also runs at
# import time.
mapping = snap.TStrIntSH()
G = snap.LoadEdgeListStr(snap.PNGraph, "politics_edge_list.txt", 0, 1, mapping)
# root = getRootNode(mapping, G)
# thread_sizes = {}
# for thread in root.GetOutEdges():
#     threadsize = _findDepth(G.GetNI(thread), G)
#     thread_sizes[mapping.GetKey(thread)] = threadsize
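A small sanity check for the graph.json export above, assuming main() has been run: reload the node-link file and count its nodes and links.

import json

with open("graph.json") as f:
    data = json.load(f)
print(len(data["nodes"]), len(data["links"]))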