Example #1
import snap
from time import time


def get_hits_venues():
    mapping = snap.TStrIntSH()
    t0 = time()
    file_output_1 = open("paper_venues_hits_hub.txt", 'w')
    file_output_2 = open("paper_venues_hits_auth.txt", 'w')
    G0 = snap.LoadEdgeListStr(snap.PNGraph, "paperid_venueid_ref.txt", 0, 1,
                              mapping)
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(G0, NIdHubH, NIdAuthH, 1000)
    print("HITS time:", round(time() - t0, 3), "s")
    for item in NIdHubH:
        file_output_1.write(
            str(mapping.GetKey(item)) + "," + str(NIdHubH[item]) + '\n')
    for item in NIdAuthH:
        file_output_2.write(
            str(mapping.GetKey(item)) + "," + str(NIdAuthH[item]) + '\n')
    # convert an input string to its node id:
    # NodeId = mapping.GetKeyId("814DF491")
    # convert a node id back to its input string:
    # NodeName = mapping.GetKey(NodeId)
    # print("name", NodeName)
    # print("id  ", NodeId)
    print("finish hits!")
    file_output_1.close()
    file_output_2.close()
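The commented lines above show the TStrIntSH round trip between original string labels and SNAP node ids. A minimal standalone sketch of that lookup, assuming `mapping` was filled by snap.LoadEdgeListStr as in get_hits_venues:

# Sketch: string label <-> node id via the TStrIntSH mapping.
node_id = mapping.GetKeyId("814DF491")   # string label -> node id
node_name = mapping.GetKey(node_id)      # node id -> string label
print("name", node_name)
print("id  ", node_id)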
Example #2
    def load_graph(self, name):
        '''
        Helper function to load graphs.
        Check that the respective .txt files are in the same folder as this script;
        if not, change the paths below as required.
        '''
        # NOTE: this snippet reads self.graphName; the `name` argument is unused here.
        mapping = snap.TStrIntSH()
        G = snap.LoadEdgeListStr(snap.PNGraph, self.graphName, 0, 1, mapping)
        return G
Example #3
import snap


def hits(graph_filename):
    # create the graph; name_id_map translates string node names to node ids
    name_id_map = snap.TStrIntSH()
    graph = snap.LoadEdgeListStr(snap.PNGraph, graph_filename, 0, 1,
                                 name_id_map)

    # run HITS algo
    id_hub_map = snap.TIntFltH()
    id_auth_map = snap.TIntFltH()
    snap.GetHits(graph, id_hub_map, id_auth_map, 1000)  # iterate 1000 times

    return name_id_map, id_hub_map, id_auth_map
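A short usage sketch for hits() above; the edge-list file name is borrowed from Example #1, and any whitespace-separated edge list would do:

# Sketch: run HITS, then print the five highest-scoring hubs by name.
name_id_map, id_hub_map, id_auth_map = hits("paperid_venueid_ref.txt")
hub_scores = {name_id_map.GetKey(nid): id_hub_map[nid] for nid in id_hub_map}
for name, score in sorted(hub_scores.items(), key=lambda kv: kv[1], reverse=True)[:5]:
    print(name, score)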
Example #4
def getDataSource(forceReload=False):
    # requires `import snap` and `import pickle`, plus the project-local
    # Data_Scraper and Snap_Analytics modules
    dataSourceFileName = "scoring.pkl"
    import os.path

    if forceReload or not os.path.isfile(dataSourceFileName):
        Data_Scraper.load_data()
        print('before mapping')
        mapping = snap.TStrIntSH()
        G = snap.LoadEdgeListStr(snap.PNGraph, "politics_edge_list.txt", 0, 1,
                                 mapping)

        rankedCommentsPos = []
        rankedCommentsNeg = []
        combinedNeg = []
        combinedPos = []

        #1000000
        commentsToTake = 5000

        for i in range(1, 5):
            rankedCommentData = Snap_Analytics.sort_comments(60, mapping, i)
            rankedCommentsPos.append(rankedCommentData[:commentsToTake])
            rankedCommentsNeg.append(
                rankedCommentData[len(rankedCommentData) -
                                  commentsToTake * 4:len(rankedCommentData)])
            # use floor division so the slice indices stay ints under Python 3
            combinedPos.extend(rankedCommentData[:commentsToTake // 4])
            combinedNeg.extend(rankedCommentData[len(rankedCommentData) -
                                                 (commentsToTake * 4) //
                                                 4:len(rankedCommentData)])

        rankedCommentsPos.append(combinedPos)
        rankedCommentsNeg.append(combinedNeg)

        f = open(dataSourceFileName, 'wb')
        pickle.dump((rankedCommentsPos, rankedCommentsNeg), f)
        f.close()
        return (rankedCommentsPos, rankedCommentsNeg)

    else:
        print("loading from file")
        f = open(dataSourceFileName, 'rb')
        classifier = pickle.load(f)
        f.close()
        return (classifier[0], classifier[1])
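A brief usage sketch of the caching pattern above, assuming the project-local modules are importable:

# First call scrapes and ranks comments, then caches the result in scoring.pkl;
# subsequent calls read the pickle unless forceReload=True.
rankedCommentsPos, rankedCommentsNeg = getDataSource()
rankedCommentsPos, rankedCommentsNeg = getDataSource()  # served from scoring.pkl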
Example #5
import snap

H = snap.TStrIntSH()
Graph = snap.LoadConnListStr(snap.PNGraph, "data/example-LoadConnListStr.txt",
                             H)
# get node ID of "A"
print(H.GetDat("A"))

H = snap.TStrIntSH()
UGraph = snap.LoadConnListStr(snap.PUNGraph,
                              "data/example-LoadConnListStr.txt", H)

H = snap.TStrIntSH()
Network = snap.LoadConnListStr(snap.PNEANet,
                               "data/example-LoadConnListStr.txt", H)
Example #6
def load_graph(file_path):
    H = snap.TStrIntSH()
    Graph = snap.LoadConnListStr(snap.PNGraph, file_path, H)
    print("-----graph loaded")
    return Graph, H
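A minimal call of load_graph, reusing the connection-list file from Example #5:

# Sketch: load the graph, then translate the label "A" back to its node id.
Graph, H = load_graph("data/example-LoadConnListStr.txt")
print(H.GetDat("A"))                       # node id of "A"
print(Graph.GetNodes(), Graph.GetEdges())  # basic size check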
Example #7
def main():
	print('begin main')
	Data_Scraper.load_data()
	# measure_comment_lengths()
	# FK_histogram()

	print('before mapping')

	# for i in range(25):
	# 	sample_random_comment(G, mapping)

	mapping = snap.TStrIntSH()
	G = snap.LoadEdgeListStr(snap.PNGraph, "politics_edge_list.txt", 0, 1, mapping)

	root = getRootNode(mapping, G)

	# FK_histogram()
	# FK_scores = getFKScores()

	# """ TRYING TO CREATE A SMALL GRAPH SAMPLE FOR VISUALIZATION """
	# f = open("Mini_graph.csv", "w")
	# thread_id = random.sample(Data_Scraper.thread_ids, 1)
	# newRoot = G.GetNI(mapping.GetKeyId(thread_id)).GetId()
	# # newRoot = mapping.GetKeyId(thread_id)
	# f.write(str(root.GetId()) + "," + str(newRoot) + "\n")
	# sg = makeSubGraph(G, mapping)
	# for edge in sg.Edges():
	# 	tup = edge.GetId()
	# 	f.write(str(tup[0]) + "," + str(tup[1]) + "\n")
	# f.close()
	# print("File written")

	# for key, value in FK_scores.items():
	# 	f.write(str(key) + "\t" + str(value) + "\n")
	# f.close()
	# print("File written")

	# stats_vec = comment_statistics(mapping, G)
	# print(stats_vec[4])
	# getCommentHistogram([G.GetNI(n) for n in root.GetOutEdges()], G)
	# output = open('comment_stats.pkl', 'wb')
	# pickle.dump(stats_vec, output)
	# output.close()

	# sort_comments(200, mapping, 1)

	# print("Nodes: " + str(Data_Scraper.all_comments))
	# print("Threads: " + str(len(Data_Scraper.thread_ids)))
	# print("Root comments: " + str(len(Data_Scraper.root_comments)))

	# print("delay: " + estimated_delay(144834, mapping, G))
	# record_comment_lengths()

	assemble_sequence_dict(mapping)
	print(G.GetNodes())
	print(G.GetEdges())
Example #8
def write_graph_json(G, mapping):
	# NOTE: the original snippet starts mid-function; the function name and the
	# node-building loop below are reconstructed assumptions, not the source.
	nodes = []
	for n in G.Nodes():
		node = {"id": mapping.GetKey(n.GetId())}  # assumed node dict layout
		nodes.append(node)
	links = []
	print("looping thru edges")
	for i in G.Edges():
		edge = {}
		edge["source"] = mapping.GetKey(i.GetSrcNId())
		edge["target"] = mapping.GetKey(i.GetDstNId())
		edge["value"] = 1
		links.append(edge)
	f = open("graph.json", "w")
	f.write(json.dumps({"nodes": nodes, "links": links}))  # requires `import json`
	f.close()
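The {"nodes": ..., "links": ...} layout matches the node-link JSON commonly fed to d3 force layouts. A usage sketch, with write_graph_json being the name assumed above:

# Sketch: export politics_edge_list.txt (from Example #7) to graph.json.
mapping = snap.TStrIntSH()
G = snap.LoadEdgeListStr(snap.PNGraph, "politics_edge_list.txt", 0, 1, mapping)
write_graph_json(G, mapping)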

	
# TODO: mean and standard deviation of FK - scores and word lengths

if __name__ == "__main__":
	main()

mapping = snap.TStrIntSH()
G = snap.LoadEdgeListStr(snap.PNGraph, "politics_edge_list.txt", 0, 1, mapping)
# root = getRootNode(mapping, G)
# thread_sizes = {}
# for thread in root.GetOutEdges():
# 	threadsize = _findDepth(G.GetNI(thread), G)
# 	thread_sizes[mapping.GetKey(thread)] = threadsize