예제 #1
0
파일: graph_loader.py 프로젝트: pwelke/hops
def snap_load(filename="", src_path="", directed=False):
    """Load a SNAP graph and, when a label file exists, its node labels.

    The graph is read from ``src_path + filename + ".graph"``.  When
    ``filename`` is empty no extension is appended, so ``src_path`` itself is
    used as the graph file path.  If that file is missing but a ``.gml`` or
    ``.gpickle`` file with the same base path exists,
    ``graph_converter.gml_gpickle_to_snap_graph`` is called first and the
    graph file is loaded afterwards.

    Args:
        filename: Base name of the graph, without extension.
        src_path: Prefix prepended verbatim to ``filename`` (no path
            separator is inserted).
        directed: Load a ``snap.TNGraph`` when true, otherwise a
            ``snap.TUNGraph``.

    Returns:
        ``(graph, labels)`` when the ``snap.TIntStrH`` label table is truthy,
        otherwise just ``graph``.

    Raises:
        IOError: If neither the graph file nor a ``.gml`` / ``.gpickle``
            file exists for the given base path.
    """
    graph_extension = ".graph"
    label_extension = ".label"
    if filename == "":
        graph_extension = ""

    base_path = src_path + filename
    graph_path = base_path + graph_extension

    if not os.path.isfile(graph_path):
        convertible = (os.path.isfile(base_path + ".gml")
                       or os.path.isfile(base_path + ".gpickle"))
        if not convertible:
            # Fail with a clear message instead of hitting an unbound local
            # at the return statement.
            raise IOError(
                "no graph file found for %r (looked for %r, .gml and .gpickle)"
                % (base_path, graph_path))
        # Presumably the converter writes graph_path -- TODO confirm.
        graph_converter.gml_gpickle_to_snap_graph(filename, src_path)

    FIn = snap.TFIn(graph_path)
    if directed:
        G = snap.TNGraph.Load(FIn)
    else:
        G = snap.TUNGraph.Load(FIn)

    # The label table stays empty when there is no label file.
    labels = snap.TIntStrH()
    label_path = base_path + label_extension
    if os.path.isfile(label_path):
        FIn = snap.TFIn(label_path)
        labels.Load(FIn)

    if labels:
        return G, labels
    else:
        return G
def get_top_packages(graph_path, n):
    """Return package names for the top nodes of a saved graph by PageRank.

    The graph is loaded as a ``snap.TNEANet`` and each node's ``"pkg"``
    string attribute is used as its package name.  PageRank scores are cached
    in a file named ``<graph name>_pageranks`` inside the graph's directory;
    when that file exists it is loaded instead of recomputing the scores.

    Note:
        This function changes the process working directory to the graph's
        directory and does not restore it.

    Args:
        graph_path: Path to the binary graph file.
        n: Count passed to ``get_top_nodes_from_hashtable``.

    Returns:
        List of package names, ordered by ascending second element of the
        pairs returned by ``get_top_nodes_from_hashtable`` (presumably the
        PageRank score -- TODO confirm).
    """
    graph_abs_path = os.path.abspath(graph_path)
    graph_name = os.path.basename(graph_abs_path).replace(".graph", "")
    fin = snap.TFIn(graph_abs_path)
    graph = snap.TNEANet.Load(fin)

    # Rebuild the node id => package name dictionary.
    id_pkg_dict = {}
    for node in graph.Nodes():
        node_id = node.GetId()
        id_pkg_dict[node_id] = graph.GetStrAttrDatN(node_id, "pkg")

    # snap.py doesn't support absolute paths for some operations, so cd to
    # the graph's directory and use relative file names from here on.
    os.chdir(os.path.dirname(graph_abs_path))

    data_file = graph_name + "_pageranks"
    prank_hashtable = snap.TIntFltH()
    if os.path.isfile(data_file):
        fin = snap.TFIn(data_file)
        prank_hashtable.Load(fin)
    else:
        # Damping Factor: 0.85, Convergence difference: 1e-4, MaxIter: 100
        snap.GetPageRank(graph, prank_hashtable, 0.85)
        fout = snap.TFOut(data_file)
        prank_hashtable.Save(fout)
        # Flush explicitly so the cache file does not depend on when the
        # output stream object happens to be destroyed.
        fout.Flush()

    top_n = get_top_nodes_from_hashtable(prank_hashtable, n)
    top_n.sort(key=itemgetter(1))
    return [id_pkg_dict[pair[0]] for pair in top_n]
def LoadGraph(name):
    """Load an undirected SNAP graph from the ``res/others/`` directory.

    Args:
        name: Graph file name, with or without the ``.graph`` extension.

    Returns:
        The loaded ``snap.TUNGraph``.
    """
    # Append the extension only when it is missing.  ``endswith`` is used
    # rather than splitting on "." so that a bare name such as "graph" is not
    # mistaken for a name that already carries the extension.
    if not name.endswith(".graph"):
        name = name + ".graph"
    FIn = snap.TFIn("res/others/" + name)
    completeGraph = snap.TUNGraph.Load(FIn)
    return completeGraph
예제 #4
0
    def load_z_deta(self, index=0):
        """Load the two binary vectors stored for ``index``.

        ``LEdgeV-<index>.bin`` is read into a ``snap.TIntTrV`` and
        ``NodePerm-<index>.bin`` into a ``snap.TIntV``.  Both file names are
        appended to ``base_path`` with a backslash separator.

        Returns:
            Tuple ``(LEdgeV, NodePerm)``.
        """
        edge_stream = snap.TFIn(f"{base_path}\\LEdgeV-{index}.bin")
        edge_triples = snap.TIntTrV()
        edge_triples.Load(edge_stream)

        perm_stream = snap.TFIn(f"{base_path}\\NodePerm-{index}.bin")
        node_perm = snap.TIntV()
        node_perm.Load(perm_stream)

        return edge_triples, node_perm
예제 #5
0
def GetNbr(sw):
    """
    Serve neighbor requests, or build the adjacency state on the first call.

    When ``LoadState`` returns a truthy adjacency structure, every incoming
    message is read as a ``Snap.TIntV`` and handed to ``GetNeighbors``.
    Otherwise the messages are read as ``Snap.TIntIntVV`` vectors, merged
    with ``Snap.AddVec64``, converted by ``GetEdges``, stored with
    ``SaveState``, and a ``"targets"`` command is sent to task 0.
    """

    taskname = sw.GetName()  # not used below; call kept from the original

    pending = sw.GetMsgList()
    sw.log.debug("msglist %s" % pending)

    with perf.Timer(sw.log, "LoadState-GetNbrCpp"):
        adjacency = LoadState(sw)

    if not adjacency:
        # No saved state yet: the input messages carry edges.
        sw.log.debug('[%s] adjlist not found, initializing' % sw.GetName())
        merged_edges = Snap.TIntIntVV()

        for entry in pending:
            msg_file = sw.GetMsgName(entry)
            stream = Snap.TFIn(Snap.TStr(msg_file))
            chunk = Snap.TIntIntVV(stream)
            Snap.AddVec64(merged_edges, chunk)

        # First iteration: build the adjacency lists and persist them.
        adjacency = GetEdges(sw, merged_edges)

        sw.log.debug('[%s] saving adjlist of size %d now' %
                     (sw.GetName(), adjacency.Len()))

        with perf.Timer(sw.log, "SaveState-GetNbrCpp"):
            SaveState(sw, adjacency)

        reply = {"src": sw.GetName(), "cmd": "targets", "body": {}}
        sw.Send(0, reply, "2")
        return

    # Saved state is available: answer each request for neighbors.
    sw.log.debug('[%s] state available, length %d' %
                 (sw.GetName(), adjacency.Len()))
    for entry in pending:
        msg_file = sw.GetMsgName(entry)

        # Read the input nodes.
        stream = Snap.TFIn(Snap.TStr(msg_file))
        nodes = Snap.TIntV(stream)

        GetNeighbors(sw, adjacency, nodes)
    def testRandomDataset7(self):
        """Write both unweighted datasets in SNAP format and load them back.

        For each dataset ``utils.remove_if_file_exit`` is called on its SNAP
        file path, the result of ``to_snapformat()`` is printed, and the file
        is loaded: the undirected dataset as ``snap.TUNGraph`` and the
        directed one as ``snap.TNGraph``.
        """
        undirected = self.graph_unweighted_undirect
        utils.remove_if_file_exit(undirected.file_snap)
        print(undirected.to_snapformat())
        stream = snap.TFIn(undirected.file_snap)
        Graph = snap.TUNGraph.Load(stream)

        directed = self.graph_unweighted_direct
        self.assertTrue(directed.is_directed())
        utils.remove_if_file_exit(directed.file_snap)
        print(directed.to_snapformat())
        stream = snap.TFIn(directed.file_snap)
        Graph = snap.TNGraph.Load(stream)
def degreeDistribution():
    """Plot degree data for the original and the reconstructed Steam graph.

    Both graphs are loaded as ``snap.TUNGraph``, their node and edge counts
    are printed, and the three values returned by ``getDeg`` for each graph
    are passed to ``plot`` together with a title.
    """
    # Read the original and the reconstructed graph.
    FIn = snap.TFIn("../graph/steam.graph")
    Go = snap.TUNGraph.Load(FIn)
    FIn = snap.TFIn("../graph/steam_user100_game1000.graph")
    Gn = snap.TUNGraph.Load(FIn)

    # Formatting into a single string gives the same output on Python 2 and
    # Python 3 (the original used Python 2-only print statements).
    print("%s %s" % (Go.GetNodes(), Go.GetEdges()))
    print("%s %s" % (Gn.GetNodes(), Gn.GetEdges()))

    d1, user1, game1 = getDeg(Go)
    d2, user2, game2 = getDeg(Gn)
    plot(d1, user1, game1, 'Original Graph')
    plot(d2, user2, game2, 'Reconstructed Graph')
예제 #8
0
def GetNbr(sw):
    """
    Answer neighbor requests, or create the adjacency state when it is missing.

    With a truthy result from ``LoadState`` each message is read as a
    ``Snap.TIntV`` and passed to ``GetNeighbors``.  Otherwise all message
    vectors are concatenated, ``GetEdges`` builds the adjacency lists,
    ``SaveState`` stores them, and a ``"targets"`` command is sent to task 0.
    """

    requests = sw.GetMsgList()
    sw.log.debug("msglist %s" % requests)

    with perf.Timer(sw.log, "LoadState-GetNbrCpp"):
        adjacency = LoadState(sw)

    if not adjacency:
        # State not found: the input vectors are edges, collect them all.
        all_edges = Snap.TIntV()

        for request in requests:
            source = sw.GetMsgName(request)
            stream = Snap.TFIn(Snap.TStr(source))
            part = Snap.TIntV(stream)
            all_edges.AddV(part)

        # First iteration: build the state and save it.
        adjacency = GetEdges(sw, all_edges)
        sw.log.debug("state: %d" % adjacency.Len())

        with perf.Timer(sw.log, "SaveState-GetNbrCpp"):
            SaveState(sw, adjacency)

        announcement = {"src": sw.GetName(), "cmd": "targets", "body": {}}
        sw.Send(0, announcement, "2")
        return

    # State is available: process the requests for neighbors.
    for request in requests:
        source = sw.GetMsgName(request)

        # Read the input nodes.
        stream = Snap.TFIn(Snap.TStr(source))
        nodes = Snap.TIntV(stream)

        GetNeighbors(sw, adjacency, nodes)
예제 #9
0
def GetNbr(sw):
    """
    Provide graph neighbors, creating the adjacency state when none exists.

    If ``LoadState()`` returns a truthy value, each message is read as a
    ``Snap.TIntV`` and passed to ``GetNeighbors``.  Otherwise the message
    vectors are concatenated, ``GetEdges`` builds the adjacency lists,
    ``SaveState`` stores them, and a ``"targets"`` command is sent to task 0.
    Progress is written to ``sw.flog``.
    """

    taskname = sw.GetName()  # not used below; call kept from the original

    inbox = sw.GetMsgList()
    sw.flog.write("msglist " + str(inbox) + "\n")
    sw.flog.flush()

    adjacency = LoadState()

    if not adjacency:
        # State not found: the input vectors are edges, gather them.
        gathered = Snap.TIntV()

        for message in inbox:
            path = sw.GetMsgName(message)
            stream = Snap.TFIn(Snap.TStr(path))
            piece = Snap.TIntV(stream)
            gathered.AddV(piece)

        # First iteration: build the state and save it.
        adjacency = GetEdges(gathered)
        sw.flog.write("state " + str(adjacency.Len()) + "\n")
        sw.flog.flush()

        SaveState(adjacency)

        outgoing = {"src": sw.GetName(), "cmd": "targets", "body": {}}
        sw.Send(0, outgoing, "2")
        return

    # State is available: process the requests for neighbors.
    for message in inbox:
        path = sw.GetMsgName(message)

        # Read the input nodes.
        stream = Snap.TFIn(Snap.TStr(path))
        nodes = Snap.TIntV(stream)

        GetNeighbors(sw, adjacency, nodes)
def write_graph_to_csv(name):
    """Dump the weighted Steam graph as a CSV edge list with dense node ids.

    The graph is always read from
    ``graph/steam_weight_user_limit_100.graph`` and written to
    ``csv/weighted_<name>_limit_100.csv``.  The first row holds the number of
    nodes; every following row is ``source,destination,weight``, where node
    ids are renumbered to consecutive integers starting at 0 (in node
    iteration order) and the weight is the edge's integer ``weight``
    attribute.

    Args:
        name: Tag inserted into the output file name.
    """
    start = time.time()
    FIn = snap.TFIn("graph/steam_weight_user_limit_100.graph")
    G = snap.TNEANet.Load(FIn)

    print("finished loading: ", time.time() - start)

    attr = 'weight'

    # Map node ids onto consecutive integers.  setdefault keeps the first
    # index assigned to an id and avoids scanning a key list on every node.
    node_dict = {}
    for node in G.Nodes():
        node_dict.setdefault(node.GetId(), len(node_dict))

    print("finished generating node id dict: ", time.time() - start)

    # NOTE(review): on Python 3 the csv docs recommend opening with
    # newline='' -- confirm the target interpreter before adding it.
    with open('csv/weighted_%s_limit_100.csv' % name, mode='w') as f:
        writer = csv.writer(f, delimiter=',')
        writer.writerow([str(len(node_dict))])
        for edge in G.Edges():
            srcnid = node_dict[edge.GetSrcNId()]
            dstnid = node_dict[edge.GetDstNId()]
            value = G.GetIntAttrDatE(edge, attr)
            writer.writerow([str(srcnid), str(dstnid), str(value)])

    print("done: ", time.time() - start)
def generate_steam_edge_list():
    """Write the largest connected component of the Steam graph as CSV.

    Loads ``graph/steam.graph``, keeps its largest weakly connected
    component, and writes one row per edge to ``graph/steam_edge_list.csv``.
    Each row holds the endpoint listed in ``graph/user_node.txt`` first and
    the other endpoint second, prefixed with ``g``.  When the source id is
    not a user id the endpoints are swapped without further checks.
    """
    FIn = snap.TFIn("graph/steam.graph")
    G = snap.TUNGraph.Load(FIn)

    G = snap.GetMxWcc(G)

    # A set makes the per-edge membership test O(1); the original scanned a
    # list of tens of thousands of ids for every edge.
    with open('graph/user_node.txt', 'r') as f:
        user_nodes = {int(line) for line in f}  #88310

    # Not used below; still read so that a missing or malformed file fails
    # exactly as it did before.
    with open('graph/game_node.txt', 'r') as f:
        game_node_array = [int(line) for line in f]  #10978

    with open('graph/steam_edge_list.csv', 'w') as f:
        writer = csv.writer(f, delimiter=',')
        for edge in G.Edges():
            id1 = edge.GetSrcNId()
            id2 = edge.GetDstNId()
            if id1 in user_nodes:
                row = [str(id1), 'g' + str(id2)]
            else:
                row = [str(id2), 'g' + str(id1)]
            writer.writerow(row)
예제 #12
0
def sampleFeatures(family, feature, numSamples, apiGraph):
    """Compute one feature for ``numSamples`` randomly chosen graphs.

    Graph files are taken from ``data/graphs/<family>/`` in shuffled order.
    Graphs with no nodes or no edges are skipped and do not count towards
    ``numSamples``.

    Args:
        family: Sub-directory of ``data/graphs/`` to sample from.
        feature: Key into ``graph_features.extractors``.
        numSamples: Number of feature values to collect.
        apiGraph: When true, use files whose name contains ``.apigraph`` and
            load them as binary ``snap.TNEANet``; otherwise use files whose
            name contains ``.edges`` and load them as edge lists.

    Returns:
        ``numpy`` array of length ``numSamples`` with the extracted values.

    Raises:
        IndexError: If fewer than ``numSamples`` non-empty graphs exist.
    """
    path = 'data/graphs/' + family + '/'
    marker = '.apigraph' if apiGraph else '.edges'
    # Build a real list: random.shuffle needs a mutable sequence, which
    # filter() does not return on Python 3.
    graph_files = [f for f in os.listdir(path) if marker in f]
    random.shuffle(graph_files)

    features = np.zeros(numSamples)
    count = 0
    for f in graph_files:
        if count >= numSamples:
            break
        if apiGraph:
            FIn = snap.TFIn(path + f)
            G = snap.TNEANet.Load(FIn)
        else:
            G = snap.LoadEdgeList(snap.PNEANet, path + f, 0, 1)
        if G.GetEdges() == 0 or G.GetNodes() == 0:
            continue
        extractor = graph_features.extractors[feature]
        features[count] = extractor(G)
        count += 1

    if count < numSamples:
        # Same exception type the original raised when it ran off the end of
        # the file list, but with an explanation.
        raise IndexError(
            "only %d of the %d requested non-empty graphs found in %s"
            % (count, numSamples, path))
    return features
예제 #13
0
def avgDegreeDist(family, direction, numSamples, apiGraph):
    """Average the in- or out-degree distribution over sampled graphs.

    Up to ``numSamples`` graph files are picked at random from
    ``data/graphs/<family>/``.  The per-degree node counts of all loaded
    graphs are summed and divided by ``numSamples`` (not by the number of
    graphs actually loaded).

    Args:
        family: Sub-directory of ``data/graphs/`` to sample from.
        direction: ``'in'`` for in-degree; any other value selects
            out-degree.
        numSamples: Maximum number of graphs to load, and the divisor of the
            average.
        apiGraph: When true, use files whose name contains ``.apigraph`` and
            load them as binary ``snap.TNEANet``; otherwise use files whose
            name contains ``.edges`` and load them as edge lists.

    Returns:
        ``numpy`` array indexed by degree, of length ``max degree + 1``.

    Raises:
        ValueError: If no graph was loaded.
    """
    path = 'data/graphs/' + family + '/'
    marker = '.apigraph' if apiGraph else '.edges'
    # Build a real list: random.shuffle needs a mutable sequence, which
    # filter() does not return on Python 3.
    graph_files = [f for f in os.listdir(path) if marker in f]
    random.shuffle(graph_files)
    sampled = graph_files[:numSamples]

    if apiGraph:
        Gs = [snap.TNEANet.Load(snap.TFIn(path + f)) for f in sampled]
    else:
        Gs = [snap.LoadEdgeList(snap.PNEANet, path + f, 0, 1) for f in sampled]
    if not Gs:
        # max() over an empty list raised a bare ValueError before; keep the
        # exception type but say what went wrong.
        raise ValueError("no graphs to average in %s" % path)

    use_in_degree = direction == 'in'
    if use_in_degree:
        maxdeg = max(G.GetNI(snap.GetMxInDegNId(G)).GetInDeg() for G in Gs)
    else:
        maxdeg = max(G.GetNI(snap.GetMxOutDegNId(G)).GetOutDeg() for G in Gs)

    avg_deg_dist = np.zeros(maxdeg + 1)
    for G in Gs:
        DegToCntV = snap.TIntPrV()
        if use_in_degree:
            snap.GetInDegCnt(G, DegToCntV)
        else:
            snap.GetOutDegCnt(G, DegToCntV)

        # Each pair is (degree, number of nodes with that degree).
        for item in DegToCntV:
            avg_deg_dist[item.GetVal1()] += item.GetVal2()
    avg_deg_dist = avg_deg_dist / numSamples
    return avg_deg_dist
예제 #14
0
def LoadGraph(pth='hw1-q2.graph'):
    """
    Read a binary SNAP graph file as an undirected graph.

    :param pth: path of the binary graph file
    :return: the loaded ``snap.TUNGraph``
    """
    stream = snap.TFIn(pth)
    return snap.TUNGraph.Load(stream)
def construct():
    """Prune the Steam graph and save it as ``steam_user100_game1000.graph``.

    Nodes with id >= 600000 are removed when their degree exceeds 100, and
    nodes with id < 600000 when their degree exceeds 1000 (judging by the
    output file name these are presumably users and games respectively --
    TODO confirm).  Nodes left with degree 0 are removed afterwards.  Node
    and edge counts are printed before and after each pruning step.
    """
    FIn = snap.TFIn("../graph/steam.graph")
    G = snap.TUNGraph.Load(FIn)
    # Formatting into a single string gives the same output on Python 2 and
    # Python 3 (the original used Python 2-only print statements).
    print("%s %s" % (G.GetNodes(), G.GetEdges()))

    # Collect ids first; nodes are deleted only after the iteration is done.
    over_limit = []
    for ni in G.Nodes():
        nid = ni.GetId()
        degree_limit = 100 if nid >= 600000 else 1000
        if ni.GetDeg() > degree_limit:
            over_limit.append(nid)
    for nid in over_limit:
        G.DelNode(nid)
    print("%s %s" % (G.GetNodes(), G.GetEdges()))

    # Drop the nodes that the first pass left without any edge.
    isolated = [ni.GetId() for ni in G.Nodes() if ni.GetDeg() == 0]
    for nid in isolated:
        G.DelNode(nid)

    print("%s %s" % (G.GetNodes(), G.GetEdges()))

    FOut = snap.TFOut("../graph/steam_user100_game1000.graph")
    G.Save(FOut)
    FOut.Flush()
def clustering_coefficient(input):
    """Print the clustering coefficient of the graph stored in ``input``.

    The file is loaded as a ``snap.TNGraph`` and ``snap.GetClustCf`` is
    called with ``-1`` as its second argument.
    """
    print("Loading graph...")
    stream = snap.TFIn(input)
    directed_graph = snap.TNGraph.Load(stream)

    print("Calculating clustering coefficient...")
    coefficient = snap.GetClustCf(directed_graph, -1)
    print("Clustering Coefficient:", coefficient)
예제 #17
0
def to_snap(data):
    """
    convert the dataset to a SNAP graph.

    If ``data.file_snap`` exists it is loaded as a binary SNAP graph.
    Otherwise the graph is built from the edge list ``data.file_edges``
    (created with ``data.to_edgelist()`` when missing), using columns 0 and 1
    as source and destination.

    :param data: :py:class:`gct.Dataset`
    :rtype: SNAP graph
    """
    import snap

    if utils.file_exists(data.file_snap):
        FIn = snap.TFIn(data.file_snap)
        graph_type = snap.TNGraph if data.is_directed() else snap.TUNGraph
        return graph_type.Load(FIn)

    # NOTE: the original carried a permanently disabled guard that rejected
    # weighted datasets ("weighted graph is not supported well on snap").
    # Only the two endpoint columns of the edge list are read below.
    fname = data.file_edges
    if not utils.file_exists(fname):
        data.to_edgelist()

    pointer_type = snap.PNGraph if data.is_directed() else snap.PUNGraph
    return snap.LoadEdgeList(pointer_type, fname, 0, 1)
예제 #18
0
def main():
    """Print summary information for the attributed graph of one rumor.

    Loads the rumor's directed graph, converts it to a ``PNEANet``, passes it
    with the rumor's JSONL path and dataset workbook to
    ``Get_Graph_with_Attributes_New``, and writes the ``snap.PrintInfo``
    report to ``S18_5_Output.txt`` in the rumor's output folder.
    """
    import json
    import snap
    import graphviz
    import matplotlib.pyplot as plt
    import numpy as np
    import xlrd
    #-----------------
    # The common area
    rumor_number = "21"

    # Every path lives under the folder of the selected rumor.
    rumor_dir = 'D:\\Papers\\Social Network Mining\\Analysis_of_Rumor_Dataset\\Step 18\\Rumor_' + rumor_number
    path_input = rumor_dir + '\\Input\\'
    workbook_input1_D = xlrd.open_workbook(path_input + 'DATASET.xlsx', on_demand = True)

    rumor_file_stem = path_input + 'Rumor_' + rumor_number
    path_jsonl = rumor_file_stem + '.jsonl'
    path_graph = rumor_file_stem + '.graph'

    path_output = rumor_dir + '\\Output\\'

    graph_stream = snap.TFIn(path_graph)
    G_Directed = snap.TNGraph.Load(graph_stream)
    # Convert to a network type that can carry attributes on its nodes.
    G_Directed_with_Attributes = snap.ConvertGraph(snap.PNEANet, G_Directed)
    G_Directed_with_Attributes = Get_Graph_with_Attributes_New(
        path_jsonl, G_Directed_with_Attributes, workbook_input1_D)
    #-----------------
    # The specific area
    report_file = path_output + "S18_5_Output.txt"
    snap.PrintInfo(G_Directed_with_Attributes, "Python type PNEANet", report_file, False)
예제 #19
0
def simpleLoadFromFile(name):
    """Load a saved graph and its pickled node map from ``DATA_PATH``.

    Args:
        name: Base name shared by ``<name>.graph`` and ``<name>.nodes``.

    Returns:
        Tuple ``(G, nodesMap)``: the ``snap.TUNGraph`` and the unpickled
        object stored in the ``.nodes`` file.
    """
    G = snap.TUNGraph.Load(snap.TFIn(DATA_PATH + name + ".graph"))

    # Pickle data must be read in binary mode (text mode fails on Python 3),
    # and the context manager closes the file the original left open.
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(DATA_PATH + name + ".nodes", 'rb') as nodes:
        nodesMap = pickle.load(nodes)

    return G, nodesMap  # variation for simple OSM saving.
예제 #20
0
파일: hw1-q2.py 프로젝트: canVa4/CS224w
def load_binary_graph():
    """
    Read the graph stored in binary form in ``hw1-q2.graph``.

    :return: the graph as a SNAP ``TUNGraph``
    """
    stream = snap.TFIn("hw1-q2.graph")
    return snap.TUNGraph.Load(stream)
예제 #21
0
def getGraphFromFile(gfile):
    """Load an undirected graph from ``gfile``.

    Files ending in ``.graph`` are read as binary ``snap.TUNGraph``; any
    other file is read as an edge list with source in column 0 and
    destination in column 1.
    """
    if not gfile.endswith('.graph'):
        return snap.LoadEdgeList(snap.PUNGraph, gfile, 0, 1)
    stream = snap.TFIn(gfile)
    return snap.TUNGraph.Load(stream)
    def load(filename):
        """Build an EIGraph from the SNAP graph file `filename` and its
        metadata file.

        Every node id is recorded as either an entity or an item, the
        weights are unmarshalled from the metadata file, and the sorted
        distinct weight values become the graph's possible ratings."""
        stream = snap.TFIn(filename)
        snap_graph = snap.TUNGraph.Load(stream)

        loaded = EIGraph()
        loaded.name = filename
        loaded._G = snap_graph
        for node in snap_graph.Nodes():
            if EIGraph.nid_is_entity(node.GetId()):
                loaded.num_entities += 1
                loaded.entities.append(node.GetId())
                continue
            # Anything that is not an entity must be an item.
            assert EIGraph.nid_is_item(node.GetId())
            loaded.num_items += 1
            loaded.items.append(node.GetId())

        with open(EIGraph._get_meta_filename(filename), 'rb') as meta:
            loaded._weights = marshal.load(meta)
            distinct_ratings = set(loaded._weights.values())
            possible_ratings = sorted(distinct_ratings)

        # Set up the graph with the range of possible ratings.
        loaded.rating_range = (1, 5)
        loaded.possible_ratings = possible_ratings

        return loaded
예제 #23
0
def LoadState():
    """Load the saved adjacency lists, if a state file exists.

    The state file name comes from the module-level ``sw`` object.

    Returns:
        A ``Snap.TIntIntVH`` read from the state file, or ``None`` when the
        file does not exist.
    """
    state_file = sw.GetStateName()
    if os.path.exists(state_file):
        stream = Snap.TFIn(Snap.TStr(state_file))
        return Snap.TIntIntVH(stream)
    return None
예제 #24
0
def loadCollaborationGraph():
    """
    Load the collaboration graph from ``collaboration.graph``.
    return type : A TUNGraph
    :returns: Collaboration Graph
    """
    stream = snap.TFIn("collaboration.graph")
    return snap.TUNGraph.Load(stream)
예제 #25
0
def getGraph(filename):
    """Load a ``snap.TNEANet`` from ``filename`` and print its size.

    Args:
        filename: Path of the binary graph file.

    Returns:
        The loaded network.
    """
    FIn = snap.TFIn(filename)
    G = snap.TNEANet.Load(FIn)

    # Single-string formatting prints the same text on Python 2 and 3; the
    # double space matches the output of the original print statements.
    print("Get nodes:  %s" % G.GetNodes())
    print("Get edges:  %s" % G.GetEdges())

    return G
예제 #26
0
def GenGraph(sw):
    """
    generate the graph edges

    All stub vectors found in the incoming messages are concatenated,
    segmented with ``seg_bits``, randomly assigned to ``gen_tasks`` tasks,
    desegmented, and each task's result is sent to the task with the same
    index.
    """

    msglist = sw.GetMsgList()
    sw.log.debug("msglist: %s" % msglist)

    # Concatenate the TIntV stored in every incoming message.
    Stubs = Snap.TIntV()
    for item in msglist:
        name = sw.GetMsgName(item)
        FIn = Snap.TFIn(Snap.TStr(name))
        Vec = Snap.TIntV(FIn)
        Stubs.AddV(Vec)

    ntasks = int(sw.GetVar("gen_tasks"))
    seg_bits = int(sw.GetVar('seg_bits'))
    tsize = sw.GetRange()

    Tasks = Snap.TIntVVV(ntasks)
    Stubs = Snap.segment(Stubs, seg_bits)  # segmentize stubs

    # do segmented random edge assignment
    Snap.AssignRandomEdges64(Stubs, Tasks, tsize, seg_bits)

    # desegment results
    Tasks = Snap.desegmentRandomizedEdges(Tasks, seg_bits, tsize)

    # send messages; range() works on both Python 2 and 3, unlike xrange()
    for i in range(Tasks.Len()):
        task_edges = Tasks.GetVal(i)
        sw.log.debug("sending task: %d, len: %d" % (i, task_edges.Len()))
        sw.Send(i, task_edges, swsnap=True)
예제 #27
0
def snapLoad(to_load, file_name):
    """
    Load an object from a file.

    :param to_load: object to load into (its ``Load`` method is called)
    :param file_name: name of the file to load from
    """
    source = snap.TFIn(file_name)
    to_load.Load(source)
예제 #28
0
def loadFromFile(name):
    """Load a saved graph with its pickled id and coordinate maps.

    Args:
        name: Base name shared by ``<name>.graph``, ``<name>.id`` and
            ``<name>.coords`` under ``DATA_PATH``.

    Returns:
        Tuple ``(G, idToOsmid, coordsMap)``: the ``snap.TUNGraph`` and the
        objects unpickled from the ``.id`` and ``.coords`` files.
    """
    G = snap.TUNGraph.Load(snap.TFIn(DATA_PATH + name + ".graph"))

    # Pickle data must be read in binary mode (text mode fails on Python 3),
    # and the context managers close the files the original left open.
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(DATA_PATH + name + ".id", 'rb') as idIn:
        idToOsmid = pickle.load(idIn)

    with open(DATA_PATH + name + ".coords", 'rb') as coords:
        coordsMap = pickle.load(coords)

    return G, idToOsmid, coordsMap
예제 #29
0
 def _rehydrate_snap_graph(self):
     """
     IMPORTANT: call this after restoring the analyzer from a pickle.
     The snap graph object does not work with the pickle API, so
         self._graph is removed before pickling and is missing on a restored
         instance.
     This reloads the graph for self._city_name from the data/ folder.
     """
     graph_file = DATA_PATH + self._city_name + ".graph"
     stream = snap.TFIn(graph_file)
     self._graph = snap.TUNGraph.Load(stream)
예제 #30
0
def GenGraph(sw):
    """
    generate the graph edges

    All ``TIntIntVV`` stub vectors found in the incoming messages are merged,
    randomly assigned to ``gen_tasks`` tasks, and each task's result is sent
    to the task with the same index.
    """

    msglist = sw.GetMsgList()
    sw.log.debug("msglist: %s" % msglist)

    # Merge the vector stored in every incoming message into Stubs.
    Stubs = Snap.TIntIntVV()
    for item in msglist:
        name = sw.GetMsgName(item)
        FIn = Snap.TFIn(Snap.TStr(name))
        Vec64 = Snap.TIntIntVV(FIn)
        Snap.AddVec64(Stubs, Vec64)

    # nodes in each task
    tsize = sw.GetRange()

    # number of bits in our segment (so seg size is (1<<seg_bits))
    seg_bits = int(sw.GetVar('seg_bits'))

    # number of tasks
    ntasks = int(sw.GetVar("gen_tasks"))

    # vector of length ntasks containing vectors
    Tasks = Snap.TIntVVV(ntasks)

    sw.log.debug('[%s] about to assign random edges' % sw.GetName())

    # handles shuffling and random assignment of edges
    Snap.AssignRandomEdges64(Stubs, Tasks, tsize, seg_bits)

    sw.log.debug('[%s] done assigning random edges' % sw.GetName())

    # send messages; range() works on both Python 2 and 3, unlike xrange()
    for i in range(Tasks.Len()):
        # Bind i as a default argument so the message still reports this
        # task even if LazyStr evaluates the lambda after the loop moved on.
        sw.log.debug(LazyStr(lambda i=i: '[%s] sending TIntIntVV of memory size %d to %d' % \
            (sw.GetName(), Snap.GetMemSize64(Tasks.GetVal(i)), i)))
        sw.Send(i, Tasks.GetVal(i), swsnap=True)