Exemple #1
0
def gml_gpickle_to_snap_graph(filename, src_path, dst_path=""):
    """Convert a networkx graph on disk to SNAP ``.graph`` and ``.labels`` files.

    The graph is read from ``src_path + filename + ".gpickle"``; if that read
    fails, ``src_path + filename + ".gml"`` is read instead. Every node must
    carry a ``'predicate'`` attribute and have an ID accepted by ``int()``.

    :param filename: base name of the graph file, without extension
    :param src_path: prefix prepended to the input file name
    :param dst_path: prefix prepended to the output file names; falls back to
        ``src_path`` when empty
    """
    if dst_path == "":
        dst_path = src_path

    try:
        G = nx.read_gpickle(src_path + filename + ".gpickle")
    except Exception:
        # Fall back to GML on any read failure, but let KeyboardInterrupt and
        # SystemExit propagate (a bare ``except`` would swallow them).
        G = nx.read_gml(src_path + filename + ".gml")

    GSnap = snap.TUNGraph()

    labels = snap.TIntStrH()
    for node_id, attrs in G.nodes(data=True):
        labels[int(node_id)] = str(attrs['predicate'])
        GSnap.AddNode(int(node_id))

    for edge in G.edges():
        GSnap.AddEdge(int(edge[0]), int(edge[1]))

    FOut = snap.TFOut(dst_path + filename + ".graph")
    GSnap.Save(FOut)
    # Flush the graph stream explicitly, as is already done for the labels.
    FOut.Flush()

    FOut = snap.TFOut(dst_path + filename + ".labels")
    labels.Save(FOut)
    FOut.Flush()
Exemple #2
0
def create_rnd_trees(size, number, filename, dst_path, labeled=False, seed=1):
    """Generate ``number`` random trees and store each as a SNAP ``.graph`` file.

    Tree ``i`` is produced by ``nx.random_tree(size, seed + i)`` and written to
    ``dst_path + filename + "_" + <zero-padded i> + ".graph"``. When ``labeled``
    is true, each node's ``'predicate'`` attribute is also written to a
    matching ``.labels`` file.

    :param size: number of nodes passed to ``nx.random_tree``
    :param number: how many trees to generate
    :param filename: base name of the output files
    :param dst_path: prefix prepended to the output file names
    :param labeled: whether to export node labels as well
    :param seed: base seed; tree ``i`` uses ``seed + i``
    """
    random.seed(seed)
    for index in range(number):
        tree = nx.random_tree(size, seed + index)
        snap_tree = snap.TUNGraph()
        labels = snap.TIntStrH() if labeled else None

        for node_id, attrs in tree.nodes(data=True):
            if labeled:
                labels[int(node_id)] = str(attrs['predicate'])
            snap_tree.AddNode(int(node_id))

        for endpoints in tree.edges():
            snap_tree.AddEdge(int(endpoints[0]), int(endpoints[1]))

        # Shared prefix of both output files for this tree.
        pad_width = math.ceil(math.log10(number + 1))
        stem = dst_path + filename + "_" + str(index).zfill(pad_width)

        graph_out = snap.TFOut(stem + ".graph")
        snap_tree.Save(graph_out)
        graph_out.Flush()

        if labeled:
            labels_out = snap.TFOut(stem + ".labels")
            labels.Save(labels_out)
            labels_out.Flush()
def SaveGraph(graph, name):
    """Save ``graph`` in SNAP binary format under ``res/others/``.

    ``".graph"`` is appended to ``name`` unless the text after its last dot
    (or the whole name, if it has no dot) is already ``"graph"``.
    """
    already_tagged = name.rsplit(".", 1)[-1] == "graph"
    target = name if already_tagged else name + ".graph"

    writer = snap.TFOut("res/others/" + target)
    graph.Save(writer)
    writer.Flush()
    return
def generate_graph(prune, output):
    """Build a directed graph of pull requests and save it to ``output``.

    One edge ``pr_creator -> repo_owner`` is added per record yielded by
    ``pull_requests()``. Records where either endpoint is ``None`` are skipped.
    ``prune`` is accepted but not used by this function.
    """
    pr_graph = snap.TNGraph.New()

    print("Adding PRs...")
    for record in pull_requests():
        creator = record['pr_creator']
        owner = record['repo_owner']

        # Outlier records lack one of the endpoints; ignore them.
        if creator is None or owner is None:
            continue

        # Create whichever endpoint is not in the graph yet.
        for endpoint in (creator, owner):
            if not pr_graph.IsNode(endpoint):
                pr_graph.AddNode(endpoint)

        pr_graph.AddEdge(creator, owner)

    print("Nodes:", pr_graph.GetNodes())
    print("Edges:", pr_graph.GetEdges())

    writer = snap.TFOut(output)
    pr_graph.Save(writer)
    writer.Flush()
Exemple #5
0
def generate_graph(prune, output):
    """Build a directed follow graph and save it to ``output``.

    When ``prune`` is false, every record from ``users()`` is added as a node
    first, so users with no follow edges are kept. Then one edge
    ``user_id -> follower_id`` is added per record from ``followers()``.
    """
    follow_graph = snap.TNGraph.New()

    # Without pruning the whole user set must be present in the graph.
    if not prune:
        print("Adding users...")
        for account in users():
            follow_graph.AddNode(account['id'])

    print("Adding follow edges...")
    for relation in followers():
        followed = relation['user_id']
        follower = relation['follower_id']

        # Create whichever endpoint is not in the graph yet.
        for endpoint in (followed, follower):
            if not follow_graph.IsNode(endpoint):
                follow_graph.AddNode(endpoint)

        follow_graph.AddEdge(followed, follower)

    print("Nodes:", follow_graph.GetNodes())
    print("Edges:", follow_graph.GetEdges())

    writer = snap.TFOut(output)
    follow_graph.Save(writer)
    writer.Flush()
def get_top_packages(graph_path, n):
    """Return the ``"pkg"`` names of the top ``n`` nodes by PageRank.

    The graph is loaded from ``graph_path`` as a ``TNEANet``. PageRank scores
    are cached in a file named ``<graph name>_pageranks`` next to the graph
    and reused on later calls.

    NOTE: this changes the process working directory to the graph's directory.

    :param graph_path: path to a SNAP ``.graph`` file
    :param n: number of nodes requested from ``get_top_nodes_from_hashtable``
    :return: list of package names, ordered by ascending second element of
        the pairs returned by ``get_top_nodes_from_hashtable`` (presumably the
        PageRank score -- confirm against that helper)
    """
    graph_abs_path = os.path.abspath(graph_path)
    graph_name = os.path.basename(graph_abs_path).replace(".graph", "")
    fin = snap.TFIn(graph_abs_path)
    graph = snap.TNEANet.Load(fin)

    # Rebuild the node id => package name dictionary.
    id_pkg_dict = {}
    for node in graph.Nodes():
        id_pkg_dict[node.GetId()] = graph.GetStrAttrDatN(node.GetId(), "pkg")

    # snap.py doesn't support absolute paths for some operations, so work
    # from inside the graph's directory.
    os.chdir(os.path.dirname(graph_abs_path))

    data_file = graph_name + "_pageranks"
    prank_hashtable = snap.TIntFltH()
    if not os.path.isfile(data_file):
        # Damping factor 0.85; other parameters use the library defaults.
        snap.GetPageRank(graph, prank_hashtable, 0.85)
        fout = snap.TFOut(data_file)
        prank_hashtable.Save(fout)
        # Flush explicitly so a later call does not load a truncated cache.
        fout.Flush()
    else:
        fin = snap.TFIn(data_file)
        prank_hashtable.Load(fin)

    top_n = get_top_nodes_from_hashtable(prank_hashtable, n)
    top_n.sort(key=itemgetter(1))
    return [id_pkg_dict[pair[0]] for pair in top_n]
Exemple #7
0
def main(version):
    """Extract the largest connected component of a social edge list.

    Reads tab-separated node-ID pairs from ``finalSocial<version>.txt`` under
    a work directory derived from ``sys.path[0]``, renumbers the nodes in
    order of first appearance (0, 1, 2, ...), and saves the maximum weakly
    connected component to ``finalSocial<version>.graph`` in the same place.

    :param version: string suffix that selects the input and output files
    """
    starttime = datetime.datetime.now()

    codePath = sys.path[0]
    s = codePath.split('\\')
    workPath = s[0] + '\\' + s[1] + '\\' + s[
        2] + '\\data\\flixster\\commondata\\'  #f:\project\somproject
    filePath1 = workPath + 'finalSocial' + version + '.txt'

    # Map each original node ID to a dense index (order of first appearance).
    # A dict keeps each lookup O(1); the previous list-based ``index`` and
    # ``in`` calls made the whole load quadratic in the number of nodes.
    nodeToIndex = {}
    G1 = snap.TUNGraph.New()
    with open(filePath1) as edgeFile:
        for line in edgeFile:
            # Strip only the newline, so a last line without one keeps its
            # final character.
            line = line.rstrip('\n')
            if not line:
                continue
            linkPair = line.split('\t')
            node1MapNum = nodeToIndex.setdefault(int(linkPair[0]),
                                                 len(nodeToIndex))
            node2MapNum = nodeToIndex.setdefault(int(linkPair[1]),
                                                 len(nodeToIndex))
            if not G1.IsNode(node1MapNum):
                G1.AddNode(node1MapNum)
            if not G1.IsNode(node2MapNum):
                G1.AddNode(node2MapNum)
            G1.AddEdge(node1MapNum, node2MapNum)

    print('get the max connected component...')
    MxWcc = snap.GetMxWcc(G1)
    print('the max connected component node num is  %d ' % MxWcc.GetNodes())

    print(MxWcc.GetEdges())

    FOut = snap.TFOut(workPath + 'finalSocial' + version + '.graph')
    MxWcc.Save(FOut)
    FOut.Flush()

    print('finished')
    endtime = datetime.datetime.now()
    print('passed time is %d s' % (endtime - starttime).seconds)
def construct():
	FIn = snap.TFIn("../graph/steam.graph")
	G = snap.TUNGraph.Load(FIn)
	print G.GetNodes(), G.GetEdges()

	ls = []
	for ni in G.Nodes():
		id = ni.GetId()
		if id >= 600000 and ni.GetDeg() > 100:
			ls.append(id)
		elif id < 600000 and ni.GetDeg() > 1000:
			ls.append(id)
	for i in ls:
		G.DelNode(i)
	print G.GetNodes(), G.GetEdges()

	ls = []
	for ni in G.Nodes():
		id = ni.GetId()
		if ni.GetDeg() == 0:
			ls.append(id)
	for i in ls:
		G.DelNode(i)

	print G.GetNodes(), G.GetEdges()

	FOut = snap.TFOut("../graph/steam_user100_game1000.graph")
	G.Save(FOut)
	FOut.Flush()
def create_tneanet(save=True):
    """Build an attributed network of countries, teams, players and matches.

    Data comes from ``sp.loadPickle()``. Node IDs are assigned sequentially
    in the order countries, teams, players, matches. Every node has a
    ``"kind"`` string attribute, and every edge has a ``"kind"`` attribute
    naming the relation ("team from", "plays for", "home team", "away team",
    "match in").

    :param save: when true, write the network to ``saveFileName``
    :return: the constructed network
    """
    meta, matches = sp.loadPickle()
    numNodes = len(matches) + len(meta.player) + len(meta.team) + len(
        meta.country)
    # Size hint only: 3 edges per match, 4 per player, 1 per team.
    numEdges = 3 * len(matches) + 4 * len(meta.player) + len(meta.team)
    G = TN.New(numNodes, numEdges)
    countryToNId = {}
    teamToNId = {}
    playerToNId = {}
    matchIndToNId = []
    i = 0
    for (countryId, countryName) in meta.country.items():
        ni = G.GetNI(G.AddNode(i))
        G.AddStrAttrDatN(ni, "country", "kind")
        G.AddIntAttrDatN(ni, countryId, "countryId")
        G.AddStrAttrDatN(ni, countryName, "countryName")
        countryToNId[countryId] = i
        i += 1
    for (teamId, d) in meta.team.items():
        ni = G.GetNI(G.AddNode(i))
        G.AddStrAttrDatN(ni, "team", "kind")
        G.AddIntAttrDatN(ni, teamId, "teamId")
        G.AddStrAttrDatN(ni, d['name'], "teamName")
        teamToNId[teamId] = i
        EId = G.AddEdge(i, countryToNId[d['country']])
        G.AddStrAttrDatE(EId, "team from", "kind")
        i += 1
    for (playerId, d) in meta.player.items():
        ni = G.GetNI(G.AddNode(i))
        G.AddStrAttrDatN(ni, "player", "kind")
        G.AddIntAttrDatN(ni, playerId, "playerId")
        G.AddStrAttrDatN(ni, d['name'], "playerName")
        playerToNId[playerId] = i
        for teamId in d['team']:
            EId = G.AddEdge(i, teamToNId[teamId])
            G.AddStrAttrDatE(EId, "plays for", "kind")
        i += 1
    for match in matches:
        matchIndToNId.append(i)
        ni = G.GetNI(G.AddNode(i))
        G.AddStrAttrDatN(ni, "match", "kind")
        G.AddIntAttrDatN(ni, match.away_goal, "away_goal")
        # Store the home score under its own key; it used to be written to
        # "away_goal", which overwrote the away score.
        G.AddIntAttrDatN(ni, match.home_goal, "home_goal")
        G.AddIntAttrDatN(ni, match.stageId, "stageId")
        G.AddStrAttrDatN(ni, match.season, "season")
        G.AddIntAttrDatN(ni, match.leagueId, "leagueId")
        G.AddIntAttrDatN(ni, match.id, "matchId")
        EId = G.AddEdge(i, teamToNId[match.home_team])
        G.AddStrAttrDatE(EId, "home team", "kind")
        EId = G.AddEdge(i, teamToNId[match.away_team])
        G.AddStrAttrDatE(EId, "away team", "kind")
        EId = G.AddEdge(i, countryToNId[match.countryId])
        G.AddStrAttrDatE(EId, "match in", "kind")
        i += 1
    if save:
        FOut = snap.TFOut(saveFileName)
        G.Save(FOut)
        # Flush explicitly instead of relying on the stream being destroyed.
        FOut.Flush()
    return G
Exemple #10
0
def data2dag(data, num_nodes):
    """Build a directed graph from an edge array and save it to disk.

    Nodes ``0 .. num_nodes - 1`` are created first, then one edge is added per
    row of ``data`` using its first two columns as source and destination.
    The graph is also written to ``../data/youtube.graph``.

    :param data: 2-D array-like with ``.shape``; columns 0 and 1 hold node IDs
    :param num_nodes: number of nodes to create
    :return: the constructed ``snap.TNGraph``
    """
    dag = snap.TNGraph.New()
    for i in range(num_nodes):
        dag.AddNode(i)

    for i in range(data.shape[0]):
        dag.AddEdge(int(data[i][0]), int(data[i][1]))

    FOut = snap.TFOut("../data/youtube.graph")
    dag.Save(FOut)
    # Flush so the file is complete by the time the function returns.
    FOut.Flush()
    return dag
Exemple #11
0
def saveToFile(G, idToOsmid, nodes, name):
    """Save a graph and its two lookup tables under ``DATA_PATH``.

    Writes three files: ``<name>.graph`` (SNAP binary graph), ``<name>.id``
    (pickled ``idToOsmid``) and ``<name>.coords`` (pickled ``nodes``).

    :param G: SNAP graph to save
    :param idToOsmid: object pickled to ``<name>.id``
    :param nodes: object pickled to ``<name>.coords``
    :param name: base file name, without extension
    """
    out = snap.TFOut(DATA_PATH + name + ".graph")  # graph saved as _.graph
    G.Save(out)
    out.Flush()

    # Pickle protocol 1 is a binary format, so the files are opened in binary
    # mode, and ``with`` closes them (the handles used to leak).
    with open(DATA_PATH + name + ".id", 'wb') as idOut:
        pickle.dump(idToOsmid, idOut, 1)

    with open(DATA_PATH + name + ".coords", 'wb') as nodesOut:
        pickle.dump(nodes, nodesOut, 1)
Exemple #12
0
def tungraphToBinary():
    """Time loading the twitter edge list, saving it as binary, and reloading it.

    Each stage's duration is reported through ``reportTime``.
    """
    started = t()
    graph = snap.LoadEdgeList(snap.PUNGraph, NW.twitter, 0, 1)
    after_load = reportTime(started, "TUNGRAPH")

    writer = snap.TFOut(NW.twitter_binary)
    graph.Save(writer)
    writer.Flush()
    after_save = reportTime(after_load, "TUNGRAPH save binary")

    reader = snap.TFIn(NW.twitter_binary)
    reloaded = snap.TUNGraph.Load(reader)
    reportTime(after_save, "TUNGRAPH load binary")
Exemple #13
0
def SaveState(ds):
    """Write the state in ``ds`` to the file named by ``sw.GetStateName()``.

    ``ds["start"]`` and ``ds["dist"]`` are wrapped in ``Snap.TInt``;
    ``ds["visit"]`` is saved as-is. Fields are written in that order.
    """
    state_path = sw.GetStateName()

    # Build every field before the output file is opened.
    fields = [Snap.TInt(ds["start"]), Snap.TInt(ds["dist"]), ds["visit"]]

    writer = Snap.TFOut(Snap.TStr(state_path))
    for field in fields:
        field.Save(writer)
    writer.Flush()
Exemple #14
0
def snapSave(to_save, file_name):
    """
    Save an object in binary format.
    The specified file is overwritten if it already exists.
    The file is created if it does not exist.

    :param to_save: object to save
    :param file_name: name of the file the object is saved to
    """
    writer = snap.TFOut(file_name)
    to_save.Save(writer)
    writer.Flush()
Exemple #15
0
    def _parse_from_raw_data(self):
        """Build the directed and undirected graphs from ``train.txt``.

        Each line of ``DATA_PATH + 'train.txt'`` is a tab-separated list of
        node IDs: the first is a source node, the rest are its destinations.
        The directed graph is stored in ``self.digraph``, its undirected
        conversion in ``self.ugraph``, and both are saved to
        ``DIRECT_GRAPH_PATH`` and ``UNDIRECT_GRAPH_PATH``.
        """
        train_path = DATA_PATH + 'train.txt'

        # One pass over the file; ``with`` closes the handle (the file used
        # to be opened twice and never closed).
        with open(train_path, "r") as trainFile:
            self.digraph = snap.TNGraph.New()
            for line in trainFile:
                nodes = line.split("\t")
                base = int(nodes[0])
                if not self.digraph.IsNode(base):
                    self.digraph.AddNode(base)
                for j in range(1, len(nodes)):
                    node = int(nodes[j])
                    if not self.digraph.IsNode(node):
                        self.digraph.AddNode(node)
                    self.digraph.AddEdge(base, node)
        self.ugraph = snap.ConvertGraphMP(snap.PUNGraph, self.digraph)

        # Use ``Save`` (capitalised) and an explicit flush, as the other
        # savers in this file do.
        for graph, path in ((self.digraph, DIRECT_GRAPH_PATH),
                            (self.ugraph, UNDIRECT_GRAPH_PATH)):
            FOut = snap.TFOut(path)
            graph.Save(FOut)
            FOut.Flush()
Exemple #16
0
def txt_to_graph(filename, src_path, dst_path=""):
    """
    Converts a snap txt graph to the much faster .graph format
    :param filename: name of the graph without .txt ending
    :param src_path: source path of the graph
    :param dst_path: destination path for the output; falls back to
        src_path when empty
    """
    if dst_path == "":
        dst_path = src_path
    GSnap = snap.LoadEdgeList(snap.PNGraph, src_path + filename + ".txt")
    FOut = snap.TFOut(dst_path + filename + ".graph")
    GSnap.Save(FOut)
    # Flush so the file is complete by the time the function returns.
    FOut.Flush()
    def save(self, filename):
        """Write this graph to `filename` in binary format.

        The metadata kept on this object (its weights) goes into a companion
        file whose name comes from `_get_meta_filename(filename)`.
        """
        writer = snap.TFOut(filename)
        self.base().Save(writer)
        writer.Flush()

        with open(self._get_meta_filename(filename), 'wb') as meta_file:
            marshal.dump(self._weights, meta_file)
Exemple #18
0
def SaveState(sw, ds):
    """Write the state in ``ds`` to the file named by ``sw.GetStateName()``.

    ``ds["start"]`` and ``ds["dist"]`` are wrapped in ``Snap.TInt``;
    ``ds["visit"]`` is saved as-is. The writes and the flush are accounted
    to the ``"disk"`` cumulative timer of ``sw``.
    """
    state_path = sw.GetStateName()

    # Build every field before the output file is opened.
    fields = [Snap.TInt(ds["start"]), Snap.TInt(ds["dist"]), ds["visit"]]

    writer = Snap.TFOut(Snap.TStr(state_path))
    sw.cum_timer.cum_start("disk")
    for field in fields:
        field.Save(writer)
    writer.Flush()
    sw.cum_timer.cum_stop("disk")
    def to_snapformat(self, filepath=None):
        """Write this dataset as a SNAP binary graph and return the file path.

        :param filepath: destination file. When omitted, ``self.file_snap``
            is used, and an already existing file is reused without being
            rewritten.
        :return: path of the SNAP file
        """
        # Identity test: ``== None`` can be hijacked by a custom ``__eq__``.
        if filepath is None:
            filepath = self.file_snap
            if utils.file_exists(filepath):
                return filepath

        import snap
        from gct.dataset import convert
        g = convert.to_snap(self)
        self.logger.info("Writing {} to {}".format(type(g), filepath))
        FOut = snap.TFOut(filepath)
        g.Save(FOut)
        FOut.Flush()

        return filepath
def copy_graph(graph):
    """Return a copy of ``graph`` made by saving it and loading it back.

    The round trip goes through a uniquely named temporary file that is
    removed afterwards. The previous fixed name ``.copy.bin`` in the working
    directory was never deleted and collided between concurrent callers.

    :param graph: SNAP graph supporting ``Save`` whose type offers
        ``New()`` and ``Load``
    :return: a new graph of the same type as ``graph``
    """
    import os
    import tempfile

    fd, tmpfile = tempfile.mkstemp(prefix='.copy', suffix='.bin')
    os.close(fd)  # SNAP opens the file by name; only the path is needed
    try:
        # Saving to tmp file
        FOut = snap.TFOut(tmpfile)
        graph.Save(FOut)
        FOut.Flush()
        del FOut  # drop the write stream before reading the file back

        # Loading to new graph
        FIn = snap.TFIn(tmpfile)
        graphtype = type(graph)
        new_graph = graphtype.New()
        new_graph = new_graph.Load(FIn)
        del FIn
    finally:
        try:
            os.remove(tmpfile)
        except OSError:
            # Best effort: a leftover temp file must not mask the real result.
            pass

    return new_graph
Exemple #21
0
 def save_graph(self, out_dir):
     """Write this object's metadata and graph into ``out_dir``.

     ``out_dir`` is created if needed. The nodes, node map and cell positions
     are pickled to ``meta.pickle``; the graph itself is saved in SNAP binary
     format as ``bin.graph``.
     """
     payload = dict(
         cells=self.nodes,
         cell_map=self.node_map,
         cell_pos=self.cell_pos,
     )
     os.makedirs(out_dir, exist_ok=True)

     with open(os.path.join(out_dir, "meta.pickle"), "wb") as meta_file:
         pickle.dump(payload, meta_file)

     # Binary SNAP format is used rather than a plain edge list.
     writer = snap.TFOut(os.path.join(out_dir, "bin.graph"))
     self.graph.Save(writer)
     writer.Flush()
def buildSimGraph(questions, wordVecs):
    tfidf_matrix, ids, idf = similarityModel(questions, wordVecs)
    graph = snap.TUNGraph.New()
    for id in ids:
        graph.AddNode(id)
    print graph.GetNodes()
    numq = tfidf_matrix.shape[0]
    for i in xrange(numq):
        if i % 1000 == 0:
            print "done", i
        similarity = tfidf_matrix[i + 1:].dot(tfidf_matrix[i])
        for j in xrange(len(similarity)):
            if similarity[j] > 0.2:
                graph.AddEdge(ids[i], ids[j + i + 1])
    fout = snap.TFOut("similarity2.graph")
    graph.Save(fout)
    fout.Flush()
Exemple #23
0
def setCategorys():
    G = getGraph("../files/G.graph")

    list_post = getListFromFile("../files/list_comment_category_nbsvm_1.txt")
    post_comments = list_post["post"]

    print "post_comments:", len(post_comments)
    i = 0
    m = 0

    for NI in G.Nodes():
        nid = NI.GetId()
        NLabel = G.GetStrAttrDatN(nid, "NLabel")

        #-------------PHOTO
        if NLabel == 'photo':
            c = 0
            comments = post_comments[i]["comments"]

            #------------------------------------------------IN EDGES------------------------------------------------
            for nid1 in NI.GetInEdges():
                NLabel1 = G.GetStrAttrDatN(nid1, "NLabel")
                NName1 = G.GetStrAttrDatN(nid1, "NName")
                NCategory = G.GetStrAttrDatN(nid1, "NCategory")
                eid = G.GetEId(nid1, nid)
                ETime = G.GetStrAttrDatE(eid, "ETime")
                #------------COMMENT
                if NLabel1 == "comment":
                    if NCategory == "text":
                        G.AddStrAttrDatN(nid1, "other", 'NCategory')
                    newCategory = comments[c]['category']
                    G.AddStrAttrDatN(nid1, newCategory, 'NCategory')
                    NCategory_1 = G.GetStrAttrDatN(nid1, "NCategory")
                    print c, NCategory_1, "--", newCategory
                    c += 1

            print i, "-->", len(comments), "=", c
            i += 1
    #---------------save Graph as an output file
    snap.SaveEdgeList(G, "../files/new_G.txt",
                      "Save as tab-separated list of edges")

    #---------------save binary
    FOut = snap.TFOut("../files/new_G.graph")
    G.Save(FOut)
def reverse_graph(input, output):
    """Load a directed graph, flip every edge, and save the result.

    :param input: path of the SNAP binary ``TNGraph`` to read
    :param output: path the reversed graph is written to
    """
    print("Loading graph...")
    reader = snap.TFIn(input)
    original = snap.TNGraph.Load(reader)

    flipped = snap.TNGraph.New()
    for vertex in original.Nodes():
        flipped.AddNode(vertex.GetId())

    # Swap source and destination of each edge.
    for edge in original.Edges():
        flipped.AddEdge(edge.GetDstNId(), edge.GetSrcNId())

    # Reversal must keep both counts unchanged.
    assert original.GetNodes() == flipped.GetNodes()
    assert original.GetEdges() == flipped.GetEdges()

    writer = snap.TFOut(output)
    flipped.Save(writer)
    writer.Flush()
Exemple #25
0
def SaveState(ds):
    """Write the state in ``ds`` to the file named by ``sw.GetStateName()``.

    The integer fields ``first``, ``range``, ``count``, ``dist`` and ``start``
    are wrapped in ``Snap.TInt`` and written in that order, followed by
    ``ds["visit"]`` as-is.
    """
    state_path = sw.GetStateName()

    # Build every field before the output file is opened.
    int_keys = ("first", "range", "count", "dist", "start")
    fields = [Snap.TInt(ds[key]) for key in int_keys]
    fields.append(ds["visit"])

    writer = Snap.TFOut(Snap.TStr(state_path))
    for field in fields:
        field.Save(writer)
    writer.Flush()
Exemple #26
0
def q4_2():
    """Build the HDN graph from the gene-disease network and save it.

    Loads ``GDNetwork.graph`` and the gene IDs from ``geneDegrees.csv``.
    Nodes with ID >= 20000 are copied into a new graph, HDN (per the original
    comments, 20000 is the gene/disease boundary). For every gene ID from the
    largest one in the CSV down to 1, all pairs of that gene's neighbours are
    connected in HDN. The result is written to ``HDN.graph``.
    """
    FIn = snap.TFIn('GDNetwork.graph')
    G = snap.TUNGraph.Load(FIn)

    # id: degree
    geneDict = dict()
    with open('geneDegrees.csv', "r") as file:
        for line in file:
            fields = line.split()[0].split(',')
            geneDict[int(fields[0])] = int(fields[1])

    # Create HDN. The block used ``HDN`` without ever creating it.
    # NOTE(review): if a module-level ``HDN`` was intended, this local
    # shadows it -- confirm.
    HDN = snap.TUNGraph.New()
    for node in G.Nodes():
        if node.GetId() < 20000:
            continue
        HDN.AddNode(node.GetId())

    # 17047 is max gene id
    maxId = max(geneDict)
    for i in range(maxId, 0, -1):
        # Gene IDs need not be contiguous; skip IDs absent from the graph.
        if not G.IsNode(i):
            continue
        gene = G.GetNI(i)
        neighbours = [gene.GetNbrNId(k) for k in range(gene.GetDeg())]
        # Add edges among all pairs of this gene's neighbours.
        for j in range(len(neighbours) - 1):
            for z in range(j + 1, len(neighbours)):
                HDN.AddEdge(neighbours[j], neighbours[z])
        print(i)

    FOut = snap.TFOut('HDN.graph')
    # Save the graph that was just built; ``G`` used to be written here.
    HDN.Save(FOut)
    FOut.Flush()
    print("end of q4_2")
    print "end of q4_2"
def generate_steam_graph():
    """Build the user-game graph from the steam dump and save it.

    Each line of ``data/australian_users_items.json`` is parsed with
    ``ast.literal_eval``. Users get consecutive node IDs starting at 600000;
    games use their ``item_id`` as node ID. The user and game node IDs are
    written to ``graph/user_node.txt`` and ``graph/game_node.txt``, and the
    graph to ``graph/steam.graph``.
    """
    G = snap.TUNGraph.New()
    user_nodes = []
    game_nodes = []  #10978
    next_user_node = 600000

    with open("data/australian_users_items.json") as source:
        for raw in source:
            record = ast.literal_eval(raw)
            # Read but not used further; kept so malformed records still fail.
            user_id = record['user_id']
            item_count = int(record['items_count'])

            G.AddNode(next_user_node)
            for entry in record['items']:
                game_id = int(entry['item_id'])
                if not G.IsNode(game_id):
                    G.AddNode(game_id)
                    game_nodes.append(game_id)
                G.AddEdge(next_user_node, game_id)
            user_nodes.append(next_user_node)
            next_user_node += 1

    with open('graph/user_node.txt', 'w') as out:
        out.writelines("%d\n" % node_id for node_id in user_nodes)

    with open('graph/game_node.txt', 'w') as out:
        out.writelines("%d\n" % node_id for node_id in game_nodes)

    writer = snap.TFOut("graph/steam.graph")
    G.Save(writer)
    writer.Flush()
def main():
    """Parse ``data/subreddits.gz`` into a graph and save it.

    The file is read twice: once to count its lines for the progress bar and
    once to parse each line as JSON and pass it to ``parse_subreddit``. The
    graph is written to ``output/subreddits.graph``.
    """
    subreddit_file = 'data/subreddits.gz'
    print('Fetching number of lines in ' + subreddit_file + '...')
    # ``with`` closes the gzip handles; they used to be left open.
    with gzip.open(subreddit_file) as lines:
        nsubreddits = sum(1 for _ in lines)
    graph = setup_graph()

    print('Parsing {}...'.format(subreddit_file))
    progress.init_progbar(nsubreddits)
    with gzip.open(subreddit_file) as lines:
        for line in lines:
            parse_subreddit(json.loads(line), graph)
            progress.report_progress()
    progress.report_finished()

    print('Saving...')
    output = snap.TFOut('output/subreddits.graph')
    graph.Save(output)
    output.Flush()
    print('Done')
Exemple #29
0
def xmlScrape():
    authorsDict = {}
    graph = snap.TNEANet.New()
    used = [
        'article', 'inproceedings', 'proceedings', 'book', 'incollection',
        'phdthesis', 'mastersthesis', 'www', 'author'
    ]

    for event, elem in et.iterparse('/lfs/local/0/dzeng0/dblp/rep-dblp.xml',
                                    events=('start', 'end')):
        if event == 'end':
            if elem.tag not in used:
                elem.clear()
            elif elem.tag != 'author':
                authors = elem.findall('author')
                for author in authors:
                    name = author.text.encode('utf-8')
                    if name not in authorsDict:
                        id = graph.AddNode(-1)
                        authorsDict[name] = id
                        graph.AddStrAttrDatN(id, name, 'name')
                        graph.AddIntAttrDatN(id, 0, 'exp')
                    id = authorsDict[name]
                    graph.AddIntAttrDatN(id,
                                         graph.GetIntAttrDatN(id, 'exp') + 1,
                                         'exp')
                for a1 in authors:
                    n1 = a1.text.encode('utf-8')
                    i1 = authorsDict[n1]
                    for a2 in authors:
                        n2 = a2.text.encode('utf-8')
                        i2 = authorsDict[n2]
                        if not graph.IsEdge(i1, i2) and i1 != i2:
                            eid = graph.AddEdge(i1, i2)
                            eid = graph.AddEdge(i2, i1)

                print elem.get('key'), len(authors)
                sys.stdout.flush()
                elem.clear()
    fout = snap.TFOut('coauthor.graph')
    graph.Save(fout)
    fout.Flush()
def generate_steam_game_graph():
    """Project the user-game graph onto games and report its clustering.

    Loads ``graph/steam.graph`` and the node lists from
    ``graph/user_node.txt`` and ``graph/game_node.txt``. Two games are
    connected when some user node is adjacent to both. The projection is
    saved to ``graph/steam_game.graph``, reloaded, and its clustering
    coefficient (``snap.GetClustCf`` with sample size 1000) is printed.

    :return: the game graph as reloaded from disk
    """
    FIn = snap.TFIn("graph/steam.graph")
    G = snap.TUNGraph.Load(FIn)

    # A set gives O(1) membership tests in the node loop below; the list
    # used before made that loop quadratic (about 88k users).
    with open('graph/user_node.txt', 'r') as f:
        user_nodes = set(int(line) for line in f)

    game_node_array = []  #10978
    with open('graph/game_node.txt', 'r') as f:
        for line in f:
            game_node_array.append(int(line))

    G_game = snap.TUNGraph.New()
    # add nodes
    for uid in game_node_array:
        G_game.AddNode(uid)
    # add edges
    count = 0
    for node in G.Nodes():
        if node.GetId() in user_nodes:
            neid = [node.GetNbrNId(i) for i in range(node.GetDeg())]
            # Connect every pair of neighbours of this user node.
            for i in range(len(neid)):
                for j in range(i + 1, len(neid)):
                    G_game.AddEdge(neid[i], neid[j])
        count += 1
        if count % 1000 == 0:
            print("percentage: %f" % (count / (float(G.GetNodes()))))
    FOut = snap.TFOut("graph/steam_game.graph")
    G_game.Save(FOut)
    FOut.Flush()

    FIn = snap.TFIn("graph/steam_game.graph")
    G_game = snap.TUNGraph.Load(FIn)
    ClustCf = snap.GetClustCf(G_game, 1000)
    print("clustering coefficient: %f" % ClustCf)
    return G_game