def test_generic_weighted_projected_graph(self):
        """Project a 5-node path onto nodes 0, 2 and 4 and check the result.

        The same nodes and unit-weight edges are expected with a custom
        weight function, with the default weight, and for a directed path.
        """
        def shared(unbrs, vnbrs):
            # Weight is the size of the intersection of the two arguments.
            return len(unbrs & vnbrs)

        kept_nodes = [0, 2, 4]
        expected_edges = [(0, 2, {'weight': 1}), (2, 4, {'weight': 1})]

        # Undirected path, custom weight function.
        B = nx.path_graph(5)
        G = bipartite.generic_weighted_projected_graph(
            B, kept_nodes, weight_function=shared)
        assert_equal(sorted(G.nodes()), kept_nodes)
        assert_equal(G.edges(data=True), expected_edges)

        # Undirected path, default weight.
        G = bipartite.generic_weighted_projected_graph(B, kept_nodes)
        assert_equal(sorted(G.nodes()), kept_nodes)
        assert_equal(G.edges(data=True), expected_edges)

        # Directed path, default weight.
        B = nx.DiGraph()
        B.add_path(list(range(5)))
        G = bipartite.generic_weighted_projected_graph(B, kept_nodes)
        assert_equal(sorted(G.nodes()), kept_nodes)
        assert_equal(G.edges(data=True), expected_edges)
Example #2
0
    def test_generic_weighted_projected_graph_simple(self):
        """Project a 5-node path onto nodes 0, 2 and 4 and check the result.

        The same nodes and unit-weight edges are expected with a custom
        weight function, with the default weight, and for a directed path.
        """
        def shared(G, u, v):
            # Weight is the number of neighbours u and v have in common.
            return len(set(G[u]) & set(G[v]))

        expected_edges = [(0, 2, {'weight': 1}), (2, 4, {'weight': 1})]

        B = nx.path_graph(5)
        G = bipartite.generic_weighted_projected_graph(B, [0, 2, 4],
                                                       weight_function=shared)
        assert_nodes_equal(list(G), [0, 2, 4])
        assert_edges_equal(list(G.edges(data=True)), expected_edges)

        G = bipartite.generic_weighted_projected_graph(B, [0, 2, 4])
        assert_nodes_equal(list(G), [0, 2, 4])
        assert_edges_equal(list(G.edges(data=True)), expected_edges)

        B = nx.DiGraph()
        # The Graph.add_path method no longer exists in current NetworkX;
        # the module-level nx.add_path function is its replacement.
        nx.add_path(B, range(5))
        G = bipartite.generic_weighted_projected_graph(B, [0, 2, 4])
        assert_nodes_equal(list(G), [0, 2, 4])
        assert_edges_equal(list(G.edges(data=True)), expected_edges)
def misc_1():
    """Print-driven walkthrough of weighted projection on a small graph.

    Builds ``nx.complete_bipartite_graph(2, 2)``, assigns the weights
    1, 2, 3, ... to its edges in iteration order, and projects it onto nodes
    0 and 1 twice: once with the default weight and once with ``my_weight``.
    Every intermediate value is reported with ``print``; nothing is returned.
    """
    def jaccard(G, u, v):
        # Not referenced anywhere in this walkthrough.
        u_neighbours = set(G[u])
        v_neighbours = set(G[v])
        return (float(len(u_neighbours & v_neighbours)) /
                len(u_neighbours | v_neighbours))

    def my_weight(G, u, v, weight='weight'):
        # Sum the weights of both edges leading to every shared neighbour,
        # tracing each step on stdout.
        total = 0
        print('@@@@@@@@@@@@@@@@')
        print(G)
        print(G[u])
        print(type(u))
        print(G.edges())
        shared_neighbours = set(G[u]) & set(G[v])
        print(shared_neighbours)
        for nbr in shared_neighbours:
            print('{{{{{{{{{{{{{{{{{{')
            print(nbr)
            total += G[u][nbr][weight] + G[v][nbr][weight]
            print('w=', total)
        return total

    B = nx.complete_bipartite_graph(2, 2)

    print('iiiiiiiiiiiiiiiiii')
    for edge in B.edges(data=True):
        print(edge)

    # Writing into the attribute dict yielded by edges(data=True) sets the
    # weight on the graph edge itself.
    for weight_value, edge in enumerate(B.edges(data=True), start=1):
        print('///////////////')
        edge[2]['weight'] = weight_value

    for edge in B.edges(data=True):
        print(edge)

    # Projection with the default weight.
    G = bipartite.generic_weighted_projected_graph(B, [0, 1])
    print(G.edges(data=True))
    for edge in G.edges(data=True):
        print(edge)

    # Projection with the tracing weight function defined above.
    G = bipartite.generic_weighted_projected_graph(B, [0, 1],
                                                   weight_function=my_weight)
    print('Final value')

    print(G.edges(data=True))
Example #4
0
def splitBipartiteGexf(inputGexf, outputGexfPath):
    """Split a bipartite GEXF graph into one projected graph per node set.

    Reads ``inputGexf``, converts it to an undirected graph, computes the two
    bipartite node sets and writes the weighted projection onto each set to
    ``<outputGexfPath>/<input stem>.x.gexf`` and ``<input stem>.y.gexf``.
    Progress is printed, a summary is stored in the externally defined ``jr``
    mapping, and failures are reported through ``throwError``.

    :param inputGexf: path of the GEXF file to read
    :param outputGexfPath: directory that receives the two output files
    :return: None
    """
    outputGexfPath = outputGexfPath + os.sep
    jr["input_gexf"] = inputGexf
    jr["outputGexfPath"] = outputGexfPath

    # Both output files share the output directory and the input file's stem.
    output_stem = os.path.join(dirname(outputGexfPath),
                               basename(splitext(inputGexf)[0]))
    xgexf = output_stem + ".x.gexf"
    ygexf = output_stem + ".y.gexf"

    try:
        graph = nx.readwrite.gexf.read_gexf(inputGexf)
    except Exception:
        # "except Exception" keeps KeyboardInterrupt / SystemExit from being
        # misreported as a read failure.
        throwError("unable to read gexf file")
        return

    # bug in networkx, we need to make the directed graph as undirected
    graph = graph.to_undirected()

    jr["numOfNodes"] = len(graph.nodes())
    jr["numOfEdges"] = len(graph.edges())

    X, Y = bipartite.sets(graph)
    print("biparte.sets...")
    print(X)
    print(Y)

    xgr = bipartite.generic_weighted_projected_graph(graph, X)
    print("biparte.xgr...")
    print(len(xgr.nodes()))
    print(len(xgr.edges()))
    try:
        nx.readwrite.gexf.write_gexf(xgr, xgexf)
    except Exception:
        throwError("unable to write file, path:'" + xgexf + "'")
        return

    ygr = bipartite.generic_weighted_projected_graph(graph, Y)
    print("biparte.ygr...")
    print(len(ygr.nodes()))
    print(len(ygr.edges()))
    try:
        nx.readwrite.gexf.write_gexf(ygr, ygexf)
    except Exception:
        # NOTE(review): unlike the X branch there is no return here, so the
        # output list below is recorded even when this write failed - confirm
        # that this is intended.
        throwError("unable to write file, path:'" + ygexf + "'")
    jr['output_gexf'] = [xgexf, ygexf]

    # Single-argument print calls produce the same text on Python 2 and 3.
    print("nodes in X %s" % (xgr.nodes(),))
    print("edges in X %s" % (list(xgr.edges()),))
    print("nodes in Y %s" % (ygr.nodes(),))
Example #5
0
def splitBipartiteGexf(inputGexf, outputGexfPath):
    """Write the two one-mode projections of a bipartite GEXF graph.

    The graph read from ``inputGexf`` is made undirected and split into its
    two bipartite node sets; the weighted projection onto each set is written
    to ``<outputGexfPath>/<input stem>.x.gexf`` and ``<input stem>.y.gexf``.
    Progress is printed, a summary is stored in the externally defined ``jr``
    mapping, and failures are reported through ``throwError``.

    :param inputGexf: path of the GEXF file to read
    :param outputGexfPath: directory that receives the two output files
    :return: None
    """
    outputGexfPath = outputGexfPath + os.sep
    jr["input_gexf"] = inputGexf
    jr["outputGexfPath"] = outputGexfPath

    # Output files: same directory and stem, different suffix.
    target_stem = os.path.join(dirname(outputGexfPath),
                               basename(splitext(inputGexf)[0]))
    xgexf = target_stem + ".x.gexf"
    ygexf = target_stem + ".y.gexf"

    try:
        graph = nx.readwrite.gexf.read_gexf(inputGexf)
    except Exception:
        # Narrower than a bare "except:", so KeyboardInterrupt / SystemExit
        # are not swallowed and misreported as a read failure.
        throwError("unable to read gexf file")
        return

    # bug in networkx, we need to make the directed graph as undirected
    graph = graph.to_undirected()

    jr["numOfNodes"] = len(graph.nodes())
    jr["numOfEdges"] = len(graph.edges())

    X, Y = bipartite.sets(graph)
    print("biparte.sets...")
    print(X)
    print(Y)

    xgr = bipartite.generic_weighted_projected_graph(graph, X)
    print("biparte.xgr...")
    print(len(xgr.nodes()))
    print(len(xgr.edges()))
    try:
        nx.readwrite.gexf.write_gexf(xgr, xgexf)
    except Exception:
        throwError("unable to write file, path:'" + xgexf + "'")
        return

    ygr = bipartite.generic_weighted_projected_graph(graph, Y)
    print("biparte.ygr...")
    print(len(ygr.nodes()))
    print(len(ygr.edges()))
    try:
        nx.readwrite.gexf.write_gexf(ygr, ygexf)
    except Exception:
        # NOTE(review): the X branch returns after a failed write, this one
        # does not, so jr['output_gexf'] is still set below - confirm intent.
        throwError("unable to write file, path:'" + ygexf + "'")
    jr['output_gexf'] = [xgexf, ygexf]

    # Formatted single-argument prints give identical text on Python 2 and 3.
    print("nodes in X %s" % (xgr.nodes(),))
    print("edges in X %s" % (list(xgr.edges()),))
    print("nodes in Y %s" % (ygr.nodes(),))
 def generic_weighted_projected_graph(self):
     """Project ``self.B`` onto its first bipartite set, plot and print it.

     The projection is passed to ``self.plot_graph_2`` and then its edge
     count, its edges and its edges with data are printed.
     """
     first_set = bipartite.sets(self.B)[0]
     projection = bipartite.generic_weighted_projected_graph(self.B, first_set)
     self.plot_graph_2(projection, 'generic_weighted_projected_graph')

     edge_count = projection.number_of_edges()
     print('generic_weighted_projected_graph:number of edges:', edge_count)
     print(projection.edges())
     print(list(projection.edges(data=True)))
    def test_generic_weighted_projected_graph_simple(self):
        """Project a 5-node path onto nodes 0, 2 and 4 and check the result.

        The same nodes and unit-weight edges are expected with a custom
        weight function, with the default weight, and for a directed path.
        """
        def shared(G, u, v):
            # Weight is the number of neighbours u and v have in common.
            return len(set(G[u]) & set(G[v]))

        kept_nodes = [0, 2, 4]
        expected_edges = [(0, 2, {"weight": 1}), (2, 4, {"weight": 1})]

        # Undirected path, custom weight function.
        B = nx.path_graph(5)
        G = bipartite.generic_weighted_projected_graph(
            B, kept_nodes, weight_function=shared)
        assert_equal(sorted(G.nodes()), kept_nodes)
        assert_equal(G.edges(data=True), expected_edges)

        # Undirected path, default weight.
        G = bipartite.generic_weighted_projected_graph(B, kept_nodes)
        assert_equal(sorted(G.nodes()), kept_nodes)
        assert_equal(G.edges(data=True), expected_edges)

        # Directed path, default weight.
        B = nx.DiGraph()
        B.add_path(list(range(5)))
        G = bipartite.generic_weighted_projected_graph(B, kept_nodes)
        assert_equal(sorted(G.nodes()), kept_nodes)
        assert_equal(G.edges(data=True), expected_edges)
    def test_generic_weighted_projected_graph(self):
        """Check the projection of a 5-node path onto nodes 0, 2 and 4.

        Covers a custom weight function, the default weight and a directed
        path; each case must yield the same nodes and unit-weight edges.
        """
        def shared(unbrs, vnbrs):
            # Weight is the size of the intersection of the two arguments.
            return len(unbrs & vnbrs)

        projected_onto = [0, 2, 4]
        unit_edges = [(0, 2, {'weight': 1}), (2, 4, {'weight': 1})]

        B = nx.path_graph(5)
        G = bipartite.generic_weighted_projected_graph(
            B, projected_onto, weight_function=shared)
        assert_equal(sorted(G.nodes()), projected_onto)
        assert_equal(G.edges(data=True), unit_edges)

        G = bipartite.generic_weighted_projected_graph(B, projected_onto)
        assert_equal(sorted(G.nodes()), projected_onto)
        assert_equal(G.edges(data=True), unit_edges)

        B = nx.DiGraph()
        B.add_path(list(range(5)))
        G = bipartite.generic_weighted_projected_graph(B, projected_onto)
        assert_equal(sorted(G.nodes()), projected_onto)
        assert_equal(G.edges(data=True), unit_edges)
Example #9
0
    def projected_graph(self):
        """Return the weighted projection of ``self.B`` onto its first bipartite set.

        ``self.create_bipartite_graph()`` is called first when ``self.B`` is
        falsy; edge weights are computed by ``self.projection_weight``.
        """
        if not self.B:
            self.create_bipartite_graph()

        first_set = bipartite.sets(self.B)[0]
        return bipartite.generic_weighted_projected_graph(
            self.B, first_set, weight_function=self.projection_weight)
Example #10
0
def project(graph, nodes):
    """
    Project the graph onto ``nodes`` and give every projected node a weight.

    Edge weights of the projection come from ``my_weight``; afterwards each
    node's ``"weight"`` attribute is set to ``degrees(projection, node)``.

    :param graph: graph to project
    :param nodes: nodes to project onto
    :return: the projected graph
    """
    projection = bipartite.generic_weighted_projected_graph(
        graph, nodes, weight_function=my_weight)
    for member in projection.nodes:
        projection.nodes[member]["weight"] = degrees(projection, member)
    return projection
Example #11
0
 def test_generic_weighted_projected_graph_custom(self):
     """Check custom weight functions on a weighted 2x2 complete bipartite graph.

     Edges get the weights 1..4 in iteration order; the projection onto
     nodes 0 and 1 is checked for the Jaccard weight, for a weight that sums
     the edge weights to shared neighbours, and for the default weight.
     """
     def jaccard(G, u, v):
         unbrs = set(G[u])
         vnbrs = set(G[v])
         return float(len(unbrs & vnbrs)) / len(unbrs | vnbrs)

     def my_weight(G, u, v, weight='weight'):
         # G[u][nbr] is the edge attribute dict; unlike the G.edge attribute
         # (removed in NetworkX 2.0) it is available in every version.
         w = 0
         for nbr in set(G[u]) & set(G[v]):
             w += G[u][nbr].get(weight, 1) + G[v][nbr].get(weight, 1)
         return w

     B = nx.complete_bipartite_graph(2, 2)
     for i, (u, v) in enumerate(B.edges()):
         B[u][v]['weight'] = i + 1
     G = bipartite.generic_weighted_projected_graph(B, [0, 1],
                                                    weight_function=jaccard)
     assert_edges_equal(G.edges(data=True), [(0, 1, {'weight': 1.0})])
     G = bipartite.generic_weighted_projected_graph(B, [0, 1],
                                                    weight_function=my_weight)
     assert_edges_equal(G.edges(data=True), [(0, 1, {'weight': 10})])
     G = bipartite.generic_weighted_projected_graph(B, [0, 1])
     assert_edges_equal(G.edges(data=True), [(0, 1, {'weight': 2})])
    def test_generic_weighted_projected_graph_custom(self):
        """Check custom weight functions on a weighted 2x2 complete bipartite graph.

        Edges get the weights 1..4 in iteration order; the projection onto
        nodes 0 and 1 is checked for the Jaccard weight, for a weight that
        sums the edge weights to shared neighbours, and for the default weight.
        """
        def jaccard(G, u, v):
            unbrs = set(G[u])
            vnbrs = set(G[v])
            return float(len(unbrs & vnbrs)) / len(unbrs | vnbrs)

        def my_weight(G, u, v, weight="weight"):
            # G[u][nbr] is the edge attribute dict; unlike the G.edge
            # attribute (removed in NetworkX 2.0) it exists in every version.
            w = 0
            for nbr in set(G[u]) & set(G[v]):
                w += G[u][nbr].get(weight, 1) + G[v][nbr].get(weight, 1)
            return w

        B = nx.complete_bipartite_graph(2, 2)
        for i, (u, v) in enumerate(B.edges()):
            B[u][v]["weight"] = i + 1

        # list(...) makes the comparison hold whether edges() returns a list
        # or an edge view.
        G = bipartite.generic_weighted_projected_graph(B, [0, 1], weight_function=jaccard)
        assert_equal(list(G.edges(data=True)), [(0, 1, {"weight": 1.0})])
        G = bipartite.generic_weighted_projected_graph(B, [0, 1], weight_function=my_weight)
        assert_equal(list(G.edges(data=True)), [(0, 1, {"weight": 10})])
        G = bipartite.generic_weighted_projected_graph(B, [0, 1])
        assert_equal(list(G.edges(data=True)), [(0, 1, {"weight": 2})])
def fold_network(network, nodes, mode='multi'):
    """
    Fold a two-mode network into a one-mode network over ``nodes``.

    :param network: networkx.Graph
            Bipartite graph to be folded
    :param nodes: list of nodes
            The node set to keep
    :param mode: str, optional
            'multi' projects with ``multi_weight_function``
            (an edge weight dictionary);
            'single' projects with ``single_weight_function``
            (a single weight per edge).
            Defaults to 'multi'
    :return:
        networkx.Graph
         The folded network, or None when ``mode`` is neither
         'multi' nor 'single'
    """
    if mode == 'multi':
        weight_fn = multi_weight_function
    elif mode == 'single':
        weight_fn = single_weight_function
    else:
        # Unrecognised modes produce no network.
        return None

    return bipartite.generic_weighted_projected_graph(
        network, nodes, weight_function=weight_fn)
Example #14
0
def project_graph(name='bipartite_reader_network.pickle', method="Count"):
    """
    Load a bipartite graph, project it onto its bottom nodes and save the result.

    Nodes whose ``bipartite`` attribute equals 0 form the top set; all other
    nodes form the bottom set that the projection keeps. The projection is
    written to ``projection_graph_<method>.pickle`` (via ``overwrite``) and to
    ``projection_graph_<method>.gml``.

    :param name: passed to ``read`` to load the bipartite graph.
    :param method: how to weight the projected edges.
        "Count" uses ``generic_weighted_projected_graph`` with its default weight,
        "Collaboration" uses ``collaboration_weighted_projected_graph``,
        "Overlap" uses ``overlap_weighted_projected_graph``.
        "Average Weight" and "Divergence" are not implemented yet and currently
        produce the same result as "Collaboration".
    :return: A nx graph.
    :raises Exception: if the loaded graph is not bipartite.
    :raises ValueError: if ``method`` is not one of the names above.
    """

    print("Projecting Graph with {} method.".format(method))

    bi_graph = read(name)

    if not bipartite.is_bipartite(bi_graph):
        raise Exception("Projecting non-bipartite graphs is felony.")

    # Make top nodes (users) to project down onto bottom nodes (books)
    top_nodes = {
        n
        for n, d in bi_graph.nodes(data=True) if d['bipartite'] == 0
    }
    bottom_nodes = set(bi_graph) - top_nodes

    # Various projection methods
    if method == "Count":  # Count the number of co-reads
        projector = bipartite.generic_weighted_projected_graph
    elif method in ("Collaboration", "Average Weight", "Divergence"):
        # Newman's collaboration metric. "Average Weight" and "Divergence"
        # are still todo and share this implementation for now.
        projector = bipartite.collaboration_weighted_projected_graph
    elif method == "Overlap":  # Proportion of neighbors that are shared
        projector = bipartite.overlap_weighted_projected_graph
    else:
        # ValueError is a subclass of Exception, so existing handlers that
        # catch Exception keep working.
        raise ValueError("{} is not a valid projection method".format(method))

    proj_graph = projector(bi_graph, bottom_nodes)

    # Save
    pickle_name = "projection_graph_{}.pickle".format(method)
    gml_name = "projection_graph_{}.gml".format(method)
    print("Saving {}".format(pickle_name))
    overwrite(proj_graph, pickle_name)
    print("Saving {}".format(gml_name))
    nx.write_gml(proj_graph, gml_name)

    return proj_graph
Example #15
0
def creazioneProiezione(g):
    """
    From the bipartite (User-Tag) graph, generate the projection onto the users.

    The graph is read from the GraphML file at
    ``pathOutput + fileNameGraphML + ".graphml"``; when it is connected, a
    weighted projection is built and its first ten edges are printed.
    NOTE(review): the ``g`` argument is not used by the body.

    :param g:
    :type g: Graph
    :return: None
    """
    def my_weight(G, u, v, weight='weight'):
        # Sum, over the shared neighbours, the weights of both connecting
        # edges (a missing weight counts as 1). G[u][nbr] replaces the
        # G.edge attribute, which was removed in NetworkX 2.0.
        w = 0
        for nbr in set(G[u]) & set(G[v]):
            w += G[u][nbr].get(weight, 1) + G[v][nbr].get(weight, 1)
        return w

    print("\nVado a creare prima il grafo bipartito e poi la proiezione sugli utenti!")
    # Go through NetworkX
    B = nx.read_graphml(path=pathOutput + fileNameGraphML + ".graphml")
    if nx.is_connected(B):
        top_nodes = {(n, d["gender"])
                     for n, d in B.nodes(data=True) if d['bipartite'] == 1}
        # NOTE(review): top_nodes holds (node, gender) tuples, so unless the
        # node ids themselves are such tuples this difference removes nothing
        # and bottom_nodes is every node of B - confirm the intended node set.
        bottom_nodes = set(B) - top_nodes
        print("\nGrafo bipartito?: {}".format(nx.is_bipartite(B)))
        print("NODI: {}".format(list(top_nodes)[:10]))
        G = bipartite.generic_weighted_projected_graph(
            B, bottom_nodes, weight_function=my_weight)
        # An edge view cannot be sliced; materialise it first.
        print("\nArchi: {}".format(list(G.edges(data=True))[:10]))
def main():
    """Build and export one inventor network per university.

    For every entry of ``UNIVERSITIES`` a patent query is sent to ``BASE_URL``
    (dates bounded by ``DATE_START`` / ``DATE_END``), the inventor-patent pairs
    of the response are turned into a bipartite graph, that graph is projected
    onto the inventors, inventor attributes (including a gender guessed from
    the first name) are attached, unknown genders are imputed at random, and
    the result is written as GraphML below ``OUTPUT_FOLDER``.
    """

    # initialize gender detector
    gender_detector = gender.Detector()

    # loop over universities
    for university in UNIVERSITIES:

        # format query
        params = {
            'q':
            '{"_and":[{"_gte":{"patent_date":"%s"}},{"_lt":{"patent_date":"%s"}},{"assignee_id":"%s"}]}'
            % (DATE_START, DATE_END, university["assignee_id"]),
            'f':
            '["patent_number","patent_date","patent_title","inventor_id","inventor_first_name","inventor_last_name", "assignee_organization", "cited_patent_number", "citedby_patent_number"]',
            'o':
            '{"per_page":%s}' % (PER_PAGE, )
        }

        # make api request
        request = requests.get(BASE_URL, params=params)

        # save response as dict
        data = json.loads(request.text)

        # pull patent data
        patent_data = data["patents"]

        # pull response information
        # NOTE(review): these two counts are read but not used below, and only
        # this single response is processed - confirm no paging is needed.
        page_count = data["count"]
        total_patent_count = data["total_patent_count"]

        # initialize containers to hold data
        INVENTORS = {}
        EDGES_2MODE = set()

        # loop over patents to pull network data
        for d in patent_data:

            # extract data for each patent
            # (only patent_number and inventors are used further down)
            patent_number = d["patent_number"]
            patent_date = d["patent_date"]
            patent_title = d["patent_title"]
            inventors = d["inventors"]
            assignees = d["assignees"]
            cited_patents = d["cited_patents"]
            citedby_patents = d["citedby_patents"]

            # loop over inventors
            for inventor in inventors:

                # save inventor data
                if inventor["inventor_id"] not in INVENTORS:

                    # get full name
                    inventor_full_name = "%s %s" % (
                        inventor["inventor_first_name"],
                        inventor["inventor_last_name"])

                    # get gender
                    # the first name token with a recognised gender wins;
                    # "mostly_x" results are folded into "x"
                    inventor_gender = None
                    for inventor_first_name_token in inventor[
                            "inventor_first_name"].split():
                        if gender_detector.get_gender(
                                inventor_first_name_token
                        ) in ("male", "mostly_male", "female",
                              "mostly_female") and inventor_gender is None:
                            inventor_gender = gender_detector.get_gender(
                                inventor_first_name_token).replace(
                                    "mostly_", "")
                    if inventor_gender is None:
                        inventor_gender = "UNKNOWN"
                    assert inventor_gender in ("male", "female", "UNKNOWN")

                    # add to dictionary
                    INVENTORS[inventor["inventor_id"]] = {
                        "inventor_first_name": inventor["inventor_first_name"],
                        "inventor_last_name": inventor["inventor_last_name"],
                        "inventor_full_name": inventor_full_name,
                        "inventor_gender": inventor_gender
                    }

                # save edge data
                EDGES_2MODE.add((inventor["inventor_id"], patent_number))

        # create a bipartite graph in networkx
        # (inventor ids are tagged bipartite=0, patent numbers bipartite=1)
        B = nx.Graph()
        B.add_nodes_from([n[0] for n in EDGES_2MODE], bipartite=0)
        B.add_nodes_from([n[1] for n in EDGES_2MODE], bipartite=1)
        B.add_edges_from(EDGES_2MODE)

        # project the network to a unipartite representation
        # (onto the inventor side of every edge)
        G = bipartite.generic_weighted_projected_graph(
            B, [n[0] for n in EDGES_2MODE])

        # add inventor attributes
        nx.set_node_attributes(G, INVENTORS)

        # set some graph attributes
        G.graph["assignee_id"] = university["assignee_id"]
        G.graph["name"] = university["name"]

        # get rid of node attributes we don't need
        for node in G.nodes:
            del G.nodes[node]["bipartite"]

        # impute gender randomly in proportion to distribution in the network
        # NOTE(review): if no inventor has a known gender the list below is
        # empty and random.choice raises IndexError for the first UNKNOWN node.
        nmale = len([
            i for i in G.nodes.data("inventor_gender") if i[1] == "male"
        ]) * ["male"]
        nfemale = len([
            i for i in G.nodes.data("inventor_gender") if i[1] == "female"
        ]) * ["female"]
        gender_distribution = nmale + nfemale
        for node in G.nodes(data=True):
            if G.nodes[node[0]]["inventor_gender"] == "UNKNOWN":
                G.nodes[node[0]]["inventor_gender"] = random.choice(
                    gender_distribution)

        # export the graph
        path = os.path.join(os.path.realpath('.'), OUTPUT_FOLDER,
                            "%s.graphml" % (G.graph["name"], ))
        nx.write_graphml(G, path)

        # print
        for node in G.nodes(data=True):
            print(G.graph["assignee_id"], G.graph["name"], node)
            assert node[1]["inventor_gender"] in ("male", "female")
Example #17
0
# Collect the two sides of every (user, item) edge.
users = set()
items = set()
for u, v in given_graph_edges:
    users.add(u)
    items.add(v)

# Build the bipartite graph: the node attribute "bipartite" (0 or 1) records
# which of the two sets a node belongs to.
given_graph.add_nodes_from(users, bipartite=0)  # 'users' nodes
given_graph.add_nodes_from(items, bipartite=1)  # 'items' nodes
given_graph.add_edges_from(given_graph_edges)

nx.is_bipartite(given_graph)

# Projected item-item graph.
Prog_graph = bipartite.generic_weighted_projected_graph(given_graph, items)

# Ground truth.
# GT: key = user id, value = list of items recommended to that user.
GT = defaultdict(list)
# g_t: list of (user_id, item_id) tuples read from the TSV file.
g_t = []
with open(path1 + "/Part_2_1/dataset/Ground_Truth___UserID__ItemID.tsv") as f:
    for line in f:
        fields = line.rstrip('\n').split('\t')
        g_t.append(tuple(int(field) for field in fields))

for u, i in g_t:  # user_id, item_id
    # Items already recorded for user u in the ground truth, if any.
    temp = set(GT[u]) if u in GT else set()
Example #18
0
# Read the tab-separated edge list: column 0 holds the top node and column 1
# the bottom node of each edge.
nodes = pd.read_csv(options.input_filename, sep='\t', header=None)

B = Graph()
for row in nodes.iterrows():
    B.add_node(row[1][0], bipartite=0)
    B.add_node(row[1][1], bipartite=1)
    B.add_edge(row[1][0], row[1][1])

top_nodes = set(n for n, d in B.nodes(data=True) if d['bipartite'] == 0)
bottom_nodes = set(B) - top_nodes

top = list(top_nodes)
bottom = list(bottom_nodes)

# Single-argument print calls behave the same on Python 2 and Python 3.
print("Generating network projection")

# Project onto the top nodes with the weight selected by options.metric.
if options.metric == "hypergeometric":
    G = bipartite.generic_weighted_projected_graph(
        B, top_nodes, weight_function=hypergeometric)  #HYPERGEOMETRIC
elif options.metric == "jaccard":
    G = bipartite.generic_weighted_projected_graph(
        B, top_nodes, weight_function=jaccard)  #Jaccard
elif options.metric == "PCC":
    G = bipartite.generic_weighted_projected_graph(
        B, top_nodes, weight_function=pcc_weight)  #PCC
elif options.metric == "simpson":
    G = bipartite.overlap_weighted_projected_graph(B, top_nodes,
                                                   jaccard=False)  #Simpson
else:
    # Without this branch G stays undefined and the write below fails with
    # an unrelated NameError.
    raise ValueError("unknown metric: %r" % (options.metric,))

write_weighted_edgelist(G, options.output_filename, delimiter="\t")
print("Execution finished")
Example #19
0
def _strip_code_mentions_urls(text):
    """Remove backtick-delimited code, then @mentions, #references and http(s) URLs."""
    without_code = re.sub('`.*`', '', text)
    return re.sub(r"(?:\@|#|https?\://)\S+", "", without_code)


def get_reviewer_recommendation(repo_name,
                                access_token,
                                open_pr_id=None,
                                similarity_threshold=0.2,
                                limit_pr=None,
                                limit_recomm=5):
    """Print recommended reviewers for an open pull request of a repository.

    Closed pull requests and their commenters are put into a PR/reviewer
    bipartite graph, the closed PRs are scored against the open PR with
    ``lda_cosine_sim``, the graph is cut down to a leading share of the PRs,
    projected onto the reviewers with ``custom_weight`` and ranked with
    PageRank. The current and the recommended reviewers are printed; nothing
    is returned.

    :param repo_name: repository identifier passed to ``Github.get_repo``
    :param access_token: token passed to the ``Github`` client
    :param open_pr_id: number of the open PR to use; the first open PR
        (sorted by creation) is used when None
    :param similarity_threshold: fraction of the scored closed PRs to keep
    :param limit_pr: if set, only the first ``limit_pr`` closed PRs are used
    :param limit_recomm: if set, maximum number of recommended reviewers
    :raises Exception: if there is no open or no closed PR, if ``open_pr_id``
        matches no open PR, or if the projected graph ends up empty
    """
    # Get the access to Github API
    client = Github(access_token, per_page=300)
    print("[✔️] Connected to Github API.")

    repo = client.get_repo(repo_name)

    # The part of the full name before the first "/" is treated as the maintainer
    repo_maintainer = repo.full_name.split("/")[0]

    # Get the list of open PRs
    open_prs = list(repo.get_pulls(state='open', sort='created'))
    if not open_prs:
        raise Exception(
            "Insufficient number of open pull requests. Use different repository."
        )

    # Default to the first open PR; an explicit id overrides it
    open_pr = open_prs[0]
    if open_pr_id is not None:
        for pr in open_prs:
            if open_pr_id == pr.number:
                open_pr = pr
        if open_pr_id != open_pr.number:
            raise Exception("Open PR not found. Change Open PR ID.")

    print("[✔️] Using PR ID #", open_pr.number)

    # Get all the closed pull requests
    closed_prs = list(repo.get_pulls(state='closed'))
    if len(closed_prs) < 1:
        raise Exception(
            "Insufficient number of closed pull requests. Use different repository."
        )

    # Limit number of pull requests if limit_pr is set
    if limit_pr is not None and limit_pr < len(closed_prs):
        closed_prs = closed_prs[:limit_pr]

    print("[✔️] Parsed closed PRs.")

    # Bipartite graph: PR nodes (bipartite=0) and reviewer nodes (bipartite=1)
    graphz = nx.Graph()

    # Reviewers added to the graph, in first-seen order
    closed_prs_reviewers = []

    # Data loaded from the API, kept for building the corpus later
    closed_prs_meta = []

    for pr in closed_prs:
        # Fetch the comments once; PRs without comments are skipped
        comments = pr.get_issue_comments()
        if comments.totalCount == 0:
            continue

        pull_requester = pr.user.login
        pr_number = 'PR #' + str(pr.number)
        graphz.add_node(pr_number, type='Pull Request', bipartite=0)

        closed_prs_meta.append({
            'id': pr_number,
            'title': pr.title,
            'body': pr.body,
            'comments': comments,
        })

        for comment in comments:
            # Exclude users who are bots, the maintainer, or the PR submitter
            if comment.user is None:
                continue
            reviewer = comment.user.login
            if ('bot' in reviewer or reviewer == repo_maintainer
                    or reviewer == pull_requester):
                continue

            if reviewer not in closed_prs_reviewers:
                closed_prs_reviewers.append(reviewer)
                graphz.add_node(reviewer, type='user', bipartite=1)

            # Every further comment by the same reviewer raises the edge weight
            if graphz.has_edge(reviewer, pr_number):
                graphz[reviewer][pr_number]['weight'] += 1
            else:
                graphz.add_edge(reviewer,
                                pr_number,
                                weight=1,
                                type='reviews')

    print("[✔️] Built a bipartite graph.")

    # Generate document corpus for closed pull requests, keyed by PR id
    closed_prs_corpus = {}
    for pr in closed_prs_meta:
        doc = str(pr['title']) + " " + str(pr['body'])
        for comment in pr['comments']:
            doc += comment.body
        closed_prs_corpus[pr['id']] = _strip_code_mentions_urls(doc)

    print("[✔️] Closed PRs corpus generated.")

    # Corpus document and actual reviewers of the open PR
    open_pr_comments = open_pr.get_issue_comments()
    open_pr_corpus = str(open_pr.title) + "\n" + str(open_pr.body)
    for comment in open_pr_comments:
        open_pr_corpus += comment.body
    open_pr_corpus = _strip_code_mentions_urls(open_pr_corpus)

    print("[✔️] Open PR corpus generated.")

    open_pr_requester = open_pr.user.login

    open_pr_reviewers = []
    for comment in open_pr_comments:
        # Same None guard as for the closed PRs
        if comment.user is None:
            continue
        reviewer = comment.user.login
        # Exclude bot, maintainer and PR submitter
        if (open_pr_requester != reviewer
                and reviewer not in open_pr_reviewers
                and 'bot' not in reviewer and repo_maintainer != reviewer):
            open_pr_reviewers.append(reviewer)

    # Keep only the open PR reviewers that are in our graph. Building a new
    # list avoids removing from the list while iterating it, which skips
    # the element after each removal.
    open_pr_reviewers = [
        reviewer for reviewer in open_pr_reviewers
        if reviewer in closed_prs_reviewers
    ]

    # Get the similarity between all the closed PRs and the open PR
    similarity_matrix = lda_cosine_sim(closed_prs_meta, closed_prs_corpus,
                                       open_pr_corpus)

    print("[✔️] Calculated cosine similarity.")

    # NOTE(review): the scores are sorted on their own and then assigned to
    # the PRs by position, so a score may end up on a different PR than the
    # one it was computed for - confirm what lda_cosine_sim returns.
    similarity_matrix = sorted(similarity_matrix, reverse=True)
    top_similarity_matrix = {
        pr['id']: similarity_matrix[i]
        for i, pr in enumerate(closed_prs_meta)
    }

    # Keep the leading share of entries given by the similarity threshold
    top_sim_length = int(len(top_similarity_matrix) * similarity_threshold)
    top_similarity_matrix = dict(
        itertools.islice(top_similarity_matrix.items(), top_sim_length))

    print("[✔️] Selected top ", similarity_threshold * 100,
          "% PRs using similarity threshold.")

    # Work on a copy so the full bipartite graph stays untouched
    copied_barpartite_graphz = graphz.copy()

    pr_nodes = list(top_similarity_matrix)

    # Remove PR nodes other than the selected PR nodes
    for node in list(copied_barpartite_graphz.nodes):
        if 'PR #' in node and node not in pr_nodes:
            copied_barpartite_graphz.remove_node(node)

    # Store similarity scores on the PR nodes for use in the custom weight
    for node in copied_barpartite_graphz.nodes:
        if node in pr_nodes:
            copied_barpartite_graphz.nodes[node][
                'similarity'] = top_similarity_matrix[node]

    print("[✔️] Generated subgraph.")

    # Project the copied bipartite graph onto the reviewers
    projected_graphz = bipartite.generic_weighted_projected_graph(
        copied_barpartite_graphz,
        closed_prs_reviewers,
        weight_function=custom_weight)

    # Remove isolated nodes from the projected graph
    for node in list(nx.isolates(projected_graphz)):
        projected_graphz.remove_node(node)

    if len(projected_graphz.nodes) == 0:
        raise Exception("Use more similarity threshold.")

    print("[✔️] Subgraph projected into reviewer's graph.")

    # Run page rank algorithm in projected graph
    pagerank = nx.pagerank(projected_graphz,
                           alpha=0.85,
                           personalization=None,
                           max_iter=100,
                           tol=1e-06,
                           nstart=None,
                           weight='weight',
                           dangling=None)

    print("[✔️] Page rank calculated.")

    # Reviewers ordered by descending page rank score
    ranked = sorted(pagerank.items(), reverse=True, key=lambda x: x[1])
    pagerank_reviewers = [entry[0] for entry in ranked]

    # If there is recommendation limitation, limit it
    if limit_recomm is not None:
        pagerank_reviewers = pagerank_reviewers[:limit_recomm]

    print("[✔️] Success.")
    # Print the current reviewers
    print("Current reviewers", open_pr_reviewers)

    # Print the recommended reviewers
    print("Recommended reviewers", pagerank_reviewers)
Example #20
0
def get_projected_graph(graph, items):
    """Return the weighted projection of ``graph`` onto the set of ``items``."""
    item_set = set(items)
    return bipartite.generic_weighted_projected_graph(graph, item_set)
# In[14]:

# Load the user-item edges: the first column is the user, every remaining
# column is concatenated into the item id.
with open("User_Item_BIPARTITE_GRAPH___UserID__ItemID.tsv") as tsvfile:
    tsvreader = csv.reader(tsvfile, delimiter="\t")
    for line in tsvreader:
        user_node = ''.join(line[:1])
        item_node = ''.join(line[1:])
        B.add_node(user_node, bipartite=0)
        B.add_node(item_node, bipartite=1)
        B.add_edge(user_node, item_node)

# In[15]:

# Project onto the nodes tagged bipartite == 1 (the items).
item_nodes = [n for n, d in B.nodes(data=True) if d['bipartite'] == 1]
G = bipartite.generic_weighted_projected_graph(B, item_nodes)

# In[16]:

# Weighted adjacency matrix of the projection, in G's own node order.
# NOTE(review): to_scipy_sparse_matrix is not available in NetworkX 3.x.
M = nx.to_scipy_sparse_matrix(G,
                              nodelist=G.nodes(),
                              weight='weight',
                              dtype=float)

# In[17]:

# Divide the transposed matrix by the transposed row sums of M.
Mnorm = csr_matrix(M.T / M.sum(axis=1).T)

# In[275]: