Beispiel #1
0
 def test_empty(self):
     """An empty graph yields empty HITS results and 0x0 hub/authority matrices."""
     empty = networkx.Graph()
     no_scores = ({}, {})
     # Each HITS implementation must return a pair of empty dicts.
     assert_equal(networkx.hits(empty), no_scores)
     assert_equal(networkx.hits_numpy(empty), no_scores)
     assert_equal(networkx.hits_scipy(empty), no_scores)
     # The matrix builders must return objects whose shape is (0, 0).
     authority = networkx.authority_matrix(empty)
     assert_equal(authority.shape, (0, 0))
     hub = networkx.hub_matrix(empty)
     assert_equal(hub.shape, (0, 0))
Beispiel #2
0
 def test_empty_scipy(self):
     """hits_scipy on an empty graph returns a pair of empty dicts.

     The test is skipped when scipy cannot be imported.
     """
     try:
         import scipy  # imported only to detect availability
     except ImportError:
         raise SkipTest('scipy not available.')
     empty = networkx.Graph()
     expected = ({}, {})
     assert_equal(networkx.hits_scipy(empty), expected)
def prepare_hubs_auth():
    """Copy the HITS scores of the module-level graph ``G`` into the score dicts.

    Runs ``nx.hits_scipy(G)`` and stores every hub score in the module-level
    ``papers_hub`` mapping and every authority score in ``papers_auth``, keyed
    by the same ids that ``hits_scipy`` returned. Entries already present
    under other keys are left untouched.
    """
    global papers_auth
    global papers_hub
    # Save Hubs and Authority Scores
    hub_scores, auth_scores = nx.hits_scipy(G)
    # .items() works on both Python 2 and 3; .iteritems() exists only on
    # Python 2. The loop variable is no longer called ``id`` so the builtin
    # is not shadowed.
    for node_id, hub_score in hub_scores.items():
        papers_hub[node_id] = hub_score
    for node_id, auth_score in auth_scores.items():
        papers_auth[node_id] = auth_score
Beispiel #4
0
 def test_scipy_hits(self):
     """Compare hits_scipy output on self.G with the stored reference values.

     ``sorted(h)`` and ``sorted(a)`` are compared pairwise against
     ``self.G.h`` and ``self.G.a`` to 5 decimal places. If an ImportError
     is raised, a message is printed and the test returns without failing.
     """
     G=self.G
     try:
         h,a=networkx.hits_scipy(G,tol=1.e-08)
         # NOTE(review): if h and a are dicts, sorted() iterates their keys
         # rather than their scores -- confirm this is the intended check.
         for (x,y) in zip(sorted(h),self.G.h):
             assert_almost_equal(x,y,places=5)
         for (x,y) in zip(sorted(a),self.G.a):
             assert_almost_equal(x,y,places=5)
     except ImportError:
         # print() with one argument behaves the same on Python 2 and 3;
         # the bare print statement is a SyntaxError on Python 3.
         print("Skipping scipy_numpy test")
 def test_scipy_hits(self):
     """Compare hits_scipy output on self.G with the stored reference values.

     The test is skipped when scipy cannot be imported.
     """
     graph = self.G
     try:
         import scipy  # imported only to detect availability
     except ImportError:
         raise SkipTest('scipy not available.')
     hub_scores, auth_scores = networkx.hits_scipy(graph, tol=1.e-08)
     # NOTE(review): if the results are dicts, sorted() iterates their keys
     # rather than their scores -- confirm this is the intended check.
     for got, want in zip(sorted(hub_scores), self.G.h):
         assert_almost_equal(got, want, places=5)
     for got, want in zip(sorted(auth_scores), self.G.a):
         assert_almost_equal(got, want, places=5)
 def test_empty(self):
     """An empty graph yields empty HITS results and 0x0 hub/authority matrices.

     The test is skipped when numpy cannot be imported.
     """
     try:
         import numpy  # imported only to detect availability
     except ImportError:
         raise SkipTest('numpy not available.')
     empty = networkx.Graph()
     no_scores = ({}, {})
     # Each HITS implementation must return a pair of empty dicts.
     assert_equal(networkx.hits(empty), no_scores)
     assert_equal(networkx.hits_numpy(empty), no_scores)
     assert_equal(networkx.hits_scipy(empty), no_scores)
     # The matrix builders must return objects whose shape is (0, 0).
     authority = networkx.authority_matrix(empty)
     assert_equal(authority.shape, (0, 0))
     hub = networkx.hub_matrix(empty)
     assert_equal(hub.shape, (0, 0))
Beispiel #7
0
    def test_hits_scipy(self):
        """Check per-node hits_scipy scores on self.G against G.h and G.a.

        Scores are compared to 4 decimal places. The test is skipped when
        SciPy cannot be imported.
        """
        try:
            import scipy as sp  # imported only to detect availability
        except ImportError:
            raise SkipTest('SciPy not available.')

        graph = self.G
        hub_scores, auth_scores = networkx.hits_scipy(graph, tol=1.e-08)
        # Hubs are verified for every node before any authority is checked.
        for node in graph:
            assert_almost_equal(hub_scores[node], graph.h[node], places=4)
        for node in graph:
            assert_almost_equal(auth_scores[node], graph.a[node], places=4)
Beispiel #8
0
    def test_hubs_authority_matrix(self):
        """Exercise hub_matrix/authority_matrix and check hits_scipy on self.G.

        The dominant eigenvectors of the hub and authority matrices are
        computed and normalised, then hits_scipy is run and its sorted
        results are compared with self.G.h and self.G.a to 5 places. If an
        ImportError is raised, a message is printed and the test returns
        without failing.
        """
        G=self.G
        try:
            import numpy
            import numpy.linalg
            H=networkx.hub_matrix(G,nodelist=None)
            e,ev=numpy.linalg.eig(H)
            m=e.argsort()[-1] # index of maximum eigenvalue
            h=numpy.array(ev[:,m]).flatten()

            A=networkx.authority_matrix(G,nodelist=None)
            e,ev=numpy.linalg.eig(A)
            m=e.argsort()[-1] # index of maximum eigenvalue
            a=numpy.array(ev[:,m]).flatten()
            h=h/h.sum()
            a=a/a.sum()
            # NOTE(review): the eigenvector-based h and a are overwritten
            # here, so only the hits_scipy values reach the assertions below.
            h,a=networkx.hits_scipy(G,tol=1.e-08)
            for (x,y) in zip(sorted(h),self.G.h):
                assert_almost_equal(x,y,places=5)
            for (x,y) in zip(sorted(a),self.G.a):
                assert_almost_equal(x,y,places=5)
        except ImportError:
            # print() with one argument behaves the same on Python 2 and 3;
            # the bare print statement is a SyntaxError on Python 3.
            print("Skipping hub_authority_matrix test")
Beispiel #9
0
    def test_hubs_authority_matrix(self):
        """Exercise hub_matrix/authority_matrix and check hits_scipy on self.G.

        The test is skipped when numpy cannot be imported.
        """
        graph = self.G
        try:
            import numpy
            import numpy.linalg
        except ImportError:
            raise SkipTest('numpy not available.')

        # Dominant eigenvector of the hub matrix.
        hub_mat = networkx.hub_matrix(graph, nodelist=None)
        eigvals, eigvecs = numpy.linalg.eig(hub_mat)
        top = eigvals.argsort()[-1]  # position of the largest eigenvalue
        hub_scores = numpy.array(eigvecs[:, top]).flatten()

        # Dominant eigenvector of the authority matrix.
        auth_mat = networkx.authority_matrix(graph, nodelist=None)
        eigvals, eigvecs = numpy.linalg.eig(auth_mat)
        top = eigvals.argsort()[-1]  # position of the largest eigenvalue
        auth_scores = numpy.array(eigvecs[:, top]).flatten()

        hub_scores = hub_scores / hub_scores.sum()
        auth_scores = auth_scores / auth_scores.sum()

        # NOTE(review): the eigenvector-based scores are replaced here, so
        # only the hits_scipy values reach the assertions below.
        hub_scores, auth_scores = networkx.hits_scipy(graph, tol=1.e-08)
        for got, want in zip(sorted(hub_scores), self.G.h):
            assert_almost_equal(got, want, places=5)
        for got, want in zip(sorted(auth_scores), self.G.a):
            assert_almost_equal(got, want, places=5)
Beispiel #10
0
def hubs_and_authorities(graph):
    """Return whatever ``nx.hits_scipy`` produces for *graph*, unchanged."""
    scores = nx.hits_scipy(graph)
    return scores
Beispiel #11
0
def HITS_alg(G):
    """Run ``nx.hits_scipy`` on *G* with 100000000000 as its second argument.

    NOTE(review): the second positional parameter is presumably the
    iteration cap -- confirm against the installed NetworkX version.
    """
    second_positional = 100000000000
    return nx.hits_scipy(G, second_positional)
def determine_best_matches_for_pypi_import(
    mapping: List[Dict[str, Any]],
    cf_graph: str,
):
    """Choose one conda package per import name and rank every candidate.

    Args:
        mapping: Mapping entries. Each entry is read through its
            ``"conda_name"`` and ``"import_name"`` keys; an optional
            ``"mapping_source"`` key influences the ranking. When several
            entries share a conda name, the last one wins.
        cf_graph: Directory that contains ``graph.json``. The file
            ``ranked_hubs_authorities.json`` is written into the same
            directory as a side effect.

    Returns:
        A tuple ``(final_map, ordered_import_names)``. ``final_map`` maps each
        import name to the mapping entry of the chosen conda package.
        ``ordered_import_names`` is a list of dicts with the keys
        ``"import_name"`` and ``"ranked_conda_names"``; the latter is a list
        ordered from least to most preferred, so the chosen package is last.
    """
    map_by_import_name = defaultdict(set)
    map_by_conda_name = {}
    final_map = {}
    ordered_import_names = []

    for m in mapping:
        conda_name = m["conda_name"]
        map_by_import_name[m["import_name"]].add(conda_name)
        map_by_conda_name[conda_name] = m

    graph_file = str(pathlib.Path(cf_graph) / "graph.json")
    gx = load_graph(graph_file)
    # TODO: filter out archived feedstocks?

    # Best effort: if the clobbering list cannot be fetched or parsed, carry
    # on with an empty set so that no package is treated as clobbering.
    # NOTE(review): no timeout is passed, so this request can block
    # indefinitely -- consider adding one.
    try:
        clobberers = loads(
            requests.get(
                "https://raw.githubusercontent.com/regro/libcfgraph/master/"
                "clobbering_pkgs.json", ).text, )
    except Exception as e:
        print(e)
        clobberers = set()
    import networkx

    # computes hubs and authorities.
    # hubs are centralized sources (eg numpy)
    # whilst authorities are packages with many edges to them.
    hubs, authorities = networkx.hits_scipy(gx)

    mapping_src_weights = {
        "static": 1,
        "regro-bot": 2,
        "other": 3,
    }

    def _score(conda_name,
               conda_name_is_feedstock_name=True,
               pkg_clobbers=False):
        """A higher score means less preferred"""
        mapping_src = map_by_conda_name.get(conda_name, {}).get(
            "mapping_source",
            "other",
        )
        mapping_src_weight = mapping_src_weights.get(mapping_src, 99)
        # Tuples compare element by element, so earlier criteria dominate.
        return (
            # prefer static mapped packages over inferred
            mapping_src_weight,
            int(pkg_clobbers),
            # A higher hub score means more centrality in the graph
            -hubs.get(conda_name, 0),
            # A lower authority score means fewer dependencies
            authorities.get(conda_name, 0),
            # prefer pkgs that match feedstocks
            -int(conda_name_is_feedstock_name),
            conda_name,
        )

    def score(pkg_name):
        """Base the score on

        Packages that are hubs are preferred.
        In the event of ties, fall back to the one with the lower authority score
        which means in this case, fewer dependencies
        """
        # A name missing from outputs_lut is scored as itself.
        conda_names = gx.graph["outputs_lut"].get(pkg_name, {pkg_name})
        return min(
            _score(
                conda_name,
                conda_name_is_feedstock_name=(conda_name == pkg_name),
                pkg_clobbers=pkg_name in clobberers,
            ) for conda_name in conda_names)

    pkgs = list(gx.graph["outputs_lut"])
    ranked_list = sorted(pkgs, key=score)
    with open(pathlib.Path(cf_graph) / "ranked_hubs_authorities.json",
              "w") as f:
        dump(ranked_list, f)

    for import_name, candidates in sorted(map_by_import_name.items()):
        if len(candidates) > 1:
            ranked_candidates = sorted(candidates, key=score)
            winner = ranked_candidates[0]
            print(
                f"needs {import_name} <- provided_by: {candidates} : chosen {winner}"
            )
            final_map[import_name] = map_by_conda_name[winner]
            ordered_import_names.append(
                {
                    "import_name": import_name,
                    # A real list rather than a reversed() iterator: it can
                    # be traversed more than once and serialised.
                    "ranked_conda_names": ranked_candidates[::-1],
                }, )
        else:
            # Every set in map_by_import_name holds at least one name.
            candidate = next(iter(candidates))
            final_map[import_name] = map_by_conda_name[candidate]
            ordered_import_names.append(
                {
                    "import_name": import_name,
                    "ranked_conda_names": [candidate]
                }, )

    return final_map, ordered_import_names
Beispiel #13
0
def network_from_df(path="data/high-energy-trimmed.txt"):
    """Build a directed graph from a tab-separated edge-list file.

    Args:
        path: File to read as tab-separated values.

    Returns:
        An ``nx.DiGraph`` whose nodes are the unique values of the second
        column and whose edges run from the first column to the second
        column of every row.
    """
    # Read the file once, with the tab separator. The earlier extra read with
    # the default separator was discarded immediately.
    df = pd.read_csv(path, sep="\t")
    G = nx.DiGraph()
    nodes = df.iloc[:, 1].unique().tolist()
    # DataFrame.as_matrix() was removed in pandas 1.0; .values exposes the
    # same array on old and new pandas versions.
    edges = [(row[0], row[1]) for row in df.values]
    G.add_nodes_from(nodes)
    G.add_edges_from(edges)
    return G


# 2

# A
# Centrality measures computed on the graph ``g``.
hits = nx.hits_scipy(g)
pagerank = nx.pagerank_scipy(g)  # default .85
eigen = nx.eigenvector_centrality(g)
degree = nx.degree_centrality(g)


def get_top_hubs(hits):
    """Apply get_top_nodes to the first element of a HITS result pair."""
    return get_top_nodes(hits[0])


def get_top_auths(hits):
    """Apply get_top_nodes to the second element of a HITS result pair."""
    return get_top_nodes(hits[1])


def get_top_nodes(d, n=20):
    """Return the keys of the first *n* items of *d*, sorted by ascending value.

    Args:
        d: Mapping from key to a sortable value.
        n: Maximum number of keys to return.

    Returns:
        A list of at most *n* keys. Keys with equal values keep the
        iteration order of *d*.

    NOTE(review): the sort is ascending, so these are the keys with the
    smallest values even though the name says "top" -- confirm whether
    descending order was intended.
    """
    ranked = sorted(d.items(), key=lambda item: item[1])
    # Build a list explicitly: map() returns an iterator on Python 3 and
    # cannot be sliced.
    return [key for key, _ in ranked[:n]]


# Rank each centrality measure in turn; the results are not stored.
for _scores in (degree, eigen, pagerank):
    get_top_nodes(_scores)
Beispiel #14
0
 def test_empty_scipy(self):
     """hits_scipy on an empty graph returns ({}, {}); skipped without scipy."""
     # Called only for its skip behaviour; the returned module is not needed.
     pytest.importorskip('scipy')
     empty = networkx.Graph()
     result = networkx.hits_scipy(empty)
     assert result == ({}, {})
Beispiel #15
0
 def _hits(graph):
     """Return, for every node, the sum of its two HITS scores.

     The scores come from ``nx.hits_scipy(graph, tol=1e-1, max_iter=100)``.
     The result is keyed by the entries of the first returned mapping.
     """
     first_scores, second_scores = nx.hits_scipy(graph, tol=1e-1, max_iter=100)
     combined = {}
     for node, score in first_scores.items():
         combined[node] = score + second_scores[node]
     return combined
Beispiel #16
0
 def calculate_HITS_centrality(self):
     """Compute HITS scores for self.graph and store them as node attributes.

     The hub scores are stored under the attribute name 'Hub' and the
     authority scores under 'authority'.

     Returns:
         A tuple ``(g, h, a)``: the graph, the hub scores and the authority
         scores as returned by ``nx.hits_scipy``.
     """
     g = self.graph
     h, a = nx.hits_scipy(g, max_iter=900)
     # Keyword arguments are used because the positional order differs
     # between NetworkX 1.x (G, name, values) and 2.x (G, values, name).
     # The parameter names are the same in both.
     nx.set_node_attributes(g, name='Hub', values=h)
     nx.set_node_attributes(g, name='authority', values=a)
     return g, h, a
Beispiel #17
0

def get_top_keys(dictionary, top):
    """Return the *top* (key, value) pairs of *dictionary* with the largest values.

    Args:
        dictionary: Mapping from key to a sortable value.
        top: Maximum number of pairs to return.

    Returns:
        A list of ``(key, value)`` tuples ordered by descending value. Pairs
        with equal values keep the iteration order of *dictionary*.
    """
    # sorted() accepts the view that dict.items() returns on Python 3, which
    # has no in-place .sort() method.
    ranked = sorted(dictionary.items(), key=lambda item: item[1], reverse=True)
    return ranked[:top]


# Script: load the wiki-Talk edge list, run HITS and write the 100 highest
# scoring hubs and authorities to text files under results/.
# print() with one argument behaves the same on Python 2 and 3; the bare
# print statement is a SyntaxError on Python 3.
print("Reading in Full Graph.")
stdout.flush()
g = read_edgelist('data/wiki-Talk.txt', create_using=DiGraph(), nodetype=int)

print("HITS.")
stdout.flush()

hubs, authorities = hits_scipy(g)

# Context managers close each report even if a write fails, and the builtin
# name ``file`` is no longer shadowed.
with open("results/hubs.txt", "w+") as out:
    out.write("Top 100 Hubs by HITS\n")
    for node in get_top_keys(hubs, 100):
        out.write("{}, {}\n".format(node[0], node[1]))

with open("results/authorities.txt", "w+") as out:
    out.write("Top 100 Authorities by HITS\n")
    for node in get_top_keys(authorities, 100):
        out.write("{}, {}\n".format(node[0], node[1]))

print("We Done Here.")