Ejemplo n.º 1
0
def main():
    """Print core/fringe growth statistics for a graph read from an edge list.

    The single positional command-line argument ``graph`` is a path passed to
    ``nx.read_edgelist``. The "core" is every vertex whose degree is at least
    ``n ** 0.5`` (``n`` = number of nodes). The core is then grown in rounds:
    each round takes at most ``int(0.1 * n)`` of the highest-degree vertices
    returned by ``fringe(G, core_vertices)`` and merges them into the core,
    printing the sizes before the merge. Growth stops after
    ``int(1 / 0.1) + 1`` rounds or as soon as a round selects no vertices.

    ``fringe`` is defined elsewhere in the project; presumably it yields the
    vertices adjacent to, but outside of, the given set -- TODO confirm.

    Output is written with single-argument ``print(...)`` calls so the text is
    identical under Python 2 and Python 3.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('graph')
    args = parser.parse_args()

    G = nx.read_edgelist(args.graph)
    n = G.number_of_nodes()
    print("nodes: {}".format(n))
    print("edges: {}".format(G.number_of_edges()))

    core_exponent = 0.5
    # The threshold does not depend on the vertex, so compute it once.
    degree_threshold = n ** core_exponent
    # A list (not filter()) so that len() also works on Python 3.
    core_vertices = [v for v in G.nodes() if G.degree(v) >= degree_threshold]
    print("core vertices: {}".format(len(core_vertices)))
    core = G.subgraph(core_vertices)
    print("number of connected components in core: {}".format(
        nx.number_connected_components(core)))

    # BFS-like traversal: grow the core outwards, one bounded fringe at a time.
    fringe_fraction = 0.1
    max_fringe_size = int(n * fringe_fraction)
    core_vertices = set(core_vertices)
    for i in range(int(1 / fringe_fraction) + 1):
        # Highest-degree fringe vertices first, capped at max_fringe_size.
        by_degree = sorted(fringe(G, core_vertices),
                           key=lambda v: -G.degree(v))
        fringe_vertices = set(by_degree[:max_fringe_size])
        if not fringe_vertices:
            break
        print("{}: core={}, fringe={}".format(i + 1, len(core_vertices),
                                              len(fringe_vertices)))
        core_vertices |= fringe_vertices
Ejemplo n.º 2
0
def main():
    """Print core/fringe growth statistics for a graph read from an edge list.

    Command line: one positional argument ``graph``, a path handed to
    ``nx.read_edgelist``.

    Steps:
      1. Print the node and edge counts.
      2. Select as "core" all vertices with degree >= ``n ** 0.5`` and print
         how many there are and how many connected components they induce.
      3. For at most ``int(1 / 0.1) + 1`` rounds, take up to ``int(0.1 * n)``
         of the highest-degree vertices returned by
         ``fringe(G, core_vertices)``, print the current sizes, and merge them
         into the core. Stop early when a round selects nothing.

    ``fringe`` lives elsewhere in the project; presumably it returns the
    vertices bordering the given set -- TODO confirm.

    All output uses single-argument ``print(...)`` so that it reads the same on
    Python 2 and Python 3.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('graph')
    args = parser.parse_args()

    G = nx.read_edgelist(args.graph)
    n = G.number_of_nodes()
    print("nodes: {}".format(n))
    print("edges: {}".format(G.number_of_edges()))

    core_exponent = 0.5
    # Loop-invariant: evaluate the degree threshold a single time.
    min_core_degree = n ** core_exponent
    # Materialise a list; on Python 3 filter() is lazy and has no len().
    core_vertices = [v for v in G.nodes() if G.degree(v) >= min_core_degree]
    print("core vertices: {}".format(len(core_vertices)))
    core = G.subgraph(core_vertices)
    print("number of connected components in core: {}".format(nx.number_connected_components(core)))

    # BFS-like traversal: absorb the fringe round by round.
    fringe_fraction = 0.1
    max_fringe_size = int(n * fringe_fraction)
    core_vertices = set(core_vertices)
    for i in range(int(1 / fringe_fraction) + 1):
        ranked = sorted(fringe(G, core_vertices), key=lambda v: -G.degree(v))
        fringe_vertices = set(ranked[:max_fringe_size])
        if not fringe_vertices:
            break
        print("{}: core={}, fringe={}".format(i + 1, len(core_vertices), len(fringe_vertices)))
        core_vertices |= fringe_vertices
Ejemplo n.º 3
0
def find_embeddings(
    vertices,
    edges,
    mode,
    learning_rate=0.1,
    n_epoch=100,
    ratio_to_second=2.0,
    ratio_between_first=1.0,
    ratio_random=1.0,
    silent=False,
):
    """Find polar coordinates (r, phi) for each vertex.

    The numpy RNG is seeded with 0 on every call. The ``random`` module used
    for shuffling is not seeded here, so ``fit_random`` / ``fit_degrees``
    sampling is only reproducible if the caller seeds it.

    Args:
        vertices: iterable of hashable vertex ids.
        edges: sized collection of ``(v1, v2)`` pairs. It is used for
            membership tests against ``make_edge(...)`` results, so it is
            presumably a set of pairs normalised by ``make_edge`` -- TODO
            confirm against callers.
        mode: one of
            ``"random"``          r ~ U[0, 2*log(n)), phi ~ U[0, 2*pi);
            ``"degrees"``         r = 2*log(n / degree), phi ~ U[0, 2*pi);
            ``"fit_random"``      BFGS fit of ``Q`` against ``len(edges)``
                                  randomly chosen non-edges;
            ``"fit_degrees"``     BFGS fit against non-edges sampled around
                                  each vertex (see the ``ratio_*`` arguments);
            ``"fit_degrees_sgd"`` SGD training of a ``PoincareModel``, grown
                                  from a connected high-degree core;
            any other ``"fit*"``  BFGS fit against all non-edges.
        learning_rate: SGD learning rate (``fit_degrees_sgd`` only).
        n_epoch: number of SGD epochs (``fit_degrees_sgd`` only).
        ratio_to_second: per vertex, at most ``degree * ratio`` second
            neighbours are considered as non-edge partners (``fit_degrees``).
        ratio_between_first: per vertex, at most ``degree * ratio`` pairs of
            first neighbours are considered as non-edges (``fit_degrees``).
        ratio_random: per vertex, ``int(degree * ratio)`` random non-edges are
            added (``fit_degrees``).
        silent: when true the SGD optimizer is created with ``verbose=False``.

    Returns:
        ``{vertex: (r, phi)}`` for every mode except ``"fit_degrees_sgd"``,
        which returns the tuple
        ``(embedding_model.embedding["vertices"], {"core": list(G.edges())})``.

    Raises:
        ZeroDivisionError: in ``"degrees"`` and all ``"fit*"`` modes when a
            vertex has no incident edge.
        IndexError: in ``"fit_degrees_sgd"`` when the candidates run out
            before the core becomes connected (e.g. a disconnected graph).
        Exception: ``"unknown mode"`` for an unrecognised ``mode``.
    """
    vertices = list(vertices)
    n = len(vertices)
    R = 2 * np.log(n)

    print("mode: {}".format(mode))
    np.random.seed(0)
    degrees = defaultdict(int)
    print("count degrees")
    for v1, v2 in edges:
        degrees[v1] += 1
        degrees[v2] += 1

    def degree_radius(v):
        # float() forces true division: on Python 2 ``n / degrees[v]`` would
        # floor the ratio and distort r = 2*log(n/k).
        return 2 * np.log(float(n) / degrees[v])

    if mode == "random":
        # phi=rand(0, 2pi), r = rand(0,R)
        return {v: (np.random.uniform(0.0, R), np.random.uniform(0.0, 2 * np.pi)) for v in vertices}
    elif mode == "degrees":
        # phi=rand(0,2pi), r = 2log(n/k)
        return {v: (degree_radius(v), np.random.uniform(0.0, 2 * np.pi)) for v in vertices}
    elif mode.startswith("fit"):
        # Starting point: the "degrees" layout, flattened as
        # [r_0, phi_0, r_1, phi_1, ...].
        radii = [degree_radius(v) for v in vertices]
        angles = [np.random.uniform(0.0, 2 * np.pi) for _ in vertices]
        x0 = np.array([coord for r_phi in zip(radii, angles) for coord in r_phi])

        def collect_all_nedges():
            # Every unordered vertex pair that is not an edge. This is O(n^2),
            # so it is only built by the branches that need it.
            found = set()
            for a, b in combinations(vertices, 2):
                candidate = make_edge(a, b)
                if candidate not in edges:
                    found.add(candidate)
            return found

        nedges = set()
        if mode == "fit_random":
            a = list(collect_all_nedges())
            random.shuffle(a)
            nedges = set(a[: len(edges)])
        elif mode == "fit_degrees":
            K = float(ratio_to_second)  # ratio of nedges to second neighbour
            L = float(ratio_between_first)  # ratio of nedges between first neighbours
            M = float(ratio_random)  # ratio of random nedges

            G = nx.Graph()
            G.add_edges_from(edges)
            srt_vertices = sorted(degrees.keys(), key=lambda v: -degrees[v])
            shuf_vertices = srt_vertices[:]
            random.shuffle(shuf_vertices)
            for v in srt_vertices:
                first_neigh = set(G.neighbors(v))
                second_neigh = set()
                for neigh in first_neigh:
                    second_neigh.update(G.neighbors(neigh))
                second_neigh.discard(v)

                # from v to second neighbours
                for i, sec_n in enumerate(second_neigh):
                    if i + 1 > degrees[v] * K:
                        # The index only grows, so nothing later can qualify.
                        break
                    e = make_edge(v, sec_n)
                    # A neighbour of a neighbour may itself be adjacent to v;
                    # a real edge must never be recorded as a non-edge.
                    if e not in edges:
                        nedges.add(e)

                # between first neighbours
                for j, (v1, v2) in enumerate(combinations(first_neigh, 2)):
                    if j + 1 > degrees[v] * L:
                        break
                    e = make_edge(v1, v2)
                    # Two neighbours of v may be connected to each other.
                    if e not in edges:
                        nedges.add(e)

                # random edges
                max_n_random_vertices = int(degrees[v] * M)
                n_random_vertices = 0
                for rand_v in shuf_vertices:
                    if n_random_vertices >= max_n_random_vertices:
                        break
                    if rand_v == v:
                        # A vertex paired with itself is not a non-edge.
                        continue
                    e = make_edge(v, rand_v)
                    if e not in nedges and e not in edges:
                        nedges.add(e)
                        n_random_vertices += 1
        else:
            nedges = collect_all_nedges()
        print("number of nedges={}".format(len(nedges)))
        # NOTE(review): q / grad_q (and x0) are not used by the
        # "fit_degrees_sgd" branch below, but they are still built for it as
        # in the original flow, since Q/GradQ construction is opaque from here.
        q = Q(vertices, edges, nedges)
        grad_q = GradQ(vertices, edges, nedges)
        if mode == "fit_degrees_sgd":
            print("Learning rate: {}".format(learning_rate))
            print("Ratio to second: {}".format(ratio_to_second))
            print("Ratio between first: {}".format(ratio_between_first))
            print("Ratio random: {}".format(ratio_random))

            G = nx.Graph()
            G.add_edges_from(edges)

            # construct connected(!) core
            core_exponent = 0.4
            core_threshold = n ** core_exponent
            core_vertices, fringe_vertices = [], []
            # one-pass split by condition
            for v in vertices:
                if degrees[v] >= core_threshold:
                    core_vertices.append(v)
                else:
                    fringe_vertices.append(v)
            # add vertices (highest degree first) to ensure connectivity of core
            fringe_vertices.sort(key=lambda v: -degrees[v])
            # nx.is_connected is undefined for the null graph, so an empty
            # core is seeded with the best candidate before being tested.
            while not core_vertices or not nx.is_connected(G.subgraph(core_vertices)):
                core_vertices.append(fringe_vertices.pop(0))

            print("Core size: {}".format(len(core_vertices)))
            G_core = G.subgraph(core_vertices)
            print("Is core connected: {}".format(nx.is_connected(G_core)))

            loss_function = LogLoss(binary_edges=True)
            optimizer = SGD(n_epoch=n_epoch, learning_rate=learning_rate, verbose=not silent)

            FRINGE_FRACTION = 0.1
            max_fringe_size = int(G.number_of_nodes() * FRINGE_FRACTION)
            curr_graph = G_core
            curr_core_vertices = set(core_vertices)
            # Train the core on its own first.
            curr_embedding_model = PoincareModel(curr_graph, fit_radius=False)
            curr_pair_generator = BinaryPairGenerator(curr_graph, batch_size=1)
            optimizer.optimize_embedding(curr_embedding_model, loss_function, curr_pair_generator)
            # Then repeatedly attach a bounded batch of fringe vertices,
            # keeping the already-embedded vertices fixed during each round.
            for i in range(int(1 / FRINGE_FRACTION) + 1):
                total_fringe = fringe(G, curr_core_vertices)
                fringe_vertices = set(sorted(total_fringe, key=lambda v: -G.degree(v))[:max_fringe_size])
                if not fringe_vertices:
                    break
                curr_graph = G.subgraph(curr_core_vertices | fringe_vertices)
                curr_embedding_model = PoincareModel(curr_graph, fit_radius=False, init_embedding=curr_embedding_model)
                curr_pair_generator = BinaryPairGenerator(curr_graph, batch_size=1)
                optimizer.optimize_embedding(
                    curr_embedding_model, loss_function, curr_pair_generator, fixed_vertices=curr_core_vertices
                )

                curr_core_vertices |= fringe_vertices

            embedding_model = curr_embedding_model
            return (embedding_model.embedding["vertices"], {"core": list(G.edges())})
        else:
            # Two spaces after the colon reproduce the Python 2 output of
            # ``print "Check gradient: ", value``.
            print("Check gradient:  " + str(check_grad(q, grad_q, x0)))
            res = minimize(q, x0, method="BFGS", jac=grad_q)
            x = res.x

        # Unflatten [r_0, phi_0, r_1, phi_1, ...] back into per-vertex pairs.
        return {v: (x[2 * i], x[2 * i + 1]) for i, v in enumerate(vertices)}
    else:
        raise Exception("unknown mode")
Ejemplo n.º 4
0
def find_embeddings(vertices,
                    edges,
                    mode,
                    learning_rate=0.1,
                    n_epoch=100,
                    ratio_to_second=2.,
                    ratio_between_first=1.,
                    ratio_random=1.,
                    silent=False):
    """Find polar coordinates (r, phi) for each vertex.

    The numpy RNG is re-seeded with 0 on each call; the ``random`` module that
    drives the shuffles is left untouched, so the sampling in ``fit_random``
    and ``fit_degrees`` is reproducible only when the caller seeds ``random``.

    Args:
        vertices: iterable of hashable vertex ids.
        edges: sized collection of ``(v1, v2)`` pairs. Results of
            ``make_edge(...)`` are looked up in it, so it is presumably a set
            of ``make_edge``-normalised pairs -- TODO confirm against callers.
        mode: selects the algorithm:
            ``'random'``          r ~ U[0, 2*log(n)), phi ~ U[0, 2*pi);
            ``'degrees'``         r = 2*log(n / degree), phi ~ U[0, 2*pi);
            ``'fit_random'``      BFGS on ``Q`` with ``len(edges)`` random
                                  non-edges;
            ``'fit_degrees'``     BFGS on ``Q`` with non-edges sampled around
                                  every vertex (controlled by ``ratio_*``);
            ``'fit_degrees_sgd'`` SGD on a ``PoincareModel`` that starts from a
                                  connected high-degree core and is extended
                                  fringe by fringe;
            other ``'fit*'``      BFGS on ``Q`` with all non-edges.
        learning_rate: SGD learning rate (``'fit_degrees_sgd'`` only).
        n_epoch: SGD epoch count (``'fit_degrees_sgd'`` only).
        ratio_to_second: caps the second neighbours examined per vertex at
            ``degree * ratio`` (``'fit_degrees'``).
        ratio_between_first: caps the first-neighbour pairs examined per
            vertex at ``degree * ratio`` (``'fit_degrees'``).
        ratio_random: ``int(degree * ratio)`` random non-edges are added per
            vertex (``'fit_degrees'``).
        silent: passed to the SGD optimizer as ``verbose=not silent``.

    Returns:
        A dict ``{vertex: (r, phi)}``, except for ``'fit_degrees_sgd'`` which
        returns ``(embedding_model.embedding['vertices'],
        {'core': list(G.edges())})``.

    Raises:
        ZeroDivisionError: a vertex without incident edges in ``'degrees'`` or
            any ``'fit*'`` mode.
        IndexError: ``'fit_degrees_sgd'`` ran out of candidate vertices before
            the core became connected (e.g. the graph is disconnected).
        Exception: ``'unknown mode'`` when ``mode`` is not recognised.
    """
    vertices = list(vertices)
    n = len(vertices)
    R = 2 * np.log(n)

    print("mode: {}".format(mode))
    np.random.seed(0)
    degrees = defaultdict(int)
    print("count degrees")
    for v1, v2 in edges:
        degrees[v1] += 1
        degrees[v2] += 1

    def radius_from_degree(v):
        # Python 2 would floor ``n / degrees[v]`` for two ints; convert first
        # so that r = 2*log(n/k) uses the exact ratio.
        return 2 * np.log(float(n) / degrees[v])

    if mode == 'random':
        # phi=rand(0, 2pi), r = rand(0,R)
        return {
            v: (np.random.uniform(0.0, R), np.random.uniform(0.0, 2 * np.pi))
            for v in vertices
        }
    elif mode == 'degrees':
        # phi=rand(0,2pi), r = 2log(n/k)
        return {
            v: (radius_from_degree(v), np.random.uniform(0.0, 2 * np.pi))
            for v in vertices
        }
    elif mode.startswith('fit'):
        # Initial guess is the 'degrees' layout, interleaved as
        # [r_0, phi_0, r_1, phi_1, ...].
        init_r = [radius_from_degree(v) for v in vertices]
        init_phi = [np.random.uniform(0.0, 2 * np.pi) for _ in vertices]
        x0 = []
        for r, phi in zip(init_r, init_phi):
            x0.extend((r, phi))
        x0 = np.array(x0)

        def build_all_nedges():
            # All unordered vertex pairs that are not edges. Quadratic in the
            # number of vertices, hence computed only on demand.
            result = set()
            for a, b in combinations(vertices, 2):
                pair = make_edge(a, b)
                if pair not in edges:
                    result.add(pair)
            return result

        nedges = set()
        if mode == 'fit_random':
            a = list(build_all_nedges())
            random.shuffle(a)
            nedges = set(a[:len(edges)])
        elif mode == 'fit_degrees':
            K = float(ratio_to_second)  # ratio of nedges to second neighbour
            L = float(ratio_between_first)  # ratio of nedges between first neighbours
            M = float(ratio_random)  # ratio of random nedges

            G = nx.Graph()
            G.add_edges_from(edges)
            srt_vertices = sorted(degrees.keys(), key=lambda v: -degrees[v])
            shuf_vertices = srt_vertices[:]
            random.shuffle(shuf_vertices)
            for v in srt_vertices:
                # get first neighbours
                first_neigh = set(G.neighbors(v))
                # get second neighbours
                second_neigh = set()
                for neigh in first_neigh:
                    second_neigh.update(G.neighbors(neigh))
                second_neigh.discard(v)

                # from v to second neighbours
                for i, sec_n in enumerate(second_neigh):
                    if i + 1 > degrees[v] * K:
                        # Quota reached; later indices cannot qualify either.
                        break
                    e = make_edge(v, sec_n)
                    # second_neigh can contain direct neighbours of v, and an
                    # existing edge must not be stored as a non-edge.
                    if e not in edges:
                        nedges.add(e)

                # between first neighbours
                for j, (v1, v2) in enumerate(combinations(first_neigh, 2)):
                    if j + 1 > degrees[v] * L:
                        break
                    e = make_edge(v1, v2)
                    # Neighbours of v may be adjacent to each other.
                    if e not in edges:
                        nedges.add(e)

                # random edges
                max_n_random_vertices = int(degrees[v] * M)
                n_random_vertices = 0
                for rand_v in shuf_vertices:
                    if n_random_vertices >= max_n_random_vertices:
                        break
                    if rand_v == v:
                        # Skip the degenerate pair (v, v).
                        continue
                    e = make_edge(v, rand_v)
                    if e not in nedges and e not in edges:
                        nedges.add(e)
                        n_random_vertices += 1
        else:
            nedges = build_all_nedges()
        print("number of nedges={}".format(len(nedges)))
        # NOTE(review): the 'fit_degrees_sgd' branch never reads q, grad_q or
        # x0; they are still constructed for it, as before, because the
        # Q/GradQ constructors are opaque from this function.
        q = Q(vertices, edges, nedges)
        grad_q = GradQ(vertices, edges, nedges)
        if mode == 'fit_degrees_sgd':
            print("Learning rate: {}".format(learning_rate))
            print("Ratio to second: {}".format(ratio_to_second))
            print("Ratio between first: {}".format(ratio_between_first))
            print("Ratio random: {}".format(ratio_random))

            G = nx.Graph()
            G.add_edges_from(edges)

            # construct connected(!) core
            core_exponent = 0.4
            min_core_degree = n**core_exponent
            core_vertices, fringe_vertices = [], []
            # one-pass split by condition
            for v in vertices:
                if degrees[v] >= min_core_degree:
                    core_vertices.append(v)
                else:
                    fringe_vertices.append(v)
            # add vertices (highest degree first) to ensure connectivity of core
            fringe_vertices.sort(key=lambda v: -degrees[v])
            # Connectivity is undefined for the null graph, so an empty core
            # first receives the best candidate instead of being tested.
            while (not core_vertices
                   or not nx.is_connected(G.subgraph(core_vertices))):
                core_vertices.append(fringe_vertices.pop(0))

            print("Core size: {}".format(len(core_vertices)))
            G_core = G.subgraph(core_vertices)
            print("Is core connected: {}".format(nx.is_connected(G_core)))

            loss_function = LogLoss(binary_edges=True)
            optimizer = SGD(n_epoch=n_epoch,
                            learning_rate=learning_rate,
                            verbose=not silent)

            FRINGE_FRACTION = 0.1
            max_fringe_size = int(G.number_of_nodes() * FRINGE_FRACTION)
            curr_graph = G_core
            curr_core_vertices = set(core_vertices)
            # Embed the core alone first.
            curr_embedding_model = PoincareModel(curr_graph, fit_radius=False)
            curr_pair_generator = BinaryPairGenerator(curr_graph, batch_size=1)
            optimizer.optimize_embedding(curr_embedding_model, loss_function,
                                         curr_pair_generator)
            # Then add bounded batches of fringe vertices; vertices embedded in
            # earlier rounds stay fixed while the new ones are optimised.
            for i in range(int(1 / FRINGE_FRACTION) + 1):
                total_fringe = fringe(G, curr_core_vertices)
                fringe_vertices = set(
                    sorted(total_fringe,
                           key=lambda v: -G.degree(v))[:max_fringe_size])
                if not fringe_vertices:
                    break
                curr_graph = G.subgraph(curr_core_vertices | fringe_vertices)
                curr_embedding_model = PoincareModel(
                    curr_graph,
                    fit_radius=False,
                    init_embedding=curr_embedding_model)
                curr_pair_generator = BinaryPairGenerator(curr_graph,
                                                          batch_size=1)
                optimizer.optimize_embedding(curr_embedding_model,
                                             loss_function,
                                             curr_pair_generator,
                                             fixed_vertices=curr_core_vertices)

                curr_core_vertices |= fringe_vertices

            embedding_model = curr_embedding_model
            return (embedding_model.embedding['vertices'], {
                'core': list(G.edges())
            })
        else:
            # The double space matches what Python 2 printed for
            # ``print "Check gradient: ", value``.
            print("Check gradient:  " + str(check_grad(q, grad_q, x0)))
            res = minimize(q, x0, method='BFGS', jac=grad_q)
            x = res.x

        # Split the flat solution back into one (r, phi) pair per vertex.
        retval = {}
        for i, v in enumerate(vertices):
            retval[v] = (x[2 * i], x[2 * i + 1])

        return retval
    else:
        raise Exception('unknown mode')