Esempio n. 1
0
def get_graph(filename='sample') -> LightMultiGraph:
    """
    Build a LightMultiGraph from a built-in example or from an edge-list file.

    :param filename: 'sample' selects a hard-coded 9-node toy graph, 'BA' a
        10-node Barabasi-Albert graph (m=2, seed=42); any other value is read
        from ``./src/tmp/{filename}.g`` as an edge list with integer node ids.
        For files, only the largest connected component is kept and its nodes
        are relabelled to consecutive integers.
    :return: a LightMultiGraph containing the edges of the selected graph
    """
    start_time = time()
    if filename == 'sample':
        g = nx.Graph()
        g.add_edges_from([(1, 2), (1, 3), (1, 5), (2, 4), (2, 5), (2, 7),
                          (3, 4), (3, 5), (4, 5), (4, 9), (6, 7), (6, 8),
                          (6, 9), (7, 8), (7, 9), (8, 9)])
    elif filename == 'BA':
        # The generated graph used to be overwritten by an empty nx.Graph()
        # right after creation, so 'BA' always produced an empty result.
        g = nx.barabasi_albert_graph(10, 2, seed=42)
    else:
        g = nx.read_edgelist(f'./src/tmp/{filename}.g',
                             nodetype=int,
                             create_using=nx.Graph())
        if not nx.is_connected(g):
            # nx.connected_component_subgraphs() was removed in networkx 2.4;
            # take the largest component's node set and copy its subgraph.
            largest_component = max(nx.connected_components(g), key=len)
            g = g.subgraph(largest_component).copy()
        name = g.name
        g = nx.convert_node_labels_to_integers(g)
        g.name = name  # relabelling returns a new graph, so restore the name

    g_new = LightMultiGraph()
    g_new.add_edges_from(g.edges())

    end_time = time() - start_time
    print(
        f'Graph: {filename}, n = {g.order():_d}, m = {g.size():_d} read in {round(end_time, 3):_g}s.'
    )

    return g_new
Esempio n. 2
0
def create_rule(subtree: Set[int], g: LightMultiGraph, mode: str) -> Tuple[PartRule, List[Tuple[int, int]]]:
    """
    Build a grammar rule whose RHS is the subgraph of ``g`` induced by ``subtree``.

    :param subtree: nodes of ``g`` that form the rule's right-hand side
    :param g: the graph the rule is extracted from (not modified here)
    :param mode: 'full' builds a FullRule, 'part' a PartRule, anything else a NoRule
    :return: the rule and the boundary edges reported by find_boundary_edges
    """
    rhs_graph = g.subgraph(subtree).copy()
    assert isinstance(rhs_graph, LightMultiGraph)
    boundary_edges = find_boundary_edges(g, subtree)
    lhs_size = len(boundary_edges)

    if mode == 'full':
        # full boundary information: boundary edges are added to the RHS
        # (flagged with b=True) and the RHS is then contracted
        rule = FullRule(lhs=lhs_size, internal_nodes=subtree, graph=rhs_graph)

        for edge in boundary_edges:
            edge_len = len(edge)
            if edge_len == 2:
                src, dst = edge
                rule.graph.add_edge(src, dst, b=True)
            elif edge_len == 3:
                src, dst, edge_data = edge
                rule.graph.add_edge(src, dst, attr_dict=edge_data, b=True)

        rule.contract_rhs()  # contract and generalize
        return rule, boundary_edges

    if mode == 'part':
        # partial boundary information: only boundary degrees are recorded
        rule = PartRule(lhs=lhs_size, graph=rhs_graph)
        set_boundary_degrees(g, rule.graph)
    else:
        rule = NoRule(lhs=lhs_size, graph=rhs_graph)

    rule.generalize_rhs()
    return rule, boundary_edges
Esempio n. 3
0
def leiden(g: LightMultiGraph):
    """
    Recursively cluster ``g`` with leiden_one_level and return a nested-list tree.

    Leaves are single-node lists. Recursion stops when the graph has fewer
    than two nodes or when one level of clustering yields a single cluster.

    :param g: graph to cluster
    :return: nested lists of nodes
    """
    if g.order() < 2:
        return [[node] for node in g.nodes()]

    communities = leiden_one_level(g)
    if len(communities) == 1:
        # no further split possible: every member becomes its own leaf
        return [[node] for node in list(communities)[0]]

    # recurse into an independent copy of each cluster's induced subgraph
    return [leiden(g.subgraph(community).copy()) for community in communities]
Esempio n. 4
0
def find_lu(g: LightMultiGraph) -> int:
    """
    Count the distinct label kinds present in ``g``.

    One unit is always counted for edges. Nodes add one unit if any node
    carries a 'label' attribute and one more if any node lacks it.

    :param g: graph whose node attributes are inspected
    :return: 1 plus the number of distinct node kinds (0, 1 or 2)
    """
    node_kinds = {
        'nt' if 'label' in attrs else 't'
        for _, attrs in g.nodes(data=True)
    }
    return 1 + len(node_kinds)
Esempio n. 5
0
def spectral_kmeans(g: LightMultiGraph, K):
    """
    k-way ncut spectral clustering Ng et al. 2002 KNSC1

    Recursively splits ``g`` and returns a nested-list tree of nodes.

    :param g: graph g
    :param K: number of clusters
    :return: nested lists; graphs with at most K nodes become single-node leaves
    """
    tree = []

    if g.order() <= K:  # not more than k nodes, return the list of nodes
        return [[n] for n in g.nodes()]

    if K == 2:  # if K is two, use approx min partitioning
        return approx_min_conductance_partitioning(g)

    if not nx.is_connected(g):
        # nx.connected_component_subgraphs() was removed in networkx 2.4;
        # build each component's subgraph explicitly (copied, as before).
        for component in nx.connected_components(g):
            p = g.subgraph(component).copy()
            if p.order() > K + 1:  # more than K + 1 nodes, use spectral K-means
                tree.append(spectral_kmeans(p, K))
            else:  # try spectral K-means with a lesser K
                tree.append(spectral_kmeans(p, K - 1))
        assert len(tree) > 0
        return tree

    if K >= g.order() - 2:
        # too few nodes for K + 1 eigenvectors; retry with a smaller K
        return spectral_kmeans(g, K - 1)

    assert nx.is_connected(g), "g is not connected in spectral kmeans"

    L = nx.laplacian_matrix(g)

    assert K < g.order() - 2, "k is too high"

    # NOTE(review): asfptype() is a scipy sparse *matrix* method; confirm it is
    # available for the SciPy / networkx versions in use.
    _, eigenvecs = scipy.sparse.linalg.eigsh(
        L.asfptype(), k=K + 1,
        which='SM')  # compute the first K+1 eigenvectors
    eigenvecs = eigenvecs[:, 1:]  # discard the first trivial eigenvector

    U = sklearn.preprocessing.normalize(
        eigenvecs)  # normalize the eigenvecs by its L2 norm

    kmeans = KMeans(n_clusters=K).fit(U)

    cluster_labels = kmeans.labels_
    clusters = [[] for _ in range(max(cluster_labels) + 1)]

    # rows of U follow the iteration order of g.nodes()
    for u, clu_u in zip(g.nodes(), cluster_labels):
        clusters[clu_u].append(u)

    for cluster in clusters:
        sg = g.subgraph(cluster)
        if len(cluster) > K + 1:
            tree.append(spectral_kmeans(sg, K))
        else:
            tree.append(spectral_kmeans(sg, K - 1))

    return tree
Esempio n. 6
0
def get_random_partition(g: LightMultiGraph, seed=None):
    """
    Shuffle the nodes of ``g`` and hand them to random_partition.

    :param g: graph whose nodes are partitioned
    :param seed: if not None, seeds the module-level ``random`` generator first
    :return: whatever random_partition returns for the shuffled node list
    """
    if seed is not None:
        random.seed(seed)  # seeds the shared global RNG, not a private one
    shuffled_nodes = list(g.nodes())
    random.shuffle(shuffled_nodes)
    return random_partition(shuffled_nodes)
Esempio n. 7
0
def approx_min_conductance_partitioning(g: LightMultiGraph, max_k=1):
    """
    Approximate minimum conductance partitioning. I'm using the median method as referenced here:
    http://www.ieor.berkeley.edu/~goldberg/pubs/krishnan-recsys-final2.pdf
    :param g: graph to recursively partition
    :param max_k: upper bound of number of nodes allowed in the leaves
    :return: a dendrogram as nested lists; a leaf is a flat list of at most max_k nodes
    """
    lvl = []
    node_list = list(g.nodes())
    if len(node_list) <= max_k:
        assert len(node_list) > 0
        return node_list

    if not nx.is_connected(g):
        # nx.connected_component_subgraphs() was removed in networkx 2.4;
        # build each component's subgraph explicitly (copied, as before).
        for component in nx.connected_components(g):
            lvl.append(
                approx_min_conductance_partitioning(
                    g.subgraph(component).copy(), max_k))
        assert len(lvl) > 0
        return lvl

    assert nx.is_connected(g), "g is not connected in cond"

    fiedler_vector = nx.fiedler_vector(g, method='lanczos')

    # Positions in node_list ranked by descending Fiedler value. The sort is
    # stable, so ties keep their node_list order.
    ranked = sorted(range(len(fiedler_vector)),
                    key=lambda pos: fiedler_vector[pos],
                    reverse=True)
    half_idx = len(ranked) // 2  # floor division

    # median split: the top half goes to p1, the rest to p2
    p1 = {node_list[pos] for pos in ranked[:half_idx]}
    p2 = {node_list[pos] for pos in ranked[half_idx:]}

    sg1 = g.subgraph(p1)
    sg2 = g.subgraph(p2)

    iter_count = 0
    while not (nx.is_connected(sg1) and nx.is_connected(sg2)):
        sg1 = g.subgraph(p1)
        sg2 = g.subgraph(p2)

        # Hack to check and fix non connected subgraphs: keep the largest
        # component on its side and move every other component across.
        if not nx.is_connected(sg1):
            for component in sorted(nx.connected_components(sg1),
                                    key=len,
                                    reverse=True)[1:]:
                p2.update(component)
                p1.difference_update(component)

            sg2 = g.subgraph(p2)  # updating sg2 since p2 has changed

        if not nx.is_connected(sg2):
            for component in sorted(nx.connected_components(sg2),
                                    key=len,
                                    reverse=True)[1:]:
                p1.update(component)
                p2.difference_update(component)

        # sg1 / sg2 are refreshed at the top of the next pass, so the loop
        # condition always runs one extra pass after the last move.
        iter_count += 1

    if iter_count > 2:
        print('it took {} iterations to stabilize'.format(iter_count))

    assert nx.is_connected(sg1) and nx.is_connected(
        sg2), "subgraphs are not connected in cond"

    lvl.append(approx_min_conductance_partitioning(sg1, max_k))
    lvl.append(approx_min_conductance_partitioning(sg2, max_k))

    assert (len(lvl) > 0)
    return lvl
Esempio n. 8
0
def _generate_graph(rule_dict: Dict[int, List[PartRule]],
                    upper_bound: int) -> Any:
    """
    Create a new graph from the VRG at random.

    Starting from a single non-terminal node labelled 0, a non-terminal is
    repeatedly picked at random and replaced by the RHS of a rule drawn from
    ``rule_dict[label]`` with probability proportional to ``rule.frequency``.

    :param rule_dict: maps an LHS size to the list of candidate rules
    :param upper_bound: abort once the graph has more than this many nodes
    :return: ``(new_graph, rule_ordering)`` where rule_ordering lists the ids
        of the fired rules in order, or ``(None, None)`` if upper_bound was
        exceeded
    """
    node_counter = 1

    new_g = LightMultiGraph()
    new_g.add_node(0, label=0)

    non_terminals = {0}
    rule_ordering = []  # list of rule ids in the order they were fired

    while len(non_terminals) > 0:  # continue until no more non-terminal nodes
        if new_g.order() > upper_bound:  # early stopping
            return None, None

        # choose a non terminal node at random; random.sample() rejects sets
        # on Python 3.11+, so sample from a tuple of the set instead
        node_sample = random.sample(tuple(non_terminals), 1)[0]
        lhs = new_g.nodes[node_sample]['label']

        rhs_candidates = rule_dict[lhs]

        if len(rhs_candidates) == 1:
            rhs = rhs_candidates[0]
        else:
            weights = np.array([rule.frequency for rule in rhs_candidates])
            weights = weights / np.sum(weights)  # normalize into probabilities
            # index the size-1 result explicitly: int() on a 1-element array
            # is deprecated in recent NumPy
            idx = int(
                np.random.choice(range(len(rhs_candidates)), size=1,
                                 p=weights)[0])  # pick based on probability
            rhs = rhs_candidates[idx]

        logging.debug(
            f'firing rule {rhs.id}, selecting node {node_sample} with label: {lhs}'
        )
        rule_ordering.append(rhs.id)

        broken_edges = find_boundary_edges(new_g, {node_sample})
        assert len(broken_edges) == lhs

        new_g.remove_node(node_sample)
        non_terminals.remove(node_sample)

        nodes = {}  # RHS node -> freshly numbered node in new_g

        for n, d in rhs.graph.nodes(data=True):  # all the nodes are internal
            new_node = node_counter
            nodes[n] = new_node

            label = None
            if 'label' in d:  # if it's a new non-terminal add it to the set of non-terminals
                non_terminals.add(new_node)
                label = d['label']

            if label is None:
                new_g.add_node(new_node, b_deg=d['b_deg'])
            else:
                new_g.add_node(new_node, b_deg=d['b_deg'], label=label)
            node_counter += 1

        # randomly assign broken edges to boundary edges
        random.shuffle(broken_edges)

        # randomly joining the new boundary edges from the RHS to the rest of the graph - uniformly at random
        for n, d in rhs.graph.nodes(data=True):
            num_boundary_edges = d['b_deg']
            if num_boundary_edges == 0:  # there are no boundary edges incident to that node
                continue

            assert len(broken_edges) >= num_boundary_edges

            # take the first num_boundary_edges shuffled edges for this node
            edge_candidates = broken_edges[:num_boundary_edges]
            # and remove them from future consideration
            broken_edges = broken_edges[num_boundary_edges:]

            for u, v in edge_candidates:  # each edge is either (node_sample, v) or (u, node_sample)
                if u == node_sample:
                    u = nodes[n]
                else:
                    v = nodes[n]
                logging.debug(f'adding broken edge ({u}, {v})')
                new_g.add_edge(u, v)

        # adding the rhs to the new graph
        for u, v in rhs.graph.edges():
            edge_multiplicity = rhs.graph[u][v]['weight']
            new_g.add_edge(nodes[u], nodes[v], weight=edge_multiplicity)
            logging.debug(
                f'adding RHS internal edge ({nodes[u]}, {nodes[v]}) wt: {edge_multiplicity}'
            )

    return new_g, rule_ordering
Esempio n. 9
0
def generate_graph(rule_dict, rule_list):
    """
    Create a new graph from the VRG at random.

    Starting from a single non-terminal node labelled 0, a non-terminal is
    repeatedly picked at random and replaced by the RHS of an active rule
    drawn from ``rule_dict[label]`` with probability proportional to
    ``rule.frequency``.

    :param rule_dict: maps an LHS size to the list of candidate VRG rules
    :param rule_list: not used by this function
    :return: ``(new_graph, rule_ordering)``; rule_ordering is always empty
        here because nothing is ever appended to it
    """

    node_counter = 1
    non_terminals = set()
    new_g = LightMultiGraph()

    new_g.add_node(0, label=0)
    non_terminals.add(0)

    rule_ordering = []  # list of rule ids in the order they were fired

    while len(non_terminals) > 0:  # continue until no more non-terminal nodes
        # choose a non terminal node at random; random.sample() rejects sets
        # on Python 3.11+, so sample from a tuple of the set instead
        node_sample = random.sample(tuple(non_terminals), 1)[0]
        lhs = new_g.nodes[node_sample]['label']

        # consider only active rules
        rhs_candidates = [rule for rule in rule_dict[lhs] if rule.is_active]

        if len(rhs_candidates) == 1:
            rhs = rhs_candidates[0]
        else:
            weights = np.array([rule.frequency for rule in rhs_candidates])
            weights = weights / np.sum(weights)  # normalize into probabilities
            # index the size-1 result explicitly: int() on a 1-element array
            # is deprecated in recent NumPy
            idx = int(
                np.random.choice(range(len(rhs_candidates)), size=1,
                                 p=weights)[0])  # pick based on probability
            rhs = rhs_candidates[idx]

        broken_edges = find_boundary_edges(new_g, [node_sample])

        assert len(broken_edges) == lhs

        new_g.remove_node(node_sample)
        non_terminals.remove(node_sample)

        nodes = {}  # RHS node -> freshly numbered node in new_g

        for n, d in rhs.graph.nodes(data=True):  # all the nodes are internal
            new_node = node_counter
            nodes[n] = new_node
            # networkx 2.x add_node() takes attributes as keyword arguments.
            # Passing attr_dict=d would store one attribute literally named
            # 'attr_dict', hiding 'label' / 'b_deg' from the lookups above.
            new_g.add_node(new_node, **d)
            if 'label' in d:  # if it's a new non-terminal add it to the set of non-terminals
                non_terminals.add(new_node)
            node_counter += 1

        # randomly assign broken edges to boundary edges
        random.shuffle(broken_edges)

        # randomly joining the new boundary edges from the RHS to the rest of the graph - uniformly at random
        for n, d in rhs.graph.nodes(data=True):
            num_boundary_edges = d['b_deg']
            if num_boundary_edges == 0:  # there are no boundary edges incident to that node
                continue

            assert len(broken_edges) >= num_boundary_edges

            # take the first num_boundary_edges shuffled edges for this node
            edge_candidates = broken_edges[:num_boundary_edges]
            # and remove them from future consideration
            broken_edges = broken_edges[num_boundary_edges:]

            for u, v in edge_candidates:  # each edge is either (node_sample, v) or (u, node_sample)
                if u == node_sample:
                    u = nodes[n]
                else:
                    v = nodes[n]
                new_g.add_edge(u, v)

        # adding the rhs to the new graph, one add_edge call per unit of weight
        for u, v in rhs.graph.edges():
            edge_multiplicity = rhs.graph[u][v]['weight']
            for _ in range(edge_multiplicity):
                new_g.add_edge(nodes[u], nodes[v])
    return new_g, rule_ordering
Esempio n. 10
0
            edge_candidates = broken_edges[:
                                           num_boundary_edges]  # picking the first num_broken edges
            broken_edges = broken_edges[
                num_boundary_edges:]  # removing them from future consideration

            for u, v in edge_candidates:  # each edge is either (node_sample, v) or (u, node_sample)
                if u == node_sample:
                    u = nodes[n]
                else:
                    v = nodes[n]
                # print('adding broken edge ({}, {})'.format(u, v))
                new_g.add_edge(u, v)

        # adding the rhs to the new graph
        for u, v in rhs.graph.edges():
            # print('adding RHS internal edge ({}, {})'.format(nodes[u], nodes[v]))
            edge_multiplicity = rhs.graph[u][v]['weight']  #
            for _ in range(edge_multiplicity):
                new_g.add_edge(nodes[u], nodes[v])
    return new_g, rule_ordering


if __name__ == '__main__':
    # Manual demo: build a 4-node graph (edge (1, 2) is added twice), show its
    # edges, then run set_boundary_degrees on a copy of the subgraph induced
    # by nodes {2, 3} and show the resulting node data.
    g = LightMultiGraph()
    g.add_edges_from([
        (1, 2),
        (1, 2),
        (1, 3),
        (2, 3),
        (3, 4),
    ])
    print(g.edges(data=True))

    sg = g.subgraph([2, 3]).copy()
    set_boundary_degrees(g, sg)
    print(sg.nodes(data=True))
Esempio n. 11
0
def compress_graph(g: LightMultiGraph, subtree: Set[int], boundary_edges: Any, permanent: bool) -> Union[None, float]:
    """
    Replace the nodes in ``subtree`` by one non-terminal node, in place.

    The new node reuses the smallest id in ``subtree``, is labelled with the
    number of boundary edges, and inherits those edges.

    :param g: the graph (mutated in place)
    :param subtree: the set of nodes that's compressed
    :param boundary_edges: boundary edges; computed with find_boundary_edges when None
    :param permanent: if False, the compression is undone after graph_dl(g) has been measured
    :return: None when permanent, otherwise graph_dl of the compressed graph
    """
    assert len(subtree) > 0, f'Empty subtree g:{g.order(), g.size()}, bound: {boundary_edges}'
    before = (g.order(), g.size())

    if not isinstance(subtree, set):
        subtree = set(subtree)

    if boundary_edges is None:
        boundary_edges = find_boundary_edges(g, subtree)

    # step 1: remove the subtree; remember its contents when we must undo later
    if permanent:
        removed_edges, removed_nodes = set(), set()
    else:
        induced = g.subgraph(subtree)
        removed_edges = list(induced.edges(data=True))
        removed_nodes = list(induced.nodes(data=True))
    g.remove_nodes_from(subtree)

    # step 2: replace subtree with new_node
    new_node = min(subtree)
    g.add_node(new_node, label=len(boundary_edges))

    # step 3: rewire new_node - endpoints inside the subtree collapse onto it
    for bdry in boundary_edges:
        if len(bdry) not in (2, 3):
            continue  # anything that is not (u, v) or (u, v, data) is ignored
        u, v, *edge_data = bdry
        u = new_node if u in subtree else u
        v = new_node if v in subtree else v
        g.add_edge(u, v, *edge_data)

    if permanent:
        return None

    # trial compression: measure the description length, then roll back
    compressed_graph_dl = graph_dl(g)
    g.remove_node(new_node)  # and the boundary edges
    g.add_nodes_from(removed_nodes)  # add the subtree

    for e in itertools.chain(removed_edges, boundary_edges):
        if len(e) == 3:
            u, v, d = e
        else:
            u, v = e
            d = {'weight': 1}
        extra = {'edge_colors': d['edge_colors']} if 'edge_colors' in d else {}
        g.add_edge(u, v, weight=d['weight'], **extra)

    after = (g.order(), g.size())
    assert before == after, 'Decompression did not work'
    return compressed_graph_dl
Esempio n. 12
0
if __name__ == '__main__':
    # Manual driver: rebuild the 9-node sample graph, load a pickled cluster
    # tree for it and set up a GlobalExtractor over both.
    name = 'lesmis'
    outdir = 'output'
    # clustering = 'leiden'
    clustering = 'cond'
    type = 'mu_level'  # NOTE: shadows the builtin; name kept as in the original script
    mu = 3

    g_ = nx.Graph()
    g_.add_edges_from([(1, 2), (1, 3), (1, 5),
                      (2, 4), (2, 5),
                      (3, 4), (3, 5), (4, 5),
                      (2, 7), (4, 9),
                      (6, 7), (6, 8), (6, 9),
                      (7, 8), (7, 9), (8, 9)])
    g = LightMultiGraph()
    g.add_edges_from(g_.edges())
    # Open the pickle in a with-block so the file handle is closed right away.
    # NOTE: pickle.load executes arbitrary code; only load trusted files.
    with open('../output/trees/sample/cond_tree.pkl', 'rb') as tree_file:
        root = pickle.load(tree_file)
    print(root)

    grammar = VRG(clustering=clustering, type=type, name=name, mu=mu)

    # alternative extractors:
    # extractor = MuExtractor(g=g, type=type, mu=mu, grammar=grammar, root=root)
    # extractor = LocalExtractor(g=g, type=type, mu=mu, grammar=grammar, root=root)
    extractor = GlobalExtractor(g=g, type=type, mu=mu, grammar=grammar, root=root)

    # Stack-based walk that maps node keys to tree nodes.
    # NOTE(review): as written nothing is pushed back onto the stack, so only
    # the root is recorded - the loop body looks truncated; confirm.
    key2node = {}
    s = [extractor.root]
    while len(s) != 0:
        tnode = s.pop()
        key2node[tnode.key] = tnode
Esempio n. 13
0
    name, clustering, mode, mu, type, outdir = args.graph, args.clustering, args.boundary, args.mu, \
                                               args.type, args.outdir
    grammar, orig_n = get_grammar_s(original_graph=g, name=name, grammar_type=type, clustering=clustering, mu=mu)
    g = generate_graph(rule_dict=grammar.rule_dict, target_n=orig_n)
    ng = g[0]
    return list(ng.edges())

class dotdict(dict):
    """dict whose keys can also be read, written and deleted as attributes.

    Reading a missing attribute returns None instead of raising; deleting a
    missing attribute raises KeyError.
    """

    def __getattr__(self, name):
        # only reached when normal attribute lookup fails
        return dict.get(self, name)

    def __setattr__(self, name, value):
        dict.__setitem__(self, name, value)

    def __delattr__(self, name):
        dict.__delitem__(self, name)

# Settings handed to the grammar learner, wrapped for attribute-style access.
args_d = {
    'graph': 'chem',
    'clustering': 'leiden',
    'boundary': 'part',
    'mu': 4,
    'type': 'mu_level_dl',
    'outdir': 'output',
    'n': 5,
}
args_dict = dotdict(args_d)
print(args_dict.graph)

# Copy the karate-club edges into a LightMultiGraph and generate from it.
g = nx.karate_club_graph()
g_new = LightMultiGraph()
g_new.add_edges_from(g.edges())
new_grs = cnrg_learn_grammars_probabilistic_graph_generation(g_new, args_dict)
print(new_grs)
Esempio n. 14
0
def get_graph(name='sample',
              path_input='',
              path_node_attrs='',
              path_edge_attrs='',
              path_timestamps='') -> LightMultiGraph:
    """
    Build a LightMultiGraph, optionally decorated with attributes read from files.

    :param name: used when path_input is empty: 'sample' selects a hard-coded
        9-node toy graph, 'BA' a 10-node Barabasi-Albert graph (m=2, seed=42),
        anything else is read from ``./src/tmp/{name}.g``. Also used as the
        graph name in the summary line.
    :param path_input: edge-list file with integer node ids; takes precedence
        over ``name`` when non-empty
    :param path_node_attrs: file with ``node attr`` rows, stored as the
        one-element list attribute 'node_colors'
    :param path_edge_attrs: file with ``u v attr`` rows, stored as the
        one-element list attribute 'edge_colors'
    :param path_timestamps: file with ``u v timestamp`` rows, stored as the
        float attribute 'timestamp'
    :return: the assembled LightMultiGraph
    :raises ValueError: if an attribute row has the wrong number of fields

    Attribute rows may separate fields with spaces, tabs or commas.
    For graphs read from a file only the largest connected component is kept.
    """
    start_time = time()
    if path_input == '':
        if name == 'sample':
            g = nx.Graph()
            g.add_edges_from([(1, 2), (1, 3), (1, 5), (2, 4), (2, 5), (2, 7),
                              (3, 4), (3, 5), (4, 5), (4, 9), (6, 7), (6, 8),
                              (6, 9), (7, 8), (7, 9), (8, 9)])
        elif name == 'BA':
            # The generated graph used to be overwritten by an empty
            # nx.Graph() right after creation, so 'BA' was always empty.
            g = nx.barabasi_albert_graph(10, 2, seed=42)
        else:
            g = nx.read_edgelist(f'./src/tmp/{name}.g',
                                 nodetype=int,
                                 create_using=nx.Graph())
            g.name = name
            if not nx.is_connected(g):
                g = _largest_component(g)
            name = g.name
            g = nx.convert_node_labels_to_integers(g)
            g.name = name  # relabelling returns a new graph, so restore the name
    else:
        g = nx.read_edgelist(path_input, nodetype=int, create_using=nx.Graph())
        if not nx.is_connected(g):
            g = _largest_component(g)
        # Node labels are NOT relabelled in this branch - presumably so the
        # ids in the attribute files still match; confirm with callers.
        g.name = name

    g_new = LightMultiGraph()
    g_new.add_edges_from(g.edges())

    # a node attribute is a list of "colors"
    if path_node_attrs != '':
        node_attrs = {
            int(v): [attr]
            for v, attr in _read_attr_rows(path_node_attrs, 2)
        }
        nx.set_node_attributes(g_new, node_attrs, 'node_colors')

    # an edge attribute is a list of "colors"
    if path_edge_attrs != '':
        edge_attrs = {
            (int(u), int(v)): [attr]
            for u, v, attr in _read_attr_rows(path_edge_attrs, 3)
        }
        nx.set_edge_attributes(g_new, edge_attrs, 'edge_colors')

    # an edge timestamp is a floating-point number
    if path_timestamps != '':
        edge_attrs = {
            (int(u), int(v)): float(timestamp)
            for u, v, timestamp in _read_attr_rows(path_timestamps, 3)
        }
        nx.set_edge_attributes(g_new, edge_attrs, 'timestamp')

    end_time = time() - start_time
    print(
        f'Graph: {name}, n = {g.order():_d}, m = {g.size():_d} read in {round(end_time, 3):_g}s.'
    )

    return g_new


def _largest_component(g):
    """Return a copy of the largest connected component of ``g``."""
    # nx.connected_component_subgraphs() was removed in networkx 2.4
    largest = max(nx.connected_components(g), key=len)
    return g.subgraph(largest).copy()


def _read_attr_rows(path, n_fields):
    """Return the fields of every non-blank row of ``path`` as lists of strings.

    Fields may be separated by any run of spaces, tabs or commas.
    Raises ValueError when a row does not have exactly ``n_fields`` fields.
    """
    rows = []
    with open(path) as infile:
        for line_no, line in enumerate(infile, start=1):
            # split() with no argument collapses runs of whitespace, so
            # "1, 2, red" and a trailing blank line no longer break parsing
            fields = line.replace(',', ' ').split()
            if not fields:
                continue
            if len(fields) != n_fields:
                raise ValueError(
                    f'{path}:{line_no}: expected {n_fields} fields, got {len(fields)}'
                )
            rows.append(fields)
    return rows