def tuples_to_graph(tuples):
    G = Graph()
    for node, attribute in tuples:
        print('adding', node, attribute)
        G.add_nodes_from(node, freq=attribute)
        G.add_edges_from(to_edges(node))
    return G
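
# tuples_to_graph above (and to_graph further below) call a to_edges helper
# that these snippets don't show. A minimal sketch under that assumption,
# pairing consecutive items so each input behaves like a path:
def to_edges(l):
    """Yield edges (a, b), (b, c), ... for the input [a, b, c, ...]."""
    it = iter(l)
    last = next(it)
    for current in it:
        yield last, current
        last = current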
def generate_small_world_graph(self):
    max_edges = self.NODE_COUNT * (self.NODE_COUNT - 1) / 2
    if self.EDGE_COUNT > max_edges:
        return complete_graph(self.NODE_COUNT)
    graph = Graph()
    graph.add_nodes_from(range(self.NODE_COUNT))
    edges = performer.edge_indices.flatten()
    probabilities = performer.probabilities.flatten()
    for trial in range(len(edges) - 9):
        edge_index = numpy.random.choice(edges, p=probabilities)
        source, destination = self.edge_nodes(edge_index)
        graph.add_edge(source, destination,
                       length=self.link_length(source, destination),
                       weight=self.edge_weight(source, destination))
        probabilities[edge_index] = 0
        probabilities /= sum(probabilities)
        if max(graph.degree().values()) > self.DEGREE_MAX:
            graph.remove_edge(source, destination)
        if graph.number_of_edges() > self.EDGE_COUNT:
            victim = random.choice(graph.edges())
            graph.remove_edge(victim[0], victim[1])
        if self.constraints_satisfied(graph):
            print('performer.generate_small_world_graph:', end=' ')
            print(self.BENCHMARK, self.NODE_COUNT, self.EDGE_COUNT, trial)
            self.process_graph(graph)
            return graph
Example #3
def convert_local_tree_topology_to_graph(loc_tree_topo, tree_node_labeling):
    """ Creates a directed, acyclic NetworkX graph from a local tree topology

    Parameters
    ----------
    loc_tree_topo: array-like
        The local tree topology, where the root node element is -1

    tree_node_labeling: array-like
        The integer ids for each tree node

    Returns
    -------
    G : NetworkX graph

    """

    assert( loc_tree_topo[0] == -1 )

    G = Graph()
    G.add_nodes_from( tree_node_labeling )
    # build up graph connectivity
    con = vstack( (loc_tree_topo, range(len(loc_tree_topo))) )
    # prune root node connectivity
    con = con[:,1:]
    # update with correct labels
    con = tree_node_labeling[con]
    G.add_edges_from( zip(con[0,:], con[1,:]) )

    return G
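
# A small usage sketch (assumes numpy and vstack are imported as in the
# original module): a root with two children, relabeled to ids 10, 11, 12.
import numpy as np
loc_tree_topo = np.array([-1, 0, 0])         # element i is the parent of node i
tree_node_labeling = np.array([10, 11, 12])  # integer id for each tree node
G = convert_local_tree_topology_to_graph(loc_tree_topo, tree_node_labeling)
print(sorted(G.edges()))  # [(10, 11), (10, 12)]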
Example #4
def fuzz_network(G_orig, threshold, b, edge_frac=1.0, nonedge_mult=5.0):
    G = G_orig.copy()
    n = len(G.nodes())
    H = Graph()
    H.add_nodes_from(range(n))
    pairs = n * (n - 1) // 2  # total number of node pairs
    actual_edges = len(G.edges())
    edges = int(edge_frac * actual_edges)
    nonedges = int(edges * nonedge_mult)

    a = b / nonedge_mult

    # each Beta distribution is normalized to one, but by drawing the
    # appropriate numbers of edge and non-edge samples we make these
    # 'distributions' behave as intended
    edge_probs = np.random.beta(a + 1, b, edges)
    nonedge_probs = np.random.beta(a, b + 1, nonedges)

    # picking the right number of edges from the appropriate list
    # nx >= 2 returns views, so materialize lists before shuffling
    edge_list = list(G.edges())
    nonedge_list = list(non_edges(G))
    shuffle(edge_list)
    shuffle(nonedge_list)
    for i in range(len(edge_probs)):
        G[edge_list[i][0]][edge_list[i][1]]["weight"] = edge_probs[i]
        if edge_probs[i] > threshold:
            H.add_edge(edge_list[i][0], edge_list[i][1])
    for i in range(len(nonedge_probs)):
        G.add_edge(nonedge_list[i][0], nonedge_list[i][1], weight=nonedge_probs[i])
        if nonedge_probs[i] > threshold:
            H.add_edge(nonedge_list[i][0], nonedge_list[i][1])

    return G, H
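
# A small usage sketch (assumes the numpy/networkx imports of the original
# module, i.e. np, shuffle and non_edges): fuzz a sparse random graph, getting
# back a weighted copy G and a thresholded observation graph H. Note that
# edge_frac * nonedge_mult * |E| must not exceed the number of non-edges.
from networkx import erdos_renyi_graph
G_fuzzed, H = fuzz_network(erdos_renyi_graph(20, 0.1), threshold=0.5, b=2.0)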
def make_graph(points, neighbor_max_dist=0.01):
    graph = Graph()
    graph.add_nodes_from(range(len(points)))
    for i in range(len(points)):
        for j in range(i+1, len(points)):
            if euclidian_3d_dist(points[i], points[j])<neighbor_max_dist:
                graph.add_edge(i,j)
    return graph
Example #6
def eliminate_node(G, a):
    fillins = ()
    nb = frozenset(G.neighbors(a))
    for u in nb:
        for v in nb - frozenset((u,)):
            if not G.has_edge(v, u) and frozenset((u, v)) not in fillins:
                fillins += (frozenset((u, v)),)
    kill_edges = frozenset([(u, a) for u in nb] + [(a, u) for u in nb])
    H = Graph()
    H.add_nodes_from(list(frozenset(G.nodes()) - frozenset((a,))))
    H.add_edges_from(list((frozenset(G.edges()) - kill_edges) | frozenset(fillins)))
    return H
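
# A small usage sketch: eliminating a node connects its former neighbours
# with fill-in edges, as in graph triangulation. On the 4-cycle below,
# eliminating node 0 adds the fill-in edge (1, 3):
g = Graph([(0, 1), (1, 2), (2, 3), (3, 0)])
h = eliminate_node(g, 0)
print(sorted(tuple(sorted(e)) for e in h.edges()))  # [(1, 2), (1, 3), (2, 3)]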
    def __init__(self, mol, eps):
        G = Graph()

        G.add_nodes_from(a.GetIdx() for a in mol.GetAtoms())

        for bond in mol.GetBonds():
            a = bond.GetBeginAtom()
            b = bond.GetEndAtom()

            w = a.GetDegree() * b.GetDegree()

            G.add_edge(a.GetIdx(), b.GetIdx(), weight=w)

        self.G = G
        self.lim = int(1.0 / (eps ** 2))
    def _build_authors_graph(self):
        """
        Build authors graph with each author name as nodes and the collaboration between them as edges.

        @author 1: CipherHat

        @rtype:   networkx.Graph()
        @return:  the Graph containing nodes and edges
        """
        all_data = self.get_network_data()
        # TODO refactor: revisit this; consider moving the Graph code to its own class
        graph = Graph()
        # the nodes format will be {"id":int, "name":str}
        graph.add_nodes_from([(i, {"name": all_data[0][i][0]}) for i in range(len(all_data[0]))])
        graph.add_edges_from(all_data[1])
        return graph
    def get_coauthor_graph_by_author_name(self, name):
        coauthors = set()
        for p in self.publications:
            for a in p.authors:
                if a == self.author_idx[name]:
                    for a2 in p.authors:
                        if a != a2:
                            coauthors.add(a2)

        graph = Graph()
        # the nodes format will be {"id":int, "name":str}
        graph.add_node(self.author_idx[name], name = name)
        # graph.add_nodes_from([(i, {"name": all_data[0][i][0]}) for i in range(len(all_data[0]))])
        graph.add_nodes_from([(ca, {"name": self.authors[ca].name}) for ca in coauthors])
        graph.add_edges_from([(self.author_idx[name], ca) for ca in coauthors])

        return graph
    def calculate(self, P):
        C = self._prop.carbon

        G = Graph()

        G.add_nodes_from(a.GetIdx() for a in self.mol.GetAtoms())

        for bond in self.mol.GetBonds():
            i = bond.GetBeginAtomIdx()
            j = bond.GetEndAtomIdx()

            pi = bond.GetBondTypeAsDouble()

            with self.rethrow_zerodiv():
                w = (C * C) / (P[i] * P[j] * pi)

            G.add_edge(i, j, weight=w)

        sp = floyd_warshall_numpy(G)
        np.fill_diagonal(sp, [1. - C / P[a.GetIdx()] for a in self.mol.GetAtoms()])
        return sp
Example #11
	def merge_slices_to_events(self, current_slices):
		"""
		Merges DBSCAN-generated event slices with previously found events.
		A bimodal network is used to find connections between events and slices;
		slices are then merged into existing events or turned into new ones,
		and the merged-away events are deleted.

		Args:
			current_slices (Dict(List[Dict])): output of self.current_datapoints_dbscan method. Every item of dict is a slice cluster: list with dicts of messages from that cluster.
		"""
		slices_ids = set(current_slices.keys())
		events_ids = set(self.events.keys())
		edges = []
		for slice_id, event_slice in current_slices.items():
			slice_ids = {x['id'] for x in event_slice}
			for event in self.events.values():
				if event.is_successor(slice_ids):
					edges.append((slice_id, event.id))
		G = Graph()
		G.add_nodes_from(slices_ids.union(events_ids))
		G.add_edges_from(edges)
		events_to_delete = []
		for cluster in [x for x in connected_components(G) if x.intersection(slices_ids)]:
			unify_slices = cluster.intersection(slices_ids)
			unify_events = list(cluster.intersection(events_ids))
			meta_slice = [msg for i in unify_slices for msg in current_slices[i]]
			if not unify_events:
				new_event = Event(self.mysql, self.redis, self.tokenizer, self.morph, self.classifier, meta_slice)
				self.events[new_event.id] = new_event
			elif len(unify_events) == 1 and len(unify_slices) == 1 and set(self.events[unify_events[0]].messages.keys()) == {x['id'] for x in meta_slice}:
				continue
			else:
				if len(unify_events) > 1:
					for ancestor in unify_events[1:]:
						self.events[unify_events[0]].merge(self.events[ancestor])
						events_to_delete.append(ancestor)
				self.events[unify_events[0]].add_slice(meta_slice)
		for event in events_to_delete:
			del self.events[event]
			self.redis.delete("event:{}".format(event))
Example #12
def maotree_old(g, m):
    from networkx import Graph, connected_components
    if len(m) == 0:
        return None

    T = Tree(None, [])
    # node -> index in mao
    o = dict((v,i) for i,v in enumerate(m))
    # list of edges (u,v) with o[u] <= o[v]
    e = [(u,v) if o[u] <= o[v] else (v,u) for u,v in g.edges()]
    # we sort e w.r.t. to o such that we can disregard the entire prefix
    # up to the first pair (u,v) with o[u] >= o[current node]
    # Python 3: tuple-unpacking lambdas were removed, so index the pair instead
    e.sort(key=lambda uv: (o[uv[0]], o[uv[1]]))
    # todo is a tuple of the current tree node,
    # the remaining mao to process and
    # the offset of the edges to be considered in the
    # edge list e
    todo = [(T, m, 0)]
    while len(todo):
        t, m, i = todo.pop()
        # x = m.pop(0)
        x = m[0]
        t.tag = x
        if len(m) <= 1:
            continue
        while i < len(e) and o[e[i][0]] <= o[x]:
            i = i+1
        g_ = Graph()
        for (u,v) in e[i:]:
            g_.add_edge(u,v)
        g_.add_nodes_from(m[1:])
        # connected_components yields sets in modern NetworkX; sort each
        # component into a list ordered by the mao order
        cs = [sorted(c, key=o.get) for c in connected_components(g_)]
        t.children = [Tree(None, []) for c in cs]
        todo.extend(zip(t.children, cs, (i for c in cs)))
    return T
class FrameworkFeatureAnalyzer(object):
    """ A class to do feature location analyses on a project written in a specific framework

        Project Graph Details:
        -----------------------
        Node Groups:
            1: Android package
            2: -
            3: Android imported identifier
            4: Java class
            5: Java method
            6: XML file Category
            7: XML file

        Edge Groups:
            1: internal/hierarchical links
            2: Java---Android mappings
            3: Java---XML mappings
    """

    def __init__(self, framework, project):
        """
            :param inspector.models.base.Project project: the project to be analyzed
        """
        self.project = project

        self.framework_namespace = str(framework)
        self.graph = Graph()
        self.graph.add_node(self.framework_namespace)
        self.import_usages = []

    def add_source_file(self, source_file):
        """
            :param inspector.models.base.SourceFile source_file: the file
        """
        self.analyze_framework_imports(source_file)
        self.analyze_source(source_file)

    def analyze_framework_imports(self, source_file):
        """
            :param inspector.models.base.SourceFile source_file: the file
        """
        for im in source_file.imports:
            if im.import_str.startswith(self.framework_namespace):
                self.import_usages.append((im, im.find_usages()))

                components = im.import_str.split('.')

                data = {'group': 1}
                if re.match(r'^[A-Z]+(_[A-Z]+)*$', components[-1]):
                    data['group'] = 3

                last = None
                for i in range(len(components)):
                    cn = '.'.join(components[:i + 1])
                    self.graph.add_node(cn, **data)
                    if last:
                        self.graph.add_edge(last, cn, weight=1, group=1)
                    last = cn
                if last:
                    data['group'] = 3
                    self.graph.add_node(last, **data)

    def analyze_source(self, source_file):
        """
            :param inspector.models.base.SourceFile source_file: the file
        """
        for cl in source_file.classes:
            self.graph.add_node(cl.name, group=4)
            for fu in cl.methods:
                # print '[{0}-{1}]'.format(fu.starting_line, fu.ending_line), re.sub('\s*\n\s*', ' ', unicode(fu))
                fn = fu.qualified_name
                self.graph.add_node(fn, group=5)
                self.graph.add_edge(cl.name, fn, weight=1, group=1)
                for im, usages in self.import_usages:
                    w = 0
                    for ln in usages:
                        if fu.starting_line <= ln <= fu.ending_line:
                            w += 1
                    if w:
                        self.graph.add_edge(im.import_str, fn, weight=w, group=2)

    def add_xml_files(self):
        xml_sub_groups = {':layout', ':values', ':drawable', ':menu', ':xml', ':color'}
        self.graph.add_nodes_from([':XML'] + list(xml_sub_groups), group=6)
        self.graph.add_edges_from([(':XML', g) for g in xml_sub_groups], weight=1, group=1)
        for path in self.project.filter_files(extension='xml'):
            xml_file = self.project.get_file(path)

            if path.startswith('app/res/'):
                g = path.split('/')[2]
                name = '/'.join(path.split('/')[2:])
                self.graph.add_node(name, group=7)
            else:
                if not path.split('/')[-1] in ['pom.xml', 'AndroidManifest.xml']:  # is ignored?
                    print('invalid path:', path)
                continue

            valid_group = False
            if g == 'values':
                g = 'values-default'
            if g.startswith('values-'):
                g = g[7:]
                self.graph.add_edge(':values', ':' + g, weight=1, group=1)
                valid_group = True
            g = ':' + g
            if valid_group or g in xml_sub_groups:
                self.graph.add_edge(g, name, weight=1, group=1)
            else:
                print('invalid subgroup:', g)
Example #14
# Define the graph and nodes
graph = Graph(name = 'Grid 3x3')

# Generate a set of nodes from 0 .. width - 1 and 0 .. height - 1
nodes = [
    node(0, 0),
    node(0, 1),
    node(0, 2),
    node(1, 0),
    node(1, 1),
    node(1, 2),
    node(2, 0),
    node(2, 1),
    node(2, 2),
]
graph.add_nodes_from(nodes)

# Generate a set of edges connecting each node in the list on an inverted L pattern
graph.add_edges_from([
    # For node (0, 0)
    (nodes[0], nodes[3], { 'time': np.random.poisson(10.0) }),
    (nodes[0], nodes[1], { 'time': np.random.poisson(10.0) }),
    # For node (0, 1)
    (nodes[1], nodes[4], { 'time': np.random.poisson(10.0) }),
    (nodes[1], nodes[2], { 'time': np.random.poisson(10.0) }),
    # For node (0, 2)
    (nodes[2], nodes[5], { 'time': np.random.poisson(10.0) }),
    # For node (1, 0)
    (nodes[3], nodes[6], { 'time': np.random.poisson(10.0) }),
    (nodes[3], nodes[4], { 'time': np.random.poisson(10.0) }),
    # For node (1, 1)
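    # (the original snippet is cut off here; the remaining edges below are a
    # reconstruction that continues the inverted-L pattern described above)
    (nodes[4], nodes[7], { 'time': np.random.poisson(10.0) }),
    (nodes[4], nodes[5], { 'time': np.random.poisson(10.0) }),
    # For node (1, 2)
    (nodes[5], nodes[8], { 'time': np.random.poisson(10.0) }),
    # For node (2, 0)
    (nodes[6], nodes[7], { 'time': np.random.poisson(10.0) }),
    # For node (2, 1)
    (nodes[7], nodes[8], { 'time': np.random.poisson(10.0) }),
])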
Example #15
        if item[1] == 2:
            points.append(item)
            matrix[y][x] = (item[0], 1)


def to_name(tlp):
    return tlp[0]


def extract_line(line):
    return list(map(to_name, line))


names = list(map(extract_line, matrix))

G = Graph()
for line in names:
    G.add_nodes_from(line)

for y, line in enumerate(matrix):
    for x, item in enumerate(line):
        if x + 1 < len(line):
            if item[1] == 1 and line[x + 1][1] == 1:
                G.add_edge(item[0], line[x + 1][0])
        if y + 1 < len(matrix):
            if item[1] == 1 and matrix[y + 1][x][1] == 1:
                G.add_edge(item[0], matrix[y + 1][x][0])

path = shortest_path(G, source=points.pop()[0], target=points.pop()[0])
print(path)
Before attempting the exercise, use the IPython Shell to access the dictionary metadata of T and explore it, for instance by running the commands T.edge[1][10] and then T.edge[10][1]. Note how there's only one field, and now you're going to add another field, called 'weight'.
'''

from pickle import load
from networkx import Graph

# Reading Graph v1 pickle data
#with open('../datasets/ego-twitter.p', 'rb') as f:
#    T = load(f)

# Reading Graph v2 pickle data
with open('../datasets/ego-twitter.p2', 'rb') as f:
    nodes, edges = load(f)
    T = Graph()
    T.add_nodes_from(nodes)
    T.add_edges_from(edges)
'''
INSTRUCTIONS

*   Set the 'weight' attribute of the edge between node 1 and 10 of T to be equal to 2. Refer to the following template to set an attribute of an edge: network_name.edge[node1][node2]['attribute'] = value. Here, the 'attribute' is 'weight'.
*   Set the weight of every edge involving node 293 to be equal to 1.1. To do this:
    *   Using a for loop, iterate over all the edges of T, including the metadata.
    *   If 293 is involved in the list of nodes [u, v]:
        *   Set the weight of the edge between u and v to be 1.1.
'''

# Set the weight of the edge
T[1][10]['weight'] = 2

# Iterate over all the edges (with metadata)
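# The snippet is truncated here; a minimal completion following the
# instructions above:
for u, v, d in T.edges(data=True):
    # Check whether 293 is involved in the edge
    if 293 in [u, v]:
        # Set the weight of the edge between u and v to 1.1
        T[u][v]['weight'] = 1.1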
Example #17
def is_balanced(graph_obj, meta_data=False):
    """
    Function to check if a signed graph is balanced. The algorithm used here is
    adapted from the paper "On the notion of balance of a signed graph" by Frank Harary
    and the book "Networks, Crowds, and Markets: Reasoning About a Highly Connected World"
    by David Easley and Jon Kleinberg.

    Args:
        graph_obj : The signed graph to check
        meta_data : Option to get meta data regarding the nature of the balance in the graph.
                    Default: False

    Returns:
        A two-tuple: (bool, meta-data dict). The meta-data dict is None if meta_data is False
    """
    from networkx import Graph
    from networkx.algorithms import bipartite

    if graph_obj.is_directed():
        undirected_graph_obj = graph_obj.to_undirected()
    else:
        undirected_graph_obj = graph_obj

    nodes = undirected_graph_obj.nodes()

    node_labels = {}
    cur_label = 0
    for node in nodes:
        if node not in node_labels:
            constrained_bfs(undirected_graph_obj, node_labels, cur_label, 1, node)
            cur_label += 1

    num_labels = cur_label

    set_graph = Graph()
    set_graph.add_nodes_from([x for x in range(num_labels)])

    # check for mutual antagonism between sets and mutual friendship inside sets
    edges = undirected_graph_obj.edges()
    balanced = True
    for edge in edges:
        f = edge[0]
        s = edge[1]
        if undirected_graph_obj[f][s]['weight'] == 1:
            if node_labels[f] != node_labels[s]:    # this shouldn't happen
                balanced = False
                break
        if undirected_graph_obj[f][s]['weight'] == -1:
            set_graph.add_edge(node_labels[f], node_labels[s])
            if node_labels[f] == node_labels[s]:
                balanced = False
                break

    metas = None
    if meta_data and balanced:
        # determine strength of balance (bipartite condition for sets antagonism)
        strong = None
        if bipartite.is_bipartite(set_graph):
            strong = True
        else:
            strong = False

        # sets
        sets = [[] for i in range(num_labels)]
        for node in node_labels:
            sets[node_labels[node]].append(node)

        # possible split
        split = None
        if strong:
            coloring = bipartite.color(set_graph)
            X = set()
            Y = set()
            for set_ in coloring:
                if coloring[set_] == 0:
                    for node in sets[set_]:
                        X.add(node)
                else:
                    for node in sets[set_]:
                        Y.add(node)
            split = {frozenset(X), frozenset(Y)}

        metas = {}
        metas['num_original_sets'] = num_labels

        metas['original_sets'] = sets
        metas['strength'] = 'strong' if strong else 'weak'
        metas['possible_split'] = split

    return (balanced, metas)
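
# A small usage sketch (assumes the constrained_bfs helper, which is not shown
# in this snippet, labels nodes reachable through weight-1 edges; edges carry a
# 'weight' of +1 for friendship or -1 for antagonism):
from networkx import Graph
g = Graph()
g.add_edge('a', 'b', weight=1)
g.add_edge('b', 'c', weight=-1)
g.add_edge('a', 'c', weight=-1)
balanced, metas = is_balanced(g, meta_data=True)
print(balanced)  # True: splitting into {a, b} and {c} is consistent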
class FrameworkFeatureAnalyzer(object):
    """ A class to do feature location analyses on a project written in a specific framework

        Project Graph Details:
        -----------------------
        Node Groups:
            1: Android package
            2: -
            3: Android imported identifier
            4: Java class
            5: Java method
            6: XML file Category
            7: XML file

        Edge Groups:
            1: internal/hierarchical links
            2: Java---Android mappings
            3: Java---XML mappings
    """
    def __init__(self, framework, project):
        """
            :param inspector.models.base.Project project: the project to be analyzed
        """
        self.project = project

        self.framework_namespace = str(framework)
        self.graph = Graph()
        self.graph.add_node(self.framework_namespace)
        self.import_usages = []

    def add_source_file(self, source_file):
        """
            :param inspector.models.base.SourceFile source_file: the file
        """
        self.analyze_framework_imports(source_file)
        self.analyze_source(source_file)

    def analyze_framework_imports(self, source_file):
        """
            :param inspector.models.base.SourceFile source_file: the file
        """
        for im in source_file.imports:
            if im.import_str.startswith(self.framework_namespace):
                self.import_usages.append((im, im.find_usages()))

                components = im.import_str.split('.')

                data = {'group': 1}
                if re.match(r'^[A-Z]+(_[A-Z]+)*$', components[-1]):
                    data['group'] = 3

                last = None
                for i in range(len(components)):
                    cn = '.'.join(components[:i + 1])
                    self.graph.add_node(cn, **data)
                    if last:
                        self.graph.add_edge(last, cn, weight=1, group=1)
                    last = cn
                if last:
                    data['group'] = 3
                    self.graph.add_node(last, **data)

    def analyze_source(self, source_file):
        """
            :param inspector.models.base.SourceFile source_file: the file
        """
        for cl in source_file.classes:
            self.graph.add_node(cl.name, group=4)
            for fu in cl.methods:
                # print '[{0}-{1}]'.format(fu.starting_line, fu.ending_line), re.sub('\s*\n\s*', ' ', unicode(fu))
                fn = fu.qualified_name
                self.graph.add_node(fn, group=5)
                self.graph.add_edge(cl.name, fn, weight=1, group=1)
                for im, usages in self.import_usages:
                    w = 0
                    for ln in usages:
                        if fu.starting_line <= ln <= fu.ending_line:
                            w += 1
                    if w:
                        self.graph.add_edge(im.import_str,
                                            fn,
                                            weight=w,
                                            group=2)

    def add_xml_files(self):
        xml_sub_groups = {
            ':layout', ':values', ':drawable', ':menu', ':xml', ':color'
        }
        self.graph.add_nodes_from([':XML'] + list(xml_sub_groups), group=6)
        self.graph.add_edges_from([(':XML', g) for g in xml_sub_groups],
                                  weight=1,
                                  group=1)
        for path in self.project.filter_files(extension='xml'):
            xml_file = self.project.get_file(path)

            if path.startswith('app/res/'):
                g = path.split('/')[2]
                name = '/'.join(path.split('/')[2:])
                self.graph.add_node(name, group=7)
            else:
                if not path.split('/')[-1] in [
                        'pom.xml', 'AndroidManifest.xml'
                ]:  # is ignored?
                    print('invalid path:', path)
                continue

            valid_group = False
            if g == 'values':
                g = 'values-default'
            if g.startswith('values-'):
                g = g[7:]
                self.graph.add_edge(':values', ':' + g, weight=1, group=1)
                valid_group = True
            g = ':' + g
            if valid_group or g in xml_sub_groups:
                self.graph.add_edge(g, name, weight=1, group=1)
            else:
                print('invalid subgroup:', g)
def to_graph(l):
    G = Graph()
    for clique in l:
        G.add_nodes_from(clique)
        G.add_edges_from(to_edges(clique))
    return G
def multigraph_to_graph(g: MultiGraph) -> Graph:
    gx = Graph()
    gt = Graph(g)
    gx.add_nodes_from(gt.nodes())
    gx.add_edges_from(gt.edges())
    return gx
Example #21
"""
Unit tests for the :mod:`pennylane.qaoa` submodule.
"""
import pytest
import numpy as np
import pennylane as qml
from pennylane import qaoa
from networkx import Graph
from pennylane.wires import Wires

pytestmark = pytest.mark.usefixtures("tape_mode")

#####################################################

graph = Graph()
graph.add_nodes_from([0, 1, 2])
graph.add_edges_from([(0, 1), (1, 2)])

non_consecutive_graph = Graph([(0, 4), (3, 4), (2, 1), (2, 0)])


def decompose_hamiltonian(hamiltonian):

    coeffs = hamiltonian.coeffs
    ops = [i.name for i in hamiltonian.ops]
    wires = [i.wires for i in hamiltonian.ops]

    return [coeffs, ops, wires]


class TestMixerHamiltonians:
Example #22
def create_graph(nodes, edges):
    graph = Graph()
    graph.add_nodes_from(nodes)
    graph.add_edges_from(edges)
    return graph
Example #23
def partition_reads(tint, maximum_ilp_size):
    reads = tint['reads']
    read_reps = tint['read_reps']
    I = tint['ilp_data']['I']
    FL = tint['ilp_data']['FL']
    tint['partitions'] = list()

    rids = sorted(I.keys())
    unique_data = dict()
    edges = list()
    for i in rids:
        d = (tuple(I[i]), (FL[i][0], FL[i][1], reads[read_reps[i][0]]['poly_tail_category']))
        if d in unique_data:
            unique_data[d].append(i)
        else:
            unique_data[d] = [i]
    unique_data = list(unique_data.items())
    N = len(unique_data)
    for i in range(N):
        for j in range(i+1, N):
            d1, (f1, l1, t1) = unique_data[i][0]
            d2, (f2, l2, t2) = unique_data[j][0]
            f = max(f1, f2)
            l = min(l1, l2)
            o = l-f+1
            d = sum(x != y for x, y in zip(d1[f:l+1], d2[f:l+1]))
            w = sum(x == y == 1 for x, y in zip(d1[f:l+1], d2[f:l+1]))
            if t1 != 'N' and t2 != 'N' and t1 != t2:
                continue
            if w < 1:
                continue
            if (o > 3 and d < 3) or (1 <= o <= 3 and d == 0):
                edges.append((i, j))
    G = Graph()
    G.add_nodes_from(range(N))
    G.add_edges_from(edges)
    while True:
        edges_to_remove = list()
        for i, j in G.edges:
            n1 = set(G.neighbors(i))
            n2 = set(G.neighbors(j))
            if len(n1) == 1 or len(n2) == 1 or len(n1 & n2) > 0:
                continue
            edges_to_remove.append((i, j))
        G.remove_edges_from(edges_to_remove)
        if len(edges_to_remove) == 0:
            break
    for c in components.connected_components(G):
        rids = list()
        incomp = list()
        for c in split_list_evenly(list(c), maximum_ilp_size):
            for idx, i in enumerate(c):
                rids.extend(unique_data[i][1])
                for j in c[idx+1:]:
                    i,j = min(i,j),max(i,j)
                    assert i<j
                    if G.has_edge(i,j):
                        continue
                    for rid_1 in unique_data[i][1]:
                        for rid_2 in unique_data[j][1]:
                            incomp.append((rid_1,rid_2))
            tint['partitions'].append((rids, incomp))
Example #24
def add_entities(graph: Graph, entities: List[Dict[str, Any]]):
    nodes = [(entity['entity_name'], {
        'name': entity['entity_name'],
        'entity_type': entity['entity_type']
    }) for entity in entities]
    graph.add_nodes_from(nodes)
Example #25
class PairWiseFiniteModel(GraphModel):
    """Pairwise finite graphical model.

    Represents a graphical model in which all variables have the same discrete
    domain and every factor depends on at most two variables.

    The model is represented by a field F and interactions J. The probability
    of configuration ``X`` is proportional to
    ``exp(sum F[i][X_i] + 0.5*sum J[i][j][X[i]][X[j]])``.

    The field is stored explicitly as a matrix of shape ``(gr_size, al_size)``.

    Interactions are stored only for those pairs of variables for which they
    are non-zero. So, interactions are represented by an undirected graph,
    where for each edge (i,j) we store the matrix ``J[i,j]``, which has shape
    ``(al_size, al_size)``.

    Names
        "Field" is called like that because in physical models (such as
        Ising model) these values correspond to local magnetic fields. They
        are also known as biases. "Interactions" are called like that because
        in physical models they correspond to strength of spin-spin
        interactions. The fact that all these terms enter the probability
        density function inside the exponent also refers to physical models,
        because fields and interactions are terms in energy and, according to
        the Boltzmann distribution, the probability of a state with energy E
        is proportional to ``exp(-E/(kT))``.
    """
    def __init__(self, size, al_size):
        """Initializes PairWiseFiniteModel.

        :param size: Number of variables.
        :param al_size: Size of the alphabet (domain).

        Domain will consist of integers in range 0, 1, ... al_size - 1.
        """
        super().__init__(size, DiscreteDomain.range(al_size))

        self.gr_size = size
        self.al_size = al_size

        self.field = np.zeros((self.gr_size, self.al_size), dtype=np.float64)

        self.edges = []
        self._edges_interactions = []
        # Maps  (u,v) and (v,u) to index of one of them in self.edges.
        self._edge_ids = dict()

        # Cached properties that are invalidated when graph changes.
        self._graph = None
        self._edges_array = None
        self._dfs_result = None

    def set_field(self, field: np.ndarray):
        """Sets values of field (biases) in all vertices."""
        assert field.shape == (self.gr_size, self.al_size)
        self.field = np.array(field, dtype=np.float64)

    def add_interaction(self, u, v, interaction):
        """Adds factor corresponding to interaction between nodes u and v.

        Factor is f(x) = exp(interaction[x[u], x[v]]).
        If there is already an interaction between these nodes, the new
        interaction is added to it (the old interaction isn't discarded).
        """
        if (u, v) in self._edge_ids:
            edge_id = self._edge_ids[(u, v)]
            if self.edges[edge_id] == (v, u):
                interaction = interaction.T
            self._edges_interactions[edge_id] += interaction
        else:
            self._on_graph_changed()
            self.edges.append((u, v))
            self._edges_interactions.append(
                np.array(interaction, dtype=np.float64))
            self._edge_ids[(u, v)] = len(self.edges) - 1
            self._edge_ids[(v, u)] = len(self.edges) - 1

    def get_interaction_matrix(self, u, v):
        """Returns interaction matrix between nodes u and v.

        Returns np.array of shape (al_size, al_size).
        If there is no interaction between these nodes, raises KeyError.
        """
        edge_id = self._edge_ids[(u, v)]
        if self.edges[edge_id] == (u, v):
            return self._edges_interactions[edge_id]
        else:
            return self._edges_interactions[edge_id].T

    def get_interactions_for_edges(self, edges) -> np.ndarray:
        """Returns interaction for given edges.

        If some edges don't exist, the interaction matrix for them will be a
        zero matrix.

        :param edges: Edge list. np.array of shape ``(x, 2)``.
        :return: np.array of shape (x, al_size, al_size).
        """
        edges_num = edges.shape[0]
        assert edges.shape == (edges_num, 2)
        result = np.zeros((edges_num, self.al_size, self.al_size),
                          dtype=np.float64)

        for i in range(edges_num):
            u, v = edges[i]
            if self.has_edge(u, v):
                result[i, :, :] = self.get_interaction_matrix(u, v)

        return result

    def has_edge(self, u, v) -> bool:
        """Whether there is edge between vertices u and v."""
        return (u, v) in self._edge_ids

    def get_graph(self):
        """Returns interaction graph."""
        if self._graph is None:
            self._graph = Graph()
            self._graph.add_nodes_from(range(self.gr_size))
            for u, v in self.edges:
                self._graph.add_edge(u, v)
        return self._graph

    def get_dfs_result(self) -> FastDfsResult:
        """Performs DFS for interaction graph."""
        if self._dfs_result is None:
            self._dfs_result = fast_dfs(self.gr_size, self.get_edges_array())
        return self._dfs_result

    def is_graph_acyclic(self):
        """Whether interaction graph is acyclic."""
        return not self.get_dfs_result().had_cycles

    def get_edges_array(self) -> np.ndarray:
        """Returns edge list as np.array."""
        if self._edges_array is None:
            if len(self.edges) == 0:
                self._edges_array = np.empty((0, 2), dtype=np.int32)
            else:
                self._edges_array = np.array(self.edges, dtype=np.int32)
        return self._edges_array

    def get_edges_connected(self) -> np.ndarray:
        """Returns edges, ensuring that graph is connected.

        If graph is already connected, equivalent to ``get_edges_array``.
        If graph is not connected, adds minimal amount of edges to make it
        connected.

        This is needed for algorithms which require connected graph to work
        correctly.
        """
        if not self.get_dfs_result().was_disconnected:
            return self.get_edges_array()

        additional_edges = [(u, v) for u, v in self.get_dfs_result().dfs_edges
                            if not self.has_edge(u, v)]
        return np.concatenate([self.get_edges_array(), additional_edges])

    def _on_graph_changed(self):
        """Invalidates cached graphs."""
        self._graph = None
        self._edges_array = None
        self._dfs_result = None

    def get_all_interactions(self) -> np.ndarray:
        """Returns all interaction matrices in compact form.

        :return: np.array of shape ``(edge_num, al_size, al_size)`` with
          interaction matrix for every edge. Matrices correspond to edges in
          the same order as returned by ``get_edges_array``.
        """
        if len(self.edges) == 0:
            shape = (0, self.al_size, self.al_size)
            return np.empty(shape, dtype=np.float64)
        return np.array(self._edges_interactions, dtype=np.float64)

    def add_factor(self, factor: Factor):
        """Adds a factor."""
        if isinstance(factor, DiscreteFactor):
            self._add_discrete_factor(factor)
        elif factor.is_discrete():
            self._add_discrete_factor(DiscreteFactor.from_factor(factor))
        else:
            raise ValueError("Can't add non-discrete factor.")

    def _add_discrete_factor(self, factor: DiscreteFactor):
        assert factor.model == self
        with np.errstate(divide='ignore'):
            log_factor = np.log(factor.values)
        if len(factor.var_idx) > 2:
            raise ValueError("Can't add factor with more than 2 variables.")
        if len(factor.var_idx) == 1:
            assert factor.values.shape == (self.al_size, )
            self.field[factor.var_idx[0], :] += log_factor
        elif len(factor.var_idx) == 2:
            v1, v2 = factor.var_idx
            self.add_interaction(v1, v2, log_factor)

    def get_factors(self) -> Iterable[Factor]:
        """Generates explicit list of factors."""
        for i in range(self.gr_size):
            if np.linalg.norm(self.field[i, :]) > 1e-9:
                yield DiscreteFactor(self, [i], np.exp(self.field[i, :]))
        for u, v in self.edges:
            factor = DiscreteFactor(self, [u, v],
                                    np.exp(self.get_interaction_matrix(u, v)))
            if self.num_variables < 10:
                factor.name = 'J%d%d' % (u, v)
            else:
                factor.name = 'J_%d_%d' % (u, v)
            yield factor

    def infer(self, algorithm='auto', **kwargs) -> InferenceResult:
        """Performs inference.

        Available algorithms
            * ``auto`` - Automatic.
            * ``bruteforce`` - Brute force (by definition). Exact
            * ``mean_field`` - Naive Mean Field. Approximate.
            * ``message_passing`` - Message passing. Approximate, exact only
              for trees.
            * ``path_dp`` - Dynamic programming on path decomposition. Exact.
              Effective on graphs of small pathwidth.
            * ``tree_dp`` - Dynamic programming on tree. Exact. Works only on
              trees.
            * ``junction_tree`` - DP on junction tree. Exact. Effective on
              graphs of small treewidth.

        :param algorithm: Which algorithm to use. String.
        :return: `InferenceResult` object, which contains logarithm of
          partition function and matrix of marginal probabilities.
        """
        if algorithm == 'auto':
            if self.is_graph_acyclic():
                return infer_tree_dp(self)
            try:
                return infer_junction_tree(self)
            except TooMuchStatesError:
                return belief_propagation(self)
        elif algorithm == 'bruteforce':
            return infer_bruteforce(self)
        elif algorithm == 'mean_field':
            return infer_mean_field(self, **kwargs)
        elif algorithm == 'message_passing':
            return infer_message_passing(self, **kwargs)
        elif algorithm == 'path_dp':
            return infer_path_dp(self)
        elif algorithm == 'tree_dp':
            return infer_tree_dp(self)
        elif algorithm == 'junction_tree':
            return infer_junction_tree(self, **kwargs)
        else:
            raise ValueError('Unknown algorithm %s' % algorithm)

    def max_likelihood(self, algorithm='auto', **kwargs) -> np.ndarray:
        """Finds the most probable state.

        Available algorithms
            * ``auto`` - Automatic.
            * ``bruteforce`` - Brute force (by definition).
            * ``path_dp`` - Dynamic programming on path decomposition. Exact.
              Effective on graphs of small pathwidth.
            * ``tree_dp`` - Dynamic programming on tree. Exact. Works only on
              trees.
            * ``junction_tree`` - DP on junction tree. Exact. Effective on
              graphs of small treewidth.

        :param algorithm: Which algorithm to use. String.
        :return: The most probable state as numpy int array.
        """
        if algorithm == 'auto':
            if self.is_graph_acyclic():
                return max_likelihood_tree_dp(self)
            else:
                try:
                    return max_lh_bruteforce(self)
                except TooMuchStatesError:
                    return max_likelihood_junction_tree(self)
        elif algorithm == 'bruteforce':
            return max_lh_bruteforce(self)
        elif algorithm == 'tree_dp':
            return max_likelihood_tree_dp(self)
        elif algorithm == 'path_dp':
            return max_lh_path_dp(self)
        elif algorithm == 'junction_tree':
            return max_likelihood_junction_tree(self)
        else:
            raise ValueError('Unknown algorithm %s' % algorithm)

    def sample(self,
               num_samples: int = 1,
               algorithm='auto',
               **kwargs) -> np.ndarray:
        """Draws i.i.d. samples from the distribution.

        Available algorithms
            * ``auto`` - Automatic.
            * ``bruteforce`` - Sampling from explicitly calculated
              probabilities for each state.
            * ``tree_dp`` - Dynamic programming on tree. Works only on trees.
            * ``junction_tree`` - DP on junction tree.

        :param num_samples: How many samples to generate.
        :param algorithm: Which algorithm to use.
        :return: ``np.array`` of type ``np.int32`` and shape
          ``(num_samples, gr_size)``. Every row is an independent sample.
        """
        if algorithm == 'auto':
            if self.is_graph_acyclic():
                return sample_tree_dp(self, num_samples=num_samples)
            else:
                try:
                    return sample_bruteforce(self, num_samples=num_samples)
                except TooMuchStatesError:
                    return sample_junction_tree(self, num_samples=num_samples)
        elif algorithm == 'bruteforce':
            return sample_bruteforce(self, num_samples=num_samples)
        elif algorithm == 'tree_dp':
            return sample_tree_dp(self, num_samples=num_samples)
        elif algorithm == 'junction_tree':
            return sample_junction_tree(self, num_samples=num_samples)
        else:
            raise ValueError('Unknown algorithm %s' % algorithm)

    def encode_state(self, state):
        """Returns the integer id of the given state.

        The state id is an integer between `0` and `al_size**gr_size-1`.
        """
        return encode_state(state, self.gr_size, self.al_size)

    def decode_state(self, state):
        """Returns the state represented by the given integer id."""
        return decode_state(state, self.gr_size, self.al_size)

    @staticmethod
    def create(field: np.ndarray, edges: Union[np.ndarray, List],
               interactions: np.ndarray):
        """Creates PairwiseFiniteModel from compact representation.

        Infers number of variables and size of alphabet from shape of
        ``field``.

        :param field: Values of the field. ``np.array`` of shape
          ``(gr_size, al_size)``.
        :param edges: List of edges with interactions. ``np.array`` of integer
          dtype and shape ``(edge_num, 2)``. Edges can't repeat. If there is
          edge (u,v), you can't have edge (v,u).
        :param interactions: ``np.array`` of shape
          ``(edge_num, al_size, al_size)``, or Iterable which can be converted
          to such an array. ``interactions[i,:,:]`` is a matrix describing
          interactions between variables ``edges[i, 0]`` and ``edges[i, 1]``.
        """
        size, al_size = field.shape
        model = PairWiseFiniteModel(size, al_size)
        model.set_field(field)
        idx = 0
        assert len(edges) == len(interactions)
        for v1, v2 in edges:
            model.add_interaction(v1, v2, interactions[idx])
            idx += 1
        return model

    def draw_pairwise_graph(self, ax):
        """Draws pairwise graph."""
        graph = self.get_graph()
        pos = nx.kamada_kawai_layout(graph)
        node_labels = {i: self[i].name for i in range(self.num_variables)}
        nx.draw_networkx(graph,
                         pos,
                         ax,
                         labels=node_labels,
                         edge_color='green',
                         node_color='#ffaaaa')
        edge_labels = {(u, v): "J_%d_%d" % (u, v) for u, v in self.edges}
        nx.draw_networkx_edge_labels(graph, pos, edge_labels=edge_labels)

    def get_subgraph_factor_values(
        self, vars_idx: np.ndarray,
        vars_skip: Set = frozenset()) -> np.ndarray:
        """Calculates factor values for subgraph.

        Consider model on subgraph containing only variables with indices
        ``vars``. That is, containing only factors which depend only on
        variables from ``vars``. For every possible combination of those
        variable values, calculate product of all factors in the new model -
        that's what this function returns.

        This can also be described as "interactions within subgraph". Or if we
        condense all variables in ``vars`` in single "supervariable", this
        function returns field for the new supervariable.

        :param vars_idx: Indices of variables in subgraph.
        :param vars_skip: Set. Indices of variables which should be
          skipped for factor calculation. Field factors for these variables
          won't be included in the result. Interaction factors both arguments
          of which are in ``vars_skip`` won't be included in the
          result. However, interaction factors where only one variable appears
          in ``vars_skip`` will be included in the result. This parameter
          is useful when building a junction tree, to avoid double-counting
          factors.
        :return: ``np.array`` of length ``al_size ** len(vars)``. Each value
          is logarithm of product of all relevant factors for certain variable
          values. Correspondence between indices in this array and states
          is consistent with ``decode_state``.
        """
        vars_num = len(vars_idx)
        edges = []
        for i in range(vars_num):
            v1 = vars_idx[i]
            for j in range(i + 1, vars_num):
                v2 = vars_idx[j]
                should_skip = v1 in vars_skip and v2 in vars_skip
                if not should_skip and self.has_edge(v1, v2):
                    edges.append((i, j, self.get_interaction_matrix(v1, v2)))

        all_states = decode_all_states(vars_num, self.al_size)
        a = np.zeros(self.al_size**vars_num)
        for u in range(vars_num):
            if vars_idx[u] in vars_skip:
                continue
            a += self.field[vars_idx[u]][all_states[:, u]]
        for u, v, j in edges:
            a += j[all_states[:, u], all_states[:, v]]
        return a

    @staticmethod
    def from_model(original_model: GraphModel) -> PairWiseFiniteModel:
        """Constructs Pairwise Finite model which is equivalent to given model.

        All variables must be discrete. All factors must depend on at most 2
        variables.

        New model will have the same number of variables and factors. If
        variables in original model have different domain sizes, in new model
        they will be extended to have the same domain size.
        """
        al_size = max(v.domain.size() for v in original_model.get_variables())
        old_factors = list(original_model.get_factors())

        def pad_tensor(t):
            padding = [[0, al_size - dim] for dim in t.shape]
            return np.pad(t, padding)

        # Validate model.
        if al_size > 1000:
            raise ValueError("Not all variables are discrete.")
        if max(len(f.var_idx) for f in old_factors) > 2:
            raise ValueError("Model is not pairwise.")

        new_model = PairWiseFiniteModel(original_model.num_variables, al_size)
        for old_factor in old_factors:
            values = DiscreteFactor.from_factor(old_factor).values
            values = pad_tensor(values)
            new_factor = DiscreteFactor(new_model, old_factor.var_idx, values)
            new_model.add_factor(new_factor)

        return new_model
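
# A minimal usage sketch for PairWiseFiniteModel (assumes the surrounding
# library's helpers are importable): build a 3-variable binary chain model
# from its compact representation and run exact inference on it.
import numpy as np

field = np.zeros((3, 2))              # shape (gr_size, al_size)
edges = [(0, 1), (1, 2)]              # chain: 0 - 1 - 2
interactions = np.array([[[0.0, 1.0],
                          [1.0, 0.0]],   # J for edge (0, 1)
                         [[0.0, 1.0],
                          [1.0, 0.0]]])  # J for edge (1, 2)
model = PairWiseFiniteModel.create(field, edges, interactions)
result = model.infer(algorithm='tree_dp')  # the chain is acyclic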
import pickle
import networkx as nx

from networkx import Graph

# Reading Graph v1 pickle data
#with open('../datasets/github.p', 'rb') as f:
#    G = pickle.load(f)

# Reading Graph v2 pickle data
with open('../datasets/github.p2', 'rb') as f:
    nodes, edges = pickle.load(f)

    G = Graph()

    G.add_nodes_from(nodes)
    G.add_edges_from(edges)
'''
INSTRUCTIONS

*   Write a function called recommend_repositories() that accepts 3 arguments - G, from_user, and to_user - and returns the repositories that the from_user is connected to that the to_user is not connected to.
    *   Get the set of repositories the from_user has contributed to and store it as from_repos. To do this, first obtain the neighbors of from_user and use the set() function on this.
    *   Get the set of repositories the to_user has contributed to and store it as to_repos.
    *   Using the .difference() method, return the repositories that the from_user is connected to that the to_user is not connected to.
*   Print the repositories to be recommended from 'u7909' to 'u2148'.
'''


def recommend_repositories(G, from_user, to_user):
    # Get the set of repositories that from_user has contributed to
    from_repos = set(G.neighbors(from_user))
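    # The snippet is truncated here; a minimal completion following the
    # instructions above:
    # Get the set of repositories that to_user has contributed to
    to_repos = set(G.neighbors(to_user))

    # Identify the repositories that from_user is connected to but to_user
    # is not connected to
    return from_repos.difference(to_repos)


# Print the repositories to be recommended from 'u7909' to 'u2148'
print(recommend_repositories(G, 'u7909', 'u2148'))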
Example #27
                stop = transfer_map[stop]
            route_new.append(stop)
        unique_routes_new[pair_new].append(route_new)
unique_routes = unique_routes_new

print('    tracing rolle connectedness...')

# # 2.3.2 collect rolle connectedness of each stop
system = Graph()
for pair in unique_routes.keys():
    # filter out staten island routes
    if pair[0] not in si_ids and pair[1] not in si_ids:
        for route in unique_routes[pair]:
            route = np.array(route)
            edges = np.vstack([route[:-1], route[1:]]).T
            system.add_nodes_from(route)
            system.add_edges_from(edges)

# find location of each node as subway stop
locs = {node:None for node in system.nodes()}
stop_id = stops['stop_id'].tolist()
for node in system.nodes():
    stop = stops[stops['stop_id']==node].iloc[0]
    locs[node] = stop[['x','y']].values

# alex rolle's connectedness
def f(layers, n):
    if n in layers.keys():
        return len(layers[n])
    else:
        return -1
class ClusterNetwork(object):
    def __init__(self, reps):
        self.g = Graph()
        self.N = len(reps.keys())
        nodes = []
        self.lookup = {}
        self.attributes = None
        for i, r in enumerate(sorted(reps.keys())):
            self.lookup[r] = i
            if self.attributes is None:
                self.attributes = list(reps[r].attributes.keys())
            nodes.append((i, {'rep': reps[r]}))
        self.g.add_nodes_from(nodes)
        self.clusters = None

    def __iter__(self):
        for i, d in self.g.nodes_iter(data=True):
            yield d

    def __len__(self):
        return self.N

    def __getitem__(self, key):
        if isinstance(key, str):
            return self.g.node[self.lookup[key]]
        elif isinstance(key, tuple):
            return self.simMat[key]
        return self.g.node[key]

    def cluster(self, scores, cluster_method, oneCluster):
        #Clear any edges
        self.g.remove_edges_from(list(self.g.edges_iter(data=False)))

        if cluster_method is None:
            return
        if scores is not None:
            self.simMat = zeros((self.N, self.N))
            for k, v in scores.items():
                indOne = self.lookup[k[0]]
                indTwo = self.lookup[k[1]]
                self.simMat[indOne, indTwo] = v
                self.simMat[indTwo, indOne] = v
            self.simMat = -1 * self.simMat
        if cluster_method == 'affinity':
            true_labels = array(
                [self[i]['rep']._true_label for i in range(self.N)])
            self.clusters = affinity_cluster(self.simMat, true_labels,
                                             oneCluster)
            edges = []
            for k, v in self.clusters.items():
                for v2 in v:
                    if v2[0] == k:
                        continue
                    edges.append((k, v2[0], v2[1]))
        elif cluster_method == 'complete':
            edges = []
            for i in range(self.N):
                for j in range(i + 1, self.N):
                    edges.append((i, j, self.simMat[i, j]))
        self.g.add_weighted_edges_from(edges)
        seed = RandomState(seed=3)
        mds = manifold.MDS(n_components=2,
                           max_iter=3000,
                           eps=1e-9,
                           random_state=seed,
                           dissimilarity="precomputed",
                           n_jobs=4)
        pos = mds.fit(-1 * self.simMat).embedding_
        clf = PCA(n_components=2)
        pos = clf.fit_transform(pos)
        for i, p in enumerate(pos):
            self.g.node[i]['pos'] = p

    def calc_reduction(self):
        if self.clusters is None:
            return
        means = {}
        reverse_mapping = {}
        for k, v in self.clusters.items():
            s = 0
            for ind in v:
                reverse_mapping[ind[0]] = k
                s += ind[1]
            means[k] = s / len(v)
        for i in self.g.nodes_iter():
            clust_center = reverse_mapping[i]
            if i == clust_center:
                self.g.node[i]['HyperHypoMeasure'] = 0
                continue
            dist = self.g[i][clust_center]['weight']
            norm_dist = abs(dist - means[clust_center])
            len_diff = self[clust_center]['representation'].shape[0] - self[i][
                'representation'].shape[0]
            if len_diff < 0:
                norm_dist *= -1
            self.g.node[i]['HyperHypoMeasure'] = norm_dist
        if 'HyperHypoMeasure' not in self.attributes:
            self.attributes.append('HyperHypoMeasure')

    def get_edges(self):
        return array(self.g.edges(data=False))

    def labels(self):
        labels = list(range(len(self.g)))
        for k, v in self.clusters.items():
            for v2 in v:
                labels[v2[0]] = k
        true_labels = list()
        for i in range(len(labels)):
            true_labels.append(self[i]['rep']._true_label)
        levels = {x: i for i, x in enumerate(set(true_labels))}
        for i in range(len(true_labels)):
            true_labels[i] = levels[true_labels[i]]
        return array(labels), array(true_labels)

    def silhouette_coefficient(self):
        labels, true_labels = self.labels()
        return metrics.silhouette_score(self.simMat,
                                        labels,
                                        metric='precomputed')

    def homogeneity(self):
        labels, true_labels = self.labels()
        return metrics.homogeneity_score(true_labels, labels)

    def completeness(self):
        labels, true_labels = self.labels()
        return metrics.completeness_score(true_labels, labels)

    def v_score(self):
        labels, true_labels = self.labels()
        return metrics.v_measure_score(true_labels, labels)

    def adjusted_mutual_information(self):
        labels, true_labels = self.labels()
        return metrics.adjusted_mutual_info_score(true_labels, labels)

    def adjusted_rand_score(self):
        labels, true_labels = self.labels()
        return metrics.adjusted_rand_score(true_labels, labels)
Example #29
def to_nx(self) -> Graph:
    nx_graph = Graph()
    nx_graph.add_nodes_from(self._nodes)
    nx_graph.add_edges_from(self._edges)
    return nx_graph
def graph_from_vertex_and_edge_lists(vertices, edges):
    graph = Graph()
    graph.add_nodes_from(vertices)
    graph.add_edges_from(edges)
    return graph
Example #31
def get_all_aut_classes(F, length, dirname="aut_classes_cache/", verbose=True):
    """
    Get all automorphism classes of words in F_r with bounded length.
    Caches the result to dirname.
    """
    assert is_FreeGroup(F), "F must be a free group"
    r = F.rank()

    cache_dir = os.fsencode(dirname)
    cache_file = os.fsencode(f"r{r}-len{length}.pkl")
    if not os.path.exists(cache_dir):
        os.mkdir(cache_dir)
    if os.path.exists(cache_dir + cache_file):
        aut_classes = pickle.load(open(cache_dir + cache_file, 'rb'))
        return [set([F(w.Tietze()) for w in cls]) for cls in aut_classes]
    # Maybe we computed something bigger before
    for file in os.listdir(cache_dir):
        filename = os.fsdecode(file)
        r_str, len_str = filename.split(".")[0].split("-")
        r_cached = int(r_str[1:])
        len_cached = int(len_str[3:])
        if r_cached >= r and len_cached >= length:
            cached_aut_classes = pickle.load(open(cache_dir + file, 'rb'))
            aut_classes = []
            for cls in cached_aut_classes:
                word = cls.pop()
                word_rep = word.Tietze()
                if len(word_rep) == 0:
                    aut_classes.append(set([F(1)]))
                elif len(word_rep) < length and max(
                        set([abs(x) for x in word_rep])) < r:
                    cls.add(word)
                    aut_classes.append(set([F(w.Tietze()) for w in cls]))
            pickle.dump(aut_classes,
                        open(os.path.join(dirname, f"r{r}-len{length}.pkl"), 'wb'))
            return aut_classes

    letters = list(range(1, r + 1)) + list(range(-r, 0))

    minimal_words = set()
    all_words = set()  # To avoid stuff like (1,-1,2,3) and (2,-2,2,3)

    # We only consider words that are in "canonical order".
    # We also only check tuples starting with the first generator a
    # (such a tuple can still reduce, e.g. a*a^-1*b*...).
    tuples = product(letters, repeat=length - 1)
    if verbose:
        print(f"Minimizing all words in {F} of length <={length}")
        print(f"{2*(len(letters)) ** (length - 1)} words to minimize.")
        tuples = tqdm(tuples)
    for tup in tuples:
        tup = [1] + list(tup)
        word = F(tup)
        if word not in all_words and word == canonical_letter_permute_form(
                F, word):
            all_words.add(word)
            minimal_words.add(
                canonical_letter_permute_form(F, minimize(F, word)))
    if length > 0:
        # Due to cancellations (e.g. (1, -1, ...)), words of length
        # `length - 1` have to be checked as well.
        tuples = product(letters, repeat=length - 1)
        if verbose:
            tuples = tqdm(tuples)
        for tup in tuples:
            word = F(tup)
            if word == canonical_letter_permute_form(
                    F, word):  # We only consider canonized orders
                minimal_words.add(
                    canonical_letter_permute_form(F, minimize(F, word)))
    if verbose:
        print(
            f"Finished minimizing letters, found {len(minimal_words)} minimal words."
        )
        print("Creating the Whitehead moves graph on minimal words.")

    G = Graph()
    G.add_nodes_from(minimal_words)
    for word in minimal_words:
        nbrs = get_minword_wh_nbrs(F, word)
        assert (len(nbrs[0].Tietze()) == len(
            word.Tietze())), "Whitehead move changed the length of a minimal word"
        for nbr in nbrs:
            nbr = canonical_letter_permute_form(F, nbr)
            assert nbr in minimal_words, f"Found a word ({nbr}) not in minimal_words"
            G.add_edge(word, nbr)
    aut_classes = list(connected_components(G))
    pickle.dump(aut_classes,
                open(os.path.join(dirname, f"r{r}-len{length}.pkl"), 'wb'))
    return aut_classes
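A hypothetical invocation, assuming a SageMath session where the helpers used above (is_FreeGroup, minimize, canonical_letter_permute_form, get_minword_wh_nbrs) are importable:

# F = FreeGroup(2)                     # free group on two generators
# classes = get_all_aut_classes(F, 3)  # automorphism classes of words of length <= 3
# print(len(classes))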
Beispiel #32
def multigraph_to_graph(g: MultiGraph) -> Graph:
    # Graph(g) collapses parallel edges (keeping the data of one arbitrary
    # parallel edge); re-adding the nodes and edges into a fresh Graph then
    # drops the remaining edge attributes, leaving pure structure.
    gx = Graph()
    gt = Graph(g)
    gx.add_nodes_from(gt.nodes())
    gx.add_edges_from(gt.edges())
    return gx
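A short check that the conversion collapses parallel edges (toy data made up for illustration):

from networkx import MultiGraph

mg = MultiGraph()
mg.add_edges_from([(1, 2), (1, 2), (2, 3)])  # two parallel 1-2 edges
g = multigraph_to_graph(mg)
print(mg.number_of_edges(), g.number_of_edges())  # 3 2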
Beispiel #33
def osm_post(lim,
             file_name_out,
             around=1000,
             eps=0.01,
             safe_dist=100,
             penalize=20):
    from limic.util import start, end, status, file_size, load_pickled, distance, save_pickled
    from scipy.spatial import cKDTree as KDTree
    from networkx import Graph, astar_path_length
    from pyproj import CRS, Transformer
    from itertools import chain
    from limic.overpass import intersect, pylon
    lines, substations, towers, id2tower, id2node, id2lines, id2types = lim
    start("Building KD-tree from white nodes")
    from limic.util import kdtree
    towers_tree = kdtree(towers, get_latlon=lambda x: x.latlon)
    end('')
    status(len(towers))
    start("Deleting black nodes")
    to_delete = set()
    from limic.util import nodes_in_geometry
    for substation in substations:
        to_delete.update(
            nodes_in_geometry(towers_tree,
                              list(map(lambda x: id2node[x], substation))))
    towers = [tower for tower in towers if tower not in to_delete]
    end('')
    status(len(towers))
    start("Building initial graph")
    g = Graph()
    g.add_nodes_from(towers)
    for line in lines:
        line_nodes = list(map(lambda x: id2tower[x], line))
        for from_node, to_node in zip(line_nodes, line_nodes[1:]):
            if from_node in to_delete or to_node in to_delete:
                continue
            w = distance(from_node.latlon, to_node.latlon)
            g.add_edge(from_node,
                       to_node,
                       weight=w,
                       type=id2types[from_node.id])
    end('')
    status(len(g.nodes()), end='/')
    status(len(g.edges()))
    start("Finding neighbours within " + str(around) + "m")
    towers_tree = kdtree(towers, get_latlon=lambda x: x.latlon)
    end('')
    neighbour_indices, neighbours = towers_tree.get_neighbours(around=around)
    end()
    start("Computing non-logical intersections")
    tower2index = {}
    for i, t in zip(range(len(towers)), towers):
        tower2index[t] = i
    for k, v in id2lines.items():
        id2lines[k] = tuple(map(tuple, v))
    end('')
    segments = set()
    for u, v in g.edges():
        this = (u, v) if u < v else (v, u)
        ui, vi = tower2index[u], tower2index[v]
        lines = set()
        lines.update(id2lines[u.id])
        lines.update(id2lines[v.id])
        for neighbour in chain(neighbours[ui], neighbours[vi]):
            if neighbour == u or neighbour == v:
                continue
            if not lines.intersection(id2lines[neighbour.id]):
                for nn in g.neighbors(neighbour):
                    other = (neighbour, nn) if neighbour < nn else (nn,
                                                                    neighbour)
                    segments.add(tuple(sorted((this, other))))
    end('')
    status(len(segments), end='   ')
    neighbours2intersection = {}
    minusid = 0
    latlon2id = {}
    segments2intersections = {}
    for (t1, t2), (t3, t4) in segments:
        res = intersect(t1.latlon,
                        t2.latlon,
                        t3.latlon,
                        t4.latlon,
                        eps=eps,
                        no_tu=False)
        if res:
            intersection, (t, u) = res
            if not intersection in latlon2id:
                minusid -= 1
                latlon2id[intersection] = minusid
            segments2intersections.setdefault((t1, t2), []).append(
                (t, latlon2id[intersection], intersection))
            segments2intersections.setdefault((t3, t4), []).append(
                (u, latlon2id[intersection], intersection))
    end('')
    status(-minusid, end='   ')
    for (u, v), intersections in segments2intersections.items():
        intersections.sort()
        g.remove_edge(u, v)
        type = id2types[u.id]
        assert (type == id2types[v.id])
        seq = [u]
        for _, id, latlon in intersections:
            seq.append(pylon(id, latlon))
        seq.append(v)
        for from_node, to_node in zip(seq, seq[1:]):
            w = distance(from_node.latlon, to_node.latlon)
            g.add_edge(from_node, to_node, weight=w, type=type)
    end()
    start("Adding routing through air")
    airs = set()
    for ns in neighbours:
        n = ns[0]
        for m in ns[1:]:
            if not g.has_edge(n, m):
                airs.add((n, m))
    end('')
    for n, m in airs:
        w = penalize * distance(n.latlon, m.latlon)
        g.add_edge(n, m, weight=w, type=-1)
    end('')
    status(len(g.nodes()), end='/')
    status(len(g.edges()))
    from networkx import relabel_nodes
    start("Prune redundant edges (incomplete)")
    prune_incomplete(g)
    end('')
    status(len(g.edges()))
    start("Prune redundant edges (complete)")
    prune_complete(g)
    end('')
    status(len(g.edges()))
    start("Cleaning up graph")
    relabel = dict(
        map(
            lambda tower: (tower,
                           (tower.id, tower.latlon[0], tower.latlon[1])),
            g.nodes()))
    relabel_nodes(g, relabel, copy=False)
    end()
    start("Saving graph to", file_name_out)
    save_pickled(file_name_out, g)
    end('')
    file_size(file_name_out)
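A hypothetical call, assuming lim is the tuple produced by the preceding limic parsing stage and that load_pickled from limic.util reads it back:

# from limic.util import load_pickled
# lim = load_pickled("region.parsed")  # file name made up for illustration
# osm_post(lim, "region.graph", around=1000, penalize=20)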
Beispiel #34
class TextRank(object):
    stopwords = [
        "이하",
        "만약",
        "대한",
        "아",
        "휴",
        "아이구",
        "아이쿠",
        "아이고",
        "어",
        "나",
        "우리",
        "저희",
        "따라",
        "의해",
        "을",
        "를",
        "에",
        "의",
        "가",
    ]
    eng_stopwords = [
        "lot",
        "day",
        "way",
    ]

    def __init__(self, text):
        self.text = text.strip()
        self.build()

    def build(self):
        self._build_sentences()

        # self.has_nouns = self._extract_nouns()

        # sentence-level processing
        self._build_graph()
        self.pageranks = pagerank(self.graph, weight='weight')
        self.reordered = sorted(self.pageranks,
                                key=self.pageranks.get,
                                reverse=True)

        # word-level processing
        self.word_rank_collections = Counter(self.nouns)
        #if self.has_noun:
        # self._build_word_graph()
        # word_rank_idx = self.get_word_ranks(self.words_graph)
        # self.sorted_word_rank_idx = sorted(word_rank_idx, key=lambda k: word_rank_idx[k], reverse=True)

    def _build_sentences(self):
        okt = Okt()
        dup = {}
        candidates = []

        #candidates = split(r'(?:(?<=[^0-9])\.|\n)', self.text)
        # Split the whole text into sentences:
        #   split the full text on newlines,
        #   split each resulting line on '.', using a regex that avoids cutting file names,
        #   strip surrounding whitespace from each piece before appending it.

        for enter_line in re.split(r'\n|! |\? ', self.text):
            for line in split(r'[\.](?=[^0-9])(?=[^a-z])', enter_line):
                candidates.append(line.strip(' ').strip('.').strip('\t'))

        self.sentences = []
        self.nouns = []
        self.has_noun = False
        index = 0
        eng_list = []
        eng_nouns = []
        for candidate in candidates:
            if len(candidate) >= 1 and candidate not in dup:
                dup[candidate] = True
                # add the sentence
                self.sentences.append(Sentence(candidate + '.', index))
                index += 1
                # collect the sentence's nouns
                for pos in okt.pos(str(candidate)):
                    if pos[0] not in self.stopwords and len(
                            pos[0]) > 1 and pos[1] == "Noun":
                        self.nouns.append(pos[0])
                    elif pos[1] == "Alpha":
                        eng_list.append(pos[0])

                # English tokens
                for pos in nltk.pos_tag(eng_list):
                    if pos[1] == "NN" and pos[0].lower(
                    ) not in self.eng_stopwords and len(pos[0]) > 1:
                        eng_nouns.append(pos[0].lower())

        if len(self.nouns) > 0:
            self.has_noun = True
        else:
            if (len(eng_nouns) > 0):
                self.nouns.extend(eng_nouns)
                self.has_noun = True

        del dup
        del candidates

    def _build_graph(self):

        # build the sentence graph
        self.graph = Graph()
        self.graph.add_nodes_from(self.sentences)
        # compute similarity for every pair of sentences
        for sent1, sent2 in combinations(self.sentences, 2):
            # print(sent1,sent2)
            weight = self._jaccard(sent1, sent2)
            if weight:
                self.graph.add_edge(sent1, sent2, weight=weight)

    def _jaccard(self, sent1, sent2):
        p = sum((sent1.bow & sent2.bow).values())
        q = sum((sent1.bow | sent2.bow).values())
        return p / q if q else 0
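    # A worked example of the weighted Jaccard overlap above, assuming each
    # Sentence.bow is a collections.Counter over the sentence's tokens:
    #   bow1 = Counter({'graph': 2, 'node': 1})
    #   bow2 = Counter({'graph': 1, 'edge': 1})
    #   sum((bow1 & bow2).values())  ->  1  (elementwise min)
    #   sum((bow1 | bow2).values())  ->  4  (elementwise max)
    #   weight = 1 / 4 = 0.25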

    def summarize(self, count=3, verbose=True):
        results = sorted(self.reordered[:count],
                         key=lambda sentence: sentence.index)
        results = [result.text for result in results]
        # pad with "None" so exactly `count` results are returned
        if len(results) < count:
            for _ in range(len(results), count):
                results.append("None")

        if verbose:
            return '\n'.join(results)
        else:
            return results

    def keywords(self, word_num=3):
        keywords = []

        keywords = self.word_rank_collections.most_common(word_num)
        results = []
        for keyword in keywords:
            results.append(keyword[0])
        # pad with "None" so exactly `word_num` keywords are returned
        if len(results) < word_num:
            for _ in range(len(results), word_num):
                results.append("None")

        return results
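A minimal usage sketch (assumes konlpy's Okt, nltk's tagger data, and the Sentence class used above are available; the input text is made up):

# tr = TextRank("First sentence. Second sentence. Third sentence.")
# print(tr.summarize(count=2))    # top-2 sentences by PageRank, in document order
# print(tr.keywords(word_num=3))  # three most frequent nouns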
Beispiel #35
def rewire_benchmark(g: nx.Graph, com2node: dict, node2com: dict):

    # rewiring links without destroying structure of the network
    random.seed(int(time.time()))

    #com_cnt = {x:len(com2node[x])/4 for x in com2node.keys()}
    com_cnt = {x: 10 for x in com2node.keys()}
    # rewire inner links
    rand_nodes = list(g.nodes())
    random.shuffle(rand_nodes)
    for x1 in rand_nodes:

        # communities this node belongs to
        in_coms = node2com[x1]
        # randomly pick one of them
        com = random.choice(in_coms)

        if com_cnt[com] < 1:
            continue
        else:
            com_cnt[com] -= 1

        # pick a node x2 adjacent to x1 inside the community, forming edge x1--x2
        _t = list(set(g.neighbors(x1)) & set(com2node[com]))
        if len(_t) < 1:
            continue
        x2 = random.choice(_t)

        # pick an edge y1--y2 inside the same community as x1
        y1 = random.choice(com2node[com])
        _t = list(set(g.neighbors(y1)) & set(com2node[com]))
        if len(_t) < 1:
            continue
        y2 = random.choice(_t)

        # swap the endpoints of edge pair x1--x2, y1--y2 (only if all four nodes are distinct)
        _t = list({x1, x2, y1, y2})

        if len(_t) == 4 and x1 not in g.neighbors(
                y2) and x2 not in g.neighbors(y1):
            g.remove_edges_from([(x1, x2), (y1, y2)])
            g.add_edges_from([(x1, y2), (x2, y1)])
        else:
            pass

    # rewire outer links
    rand_nodes = list(g.nodes())
    random.shuffle(rand_nodes)
    for x1 in rand_nodes:

        # communities this node belongs to
        in_coms = node2com[x1]
        # randomly pick one of them
        com = random.choice(in_coms)

        # pick a node x2 adjacent to x1 but outside the community, forming edge x1--x2
        _t = list(set(g.neighbors(x1)) - set(com2node[com]))
        if len(_t) < 1:
            continue
        x2 = random.choice(_t)

        # pick a node y1 in x1's community and an edge y1--y2 leaving the community
        y1 = random.choice(com2node[com])
        _t = list(set(g.neighbors(y1)) - set(com2node[com]))
        if len(_t) < 1:
            continue
        y2 = random.choice(_t)

        _c = 0  # random.randint(0,1)

        if x1 != y1:
            g.remove_edges_from([(x1, x2), (y1, y2)])
            if _c == 0:
                # swap the endpoints of edge pair x1--x2, y1--y2
                g.add_edges_from([(x1, y2), (x2, y1)])
            else:
                g.add_nodes_from([x1, x2, y1, y2])