Esempio n. 1
0
def get_adjacency_network(g, path, filename, region_type):
    """ Loads or builds the physical adjacency network of another network.

        The result is cached as a GraphML file at
        ``<path>/regions/<region_type>/<filename>.graphml``. If that file
        already exists it is read and returned; otherwise the adjacency
        network is built from `g`, written to that file and returned.

        :param g: The network to use as a base. The vertices of this network
        must have a cell attribute. For every region type except 'voronoi',
        two vertices are adjacent when the intersection of their cells has
        non-zero length; 'voronoi' is delegated to get_voronoi_adjacency.
        :param path: The base path to the network. Used to locate the cache.
        :param filename: The filename of the network. Also used for caching.
        :param region_type: The type of regions contained in the cell attribute
        of the base network. This is also used for caching.
        :return: An igraph.Graph object. All vertex attributes of the base
        network and the new network should be the same. Any attributes that
        cannot be written to a file by igraph (except cell) may not be present.

        Examples
        --------
        >>> path = 'data/testing'
        >>> filename = 'test'
        >>> region_type = 'zip'
        >>> g = load_network(path, filename)
        >>> add_regions(g, path, filename, region_type)
        >>> adj = get_adjacency_network(g, path, filename, region_type)
        >>> adj.vcount() == g.vcount()
        True
    """
    cache_file = os.path.join(path, 'regions', region_type,
                              filename + '.graphml')
    ensure_folder(cache_file)

    if not os.path.exists(cache_file):
        logger.info('Creating Adjacency Network')

        # copy every vertex's attributes, without the 'id' attribute
        vertex_attrs = []
        for vertex in g.vs:
            attrs = vertex.attributes()
            attrs.pop('id', None)
            vertex_attrs.append(attrs)

        if region_type == 'voronoi':
            adj = get_voronoi_adjacency(g)
            # TODO: Get rid of bad edges
        else:

            def cells_touch(a, b):
                # adjacent when the shared boundary has non-zero length
                return a['cell'].intersection(b['cell']).length > 0

            adj = network_creation.get_graph(vertex_attrs, cells_touch)

        # hold the file lock while the cache file is written
        lock_handle = open(cache_file, 'a')
        try:
            multithreading.lock_file_handle(lock_handle)
            adj.write_graphml(cache_file)
        finally:
            multithreading.unlock_file_handle(lock_handle)
            lock_handle.close()

        return adj

    logger.info('Loading Adjacency Network')
    # hold the file lock while the cached network is read
    lock_handle = open(cache_file, 'r')
    try:
        multithreading.lock_file_handle(lock_handle)
        cached = igraph.Graph.Read(cache_file)
    finally:
        multithreading.unlock_file_handle(lock_handle)
        lock_handle.close()
    return cached
Esempio n. 2
0
def get_communities(g, n, path, filename, algorithm='label_propagation'):
    """ Gets a number of igraph.VertexClustering objects.

        Previously computed clusterings are loaded from
        ``<path>/communities/<algorithm>/<filename>.json``. If fewer than `n`
        are stored, the given algorithm is run until `n` are available. The
        full list (old and new) is then written back to the same file for
        future use.

        :param g: The graph to find communities in.
        :param n: The number of communities to find.
        :param path: The path to the base folder for the graph.
        :param filename: The filename of the graph to use.
        :param algorithm: The name of the clustering algorithm to use. It is
        looked up in the module-level `_algorithms` table, so an unknown name
        raises a KeyError when new clusterings have to be computed.

        :return: A list of the first `n` VertexClustering objects.

        Examples
        --------
        >>> path = 'data/testing'
        >>> filename = 'test1'
        >>> g = load_network(path, filename)
        >>> comms = get_communities(g, 10, path, filename, algorithm='random_walk')
        >>> len(comms)
        10
    """
    cluster_path = '{}/communities/{}/{}.json'.format(path, algorithm,
                                                      filename)
    ensure_folder(cluster_path)
    # Append mode creates the file when it is missing without truncating an
    # existing one. This handle is only used for locking.
    h = open(cluster_path, 'a')
    try:
        multithreading.lock_file_handle(h)
        # load any preexisting clusters
        try:
            with open(cluster_path, 'r') as cached:
                cluster_sets = json.load(cached)
        except ValueError:
            # the file is probably empty because we just made it
            cluster_sets = []
        logger.info('Loaded {} communities'.format(len(cluster_sets)))
        # add new clusters if needed
        while len(cluster_sets) < n:
            logger.debug('{} / {} communities'.format(len(cluster_sets), n))
            clustering = _algorithms[algorithm](g)
            cluster_sets.append({
                'membership':
                clustering.membership,
                'modularity_params':
                clustering._modularity_params
            })
        # Save the cluster sets. The context manager closes the handle, so
        # the data is written out before the lock is released.
        with open(cluster_path, 'w') as out:
            json.dump(cluster_sets, out, indent=2)
    finally:
        multithreading.unlock_file_handle(h)
        h.close()

    # only build objects for the first n stored clusterings
    return [igraph.VertexClustering(g, **c) for c in cluster_sets[:n]]
Esempio n. 3
0
def load_network(path, filename):
    """ Loads a cached network from the filesystem.

        The network is read from ``<path>/networks/<filename>.graphml`` while
        a file lock on that file is held.

        :param path: The base path to the network. This will contain all
        information about the type of the network.
        :param filename: The filename of the network. This is usually the dates
        of the data used to build the network.
        :return: an igraph.Graph representation of the network.

        Examples
        --------
        >>> path = 'data/lake_wobegon/distance/1.6/crime'
        >>> filename = '2010'
        >>> network = load_network(path, filename)
    """
    graphml_file = '{}/networks/{}.graphml'.format(path, filename)
    lock_handle = open(graphml_file, 'r')
    try:
        multithreading.lock_file_handle(lock_handle)
        network = igraph.Graph.Read(graphml_file)
    finally:
        # always release the lock and the handle, even if reading failed
        multithreading.unlock_file_handle(lock_handle)
        lock_handle.close()
    return network
Esempio n. 4
0
def add_regions(g, path, filename, region_type):
    """ Adds regions to each node in a network.

        The regions are stored as shapely geometries in the cell attribute of
        each vertex of the network. They are loaded from the shapefile
        ``<path>/regions/<region_type>/<filename>.shp`` when it exists.
        Otherwise they are created for the given region type and saved to
        that shapefile for future use.

        :param g: The network to add regions to. It is modified in place.
        :param path: The path to the base folder of the network of interest.
        :param filename: The filename of the network of interest.
        :param region_type: The type of region to add to the graph. Should be
        one of: 'voronoi' or 'zip'.
        :raises NotImplementedError: If no shapefile exists yet and
        region_type is not one of the supported types.

        Examples
        --------
        >>> path = 'data/testing'
        >>> name = 'test'
        >>> g = load_network(path, name)
        >>> add_regions(g, path, name, 'zip')
        >>> 'cell' in g.vs.attributes()
        True
    """
    cells_file = os.path.join(path, 'regions', region_type, filename + '.shp')
    # the .dbf file that accompanies the shapefile is used as the lock file
    lock_file = cells_file[:-4] + '.dbf'

    if os.path.exists(cells_file):
        logger.info("Loading Regions")
        h = open(lock_file, 'r')
        try:
            multithreading.lock_file_handle(h)
            # Load cells from file. The context manager closes the
            # collection once every record has been read.
            with fiona.open(cells_file) as records:
                for p in records:
                    index = p['properties']['index']
                    g.vs[index]['cell'] = shapely.geometry.shape(
                        p['geometry'])
        finally:
            multithreading.unlock_file_handle(h)
            h.close()
        return

    logger.info("No Regions Found")

    # add the regions to the graph
    if region_type == 'voronoi':
        layout_position(g)
        bound = get_bounds(g)
        logger.info('Creating Voronoi Cells')
        create_voronoi_regions(g, bound)
        logger.info('Clipping Cells')
        clip_cells(g, bound)
    elif region_type == 'zip':
        logger.info('Finding Zipcode Cells')
        geometry = _client['crimes'].geometry
        # one lookup per vertex, keyed on the vertex's zipcode attribute
        g.vs['cell'] = [
            shapely.geometry.shape(
                geometry.find_one({'zip': node['zipcode']})['geometry'])
            for node in g.vs
        ]
    else:
        logger.warning(
            "Unrecognized region type: '{}'".format(region_type))
        raise NotImplementedError(
            '{} regions not implemented'.format(region_type))

    logger.info("Saving Regions")

    # save cells for future use
    ensure_folder(cells_file)
    schema = {
        'geometry': 'Polygon',
        'properties': {
            'index': 'int',
            'count': 'int'
        }
    }
    h = open(lock_file, 'a')
    try:
        multithreading.lock_file_handle(h)
        with fiona.open(cells_file, 'w', 'ESRI Shapefile', schema) as c:
            for i, vertex in enumerate(g.vs):
                writable = shapely.geometry.mapping(
                    shapely.geometry.shape(vertex['cell']))
                # zip regions take their count from the vertex's
                # description attribute; every other region counts as 1
                if region_type == 'zip':
                    count = vertex['description']
                else:
                    count = 1
                c.write({
                    'geometry': writable,
                    'properties': {
                        'index': i,
                        'count': count
                    }
                })
    finally:
        multithreading.unlock_file_handle(h)
        h.close()
Esempio n. 5
0
def save_borders(path, filename, region_type, iterations, algorithm):
    """ Saves a shapefile containing the borders found in a network.

        Finds a border network containing the information about any borders.
        Uses the cell attribute of the vertices and the weight attribute of the
        edges to build the shapes of the borders between communities. The
        shapefile is written to
        ``<path>/borders/<region_type>/<algorithm>/<filename>_<iterations>.shp``.
        Nothing is done if that file already exists.

        :param path: The base path to the network of crimes.
        :param filename: The filename of the crimes network.
        :param region_type: The type of region surrounding each vertex.
        :param iterations: The number of iterations of the community detection
        algorithm.
        :param algorithm: The community detection algorithm to use.

        Examples
        --------
        >>> import plotting
        >>> path = 'data/testing'
        >>> filename = 'test'
        >>> iterations = 30
        >>> save_borders(path, filename, 'zip', iterations, 'random_walk')
        >>> fig = plotting.get_border_fig('{}/borders/{}_{}'.format(path,
        ...   filename, iterations))
        >>> fig.savefig('test.svg')
    """
    borders_path = os.path.join(path, 'borders', region_type, algorithm,
                                '{}_{}.shp'.format(filename, iterations))
    if os.path.exists(borders_path):
        # skip
        logger.info("Borders Exist, skipping")
        return
    border_network = get_border_network(path, filename, region_type, algorithm,
                                        iterations)

    # Collected as (geometry, weight) pairs rather than a dict keyed by
    # geometry, so this does not depend on geometries being hashable.
    borders = []
    for e in border_network.es:
        if e['weight'] <= 0:
            continue
        line = border_network.vs[e.source]['cell'].intersection(
            border_network.vs[e.target]['cell'])

        # remove any points that might have snuck in
        if line.geom_type == 'GeometryCollection':
            # .geoms is used because multi-part geometries cannot be
            # iterated directly in Shapely 2
            points = [shp for shp in line.geoms if shp.geom_type == 'Point']
            for p in points:
                line = line.difference(p)

        if line.geom_type in ('LineString', 'MultiLineString'):
            borders.append((line, e['weight']))
        elif line.geom_type in ('Polygon', 'MultiPolygon'):
            # overlapping cells: use the outline of the overlap as the border
            borders.append((line.boundary, e['weight']))
        else:
            logger.error('Unknown border geometry {}, skipping'.format(
                line.geom_type))

    ensure_folder(borders_path)
    # NOTE(review): get_border_network stores weights as floats divided by
    # the iteration count, while this schema declares 'weight' as 'int'.
    # Confirm the intended field type.
    schema = {'geometry': 'MultiLineString', 'properties': {'weight': 'int'}}
    # the .dbf file that accompanies the shapefile is used as the lock file
    h = open(borders_path[:-4] + '.dbf', 'a')
    try:
        multithreading.lock_file_handle(h)
        with fiona.open(borders_path, 'w', 'ESRI Shapefile', schema) as c:
            for border, _w in borders:
                c.write({
                    'geometry':
                    shapely.geometry.mapping(shapely.geometry.shape(border)),
                    'properties': {
                        'weight': _w
                    }
                })
    finally:
        multithreading.unlock_file_handle(h)
        h.close()
Esempio n. 6
0
def get_border_network(path, filename, region_type, algorithm, iterations):
    """ Loads or builds a network representing borders between communities.

        The network is cached as a GraphML file at
        ``<path>/borders/<region_type>/<algorithm>/<filename>_<iterations>.graphml``.
        If that file exists it is read and its regions are re-attached with
        add_regions. Otherwise the network is built from the adjacency network
        of the base network, given edge weights, saved to the cache file and
        returned.

        :param path: The base path to the network of crimes.
        :param filename: The filename of the network of crimes.
        :param region_type: The type of regions around each vertex.
        :param algorithm: The community detection algorithm to use.
        :param iterations: The number of runs of the community detection
        algorithm.
        :return: An `igraph.Graph` object where the weights of edges between
        two vertices represent the strength of a border between them. Each
        weight is the value returned by find_border_weight for the edge's
        endpoints, divided by `iterations`.

        Examples
        --------
        >>> path = 'data/testing'
        >>> filename = 'test'
        >>> bn = get_border_network(path, filename, 'voronoi',
        ...   'label_propagation', 30)
        >>> bn.write_graphml('{}/borders/{}.graphml'.format(path, filename))
    """
    cache_file = os.path.join(path, 'borders', region_type, algorithm,
                              '{}_{}.graphml'.format(filename, iterations))
    ensure_folder(cache_file)

    if not os.path.exists(cache_file):
        # start from the base network with its regions attached
        base = load_network(path, filename)
        add_regions(base, path, filename, region_type)

        # the border network has the same edges as the adjacency network
        border_network = get_adjacency_network(base, path, filename,
                                               region_type)
        add_regions(border_network, path, filename, region_type)

        # drop the 'id' attribute because it causes a warning
        if 'id' in border_network.vs.attributes():
            del border_network.vs['id']

        # the community runs that the border weights are computed from
        partitions = get_communities(base, iterations, path, filename,
                                     algorithm)

        logger.info('Creating Border Network')
        # first store the raw border weight on every edge ...
        for edge in border_network.es:
            edge['weight'] = find_border_weight(partitions, edge.source,
                                                edge.target)
        # ... then scale all of them by the number of iterations
        raw_weights = border_network.es['weight']
        border_network.es['weight'] = [
            float(raw) / iterations for raw in raw_weights
        ]

        # save the network while holding the file lock
        lock_handle = open(cache_file, 'a')
        try:
            multithreading.lock_file_handle(lock_handle)
            border_network.write_graphml(cache_file)
        finally:
            multithreading.unlock_file_handle(lock_handle)
            lock_handle.close()

        return border_network

    logger.info('Loading Border Network')
    lock_handle = open(cache_file, 'r')
    try:
        multithreading.lock_file_handle(lock_handle)
        border_network = igraph.Graph.Read(cache_file)
    finally:
        multithreading.unlock_file_handle(lock_handle)
        lock_handle.close()
    # re-attach the cells to the loaded network
    add_regions(border_network, path, filename, region_type)
    return border_network