def get_adjacency_network(g, path, filename, region_type):
    """
    Returns the physical adjacency network for another network.

    The result is cached as a graphml file under
    ``<path>/regions/<region_type>/<filename>.graphml``. When that file
    already exists it is read back (under a file lock) instead of being
    rebuilt.

    :param g: The base network. Its vertices must carry a `cell` attribute.
        For non-voronoi regions two vertices are adjacent when the
        intersection of their cells has a length greater than zero.
    :param path: The base path to the network; used to locate the cache.
    :param filename: The filename of the network; used to name the cache.
    :param region_type: The type of regions stored in the `cell` attribute.
        'voronoi' is delegated to `get_voronoi_adjacency`; every other value
        uses the cell-intersection test. Also part of the cache location.
    :return: An igraph.Graph object. Vertex attributes of the base network
        should carry over, except that any attribute igraph cannot write to
        a file (other than cell) may be missing.

    Examples
    --------
    >>> path = 'data/testing'
    >>> filename = 'test'
    >>> region_type = 'zip'
    >>> g = load_network(path, filename)
    >>> add_regions(g, path, filename, region_type)
    >>> adj = get_adjacency_network(g, path, filename, region_type)
    >>> adj.vcount() == g.vcount()
    True
    """
    cache_file = os.path.join(path, 'regions', region_type,
                              filename + '.graphml')
    ensure_folder(cache_file)

    # Cached result available: read it while holding the file lock.
    if os.path.exists(cache_file):
        logger.info('Loading Adjacency Network')
        with open(cache_file, 'r') as handle:
            try:
                multithreading.lock_file_handle(handle)
                return igraph.Graph.Read(cache_file)
            finally:
                multithreading.unlock_file_handle(handle)

    logger.info('Creating Adjacency Network')

    # One attribute dict per vertex, without the 'id' key.
    vertex_attrs = []
    for vertex in g.vs:
        attrs = vertex.attributes()
        attrs.pop('id', None)
        vertex_attrs.append(attrs)

    if region_type == 'voronoi':
        adj = get_voronoi_adjacency(g)
        # TODO: Get rid of bad edges
    else:
        adj = network_creation.get_graph(
            vertex_attrs,
            lambda left, right:
                left['cell'].intersection(right['cell']).length > 0)

    # Append mode creates the cache file without truncating it; the handle
    # itself is only used as the lock target while igraph writes the file.
    with open(cache_file, 'a') as handle:
        try:
            multithreading.lock_file_handle(handle)
            adj.write_graphml(cache_file)
        finally:
            multithreading.unlock_file_handle(handle)
    return adj
def get_communities(g, n, path, filename, algorithm='label_propagation'):
    """
    Gets a number of igraph.VertexClustering objects.

    Clusterings are loaded from a JSON cache when possible; any that are
    missing are computed with the given algorithm and appended to the cache
    for future use. The whole read/compute/write cycle runs while holding a
    lock on the cache file.

    :param g: The graph to find communities in.
    :param n: The number of communities to find.
    :param path: The path to the base folder for the graph.
    :param filename: The filename of the graph to use.
    :param algorithm: The name of the clustering algorithm to use. It is
        looked up in the module-level `_algorithms` and is also part of the
        cache location:
        ``<path>/communities/<algorithm>/<filename>.json``.
    :return: A list of VertexClustering objects

    Examples
    --------
    >>> path = 'data/testing'
    >>> filename = 'test1'
    >>> g = load_network(path, filename)
    >>> comms = get_communities(g, 10, path, filename, algorithm='random_walk')
    >>> len(comms)
    10
    """
    cluster_path = '{}/communities/{}/{}.json'.format(path, algorithm, filename)
    ensure_folder(cluster_path)

    # Append mode creates the cache file if it is missing without truncating
    # existing content. This handle is only used as the lock target.
    with open(cluster_path, 'a') as lock_handle:
        try:
            multithreading.lock_file_handle(lock_handle)

            # load any preexisting clusters
            try:
                with open(cluster_path, 'r') as cache:
                    cluster_sets = json.load(cache)
            except ValueError:
                # the file is probably empty because we just made it
                cluster_sets = []
            logger.info('Loaded {} communities'.format(len(cluster_sets)))

            # add new clusters if needed
            while len(cluster_sets) < n:
                logger.debug('{} / {} communities'.format(len(cluster_sets), n))
                clustering = _algorithms[algorithm](g)
                cluster_sets.append({
                    'membership': clustering.membership,
                    'modularity_params': clustering._modularity_params
                })

            # save the cluster sets; the context manager guarantees the data
            # is flushed and the file closed before the lock is released
            with open(cluster_path, 'w') as cache:
                json.dump(cluster_sets, cache, indent=2)
        finally:
            multithreading.unlock_file_handle(lock_handle)

    # construct objects for only the first n cached clusterings
    return [igraph.VertexClustering(g, **c) for c in cluster_sets[:n]]
def load_network(path, filename):
    """
    Reads a cached network from the filesystem.

    The network is expected at ``<path>/networks/<filename>.graphml`` and is
    read while a lock is held on that file.

    :param path: The base path to the network. This will contain all
        information about the type of the network.
    :param filename: The filename of the network. This is usually the dates
        of the data used to build the network.
    :return: an igraph.Graph representation of the network.

    Examples
    --------
    >>> path = 'data/lake_wobegon/distance/1.6/crime'
    >>> filename = '2010'
    >>> network = load_network(path, filename)
    """
    graphml_file = '{}/networks/{}.graphml'.format(path, filename)
    with open(graphml_file, 'r') as handle:
        try:
            multithreading.lock_file_handle(handle)
            graph = igraph.Graph.Read(graphml_file)
        finally:
            multithreading.unlock_file_handle(handle)
    return graph
def add_regions(g, path, filename, region_type):
    """
    Adds regions to each node in a network.

    The regions are stored as shapely geometries in the `cell` attribute of
    each vertex. They are read from
    ``<path>/regions/<region_type>/<filename>.shp`` when that shapefile
    exists. Otherwise they are created according to `region_type` and then
    saved to that shapefile for future use.

    :param g: The network to add regions to (modified in place).
    :param path: The path to the base folder of the network of interest.
    :param filename: The filename of the network of interest.
    :param region_type: The type of region to add to the graph. Should be one
        of: 'voronoi' or 'zip'.
    :raises NotImplementedError: if no cached regions exist and
        `region_type` is not one of the supported values.

    Examples
    --------
    >>> path = 'data/testing'
    >>> name = 'test'
    >>> g = load_network(path, name)
    >>> add_regions(g, path, name, 'zip')
    >>> 'cell' in g.vs.attributes()
    True
    """
    cells_file = os.path.join(path, 'regions', region_type, filename + '.shp')
    # The shapefile's .dbf sidecar is the file that gets locked.
    lock_file = cells_file[:-4] + '.dbf'

    if os.path.exists(cells_file):
        logger.info("Loading Regions")
        with open(lock_file, 'r') as handle:
            try:
                multithreading.lock_file_handle(handle)
                # load cells from file; the context manager closes the fiona
                # collection, which was previously left open
                with fiona.open(cells_file) as records:
                    for record in records:
                        index = record['properties']['index']
                        g.vs[index]['cell'] = shapely.geometry.shape(
                            record['geometry'])
            finally:
                multithreading.unlock_file_handle(handle)
        return

    logger.info("No Regions Found")

    # add the regions to the graph
    if region_type == 'voronoi':
        layout_position(g)
        bound = get_bounds(g)
        logger.info('Creating Voronoi Cells')
        create_voronoi_regions(g, bound)
        logger.info('Clipping Cells')
        clip_cells(g, bound)
    elif region_type == 'zip':
        logger.info('Finding Zipcode Cells')
        geometry = _client['crimes'].geometry
        g.vs['cell'] = [
            shapely.geometry.shape(
                geometry.find_one({'zip': node['zipcode']})['geometry'])
            for node in g.vs
        ]
    else:
        logger.warning(
            "Unrecognized region type: '{}'".format(region_type))
        raise NotImplementedError(
            '{} regions not implemented'.format(region_type))

    logger.info("Saving Regions")

    # save cells for future use
    ensure_folder(cells_file)
    schema = {
        'geometry': 'Polygon',
        'properties': {
            'index': 'int',
            'count': 'int'
        }
    }
    # Append mode creates the .dbf if needed so there is something to lock.
    with open(lock_file, 'a') as handle:
        try:
            multithreading.lock_file_handle(handle)
            with fiona.open(cells_file, 'w', 'ESRI Shapefile', schema) as sink:
                for i, vertex in enumerate(g.vs):
                    writable = shapely.geometry.mapping(
                        shapely.geometry.shape(vertex['cell']))
                    if region_type == 'zip':
                        # NOTE(review): the count is read from the vertex's
                        # 'description' attribute; presumably that holds a
                        # number for zip networks - confirm.
                        count = vertex['description']
                    else:
                        count = 1
                    sink.write({
                        'geometry': writable,
                        'properties': {
                            'index': i,
                            'count': count
                        }
                    })
        finally:
            multithreading.unlock_file_handle(handle)
def save_borders(path, filename, region_type, iterations, algorithm):
    """
    Saves a shapefile containing the borders found in a network.

    Obtains the border network via `get_border_network`, then for every edge
    with a positive weight intersects the `cell` geometries of its two
    endpoints to get the shape of the border between them. The shapes and
    their weights are written to
    ``<path>/borders/<region_type>/<algorithm>/<filename>_<iterations>.shp``.
    If that file already exists nothing is done.

    :param path: The base path to the network of crimes.
    :param filename: The filename of the crimes network.
    :param region_type: The type of region surrounding each vertex.
    :param iterations: The number of iterations of the community detection
        algorithm.
    :param algorithm: The community detection algorithm to use.

    Examples
    --------
    >>> path = 'data/testing'
    >>> filename = 'test'
    >>> iterations = 30
    >>> save_borders(path, filename, 'zip', iterations, 'random_walk')
    """
    borders_path = os.path.join(path, 'borders', region_type, algorithm,
                                '{}_{}.shp'.format(filename, iterations))
    if os.path.exists(borders_path):
        # skip
        logger.info("Borders Exist, skipping")
        return

    border_network = get_border_network(path, filename, region_type,
                                        algorithm, iterations)

    # Maps border geometry -> edge weight.
    borders = dict()
    for e in border_network.es:
        weight = e['weight']
        if not (weight > 0):
            continue

        line = border_network.vs[e.source]['cell'].intersection(
            border_network.vs[e.target]['cell'])

        # remove any points that might have snuck in; `.geoms` is used
        # because iterating a collection directly is not supported by every
        # Shapely version
        if line.geom_type == 'GeometryCollection':
            points = [part for part in line.geoms
                      if part.geom_type == 'Point']
            for p in points:
                line = line.difference(p)

        if line.geom_type in ('LineString', 'MultiLineString'):
            borders[line] = weight
        elif line.geom_type in ('Polygon', 'MultiPolygon'):
            # Overlapping cells: use the outline of the overlap as the border.
            borders[line.boundary] = weight
        else:
            logger.error('Unknown border geometry {}, skipping'.format(
                line.geom_type))

    ensure_folder(borders_path)
    # NOTE(review): `get_border_network` stores weights as floats
    # (count / iterations) while this schema declares 'int' - confirm the
    # field type is intended.
    schema = {'geometry': 'MultiLineString', 'properties': {'weight': 'int'}}
    # The shapefile's .dbf sidecar is the file that gets locked; append mode
    # creates it if needed.
    with open(borders_path[:-4] + '.dbf', 'a') as handle:
        try:
            multithreading.lock_file_handle(handle)
            with fiona.open(borders_path, 'w', 'ESRI Shapefile', schema) as c:
                # .items() works on both Python 2 and 3 (iteritems is 2-only)
                for border, _w in borders.items():
                    c.write({
                        'geometry': shapely.geometry.mapping(
                            shapely.geometry.shape(border)),
                        'properties': {
                            'weight': _w
                        }
                    })
        finally:
            multithreading.unlock_file_handle(handle)
def get_border_network(path, filename, region_type, algorithm, iterations):
    """
    Finds a network representing the borders between communities.

    The result is cached at
    ``<path>/borders/<region_type>/<algorithm>/<filename>_<iterations>.graphml``.
    A cached network is read back and has its regions re-attached;
    otherwise the network is built from the adjacency network and the
    detected communities, saved, and returned.

    :param path: The base path to the network of crimes.
    :param filename: The filename of the network of crimes.
    :param region_type: The type of regions around each vertex.
    :param algorithm: The community detection algorithm to use.
    :param iterations: The number of runs of the community detection
        algorithm.
    :return: An `igraph.Graph` object where the weights of edges between two
        vertices represent the strength of a border between them.

    Examples
    --------
    >>> path = 'data/testing'
    >>> filename = 'test'
    >>> bn = get_border_network(path, filename, 'voronoi',
    ...                         'label_propagation', 30)
    >>> bn.write_graphml('{}/borders/{}.graphml'.format(path, filename))
    """
    cache_file = os.path.join(path, 'borders', region_type, algorithm,
                              '{}_{}.graphml'.format(filename, iterations))
    ensure_folder(cache_file)

    if os.path.exists(cache_file):
        logger.info('Loading Border Network')
        with open(cache_file, 'r') as handle:
            try:
                multithreading.lock_file_handle(handle)
                cached = igraph.Graph.Read(cache_file)
            finally:
                multithreading.unlock_file_handle(handle)
        add_regions(cached, path, filename, region_type)
        return cached

    # create a network showing the physical adjacency of each cell
    base = load_network(path, filename)
    add_regions(base, path, filename, region_type)

    # build the border network from the adjacency network
    network = get_adjacency_network(base, path, filename, region_type)
    add_regions(network, path, filename, region_type)
    if 'id' in network.vs.attributes():
        # get rid of it because it causes a warning
        del network.vs['id']

    # get the list of communities to use
    comms = get_communities(base, iterations, path, filename, algorithm)

    logger.info('Creating Border Network')
    # change the weights on the edges to reflect the border weight
    for edge in network.es:
        edge['weight'] = find_border_weight(comms, edge.source, edge.target)

    # scale edges based on number of iterations
    scaled = []
    for raw in network.es['weight']:
        scaled.append(float(raw) / iterations)
    network.es['weight'] = scaled

    # save the network; append mode creates the file so it can be locked
    with open(cache_file, 'a') as handle:
        try:
            multithreading.lock_file_handle(handle)
            network.write_graphml(cache_file)
        finally:
            multithreading.unlock_file_handle(handle)
    return network