def mapper1(partitionId, records):
    """Yield ``((borough_idx, neighborhood_idx), 1)`` for every located record.

    The neighborhood and borough GeoJSON layers are read from HDFS,
    reprojected to EPSG:2263 and indexed by bounding box. Each CSV row is
    turned into a point from columns 3 and 2, passed to the projection as
    (x, y) -- presumably longitude/latitude; confirm against the input schema.

    Args:
        partitionId: Partition number; for partition 0 the first record is
            discarded (presumably the CSV header).
        records: Iterator of raw CSV lines.

    Yields:
        ``((idx2, idx1), 1)`` where ``idx1`` is the position of a neighborhood
        containing the point and ``idx2`` the position of a borough containing
        it. Borough candidates come from the neighborhood's bounding box.
    """
    if partitionId == 0:
        next(records)

    import csv

    rows = csv.reader(records)
    project = pyproj.Proj(init="epsg:2263", preserve_units=True)

    with hdfs.open('hdfs:///tmp/bdm/neighborhoods.geojson', 'rt') as f:
        neighbor = gpd.read_file(f).to_crs(fiona.crs.from_epsg(2263))
    with hdfs.open('hdfs:///tmp/bdm/boroughs.geojson', 'rt') as f:
        boroughs = gpd.read_file(f).to_crs(fiona.crs.from_epsg(2263))

    hood_index = rtree.Rtree()
    for position, shape in enumerate(neighbor.geometry):
        hood_index.insert(position, shape.bounds)

    boro_index = rtree.Rtree()
    for position, shape in enumerate(boroughs.geometry):
        boro_index.insert(position, shape.bounds)

    for row in rows:
        point = geom.Point(project(float(row[3]), float(row[2])))
        # The index only narrows down candidates by bounding box; the exact
        # containment test below decides.
        for hood_id in hood_index.intersection(
                (point.x, point.y, point.x, point.y)):
            hood_shape = neighbor.geometry[hood_id]
            if not hood_shape.contains(point):
                continue
            for boro_id in boro_index.intersection(hood_shape.bounds):
                if boroughs.geometry[boro_id].contains(point):
                    yield ((boro_id, hood_id), 1)
def processTrips(pid, records):
    """Count trips per (pickup neighborhood, drop-off borough) pair.

    ``neighbor_shape``, ``borough_shape`` and ``gpd`` are taken from the
    enclosing scope. Both layers are reprojected to EPSG:2263 and indexed by
    bounding box. Pickup points come from columns 3/2 and drop-off points from
    columns 5/4 of each CSV row.

    Args:
        pid: Partition number; partition 0 has its first record discarded.
        records: Iterator of raw CSV lines.

    Returns:
        ``dict.items()`` mapping ``(neighborhood_pos, borough_pos)`` to the
        number of rows whose pickup and drop-off were both located.
    """
    if pid == 0:
        next(records)
    tally = {}

    import rtree
    import fiona
    import fiona.crs
    import shapely
    import pyproj
    import shapely.geometry as geom
    import csv

    rows = csv.reader(records)
    project = pyproj.Proj(init="epsg:2263", preserve_units=True)
    neighborhoods = gpd.read_file(neighbor_shape).to_crs(
        fiona.crs.from_epsg(2263))
    boroughs = gpd.read_file(borough_shape).to_crs(fiona.crs.from_epsg(2263))

    index_pick = rtree.Rtree()
    index_drop = rtree.Rtree()
    # Pickups are matched against neighborhoods ...
    for position, shape in enumerate(neighborhoods.geometry):
        index_pick.insert(position, shape.bounds)
    # ... and drop-offs against boroughs.
    for position, shape in enumerate(boroughs.geometry):
        index_drop.insert(position, shape.bounds)

    def first_containing(tree, frame, point):
        # Position of the first candidate polygon that contains the point.
        for candidate in tree.intersection(
                (point.x, point.y, point.x, point.y)):
            if frame.geometry[candidate].contains(point):
                return candidate
        return None

    for row in rows:
        try:
            pickup = geom.Point(project(float(row[3]), float(row[2])))
            dropoff = geom.Point(project(float(row[5]), float(row[4])))
            hood_id = first_containing(index_pick, neighborhoods, pickup)
            boro_id = first_containing(index_drop, boroughs, dropoff)
            if hood_id is not None and boro_id is not None:
                pair = (hood_id, boro_id)
                tally[pair] = tally.get(pair, 0) + 1
        except ValueError:
            # Rows raising ValueError (e.g. non-numeric fields) are ignored.
            pass
    return tally.items()
def parse(records):
    """Yield ``((station_idx, date), value)`` for rows inside a relief buffer.

    Relief stations are loaded from ``TRS_149.geojson``, tagged as EPSG:4326,
    reprojected to EPSG:2263 and buffered by ``distance``. Each CSV row
    supplies integer x/y in columns 3 and 4; the first buffer containing that
    point produces one output record.

    Args:
        records: Iterator of raw CSV lines.

    Yields:
        ``((idx, row[1][:6]), int(row[2]))`` where ``idx`` is the index label
        of the matching relief station.
    """
    rows = csv.reader(records)
    # presumably 50 m expressed in feet -- confirm the intended unit
    distance = 1 / .3048 * 50

    relief_path = 'TRS_149.geojson'
    relief = gpd.GeoDataFrame.from_file(relief_path)
    relief.crs = from_epsg(4326)
    relief = relief.to_crs(epsg=2263)
    relief["buffer"] = relief.apply(
        lambda x: x.geometry.buffer(distance), axis=1)
    relief = relief.set_geometry("buffer")

    index = rtree.Rtree()
    for label, shape in zip(relief.index.values, relief.geometry):
        index.insert(label, shape.bounds)

    for row in rows:
        date = row[1][:6]
        x, y = int(row[3]), int(row[4])
        candidates = index.intersection((x, y, x, y))
        point = geom.Point(x, y)
        for label in candidates:
            if relief.geometry[label].contains(point):
                # Only the first containing buffer is reported.
                yield ((label, date), int(row[2]))
                break
def parseIdles(records):
    """Aggregate rows per (hexagon GRID_ID, date).

    Hexagons are loaded from ``Hexagon_clipped.geojson`` and indexed by
    bounding box. Each CSV row supplies integer x/y in columns 3 and 4; the
    first hexagon containing that point receives the row.

    Args:
        records: Iterator of raw CSV lines.

    Returns:
        ``dict.items()`` mapping ``(GRID_ID, row[1][:6])`` to a 4-tuple
        ``(row_count, sum of int(row[2]), 0, 0)``. The last two slots are
        always written as zero here.
    """
    reader = csv.reader(records)
    hexagon = gpd.GeoDataFrame.from_file('Hexagon_clipped.geojson')
    counts = {}

    index = rtree.Rtree()
    for idx, geometry in zip(hexagon.index.values, hexagon.geometry):
        index.insert(idx, geometry.bounds)

    for row in reader:
        date = row[1][:6]
        x, y = int(row[3]), int(row[4])
        potentialMatches = index.intersection((x, y, x, y))
        p = geom.Point(x, y)

        match = None
        for idx in potentialMatches:
            if hexagon.geometry[idx].contains(p):
                match = idx
                break

        # Compare against None explicitly: index label 0 is a valid match and
        # must not be discarded by a truthiness test.
        if match is not None:
            k = (hexagon.GRID_ID[match], date)
            v = counts.get(k, (0, 0, 0, 0))
            counts[k] = (v[0] + 1, v[1] + int(row[2]), 0, 0)
    return counts.items()
def _build_elem_index(self): # Build Rtree index for elems if self._elem_index is None: print "Building element indices..." elem_i = 0 tuples = [] for element in self._elems: # TODO: This could be better with numpy. box = [None, None, None, None] # [xmin xmax ymin ymax] for node_i in element: node = self._nodes[node_i] if box[0] is None or box[0] > node[0]: box[0] = node[0] if box[1] is None or box[1] < node[0]: box[1] = node[0] if box[2] is None or box[2] > node[1]: box[2] = node[1] if box[3] is None or box[3] < node[1]: box[3] = node[1] index = (elem_i, box, None) tuples.append(index) elem_i += 1 self._elem_index = rtree.Rtree(tuples, interleaved=False)
def find_closest_elems(self, pos, count=1):
    """Find indices of the closest elems to the given position.

    The distance is measured to the element mass center. All elements are
    assumed to be triangular (the center is the node sum divided by 3).
    The center index is built lazily on first use and cached in
    ``self._elemcenter_index``.

    Args:
        pos: Position tuple.
        count: Number of nearest elements requested.

    Returns:
        A list of element indices when ``count > 1``, otherwise the single
        closest index. The list may be shorter than ``count`` when the index
        yields fewer hits.

    Raises:
        IndexError: If ``count`` is 1 (or less) and there are no hits.
    """
    from itertools import islice

    if self._elemcenter_index is None:
        tuples = []
        for i, element in enumerate(self._elems):
            center = np.zeros(2)
            for node_i in element:
                np.add(center, self._nodes[node_i][:2], center)
            center /= 3.
            tuples.append((i, center[_XXYY], None))
        self._elemcenter_index = rtree.Rtree(tuples, interleaved=False)

    pos = np.array(pos)
    # returns the index of the grid point closest to the given point:
    hits = self._elemcenter_index.nearest(pos[_XXYY], count)
    # newer versions of rtree return a generator; translate it into a list
    # like we used to get. islice works on Python 2 and 3 and simply stops
    # early when fewer than `count` hits are available.
    if isinstance(hits, types.GeneratorType):
        hits = list(islice(hits, count))

    if count > 1:
        return hits
    return hits[0]
def create_index(zones_):
    """Return an R-tree over the bounding boxes of ``zones_.geometry``.

    Entry ids are the positions of the geometries in iteration order.
    """
    import rtree
    import fiona.crs

    spatial_index = rtree.Rtree()
    position = 0
    for shape in zones_.geometry:
        spatial_index.insert(position, shape.bounds)
        position += 1
    return spatial_index
def process(pid, records):
    """Count trips per (drop-off borough, pickup neighborhood) pair.

    ``neighborhoods.geojson`` is reprojected to EPSG:2263 and indexed by
    bounding box once. The pickup point is built from columns 5/6 and the
    drop-off point from columns 9/10 of each CSV row.

    Args:
        pid: Partition number; partition 0 has its first record discarded.
        records: Iterator of raw CSV lines.

    Returns:
        ``dict.items()`` mapping ``(borough, neighborhood)`` to a row count.
        Rows that fail to parse or cannot be located at both ends are skipped.
    """
    import csv
    import pyproj
    import shapely.geometry as geom
    import fiona
    import fiona.crs
    import shapely
    import rtree
    import geopandas as gpd

    neighborhoods = gpd.read_file("neighborhoods.geojson").to_crs(
        fiona.crs.from_epsg(2263))

    # Insert each geometry exactly once; a second insert pass only duplicated
    # every entry and doubled the candidates returned by each query.
    index = rtree.Rtree()
    for idx, geometry in enumerate(neighborhoods.geometry):
        index.insert(idx, geometry.bounds)

    proj = pyproj.Proj(init="epsg:2263", preserve_units=True)
    counts = {}
    reader = csv.reader(records)
    if pid == 0:
        # Default avoids StopIteration when the first partition is empty.
        next(records, None)

    def locate(point):
        # Position of the first neighborhood containing the point, or None.
        for candidate in index.intersection(
                (point.x, point.y, point.x, point.y)):
            if neighborhoods.geometry[candidate].contains(point):
                return candidate
        return None

    for row in reader:
        try:
            pickup = geom.Point(proj(float(row[5]), float(row[6])))
            dropoff = geom.Point(proj(float(row[9]), float(row[10])))
            drop_idx = locate(dropoff)
            pick_idx = locate(pickup)
        except Exception:
            # Deliberately best-effort: any row that cannot be parsed,
            # projected or tested is dropped rather than failing the task.
            continue

        boro = None if drop_idx is None else neighborhoods.borough[drop_idx]
        match = (None if pick_idx is None
                 else neighborhoods.neighborhood[pick_idx])
        if match and boro:
            combname = (boro, match)
            counts[combname] = counts.get(combname, 0) + 1
    return counts.items()
def indexZones(buildingfiles):
    """Index buffered building points from a CSV file.

    Rows whose ``YearBuilt`` is ``'0'`` or whose ``XCoord``/``YCoord`` are
    blank are skipped. Every remaining row becomes a point buffered by 20
    and is inserted into the R-tree under a running id.

    Args:
        buildingfiles: Path of the CSV file to read.

    Returns:
        ``(index, dic)`` where ``dic`` maps each id to
        ``(YearBuilt, buffered_polygon)``.
    """
    import rtree
    import fiona.crs
    import geopandas as gpd
    import shapely.geometry as geom
    index = rtree.Rtree()
    import csv

    dic = {}
    # NOTE(review): binary mode suits the Python 2 csv module; on Python 3
    # csv expects a text-mode file -- confirm the target interpreter.
    with open(buildingfiles, 'rb') as f:
        inx = 0
        for row in csv.DictReader(f):
            if row['YearBuilt'] == '0':
                continue
            if row['XCoord'].strip() == '' or row['YCoord'].strip() == '':
                continue
            point = geom.Point(float(row['XCoord']), float(row['YCoord']))
            # Buffering turns the point into a polygon around it.
            g = point.buffer(20)
            index.insert(inx, g.bounds)
            dic[inx] = (row['YearBuilt'], g)
            inx += 1
    return (index, dic)
def create_rtree_index(self):
    """Create `rtree <http://toblerity.org/rtree/>`_ index for efficient
    spatial querying.

    The index is stored on ``self.rtree_index`` and filled from the
    ``(index, geometry)`` pairs produced by ``self.iter_latlong()``.

    **Note**: Bounds are given in lat/long, not in the native CRS"""
    self.rtree_index = rtree.Rtree()
    for key, shape in self.iter_latlong():
        bounds = shape.bounds
        self.rtree_index.add(key, bounds)
    return self.rtree_index
def createIndex(shapefile):
    """Load *shapefile*, reproject it to EPSG:2263 and index it spatially.

    Returns:
        ``(index, zones)``: an R-tree keyed by row position over the geometry
        bounding boxes, and the reprojected GeoDataFrame.
    """
    import rtree
    import fiona.crs
    import geopandas as gpd

    loaded = gpd.read_file(shapefile)
    zones = loaded.to_crs(fiona.crs.from_epsg(2263))

    index = rtree.Rtree()
    all_bounds = (shape.bounds for shape in zones.geometry)
    for position, bounds in enumerate(all_bounds):
        index.insert(position, bounds)
    return (index, zones)
def createIndex(shapefile):
    """Index the zones held in ``C_TRACT.value``.

    Args:
        shapefile: Not used by this implementation; the zones come from
            ``C_TRACT.value`` instead.

    Returns:
        ``(index, zones)``: an R-tree keyed by row position over the geometry
        bounding boxes, and the zones object itself.
    """
    import rtree
    import fiona.crs
    import geopandas as gpd

    zones = C_TRACT.value
    index = rtree.Rtree()
    add_entry = index.insert
    for position, shape in enumerate(zones.geometry):
        add_entry(position, shape.bounds)
    return (index, zones)
def createIndex(geojson):
    """Read *geojson* as-is (no reprojection) and index it spatially.

    Returns:
        ``(index, zones)``: an R-tree keyed by row position over the geometry
        bounding boxes, and the loaded GeoDataFrame.
    """
    import rtree
    import fiona.crs
    import geopandas as gpd

    zones = gpd.read_file(geojson)
    index = rtree.Rtree()
    row_number = 0
    for polygon in zones.geometry:
        index.insert(row_number, polygon.bounds)
        row_number += 1
    return (index, zones)
def processTrips(pid, records):
    """Yield ``((pickup neighborhood, drop-off borough), 1)`` per located trip.

    ``neighborhoods.geojson`` is reprojected to EPSG:2263 and indexed by
    bounding box. The pickup point is built from columns 5/6 and the drop-off
    point from columns 9/10 of each CSV row. The drop-off is resolved first;
    the pickup is only looked up when a drop-off borough was found.

    Args:
        pid: Partition number; partition 0 has its first record discarded.
        records: Iterator of raw CSV lines.

    Yields:
        ``((neighborhood, borough), 1)`` where ``neighborhood`` is the
        ``'neighborhood'`` value for the pickup and ``borough`` the
        ``'borough'`` value for the drop-off.
    """
    import fiona
    import fiona.crs
    import shapely
    import rtree
    import geopandas as gpd
    import csv
    import pyproj
    import shapely.geometry as geom

    if pid == 0:
        # A bare next() on an empty partition would surface as RuntimeError
        # inside a generator (PEP 479); the default makes it a no-op.
        next(records, None)

    reader = csv.reader(records)
    proj = pyproj.Proj(init="epsg:2263", preserve_units=True)
    shapefile_start = 'neighborhoods.geojson'
    neighborhoods = gpd.read_file(shapefile_start).to_crs(
        fiona.crs.from_epsg(2263))

    index_start = rtree.Rtree()
    for idx, geometry in enumerate(neighborhoods.geometry):
        index_start.insert(idx, geometry.bounds)

    def lookup(point, column):
        # Value of `column` for the first neighborhood containing the point.
        for candidate in index_start.intersection(
                (point.x, point.y, point.x, point.y)):
            if neighborhoods.geometry[candidate].contains(point):
                return neighborhoods[column][candidate]
        return None

    for row in reader:
        try:
            p_start = geom.Point(proj(float(row[5]), float(row[6])))
            p_end = geom.Point(proj(float(row[9]), float(row[10])))
        except Exception:
            # Skip rows that cannot be parsed or projected. `Exception`
            # rather than a bare except, so KeyboardInterrupt and SystemExit
            # still propagate.
            continue

        match_end = lookup(p_end, 'borough')
        if not match_end:
            continue
        match_start = lookup(p_start, 'neighborhood')
        if match_start:
            yield ((match_start, match_end), 1)
def index(text_elements):
    """Index text elements by bounding box and by cleaned text.

    Args:
        text_elements: Iterable of ``(bbox, text)`` pairs.

    Returns:
        ``(bbox_to_text, text_to_corner)``: an R-tree whose entries carry the
        stripped text as payload, and a mapping from the text with ``*``
        removed and whitespace stripped to a list of
        ``Anchor(bbox[0], bbox[1])`` objects.
    """
    bbox_to_text = rtree.Rtree()
    text_to_corner = collections.defaultdict(list)
    for position, element in enumerate(text_elements):
        bbox, text = element
        bbox_to_text.add(position, bbox, text.strip())
        lookup_key = text.replace("*", "").strip()
        corner = Anchor(bbox[0], bbox[1])
        text_to_corner[lookup_key].append(corner)
    return bbox_to_text, text_to_corner
def createindex(shapefile):
    """Load *shapefile*, reproject it to EPSG:2263 and build an R-tree.

    Returns:
        ``(index1, neighbor)``: the R-tree keyed by row position over the
        geometry bounding boxes, and the reprojected GeoDataFrame.
    """
    import geopandas as gpd
    import rtree
    import fiona.crs

    unprojected = gpd.read_file(shapefile)
    neighbor = unprojected.to_crs(fiona.crs.from_epsg(2263))

    index1 = rtree.Rtree()
    for row_id, outline in enumerate(neighbor.geometry):
        box = outline.bounds
        index1.insert(row_id, box)
    return (index1, neighbor)
def get_ids(self):
    """Return the indices of particles in the line list.

    For every output line, the coordinate of its first referenced point is
    matched to the nearest source particle that has not been used yet
    (among the 10 nearest). When all of those are already taken, the single
    nearest particle is reused and a warning is logged.

    Returns:
        ``numpy`` array of particle ids, one per output line; an empty int64
        array when there are no output lines.

    Raises:
        AssertionError: If ``len(self.source_ids)`` differs from
            ``self.particles.number_of_points``.
    """
    st = self.tracer
    if st.output.lines.number_of_cells == 0:
        # no lines, no indices
        logger.info('No output lines found')
        return np.array([], dtype='int64')

    # create an rtree for fast lookup
    # ids should be as long as number of points in particles
    msg = "%s should be %s" % (len(self.source_ids),
                               self.particles.number_of_points)
    assert len(self.source_ids) == self.particles.number_of_points, msg

    # ids of the source points
    tree = rtree.Rtree()
    for i, (x_i, y_i, _) in zip(self.source_ids,
                                self.particles.points.to_array()):
        tree.add(i, (x_i, y_i))

    # lookup lines and points
    lines = st.output.lines.data.to_array()
    points = st.output.points.to_array()
    rows = []
    start = 0
    for _ in range(st.output.lines.number_of_cells):
        # each cell is stored as [n, idx_0, ..., idx_{n-1}]; keep the first
        # point and jump to the next cell
        n = lines[start]
        idx = lines[start + 1]
        rows.append(points[idx])
        start += (n + 1)
    lines = np.array(rows)

    # lookup all locations of the particles in the ids
    idxs = []
    used = set()  # same content as idxs, for O(1) membership tests
    for line_i in lines:
        location = tuple(line_i[:2])
        # find the particle that is closest, max of 10 locations
        for idx in tree.nearest(location, num_results=10):
            if idx not in used:
                # found one
                break
            # if we already found this, keep looking
        else:
            # oops, we can't find a single particle here that
            # we haven't used already
            idx = next(iter(tree.nearest(location)))
            msg = 'Could not find particle for %s, reusing %s'
            logger.warning(msg, line_i, idx)
        # add it to the list
        idxs.append(idx)
        used.add(idx)
    return np.array(idxs)
def processTrips(pid, records):
    """Count trips per ``"<pickup neighborhood>_<drop-off borough>"`` key.

    ``boroughs.geojson`` and ``neighborhoods.geojson`` are reprojected to
    EPSG:2263 and indexed by bounding box. The pickup point is built from
    columns 5/6 and the drop-off point from columns 9/10 of each CSV row.
    Every containing borough is combined with every containing neighborhood
    (there is no early exit after the first match).

    Args:
        pid: Partition number; for partition 0 the first record is consumed
            and printed.
        records: Iterator of raw CSV lines.

    Returns:
        ``dict.items()`` mapping the combined key to its count.
    """
    import rtree
    import geopandas as gpd
    import fiona.crs
    import csv
    import pyproj
    import shapely.geometry as geom

    if pid == 0:
        # Default avoids StopIteration when the first partition is empty.
        header = next(records, None)
        if header is not None:
            print(header)

    reader = csv.reader(records)
    counts = {}
    proj = pyproj.Proj(init="epsg:2263", preserve_units=True)

    boroughs = gpd.read_file('boroughs.geojson').to_crs(
        fiona.crs.from_epsg(2263))
    bor_index = rtree.Rtree()
    for idx, geometry in enumerate(boroughs.geometry):
        bor_index.insert(idx, geometry.bounds)

    nbs = gpd.read_file('neighborhoods.geojson').to_crs(
        fiona.crs.from_epsg(2263))
    nei_index = rtree.Rtree()
    for idx, geometry in enumerate(nbs.geometry):
        nei_index.insert(idx, geometry.bounds)

    for row in reader:
        try:
            p_end = geom.Point(proj(float(row[9]), float(row[10])))
            p_start = geom.Point(proj(float(row[5]), float(row[6])))
        except Exception:
            # Skip rows that cannot be parsed or projected. `Exception`
            # rather than a bare except, so KeyboardInterrupt and SystemExit
            # still propagate.
            continue

        for idx in bor_index.intersection(
                (p_end.x, p_end.y, p_end.x, p_end.y)):
            if not boroughs.geometry[idx].contains(p_end):
                continue
            borough = boroughs['boroname'][idx]
            for idx2 in nei_index.intersection(
                    (p_start.x, p_start.y, p_start.x, p_start.y)):
                if nbs.geometry[idx2].contains(p_start):
                    neigh = nbs['neighborhood'][idx2]
                    key = neigh + "_" + borough
                    counts[key] = counts.get(key, 0) + 1
    return counts.items()
def createIndex(shapefile):
    """Load *shapefile*, reproject it to EPSG:2263 and index it spatially.

    The projection object that used to be built here was never used, so it
    and the ``pyproj`` import have been dropped.

    Returns:
        ``(index, zones)``: an R-tree keyed by row position over the geometry
        bounding boxes, and the reprojected GeoDataFrame.
    """
    import rtree
    import fiona.crs
    import geopandas as gpd

    zones = gpd.read_file(shapefile).to_crs(fiona.crs.from_epsg(2263))
    index = rtree.Rtree()
    for idx, geometry in enumerate(zones.geometry):
        index.insert(idx, geometry.bounds)
    return (index, zones)
def createIndex(tracts):
    """Load tracts, keep populated valid ones, and index them spatially.

    The file is reprojected to EPSG:5070, filtered to rows with
    ``plctrpop10 > 0`` and a valid geometry, and re-indexed with
    ``reset_index()`` so R-tree ids match row positions.

    Returns:
        ``(index, zones)``: the R-tree over geometry bounding boxes and the
        filtered GeoDataFrame.
    """
    import rtree
    import fiona.crs
    import geopandas as gpd

    projected = gpd.read_file(tracts).to_crs(fiona.crs.from_epsg(5070))
    populated = projected['plctrpop10'] > 0
    well_formed = projected.geometry.is_valid
    zones = projected.loc[populated & well_formed].reset_index()

    index = rtree.Rtree()
    for position, shape in enumerate(zones.geometry):
        index.insert(position, shape.bounds)
    return (index, zones)
def indexZones(shapeFilename):
    """Load zones, buffer them by 450 and index the buffered shapes.

    The file is reprojected to EPSG:2263; each geometry is then replaced by
    its buffer of radius 450 before the bounding boxes are indexed.

    Returns:
        ``(index, zones)``: the R-tree keyed by row position and the
        GeoDataFrame whose active geometry is the buffered one.
    """
    import rtree
    import fiona.crs
    import geopandas as gpd

    index = rtree.Rtree()
    projected = gpd.read_file(shapeFilename).to_crs(
        fiona.crs.from_epsg(2263))
    buffered = projected.geometry.buffer(450)  # 450 is radius
    zones = projected.set_geometry(buffered)

    insert = index.insert
    for position, shape in enumerate(zones.geometry):
        insert(position, shape.bounds)
    return (index, zones)
def _build_node_index(self):
    """Build the R-tree index of nodes, once.

    Does nothing when ``self._node_index`` is already set. Nodes whose first
    coordinate is not finite are left out of the index.
    """
    if self._node_index is not None:
        return
    if self._logger is not None:
        self._logger.info("Building node indexes...")

    # Entries are (id, [x x y y], None); interleaved=False tells rtree the
    # coordinates are given in that non-interleaved order.
    entries = []
    for node_id in range(self.n_nodes()):
        if np.isfinite(self._nodes[node_id, 0]):
            entries.append((node_id, self._nodes[node_id, _XXYY], None))
    self._node_index = rtree.Rtree(entries, interleaved=False)
def createIndex(shapefile):
    """Index the census tracts stored in *shapefile*.

    The file is reprojected to EPSG:2263 and every geometry's bounding box is
    inserted into an R-tree under its row position.

    Returns:
        ``(index, censusTracts500)``: the R-tree and the reprojected
        GeoDataFrame.
    """
    import rtree
    import fiona.crs
    import geopandas as gpd

    tracts_raw = gpd.read_file(shapefile)
    censusTracts500 = tracts_raw.to_crs(fiona.crs.from_epsg(2263))

    index = rtree.Rtree()
    tract_no = 0
    for tract_shape in censusTracts500.geometry:
        index.insert(tract_no, tract_shape.bounds)
        tract_no += 1
    return (index, censusTracts500)
def findBoundary():
    """Load ``500cities_tracts.geojson`` and index its geometries.

    The file is used as read (no reprojection).

    Returns:
        ``(tracts, tracts_index)``: the GeoDataFrame and an R-tree keyed by
        row position over the geometry bounding boxes. Note the frame comes
        first in this tuple.
    """
    import rtree
    import geopandas as gpd
    import fiona.crs
    import csv
    import pyproj
    import shapely.geometry as geom

    tracts = gpd.read_file('500cities_tracts.geojson')
    tracts_index = rtree.Rtree()
    tract_bounds = (shape.bounds for shape in tracts.geometry)
    for position, bounds in enumerate(tract_bounds):
        tracts_index.insert(position, bounds)
    return tracts, tracts_index
def createindex(shapefile1):
    """Load *shapefile1*, reproject it to EPSG:2263 and build an R-tree.

    Returns:
        ``(indexr, tract)``: the R-tree keyed by row position over the
        geometry bounding boxes, and the reprojected GeoDataFrame.
    """
    import geopandas as gpd
    import rtree
    import fiona.crs

    source = gpd.read_file(shapefile1)
    tract = source.to_crs(fiona.crs.from_epsg(2263))

    indexr = rtree.Rtree()
    register = indexr.insert
    for row_id, outline in enumerate(tract.geometry):
        register(row_id, outline.bounds)
    return (indexr, tract)
def createIndex(shapefile):
    """Build an R-tree spatial index for the features in *shapefile*.

    Returns:
        ``(index, zones)``: the R-tree keyed by row position over each
        feature's bounding box, and the GeoDataFrame reprojected to
        EPSG:2263.
    """
    # Libraries are imported locally, inside the function.
    import rtree
    import fiona.crs
    import geopandas as gpd

    # Read the features, then move them to the 2263 coordinate system.
    features = gpd.read_file(shapefile)
    zones = features.to_crs(fiona.crs.from_epsg(2263))

    # The index stores one bounding box per feature, keyed by position.
    index = rtree.Rtree()
    position = 0
    for polygon in zones.geometry:
        index.insert(position, polygon.bounds)
        position += 1

    return (index, zones)
def search_index(h5path, variable, dimensions, bbox):
    """Print the slices of an HDF5 variable hit by a bounding-box query.

    Opens the on-disk R-tree stored next to *h5path* (``.idx``/``.dat``
    files), queries it with *bbox* and, for every stored
    ``(xmin, ymin, xmax, ymax)`` payload, prints the bounds followed by
    ``variable[xmin:xmax, ymin:ymax]`` from the HDF5 file.

    Args:
        h5path: Path of the HDF5 file; also the base name of the index files.
        variable: Name of the dataset to read.
        dimensions: Not used by this function.
        bbox: Query bounds passed to the index.

    Exits the process with status 1 when the index files are missing.
    """
    # check if the idx data exists
    if not (os.path.isfile(h5path + '.idx')
            and os.path.isfile(h5path + '.dat')):
        print('index files don\'t exist')
        sys.exit(1)

    idx = rtree.Rtree(h5path)
    hits = list(idx.intersection(bbox, objects='raw'))

    h5_fh = h5py.File(h5path, 'r')
    try:
        var_id = h5_fh[variable]
        for (xmin, ymin, xmax, ymax) in hits:
            # Single pre-formatted argument keeps the output identical on
            # Python 2 and 3.
            print('%s %s %s %s' % (xmin, ymin, xmax, ymax))
            print(var_id[xmin:xmax, ymin:ymax])
    finally:
        # Close the file even when a lookup or slice fails.
        h5_fh.close()
def createIndex(shapefile):
    """Build an R-tree from the geometries stored in a shapefile.

    Args:
        shapefile: Path of the file to load.

    Returns:
        A pair ``(index, zones)``:
        (1) index: an R-tree over the bounding boxes of the geometries;
        (2) zones: the file's data, reprojected to EPSG:2263.

    The id of each R-tree entry equals the position of the corresponding
    object in ``zones``.
    """
    import rtree
    import fiona.crs
    import geopandas as gpd

    as_loaded = gpd.read_file(shapefile)
    zones = as_loaded.to_crs(fiona.crs.from_epsg(2263))

    index = rtree.Rtree()
    boxes = (shape.bounds for shape in zones.geometry)
    for order, box in enumerate(boxes):
        index.insert(order, box)
    return (index, zones)
def drawLocations(coord, ctx, locs):
    """Draw a dot and a text label for each location, skipping overlaps.

    A label is drawn only when its rectangle does not intersect any label
    already placed; the placed rectangles are tracked in an R-tree.

    Args:
        coord: Object providing ``screenFromWorld(lng, lat)``.
        ctx: Drawing context used for font setup, text extents and output.
        locs: Iterable of ``(name, (lat, lng))`` pairs.
    """
    placed = rtree.Rtree()
    for name, (lat, lng) in locs:
        anchor = coord.screenFromWorld(lng, lat)
        ctx.select_font_face("Verdana", cairo.FONT_SLANT_NORMAL,
                             cairo.FONT_WEIGHT_NORMAL)
        ctx.set_font_size(9)
        extents = ctx.text_extents(name)[:4]
        x_bearing, y_bearing, width, height = extents

        # The label starts 5 units to the right of the projected position.
        label_x = anchor[0] + 5
        label_y = anchor[1]
        region = (label_x, label_y, label_x + width, label_y + height)

        if not list(placed.intersection(region)):
            dot(ctx, (label_x - 5, label_y), 4, 1, (0, .3, 0), (0, .5, 0))
            # Every region is stored under id 0; only the overlap test
            # matters, never the id.
            placed.add(0, region)
            ctx.move_to(label_x, label_y)
            ctx.set_source_rgb(1, 1, 0.0)
            ctx.show_text(name)
def extractGeom(partId, records):
    """Map pickups to the nearest subway station within a search window.

    For every CSV row with more than 8 fields, the pickup longitude/latitude
    (columns 5 and 6) is projected with EPSG:2263 and the stations whose
    buffer bounding box intersects a +/-300 window around the point are
    considered; the one with the smallest distance to the point wins.

    ``subwayst`` is read from the enclosing scope.

    Args:
        partId: Partition number; partition 0 has its first record discarded.
        records: Iterator of raw CSV lines.

    Yields:
        ``((day, hour), station_objectid)`` for pickups whose hour is
        strictly between 6 and 21. Day and hour are taken from characters
        8-9 and 11-12 of column 1.
    """
    from shapely.geometry import Point
    import pyproj
    import rtree
    import geopandas as gpd
    import csv

    # create rtree for subway stations
    index = rtree.Rtree()
    for idx, geometry in enumerate(subwayst['buffer']):
        index.insert(idx, geometry.bounds)

    if partId == 0:
        # Built-in next() works on Python 2 and 3; the default keeps an empty
        # first partition from raising inside this generator.
        next(records, None)

    proj = pyproj.Proj(init="epsg:2263", preserve_units=True)
    reader = csv.reader(records)
    for row in reader:
        if len(row) <= 8:
            continue

        # extract longit & latit and convert to feet
        longit = float(row[5])
        latit = float(row[6])
        pickup = Point(proj(longit, latit))
        match = index.intersection((pickup.x - 300, pickup.y - 300,
                                    pickup.x + 300, pickup.y + 300))

        # (distance, idx) tuples compare by distance first; the sentinel
        # means no station was found.
        nearest = (1e6, None)
        for idx in match:
            nearest = min(nearest,
                          (pickup.distance(subwayst.geometry[idx]), idx))
        if nearest == (1e6, None):
            continue

        stamp = row[1]
        date = stamp[8:10]
        hour = int(stamp[11:13])
        if 6 < hour < 21:
            yield ((int(date), hour), subwayst.objectid[nearest[1]])