Beispiel #1
0
def mapper1(partitionId,records):
    """Yield ``((borough_idx, neighborhood_idx), 1)`` for every located record.

    Each CSV row is turned into a point from ``row[3]`` and ``row[2]``
    (presumably longitude and latitude -- confirm against the input schema),
    projected with EPSG:2263, and matched against the neighborhood and
    borough polygons read from HDFS.

    Args:
        partitionId: index of the partition; partition 0 is assumed to start
            with the CSV header line, which is skipped.
        records: iterator over the raw CSV lines of this partition.

    Yields:
        ``((idx2, idx1), 1)`` where ``idx1`` is the position of the containing
        neighborhood and ``idx2`` the position of the containing borough.
    """
    import csv

    # Skip the header. The default keeps an empty partition from raising
    # StopIteration inside this generator (a RuntimeError under PEP 479).
    if partitionId==0:
        next(records, None)
    reader=csv.reader(records)
    proj = pyproj.Proj(init="epsg:2263", preserve_units=True)

    with hdfs.open('hdfs:///tmp/bdm/neighborhoods.geojson', 'rt') as f:
        neighbor = gpd.read_file(f).to_crs(fiona.crs.from_epsg(2263))
    with hdfs.open('hdfs:///tmp/bdm/boroughs.geojson', 'rt') as f:
        boroughs = gpd.read_file(f).to_crs(fiona.crs.from_epsg(2263))

    # One R-tree per layer, keyed by the row position of each polygon.
    index1 = rtree.Rtree()
    for idx,geometry in enumerate(neighbor.geometry):
        index1.insert(idx, geometry.bounds)
    index2 = rtree.Rtree()
    for idx,geometry in enumerate(boroughs.geometry):
        index2.insert(idx, geometry.bounds)

    for row in reader:
        p = geom.Point(proj(float(row[3]), float(row[2])))
        point_box = (p.x, p.y, p.x, p.y)
        for idx1 in index1.intersection(point_box):
            # The R-tree only compares bounding boxes; confirm with the shape.
            if not neighbor.geometry[idx1].contains(p):
                continue
            # Query the boroughs with the point itself. The neighborhood's
            # bounding box would return extra candidates that the contains()
            # test below rejects anyway.
            for idx2 in index2.intersection(point_box):
                if boroughs.geometry[idx2].contains(p):
                    yield ((idx2,idx1),1)
Beispiel #2
0
def processTrips(pid, records):
    """Count rows per (pickup neighborhood index, drop-off borough index).

    The pickup point is built from ``row[3]``/``row[2]`` and the drop-off
    point from ``row[5]``/``row[4]``, both projected with EPSG:2263. Rows
    where either point is not inside any polygon are ignored, and so are rows
    that raise ``ValueError`` while being processed.

    Args:
        pid: partition index; on partition 0 the first record is skipped.
        records: iterator over raw CSV lines.

    Returns:
        The ``items()`` of a dict mapping ``(neighborhood_idx, borough_idx)``
        to the number of matching rows.
    """
    if pid==0:
        next(records)
    import csv
    import fiona
    import fiona.crs
    import pyproj
    import rtree
    import shapely
    import shapely.geometry as geom

    tally = {}
    reader = csv.reader(records)
    proj = pyproj.Proj(init="epsg:2263", preserve_units=True)
    neighborhoods = gpd.read_file(neighbor_shape).to_crs(fiona.crs.from_epsg(2263))
    boroughs = gpd.read_file(borough_shape).to_crs(fiona.crs.from_epsg(2263))

    # R-tree over neighborhood bounding boxes (used for the pickup point).
    index_pick = rtree.Rtree()
    for position, polygon in enumerate(neighborhoods.geometry):
        index_pick.insert(position, polygon.bounds)

    # R-tree over borough bounding boxes (used for the drop-off point).
    index_drop = rtree.Rtree()
    for position, polygon in enumerate(boroughs.geometry):
        index_drop.insert(position, polygon.bounds)

    def first_containing(tree, frame, point):
        """Return the position of the first polygon containing point, or None."""
        for candidate in tree.intersection((point.x, point.y, point.x, point.y)):
            if frame.geometry[candidate].contains(point):
                return candidate
        return None

    for row in reader:
        try:
            p_pick = geom.Point(proj(float(row[3]), float(row[2])))
            p_drop = geom.Point(proj(float(row[5]), float(row[4])))
            match_pick = first_containing(index_pick, neighborhoods, p_pick)
            match_drop = first_containing(index_drop, boroughs, p_drop)
            if match_pick is not None and match_drop is not None:
                pair = (match_pick, match_drop)
                tally[pair] = tally.get(pair, 0) + 1
        except ValueError:
            # Rows that raise ValueError (e.g. non-numeric fields) are dropped.
            pass

    return tally.items()
Beispiel #3
0
def parse(records):
    """Yield ``((station_idx, date), int(row[2]))`` for rows inside a buffer.

    The relief stations in ``TRS_149.geojson`` are declared as EPSG:4326,
    reprojected to EPSG:2263 and buffered by ``distance``. Each CSV row gives
    an integer point ``(row[3], row[4])``; the first buffer that contains it
    is reported together with the first six characters of ``row[1]``.

    Args:
        records: iterator over raw CSV lines.
    """
    reader = csv.reader(records)

    # Buffer radius; presumably 50 m expressed in feet -- confirm.
    distance = 1/.3048*50
    relief_path = 'TRS_149.geojson'
    relief = gpd.GeoDataFrame.from_file(relief_path)
    relief.crs = from_epsg(4326)
    relief = relief.to_crs(epsg=2263)
    relief["buffer"] = relief.apply(lambda x: x.geometry.buffer(distance), axis=1)
    relief = relief.set_geometry("buffer")

    # Index the buffers by the frame's own index labels.
    index = rtree.Rtree()
    for label, shape in zip(relief.index.values, relief.geometry):
        index.insert(label, shape.bounds)

    for row in reader:
        date = row[1][:6]
        x = int(row[3])
        y = int(row[4])
        candidates = index.intersection((x, y, x, y))
        p = geom.Point(x, y)
        for label in candidates:
            # Only the first containing buffer is reported.
            if relief.geometry[label].contains(p):
                yield ((label, date), int(row[2]))
                break
Beispiel #4
0
def parseIdles(records):
    """Aggregate rows per ``(GRID_ID, date)`` of the hexagon containing them.

    Each CSV row gives an integer point ``(row[3], row[4])``. The first
    hexagon from ``Hexagon_clipped.geojson`` that contains the point is
    looked up, and the row is added to the running totals for that hexagon's
    ``GRID_ID`` and the first six characters of ``row[1]``.

    Args:
        records: iterator over raw CSV lines.

    Returns:
        The ``items()`` of a dict mapping ``(GRID_ID, date)`` to a 4-tuple
        ``(row_count, sum_of_int_row2, 0, 0)``; the last two slots are always
        written as 0 here.
    """
    reader = csv.reader(records)
    hexagon = gpd.GeoDataFrame.from_file('Hexagon_clipped.geojson')

    # Index the hexagons by the frame's own index labels.
    index = rtree.Rtree()
    for idx, geometry in zip(hexagon.index.values, hexagon.geometry):
        index.insert(idx, geometry.bounds)

    counts = {}
    for row in reader:
        date = row[1][:6]
        x, y = int(row[3]), int(row[4])
        p = geom.Point(x, y)

        match = None
        for idx in index.intersection((x, y, x, y)):
            if hexagon.geometry[idx].contains(p):
                match = idx
                break

        # Compare against None explicitly: a hexagon whose label is 0 is a
        # valid match, but a plain truthiness test would discard it.
        if match is not None:
            k = (hexagon.GRID_ID[match], date)
            v = counts.get(k, (0, 0, 0, 0))
            counts[k] = (v[0] + 1, v[1] + int(row[2]), 0, 0)
    return counts.items()
Beispiel #5
0
    def _build_elem_index(self):
        """Build the R-tree over element bounding boxes, if not built yet.

        For every element in ``self._elems`` the bounding box of its nodes
        (looked up in ``self._nodes``) is computed as
        ``[xmin, xmax, ymin, ymax]`` and stored under the element's position.
        The tree is created with ``interleaved=False`` to match that
        coordinate order. Does nothing when ``self._elem_index`` already
        holds an index.
        """
        if self._elem_index is not None:
            return

        # Function-call form prints the same text on Python 2 and Python 3.
        print("Building element indices...")
        tuples = []
        for elem_i, element in enumerate(self._elems):
            corners = [self._nodes[node_i] for node_i in element]
            if corners:
                xs = [node[0] for node in corners]
                ys = [node[1] for node in corners]
                box = [min(xs), max(xs), min(ys), max(ys)]
            else:
                # An element without nodes keeps the all-None placeholder box.
                box = [None, None, None, None]
            tuples.append((elem_i, box, None))

        self._elem_index = rtree.Rtree(tuples, interleaved=False)
Beispiel #6
0
    def find_closest_elems(self, pos, count=1):
        """ Find indices of the closest elems to the given position.
            The distance is measured to the element mass center.
            All elems are assumed to be triangular (the node sum is
            divided by 3).
            pos = position tuple
            count = number of elements to return
            return = a single element index when count == 1, otherwise a
                     list of element indices
            Raises IndexError when count == 1 and the index yields no hit.
        """
        import itertools

        # Lazily build the index of element centers on first use.
        if self._elemcenter_index is None:
            tuples = []
            for i, element in enumerate(self._elems):
                center = np.zeros(2)
                for node_i in element:
                    np.add(center, self._nodes[node_i][:2], center)
                center /= 3.
                tuples.append((i, center[_XXYY], None))
            self._elemcenter_index = rtree.Rtree(tuples, interleaved=False)

        pos = np.array(pos)

        # returns the index of the element center closest to the given point:
        hits = self._elemcenter_index.nearest(pos[_XXYY], count)

        # newer versions of rtree return a generator:
        if isinstance(hits, types.GeneratorType):
            # Take at most `count` items. islice works on Python 2 and 3 and
            # simply stops early when fewer hits exist, where calling
            # hits.next() `count` times would raise StopIteration.
            hits = list(itertools.islice(hits, count))

        if count > 1:
            return hits
        else:
            return hits[0]
def create_index(zones_):
    """Return an R-tree of the bounding boxes of ``zones_.geometry``.

    Each geometry is stored under its position in the iteration order of
    ``zones_.geometry``.
    """
    import rtree
    import fiona.crs
    spatial_index = rtree.Rtree()
    position = 0
    for shape in zones_.geometry:
        spatial_index.insert(position, shape.bounds)
        position += 1
    return spatial_index
Beispiel #8
0
def process(pid, records):
    """Count rows per (drop-off borough, pickup neighborhood) name pair.

    Both lookups use the polygons in ``neighborhoods.geojson`` (reprojected
    to EPSG:2263): the point built from ``row[9]``/``row[10]`` selects the
    ``borough`` value and the point built from ``row[5]``/``row[6]`` selects
    the ``neighborhood`` value. Rows that raise during parsing or lookup, or
    that miss either polygon, are skipped.

    Args:
        pid: partition index; on partition 0 the first record is skipped.
        records: iterator over raw CSV lines.

    Returns:
        The ``items()`` of a dict mapping ``(borough, neighborhood)`` to the
        number of matching rows.
    """
    import csv
    import pyproj
    import shapely.geometry as geom

    import fiona
    import fiona.crs
    import rtree

    import geopandas as gpd

    neighborhoods = gpd.read_file("neighborhoods.geojson").to_crs(
        fiona.crs.from_epsg(2263))

    # Insert every polygon exactly once. Inserting them a second time only
    # made each query return every candidate twice.
    index = rtree.Rtree()
    for idx, geometry in enumerate(neighborhoods.geometry):
        index.insert(idx, geometry.bounds)

    proj = pyproj.Proj(init="epsg:2263", preserve_units=True)
    counts = {}

    reader = csv.reader(records)
    if pid == 0:
        # Skip the header; the default tolerates an empty partition.
        next(records, None)

    for row in reader:
        match = None
        boro = None

        try:
            p = geom.Point(proj(float(row[5]), float(row[6])))  # pickup
            p1 = geom.Point(proj(float(row[9]), float(row[10])))  # dropoff

            for idx1 in index.intersection((p1.x, p1.y, p1.x, p1.y)):
                if neighborhoods.geometry[idx1].contains(p1):
                    boro = neighborhoods.borough[idx1]
                    break

            for idx in index.intersection((p.x, p.y, p.x, p.y)):
                if neighborhoods.geometry[idx].contains(p):
                    match = neighborhoods.neighborhood[idx]
                    break
        except Exception:
            # Deliberate best-effort: malformed rows are dropped, not fatal.
            continue

        if match and boro:
            combname = (boro, match)
            counts[combname] = counts.get(combname, 0) + 1

    return counts.items()
Beispiel #9
0
def indexZones(buildingfiles):  ##creates rtree
    """Index a buffered point for every usable row of a building CSV.

    Rows whose ``YearBuilt`` is ``'0'`` or whose ``XCoord``/``YCoord`` is
    blank are skipped. Every other row becomes a point buffered by 20, stored
    under a running integer id.

    Args:
        buildingfiles: path of the CSV file to read.

    Returns:
        ``(index, dic)``: the R-tree of buffer bounding boxes and a dict
        mapping each id to ``(YearBuilt, buffer_polygon)``.
    """
    import rtree
    import fiona.crs
    import geopandas as gpd
    import shapely.geometry as geom
    import csv

    index = rtree.Rtree()
    dic = {}
    # NOTE(review): the file is opened in binary mode, which the csv module
    # accepts on Python 2 only -- confirm the target interpreter.
    with open(buildingfiles, 'rb') as f:
        for row in csv.DictReader(f):
            if row['YearBuilt'] == '0':
                continue
            if row['XCoord'].strip() == '' or row['YCoord'].strip() == '':
                continue

            centre = geom.Point(float(row['XCoord']), float(row['YCoord']))
            footprint = centre.buffer(20)  # polygon approximating a circle

            # Ids are consecutive, so the next id equals the entries so far.
            inx = len(dic)
            index.insert(inx, footprint.bounds)
            dic[inx] = (row['YearBuilt'], footprint)

    return (index, dic)
Beispiel #10
0
    def create_rtree_index(self):
        """Build and return an `rtree <http://toblerity.org/rtree/>`_ index for spatial queries.

        The index is stored on ``self.rtree_index`` and filled with the bounds
        of every ``(index, geometry)`` pair produced by ``self.iter_latlong()``.

        **Note**: Bounds are given in lat/long, not in the native CRS"""
        self.rtree_index = rtree.Rtree()
        for entry in self.iter_latlong():
            feature_id, shape = entry
            self.rtree_index.add(feature_id, shape.bounds)
        return self.rtree_index
def createIndex(shapefile):
    """Load ``shapefile``, reproject it to EPSG:2263 and index its geometries.

    Returns:
        ``(index, zones)``: an R-tree keyed by row position and the
        reprojected GeoDataFrame.
    """
    import rtree
    import fiona.crs
    import geopandas as gpd
    raw = gpd.read_file(shapefile)
    zones = raw.to_crs(fiona.crs.from_epsg(2263))
    index = rtree.Rtree()
    for position, shape in enumerate(zones.geometry):
        index.insert(position, shape.bounds)
    return (index, zones)
Beispiel #12
0
def createIndex(shapefile):
    """Index the geometries held by ``C_TRACT.value``.

    Args:
        shapefile: not used by this implementation; the zones come from
            ``C_TRACT.value`` instead.

    Returns:
        ``(index, zones)``: an R-tree keyed by row position and the zones
        object itself.
    """
    import rtree
    import fiona.crs
    import geopandas as gpd
    zones = C_TRACT.value
    index = rtree.Rtree()
    position = 0
    for shape in zones.geometry:
        index.insert(position, shape.bounds)
        position += 1
    return (index, zones)
Beispiel #13
0
def createIndex(geojson):
    """Read ``geojson`` (no reprojection) and index its geometries.

    Returns:
        ``(index, zones)``: an R-tree keyed by row position and the loaded
        GeoDataFrame.
    """
    import rtree
    import fiona.crs
    import geopandas as gpd
    zones = gpd.read_file(geojson)
    index = rtree.Rtree()
    for position, box in enumerate(shape.bounds for shape in zones.geometry):
        index.insert(position, box)
    return (index, zones)
Beispiel #14
0
def processTrips(pid, records):
    """Yield ``((pickup neighborhood, drop-off borough), 1)`` per matched row.

    Both lookups use the polygons in ``neighborhoods.geojson`` (reprojected
    to EPSG:2263). The point built from ``row[9]``/``row[10]`` selects the
    ``borough`` value; only when that succeeds is the point built from
    ``row[5]``/``row[6]`` used to select the ``neighborhood`` value.

    Args:
        pid: partition index; on partition 0 the first record is skipped.
        records: iterator over raw CSV lines.
    """
    import csv

    import fiona.crs
    import geopandas as gpd
    import pyproj
    import rtree
    import shapely.geometry as geom

    # Skip the header. The default keeps an empty partition from raising
    # StopIteration inside this generator (a RuntimeError under PEP 479).
    if pid == 0:
        next(records, None)

    reader = csv.reader(records)
    proj = pyproj.Proj(init="epsg:2263", preserve_units=True)
    neighborhoods = gpd.read_file('neighborhoods.geojson').to_crs(
        fiona.crs.from_epsg(2263))

    index_start = rtree.Rtree()
    for idx, geometry in enumerate(neighborhoods.geometry):
        index_start.insert(idx, geometry.bounds)

    def lookup(point, column):
        """Return `column` of the first polygon containing point, or None."""
        for idx in index_start.intersection(
                (point.x, point.y, point.x, point.y)):
            if neighborhoods.geometry[idx].contains(point):
                return neighborhoods[column][idx]
        return None

    for row in reader:
        try:
            p_start = geom.Point(proj(float(row[5]), float(row[6])))
            p_end = geom.Point(proj(float(row[9]), float(row[10])))
        except Exception:
            # Malformed rows are dropped. `Exception` (not a bare except)
            # lets KeyboardInterrupt and SystemExit propagate.
            continue

        match_end = lookup(p_end, 'borough')
        if not match_end:
            continue
        match_start = lookup(p_start, 'neighborhood')
        if match_start:
            yield ((match_start, match_end), 1)
Beispiel #15
0
    def index(text_elements):
        """Index ``(bbox, text)`` pairs by box and by text.

        Returns:
            ``(bbox_to_text, text_to_corner)``: an R-tree storing the stripped
            text under each bbox, and a dict mapping the text (asterisks
            removed, stripped) to a list of ``Anchor(bbox[0], bbox[1])``.
        """
        bbox_to_text = rtree.Rtree()
        text_to_corner = collections.defaultdict(list)

        idx = 0
        for bbox, text in text_elements:
            label = text.strip()
            bbox_to_text.add(idx, bbox, label)

            key = text.replace("*", "").strip()
            corner = Anchor(bbox[0], bbox[1])
            text_to_corner[key].append(corner)
            idx += 1

        return bbox_to_text, text_to_corner
Beispiel #16
0
def createindex(shapefile):
    """Load ``shapefile`` in EPSG:2263 and index its geometry bounds.

    Returns:
        ``(index1, neighbor)``: the R-tree keyed by row position and the
        reprojected GeoDataFrame.
    """
    import geopandas as gpd
    import rtree
    import fiona.crs
    neighbor = gpd.read_file(shapefile).to_crs(fiona.crs.from_epsg(2263))
    index1 = rtree.Rtree()
    boxes = [shape.bounds for shape in neighbor.geometry]
    for position, box in enumerate(boxes):
        index1.insert(position, box)

    return (index1,neighbor)
Beispiel #17
0
    def get_ids(self):
        """indices of particles in the line list

        For every output line of the tracer, the coordinate of the point
        referenced right after the line's length entry is looked up. The
        closest source particle (by x/y) that has not been assigned yet is
        chosen from up to 10 nearest candidates. When all candidates are
        already taken, the nearest one is reused and a warning is logged.

        Returns:
            numpy array with one source id per output line (an empty int64
            array when there are no lines).

        Raises:
            AssertionError: when ``self.source_ids`` and the particle points
                differ in length.
        """
        st = self.tracer

        if st.output.lines.number_of_cells == 0:
            # no lines, no indices
            logger.info('No output lines found')
            return np.array([], dtype='int64')

        # create an rtree for fast lookup
        # ids should be as long as number of points in particles
        msg = "%s should be %s" % (len(
            self.source_ids), self.particles.number_of_points)
        assert len(self.source_ids) == self.particles.number_of_points, msg

        # ids of the source points
        tree = rtree.Rtree()
        for i, (x_i, y_i, _) in zip(self.source_ids,
                                    self.particles.points.to_array()):
            tree.add(i, (x_i, y_i))

        # lookup lines and points
        lines = st.output.lines.data.to_array()
        points = st.output.points.to_array()
        rows = []
        start = 0
        for _ in range(st.output.lines.number_of_cells):
            # each line is stored as its length n followed by n point ids;
            # take the point behind the first id and jump to the next line
            n = lines[start]
            idx = lines[start + 1]
            rows.append(points[idx])
            start += (n + 1)
        lines = np.array(rows)

        # lookup all locations of the particles in the ids
        idxs = []
        used = set()  # same content as idxs, for O(1) membership tests
        for line_i in lines:
            query = tuple(line_i[:2])
            # find the particle that is closest, max of 10 locations
            for idx in tree.nearest(query, num_results=10):
                if idx not in used:
                    # found one that is still free
                    break
            else:
                # oops, we can't find a single particle here that
                # we haven't used already
                idx = next(iter(tree.nearest(query)))
                msg = 'Could not find particle for %s, reusing %s'
                logger.warning(msg, line_i, idx)
            # add it to the list
            idxs.append(idx)
            used.add(idx)
        return np.array(idxs)
 def processTrips(pid, records):
     """Count rows per ``"<pickup neighborhood>_<drop-off borough>"`` key.

     The point built from ``row[9]``/``row[10]`` is matched against
     ``boroughs.geojson`` and the point built from ``row[5]``/``row[6]``
     against ``neighborhoods.geojson`` (both reprojected to EPSG:2263). Every
     (containing borough, containing neighborhood) combination adds one to
     the key's count.

     Args:
         pid: partition index; on partition 0 the first record is consumed
             and printed.
         records: iterator over raw CSV lines.

     Returns:
         The ``items()`` of a dict mapping the key string to its count.
     """
     import rtree
     import geopandas as gpd
     import fiona.crs
     import csv
     import pyproj
     import shapely.geometry as geom

     if pid == 0:
         # Consume and echo the header; tolerate an empty partition.
         header = next(records, None)
         if header is not None:
             print(header)

     reader = csv.reader(records)
     counts = {}
     proj = pyproj.Proj(init="epsg:2263", preserve_units=True)

     boroughs = 'boroughs.geojson'
     boroughs = gpd.read_file(boroughs).to_crs(fiona.crs.from_epsg(2263))
     bor_index = rtree.Rtree()
     for idx, geometry in enumerate(boroughs.geometry):
         bor_index.insert(idx, geometry.bounds)

     neighborhoods = 'neighborhoods.geojson'
     nbs = gpd.read_file(neighborhoods).to_crs(fiona.crs.from_epsg(2263))
     nei_index = rtree.Rtree()
     for idx, geometry in enumerate(nbs.geometry):
         nei_index.insert(idx, geometry.bounds)

     for row in reader:
         try:
             p_end = geom.Point(proj(float(row[9]), float(row[10])))
             p_start = geom.Point(proj(float(row[5]), float(row[6])))
         except Exception:
             # Malformed rows are dropped. `Exception` (not a bare except)
             # lets KeyboardInterrupt and SystemExit propagate.
             continue
         for idx in bor_index.intersection(
             (p_end.x, p_end.y, p_end.x, p_end.y)):
             if not boroughs.geometry[idx].contains(p_end):
                 continue
             borough = boroughs['boroname'][idx]
             for idx2 in nei_index.intersection(
                 (p_start.x, p_start.y, p_start.x, p_start.y)):
                 if nbs.geometry[idx2].contains(p_start):
                     neigh = nbs['neighborhood'][idx2]
                     key = neigh + "_" + borough
                     counts[key] = counts.get(key, 0) + 1
     return counts.items()
def createIndex(shapefile):
    """Load ``shapefile``, reproject it to EPSG:2263 and index its geometries.

    Args:
        shapefile: path (or other source accepted by ``gpd.read_file``).

    Returns:
        ``(index, zones)``: an R-tree keyed by row position and the
        reprojected GeoDataFrame.
    """
    import rtree
    import fiona.crs
    import geopandas as gpd
    zones = gpd.read_file(shapefile).to_crs(fiona.crs.from_epsg(2263))
    # The pyproj projection previously built here was never used; removed.
    index = rtree.Rtree()
    for idx,geometry in enumerate(zones.geometry):
        index.insert(idx, geometry.bounds)
    return (index, zones)
Beispiel #20
0
def createIndex(tracts):
    """Load tracts in EPSG:5070, keep populated valid ones, and index them.

    Only rows with ``plctrpop10 > 0`` and a valid geometry are kept; the
    frame's index is reset afterwards so positions and labels line up.

    Returns:
        ``(index, zones)``: an R-tree keyed by row position and the filtered
        GeoDataFrame.
    """
    import rtree
    import fiona.crs
    import geopandas as gpd
    projected = gpd.read_file(tracts).to_crs(fiona.crs.from_epsg(5070))
    populated = projected['plctrpop10'] > 0
    well_formed = projected.geometry.is_valid
    zones = projected.loc[(populated) & (well_formed)].reset_index()
    index = rtree.Rtree()
    for position, shape in enumerate(zones.geometry):
        index.insert(position, shape.bounds)
    return (index, zones)
Beispiel #21
0
def indexZones(shapeFilename):  ##creates rtree
    """Load zones in EPSG:2263, buffer them by 450 and index the buffers.

    Returns:
        ``(index, zones)``: an R-tree keyed by row position and the
        GeoDataFrame whose active geometry is the buffered shape.
    """
    import rtree
    import fiona.crs
    import geopandas as gpd
    index = rtree.Rtree()
    original = gpd.read_file(shapeFilename).to_crs(fiona.crs.from_epsg(2263))
    # 450 is the buffer distance, in the units of the projected CRS.
    zones = original.set_geometry(original.geometry.buffer(450))
    position = 0
    for shape in zones.geometry:
        index.insert(position, shape.bounds)
        position += 1
    return (index, zones)
Beispiel #22
0
 def _build_node_index(self):
     """Build the node R-tree once; later calls return immediately.

     Each node whose first coordinate is finite is stored as
     ``(id, self._nodes[id, _XXYY], None)``. The tree is created with
     ``interleaved=False`` because that is the coordinate order the
     entries use.
     """
     if self._node_index is not None:
         return
     if self._logger is not None:
         self._logger.info("Building node indexes...")
     tuples = []
     for i in range(self.n_nodes()):
         # nodes with a non-finite first coordinate are left out
         if np.isfinite(self._nodes[i, 0]):
             tuples.append((i, self._nodes[i, _XXYY], None))
     self._node_index = rtree.Rtree(tuples, interleaved=False)
Beispiel #23
0
def createIndex(shapefile):
    """Read the census-tract file in EPSG:2263 and build its R-tree.

    Returns:
        ``(index, censusTracts500)``: the R-tree keyed by row position and
        the reprojected GeoDataFrame.
    """
    import rtree
    import fiona.crs
    import geopandas as gpd

    loaded = gpd.read_file(shapefile)
    censusTracts500 = loaded.to_crs(fiona.crs.from_epsg(2263))
    index = rtree.Rtree()
    for position, tract in enumerate(censusTracts500.geometry):
        index.insert(position, tract.bounds)
    return (index, censusTracts500)
 def findBoundary():
     """Load ``500cities_tracts.geojson`` and index its geometry bounds.

     Returns:
         ``(tracts, tracts_index)``: the GeoDataFrame (not reprojected) and
         an R-tree keyed by row position.
     """
     import rtree
     import geopandas as gpd
     import fiona.crs
     import csv
     import pyproj
     import shapely.geometry as geom
     tracts = gpd.read_file('500cities_tracts.geojson')
     tracts_index = rtree.Rtree()
     position = 0
     for shape in tracts.geometry:
         tracts_index.insert(position, shape.bounds)
         position += 1
     return tracts, tracts_index
Beispiel #25
0
def createindex(shapefile1):
    """Load ``shapefile1`` in EPSG:2263 and index its geometry bounds.

    Returns:
        ``(indexr, tract)``: the R-tree keyed by row position and the
        reprojected GeoDataFrame.
    """
    import geopandas as gpd
    import rtree
    import fiona.crs
    source = gpd.read_file(shapefile1)
    tract = source.to_crs(fiona.crs.from_epsg(2263))

    indexr = rtree.Rtree()
    for position, box in enumerate(shape.bounds for shape in tract.geometry):
        indexr.insert(position, box)

    return (indexr,tract)
Beispiel #26
0
def createIndex(shapefile):
    """Build an R-tree over the features of ``shapefile`` (EPSG:2263).

    Returns:
        ``(index, zones)``: the R-tree spatial index and the reprojected
        "zones" GeoDataFrame it was built from.
    """
    import rtree
    import fiona.crs
    import geopandas as gpd

    # Read the features, then convert them to the 2263 coordinate system.
    features = gpd.read_file(shapefile)
    zones = features.to_crs(fiona.crs.from_epsg(2263))

    # Register each feature's bounding box under its row position.
    index = rtree.Rtree()
    boxes = [shape.bounds for shape in zones.geometry]
    for position, box in enumerate(boxes):
        index.insert(position, box)

    return (index, zones)
Beispiel #27
0
def search_index(h5path, variable, dimensions, bbox):
    """Print the slices of an HDF5 variable selected by an on-disk R-tree.

    Opens the R-tree stored next to ``h5path`` (``h5path + '.idx'`` and
    ``h5path + '.dat'``), fetches the raw objects whose boxes intersect
    ``bbox``, and for each object -- unpacked as
    ``(xmin, ymin, xmax, ymax)`` -- prints the bounds followed by
    ``variable[xmin:xmax, ymin:ymax]`` read from the HDF5 file.

    Args:
        h5path: path of the HDF5 file; also the base name of the index files.
        variable: name of the dataset to read from the HDF5 file.
        dimensions: not used by this implementation.
        bbox: query box passed to the index's ``intersection``.

    Exits the process with status 1 when either index file is missing.
    """
    # check if the idx data exists
    if not (os.path.isfile(h5path + '.idx') and os.path.isfile(h5path + '.dat')):
        print('index files don\'t exist')
        sys.exit(1)

    idx = rtree.Rtree(h5path)
    hits = list(idx.intersection(bbox, objects='raw'))

    h5_fh = h5py.File(h5path, 'r')
    try:
        var_id = h5_fh[variable]
        for (xmin, ymin, xmax, ymax) in hits:
            # %-formatting gives the same space-separated line on Python 2
            # and Python 3.
            print('%s %s %s %s' % (xmin, ymin, xmax, ymax))
            print(var_id[xmin:xmax, ymin:ymax])
    finally:
        # Close the file even if a lookup or slice raises.
        h5_fh.close()
Beispiel #28
0
def createIndex(shapefile):
    '''
    Build a spatial index for the features stored at a shapefile path.

    Returns a tuple of:
    (1) index: an R-Tree over the bounding boxes of the geometries
    (2) zones: the data of the shapefile, reprojected to EPSG:2263

    The ID of each entry in 'index' equals the position of the
    corresponding object in zones.
    '''
    import rtree
    import fiona.crs
    import geopandas as gpd
    loaded = gpd.read_file(shapefile)
    zones = loaded.to_crs(fiona.crs.from_epsg(2263))
    index = rtree.Rtree()
    position = 0
    for shape in zones.geometry:
        index.insert(position, shape.bounds)
        position += 1
    return (index, zones)
Beispiel #29
0
def drawLocations(coord, ctx, locs):
    """Draw a dot and a text label per location, skipping overlapping labels.

    Args:
        coord: object whose ``screenFromWorld(lng, lat)`` gives screen
            coordinates.
        ctx: drawing context used for font setup, text extents and drawing.
        locs: iterable of ``(name, (lat, lng))`` pairs, drawn in order.

    A label is skipped when its text box intersects the box of any label
    drawn earlier in the same call.
    """
    labels = rtree.Rtree()
    for name, (lat, lng) in locs:
        anchor = coord.screenFromWorld(lng, lat)

        ctx.select_font_face("Verdana", cairo.FONT_SLANT_NORMAL,
                             cairo.FONT_WEIGHT_NORMAL)
        ctx.set_font_size(9)
        _, _, width, height = ctx.text_extents(name)[:4]

        # The text starts 5 units to the right of the location.
        pos = anchor[0] + 5, anchor[1]
        left, top = pos
        newRegion = (left, top, left + width, top + height)
        overlapping = list(labels.intersection(newRegion))
        if overlapping:
            continue

        dot(ctx, (left - 5, top), 4, 1, (0, .3, 0), (0, .5, 0))
        # Only overlap matters, so every region is stored under id 0.
        labels.add(0, newRegion)
        ctx.move_to(left, top)
        ctx.set_source_rgb(1, 1, 0.0)
        ctx.show_text(name)
Beispiel #30
0
def extractGeom(partId, records):
    ''' Map pickups to the nearest subway station.

        For each CSV row with more than 8 fields, the point built from
        row[5]/row[6] is projected with EPSG:2263 and the stations whose
        buffer bounding box intersects a +/-300 box around it are compared
        by distance. Rows with a hit whose hour (row[1][11:13]) lies
        strictly between 6 and 21 are reported.

        partId  = partition index; on partition 0 the first record is skipped
        records = iterator over raw CSV lines
        yields  = ((int(row[1][8:10]), hour), objectid of the nearest station)
        '''
    from shapely.geometry import Point
    import pyproj
    import rtree
    import geopandas as gpd
    import csv
    # create rtree for subway stations
    # NOTE(review): entries are keyed by position in subwayst['buffer'] but
    # looked up by label below -- assumes a default 0..n-1 index; confirm.
    index = rtree.Rtree()
    for idx, geometry in enumerate(subwayst['buffer']):
        index.insert(idx, geometry.bounds)
    if partId == 0:
        # next() works on Python 2 and 3; the default tolerates an empty
        # partition instead of raising StopIteration inside this generator.
        next(records, None)
    proj = pyproj.Proj(init="epsg:2263", preserve_units=True)
    reader = csv.reader(records)
    for row in reader:
        if len(row) <= 8:
            continue
        # extract longit & latit and project them
        longit = float(row[5])
        latit = float(row[6])
        geom = Point(proj(longit, latit))
        match = index.intersection(
            (geom.x - 300, geom.y - 300, geom.x + 300, geom.y + 300))
        # (distance, station idx) of the best candidate; sentinel = no hit
        nearest = (1e6, None)
        for idx in match:
            nearest = min(nearest,
                          (geom.distance(subwayst.geometry[idx]), idx))
        if nearest == (1e6, None):
            continue
        stamp = row[1]
        date = stamp[8:10]
        hour = int(stamp[11:13])
        if 6 < hour < 21:
            yield ((int(date), hour),
                   subwayst.objectid[nearest[1]])