Example #1
def dfs(request):
    polys1 = GeoSeries(
        [
            Polygon([(0, 0), (5, 0), (5, 5), (0, 5)]),
            Polygon([(5, 5), (6, 5), (6, 6), (5, 6)]),
            Polygon([(6, 0), (9, 0), (9, 3), (6, 3)]),
        ]
    )

    polys2 = GeoSeries(
        [
            Polygon([(1, 1), (4, 1), (4, 4), (1, 4)]),
            Polygon([(4, 4), (7, 4), (7, 7), (4, 7)]),
            Polygon([(7, 7), (10, 7), (10, 10), (7, 10)]),
        ]
    )

    df1 = GeoDataFrame({"geometry": polys1, "df1": [0, 1, 2]})
    df2 = GeoDataFrame({"geometry": polys2, "df2": [3, 4, 5]})

    if request.param == "string-index":
        df1.index = ["a", "b", "c"]
        df2.index = ["d", "e", "f"]

    if request.param == "named-index":
        df1.index.name = "df1_ix"
        df2.index.name = "df2_ix"

    # construct the expected frames
    expected = {}

    part1 = df1.copy().reset_index().rename(columns={"index": "index_left"})
    part2 = (
        df2.copy()
        .iloc[[0, 1, 1, 2]]
        .reset_index()
        .rename(columns={"index": "index_right"})
    )
    part1["_merge"] = [0, 1, 2]
    part2["_merge"] = [0, 0, 1, 3]
    exp = pd.merge(part1, part2, on="_merge", how="outer")
    expected["intersects"] = exp.drop("_merge", axis=1).copy()

    part1 = df1.copy().reset_index().rename(columns={"index": "index_left"})
    part2 = df2.copy().reset_index().rename(columns={"index": "index_right"})
    part1["_merge"] = [0, 1, 2]
    part2["_merge"] = [0, 3, 3]
    exp = pd.merge(part1, part2, on="_merge", how="outer")
    expected["contains"] = exp.drop("_merge", axis=1).copy()

    part1["_merge"] = [0, 1, 2]
    part2["_merge"] = [3, 1, 3]
    exp = pd.merge(part1, part2, on="_merge", how="outer")
    expected["within"] = exp.drop("_merge", axis=1).copy()

    return [request.param, df1, df2, expected]
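This is clearly the body of a pytest fixture (it reads request.param). A minimal sketch of how it might be wired up; the "default-index" param value and the test stub are illustrative assumptions, only "string-index" and "named-index" appear in the body:

import pytest

@pytest.fixture(params=["default-index", "string-index", "named-index"])
def dfs(request):
    ...  # fixture body as in the example above

def test_sjoin_intersects(dfs):
    index_kind, df1, df2, expected = dfs
    # e.g. compare geopandas.sjoin(df1, df2, predicate="intersects")
    # against expected["intersects"]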
Example #2
def get_mascon_gdf(mascon_ds):
    """
    converts the mascon group in the hdf5 file to a geodataframe

    Parameters
    ----------
    mascon_ds : HDF5 group
       the HDF5 group labeled "mascon"
    
    Returns
    -------
    mascon_gdf : geodataframe
       a geodataframe with the same data as in the HDF5 group
    """
    mascon_dct = {}
    poly_geom = []

    # pull the first row of every dataset in the group into a dict
    dataset_list = list(mascon_ds.keys())
    for d in dataset_list:
        mascon_dct.update({d: mascon_ds[d][0, :]})

    mascon_df = pd.DataFrame.from_dict(mascon_dct)
    # build one polygon per mascon row (polygeom is a module-level helper)
    for _, m in mascon_df.iterrows():
        poly_geom.append(polygeom(m))

    mascon_gdf = GeoDataFrame(mascon_df, crs=CRS, geometry=poly_geom)
    mascon_gdf.index = mascon_gdf.index + 1   # shift to 1-based mascon numbering
    print('There are {} Mascons in this dataset.'.format(len(mascon_gdf)))

    return mascon_gdf
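A minimal usage sketch, assuming the mascon data lives in an HDF5 file opened with h5py; the file name is hypothetical, and polygeom and CRS are module-level names the function depends on:

import h5py

with h5py.File("gsfc_mascons.h5", "r") as f:  # hypothetical file name
    mascon_gdf = get_mascon_gdf(f["mascon"])
print(mascon_gdf.head())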
Example #3
def make_grid(gdf, height, cut=True):
    """
    Return a grid, based on the shape of *gdf* and on a *height* value (in
    units of *gdf*). If cut=False, the grid will not be intersected with *gdf*
    (i.e it makes a grid on the bounding-box of *gdf*).

    Parameters
    ----------
    gdf: GeoDataFrame
        The collection of polygons to be covered by the grid.
    height: Integer
        The dimension (will be used as height and width) of the ceils to create,
        in units of *gdf*.
    cut: Boolean, default True
        Cut the grid to fit the shape of *gdf* (ceil partially covering it will
        be truncated). If False, the returned grid will fit the bounding box
        of *gdf*.

    Returns
    -------
    grid: GeoDataFrame
        A collection of polygons.
    """
    from math import ceil
    from shapely.ops import unary_union
    xmin, ymin = [i.min() for i in gdf.bounds.T.values[:2]]
    xmax, ymax = [i.max() for i in gdf.bounds.T.values[2:]]
    rows = int(ceil((ymax - ymin) / height))
    cols = int(ceil((xmax - xmin) / height))

    x_left_origin = xmin
    x_right_origin = xmin + height
    y_top_origin = ymax
    y_bottom_origin = ymax - height

    res_geoms = []
    for countcols in range(cols):
        y_top = y_top_origin
        y_bottom = y_bottom_origin
        for countrows in range(rows):
            res_geoms.append(
                ((x_left_origin, y_top), (x_right_origin, y_top),
                 (x_right_origin, y_bottom), (x_left_origin, y_bottom)))
            y_top = y_top - height
            y_bottom = y_bottom - height
        x_left_origin = x_left_origin + height
        x_right_origin = x_right_origin + height
    if cut:
        # clip the grid cells to the convex hull of the dissolved input
        res = GeoDataFrame(
            geometry=pd.Series(res_geoms).apply(Polygon),
            crs=gdf.crs).intersection(unary_union(gdf.geometry).convex_hull)
        res = res[res.geometry.type == 'Polygon']
        res.index = range(len(res))
        return GeoDataFrame(geometry=res)

    else:
        return GeoDataFrame(
            index=range(len(res_geoms)),
            geometry=pd.Series(res_geoms).apply(Polygon),
            crs=gdf.crs)
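A small usage sketch, assuming GeoDataFrame and Polygon are imported at module level as the function body implies; the geometry and cell size are illustrative:

from geopandas import GeoDataFrame
from shapely.geometry import Polygon

gdf = GeoDataFrame(geometry=[Polygon([(0, 0), (10, 0), (10, 10), (0, 10)])])
clipped_grid = make_grid(gdf, height=2)          # cells clipped to the convex hull
full_grid = make_grid(gdf, height=2, cut=False)  # cells over the bounding box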
Example #4
def dfs(request):
    polys1 = GeoSeries(
        [Polygon([(0, 0), (5, 0), (5, 5), (0, 5)]),
         Polygon([(5, 5), (6, 5), (6, 6), (5, 6)]),
         Polygon([(6, 0), (9, 0), (9, 3), (6, 3)])])

    polys2 = GeoSeries(
        [Polygon([(1, 1), (4, 1), (4, 4), (1, 4)]),
         Polygon([(4, 4), (7, 4), (7, 7), (4, 7)]),
         Polygon([(7, 7), (10, 7), (10, 10), (7, 10)])])

    df1 = GeoDataFrame({'geometry': polys1, 'df1': [0, 1, 2]})
    df2 = GeoDataFrame({'geometry': polys2, 'df2': [3, 4, 5]})
    if request.param == 'string-index':
        df1.index = ['a', 'b', 'c']
        df2.index = ['d', 'e', 'f']

    # construct the expected frames
    expected = {}

    part1 = df1.copy().reset_index().rename(
        columns={'index': 'index_left'})
    part2 = df2.copy().iloc[[0, 1, 1, 2]].reset_index().rename(
        columns={'index': 'index_right'})
    part1['_merge'] = [0, 1, 2]
    part2['_merge'] = [0, 0, 1, 3]
    exp = pd.merge(part1, part2, on='_merge', how='outer')
    expected['intersects'] = exp.drop('_merge', axis=1).copy()

    part1 = df1.copy().reset_index().rename(
        columns={'index': 'index_left'})
    part2 = df2.copy().reset_index().rename(
        columns={'index': 'index_right'})
    part1['_merge'] = [0, 1, 2]
    part2['_merge'] = [0, 3, 3]
    exp = pd.merge(part1, part2, on='_merge', how='outer')
    expected['contains'] = exp.drop('_merge', axis=1).copy()

    part1['_merge'] = [0, 1, 2]
    part2['_merge'] = [3, 1, 3]
    exp = pd.merge(part1, part2, on='_merge', how='outer')
    expected['within'] = exp.drop('_merge', axis=1).copy()

    return [request.param, df1, df2, expected]
Example #5
def dfs(request):
    polys1 = GeoSeries([
        Polygon([(0, 0), (5, 0), (5, 5), (0, 5)]),
        Polygon([(5, 5), (6, 5), (6, 6), (5, 6)]),
        Polygon([(6, 0), (9, 0), (9, 3), (6, 3)])
    ])

    polys2 = GeoSeries([
        Polygon([(1, 1), (4, 1), (4, 4), (1, 4)]),
        Polygon([(4, 4), (7, 4), (7, 7), (4, 7)]),
        Polygon([(7, 7), (10, 7), (10, 10), (7, 10)])
    ])

    df1 = GeoDataFrame({'geometry': polys1, 'df1': [0, 1, 2]})
    df2 = GeoDataFrame({'geometry': polys2, 'df2': [3, 4, 5]})
    if request.param == 'string-index':
        df1.index = ['a', 'b', 'c']
        df2.index = ['d', 'e', 'f']

    # construct the expected frames
    expected = {}

    part1 = df1.copy().reset_index().rename(columns={'index': 'index_left'})
    part2 = df2.copy().iloc[[
        0, 1, 1, 2
    ]].reset_index().rename(columns={'index': 'index_right'})
    part1['_merge'] = [0, 1, 2]
    part2['_merge'] = [0, 0, 1, 3]
    exp = pd.merge(part1, part2, on='_merge', how='outer')
    expected['intersects'] = exp.drop('_merge', axis=1).copy()

    part1 = df1.copy().reset_index().rename(columns={'index': 'index_left'})
    part2 = df2.copy().reset_index().rename(columns={'index': 'index_right'})
    part1['_merge'] = [0, 1, 2]
    part2['_merge'] = [0, 3, 3]
    exp = pd.merge(part1, part2, on='_merge', how='outer')
    expected['contains'] = exp.drop('_merge', axis=1).copy()

    part1['_merge'] = [0, 1, 2]
    part2['_merge'] = [3, 1, 3]
    exp = pd.merge(part1, part2, on='_merge', how='outer')
    expected['within'] = exp.drop('_merge', axis=1).copy()

    return [request.param, df1, df2, expected]
Example #6
def dfs(request):
    s1 = GeoSeries([Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
                    Polygon([(2, 2), (4, 2), (4, 4), (2, 4)])])
    s2 = GeoSeries([Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
                    Polygon([(3, 3), (5, 3), (5, 5), (3, 5)])])
    df1 = GeoDataFrame({'geometry': s1, 'col1': [1, 2]})
    df2 = GeoDataFrame({'geometry': s2, 'col2': [1, 2]})
    if request.param:
        df1.index = ['row1', 'row2']
    return df1, df2
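A hedged sketch of the kind of spatial-join test this fixture supports; the predicate keyword is the modern geopandas spelling (older releases used op=), and the test name is illustrative:

import geopandas

def test_sjoin_inner(dfs):
    df1, df2 = dfs
    joined = geopandas.sjoin(df1, df2, how="inner", predicate="intersects")
    # df1's first square overlaps df2's first; df1's second overlaps both
    assert len(joined) == 3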
Example #7
def write_outputs(
    cfg: dict,
    bin_gdf: GeoDataFrame,
    eq_gdf: GeoDataFrame,
    write_index: bool = False,
) -> None:
    """
    Writes output GIS files and plots (i.e., maps or MFD plots).

    All of the options for what to write are specified in the `cfg`.

    :param cfg:
        Configuration for the evaluations, such as that parsed from the YAML
        config file.

    :param bin_gdf:
        :class:`GeoDataFrame` with the spatial bins for testing

    :param eq_gdf:
        :class:`GeoDataFrame` with the observed earthquake catalog.
    """

    logger.info("writing outputs")

    if "plots" in cfg["output"].keys():
        write_mfd_plots_to_gdf(bin_gdf, **cfg["output"]["plots"]["kwargs"])

    if "map_epsg" in cfg["config"]:
        out_gdf = out_gdf.to_crs(cfg["config"]["map_epsg"])

    if "bin_gdf" in cfg["output"].keys():
        outfile = cfg["output"]["bin_gdf"]["file"]
        out_format = outfile.split(".")[-1]
        bin_gdf["bin_index"] = bin_gdf.index
        bin_gdf.index = np.arange(len(bin_gdf))

        if out_format == "csv":
            write_bin_gdf_to_csv(outfile, bin_gdf)

        else:
            try:
                bin_gdf.drop("SpacemagBin", axis=1).to_file(
                    outfile,
                    driver=OUTPUT_FILE_MAP[out_format],
                    index=write_index,
                )
            except KeyError:
                raise Exception(f"No writer for {out_format} format")
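A hypothetical shape for cfg, inferred only from the keys this function reads; real configurations are parsed from a YAML file and may carry more:

cfg = {
    "config": {"map_epsg": 4326},
    "output": {
        "plots": {"kwargs": {}},
        "bin_gdf": {"file": "bins.geojson"},
    },
}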
Example #8
    def dissolve_un_m49_regions(
            self, world: geopandas.GeoDataFrame) -> geopandas.GeoDataFrame:
        codes_mapping = self._unm49_data[[
            'Region Code', 'Region Name', 'Sub-region Code', 'Sub-region Name',
            'Intermediate Region Code', 'Intermediate Region Name',
            'ISO-alpha3 Code', 'Country or Area', 'M49 Code'
        ]]
        world = world.merge(codes_mapping,
                            left_on='iso_a3',
                            right_on='ISO-alpha3 Code',
                            how='left')

        world_regions = []

        for level in ['Region', 'Sub-region', 'Intermediate Region']:
            code_label = f'{level} Code'
            name_label = f'{level} Name'

            regions_group = world.dissolve(by=code_label)[[
                'geometry', name_label
            ]].rename(columns={name_label: 'name'})
            regions_group.index.name = 'id'
            regions_group.index = regions_group.index.astype('int64')

            world_regions.append(regions_group)

        # Antarctica (ATA) has no M49 region assignment, so append it
        # as its own entry keyed by its M49 code
        antarctica = world[world['ISO-alpha3 Code'] == 'ATA'][[
            'geometry', 'M49 Code', 'Country or Area'
        ]]
        antarctica = antarctica.rename(columns={
            'M49 Code': 'id',
            'Country or Area': 'name'
        }).astype({
            'id': 'int64'
        }).set_index('id')
        world_regions.append(antarctica)

        world = pd.concat(world_regions)
        world['un_m49_numeric'] = world.index.astype('int64')
        world.index = world.index.astype('str')

        return world
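A hedged usage sketch: any GeoDataFrame with an iso_a3 column works as world. The Natural Earth dataset path and the builder instance are illustrative (geopandas 1.0 removed the bundled datasets, so the path only resolves on older releases):

import geopandas

world = geopandas.read_file(geopandas.datasets.get_path("naturalearth_lowres"))
regions = builder.dissolve_un_m49_regions(world)  # builder holds _unm49_data
print(regions[["name"]].head())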
Example #9
def make_grid(gdf, height, cut=True):
    """
    Return a grid, based on the shape of *gdf* and on a *height* value (in
    units of *gdf*). If cut=False, the grid will not be intersected with *gdf*
    (i.e it makes a grid on the bounding-box of *gdf*).

    Parameters
    ----------
    gdf: GeoDataFrame
        The collection of polygons to be covered by the grid.
    height: Integer
        The dimension (will be used as height and width) of the ceils to create,
        in units of *gdf*.
    cut: Boolean, default True
        Cut the grid to fit the shape of *gdf* (ceil partially covering it will
        be truncated). If False, the returned grid will fit the bounding box
        of *gdf*.

    Returns
    -------
    grid: GeoDataFrame
        A collection of polygons.
    """
    from math import ceil
    from shapely.ops import unary_union
    xmin, ymin = [i.min() for i in gdf.bounds.T.values[:2]]
    xmax, ymax = [i.max() for i in gdf.bounds.T.values[2:]]
    rows = ceil((ymax-ymin) / height)
    cols = ceil((xmax-xmin) / height)

    x_left_origin = xmin
    x_right_origin = xmin + height
    y_top_origin = ymax
    y_bottom_origin = ymax - height

    res_geoms = []
    for countcols in range(cols):
        y_top = y_top_origin
        y_bottom = y_bottom_origin
        for countrows in range(rows):
            res_geoms.append((
                (x_left_origin, y_top), (x_right_origin, y_top),
                (x_right_origin, y_bottom), (x_left_origin, y_bottom)
                ))
            y_top = y_top - height
            y_bottom = y_bottom - height
        x_left_origin = x_left_origin + height
        x_right_origin = x_right_origin + height
    if cut:
        # clip the grid cells to the convex hull of the dissolved input
        res = GeoDataFrame(
            geometry=pd.Series(res_geoms).apply(Polygon),
            crs=gdf.crs
            ).intersection(unary_union(gdf.geometry).convex_hull)
        res = res[res.geometry.type == 'Polygon']
        res.index = range(len(res))
        return GeoDataFrame(geometry=res)

    else:
        return GeoDataFrame(
            index=range(len(res_geoms)),
            geometry=pd.Series(res_geoms).apply(Polygon),
            crs=gdf.crs
            )
Example #10
# first, calculate a bounding box to restrict the diagram
min_x = min(stops_pts[:, 0]) - 5000
max_x = max(stops_pts[:, 0]) + 5000
min_y = min(stops_pts[:, 1]) - 5000
max_y = max(stops_pts[:, 1]) + 5000
bbox = np.array([[min_x, min_y], [max_x, max_y], [min_x, max_y],
                 [max_x, min_y]])

# find the voronoi
coords = np.vstack([stops_pts, bbox])
vor = Voronoi(coords)

# reorder regions so they line up with their corresponding points; the
# last four belong to the bbox dummy observations, so drop them
regions = [vor.regions[i] for i in vor.point_region]
regions = regions[:-4]
clipped = []
for region in regions:
    region_vertices = vor.vertices[region]
    region_polygon = Polygon(region_vertices)

    if nyc.intersects(region_polygon):
        clipped.append(nyc.intersection(region_polygon))

clipped = GeoSeries(clipped)
stops = GeoDataFrame(stops)
stops.index = np.arange(stops.shape[0])
stops['region'] = clipped

pickle.dump(stops,open('save/stops.p','wb'))
pickle.dump(nyc,open('save/nyc.p','wb'))
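A hedged sketch of the inputs assumed above: stops_pts as an (n, 2) array of projected stop coordinates. The GTFS stops.txt columns are standard, but the file path is illustrative, and the lon/lat values would still need projecting to the metric CRS the 5000-unit buffer implies:

import numpy as np
from pandas import read_csv

gtfs_stops = read_csv('data/indata/google_transit/stops.txt')  # GTFS stops
stops_pts = np.column_stack([gtfs_stops['stop_lon'], gtfs_stops['stop_lat']])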
Example #11
def test_write_index_to_file(tmpdir, df_points, driver, ext):
    fngen = FileNumber(tmpdir, "check", ext)

    def do_checks(df, index_is_used):
        # check combinations of index=None|True|False on GeoDataFrame/GeoSeries
        other_cols = list(df.columns)
        other_cols.remove("geometry")

        if driver == "ESRI Shapefile":
            # ESRI Shapefile will add FID if no other columns exist
            driver_col = ["FID"]
        else:
            driver_col = []

        if index_is_used:
            index_cols = list(df.index.names)
        else:
            index_cols = [None] * len(df.index.names)

        # replicate pandas' default index names for regular and MultiIndex
        if index_cols == [None]:
            index_cols = ["index"]
        elif len(index_cols) > 1 and not all(index_cols):
            for level, index_col in enumerate(index_cols):
                if index_col is None:
                    index_cols[level] = "level_" + str(level)

        # check GeoDataFrame with default index=None to autodetect
        tempfilename = next(fngen)
        df.to_file(tempfilename, driver=driver, index=None)
        df_check = read_file(tempfilename)
        if len(other_cols) == 0:
            expected_cols = driver_col[:]
        else:
            expected_cols = []
        if index_is_used:
            expected_cols += index_cols
        expected_cols += other_cols + ["geometry"]
        assert list(df_check.columns) == expected_cols

        # similar check on GeoSeries with index=None
        tempfilename = next(fngen)
        df.geometry.to_file(tempfilename, driver=driver, index=None)
        df_check = read_file(tempfilename)
        if index_is_used:
            expected_cols = index_cols + ["geometry"]
        else:
            expected_cols = driver_col + ["geometry"]
        assert list(df_check.columns) == expected_cols

        # check GeoDataFrame with index=True
        tempfilename = next(fngen)
        df.to_file(tempfilename, driver=driver, index=True)
        df_check = read_file(tempfilename)
        assert list(df_check.columns) == index_cols + other_cols + ["geometry"]

        # similar check on GeoSeries with index=True
        tempfilename = next(fngen)
        df.geometry.to_file(tempfilename, driver=driver, index=True)
        df_check = read_file(tempfilename)
        assert list(df_check.columns) == index_cols + ["geometry"]

        # check GeoDataFrame with index=False
        tempfilename = next(fngen)
        df.to_file(tempfilename, driver=driver, index=False)
        df_check = read_file(tempfilename)
        if len(other_cols) == 0:
            expected_cols = driver_col + ["geometry"]
        else:
            expected_cols = other_cols + ["geometry"]
        assert list(df_check.columns) == expected_cols

        # similar check on GeoSeries with index=False
        tempfilename = next(fngen)
        df.geometry.to_file(tempfilename, driver=driver, index=False)
        df_check = read_file(tempfilename)
        assert list(df_check.columns) == driver_col + ["geometry"]

        return

    #
    # Checks where index is not used/saved
    #

    # index is a default RangeIndex
    df_p = df_points.copy()
    df = GeoDataFrame(df_p["value1"], geometry=df_p.geometry)
    do_checks(df, index_is_used=False)

    # index is a RangeIndex, starting from 1
    df.index += 1
    do_checks(df, index_is_used=False)

    # index is an Int64Index regular sequence starting from 1
    df_p.index = list(range(1, len(df) + 1))
    df = GeoDataFrame(df_p["value1"], geometry=df_p.geometry)
    do_checks(df, index_is_used=False)

    # index was a default RangeIndex, but delete one row to make an Int64Index
    df_p = df_points.copy()
    df = GeoDataFrame(df_p["value1"], geometry=df_p.geometry).drop(5, axis=0)
    do_checks(df, index_is_used=False)

    # no other columns (except geometry)
    df = GeoDataFrame(geometry=df_p.geometry)
    do_checks(df, index_is_used=False)

    #
    # Checks where index is used/saved
    #

    # named index
    df_p = df_points.copy()
    df = GeoDataFrame(df_p["value1"], geometry=df_p.geometry)
    df.index.name = "foo_index"
    do_checks(df, index_is_used=True)

    # named index, same as pandas' default name after .reset_index(drop=False)
    df.index.name = "index"
    do_checks(df, index_is_used=True)

    # named MultiIndex
    df_p = df_points.copy()
    df_p["value3"] = df_p["value2"] - df_p["value1"]
    df_p.set_index(["value1", "value2"], inplace=True)
    df = GeoDataFrame(df_p, geometry=df_p.geometry)
    do_checks(df, index_is_used=True)

    # partially unnamed MultiIndex
    df.index.names = ["first", None]
    do_checks(df, index_is_used=True)

    # unnamed MultiIndex
    df.index.names = [None, None]
    do_checks(df, index_is_used=True)

    # unnamed Float64Index
    df_p = df_points.copy()
    df = GeoDataFrame(df_p["value1"], geometry=df_p.geometry)
    df.index = df_p.index.astype(float) / 10
    do_checks(df, index_is_used=True)

    # named Float64Index
    df.index.name = "centile"
    do_checks(df, index_is_used=True)

    # index as string
    df_p = df_points.copy()
    df = GeoDataFrame(df_p["value1"], geometry=df_p.geometry)
    df.index = pd.TimedeltaIndex(range(len(df)), "days")
    # TODO: TimedeltaIndex is an invalid field type
    df.index = df.index.astype(str)
    do_checks(df, index_is_used=True)

    # unnamed DatetimeIndex
    df_p = df_points.copy()
    df = GeoDataFrame(df_p["value1"], geometry=df_p.geometry)
    df.index = pd.TimedeltaIndex(range(len(df)), "days") + pd.DatetimeIndex(
        ["1999-12-27"] * len(df))
    if driver == "ESRI Shapefile":
        # Shapefile driver does not support datetime fields
        df.index = df.index.astype(str)
    do_checks(df, index_is_used=True)

    # named DatetimeIndex
    df.index.name = "datetime"
    do_checks(df, index_is_used=True)
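The FileNumber helper isn't shown in this snippet; a plausible minimal sketch of a name generator that satisfies the next(fngen) calls above (the real helper in the test suite may differ):

import os

class FileNumber:
    """Yield tmpdir/check1.ext, tmpdir/check2.ext, ... on each next()."""

    def __init__(self, tmpdir, base, ext):
        self.tmpdir, self.base, self.ext = str(tmpdir), base, ext
        self.fileno = 0

    def __iter__(self):
        return self

    def __next__(self):
        self.fileno += 1
        return os.path.join(self.tmpdir, f"{self.base}{self.fileno}.{self.ext}")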
Example #12
            # project the coordinates from CRS p1 to CRS p2
            p1_points = np.array(subshape)
            p2_points = transform(p1, p2, p1_points[:, 0], p1_points[:, 1])
            p2_points = np.array(p2_points).T

            # create polygon
            tract_polygons.append(Polygon(p2_points))
            geoids.append(shape['properties']['GEOID'])

tracts = GeoDataFrame(index=range(len(tract_polygons)))
# initialize data
tracts['region'] = tract_polygons
tracts['geoid'] = geoids
tracts['population'] = np.nan
tracts['area'] = np.nan

# # trim tracts to nyc
# read in nyc boundary
nyc = nyc_boundary()

areas = []
print('Trimming tracts...')
for i in range(len(tracts)):
    if i % 100 == 0:
        print(i)
    # trim to nyc boundaries, no water (nybb_13a)
    region = tracts.loc[i, 'region']
    if nyc.intersects(region):
        tracts.loc[i, 'region'] = nyc.intersection(region)
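nyc_boundary() is defined elsewhere; a hedged sketch of what it plausibly does, given the nybb_13a borough-boundary dataset the comment names (the shapefile path and the dissolve-to-one-geometry choice are assumptions):

import geopandas
from shapely.ops import unary_union

def nyc_boundary():
    # dissolve the five boroughs (water excluded in nybb) into one geometry
    boroughs = geopandas.read_file('data/indata/nybb_13a/nybb.shp')
    return unary_union(boroughs.geometry)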
Example #13
min_x = min(stops_pts[:, 0]) - 5000
max_x = max(stops_pts[:, 0]) + 5000
min_y = min(stops_pts[:, 1]) - 5000
max_y = max(stops_pts[:, 1]) + 5000
bbox = np.array([[min_x, min_y], [max_x, max_y], [min_x, max_y],
                 [max_x, min_y]])

# find the voronoi
coords = np.vstack([stops_pts, bbox])
vor = Voronoi(coords)

# reorder regions so they line up with their corresponding points; the
# last four belong to the bbox dummy observations, so drop them
regions = [vor.regions[i] for i in vor.point_region]
regions = regions[:-4]
clipped = []
for region in regions:
    region_vertices = vor.vertices[region]
    region_polygon = Polygon(region_vertices)

    if nyc.intersects(region_polygon):
        clipped.append(nyc.intersection(region_polygon))

clipped = GeoSeries(clipped)
stops = GeoDataFrame(stops)
stops.index = np.arange(stops.shape[0])
stops['region'] = clipped

pickle.dump(stops, open('save/stops.p', 'wb'))
pickle.dump(nyc, open('save/nyc.p', 'wb'))
Example #14
# reorder regions so they line up with their corresponding points; the
# last four belong to the bbox dummy observations, so drop them
regions = [vor.regions[i] for i in vor.point_region]
regions = regions[:-4]
clipped = []
for region in regions:
    region_vertices = vor.vertices[region]
    region_polygon = Polygon(region_vertices)

    if nyc.intersects(region_polygon):
        clipped.append(nyc.intersection(region_polygon))

# add clipped regions to dataframe
clipped = GeoSeries(clipped)
stops = GeoDataFrame(stops)
stops.index = np.arange(stops.shape[0])
stops['region'] = clipped

# calculate the area (and log-area) of each region
stops['v_area'] = np.nan
stops['v_larea'] = np.nan
for i in np.arange(stops.shape[0]):
    stops.loc[i, 'v_area'] = stops.loc[i, 'region'].area
    stops.loc[i, 'v_larea'] = np.log(stops.loc[i, 'v_area'])

print("OK")

print("collapsing transfers...")

# # 2.2 collapse all transfers into single stops
transfers = read_csv('data/indata/google_transit/transfers.txt')
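The snippet stops right after loading transfers.txt. A hedged union-find sketch of what "collapse all transfers into single stops" could look like; the GTFS from_stop_id/to_stop_id columns are standard, but the station column name and the assumption that stops carries a stop_id column are illustrative:

parent = {}

def find(x):
    # path-halving union-find lookup
    parent.setdefault(x, x)
    while parent[x] != x:
        parent[x] = parent[parent[x]]
        x = parent[x]
    return x

for _, row in transfers.iterrows():
    a, b = find(row['from_stop_id']), find(row['to_stop_id'])
    if a != b:
        parent[a] = b  # merge the two transfer-connected groups

stops['station'] = stops['stop_id'].map(find)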