def clean(city):
    inpath = os.path.join(BASE, city, 'standardized')
    outpath = os.path.join(BASE, city, 'clean')
    if not os.path.exists(outpath):
        os.mkdir(outpath)

    streets = gpd.read_file(os.path.join(inpath, 'streets.shp'))
    sidewalks = gpd.read_file(os.path.join(inpath, 'sidewalks.shp'))

    click.echo('Assigning sidewalk side to streets...')
    sidewalks = sidewalk_clean.sw_tag_streets(sidewalks, streets)

    click.echo('Drawing sidewalks...')
    sidewalks = sidewalk_clean.redraw_sidewalks(sidewalks, streets)

    click.echo('Cleaning with street buffers...')
    sidewalks, buffers = sidewalk_clean.buffer_clean(sidewalks, streets)

    click.echo('Sanitizing sidewalks...')
    sidewalks = sidewalk_clean.sanitize(sidewalks)

    click.echo('Snapping sidewalk ends...')
    sidewalks = sidewalk_clean.snap(sidewalks, streets)

    click.echo('Writing to file...')
    streets.to_file(os.path.join(outpath, 'streets.shp'))
    sidewalks.to_file(os.path.join(outpath, 'sidewalks.shp'))

    # FIXME: curbramps should go through its own standardization/cleanup
    # workflow
    for path in os.listdir(inpath):
        filename = os.path.basename(path)
        if path.split(os.extsep, 1)[0] == 'curbramps':
            shutil.copy2(os.path.join(inpath, path),
                         os.path.join(BASE, city, 'clean', filename))
Example #2
def test_overlay_nybb(how):
    polydf = read_file(geopandas.datasets.get_path('nybb'))

    # construct circles dataframe
    N = 10
    b = [int(x) for x in polydf.total_bounds]
    polydf2 = GeoDataFrame(
            [{'geometry': Point(x, y).buffer(10000), 'value1': x + y,
              'value2': x - y}
             for x, y in zip(range(b[0], b[2], int((b[2]-b[0])/N)),
                             range(b[1], b[3], int((b[3]-b[1])/N)))],
            crs=polydf.crs)

    result = overlay(polydf, polydf2, how=how)

    cols = ['BoroCode', 'BoroName', 'Shape_Leng', 'Shape_Area',
            'value1', 'value2']
    if how == 'difference':
        cols = cols[:-2]

    # expected result

    if how == 'identity':
        # read the union result; further below we take the appropriate subset
        expected = read_file(os.path.join(
            DATA, 'nybb_qgis', 'qgis-union.shp'))
    else:
        expected = read_file(os.path.join(
            DATA, 'nybb_qgis', 'qgis-{0}.shp'.format(how)))

    # The result of QGIS for 'union' contains incorrect geometries:
    # 24 is a full original circle overlapping with unioned geometries, and
    # 27 is a completely duplicated row
    if how == 'union':
        expected = expected.drop([24, 27])
        expected.reset_index(inplace=True, drop=True)
    # Eliminate observations without geometries (issue from QGIS)
    expected = expected[expected.is_valid]
    expected.reset_index(inplace=True, drop=True)

    if how == 'identity':
        expected = expected[expected.BoroCode.notnull()].copy()

    # Order GeoDataFrames
    expected = expected.sort_values(cols).reset_index(drop=True)

    # TODO needed adaptations to result
    result = result.sort_values(cols).reset_index(drop=True)

    if how in ('union', 'identity'):
        # concat < 0.23 sorts, so changes the order of the columns
        # but at least we ensure 'geometry' is the last column
        assert result.columns[-1] == 'geometry'
        assert len(result.columns) == len(expected.columns)
        result = result.reindex(columns=expected.columns)

    assert_geodataframe_equal(result, expected, check_crs=False,
                              check_column_type=False,)
Example #3
 def setUp(self):
     path_to_shp = examples.get_path('streets.shp')
     gdf = geopandas.read_file(path_to_shp)
     self.ntw = network.Network(in_data=gdf)
     for obs in ['schools', 'crimes']:
         path_to_shp = examples.get_path('{}.shp'.format(obs))
         in_data = geopandas.read_file(path_to_shp)
         self.ntw.snapobservations(in_data, obs, attribute=True)
         setattr(self, obs, self.ntw.pointpatterns[obs])
Example #4
    def test_read_paths(self):

        gdf = read_file(get_path('naturalearth_lowres'))
        assert isinstance(gdf, GeoDataFrame)

        gdf = read_file(get_path('naturalearth_cities'))
        assert isinstance(gdf, GeoDataFrame)

        gdf = read_file(get_path('nybb'))
        assert isinstance(gdf, GeoDataFrame)
 def setUp(self):
     path_to_shp = examples.get_path('streets.shp')
     gdf = geopandas.read_file(path_to_shp)
     self.ntw = spgh.Network(in_data=gdf)
     pt_str = 'crimes'
     path_to_shp = examples.get_path('{}.shp'.format(pt_str))
     in_data = geopandas.read_file(path_to_shp)
     self.ntw.snapobservations(in_data, pt_str, attribute=True)
     npts = self.ntw.pointpatterns['crimes'].npoints
     self.ntw.simulate_observations(npts)
def exportBaltimore():  # Separate out just the Baltimore data
    regionsdf = gpd.read_file('dc-baltimore_maryland_admin.geojson')

    # Baltimore shape is at index 18; column 1 holds its geometry
    baltimoreBD = regionsdf.values[18, :]
    df = gpd.read_file('dc-baltimore_maryland_roads.geojson')
    df = df[df.geometry.within(baltimoreBD[1])]
    
    with open('baltimore.geojson','w') as f:
        f.write(df.to_json())
Example #7
    def setup(self, *args):
        world = read_file(datasets.get_path('naturalearth_lowres'))
        capitals = read_file(datasets.get_path('naturalearth_cities'))
        countries = world[['geometry', 'name']]
        countries = countries.to_crs('+init=epsg:3395')[
            countries.name != "Antarctica"]
        capitals = capitals.to_crs('+init=epsg:3395')
        capitals['geometry'] = capitals.buffer(500000)

        self.countries = countries
        self.capitals = capitals
Example #8
    def setup_method(self):
        N = 10

        nybb_filename = geopandas.datasets.get_path('nybb')
        self.df = read_file(nybb_filename)
        self.tempdir = tempfile.mkdtemp()
        self.crs = {'init': 'epsg:4326'}
        self.df2 = GeoDataFrame([
            {'geometry': Point(x, y), 'value1': x + y, 'value2': x * y}
            for x, y in zip(range(N), range(N))], crs=self.crs)
        self.df3 = read_file(
            os.path.join(PACKAGE_DIR, 'examples', 'null_geom.geojson'))
    def setUp(self):
        N = 10

        nybb_filename = download_nybb()

        self.df = read_file('/nybb_14a_av/nybb.shp', vfs='zip://' + nybb_filename)
        self.tempdir = tempfile.mkdtemp()
        self.boros = self.df['BoroName']
        self.crs = {'init': 'epsg:4326'}
        self.df2 = GeoDataFrame([
            {'geometry': Point(x, y), 'value1': x + y, 'value2': x * y}
            for x, y in zip(range(N), range(N))], crs=self.crs)
        self.df3 = read_file('examples/null_geom.geojson')
        self.line_paths = self.df3['Name']
Example #10
def test_overlay_overlap(how):
    """
    Overlay test with overlapping geometries in both dataframes.
    Test files are created with::

        import geopandas
        from geopandas import GeoSeries, GeoDataFrame
        from shapely.geometry import Point, Polygon, LineString

        s1 = GeoSeries([Point(0, 0), Point(1.5, 0)]).buffer(1, resolution=2)
        s2 = GeoSeries([Point(1, 1), Point(2, 2)]).buffer(1, resolution=2)

        df1 = GeoDataFrame({'geometry': s1, 'col1':[1,2]})
        df2 = GeoDataFrame({'geometry': s2, 'col2':[1, 2]})

        ax = df1.plot(alpha=0.5)
        df2.plot(alpha=0.5, ax=ax, color='C1')

        df1.to_file('geopandas/geopandas/tests/data/df1_overlap.geojson',
                    driver='GeoJSON')
        df2.to_file('geopandas/geopandas/tests/data/df2_overlap.geojson',
                    driver='GeoJSON')

    and the expected overlay results are then obtained using QGIS 2.16
    (Vector -> Geoprocessing Tools -> Intersection / Union / ...),
    saved to GeoJSON.
    """
    df1 = read_file(os.path.join(DATA, 'overlap', 'df1_overlap.geojson'))
    df2 = read_file(os.path.join(DATA, 'overlap', 'df2_overlap.geojson'))

    result = overlay(df1, df2, how=how)

    if how == 'identity':
        pytest.skip()

    expected = read_file(os.path.join(
        DATA, 'overlap', 'df1_df2_overlap-{0}.geojson'.format(how)))

    if how == 'union':
        # the QGIS result has the last row duplicated, so removing this
        expected = expected.iloc[:-1]

    # TODO needed adaptations to result
    result = result.reset_index(drop=True)
    if how == 'union':
        result = result.sort_values(['col1', 'col2']).reset_index(drop=True)

    assert_geodataframe_equal(result, expected, check_column_type=False,
                              check_less_precise=True)
def combineResults(input_file_list, outputFileName):

    # Read files in
    for file in input_file_list:
        if "2017" in file:
            data17 = gpd.read_file(file)
        elif "2013" in file:
            data13 = gpd.read_file(file)
        elif "2009" in file:
            data09 = gpd.read_file(file)

    # Drop duplicate geometries
    data13 = data13.drop(labels='geometry', axis=1)
    data17 = data17.drop(labels='geometry', axis=1)

    # Join datasets
    join09_13 = data09.merge(data13, how='inner', left_on ='YKR_ID', right_on='from_id')
    join09_13.drop(labels=['from_id', 'to_id'], inplace=True, axis=1)
    full_join = join09_13.merge(data17, how='inner', left_on='YKR_ID', right_on='from_id')


    # Rename columns
    full_join.columns = [u'Asuk09', u'YKR_ID', u'geometry', u'maxT09', u'meanT09', u'minT09', u'Asuk13', u'Car_D13', u'Car_T13', u'PT_D13', u'PT_T13', u'PT_ToT13', u'PT_D17', u'PT_T17', u'PT_ToT17', u'from_id', u'Asuk17', u'to_id']

    # Choose and reorder
    join = full_join[[ u'from_id', u'to_id', u'maxT09', u'meanT09', u'minT09', u'Asuk09', u'PT_T13', u'PT_ToT13', u'PT_D13', u'Car_T13', u'Car_D13', u'Asuk13', u'PT_T17', u'PT_ToT17', u'PT_D17', u'Asuk17', u'geometry']]

    # Set -1 values to NaN
    join = join.replace(to_replace={'PT_T13': {-1: np.nan}})

    # Drop NaNs
    join = join.dropna()

    # Calculate accessibility differences
    join['Dif09_13'] = None
    join['Dif09_17'] = None
    join['Dif13_17'] = None
    join['Dif09_13'] = join['meanT09'] - join['PT_T13']
    join['Dif09_17'] = join['meanT09'] - join['PT_T17']
    join['Dif13_17'] = join['PT_T13'] - join['PT_T17']

    # Save output
    folder = os.path.dirname(input_file_list[0])
    outfile = os.path.join(folder, outputFileName)

    join.to_file(outfile, driver="ESRI Shapefile")

    print(outfile)
 def readShpFile(self, input_dir, fname):
     # need to make sure that column names are lowercase
     shp = gpd.read_file(input_dir + fname)
     cols = shp.columns.values.tolist()
     colsLower = [col.lower() for col in cols]
     shp.rename(columns=dict(zip(cols,colsLower)), inplace=True)
     return shp
    def calculateCarCO2emissions(self, src_file, time, car_co2_emission=171):
        # Read data into GeoDataFrame
        print("Reading: %s" % os.path.basename(src_file))
        data = gpd.read_file(src_file)

        # Calculate the driven distance (in meters) and the total CO2 emissions
        # (in grams) from car usage, using an emission factor given in g/km
        print("Calculating the driven distance and CO2 emissions")
        data['distDriven'] = data['Pituus_Ajo'] + data['Pituus_P_E']
        data['co2FromCar'] = (data['distDriven'] / 1000.0) * car_co2_emission
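        # e.g. a 5,000 m trip with the default factor: (5000 / 1000.0) * 171 = 855 g of CO2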

        # Select columns
        slct_cols = ['from_id', 'to_id', 'Pituus_TOT', 'distDriven', 'co2FromCar']
        outdata = data[slct_cols]

        # Create output file
        outname = "%s_CO2_emissions.txt" % os.path.basename(src_file).split('.')[0]

        if time in ['08', '8', 'r']:
            outfp = os.path.join(self.car_r_co2_dir, outname)
        else:
            outfp = os.path.join(self.car_m_co2_dir, outname)

        # Save CO2 data to disk
        print("Saving the CO2 emissions to: %s" % outfp)
        outdata.to_csv(outfp, sep=';', index=False)

        # Return the output path
        return outfp
	def _rasterize_subdomains( self ):
		'''
		rasterize a subdomains shapefile to the extent and resolution of
		a template raster file. The two must be in the same coordinate
		reference system, otherwise the polygons will not line up with the
		template grid.
		returns:
			numpy.ndarray with the shape of the input raster, with the shapefile
			polygons burned in using the values of the id_field of the shapefile
		gotchas:
			currently the only supported data type is uint8, so all float values
			are coerced to integer and values greater than 255 will not fit in
			the output array. This is something the user needs to be aware of
			for the time being; it will be addressed when rasterio supports
			more output data types.
		'''
		import geopandas as gpd
		import numpy as np

		gdf = gpd.read_file( self.subdomains_fn )
		id_groups = gdf.groupby( self.id_field ) # iterator of tuples (id, gdf slice)

		out_shape = self.rasterio_raster.height, self.rasterio_raster.width
		out_transform = self.rasterio_raster.affine

		arr_list = [ self._rasterize_id( df, value, out_shape, out_transform, background_value=self.background_value ) for value, df in id_groups ]
		self.sub_domains = arr_list
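
	# A rough sketch of the per-id rasterization helper used above; the real
	# _rasterize_id is not shown in this example, so the signature and defaults
	# here are assumptions based on how it is called.
	def _rasterize_id( self, df, value, out_shape, out_transform, background_value=0 ):
		from rasterio.features import rasterize
		# burn this id's polygons into an array matching the template raster,
		# using the id value as the burned-in pixel value
		shapes = ( (geom, int(value)) for geom in df.geometry )
		return rasterize( shapes, out_shape=out_shape, transform=out_transform,
							fill=background_value, dtype='uint8' )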
Example #15
    def clean_boundary_shapefile(shapefile_path):
        """
        Cleans the boundary shapefile so that there is only one main polygon.
        :param shapefile_path:
        :return:
        """
        wfg =  gpd.read_file(shapefile_path)
        first_shape = wfg.iloc[0].geometry
        if hasattr(first_shape, 'geoms'):
            log.warning("MultiPolygon found in boundary. "
                        "Picking largest area ...")
            # pick largest shape to be the watershed boundary
            # and assume the other ones are islands to be removed
            max_area = -9999.0
            main_geom = None
            for geom in first_shape.geoms:
                if geom.area > max_area:
                    main_geom = geom
                    max_area = geom.area

            # remove self intersections
            if not main_geom.is_valid:
                log.warning("Invalid geometry found in boundary. "
                            "Attempting to self clean ...")
                main_geom = main_geom.buffer(0)
            wfg.loc[0, 'geometry'] = main_geom
            out_cleaned_boundary_shapefile = \
                os.path.splitext(shapefile_path)[0] +\
                str(uuid.uuid4()) +\
                '.shp'
            wfg.to_file(out_cleaned_boundary_shapefile)
            log.info("Cleaned boundary shapefile written to:"
                     "{}".format(out_cleaned_boundary_shapefile))
            return out_cleaned_boundary_shapefile
        return shapefile_path
Example #16
 def read(self, format=None, epsg=None):
     """
     Read vector data from a file (JSON, Shapefile, etc)
     :param format: Format to return data in (default is GeoDataFrame)
     :param epsg: EPSG code to reproject data to
     :return: Data in requested format (GeoDataFrame, GeoJSON)
     """
     if not format:
         format = self.default_output
     if self.ext not in formats.VECTOR:
         raise UnsupportedFormatException(
             "Only the following vector formats are supported: {}".format(
                 ','.join(formats.VECTOR)
             )
         )
     if self.data is None:
         self.data = geopandas.read_file(self.uri)
         if self.filters:
             self.filter_data()
     out_data = self.data
     if epsg and self.get_epsg() != epsg:
         out_data = geopandas.GeoDataFrame.copy(out_data)
         out_data[out_data.geometry.name] = \
             self.data.geometry.to_crs(epsg=epsg)
         out_data.crs = fiona.crs.from_epsg(epsg)
     if format == formats.JSON:
         return out_data.to_json()
     else:
         return out_data
Example #17
 def setUp(self):
     nybb_filename = tests.util.download_nybb()
     path = '/nybb_13a/nybb.shp'
     vfs = 'zip://' + nybb_filename
     self.df = read_file(path, vfs=vfs)
     with fiona.open(path, vfs=vfs) as f:
         self.crs = f.crs
Example #18
 def setUp(self):
     answers = gpd.read_file(DIRPATH + '/answers.geojson')
     tests = pd.read_json(DIRPATH + '/tests.json')
     hard_tests = pd.read_json(DIRPATH + '/degenerate.json')
     self.all = answers.merge(tests, on='names').merge(hard_tests, on='names')
     self.conn = Connection('DECENNIALSF12010')
     self.conn.set_mapservice('tigerWMS_Census2010')
Example #19
    def setup_method(self):

        nybb_filename = geopandas.datasets.get_path('nybb')
        self.polydf = read_file(nybb_filename)
        self.polydf = self.polydf[['geometry', 'BoroName', 'BoroCode']]

        self.polydf = self.polydf.rename(columns={'geometry': 'myshapes'})
        self.polydf = self.polydf.set_geometry('myshapes')

        self.polydf['manhattan_bronx'] = 5
        self.polydf.loc[3:4, 'manhattan_bronx'] = 6

        # Merged geometry
        manhattan_bronx = self.polydf.loc[3:4, ]
        others = self.polydf.loc[0:2, ]

        collapsed = [others.geometry.unary_union,
                     manhattan_bronx.geometry.unary_union]
        merged_shapes = GeoDataFrame(
            {'myshapes': collapsed}, geometry='myshapes',
            index=pd.Index([5, 6], name='manhattan_bronx'))

        # Different expected results
        self.first = merged_shapes.copy()
        self.first['BoroName'] = ['Staten Island', 'Manhattan']
        self.first['BoroCode'] = [5, 1]

        self.mean = merged_shapes.copy()
        self.mean['BoroCode'] = [4, 1.5]
Example #20
    def setUp(self):
        N = 10

        nybb_filename, nybb_zip_path = download_nybb()

        self.df = read_file(nybb_zip_path, vfs='zip://' + nybb_filename)
        with fiona.open(nybb_zip_path, vfs='zip://' + nybb_filename) as f:
            self.schema = f.schema
        self.tempdir = tempfile.mkdtemp()
        self.boros = self.df['BoroName']
        self.crs = {'init': 'epsg:4326'}
        self.df2 = GeoDataFrame([
            {'geometry': Point(x, y), 'value1': x + y, 'value2': x * y}
            for x, y in zip(range(N), range(N))], crs=self.crs)
        self.df3 = read_file(os.path.join(PACKAGE_DIR, 'examples', 'null_geom.geojson'))
        self.line_paths = self.df3['Name']
Example #21
 def setUp(self):
     nybb_filename, nybb_zip_path = download_nybb()
     vfs = 'zip://' + nybb_filename
     self.df = read_file(nybb_zip_path, vfs=vfs)
     with fiona.open(nybb_zip_path, vfs=vfs) as f:
         self.crs = f.crs
         self.columns = list(f.meta["schema"]["properties"].keys())
Example #22
 def setUp(self):
     nybb_filename = tests.util.download_nybb()
     path = "/nybb_14a_av/nybb.shp"
     vfs = "zip://" + nybb_filename
     self.df = read_file(path, vfs=vfs)
     with fiona.open(path, vfs=vfs) as f:
         self.crs = f.crs
def assign_taxi_zones(df, lon_var, lat_var, locid_var):
    """Joins DataFrame with Taxi Zones shapefile.
    This function takes longitude values provided by `lon_var`, and latitude
    values provided by `lat_var` in DataFrame `df`, and performs a spatial join
    with the NYC taxi_zones shapefile. 
    The shapefile path is hard-coded, as this function makes a hard assumption
    about the latitude and longitude coordinates. It also assumes that
    latitude=0, longitude=0 is not a data point that can exist in your dataset,
    which is reasonable for a New York dataset but bad for a global one.
    Only rows where `df.lon_var` and `df.lat_var` are reasonably near New York
    and `df.locid_var` is set to np.nan are updated.
    Parameters
    ----------
    df : pandas.DataFrame or dask.DataFrame
        DataFrame containing latitudes, longitudes, and location_id columns.
    lon_var : string
        Name of column in `df` containing longitude values. Invalid values 
        should be np.nan.
    lat_var : string
        Name of column in `df` containing latitude values. Invalid values 
        should be np.nan
    locid_var : string
        Name of column in `df` containing taxi_zone location ids. Rows with
        valid, nonzero values are not overwritten. 
    """

    import geopandas
    import traceback
    from shapely.geometry import Point


    localdf = df[[lon_var, lat_var, locid_var]].copy()
    # localdf = localdf.reset_index()
    localdf[lon_var] = localdf[lon_var].fillna(value=0.)
    localdf[lat_var] = localdf[lat_var].fillna(value=0.)
    localdf['replace_locid'] = (localdf[locid_var].isnull()
                                & (localdf[lon_var] != 0.)
                                & (localdf[lat_var] != 0.))

    if (np.any(localdf['replace_locid'])):
        shape_df = geopandas.read_file('../shapefiles/taxi_zones.shp')
        shape_df.drop(['OBJECTID', "Shape_Area", "Shape_Leng", "borough", "zone"],
                      axis=1, inplace=True)
        shape_df = shape_df.to_crs({'init': 'epsg:4326'})

        try:
            local_gdf = geopandas.GeoDataFrame(
                localdf, crs={'init': 'epsg:4326'},
                geometry=[Point(xy) for xy in
                          zip(localdf[lon_var], localdf[lat_var])])

            local_gdf = geopandas.sjoin(
                local_gdf, shape_df, how='left', op='within')

            return local_gdf.LocationID.rename(locid_var)
        except ValueError as ve:
            print(ve)
            traceback.print_exc()
            return df[locid_var]
    else:
        return df[locid_var]
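# Hypothetical usage, assuming the usual NYC taxi trip column names:
#
#     df['pickup_taxizone_id'] = assign_taxi_zones(
#         df, 'pickup_longitude', 'pickup_latitude', 'pickup_taxizone_id')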
def test_timedynamic_geo_json():
    """
    tests folium.plugins.TimeSliderChoropleth
    """
    import geopandas as gpd
    assert 'naturalearth_lowres' in gpd.datasets.available
    datapath = gpd.datasets.get_path('naturalearth_lowres')
    gdf = gpd.read_file(datapath)

    n_periods = 3
    dt_index = pd.date_range('2016-1-1', periods=n_periods, freq='M').strftime('%s')

    styledata = {}

    for country in gdf.index:
        pdf = pd.DataFrame(
            {'color': np.random.normal(size=n_periods),
             'opacity': np.random.normal(size=n_periods)},
            index=dt_index)
        styledata[country] = pdf.cumsum()

    max_color, min_color = 0, 0

    for country, data in styledata.items():
        max_color = max(max_color, data['color'].max())
        min_color = min(min_color, data['color'].min())

    cmap = linear.PuRd_09.scale(min_color, max_color)

    # Define function to normalize column into range [0,1]
    def norm(col):
        return (col - col.min())/(col.max()-col.min())

    for country, data in styledata.items():
        data['color'] = data['color'].apply(cmap)
        data['opacity'] = norm(data['opacity'])

    styledict = {str(country): data.to_dict(orient='index') for
                 country, data in styledata.items()}

    m = folium.Map((0, 0), tiles='Stamen Watercolor', zoom_start=2)

    time_slider_choropleth = TimeSliderChoropleth(
        gdf.to_json(),
        styledict
    )
    time_slider_choropleth.add_to(m)

    rendered = time_slider_choropleth._template.module.script(time_slider_choropleth)

    m._repr_html_()
    out = m._parent.render()
    assert '<script src="https://d3js.org/d3.v4.min.js"></script>' in out

    # We verify that data has been inserted correctly
    expected_timestamps = """var timestamps = ["1454198400", "1456704000", "1459382400"];"""  # noqa
    assert expected_timestamps.split(';')[0].strip() == rendered.split(';')[0].strip()

    expected_styledict = json.dumps(styledict, sort_keys=True, indent=2)
    assert expected_styledict in rendered
Example #25
    def setup_method(self):
        N = 10

        nybb_filename = geopandas.datasets.get_path('nybb')

        self.polydf = read_file(nybb_filename)
        self.crs = {'init': 'epsg:4326'}
        b = [int(x) for x in self.polydf.total_bounds]
        self.polydf2 = GeoDataFrame(
            [{'geometry': Point(x, y).buffer(10000), 'value1': x + y,
              'value2': x - y}
             for x, y in zip(range(b[0], b[2], int((b[2]-b[0])/N)),
                             range(b[1], b[3], int((b[3]-b[1])/N)))],
            crs=self.crs)
        self.pointdf = GeoDataFrame(
            [{'geometry': Point(x, y), 'value1': x + y, 'value2': x - y}
             for x, y in zip(range(b[0], b[2], int((b[2]-b[0])/N)),
                             range(b[1], b[3], int((b[3]-b[1])/N)))],
            crs=self.crs)

        # TODO this appears to be necessary;
        # why is the sindex not generated automatically?
        self.polydf2._generate_sindex()

        self.union_shape = (180, 7)
Example #26
    def __init__(self, img_rds, depth_rds, sand_shp, gdf_query=None,
                 depth_range=None, surface_refraction=False,
                 surface_reflectance=False):
        self.surf_reflectance = surface_reflectance
        self.surf_refraction = surface_refraction
        self.depth_range = depth_range
        if type(img_rds).__name__ == 'RasterDS':
            self.img_rds = img_rds
        else:
            self.img_rds = RasterDS(img_rds)

        if type(depth_rds).__name__ == 'RasterDS':
            self.depth_rds = depth_rds
        else:
            self.depth_rds = RasterDS(depth_rds)

        if type(sand_shp).__name__ == 'GeoDataFrame':
            self.gdf = sand_shp
        else:
            self.gdf = gpd.read_file(sand_shp)

        self.gdf_query = gdf_query
        # self.full_image_array = self.img_rds.band_array

        self._set_arrays()
def test_mapclassify_bin():
    # data
    link_to_data = examples.get_path('columbus.shp')
    gdf = gpd.read_file(link_to_data)
    x = gdf['HOVAL'].values
    # quantiles
    mapclassify_bin(x, 'quantiles')
    mapclassify_bin(x, 'quantiles', k=3)
    # box_plot
    mapclassify_bin(x, 'box_plot')
    mapclassify_bin(x, 'box_plot', hinge=2)
    # headtail_breaks
    mapclassify_bin(x, 'headtail_breaks')   
    # percentiles
    mapclassify_bin(x, 'percentiles')
    mapclassify_bin(x, 'percentiles', pct=[25,50,75,100])
    # std_mean
    mapclassify_bin(x, 'std_mean')
    mapclassify_bin(x, 'std_mean', multiples=[-1,-0.5,0.5,1])
    # maximum_breaks
    mapclassify_bin(x, 'maximum_breaks')
    mapclassify_bin(x, 'maximum_breaks', k=3, mindiff=0.1)
    # natural_breaks, max_p_classifier
    mapclassify_bin(x, 'natural_breaks')
    mapclassify_bin(x, 'max_p_classifier', k=3, initial=50)
    # user_defined
    mapclassify_bin(x, 'user_defined', bins=[20, max(x)])
Example #28
def prepare_edge(edge_shapefile, building_shapefile):
    """Create edge graph with grouped building demands.
    """
    # load buildings and sum by type and nearest edge ID
    # 1. read shapefile to DataFrame (with special geometry column)
    # 2. group DataFrame by columns 'nearest' (ID of nearest edge) and 'type'
    #    (residential, commercial, industrial, other)
    # 3. sum by group and unstack, i.e. convert secondary index 'type' to columns
    buildings = geopandas.read_file(building_shapefile+'.shp')
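    # NOTE: convert_objects is deprecated in newer pandas; pd.to_numeric is the
    # modern replacement for this kind of numeric conversion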
    buildings = buildings.convert_objects(convert_numeric=True)
    building_type_mapping = {
        'basin': 'other', 'chapel': 'other', 'church': 'other',
        'farm_auxiliary': 'other', 'greenhouse': 'other',
        'school': 'public',
        'office': 'commercial', 'restaurant': 'commercial',
        'yes': 'residential', 'house': 'residential'}
    buildings.replace(to_replace={'type': building_type_mapping}, inplace=True)
    buildings = buildings.to_crs(epsg=32632)
    buildings['AREA'] = buildings.area
    buildings_grouped = buildings.groupby(['nearest', 'type'])
    total_area = buildings_grouped.sum()['AREA'].unstack()

    # load edges (streets) and join with summed areas
    # 1. read shapefile to DataFrame (with geometry column)
    # 2. join DataFrame total_area on index (=ID)
    # 3. fill missing values with 0
    edge = pdshp.read_shp(edge_shapefile)
    edge = edge.set_index('Edge')
    edge = edge.join(total_area)
    edge = edge.fillna(0)
    return edge
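# A tiny illustration (with made-up values) of the group-sum-unstack step used
# in prepare_edge, giving total building area per nearest edge ID and type:
#
#     buildings = pd.DataFrame({'nearest': [1, 1, 2],
#                               'type': ['residential', 'commercial', 'residential'],
#                               'AREA': [120.0, 80.0, 200.0]})
#     buildings.groupby(['nearest', 'type']).sum()['AREA'].unstack()
#     # type     commercial  residential
#     # nearest
#     # 1              80.0        120.0
#     # 2               NaN        200.0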
def union_and_filter(input_dir, output_dir, countries):
    # make dir to hold unioned and dissolved shapefiles
    rm_and_mkdir(output_dir)
    for country in countries:
        print(country)
        # specify io paths
        input_filename = country + '.shp'
        input_path = os.path.join(input_dir, input_filename)
        output_path = os.path.join(output_dir, input_filename)

        # load country shapefile
        gdf_country = gpd.read_file(input_path)
        gdf_country.rename(columns={'country': 'val', 'val': 'country'}, inplace=True)

        # filter out low pixel values
        thresh = 25
        gdf_country = gdf_country[gdf_country['val'] >= thresh]

        # union resulting geometries, assign crs, write to temp file
        polys = gdf_country.geometry
        poly = polys.unary_union
        poly_country = [country, poly]
        gdf_poly_country = gpd.GeoDataFrame(poly_country).T.rename(columns={0: 'country', 1: 'geometry'})
        gdf_poly_country.crs = {'init': 'epsg:4326', 'no_defs': True}
        try:
            gdf_poly_country.to_file(output_path)
        except Exception:
            print('No polygon values greater than thresh')
def test_vba_choropleth():
    # data
    link_to_data = examples.get_path('columbus.shp')
    gdf = gpd.read_file(link_to_data)
    x = gdf['HOVAL'].values
    y = gdf['CRIME'].values
    # plot
    fig, _ = vba_choropleth(x, y, gdf)
    plt.close(fig)
    # plot with divergent and reverted alpha
    fig, _ = vba_choropleth(x, y, gdf, cmap='RdBu',
                            divergent=True,
                            revert_alpha=True)
    plt.close(fig)
    # plot with classified alpha and rgb
    fig, _ = vba_choropleth(x, y, gdf, cmap='RdBu',
                            alpha_mapclassify=dict(classifier='quantiles'),
                            rgb_mapclassify=dict(classifier='quantiles'))
    plt.close(fig)
    # plot classified with legend
    fig, _ = vba_choropleth(x, y, gdf,
                            alpha_mapclassify=dict(classifier='std_mean'),
                            rgb_mapclassify=dict(classifier='std_mean'),
                            legend=True)
    plt.close(fig)
def spectral_unmixing_main(args, img_src, endmember_array, unmixing_method):

    # Find the number of regional clusters for the area of interest
    n_regional_clusters = return_nclusters(args)
    img_meta = img_src.meta

    # Read in polygons file
    polygons_file = os.path.join(
        args.base_dir, 'saved_rainfall_regions', 'clean_regions',
        '{}_rainfall_regions_nclusters_{}_clean.shp'.format(
            args.unmixing_region, n_regional_clusters))
    region_polygons = gpd.read_file(polygons_file).to_crs(img_src.meta['crs'])

    # Reorder maps and endmembers
    endmember_array = np.transpose(np.array(endmember_array))

    print('Cropping image and spectral unmixing, starting timer')
    t = time.time()

    for region in range(n_regional_clusters):

        # Crop image to the regional clusters
        cropped_img, cropped_transform = mask(
            img_src, [region_polygons['geometry'].iloc[region]], crop=True)
        evi_img = np.moveaxis(cropped_img, 0, -1)

        abundance_map = np.zeros(
            (evi_img.shape[0], evi_img.shape[1], 4)) * np.nan

        nonzero_indices = np.mean(evi_img, axis=-1) != 0
        evi_img_nonzero = evi_img[nonzero_indices]

        # Set up an unmixing modeling instance
        if unmixing_method == 'ucls':
            amap = UCLS
        elif unmixing_method == 'fcls':
            amap = FCLS
        elif unmixing_method == 'nnls':
            amap = NNLS

        # Select and normalize endmembers
        regional_endmembers = endmember_array[region * 3:(region + 1) * 3]
        for i in range(2):
            regional_endmembers[i] = normalize(regional_endmembers[i])

        print('Unmixing for region {}'.format(region))
        abundance_map[nonzero_indices] = flattened_image_unmixing(
            amap, evi_img_nonzero, regional_endmembers)

        abundance_map = np.moveaxis(abundance_map, -1, 0).astype(np.float32)

        out_file_path = os.path.join(
            args.base_dir, 'abundance_maps', args.unmixing_region,
            'regional_maps',
            '{}_abundancemap_modis_250m_{}_unmixingmethod_automatic_tEMs_'
            'outphasetype_{}_region_{}.tif'.format(
                args.unmixing_region, args.unmixing_method,
                args.outphase_endmember_type, region))

        if os.path.exists(out_file_path):
            os.remove(out_file_path)

        # Update metadata
        img_meta['count'] = 4
        img_meta['dtype'] = 'float32'
        img_meta['nodata'] = 'nan'
        img_meta['transform'] = cropped_transform
        img_meta['height'] = abundance_map.shape[1]
        img_meta['width'] = abundance_map.shape[2]

        # Write out regional abundance map
        with rasterio.open(out_file_path, 'w+', **img_meta) as dest:
            dest.write(abundance_map)

    elapsed = (time.time() - t)

    print('Elapsed time for abundance map creation: {}s'.format(elapsed))
    # Merge the abundance maps into a mosaic
    merge_regional_abundance_maps(args)
Example #32
 def add_columns_from_shapefile(self,
                                shapefile_path,
                                columns=None,
                                id_column=None):
     df = geopandas.read_file(shapefile_path)
     self.add_data_from_df(df, columns, id_column)
Example #33
import geopandas
import pandas as pd
#import shapely
#from shapely.geometry import Point, LineString, MultiPolygon, Polygon
import pyproj
#import matplotlib
import matplotlib.pyplot as plt
import numpy as np

#import os
#os.chdir('Code/mapproj')
import mapproj

geod = pyproj.Geod(a=6371, f=0)
world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
a = np.arctan(1 / 2) / np.pi * 180
actrlpts3 = np.array([[15 + 0, 15 + 36, 15 - 36], [-a, a, a]])
ctrlpoly3 = mapproj.geodesics(actrlpts3[0],
                              actrlpts3[1],
                              geod,
                              includepts=True)
a = 180 / np.pi * np.arctan(1 / np.sqrt(2))
actrlpts4 = np.array([[0, 90, 90, 0], [-a, -a, a, a]])
ctrlpoly4 = mapproj.geodesics(actrlpts4[0],
                              actrlpts4[1],
                              geod,
                              includepts=True)

antipodepoly3 = mapproj.transeach(mapproj.transform_antipode, ctrlpoly3)
antipodepoly4 = mapproj.transeach(mapproj.transform_antipode, ctrlpoly4)
Example #34
import geopandas
import pandas as pd

if __name__ == '__main__':
    file = '/home/elvis/map/analize/analizeCross/countXEnt_new.shp'
    od_group_file = '/home/elvis/map/analize/analizeCross/count_od_group.shp'
    geo_df = geopandas.read_file(file)
    pd_df = pd.DataFrame(geo_df)
    geo_df = geopandas.read_file(od_group_file)
    od_group_df = pd.DataFrame(geo_df)
    od_group_df.score = od_group_df.score * pd_df.ENTROPY
    od_group_df = od_group_df[od_group_df.score > 0]
    od_group_df = od_group_df.sort_values(by=['score'], ascending=False)
    od_group_df = od_group_df.reset_index(drop=True)
    od_group_df['rank'] = od_group_df.index

    path = '/home/elvis/map/analize/analizeCross/od_group*entropy_v4.shp'
    cross_scores = geopandas.GeoDataFrame(od_group_df, geometry='geometry')
    cross_scores.to_file(path)
username = "******"  #please update with a correct user
raw_data_location = f"Users/{username}/geospatial/workshop/data/raw/"
dbutils.fs.mkdirs(f"dbfs:/{raw_data_location}/")
dbutils.fs.cp("file:/tmp/neighbourhoods/nynta2020_21d/nynta2020.prj",
              f"dbfs:/{raw_data_location}/neighbourhoods.prj")
dbutils.fs.cp("file:/tmp/neighbourhoods/nynta2020_21d/nynta2020.shx",
              f"dbfs:/{raw_data_location}/neighbourhoods.shx")
dbutils.fs.cp("file:/tmp/neighbourhoods/nynta2020_21d/nynta2020.shp",
              f"dbfs:/{raw_data_location}/neighbourhoods.shp")

# COMMAND ----------

# DBTITLE 1,Load Data to geopandas
import geopandas as gpd

neighborhoods_pd = gpd.read_file(
    f"/dbfs/{raw_data_location}/neighbourhoods.shp")
display(neighborhoods_pd.head())

# COMMAND ----------

# MAGIC %md
# MAGIC In order to prepare our polygon data we will be using a two-set representation via H3 decomposition. </br>
# MAGIC The only difference from what we discussed in notebook 01 is that we will be using a WKB representation for the chips. </br>
# MAGIC A binary representation of the polygons results in more efficient runtime handling of the chips. </br>
# MAGIC In addition, we will be returning a single array of structs instead of separate collections for core and border chips. </br>
# MAGIC This will simplify our join logic.

# COMMAND ----------

# DBTITLE 1,H3 Two Set Representation
import shapely
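
# A rough sketch of the core/border chip idea described above, using plain
# shapely geometry (a hypothetical helper; the actual notebook builds the
# chips from H3 cells):
from shapely import wkb
from shapely.geometry import Polygon


def chip(cell: Polygon, polygon: Polygon):
    """Return a struct-like dict: an is_core flag plus the chip geometry as WKB."""
    if cell.within(polygon):
        # core chip: the whole cell is inside the polygon, keep it as-is
        return {"is_core": True, "wkb": wkb.dumps(cell)}
    clipped = cell.intersection(polygon)
    if clipped.is_empty:
        return None
    # border chip: keep only the part of the cell that overlaps the polygon
    return {"is_core": False, "wkb": wkb.dumps(clipped)}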
Example #36
import geopandas as gpd
import numpy as np
import pandas as pd
from scipy.spatial import cKDTree

points = gpd.read_file(
    'hotosm_gha_points_of_interest_points_shp/hotosm_gha_points_of_interest_points.shp'
)


def ckdnearest(gdA, gdB, k):
    """Quickly fine the k nearest points close to another
    
    :param gdA: geopandas dataframe A
    :param gdB: geopandas dataframe B
    :param k: number of nearest neighbours to find
    
    :returns: joined geopandas dataframe with distance computed 
    in the last column
    """

    nA = np.array(list(zip(gdA.geometry.x, gdA.geometry.y)))
    nB = np.array(list(zip(gdB.geometry.x, gdB.geometry.y)))

    btree = cKDTree(nB)
    dist, idx = btree.query(nA, k)

    dd = []

    # when idx is a 1d array
    try:
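
# A minimal, self-contained sketch of the same k-nearest join (a hypothetical
# helper with assumed names; it expects point geometries in both frames):
def ckdnearest_sketch(gdA, gdB, k=1):
    nA = np.array(list(zip(gdA.geometry.x, gdA.geometry.y)))
    nB = np.array(list(zip(gdB.geometry.x, gdB.geometry.y)))
    dist, idx = cKDTree(nB).query(nA, k)
    # with k == 1 the query returns 1-d arrays; reshape so the code below
    # works for any k, then keep only the closest neighbour
    dist = np.atleast_2d(dist).reshape(len(nA), -1)
    idx = np.atleast_2d(idx).reshape(len(nA), -1)
    nearest = gdB.reset_index(drop=True).iloc[idx[:, 0]].reset_index(drop=True)
    # attach the nearest neighbour's attributes and the distance to gdA
    return pd.concat(
        [gdA.reset_index(drop=True),
         nearest.add_suffix('_nearest'),
         pd.Series(dist[:, 0], name='dist')],
        axis=1)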
def get_data(config_file):	
	########## Inputs ##########
	# read in config file
	config = configparser.ConfigParser()
	config.read(config_file)

	# shapefile and unique ID info
	shapefile = config['shapefile']['fname']
	geoid = config['shapefile']['geoid']

	# get column names from votes and districtings to read
	cols = []
	columns = config['columns']
	for key in columns:
		cols.append(columns[key])

	# optional demographic data
	if 'demographics' in config:
		for key in config['demographics']:
			cols.append(config['demographics'][key])
	# make dual graph
	dual_graph = construct_graph_from_file(shapefile, geoid, cols)
	num_nodes = dual_graph.number_of_nodes()
	
	#list of vectors 
	data_vectors = []
	#list of vector indexed by nodes
	data_vectors_att = []
	# streamline the process
	for key in columns:
		vector = np.zeros((num_nodes,1))
		vector_att = nx.get_node_attributes(dual_graph, columns[key])
		data_vectors.append(vector)
		data_vectors_att.append(vector_att)
        
	# get position data for drawing nodes at centroids
	df_vtd = gpd.read_file(shapefile)
	vtd_centroids = df_vtd.centroid
	vtd_x = vtd_centroids.x
	vtd_y = vtd_centroids.y
    
	inverse = {}
	sf = shp.Reader(shapefile)
	for i in range(len(sf.fields)):
		if sf.fields[i][0] == geoid:
			idx = i-1
			break
	records = sf.records()
	for i in range(len(records)):
		inverse[records[i][idx]] = i
        
	# assign attributes in order of nodes to match adjacency matrix
	for i in range(len(data_vectors)):
		count = 0 
		for node in dual_graph.nodes():
			data_vectors[i][count] = data_vectors_att[i][node]
			count += 1

	pos = {}                
	for node in dual_graph.nodes():
		pos[node] = (vtd_x[inverse[node]], vtd_y[inverse[node]])

	node_size = [(data_vectors[1][i] + data_vectors[2][i])/500 for i in range(dual_graph.number_of_nodes())]

	return dual_graph, data_vectors, pos, node_size
Example #38
 def add_columns_from_shapefile(self,
                                shapefile,
                                columns=None,
                                id_column=None):
     df = geopandas.read_file(shapefile)
     return self.add_columns_from_df(df, columns, id_column)
def main():
    config = load_config()

    hazard_cols = ['hazard_type', 'climate_scenario', 'year']
    duration = 10

    hazard_set = [{
        'hazard': 'fluvial flooding',
        'name': 'Fluvial flooding'
    }, {
        'hazard': 'pluvial flooding',
        'name': 'Pluvial flooding'
    }]
    change_colors = [
        '#1a9850', '#66bd63', '#a6d96a', '#d9ef8b', '#fee08b', '#fdae61',
        '#f46d43', '#d73027', '#969696'
    ]
    change_labels = [
        '< -100', '-100 to -50', '-50 to -10', '-10 to 0', '0 to 10',
        '10 to 50', '50 to 100', ' > 100', 'No change/value'
    ]
    change_ranges = [(-1e10, -100), (-100, -50), (-50, -10), (-10, 0),
                     (0.001, 10), (10, 50), (50, 100), (100, 1e10)]

    eael_set = [{
        'column': 'min_eael',
        'title': 'Min EAEL',
        'legend_label': "EAEL (million USD)",
        'divisor': 1000000,
        'significance': 0
    }, {
        'column': 'max_eael',
        'title': 'Max EAEL',
        'legend_label': "EAEL (million USD)",
        'divisor': 1000000,
        'significance': 0
    }]
    data_path = config['paths']['data']

    region_file_path = os.path.join(config['paths']['data'], 'network',
                                    'rail_edges.shp')
    flow_file_path = os.path.join(config['paths']['output'],
                                  'flow_mapping_combined',
                                  'weighted_flows_rail_100_percent.csv')
    region_file = gpd.read_file(region_file_path, encoding='utf-8')
    flow_file = pd.read_csv(flow_file_path)
    region_file = pd.merge(region_file, flow_file, how='left',
                           on=['edge_id']).fillna(0)

    region_file = region_file[region_file['max_total_tons'] > 0]
    del flow_file

    flow_file_path = os.path.join(
        config['paths']['output'], 'failure_results',
        'minmax_combined_scenarios',
        'single_edge_failures_minmax_rail_100_percent_disrupt.csv')
    flow_file = pd.read_csv(flow_file_path)

    flow_file_path = os.path.join(
        config['paths']['output'], 'network_stats',
        'rail_hazard_intersections_risk_weights.csv')

    fail_sc = pd.read_csv(flow_file_path)
    fail_scenarios = pd.merge(fail_sc, flow_file, how='left',
                              on=['edge_id']).fillna(0)
    del flow_file, fail_sc

    fail_scenarios['min_eael'] = duration * fail_scenarios[
        'risk_wt'] * fail_scenarios['min_econ_impact']
    fail_scenarios['max_eael'] = duration * fail_scenarios[
        'risk_wt'] * fail_scenarios['max_econ_impact']
    all_edge_fail_scenarios = fail_scenarios[
        hazard_cols + ['edge_id', 'min_eael', 'max_eael']]
    all_edge_fail_scenarios = all_edge_fail_scenarios.groupby(
        hazard_cols + ['edge_id'])[['min_eael', 'max_eael']].max().reset_index()

    # Climate change effects
    all_edge_fail_scenarios = all_edge_fail_scenarios.set_index(
        ['hazard_type', 'edge_id'])
    scenarios = list(set(all_edge_fail_scenarios.index.values.tolist()))
    change_tup = []
    for sc in scenarios:
        eael = all_edge_fail_scenarios.loc[[sc], 'max_eael'].values.tolist()
        yrs = all_edge_fail_scenarios.loc[[sc], 'year'].values.tolist()
        cl = all_edge_fail_scenarios.loc[[sc],
                                         'climate_scenario'].values.tolist()
        if 2016 not in yrs:
            for e in range(len(eael)):
                if eael[e] > 0:
                    # change_tup += list(zip([sc[0]]*len(cl),[sc[1]]*len(cl),cl,yrs,[0]*len(cl),eael,[1e9]*len(cl)))
                    change_tup += [(sc[0], sc[1], cl[e], yrs[e], 0, eael[e],
                                    1e9)]
        elif len(yrs) > 1:
            vals = list(zip(cl, eael, yrs))
            vals = sorted(vals, key=lambda pair: pair[-1])
            change = 100.0 * (np.array([p for (c, p, y) in vals[1:]]) -
                              vals[0][1]) / vals[0][1]
            cl = [c for (c, p, y) in vals[1:]]
            yrs = [y for (c, p, y) in vals[1:]]
            fut = [p for (c, p, y) in vals[1:]]
            change_tup += list(
                zip([sc[0]] * len(cl), [sc[1]] * len(cl), cl, yrs,
                    [vals[0][1]] * len(cl), fut, change))

    change_df = pd.DataFrame(change_tup,
                             columns=[
                                 'hazard_type', 'edge_id', 'climate_scenario',
                                 'year', 'current', 'future', 'change'
                             ]).fillna('inf')
    change_df = change_df[change_df['change'] != 'inf']
    change_df.to_csv(os.path.join(config['paths']['output'], 'network_stats',
                                  'national_rail_eael_climate_change.csv'),
                     index=False)

    # Change effects
    change_df = change_df.set_index(hazard_cols)
    scenarios = list(set(change_df.index.values.tolist()))
    for sc in scenarios:
        hazard_type = sc[0]
        climate_scenario = sc[1]
        year = sc[2]
        percentage = change_df.loc[[sc], 'change'].values.tolist()
        edges = change_df.loc[[sc], 'edge_id'].values.tolist()
        edges_df = pd.DataFrame(list(zip(edges, percentage)),
                                columns=['edge_id', 'change'])
        edges_vals = pd.merge(region_file,
                              edges_df,
                              how='left',
                              on=['edge_id']).fillna(0)
        del percentage, edges, edges_df

        proj_lat_lon = ccrs.PlateCarree()
        ax = get_axes()
        plot_basemap(ax, data_path)
        scale_bar(ax, location=(0.8, 0.05))
        plot_basemap_labels(ax, data_path, include_regions=True)

        name = [c['name'] for c in hazard_set if c['hazard'] == hazard_type][0]
        for record in edges_vals.itertuples():
            geom = record.geometry
            region_val = record.change
            if region_val:
                cl = [
                    c for c in range(len((change_ranges)))
                    if region_val >= change_ranges[c][0]
                    and region_val < change_ranges[c][1]
                ]
                if cl:
                    c = cl[0]
                    ax.add_geometries([geom],
                                      crs=proj_lat_lon,
                                      linewidth=2.0,
                                      edgecolor=change_colors[c],
                                      facecolor='none',
                                      zorder=8)
                    # ax.add_geometries([geom.buffer(0.1)],crs=proj_lat_lon,linewidth=0,facecolor=change_colors[c],edgecolor='none',zorder=8)
            else:
                ax.add_geometries([geom],
                                  crs=proj_lat_lon,
                                  linewidth=1.5,
                                  edgecolor=change_colors[-1],
                                  facecolor='none',
                                  zorder=7)
                # ax.add_geometries([geom.buffer(0.1)], crs=proj_lat_lon, linewidth=0,facecolor=change_colors[-1],edgecolor='none',zorder=7)
        # Legend
        legend_handles = []
        for c in range(len(change_colors)):
            legend_handles.append(
                mpatches.Patch(color=change_colors[c], label=change_labels[c]))

        ax.legend(handles=legend_handles,
                  title='Percentage change in EAEL',
                  loc=(0.55, 0.2),
                  fancybox=True,
                  framealpha=1.0)
        if climate_scenario == 'none':
            climate_scenario = 'current'
        else:
            climate_scenario = climate_scenario.upper()

        title = 'Percentage change in EAEL for {} {} {}'.format(
            name,
            climate_scenario.replace('_', ' ').title(), year)
        print(" * Plotting {}".format(title))

        plt.title(title, fontsize=10)
        output_file = os.path.join(
            config['paths']['figures'],
            'national-rail-{}-{}-{}-risks-change-percentage.png'.format(
                name,
                climate_scenario.replace('-', ' ').title(), year))
        save_fig(output_file)
        plt.close()

    # Absolute effects
    all_edge_fail_scenarios = all_edge_fail_scenarios.reset_index()
    all_edge_fail_scenarios = all_edge_fail_scenarios.set_index(hazard_cols)
    scenarios = list(set(all_edge_fail_scenarios.index.values.tolist()))
    for sc in scenarios:
        hazard_type = sc[0]
        climate_scenario = sc[1]
        if climate_scenario == 'none':
            climate_scenario = 'current'
        else:
            climate_scenario = climate_scenario.upper()
        year = sc[2]
        min_eael = all_edge_fail_scenarios.loc[[sc],
                                               'min_eael'].values.tolist()
        max_eael = all_edge_fail_scenarios.loc[[sc],
                                               'max_eael'].values.tolist()
        edges = all_edge_fail_scenarios.loc[[sc], 'edge_id'].values.tolist()
        edges_df = pd.DataFrame(list(zip(edges, min_eael, max_eael)),
                                columns=['edge_id', 'min_eael', 'max_eael'])
        edges_vals = pd.merge(region_file,
                              edges_df,
                              how='left',
                              on=['edge_id']).fillna(0)
        del edges_df

        for c in range(len(eael_set)):
            proj_lat_lon = ccrs.PlateCarree()
            ax = get_axes()
            plot_basemap(ax, data_path)
            scale_bar(ax, location=(0.8, 0.05))
            plot_basemap_labels(ax, data_path, include_regions=True)

            # generate weight bins
            column = eael_set[c]['column']
            weights = [
                record[column] for iter_, record in edges_vals.iterrows()
            ]

            max_weight = max(weights)
            width_by_range = generate_weight_bins(weights,
                                                  width_step=0.04,
                                                  n_steps=5)

            rail_geoms_by_category = {'1': [], '2': []}

            for iter_, record in edges_vals.iterrows():
                geom = record.geometry
                val = record[column]
                if val == 0:
                    cat = '2'
                else:
                    cat = '1'

                buffered_geom = None
                for (nmin, nmax), width in width_by_range.items():
                    if nmin <= val and val < nmax:
                        buffered_geom = geom.buffer(width)

                if buffered_geom is not None:
                    rail_geoms_by_category[cat].append(buffered_geom)
                else:
                    print("Feature was outside range to plot", iter_)

            styles = OrderedDict([
                ('1',
                 Style(color='#006d2c',
                       zindex=9,
                       label='Hazard failure effect')),  # green
                ('2',
                 Style(color='#969696',
                       zindex=7,
                       label='No hazard exposure/effect'))
            ])

            for cat, geoms in rail_geoms_by_category.items():
                cat_style = styles[cat]
                ax.add_geometries(geoms,
                                  crs=proj_lat_lon,
                                  linewidth=0,
                                  facecolor=cat_style.color,
                                  edgecolor='none',
                                  zorder=cat_style.zindex)
            name = [
                h['name'] for h in hazard_set if h['hazard'] == hazard_type
            ][0]

            x_l = -62.4
            x_r = x_l + 0.4
            base_y = -42.1
            y_step = 0.8
            y_text_nudge = 0.2
            x_text_nudge = 0.2

            ax.text(x_l,
                    base_y + y_step - y_text_nudge,
                    eael_set[c]['legend_label'],
                    horizontalalignment='left',
                    transform=proj_lat_lon,
                    size=10)

            divisor = eael_set[c]['divisor']
            significance_ndigits = eael_set[c]['significance']
            max_sig = []
            for (i, ((nmin, nmax),
                     line_style)) in enumerate(width_by_range.items()):
                if round(nmin / divisor, significance_ndigits) < round(
                        nmax / divisor, significance_ndigits):
                    max_sig.append(significance_ndigits)
                elif round(nmin / divisor, significance_ndigits + 1) < round(
                        nmax / divisor, significance_ndigits + 1):
                    max_sig.append(significance_ndigits + 1)
                elif round(nmin / divisor, significance_ndigits + 2) < round(
                        nmax / divisor, significance_ndigits + 2):
                    max_sig.append(significance_ndigits + 2)
                else:
                    max_sig.append(significance_ndigits + 3)

            significance_ndigits = max(max_sig)
            for (i, ((nmin, nmax),
                     width)) in enumerate(width_by_range.items()):
                y = base_y - (i * y_step)
                line = LineString([(x_l, y), (x_r, y)]).buffer(width)
                ax.add_geometries([line],
                                  crs=proj_lat_lon,
                                  linewidth=0,
                                  edgecolor='#000000',
                                  facecolor='#000000',
                                  zorder=2)
                if nmin == max_weight:
                    value_template = '>{:.' + str(significance_ndigits) + 'f}'
                    label = value_template.format(
                        round(max_weight / divisor, significance_ndigits))
                else:
                    value_template = '{:.' + str(significance_ndigits) + \
                        'f}-{:.' + str(significance_ndigits) + 'f}'
                    label = value_template.format(
                        round(nmin / divisor, significance_ndigits),
                        round(nmax / divisor, significance_ndigits))

                ax.text(x_r + x_text_nudge,
                        y - y_text_nudge,
                        label,
                        horizontalalignment='left',
                        transform=proj_lat_lon,
                        size=10)

            if climate_scenario == 'none':
                climate_scenario = 'Current'

            climate_scenario = climate_scenario.replace('_', ' ')
            title = 'Railways ({}) {} {} {}'.format(eael_set[c]['title'], name,
                                                    climate_scenario.title(),
                                                    year)
            print('* Plotting ', title)

            plt.title(title, fontsize=12)
            legend_from_style_spec(ax, styles, loc='lower left')

            # output
            output_file = os.path.join(
                config['paths']['figures'],
                'national-rail-{}-{}-{}-{}.png'.format(
                    name.replace(' ', ''), climate_scenario.replace('.', ''),
                    year, eael_set[c]['column']))
            save_fig(output_file)
            plt.close()
Example #40
st.header("Select Workflow Parameters")
# Define the aoi and input parameters of the workflow.
col1_params, col2_params = st.beta_columns(2)

with col1_params:
    aoi_location = st.selectbox('Which area of interest?',
                                ["Berlin", "Washington"])
    aoi = up42.get_example_aoi(location=aoi_location, as_dataframe=True)
    # expander_aoi = st.beta_expander("Show aoi feature")
    # expander_aoi.json(aoi)

with col1_params:
    uploaded_file = st.file_uploader("Or upload a geojson file:",
                                     type=["geojson"])
    if uploaded_file is not None:
        aoi = gpd.read_file(uploaded_file, driver="GeoJSON")
        st.success("Using uploaded geojson as aoi!")

with col1_params:
    st.text("")
    start_date = st.date_input("Start date", parse("2019-01-01"))
    end_date = st.date_input("End date", parse("2020-01-01"))

with col1_params:
    limit = st.number_input(label='limit',
                            min_value=1,
                            max_value=10,
                            value=1,
                            step=1)

with col1_params:
 def test_Absolute_Centralization(self):
     s_map = gpd.read_file(libpysal.examples.get_path("sacramentot2.shp"))
     df = s_map[['geometry', 'HISP_', 'TOT_POP']]
     index = Absolute_Centralization(df, 'HISP_', 'TOT_POP')
     np.testing.assert_almost_equal(index.statistic, 0.6891422368736286)
Example #42
def shapefile_to_gpd_df(shp_fp, bbox=None):
    """
    read a shapefile filepath string to a geodataframe
    """
    gpd_df = gpd.read_file(shp_fp, bbox=bbox)
    return gpd_df
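# Hypothetical usage: pass bbox as (minx, miny, maxx, maxy) to read only the
# features that intersect that window, e.g.
#
#     roads = shapefile_to_gpd_df('roads.shp', bbox=(-74.05, 40.68, -73.85, 40.88))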
Example #43
def main():
    config = load_config()
    data_path = config['paths']['data']
    mode_file_path = os.path.join(config['paths']['data'], 'network',
                                  'road_edges.shp')
    flow_file_path = os.path.join(config['paths']['output'],
                                  'flow_mapping_combined',
                                  'weighted_flows_road_100_percent.csv')

    mode_file = gpd.read_file(mode_file_path, encoding='utf-8')
    flow_file = pd.read_csv(flow_file_path)
    mode_file = pd.merge(mode_file, flow_file, how='left',
                         on=['edge_id']).fillna(0)
    mode_file = mode_file[(mode_file['road_type'] == 'national') |
                          (mode_file['road_type'] == 'province') |
                          (mode_file['road_type'] == 'rural')]

    plot_sets = [
        {
            'file_tag': 'tmda',
            'legend_label': "AADT ('000 vehicles/day)",
            'divisor': 1000,
            'columns': ['tmda_count'],
            'title_cols': ['Vehicle Count'],
            'significance': 0
        },
        {
            'file_tag': 'commodities',
            'legend_label': "AADF ('000 tons/day)",
            'divisor': 1000,
            'columns': [
                'max_{}'.format(x) for x in [
                    'total_tons',
                    'AGRICULTURA, GANADERÍA, CAZA Y SILVICULTURA', 'Carnes',
                    'Combustibles', 'EXPLOTACIÓN DE MINAS Y CANTERAS',
                    'Granos', 'INDUSTRIA MANUFACTURERA', 'Industrializados',
                    'Mineria', 'PESCA', 'Regionales', 'Semiterminados'
                ]
            ],
            'title_cols': [
                'Total tonnage', 'AGRICULTURA, GANADERÍA, CAZA Y SILVICULTURA',
                'Carnes', 'Combustibles', 'EXPLOTACIÓN DE MINAS Y CANTERAS',
                'Granos', 'INDUSTRIA MANUFACTURERA', 'Industrializados',
                'Mineria', 'PESCA', 'Regionales', 'Semiterminados'
            ],
            'significance': 0
        },
    ]

    for plot_set in plot_sets:
        for c in range(len(plot_set['columns'])):
            # basemap
            proj_lat_lon = ccrs.PlateCarree()
            ax = get_axes()
            plot_basemap(ax, data_path)
            scale_bar(ax, location=(0.8, 0.05))
            plot_basemap_labels(ax, data_path, include_regions=False)

            # generate weight bins
            if plot_set['columns'][c] == 'tmda':
                column = plot_set['columns'][c]
                weights = [
                    int(str(record[column]))
                    for iter_, record in mode_file.iterrows()
                    if str(record[column]).isdigit() is True
                    and int(str(record[column])) > 0
                ]
                max_weight = max(weights)
                width_by_range = generate_weight_bins(weights,
                                                      n_steps=7,
                                                      width_step=0.02)
                # width_by_range = generate_weight_bins(weights, n_steps=9, width_step=0.01, interpolation='log')
            else:
                column = 'max_total_tons'
                weights = [
                    record[column] for iter_, record in mode_file.iterrows()
                ]
                max_weight = max(weights)
                width_by_range = generate_weight_bins(weights,
                                                      n_steps=7,
                                                      width_step=0.02)

            road_geoms_by_category = {
                'national': [],
                'province': [],
                'rural': [],
            }

            column = plot_set['columns'][c]
            for iter_, record in mode_file.iterrows():
                if column == 'tmda':
                    if str(record[column]).isdigit() is False:
                        val = 0
                    else:
                        val = int(str(record[column]))
                else:
                    val = record[column]

                if val > 0:
                    cat = str(record['road_type']).lower().strip()
                    if cat not in road_geoms_by_category:
                        raise Exception
                    geom = record.geometry

                    buffered_geom = None
                    for (nmin, nmax), width in width_by_range.items():
                        if nmin <= val and val < nmax:
                            buffered_geom = geom.buffer(width)

                    if buffered_geom is not None:
                        road_geoms_by_category[cat].append(buffered_geom)
                    else:
                        print("Feature was outside range to plot", iter_)

            styles = OrderedDict([
                ('national', Style(color='#e41a1c', zindex=9,
                                   label='National')),  # red
                ('province',
                 Style(color='#377eb8', zindex=8,
                       label='Provincial')),  # blue
                ('rural', Style(color='#4daf4a', zindex=7,
                                label='Rural')),  # green
            ])

            for cat, geoms in road_geoms_by_category.items():
                cat_style = styles[cat]
                ax.add_geometries(geoms,
                                  crs=proj_lat_lon,
                                  linewidth=0,
                                  facecolor=cat_style.color,
                                  edgecolor='none',
                                  zorder=cat_style.zindex)

            x_l = -62.4
            x_r = x_l + 0.4
            base_y = -42.1
            y_step = 0.8
            y_text_nudge = 0.2
            x_text_nudge = 0.2

            ax.text(x_l,
                    base_y + y_step - y_text_nudge,
                    plot_set['legend_label'],
                    horizontalalignment='left',
                    transform=proj_lat_lon,
                    size=10)

            divisor = plot_set['divisor']
            significance_ndigits = plot_set['significance']
            max_sig = []
            for (i, ((nmin, nmax),
                     line_style)) in enumerate(width_by_range.items()):
                if round(nmin / divisor, significance_ndigits) < round(
                        nmax / divisor, significance_ndigits):
                    max_sig.append(significance_ndigits)
                elif round(nmin / divisor, significance_ndigits + 1) < round(
                        nmax / divisor, significance_ndigits + 1):
                    max_sig.append(significance_ndigits + 1)
                elif round(nmin / divisor, significance_ndigits + 2) < round(
                        nmax / divisor, significance_ndigits + 2):
                    max_sig.append(significance_ndigits + 2)
                else:
                    max_sig.append(significance_ndigits + 3)

            significance_ndigits = max(max_sig)

            for (i, ((nmin, nmax),
                     width)) in enumerate(width_by_range.items()):
                y = base_y - (i * y_step)
                line = LineString([(x_l, y), (x_r, y)]).buffer(width)
                ax.add_geometries([line],
                                  crs=proj_lat_lon,
                                  linewidth=0,
                                  edgecolor='#000000',
                                  facecolor='#000000',
                                  zorder=2)
                if nmin == max_weight:
                    value_template = '>{:.' + str(significance_ndigits) + 'f}'
                    label = value_template.format(
                        round(max_weight / divisor, significance_ndigits))
                else:
                    value_template = '{:.' + str(significance_ndigits) + \
                        'f}-{:.' + str(significance_ndigits) + 'f}'
                    label = value_template.format(
                        round(nmin / divisor, significance_ndigits),
                        round(nmax / divisor, significance_ndigits))

                ax.text(x_r + x_text_nudge,
                        y - y_text_nudge,
                        label,
                        horizontalalignment='left',
                        transform=proj_lat_lon,
                        size=10)

            plt.title('Max AADF - {}'.format(plot_set['title_cols'][c]),
                      fontsize=10)
            legend_from_style_spec(ax, styles)
            output_file = os.path.join(
                config['paths']['figures'],
                'road_flow-map-{}-{}-max-scale.png'.format(
                    plot_set['file_tag'], column))
            save_fig(output_file)
            plt.close()
Example #44
0
def return_endmembers(args, src):
    # This is the top-level function for calling all the helper functions to return the endmembers

    save_file_endmembers = os.path.join(
        args.base_dir, 'saved_endmembers', args.unmixing_region,
        'extracted_endmembers_{}_outphasetype_{}_nclusters_{}_nsamples_{}.csv'.
        format(args.unmixing_region, args.outphase_endmember_type,
               args.num_clusters, args.num_samples))

    if args.calculate_new_endmembers:

        # Calculate new endmembers
        print('Calculating new endmembers')

        n_regional_clusters = return_nclusters(args)
        rainfall_ts_file = os.path.join(
            args.base_dir, 'saved_rainfall_regions',
            'cluster_center_rainfall_ts_csvs',
            '{}_rainfall_regions_nclusters_{}_normalized_monthly_ts.csv'.
            format(args.unmixing_region, n_regional_clusters))
        monthly_rainfall_ts = np.array(
            pd.read_csv(rainfall_ts_file, index_col=0))

        print('Interpolate rainfall timeseries')
        interpolated_rainfall_ts = interpolate_rainfall(
            args, monthly_rainfall_ts)

        print('Read regional polygons')
        polygons_file = os.path.join(
            args.base_dir, 'saved_rainfall_regions', 'clean_regions',
            '{}_rainfall_regions_nclusters_{}_clean.shp'.format(
                args.unmixing_region, n_regional_clusters))
        region_polygons = gpd.read_file(polygons_file).to_crs(src.meta['crs'])

        all_endmembers_df = pd.DataFrame(
            index=range(len(interpolated_rainfall_ts[0])))

        for region_index in range(n_regional_clusters):
            print('Calculating endmembers for {}, Region {}'.format(
                args.unmixing_region, region_index))

            masked_evi_img, img_transform = mask(
                src, [region_polygons['geometry'].iloc[region_index]],
                nodata=0)

            print('PCA Transform')
            principalComponents, pca, evi_img_flattened = pca_transform(
                args, masked_evi_img)

            print('Clustering')
            cluster_centers, cluster_predicts = clustering(
                args, principalComponents)

            print('Finding Cluster Timeseries')
            cluster_timeseries = calculate_cluster_timeseries(
                cluster_predicts, evi_img_flattened)

            print('Extract Endmembers')
            endmember_df = find_endmembers(
                args, cluster_timeseries,
                interpolated_rainfall_ts[region_index], region_index)
            all_endmembers_df = pd.concat([all_endmembers_df, endmember_df],
                                          axis=1)

        # Save extracted endmembers
        all_endmembers_df.to_csv(save_file_endmembers)

    endmember_array = np.array(
        pd.read_csv(save_file_endmembers, index_col=0, header=0))

    # Return endmember array
    return endmember_array
Example #45
0
"""
Roads
-----
The :meth:`pygmt.Figure.plot` method allows us to plot geographical data such
as lines which are stored in a :class:`geopandas.GeoDataFrame` object. Use
:func:`geopandas.read_file` to load data from any supported OGR format such as
a shapefile (.shp), GeoJSON (.geojson), geopackage (.gpkg), etc. Then, pass the
:class:`geopandas.GeoDataFrame` as an argument to the ``data`` parameter in
:meth:`pygmt.Figure.plot`, and style the geometry using the ``pen`` parameter.
"""

import geopandas as gpd
import pygmt

# Read shapefile data using geopandas
gdf = gpd.read_file(
    "http://www2.census.gov/geo/tiger/TIGER2015/PRISECROADS/tl_2015_15_prisecroads.zip"
)
# The dataset contains different road types listed in the RTTYP column,
# here we select the following ones to plot:
roads_common = gdf[gdf.RTTYP == "M"]  # Common name roads
roads_state = gdf[gdf.RTTYP == "S"]  # State recognized roads
roads_interstate = gdf[gdf.RTTYP == "I"]  # Interstate roads

fig = pygmt.Figure()

# Define target region around O'ahu (Hawai'i)
region = [-158.3, -157.6, 21.2, 21.75]  # xmin, xmax, ymin, ymax

title = r"Main roads of O\047ahu (Hawai\047i)"  # \047 is octal code for '
fig.basemap(region=region, projection="M12c", frame=["af", f'WSne+t"{title}"'])
fig.coast(land="gray", water="dodgerblue4", shorelines="1p,black")
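# The snippet above is cut off before the step the introduction describes; a
# minimal continuation (a sketch, with pen widths and colours chosen only for
# illustration) would pass each GeoDataFrame to Figure.plot via ``data`` and
# style it with ``pen``:
fig.plot(data=roads_common, pen="5p,dodgerblue")
fig.plot(data=roads_state, pen="2p,gold")
fig.plot(data=roads_interstate, pen="2p,red")
fig.show()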
Example #46
0
from __future__ import division
import geopandas as gpd
from shapely.geometry import Polygon

from utility import printHeader

###########################
# Loading the data and print out the CRS and features
###########################
printHeader("Loading the data and print out the CRS and features")
# Load and join the spatial data
gdf = gpd.read_file("spatialref/taxi_zones.shp")

# Print out the coordinate reference system
print "Coordinate system: %s\n" % gdf.crs["init"]

# Print out the features of the spatial data
print "Features:"
for feature in gdf.columns:
    print "* %s" % feature

###########################
# Check whether we should use the OBJECT ID or the LocationID as our index
###########################
printHeader(
    "Check whether we should use the OBJECT ID or the LocationID as our index")

print "The following entries exist in the dataframe where LocationID is not the same as OBJECTID"
print gdf[(gdf["OBJECTID"] != gdf["LocationID"])]

# Index by OBJECTID
Example #47
0
df = pd.read_csv(
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
)

listaPaises = [
    'Chile', 'Argentina', 'Brazil', 'Uruguay', 'Bolivia', 'Paraguay', 'Peru',
    'Ecuador', 'Colombia', 'Venezuela', 'Guyana', 'Suriname'
]

df = df[df['Country/Region'].isin(listaPaises)]

df.drop(['Province/State', 'Country/Region', 'Lat', 'Long'],
        axis=1,
        inplace=True)

world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

mapa = world[world['name'].isin(listaPaises)]

mapa = mapa.sort_values(["name"], ascending=(True))

mapa.drop(['continent', 'iso_a3', 'gdp_md_est'], axis=1, inplace=True)

pa = mapa['name'].tolist()
pob = mapa['pop_est'].tolist()

lista = df.columns
cols = lista.size - 1
i = 4
l = []
Example #48
0
import pandas as pnd
import numpy as np
import sys
import traceback
import geopandas
from shapely.geometry import Point

fichier_dist = "C:\\DEV\\GIS\\rwanda\\excel_dist.txt"
fichier_provinces = "C:\\DEV\\GIS\\rwanda\\rwa_provinces_osm.gpkg"
output_file = "C:\\DEV\\GIS\\rwanda\\excel_dist_provinces.txt"

prov_shp = geopandas.read_file(fichier_provinces)
prov_shp.set_crs(epsg=4326, inplace=True)
df_dist = pnd.read_csv(fichier_dist, sep='\t', encoding='ISO-8859-1')
df_dist = df_dist.fillna('')
print(df_dist)


def add_prov():
    global df_dist
    global prov_shp
    df_dist["province"] = ""
    df_dist['Latitude'] = df_dist['Latitude'].astype(str)
    df_dist['Longitude'] = df_dist['Longitude'].astype(str)
    for index, row_dist in df_dist.iterrows():
        if len(row_dist["Latitude"]) > 0 and len(row_dist["Longitude"]) > 0:
            nom_jointure = row_dist["NOM_JOINTURE"].lower()
            collector = row_dist["COLLECTOR"].replace("&", "\&")
            coll_num = row_dist["COLL_NUM"].replace("&", "\&")
            lat = row_dist["Latitude"].replace(",", ".")
            long = row_dist["Longitude"].replace(",", ".")
Example #49
0
# use ftp client to download all zip files from https://www2.census.gov/geo/tiger/TIGER2014/TRACT/
# unzip all to "*/" in tracts_path
import os
import geopandas as gpd
import time

start_time = time.time()
tracts_path = '2014-tracts-by-state'

gdf = gpd.GeoDataFrame()
for folder in os.listdir(tracts_path):
    print(folder)
    tmp = gpd.read_file('{}/{}'.format(tracts_path, folder))
    gdf = gdf.append(tmp)

gdf = gdf.head()

original_crs = tmp.crs
gdf.crs = original_crs
gdf = gdf.to_crs({'init': 'epsg:4326'})
gdf.to_file('us_tracts_2014')

print('created shapefile with {} rows'.format(len(gdf)))
print('finished in {:.1f} seconds'.format(time.time() - start_time))
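The download/unzip step is only described in the header comment above; a minimal sketch of automating it over HTTPS with requests (instead of an FTP client), assuming the archives follow the tl_2014_<state FIPS>_tract.zip naming convention, might look like:

import io
import os
import zipfile

import requests

BASE_URL = 'https://www2.census.gov/geo/tiger/TIGER2014/TRACT/'
tracts_path = '2014-tracts-by-state'

# state_fips is a placeholder: fill in the FIPS codes of the states you need
state_fips = ['01', '02', '04']
for fips in state_fips:
    name = 'tl_2014_{}_tract'.format(fips)
    resp = requests.get(BASE_URL + name + '.zip')
    resp.raise_for_status()
    # extract each archive into its own subfolder of tracts_path
    with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
        zf.extractall(os.path.join(tracts_path, name))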
Example #50
0
def plot_density(
    data,
    *,
    year,
    group,
    kernel_function,
    cell_size,
    crs=None,
    bandwidth,
    show_title,
):
    if crs is None:
        crs = {'init': 'epsg:3067'}

    pop = get_xy(data)
    pad = bandwidth * 2
    minx, miny, maxx, maxy = pop['geometry'].total_bounds
    minx -= pad
    miny -= pad
    maxx += pad
    maxy += pad
    w, h = maxx - minx, maxy - miny

    fig = figure(
        title=f"Density of {group.capitalize()} population in Vyborg in {year}",
        x_range=(minx, maxx),
        y_range=(miny, maxy),
    )
    fig.title.visible = show_title
    fig.xaxis.major_tick_line_color = None
    fig.xaxis.minor_tick_line_color = None
    fig.yaxis.major_tick_line_color = None
    fig.yaxis.minor_tick_line_color = None

    fig.xaxis.major_label_text_font_size = '0pt'
    fig.yaxis.major_label_text_font_size = '0pt'

    fig.xgrid.visible = False
    fig.ygrid.visible = False

    water = gpd.read_file('water_clip.shp')
    water.crs = {'init': 'epsg:4326'}
    water.geometry = water.geometry.to_crs(crs)
    water = get_xy(water)
    water_src = GeoJSONDataSource(geojson=water.to_json())

    density = kernel_density_surface(
        data,
        group=group,
        bandwidth=bandwidth,
        cell_size=cell_size,
        kernel_function=kernel_function,
    )

    fig.image(
        [density],
        minx,
        miny,
        w,
        h,
        palette=grey(10)[::-1],
    )
    fig.patches(
        xs='x',
        ys='y',
        source=water_src,
        fill_color='#59d0ff',
        fill_alpha=0.8,
        line_color=None,
        line_width=0,
    )

    return fig
Example #51
0
def run_cmip():
    """



    """
    # Initialize OGGM and set up the default run parameters
    vascaling.initialize(logging_level='DEBUG')
    rgi_version = '62'
    cfg.PARAMS['border'] = 80

    # CLUSTER paths
    wdir = os.environ.get('WORKDIR', '')
    utils.mkdir(wdir)
    cfg.PATHS['working_dir'] = wdir
    outdir = os.environ.get('OUTDIR', '')
    utils.mkdir(outdir)

    # define the baseline climate CRU or HISTALP
    cfg.PARAMS['baseline_climate'] = 'CRU'
    # set the mb hyper parameters accordingly
    cfg.PARAMS['prcp_scaling_factor'] = 3
    cfg.PARAMS['temp_melt'] = 0
    cfg.PARAMS['temp_all_solid'] = 4
    cfg.PARAMS['prcp_default_gradient'] = 4e-4
    cfg.PARAMS['run_mb_calibration'] = False
    # set minimum ice thickness to include in glacier length computation
    # this reduces weird spikes in length records
    cfg.PARAMS['min_ice_thick_for_length'] = 0.1

    # the bias is defined to be zero during the calibration process,
    # which is why we don't use it here to reproduce the results
    cfg.PARAMS['use_bias_for_run'] = True

    # read RGI entry for the glaciers as DataFrame
    # containing the outline area as shapefile
    # RGI glaciers
    rgi_reg = os.environ.get('RGI_REG', '')
    if rgi_reg not in ['{:02d}'.format(r) for r in range(1, 20)]:
        raise RuntimeError('Need an RGI Region')
    rgi_ids = gpd.read_file(
        utils.get_rgi_region_file(rgi_reg, version=rgi_version))

    # For greenland we omit connectivity level 2
    if rgi_reg == '05':
        rgi_ids = rgi_ids.loc[rgi_ids['Connect'] != 2]

    # get and set path to intersect shapefile
    intersects_db = utils.get_rgi_intersects_region_file(region=rgi_reg)
    cfg.set_intersects_db(intersects_db)

    # operational run, all glaciers should run
    cfg.PARAMS['continue_on_error'] = True

    # Module logger
    log = logging.getLogger(__name__)
    log.workflow('Starting run for RGI reg {}'.format(rgi_reg))

    # Go - get the pre-processed glacier directories
    base_url = 'https://cluster.klima.uni-bremen.de/' \
               '~moberrauch/prepro_vas_paper/'
    gdirs = workflow.init_glacier_directories(rgi_ids, from_prepro_level=3,
                                              prepro_base_url=base_url,
                                              prepro_rgi_version=rgi_version)

    # read gcm list
    gcms = pd.read_csv('/home/www/oggm/cmip6/all_gcm_list.csv', index_col=0)

    # iterate over all specified GCMs
    for gcm in sys.argv[1:]:
        # iterate over all SSPs (Shared Socioeconomic Pathways)
        df1 = gcms.loc[gcms.gcm == gcm]
        for ssp in df1.ssp.unique():
            df2 = df1.loc[df1.ssp == ssp]
            assert len(df2) == 2
            # get temperature projections
            ft = df2.loc[df2['var'] == 'tas'].iloc[0]
            # get precipitation projections
            fp = df2.loc[df2['var'] == 'pr'].iloc[0].path
            rid = ft.fname.replace('_r1i1p1f1_tas.nc', '')
            ft = ft.path

            log.workflow('Starting run for {}'.format(rid))

            workflow.execute_entity_task(gcm_climate.process_cmip_data, gdirs,
                                         # recognize the climate file for later
                                         filesuffix='_' + rid,
                                         # temperature projections
                                         fpath_temp=ft,
                                         # precip projections
                                         fpath_precip=fp,
                                         year_range=('1981', '2020'))

            workflow.execute_entity_task(vascaling.run_from_climate_data,
                                         gdirs,
                                         # use gcm_data, not climate_historical
                                         climate_filename='gcm_data',
                                         # use a different scenario
                                         climate_input_filesuffix='_' + rid,
                                         # this is important! Start from 2019
                                         init_model_filesuffix='_historical',
                                         # recognize the run for later
                                         output_filesuffix=rid,
                                         return_value=False)
            gcm_dir = os.path.join(outdir, 'RGI' + rgi_reg, gcm)
            utils.mkdir(gcm_dir)
            utils.compile_run_output(gdirs, input_filesuffix=rid,
                                     path=os.path.join(gcm_dir, rid + '.nc'))

    log.workflow('OGGM Done')
Example #52
0
import geopandas as gp
import contextily as cx
import matplotlib.pyplot as plt
from shapely import geometry

data = gp.read_file("dataset/milano_municipi/Municipi.shx").to_crs(epsg=3857)
incidenti = gp.read_file(
    "dataset/incidenti/inc_strad_milano_2016.geojson").to_crs(epsg=3857)

incidenti_per_municipio = {}
for m in data['MUNICIPIO']:
    incidenti_per_municipio[m] = 0

for m, poly in zip(data['MUNICIPIO'], data['geometry']):
    poly = geometry.Polygon(poly)

    for point in incidenti['geometry']:
        point = geometry.Point(point)

        if poly.contains(point):
            incidenti_per_municipio[m] += 1

data.index = data['MUNICIPIO']

inc = gp.GeoSeries(incidenti_per_municipio).sort_index()
data['Incidenti'] = inc

layer_m = data.plot(column='Incidenti', cmap='OrRd', alpha=0.5, figsize=(9, 7))
cx.add_basemap(ax=layer_m)
plt.axis('off')
plt.show()
Example #53
0
	for val in count_dict.values():
		if val < args.samples_per_label:
			return False
	return True



# Local testing constraint
# shp_files = shp_files[:2]



for shpfile in shp_files:
	tries = 0
	shp_id = shpfile.split("_")[-1][:-4].upper()
	shape_data = gpd.read_file(SHAPEFILE_DIR + shpfile)

	# reset the sample count dict per image
	sample_count = {
		"L": 0,
		"W": 0,
		"I": 0,
	}

	# read in associated GeoTIFF file
	tiff_file = [g for g in tiff_files if shp_id in g]
	print(tiff_file[0])

	if len(tiff_file):
		src = rasterio.open(TIFF_DIR + tiff_file[0])
Example #54
0
def read_lines_dams(gdrive):
    # Read in data
    t0 = datetime.datetime.now()
    ## NABD
    nabd_dams = gp.read_file(gdrive + "nabd/nabd_fish_barriers_2012.shp",
                             usecols=[
                                 'COMID', 'NIDID', 'Norm_stor', 'Max_stor',
                                 'Year_compl', 'Purposes', 'geometry'
                             ])  #read in NABD from Drive
    nabd_dams = nabd_dams.drop_duplicates(
        subset='NIDID', keep="first")  #drop everything after first duplicate
    nabd_dams["DamID"] = range(len(nabd_dams.COMID))  #add DamID
    nabd_dams = pd.DataFrame(nabd_dams)
    nabd_dams['Grand_flag'] = np.zeros(len(nabd_dams))  #add flag column

    ## GRanD
    grand = pd.read_csv(gdrive + "other_dam_datasets/Reservoir_Attributes.csv",
                        usecols=['GRAND_ID',
                                 'NABD_ID'])  #read in GRanD from Drive
    #Filter out dams without NABD IDs
    grand['NABD_ID'] = grand['NABD_ID'].fillna(0)
    grand = grand[grand['NABD_ID'] != 0]

    #Merge NABD and GRanD
    nabd = pd.merge(nabd_dams,
                    grand,
                    left_on='NIDID',
                    right_on='NABD_ID',
                    how='left')
    nabd['GRAND_ID'] = nabd['GRAND_ID'].fillna(0)
    nabd.loc[nabd.GRAND_ID != 0,
             'Grand_flag'] = 1  #if a GRanD ID exists, make flag =1

    ## NHD
    flowlines = pd.read_csv(gdrive + "nhd/NHDFlowlines.csv",
                            usecols=[
                                'Hydroseq', 'UpHydroseq', 'DnHydroseq',
                                'REACHCODE', 'LENGTHKM', 'StartFlag', 'FTYPE',
                                'COMID', 'WKT', 'QE_MA', 'QC_MA', 'StreamOrde'
                            ])  #all NHD Flowlines

    #Filter the flowlines to select by HUC 2
    flowlines['HUC2'] = flowlines['REACHCODE'] / (
        10**12)  #convert Reachcode to HUC 2 format
    flowlines['HUC4'] = flowlines['REACHCODE'] / (
        10**10)  #convert Reachcode to HUC 4 format
    flowlines['HUC8'] = flowlines['REACHCODE'] / (
        10**6)  #convert Reachcode to HUC 8 format
    flowlines[['HUC2', 'HUC4',
               'HUC8']] = flowlines[['HUC2', 'HUC4', 'HUC8'
                                     ]].apply(np.floor)  #round down to integer

    #round the hydroseq values because of bug
    flowlines[['UpHydroseq', 'DnHydroseq', 'Hydroseq'
               ]] = flowlines[['UpHydroseq', 'DnHydroseq',
                               'Hydroseq']].round(decimals=0)

    read_flag = 1
    t1 = datetime.datetime.now()
    print("Time to read in flowlines and dams:", (t1 - t0))

    return flowlines, nabd
Example #55
0
def echantillonnage_pix(path_depot, path_mnt, path_metriques, output, nbPoints,
                        minDistance):
    '''
    :param path_depot: Path to the surficial deposit layer (str)
    :param path_mnt: Path to the DEM (MNT) to sample (str)
    :param path_metriques: Path to the directory containing the metrics to sample (str)
    :param output: Path of the output file (str)
    :param nbPoints: Desired number of points (int)
    :param minDistance: Minimum distance to keep between points (int)
    :return: Layer of points randomly sampled over the DEM, with the metric values as attributes (shp)
    '''

    print('***ÉCHANTILLONNAGE PAR PIXEL***')

    # Create the DEM frame (bounding extent)
    print('Création du cadre...')
    cadre, epsg, nodata = creation_cadre(path_mnt)

    # Read the deposit layer and reproject it if necessary
    print('Lecture de la couche de dépôts...')
    depot = gpd.read_file(path_depot)

    if str(depot.crs) != epsg:
        print('Reprojection...')
        depot.crs = epsg

    # Dissolve the deposit layer
    print('Regroupement couche de dépôts...')
    depot_reg = dissolve(depot)

    # Create the buffer around the deposit layer, sized by the minimum distance
    print('Création du buffer...')
    buff = creation_buffer(depot_reg, minDistance, epsg, 1, 1)

    # Clip the buffer to the extent of the frame
    print('Clip du buffer...')
    buff_clip = gpd.clip(buff, cadre)

    # Create the outer zone: difference between the frame and the clipped buffer
    print('Création zone externe...')
    zone_ext = difference(cadre, buff_clip, epsg)

    # Compare the areas of the deposits and the outer zone to set the limit on the number of points
    print('Comparaison...')
    plus_petite_zone = None
    plus_grande_zone = None
    zone = None
    if comparaison_area(depot_reg, zone_ext):
        plus_petite_zone = depot_reg
        plus_grande_zone = zone_ext
        zone = 1
        print('Plus petite zone: couche de dépôts ')
    else:
        plus_petite_zone = zone_ext
        plus_grande_zone = depot_reg
        zone = 0
        print('Plus petite zone: zone extérieure')

    # Sample the smaller zone
    print('Échantillonnage petite zone...')
    ech_petite_zone = echantillon_pixel(plus_petite_zone, minDistance,
                                        nbPoints, epsg, zone)
    #ech_petite_zone.to_file(r'C:\Users\home\Documents\Documents\APP3\ech_petite_zone.shp')

    # Sample the larger zone using the number of points obtained in the smaller zone
    if zone == 1:
        zone = 0
    elif zone == 0:
        zone = 1
    print('Échantillonnage grande zone...')
    nbPoints_petite = len(ech_petite_zone)
    ech_grande_zone = echantillon_pixel(plus_grande_zone, minDistance,
                                        nbPoints_petite, epsg, zone)
    #ech_grande_zone.to_file(r'C:\Users\home\Documents\Documents\APP3\ech_grande_zone.shp')
    print('Terminé')

    # Combine the two zones
    print('Combinaison des échantillons...')
    ech_total = gpd.GeoDataFrame(pd.concat([ech_petite_zone, ech_grande_zone],
                                           ignore_index=True),
                                 crs=epsg)
    if not os.path.exists(os.path.dirname(output)):
        os.makedirs(os.path.dirname(output))
    ech_total.to_file(output)

    # Extract the metric values
    print('Extraction des valeurs des métriques')
    extract_value_metrique(output, path_metriques)
    print('Terminé')
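Called on its own, the function only needs the input paths and sampling constraints listed in the docstring; a minimal, hypothetical invocation (the paths and values below are placeholders, not from the original project) might look like:

echantillonnage_pix(
    path_depot='data/depots.shp',      # hypothetical deposit layer
    path_mnt='data/mnt.tif',           # hypothetical DEM
    path_metriques='data/metriques',   # hypothetical folder of metric rasters
    output='out/echantillons.shp',
    nbPoints=500,
    minDistance=30,
)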
Example #56
0
        bandwidth=100,
        cell_size=10,
        kernel_function=biweight,
    )
    density = pd.DataFrame({
        'orthodox': density_orthodox.flatten(),
        'total': density_total.flatten(),
    })
    s = MinMax(density, 'orthodox', 'total')

    return s


if __name__ == '__main__':
    os.chdir('../data')
    points = gpd.read_file('points1878.geojson')
    year = 1920
    points['geometry'].crs = {'init': 'epsg:3067'}
    points = prepare_point_data(points, 'NUMBER', 'NUMBER2')
    pop_data = prepare_pop_data(pd.read_csv(f'{year}.csv'))
    pop_data = aggregate_sum(pop_data, ['plot_number'], [
        'other_christian',
        'orthodox',
        'other_religion',
        'lutheran',
    ])
    data = merge_dataframes(
        location_data=points,
        other_data=pop_data,
        on_location='NUMBER',
        on_other='plot_number',
Example #57
0
    def get_data(self):

        if self.masking == True:
            df = gpd.read_file(self.geometry)
            geom = df['geometry'][0]
            if self.mgrs_tile != 'None':
                url = "https://{}/wcs?service=WCS&Request=GetCoverage&version=2.0.0&subset=unix({})&format=image/tiff&filter=false&CoverageId={}&subset=Lat({},{})&subset=Long({},{})&token={}&scale={}&mgrs_tile={}".format(self.endpoint,self.time_t,self.collection, geom.bounds[1]-1, geom.bounds[3]+1, geom.bounds[0]-1, geom.bounds[2]+1, self.token, self.scale,self.mgrs_tile)
            else:
                url = "https://{}/wcs?service=WCS&Request=GetCoverage&version=2.0.0&subset=unix({})&format=image/tiff&filter=false&CoverageId={}&subset=Lat({},{})&subset=Long({},{})&token={}&scale={}".format(self.endpoint,self.time_t,self.collection, geom.bounds[1], geom.bounds[3], geom.bounds[0], geom.bounds[2], self.token, self.scale)

        else:
            if self.mgrs_tile != 'None':
                url = "https://{}/wcs?service=WCS&Request=GetCoverage&version=2.0.0&subset=unix({})&format=image/tiff&filter=false&CoverageId={}&subset=Lat({},{})&subset=Long({},{})&token={}&scale={}&mgrs_tile={}".format(self.endpoint,self.time_t,self.collection, self.min_lat, self.max_lat, self.min_long, self.max_long, self.token, self.scale, self.mgrs_tile)
            else:
                url = "https://{}/wcs?service=WCS&Request=GetCoverage&version=2.0.0&subset=unix({})&format=image/tiff&filter=false&CoverageId={}&subset=Lat({},{})&subset=Long({},{})&token={}&scale={}".format(self.endpoint,self.time_t,self.collection, self.min_lat, self.max_lat, self.min_long, self.max_long, self.token, self.scale)

        # if self.mgrs_tile != 'None':
        #     url = "https://{}/wcs?service=WCS&Request=GetCoverage&version=2.0.0&subset=unix({})&format=image/tiff&CoverageId={}&subset=Lat({},{})&subset=Long({},{})&token={}&mgrs_tile={}&filter=false".format(self.endpoint,self.time_t,self.collection, self.min_lat, self.max_lat, self.min_long, self.max_long, self.token, self.mgrs_tile)
        # else:
        #     url = "https://{}/wcs?service=WCS&Request=GetCoverage&version=2.0.0&subset=unix({})&format=image/tiff&CoverageId={}&token={}&mgrs_tile={}&filter=false".format(self.endpoint,self.time_t,self.collection, self.token, self.mgrs_tile)

        #print(url)
        result = requests.get(url)
        with open(self.fname, 'wb') as f:
            f.write(result.content)
            f.close()


        #if image is sentinel2 tiled transform to EPSG:4326
        dst_crs = 'EPSG:4326'

        with rasterio.open(self.fname) as src:
            transform, width, height = calculate_default_transform(
                src.crs, dst_crs, src.width, src.height, *src.bounds)
            kwargs = src.meta.copy()
            kwargs.update({
                'crs': dst_crs,
                'transform': transform,
                'width': width,
                'height': height,
                'dtype': 'float32',
                'bbox': src.bounds
            })
            new_fname = '4326_{}'.format(self.fname)
            with rasterio.open(new_fname, 'w', **kwargs) as dst:
                for i in range(1, src.count + 1):
                    reproject(
                        source=rasterio.band(src, i),
                        destination=rasterio.band(dst, i),
                        src_transform=src.transform,
                        src_crs=src.crs,
                        dst_transform=transform,
                        dst_crs=dst_crs,
                        resampling=Resampling.nearest)

        src = rasterio.open(new_fname)
        out_image = src.read(1)
        out_image = out_image.astype(np.float32)
        out_image[out_image == src.nodata] = 'nan'
        out_meta = src.meta.copy()
        #out_meta.update({"offset": src.offsets[0],
        #                "scale": src.scales[0]})

        if self.masking == True:
            with fiona.open(self.geometry, "r") as shapefile:
                features = [feature["geometry"] for feature in shapefile]
            out_image, out_transform = mask(src, features, crop=True)
            out_image = out_image.astype(np.float32)
            out_image[out_image == src.nodata] = 'nan'
            out_image = out_image[0,:,:]
            out_meta = src.meta.copy()
            out_meta.update({"driver": "GTiff",
                            "height": out_image.shape[0],
                            "width": out_image.shape[1],
                            "transform": out_transform,
                            "bbox": src.bounds})

        with rasterio.open(self.fname, 'w', **out_meta) as dst:
            dst.write_band(1, out_image)

        return out_image, out_meta
Example #58
0
    def get_data(self):

        if self.masking == True:
            df = gpd.read_file(self.geometry)
            geom = df['geometry'][0]
            if self.mgrs_tile != 'None':
                url = "https://{}/wcs?service=WCS&Request=GetCoverage&version=2.0.0&subset=unix({})&format=image/tiff&filter=false&CoverageId={}&subset=Lat({},{})&subset=Long({},{})&token={}&scale={}&mgrs_tile={}".format(self.endpoint,self.time_t,self.collection, geom.bounds[1]-1, geom.bounds[3]+1, geom.bounds[0]-1, geom.bounds[2]+1, self.token, self.scale,self.mgrs_tile)
            else:
                url = "https://{}/wcs?service=WCS&Request=GetCoverage&version=2.0.0&subset=unix({})&format=image/tiff&filter=false&CoverageId={}&subset=Lat({},{})&subset=Long({},{})&token={}&scale={}".format(self.endpoint,self.time_t,self.collection, geom.bounds[1], geom.bounds[3], geom.bounds[0], geom.bounds[2], self.token, self.scale)

        else:
            if self.mgrs_tile != 'None':
                url = "https://{}/wcs?service=WCS&Request=GetCoverage&version=2.0.0&subset=unix({})&format=image/tiff&filter=false&CoverageId={}&subset=Lat({},{})&subset=Long({},{})&token={}&scale={}&mgrs_tile={}".format(self.endpoint,self.time_t,self.collection, self.min_lat, self.max_lat, self.min_long, self.max_long, self.token, self.scale, self.mgrs_tile)
            else:
                url = "https://{}/wcs?service=WCS&Request=GetCoverage&version=2.0.0&subset=unix({})&format=image/tiff&filter=false&CoverageId={}&subset=Lat({},{})&subset=Long({},{})&token={}&scale={}".format(self.endpoint,self.time_t,self.collection, self.min_lat, self.max_lat, self.min_long, self.max_long, self.token, self.scale)

        # if self.mgrs_tile != 'None':
        #     url = "https://{}/wcs?service=WCS&Request=GetCoverage&version=2.0.0&subset=unix({})&format=image/tiff&CoverageId={}&subset=Lat({},{})&subset=Long({},{})&token={}&mgrs_tile={}&filter=false".format(self.endpoint,self.time_t,self.collection, self.min_lat, self.max_lat, self.min_long, self.max_long, self.token, self.mgrs_tile)
        # else:
        #     url = "https://{}/wcs?service=WCS&Request=GetCoverage&version=2.0.0&subset=unix({})&format=image/tiff&CoverageId={}&token={}&mgrs_tile={}&filter=false".format(self.endpoint,self.time_t,self.collection, self.token, self.mgrs_tile)

        #print(url)
        result = requests.get(url)
        with open(self.fname, 'wb') as f:
            f.write(result.content)
            f.close()


        #if image is sentinel2 tiled transform to EPSG:4326


        src = rasterio.open(self.fname)
        out_image = src.read(1)
        out_image = out_image.astype(np.float32)
        out_image[out_image == src.nodata] = 'nan'
        out_meta = src.meta.copy()
        out_meta.update({"bbox": src.bounds})

        if 'CAMS' in self.collection:
            out_meta.update({"offset": src.offsets[0],
                            "scale": src.scales[0]})

        if self.masking == True:
            with fiona.open(self.geometry, "r") as shapefile:
                features = [feature["geometry"] for feature in shapefile]
            out_image, out_transform = mask(src, features, crop=True)
            out_image = out_image.astype(np.float32)
            out_image[out_image == src.nodata] = 'nan'
            out_image = out_image[0,:,:]
            out_meta = src.meta.copy()

            if 'CAMS' in self.collection:
                out_meta.update({"driver": "GTiff",
                                "height": out_image.shape[0],
                                "width": out_image.shape[1],
                                "transform": out_transform,
                                "offset": src.offsets[0],
                                "scale": src.scales[0]})
            else:
                out_meta.update({"driver": "GTiff",
                                "height": out_image.shape[0],
                                "width": out_image.shape[1],
                                "transform": out_transform})

        return out_image, out_meta
Example #59
0
from fiona.crs import from_epsg
import geopandas as gpd
import pandas as pd
import os

# Script location
basepath = os.path.dirname(os.path.realpath(__file__))

# Projections: http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html
# Weather stations:

# Filepath
country_fp = os.path.join(basepath, 'data', 'Finland_boundaries.shp')
station_fp = os.path.join(basepath, 'data', "Weather_stations_all_Finland.txt")
outfp = os.path.join(basepath, 'img', "FMI_stations_70_years_older.png")

# Read data
data = gpd.read_file(country_fp)
stations = pd.read_csv(station_fp,
                       sep='\t',
                       encoding='latin1',
                       header=None,
                       names=[
                           'Name', 'FMISID', 'LPNN', 'WMO', 'Lat', 'Lon',
                           'Elevation', 'Groups', 'Started'
                       ])

# Prepare stations into GeoDataFrame
# ----------------------------------

# Create Point geometries from coordinates
stations['geometry'] = None
stations['geometry'] = stations.apply(
Example #60
0
def plot():
    map_path = file_path + "/resources/sf_block_groups/sf_block_groups_nowater.geojson"
    coc_path = file_path + "/resources/sf_block_groups/coc"
    plot_path = file_path + "/resources/sf_data/sf_overspace_plot_data.json"
    fig_path = file_path + "/results/sf_change_overspace.pdf"

    # Read data.
    with open(plot_path, "r") as plot_file:
        data = json.loads(plot_file.read().strip("\n"))

    coc = gpd.read_file(coc_path)
    coc = coc[coc["GEOID"].astype("int") -
              coc["GEOID"].astype("int") % 1000000 == 6075000000]
    coc = coc[coc["GEOID"].astype("int") != 6075017902]
    coc = coc[coc["COCFLAG__1"] == 1]
    coc = coc.to_crs({"init": "epsg:4326"})

    map = gpd.read_file(map_path)
    map["geoid"] = map["stfid"].astype("int")
    map = map[["geoid", "geometry"]]
    map["bg_lng"] = map.centroid.apply(lambda p: p.x)
    map["bg_lat"] = map.centroid.apply(lambda p: p.y)
    map = map[map["geoid"] != 60750179021]

    # Get supply curve data
    sup = pd.DataFrame.from_dict(data["sup"])
    sup["geoid"] = data["index"]
    sup = sup[sup["geoid"] != 60750601001]
    sup = sup[sup["geoid"] != 60750604001]
    sup = sup[sup["geoid"] != 60750332011]
    sup = sup[sup["geoid"] != 60750610002]
    sup = sup[sup["geoid"] != 60750264022]
    sup = sup[sup["geoid"] != 60750258002]
    sup[sup["geoid"] == 60750610001] = 1
    sup = map.merge(sup, on="geoid", how="left")

    # Get price curve data
    pri = pd.DataFrame.from_dict(data["pri"])
    pri["geoid"] = data["index"]
    pri = map.merge(pri, on="geoid", how="left")

    # Plot parameter and setting.
    font = FontProperties()
    font.set_weight("bold")
    font.set_size(10)
    matplotlib.rcParams.update({"font.size": 6})
    alpha = 0.5
    alpha2 = 0.3
    k = 2
    bar_cons = 0.66
    bar_mv = 0.27
    for i in [0, 1, 2, 3, 4]:
        ax[i].set_xlim([-122.513, -122.355])
        ax[i].set_ylim([37.707, 37.833])
        ax[i].set_axis_off()
        ax[i].xaxis.set_major_locator(plt.NullLocator())
        ax[i].yaxis.set_major_locator(plt.NullLocator())
        coc.plot(ax=ax[i], linewidth=0.5, alpha=0)
    app_list = ["uber", "lyft", "taxi"]
    cmap = "RdYlGn"

    f = 0
    for i in [0, 1, 2]:
        sup["plot"] = sup[app_list[i]]  #/ sup["area"] * 581
        knn = neighbors.KNeighborsRegressor(k, "distance")  # Fill empty area.
        train_x = sup[["plot", "bg_lat",
                       "bg_lng"]].dropna()[["bg_lat", "bg_lng"]].values
        train_y = sup["plot"].dropna().values
        predict_x = sup[["bg_lat", "bg_lng"]].values
        sup["plot"] = knn.fit(train_x, train_y).predict(predict_x)
        vmin = sup["plot"].min()
        vmax = sup["plot"].quantile(0.95)
        # plot
        sup.plot(ax=ax[i],
                 linewidth=0,
                 column="plot",
                 cmap=cmap,
                 alpha=alpha,
                 k=10,
                 vmin=vmin,
                 vmax=vmax)
        ax[i].set_title(upperfirst(app_list[i]) + " Supply",
                        fontproperties=font)
        fig = ax[i].get_figure()
        cax = fig.add_axes([0.128 + 0.087 * i, 0.07, 0.07, 0.02])
        sm = plt.cm.ScalarMappable(cmap=cmap,
                                   norm=plt.Normalize(vmin=vmin, vmax=vmax))
        sm._A = []
        fig.colorbar(sm,
                     cax=cax,
                     alpha=alpha2,
                     extend="both",
                     orientation="horizontal")

    cmap = "RdYlGn_r"
    f = 2
    for i in [3, 4]:
        pri["plot"] = (pri[app_list[i - 3]] - 1) * 100
        knn = neighbors.KNeighborsRegressor(k, "distance")  # Fill empty area.
        train_x = pri[["plot", "bg_lat",
                       "bg_lng"]].dropna()[["bg_lat", "bg_lng"]].values
        train_y = pri["plot"].dropna().values
        predict_x = pri[["bg_lat", "bg_lng"]].values
        pri["plot"] = knn.fit(train_x, train_y).predict(predict_x)
        vmin = 0
        vmax = 12
        print pri["plot"].max() - pri["plot"].min()
        print pri["plot"].std()
        # plot
        pri.plot(ax=ax[i],
                 linewidth=0,
                 column="plot",
                 cmap=cmap,
                 alpha=alpha,
                 k=10,
                 vmin=vmin,
                 vmax=vmax)
        ax[i].set_title(upperfirst(app_list[i - 3]) + " Price",
                        fontproperties=font)
        fig = ax[i].get_figure()
        cax = fig.add_axes([0.128 + 0.087 * i, 0.07, 0.07, 0.02])
        sm = plt.cm.ScalarMappable(cmap=cmap,
                                   norm=plt.Normalize(vmin=vmin, vmax=vmax))
        sm._A = []
        fig.colorbar(sm,
                     cax=cax,
                     alpha=alpha2,
                     extend="both",
                     orientation="horizontal")

    map_path = file_path + "/resources/nyc_block_groups/nyc_bg_with_data_acs15.geojson"
    plot_path = file_path + "/resources/nyc_data/nyc_overspace_plot_data.json"
    fig_path = file_path + "/results/nyc_change_overspace.pdf"

    # Read data.
    with open(plot_path, "r") as plot_file:
        data = json.loads(plot_file.read().strip("\n"))

    map = gpd.read_file(map_path)
    coc = map.sort_values("income")[:80]
    map = map[map["population"].astype("float") > 10.0]
    map["geoid"] = map["geo_id"].astype("int")
    map = map[["geoid", "geometry"]]
    map["bg_lng"] = map.centroid.apply(lambda p: p.x)
    map["bg_lat"] = map.centroid.apply(lambda p: p.y)

    # Get supply curve data
    sup = pd.DataFrame.from_dict(data["sup"])
    sup["geoid"] = data["index"]
    sup = map.merge(sup, on="geoid", how="left")

    # Get price curve data
    pri = pd.DataFrame.from_dict(data["pri"])
    pri["geoid"] = data["index"]
    pri = pri[pri["uber"] > 1.0]
    pri = pri[pri["lyft"] > 1.0]
    pri = map.merge(pri, on="geoid", how="left")

    # Plot parameter and setting.
    bar_cons = 0.66
    bar_mv = 0.27
    for i in [5, 6, 7, 8]:
        ax[i].set_xlim([-74.055, -73.88])
        ax[i].set_ylim([40.64, 40.90])
        ax[i].set_axis_off()
        ax[i].xaxis.set_major_locator(plt.NullLocator())
        ax[i].yaxis.set_major_locator(plt.NullLocator())
        coc.plot(ax=ax[i], linewidth=0.5, alpha=0)
    app_list = ["uber", "lyft"]
    cmap = "RdYlGn"

    f = 0
    for i in [5, 6]:
        sup["plot"] = sup[app_list[i - 5]]
        vmin = sup["plot"].min()
        if i == 5:
            vmax = 7  #sup["plot"].quantile(0.9)
        else:
            vmax = 5
        # plot
        sup.plot(ax=ax[i],
                 linewidth=0,
                 column="plot",
                 cmap=cmap,
                 alpha=alpha,
                 k=10,
                 vmin=vmin,
                 vmax=vmax)
        ax[i].set_title(upperfirst(app_list[i - 5]) + " Supply",
                        fontproperties=font)
        fig = ax[i].get_figure()
        cax = fig.add_axes([0.132 + 0.087 * i, 0.07, 0.07, 0.02])
        sm = plt.cm.ScalarMappable(cmap=cmap,
                                   norm=plt.Normalize(vmin=vmin, vmax=vmax))
        sm._A = []
        fig.colorbar(sm,
                     cax=cax,
                     alpha=alpha2,
                     extend="both",
                     orientation="horizontal")

    cmap = "RdYlGn_r"
    f = 2
    for i in [7, 8]:
        pri["plot"] = (pri[app_list[i - 3 - 4]] - 1) * 100
        knn = neighbors.KNeighborsRegressor(k, "distance")  # Fill empty area.
        train_x = pri[["plot", "bg_lat",
                       "bg_lng"]].dropna()[["bg_lat", "bg_lng"]].values
        train_y = pri["plot"].dropna().values
        predict_x = pri[["bg_lat", "bg_lng"]].values
        pri["plot"] = knn.fit(train_x, train_y).predict(predict_x)
        vmin = 0
        if i == 7:
            vmax = 2.5  #sup["plot"].quantile(0.9)
        else:
            vmax = 7
        print pri["plot"].max() - pri["plot"].min()
        print pri["plot"].std()
        # plot
        pri.plot(ax=ax[i],
                 linewidth=0,
                 column="plot",
                 cmap=cmap,
                 alpha=alpha,
                 k=10,
                 vmin=vmin,
                 vmax=vmax)
        ax[i].set_title(upperfirst(app_list[i - 3 - 4]) + " Price",
                        fontproperties=font)
        fig = ax[i].get_figure()
        cax = fig.add_axes([0.132 + 0.087 * i, 0.07, 0.07, 0.02])
        sm = plt.cm.ScalarMappable(cmap=cmap,
                                   norm=plt.Normalize(vmin=vmin, vmax=vmax))
        sm._A = []
        fig.colorbar(sm,
                     cax=cax,
                     alpha=alpha2,
                     extend="both",
                     orientation="horizontal")