Example #1
def _create_df(x, y=None, crs=None):
    if y is None:  # `y or x` raises on array input, so test for None explicitly
        y = x
    x = np.asarray(x)
    y = np.asarray(y)

    return GeoDataFrame(
        {'geometry': points_from_xy(x, y), 'value1': x + y, 'value2': x * y},
        crs=crs)
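A minimal usage sketch for the helper above, assuming the imports the snippet relies on (numpy as np, plus GeoDataFrame and points_from_xy from geopandas):

import numpy as np
from geopandas import GeoDataFrame, points_from_xy

# three diagonal points; value1 = x + y, value2 = x * y
df = _create_df([0.0, 1.0, 2.0], crs="EPSG:4326")
print(df)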
Example #2
def world_map(x, y, normalize, world, folds, title1, title2):

    drop_list = ['geometry', x]
    map_world = world.drop(world.columns.difference(drop_list), axis=1)

    world["x"] = world['geometry'].centroid.x
    world["y"] = world['geometry'].centroid.y
    gdf = gpd.GeoDataFrame(map_world,
                           geometry=gpd.points_from_xy(world.x, world.y))
    gdf['size'] = gdf[x] * normalize

    known = world.dropna(subset=[y])
    unknown = world[world[y].isna()]

    ax = known.plot(column=y,
                    cmap='inferno_r',
                    figsize=(20, 12),
                    scheme='fisher_jenks',
                    k=folds,
                    legend=True,
                    edgecolor='#aaaaaa')
    unknown.plot(ax=ax, color='#ffffff', hatch='//', edgecolor='#aaaaaa')

    gdf.plot(ax=ax, color='red', markersize='size')

    ax.set_title(title1, fontdict={'fontsize': 20}, loc='left')
    description = title2.strip()
    ax.annotate(description,
                xy=(0.07, 0.1),
                size=12,
                xycoords='figure fraction')

    ax.set_axis_off()
    legend = ax.get_legend()
    legend.set_bbox_to_anchor((.11, .4))
    # mutating legend.prop after the legend is drawn has no effect,
    # so resize the legend texts directly
    for text in legend.get_texts():
        text.set_fontsize(12)
Example #3
def load_hydro_data(hydro_var_name, dataset_name, fill_value=-9999,
                    path='../datasets/hydrology', version=1, fill_nan=True):
    # load dataset
    X = netCDF4.Dataset("{}/{}_{}.nc".format(path, hydro_var_name, dataset_name))

    # create time indexes
    if version == 1:
        db = pd.DataFrame({'year': np.asarray(X.variables['time'][:][0, :]).astype(int),
                           'month': np.asarray(X.variables['time'][:][1, :]).astype(int),
                           'day': 15})
        time_X = pd.to_datetime(db)
    elif version == 2:
        year = [d[3:] for d in np.asarray(X.variables['time'])]
        month = [d[:2] for d in np.asarray(X.variables['time'])]
        db = pd.DataFrame({'year': np.asarray(year).astype(int),
                           'month': np.asarray(month).astype(int),
                           'day': 15})
        time_X = pd.to_datetime(db)

    # dataframe of all grid points
    if version == 1:
        lat = np.asarray(X.variables['Lat'][:][0])
        long = np.asarray(X.variables['Long'][:][0])
        df = pd.DataFrame({'x': long, 'y': lat})
    elif version == 2:
        lat = np.asarray(X.variables['Lat'])
        long = np.asarray(X.variables['Long'])
        (lat_flat, long_flat) = np.meshgrid(lat, long)
        df = pd.DataFrame({'x': long_flat.flatten(), 'y': lat_flat.flatten()})

    spatial_grid = geopandas.GeoDataFrame(df, geometry=geopandas.points_from_xy(df.x, df.y))

    # dataframe of each variable at each month over all grid points
    hydro_var = np.asarray(X.variables['{}_mm'.format(hydro_var_name)])
    X_grid = hydrological_variables_grid(hydro_var, time_X, hydro_var_name, spatial_grid,
                                         fill_value=fill_value, version=version, fill_nan=fill_nan)

    return spatial_grid, X_grid, time_X
Example #4
    def sample_points_for_hard_negative_mining(self):
        # hard negative mining:
        # get some random negatives from the image bounds to ensure that the model can learn on negative examples
        # e.g. land, clouds, etc

        with rio.open(self.imagefile) as src:
            left, bottom, right, top = src.bounds

        offset = HARD_NEGATIVE_MINING_SAMPLE_BORDER_OFFSET  # m
        assert top - bottom > 2 * HARD_NEGATIVE_MINING_SAMPLE_BORDER_OFFSET, f"Hard Negative Mining offset 2x{HARD_NEGATIVE_MINING_SAMPLE_BORDER_OFFSET}m too large for the image height: {top - bottom}m"
        assert right - left > 2 * HARD_NEGATIVE_MINING_SAMPLE_BORDER_OFFSET, f"Hard Negative Mining offset 2x{HARD_NEGATIVE_MINING_SAMPLE_BORDER_OFFSET}m too large for the image width: {right - left}m"
        N_random_points = len(self.lines)

        # sample random x positions within bounds
        zx = np.random.rand(N_random_points)
        zx *= ((right - offset) - (left + offset))
        zx += left + offset

        # sample random y positions within bounds
        zy = np.random.rand(N_random_points)
        zy *= ((top - offset) - (bottom + offset))
        zy += bottom + offset

        return gpd.GeoDataFrame(geometry=gpd.points_from_xy(zx, zy))
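Both sampling blocks above use the standard scale-and-shift trick: a uniform draw z in [0, 1) maps to [lo, hi) via z * (hi - lo) + lo. A compact equivalent, as a hypothetical helper:

import numpy as np

def uniform_between(lo: float, hi: float, n: int) -> np.ndarray:
    """Uniform samples in [lo, hi): lo + z * (hi - lo) for z in [0, 1)."""
    return lo + np.random.rand(n) * (hi - lo)

# e.g. zx = uniform_between(left + offset, right - offset, N_random_points)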
Example #5
def geo_data_frame(dataframe, lat_col, lon_col):
    """
    Makes a geopandas.GeoDataFrame given pandas.DataFrame
    CRS is hardcoded to GPS (EPSG:4326)    

    Parameters
    ----------
    dataframe : pandas dataframe
    lat_col : a NAME of a column which holds the lattitude in given dataframe
    lon_col : a NAME of a column which holds the longtitude in given dataframe

    Returns
    -------
    geopandas.GeoDataFrame
    """
    _geoDataFrame = gpd.GeoDataFrame(dataframe,
                                     geometry=gpd.points_from_xy(
                                         dataframe[lon_col],
                                         dataframe[lat_col]))

    # Set the coordinate reference system (CRS); the {'init': ...} form is the
    # legacy syntax and is deprecated in geopandas >= 0.7
    _geoDataFrame.crs = {'init': 'epsg:4326'}

    return _geoDataFrame
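A short usage sketch for geo_data_frame; the column names here are illustrative:

import pandas as pd
import geopandas as gpd

cities = pd.DataFrame({'lat': [52.52, 48.14], 'lon': [13.40, 11.58]})
cities_gdf = geo_data_frame(cities, lat_col='lat', lon_col='lon')
print(cities_gdf.crs)  # EPSG:4326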
Example #6
    def plot_map(self):
        world = geopandas.read_file(
            geopandas.datasets.get_path('naturalearth_lowres'))
        gdf = geopandas.GeoDataFrame(self.coord,
                                     geometry=geopandas.points_from_xy(
                                         self.coord.Longitude,
                                         self.coord.Latitude))
        name = difflib.get_close_matches(self.country, world.name.to_list())[0]
        ax = world[world.name == name].plot(color='white', edgecolor='black')
        # We can now plot our ``GeoDataFrame``.
        gdf.plot(ax=ax,
                 color='red',
                 alpha=0.5,
                 markersize=10,
                 figsize=[100, 50])
        # pass the label positionally: the keyword was renamed from `s` to
        # `text` in newer matplotlib
        gdf.apply(lambda x: ax.annotate(
            x.location, xy=x.geometry.centroid.coords[0], ha='center'),
                  axis=1)
        # plt.show()
        plt.savefig(
            os.path.join(
                os.path.dirname(
                    self.projects[0]['static_data']['path_project']),
                'Map_Country.png'))
Example #7
def map_to_crs(map_: gpd.GeoDataFrame, target: pyproj.crs.CRS) -> gpd.GeoDataFrame:
    """Transforms map (geometry and height) to a target CRS.

    Parameters
    ----------
    map_ : gpd.GeoDataFrame
        map to be transformed
    target : pyproj.crs.CRS
        target CRS in any pyproj parsable format.

    Returns
    -------
    gpd.GeoDataFrame
        transformed map
    """
    cm.check.check_type(map_, 'map', raise_errors=True)
    transformed = to_crs(map_, target)

    xy = map_.geometry.centroid
    old_heights = gpd.GeoSeries(gpd.points_from_xy(
        xy.x, xy.y, map_.height), crs=map_.crs)
    new_heights = to_crs(old_heights, target)
    transformed['height'] = [point.z for point in new_heights]
    return transformed
Example #8
def polygon_filter(input_df, filter_gdf):
    """
    Purpose: This removes records from the TROPOMI NO2 pandas DataFrame that
        are not found within the filter polygons

    Parameters:
    input_df: Pandas DataFrame containing NO2 data coming from nc_to_df() 
    filter_gdf: GeoPandas GeoDataFrame containing geometries to constrain
        NO2 records

    Returns:
    geodataframe: Filtered GeoPandas GeoDataFrame
    """
    tic = time.perf_counter()
    output_gdf = pd.DataFrame()
    print('Processing input dataframe...')
    crs = filter_gdf.crs
    # 1. Convert input_df to gdf
    gdf1 = gpd.GeoDataFrame(geometry=gpd.points_from_xy(
        input_df.longitude, input_df.latitude),
                            crs=crs)
    print('Original NO2 DataFrame length:', len(gdf1))
    # 2. Find out intersection between African Countries GeoDataFrames (geometry) and
    #       NO2 GeoDataFrames using Geopandas sjoin (as GeoDataFrame, gdf2)
    sjoin_gdf = gpd.sjoin(gdf1, filter_gdf, how='inner', op='intersects')
    # 3. Do a pandas inner join of sjoin_gdf and the input_df NO2 DataFrame
    #     using indexes (sjoin_gdf acts as the filter). The inner join drops
    #     non-intersecting records
    gdf2 = input_df.join(sjoin_gdf, how='inner')
    print('Filtered NO2 GeoDataFrame length:', len(gdf2))
    toc = time.perf_counter()
    elapsed_time = toc - tic
    print("Processed NO2 DataFrame sjoin in " + str(elapsed_time / 60) +
          " minutes")
    output_gdf = gdf2

    return output_gdf
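A hedged usage sketch for polygon_filter with a toy filter polygon (assumes pandas, geopandas and shapely are importable, and an older geopandas where sjoin still accepts op=):

import pandas as pd
import geopandas as gpd
from shapely.geometry import box

no2 = pd.DataFrame({'longitude': [10.0, 50.0],
                    'latitude': [5.0, 40.0],
                    'no2': [1.2, 3.4]})
region = gpd.GeoDataFrame(geometry=[box(0, 0, 20, 20)], crs='EPSG:4326')
kept = polygon_filter(no2, region)  # keeps only the first point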
Example #9
    def __init__(self, bounds: gpd.GeoDataFrame):
        """Initial stat and variables for the police agents.

        Takes the bounds input which determines where agents may be created.
        Agents may be spawned within the extent of bounds, but to determine
        whether they fall within a bounds polygon this must be checked with a
        geographic function gpd.within().

        Args:
            bounds (gpd.GeoDataFrame): GeoDataFrame with the input polygon.
        """
        # takes bounds from main.py
        self.bounds = bounds

        # find extent of bounds
        x_min, y_min, x_max, y_max = self.bounds.total_bounds

        while True:
            # random xy from extent of bounds (square)
            self.x = random.uniform(x_min, x_max)
            self.y = random.uniform(y_min, y_max)

            # convert to geodataframe
            df = pd.DataFrame({'x': [self.x], 'y': [self.y]})
            geom = gpd.points_from_xy(df.x, df.y)
            gdf = gpd.GeoDataFrame(df, geometry=geom)

            # check whether point falls within polygon
            within = int(gdf.within(self.bounds))

            # only keep point if within poly, otherwise repeat random coords
            if within == 1:
                self.x = gdf['x']
                self.y = gdf['y']
                self.geom = gdf['geometry']
                break
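The loop above is plain rejection sampling: draw uniformly from the bounding box and keep the draw only if it falls inside the polygon. A standalone sketch of the same idea (the helper name is ours):

import random
import geopandas as gpd
from shapely.geometry import Point

def sample_point_within(bounds: gpd.GeoDataFrame) -> Point:
    """Rejection-sample one point inside the union of the bounds polygons."""
    poly = bounds.unary_union
    x_min, y_min, x_max, y_max = bounds.total_bounds
    while True:
        p = Point(random.uniform(x_min, x_max), random.uniform(y_min, y_max))
        if poly.contains(p):
            return p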
Example #10
    def __init__(self, path_inventario):
        self.df = pd.read_csv(path_inventario,
                              engine='python',
                              sep='\t',
                              delimiter=';',
                              parse_dates=['UltimaAtualizacao'])
        self.df[['Latitude', 'Longitude']] = (
            self.df[['Latitude', 'Longitude']]
            .apply(lambda x: x.str.replace(',', '.')))
        self.df['Latitude'] = self.df['Latitude'].astype('float')
        self.df['Longitude'] = self.df['Longitude'].astype('float')

        self.gdf = gpd.GeoDataFrame(self.df,
                                    geometry=gpd.points_from_xy(
                                        self.df.Longitude, self.df.Latitude),
                                    crs='epsg:4674')

        self.m01 = ipyleaflet.Map(zoom=2, center=(-16, -47))
        self.layer()
        self.controls_on_Map()
        self.control_buttonDownload.on_click(self.download_buttom)
        self.control_shapefileButtom.on_click(self.shapefile_buttom)

        display(self.m01)
Example #11
def CreateMapWithPaths(sp, df):
    # stores lines/roads/edges of sp (shortest path)
    listLine = []

    for i in range(len(sp) - 1):
        # l1 and l2 are row number of each point in data frame
        l1 = df[df['Point'] == sp[i]].index.values.astype(int)
        l2 = df[df['Point'] == sp[i + 1]].index.values.astype(int)
        # from the row numbers build the line segment; shapely expects
        # (x, y), i.e. longitude before latitude, matching points_from_xy below
        point1 = df.iloc[l1[0]].Longitude, df.iloc[l1[0]].Latitude
        point2 = df.iloc[l2[0]].Longitude, df.iloc[l2[0]].Latitude
        listLine.append(LineString([point1, point2]))

    # convert line to dataframe then to geo df
    dfLine = pd.DataFrame({'geometry': listLine})
    gdfLine = geopandas.GeoDataFrame(dfLine)

    # take the original df and attach point geometry plus a crs;
    # this helps mark intersections on the path map that only has lines
    gdf = geopandas.GeoDataFrame(df,
                                 geometry=geopandas.points_from_xy(
                                     df.Longitude, df.Latitude))

    gdf.crs = from_epsg(3857)
    gdfLine.crs = from_epsg(3857)

    ax = gdfLine.plot(figsize=(10, 10), alpha=1, edgecolor='red')

    for a in gdf.itertuples():
        if sp.__contains__(a.Point):
            plt.text(a.geometry.x, a.geometry.y, a.Point)

    # ctx.add_basemap(ax, url=ctx.providers.Stamen.TonerLite, zoom=12)
    ax.set_axis_off()
    # plt.show()
    return plt
Example #12
def read_grd(filename):
    with open(filename) as infile:
        ncols = int(infile.readline().split()[1])
        nrows = int(infile.readline().split()[1])
        xllcorner = float(infile.readline().split()[1])
        yllcorner = float(infile.readline().split()[1])
        cellsize = float(infile.readline().split()[1])
        nodata_value = int(infile.readline().split()[1])
        #version = float(infile.readline().split()[1])
    longitude = xllcorner + cellsize * np.arange(ncols)
    latitude = yllcorner + cellsize * np.arange(nrows)
    value = np.loadtxt(filename,
                       skiprows=6)  #change to 7 if version is in file
    #value is a matrix whose bottom-left corner is [xllcorner, yllcorner]

    x_input = np.array([[lon, lat] for lon in longitude for lat in latitude])
    y_input = value
    y_input = np.flipud(y_input).flatten(order='F')

    arr = np.hstack([x_input, y_input[:, None]])
    df = pd.DataFrame(arr, columns=['x', 'y', 'height'])

    gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.x, df.y))
    return gdf
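read_grd expects an ESRI-ASCII-style grid: six "key value" header lines followed by the data matrix. A hedged round-trip sketch with a tiny hand-written file:

# write a 2x2 grid in the layout read_grd expects, then read it back
sample = (
    "ncols 2\n"
    "nrows 2\n"
    "xllcorner 10.0\n"
    "yllcorner 50.0\n"
    "cellsize 0.5\n"
    "NODATA_value -9999\n"
    "1 2\n"
    "3 4\n"
)
with open('tiny.grd', 'w') as f:
    f.write(sample)

gdf = read_grd('tiny.grd')  # four points with a 'height' column
print(gdf)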
Example #13
    def _read_berlin_bounds(self) -> gpd.GeoDataFrame:
        """
        Reads Berlin bounds from a poly file and sets epsg to 4326.
        If the file does not exist, returns an empty geodataframe.
        :rtype: geopandas.GeoDataFrame
        """
        if not isfile(self.berlin_bounds_file): 
            return gpd.GeoDataFrame()

        df = pd.read_csv(self.berlin_bounds_file, delim_whitespace=True, header=None)
        df.columns = ['lat', 'lon']

        # create a geodataframe
        gdf = gpd.GeoDataFrame(
            df,
            geometry = gpd.points_from_xy(df.lat, df.lon)
        )

        # drop the lat, lon columns as they are in the geometry column
        gdf.drop(['lat', 'lon'], axis=1, inplace=True)
        # set the coordinate system. This has to be done this way due to geopandas==0.5.0
        gdf.crs = {'init': 'epsg:4326'}

        return gdf
Example #14
def add_public_transport_connection_quality_work(df_businesses):
    """ Add connection quality of public transport from coordinates
    :param df_businesses: Contains the businesses from the SynPop, incl. coordinates
    :return: df_businesses: Contains the businesses from the SynPop, including a column containing the public
    transport connection quality
    - A = very good, coded as 1
    - B = good, coded as 2
    - C = medium, coded as 3
    - D = low, coded as 4
    - no class = marginal or no public transport connection, coded as 5
    """
    # Read the shape file containing the connection quality
    connection_quality_folder_path = Path('../data/input/OeV_Gueteklassen/Fahrplanperiode_17_18/')
    df_connection_quality = geopandas.read_file(connection_quality_folder_path / 'OeV_Gueteklassen_ARE.shp')
    df_connection_quality.to_crs(epsg=2056, inplace=True)  # Change the projection
    geodf_businesses = geopandas.GeoDataFrame(df_businesses,
                                              geometry=geopandas.points_from_xy(df_businesses.xcoord_work,
                                                                                df_businesses.ycoord_work),
                                              crs='epsg:2056')
    geodf_businesses = geopandas.sjoin(geodf_businesses, df_connection_quality[['KLASSE', 'geometry']],
                                      how='left', op='intersects')
    geodf_businesses['KLASSE'] = geodf_businesses['KLASSE'].map({'A': 1,
                                                                 'B': 2,
                                                                 'C': 3,
                                                                 'D': 4})
    geodf_businesses['KLASSE'].fillna(5, inplace=True)
    # Rename the column with the public transport connection quality
    geodf_businesses.rename(columns={'KLASSE': 'public_transport_connection_quality_ARE_work'}, inplace=True)
    # base = df_connection_quality.plot()
    # geodf_businesses[geodf_businesses['public_transport_connection_quality_ARE_work'] == 1].head(n=5000).plot(ax=base,
    #                                                                   marker='o',
    #                                                                   color='red',
    #                                                                   markersize=5)
    # plt.show()
    geodf_businesses.drop(['index_right'], axis=1, inplace=True)
    return geodf_businesses
Example #15
def ClipData(From, To, data):
    # create a data frame with source and dest to clip the map
    ID = [1, 2]
    # TODO: should we wrap this in a try/except to handle bad addresses?
    Source = get_geocords(From)
    Destination = get_geocords(To)

    Lat1, Long1 = Source
    Lat2, Long2 = Destination
    Lat = [Lat1, Lat2]
    Long = [Long1, Long2]

    df = pd.DataFrame()
    df['id_trip'] = ID
    df['Lat'] = Lat
    df['Long'] = Long

    # pass as a geodataframe, change crs to meters so we can buffer and
    # clip the original map
    df_geo = gp.GeoDataFrame(df, geometry=gp.points_from_xy(df.Long, df.Lat))
    df_geo.crs = {'init': 'EPSG:4326'}
    df_geo = df_geo.to_crs("epsg:3043")

    trips = df_geo.copy()
    trips['geometry'] = LineString(df_geo.geometry)
    # Why this? Isn't it already in this CRS?
    trips = trips.to_crs("epsg:3043")
    # buffer a kilometer
    trips.geometry = trips.geometry.buffer(1000)

    # clipping
    streets_clipped = gp.sjoin(left_df=data, right_df=trips, how='inner')
    # change crs for folium and networkx
    streets_clipped2 = streets_clipped.to_crs('EPSG:4326')

    return streets_clipped2, Source, Destination
Example #16
def knn_plot(df, k, figsize=(15, 15), column='class'):
    data = df[['lng', 'lat']].to_numpy()
    classes, idx_to_class = pd.factorize(df[column])
    x = np.arange(-180, 180, 0.5)
    y = np.arange(-90, 90, 0.5)
    # Use the haversine metric for real distance
    clf = neighbors.KNeighborsClassifier(k, metric='haversine')
    clf.fit(data*np.pi/180, classes)  # Fit on radians and not on degrees
    xx, yy = np.meshgrid(x, y)
    pred = clf.predict(np.c_[xx.ravel(), yy.ravel()]
                       * np.pi/180).reshape(xx.shape)
    world = geopandas.read_file(
        geopandas.datasets.get_path('naturalearth_lowres'))
    world = world[['continent', 'geometry']]
    fig, ax = plt.subplots(figsize=figsize)
    gdf = geopandas.GeoDataFrame(
        df.copy(), geometry=geopandas.points_from_xy(df.lng, df.lat))
    cmap = cm.get_cmap('tab20', len(idx_to_class))
    ax.pcolormesh(xx, yy, pred, cmap=cmap)
    world.boundary.plot(color='k', ax=ax)
    for i, c in enumerate(idx_to_class):
        gdf[gdf[column] == c].geometry.plot(
            ax=ax, color=cmap.colors[i], label=c)
    ax.legend()
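A toy call for knn_plot, assuming its module already imports pandas, numpy, sklearn.neighbors, geopandas, matplotlib.cm and pyplot; the coordinates and class labels are illustrative:

import pandas as pd

toy = pd.DataFrame({
    'lng': [-58.66, -47.91, 13.40, 11.58],
    'lat': [-34.58, -15.78, 52.52, 48.14],
    'class': ['south', 'south', 'north', 'north'],
})
knn_plot(toy, k=1)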
Example #17
    def dataset(self):
        """ Load data as a geopandas GeoDataFrame """

        if self._dataset is None:
            self._dataset = self.load_dataframe(self)

        if not hasattr(self, 'geometry'):
            if self.geometry_col:
                self._dataset[self.geometry_col] = (
                    self._dataset[self.geometry_col].apply(shapely.wkt.loads))
                self._dataset = gpd.GeoDataFrame(self._dataset,
                                                 geometry=self.geometry_col)
            elif self.xy_cols:
                self._dataset = gpd.GeoDataFrame(
                    self._dataset,
                    geometry=gpd.points_from_xy(
                        self._dataset[self.xy_cols[0]],
                        self._dataset[self.xy_cols[1]]))

        if self.bbox:
            # clip to the configured bounding box: x range, then y range
            self._dataset = self._dataset.cx[self.bbox.min.x:self.bbox.max.x,
                                             self.bbox.min.y:self.bbox.max.y]

        return self._dataset
Example #18
def crime_cluster(from_date, to_date, crime):
    data = pd.read_csv(
        'https://opendata.arcgis.com/datasets/3eeb0a2cbae94b3e8549a8193717a9e1_0.csv?outSR=%7B%22latestWkid%22%3A2248%2C%22wkid%22%3A102685%7D',
        sep=',',
        header='infer')
    data['CrimeDateTime'] = pd.to_datetime(data['CrimeDateTime'])
    data = data[(data['CrimeDateTime'] > from_date)
                & (data['CrimeDateTime'] < to_date)]
    data = data[data['Latitude'] > 0]
    data = data[data['Description'] == crime]
    coords = np.array(data[['Longitude', 'Latitude']])
    model = hdbscan.HDBSCAN(min_cluster_size=4,
                            min_samples=5,
                            cluster_selection_epsilon=0.001)
    fit = model.fit(coords)
    labels = fit.labels_.reshape(fit.labels_.shape[0], -1)
    arr = np.concatenate((coords, labels), axis=1)
    # coords were stacked as (Longitude, Latitude), so name the columns accordingly
    df = pd.DataFrame(arr, columns=['Longitude', 'Latitude', 'Cluster'])
    df['Cluster'] = df['Cluster'].astype(int)  # astype returns a copy; assign it back
    gdf = gpd.GeoDataFrame(df,
                           geometry=gpd.points_from_xy(df.Longitude,
                                                       df.Latitude))
    clus_geojson = gdf.to_json()
    return clus_geojson
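Usage is a single call; the dates and crime description below are illustrative and must match values present in the Baltimore open-data feed:

import json

geojson_str = crime_cluster('2021-01-01', '2021-06-30', 'HOMICIDE')
clusters = json.loads(geojson_str)
print(len(clusters['features']), 'clustered points')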
Example #19
def spatial_join(df_input, shapefile):

    # Load shapefile
    shapefile = gpd.read_file(shapefile)
    shapefile.drop(['GID_0', 'GID_1', 'NL_NAME_1', 'GID_2', 'VARNAME_2', 'NL_NAME_2', 'TYPE_2', 'NAME_0',
                    'NAME_1', 'ENGTYPE_2', 'CC_2', 'HASC_2'], axis=1, inplace=True)
    # shapefile.crs = {'init': 'epsg:4326'}
    shapefile.crs = 'epsg:4326'

    # Load Excel file

    df = gpd.GeoDataFrame(
        df_input, geometry=gpd.points_from_xy(df_input['Longitude'], df_input['Latitude']))
    # df.crs = {'init': 'epsg:4326'}
    df.crs = 'epsg:4326'

    # Spatial Join
    df = gpd.sjoin(df, shapefile, how='left', op='within')
    df.drop(['index_right', 'geometry'], axis=1, inplace=True)
    df.rename(columns={'NAME_2': 'City'}, inplace=True)
    df = df[['Date', 'Time', 'City', 'Location', 'High_Accuracy', 'Latitude', 'Longitude', 'Direction',
             'Type', 'Lanes_Blocked', 'Involved', 'Tweet', 'Source']]

    return df
Example #20
def pandas_handler():
    try:
        metrobus_data = get_data(uri_data)
        metrobus_df = pd.DataFrame.from_records(metrobus_data)
        metrobus_flat_df = json_normalize(data=metrobus_df['fields'])
        metrobus_gdf = gpd.GeoDataFrame(
            metrobus_flat_df,
            geometry=gpd.points_from_xy(metrobus_flat_df.position_longitude,
                                        metrobus_flat_df.position_latitude))
        geo_df = gpd.read_file(file_geo)
        merge_geo_data_df = gpd.sjoin(metrobus_gdf,
                                      geo_df,
                                      how="left",
                                      op='intersects')
        nomgeo_nomalize = merge_geo_data_df.nomgeo.str.normalize(
            'NFKD').str.encode('ascii', errors='ignore').str.decode('utf-8')
        merge_geo_data_df.nomgeo = nomgeo_nomalize
        result = merge_geo_data_df.to_json()
        parsed = json.loads(result)
        data_json = parsed["features"]
        cdmx_data = list(map(lambda x: x["properties"], data_json))
        return cdmx_data
    except Exception as ex:
        logging.error(str(ex))
Example #21
def make_geodf(df, lat_col_name='latitude', lon_col_name='longitude'):
    """
    Take a dataframe with latitude and longitude columns, and turn
    it into a geopandas df. Needed to plot the map.

    The function is more or less copy-pasted from
    https://www.martinalarcon.org/2018-12-31-d-geopandas/
    """
    import geopandas as gpd

    try:
        df = df.copy()
        lat = df[lat_col_name]
        lon = df[lon_col_name]
        geodf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(lon, lat))
        geodf = geodf.set_crs("EPSG:4326")
        geodf = geodf.to_crs("EPSG:3877")
    except Exception:
        print('Something went wrong while converting the data to a GeoDataFrame.')
        geodf = []

    return geodf
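A usage sketch for make_geodf; EPSG:3877 (ETRS89 / GK25FIN, the Helsinki region) is the projected CRS the function targets:

import pandas as pd

stops = pd.DataFrame({'latitude': [60.17, 60.19],
                      'longitude': [24.94, 24.96]})
stops_gdf = make_geodf(stops)  # default column names
print(stops_gdf.crs)           # EPSG:3877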
Example #22
def makeDataframe(data_file, epsg=3857):

    # pprint(WMS_LAYERS)
    # pprint([op.name for op in wms.operations])

    # https://frictionlessdata.io/tooling/python/extracting-data/
    # Handles multiline columns cleanly.
    data_filename = os.path.basename(data_file)
    import_rows = extract(data_file)
    import_dataframe = pandas.DataFrame(import_rows)

    #cities_3857 = geopandas.read_file(CITIES_SHP).to_crs(epsg=3857)

    # keep only rows that have a longitude before building point geometry
    rows_with_coords = import_dataframe[import_dataframe.Longitude.notnull()]
    point_geodataframe = geopandas.GeoDataFrame(
        rows_with_coords,
        geometry=geopandas.points_from_xy(rows_with_coords.Longitude,
                                          rows_with_coords.Latitude),
        crs="EPSG:4326")
    if DEBUG:
        pprint(point_geodataframe)

    point_geodataframe_3857 = point_geodataframe.to_crs(epsg=epsg)
    return point_geodataframe_3857
Example #23
    def set_geometry_points(self, scheduler=None):
        """Set geometry attribute of gpd.GeoSeries with Points from latitude and
        longitude attributes if geometry not present.

        Parameters:
            scheduler (str): used for dask map_partitions. "threads",
                "synchronous" or "processes"
        """
        def apply_point(df_exp):
            return df_exp.apply((lambda row: Point(row.longitude, row.latitude)), axis=1)
        if not self.geometry.size:
            LOGGER.info('Convert centroids to GeoSeries of Point shapes.')
            if not self.lat.size or not self.lon.size:
                self.set_meta_to_lat_lon()
            if not scheduler:
                self.geometry = gpd.GeoSeries(
                    gpd.points_from_xy(self.lon, self.lat), crs=self.geometry.crs)
            else:
                import dask.dataframe as dd
                from multiprocessing import cpu_count
                ddata = dd.from_pandas(self, npartitions=cpu_count())
                self.geometry = (ddata
                                 .map_partitions(apply_point, meta=Point)
                                 .compute(scheduler=scheduler))
Example #24
def join_receiver_position(
        gnss_obs: pd.DataFrame,
        gnss_fix: pd.DataFrame) -> gpd.GeoDataFrame:
    """  Add receiver positions to Raw data.

    Joined by utc time in milliseconds.
    """
    clean_fix = gnss_fix[["Longitude","Latitude","Altitude","(UTC)TimeInMs"]].dropna().set_index("(UTC)TimeInMs")
    df = gnss_obs.join(clean_fix,
                       on="time_ms", how="inner", lsuffix="obs", rsuffix="fix")
    df.reset_index(drop=True, inplace=True)
    if len(df) != len(gnss_obs):
        warnings.warn(
            f'{len(gnss_obs)-len(df)} observations discarded without matching fix.'
        )

    gdf = gpd.GeoDataFrame(
        df,
        geometry=gpd.points_from_xy(df["Longitude"],df["Latitude"], 
                                    df["Altitude"]),
        crs=cm.constants.epsg_gnss_logger)
    return gdf
Example #25
def get_loc_gdf(tweet_df, column_name='locs'):
    '''
    Given a Twitter DataFrame, this will generate a GeoPandas DataFrame, based on
    the Twitter DataFrame.

    Parameters
    ----------
    tweet_df: Pandas DataFrame
              A DataFrame generated by libpypack.Locations.map_locations().

    column_name: str
              Column name with the locations generated by libpypack.Locations.map_locations()

    Returns
    -------
    : GeoPandas DataFrame
      A GeoPandas DataFrame which can be used by libpypack.visualization.heatmap

    '''
    gdf = create_new_df(tweet_df, column_name=column_name)
    loc_gdf = geopandas.GeoDataFrame(gdf,
                                     geometry=geopandas.points_from_xy(
                                         gdf.Longitude, gdf.Latitude))
    return loc_gdf
Example #26
if date == 3:
    df = pd.read_csv('https://raw.githubusercontent.com/Tibxch/steamlit/master/20190103.csv')
elif date == 4:
    df = pd.read_csv('https://raw.githubusercontent.com/Tibxch/steamlit/master/20190104.csv')
elif date == 5:
    df = pd.read_csv('https://raw.githubusercontent.com/Tibxch/steamlit/master/20190105.csv')

### Raw Data Visualization ###

if st.checkbox("Show raw data", False):
    st.subheader('Raw Data')
    st.write(df)
    
### GEOMETRY ###

crs = "EPSG:4326"
geometry = gp.points_from_xy(df.lonstartl, df.latstartl)
geo_df  = gp.GeoDataFrame(df,crs=crs,geometry=geometry)

### 3 Hours ####

hours_3 = st.slider("Hour of interest (Every 3 hours)",0,23,step=3)
data = geo_df
data["timestart"] = pd.to_datetime(data["timestart"])
    

### MAP ###

st.subheader("Map show data Picked up at %i:00" % (hours_3))
st.markdown(""" This map will show you only data of Picked up.""")
long = 100.5018 #longitude of BKK
lat = 13.7563 #latitude of BKK
Example #27
print("*" * 100)

# 2 Read, analyze and summarize Schedule data
########################################################################################################################
print(
    "Run Section 2: Read, analyze and summarize rawnav, WMATA schedule data..."
)
begin_time = datetime.now()
# Read the Wmata_Schedule data
wmata_schedule_dat = wr.read_sched_db_patterns(path=os.path.join(
    path_source_data, "wmata_schedule_data", "Schedule_082719-201718.mdb"),
                                               analysis_routes=analysis_routes)

wmata_schedule_gdf = (gpd.GeoDataFrame(wmata_schedule_dat,
                                       geometry=gpd.points_from_xy(
                                           wmata_schedule_dat.stop_lon,
                                           wmata_schedule_dat.stop_lat),
                                       crs='EPSG:4326').to_crs(epsg=wmata_crs))

# Make Output Directory
path_stop_summary = os.path.join(path_processed_data, "stop_summary.parquet")
if not os.path.isdir(path_stop_summary):
    os.mkdir(path_stop_summary)

path_stop_index = os.path.join(path_processed_data, "stop_index.parquet")
if not os.path.isdir(path_stop_index):
    os.mkdir(path_stop_index)

for analysis_route in analysis_routes:
    print("*" * 100)
    print('Processing analysis route {}'.format(analysis_route))
Example #28
import matplotlib.pyplot as plt

#change this only
os.chdir('/Users/Sarah/Documents/GitHub/US-schoolday-temperatures')
date = 'Winter 2020-21'
title = 'Average Daily Temp (with Wind Chill)'

#df = pd.read_csv('{} temperature.csv'.format(date))
data_folder = 'Data'
filename = '{} temperature.csv'.format(date)
data_file = os.path.join(data_folder, filename)

df = pd.read_csv(data_file)

gdf = geopandas.GeoDataFrame(df,
                             geometry=geopandas.points_from_xy(
                                 df['lon'], df['lat']),
                             crs='epsg:4269')  #crs for North America

#from https://www.census.gov/geographies/mapping-files/time-series/geo/cartographic-boundary.html
#can also get school districts here!
state_file = os.path.join(data_folder,
                          'cb_2019_us_state_20m/cb_2019_us_state_20m.shp')
state = geopandas.read_file(state_file)

#https://stackoverflow.com/questions/19960077/how-to-filter-pandas-dataframe-using-in-and-not-in-like-in-sql
state_abres = ['AK', 'PR', 'HI']  # continental US only: remove Alaska, Puerto Rico and Hawaii
continental_states = state[~state['STUSPS'].isin(state_abres)]
#state_test.head()

fig, ax = plt.subplots()  #figsize=(25,8))
Example #29
PAIPR_dir = ROOT_DIR.joinpath('data/gamma_20111109')
data_0 = import_PAIPR(PAIPR_dir)

# Format accumulation data
accum_long = format_PAIPR(data_0, start_yr=1979, end_yr=2009).drop('elev',
                                                                   axis=1)
traces = accum_long.groupby('trace_ID')

# New accum and std dfs in wide format
accum = accum_long.pivot(index='Year', columns='trace_ID', values='accum')
accum_std = accum_long.pivot(index='Year', columns='trace_ID', values='std')

# Create df for mean annual accumulation
accum_trace = traces.aggregate(np.mean).drop('Year', axis=1)
accum_trace = gpd.GeoDataFrame(accum_trace,
                               geometry=gpd.points_from_xy(
                                   accum_trace.Lon, accum_trace.Lat),
                               crs="EPSG:4326").drop(['Lat', 'Lon'], axis=1)

# Import Antarctic outline shapefile
ant_path = ROOT_DIR.joinpath(
    'data/Ant_basemap/Coastline_medium_res_polygon.shp')
ant_outline = gpd.read_file(ant_path)

# Convert accum crs to same as Antarctic outline
accum_trace = accum_trace.to_crs(ant_outline.crs)

##### Estimate time series regressions

# Preallocate arrays for linear regression
lm_data = accum.transpose()
std_data = accum_std.transpose()
Example #30
    680: "Þórshöfn",
    681: "Þórshöfn",
}


ADMINISTRATIVE_DIVISIONS: Dict[str, List[str]] = {
    "Seltjarnarnesbær": ["Seltjarnarnes"],
    "Ísafjarðarbær": ["Ísafjörður"],
}

data_path = pkg_resources.resource_filename("stadfangaskra.data", "df.parquet.gzip")

_df = pd.read_parquet(data_path)

df = _df = geopandas.GeoDataFrame(
    _df, geometry=geopandas.points_from_xy(_df.lon, _df.lat), crs=4326
)
df = df.drop(["lat", "lon"], axis=1)

for c in ["municipality_code"]:
    df[c] = pd.Categorical(df[c].astype(pd.Int32Dtype()))


regions = pd.read_parquet(
    pkg_resources.resource_filename("stadfangaskra.data", "regions.parquet")
)

REGION_MAP = {
    k: list(v)
    for (k, v) in regions.groupby("region")["municipality"].unique().to_dict().items()
}
Example #31
def generate_value_added_sales_by_year(start_year=2006, end_year=2018):
    """
    Generate the sales files by year with value-added columns.

    Notes
    -----
    This takes the file of unique sales and adds several useful columns, including
    indexed housing prices and geocoded fields (zip codes, neighborhoods, and 
    police districts).
    """

    # get the main sales file
    matches = glob(os.path.join(data_dir, "OPA", "sales_file_*.csv"))
    if not len(matches):
        sales_data = generate_sales_file()
    else:
        sales_data = pd.read_csv(matches[0])

    # format the data
    sales_data = (sales_data.assign(
        sale_date=lambda df: pd.to_datetime(df["sale_date"]),
        sale_year=lambda df: df.sale_date.dt.year,
        sale_price_psf=lambda df: df.sale_price / df.total_livable_area,
        test=lambda df: (~np.isinf(df.sale_price_psf)
                         & df.sale_price_psf.notnull()),
        housing_index=lambda df: PhillyMSAHousingIndex.interpolate(
            df["sale_date"]),
    ).assign(
        housing_index=lambda df: df.housing_index / df.housing_index.max(),
        sale_price_indexed=lambda df: df.sale_price / df.housing_index,
    ).query("test == True").drop(labels=["test"], axis=1))

    # make sure the output directory exists
    dirname = os.path.join(data_dir, "OPA", "ValueAdded")
    if not os.path.exists(dirname):
        os.makedirs(dirname)

    # geocode!
    zip_codes = ZIPCodes.get()
    neighborhoods = Neighborhoods.get()
    police_districts = PoliceDistricts.get()

    # save each year
    for year in range(start_year, end_year + 1):
        print(f"Processing sale year {year}...")

        # get this year's data
        df = sales_data.query("sale_year == @year")

        # convert to geopandas
        gdf = (gpd.GeoDataFrame(
            df,
            geometry=gpd.points_from_xy(df["lng"].astype(float),
                                        df["lat"].astype(float)),
            crs={
                "init": "epsg:4326"
            },
        ).to_crs(epsg=EPSG).drop(labels=["lat", "lng"], axis=1))

        if "zip_code" in gdf.columns:
            gdf = gdf.drop(labels=["zip_code"], axis=1)

        # geocode
        gdf = (gdf.pipe(geocode, zip_codes).pipe(geocode, neighborhoods).pipe(
            geocode, police_districts))

        path = os.path.join(dirname, f"{year}.csv")
        gdf.to_csv(path, index=False)
Example #32
df = pd.DataFrame(
    {'City': ['Buenos Aires', 'Brasilia', 'Santiago', 'Bogota', 'Caracas'],
     'Country': ['Argentina', 'Brazil', 'Chile', 'Colombia', 'Venezuela'],
     'Latitude': [-34.58, -15.78, -33.45, 4.60, 10.48],
     'Longitude': [-58.66, -47.91, -70.66, -74.08, -66.86]})

###############################################################################
# A ``GeoDataFrame`` needs a ``shapely`` object. We use geopandas
# ``points_from_xy()`` to transform **Longitude** and **Latitude** into a list
# of ``shapely.Point`` objects and set it as a ``geometry`` while creating the
# ``GeoDataFrame``. (note that ``points_from_xy()`` is an enhanced wrapper for
# ``[Point(x, y) for x, y in zip(df.Longitude, df.Latitude)]``)

gdf = geopandas.GeoDataFrame(
    df, geometry=geopandas.points_from_xy(df.Longitude, df.Latitude))


###############################################################################
# ``gdf`` looks like this :

print(gdf.head())

###############################################################################
# Finally, we plot the coordinates over a country-level map.

world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))

# We restrict to South America.
ax = world[world.continent == 'South America'].plot(
    color='white', edgecolor='black')