import transportation_tutorials as tt import geopandas as gpd from matplotlib import pyplot as plt from shapely.geometry import Polygon shapefile_filename = tt.data('SERPM8-TAZSHAPE') taz = gpd.read_file(shapefile_filename) ax = taz.plot() #画shapefile的图 # Selection by Rectangular Envelope xmin = 905712.145924 ymin = 905343.94408855 xmax = 983346.68922847 ymax = 981695.93140023 taz_jupiter = taz.cx[xmin:xmax, ymin:ymax] ax = taz_jupiter.plot(edgecolor='w') taz_jupiter2 = taz.cx[xmin:, ymin:] taz_jupiter2.equals(taz_jupiter) from shapely.geometry import box study_area = gpd.GeoDataFrame(geometry=[box(xmin, ymin, xmax, ymax)], crs={'init': 'epsg:2236'}) ax = taz_jupiter.plot(edgecolor='w') transparent = (0, 0, 0, 0) ax = study_area.plot(ax=ax, edgecolor='red', facecolor=transparent) # Selection by Polygon irregular_polygon = Polygon([ (973346, 935343),
import transportation_tutorials as tt import pandas as pd import numpy as np import statsmodels.api as sm per = pd.read_csv(tt.data('SERPM8-BASE2015-PERSONS')) hh = pd.read_csv(tt.data('SERPM8-BASE2015-HOUSEHOLDS')) print(per.info()) print(hh.info()) per_hh_merge = pd.merge( per, hh, on='hh_id', how='inner', ) print(per_hh_merge.info()) print(per_hh_merge['type'].head()) print(per_hh_merge['gender'].head()) print(per_hh_merge['transponder'].head()) print(per_hh_merge['autotech'].head()) print(per_hh_merge['autos'].head()) per_hh_merge['gender'] = np.where( (per_hh_merge['gender'] == 'm'), 1, 0) # result = np.where(condition, x, y) condition=True→x per_hh_merge['type'] = np.where((per_hh_merge['type'] == 'Full-time worker'), 1, 0) mod = sm.OLS( per_hh_merge.value_of_time,
import transportation_tutorials as tt import numpy as np import pandas as pd import geopandas as gpd from matplotlib import pyplot as plt # 导入TAZ和MAZ的数据 xmin = 905712 ymin = 905343 taz = gpd.read_file(tt.data('SERPM8-TAZSHAPE')).cx[xmin:, ymin:] maz = gpd.read_file(tt.data('SERPM8-MAZSHAPE')).cx[xmin:, ymin:] center = (945495, 941036) ''' # 5.3.1 Simple Map # 用plot工具 ax = taz.plot() ax = taz.plot(color='green', linewidth=2, edgecolor='white') # 可以再同一个地图中展示 ax = maz.plot(linewidth=1, color='green', edgecolor='white') ax = taz.plot(ax=ax, color=(0,0,0,0), linewidth=1, edgecolor='black') # 自定义地图的特征 fig, ax = plt.subplots(figsize=(12,9)) ax.axis('on') # do show axis as a frame ax.set_xticks([]) # but no tick marks ax.set_yticks([]) # one either axis ax.set_title("SERPM 8 Zones", fontweight='bold', fontsize=16, pad=20) ax.annotate('in the viscinity of Jupiter, FL', xy=(0.5, 1.0), xycoords='axes fraction', xytext=(0, 5), textcoords='offset points',
# Exploratory script: load the SERPM 8 MAZ shapes and the MAZ data table,
# attach population density and total employment to the shapes, keep the five
# rows with the highest population density, and define the two projections
# used when converting coordinates for folium.
import transportation_tutorials as tt
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import folium
import pyproj
from shapely.geometry import Polygon

maz = gpd.read_file(tt.data('SERPM8-MAZSHAPE'))
maz_data = pd.read_csv(tt.data('SERPM8-MAZDATA', '*.csv'))

print(maz.crs)
# maz = maz.to_crs(epsg = 4326)

# DataFrame.info() writes its report to stdout and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None" line.
maz.info()
maz_data.info()

# Keep only the two attributes of interest and make them floats.
maz_data_1 = maz_data[['PopDen', 'emp_total']]
print(maz_data_1)
maz_data_1 = maz_data_1.astype(float)

# NOTE(review): this joins the 'MAZ' column of the shapes against the
# 'PopDen' values of the data table, which looks unintended -- the join key
# should presumably be the zone identifier column of maz_data. That column
# is not visible here, so the merge is left unchanged; confirm and fix.
maz_1 = maz.merge(maz_data_1, how='left', left_on='MAZ', right_on='PopDen')
print(maz_1)

# Five rows with the largest population density.
maz_1 = maz_1.sort_values(by='PopDen', ascending=False).head(5)
maz_1.info()

# From the Solution, how to change the data from CRS format to folium
# Read longitude / latitude.
input_Proj = pyproj.Proj(init="EPSG:2236", preserve_units=True)  # coordinate system of the source data
output_Proj = pyproj.Proj(init="EPSG:4326")  # coordinate system to convert into
import transportation_tutorials as tt import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns pd.options.display.max_columns = 100 trip = pd.read_csv(tt.data('SERPM8-BASE2015-TRIPS')) print(trip.info()) hh = pd.read_csv(tt.data('SERPM8-BASE2015-HOUSEHOLDS')) print(hh.info()) trip_hh_merge = pd.merge( hh, trip.groupby(['hh_id']).size().rename('n_trips'), left_on=['hh_id'], right_index=True, ) print(trip_hh_merge.head()) # Prepare a heatmap that visualizes the joint distribution of the number of trips taken by each household and the number of automobiles owned by the household. # For households with 2 automobiles, what is the most frequent number of trips made by those households in the data? '''resluts_1 = trip_hh_merge.pivot_table( index='autos', columns='n_trips', aggfunc='size' ) print(resluts_1) ''' sns.heatmap(trip_hh_merge.pivot_table(index='autos', columns='n_trips',
# Count how many MAZ center points fall within each TAZ and print the TAZ
# with the largest count together with that count.
import transportation_tutorials as tt
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point
import matplotlib.pyplot as plt

maz = gpd.read_file(tt.data('SERPM8-MAZSHAPE'))
taz = gpd.read_file(tt.data('SERPM8-TAZSHAPE'))
#print(maz.info())
#print(taz.info())

# Work on a copy so the original MAZ geometries stay untouched.
maz_points = maz.copy()

# Q1: Within the SERPM 7 region, what TAZ has the largest number of MAZs within its boundary? How many does it have?
# Every piece of data has a center point: replace each MAZ geometry with the
# point built from its POINT_X / POINT_Y columns.
maz_points.geometry = maz_points.apply(
    lambda row: Point(row.POINT_X, row.POINT_Y),
    axis=1,
)

# Spatial join: tag every MAZ point with the TAZ that contains it.
# Current geopandas names the spatial-relationship keyword `predicate`; the
# older `op` spelling has been removed there. Releases that predate
# `predicate` reject it with a TypeError, in which case `op` is used.
try:
    taz_maz = gpd.sjoin(maz_points, taz, how='left', predicate='within')
except TypeError:
    taz_maz = gpd.sjoin(maz_points, taz, how='left', op='within')

# Number of MAZ rows per TAZ_REG value.
taz_maz_counts = taz_maz.groupby(['TAZ_REG'])['MAZ'].count()

# TAZ_REG value with the largest count.
idx = taz_maz_counts.idxmax()
print(idx)

# The largest count, obtained two equivalent ways (directly, and by lookup).
idx_max_1 = taz_maz_counts.max()
print(idx_max_1)
idx_max_2 = taz_maz_counts.loc[idx]
print(idx_max_2)
tm = {11,12,13,14,15,16,17,18,19} trip_mode_dictionary[21] = 'TRANSIT' trip_mode_counts = trips.trip_mode.map(lambda x: 21 if x in tm else x).value_counts(sort=False) trip_mode_counts.index = trip_mode_counts.index.map(trip_mode_dictionary) ax = trip_mode_counts.plot(kind='bar', color='green') ax.set_title("Trip Mode Frequency") ax.set_xlabel("Trip Mode") ax.set_ylabel("Number of Trips"); plt.show() ''' # Plotting Histogram Data hh = pd.read_csv(tt.data('SERPM8-BASE2015-HOUSEHOLDS'), index_col=0) hh.set_index('hh_id', inplace=True) hh.income.hist() plt.show() hh.income.hist(bins=50, grid=False, color='red') plt.show() hh.income.hist(bins=100, grid=False, color='red') plt.show() hh.income.hist(bins=200, grid=False, color='red') plt.show() bins = np.array([0, 10, 20, 40, 60, 70, 80, 90, 100, 125, 150, 200, 1000
# Load the SERPM 8 base-year tours table and define the lookup from tour mode
# code to mode name.
import transportation_tutorials as tt
import pandas as pd
import numpy as np

tour = pd.read_csv(tt.data('SERPM8-BASE2015-TOURS'))

# DataFrame.info() writes its report to stdout and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None" line.
tour.info()

# Tour mode code -> mode name.
tour_mode_dict = {
    1: "DRIVEALONEFREE",
    2: "DRIVEALONEPAY",
    3: "SHARED2GP",
    4: "SHARED2PAY",
    5: "SHARED3GP",
    6: "SHARED3PAY",
    7: "TNCALONE",
    8: "TNCSHARED",
    9: "WALK",
    10: "BIKE",
    11: "WALK_MIX",
    12: "WALK_PRMW",
    13: "WALK_PRMD",
    14: "PNR_MIX",
    15: "PNR_PRMW",
    16: "PNR_PRMD",
    17: "KNR_MIX",
    18: "KNR_PRMW",
    19: "KNR_PRMD",
    20: "SCHBUS",
}

# Within the Jupiter study area, what is the average distance for bike tours to work? (Hint: It is 4.03 miles)
# Combine the Florida bridge table with the county-to-district lookup and
# compute, within each district, every row's share of the 'SD Area' total.
import transportation_tutorials as tt
import pandas as pd
import numpy as np

districts = pd.read_csv(tt.data('FL-COUNTY-BY-DISTRICT'))
print(districts.head())

bridges = pd.read_csv(tt.data('FL-BRIDGES'))

# Recall the necessary cleaning for the bridges data file:
# '-' placeholders become 0, the two count columns become integers, and any
# remaining missing values are filled with 0.
bridges = bridges.replace('-', 0)
bridges[['Poor #', 'SD #']] = bridges[['Poor #', 'SD #']].astype(int)
bridges.fillna(0, inplace=True)
print(bridges.head())

# DataFrame.info() writes its report to stdout and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None" line.
bridges.info()

# Within each FDOT District, what is the fraction of structurally deficient bridge deck area in each County?
# Drop the last 6 characters of each county label (the county code).
bridges['County'] = bridges['County'].str[:-6]
# Upper-case the county names in the lookup before merging on 'County'.
districts['County'] = districts['County'].str.upper()

bridges_2 = pd.merge(bridges, districts, on='County')
print(bridges_2.head())
bridges_2.info()

# Each row's 'SD Area' divided by the total 'SD Area' of its district.
bridges_2['Fraction of SD area'] = bridges_2.groupby(
    'District')['SD Area'].transform(lambda x: (x / x.sum()))
print(bridges_2)

# Which county has the highest share of structurally deficient bridge deck area within its FDOT District? (Hint: the correct answer is PALM BEACH.)
# Bar charts of Florida bridge condition by county: the number of structurally
# deficient bridges, and the fraction of bridges in good condition.
import transportation_tutorials as tt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

warnings.filterwarnings('ignore')

bridge = pd.read_csv(tt.data('FL-BRIDGES'))

# DataFrame.info() writes its report to stdout and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None" line.
bridge.info()
print(bridge.head())
print(bridge['SD #'])
print(bridge['County'])

# Which county has the highest number of structurally deficient bridges? Use a frequency plot to find your answer.
# Drop the last row (presumably a totals/summary row -- confirm against the
# data). The explicit copy makes the column assignments below modify an
# independent frame instead of a slice of the frame that was read in.
bridge = bridge.iloc[:-1].copy()

# '-' placeholders become 0 so the column can be converted to integers.
bridge['SD #'] = bridge['SD #'].replace('-', 0).astype('int64')
print(bridge['SD #'])

# bridge_SD_counts = bridge['SD #'].value_counts(sort=True)
ax = bridge.plot(x='County', y='SD #', kind='bar', color='green', figsize=(25, 3))
ax.set_xlabel("County")
ax.set_ylabel("number of structurally deficient bridges")
plt.show()
# answer: DUVAL(031)
# Solution: bridge_count.sort_values(by = 'SD #', ascending = False).plot( x = 'County', y = 'SD #', kind = 'bar', figsize = (25,3), color = 'coral');
# bridge_count[bridge_count['SD #'] == bridge_count['SD #'].max()]['County'].values[0]

# Which county has the lowest percentage of bridges that are in good condition? Use a bar chart to find your answer.
bridge['Fraction of Good #'] = (
    bridge["Good #"] / bridge.groupby('County')["Total #"].transform('sum')
)

# Capture the Axes of this second chart: the previous `ax` belongs to the
# first figure, so labelling through it would not touch this plot.
ax = bridge.sort_values(by='Fraction of Good #', ascending=True).plot(
    x='County',
    y='Fraction of Good #',
    kind='bar',
    figsize=(25, 3),
    color='coral',
)
ax.set_xlabel("County")