Python data 예제들, transportation_tutorials.data Python 예제들

예제 #1

0

파일 보기

import transportation_tutorials as tt
import geopandas as gpd
from matplotlib import pyplot as plt
from shapely.geometry import Polygon

# Load the SERPM 8 TAZ shapefile and draw every zone in it.
shapefile_filename = tt.data('SERPM8-TAZSHAPE')
taz = gpd.read_file(shapefile_filename)

ax = taz.plot()  # plot the whole shapefile

# Selection by Rectangular Envelope
# Corners of the study-area rectangle.
# NOTE(review): assumed to be in the same coordinates as the shapefile
# (EPSG:2236 is what the study_area frame below declares) -- confirm.
xmin = 905712.145924
ymin = 905343.94408855
xmax = 983346.68922847
ymax = 981695.93140023
taz_jupiter = taz.cx[xmin:xmax, ymin:ymax]
ax = taz_jupiter.plot(edgecolor='w')

# Repeat the selection with open-ended slices and report whether it yields
# the same zones; the comparison result was previously computed and dropped.
taz_jupiter2 = taz.cx[xmin:, ymin:]
print(taz_jupiter2.equals(taz_jupiter))

from shapely.geometry import box
# Use an authority string for the CRS: the {'init': ...} dict form is
# deprecated by pyproj/geopandas.
study_area = gpd.GeoDataFrame(geometry=[box(xmin, ymin, xmax, ymax)],
                              crs='EPSG:2236')
ax = taz_jupiter.plot(edgecolor='w')
transparent = (0, 0, 0, 0)  # RGBA with zero alpha: outline only, no fill
ax = study_area.plot(ax=ax, edgecolor='red', facecolor=transparent)

# Selection by Polygon
irregular_polygon = Polygon([
    (973346, 935343),

예제 #2

0

파일 보기

파일: Using Linear Regression.py 프로젝트: ShirleyLau633/FDOT0726

import transportation_tutorials as tt
import pandas as pd
import numpy as np
import statsmodels.api as sm

# Load the person and household tables and show the schema of each.
per = pd.read_csv(tt.data('SERPM8-BASE2015-PERSONS'))
hh = pd.read_csv(tt.data('SERPM8-BASE2015-HOUSEHOLDS'))
for table in (per, hh):
    print(table.info())

# Attach household attributes to every person row via the shared hh_id key.
per_hh_merge = per.merge(hh, on='hh_id', how='inner')
print(per_hh_merge.info())
for column in ('type', 'gender', 'transponder', 'autotech', 'autos'):
    print(per_hh_merge[column].head())

# Recode two categorical columns as 0/1 indicators:
# gender -> 1 where the value is 'm', type -> 1 where it is 'Full-time worker'.
per_hh_merge['gender'] = (per_hh_merge['gender'] == 'm').astype(int)
per_hh_merge['type'] = (per_hh_merge['type'] == 'Full-time worker').astype(int)

mod = sm.OLS(
    per_hh_merge.value_of_time,

예제 #3

0

파일 보기

import transportation_tutorials as tt
import numpy as np
import pandas as pd
import geopandas as gpd
from matplotlib import pyplot as plt

# Load the TAZ and MAZ shapes, each clipped with an open-ended .cx slice
# that starts at (xmin, ymin).
xmin, ymin = 905712, 905343
taz, maz = (
    gpd.read_file(tt.data(shape_name)).cx[xmin:, ymin:]
    for shape_name in ('SERPM8-TAZSHAPE', 'SERPM8-MAZSHAPE')
)
center = (945495, 941036)
'''
# 5.3.1 Simple Map
# 用plot工具
ax = taz.plot()
ax = taz.plot(color='green', linewidth=2, edgecolor='white')

# 可以再同一个地图中展示
ax = maz.plot(linewidth=1,  color='green', edgecolor='white')
ax = taz.plot(ax=ax, color=(0,0,0,0), linewidth=1, edgecolor='black')

# 自定义地图的特征
fig, ax = plt.subplots(figsize=(12,9))
ax.axis('on')      # do show axis as a frame
ax.set_xticks([])  # but no tick marks
ax.set_yticks([])  # one either axis
ax.set_title("SERPM 8 Zones", fontweight='bold', fontsize=16, pad=20)
ax.annotate('in the viscinity of Jupiter, FL',
            xy=(0.5, 1.0), xycoords='axes fraction',
            xytext=(0, 5), textcoords='offset points',

예제 #4

0

파일 보기

import transportation_tutorials as tt
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import folium
from shapely.geometry import Polygon

# Load the MAZ shapes and the MAZ-level attribute table, then inspect both.
maz = gpd.read_file(tt.data('SERPM8-MAZSHAPE'))
maz_data = pd.read_csv(tt.data('SERPM8-MAZDATA', '*.csv'))
print(maz.crs)
# maz = maz.to_crs(epsg = 4326)
print(maz.info())
print(maz_data.info())

# Keep the zone identifier alongside the attributes of interest so the
# attribute rows can be matched to shapes by zone, not by a density value.
# NOTE(review): 'mgra' is assumed to be the MAZ identifier column of the
# attribute file -- confirm against the maz_data.info() output above.
maz_key = 'mgra'
value_columns = ['PopDen', 'emp_total']
maz_data_1 = maz_data[[maz_key] + value_columns].copy()
print(maz_data_1)

# Only the measured attributes become floats; the join key keeps its dtype.
maz_data_1[value_columns] = maz_data_1[value_columns].astype(float)
maz_1 = maz.merge(maz_data_1, how='left', left_on='MAZ', right_on=maz_key)
print(maz_1)

# The five zones with the highest population density.
maz_1 = maz_1.sort_values(by='PopDen', ascending=False).head(5)
print(maz_1.info())

# From the Solution, how to change the data from CRS format to folium

import pyproj
# Set up the coordinate systems used to obtain longitude/latitude
input_Proj = pyproj.Proj(init="EPSG:2236", preserve_units=True)  # coordinate system of the source data
output_Proj = pyproj.Proj(init="EPSG:4326")  # coordinate system to convert into

예제 #5

0

파일 보기

import transportation_tutorials as tt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
pd.set_option('display.max_columns', 100)

# Load the trip and household tables and show the schema of each.
trip = pd.read_csv(tt.data('SERPM8-BASE2015-TRIPS'))
print(trip.info())
hh = pd.read_csv(tt.data('SERPM8-BASE2015-HOUSEHOLDS'))
print(hh.info())

# Count trip rows per household, then attach that count to the household
# table by matching hh_id against the index of the count series.
trips_per_household = trip.groupby('hh_id').size().rename('n_trips')
trip_hh_merge = hh.merge(
    trips_per_household,
    left_on='hh_id',
    right_index=True,
)
print(trip_hh_merge.head())
# Prepare a heatmap that visualizes the joint distribution of the number of trips taken by each household and the number of automobiles owned by the household.

# For households with 2 automobiles, what is the most frequent number of trips made by those households in the data?
'''resluts_1 = trip_hh_merge.pivot_table(
    index='autos',
    columns='n_trips',
    aggfunc='size'
)
print(resluts_1)
'''
sns.heatmap(trip_hh_merge.pivot_table(index='autos',
                                      columns='n_trips',

예제 #6

0

파일 보기

파일: Using Spatial Joins.py 프로젝트: ShirleyLau633/FDOT0726

import transportation_tutorials as tt
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point
import matplotlib.pyplot as plt

maz = gpd.read_file(tt.data('SERPM8-MAZSHAPE'))
taz = gpd.read_file(tt.data('SERPM8-TAZSHAPE'))
#print(maz.info())
#print(taz.info())

# Work on a copy so the original MAZ geometries stay untouched.
maz_points = maz.copy()

# Q1: Within the SERPM 7 region, what TAZ has the largest number of MAZs within its boundary? How many does it have?
# Swap each MAZ geometry for the single point given by its POINT_X / POINT_Y
# columns, so every MAZ can be matched by a point-in-polygon test.
maz_points.geometry = maz_points.apply(
    lambda row: Point(row.POINT_X, row.POINT_Y),
    axis=1,
)

# Spatial join: tag each MAZ point with the TAZ that contains it.
taz_maz = gpd.sjoin(maz_points, taz, how='left', op='within')

# Number of MAZ records per TAZ.
taz_maz_counts = taz_maz.groupby('TAZ_REG')['MAZ'].count()

# The TAZ with the most MAZs, and that count obtained two equivalent ways
# (overall maximum, and lookup by the winning label).
idx = taz_maz_counts.idxmax()
idx_max_1 = taz_maz_counts.max()
idx_max_2 = taz_maz_counts.loc[idx]

print(idx)
print(idx_max_1)
print(idx_max_2)

예제 #7

0

파일 보기

파일: Data Visualization.py 프로젝트: ShirleyLau633/FDOT0726

tm = {11,12,13,14,15,16,17,18,19}
trip_mode_dictionary[21] = 'TRANSIT'
trip_mode_counts = trips.trip_mode.map(lambda x: 21 if x in tm else x).value_counts(sort=False)
trip_mode_counts.index = trip_mode_counts.index.map(trip_mode_dictionary)


ax = trip_mode_counts.plot(kind='bar', color='green')
ax.set_title("Trip Mode Frequency")
ax.set_xlabel("Trip Mode")
ax.set_ylabel("Number of Trips");
plt.show()
'''

# Plotting Histogram Data
# Read the household table (first CSV column as the initial index) and
# re-index it by household id.
hh = pd.read_csv(tt.data('SERPM8-BASE2015-HOUSEHOLDS'), index_col=0)
hh = hh.set_index('hh_id')

# Income histogram with the default settings.
hh.income.hist()
plt.show()

# The same histogram in red without a grid, at increasing bin counts.
for bin_count in (50, 100, 200):
    hh.income.hist(bins=bin_count, grid=False, color='red')
    plt.show()

bins = np.array([0, 10, 20, 40, 60, 70, 80, 90, 100, 125, 150, 200, 1000

예제 #8

0

파일 보기

파일: Using Pivot Tables.py 프로젝트: ShirleyLau633/FDOT0726

import transportation_tutorials as tt
import pandas as pd
import numpy as np

tour = pd.read_csv(tt.data('SERPM8-BASE2015-TOURS'))
print(tour.info())

# Map integer tour mode codes to their names. The codes are the consecutive
# integers 1..20, so the mapping is built by numbering the names in order.
tour_mode_dict = dict(enumerate(
    (
        "DRIVEALONEFREE",
        "DRIVEALONEPAY",
        "SHARED2GP",
        "SHARED2PAY",
        "SHARED3GP",
        "SHARED3PAY",
        "TNCALONE",
        "TNCSHARED",
        "WALK",
        "BIKE",
        "WALK_MIX",
        "WALK_PRMW",
        "WALK_PRMD",
        "PNR_MIX",
        "PNR_PRMW",
        "PNR_PRMD",
        "KNR_MIX",
        "KNR_PRMW",
        "KNR_PRMD",
        "SCHBUS",
    ),
    start=1,
))

# Within the Jupiter study area, what is the average distance for bike tours to work? (Hint: It is 4.03 miles)

예제 #9

0

파일 보기

파일: Using Groupby and Transform.py 프로젝트: ShirleyLau633/FDOT0726

import transportation_tutorials as tt
import pandas as pd
import numpy as np

districts = pd.read_csv(tt.data('FL-COUNTY-BY-DISTRICT'))
print(districts.head())

bridges = pd.read_csv(tt.data('FL-BRIDGES'))

# Recall the necessary cleaning for the bridges data file:
# '-' placeholders become 0, the two count columns become integers,
# and any remaining missing values become 0.
count_columns = ['Poor #', 'SD #']
bridges = bridges.replace('-', 0)
bridges[count_columns] = bridges[count_columns].astype(int)
bridges = bridges.fillna(0)
print(bridges.head())
print(bridges.info())

# Within each FDOT District, what is the fraction of structurally deficient bridge deck area in each County?
# Make the county names comparable across the two tables: drop the last six
# characters of each bridges county label (the trailing county code) and
# upper-case the district table's county names.
bridges['County'] = bridges['County'].str[:-6]
districts['County'] = districts['County'].str.upper()

bridges_2 = bridges.merge(districts, on='County')
print(bridges_2.head())
print(bridges_2.info())

# Each row's SD area as a share of the SD area total of its district.
sd_area_by_district = bridges_2.groupby('District')['SD Area']
bridges_2['Fraction of SD area'] = sd_area_by_district.transform(
    lambda area: area / area.sum())
print(bridges_2)

# Which county has the highest share of structurally deficient bridge deck area within its FDOT District? (Hint: the correct answer is PALM BEACH.)

예제 #10

0

파일 보기

import transportation_tutorials as tt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Load the Florida bridges table and inspect the columns used below.
bridge = pd.read_csv(tt.data('FL-BRIDGES'))
print(bridge.info())
print(bridge.head())
print(bridge['SD #'])
print(bridge['County'])

# Which county has the highest number of structurally deficient bridges? Use a frequency plot to find your answer.
# Drop the final row (presumably a non-county summary row -- TODO confirm).
# Copy the slice so the column assignments below write to an independent
# frame instead of a view of the original.
bridge = bridge[:-1].copy()
# '-' placeholders count as zero deficient bridges.
bridge['SD #'] = bridge['SD #'].replace('-', 0).astype('int64')
print(bridge['SD #'])
# bridge_SD_counts = bridge['SD #'].value_counts(sort=True)
ax = bridge.plot(x='County', y='SD #', kind='bar', color='green', figsize=(25, 3))
ax.set_xlabel("County")
ax.set_ylabel("number of structurally deficient bridges")
plt.show()
# answer: DUVAL(031)
# Solution: bridge_count.sort_values(by = 'SD #', ascending = False).plot( x = 'County', y = 'SD #', kind = 'bar', figsize = (25,3), color = 'coral');
# bridge_count[bridge_count['SD #'] == bridge_count['SD #'].max()]['County'].values[0]

# Which county has the lowest percentage of bridges that are in good condition? Use a bar chart to find your answer.
# Good-condition count divided by the county's summed bridge total.
bridge['Fraction of Good #'] = bridge["Good #"] / bridge.groupby('County')["Total #"].transform('sum')
# Capture the Axes of this new chart; without the assignment, `ax` still
# refers to the first (already shown) figure and the label goes there.
ax = bridge.sort_values(by='Fraction of Good #', ascending=True).plot(
    x='County', y='Fraction of Good #', kind='bar', figsize=(25, 3), color='coral')
ax.set_xlabel("County")