-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
323 lines (251 loc) · 11.2 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
import pandas as pd
import math
from bokeh.plotting import figure, output_file, show
from bokeh.models import DatetimeTickFormatter, Panel, Tabs, HoverTool, CustomJS, ColumnDataSource, Panel, Tabs, DataTable, DateFormatter, TableColumn
from bokeh.layouts import widgetbox, column
from gmplot import gmplot
from bokeh.resources import CDN
from bokeh.embed import autoload_static
import os
#NOTE(review): this Google API key is hard-coded in the source file. It should be
#supplied from outside the code (environment / secrets store) and this key rotated.
#Nothing in the visible code reads GOOGLE_API_KEY back; presumably it is meant for
#the Google Maps pages written by gmplot - confirm before removing.
os.environ["GOOGLE_API_KEY"] = "AIzaSyCwQDcVpWzxDW7lnWqJjivaGXO3YBo2-IU"
#---------------------LOADING CSV INTO PANDAS DATAFRAME------------------------#
def load_data(fname):
    '''This function reads the dispatch CSV into a DataFrame, removes the
    columns the analyses do not use and normalises the column types.
    Input: fname (str; path of the CSV file)
    Output: df (pd.DataFrame)'''
    df = pd.read_csv(fname)
    #dropping unused columns
    #(keyword form: passing the axis positionally is rejected by pandas >= 2.0)
    df = df.drop(columns=['watch_date', 'entry_timestamp', 'dispatch_timestamp',
                          'response_timestamp', 'city',
                          'unit_sequence_in_call_dispatch'])
    #ensuring proper types
    for col in ('call_date', 'received_timestamp', 'on_scene_timestamp',
                'transport_timestamp', 'hospital_timestamp',
                'available_timestamp'):
        df[col] = pd.to_datetime(df[col])
    #these codes are labels, not quantities, so they are kept as plain objects
    for col in ('final_priority', 'supervisor_district',
                'neighborhood_district'):
        df[col] = df[col].astype('object')
    #blank spaces are filled with NaN
    return df
#loads the dispatch subset once, at import time; the commented-out driver calls
#further down this file pass this module-level df to the analysis functions
df = load_data('sfpd_dispatch_data_subset.csv')
#sanity check: 30 columns are expected to remain after load_data has dropped the
#unused ones (fails fast if the CSV layout differs)
assert(30 == df.shape[1])
# print(df.dtypes)
# print(df.head())
#---------------------AVG RESPONSE TIME VS CALL TIME---------------------------#
def response_to_call(df, t=30):
    '''This function graphs the average dispatch response time against the time
    of day the call was made.
    The response time of a row is the whole number of minutes between its
    'received_timestamp' and its 'on_scene_timestamp'. Rows without an on-scene
    timestamp, or whose response is not strictly between 0 and 60 minutes, are
    left out of the averages.
    Input: df (pd.DataFrame; 'received_timestamp' and 'on_scene_timestamp'
               must be datetime columns)
           t (int; the granularity of each time period in min for which an
               average response time is calculated)
    Output: graph (bokeh figure; the caller decides where it is shown)'''
    received = df['received_timestamp']
    #whole minutes from call receipt to arrival; NaN when a timestamp is missing
    minutes = (df['on_scene_timestamp'] - received).dt.total_seconds() // 60
    #within an acceptable range of values (comparisons against NaN are False,
    #so rows with a missing timestamp drop out here as well)
    keep = (minutes > 0) & (minutes < 60)
    #granularity of average: every call is assigned to the start of its t-minute
    #slot within the hour. Working on the column itself avoids looking rows up
    #by position with their index label, which only worked for a 0..n-1 index.
    time_of_day = [
        stamp.time().replace(minute=stamp.minute - stamp.minute % t,
                             second=0, microsecond=0)
        for stamp in received[keep]
    ]
    response_call_df = pd.DataFrame(data = {'response': minutes[keep].values,
                                            'call_time': time_of_day})
    #calculates average
    avg_response_time = response_call_df.groupby(['call_time'])['response'].mean()
    #generates column data
    data = {'time':avg_response_time.index,
            'response':avg_response_time}
    source = ColumnDataSource(data=data)
    #plots averages
    TOOLS="hover,crosshair,pan,wheel_zoom,zoom_in,zoom_out,box_zoom,save"
    p = figure(plot_width = 700, plot_height = 350, title = "Average Response Time VS Call Time", tools = TOOLS)
    p.line(x = 'time', y = 'response', source = source)
    p.xaxis.formatter=DatetimeTickFormatter()
    p.xaxis.major_label_orientation = math.pi/4
    p.grid.grid_line_alpha=0.3
    p.xaxis.axis_label = 'Time of Day'
    p.yaxis.axis_label = 'Average Response'
    #the hover tool was requested through TOOLS; configure that instance
    hover = p.select_one(HoverTool)
    hover.point_policy = "follow_mouse"
    hover.tooltips = [
        ("Time", "@time{%T}"),
        ("Dispatch Response Time", "@response")
    ]
    hover.formatters = {'time':'datetime'}
    return p
def tabbed_call(df):
    '''This function writes response_tabs.html and opens it: one tab per
    averaging granularity (15, 30 and 60 minutes) of the response-time graph.
    Input: df (pd.DataFrame)
    Output: tabbed graph (HTML)'''
    output_file("response_tabs.html")
    #(bucket width in minutes, tab caption), in display order
    granularities = ((15, "Quarter Hour"), (30, "Half Hour"), (60, "Hour"))
    panels = [
        Panel(child=response_to_call(df, width), title = caption)
        for width, caption in granularities
    ]
    show(Tabs(tabs=panels))
#tabbed_call(df)
#---------------------CALL LOCATION HEAT MAP-----------------------------------#
def calls_per_area(df):
    '''This function provides a heat map of the number of calls through the
    San Francisco area. Rows that lack a latitude or a longitude are skipped;
    if no row has coordinates the map is still written, without a heat layer.
    Input: df (pd.DataFrame with 'latitude' and 'longitude' columns)
    Output: heatmap (html; written to heatmap.html)'''
    #map centre and zoom level are fixed for this city-wide view
    gmap = gmplot.GoogleMapPlotter(37.766956, -122.438481, 13)
    #column selection replaces the row-by-row loop; a missing coordinate
    #cannot be placed on the map
    coords = df[['latitude', 'longitude']].dropna()
    #with nothing to plot, unpacking zip(*[]) used to raise ValueError
    if not coords.empty:
        gmap.heatmap(coords['latitude'].tolist(), coords['longitude'].tolist())
    gmap.draw("heatmap.html")
# calls_per_area(df)
# tempHolder=''
# oldLine='<script type="text/javascript" src="https://maps.googleapis.com/maps/api/js?libraries=visualization&sensor=true_or_false"></script>'
# newLine='<script type="text/javascript" src="https://maps.googleapis.com/maps/api/js?libraries=visualization&sensor=true_or_false&key=AIzaSyCwQDcVpWzxDW7lnWqJjivaGXO3YBo2-IU"></script>'
# #Open the file created by gmplot, do a find and replace.
# #My file is in flask, so it is in static/map.html, change path to your file
# with open('heatmap.html') as fh:
# for line in fh:
# tempHolder += line.replace(oldLine,newLine)
# fh.close
# #Now open the file again and overwrite with the edited text
# fh=open('heatmap.html', 'w')
# fh.write(tempHolder)
# fh.close
#---------------------CRIME LOCATION HEAT MAP----------------------------------#
def crime_per_area(df):
    '''This function provides a heat map of the number of crime-related calls
    through the San Francisco area. Crime-related calls are distinguished as
    ambulance dispatches, i.e. rows whose 'unit_type' is "MEDIC". Rows that
    lack a latitude or a longitude are skipped; if no row qualifies the map is
    still written, without a heat layer.
    Input: df (pd.DataFrame with 'unit_type', 'latitude' and 'longitude'
               columns)
    Output: heatmap (html; written to crime_heatmap.html)'''
    #map centre and zoom level are fixed for this city-wide view
    gmap = gmplot.GoogleMapPlotter(37.766956, -122.438481, 13)
    #boolean mask replaces the row-by-row loop; a missing coordinate cannot be
    #placed on the map
    is_medic = df['unit_type'] == "MEDIC"
    coords = df.loc[is_medic, ['latitude', 'longitude']].dropna()
    #with no matching rows, unpacking zip(*[]) used to raise ValueError
    if not coords.empty:
        gmap.heatmap(coords['latitude'].tolist(), coords['longitude'].tolist())
    gmap.draw("crime_heatmap.html")
# crime_per_area(df)
#---------------------AMBULANCE TRANSPORT VS CALL TIME-------------------------#
def ambulance_response(df, t=30):
    '''This function graphs the transport time of an ambulance against the time
    of day the call was made.
    The transport time of a row is the whole number of minutes between its
    'received_timestamp' and its 'hospital_timestamp'. Rows without a hospital
    timestamp, or whose transport time is not strictly between 0 and 60
    minutes, are left out of the averages.
    Input: df (pd.DataFrame; 'received_timestamp' and 'hospital_timestamp'
               must be datetime columns)
           t (int; the granularity of each time period in min for which an
               average response time is calculated)
    Output: graph (bokeh figure; the caller decides where it is shown)'''
    received = df['received_timestamp']
    #whole minutes from call receipt to hospital; NaN when a timestamp is missing
    minutes = (df['hospital_timestamp'] - received).dt.total_seconds() // 60
    #within an acceptable range of values (comparisons against NaN are False,
    #so rows with a missing timestamp drop out here as well)
    keep = (minutes > 0) & (minutes < 60)
    #granularity of average: every call is assigned to the start of its t-minute
    #slot within the hour. Working on the column itself avoids looking rows up
    #by position with their index label, which only worked for a 0..n-1 index.
    time_of_day = [
        stamp.time().replace(minute=stamp.minute - stamp.minute % t,
                             second=0, microsecond=0)
        for stamp in received[keep]
    ]
    response_df = pd.DataFrame(data = {'transport': minutes[keep].values,
                                       'call_time': time_of_day})
    #calculates average
    avg_response_time = response_df.groupby(['call_time'])['transport'].mean()
    #generates column data
    data = {'time':avg_response_time.index,
            'response':avg_response_time}
    source = ColumnDataSource(data=data)
    #plots averages
    TOOLS="hover,crosshair,pan,wheel_zoom,zoom_in,zoom_out,box_zoom,save"
    p = figure(plot_width = 700, plot_height = 350, title = "Average Ambulance Transport Time VS Call Time", tools = TOOLS)
    p.line(x = 'time', y = 'response', source = source)
    p.xaxis.formatter=DatetimeTickFormatter()
    p.xaxis.major_label_orientation = math.pi/4
    p.grid.grid_line_alpha=0.3
    p.xaxis.axis_label = 'Time of Day'
    p.yaxis.axis_label = 'Average Ambulance Transport Time'
    #the hover tool was requested through TOOLS; configure that instance
    hover = p.select_one(HoverTool)
    hover.point_policy = "follow_mouse"
    hover.tooltips = [
        ("Time", "@time{%T}"),
        ("Ambulance Response Time", "@response")
    ]
    hover.formatters = {'time':'datetime'}
    return p
def tabbed_am(df):
    '''This function writes ambulance_tabs.html and opens it: one tab per
    averaging granularity (15, 30 and 60 minutes) of the ambulance graph.
    Input: df (pd.DataFrame)
    Output: tabbed graph (HTML)'''
    output_file("ambulance_tabs.html")
    #(bucket width in minutes, tab caption), in display order
    granularities = ((15, "Quarter Hour"), (30, "Half Hour"), (60, "Hour"))
    panels = [
        Panel(child=ambulance_response(df, width), title = caption)
        for width, caption in granularities
    ]
    show(Tabs(tabs=panels))
#tabbed_am(df)
#---------------------AREA VS DISPATCH TIME------------------------------------#
def longest_dispatch(df):
    '''This function finds the zipcode(s) with the highest average dispatch
    response time and shows a sortable table of the average for every zipcode
    (written to area_times.html).
    A row counts only if it has an on-scene timestamp and its response, in
    whole minutes from 'received_timestamp' to 'on_scene_timestamp', is
    strictly between 0 and 120.
    Input: df (pd.DataFrame)
    Output: (zipcodes, time) tuple; zipcodes is the set of zipcodes sharing the
            highest average and time is that average (an empty set and 0 when
            no row qualifies)'''
    output_file("area_times.html")
    #collects the qualifying response times under their zipcode
    minutes_by_zip = {}
    for _, record in df.iterrows():
        arrived = record['on_scene_timestamp']
        if pd.isnull(arrived):
            continue
        elapsed = (arrived - record['received_timestamp']).total_seconds()//60
        if not (0 < elapsed < 120):
            continue
        minutes_by_zip.setdefault(record['zipcode_of_incident'], []).append(elapsed)
    #one average per zipcode, kept in first-seen order for the table
    zips = list(minutes_by_zip)
    times = [sum(samples)/len(samples) for samples in minutes_by_zip.values()]
    #every stored sample is positive, so the largest average beats the 0 start
    max_time = max(times, default=0)
    max_zip = {code for code, mean in zip(zips, times) if mean == max_time}
    source = ColumnDataSource(data=dict(zipcodes=zips,averages=times))
    columns = [
        TableColumn(field="zipcodes", title="Zipcode"),
        TableColumn(field="averages", title="Average Dispatch Time"),
    ]
    data_table = DataTable(source=source, columns=columns, width=700, height=300, sortable=True)
    show(widgetbox(data_table))
    return (max_zip, max_time)
def most_common_call(df, zipcode):
    '''This function finds the most common unit dispatch for a zipcode.
    The zipcode and the per-unit-type tally are printed as a side effect.
    Input: df (pd.DataFrame with 'unit_type' and 'zipcode_of_incident' columns)
           zipcode (value compared for equality against 'zipcode_of_incident')
    Output: list of the unit type(s) with the highest count, in order of first
            appearance; empty when the zipcode has no dispatches'''
    print(zipcode)
    #unit types dispatched to this zipcode, ignoring rows with no unit type
    in_zip = df['zipcode_of_incident'] == zipcode
    units = df.loc[in_zip, 'unit_type'].dropna()
    call_dict = dict()
    for unit in units:
        #counts start from 0: the previous default of 1 made every printed
        #tally one too high
        call_dict[unit] = call_dict.get(unit, 0) + 1
    print(call_dict)
    if not call_dict:
        return []
    max_count = max(call_dict.values())
    #ties are all reported, in the order the unit types were first seen
    return [unit for unit, count in call_dict.items() if count == max_count]
# values = longest_dispatch(df)
# print(values)
# print(most_common_call(df,list(values[0])[0]))