-
Notifications
You must be signed in to change notification settings - Fork 0
/
scheduler_functions.py
200 lines (169 loc) · 7.56 KB
/
scheduler_functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
"""
Description: This file contains all functions that are related to the
scheduler
@author: Robert Hennessy (robertghennessy@gmail.com)
"""
import datetime as dt
import numpy as np
import pandas as pd
from apscheduler.schedulers.blocking import BlockingScheduler
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore
import config
# Day-of-week lookups shared with the rest of the project. The two sequences
# are walked in parallel when building a cron day_of_week string, so entry i
# of one is assumed to describe the same day as entry i of the other.
weekday_names = config.weekday_names
day_of_week_codes = config.day_of_week_codes

# Prefix placed in front of a row index to form a scheduler job id, keyed by
# the kind of job being scheduled.
job_identifier = {
    'traffic': 'trf-',
    'transit-siri': 't_siri-',
    'transit-gtfs-rt': 't_gtfs_rt-',
}
def run_tasks(sql_loc):
    """
    Start a blocking scheduler that runs the jobs stored in the task
    database.

    :param sql_loc: location of the sql task database
    :type sql_loc: string
    :return: None
    """
    store_url = 'sqlite:///%s' % sql_loc
    scheduler = BlockingScheduler(
        jobstores={'default': SQLAlchemyJobStore(url=store_url)})
    # NOTE(review): start() on a BlockingScheduler presumably does not return
    # until the scheduler is stopped, so the job listing below would only be
    # printed at that point -- confirm this ordering is intended.
    scheduler.start()
    scheduler.print_jobs()
def add_traffic_jobs(function_to_run, csv_path_in, scheduler_sql_loc,
                     out_sql_loc):
    """
    Create the job database that the scheduler uses.

    One cron job is added for every row of the trip csv file. The job fires
    at the hour and minute of the trip's ``departure_time_start`` on every
    weekday whose column (named in ``weekday_names``) is truthy for that row.
    The job id is the traffic prefix from ``job_identifier`` followed by the
    row's csv index label.

    :param function_to_run: The function that is being scheduled. It is
        called with (trip_index, trip_id, start_station, end_station,
        start_loc, end_loc, out_sql_loc).
    :type function_to_run: function
    :param csv_path_in: The path to the csv file that contains the trip
        information. The first column is used as the index.
    :type csv_path_in: string
    :param scheduler_sql_loc: location of the sql job database generated by
        this program and used by the scheduler
    :type scheduler_sql_loc: string
    :param out_sql_loc: location of the sql job database that the scheduled
        task stores its results
    :type out_sql_loc: string
    :return: None
    """
    schedule_trips = pd.read_csv(csv_path_in, index_col=0)
    schedule_trips = schedule_trips.sort_values([
        'departure_time_timedelta_start', 'arrival_time_timedelta_stop'])
    # open the scheduler object and associate the job database with it
    scheduler = BackgroundScheduler()
    scheduler.add_jobstore('sqlalchemy', url='sqlite:///%s' %
                           scheduler_sql_loc)
    # Loop through all of the trips and add them to the jobs database.
    # iterrows() yields each row together with its own index label, so the
    # label used for the job id always belongs to the row being scheduled.
    # (Looking the row up with .loc[position] after sorting paired a row with
    # the label of a different row, and failed for non 0..n-1 indexes.)
    for trip_index, trip in schedule_trips.iterrows():
        trip_id = trip['trip_id']
        start_station = trip['short_stop_name_start']
        end_station = trip['short_stop_name_stop']
        # create the location dictionaries
        start_loc = {
            "lat": trip['stop_lat_start'],
            "lng": trip['stop_lon_start']
        }
        end_loc = {
            "lat": trip['stop_lat_stop'],
            "lng": trip['stop_lon_stop']
        }
        sched_time = dt.datetime.strptime(trip['departure_time_start'],
                                          "%H:%M:%S")
        # comma separated list of the day codes on which this trip runs
        day_code = ','.join(
            code for name, code in zip(weekday_names, day_of_week_codes)
            if trip[name])
        # misfire_grace_time - seconds after the designated runtime that
        # the job is still allowed to be run
        scheduler.add_job(function_to_run, 'cron', day_of_week=day_code,
                          hour=sched_time.hour, minute=sched_time.minute,
                          misfire_grace_time=120,
                          id=(job_identifier['traffic'] + str(trip_index)),
                          args=[trip_index, trip_id, start_station,
                                end_station, start_loc, end_loc,
                                out_sql_loc])
    scheduler.print_jobs()
    # NOTE(review): the scheduler is started and immediately shut down;
    # presumably this is what writes the pending jobs into the job store --
    # confirm against the apscheduler documentation.
    scheduler.start()
    scheduler.shutdown()
    return None
def add_periodic_job(sched_sql_loc, function_to_run, time_df, id_modifier,
                     args):
    """
    Add one cron job per row of ``time_df`` to the scheduler database.

    Every job calls ``function_to_run`` with a copy of ``args`` to which the
    row's index value has been appended, so the base arguments are the same
    for all runs.

    :param sched_sql_loc: location of the sql job database generated by
        this program and used by the scheduler
    :type sched_sql_loc: string
    :param function_to_run: The function that is being scheduled
    :type function_to_run: function
    :param time_df: pandas data frame that contains when the function should
        run. The columns 'hours', 'minutes', 'seconds' and 'day_code' are
        read.
    :type time_df: pandas data frame
    :param id_modifier: string that is placed in front of the row index to
        form the job id
    :type id_modifier: string
    :param args: list of arguments that are used by function_to_run. The
        list itself is not modified.
    :type args: list
    """
    # open the scheduler object and associate the job database with it
    scheduler = BackgroundScheduler()
    scheduler.add_jobstore('sqlalchemy', url='sqlite:///%s' % sched_sql_loc)
    schedule_rows = zip(time_df['hours'].values,
                        time_df['minutes'].values,
                        time_df['seconds'].values,
                        time_df['day_code'].values,
                        time_df.index.values)
    for hour, minute, second, day_code, row_label in schedule_rows:
        # each job gets its own argument list ending with the row label
        job_args = args.copy()
        job_args.append(row_label)
        # misfire_grace_time - seconds after the designated runtime that
        # the job is still allowed to be run
        scheduler.add_job(function_to_run, 'cron',
                          day_of_week=day_code,
                          hour=int(hour),
                          minute=int(minute),
                          second=int(second),
                          misfire_grace_time=120,
                          id=(id_modifier + str(row_label)),
                          args=job_args)
    scheduler.print_jobs()
    scheduler.start()
    scheduler.shutdown()
def create_collect_time(collect_time, collect_frequency,
                        collect_day_code, csv_file_loc):
    """
    Construct the data frame of run times for a periodic job.

    Run times are spaced ``collect_frequency`` minutes apart, starting at
    the start hour and running up to the end hour. The data frame is
    returned and also stored in a csv file.

    :param collect_time: (start, end) time to collect data in hours
    :type collect_time: tuple of floats
    :param collect_frequency: frequency to collect the data in minutes
    :type collect_frequency: float
    :param collect_day_code: days of the week to collect the data; the same
        value is stored in every row
    :type collect_day_code: string
    :param csv_file_loc: location for the csv file to store data frame
    :type csv_file_loc: string
    :return df: data frame with the columns 'day_code', 'hours', 'minutes'
        and 'seconds'. The time columns hold whole numbers with hours in
        0-23, minutes in 0-59 and seconds in 0-59.
    :type df: pandas data frame
    """
    collect_start_time = collect_time[0]
    collect_end_time = collect_time[1]
    step_seconds = 60 * collect_frequency
    # the stop value is pushed out by one step so that the end time itself
    # is part of the schedule
    sched_time = np.arange(60*60*collect_start_time,
                           60*60*collect_end_time + step_seconds,
                           step_seconds)
    # Round to whole seconds before splitting into fields. Rounding the
    # seconds field after the split could turn 59.99... into 60 without
    # carrying over into the minutes.
    total_seconds = np.round(sched_time).astype(int)
    total_minutes, sched_time_seconds = np.divmod(total_seconds, 60)
    total_hours, sched_time_minutes = np.divmod(total_minutes, 60)
    # Hours wrap at 24 (not 60), so a window that reaches midnight gives
    # hour 0 instead of hour 24. The day code is NOT shifted for wrapped
    # times.
    sched_time_hours = np.mod(total_hours, 24)
    d = {'day_code': collect_day_code, 'hours': sched_time_hours,
         'minutes': sched_time_minutes, 'seconds': sched_time_seconds}
    df = pd.DataFrame(data=d)
    df.to_csv(csv_file_loc)
    return df