# scheduler_functions.py

"""
Description: This file contains all functions that are related to the 
    scheduler
    
@author: Robert Hennessy (robertghennessy@gmail.com)
"""

import datetime as dt
import numpy as np
import pandas as pd

from apscheduler.schedulers.blocking import BlockingScheduler
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore

import config


# Names of the weekday columns in the trip table and the matching
# day-of-week codes handed to the scheduler; both come from config.
weekday_names = config.weekday_names
day_of_week_codes = config.day_of_week_codes
# Prefix placed in front of a job id, keyed by the kind of job.
job_identifier = {
    'traffic': 'trf-',
    'transit-siri': 't_siri-',
    'transit-gtfs-rt': 't_gtfs_rt-',
}


def run_tasks(sql_loc):
    """
    Start a blocking scheduler backed by the jobs saved in the task database

    :param sql_loc: location of the sql task database
    :type sql_loc: string

    :return None
    """
    task_store = SQLAlchemyJobStore(url='sqlite:///%s' % sql_loc)
    scheduler = BlockingScheduler(jobstores={'default': task_store})
    # NOTE(review): start() on a BlockingScheduler presumably does not
    # return until the scheduler is stopped, so the job listing below is
    # only printed afterwards - confirm this ordering is intended.
    scheduler.start()
    scheduler.print_jobs()
    

def add_traffic_jobs(function_to_run, csv_path_in, scheduler_sql_loc,
                     out_sql_loc):
    """
    Create the job database that the scheduler uses.

    One cron job is added per row of the trip csv. The job fires at the
    hour and minute of the trip's ``departure_time_start`` on every weekday
    whose column is truthy for that trip.

    :param function_to_run: The function that is being scheduled
    :type function_to_run: function

    :param csv_path_in: The path to the csv file that contains the trip
        information. The first column is used as the index.
    :type csv_path_in: string

    :param scheduler_sql_loc: location of the sql job database generated by
        this program and used by the scheduler
    :type scheduler_sql_loc: string

    :param out_sql_loc: location of the sql job database that the scheduled
        task stores its results
    :type out_sql_loc: string

    :return None
    """
    schedule_trips = pd.read_csv(csv_path_in, index_col=0)
    schedule_trips = schedule_trips.sort_values([
        'departure_time_timedelta_start', 'arrival_time_timedelta_stop'])
    # open the scheduler object and associate the job database with it
    scheduler = BackgroundScheduler()
    scheduler.add_jobstore('sqlalchemy', url='sqlite:///%s' %
                                             scheduler_sql_loc)
    # loop through all of the trips and add them to the jobs database.
    # iterrows walks the sorted frame row by row and hands back each row
    # together with its own index label, so the job id and the trip data
    # always describe the same trip (a label lookup with a positional
    # counter would mix rows up once the frame has been sorted).
    for trip_index, trip in schedule_trips.iterrows():
        trip_id = trip['trip_id']
        start_station = trip['short_stop_name_start']
        end_station = trip['short_stop_name_stop']
        # create the location dictionaries
        start_loc = {
            "lat": trip['stop_lat_start'],
            "lng": trip['stop_lon_start']
        }
        end_loc = {
            "lat": trip['stop_lat_stop'],
            "lng": trip['stop_lon_stop']
        }
        sched_time = dt.datetime.strptime(trip['departure_time_start'],
                                          "%H:%M:%S")
        # comma separated list of the day codes on which the trip runs
        day_code = ','.join(
            code for name, code in zip(weekday_names, day_of_week_codes)
            if trip[name])
        # misfire_grace_time - seconds after the designated runtime that
        # the job is still allowed to be run
        scheduler.add_job(function_to_run, 'cron', day_of_week=day_code,
                          hour=sched_time.hour, minute=sched_time.minute,
                          misfire_grace_time=120,
                          id=(job_identifier['traffic']+str(trip_index)),
                          args=[trip_index, trip_id, start_station,
                                end_station, start_loc, end_loc,
                                out_sql_loc])
    scheduler.print_jobs()
    # NOTE(review): the scheduler is started and shut down straight away,
    # presumably so that the pending jobs are written to the job store -
    # confirm against the APScheduler documentation.
    scheduler.start()
    scheduler.shutdown()
    return None


def add_periodic_job(sched_sql_loc, function_to_run, time_df, id_modifier,
                     args):
    """
    Adds one cron job per row of time_df to the scheduler database. Every
    job calls function_to_run with the same base arguments followed by the
    index label of the row it was created from.

    :param sched_sql_loc: location of the sql job database generated by
        this program and used by the scheduler
    :type sched_sql_loc: string

    :param function_to_run: The function that is being scheduled
    :type function_to_run: function

    :param time_df: pandas data frame that contains when the function should
        run (columns 'hours', 'minutes', 'seconds' and 'day_code')
    :type time_df: pandas data frame

    :param id_modifier: string that will be added to the id for add job
    :type id_modifier: string

    :param args: list of arguments that are used by function_to_run
    :type args: list
    """
    # open the scheduler object and associate the job database with it
    scheduler = BackgroundScheduler()
    scheduler.add_jobstore('sqlalchemy', url='sqlite:///%s' % sched_sql_loc)
    hours = time_df['hours'].values
    minutes = time_df['minutes'].values
    seconds = time_df['seconds'].values
    day_codes = time_df['day_code'].values
    row_labels = time_df.index.values
    schedule_rows = zip(row_labels, day_codes, hours, minutes, seconds)
    for row_label, day_code, hour, minute, second in schedule_rows:
        # each job gets its own copy of the argument list so that the row
        # label appended for one job does not leak into the next one
        job_args = args.copy()
        job_args.append(row_label)
        # misfire_grace_time - seconds after the designated runtime that
        # the job is still allowed to be run
        scheduler.add_job(function_to_run, 'cron',
                          day_of_week=day_code,
                          hour=int(hour),
                          minute=int(minute),
                          second=int(second),
                          misfire_grace_time=120,
                          id=(id_modifier+str(row_label)),
                          args=job_args)
    scheduler.print_jobs()
    scheduler.start()
    scheduler.shutdown()


def create_collect_time(collect_time, collect_frequency,
                        collect_day_code, csv_file_loc):
    """
    This function constructs the data frame that tells apscheduler when to
        run a periodic job. It returns the data frame and stores it in a
        csv file.

    The run times go from the start time up to and including the end time
        in steps of collect_frequency. Each run time is rounded to the
        nearest whole second before it is split into hours, minutes and
        seconds, so the seconds column always stays within 0-59. Hours are
        taken modulo 24, so a run time at or past 24:00 wraps around to the
        start of the day (with the same day code).

    :param collect_time: (start, end) time to collect data in hours
    :type collect_time: tuple of floats

    :param collect_frequency: frequency to collect the data in minutes
    :type collect_frequency: float

    :param collect_day_code: days of the week to collect the data
    :type collect_day_code: string

    :param csv_file_loc: location for the csv file to store data frame
    :type csv_file_loc: string

    :return df: data frame that was constructed by this function, with the
        columns 'day_code', 'hours', 'minutes' and 'seconds'
    :type df: pandas data frame
    """
    collect_start_time = collect_time[0]
    collect_end_time = collect_time[1]
    # run times expressed in seconds since midnight; the stop value is
    # pushed out by one step so that the end time itself is included
    sched_time = np.arange(60*60*collect_start_time,
                           60*60*collect_end_time
                           + 60*collect_frequency,
                           60*collect_frequency)
    # round to whole seconds BEFORE splitting: rounding the seconds part
    # afterwards could turn e.g. 59.6 s into second=60 without carrying
    # the extra minute
    sched_time = np.round(sched_time)
    sched_time_seconds = np.mod(sched_time, 60)
    sched_time = (sched_time-sched_time_seconds)/60
    sched_time_minutes = np.mod(sched_time, 60)
    sched_time = (sched_time-sched_time_minutes)/60
    # there are 24 hours in a day (not 60)
    sched_time_hours = np.mod(sched_time, 24)
    d = {'day_code': collect_day_code, 'hours': sched_time_hours,
         'minutes': sched_time_minutes, 'seconds': sched_time_seconds}
    df = pd.DataFrame(data=d)
    df.to_csv(csv_file_loc)
    return df