Ejemplo n.º 1
0
def store_weather(weather_data, table_name):
    """Flatten a raw weather record and store the selected fields in ``table_name``.

    The record is first passed through ``flatten_dict``; missing rainfall and
    timezone values are filled in, the fields of interest are picked out and
    the result is inserted into the database. If the insert fails for any
    reason, an update of the row with the same ``date`` is attempted instead.

    Args:
        weather_data: raw weather record; after flattening it must provide
            ``dt``, ``main_temp``, ``main_feels_like``, ``wind_speed``,
            ``wind_deg``, ``weather_0_main`` and ``weather_0_description``.
        table_name: name of the table the row is written to.

    Returns:
        ``None`` when the insert (or the fallback update) succeeds, otherwise
        the exception raised by the failed update.
    """

    weather_data = flatten_dict(weather_data)

    # if there's no 1 hour rainfall measurement, fall back to the 3 hour value
    # (ie. for forecast data) and finally to a rainfall of 0
    if "rain_1h" not in weather_data:
        weather_data["rain_1h"] = weather_data.get("rain_3h", 0.0)

    # if there's no timezone offset value then create one
    # NOTE(review): the offset is taken relative to GMT, so this presumably
    # always evaluates to 0.0 - confirm that is the intended default
    if "timezone" not in weather_data:
        gmt = pytz.timezone("GMT")
        dt = pytz.utc.localize(datetime.utcfromtimestamp(weather_data["dt"]))
        weather_data["timezone"] = dt.astimezone(gmt).utcoffset().total_seconds()

    # select only desired fields from raw data
    data = {"date": datetime.fromtimestamp(weather_data["dt"]).strftime("%Y-%m-%d %H:%M:%S"),
            "timezone": weather_data["timezone"],
            "temp": weather_data["main_temp"],
            "feels_like": weather_data["main_feels_like"],
            "wind_speed": weather_data["wind_speed"],
            "wind_deg": weather_data["wind_deg"],
            "weather_main": weather_data["weather_0_main"],
            "weather_description": weather_data["weather_0_description"],
            "rain": weather_data["rain_1h"]
            }

    # push new data to database...
    # (keyword is ``retrieving_data``, matching every other execute_sql call
    # in this file; the previous misspelling could not bind to that parameter)
    try:
        sql_query = db.construct_sql(query_type="insert", table_name=table_name, data=data)
        db.execute_sql(sql_query, database=database, user=user, password=password, host=host, port=port,
                       retrieving_data=False)
    except Exception:
        # ...and if the insert is rejected, overwrite the row with the same date instead
        try:
            sql_query = db.construct_sql(query_type="update", table_name=table_name,
                                         data=data, predicates={"date": data["date"]})
            db.execute_sql(sql_query, database=database, user=user, password=password,
                           host=host, port=port, retrieving_data=False)
            print("Update successful")
        except Exception as e:
            print(e)
            return e
def stops_on_route(route, main=False, direction=1):
    """Query the ``routes`` table and return the stops of one sub-route.

    Args:
        route: when ``main`` is true, the route ID itself; otherwise a
            sub-route key whose text before the first underscore is the
            route ID.
        main: if true, pick the sub-route flagged as "main" in the requested
            direction instead of looking ``route`` up directly.
        direction: direction the main sub-route must have; only consulted
            when ``main`` is true.

    Returns:
        The "stops" entry of the selected sub-route, or ``None`` when
        ``main`` is true and no sub-route matches.
    """
    # a sub-route key carries its parent route ID before the first underscore
    route_id = route if main else route.split("_")[0]

    query = db.construct_sql(table_name="routes", query_type="select_where", data={"ID": route_id})
    rows = db.execute_sql(query, database, user, password, host, port, retrieving_data=True)

    # second column of the first row holds the sub-routes, keyed by sub-route name
    sub_routes = rows[0][1]

    if not main:
        return sub_routes[route]["stops"]

    # return the 'main' sub-route in the passed direction
    for key in sub_routes.keys():
        details = sub_routes[key]
        if details["main"] and (details["direction"] == direction):
            return details["stops"]
    return None
Ejemplo n.º 3
0
def get_weather_from_db():
    """Fetch the 'current' weather record from our postgres database.

    Runs a select-all query against the ``weather_data_current`` table and
    returns the first row of the result.
    """
    query = db.construct_sql(query_type="select_all",
                             table_name="weather_data_current")

    rows = db.execute_sql(query, database, user, password, host, port,
                          retrieving_data=True)

    first_row = rows[0]
    return first_row
def get_mean_time(route, direction, segments, month, day, time):
    """Look up the stored mean values for the given segments and combine them.

    Queries the ``route_<route>_<direction>_means`` table for the row matching
    the month, weekday and time group, restricted to the ``segments`` columns.

    Returns:
        ``None`` if no row matches, the result of ``sum_values`` applied to
        the first matching row otherwise, or the exception object if the
        lookup or the summation raised.
    """
    means_table = f"route_{route!s}_{direction!s}_means"
    filters = {"month": month, "weekday": day, "timegroup": str(time)}
    query = db.construct_sql(table_name=means_table,
                             query_type="select_where",
                             data=filters,
                             column_names=segments,
                             verbose=False)

    try:
        rows = db.execute_sql(query, database, user, password, host, port, retrieving_data=True)

        # only a non-empty response can be summed
        if len(rows) >= 1:
            return sum_values(rows[0])
        return None

    except Exception as error:
        # errors are handed back to the caller rather than raised
        return error
Ejemplo n.º 5
0
def get_proportion(route, direction, startstop, endstop, weekday, month,
                   time_group):
    """Return the proportion of the total route journey covered by the user's journey.

    It first attempts to use the calculated proportions stored for that day of
    the week, month and time group in the ``route_<route>_<direction>_proportions``
    table, summing the segment values from the segment starting at
    ``startstop`` to the segment ending at ``endstop``. If that lookup fails,
    either stop cannot be located, or the sum is not positive, it falls back
    to ``quickanddirty``.

    Args:
        route: route name; lower-cased to build the table name.
        direction: direction identifier used in the table name.
        startstop: stop identifier matched against the start of a segment column.
        endstop: stop identifier matched against the end of a segment column.
        weekday: integer 0-6 (0 = Monday) selecting the weekday label.
        month: integer 1-12 selecting the month label.
        time_group: time group, converted to ``str`` for the query.

    Returns:
        The summed proportion when it is greater than zero, otherwise the
        value returned by ``quickanddirty``.
    """

    # dictionaries for use finding the relevant section of the code in the database
    days = {
        0: "Monday",
        1: "Tuesday",
        2: "Wednesday",
        3: "Thursday",
        4: "Friday",
        5: "Saturday",
        6: "Sunday"
    }
    # NOTE(review): "Febuary" presumably mirrors the spelling stored in the
    # database - confirm against the data before correcting it
    months = {
        1: "January",
        2: "Febuary",
        3: "March",
        4: "April",
        5: "May",
        6: "June",
        7: "July",
        8: "August",
        9: "September",
        10: "October",
        11: "November",
        12: "December"
    }

    # call proportions file in dictionary format - this proportions file returns a calculated average based
    # on previous journeys for a given... ...month, week and time_group...
    proportion = 0
    try:
        # construct sql queries: one for the row of values, one for the column names
        table_name = "route_%s_%s_proportions" % (route.lower(), direction)
        sql_values = db.construct_sql(table_name=table_name,
                                      query_type="select_where",
                                      data={
                                          "month": months[month],
                                          "weekday": days[weekday],
                                          "timegroup": str(time_group)
                                      })

        sql_keys = db.construct_sql(table_name=table_name,
                                    query_type="attr_names")

        response_values = db.execute_sql(sql_values,
                                         database,
                                         user,
                                         password,
                                         host,
                                         port,
                                         retrieving_data=True)[0]

        response_keys = db.execute_sql(sql_keys,
                                       database,
                                       user,
                                       password,
                                       host,
                                       port,
                                       retrieving_data=True)

        # the first three entries are skipped (presumably the month, weekday
        # and timegroup columns - confirm against the table layout)
        list_of_values = list(response_values[3:])
        list_of_keys = list(response_keys[3:])

        # locate the segment that starts at startstop and the one that ends at endstop
        index1 = None
        index2 = None
        for position, item in enumerate(list_of_keys):
            segment_parts = str(item).split("_")
            # the slices strip the characters wrapped around the stop ids in
            # the textual form of each key
            first_stop_segment = segment_parts[0][3:]
            last_stop_segment = segment_parts[1][:-2]
            if startstop == first_stop_segment:
                index1 = position
            if endstop == last_stop_segment:
                index2 = position

        if index1 is not None and index2 is not None:
            total = 0
            for i in range(index1, index2 + 1):
                value = list_of_values[i]
                # this is to handle the odd missing value in our proportions datasets.
                if value is not None:
                    total += value
            proportion = total

    # otherwise simply return the percentage of the number of stops a user is travelling
    except Exception as e:
        print(e)
        proportion = 0

    if proportion > 0:
        return proportion
    return quickanddirty(route, direction, startstop, endstop)
Ejemplo n.º 6
0
def generate_test_dataframe(route, direction, date, time):
    """Return a one-row dataframe holding the user-entered trip details for prediction.

    The column list varies per route and is read from the ``model_features``
    table using the id ``<route>_<direction>``. Every column starts at 0;
    categorical columns reported by ``get_active_columns`` are set to 1 and
    the continuous weather features are written in directly.

    Weather comes from ``get_weather_from_db`` when the requested departure is
    within an hour of now, otherwise from ``get_nearest_forecast``.

    Args:
        route: route identifier, used to build the feature-template id.
        direction: direction identifier, used to build the feature-template id.
        date: service date as a ``'%Y-%m-%d'`` string.
        time: departure time, passed to ``time_from_seconds`` and
            ``time_group_function`` (presumably seconds since midnight -
            TODO confirm).

    Returns:
        pandas.DataFrame with a single row (index 0) ready to be passed to
        the trained model.
    """

    # check if *current* weather data should be used for prediction:
    # only when the requested departure is less than an hour away from now
    dt = datetime.fromisoformat("%s %s" % (str(date), time_from_seconds(time)))
    current = abs((dt - datetime.now()).total_seconds()) < 3600

    if current:
        # if so; get current weather from database
        weather = get_weather_from_db()
    else:
        # otherwise; request the nearest weather forecast from the database
        weather = get_nearest_forecast(dt)[0]

    # extract required parameters (positions within the weather row)
    temp = weather[2]
    feels_like = weather[3]
    wind_speed = weather[4]
    wind_deg = weather[5]
    main = weather[6]
    description = weather[7]
    rain = weather[8]

    # create empty dataframe with correct headings from templates generated from list of columns from
    # the datasets used to train the linear regression models
    template_name = str(route) + "_" + str(direction)
    sql = db.construct_sql(table_name="model_features",
                           query_type="select_where",
                           column_names=["features"],
                           data={"id": template_name})
    feature_names = db.execute_sql(sql,
                                   database,
                                   user,
                                   password,
                                   host,
                                   port,
                                   retrieving_data=True)[0][0]

    # a single row containing 0 in every column
    test_frame = pd.DataFrame([[0] * len(feature_names)], columns=feature_names)

    # user entered data goes into a temporary dataframe...
    temp_dataframe = pd.DataFrame({"DAYOFSERVICE": [date], "TIME": [time]})

    # ...add in the categorical features from the weather data...
    temp_dataframe['weather_main'] = main
    temp_dataframe['weather_description'] = description

    # ...converting date to date format from string...
    temp_dataframe['DAYOFSERVICE'] = [
        datetime.strptime(day, '%Y-%m-%d') for day in temp_dataframe['DAYOFSERVICE']
    ]

    # ...creating month and day of the week feature from that date...
    temp_dataframe['MONTH'] = temp_dataframe['DAYOFSERVICE'].dt.month
    temp_dataframe['DAYOFWEEK'] = temp_dataframe['DAYOFSERVICE'].dt.dayofweek

    # ...creating Time Group Feature from the time....
    temp_dataframe['TIME_GROUP'] = time_group_function(time)

    # ...drop the date and time features we don't need...
    temp_dataframe = temp_dataframe.drop(columns=['DAYOFSERVICE', 'TIME'])

    # names of the categorical columns in the test dataframe that need to be
    # encoded to 1 (instead of 0)
    active_columns = get_active_columns(temp_dataframe)

    # write through the frame itself: assigning into the Series yielded by
    # DataFrame.items() is not guaranteed to reach the frame (it is a no-op
    # under pandas Copy-on-Write), which would leave every flag at 0
    for column in test_frame.columns:
        if column in active_columns:
            test_frame.loc[0, column] = 1

    # add the continuous features directly to the test dataframe
    test_frame['temp'] = temp
    test_frame['feels_like'] = feels_like
    test_frame['wind_speed'] = wind_speed
    test_frame['wind_deg'] = wind_deg
    test_frame['rain'] = rain

    # and return the test dataframe to the pickled linear regression
    return test_frame
Ejemplo n.º 7
0
def store_incidents(incidents):
    """Store the incidents that are still ongoing and lie close to a bus route.

    For every incident whose ``end_time`` has not passed yet, the routes whose
    path is near the incident path are looked up. If any are found, one
    ``incident_lookup`` row is inserted per affected route and the incident
    itself is inserted into ``incident_data``.

    Args:
        incidents: iterable of incident mappings providing at least
            ``end_time`` (``%m/%d/%Y %H:%M:%S``), ``incident_path`` and
            ``incident_id``.
    """
    # sql query for checking if incident is within ~500m of a bus route
    # NOTE(review): the incident path is interpolated straight into the SQL
    # text - confirm the incident feed is trusted or escape it upstream
    path_sql = """
        select route_id 
        from db_gtfs_shapes
        where (route_path <-> path'%s') < 0.006;
        """

    def run_query(statement, fetch):
        # single place for the connection arguments shared by every query below
        return db.execute_sql(statement, database, user, password, host, port,
                              retrieving_data=fetch)

    now = datetime.now()

    for incident in incidents:
        # incidents that are already over are of no interest
        ends_at = datetime.strptime(incident["end_time"], "%m/%d/%Y %H:%M:%S")
        if now > ends_at:
            continue

        # bus routes that are affected by this disruption
        affected_routes = run_query(path_sql % incident["incident_path"], True)
        if len(affected_routes) == 0:
            continue

        # one lookup-table row per affected route
        for route_row in affected_routes:
            lookup_insert = db.construct_sql(
                table_name="incident_lookup",
                query_type="insert",
                data={"incident_id": incident["incident_id"], "route_id": route_row[0]},
            )
            run_query(lookup_insert, False)

        # finally the incident itself goes into the incident_data table
        incident_insert = db.construct_sql(table_name="incident_data",
                                           query_type="insert",
                                           data=incident)
        run_query(incident_insert, False)