Esempio n. 1
0
def prepare_dataset(garage_id):
    """Load the per-hour visitor data of one garage and strip non-feature columns.

    Two CSV files are read, both resolved through ``base_file_location``:
    the garage locations file and the per-hour visitor file whose name
    contains ``garage_id``.

    Args:
        garage_id: Identifier of the garage. It is used to build the visitor
            file name and to select the garage's row in the locations file.

    Returns:
        A new DataFrame with the columns that remain after the drops below.

    Raises:
        ValueError: If the locations file does not hold exactly one row for
            ``garage_id``.
        KeyError: If one of the expected columns is missing.
    """
    parking_locations = pd.read_csv(
        base_file_location(
            r'parkeerdata\leeuwarden_garage_parking_garage_gps.csv'),
        sep=";",
        decimal=",")

    name = r"data\out-visitors_per_hour-garage" + str(
        garage_id) + "_predict.csv"

    # Load data
    visitors_per_hour = pd.read_csv(base_file_location(name), sep=";")

    matched_row = parking_locations.loc[parking_locations.garage_id ==
                                        garage_id]
    # int() on a Series is deprecated in pandas and fails unclearly when the
    # selection is empty or ambiguous, so check first and pick the scalar.
    if len(matched_row) != 1:
        raise ValueError(
            "expected exactly one parking location for garage_id {0!r}, "
            "found {1}".format(garage_id, len(matched_row)))
    capacity = int(matched_row.capacity_value.iloc[0])
    visitors_per_hour['perc'] = 100 * visitors_per_hour['cum_sum'] / capacity

    # drop() returns a new frame, so the loaded data is not modified.
    return visitors_per_hour.drop(columns=[
        # the target variable
        'cum_sum',
        # id from feature table (overfitting)
        'index',
        # explainable variable with respect to the binned-target variable
        'perc',
        # datetime (overfitting)
        'date',
        # direct influencers
        'count_in',
        'count_out',
        'delta',
        'month',
        # weather data dates
        'YYYYMMDD',
        'HH',
    ])
Esempio n. 2
0
def save_one_fig(df_predictions, df_garage_locations, prediction_date,
                 prediction_hour):
    """Draw a single frame and write it to ``data\\parking_lwd.png``.

    The current figure is cleared, a fresh 15x15 figure is opened,
    ``update_drawing`` is called once and the result is saved. The figure
    opened here is not closed by this function.

    Args:
        df_predictions: Passed through to ``update_drawing``.
        df_garage_locations: Passed through to ``update_drawing``.
        prediction_date: Passed through to ``update_drawing``.
        prediction_hour: Passed to ``update_drawing`` as its first argument.
    """
    plt.clf()
    plt.figure(figsize=(15, 15))

    update_drawing(
        prediction_hour,
        24,
        df_predictions,
        df_garage_locations,
        prediction_date,
    )

    output_path = base_file_location(r"data\parking_lwd.png")
    plt.savefig(output_path)
def process_file():
    """Read the filtered Locatus CSV into a priority queue of visits.

    The first line of the file is treated as a header and skipped. Every
    other line is parsed with ``parse_line`` and queued as a
    ``(visitor, visit_timestamp, sensor)`` tuple. Progress is printed every
    500000 lines.

    Returns:
        The ``Q.PriorityQueue`` holding one tuple per data line.
    """
    visit_info = Q.PriorityQueue()

    # Raw string: "\l" is not a valid escape sequence, so the non-raw form
    # only worked by accident and triggers a warning on recent Python.
    with open(base_file_location(r'locatus\locatusdata_bewerkt_filtered.csv'), 'r') as infile:
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(infile, None)
        for cnt, line in enumerate(infile, start=1):
            visitor, sensor, visit_timestamp = parse_line(line)

            visit_info.put((visitor, visit_timestamp, sensor))

            if cnt % 500000 == 0:
                print("cnt: {0}".format(cnt))
    return visit_info
Esempio n. 4
0
def main():
    """Create the animation for several days and save it as ``data\\im.mp4``.

    ffmpeg must be installed; its location comes from ``ffmpeg_location()``.
    ``update_drawing`` is used as the per-frame callback for 120 frames.
    """
    figure, _axes = plt.subplots(figsize=(20, 15))

    # Point matplotlib at the ffmpeg binary before the writer is created.
    plt.rcParams['animation.ffmpeg_path'] = ffmpeg_location()

    ffmpeg_writer_cls = matplotlib.animation.writers['ffmpeg']
    movie_writer = ffmpeg_writer_cls(
        fps=15,
        metadata={'artist': 'pca 2018'},
        bitrate=1800,
    )

    animation = matplotlib.animation.FuncAnimation(
        figure,
        update_drawing,
        frames=120,
        interval=500,
        repeat=False,
    )

    print("saving")

    output_path = base_file_location(r'data\im.mp4')
    animation.save(output_path, writer=movie_writer)
Esempio n. 5
0
def save_one_fig(days):
    """Draw a single frame and write it to ``data\\visit_lwd.png``.

    The current figure is cleared, a fresh 20x15 figure is opened,
    ``update_drawing(days)`` is called once and the result is saved. The
    figure opened here is not closed by this function.

    Args:
        days: Passed unchanged to ``update_drawing``.
    """
    plt.clf()
    plt.figure(figsize=(20, 15))

    update_drawing(days)

    output_path = base_file_location(r"data\visit_lwd.png")
    plt.savefig(output_path)
Esempio n. 6
0
# Author: pca
# Date: 2018-06-26
# Version: 1.0
#
# Prerequisite: ffmpeg.exe is available on machine

import networkx as nx
import pandas as pd
from leeuwarden_kennis.config_data import base_file_location
from leeuwarden_kennis.config_data import ffmpeg_location
import collections
import matplotlib.pyplot as plt
import matplotlib.animation

# Load the recorded walks; this is the file created in
# network_lwd_store_routes. The read happens at import time.
df_visits = pd.read_csv(base_file_location(r'data\network_walks.csv'), sep=';')

# Add a date-only column: parse 'time_from', keep only the calendar date and
# convert that back to a pandas datetime.
# NOTE(review): the format '%Y-%m-%d' has no time part - confirm that the
# 'time_from' values actually parse with it on the pandas version in use.
df_visits['visit_date'] = pd.to_datetime(
    pd.to_datetime(df_visits['time_from'], format='%Y-%m-%d').dt.date)

# Distinct visit dates in ascending order.
all_dates = sorted(df_visits['visit_date'].unique())

# Get the sensor locations (semicolon-separated, decimal comma).
# This file can be found here: https://github.com/KennisnetwerkDataScience/Wifi-punten-in-Leeuwarden/tree/master/data
df_sensor_locations = pd.read_csv(base_file_location(r'data\opstelpunten.csv'),
                                  sep=';',
                                  decimal=',')


def calc_width(max_width, total_routes, weight):
    """Drain the module-level ``visits_pq`` and collect location changes.

    Records are taken from ``visits_pq`` until it is empty. A walk is
    recorded when a record has the same person as the previous record, a
    different location, and ``in_time_hrs(previous_time, current_time, 12)``
    is true.

    NOTE(review): none of the parameters are used by the body, and the
    name does not describe what the body does. The caller below invokes
    ``make_network(visits_pq)``; this body looks like it may belong to
    that function - confirm against the original project.

    Args:
        max_width: Unused.
        total_routes: Unused.
        weight: Unused.

    Returns:
        A list of ``(person, from_location, to_location, from_time,
        to_time)`` tuples in the order the records left the queue.
    """
    walks = []

    # Sentinels for "no previous record yet".
    prev_person = -1
    prev_location = -1
    prev_time = datetime.datetime.now()

    while not visits_pq.empty():
        record = visits_pq.get()
        person, visit_time, location = record[0], record[1], record[2]

        # change of location by the same person within the time window
        is_walk = (
            location != prev_location
            and person == prev_person
            and in_time_hrs(prev_time, visit_time, 12)
        )
        if is_walk:
            walks.append(
                (person, prev_location, location, prev_time, visit_time))

        prev_person, prev_location, prev_time = person, location, visit_time

    return walks


# Read all visits into a priority queue, then build the network records from
# it (make_network is defined elsewhere). Both steps run at import time.
visits_pq = process_file()
network = make_network(visits_pq)

# Make a pandas df from this and store it as a semicolon-separated CSV.
# NOTE(review): index=None is passed where pandas documents a bool;
# presumably intended as index=False - confirm.
network_df = pd.DataFrame(network, columns=['visitor', 'sensor_from', 'sensor_to', 'time_from', 'time_to'])
network_df.to_csv(base_file_location(r'data\network_walks.csv'), index=None, sep=';')

Esempio n. 8
0
from sklearn.ensemble import RandomForestClassifier
import pickle
import networkx as nx
from leeuwarden_kennis.config_data import base_file_location
from leeuwarden_kennis.config_data import ffmpeg_location
import collections
import matplotlib.pyplot as plt
import matplotlib.animation

# Garage ids for which a pickled classifier file is loaded below.
parking_ids = [36, 37, 38, 39, 40]


def _load_classifier(garage_id):
    """Unpickle the object stored in ``classifier_garage_<garage_id>.sav``.

    The file is opened in a ``with`` block so the handle is closed as soon
    as loading finishes instead of being left to the garbage collector.
    """
    path = base_file_location(r"data\classifier_garage_{0}.sav").format(
        garage_id)
    # NOTE: pickle can execute arbitrary code while loading; these files
    # must come from a trusted source.
    with open(path, "rb") as model_file:
        return pickle.load(model_file)


# Maps garage id -> unpickled object; loaded once at import time.
classifiers = {
    garage_id: _load_classifier(garage_id)
    for garage_id in parking_ids
}


# prepare dataset
# fields:
# index;date;hour;count_in;count_out;delta;cum_sum;week;month;day;dayofyear;weekday;year;YYYYMMDD;HH;R
def create_dataset(prediction_date: datetime.datetime, rain: float):
    pd_prediction_date = pd.to_datetime(prediction_date)

    ds_dict = dict()

    for idx in range(0, 24):
        date = pd_prediction_date.strftime('%Y-%m-%d')
        hr = idx