def Calc_all_Station_Time():
    start = time.time()
    filename = "testfile"
    if (os.path.exists(filename)):
        return 1
    file = open(filename, 'w')
    file.close()
    # setup query suite
    qsp = query_suite.QuerySuite(config="app_config_Miner.json",
                                 property_name="dbcconfig",
                                 limit=7000)
    qspinsert = query_suite.QuerySuite(config="app_config_Storage.json",
                                       property_name="dbcconfig",
                                       limit=7000)
    eva_nummer = qsp.get_all_stationnumbers()
    #if you want to start it reversed
    #eva_nummer = eva_nummer.sort_index(ascending=False, axis=0)
    for index, eva in eva_nummer.iterrows():
        Station_Time_Average(evanr=eva["EVA_NR"], qsp=qsp, qspinsert=qspinsert)
    end = time.time()
    qsp.disconnect()
    qspinsert.disconnect()
    qsp = None
    qspinsert = None
    print("Global endtime: {}".format((end - start)))
    os.remove(filename)
    return 0
Example #2
0
def analyze(dailytripid):
    # setup query suite
    qs = query_suite.QuerySuite(config="app_config.json",
                                property_name="dbcconfig",
                                limit=5000)

    # get all yymmddhhmm that match the dailitripid
    trips_df = qs.get_all_yymmddhhmm_of_dailytripid(dailytripid=dailytripid)

    # get all stops of all trips and collect them in accumulator
    stops_accumulator = pd.DataFrame()
    for index, row in trips_df.iterrows():
        stops_on_trip = analyze_train_delay.analyze(
            dailytripid=dailytripid, yymmddhhmm=row["yymmddhhmm"])
        stops_accumulator = stops_accumulator.append(stops_on_trip,
                                                     ignore_index=True)
        #analyze_train_delay.visualize(stops_on_trip)
        print("queries left: {}".format(len(trips_df.index) - index))

    # calculate autocorrelations
    WRAP_AROUND_MODE = False
    acf_delay_by_staytime_df = autocorrelation_over_entire_df(
        stops_accumulator, "delay_by_staytime", wrap_around=WRAP_AROUND_MODE)
    acf_delay_by_traveltime_df = autocorrelation_over_entire_df(
        stops_accumulator, "delay_by_traveltime", wrap_around=WRAP_AROUND_MODE)
    acf_delay_at_arrival_df = autocorrelation_over_entire_df(
        stops_accumulator, "delay_at_arrival", wrap_around=WRAP_AROUND_MODE)
    acf_delay_at_departure_df = autocorrelation_over_entire_df(
        stops_accumulator, "delay_at_departure", wrap_around=WRAP_AROUND_MODE)

    # construct result dataframe
    result_df = acf_delay_by_staytime_df
    result_df = pd.concat(
        [result_df, acf_delay_by_traveltime_df["acf_delay_by_traveltime"]],
        axis=1)
    result_df = pd.concat(
        [result_df, acf_delay_at_arrival_df["acf_delay_at_arrival"]], axis=1)
    result_df = pd.concat(
        [result_df, acf_delay_at_departure_df["acf_delay_at_departure"]],
        axis=1)
    return {"acf_df": result_df, "dailytripid": dailytripid}
Example #3
0
import json
import pymysql
import pandas as pd
import numpy as np

import query_suite

qs = query_suite.QuerySuite(config="app_config.json",
                            property_name="dbcconfig",
                            limit=5000)
def Station_Time_Average(evanr=8011160, qsp=None, qspinsert=None):
    start = time.time()
    close = False
    if qsp is None:
        # setup query suite
        qsp = query_suite.QuerySuite(config="app_config.json",
                                     property_name="dbcconfig",
                                     limit=5000)
        close = True

    if qspinsert is None:
        qspinsert = qsp

    init = time.time()
    if DEBUG:
        print("EVA: {}".format(evanr))
        print("init: {}".format(init - start))

    returnvalue = {}
    artimetdelaysum = 0
    artimeCount = 0
    artimelastValue = 0
    artimenew = True
    dptimedelaysum = 0
    dptimeCount = 0
    dptimelastValue = 0
    dptimenew = True
    staytimedelaysum = 0
    staytimeCount = 0
    staytimelastValue = 0
    staytimenew = True
    states = qspinsert.get_AverageStatelike(evanr)
    for index, row in states.iterrows():
        description = int(row["DescriptionID"])
        if description == artime:
            artimeCount = row["Count"]
            artimelastValue = row["lastValue"]
            artimetdelaysum = row["Average"] * artimeCount
            artimenew = False
        if description == dptime:
            dptimeCount = row["Count"]
            dptimelastValue = row["lastValue"]
            dptimedelaysum = row["Average"] * dptimeCount
            dptimenew = False
        if description == staytime:
            staytimeCount = row["Count"]
            staytimelastValue = row["lastValue"]
            staytimedelaysum = row["Average"] * staytimeCount
            staytimenew = False
        pass
    lastValue = max(artimelastValue, dptimelastValue, staytimelastValue)
    ttsids = pd.DataFrame(qsp.get_Station_information(evanr, lastValue))

    gether_data = time.time()
    if DEBUG:
        print("gether_data: {}".format(gether_data - start))

    for index, row in ttsids.iterrows():
        arzeitsoll = row["arzeitsoll"]
        arzeitist = row["arzeitist"]
        dpzeitist = row["dpzeitist"]
        dpzeitsoll = row["dpzeitsoll"]
        ID = row["ID"]
        if type(arzeitist) is pd._libs.tslib.Timedelta and type(
                arzeitsoll) is pd._libs.tslib.Timedelta:
            artimetdelaysum += int((arzeitist - arzeitsoll).total_seconds())
            artimeCount += 1
            artimelastValue = ID
        if type(dpzeitist) is pd._libs.tslib.Timedelta and type(
                dpzeitsoll) is pd._libs.tslib.Timedelta:
            dptimedelaysum += int((dpzeitist - dpzeitsoll).total_seconds())
            dptimeCount += 1
            dptimelastValue = ID
        if type(dpzeitist) is pd._libs.tslib.Timedelta and type(
                arzeitist) is pd._libs.tslib.Timedelta:
            staytimedelaysum += int(
                ((arzeitist - dpzeitist) -
                 (arzeitsoll - dpzeitsoll)).total_seconds())
            staytimeCount += 1
            staytimelastValue = ID
    if artimetdelaysum is not 0:
        average = int(artimetdelaysum / artimeCount)
        writetoAverageState(artimeCount, artimelastValue, artimenew, average,
                            evanr, artime, qspinsert)
        returnvalue["arzeitdelay"] = pu.strfdelta(timedelta(seconds=average),
                                                  "%s%D %H:%M:%S")
    if dptimedelaysum is not 0:
        average = int(dptimedelaysum / dptimeCount)
        writetoAverageState(dptimeCount, dptimelastValue, dptimenew, average,
                            evanr, dptime, qspinsert)
        returnvalue["dpzeitdelay"] = pu.strfdelta(timedelta(seconds=average),
                                                  "%s%D %H:%M:%S")
    if staytimedelaysum is not 0:
        average = int(staytimedelaysum / staytimeCount)
        writetoAverageState(staytimeCount, staytimelastValue, staytimenew,
                            average, evanr, staytime, qspinsert)
        returnvalue["staytimedelay"] = pu.strfdelta(timedelta(seconds=average),
                                                    "%s%D %H:%M:%S")

    Calculate_data = time.time()
    if DEBUG:
        print("Calculate_data: {}".format(Calculate_data - start))

    if close:
        # clean up
        qsp.disconnect()
    print(max(artimeCount, dptimeCount, staytimeCount))
    end = time.time()
    if DEBUG:
        print("end: {}".format(end - start))
    print("_______________")
    return returnvalue
def qs():
    #setup query suite pandas
    qs = query_suite.QuerySuite(config="app_config.json", property_name="dbcconfig", limit=5000)
    yield qs
    qs.disconnect()
def Time_Average(dailytripid="8317284780065095268"):
    start = time.time()

    # setup query suite
    qsp = query_suite.QuerySuite(config="app_config.json",
                                 property_name="dbcconfig",
                                 limit=5000)

    init = time.time()
    if DEBUG:
        print("init")
        print(init - start)

    # get stops on trip
    # ttsids = qsp.get_ttsid_on_trip(dailytripid="-5016615278318514860", yymmddhhmm="1712011704")
    ttsids = pd.DataFrame(
        qsp.get_ttsid_like(dailytripid=dailytripid, stopindex=1))

    returnvalue = pd.DataFrame()
    all = []
    for index, row in ttsids.iterrows():
        all.append(
            analyze_train_delay.analyze(row["dailytripid"], row["yymmddhhmm"]))

    gether_data = time.time()
    if DEBUG:
        print("gether_data")
        print(gether_data - start)

    for index in all[00]["stopindex"]:
        delay_at_arrival = np.array([])
        delay_at_departure = np.array([])
        staytime_scheduled = np.array([])
        staytime_real = np.array([])
        delay_by_staytime = np.array([])
        traveltime_scheduled = np.array([])
        traveltime_real = np.array([])
        delay_by_traveltime = np.array([])
        test = 0

        for temp in all:
            # print(test)
            try:
                delay_at_arrival = np.append(delay_at_arrival,
                                             temp["delay_at_arrival"][index])
                delay_at_departure = np.append(
                    delay_at_departure, temp["delay_at_departure"][index])
                delay_by_staytime = np.append(delay_by_staytime,
                                              temp["delay_by_staytime"][index])
                delay_by_traveltime = np.append(
                    delay_by_traveltime, temp["delay_by_traveltime"][index])
            except Exception as e:
                if DEBUG:
                    print("train shorter than before")
                e
                pass
            test += 1
        pass

        size = delay_at_arrival.size
        if delay_at_arrival.size == 0:
            pass
        else:
            avr_delay_at_arrival_storage = delay_at_arrival.mean()
            avr_delay_at_departure_storage = delay_at_departure.mean()
            avr_delay_by_staytime = delay_by_staytime.mean()
            avr_delay_by_traveltime = delay_by_traveltime.mean()
            test = pd.DataFrame([
                [
                    avr_delay_at_arrival_storage,
                    avr_delay_at_departure_storage, avr_delay_by_staytime,
                    avr_delay_by_traveltime
                ],
            ],
                                columns=[
                                    "delay_at_arrival", "delay_at_departure",
                                    "delay_by_staytime", "delay_by_traveltime"
                                ])
            returnvalue = returnvalue.append(test, ignore_index=True)
        print(index)

    # print(traveltime_scheduled_accum)
    # print(traveltime_real_accum)
    # print(delay_by_traveltime_accum)

    # clean up
    qsp.disconnect()

    end = time.time()
    if DEBUG:
        print("end")
        print(end - start)
    return returnvalue
def analyze(dailytripid, yymmddhhmm):
    # setup query suite
    qs = query_suite.QuerySuite(config="app_config.json",
                                property_name="dbcconfig",
                                limit=5000)

    # get stops on trip
    tts_with_stationname_df = qs.get_tts_with_stationnames_on_trip(
        dailytripid=dailytripid, yymmddhhmm=yymmddhhmm)

    # dataframes for accumulating results
    delay_at_arrival_accum = pd.DataFrame()
    delay_at_departure_accum = pd.DataFrame()
    staytime_scheduled_accum = pd.DataFrame()
    staytime_real_accum = pd.DataFrame()
    delay_by_staytime_accum = pd.DataFrame()
    traveltime_scheduled_accum = pd.DataFrame()
    traveltime_real_accum = pd.DataFrame()
    delay_by_traveltime_accum = pd.DataFrame()

    # loop over trip
    train_stop_old = None
    for index, row in tts_with_stationname_df.iterrows():
        # convert row to data frame
        train_stop = pd.DataFrame()
        train_stop = train_stop.append(row, ignore_index=True)

        delay_at_arrival = pu.calc_delay_at_arrival_df(train_stop)
        delay_at_arrival_accum = delay_at_arrival_accum.append(
            delay_at_arrival, ignore_index=True)

        delay_at_departure = pu.calc_delay_at_departure_df(train_stop)
        delay_at_departure_accum = delay_at_departure_accum.append(
            delay_at_departure, ignore_index=True)

        staytime_scheduled = pu.calc_staytime_scheduled_df(train_stop)
        staytime_scheduled_accum = staytime_scheduled_accum.append(
            staytime_scheduled, ignore_index=True)

        staytime_real = pu.calc_staytime_real_df(train_stop)
        staytime_real_accum = staytime_real_accum.append(staytime_real,
                                                         ignore_index=True)

        delay_by_staytime = pu.calc_delay_by_staytime_df(train_stop)
        delay_by_staytime_accum = delay_by_staytime_accum.append(
            delay_by_staytime, ignore_index=True)

        traveltime_scheduled = pu.calc_traveltime_scheduled_df(
            train_stop_old, train_stop)
        traveltime_scheduled_accum = traveltime_scheduled_accum.append(
            traveltime_scheduled, ignore_index=True)

        traveltime_real = pu.calc_traveltime_real_df(train_stop_old,
                                                     train_stop)
        traveltime_real_accum = traveltime_real_accum.append(traveltime_real,
                                                             ignore_index=True)

        delay_by_traveltime = pu.calc_delay_by_traveltime_df(
            train_stop_old, train_stop)
        delay_by_traveltime_accum = delay_by_traveltime_accum.append(
            delay_by_traveltime, ignore_index=True)

        # make step
        train_stop_old = train_stop

    # teardown query suite
    qs.disconnect()

    #construct result data frame
    result_df = tts_with_stationname_df
    result_df = pd.concat(
        [result_df, delay_at_arrival_accum["delay_at_arrival"]], axis=1)
    result_df = pd.concat(
        [result_df, delay_at_departure_accum["delay_at_departure"]], axis=1)
    result_df = pd.concat(
        [result_df, staytime_scheduled_accum["staytime_scheduled"]], axis=1)
    result_df = pd.concat([result_df, staytime_real_accum["staytime_real"]],
                          axis=1)
    result_df = pd.concat(
        [result_df, delay_by_staytime_accum["delay_by_staytime"]], axis=1)
    result_df = pd.concat(
        [result_df, traveltime_scheduled_accum["traveltime_scheduled"]],
        axis=1)
    result_df = pd.concat(
        [result_df, traveltime_real_accum["traveltime_real"]], axis=1)
    result_df = pd.concat(
        [result_df, delay_by_traveltime_accum["delay_by_traveltime"]], axis=1)

    return result_df
Example #8
0
def trainnumberfulltodailytripid(trainummberfull="RE11350"):
    qs = query_suite.QuerySuite(config="app_config.json",
                                property_name="dbcconfig",
                                limit=1)
    ttsid = qs.get_dtid_with_trainnumberfull(trainummberfull)
    return ttsid
Example #9
0
def yymmddhhmmtodate(yymmddhhmm="1712111109"):
    qs = query_suite.QuerySuite(config="app_config.json",
                                property_name="dbcconfig",
                                limit=1)
    date = qs.get_date_with_yymmddhhmm(yymmddhhmm)
    return date
Example #10
0
def datetoyymmddhhmm(date="2017-12-11", dailytripid=5212057309500211661):
    qs = query_suite.QuerySuite(config="app_config.json",
                                property_name="dbcconfig",
                                limit=1)
    yymmddhhmm = qs.get_yymmddhhmm_with_date_and_dtid(date, dailytripid)
    return yymmddhhmm
Example #11
0
def dailytripidtotrainnumberfull(dailytripid=5212057309500211661):
    qs = query_suite.QuerySuite(config="app_config.json",
                                property_name="dbcconfig",
                                limit=1)
    trainnumberfull = qs.get_trainnumberfull_with_dtid(dailytripid)
    return trainnumberfull