コード例 #1
0
def model2(row):
    """
    MODEL2: one-hot weekday and hour plus two binary holiday flags.

    The timestamp is read from row[DEP_IDX] via lpputils.parsedate.

    Layout of the returned vector (length 33):
      0-6    weekday (one-hot)
      7-30   hour of day (one-hot)
      31     1 if the date is in HOLIDAYS
      32     1 if the date is in SCHOOL_HOL

    Scores noted by the original author:
      server: 152.32996
      local:  129.303771
    """
    features = np.zeros(7 + 24 + 2)

    departure = lpputils.parsedate(row[DEP_IDX])
    features[departure.weekday()] = 1
    features[7 + departure.hour] = 1

    dep_date = departure.date()
    features[-2] = 1 if dep_date in HOLIDAYS else 0
    features[-1] = 1 if dep_date in SCHOOL_HOL else 0

    return features
コード例 #2
0
def zgradi_matrike(linija, training):
    """
    Build the regression inputs for a collection of records.

    The holiday dates are loaded from "prazniki_in_dela_prosti_dnevi.csv"
    (first column, text before the first ';', format dd.mm.YYYY) on every
    call and passed on to napolni_x.

    Args:
        linija: iterable of records; it is traversed exactly once, so a
            one-shot iterator such as a csv reader is acceptable.
        training: when truthy, return the training pair, otherwise return
            the data needed for prediction.

    Returns:
        training:     (X, Y) where X is linear.append_ones applied to the
                      feature rows and Y holds
                      tsdiff(parsedate(d[-3]), parsedate(d[-1])) per record.
        not training: (route, dejanski_cas, originalen_datum, X) where the
                      three lists hold d[3], d[-1] and d[-3] per record.
    """
    # The context manager guarantees the holiday file is closed again.
    with open("prazniki_in_dela_prosti_dnevi.csv", "rt",
              encoding="latin1") as holiday_file:
        branje = csv.reader(holiday_file)
        next(branje)  # skip the header row
        prazniki = [
            datetime.datetime.strptime(vrstica[0].split(";", 1)[0],
                                       "%d.%m.%Y").date()
            for vrstica in branje
        ]

    x = []
    if training:
        y = []
        for zapis in linija:
            x.append(napolni_x(zapis, prazniki))
            y.append(
                lpputils.tsdiff(lpputils.parsedate(zapis[-3]),
                                lpputils.parsedate(zapis[-1])))
        X = linear.append_ones(np.array(x))
        Y = np.array(y)
        return X, Y

    originalen_datum = []
    dejanski_cas = []
    route = []
    for zapis in linija:
        originalen_datum.append(zapis[-3])
        dejanski_cas.append(zapis[-1])
        route.append(zapis[3])
        x.append(napolni_x(zapis, prazniki))
    X = linear.append_ones(np.array(x))
    return route, dejanski_cas, originalen_datum, X
コード例 #3
0
def model4(row):
    """
    MODEL4: one-hot weekday and hour plus three binary holiday flags.

    The timestamp is read from row[DEP_IDX] via lpputils.parsedate.

    Layout of the returned vector (length 34):
      0-6    weekday (one-hot)
      7-30   hour of day (one-hot)
      31     1 if the date lies within SUMMER_HOL[0]..SUMMER_HOL[1] (inclusive)
      32     1 if the date is in HOLIDAYS
      33     1 if the date is in SCHOOL_HOL

    Scores noted by the original author:
      server: 150.86627
      local:  128.68312
    """
    features = np.zeros(7 + 24 + 3)

    departure = lpputils.parsedate(row[DEP_IDX])
    features[departure.weekday()] = 1
    features[7 + departure.hour] = 1

    dep_date = departure.date()
    in_summer = (lpputils.parsedate(SUMMER_HOL[0]).date() <= dep_date
                 <= lpputils.parsedate(SUMMER_HOL[1]).date())

    features[-3] = 1 if in_summer else 0
    features[-2] = 1 if dep_date in HOLIDAYS else 0
    features[-1] = 1 if dep_date in SCHOOL_HOL else 0

    return features
コード例 #4
0
def model1(row):
    """
    MODEL1: six numeric features for one record.

    The timestamp is read from row[DEP_IDX] via lpputils.parsedate.

    Layout of the returned vector (length 6):
      0   weekday / 7.0
      1   hour / 24.0
      2   1 if the date is in HOLIDAYS
      3   1 if the date is in SCHOOL_HOL
      4   driver_average(row[DRV_IDX])
      5   bus_average(row[BUS_IDX])

    The summer-holiday flag is not part of this model; the previous version
    still evaluated it but never stored the result, so that dead
    computation has been dropped.
    """
    departure = lpputils.parsedate(row[DEP_IDX])
    date = departure.date()

    result = np.zeros(6)
    result[0] = departure.weekday() / 7.0  # day
    result[1] = departure.hour / 24.0  # hour
    result[2] = 1 if date in HOLIDAYS else 0
    result[3] = 1 if date in SCHOOL_HOL else 0
    result[4] = driver_average(row[DRV_IDX])
    result[5] = bus_average(row[BUS_IDX])

    return result
コード例 #5
0
def visualize(train_data, _month, day_s, day_e):
    """
    Print and plot the mean travel time per hour of day.

    Only rows whose timestamp (row[DEP_IDX]) falls in month `_month` and on a
    day-of-month in the half-open range [day_s, day_e) are considered.

    Args:
        train_data: iterable of rows; traversed once.
        _month: month number to select.
        day_s: first day of month to include.
        day_e: day of month to stop at (exclusive).

    Output: the 24 hourly means are printed as a list and shown with
    matplotlib. Hours without any matching row are reported as 0.0.
    """
    times = np.zeros(24)
    cnts = np.zeros(24)

    # Single pass: each row is parsed once instead of once per selected day.
    for row in train_data:
        date = lpputils.parsedate(row[DEP_IDX])
        if date.month == _month and day_s <= date.day < day_e:
            times[date.hour] += lpputils.tsdiff(row[ARR_IDX], row[DEP_IDX])
            cnts[date.hour] += 1

    # Exact mean where data exists; empty hours stay at 0.0.
    norm_times = [
        float(total) / float(count) if count else 0.0
        for total, count in zip(times, cnts)
    ]
    print(norm_times)

    plt.plot(norm_times, label='the data')
    plt.show()
コード例 #6
0
def model3(row):
    """
    MODEL3: one-hot weekday, hour, driver and bus plus three holiday flags.

    The timestamp is read from row[DEP_IDX] via lpputils.parsedate. Driver
    and bus positions come from the module-level mappings driver_idxs and
    buses_idxs; an id missing from a mapping leaves its section all zero.

    Layout of the returned vector:
      0-6                        weekday (one-hot)
      7-30                       hour of day (one-hot)
      31 ..                      driver (len(driver_idxs) slots)
      31 + len(driver_idxs) ..   bus (len(buses_idxs) slots)
      last three                 summer holiday, holiday, school holiday

    Scores noted by the original author:
      server: ???
      local:  129.2525
    """
    weekday_base = 0
    hour_base = 7
    driver_base = 31
    bus_base = driver_base + len(driver_idxs)
    flags_base = bus_base + len(buses_idxs)
    features = np.zeros(flags_base + 3)

    departure = lpputils.parsedate(row[DEP_IDX])
    features[weekday_base + departure.weekday()] = 1
    features[hour_base + departure.hour] = 1

    driver_id = row[DRV_IDX]
    if driver_id in driver_idxs:
        features[driver_base + driver_idxs[driver_id]] = 1

    bus_id = row[BUS_IDX]
    if bus_id in buses_idxs:
        features[bus_base + buses_idxs[bus_id]] = 1

    dep_date = departure.date()
    in_summer = (lpputils.parsedate(SUMMER_HOL[0]).date() <= dep_date
                 <= lpputils.parsedate(SUMMER_HOL[1]).date())

    features[-3] = 1 if in_summer else 0
    features[-2] = 1 if dep_date in HOLIDAYS else 0
    features[-1] = 1 if dep_date in SCHOOL_HOL else 0

    return features
コード例 #7
0
def model_to_csv(train_data, filename):
    """
    Dump one CSV line per row of `train_data` to `filename`.

    Columns written: line (row[2]), month, day (the weekday number of the
    timestamp at row[DEP_IDX], not the day of month), hour, and travel
    (lpputils.tsdiff(row[ARR_IDX], row[DEP_IDX])).

    Args:
        train_data: iterable of rows.
        filename: path of the file to create or overwrite.
    """
    # The context manager flushes and closes the file even if a row fails.
    with open(filename, 'w') as out:
        out.write('line,month,day,hour,travel\n')
        for row in train_data:
            date = lpputils.parsedate(row[DEP_IDX])
            travel = lpputils.tsdiff(row[ARR_IDX], row[DEP_IDX])
            out.write('{0},{1},{2},{3},{4}\n'.format(
                row[2], date.month, date.weekday(), date.hour, travel))
コード例 #8
0
def model5(row):
    """
    MODEL5: one-hot weekday and hour, three holiday flags and a weather value.

    The timestamp is read from row[DEP_IDX] via lpputils.parsedate. The
    weather value is arso[<YYYY-MM-DD>][0] (labelled "padavine",
    i.e. precipitation, by the original author); when the date is missing
    from arso a message is printed and the slot stays 0.

    Layout of the returned vector (length 35):
      0-6    weekday (one-hot)
      7-30   hour of day (one-hot)
      31     summer holiday flag
      32     holiday flag
      33     school holiday flag
      34     weather value

    Scores noted by the original author:
      server: 184.51330
      local:  147.68
    """
    features = np.zeros(7 + 24 + 4)

    departure = lpputils.parsedate(row[DEP_IDX])
    features[departure.weekday()] = 1
    features[7 + departure.hour] = 1

    dep_date = departure.date()
    in_summer = (lpputils.parsedate(SUMMER_HOL[0]).date() <= dep_date
                 <= lpputils.parsedate(SUMMER_HOL[1]).date())

    features[-4] = 1 if in_summer else 0
    features[-3] = 1 if dep_date in HOLIDAYS else 0
    features[-2] = 1 if dep_date in SCHOOL_HOL else 0

    day_key = dep_date.strftime("%Y-%m-%d")
    if day_key in arso:
        features[-1] = arso[day_key][0]
    else:
        print("{0} - No weather data !!!".format(day_key))

    return features
コード例 #9
0
def model9(row):
    """
    MODEL9: one-hot (weekday, time slot) combination, three holiday flags,
    a binary weather flag and a detour value.

    The day is split into 30 slots: one per hour, except that hours 6, 7 and
    8 are each split into three slots (minutes 0-20, 21-40, 41-59).

    Layout of the returned vector (length 7 * 30 + 5):
      0-209  weekday * 30 + slot (one-hot)
      -5     summer holiday flag
      -4     holiday flag
      -3     school holiday flag
      -2     1 if arso[<YYYY-MM-DD>][0] > 20, else 0 (stays 0 and prints a
             message when the date is missing from arso)
      -1     check_detour(line, row[DEP_IDX], detours)

    Notes from the original author:
      server: 179.68471
      local:  142.....
      used for: results 8, 9 (+45 seconds on line 1), 10 (+65 seconds on line 1)
    """
    features = np.zeros(7 * 30 + 5)

    departure = lpputils.parsedate(row[DEP_IDX])
    dep_date = departure.date()
    hour = departure.hour
    minute = departure.minute

    # Slot within the day: 0-5 for hours 0-5, 6-14 for the thirds of hours
    # 6-8, 15-29 for hours 9-23.
    if hour < 6:
        slot = hour
    elif hour <= 8:
        third = (minute > 20) + (minute > 40)
        slot = 6 + 3 * (hour - 6) + third
    else:
        slot = hour + 6
    features[30 * departure.weekday() + slot] = 1

    in_summer = (lpputils.parsedate(SUMMER_HOL[0]).date() <= dep_date
                 <= lpputils.parsedate(SUMMER_HOL[1]).date())
    features[-5] = 1 if in_summer else 0
    features[-4] = 1 if dep_date in HOLIDAYS else 0
    features[-3] = 1 if dep_date in SCHOOL_HOL else 0

    day_key = dep_date.strftime("%Y-%m-%d")
    if day_key in arso:
        features[-2] = 1 if arso[day_key][0] > 20 else 0
    else:
        print("No data for weather !!!")

    # Append the variant letter to the line id when row[3] starts with it.
    line_id = row[2]
    if row[3][:2] in ('B ', 'G ', 'I ', 'Z '):
        line_id += row[3][0]
    features[-1] = check_detour(line_id, row[DEP_IDX], detours)

    return features
コード例 #10
0
def model8(row):
    """
    MODEL8: one-hot weekday and time slot, three holiday flags, a weather
    value and a detour value.

    The day is split into 30 slots: one per hour, except that hours 6, 7 and
    8 are each split into three slots (minutes 0-20, 21-40, 41-59).

    Layout of the returned vector (length 7 + 30 + 5):
      0-6    weekday (one-hot)
      7-36   time slot (one-hot)
      -5     summer holiday flag
      -4     holiday flag
      -3     school holiday flag
      -2     arso[<YYYY-MM-DD>][0] (stays 0 and prints a message when the
             date is missing from arso)
      -1     check_detour(line, row[DEP_IDX], detours)

    Scores noted by the original author:
      server: ?
      local:
    """
    features = np.zeros(7 + 30 + 5)

    departure = lpputils.parsedate(row[DEP_IDX])
    dep_date = departure.date()
    hour = departure.hour
    minute = departure.minute
    features[departure.weekday()] = 1

    # Slot within the day: 0-5 for hours 0-5, 6-14 for the thirds of hours
    # 6-8, 15-29 for hours 9-23.
    if hour < 6:
        slot = hour
    elif hour <= 8:
        third = (minute > 20) + (minute > 40)
        slot = 6 + 3 * (hour - 6) + third
    else:
        slot = hour + 6
    features[7 + slot] = 1

    in_summer = (lpputils.parsedate(SUMMER_HOL[0]).date() <= dep_date
                 <= lpputils.parsedate(SUMMER_HOL[1]).date())
    features[-5] = 1 if in_summer else 0
    features[-4] = 1 if dep_date in HOLIDAYS else 0
    features[-3] = 1 if dep_date in SCHOOL_HOL else 0

    day_key = dep_date.strftime("%Y-%m-%d")
    if day_key in arso:
        features[-2] = arso[day_key][0]
    else:
        print("No data !!!")

    # Append the variant letter to the line id when row[3] starts with it.
    line_id = row[2]
    if row[3][:2] in ('B ', 'G ', 'I ', 'Z '):
        line_id += row[3][0]
    features[-1] = check_detour(line_id, row[DEP_IDX], detours)

    return features
コード例 #11
0
def model6(row):
    """
    MODEL6: one-hot weekday and hour, three holiday flags and two "rush"
    ramp values.

    The timestamp is read from row[DEP_IDX] via lpputils.parsedate.

    Layout of the returned vector (length 36):
      0-6    weekday (one-hot)
      7-30   hour of day (one-hot)
      31     summer holiday flag
      32     holiday flag
      33     school holiday flag
      34     rush1: (hour % 3) / 3 for hours 3..6, else 0
      35     rush2: (3 - hour % 15) / 3 for hours 15..18, else 0

    Scores noted by the original author:
      server: ??
      local:  147.68
    """
    features = np.zeros(7 + 24 + 5)

    departure = lpputils.parsedate(row[DEP_IDX])
    hour = departure.hour
    features[departure.weekday()] = 1
    features[7 + hour] = 1

    dep_date = departure.date()
    in_summer = (lpputils.parsedate(SUMMER_HOL[0]).date() <= dep_date
                 <= lpputils.parsedate(SUMMER_HOL[1]).date())
    features[-5] = 1 if in_summer else 0
    features[-4] = 1 if dep_date in HOLIDAYS else 0
    features[-3] = 1 if dep_date in SCHOOL_HOL else 0

    # rush1 takes the values 0, 1/3, 2/3, 0 for hours 3, 4, 5, 6.
    # NOTE(review): hour 6 yields 0 because 6 % 3 == 0 - confirm that a
    # value of 1 (i.e. (hour - 3) / 3) was not intended.
    if 3 <= hour <= 6:
        features[-2] = (hour % 3) / 3
    # rush2 takes the values 1, 2/3, 1/3, 0 for hours 15, 16, 17, 18.
    if 15 <= hour <= 18:
        features[-1] = (3 - hour % 15) / 3

    return features
コード例 #12
0
def model10(row):
    """
    MODEL10: one-hot (weekday, time slot) combination, one-hot driver and
    three holiday flags.

    The day is split into 30 slots: one per hour, except that hours 6, 7 and
    8 are each split into three slots (minutes 0-20, 21-40, 41-59). Driver
    positions come from the module-level mapping driver_idxs; an unknown
    driver leaves that section all zero.

    Layout of the returned vector (length 7 * 30 + len(driver_idxs) + 4):
      0-209   weekday * 30 + slot (one-hot)
      210 ..  driver (len(driver_idxs) slots)
      -4      summer holiday flag
      -3      holiday flag
      -2      school holiday flag
      -1      reserved for a weather value; never written here, so always 0

    Notes from the original author:
      server: 145.82494
      local:  122.3
      used for: results 11, 12, 13
    """
    features = np.zeros(7 * 30 + len(driver_idxs) + 4)

    departure = lpputils.parsedate(row[DEP_IDX])
    dep_date = departure.date()
    hour = departure.hour
    minute = departure.minute

    # Slot within the day: 0-5 for hours 0-5, 6-14 for the thirds of hours
    # 6-8, 15-29 for hours 9-23.
    if hour < 6:
        slot = hour
    elif hour <= 8:
        third = (minute > 20) + (minute > 40)
        slot = 6 + 3 * (hour - 6) + third
    else:
        slot = hour + 6
    features[30 * departure.weekday() + slot] = 1

    driver_base = 7 * 30
    driver_id = row[DRV_IDX]
    if driver_id in driver_idxs:
        features[driver_base + driver_idxs[driver_id]] = 1

    in_summer = (lpputils.parsedate(SUMMER_HOL[0]).date() <= dep_date
                 <= lpputils.parsedate(SUMMER_HOL[1]).date())
    features[-4] = 1 if in_summer else 0
    features[-3] = 1 if dep_date in HOLIDAYS else 0
    features[-2] = 1 if dep_date in SCHOOL_HOL else 0

    return features
コード例 #13
0
def napolni_x(d, prazniki):
    """
    Build the feature list for a single record.

    The timestamp is read from d[-3] via lpputils.parsedate.

    Layout of the returned list (35 values, each 0 or 1):
      0      minute <= 30
      1      minute > 30
      2      6 < hour < 20
      3-26   hour of day (one-hot)
      27-33  weekday (one-hot)
      34     the date equals one of the dates in `prazniki`
    """
    stamp = lpputils.parsedate(d[-3])
    hour = stamp.hour

    first_half = 1 if stamp.minute <= 30 else 0
    features = [first_half, 1 - first_half, 1 if 6 < hour < 20 else 0]

    features.extend(1 if hour == h else 0 for h in range(24))
    features.extend(1 if stamp.weekday() == wd else 0 for wd in range(7))

    is_holiday = any(stamp.date() == holiday for holiday in prazniki)
    features.append(1 if is_holiday else 0)
    return features
コード例 #14
0
def model7(row):
    """
    MODEL7: one-hot weekday and time slot, three holiday flags and a
    weather value.

    The day is split into 30 slots: one per hour, except that hours 6, 7 and
    8 are each split into three slots (minutes 0-20, 21-40, 41-59).

    Layout of the returned vector (length 7 + 30 + 4):
      0-6    weekday (one-hot)
      7-36   time slot (one-hot)
      -4     summer holiday flag
      -3     holiday flag
      -2     school holiday flag
      -1     arso[<YYYY-MM-DD>][0] (stays 0 and prints a message when the
             date is missing from arso)

    Scores noted by the original author:
      server: ?
      local:  128.68
    """
    features = np.zeros(7 + 30 + 4)

    departure = lpputils.parsedate(row[DEP_IDX])
    dep_date = departure.date()
    hour = departure.hour
    minute = departure.minute
    features[departure.weekday()] = 1

    # Slot within the day: 0-5 for hours 0-5, 6-14 for the thirds of hours
    # 6-8, 15-29 for hours 9-23.
    if hour < 6:
        slot = hour
    elif hour <= 8:
        third = (minute > 20) + (minute > 40)
        slot = 6 + 3 * (hour - 6) + third
    else:
        slot = hour + 6
    features[7 + slot] = 1

    in_summer = (lpputils.parsedate(SUMMER_HOL[0]).date() <= dep_date
                 <= lpputils.parsedate(SUMMER_HOL[1]).date())
    features[-4] = 1 if in_summer else 0
    features[-3] = 1 if dep_date in HOLIDAYS else 0
    features[-2] = 1 if dep_date in SCHOOL_HOL else 0

    day_key = dep_date.strftime("%Y-%m-%d")
    if day_key in arso:
        features[-1] = arso[day_key][0]
    else:
        print("No data !!!")

    return features
コード例 #15
0
def model10(row):
    """
    MODEL10: one-hot (weekday, hour, quarter-hour) combination, three
    holiday flags, a binary weather flag and a detour value.

    Every hour is split into four slots by minute: 0-15, 16-30, 31-45 and
    46-59.

    Layout of the returned vector (length 7 * 24 * 4 + 5):
      0-671  weekday * 96 + hour * 4 + quarter (one-hot)
      -5     summer holiday flag
      -4     holiday flag
      -3     school holiday flag
      -2     1 if arso[<YYYY-MM-DD>][0] > 20, else 0 (stays 0 and prints a
             message when the date is missing from arso)
      -1     check_detour(line, row[DEP_IDX], detours)
    """
    features = np.zeros(7 * 24 * 4 + 5)

    departure = lpputils.parsedate(row[DEP_IDX])
    dep_date = departure.date()
    minute = departure.minute

    # Quarter index 0..3; the boundaries 15, 30 and 45 belong to the lower slot.
    quarter = (minute > 15) + (minute > 30) + (minute > 45)
    weekday_base = (24 * 4) * departure.weekday()
    features[weekday_base + departure.hour * 4 + quarter] = 1

    in_summer = (lpputils.parsedate(SUMMER_HOL[0]).date() <= dep_date
                 <= lpputils.parsedate(SUMMER_HOL[1]).date())
    features[-5] = 1 if in_summer else 0
    features[-4] = 1 if dep_date in HOLIDAYS else 0
    features[-3] = 1 if dep_date in SCHOOL_HOL else 0

    day_key = dep_date.strftime("%Y-%m-%d")
    if day_key in arso:
        features[-2] = 1 if arso[day_key][0] > 20 else 0
    else:
        print("No data for weather !!!")

    # Append the variant letter to the line id when row[3] starts with it.
    line_id = row[2]
    if row[3][:2] in ('B ', 'G ', 'I ', 'Z '):
        line_id += row[3][0]
    features[-1] = check_detour(line_id, row[DEP_IDX], detours)

    return features
コード例 #16
0

if __name__ == "__main__":
    # --- Training: read all labelled trips into memory ---
    f = gzip.open("train_pred.csv.gz", "rt")
    reader = csv.reader(f, delimiter="\t")
    next(reader)  # skip the header row
    # ['Registration', 'Driver ID', 'Route', 'Route Direction', 'Route description', 'First station', 'Departure time', 'Last station', 'Arrival time']

    data = [d for d in reader]
    f.close()  # everything is in `data`; release the handle before reusing `f`
    noLines = len(data)

    Y = numpy.zeros(noLines)
    X = numpy.zeros([noLines, 7])
    for i, line in enumerate(data):
        Y[i] = lpputils.tsdiff(line[-1], line[-3])  # travel time (arrival - departure)
        # Features are derived from the departure time (column -3), the same
        # column the prediction loop below uses; previously the arrival time
        # (column -1) was parsed here, so training and prediction disagreed.
        odhod = lpputils.parsedate(line[-3])
        X[i] = getAttributes(odhod)

    lr = linear.LinearLearner(lambda_=1.)
    napovednik = lr(X, Y)

    # --- Prediction ---
    f = gzip.open("test_pred.csv.gz", "rt")
    test = csv.reader(f, delimiter="\t")
    # Skip the header of the *test* reader. Calling next() on the exhausted
    # training reader raised StopIteration and left the header in the data.
    next(test)  # skip legend

    fo = open("naloga3.txt", "wt")
    for l in test:
        odhod = lpputils.parsedate(l[-3])
        nov_primer = numpy.array(getAttributes(odhod))
        #print(nov_primer)
        c = napovednik(nov_primer)
コード例 #17
0
    else:
        linije[primer[3]] = [primer]

# Train one model per key of `linije`; each list of records is replaced by
# the predictor that the learner returns for it.
linearna_regresija = linear.LinearLearner()
for linija in linije.keys():
    x, y = zgradi_matrike(linije[linija], True)
    linije[linija] = linearna_regresija(x, y)

# To print the MAE, change the input file to "train.csv.gz".
# The context manager closes the gzip handle once the rows have been read.
with gzip.open("test.csv.gz", "rt", encoding="latin1") as f:
    vrstica = csv.reader(f, delimiter="\t")
    next(vrstica)  # skip the header row
    ime, dejanski_cas, primeri, testni_X = zgradi_matrike(vrstica, False)

mae_mesec = 11  # only rows from this month contribute to the reported error
mae = 0
stevilo_primerov = 0

# Write one prediction per line; the file is closed even if a row fails.
with open("napovedi_tekmovanje.txt", "wt", encoding="latin1") as datoteka:
    for vrstica in range(len(primeri)):
        napoved = lpputils.tsadd(primeri[vrstica],
                                 -linije[ime[vrstica]](testni_X[vrstica]))
        datoteka.write(napoved + "\n")
        if lpputils.parsedate(primeri[vrstica]).month == mae_mesec:
            mae += absolute_error(dejanski_cas[vrstica], napoved)
            stevilo_primerov += 1

# A non-zero sum implies at least one counted row, so the division is safe.
if mae != 0:
    print("Mean absolute error:", mae / stevilo_primerov)