def model2(row):
    """
    MODEL2: one-hot weekday and hour of departure plus two holiday flags.

    Feature layout (7 + 24 + 2 values):
        0-6    weekday of the departure (one-hot)
        7-30   hour of the departure (one-hot)
        -2     1 if the departure date is in HOLIDAYS
        -1     1 if the departure date is in SCHOOL_HOL

    Scores noted by the original author: server 152.32996, local 129.303771.
    """
    departure = lpputils.parsedate(row[DEP_IDX])
    when = departure.date()

    features = np.zeros(7 + 24 + 2)
    features[departure.weekday()] = 1
    features[7 + departure.hour] = 1
    features[-2] = int(when in HOLIDAYS)
    features[-1] = int(when in SCHOOL_HOL)
    return features
def zgradi_matrike(linija, training):
    """
    Build the regression matrices for the records in ``linija``.

    The holiday dates are read from ``prazniki_in_dela_prosti_dnevi.csv``
    (first line skipped as a header; the date is the part of the first CSV
    field before the first ``;``, in ``%d.%m.%Y`` format) and handed to
    ``napolni_x`` for every record.

    Parameters:
        linija: iterable of records (indexable rows). Fields ``[-3]`` and
            ``[-1]`` are timestamps and ``[3]`` is used as the route key.
            # presumably [-3] is the departure and [-1] the arrival time -
            # confirm against the data files
        training: when true, return the training pair, otherwise return the
            data needed for prediction.

    Returns:
        training:      ``(X, Y)`` where ``X`` is the attribute matrix passed
                       through ``linear.append_ones`` and ``Y`` holds
                       ``tsdiff(record[-3], record[-1])`` for every record.
        not training:  ``(route, dejanski_cas, originalen_datum, X)`` - the
                       lists of ``record[3]``, ``record[-1]`` and
                       ``record[-3]`` values plus the attribute matrix.
    """
    # The context manager closes the holiday file; the original left the
    # handle open and then reused its name as the loop variable.
    with open("prazniki_in_dela_prosti_dnevi.csv", "rt",
              encoding="latin1") as holiday_file:
        branje = csv.reader(holiday_file)
        next(branje, None)  # skip the header line (tolerates an empty file)
        prazniki = [
            datetime.datetime.strptime(
                vrstica[0].split(";", 1)[0], "%d.%m.%Y").date()
            for vrstica in branje
            if vrstica  # csv.reader yields [] for blank lines
        ]

    if training:
        x = []
        y = []
        for primer in linija:
            x.append(napolni_x(primer, prazniki))
            y.append(
                lpputils.tsdiff(lpputils.parsedate(primer[-3]),
                                lpputils.parsedate(primer[-1])))
        X = linear.append_ones(np.array(x))
        Y = np.array(y)
        return X, Y

    x = []
    originalen_datum = []
    dejanski_cas = []
    route = []
    for primer in linija:
        originalen_datum.append(primer[-3])
        dejanski_cas.append(primer[-1])
        route.append(primer[3])
        x.append(napolni_x(primer, prazniki))
    X = linear.append_ones(np.array(x))
    return route, dejanski_cas, originalen_datum, X
def model4(row):
    """
    MODEL4: one-hot weekday and hour of departure plus three holiday flags.

    Feature layout (7 + 24 + 3 values):
        0-6    weekday of the departure (one-hot)
        7-30   hour of the departure (one-hot)
        -3     1 if the departure date lies within SUMMER_HOL[0]..SUMMER_HOL[1]
        -2     1 if the departure date is in HOLIDAYS
        -1     1 if the departure date is in SCHOOL_HOL

    Scores noted by the original author: server 150.86627, local 128.68312.
    """
    departure = lpputils.parsedate(row[DEP_IDX])
    when = departure.date()

    features = np.zeros(7 + 24 + 3)
    features[departure.weekday()] = 1
    features[7 + departure.hour] = 1

    is_holiday = when in HOLIDAYS
    is_school_holiday = when in SCHOOL_HOL
    is_summer = (lpputils.parsedate(SUMMER_HOL[0]).date() <= when <=
                 lpputils.parsedate(SUMMER_HOL[1]).date())

    features[-3] = int(is_summer)
    features[-2] = int(is_holiday)
    features[-1] = int(is_school_holiday)
    return features
def model1(row):
    """
    MODEL1: normalised weekday and hour, two holiday flags and the driver and
    bus averages.

    Feature layout (6 values):
        0   weekday of the departure divided by 7.0
        1   hour of the departure divided by 24.0
        2   1 if the departure date is in HOLIDAYS
        3   1 if the departure date is in SCHOOL_HOL
        4   driver_average(row[DRV_IDX])
        5   bus_average(row[BUS_IDX])

    The summer-holiday attribute is not part of this model: the original code
    computed it but never stored it, so that dead computation was dropped.
    """
    # Parse the departure timestamp once instead of once per attribute.
    departure = lpputils.parsedate(row[DEP_IDX])
    when = departure.date()

    result = np.zeros(6)
    result[0] = departure.weekday() / 7.0  # day
    result[1] = departure.hour / 24.0  # hour
    result[2] = 1 if when in HOLIDAYS else 0
    result[3] = 1 if when in SCHOOL_HOL else 0
    result[4] = driver_average(row[DRV_IDX])
    result[5] = bus_average(row[BUS_IDX])
    return result
def visualize(train_data, _month, day_s, day_e):
    """
    Print and plot the average travel time per departure hour.

    For every day number in ``range(day_s, day_e)`` (``day_e`` excluded) the
    rows of ``train_data`` whose departure falls in month ``_month`` on that
    day add ``tsdiff(row[ARR_IDX], row[DEP_IDX])`` to the bucket of their
    departure hour. The 24 sums are divided by the matching counts, printed
    and finally plotted with matplotlib.

    ``train_data`` is iterated once per day, so it has to be re-iterable
    (e.g. a list).

    NOTE(review): the nesting of the statements that follow the inner loop was
    reconstructed from a flattened source - confirm that the averages are
    meant to be printed once per day and plotted once at the end.
    """
    comp_data = []  # only referenced by the disabled code below
    # Sums and counts are created once, so they keep accumulating across days.
    times = np.zeros(24)
    cnts = np.zeros(24)
    for d in range(day_s, day_e, 1):
        for row in train_data:
            date = lpputils.parsedate(row[DEP_IDX])
            hour = date.hour
            month = date.month
            day = date.day
            if month == _month and day == d:
                times[hour] += lpputils.tsdiff(row[ARR_IDX], row[DEP_IDX])
                cnts[hour] += 1
        # The tiny constant keeps hours without any trip from dividing by zero.
        norm_times = [
            float(times[i]) / (float(cnts[i]) + 0.0000000000001)
            for i in range(len(times))
        ]
        #comp_data.append(np.asarray(norm_times))
        print(norm_times)
    #with open('vizualizacija.csv', 'wb') as abc:
    #    np.savetxt(abc, np.asarray(comp_data), delimiter=",", fmt="%d")
    #data = np.genfromtxt('vizualizacija.csv', delimiter=',')
    #for i in range(len(data)):
    plt.plot(norm_times, label='the data')
    plt.show()
def model3(row):
    """
    MODEL3: one-hot weekday, hour, driver and bus plus three holiday flags.

    Feature layout:
        0-6     weekday of the departure (one-hot)
        7-30    hour of the departure (one-hot)
        31...   one slot per entry of driver_idxs (one-hot, all zero for an
                unknown driver)
        then    one slot per entry of buses_idxs (one-hot, all zero for an
                unknown bus)
        -3      1 if the departure date lies within SUMMER_HOL[0]..SUMMER_HOL[1]
        -2      1 if the departure date is in HOLIDAYS
        -1      1 if the departure date is in SCHOOL_HOL

    Scores noted by the original author: server unknown, local 129.2525.
    """
    hour_base = 7
    driver_base = 31
    bus_base = driver_base + len(driver_idxs)
    holiday_base = bus_base + len(buses_idxs)

    departure = lpputils.parsedate(row[DEP_IDX])
    features = np.zeros(holiday_base + 3)
    features[departure.weekday()] = 1
    features[hour_base + departure.hour] = 1

    driver = row[DRV_IDX]
    if driver in driver_idxs:
        features[driver_base + driver_idxs[driver]] = 1

    bus = row[BUS_IDX]
    if bus in buses_idxs:
        features[bus_base + buses_idxs[bus]] = 1

    when = departure.date()
    is_holiday = when in HOLIDAYS
    is_school_holiday = when in SCHOOL_HOL
    is_summer = (lpputils.parsedate(SUMMER_HOL[0]).date() <= when <=
                 lpputils.parsedate(SUMMER_HOL[1]).date())

    features[-3] = int(is_summer)
    features[-2] = int(is_holiday)
    features[-1] = int(is_school_holiday)
    return features
def model_to_csv(train_data, filename):
    """
    Dump one CSV line per training row to ``filename``.

    The file starts with the header ``line,month,day,hour,travel``. Each row
    contributes ``row[2]``, the month, weekday number and hour of the
    departure timestamp ``row[DEP_IDX]``, and the travel time
    ``tsdiff(row[ARR_IDX], row[DEP_IDX])``.

    Parameters:
        train_data: iterable of indexable rows.
        filename: path of the file to create (an existing file is overwritten).
    """
    # The context manager guarantees the data is flushed and the handle is
    # closed; the original never closed the file.
    with open(filename, 'w') as out:
        out.write('line,month,day,hour,travel\n')
        for row in train_data:
            departure = lpputils.parsedate(row[DEP_IDX])
            travel = lpputils.tsdiff(row[ARR_IDX], row[DEP_IDX])
            out.write('{0},{1},{2},{3},{4}\n'.format(
                row[2], departure.month, departure.weekday(), departure.hour,
                travel))
def model5(row):
    """
    MODEL5: one-hot weekday and hour of departure, three holiday flags and a
    weather value.

    Feature layout (7 + 24 + 4 values):
        0-6    weekday of the departure (one-hot)
        7-30   hour of the departure (one-hot)
        -4     1 if the departure date lies within SUMMER_HOL[0]..SUMMER_HOL[1]
        -3     1 if the departure date is in HOLIDAYS
        -2     1 if the departure date is in SCHOOL_HOL
        -1     first value of the arso entry for the departure date
               (precipitation, according to the original notes); stays 0 and
               a message is printed when the date has no entry

    Scores noted by the original author: server 184.51330, local 147.68.
    """
    departure = lpputils.parsedate(row[DEP_IDX])
    when = departure.date()

    features = np.zeros(7 + 24 + 4)
    features[departure.weekday()] = 1
    features[7 + departure.hour] = 1

    is_holiday = when in HOLIDAYS
    is_school_holiday = when in SCHOOL_HOL
    is_summer = (lpputils.parsedate(SUMMER_HOL[0]).date() <= when <=
                 lpputils.parsedate(SUMMER_HOL[1]).date())
    features[-4] = int(is_summer)
    features[-3] = int(is_holiday)
    features[-2] = int(is_school_holiday)

    # arso is keyed by ISO formatted dates.
    day_key = when.strftime("%Y-%m-%d")
    if day_key in arso:
        features[-1] = arso[day_key][0]
    else:
        print("{0} - No weather data !!!".format(day_key))
    return features
def model9(row):
    """
    MODEL9: one-hot (weekday, time slot) combination plus holiday, weather
    and detour attributes.

    Every weekday owns 30 time slots: hours 0-5 and 9-23 take one slot each,
    hours 6-8 are split into three slots (minutes 0-20, 21-40 and 41-59).

    Feature layout (7 * 30 + 5 values):
        0-209  weekday/time-slot combination (one-hot)
        -5     1 if the departure date lies within SUMMER_HOL[0]..SUMMER_HOL[1]
        -4     1 if the departure date is in HOLIDAYS
        -3     1 if the departure date is in SCHOOL_HOL
        -2     1 if the first arso value for the date is above 20
               (precipitation, according to the original notes); stays 0 and
               a message is printed when the date has no entry
        -1     result of check_detour for the line and the departure time

    Scores noted by the original author: server 179.68471, local about 142.
    Used for results 8, 9 (+45 seconds on line 1) and 10 (+65 seconds on
    line 1).
    """
    departure = lpputils.parsedate(row[DEP_IDX])
    when = departure.date()
    hour = departure.hour
    minute = departure.minute

    # Position of the departure inside the 30 slots of its weekday.
    if hour < 6:
        slot = hour
    elif hour <= 8:
        if minute <= 20:
            third = 0
        elif minute <= 40:
            third = 1
        else:
            third = 2
        slot = hour + (hour - 6) * 2 + third
    else:
        slot = hour + 6

    features = np.zeros(7 * 30 + 5)
    features[30 * departure.weekday() + slot] = 1

    is_holiday = when in HOLIDAYS
    is_school_holiday = when in SCHOOL_HOL
    is_summer = (lpputils.parsedate(SUMMER_HOL[0]).date() <= when <=
                 lpputils.parsedate(SUMMER_HOL[1]).date())
    features[-5] = int(is_summer)
    features[-4] = int(is_holiday)
    features[-3] = int(is_school_holiday)

    day_key = when.strftime("%Y-%m-%d")
    if day_key in arso:
        features[-2] = 1 if arso[day_key][0] > 20 else 0
    else:
        print("No data for weather !!!")

    # Some route descriptions start with a one-letter variant marker that is
    # appended to the line number before the detour lookup.
    description = row[3]
    line = row[2]
    if description[0:2] in ['B ', 'G ', 'I ', 'Z ']:
        line += description[0]
    features[-1] = check_detour(line, row[DEP_IDX], detours)
    return features
def model8(row):
    """
    MODEL8: one-hot weekday and time slot plus holiday, weather and detour
    attributes.

    The day is divided into 30 time slots: hours 0-5 and 9-23 take one slot
    each, hours 6-8 are split into three slots (minutes 0-20, 21-40, 41-59).

    Feature layout (7 + 30 + 5 values):
        0-6    weekday of the departure (one-hot)
        7-36   time slot of the departure (one-hot)
        -5     1 if the departure date lies within SUMMER_HOL[0]..SUMMER_HOL[1]
        -4     1 if the departure date is in HOLIDAYS
        -3     1 if the departure date is in SCHOOL_HOL
        -2     first value of the arso entry for the departure date
               (precipitation, according to the original notes); stays 0 and
               a message is printed when the date has no entry
        -1     result of check_detour for the line and the departure time

    No scores were recorded for this model.
    """
    departure = lpputils.parsedate(row[DEP_IDX])
    when = departure.date()
    hour = departure.hour
    minute = departure.minute

    # Position of the departure inside the 30 daily slots.
    if hour < 6:
        slot = hour
    elif hour <= 8:
        if minute <= 20:
            third = 0
        elif minute <= 40:
            third = 1
        else:
            third = 2
        slot = hour + (hour - 6) * 2 + third
    else:
        slot = hour + 6

    features = np.zeros(7 + 30 + 5)
    features[departure.weekday()] = 1
    features[7 + slot] = 1

    is_holiday = when in HOLIDAYS
    is_school_holiday = when in SCHOOL_HOL
    is_summer = (lpputils.parsedate(SUMMER_HOL[0]).date() <= when <=
                 lpputils.parsedate(SUMMER_HOL[1]).date())
    features[-5] = int(is_summer)
    features[-4] = int(is_holiday)
    features[-3] = int(is_school_holiday)

    day_key = when.strftime("%Y-%m-%d")
    if day_key in arso:
        features[-2] = arso[day_key][0]
    else:
        print("No data !!!")

    # Some route descriptions start with a one-letter variant marker that is
    # appended to the line number before the detour lookup.
    description = row[3]
    line = row[2]
    if description[0:2] in ['B ', 'G ', 'I ', 'Z ']:
        line += description[0]
    features[-1] = check_detour(line, row[DEP_IDX], detours)
    return features
def model6(row):
    """
    MODEL6: one-hot weekday and hour of departure, three holiday flags and
    two "rush" attributes.

    Feature layout (7 + 24 + 5 values):
        0-6    weekday of the departure (one-hot)
        7-30   hour of the departure (one-hot)
        -5     1 if the departure date lies within SUMMER_HOL[0]..SUMMER_HOL[1]
        -4     1 if the departure date is in HOLIDAYS
        -3     1 if the departure date is in SCHOOL_HOL
        -2     (hour % 3) / 3 for hours 3-6, otherwise 0
        -1     (3 - hour % 15) / 3 for hours 15-18, otherwise 0

    Scores noted by the original author: server unknown, local 147.68.

    NOTE(review): the first rush attribute evaluates to 0 at hour 6 because
    6 % 3 == 0 - confirm that this is intended.
    """
    departure = lpputils.parsedate(row[DEP_IDX])
    when = departure.date()
    hour = departure.hour

    features = np.zeros(7 + 24 + 5)
    features[departure.weekday()] = 1
    features[7 + hour] = 1

    is_holiday = when in HOLIDAYS
    is_school_holiday = when in SCHOOL_HOL
    is_summer = (lpputils.parsedate(SUMMER_HOL[0]).date() <= when <=
                 lpputils.parsedate(SUMMER_HOL[1]).date())
    features[-5] = int(is_summer)
    features[-4] = int(is_holiday)
    features[-3] = int(is_school_holiday)

    if 3 <= hour <= 6:
        features[-2] = (hour % 3) / 3
    if 15 <= hour <= 18:
        features[-1] = (3 - hour % 15) / 3
    return features
def model10(row):
    """
    MODEL10: one-hot (weekday, time slot) combination, one-hot driver and
    three holiday flags.

    Every weekday owns 30 time slots: hours 0-5 and 9-23 take one slot each,
    hours 6-8 are split into three slots (minutes 0-20, 21-40 and 41-59).

    Feature layout (7 * 30 + len(driver_idxs) + 4 values):
        0-209   weekday/time-slot combination (one-hot)
        210...  one slot per entry of driver_idxs (one-hot, all zero for an
                unknown driver)
        -4      1 if the departure date lies within SUMMER_HOL[0]..SUMMER_HOL[1]
        -3      1 if the departure date is in HOLIDAYS
        -2      1 if the departure date is in SCHOOL_HOL
        -1      always 0 (the weather attribute that used this slot is
                disabled)

    Scores noted by the original author: server 145.82494, local 122.3.
    Used for results 11, 12 and 13.

    NOTE(review): the name model10 is defined more than once in this file;
    the definition that is executed last replaces the others.
    """
    departure = lpputils.parsedate(row[DEP_IDX])
    when = departure.date()
    hour = departure.hour
    minute = departure.minute

    # Position of the departure inside the 30 slots of its weekday.
    if hour < 6:
        slot = hour
    elif hour <= 8:
        if minute <= 20:
            third = 0
        elif minute <= 40:
            third = 1
        else:
            third = 2
        slot = hour + (hour - 6) * 2 + third
    else:
        slot = hour + 6

    features = np.zeros(7 * 30 + len(driver_idxs) + 4)
    features[30 * departure.weekday() + slot] = 1

    driver_base = 7 * 30
    driver = row[DRV_IDX]
    if driver in driver_idxs:
        features[driver_base + driver_idxs[driver]] = 1

    is_holiday = when in HOLIDAYS
    is_school_holiday = when in SCHOOL_HOL
    is_summer = (lpputils.parsedate(SUMMER_HOL[0]).date() <= when <=
                 lpputils.parsedate(SUMMER_HOL[1]).date())
    features[-4] = int(is_summer)
    features[-3] = int(is_holiday)
    features[-2] = int(is_school_holiday)
    return features
def napolni_x(d, prazniki):
    """
    Build the list of binary attributes for one record.

    The timestamp is taken from ``d[-3]``. The returned list holds 35 values:
        0      1 if the minute is at most 30
        1      1 if the minute is above 30
        2      1 if the hour is between 7 and 19 (inclusive)
        3-26   hour of the timestamp (one-hot)
        27-33  weekday of the timestamp (one-hot)
        34     1 if the date equals one of the dates in ``prazniki``
    """
    odhod = lpputils.parsedate(d[-3])
    minuta = odhod.minute
    ura_odhoda = odhod.hour
    dan_odhoda = odhod.weekday()

    znacilke = [
        int(minuta <= 30),
        int(minuta > 30),
        int(6 < ura_odhoda < 20),
    ]
    znacilke.extend(int(ura_odhoda == ura) for ura in range(24))
    znacilke.extend(int(dan_odhoda == dan) for dan in range(7))
    # Holiday flag: set as soon as one of the listed dates matches.
    znacilke.append(int(any(odhod.date() == praznik for praznik in prazniki)))
    return znacilke
def model7(row):
    """
    MODEL7: one-hot weekday and time slot, three holiday flags and a weather
    value.

    The day is divided into 30 time slots: hours 0-5 and 9-23 take one slot
    each, hours 6-8 are split into three slots (minutes 0-20, 21-40, 41-59).

    Feature layout (7 + 30 + 4 values):
        0-6    weekday of the departure (one-hot)
        7-36   time slot of the departure (one-hot)
        -4     1 if the departure date lies within SUMMER_HOL[0]..SUMMER_HOL[1]
        -3     1 if the departure date is in HOLIDAYS
        -2     1 if the departure date is in SCHOOL_HOL
        -1     first value of the arso entry for the departure date
               (precipitation, according to the original notes); stays 0 and
               a message is printed when the date has no entry

    Scores noted by the original author: server unknown, local 128.68.
    """
    departure = lpputils.parsedate(row[DEP_IDX])
    when = departure.date()
    hour = departure.hour
    minute = departure.minute

    # Position of the departure inside the 30 daily slots.
    if hour < 6:
        slot = hour
    elif hour <= 8:
        if minute <= 20:
            third = 0
        elif minute <= 40:
            third = 1
        else:
            third = 2
        slot = hour + (hour - 6) * 2 + third
    else:
        slot = hour + 6

    features = np.zeros(7 + 30 + 4)
    features[departure.weekday()] = 1
    features[7 + slot] = 1

    is_holiday = when in HOLIDAYS
    is_school_holiday = when in SCHOOL_HOL
    is_summer = (lpputils.parsedate(SUMMER_HOL[0]).date() <= when <=
                 lpputils.parsedate(SUMMER_HOL[1]).date())
    features[-4] = int(is_summer)
    features[-3] = int(is_holiday)
    features[-2] = int(is_school_holiday)

    day_key = when.strftime("%Y-%m-%d")
    if day_key in arso:
        features[-1] = arso[day_key][0]
    else:
        print("No data !!!")
    return features
def model10(row):
    """
    MODEL10 (quarter-hour variant): one-hot (weekday, hour, quarter)
    combination plus holiday, weather and detour attributes.

    Every hour is split into four slots by the departure minute:
    0-15, 16-30, 31-45 and 46-59.

    Feature layout (7 * 24 * 4 + 5 values):
        0-671  weekday/hour/quarter combination (one-hot)
        -5     1 if the departure date lies within SUMMER_HOL[0]..SUMMER_HOL[1]
        -4     1 if the departure date is in HOLIDAYS
        -3     1 if the departure date is in SCHOOL_HOL
        -2     1 if the first arso value for the date is above 20
               (precipitation, according to the original notes); stays 0 and
               a message is printed when the date has no entry
        -1     result of check_detour for the line and the departure time

    NOTE(review): the name model10 is defined more than once in this file;
    the definition that is executed last replaces the others.
    """
    departure = lpputils.parsedate(row[DEP_IDX])
    when = departure.date()
    minute = departure.minute

    if minute <= 15:
        quarter = 0
    elif minute <= 30:
        quarter = 1
    elif minute <= 45:
        quarter = 2
    else:
        quarter = 3

    features = np.zeros(7 * 24 * 4 + 5)
    day_base = (24 * 4) * departure.weekday()
    features[day_base + departure.hour * 4 + quarter] = 1

    is_holiday = when in HOLIDAYS
    is_school_holiday = when in SCHOOL_HOL
    is_summer = (lpputils.parsedate(SUMMER_HOL[0]).date() <= when <=
                 lpputils.parsedate(SUMMER_HOL[1]).date())
    features[-5] = int(is_summer)
    features[-4] = int(is_holiday)
    features[-3] = int(is_school_holiday)

    day_key = when.strftime("%Y-%m-%d")
    if day_key in arso:
        features[-2] = 1 if arso[day_key][0] > 20 else 0
    else:
        print("No data for weather !!!")

    # Some route descriptions start with a one-letter variant marker that is
    # appended to the line number before the detour lookup.
    description = row[3]
    line = row[2]
    if description[0:2] in ['B ', 'G ', 'I ', 'Z ']:
        line += description[0]
    features[-1] = check_detour(line, row[DEP_IDX], detours)
    return features
if __name__ == "__main__":
    # Script entry point: fit a linear model on train_pred.csv.gz and run it
    # over the rows of test_pred.csv.gz.

    # Read the training rows; the context manager closes the archive once all
    # rows are in memory (the original left this handle open).
    with gzip.open("train_pred.csv.gz", "rt") as f:
        reader = csv.reader(f, delimiter="\t")
        next(reader)  # skip the header:
        # ['Registration', 'Driver ID', 'Route', 'Route Direction', 'Route description', 'First station', 'Departure time', 'Last station', 'Arrival time']
        data = [d for d in reader]

    noLines = len(data)
    Y = numpy.zeros(noLines)
    X = numpy.zeros([noLines, 7])
    for i, line in enumerate(data):
        Y[i] = lpputils.tsdiff(line[-1], line[-3])  # determine the travel time
        # Attributes are built from the departure time (column -3), the same
        # column the prediction loop below uses; the original parsed the
        # arrival column (-1) here, so training and prediction disagreed.
        odhod = lpputils.parsedate(line[-3])
        X[i] = getAttributes(odhod)

    lr = linear.LinearLearner(lambda_=1.)
    napovednik = lr(X, Y)

    f = gzip.open("test_pred.csv.gz", "rt")
    test = csv.reader(f, delimiter="\t")
    # Skip the header of the TEST reader. The original called next(reader),
    # but that reader is already exhausted and raises StopIteration.
    next(test)  # skip legend

    fo = open("naloga3.txt", "wt")
    # NOTE(review): nothing visible here writes `c` to `fo` or closes `f` and
    # `fo` - confirm against the remainder of the script.
    for l in test:
        odhod = lpputils.parsedate(l[-3])
        nov_primer = numpy.array(getAttributes(odhod))
        #print(nov_primer)
        c = napovednik(nov_primer)
else: linije[primer[3]] = [primer] linearna_regresija = linear.LinearLearner() for linija in linije.keys(): x, y = zgradi_matrike(linije[linija], True) linije[linija] = linearna_regresija(x, y) f = gzip.open("test.csv.gz", "rt", encoding="latin1") #za izpis MAE spremeni v "train.csv.gz" vrstica = csv.reader(f, delimiter="\t") next(vrstica) ime, dejanski_cas, primeri, testni_X = zgradi_matrike(vrstica, False) datoteka = open("napovedi_tekmovanje.txt", "wt", encoding="latin1") mae_mesec = 11 mae = 0 stevilo_primerov = 0 for vrstica in range(len(primeri)): napoved = lpputils.tsadd(primeri[vrstica], -linije[ime[vrstica]](testni_X[vrstica])) datoteka.write(napoved + "\n") if lpputils.parsedate(primeri[vrstica]).month == mae_mesec: mae += absolute_error(dejanski_cas[vrstica], napoved) stevilo_primerov += 1 datoteka.close() if mae != 0: print("Mean absolute error:", mae / stevilo_primerov)