print("end loading\n") df['WEEK_END'] = df['DAY_WE_DS'].apply(lambda x: int(x >= 5)) print(df.head()) print(len(df)) #limits for each assignment from the start to the week w limits = [[0 for k in range(28)] for v in range(12)] #finding maximal value and average value for i in range(28): limits[w][i] = df[(df.ASS_ASSIGNMENT == i)].CSPL_RECEIVED_CALLS.max( axis=0) #0 special case for Evenements limits[centre('Evenements')][w] = 0 assign = np.unique(df.ASS_ASSIGNMENT) print('end !') print(time.time() - start_time) start_time = time.time() # In[ ]: #start of training clf = [[] for i in range(12)] for i in assign: clf[w].append( ensemble.GradientBoostingRegressor( loss='ls', max_depth=param[i]['max_depth'],
import numpy as np import pandas as pd from Calculhoraire_exclusif import holidays from Calculhoraire_exclusif import centre from Calculhoraire_exclusif import intervalle data_name = "train_2011_2012_2013.csv" cols = ['DATE', 'ASS_ASSIGNMENT', 'CSPL_RECEIVED_CALLS'] print("loading data...") df = pd.read_csv(data_name, usecols=cols, sep=";") print("end loading\n") df['TIME_SLOT'] = df['DATE'].apply(lambda x: intervalle(x)) df['ASS_ASSIGNMENT'] = df['ASS_ASSIGNMENT'].apply(lambda x: centre(x)) from datetime import datetime df['DATE'] = df['DATE'].apply( lambda x: datetime.strptime(x, "%Y-%m-%d %H:%M:%S.%f")) df['DAY_WE_DS'] = df['DATE'].dt.weekday df['short_DATE'] = df['DATE'].dt.date df['short_DATE'] = df['short_DATE'].apply( lambda x: datetime.strftime(x, "%Y-%m-%d")) df['HO'] = df['short_DATE'].apply(lambda x: holidays(str(x))) df["MONTH"] = df['short_DATE'].apply(lambda x: int(x.split("-")[1])) df["YEAR"] = df['short_DATE'].apply(lambda x: int(x.split("-")[0])) df["DAY"] = df['short_DATE'].apply(lambda x: int(x.split("-")[2])) df2 = df.groupby([ 'short_DATE', 'YEAR', 'MONTH', 'DAY', 'DAY_WE_DS', 'TIME_SLOT', 'HO',
# Derive the calendar features on the submission frame `dsub`.
# `holidays` and `intervalle` are helpers defined outside this chunk.
dsub['HO'] = dsub['short_DATE'].apply(holidays)
print("End of conversion!")
dsub['DAY_WE_DS'] = dsub['DATE'].dt.weekday
dsub['WEEK_END'] = dsub['DAY_WE_DS'].apply(lambda x: int(x >= 5))
dsub['DAY'] = dsub['DATE'].dt.day
dsub['MONTH'] = dsub['DATE'].dt.month
dsub['YEAR'] = dsub['DATE'].dt.year
dsub['TIME_SLOT'] = dsub['DATE'].apply(intervalle)

# In[16]:

# Predict one value per submission row. The model output is clamped to
# [0, limit / 2] and then doubled, so the stored value lies in [0, limit].
prediction = []
for _, row in dsub.iterrows():
    # Resolve the assignment index once; it selects both the cap and the model.
    idx = centre(row['ASS_ASSIGNMENT'])
    cap = limits[idx]
    if cap == 0:
        # A cap of 0 means this assignment is always predicted as 0.
        prediction.append(0)
        continue

    # Loop temporaries deliberately avoid the names `time` and `holidays`:
    # at module scope they would overwrite the imported `holidays` function
    # (called at the top of this block) and the `time` module.
    features = [[
        row['YEAR'],
        row['MONTH'],
        row['DAY'],
        row['TIME_SLOT'],
        row['HO'],
        row['DAY_WE_DS'],
        row['WEEK_END'],
    ]]
    # NOTE(review): assumes clf[idx] is a scikit-learn style regressor whose
    # predict() returns one value per input row; take the single scalar so
    # `prediction` holds plain numbers rather than 1-element arrays.
    pred = clf[idx].predict(features)[0]
    pred = max(0, pred)
    pred = min(cap / 2, pred)
    prediction.append(2 * pred)
dsub['MONTH'] = dsub['DATE'].dt.month
dsub['YEAR'] = dsub['DATE'].dt.year
dsub['TIME_SLOT'] = dsub['DATE'].apply(intervalle)

# In[ ]:

# Predict one non-negative value per submission row.
prediction = []
for _, row in dsub.iterrows():
    model = clf[centre(row['ASS_ASSIGNMENT'])]
    # Loop temporaries deliberately avoid the names `time` and `holidays`:
    # at module scope they would overwrite the `time` module and the
    # imported `holidays` helper.
    features = [[
        row['DAY_WE_DS'],
        row['TIME_SLOT'],
        row['MONTH'],
        row['YEAR'],
        row['DAY'],
        row['HO'],
    ]]
    # NOTE(review): assumes a scikit-learn style predict() returning one value
    # per input row; take the scalar so int() below receives a number rather
    # than a 1-element array.
    pred = model.predict(features)[0]
    prediction.append(max(0, pred))

# Convert to integers. int() truncates toward zero; it does not round to the
# nearest integer.
prediction = [int(x) for x in prediction]

# Store the predictions and format DATE as "YYYY-MM-DD HH:MM:SS.000".
dsub['prediction'] = prediction
dsub['DATE'] = dsub['DATE'].dt.strftime("%Y-%m-%d %H:%M:%S") + '.000'

header = ['DATE', 'ASS_ASSIGNMENT', 'prediction']

# Write the tab-separated submission file.
dsub.to_csv("sortie.txt", columns=header, sep='\t', index=False)

# In[ ]:
# In[4]:

from datetime import timedelta

# One accumulated error total per assignment index (as returned by `centre`).
errors = [0] * len(assignments)

for _, record in dsub.iterrows():
    assignment = record['ASS_ASSIGNMENT']
    # The lookup key in `dreal2` is the row's date shifted back by 7 days,
    # formatted with a literal ".000" suffix.
    shifted = record['DATE'] + timedelta(days=-7)
    key = shifted.strftime("%Y-%m-%d %H:%M:%S") + '.000'
    # Missing (date, assignment) pairs count as an observed value of 0.
    observed = dreal2.get((key, assignment), 0)
    # Per-row error: exp(d) - d - 1 with d = 0.1 * (observed - predicted).
    delta = 0.1 * (observed - record['prediction'])
    errors[centre(assignment)] += exp(delta) - delta - 1

errors

# In[5]:

sum(errors)

# In[7]:

# NOTE(review): 82909 is presumably the number of evaluated rows -- confirm.
error = sum(errors) / 82909
error

# In[ ]:
# Best multiplier found per assignment index.
# NOTE(review): an entry stays 0 when no multiplier in 1..5 gives a strictly
# smaller error than the value already stored in `errors` for that index.
bestmult = [0 for i in range(len(assignments))]

# In[26]:

# Decode every row once: the shifted date key, the observed value and the
# assignment index do not depend on the multiplier, so they are computed in a
# single pass instead of once per multiplier.
observations = []
for _, row in dsub.iterrows():
    ass = row['ASS_ASSIGNMENT']
    date = row['DATE'] + timedelta(days=-7)
    date = date.strftime("%Y-%m-%d %H:%M:%S") + '.000'
    # Missing (date, assignment) pairs count as an observed value of 0.
    yreal = dreal2.get((date, ass), 0)
    observations.append((centre(ass), yreal, row['prediction']))

for mult in range(1, 6):
    # Error per assignment when every prediction is scaled by `mult`.
    tmperrors = [0] * len(assignments)
    for idx, yreal, predicted in observations:
        delta = 0.1 * (yreal - mult * predicted)
        tmperrors[idx] += exp(delta) - delta - 1
    print(tmperrors)
    # Keep the multiplier only where it strictly improves the stored error.
    # `errors` is updated in place.
    for a, candidate in enumerate(tmperrors):
        if errors[a] > candidate:
            errors[a] = candidate
            bestmult[a] = mult

# In[27]:

print(errors)
print(bestmult)

# In[28]:

# In[5]: