print(inverse(i)) clf[w][i].fit(Matrix, Result) #end of training # In[ ]: start = '2011-01-01 00:00:00.000' start = datetime.strptime(start, "%Y-%m-%d %H:%M:%S.%f") #reading the submission file dsub = pd.read_csv('submission/submission.txt', sep="\t") print(dsub.head()) print("convert to timestamp...") dsub["DATE"] = dsub["DATE"].apply( lambda x: datetime.strptime(x, "%Y-%m-%d %H:%M:%S.%f")) dsub["short_DATE"] = dsub["DATE"].apply( lambda x: datetime.strftime(x, "%Y-%m-%d")) dsub['HO'] = dsub['short_DATE'].apply(lambda x: holidays(x)) dsub["ABSOLUTE"] = dsub['DATE'].apply(lambda x: ((x - start).days) * 48) dsub['DAY_WE_DS'] = dsub['DATE'].dt.weekday dsub['WEEK_END'] = dsub['DAY_WE_DS'].apply(lambda x: int(x >= 5)) dsub['DAY'] = dsub['DATE'].dt.day dsub['MONTH'] = dsub['DATE'].dt.month dsub['YEAR'] = dsub['DATE'].dt.year dsub['TIME_SLOT'] = dsub['DATE'].apply(lambda x: intervalle(x)) print("End of conversion!") # In[ ]: #prediction prediction = [] for row in dsub.iterrows():
# Load the raw call records, derive calendar features, and write the number of
# received calls summed per (date, calendar fields, time slot, holiday flag,
# assignment) to a CSV file.
print("loading data...")
df = pd.read_csv(data_name, usecols=cols, sep=";")
print("end loading\n")

# NOTE(review): intervalle() is applied to the raw DATE strings here, before
# the column is parsed, whereas the submission cell elsewhere in this file
# applies it to parsed timestamps -- confirm intervalle() accepts both.
df['TIME_SLOT'] = df['DATE'].apply(intervalle)
df['ASS_ASSIGNMENT'] = df['ASS_ASSIGNMENT'].apply(centre)

# Deliberately left at its original position: moving or dropping it could
# change what `datetime` refers to for code outside this cell.
from datetime import datetime

# Parse the whole column in one vectorized call instead of a per-row
# datetime.strptime; the explicit format keeps parsing strict.
df['DATE'] = pd.to_datetime(df['DATE'], format="%Y-%m-%d %H:%M:%S.%f")
# pandas weekday convention: Monday == 0 ... Sunday == 6.
df['DAY_WE_DS'] = df['DATE'].dt.weekday
# Calendar day as a "YYYY-MM-DD" string; used as a grouping key and as the
# argument to holidays().
df['short_DATE'] = df['DATE'].dt.strftime("%Y-%m-%d")
df['HO'] = df['short_DATE'].apply(holidays)
# Read the calendar fields straight from the parsed timestamps rather than
# splitting the formatted string back apart.
df["MONTH"] = df['DATE'].dt.month
df["YEAR"] = df['DATE'].dt.year
df["DAY"] = df['DATE'].dt.day

# Total received calls for every distinct combination of the grouping keys.
group_keys = [
    'short_DATE', 'YEAR', 'MONTH', 'DAY', 'DAY_WE_DS', 'TIME_SLOT', 'HO',
    'ASS_ASSIGNMENT',
]
df2 = df.groupby(group_keys)['CSPL_RECEIVED_CALLS'].sum()

write_name = "train_week_centre_time_date.csv"
# Warning: the file must be opened and its last column renamed by hand.
df2.to_csv(write_name, header=True)

# In[ ]: