print(inverse(i))
        clf[w][i].fit(Matrix, Result)
#end of training

# In[ ]:
# Reference origin used for the ABSOLUTE feature below.
timestamp_format = "%Y-%m-%d %H:%M:%S.%f"
start = datetime.strptime('2011-01-01 00:00:00.000', timestamp_format)

# Load the tab-separated submission file and show its first rows.
dsub = pd.read_csv('submission/submission.txt', sep="\t")
print(dsub.head())

print("convert to timestamp...")
# Parse the DATE strings, then derive a day-only "YYYY-MM-DD" string from them.
dsub["DATE"] = dsub["DATE"].apply(
    lambda raw: datetime.strptime(raw, timestamp_format))
dsub["short_DATE"] = dsub["DATE"].apply(
    lambda stamp: datetime.strftime(stamp, "%Y-%m-%d"))

# holidays() is defined elsewhere in the file; it receives the day string.
dsub['HO'] = dsub['short_DATE'].apply(holidays)

# Whole days elapsed since `start`, multiplied by 48
# (presumably 48 half-hour slots per day -- TODO confirm).
dsub["ABSOLUTE"] = dsub['DATE'].apply(
    lambda stamp: 48 * (stamp - start).days)

# Calendar features. Columns are created in the same order as before so that
# positional access to rows keeps working.
dsub['DAY_WE_DS'] = dsub['DATE'].dt.weekday
# pandas weekday: Monday=0 ... Sunday=6, so 5 and 6 flag the week-end.
dsub['WEEK_END'] = dsub['DAY_WE_DS'].apply(
    lambda weekday: 1 if weekday >= 5 else 0)
dsub['DAY'] = dsub['DATE'].dt.day
dsub['MONTH'] = dsub['DATE'].dt.month
dsub['YEAR'] = dsub['DATE'].dt.year

# intervalle() is defined elsewhere in the file; here it receives the parsed
# timestamp.
dsub['TIME_SLOT'] = dsub['DATE'].apply(intervalle)
print("End of conversion!")

# In[ ]:
#prediction
prediction = []
for row in dsub.iterrows():
예제 #2
0
# Load only the needed columns from the semicolon-separated data file.
print("loading data...")
df = pd.read_csv(data_name, usecols=cols, sep=";")
print("end loading\n")

# Both helpers are defined elsewhere in the file. Note that intervalle() is
# applied to the raw DATE strings, i.e. before DATE is parsed below.
df['TIME_SLOT'] = df['DATE'].apply(intervalle)
df['ASS_ASSIGNMENT'] = df['ASS_ASSIGNMENT'].apply(centre)

# Import kept at this exact position to preserve the original execution order.
from datetime import datetime

# Parse DATE into timestamps, then derive weekday and a "YYYY-MM-DD" string.
timestamp_format = "%Y-%m-%d %H:%M:%S.%f"
df['DATE'] = df['DATE'].apply(
    lambda raw: datetime.strptime(raw, timestamp_format))
df['DAY_WE_DS'] = df['DATE'].dt.weekday
df['short_DATE'] = df['DATE'].dt.date.apply(
    lambda day: day.strftime("%Y-%m-%d"))

# short_DATE already holds plain strings, so it is passed to holidays() as is.
df['HO'] = df['short_DATE'].apply(holidays)


def _date_part(position):
    """Return a function extracting the integer at `position` of 'Y-M-D'."""
    return lambda text: int(text.split("-")[position])


# "YYYY-MM-DD": index 0 is the year, 1 the month, 2 the day.
df["MONTH"] = df['short_DATE'].apply(_date_part(1))
df["YEAR"] = df['short_DATE'].apply(_date_part(0))
df["DAY"] = df['short_DATE'].apply(_date_part(2))

# Total received calls for every combination of the grouping keys.
group_keys = [
    'short_DATE', 'YEAR', 'MONTH', 'DAY', 'DAY_WE_DS', 'TIME_SLOT', 'HO',
    'ASS_ASSIGNMENT',
]
df2 = df.groupby(group_keys)['CSPL_RECEIVED_CALLS'].sum()

write_name = "train_week_centre_time_date.csv"

# Warning: the output file must be opened and its last column renamed.
df2.to_csv(write_name, header=True)

# In[ ]: