print("end loading\n")

    # Flag rows whose day-of-week index is 5 or more
    # (presumably Saturday/Sunday with Monday == 0 -- TODO confirm how
    # DAY_WE_DS is built upstream).
    df['WEEK_END'] = df['DAY_WE_DS'].apply(lambda x: int(x >= 5))

    print(df.head())
    print(len(df))

    # Upper bounds on received calls, stored as limits[w][assignment].
    # The table is 12 x 28: the first index is w, the second the assignment
    # code (presumably 12 weeks and 28 assignments -- TODO confirm).
    limits = [[0] * 28 for _ in range(12)]
    # Record the largest number of received calls seen for each assignment.
    for i in range(28):
        limits[w][i] = df.loc[df.ASS_ASSIGNMENT == i,
                              'CSPL_RECEIVED_CALLS'].max(axis=0)

    # Special case: the 'Evenements' assignment is forced to 0.
    # The indices follow the same [w][assignment] order as the loop above;
    # the previous [assignment][w] order addressed the wrong cell (or fell
    # outside the 12-row table).
    limits[w][centre('Evenements')] = 0

    assign = np.unique(df.ASS_ASSIGNMENT)

    print('end !')
    # Report the elapsed time for this stage, then restart the timer.
    print(time.time() - start_time)
    start_time = time.time()

    # In[ ]:
    #start of training
    clf = [[] for i in range(12)]
    for i in assign:
        clf[w].append(
            ensemble.GradientBoostingRegressor(
                loss='ls',
                max_depth=param[i]['max_depth'],
예제 #2
0
import numpy as np
import pandas as pd
from Calculhoraire_exclusif import holidays
from Calculhoraire_exclusif import centre
from Calculhoraire_exclusif import intervalle

# Input file and the only columns read from it.
data_name = "train_2011_2012_2013.csv"
cols = ['DATE', 'ASS_ASSIGNMENT', 'CSPL_RECEIVED_CALLS']

print("loading data...")
df = pd.read_csv(data_name, usecols=cols, sep=";")
print("end loading\n")

# Project helpers are applied to the raw values: `intervalle` on the DATE
# string and `centre` on the assignment label.
df['TIME_SLOT'] = df['DATE'].apply(intervalle)
df['ASS_ASSIGNMENT'] = df['ASS_ASSIGNMENT'].apply(centre)
from datetime import datetime

# Parse the textual timestamps so the `.dt` accessor becomes available.
timestamp_format = "%Y-%m-%d %H:%M:%S.%f"
df['DATE'] = df['DATE'].apply(
    lambda raw: datetime.strptime(raw, timestamp_format))
df['DAY_WE_DS'] = df['DATE'].dt.weekday
# Calendar day as a "YYYY-MM-DD" string.
df['short_DATE'] = df['DATE'].dt.date
df['short_DATE'] = df['short_DATE'].apply(
    lambda day: day.strftime("%Y-%m-%d"))
df['HO'] = df['short_DATE'].apply(lambda day: holidays(str(day)))
# Split the "YYYY-MM-DD" string into integer columns (created in the order
# MONTH, YEAR, DAY).
for column, position in (("MONTH", 1), ("YEAR", 0), ("DAY", 2)):
    df[column] = df['short_DATE'].apply(
        lambda day, position=position: int(day.split("-")[position]))

df2 = df.groupby([
    'short_DATE', 'YEAR', 'MONTH', 'DAY', 'DAY_WE_DS', 'TIME_SLOT', 'HO',
# Holiday flag from the project helper, one call per short date.
dsub['HO'] = dsub['short_DATE'].apply(holidays)
print("End of conversion!")

# Calendar features read from the parsed DATE column.
stamps = dsub['DATE'].dt
dsub['DAY_WE_DS'] = stamps.weekday
# 1 when the weekday index is 5 or 6 (pandas counts Monday as 0), else 0.
dsub['WEEK_END'] = dsub['DAY_WE_DS'].apply(lambda weekday: int(weekday >= 5))
dsub['DAY'] = stamps.day
dsub['MONTH'] = stamps.month
dsub['YEAR'] = stamps.year
dsub['TIME_SLOT'] = dsub['DATE'].apply(intervalle)

# In[16]:

# Build one prediction per row of `dsub`.
#
# Loop-local names deliberately avoid `holidays` and `time`: rebinding them
# here would replace the imported `holidays` helper (and any `time` module)
# with a per-row value for the rest of the session.
prediction = []
for _, record in dsub.iterrows():
    # Resolve the centre index once; it selects both the limit and the model.
    centre_idx = centre(record['ASS_ASSIGNMENT'])
    if limits[centre_idx] == 0:
        # A zero limit means this assignment is always predicted as 0.
        prediction.append(0)
        continue
    # Feature order must match the order used when the model was fitted.
    features = [[
        record['YEAR'],
        record['MONTH'],
        record['DAY'],
        record['TIME_SLOT'],
        record['HO'],
        record['DAY_WE_DS'],
        record['WEEK_END'],
    ]]
    # predict() returns one value per input row; keep the single scalar
    # instead of carrying a 1-element array through the arithmetic below.
    estimate = clf[centre_idx].predict(features)[0]
    estimate = max(0, estimate)
    # Clamp to half the limit, then double: the result is capped at the limit.
    estimate = min(limits[centre_idx] / 2, estimate)
    prediction.append(2 * estimate)
예제 #4
0
# Month and year come from the parsed DATE column; the slot comes from the
# project helper `intervalle`.
stamps = dsub['DATE'].dt
dsub['MONTH'] = stamps.month
dsub['YEAR'] = stamps.year
dsub['TIME_SLOT'] = dsub['DATE'].apply(intervalle)

# In[ ]:

# Build one non-negative prediction per row of `dsub`.
#
# Loop-local names deliberately avoid `holidays` and `time`, so the imported
# `holidays` helper (and any `time` module) is not replaced by a per-row
# value once this cell has run.
prediction = []
for _, record in dsub.iterrows():
    model = clf[centre(record['ASS_ASSIGNMENT'])]
    # Feature order must match the order used when the model was fitted.
    features = [[
        record['DAY_WE_DS'],
        record['TIME_SLOT'],
        record['MONTH'],
        record['YEAR'],
        record['DAY'],
        record['HO'],
    ]]
    # predict() returns one value per input row; store the scalar so a later
    # int() conversion does not rely on array-to-scalar coercion.
    prediction.append(max(0, model.predict(features)[0]))

# Convert every prediction to an integer.
# NOTE(review): int() drops the fractional part (truncation); it does not
# round to the nearest integer as the original comment suggested.
prediction = list(map(int, prediction))

# Attach the predictions and turn DATE back into text with a fixed
# millisecond suffix.
dsub['prediction'] = prediction
formatted_dates = dsub['DATE'].apply(
    lambda stamp: datetime.strftime(stamp, "%Y-%m-%d %H:%M:%S"))
dsub['DATE'] = formatted_dates + '.000'
header = ['DATE', 'ASS_ASSIGNMENT', 'prediction']
# Write the three output columns as a tab-separated file, without the index.
dsub.to_csv("sortie.txt", columns=header, sep='\t', index=False)

# In[ ]:
예제 #5
0
# In[4]:

# One loss accumulator per entry of `assignments`, indexed by centre(...).
errors = [0] * len(assignments)

from datetime import timedelta

one_week = timedelta(days=7)
for _, record in dsub.iterrows():
    centre_name = record['ASS_ASSIGNMENT']
    # The observed value is looked up under the timestamp seven days earlier,
    # formatted like the keys of `dreal2`; a missing key counts as 0.
    lookup_date = (record['DATE'] -
                   one_week).strftime("%Y-%m-%d %H:%M:%S") + '.000'
    gap = dreal2.get((lookup_date, centre_name), 0) - record['prediction']
    # Asymmetric loss exp(0.1 * gap) - 0.1 * gap - 1: for the same absolute
    # gap, a positive gap (observed above predicted) costs more.
    errors[centre(centre_name)] += exp(0.1 * gap) - 0.1 * gap - 1

errors

# In[5]:

sum(errors)

# In[7]:

# NOTE(review): 82909 is hard-coded; presumably the number of scored rows --
# TODO confirm against len(dsub).
error = sum(errors) / 82909
error

# In[ ]:
# Best multiplier found for each entry of `assignments`.
# NOTE(review): an entry stays 0 when no tested multiplier gives a strictly
# lower loss than the value already in `errors` -- if `errors` holds the
# unscaled baseline, 0 therefore means "keep multiplier 1". Confirm how
# `bestmult` is consumed before multiplying predictions by it.
bestmult = [0] * len(assignments)

# In[26]:

# Resolve every row once. The centre index, the observed value and the raw
# prediction do not depend on the multiplier, so they are computed here
# instead of being rebuilt for each of the five multipliers.
one_week = timedelta(days=7)
samples = []
for _, record in dsub.iterrows():
    centre_name = record['ASS_ASSIGNMENT']
    # Observed values are keyed by the timestamp seven days earlier; a
    # missing key counts as 0.
    lookup_date = (record['DATE'] -
                   one_week).strftime("%Y-%m-%d %H:%M:%S") + '.000'
    samples.append((centre(centre_name),
                    dreal2.get((lookup_date, centre_name), 0),
                    record['prediction']))

# Try multipliers 1..5 and keep, per assignment, the one with the lowest loss.
for mult in range(1, 6):
    tmperrors = [0] * len(assignments)
    for centre_idx, observed, predicted in samples:
        gap = observed - mult * predicted
        # Same asymmetric loss as the baseline: exp(0.1*gap) - 0.1*gap - 1.
        tmperrors[centre_idx] += exp(0.1 * gap) - 0.1 * gap - 1
    print(tmperrors)
    for a, candidate in enumerate(tmperrors):
        if errors[a] > candidate:
            # `errors` is updated in place and ends up holding the best loss.
            errors[a] = candidate
            bestmult[a] = mult

# In[27]:

print(errors)
print(bestmult)

# In[28]:

# In[5]: