def buildInput():
    """Build the one-hot encoded feature frame for the submission file.

    Reads ``submission.txt`` (tab-separated) through ``pp.loadCSV``, removes
    the ``prediction`` column, moves every ``DATE`` three days back, and then
    derives the model features: a constant ``DAY_OFF`` of 0, a ``WEEK_END``
    flag, and one-hot columns for ``ASS_ASSIGNMENT``, ``MONTH`` and
    ``CRENEAU``.  The ``YEAR``, ``DAY`` and ``DATE`` columns are discarded.

    Returns:
        The frame returned by the final ``pp.oneHot`` call, without ``DATE``.
    """
    # Presumably only DATE and ASS_ASSIGNMENT are left once the prediction
    # column is gone -- TODO confirm against the submission file layout.
    frame = pp.loadCSV("submission.txt", sep="\t")
    frame.drop("prediction", axis=1, inplace=True)

    frame["DAY_OFF"] = 0

    # Move every date three days earlier.
    three_days_back = pd.DateOffset(days=-3)
    frame["DATE"] = pd.to_datetime(frame["DATE"])
    frame["DATE"] = frame["DATE"].apply(lambda stamp: stamp + three_days_back)

    # dayofweek is 0 for Monday, so 5 and 6 are Saturday and Sunday.
    frame["WEEK_END"] = frame["DATE"].apply(lambda stamp: stamp.dayofweek >= 5)

    # NOTE(review): YEAR, MONTH, DAY and CRENEAU are presumably produced by
    # pp.parseDate -- confirm in the pp module.
    frame = pp.parseDate(frame, drop=False)

    frame = pp.oneHot(frame, "ASS_ASSIGNMENT", delCol=True)

    # YEAR is not one-hot encoded; it is simply removed.
    del frame["YEAR"]

    # MONTH_1 is created up front with zeros; presumably this guarantees the
    # column exists even when no row falls in January -- TODO confirm.
    frame["MONTH_1"] = 0
    frame = pp.oneHot(frame, "MONTH", delCol=True, prefix="MONTH")

    # DAY is not one-hot encoded either.
    del frame["DAY"]

    frame = pp.oneHot(frame, "CRENEAU", delCol=True, prefix="CRENEAU")

    del frame["DATE"]
    return frame
def buildInputDeep(X):
    """Turn a raw frame into a 3-D float array of one-hot encoded features.

    The frame gets a ``DAY_OFF`` flag (from ``pp.isFerie``), a ``WEEK_END``
    flag, and one-hot columns for ``ASS_ASSIGNMENT``, ``MONTH`` and
    ``CRENEAU``; ``YEAR``, ``DAY`` and ``DATE`` are dropped.  The resulting
    table is then copied into an array with a singleton middle axis.

    Args:
        X: pandas DataFrame containing at least ``DATE`` and
            ``ASS_ASSIGNMENT``.  The frame passed in is modified in place:
            ``DATE`` is converted to datetime and ``DAY_OFF`` / ``WEEK_END``
            are added before ``pp.parseDate`` is called.

    Returns:
        ``numpy.ndarray`` of dtype float64 and shape
        ``(n_rows, 1, n_features)``.
    """
    X['DATE'] = pd.to_datetime(X['DATE'])
    # NOTE(review): pp.isFerie presumably flags public holidays -- confirm.
    X["DAY_OFF"] = X['DATE'].apply(pp.isFerie).astype(int)
    # dayofweek is 0 for Monday, so 5 and 6 are Saturday and Sunday.
    X['WEEK_END'] = X['DATE'].apply(lambda x: x.dayofweek >= 5)
    # NOTE(review): YEAR, MONTH, DAY and CRENEAU are presumably produced by
    # pp.parseDate -- confirm in the pp module.
    X = pp.parseDate(X, drop=False)
    print("DateTime data parsed.")

    # Transform categorical data into one-hot columns; YEAR and DAY are
    # dropped rather than encoded.
    X = pp.oneHot(X, "ASS_ASSIGNMENT", delCol=True)
    X.drop('YEAR', axis=1, inplace=True)
    X = pp.oneHot(X, "MONTH", delCol=True, prefix="MONTH")
    X.drop('DAY', axis=1, inplace=True)
    X = pp.oneHot(X, "CRENEAU", delCol=True, prefix="CRENEAU")
    print("Categorical data made into one-hot")
    X.drop('DATE', axis=1, inplace=True)

    # DataFrame.as_matrix() was removed in pandas 1.0; .values is available
    # on both old and current pandas releases.
    values = X.values
    nInputNumber, nInputDim = values.shape

    # One slice assignment replaces the element-by-element copy; the cast to
    # float64 (e.g. of the boolean WEEK_END column) happens on assignment.
    XZ = np.zeros((nInputNumber, 1, nInputDim))
    XZ[:, 0, :] = values
    print("Data converted to numpy array")
    print("PREPROCESSING OVER")
    return XZ