Ejemplo n.º 1
0
def buildInput():
    """Build the feature table for the rows listed in ``submission.txt``.

    The file is read as tab-separated through ``pp.loadCSV`` and its
    ``prediction`` column is discarded, so only the date and assignment
    information is used. Every date is moved 3 days back before the
    calendar features are derived from it.

    Returns:
        The frame produced by the ``pp`` helpers once ASS_ASSIGNMENT, MONTH
        and CRENEAU have been passed through ``pp.oneHot`` and the YEAR,
        DAY and DATE columns have been dropped.
    """
    frame = pp.loadCSV("submission.txt", sep="\t")
    frame.drop('prediction', axis=1, inplace=True)

    # DAY_OFF is set to a constant 0 for every row.
    frame['DAY_OFF'] = 0

    # Parse the dates and set them 3 days earlier.
    frame['DATE'] = pd.to_datetime(frame['DATE']) + pd.DateOffset(days=-3)

    # dayofweek is 0 for Monday, so 5 and 6 are Saturday and Sunday.
    frame['WEEK_END'] = frame['DATE'].dt.dayofweek >= 5

    # NOTE(review): parseDate presumably adds the YEAR / MONTH / DAY /
    # CRENEAU columns used below -- confirm against pp.
    features = pp.parseDate(frame, drop=False)

    features = pp.oneHot(features, "ASS_ASSIGNMENT", delCol=True)
    features.drop('YEAR', axis=1, inplace=True)

    # A zero-filled MONTH_1 column is created before MONTH is encoded.
    # NOTE(review): presumably keeps MONTH_1 in the output when the file has
    # no January rows -- confirm how pp.oneHot treats an existing column.
    features['MONTH_1'] = 0
    features = pp.oneHot(features, "MONTH", delCol=True, prefix="MONTH")

    features.drop('DAY', axis=1, inplace=True)
    features = pp.oneHot(features, "CRENEAU", delCol=True, prefix="CRENEAU")

    # The raw timestamp is no longer needed once the features exist.
    features.drop('DATE', axis=1, inplace=True)
    return features
Ejemplo n.º 2
0
def buildInputDeep(X):
    """Convert a raw data frame into a 3-D float array of features.

    The DATE column is parsed, DAY_OFF and WEEK_END flags are derived from
    it, the categorical columns ASS_ASSIGNMENT, MONTH and CRENEAU are passed
    through ``pp.oneHot``, and the YEAR, DAY and DATE columns are dropped.
    The remaining table is returned with a singleton middle axis.

    Note that DATE, DAY_OFF and WEEK_END are assigned on the frame that was
    passed in, so the caller's object is modified.

    Args:
        X: pandas DataFrame with at least a DATE column parseable by
            ``pd.to_datetime`` and an ASS_ASSIGNMENT column.
            NOTE(review): YEAR / MONTH / DAY / CRENEAU are presumably added
            by ``pp.parseDate`` -- confirm against pp.

    Returns:
        A new ``float64`` numpy array of shape ``(n_rows, 1, n_features)``.
    """
    X['DATE'] = pd.to_datetime(X['DATE'])
    X["DAY_OFF"] = X['DATE'].apply(pp.isFerie).astype(int)

    # dayofweek is 0 for Monday, so 5 and 6 are Saturday and Sunday.
    X['WEEK_END'] = X['DATE'].apply(lambda x: x.dayofweek >= 5)

    X = pp.parseDate(X, drop=False)
    print("DateTime data parsed.")

    # Transform categorical data into one-hots.
    X = pp.oneHot(X, "ASS_ASSIGNMENT", delCol=True)

    X.drop('YEAR', axis=1, inplace=True)
    X = pp.oneHot(X, "MONTH", delCol=True, prefix="MONTH")

    X.drop('DAY', axis=1, inplace=True)

    X = pp.oneHot(X, "CRENEAU", delCol=True, prefix="CRENEAU")
    print("Categorical data made into one-hot") 

    X.drop('DATE', axis=1, inplace=True)

    # DataFrame.as_matrix() was removed in pandas 1.0; .values is available
    # in both old and new releases. np.array(...) makes a fresh float64 copy,
    # matching the zero-initialised float buffer that used to be filled cell
    # by cell in a Python loop.
    values = np.array(X.values, dtype=np.float64)
    nInputNumber, nInputDim = values.shape
    XZ = values.reshape(nInputNumber, 1, nInputDim)

    print("Data converted to numpy array")
    print("PREPROCESSING OVER")
    return XZ