def call_data():
    """Build the feature frame for the currently configured time window.

    Relies on the module-level names ``start_p``, ``stop_p`` and ``target``,
    which must be bound before this function is called.

    Steps, in order:
      1. Fetch the frame returned by ``instant_data().hourly_instant()``.
      2. Slice it to ``start_p:stop_p``.
      3. Interpolate missing values in both directions and cast to float32.
      4. Add a ``'Day'`` column holding the day of year of the index.
      5. Pass the result through ``call_mar`` (the MARS step) with
         ``mode='hour'`` and a cutoff of 0.3.
      6. Hand the result to ``move_column_inplace(..., target, 0)``
         (presumably puts the target column first; confirm in the helper).

    Returns:
        The value returned by ``move_column_inplace``.
    """
    mode = 'hour'
    hourly = instant_data().hourly_instant()

    # Only interpolation is applied here; no mean-fill fallback is performed.
    features = (
        hourly[start_p:stop_p]
        .interpolate(limit=300000000, limit_direction='both')
        .astype('float32')
    )
    features['Day'] = features.index.dayofyear

    # MARS step.
    selected = call_mar(features, target, mode, cutoff=0.3)
    return move_column_inplace(selected, target, 0)
def datapreprocess():
    """Prepare the MARS-processed frame and a scaler fitted on the target.

    Relies on the module-level names ``start_p``, ``stop_p`` and ``target``,
    which must be bound before this function is called.

    Steps, in order:
      1. Fetch the frame returned by ``instant_data().hourly_instant()``.
      2. Slice it to ``start_p:stop_p``.
      3. Interpolate missing values in both directions and cast to float32.
      4. Plot the target column (side effect on the active plotting backend).
      5. Pass the frame through ``call_mar`` (the MARS step) with
         ``mode='hour'`` and a cutoff of 0.3, then through
         ``move_column_inplace(..., target, 0)``.
      6. Fit a ``MinMaxScaler`` on the target column only.

    The returned frame itself is NOT scaled; only the scaler is fitted.

    Returns:
        tuple: ``(data_mar, scaler_tar)`` where ``data_mar`` is the value
        returned by ``move_column_inplace`` and ``scaler_tar`` is the
        ``MinMaxScaler`` fitted on the interpolated target column.
    """
    mode = 'hour'
    hourly = instant_data().hourly_instant()

    # Only interpolation is applied here; no mean-fill fallback is performed.
    filled = (
        hourly[start_p:stop_p]
        .interpolate(limit=300000000, limit_direction='both')
        .astype('float32')
    )
    filled[target].plot()

    # MARS step.
    selected = call_mar(filled, target, mode, cutoff=0.3)
    data_mar = move_column_inplace(selected, target, 0)

    # Target-only scaler; the values are reshaped into a single column, which
    # is the 2-D layout the scaler is fitted on.
    scaler_tar = MinMaxScaler()
    target_column = filled[target].to_numpy().reshape(-1, 1)
    scaler_tar.fit(target_column)

    return data_mar, scaler_tar
# NOTE(review): this print looks like the tail of a branch that opens above
# this fragment (e.g. an `else:`) -- confirm its intended indentation.
print('incorrect input')

# Station selection. `mode` must already be bound above this fragment.
st = 'CPY012'
target, start_p, stop_p, host_path = station_sel(st, mode)
# The window returned by station_sel is overridden immediately with a fixed
# range; only `target` and `host_path` keep the values station_sel returned.
start_p = '2016-01-01'
stop_p = '2017-01-01'
#-----------------------------
# Slice the frame to the window. `df` must already be bound above this
# fragment.
data = df[start_p:stop_p]
# Integer row position of the train/test boundary: first 70% of the rows of
# the sliced frame.
# NOTE(review): computed on `data` but applied to X/Y below -- this assumes
# to_supervise keeps the same row count/order; verify.
split_date = int(len(data) * .7)
# Interpolate missing values in both directions, then cast to float32.
data = data.interpolate(limit=3000000000, limit_direction='both').astype('float32')
data['Day'] = data.index.dayofyear #add day
#-----------------------------
# MARS step with a cutoff of 0.3, then move_column_inplace(..., target, 0)
# (presumably puts the target column first -- confirm in the helper).
cutoff = .3
data_mar = call_mar(data, target, mode, cutoff=cutoff)
data_mar = move_column_inplace(data_mar, target, 0)
n_features = len(data_mar.columns)
#----------------------------
# Build the supervised X/Y pair with an output step of 1; the third return
# value of to_supervise is discarded.
out_t_step = 1
X, Y, _ = to_supervise(data_mar, target, out_t_step)
# Earlier variant, kept for reference, sliced without .iloc:
#trainX, testX = X[:split_date].dropna(),X[split_date:].dropna()
#trainY, testY = Y[:split_date].dropna(),Y[split_date:].dropna()
# Positional split at `split_date`, dropping rows that contain NaN.
# NOTE(review): X and Y are dropna()'d independently; if their NaN rows differ
# the X/Y pairs may no longer line up -- verify against to_supervise.
trainX, testX = X.iloc[:split_date].dropna(), X.iloc[split_date:].dropna()
trainY, testY = Y.iloc[:split_date].dropna(), Y.iloc[split_date:].dropna()
#--------------------------------------------#
# Standardize the inputs: the scaler is fitted on the training split only and
# then applied unchanged to the test split. Y is not scaled here.
scaler = StandardScaler()
trainX = scaler.fit_transform(trainX)
testX = scaler.transform(testX)