def predict_summation_batch(data, lstm, slice_size=30, lstm2=None):
    """Rolling one-step trend forecast plus a second-stage forecast of the residuals.

    Parameters
    ----------
    data : sequence of floats
        Time series to forecast.
    lstm : model
        Trained model predicting the trend from a window of ``slice_size`` values.
    slice_size : int
        Number of past observations fed to the model per prediction.
    lstm2 : model, optional
        Model for predicting the residual (diff) series, as described in the
        original docstring. Defaults to ``lstm``, which preserves the previous
        behaviour where the same model was used for both stages.

    Returns
    -------
    tuple of np.ndarray
        ``(trend_preds, diff_preds, residuals)`` — flat arrays, with
        ``trend_preds`` truncated to the length of ``diff_preds``.
    """
    if lstm2 is None:
        # Backward compatible: original code reused `lstm` for the second stage.
        lstm2 = lstm
    series_lstm1 = []  # stage-1 (trend) predictions
    differs = []       # residuals: actual value minus stage-1 prediction
    series_lstm2 = []  # stage-2 predictions made from the residual series
    for i in range(slice_size, len(data)):
        # Normalise the current window and predict the next value.
        window = np.asarray(data[i - slice_size:i]).reshape(1, slice_size)
        sc, normed = scaler(window.ravel())
        val = torch.from_numpy(normed.reshape(1, -1)).float()
        predict_1 = lstm(val)
        predict_1 = sc.inverse_transform(predict_1.detach().numpy())
        series_lstm1.append(predict_1)
        differs.append(data[i] - predict_1)
        # Once enough residuals have accumulated, forecast the residual series.
        if len(series_lstm1) >= slice_size:
            length = len(series_lstm1)
            value = np.asarray(differs[length - slice_size:length])
            sc, normed = scaler(value.ravel())
            # NOTE(review): `scaler` appears to return already-normalised data
            # (see stage 1 above), so this extra transform() may normalise
            # twice — preserved as-is; confirm intent.
            normed = sc.transform(np.asarray(normed.reshape(1, slice_size)))
            val = torch.from_numpy(normed).float()
            predict_2 = lstm2(val[:1])
            predict_2 = sc.inverse_transform(predict_2.detach().numpy())
            series_lstm2.append(predict_2)
    series_lstm2 = np.asarray(series_lstm2)
    series_lstm1 = np.asarray(series_lstm1)
    differs = np.asarray(differs)
    return (series_lstm1[:len(series_lstm2)].ravel(),
            series_lstm2.ravel(),
            differs.ravel())
def predict_batch(data, lstm, slice_size=30):
    """Rolling one-step trend forecast over ``data``.

    Parameters
    ----------
    data : sequence of floats
        Time series to forecast.
    lstm : model
        Trained model predicting the next value from a window of
        ``slice_size`` past values.
    slice_size : int
        Number of past observations fed to the model per prediction.

    Returns
    -------
    list
        The model's predictions, de-normalised back to the data scale.
    """
    series_lstm1 = []
    for i in range(slice_size, len(data)):
        # Normalise the current window, predict, then invert the scaling.
        window = np.asarray(data[i - slice_size:i]).reshape(1, slice_size)
        sc, normed = scaler(window.ravel())
        val = torch.from_numpy(normed.reshape(1, -1)).float()
        prediction = lstm(val)
        series_lstm1.append(sc.inverse_transform(prediction.detach().numpy()))
    return series_lstm1
data_xls.to_csv('data.csv', encoding='utf-8', index=False) # In[87]: #importing classes to handle data,to split data into training and testing sets,to visualising of tree and to cheak accuracy of our model from preprocessing import splitter from preprocessing import Encoder from preprocessing import scaler from metrics import matrix from treeVis import vis sp=splitter() mt=matrix() vs=vis() sc=scaler() en=Encoder() # In[88]: #importing dataset dataset=pd.read_csv("data.csv") dataset.tail() # In[89]: from sklearn.preprocessing import LabelEncoder
numerical_cols = [ cname for cname in X_train_full.columns if X_train_full[cname].dtype in ['int64', 'float64'] ] # Keep selected columns only my_cols = low_cardinality_cols + numerical_cols X_train = X_train_full[my_cols].copy() X_valid = X_valid_full[my_cols].copy() #print(X_train.head()) ##### 1- preprocessing encoding encoder = encoder() OH_X_train, OH_X_valid = encoder.ordinal(X_train, X_valid) #### 1- preprocessing scaling scaler = scaler() datat = scaler.standerd_scaler(OH_X_train) datat = scaler.standerd_scaler(OH_X_valid) ##### 2- modeling cla = classifier() preds = cla.random_forest(OH_X_train, y_train, OH_X_valid, 10, 0) print(preds) print(mean_absolute_error(y_valid, preds)) #Model Accuracy, how often is the classifier correct? print("Accuracy:", metrics.accuracy_score(y_valid, preds) * 100, "%")
df = pp.create_target_classes(df) # Shuffle a few times for i in range(5): df = df.iloc[np.random.permutation(len(df))] df = df.fillna(0) # Transform into NumPy matrix, normalized by column X, y, y_map = pp.vectorize(df, 'target', path) t_preproc = time.time() print('Cleaned and processed', len(df.index), 'rows in', round((t_preproc - t_extract), 2), 'seconds.') # Train neural network ############################################################################### print('Training neural network...') print('[', X.shape[1], '] x [', np.unique(y).size, ']') model_simple = nn.deep_nn(pp.scaler(X, 'robust', path), y, 'std', path) # nn.deep_nn(X, y) t_nn = time.time() print('Neural network trained in', round((t_nn - t_preproc), 2), 'seconds.') print('Evaluating model and saving class probabilities...') predDF = pd.DataFrame.from_records(model_simple.predict(pp.scaler(X, 'robust'))) predDF.to_pickle(path + '/model_prob.pkl') # Perform k-Means clustering and send classified data through neural network ############################################################################### clusters = 18 print('Applying k-Means classifier with', clusters, 'clusters...') kmX = km.kmeans(pp.scaler(X, 'robust', path), clusters) print('Complete.')
# NOTE(review): the line below is the tail of an import statement whose
# opening parenthesis is outside this view — kept as-is.
create_sequences, train_test, generated)

if __name__ == "__main__":
    # Load the half-hourly smart-meter readings.
    values_dt = pd.read_csv(
        '../temp_ds/Power-Networks-LCL-June2015(withAcornGps)v2_2.csv',
        delimiter=',')
    values_dt = np.asarray(values_dt['KWH/hh (per half hour) '].dropna(
        how='any', axis=0))
    # Replace 'Null' markers with -1 before the float conversion.
    values_dt[np.where(values_dt == 'Null')] = -1
    values_dt = values_dt.astype(np.float32)

    splited = split_data(values_dt, 50)  # split into 50 batches
    avg_splited = [
        moving_average(splited[i], 20) for i in range(len(splited))
    ]  # smooth each batch with a moving average
    scalers_data = np.asarray([
        scaler(avg_splited[i]) for i in range(len(avg_splited))
    ])  # normalise each batch
    datas = scalers_data[:, 1]    # the data (batches)
    scalers = scalers_data[:, 0]  # the fitted scalers

    model = TempNN(n_features=1, n_hidden=64, seq_len=30, n_layers=1)
    # Train the same model sequentially on every batch.
    for i, data in enumerate(datas):
        print("Batch №%d" % i)
        X_train, y_train, X_test, y_test = train_test(data)
        y_train = torch.reshape(y_train, (-1, 1))
        y_test = torch.reshape(y_test, (-1, 1))
        model, train_hist, test_hist = train_model(model, X_train, y_train,
                                                   X_test, y_test)
    torch.save(model.state_dict(), "../models/energy_model.pth")