def pred(facility, model, resolution):
    """Run a dropout sweep for a 1x128 LSTM next to a linear baseline.

    The configuration for the given facility/model/resolution is fetched
    from ``configs.getConfig`` and fed through the ``mlModule`` data
    preparation calls. Six LSTM models (dropout and recurrent dropout
    0.0 through 0.5, 5000 epochs each) and one ``Linear_Regularized``
    model are then passed to ``initTrainPredict``, linear model first.

    Args:
        facility: Forwarded to ``configs.getConfig``.
        model: Forwarded to ``configs.getConfig`` and concatenated onto
            every model name, so it has to be a string.
        resolution: Forwarded to ``configs.getConfig``.
    """
    (
        filename,
        columns,
        irrelevantColumns,
        targetColumns,
        traintime,
        testtime,
        _columnOrder,
    ) = configs.getConfig(facility, model, resolution)

    # The return values are not used in this function; the calls are kept
    # because mlModule presumably stores the prepared data internally.
    mlModule.initDataframe(filename, columns, irrelevantColumns)
    mlModule.getTestTrainSplit(traintime, testtime)
    mlModule.getFeatureTargetSplit(targetColumns)

    # LSTMs are created before the linear model, as in the original order.
    dropoutRates = (0.0, 0.1, 0.2, 0.3, 0.4, 0.5)
    lstmSweep = []
    for rate in dropoutRates:
        lstmSweep.append(
            mlModule.LSTM(
                'LSTM 1x128 d' + str(rate) + ' mod' + model,
                layers=[128],
                dropout=rate,
                recurrentDropout=rate,
                epochs=5000,
            )
        )

    baseline = mlModule.Linear_Regularized('Linear rCV mod' + model)

    initTrainPredict([baseline, *lstmSweep])
def pred(facility, model, resolution):
    """Compare MAE and MSE loss functions for 1x128 MLP and LSTM models.

    Reads the configuration via ``configs.getConfig``, runs the
    ``mlModule`` data preparation calls, builds one MLP and one LSTM per
    loss function (each with dropout 0.2, the LSTMs with
    ``enrolWindow=12``) and passes all four to ``initTrainPredict``.

    Args:
        facility: Forwarded to ``configs.getConfig``.
        model: Forwarded to ``configs.getConfig`` and concatenated onto
            every model name, so it has to be a string.
        resolution: Forwarded to ``configs.getConfig``.
    """
    (
        filename,
        columns,
        irrelevantColumns,
        targetColumns,
        traintime,
        testtime,
        _columnOrder,
    ) = configs.getConfig(facility, model, resolution)

    # The return values are not used in this function; the calls are kept
    # because mlModule presumably stores the prepared data internally.
    mlModule.initDataframe(filename, columns, irrelevantColumns)
    mlModule.getTestTrainSplit(traintime, testtime)
    mlModule.getFeatureTargetSplit(targetColumns)

    # (short tag used in the model name, loss identifier passed to the model)
    lossVariants = (
        ('mae', 'mean_absolute_error'),
        ('mse', 'mean_squared_error'),
    )

    # Both MLPs are created before both LSTMs, as in the original order.
    mlpVariants = [
        mlModule.MLP(
            'MLP 1x128 d0.2 ' + tag + ' mod' + model,
            layers=[128],
            dropout=0.2,
            loss=lossName,
            metrics=[lossName],
        )
        for tag, lossName in lossVariants
    ]
    lstmVariants = [
        mlModule.LSTM(
            'LSTM 1x128 d0.2 ' + tag + ' mod' + model,
            layers=[128],
            dropout=0.2,
            recurrentDropout=0.2,
            enrolWindow=12,
            loss=lossName,
            metrics=[lossName],
        )
        for tag, lossName in lossVariants
    ]

    initTrainPredict([*mlpVariants, *lstmVariants])
def performDropoutPrediction(facility, model, resolution, lookback=12, retrain=False):
    """Train an LSTM and a GRU and plot their dropout-based predictions.

    The configuration is read via ``configs.getConfig`` and the data is
    prepared through ``mlModule``. Two 1x128 recurrent models (dropout and
    recurrent dropout 0.2, constructed with ``training=True``) are
    initialised and trained, ``mlModule.predictWithModelsUsingDropout`` is
    called with ``numberOfPredictions=30``, and the returned predictions,
    means and standard deviations are handed to ``plotDropoutPrediction``.

    Args:
        facility: Forwarded to ``configs.getConfig``.
        model: Forwarded to ``configs.getConfig`` and concatenated onto
            both model names, so it has to be a string.
        resolution: Forwarded to ``configs.getConfig``.
        lookback: Passed as ``enrolWindow`` to both recurrent models.
        retrain: Forwarded to ``mlModule.trainModels``.
    """
    (
        filename,
        columns,
        irrelevantColumns,
        targetColumns,
        traintime,
        testtime,
        _columnOrder,
    ) = configs.getConfig(facility, model, resolution)

    mlModule.initDataframe(filename, columns, irrelevantColumns)
    # Only the test frame and the test targets are needed for plotting.
    _, df_test = mlModule.getTestTrainSplit(traintime, testtime)
    _, _, _, y_test = mlModule.getFeatureTargetSplit(targetColumns)

    recurrentModels = [
        mlModule.LSTM(
            'LSTMs 1x128 d0.2 mod' + model,
            layers=[128],
            training=True,
            dropout=0.2,
            recurrentDropout=0.2,
            enrolWindow=lookback,
        ),
        mlModule.GRU(
            'GRUs 1x128 d0.2 mod' + model,
            layers=[128],
            training=True,
            dropout=0.2,
            recurrentDropout=0.2,
            enrolWindow=lookback,
        ),
    ]

    mlModule.initModels(recurrentModels)
    mlModule.trainModels(retrain)

    predictions, means, stds = mlModule.predictWithModelsUsingDropout(
        numberOfPredictions=30,
    )

    plotDropoutPrediction(
        recurrentModels,
        predictions,
        means,
        stds,
        targetColumns,
        df_test,
        y_test,
        traintime,
    )
# List of column names used a targets targetColumns = [ '50TT002', '20PDT001', ] # List of training periods on form ['start', 'end'] traintime = [ ["2020-01-01 00:00:00", "2020-03-20 00:00:00"], ] # Testing period, recommended: entire dataset testtime = ["2020-01-01 00:00:00", "2020-08-01 00:00:00"] df = mlModule.initDataframe(filename, columns, irrelevantColumns) df_train, df_test = mlModule.getTestTrainSplit(traintime, testtime) X_train, y_train, X_test, y_test = mlModule.getFeatureTargetSplit( targetColumns) mlp_1 = mlModule.MLP('MLP 1x64 d0.2 mod' + model, layers=[64], dropout=0.2) mlp_2 = mlModule.MLP('MLP 1x128 d0.2 mod' + model, layers=[128], dropout=0.2) mlp_3 = mlModule.MLP('MLP 2x64 d0.2 mod' + model, layers=[64, 64], dropout=0.2) mlp_4 = mlModule.MLP('MLP 2x128 d0.2 mod' + model, layers=[128, 128], dropout=0.2) lstm_1 = mlModule.LSTM('LSTM 1x64 d0.2 mod' + model, layers=[64], dropout=0.2, recurrentDropout=0.2, enrolWindow=12) lstm_2 = mlModule.LSTM('LSTM 1x128 d0.2 mod' + model,
def featureComparison(
        irrelevantColumnsList,
        filename,
        columns,
        traintime,
        testtime,
        targetColumns,
        enrolWindow,
):
    """Train the same set of models on several feature selections and compare them.

    For every entry of ``irrelevantColumnsList`` the ``mlModule`` state is
    reset, the dataframe is re-initialised with that entry as the
    irrelevant columns, and four MLPs, four LSTMs and one regularised
    linear model are initialised, trained (``retrain=False``) and used for
    prediction. The per-run results are merged into one set of lists,
    coloured, and passed to ``printModelScores``, ``plotModelPredictions``
    and ``plotModelScores``.

    Relies on the module-level names ``colors`` (indexable colour values)
    and ``models`` (name suffixes, indexed in step with
    ``irrelevantColumnsList``).

    Args:
        irrelevantColumnsList: One collection of irrelevant columns per
            feature selection to compare. Must not be empty.
        filename: Forwarded to ``mlModule.initDataframe``.
        columns: Forwarded to ``mlModule.initDataframe``.
        traintime: Forwarded to ``mlModule.getTestTrainSplit``; for
            plotting it is replaced by ``mlModule._traintime``.
        testtime: Forwarded to ``mlModule.getTestTrainSplit``.
        targetColumns: Forwarded to ``mlModule.getFeatureTargetSplit``.
        enrolWindow: Accepted but currently unused.
            NOTE(review): the LSTMs below use a hard-coded
            ``enrolWindow=12``; confirm whether this argument was meant
            to replace it before wiring it in.

    Raises:
        ValueError: If ``irrelevantColumnsList`` yields no entries.
    """
    global colors, models

    columnsLists = []
    deviationsLists = []
    all_names = []
    all_train_metrics = []
    all_test_metrics = []
    ranAnyComparison = False

    for i, irrelevantColumns in enumerate(irrelevantColumnsList):
        ranAnyComparison = True
        mlModule.reset()

        # The return values are not used here; the calls are kept because
        # mlModule presumably stores the prepared data internally.
        mlModule.initDataframe(filename, columns, irrelevantColumns)
        mlModule.getTestTrainSplit(traintime, testtime)
        mlModule.getFeatureTargetSplit(targetColumns)

        mlp_1 = mlModule.MLP('MLP 1x64 d0.2 mod' + models[i], layers=[64], dropout=0.2)
        mlp_2 = mlModule.MLP('MLP 1x128 d0.2 mod' + models[i], layers=[128], dropout=0.2)
        mlp_3 = mlModule.MLP('MLP 2x64 d0.2 mod' + models[i], layers=[64, 64], dropout=0.2)
        mlp_4 = mlModule.MLP('MLP 2x128 d0.2 mod' + models[i], layers=[128, 128], dropout=0.2)
        lstm_1 = mlModule.LSTM('LSTM 1x64 d0.2 mod' + models[i], layers=[64],
                               dropout=0.2, recurrentDropout=0.2, enrolWindow=12)
        lstm_2 = mlModule.LSTM('LSTM 1x128 d0.2 mod' + models[i], layers=[128],
                               dropout=0.2, recurrentDropout=0.2, enrolWindow=12)
        lstm_3 = mlModule.LSTM('LSTM 2x64 d0.2 mod' + models[i], layers=[64, 64],
                               dropout=0.2, recurrentDropout=0.2, enrolWindow=12)
        lstm_4 = mlModule.LSTM('LSTM 2x128 d0.2 mod' + models[i], layers=[128, 128],
                               dropout=0.2, recurrentDropout=0.2, enrolWindow=12)
        linear = mlModule.Linear_Regularized('Linear rCV mod' + models[i])

        modelList = [
            mlp_1,
            mlp_2,
            mlp_3,
            mlp_4,
            lstm_1,
            lstm_2,
            lstm_3,
            lstm_4,
            linear,
        ]

        mlModule.initModels(modelList)
        retrain = False
        mlModule.trainModels(retrain)

        modelNames, metrics_train, metrics_test, columnsList, deviationsList = mlModule.predictWithModels(
            plot=True, score=True)

        if i == 0:
            # The first run seeds the aggregated per-target lists.
            columnsLists = columnsList
            deviationsLists = deviationsList
        else:
            for j_target, newColumns in enumerate(columnsList):
                # Entry 0 of each per-target list is skipped: it is the
                # entry coloured 'red' below and is already present from
                # the first run (presumably the target series itself).
                columnsLists[j_target].extend(newColumns[1:])
                deviationsLists[j_target].extend(deviationsList[j_target])

        all_names.extend(modelNames)
        all_train_metrics.extend(metrics_train)
        all_test_metrics.extend(metrics_test)

    if not ranAnyComparison:
        raise ValueError(
            'irrelevantColumnsList must contain at least one entry')

    indexColumn = mlModule._indexColumn
    columnDescriptions = mlModule._columnDescriptions
    columnUnits = mlModule._columnUnits
    traintime = mlModule._traintime

    # Slot 3 of every entry is overwritten with a colour value.
    for deviations in deviationsLists:
        for j, deviationEntry in enumerate(deviations):
            deviationEntry[3] = colors[j]

    for targetEntries in columnsLists:
        # Fixed: the original indexed the last run's ``columnsList`` here,
        # so with several feature sets the aggregated first entry was
        # never coloured red.
        targetEntries[0][3] = 'red'
        for j, columnEntry in enumerate(targetEntries[1:]):
            columnEntry[3] = colors[j]

    printModelScores(
        all_names,
        all_train_metrics,
        all_test_metrics,
    )
    plotModelPredictions(
        plt,
        deviationsLists,
        columnsLists,
        indexColumn,
        columnDescriptions,
        columnUnits,
        traintime,
        interpol=False,
    )
    plotModelScores(
        plt,
        all_names,
        all_train_metrics,
        all_test_metrics,
    )
def pred(facility, model, resolution):
    """Compare L1-regularised 1x128 MLPs with a dropout MLP and a linear model.

    Reads the configuration via ``configs.getConfig`` and runs the
    ``mlModule`` data preparation calls. Seven MLPs with ``l1_rate`` from
    1.0 down to 0.001, one MLP with dropout 0.2 and one
    ``Linear_Regularized`` model are built (MLPs with 5000 epochs).
    ``initTrainPredict`` is called twice: once with all nine models and
    once with only the two weakest-regularised MLPs, the dropout MLP and
    the linear model.

    Args:
        facility: Forwarded to ``configs.getConfig``.
        model: Forwarded to ``configs.getConfig`` and concatenated onto
            every model name, so it has to be a string.
        resolution: Forwarded to ``configs.getConfig``.
    """
    (
        filename,
        columns,
        irrelevantColumns,
        targetColumns,
        traintime,
        testtime,
        _columnOrder,
    ) = configs.getConfig(facility, model, resolution)

    # The return values are not used in this function; the calls are kept
    # because mlModule presumably stores the prepared data internally.
    mlModule.initDataframe(filename, columns, irrelevantColumns)
    mlModule.getTestTrainSplit(traintime, testtime)
    mlModule.getFeatureTargetSplit(targetColumns)

    # str() of each rate reproduces the label used in the model name.
    l1Rates = (1.0, 0.5, 0.1, 0.05, 0.01, 0.005, 0.001)
    regularisedMlps = []
    for rate in l1Rates:
        regularisedMlps.append(
            mlModule.MLP(
                'MLP 1x128 ' + str(rate) + ' mod' + model,
                layers=[128],
                l1_rate=rate,
                epochs=5000,
            )
        )

    dropoutMlp = mlModule.MLP(
        'MLP 1x128 0.2' + ' mod' + model,
        layers=[128],
        dropout=0.2,
        epochs=5000,
    )
    linearBaseline = mlModule.Linear_Regularized('Linear rCV' + ' mod' + model)

    # First pass: every model.
    initTrainPredict([*regularisedMlps, dropoutMlp, linearBaseline])

    # Second pass: only the l1_rate 0.005 and 0.001 MLPs plus the
    # dropout MLP and the linear model.
    initTrainPredict([*regularisedMlps[-2:], dropoutMlp, linearBaseline])
] # In this case, two separate testing phases are used # This testtime parameter should cover the entire dataset testtime = ["2020-01-01 00:00:00", "2020-08-01 00:00:00"] # This testtime1 parameter should cover the first testing phase testtime1 = ["2020-04-15 00:00:00", "2020-05-04 00:00:00"] # This testtime2 parameter should cover the second testing phase testtime2 = ["2020-06-01 00:00:00", "2020-06-16 00:00:00"] # 2. Initiate and divide data df = mlApi.initDataframe(filename, columns, irrelevantColumns) df_train, df_test = mlApi.getTestTrainSplit(traintime, testtime) df_test_1, df_test_2 = mlApi.getTestTrainSplit([testtime1], testtime2) df_test_joined = pd.concat([df_test_1, df_test_2]) # 3. Plot correlation plots mlApi.correlationPlot(df_train, datasetName + " train") mlApi.correlationDuoPlot(df_test_1, df_test_2, datasetName + " test 1", datasetName + " test 2") mlApi.correlationDifferencePlot( df_train, df_test_joined, "Difference, " + datasetName + " train and test") # Reset to prepare for second dataset # ------------------------------------- mlApi.reset() # -------------------------------------
def pred(facility, model, resolution):
    """Compare MLP sizes and MLP+linear ensembles against a linear model.

    Reads the configuration via ``configs.getConfig`` and runs the
    ``mlModule`` data preparation calls. MLPs with one or two hidden
    layers of 16, 32, 64 or 128 units (dropout 0.2, 1000 epochs), a
    ``Linear_Regularized`` model and two ensembles are built, and
    ``initTrainPredict`` is called three times: small MLPs, large MLPs,
    and ensembles, each together with the linear model.

    Args:
        facility: Forwarded to ``configs.getConfig``.
        model: Forwarded to ``configs.getConfig`` and concatenated onto
            every model name, so it has to be a string.
        resolution: Forwarded to ``configs.getConfig``.
    """
    (
        filename,
        columns,
        irrelevantColumns,
        targetColumns,
        traintime,
        testtime,
        _columnOrder,
    ) = configs.getConfig(facility, model, resolution)

    # The return values are not used in this function; the calls are kept
    # because mlModule presumably stores the prepared data internally.
    mlModule.initDataframe(filename, columns, irrelevantColumns)
    mlModule.getTestTrainSplit(traintime, testtime)
    mlModule.getFeatureTargetSplit(targetColumns)

    # Build the grid in the original order: all 1-layer nets, then all
    # 2-layer nets, each from 16 up to 128 units.
    mlpGrid = {}
    for depth in (1, 2):
        for width in (16, 32, 64, 128):
            mlpGrid[(depth, width)] = mlModule.MLP(
                'MLP ' + str(depth) + 'x' + str(width) + ' mod' + model,
                layers=[width] * depth,
                dropout=0.2,
                epochs=1000,
            )

    linearModel = mlModule.Linear_Regularized('Linear rCV' + ' mod' + model)

    ensembleWide = mlModule.Ensemble(
        'MLP 1x128 + Linear' + ' mod' + model,
        [mlpGrid[(1, 128)], linearModel],
    )
    ensembleDeep = mlModule.Ensemble(
        'MLP 2x64 + Linear' + ' mod' + model,
        [mlpGrid[(2, 64)], linearModel],
    )

    # Round 1: the small networks.
    initTrainPredict([
        linearModel,
        mlpGrid[(1, 16)],
        mlpGrid[(1, 32)],
        mlpGrid[(2, 16)],
        mlpGrid[(2, 32)],
    ])

    # Round 2: the large networks.
    initTrainPredict([
        linearModel,
        mlpGrid[(1, 64)],
        mlpGrid[(1, 128)],
        mlpGrid[(2, 64)],
        mlpGrid[(2, 128)],
    ])

    # Round 3: the ensembles.
    initTrainPredict([
        linearModel,
        ensembleWide,
        ensembleDeep,
    ])