def main():
    """Train the tuned final model per target/horizon and score it on test data.

    For every model name and target this function:

    * creates ``<directory>`` and ``<directory>Results/`` and starts a new
      ``results_<target><timestamp>.csv`` whose first row holds the column
      names;
    * for each prediction horizon, reads the tuned parameters from
      ``func.trained_param_grid``, fits the model returned by
      ``func.getModel`` on the training sonde file and stores it with
      ``joblib.dump``;
    * predicts on the matching test file, one index set yielded by
      ``func.custom_cv_kfolds_testdataonly`` at a time, appends one row of
      ``func.forecast_accuracy`` metrics per index set and saves a scatter
      plot of actual vs. predicted values for every 10th one.

    Raises:
        FileNotFoundError: if the train or test directory holds no CSV file
            whose name starts with the expected sonde file name.
    """
    models = ['RF']  # 'LSTM', 'NN', 'LR', 'RF', 'DT', 'SVC',
    targets = ['dissolved_oxygen', 'ph']  # ['DOcategory', 'pHcategory']
    sondefilename = 'leavon_wo_2019-07-01-2020-01-15'
    # The test file carries the same name without the 'wo_' marker.
    testsondefilename = re.sub('wo_', '', sondefilename)
    horizons = [1, 3, 6, 12, 24, 36, 48]

    id_columns = ['target_names', 'method_names', 'temporalhorizons', 'CV',
                  'file_names']
    metric_columns_by_cat = {
        1: ['F1_0', 'F1_1', 'P_0', 'P_1', 'R_0', 'R_1', 'acc0_1', 'F1_0_1'],
        0: ['mape', 'me', 'mae', 'mpe', 'rmse', 'R2'],
    }

    for model_name in models:
        print(model_name)
        is_keras = model_name in ('LSTM', 'NN')
        # Decide the worker count per model so that a keras model earlier in
        # `models` does not force every later model to a single job.
        n_job = 1 if is_keras else -1

        for target in targets:
            # `in` also matches a name that starts with 'category'
            # (str.find(...) > 0 would miss a match at index 0).
            if 'category' in target:
                cat = 1
                directory = 'Results/balance_data/output_Cat_' + \
                    model_name + '/final_models/'
            else:
                cat = 0
                directory = 'Results/balance_data/output_Reg_' + \
                    model_name + '/final_models/'
            metric_columns = metric_columns_by_cat[cat]

            directoryresult = directory + 'Results/'
            # Creates `directory` as well, since it is a parent of the
            # results directory.
            os.makedirs(directoryresult, exist_ok=True)

            resultFileName = 'results_' + target + str(time.time()) + '.csv'
            # The column names are written as the first data row; every later
            # row is appended without a header.
            header = {name: name for name in id_columns + metric_columns}
            pd.DataFrame(data=header, index=[0]).to_csv(
                directoryresult + resultFileName, index=False, header=False)

            if model_name in ('DT', 'RF'):
                method = 'OrgData'
                path = 'Sondes_data/train/train_data/'
                testpath = 'Sondes_data/test/test_data/'
            else:
                method = 'StandardScaler'
                path = 'Sondes_data/train/train_data_normalized/' + \
                    method + '/' + target + '/'
                # NOTE(review): 'test/train_data_normalized' is kept exactly as
                # in the original -- confirm the directory name is intended.
                testpath = 'Sondes_data/test/train_data_normalized/' + \
                    method + '/' + target + '/'

            # os.listdir returns names in arbitrary order; sort so the chosen
            # file is deterministic, and fail with a clear message when
            # nothing matches instead of an IndexError on files[0].
            train_files = sorted(
                f for f in os.listdir(path)
                if f.endswith('.csv') and f.startswith(sondefilename))
            if not train_files:
                raise FileNotFoundError(
                    'No CSV starting with %r found in %r'
                    % (sondefilename, path))
            test_files = sorted(
                f for f in os.listdir(testpath)
                if f.endswith('.csv') and f.startswith(testsondefilename))
            if not test_files:
                raise FileNotFoundError(
                    'No CSV starting with %r found in %r'
                    % (testsondefilename, testpath))

            params = func.trained_param_grid[
                'param_grid_' + model_name + str(cat)]

            for PrH_index in horizons:
                horizon_params = params['param_' + target + '_' + str(PrH_index)]
                lags = func.getlags_window(model_name, horizon_params, cat)

                file1 = train_files[0]
                print(' TH: ' + str(PrH_index) + ' ' + method + ' ' +
                      target + ' ' + file1)

                dataset = pd.read_csv(path + file1)
                train_X_grid, train_y_grid, input_dim, _ = func.preparedata(
                    dataset, PrH_index, lags, target, cat)

                start_time = time.time()

                clf = func.getModel(
                    model_name, input_dim, horizon_params, n_job, cat)
                print('clf: ' + str(clf))

                if cat == 1 and is_keras:
                    train_y_grid = to_categorical(train_y_grid, 3)
                    clf = clf.fit(train_X_grid, train_y_grid,
                                  model__class_weight={0: 1, 1: 50, 2: 100})
                else:
                    clf = clf.fit(train_X_grid, train_y_grid)

                # save the model to disk
                filename = model_name + '_model_' + \
                    target + '_' + str(PrH_index) + '.sav'
                joblib.dump(clf, directory + filename)

                elapsed_time = time.time() - start_time
                print(time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))

                #################################
                # Testing final model on test data
                #################################
                start_time = time.time()
                file1 = test_files[0]
                print('Window: ' + str(lags) + ' TH: ' + str(PrH_index) +
                      ' ' + method + ' ' + target + file1)

                dataset = pd.read_csv(testpath + file1)
                test_X_grid, test_y_grid, input_dim, _ = func.preparedata(
                    dataset, PrH_index, lags, target, cat)

                custom_cv = func.custom_cv_kfolds_testdataonly(
                    test_X_grid, 100)
                for i, test_index in enumerate(custom_cv, start=1):
                    test_X = test_X_grid[test_index]
                    test_y = test_y_grid[test_index]

                    predictions = clf.predict(test_X)

                    # The test labels are never one-hot encoded in this script
                    # (only train_y_grid is, above), so they are compared
                    # as-is; taking argmax over them would not recover the
                    # class labels.

                    if i % 10 == 0:
                        plt.scatter(np.arange(len(test_y)), test_y, s=1)
                        plt.scatter(np.arange(len(predictions)),
                                    predictions, s=1)
                        plt.legend(['actual', 'predictions'],
                                   loc='upper right')
                        fpath = 'predictions_' + method + target + '_Window' + \
                            str(lags) + '_TH' + str(PrH_index) + \
                            '_CV' + str(i) + file1
                        plt.savefig(directoryresult + fpath + '.jpg')
                        plt.close()

                    cm0 = func.forecast_accuracy(predictions, test_y, cat)

                    row = {
                        'target_names': target,
                        'method_names': method,
                        'temporalhorizons': PrH_index,
                        'CV': i,
                        'file_names': filename,
                    }
                    # Metric k of forecast_accuracy goes under column k.
                    for k, name in enumerate(metric_columns):
                        row[name] = cm0[k]

                    pd.DataFrame(data=row, index=[0]).to_csv(
                        directoryresult + resultFileName,
                        index=False, mode='a', header=False)

                    elapsed_time = time.time() - start_time
                    print(time.strftime("%H:%M:%S",
                                        time.gmtime(elapsed_time)))

                # Release the fitted model and any backend session state
                # before the next horizon is trained.
                del clf
                Kb.clear_session()
                gc.collect()
def _save_roc_curves(y_onehot, y_score, out_path):
    """Plot per-class and micro-average ROC curves and save them to *out_path*.

    Args:
        y_onehot: binarized true labels, one column per class (3 classes).
        y_score: predicted class probabilities, same layout as *y_onehot*.
        out_path: file path passed to ``plt.savefig``.

    Returns:
        dict mapping class index (0, 1, 2) to the threshold that maximizes
        ``tpr - fpr`` on that class's ROC curve.
    """
    fpr, tpr, roc_auc, best_thresh = {}, {}, {}, {}
    for cls in range(3):
        fpr[cls], tpr[cls], thresholds = roc_curve(
            y_onehot[:, cls], y_score[:, cls])
        roc_auc[cls] = auc(fpr[cls], tpr[cls])
        # get the best threshold: the point with the largest tpr - fpr
        best_thresh[cls] = thresholds[argmax(tpr[cls] - fpr[cls])]
        print('Best Threshold=%f, roc_auc=%.3f'
              % (best_thresh[cls], roc_auc[cls]))

    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(
        y_onehot.ravel(), y_score.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
    plt.plot(
        fpr["micro"], tpr["micro"],
        label='micro-average ROC curve (area = {0:0.2f})'.format(
            roc_auc["micro"]),
        color='deeppink', linestyle=':', linewidth=4)

    for cls, color in zip(range(3), ['aqua', 'darkorange', 'cornflowerblue']):
        plt.plot(
            fpr[cls], tpr[cls], color=color, lw=2,
            label='ROC curve of class {0} (area = {1:0.2f})'.format(
                cls, roc_auc[cls]))

    plt.plot([0, 1], [0, 1], linestyle='--', label='No Skill')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(
        'Some extension of Receiver operating characteristic to multi-class')
    plt.legend(loc="lower right")
    plt.savefig(out_path)
    plt.close()
    return best_thresh


def main():
    """Run a randomized hyper-parameter search per target, window and horizon.

    For every model name and target a new ``results_<target><timestamp>.csv``
    is started in the output directory (first row = column names).  For each
    window size and prediction horizon the training sonde file is prepared
    with ``func.preparedata``, a ``RandomizedSearchCV`` over
    ``func.param_grid`` is fitted, and the refitted search object is then
    evaluated on the test part of every split yielded by
    ``func.custom_cv_2folds(train_X_grid, 3)``.  Each split appends one
    metrics row and saves a scatter plot; classification targets additionally
    get a ROC plot and per-class thresholds.

    Raises:
        FileNotFoundError: if the training directory holds no CSV file whose
            name starts with the expected sonde file name.
    """
    models = ['RF']  # 'LSTM', 'NN', 'LR', 'RF', 'DT', 'SVC',
    # ['DOcategory', 'pHcategory']  # 'ph','dissolved_oxygen'  # ph TH: 24,36,48
    targets = ['ph']
    sondefilename = 'leavon_wo_2019-07-01-2020-01-15'

    id_columns = ['target_names', 'method_names', 'window_nuggets',
                  'temporalhorizons', 'CV', 'file_names']
    search_columns = ['std_test_score', 'mean_test_score', 'params',
                      'bestscore']
    cat_metrics = ['F1_0', 'F1_1', 'P_0', 'P_1', 'R_0', 'R_1', 'acc0_1',
                   'F1_0_1', 'F1_all']
    reg_metrics = ['mape', 'me', 'mae', 'mpe', 'rmse', 'R2']
    threshold_columns = ['best_thresh_0', 'best_thresh_1', 'best_thresh_2']

    for model_name in models:
        print(model_name)
        is_keras = model_name in ('LSTM', 'NN')
        # Per-model worker count: a keras model earlier in `models` must not
        # pin every later model to a single job.
        n_job = 1 if is_keras else -1

        for target in targets:
            # `in` also matches a name that starts with 'category'.
            if 'category' in target:
                cat = 1
                directory = 'Results/balance_data/output_Cat_' + \
                    model_name + '/oversampling_cv_models/'
                columns = (id_columns + search_columns + cat_metrics +
                           ['fbeta', 'imfeatures'] + threshold_columns)
            else:
                cat = 0
                directory = 'Results/balance_data/output_Reg_' + \
                    model_name + '/oversampling_cv_models/'
                columns = (id_columns + search_columns + reg_metrics +
                           ['imfeatures'])

            os.makedirs(directory, exist_ok=True)

            resultFileName = 'results_' + target + str(time.time()) + '.csv'
            # Column names go in as the first data row; rows are appended
            # later without a header.
            header = {name: name for name in columns}
            pd.DataFrame(data=header, index=[0]).to_csv(
                directory + resultFileName, index=False, header=False)

            if model_name in ('DT', 'RF'):
                path = 'Sondes_data/train/train_data/'
                method = 'OrgData'
            else:
                method = 'StandardScaler'
                path = 'Sondes_data/train/train_data_normalized/' + \
                    method + '/' + target + '/'

            # Sorted for a deterministic pick (os.listdir order is arbitrary)
            # and checked so a missing file gives a clear error.
            files = sorted(
                f for f in os.listdir(path)
                if f.endswith('.csv') and f.startswith(sondefilename))
            if not files:
                raise FileNotFoundError(
                    'No CSV starting with %r found in %r'
                    % (sondefilename, path))
            file = files[0]

            for n_steps in [1, 3, 6, 12]:
                for PrH_index in [1, 3, 6, 12, 24, 36, 48]:
                    print('Window: ' + str(n_steps) + ' TH: ' +
                          str(PrH_index) + ' ' + method + ' ' + target)

                    dataset = pd.read_csv(path + file)
                    train_X_grid, train_y_grid, input_dim, _ = \
                        func.preparedata(
                            dataset, PrH_index, n_steps, target, cat)

                    if cat == 1 and is_keras:
                        train_y_grid = to_categorical(train_y_grid, 3)

                    start_time = time.time()

                    model = func.algofind(model_name, input_dim, n_steps, cat)
                    pipeline = Pipeline(steps=[('model', model)])

                    custom_cv = func.custom_cv_2folds(train_X_grid, 5)
                    gs = RandomizedSearchCV(
                        estimator=pipeline,
                        param_distributions=func.param_grid[
                            'param_grid_' + model_name + str(cat)],
                        n_iter=10,
                        cv=custom_cv,
                        verbose=0,
                        random_state=42,
                        n_jobs=n_job)

                    if cat == 1 and is_keras:
                        clf = gs.fit(train_X_grid, train_y_grid,
                                     model__class_weight={0: 1, 1: 50, 2: 100})
                    else:
                        clf = gs.fit(train_X_grid, train_y_grid)

                    test_Score = clf.cv_results_['mean_test_score'].mean()
                    test_std = clf.cv_results_['std_test_score'].mean()
                    print('Mean test scores: %.3f' % test_Score)

                    custom_cv = func.custom_cv_2folds(train_X_grid, 3)
                    # `i` is the split counter written to the 'CV' column.
                    # The ROC code uses its own loop variables so that it can
                    # no longer overwrite this counter.
                    for i, (_train_index, test_index) in enumerate(
                            custom_cv, start=1):
                        test_X = train_X_grid[test_index]
                        test_y = train_y_grid[test_index]

                        predictions = clf.predict(test_X)
                        fpath = 'predictions_' + method + target + \
                            '_Window' + str(n_steps) + '_TH' + \
                            str(PrH_index) + '_CV' + str(i) + file

                        if cat == 1:
                            # predict probabilities
                            yhat = clf.predict_proba(test_X)
                            y = label_binarize(test_y, classes=[0, 1, 2])
                            best_thresh = _save_roc_curves(
                                y, yhat, directory + fpath + 'ROC_curve.jpg')

                        if cat == 1 and is_keras:
                            # undo the one-hot encoding applied above
                            test_y = argmax(test_y, axis=1)

                        if cat == 0:
                            predictions, test_y = func.transform(
                                predictions, test_y, method, target, file)

                        cm0 = func.forecast_accuracy(predictions, test_y, cat)

                        plt.scatter(np.arange(len(test_y)), test_y, s=1)
                        plt.scatter(np.arange(len(predictions)),
                                    predictions, s=1)
                        plt.legend(['actual', 'predictions'],
                                   loc='upper right')
                        plt.savefig(directory + fpath + '.jpg')
                        plt.close()

                        print(test_y.shape)
                        print(predictions.shape)

                        # Single-element lists keep dict/estimator objects in
                        # one cell of the one-row frame.
                        row = {
                            'target_names': target,
                            'method_names': method,
                            'window_nuggets': n_steps,
                            'temporalhorizons': PrH_index,
                            'CV': i,
                            'file_names': fpath,
                            'std_test_score': [test_std],
                            'mean_test_score': [test_Score],
                            'params': [clf.best_params_],
                            'bestscore': [clf.best_score_],
                        }
                        if cat == 1:
                            for k, name in enumerate(cat_metrics):
                                row[name] = cm0[k]
                            row['fbeta'] = [cm0[9]]
                            row['imfeatures'] = [clf.best_estimator_]
                            for k, name in enumerate(threshold_columns):
                                row[name] = best_thresh[k]
                        else:
                            for k, name in enumerate(reg_metrics):
                                row[name] = cm0[k]
                            row['imfeatures'] = [clf.best_estimator_]

                        pd.DataFrame(data=row, index=[0]).to_csv(
                            directory + resultFileName,
                            index=False, mode='a', header=False)

                        elapsed_time = time.time() - start_time
                        print(time.strftime("%H:%M:%S",
                                            time.gmtime(elapsed_time)))

                    # Free the fitted search object and backend session state
                    # before the next configuration.
                    del clf
                    Kb.clear_session()
                    gc.collect()
def main():
    """Score a persistence baseline on every CSV file in the test directory.

    The baseline predicts that the target keeps its current value: the
    prediction for each sample is the last column of the prepared feature
    matrix.  For every file and prediction horizon the data is prepared with
    ``func.preparedata`` (window size 1), split into the index sets yielded by
    ``func.custom_cv_kfolds_testdataonly`` and scored with
    ``func.forecast_accuracy``.  One metrics row per index set is appended to
    ``results_<target>_<file>``; for every 10th index set a scatter plot and a
    CSV of time/actual/prediction are written to ``<directory>more/``.
    """
    methods = ['OrgData']
    # 'dissolved_oxygen', 'ph', 'DOcategory', 'pHcategory']
    targets = ['ysi_blue_green_algae']
    model_name = 'baseline'
    # test_Summer train_Summer
    # bookTwo: Sondes_data/old/test/test_data/
    path = 'Sondes_data/test_Summer/'
    files = sorted(f for f in os.listdir(path) if f.endswith(".csv"))

    id_columns = ['CV', 'target_names', 'method_names', 'temporalhorizons',
                  'window_nuggets', 'file_names']
    cat_metrics = ['F1_0', 'F1_1', 'P_0', 'P_1', 'R_0', 'R_1', 'acc0_1',
                   'F1_0_1', 'F1_all']
    reg_metrics = ['mape', 'me', 'mae', 'mpe', 'rmse', 'R2']

    for method in methods:
        for target in targets:
            # `in` also matches a name that starts with 'category'.
            if 'category' in target:
                cat = 1
                # final_models/Results oversampling_cv_models/
                directory = 'Results/bookThree/output_Cat_' + \
                    model_name + '/final_models/Results/'
                columns = id_columns + cat_metrics + ['fbeta']
            else:
                cat = 0
                # final_models/Results oversampling_cv_models
                directory = 'Results/bookThree/output_Reg_' + \
                    model_name + '/final_models/Results/'
                columns = id_columns + reg_metrics

            os.makedirs(directory, exist_ok=True)
            directorydeeper = directory + 'more/'

            for file in files:
                print(file)
                result_filename = 'results_' + target + '_' + file
                # The frame's only row already holds the column names, so the
                # automatic header is disabled; otherwise the names would be
                # written twice at the top of the file.
                header = {name: name for name in columns}
                pd.DataFrame(data=header, index=[0]).to_csv(
                    directory + result_filename, index=False, header=False)

                n_steps = 1

                for PrH_index in [1, 3, 6, 12, 24, 36, 48, 60, 72]:
                    dataset = pd.read_csv(path + file)
                    # Only the time columns and the target are kept.
                    dataset = dataset[['year', 'month', 'day', 'hour', target]]

                    print('Window: ' + str(n_steps) + ' TH: ' +
                          str(PrH_index) + ' ' + method + ' ' + target)

                    train_X_grid, train_y_grid, _input_dim, _features = \
                        func.preparedata(
                            dataset, PrH_index, n_steps, target, cat)

                    start_time = time.time()

                    # For Test files; for Train files use
                    # func.custom_cv_2folds(train_X_grid, 3) instead.
                    custom_cv = func.custom_cv_kfolds_testdataonly(
                        train_X_grid, 100)
                    for i, test_index in enumerate(custom_cv, start=1):
                        test_X = train_X_grid[test_index]
                        test_y = train_y_grid[test_index]

                        # current value would be the same in the future
                        predictions = test_X[:, -1]

                        # Columns 0-3 of the features are read as
                        # year/month/day/hour to rebuild the timestamps.
                        df_time = pd.DataFrame({
                            'year': np.array(test_X[:, 0]).astype(int),
                            'month': np.array(test_X[:, 1]).astype(int),
                            'day': np.array(test_X[:, 2]).astype(int),
                            'hour': np.array(test_X[:, 3]).astype(int),
                        })
                        timeline = pd.to_datetime(df_time, format='%Y%m%d %H')

                        if cat == 1:
                            predictions = np.array(predictions).astype(int)
                            test_y = np.array(test_y).astype(int)

                        # Flatten (n, 1) column vectors; a no-op for arrays
                        # that are already one-dimensional.
                        test_y = test_y.reshape(len(test_y), )
                        predictions = predictions.reshape(len(predictions), )

                        cm0 = func.forecast_accuracy(predictions, test_y, cat)

                        filename = file + '_' + target + '_TH' + \
                            str(PrH_index) + '_lag' + \
                            str(n_steps) + '_' + str(i)

                        if i % 10 == 0:  # or i <= 3:
                            plt.scatter(timeline.values, test_y, s=1)
                            plt.scatter(timeline.values, predictions, s=1)
                            plt.legend(['actual', 'predictions'],
                                       loc='upper right')
                            plt.xticks(rotation=45)
                            os.makedirs(directorydeeper, exist_ok=True)
                            plt.savefig(directorydeeper + filename + '.jpg')
                            plt.close()

                            detail = pd.DataFrame(data={
                                'time': timeline,
                                'Actual': test_y,
                                'Predictions': predictions,
                            })
                            detail.to_csv(
                                directorydeeper + filename + '.csv',
                                index=False)

                        row = {
                            'CV': i,
                            'target_names': target,
                            'method_names': method,
                            'temporalhorizons': PrH_index,
                            'window_nuggets': n_steps,
                            'file_names': filename,
                        }
                        if cat == 1:
                            for k, name in enumerate(cat_metrics):
                                row[name] = cm0[k]
                            row['fbeta'] = [cm0[9]]
                        else:
                            for k, name in enumerate(reg_metrics):
                                row[name] = cm0[k]

                        pd.DataFrame(data=row, index=[0]).to_csv(
                            directory + result_filename,
                            index=False, mode='a', header=False)

                        elapsed_time = time.time() - start_time
                        print(time.strftime("%H:%M:%S",
                                            time.gmtime(elapsed_time)))

                    gc.collect()
def main():
    """Evaluate the outlier detectors from ``get_models()`` on category targets.

    For every detector, category target, window size and prediction horizon
    the training sonde file is prepared with ``func.preparedata`` and split
    with ``func.custom_cv_2folds(train_X_grid, 3)``.  In each split the
    detector pipeline is fitted only on training samples whose label is 0 and
    then predicts the test samples.  Test labels are mapped to the detector
    convention (label 0 -> 1 inlier, label > 0 -> -1 outlier) and the F1 score
    of the outlier class is appended to the results CSV, together with a
    scatter plot and a CSV of actual vs. predicted values.

    Targets whose name does not contain 'category' are skipped.

    Raises:
        FileNotFoundError: if the training directory holds no CSV file whose
            name starts with the expected sonde file name.
    """
    # models = ['LOF', 'EE', 'IF', 'SVM']
    targets = ['DOcategory', 'pHcategory']  # , 'ph', 'dissolved_oxygen']
    sondefilename = 'leavon_wo_2019-07-01-2020-01-15'
    path = 'Sondes_data/train/train_data/'
    method = 'SS_pipeline'

    # These are exactly the keys of the rows appended below, so the header
    # row lines up with the data rows.
    columns = ['target_names', 'method_names', 'window_nuggets',
               'temporalhorizons', 'CV', 'file_names', 'F-measure']

    # Sorted for a deterministic pick (os.listdir order is arbitrary) and
    # checked so a missing file gives a clear error.
    files = sorted(
        f for f in os.listdir(path)
        if f.endswith('.csv') and f.startswith(sondefilename))
    if not files:
        raise FileNotFoundError(
            'No CSV starting with %r found in %r' % (sondefilename, path))
    file = files[0]

    model, model_name = get_models()
    for detector, detector_name in zip(model, model_name):
        print(detector_name)
        print(detector)

        for target in targets:
            if 'category' not in target:
                # Only category targets have an output directory and label
                # mapping defined; skip anything else instead of running with
                # undefined or stale settings.
                print('Skipping non-category target: ' + target)
                continue

            cat = 1
            directory = 'Results/AnomalyDetection/output_Cat_' + \
                detector_name + '/oversampling_cv_models/'
            os.makedirs(directory, exist_ok=True)

            resultFileName = 'results_' + target + str(time.time()) + '.csv'
            header = {name: name for name in columns}
            pd.DataFrame(data=header, index=[0]).to_csv(
                directory + resultFileName, index=False, header=False)

            for n_steps in [1, 3, 6, 12]:
                for PrH_index in [1, 3, 6, 12, 24, 36, 48]:
                    print('Window: ' + str(n_steps) + ' TH: ' +
                          str(PrH_index) + ' ' + method + ' ' + target)

                    dataset = pd.read_csv(path + file)
                    train_X_grid, train_y_grid, _input_dim, _features = \
                        func.preparedata(
                            dataset, PrH_index, n_steps, target, cat)
                    print(train_X_grid[0:1])

                    start_time = time.time()

                    if detector_name == 'IF':
                        pipeline = Pipeline(steps=[('model', detector)])
                    else:
                        pipeline = Pipeline(
                            steps=[('n', StandardScaler()),
                                   ('model', detector)])

                    custom_cv = func.custom_cv_2folds(train_X_grid, 3)
                    for i, (train_index, test_index) in enumerate(
                            custom_cv, start=1):
                        train_X = train_X_grid[train_index]
                        train_y = train_y_grid[train_index]
                        test_X = train_X_grid[test_index]
                        # Copy the labels: they are relabelled in place below
                        # and must not alter train_y_grid for later splits.
                        test_y = np.array(train_y_grid[test_index])

                        # fit on majority class
                        train_X = train_X[train_y == 0]

                        # detect outliers in the test set
                        pipeline.fit(train_X)
                        predictions = pipeline.predict(test_X)

                        fpath = 'predictions_' + method + target + \
                            '_Window' + str(n_steps) + '_TH' + \
                            str(PrH_index) + '_CV' + str(i) + file

                        # mark inliers 1, outliers -1
                        test_y[test_y > 0] = -1
                        test_y[test_y == 0] = 1

                        # calculate score
                        score = f1_score(test_y, predictions, pos_label=-1)
                        print('F-measure: %.3f' % score)

                        plt.scatter(np.arange(len(test_y)), test_y, s=1)
                        plt.scatter(np.arange(len(predictions)),
                                    predictions, s=1)
                        plt.legend(['actual', 'predictions'],
                                   loc='upper right')
                        plt.savefig(directory + fpath + '.jpg')
                        plt.close()

                        print(test_y.shape)
                        print(predictions.shape)
                        detail = pd.DataFrame(
                            data={'Actual': test_y,
                                  'Predictions': predictions})
                        detail.to_csv(directory + fpath, index=False)

                        row = {
                            'target_names': target,
                            'method_names': method,
                            'window_nuggets': n_steps,
                            'temporalhorizons': PrH_index,
                            'CV': i,
                            'file_names': fpath,
                            'F-measure': score,
                        }
                        pd.DataFrame(data=row, index=[0]).to_csv(
                            directory + resultFileName,
                            index=False, mode='a', header=False)

                        elapsed_time = time.time() - start_time
                        print(time.strftime("%H:%M:%S",
                                            time.gmtime(elapsed_time)))

                    Kb.clear_session()
                    gc.collect()
def main():
    """Run a randomized hyper-parameter search per target, window and horizon.

    For every model name and target a new ``results_<target><timestamp>.csv``
    is started in the output directory (first row = column names).  For each
    window size and prediction horizon the training sonde file is prepared
    with ``func.preparedata`` and a ``RandomizedSearchCV`` (20 candidates)
    over ``func.param_grid`` is fitted.  'RF' and 'DT' are searched on the raw
    features; every other model is preceded by a ``StandardScaler``.
    Non-keras models are scored with ``f2_measure`` (category targets) or
    ``R2_measure`` (other targets); 'LSTM'/'NN' use the estimator's default
    scoring.  The refitted search object is then evaluated on the test part of
    every split yielded by ``func.custom_cv_2folds(train_X_grid, 3)``; each
    split appends a metrics row and saves a scatter plot plus a CSV of actual
    vs. predicted values.

    Raises:
        FileNotFoundError: if the training directory holds no CSV file whose
            name starts with the expected sonde file name.
    """
    # 'LR', 'DT', 'SVC', 'LSTM', 'NN',
    # 'MLP', 'CNN', 'LSTM', 'ConvLSTM', 'CNNLSTM', 'EncodeDecodeLSTMs'
    models = ['RF']
    targets = ['DOcategory', 'pHcategory', 'ph', 'dissolved_oxygen']
    sondefilename = 'leavon_wo_2019-07-01-2020-01-15'
    path = 'Sondes_data/train/train_data/'
    method = 'OrgData'

    id_columns = ['target_names', 'method_names', 'window_nuggets',
                  'temporalhorizons', 'CV', 'file_names']
    search_columns = ['std_test_score', 'mean_test_score', 'params',
                      'bestscore']
    cat_metrics = ['F1_0', 'F1_1', 'P_0', 'P_1', 'R_0', 'R_1', 'acc0_1',
                   'F1_0_1', 'F1_all']
    reg_metrics = ['mape', 'me', 'mae', 'mpe', 'rmse', 'R2']

    # Sorted for a deterministic pick (os.listdir order is arbitrary) and
    # checked so a missing file gives a clear error.
    files = sorted(
        f for f in os.listdir(path)
        if f.endswith('.csv') and f.startswith(sondefilename))
    if not files:
        raise FileNotFoundError(
            'No CSV starting with %r found in %r' % (sondefilename, path))
    file = files[0]

    for model_name in models:
        print(model_name)
        is_keras = model_name in ('LSTM', 'NN')
        # Per-model worker count: a keras model earlier in `models` must not
        # pin every later model to a single job.
        n_job = 1 if is_keras else -1

        for target in targets:
            # `in` also matches a name that starts with 'category'.
            if 'category' in target:
                cat = 1
                directory = 'Results/bookOne/output_Cat_' + \
                    model_name + '/oversampling_cv_models/'
                columns = (id_columns + search_columns + cat_metrics +
                           ['fbeta', 'imfeatures'])
            else:
                cat = 0
                directory = 'Results/bookOne/output_Reg_' + \
                    model_name + '/oversampling_cv_models/'
                columns = (id_columns + search_columns + reg_metrics +
                           ['imfeatures'])

            os.makedirs(directory, exist_ok=True)

            resultFileName = 'results_' + target + str(time.time()) + '.csv'
            header = {name: name for name in columns}
            pd.DataFrame(data=header, index=[0]).to_csv(
                directory + resultFileName, index=False, header=False)

            for n_steps in [1, 3, 6, 12]:
                # The horizon loop header had ended up inside a comment,
                # leaving PrH_index undefined for everything below; it is
                # restored with the horizon list from that comment.
                for PrH_index in [1, 3, 6, 12, 24, 36, 48]:
                    print('Window: ' + str(n_steps) + ' TH: ' +
                          str(PrH_index) + ' ' + method + ' ' + target)

                    dataset = pd.read_csv(path + file)
                    train_X_grid, train_y_grid, input_dim, _ = \
                        func.preparedata(
                            dataset, PrH_index, n_steps, target, cat)
                    print(train_X_grid[0:1])

                    if cat == 1 and is_keras:
                        train_y_grid = to_categorical(train_y_grid, 3)

                    start_time = time.time()

                    model = func.algofind(model_name, input_dim, n_steps, cat)

                    if model_name in ('RF', 'DT'):
                        pipeline = Pipeline(steps=[('model', model)])
                    else:
                        pipeline = Pipeline(
                            steps=[('n', StandardScaler()), ('model', model)])

                    custom_cv = func.custom_cv_2folds(train_X_grid, 5)

                    # One search configuration for all models; only the
                    # scorer and the fit-time class weights differ.
                    search_kwargs = {
                        'estimator': pipeline,
                        'param_distributions': func.param_grid[
                            'param_grid_' + model_name + str(cat)],
                        'n_iter': 20,
                        'cv': custom_cv,
                        'verbose': 0,
                        'random_state': 42,
                        'n_jobs': n_job,
                    }
                    if not is_keras:
                        search_kwargs['scoring'] = make_scorer(
                            f2_measure if cat == 1 else R2_measure)
                    gs = RandomizedSearchCV(**search_kwargs)

                    fit_kwargs = {}
                    if cat == 1 and is_keras:
                        fit_kwargs['model__class_weight'] = {
                            0: 1, 1: 50, 2: 100}
                    clf = gs.fit(train_X_grid, train_y_grid, **fit_kwargs)

                    test_Score = clf.cv_results_['mean_test_score'].mean()
                    test_std = clf.cv_results_['std_test_score'].mean()
                    print('Mean test scores: %.3f' % test_Score)

                    custom_cv = func.custom_cv_2folds(train_X_grid, 3)
                    for i, (_train_index, test_index) in enumerate(
                            custom_cv, start=1):
                        test_X = train_X_grid[test_index]
                        test_y = train_y_grid[test_index]

                        predictions = clf.predict(test_X)

                        fpath = 'predictions_' + method + target + \
                            '_Window' + str(n_steps) + '_TH' + \
                            str(PrH_index) + '_CV' + str(i) + file

                        if cat == 1 and is_keras:
                            # undo the one-hot encoding applied above
                            test_y = argmax(test_y, axis=1)

                        cm0 = func.forecast_accuracy(predictions, test_y, cat)

                        plt.scatter(np.arange(len(test_y)), test_y, s=1)
                        plt.scatter(np.arange(len(predictions)),
                                    predictions, s=1)
                        plt.legend(['actual', 'predictions'],
                                   loc='upper right')
                        plt.savefig(directory + fpath + '.jpg')
                        plt.close()

                        print(test_y.shape)
                        print(predictions.shape)
                        detail = pd.DataFrame(
                            data={'Actual': test_y,
                                  'Predictions': predictions})
                        detail.to_csv(directory + fpath, index=False)

                        # Single-element lists keep dict/estimator objects in
                        # one cell of the one-row frame.
                        row = {
                            'target_names': target,
                            'method_names': method,
                            'window_nuggets': n_steps,
                            'temporalhorizons': PrH_index,
                            'CV': i,
                            'file_names': fpath,
                            'std_test_score': [test_std],
                            'mean_test_score': [test_Score],
                            'params': [clf.best_params_],
                            'bestscore': [clf.best_score_],
                        }
                        if cat == 1:
                            for k, name in enumerate(cat_metrics):
                                row[name] = cm0[k]
                            row['fbeta'] = [cm0[9]]
                        else:
                            for k, name in enumerate(reg_metrics):
                                row[name] = cm0[k]
                        row['imfeatures'] = [clf.best_estimator_]

                        pd.DataFrame(data=row, index=[0]).to_csv(
                            directory + resultFileName,
                            index=False, mode='a', header=False)

                        elapsed_time = time.time() - start_time
                        print(time.strftime("%H:%M:%S",
                                            time.gmtime(elapsed_time)))
def main():
    """Train the tuned final pipeline per target/horizon and score it on test data.

    For every model name and target this function:

    * creates ``<directory>`` and ``<directory>Results/`` and starts a new
      ``results_<target><timestamp>.csv`` whose first row holds the column
      names;
    * for each prediction horizon, reads the tuned parameters from
      ``func.trained_param_grid``, builds the model with ``func.getModel``
      ('RF'/'DT' on raw features, every other model behind a
      ``StandardScaler``), fits it on the training sonde file and pickles the
      fitted pipeline;
    * predicts on the matching test file, one index set yielded by
      ``func.custom_cv_kfolds_testdataonly`` at a time, writes actual vs.
      predicted values to a CSV, appends one row of
      ``func.forecast_accuracy`` metrics and saves a scatter plot for every
      10th index set.

    Raises:
        FileNotFoundError: if the train or test directory holds no CSV file
            whose name starts with the expected sonde file name.
    """
    models = ['NN']  # 'LSTM', 'NN', 'LR', 'RF', 'DT', 'SVC',
    # 'DOcategory', 'pHcategory','ph', 'dissolved_oxygen',
    targets = ['pHcategory']
    sondefilename = 'leavon_wo_2019-07-01-2020-01-15'
    # The test file carries the same name without the 'wo_' marker.
    testsondefilename = re.sub('wo_', '', sondefilename)
    # NOTE(review): n_job stays -1 even for 'LSTM'/'NN' in this script --
    # confirm that is intended.
    n_job = -1

    path = 'Sondes_data/train/train_data/'
    testpath = 'Sondes_data/test/test_data/'
    method = 'OrgData'

    id_columns = ['target_names', 'method_names', 'temporalhorizons', 'CV',
                  'file_names']
    cat_metrics = ['F1_0', 'F1_1', 'P_0', 'P_1', 'R_0', 'R_1', 'acc0_1',
                   'F1_0_1', 'F1_all']
    reg_metrics = ['mape', 'me', 'mae', 'mpe', 'rmse', 'R2']

    # Sorted for a deterministic pick (os.listdir order is arbitrary) and
    # checked so a missing file gives a clear error instead of an IndexError.
    train_files = sorted(
        f for f in os.listdir(path)
        if f.endswith('.csv') and f.startswith(sondefilename))
    if not train_files:
        raise FileNotFoundError(
            'No CSV starting with %r found in %r' % (sondefilename, path))
    test_files = sorted(
        f for f in os.listdir(testpath)
        if f.endswith('.csv') and f.startswith(testsondefilename))
    if not test_files:
        raise FileNotFoundError(
            'No CSV starting with %r found in %r'
            % (testsondefilename, testpath))

    for model_name in models:
        print(model_name)
        is_keras = model_name in ('LSTM', 'NN')

        for target in targets:
            # `in` also matches a name that starts with 'category'.
            if 'category' in target:
                cat = 1
                directory = 'Results/bookOne/output_Cat_' + \
                    model_name + '/final_models/'
                columns = id_columns + cat_metrics + ['fbeta']
            else:
                cat = 0
                directory = 'Results/bookOne/output_Reg_' + \
                    model_name + '/final_models/'
                columns = id_columns + reg_metrics

            # Only classification labels are one-hot encoded for keras
            # models, so only those need decoding again before scoring.
            one_hot_labels = cat == 1 and is_keras

            directoryresult = directory + 'Results/'
            # Creates `directory` as well, since it is a parent.
            os.makedirs(directoryresult, exist_ok=True)

            resultFileName = 'results_' + target + str(time.time()) + '.csv'
            header = {name: name for name in columns}
            pd.DataFrame(data=header, index=[0]).to_csv(
                directoryresult + resultFileName, index=False, header=False)

            params = func.trained_param_grid[
                'param_grid_' + model_name + str(cat)]

            for PrH_index in [1, 3, 6, 12, 24, 36, 48]:
                horizon_params = params['param_' + target + '_' + str(PrH_index)]
                lags = func.getlags_window(model_name, horizon_params, cat)

                file1 = train_files[0]
                print(' TH: ' + str(PrH_index) + ' ' + method + ' ' +
                      target + ' ' + file1)

                dataset = pd.read_csv(path + file1)
                train_X_grid, train_y_grid, input_dim, _ = func.preparedata(
                    dataset, PrH_index, lags, target, cat)
                print(input_dim)

                if one_hot_labels:
                    train_y_grid = to_categorical(train_y_grid, 3)

                start_time = time.time()

                mo = func.getModel(
                    model_name, input_dim, horizon_params, n_job, cat)
                if model_name in ('RF', 'DT'):
                    pipeline = Pipeline(steps=[('model', mo)])
                else:
                    pipeline = Pipeline(
                        steps=[('n', StandardScaler()), ('model', mo)])

                filename = model_name + '_model_' + \
                    target + '_' + str(PrH_index) + '.sav'

                if one_hot_labels:
                    clf = pipeline.fit(
                        train_X_grid, train_y_grid,
                        model__class_weight={0: 1, 1: 50, 2: 100})
                else:
                    clf = pipeline.fit(train_X_grid, train_y_grid)

                # save the model to disk; the context manager closes the
                # file handle even if pickling fails.
                # To load it again: pickle.load(open(<path>, 'rb'))
                with open(directory + filename, 'wb') as model_file:
                    pickle.dump(clf, model_file)

                elapsed_time = time.time() - start_time
                print(time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))

                #################################
                # Testing final model on test data
                #################################
                start_time = time.time()
                file1 = test_files[0]
                print('Window: ' + str(lags) + ' TH: ' + str(PrH_index) +
                      ' ' + method + ' ' + target + file1)

                dataset = pd.read_csv(testpath + file1)
                test_X_grid, test_y_grid, input_dim, _ = func.preparedata(
                    dataset, PrH_index, lags, target, cat)

                if one_hot_labels:
                    test_y_grid = to_categorical(test_y_grid, 3)

                custom_cv = func.custom_cv_kfolds_testdataonly(
                    test_X_grid, 100)
                for i, test_index in enumerate(custom_cv, start=1):
                    test_X = test_X_grid[test_index]
                    test_y = test_y_grid[test_index]

                    predictions = clf.predict(test_X)

                    if one_hot_labels:
                        # Decode only labels that were one-hot encoded above;
                        # regression targets are left untouched.
                        test_y = argmax(test_y, axis=1)

                    if cat == 1:
                        predictions = np.array(predictions).astype(int)
                        test_y = np.array(test_y).astype(int)

                    # Flatten (n, 1) column vectors; a no-op for arrays that
                    # are already one-dimensional.
                    test_y = test_y.reshape(len(test_y), )
                    predictions = predictions.reshape(len(predictions), )

                    fpath = filename + '_CV' + str(i) + file1

                    if i % 10 == 0:
                        plt.scatter(np.arange(len(test_y)), test_y, s=1)
                        plt.scatter(np.arange(len(predictions)),
                                    predictions, s=1)
                        plt.legend(['actual', 'predictions'],
                                   loc='upper right')
                        plt.savefig(directoryresult + fpath + '.jpg')
                        plt.close()

                    # NOTE(review): the actual/prediction CSV is written for
                    # every index set, not only every 10th -- confirm.
                    print(test_y.shape)
                    print(predictions.shape)
                    detail = pd.DataFrame(
                        data={'Actual': test_y, 'Predictions': predictions})
                    detail.to_csv(directoryresult + fpath, index=False)

                    cm0 = func.forecast_accuracy(predictions, test_y, cat)

                    row = {
                        'target_names': target,
                        'method_names': method,
                        'temporalhorizons': PrH_index,
                        'CV': i,
                        'file_names': filename,
                    }
                    if cat == 1:
                        for k, name in enumerate(cat_metrics):
                            row[name] = cm0[k]
                        row['fbeta'] = [cm0[9]]
                    else:
                        for k, name in enumerate(reg_metrics):
                            row[name] = cm0[k]

                    pd.DataFrame(data=row, index=[0]).to_csv(
                        directoryresult + resultFileName,
                        index=False, mode='a', header=False)

                    elapsed_time = time.time() - start_time
                    print(time.strftime("%H:%M:%S",
                                        time.gmtime(elapsed_time)))

                # Release the fitted pipeline and backend session state
                # before the next horizon is trained.
                del clf
                Kb.clear_session()
                gc.collect()
# Column layout of the per-file results CSV.  The identifying columns come
# first, followed by the metric columns for the task type.
_RESULT_ID_COLUMNS = ('CV', 'target_names', 'method_names',
                      'temporalhorizons', 'window_nuggets', 'config',
                      'file_names')
_CLASSIFICATION_METRICS = ('F1_0', 'F1_1', 'P_0', 'P_1', 'R_0', 'R_1',
                           'acc0_1', 'F1_0_1', 'F1_all', 'fbeta')
_REGRESSION_METRICS = ('mape', 'me', 'mae', 'mpe', 'rmse', 'R2')

# Models that are evaluated directly on the raw target column.
_UNIVARIATE_MODELS = ('ARIMA', 'AR', 'ETS', 'SARIMA', 'BL')


def _result_columns(cat):
    """Return the ordered column names of the results CSV for task type cat."""
    metrics = _CLASSIFICATION_METRICS if cat == 1 else _REGRESSION_METRICS
    return list(_RESULT_ID_COLUMNS) + list(metrics)


def _result_row(run, fold, fold_name, cm0):
    """Build one results-CSV row from the metric sequence cm0.

    The row carries an empty 'config' entry so that it lines up with the
    header written by main(), which declares a 'config' column.
    """
    row = {
        'CV': fold,
        'target_names': run['target'],
        'method_names': run['method'],
        'temporalhorizons': run['horizon'],
        'window_nuggets': 1,
        'config': '',
        'file_names': fold_name,
    }
    if run['cat'] == 1:
        row.update({
            'F1_0': cm0[0],
            'F1_1': cm0[1],
            'P_0': cm0[2],
            'P_1': cm0[3],
            'R_0': cm0[4],
            'R_1': cm0[5],
            'acc0_1': cm0[6],
            'F1_0_1': cm0[7],
            'F1_all': cm0[8],
            'fbeta': [cm0[9]],
        })
    elif run['cat'] == 0:
        row.update({
            'mape': cm0[0],
            'me': cm0[1],
            'mae': cm0[2],
            'mpe': cm0[3],
            'rmse': cm0[4],
            'R2': cm0[5],
        })
    return row


def _append_result_row(run, fold, fold_name, cm0):
    """Append the metrics of one fold to the results CSV of the current file."""
    results_path = run['directory'] + run['result_filename']
    row = _result_row(run, fold, fold_name, cm0)
    pd.DataFrame(data=row, index=[0]).to_csv(
        results_path, index=False, mode='a', header=False)


def _fold_name(run, fold):
    """Return the base name shared by the plot and CSV of one fold."""
    return '{}_{}_TH{}_lag{}_{}'.format(
        run['source_file'], run['target'], run['horizon'], run['n_steps'],
        fold)


def _fold_timeline(fold_rows):
    """Assemble timestamps from columns 0-3 (year, month, day, hour)."""
    parts = pd.DataFrame({
        'year': np.array(fold_rows[:, 0]).astype(int),
        'month': np.array(fold_rows[:, 1]).astype(int),
        'day': np.array(fold_rows[:, 2]).astype(int),
        'hour': np.array(fold_rows[:, 3]).astype(int),
    })
    return pd.to_datetime(parts, format='%Y%m%d %H')


def _save_scatter(timeline, actual, predictions, detail_dir, fold_name):
    """Save an actual-vs-predicted scatter plot as <detail_dir><fold_name>.jpg."""
    plt.scatter(timeline.values, actual, s=1)
    plt.scatter(timeline.values, predictions, s=1)
    plt.legend(['actual', 'predictions'], loc='upper right')
    plt.xticks(rotation=45)
    os.makedirs(detail_dir, exist_ok=True)
    plt.savefig(detail_dir + fold_name + '.jpg')
    plt.close()


def _print_elapsed(start_time):
    """Print the wall-clock time since start_time as HH:MM:SS."""
    elapsed_time = time.time() - start_time
    print(time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))


def _record_fold(run, fold, timeline, actual, predictions):
    """Score one fold, then write its plot, prediction CSV and result row."""
    cat = run['cat']
    if cat == 1:
        # Class labels are compared as integers.
        predictions = np.array(predictions).astype(int)
        actual = np.array(actual).astype(int)
    cm0 = func.forecast_accuracy(predictions, actual, cat)

    fold_name = _fold_name(run, fold)
    detail_dir = run['directory'] + 'more/'
    _save_scatter(timeline, actual, predictions, detail_dir, fold_name)

    fold_frame = pd.DataFrame(data={
        'time': timeline,
        'Actual': actual,
        'Predictions': predictions,
    })
    fold_frame.to_csv(detail_dir + fold_name + '.csv', index=False)
    _append_result_row(run, fold, fold_name, cm0)


def _record_baseline_fold(run, fold, timeline, test_X_uni):
    """Score the 'BL' baseline: each value is predicted by its predecessor."""
    series = pd.DataFrame(test_X_uni)
    # Actual values are the series without its first element, re-indexed from
    # 0 so they align row by row with the predictions below.
    actual = series.iloc[1:]
    actual.index = np.arange(0, len(actual))
    # Predictions are the series without its last element.
    predictions = series.iloc[:len(series) - 1] if len(series) else series
    timeline = timeline.iloc[:len(timeline) - 1] if len(timeline) else timeline

    cm0 = func.forecast_accuracy(predictions, actual, run['cat'])

    fold_name = _fold_name(run, fold)
    detail_dir = run['directory'] + 'more/'
    _save_scatter(timeline, actual, predictions, detail_dir, fold_name)

    print(predictions.head())
    print(actual.head())
    print(timeline.head())

    fold_frame = pd.concat([timeline, actual, predictions], axis=1)
    fold_frame.to_csv(detail_dir + fold_name + '.csv', index=False,
                      header=['time', 'Actual', 'Predictions'])
    _append_result_row(run, fold, fold_name, cm0)


def _grid_configs(model_name):
    """Return the configuration list to score for a grid-searched model."""
    if model_name == 'ETS':
        return ufunc.exp_smoothing_configs()
    if model_name == 'ARIMA':
        return ufunc.ARIMA_configs()
    if model_name == 'SARIMA':
        return ufunc.sarima_configs()
    return []


def _evaluate_moving_average(run, dataset):
    """Cross-validate the moving-average model for one prediction horizon."""
    # NOTE(review): the first four columns of the prepared matrix are read as
    # year/month/day/hour below -- confirm against func.preparedata.
    train_X_grid, train_y_grid, _, _ = func.preparedata(
        dataset, run['horizon'], run['n_steps'], run['target'], run['cat'])

    start_time = time.time()
    custom_cv = func.custom_cv_2folds(train_X_grid, 3)
    for fold, (train_index, test_index) in enumerate(custom_cv, start=1):
        train_X = train_X_grid[train_index]
        train_y = train_y_grid[train_index]
        test_X = train_X_grid[test_index]
        test_y = train_y_grid[test_index]

        # The last column is the one handed to the univariate model.
        predictions = ufunc.movingAverage(
            train_X[:, -1], train_y, test_X[:, -1], test_y)
        _record_fold(run, fold, _fold_timeline(test_X), test_y, predictions)
    _print_elapsed(start_time)


def _evaluate_univariate(run, dataset):
    """Cross-validate BL / AR / ETS / ARIMA / SARIMA for one horizon."""
    model_name = run['model_name']
    start_time = time.time()
    train_X_grid = dataset.values
    custom_cv = ufunc.custom_cv_2folds(train_X_grid, 1, run['horizon'])

    for fold, (train_index, test_index) in enumerate(custom_cv, start=1):
        test_X = train_X_grid[test_index]
        # The last column is the target; the test slice holds the actual
        # future values.
        train_X_uni = train_X_grid[train_index][:, -1]
        test_X_uni = test_X[:, -1]
        timeline = _fold_timeline(test_X)

        if model_name == 'BL':
            _record_baseline_fold(run, fold, timeline, test_X_uni)
        elif model_name == 'AR':
            predictions = ufunc.AutoRegression(train_X_uni, test_X_uni)
            _record_fold(run, fold, timeline, test_X_uni, predictions)
        else:
            # score_model is called once per configuration; its return value
            # was never used, so only the calls are kept.
            for cfg in _grid_configs(model_name):
                ufunc.score_model(
                    model_name, train_X_uni, test_X_uni, cfg, run['cat'],
                    run['directory'], run['source_file'], run['target'],
                    run['horizon'], run['n_steps'], fold,
                    run['result_filename'], timeline)
    _print_elapsed(start_time)


def main():
    """Cross-validate classical time-series models on the sonde training CSVs.

    For every model, target and matching input file, a results CSV is created
    under the model's output directory and one row per cross-validation fold
    is appended for each prediction horizon.  Per-fold plots and prediction
    CSVs are written to the 'more/' sub-directory.

    Fixes relative to the previous version:
      * the header columns are no longer kept in a variable that the fold
        loops overwrite, so every input file gets a correct header;
      * the header line is written once instead of twice;
      * MA / BL / AR rows carry an empty 'config' field so they line up with
        the header, which declares a 'config' column.
    """
    method = 'OrgData'
    # Other targets noted by the author: 'DOcategory', 'pHcategory',
    # 'dissolved_oxygen', 'ysi_blue_green_algae' (has negative values for
    # leavon; the meaning of the negative values is an open question).
    targets = ['ph']
    # Other model names noted by the author: 'ARIMA', 'ETS', 'AR', 'MA'.
    models = ['SARIMA']
    path = 'Sondes_data/train_Summer/'
    # The author's note lists 'bgsusd_all' next to 'leavon' as a file prefix.
    files = [
        f for f in os.listdir(path)
        if f.endswith(".csv") and f.startswith('leavon')
    ]
    horizons = [1, 3, 6, 12, 24, 36]
    n_steps = 1

    for model_name in models:
        for target in targets:
            # Targets containing 'category' (not at position 0) are treated
            # as classification, everything else as regression.
            if target.find('category') > 0:
                cat = 1
                directory = 'Results/bookThree/output_Cat_' + \
                    model_name + '/oversampling_cv_models/'
            else:
                cat = 0
                directory = 'Results/bookThree/output_Reg_' + \
                    model_name + '/oversampling_cv_models/'
            os.makedirs(directory, exist_ok=True)

            for file in files:
                print(file)
                result_filename = 'results_' + target + \
                    '_' + file + '_' + str(time.time()) + '.csv'
                # An empty frame writes exactly one header line.
                pd.DataFrame(columns=_result_columns(cat)).to_csv(
                    directory + result_filename, index=False)

                # Read the file once; each horizon gets its own column
                # selection, so work on one horizon cannot leak into the next.
                raw = pd.read_csv(path + file)
                for PrH_index in horizons:
                    # Only the timestamp parts and the target are needed.
                    dataset = raw[['year', 'month', 'day', 'hour', target]]
                    print('Window: ' + str(n_steps) + ' TH: ' +
                          str(PrH_index) + ' ' + method + ' ' + target)
                    run = {
                        'model_name': model_name,
                        'target': target,
                        'cat': cat,
                        'method': method,
                        'horizon': PrH_index,
                        'n_steps': n_steps,
                        'source_file': file,
                        'directory': directory,
                        'result_filename': result_filename,
                    }
                    if model_name == 'MA':
                        _evaluate_moving_average(run, dataset)
                    elif model_name in _UNIVARIATE_MODELS:
                        _evaluate_univariate(run, dataset)