# Chi-squared feature selection with a random forest learner over the
# CMS Part B claims data; per-run metrics are written to a timestamped CSV.
from sklearn.metrics import roc_auc_score
from datetime import datetime
import pandas as pd
import numpy as np
import os
import timeit
import sys

# Make the shared CMS helper modules importable before loading them.
sys.path.append(os.environ['CMS_ROOT'])
from cms_modules.utils import (apply_ros_rus, get_binary_imbalance_ratio, split_on_binary_attribute)
from cms_modules.logging import Logger

logger = Logger()
logger.log_message(
    'Executing Chi-Squared Feature Selection with Random Forest Learner')

# Input HDF5 location and the keys of the normalized train/test splits.
data_path = os.environ['CMS_PARTB_PATH']
partB_train_normalized_key = 'partB_train_normalized'
partB_test_normalized_key = 'partB_test_normalized'

# Timestamp the results file so repeated runs do not clobber each other.
# NOTE(review): the ':' characters make this filename invalid on Windows —
# presumably this only ever runs on Linux; confirm before porting.
timestamp = datetime.now().strftime("%m.%d.%Y-%H:%M:%S")
results_file = f'./results.{timestamp}.csv'

# Seed the results file with the CSV header row.
header = 'index,subset_size,minority_size,run,roc_auc,time_elapsed\n'
with open(results_file, 'a') as outfile:
    outfile.write(header)

# Number of trees in each random forest (presumably read by code below
# this view — TODO confirm downstream usage).
tree_count = 100
# Wrapper-based feature selection with a random forest learner over the
# CMS Part B claims data; per-run metrics are written to a timestamped CSV.
from sklearn.metrics import roc_auc_score
from datetime import datetime
import pandas as pd
import numpy as np
import os
import timeit
import sys

# Make the shared CMS helper modules importable before loading them.
sys.path.append(os.environ['CMS_ROOT'])
from cms_modules.utils import (apply_ros_rus, get_binary_imbalance_ratio, split_on_binary_attribute)
from cms_modules.logging import Logger

logger = Logger()
logger.log_message(
    'Executing Random Forest Wrapper-Based Feature Selection Experiment')

# Input HDF5 location and the keys of the normalized train/test splits.
data_path = os.environ['CMS_PARTB_PATH']
partB_train_normalized_key = 'partB_train_normalized'
partB_test_normalized_key = 'partB_test_normalized'

# Timestamp the results file so repeated runs do not clobber each other.
# NOTE(review): the ':' characters make this filename invalid on Windows —
# presumably this only ever runs on Linux; confirm before porting.
timestamp = datetime.now().strftime("%m.%d.%Y-%H:%M:%S")
results_file = f'./results.{timestamp}.csv'

# Seed the results file with the CSV header row.
header = 'index,subset_size,minority_size,run,roc_auc,time_elapsed\n'
with open(results_file, 'a') as outfile:
    outfile.write(header)

# Number of trees in each random forest (presumably read by code below
# this view — TODO confirm downstream usage).
tree_count = 100
# EXPERIMENT CONFIGURATION
# -------------------------------------------------- #
# Hyper-parameters pulled from the CLI argument map (`cli_args` is defined
# earlier in this file, outside this view). Only batch_size has a default;
# the others raise if the flag was omitted (float(None)/int(None)).
batch_size = int(cli_args.get('batch_size', 256))
threshold_interval = float(cli_args.get('threshold_interval'))
epochs = int(cli_args.get('epochs'))
runs = int(cli_args.get('runs'))

# Fixed network settings.
activation = 'relu'
dropout_rate = 0.5
learn_rate = 1e-3

# INITIALIZE LOGGER
# -------------------------------------------------- #
logger = Logger()
logger.log_message('Executing ' + filename)
logger.log_message('\n'.join(sys.argv[1:]))

# DEFINE DIRECTORIES/PATHS
# -------------------------------------------------- #
# Pre-scaled input data and the HDF5 keys of the train/test splits.
hdf5_path = '/home/jjohn273/git/DDOS-Classification/data/combined-minmax-scaled.hdf5'
logger.log_message(hdf5_path)
train_key = 'train_normalized'
test_key = 'test_normalized'

# Output locations for training metrics.
results_dir = './results'
train_results = 'train_metrics.hdf5'
# EXPERIMENT CONFIGURATION
# -------------------------------------------------- #
# Hyper-parameters pulled from the CLI argument map (`cli_args` is defined
# earlier in this file, outside this view); missing flags raise via
# int(None)/float(None).
epochs = int(cli_args.get('epochs'))
runs = int(cli_args.get('runs'))
decision_threshold = float(cli_args.get('decision_threshold'))
default_threshold = 0.5
# "tbd" placeholders — presumably replaced later from the training data;
# verify against the code below this view.
theoretical_threshold = "tbd"
minority_size = "tbd"

# Fixed network settings.
activation = 'relu'
dropout_rate = 0.5
learn_rate = 1e-3

# INITIALIZE LOGGER
# -------------------------------------------------- #
logger = Logger()
logger.log_message('Executing ' + filename)
logger.log_message('\n'.join(sys.argv[1:]))

# DEFINE DIRECTORIES/PATHS
# -------------------------------------------------- #
# Pre-scaled input data and the HDF5 keys of the train/test splits.
data_file = 'combined-minmax-scaled.hdf5'
data_path = '/home/jjohn273/git/DDOS-Classification/data/combined-minmax-scaled.hdf5'
logger.log_message(data_path)
train_key = 'train_normalized'
test_key = 'test_normalized'

# DEFINE THRESHOLDS TO COMPUTE SCORES FOR
# -------------------------------------------------- #
theoretical_threshold_results_file = './theoretical-results.csv'