from sklearn.metrics import roc_auc_score
from datetime import datetime
import pandas as pd
import numpy as np
import os
import timeit

import sys

sys.path.append(os.environ['CMS_ROOT'])

from cms_modules.utils import (apply_ros_rus, get_binary_imbalance_ratio,
                               split_on_binary_attribute)
from cms_modules.logging import Logger

logger = Logger()
logger.log_message(
    'Executing Chi-Squared Feature Selection with Random Forest Learner')

data_path = os.environ['CMS_PARTB_PATH']
partB_train_normalized_key = 'partB_train_normalized'
partB_test_normalized_key = 'partB_test_normalized'
timestamp = datetime.now().strftime("%m.%d.%Y-%H:%M:%S")
results_file = f'./results.{timestamp}.csv'

# initialize results
header = 'index,subset_size,minority_size,run,roc_auc,time_elapsed\n'
with open(results_file, 'a') as outfile:
    outfile.write(header)

tree_count = 100
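
# The selection/evaluation loop is not included in this snippet. What follows
# is a minimal sketch of the loop the config above implies, assuming the data
# store holds min-max scaled features with a binary 'label' column (chi2
# requires non-negative inputs, which min-max scaling guarantees). The real
# script presumably also uses apply_ros_rus / get_binary_imbalance_ratio for
# sampling; their signatures are not shown, so sampling is omitted here.
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest, chi2

train_df = pd.read_hdf(data_path, partB_train_normalized_key)
test_df = pd.read_hdf(data_path, partB_test_normalized_key)
X_train, y_train = train_df.drop(columns='label'), train_df['label']
X_test, y_test = test_df.drop(columns='label'), test_df['label']

subset_sizes = [10, 25, 50, 100]  # hypothetical; actual sizes are not shown
runs = 10                         # hypothetical run count

index = 0
for subset_size in subset_sizes:
    for run in range(runs):
        start = timeit.default_timer()
        # rank features by chi-squared statistic and keep the top subset_size
        selector = SelectKBest(chi2, k=subset_size).fit(X_train, y_train)
        model = RandomForestClassifier(n_estimators=tree_count, n_jobs=-1)
        model.fit(selector.transform(X_train), y_train)
        probs = model.predict_proba(selector.transform(X_test))[:, 1]
        roc_auc = roc_auc_score(y_test, probs)
        elapsed = timeit.default_timer() - start
        minority_size = int(y_train.sum())  # assumes positives are the minority
        with open(results_file, 'a') as outfile:
            outfile.write(f'{index},{subset_size},{minority_size},{run},'
                          f'{roc_auc},{elapsed}\n')
        index += 1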
# Example #2
# -------------------------------------------------- #
from sklearn.metrics import roc_auc_score
from datetime import datetime
import pandas as pd
import numpy as np
import os
import timeit

import sys

sys.path.append(os.environ['CMS_ROOT'])

from cms_modules.utils import (apply_ros_rus, get_binary_imbalance_ratio,
                               split_on_binary_attribute)
from cms_modules.logging import Logger

logger = Logger()
logger.log_message(
    'Executing Random Forest Wrapper-Based Feature Selection Experiment')

data_path = os.environ['CMS_PARTB_PATH']
partB_train_normalized_key = 'partB_train_normalized'
partB_test_normalized_key = 'partB_test_normalized'
timestamp = datetime.now().strftime("%m.%d.%Y-%H:%M:%S")
results_file = f'./results.{timestamp}.csv'

# initialize results
header = 'index,subset_size,minority_size,run,roc_auc,time_elapsed\n'
with open(results_file, 'a') as outfile:
    outfile.write(header)

tree_count = 100
# `cli_args` is not defined in this snippet; a minimal assumption is a dict
# parsed from key=value style command-line arguments:
cli_args = dict(arg.split('=', 1) for arg in sys.argv[1:])

batch_size = int(cli_args.get('batch_size', 256))

threshold_interval = float(cli_args.get('threshold_interval'))

epochs = int(cli_args.get('epochs'))

runs = int(cli_args.get('runs'))

activation = 'relu'
dropout_rate = 0.5
learn_rate = 1e-3
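
# The activation/dropout/learning-rate settings above imply a small neural
# network, but the model definition is not part of this snippet. A minimal
# Keras sketch under that assumption (layer widths are illustrative only):
from tensorflow.keras import Input, Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

def build_model(input_dim):
    model = Sequential([
        Input(shape=(input_dim,)),
        Dense(32, activation=activation),
        Dropout(dropout_rate),
        Dense(32, activation=activation),
        Dropout(dropout_rate),
        Dense(1, activation='sigmoid'),  # binary output scored with ROC AUC
    ])
    model.compile(optimizer=Adam(learning_rate=learn_rate),
                  loss='binary_crossentropy',
                  metrics=['AUC'])
    return model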


# INITIALIZE LOGGER
# -------------------------------------------------- #
logger = Logger()
# `filename` is not defined in this snippet; the script's basename is assumed
filename = os.path.basename(__file__)
logger.log_message('Executing ' + filename)
logger.log_message('\n'.join(sys.argv[1:]))


# DEFINE DIRECTORIES/PATHS
# -------------------------------------------------- #
# data
hdf5_path = '/home/jjohn273/git/DDOS-Classification/data/combined-minmax-scaled.hdf5'
logger.log_message(hdf5_path)
train_key = 'train_normalized'
test_key = 'test_normalized'

# results
results_dir = './results'
train_results = 'train_metrics.hdf5'

decision_threshold = float(cli_args.get('decision_threshold'))
default_threshold = 0.5
theoretical_threshold = "tbd"

minority_size = "tbd"
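
# `theoretical_threshold` and `minority_size` are placeholders above. A common
# convention for imbalanced binary data (an assumption here, not confirmed by
# this snippet) is to set the decision threshold to the positive-class prior:
def prior_threshold(y_train):
    """Return (minority_size, threshold), where the threshold equals the
    positive-class prior probability of the training labels."""
    minority_size = int(np.sum(y_train))
    return minority_size, minority_size / len(y_train)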


# DEFINE THRESHOLDS TO COMPUTE SCORES FOR
# -------------------------------------------------- #
theoretical_threshold_results_file = './theoretical-results.csv'
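
# The scoring loop that populates theoretical-results.csv is not part of this
# snippet. A minimal sketch, assuming `y_test` (0/1 numpy array) and `probs`
# (model scores) exist, sweeping the unit interval in `threshold_interval` steps:
def sweep_thresholds(y_true, probs, interval):
    """Yield (threshold, tpr, tnr) for each candidate decision threshold."""
    for threshold in np.arange(0.0, 1.0 + interval, interval):
        preds = (probs >= threshold).astype(int)
        tp = np.sum((preds == 1) & (y_true == 1))
        tn = np.sum((preds == 0) & (y_true == 0))
        fp = np.sum((preds == 1) & (y_true == 0))
        fn = np.sum((preds == 0) & (y_true == 1))
        tpr = tp / (tp + fn) if (tp + fn) else 0.0  # recall on positives
        tnr = tn / (tn + fp) if (tn + fp) else 0.0  # recall on negatives
        yield threshold, tpr, tnr

# usage (hypothetical):
# with open(theoretical_threshold_results_file, 'w') as outfile:
#     outfile.write('threshold,tpr,tnr\n')
#     for threshold, tpr, tnr in sweep_thresholds(y_test, probs, threshold_interval):
#         outfile.write(f'{threshold},{tpr},{tnr}\n')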