# Label (target) column names present in the dataset.
LABELS = ['attack', 'category', 'subcategory']

# Random seed for this run. Defaults to 0 and may be overridden by the first
# command-line argument.
STATE = 0
try:
    STATE = int(sys.argv[1])
except (IndexError, ValueError):
    # No argument was given, or it is not an integer (e.g. a flag injected by
    # a notebook kernel): keep the default seed.
    pass
#for STATE in [1, 2, 3, 4, 5]:
# Seed the global `np.random` generator with the selected state.
np.random.seed(STATE)
print('STATE:', STATE)


# In[3]:


###############################################################################
## Load dataset
###############################################################################
df = load_dataset(FILE_NAME, FIVE_PERCENT_FILES, INDEX_COLUMN, NAN_VALUES)


# In[4]:


###############################################################################
## Clean dataset
###############################################################################
###############################################################################
### Remove columns with only one value
# The helper returns the cleaned frame together with a log message.
df, log = remove_columns_with_one_value(df, verbose=False)
print(log)


###############################################################################
### Remove redundant columns, useless columns and unused targets
### K: _number columns are numerical representations of other existing columns.
### K: category and subcategory are other labels.
from sklearn.metrics import f1_score, classification_report, accuracy_score
from sklearn.metrics import cohen_kappa_score
from unit import load_dataset, remove_columns_with_one_value, remove_nan_columns

# Run state read from the first command-line argument; defaults to 0.
state = 0
try:
    state = int(sys.argv[1])
except (IndexError, ValueError):
    # Missing or non-integer argument: keep the default state.
    pass
print("STATE = ", state)

STATES = [0, 10, 100, 1000, 10000]

# Print every row but at most 5 columns when displaying DataFrames.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', 5)

df = load_dataset()
print("Data Loaded")

# NOTE(review): the return values of both helpers are discarded, so these
# calls only take effect if the helpers modify `df` in place -- confirm.
remove_columns_with_one_value(df, verbose=False)
remove_nan_columns(df, 0.6, verbose=False)

#making the final DataFrame
#dropping the number of the rows column
df = df.drop(df.columns[0], axis=1)

#dropping unrelated columns
df.drop(axis='columns', columns=['ts', 'te', 'sa', 'da'], inplace=True)

#sampling the df
# NOTE(review): frac=1 with replace=True draws a bootstrap sample (rows may be
# repeated or left out) rather than a plain shuffle, and random_state is fixed
# at 0 instead of using `state` -- confirm this is intended.
df = df.sample(frac=1, replace=True, random_state=0)

#################################
## Encoding the data ##