Esempio n. 1
0
LABELS = ['attack', 'category', 'subcategory']

# Random seed for this run: the first command-line argument when it is present
# and parses as an integer, otherwise 0.
STATE = 0
try:
    STATE = int(sys.argv[1])
except (IndexError, ValueError):
    # No argument given, or it is not an integer: keep the default seed.
    # Only these two failures are ignored so that real errors still surface.
    pass
#for STATE in [1, 2, 3, 4, 5]:
np.random.seed(STATE)
print('STATE:', STATE)

# In[3]:

###############################################################################
## Load dataset
###############################################################################
# FILE_NAME, FIVE_PERCENT_FILES, INDEX_COLUMN and NAN_VALUES are defined outside
# this section; they are passed positionally, so their order must match the
# signature of load_dataset.
df = load_dataset(FILE_NAME, FIVE_PERCENT_FILES, INDEX_COLUMN, NAN_VALUES)

# In[4]:

###############################################################################
## Clean dataset
###############################################################################
###############################################################################
### Remove columns with only one value
# The helper returns a pair: the value rebound to df and a log that is printed
# below. Presumably it drops constant columns, going by its name -- TODO confirm
# against the helper's definition.
df, log = remove_columns_with_one_value(df, verbose=False)
print(log)

###############################################################################
### Remove redundant columns, useless columns and unused targets
### K: _number columns are numerical representations of other existing columns.
### K: category and subcategory are other labels.
Esempio n. 2
0
from sklearn.metrics import f1_score, classification_report, accuracy_score
from sklearn.metrics import cohen_kappa_score
from  unit import load_dataset, remove_columns_with_one_value, remove_nan_columns

# Run state: the first command-line argument when it is present and parses as
# an integer, otherwise 0.
state = 0
try:
    state = int(sys.argv[1])
except (IndexError, ValueError):
    # No argument given, or it is not an integer: keep the default.
    # Only these two failures are ignored so that real errors still surface.
    pass
print("STATE = ", state)
STATES = [0, 10, 100, 1000, 10000]

# Console display settings: no limit on printed rows, at most five columns.
pd.options.display.max_rows = None
pd.options.display.max_columns = 5

# Load the data and run the two cleaning helpers on it.
df = load_dataset()
print("Data Loaded")
# NOTE(review): the return values of both helpers are discarded, so this only
# has an effect if they modify df in place -- confirm against their definitions.
remove_columns_with_one_value(df, verbose=False)
remove_nan_columns(df, 0.6, verbose=False)

# Build the final DataFrame: first drop the leading column (the row-number
# column, per the original author's note), then the columns unrelated to the
# task.
df = (
    df.drop(columns=df.columns[0])
      .drop(columns=['ts', 'te', 'sa', 'da'])
)


# Resample the frame. With frac=1 and replace=True this draws as many rows as
# the frame has, with replacement, using a fixed seed of 0.
# NOTE(review): if a plain shuffle was intended, replace=True is not what is
# wanted (rows can repeat or be left out) -- confirm the intent.
df = df.sample(frac=1, random_state=0, replace=True)
#################################
## Encoding the data           ##