def initialize(self):
    """Prepare the training data and fill ``self.population``.

    If ``self.train_data`` is falsy, it is built with ``process(self.source)``
    and ``extract_info()`` is called on the result.  Afterwards:

    * ``self.maxstring`` is set to the length of the first entry of
      ``self.train_data.genotype`` minus one,
    * ``self.total`` is set to ``train_data.intrusion + train_data.normal``,
    * ``self.popsize`` new ``DNA(self.maxstring)`` objects are appended to
      ``self.population``.

    Progress messages are printed to stdout.  Returns ``None``.
    """
    # NOTE(review): the source this was recovered from had lost its
    # indentation.  It is assumed that only the load + extract_info() pair is
    # guarded by this check -- confirm against the original file.
    if not self.train_data:
        self.train_data = process(self.source)
        self.train_data.extract_info()

    data = self.train_data
    self.maxstring = len(data.genotype[0]) - 1
    self.total = data.intrusion + data.normal

    separator = "---------------"
    print(separator)
    print("Initializing Random Population")
    # The loop index is unused; exactly ``popsize`` individuals are appended.
    for _ in range(self.popsize):
        self.population.append(DNA(self.maxstring))
    print("Operation Complete")
    print(separator)
import pandas as pd from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer from sklearn.pipeline import Pipeline from Preprocessing import process from xgboost import XGBClassifier import numpy from Preprocessing import parse_impression ### PROCESSING (X = reports, Y = y) CTIMEreports = pd.read_csv("data/CTIME/CTIMEFinalLabel.csv") AllReports = pd.read_csv("data/CTIME/CTIMEDataset.csv") processed = [] for report in CTIMEreports['CTReport']: report = parse_impression(report) rep = ' '.join(process(report)) processed.append(rep) CTIMEreports['PROCESSED'] = (processed) processed = [] for report in AllReports['CTReport']: report = parse_impression(report) rep = ' '.join(process(report)) processed.append(rep) AllReports['PROCESSED'] = processed ### BUILD MODEL (can change fit & predict functions to train on 'CTReport' (unprocessed), 'PROCESSED', 'FINDINGS' (processed), IMPRESSION' (processed) text_clf = Pipeline([ ('vect', CountVectorizer(stop_words='english', ngram_range=(1, 2),
from Preprocessing import process test = process(source="KDDTest+.txt") test.extract_info() def predict(individual, specimen): w1 = 0.239 w2 = 0.181 w3 = 0.215 w4 = 0.162 w5 = 0.069 w6 = 0.064 w7 = 0.055 w8 = 0.015 outcome = 0 if individual[0:7] == specimen[0:7]: outcome += w1 if individual[7:17] == specimen[7:17]: outcome += w2 if individual[17:29] == specimen[17:29]: outcome += w3 if individual[29:31] == specimen[29:31]: outcome += w4 if individual[31:35] == specimen[31:35]: outcome += w5 if individual[35:45] == specimen[35:45]: outcome += w6 if individual[45:52] == specimen[45:52]: outcome += w7