def initialize(self):
    """Prepare the training data if needed, then seed the population.

    When ``self.train_data`` is falsy it is built from ``self.source`` via
    ``process``, ``extract_info()`` is called on it, and ``self.maxstring``
    and ``self.total`` are derived from the result. In every case,
    ``self.popsize`` new ``DNA(self.maxstring)`` individuals are appended to
    ``self.population`` and progress banners are printed to stdout.
    """
    banner = "---------------"

    if not self.train_data:
        self.train_data = process(self.source)
        self.train_data.extract_info()
        first_genotype = self.train_data.genotype[0]
        # One less than the length of the first genotype entry.
        self.maxstring = len(first_genotype) - 1
        self.total = self.train_data.intrusion + self.train_data.normal

    print(banner)
    print("Initializing Random Population")
    # One fresh individual per population slot.
    self.population.extend(
        DNA(self.maxstring) for _ in range(self.popsize)
    )
    print("Operation Complete")
    print(banner)
Beispiel #2
0
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.pipeline import Pipeline
from Preprocessing import process
from xgboost import XGBClassifier
import numpy
from Preprocessing import parse_impression

### PROCESSING (X = reports, Y = y)
# Load the labelled subset and the full report collection.
CTIMEreports = pd.read_csv("data/CTIME/CTIMEFinalLabel.csv")
AllReports = pd.read_csv("data/CTIME/CTIMEDataset.csv")

# Each raw report goes through parse_impression, then process; the pieces
# process returns are joined with single spaces into one string per report.
CTIMEreports['PROCESSED'] = [
    ' '.join(process(parse_impression(raw_text)))
    for raw_text in CTIMEreports['CTReport']
]

# Same treatment for every report in the full dataset.
processed = [
    ' '.join(process(parse_impression(raw_text)))
    for raw_text in AllReports['CTReport']
]
AllReports['PROCESSED'] = processed

### BUILD MODEL (can change fit & predict functions to train on 'CTReport' (unprocessed), 'PROCESSED', 'FINDINGS' (processed), 'IMPRESSION' (processed))

text_clf = Pipeline([
    ('vect',
     CountVectorizer(stop_words='english',
                     ngram_range=(1, 2),
Beispiel #3
0
from Preprocessing import process

# Build the module-level `test` object from the "KDDTest+.txt" source file.
# NOTE(review): presumably this is the held-out KDD test split — confirm.
test = process(source="KDDTest+.txt")
# Runs at import time; extract_info() is a method of the object returned by
# process (its implementation is not visible from here).
test.extract_info()


def predict(individual, specimen):

    w1 = 0.239
    w2 = 0.181
    w3 = 0.215
    w4 = 0.162
    w5 = 0.069
    w6 = 0.064
    w7 = 0.055
    w8 = 0.015
    outcome = 0
    if individual[0:7] == specimen[0:7]:
        outcome += w1
    if individual[7:17] == specimen[7:17]:
        outcome += w2
    if individual[17:29] == specimen[17:29]:
        outcome += w3
    if individual[29:31] == specimen[29:31]:
        outcome += w4
    if individual[31:35] == specimen[31:35]:
        outcome += w5
    if individual[35:45] == specimen[35:45]:
        outcome += w6
    if individual[45:52] == specimen[45:52]:
        outcome += w7