class PickleLoader:
    """Loads a pickled flow collection and keeps a random sample of it.

    Loaded objects are exchanged with ``Loader.get_data`` /
    ``Loader.insert_data`` (defined outside this class); the pickle file is
    only read from disk when ``Loader.get_data`` returns a falsy value.
    """

    def __init__(self):
        from flow import Flows
        self.flow = Flows()

    # Get the netflow
    def get_netflow(self):
        """Return the flow collection currently held by this loader."""
        return self.flow

    @staticmethod
    def _read_pickle(file_name):
        """Unpickle and return the object stored in ``file_name``.

        The file is opened read-only in binary mode (pickle data is bytes)
        and is closed again even when unpickling fails.
        """
        import pickle
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file. Only use this loader on files from a trusted source.
        with open(file_name, "rb") as handle:
            return pickle.load(handle)

    # Load a file
    def load(self, host, user, password, db, amount, good=False, classify=False, db_file=""):
        """Load flows from the pickle file ``db_file`` and sample ``amount`` of them.

        ``host``, ``user``, ``password``, ``db``, ``good`` and ``classify``
        are accepted but not used by this loader.

        Args:
            amount: value passed to ``random()`` of the loaded flow object.
            db_file: path of the pickle file to load.

        Returns:
            True when the flows were loaded and sampled, False when any step
            raised. On failure ``self.flow`` keeps its previous value.
        """
        file_name = db_file

        try:
            flows = Loader.get_data(file_name)
            if not flows:
                flows = self._read_pickle(file_name)
                Loader.insert_data(file_name, flows)
                print('Loaded data manually')
            else:
                print('Use stored data')

            # Assign only once everything succeeded so a failed load does not
            # leave a half-initialised value behind.
            self.flow = flows.random(amount)
            return True
        except Exception as e:
            # Report the reason instead of failing silently.
            print("Error: " + str(e))
            return False
class PickleLoader:
    """Loads a pickled flow collection and keeps a random sample of it.

    Loaded objects are exchanged with ``Loader.get_data`` /
    ``Loader.insert_data`` (defined outside this class); the pickle file is
    only read from disk when ``Loader.get_data`` returns a falsy value.
    """

    def __init__(self):
        from flow import Flows
        self.flow = Flows()

    # Get the netflow
    def get_netflow(self):
        """Return the flow collection currently held by this loader."""
        return self.flow

    @staticmethod
    def _read_pickle(file_name):
        """Unpickle and return the object stored in ``file_name``.

        The file is opened read-only in binary mode (pickle data is bytes)
        and is closed again even when unpickling fails.
        """
        import pickle
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file. Only use this loader on files from a trusted source.
        with open(file_name, "rb") as handle:
            return pickle.load(handle)

    # Load a file
    def load(self,
             host,
             user,
             password,
             db,
             amount,
             good=False,
             classify=False,
             db_file=""):
        """Load flows from the pickle file ``db_file`` and sample ``amount`` of them.

        ``host``, ``user``, ``password``, ``db``, ``good`` and ``classify``
        are accepted but not used by this loader.

        Args:
            amount: value passed to ``random()`` of the loaded flow object.
            db_file: path of the pickle file to load.

        Returns:
            True when the flows were loaded and sampled, False when any step
            raised. On failure ``self.flow`` keeps its previous value.
        """
        file_name = db_file

        try:
            flows = Loader.get_data(file_name)
            if not flows:
                flows = self._read_pickle(file_name)
                Loader.insert_data(file_name, flows)
                print('Loaded data manually')
            else:
                print('Use stored data')

            # Assign only once everything succeeded so a failed load does not
            # leave a half-initialised value behind.
            self.flow = flows.random(amount)
            return True
        except Exception as e:
            # Report the reason instead of failing silently.
            print("Error: " + str(e))
            return False
 # NOTE(review): this constructor sits at a one-space indent that matches no
 # enclosing class, and its body repeats the __init__ of the class defined
 # directly below it. It looks like a detached duplicate -- confirm whether it
 # can be removed.
 def __init__(self):
     # Imported at call time rather than at module import.
     from flow import Flows
     # Start with a fresh Flows instance.
     self.flow = Flows()
class CTULoader:
    """Loads flow records from a comma-separated flow file.

    The first line of the file is treated as a header and skipped. Data lines
    are expected in this column order:

      StartTime,Dur,Proto,SrcAddr,Sport,Dir,DstAddr,Dport,State,sTos,dTos,TotPkts,TotBytes,SrcBytes,label
      2011/08/10 09:46:59.607825,1.026539,tcp,94.44.127.113,1577,   ->,147.32.84.59,6881,S_RA,0,0,4,276,156,flow=Background-Established-cmpgw-CVUT
    """

    # Position of the label column in a data line.
    LABEL_INDEX = 14

    # FlowRecord attribute for each column, in column order.
    _FIELDS = ('start_time', 'duration', 'protocol', 'src_ip', 'src_port',
               'bidirectional', 'dest_ip', 'dest_port', 'state', 'sTos',
               'dTos', 'total_pckts', 'total_bytes', 'total_srcbytes', 'label')

    def __init__(self):
        from flow import Flows
        self.flow = Flows()

    # Get the labels used
    def get_labels(self, file_name):
        """Return a list of the distinct labels found in ``file_name``.

        Blank lines and lines with too few columns are skipped, so a trailing
        empty line no longer discards the labels already collected.

        Returns:
            A list of label strings (whitespace-stripped), or ``[]`` when the
            file cannot be read.
        """
        labels = {}
        try:
            with open(file_name) as f:
                # Skip the header; the default keeps an empty file from raising.
                next(f, None)
                for line in f:
                    items = line.split(',')
                    if len(items) <= self.LABEL_INDEX:
                        continue
                    labels[items[self.LABEL_INDEX].strip()] = True
        except (IOError, OSError, UnicodeError):
            return []
        return list(labels)

    # Load a file
    def load(self,
             file_name,
             fr,
             to,
             good_labels=None,
             good=True,
             amount=-1,
             amountGood=0):
        """Read records from ``file_name`` into ``self.flow``.

        Args:
            file_name: path of the flow file.
            fr: number of data lines to skip after the header.
            to: value of the line counter (which starts at ``fr``) at which
                reading stops; that line is still processed.
            good_labels: targets regarded as good. When falsy, every record
                read is added without filtering.
            good: with ``good_labels`` set, True adds only records whose
                ``make_target()`` is in ``good_labels``; False adds the
                records that are not, plus up to ``amountGood`` that are.
            amount: maximum number of counted records; any negative value
                means no limit.
            amountGood: number of good records to add when ``good`` is False.

        Returns:
            True when reading finished, False when an exception occurred
            while reading (including running out of lines while skipping
            ``fr`` lines). Records added before the failure stay in
            ``self.flow``.

        Raises:
            Whatever ``open`` raises when the file cannot be opened; that
            call happens before the guarded section.
        """
        f = Loader.get_data(file_name)
        if not f:
            f = open(file_name)
            # The open handle is handed to Loader.insert_data and is
            # presumably reused by later calls, so it is not closed here.
            Loader.insert_data(file_name, f)
            print('Loaded data manually')
        else:
            print('Use stored data')
        f.seek(0, 0)
        try:
            # Skip the header line.
            next(f)

            i = 0
            total = 0
            # Skip the first `fr` data lines; next() raises StopIteration
            # when the file is shorter, which is reported as a failure below.
            while i < fr and next(f):
                i += 1

            g = 0
            unlimited = amount < 0
            for line in f:
                # Stop once both the counted quota and the good quota are met.
                if not unlimited and total >= amount and g >= amountGood:
                    break

                item = self.load_single(line.split(','))

                if not good_labels:
                    self.flow.add_record(item)
                else:
                    is_good = item.make_target() in good_labels
                    if good:
                        if is_good:
                            total += 1
                            self.flow.add_record(item)
                    elif not is_good:
                        # Strictly below the limit: `<=` admitted one record
                        # too many while good records were still collected.
                        if unlimited or total < amount:
                            total += 1
                            self.flow.add_record(item)
                    elif g < amountGood:
                        self.flow.add_record(item)
                        g += 1

                if i == to:
                    break
                i += 1

            return True
        except Exception as e:
            # Report the reason instead of failing silently.
            print("Error: " + str(e))
            return False

    # Load a single line
    def load_single(self, items):
        """Build a FlowRecord from the split columns of one data line.

        Every column is whitespace-stripped and stored as a string on the
        attribute named in ``_FIELDS``. Raises IndexError when ``items`` has
        fewer columns than ``_FIELDS``.
        """
        from flow import FlowRecord
        rec = FlowRecord()
        for index, attribute in enumerate(self._FIELDS):
            setattr(rec, attribute, items[index].strip())
        return rec

    # Get the netflow
    def get_netflow(self):
        """Return the flow collection filled by ``load``."""
        return self.flow
 # NOTE(review): this constructor sits at a one-space indent that matches no
 # enclosing class, and its body repeats the __init__ of the class defined
 # directly below it. It looks like a detached duplicate -- confirm whether it
 # can be removed.
 def __init__(self):
     # Imported at call time rather than at module import.
     from flow import Flows
     # Start with a fresh Flows instance.
     self.flow = Flows()
     # Query templates; each takes two parameters for the LIMIT clause.
     self.cmd = "SELECT * FROM `flows` LIMIT %s, %s"
     self.cmd_total = "SELECT * FROM `flows` INNER JOIN `flow_alert` ON flows.id = flow_alert.flowid INNER JOIN `alerts` ON flow_alert.alertid = alerts.id INNER JOIN `alert_type` ON alerts.type = alert_type.id LIMIT %s, %s"
     self.cmd_good = "SELECT * FROM `flows` WHERE `id` NOT IN (SELECT `flowid` FROM `flow_alert`) LIMIT %s, %s"
class SQLLoader:
    """Loads flow records from a MySQL database through pymysql."""

    # Hard-coded size used to pick a random starting offset; presumably the
    # row count of the `flows` table -- TODO confirm against the database.
    FLOW_TABLE_SIZE = 14164163

    def __init__(self):
        from flow import Flows
        self.flow = Flows()
        # Query templates; the two parameters fill the LIMIT clause.
        self.cmd = "SELECT * FROM `flows` LIMIT %s, %s"
        self.cmd_total = "SELECT * FROM `flows` INNER JOIN `flow_alert` ON flows.id = flow_alert.flowid INNER JOIN `alerts` ON flow_alert.alertid = alerts.id INNER JOIN `alert_type` ON alerts.type = alert_type.id LIMIT %s, %s"
        self.cmd_good = "SELECT * FROM `flows` WHERE `id` NOT IN (SELECT `flowid` FROM `flow_alert`) LIMIT %s, %s"

    # Load rows from the database
    def load(self,
             host,
             user,
             password,
             db,
             amount,
             good=False,
             classify=False):
        """Fetch ``amount`` rows starting at a random offset into ``self.flow``.

        Args:
            host, user, password, db: connection settings passed to
                ``pymysql.connect``.
            amount: number of rows requested (second LIMIT parameter).
            good: when True (and ``classify`` is False) use ``self.cmd_good``.
            classify: when True use ``self.cmd_total``; takes precedence
                over ``good``.

        Returns:
            True when all fetched rows were added, False when querying or
            converting a row raised (the error is printed).

        Raises:
            Whatever ``pymysql.connect`` raises; the connection is opened
            before the guarded section.
        """
        import random

        import pymysql.cursors

        # Connect to the database
        connection = pymysql.connect(host=host,
                                     user=user,
                                     password=password,
                                     db=db,
                                     charset='utf8mb4',
                                     cursorclass=pymysql.cursors.DictCursor)

        try:
            with connection.cursor() as cursor:
                if classify:
                    query = self.cmd_total
                elif good:
                    query = self.cmd_good
                else:
                    query = self.cmd

                # Clamp the upper bound so an `amount` larger than the
                # assumed table size starts at offset 0 instead of raising.
                ran = random.randint(0, max(0, self.FLOW_TABLE_SIZE - amount))
                cursor.execute(query, (ran, amount))

                for row in cursor:
                    item = self.load_single(row, good, classify)
                    self.flow.add_record(item)

            ret = True
        except Exception as e:
            print("Error: " + str(e))
            ret = False
        finally:
            connection.close()
        return ret

    @staticmethod
    def _flow_duration(items):
        """Return end minus start for one row, in the unit of ``*_time``.

        ``start_msec`` / ``end_msec`` are divided by 1000.0 and added to the
        matching ``*_time`` value before subtracting.
        """
        start = items['start_time'] + items['start_msec'] / 1000.0
        end = items['end_time'] + items['end_msec'] / 1000.0
        return end - start

    # Load a single row
    def load_single(self, items, good, classify):
        """Build a FlowRecord from one result row.

        Args:
            items: mapping with the keys ``start_time``, ``start_msec``,
                ``end_time``, ``end_msec``, ``prot``, ``src_ip``,
                ``src_port``, ``dst_ip``, ``dst_port``, ``packets``,
                ``octets``, ``tcp_flags`` and, when ``classify`` is True,
                ``description``.
            good: selects the fixed label used when ``classify`` is False.
            classify: when True the label is taken from ``description``.

        Returns:
            The populated FlowRecord.
        """
        import ipaddress
        from flow import FlowRecord
        rec = FlowRecord()
        rec.start_time = items['start_time']
        # End minus start, so the duration is not negative for a flow that
        # ends after it starts.
        rec.duration = self._flow_duration(items)
        rec.protocol = items['prot']
        rec.src_ip = str(ipaddress.ip_address(items['src_ip']))
        rec.src_port = items['src_port']
        rec.dest_ip = str(ipaddress.ip_address(items['dst_ip']))
        rec.dest_port = items['dst_port']
        rec.total_pckts = items['packets']
        rec.total_bytes = items['octets']

        # Label strings are kept exactly as they were; other code may compare
        # against them.
        if classify:
            rec.label = items['description']
        elif good:
            rec.label = 'non-malicous'
        else:
            rec.label = 'malicous'
        rec.tcp_flags = items['tcp_flags']
        return rec

    # Get the netflow
    def get_netflow(self):
        """Return the flow collection filled by ``load``."""
        return self.flow
 # NOTE(review): this constructor sits at a one-space indent that matches no
 # enclosing class, and its body repeats the __init__ of the class defined
 # directly below it. It looks like a detached duplicate -- confirm whether it
 # can be removed.
 def __init__(self):
     # Imported at call time rather than at module import.
     from flow import Flows
     # Start with a fresh Flows instance.
     self.flow = Flows()
class CTULoader:
    """Loads flow records from a comma-separated flow file.

    The first line of the file is treated as a header and skipped. Data lines
    are expected in this column order:

      StartTime,Dur,Proto,SrcAddr,Sport,Dir,DstAddr,Dport,State,sTos,dTos,TotPkts,TotBytes,SrcBytes,label
      2011/08/10 09:46:59.607825,1.026539,tcp,94.44.127.113,1577,   ->,147.32.84.59,6881,S_RA,0,0,4,276,156,flow=Background-Established-cmpgw-CVUT
    """

    # Position of the label column in a data line.
    LABEL_INDEX = 14

    # FlowRecord attribute for each column, in column order.
    _FIELDS = ('start_time', 'duration', 'protocol', 'src_ip', 'src_port',
               'bidirectional', 'dest_ip', 'dest_port', 'state', 'sTos',
               'dTos', 'total_pckts', 'total_bytes', 'total_srcbytes', 'label')

    def __init__(self):
        from flow import Flows
        self.flow = Flows()

    # Get the labels used
    def get_labels(self, file_name):
        """Return a list of the distinct labels found in ``file_name``.

        Blank lines and lines with too few columns are skipped, so a trailing
        empty line no longer discards the labels already collected.

        Returns:
            A list of label strings (whitespace-stripped), or ``[]`` when the
            file cannot be read.
        """
        labels = {}
        try:
            with open(file_name) as f:
                # Skip the header; the default keeps an empty file from raising.
                next(f, None)
                for line in f:
                    items = line.split(',')
                    if len(items) <= self.LABEL_INDEX:
                        continue
                    labels[items[self.LABEL_INDEX].strip()] = True
        except (IOError, OSError, UnicodeError):
            return []
        return list(labels)

    # Load a file
    def load(self, file_name, fr, to, good_labels=None, good=True, amount=-1, amountGood=0):
        """Read records from ``file_name`` into ``self.flow``.

        Args:
            file_name: path of the flow file.
            fr: number of data lines to skip after the header.
            to: value of the line counter (which starts at ``fr``) at which
                reading stops; that line is still processed.
            good_labels: targets regarded as good. When falsy, every record
                read is added without filtering.
            good: with ``good_labels`` set, True adds only records whose
                ``make_target()`` is in ``good_labels``; False adds the
                records that are not, plus up to ``amountGood`` that are.
            amount: maximum number of counted records; any negative value
                means no limit.
            amountGood: number of good records to add when ``good`` is False.

        Returns:
            True when reading finished, False when an exception occurred
            while reading (including running out of lines while skipping
            ``fr`` lines). Records added before the failure stay in
            ``self.flow``.

        Raises:
            Whatever ``open`` raises when the file cannot be opened; that
            call happens before the guarded section.
        """
        f = Loader.get_data(file_name)
        if not f:
            f = open(file_name)
            # The open handle is handed to Loader.insert_data and is
            # presumably reused by later calls, so it is not closed here.
            Loader.insert_data(file_name, f)
            print('Loaded data manually')
        else:
            print('Use stored data')
        f.seek(0, 0)
        try:
            # Skip the header line.
            next(f)

            i = 0
            total = 0
            # Skip the first `fr` data lines; next() raises StopIteration
            # when the file is shorter, which is reported as a failure below.
            while i < fr and next(f):
                i += 1

            g = 0
            unlimited = amount < 0
            for line in f:
                # Stop once both the counted quota and the good quota are met.
                if not unlimited and total >= amount and g >= amountGood:
                    break

                item = self.load_single(line.split(','))

                if not good_labels:
                    self.flow.add_record(item)
                else:
                    is_good = item.make_target() in good_labels
                    if good:
                        if is_good:
                            total += 1
                            self.flow.add_record(item)
                    elif not is_good:
                        # Strictly below the limit: `<=` admitted one record
                        # too many while good records were still collected.
                        if unlimited or total < amount:
                            total += 1
                            self.flow.add_record(item)
                    elif g < amountGood:
                        self.flow.add_record(item)
                        g += 1

                if i == to:
                    break
                i += 1

            return True
        except Exception as e:
            # Report the reason instead of failing silently.
            print("Error: " + str(e))
            return False

    # Load a single line
    def load_single(self, items):
        """Build a FlowRecord from the split columns of one data line.

        Every column is whitespace-stripped and stored as a string on the
        attribute named in ``_FIELDS``. Raises IndexError when ``items`` has
        fewer columns than ``_FIELDS``.
        """
        from flow import FlowRecord
        rec = FlowRecord()
        for index, attribute in enumerate(self._FIELDS):
            setattr(rec, attribute, items[index].strip())
        return rec

    # Get the netflow
    def get_netflow(self):
        """Return the flow collection filled by ``load``."""
        return self.flow
 # NOTE(review): this constructor sits at a one-space indent that matches no
 # enclosing class, and its body repeats the __init__ of the class defined
 # directly below it. It looks like a detached duplicate -- confirm whether it
 # can be removed.
 def __init__(self):
     # Imported at call time rather than at module import.
     from flow import Flows
     # Start with a fresh Flows instance.
     self.flow = Flows()
     # Query templates; each takes two parameters for the LIMIT clause.
     self.cmd = "SELECT * FROM `flows` LIMIT %s, %s"
     self.cmd_total = "SELECT * FROM `flows` INNER JOIN `flow_alert` ON flows.id = flow_alert.flowid INNER JOIN `alerts` ON flow_alert.alertid = alerts.id INNER JOIN `alert_type` ON alerts.type = alert_type.id LIMIT %s, %s"
     self.cmd_good = "SELECT * FROM `flows` WHERE `id` NOT IN (SELECT `flowid` FROM `flow_alert`) LIMIT %s, %s"
class SQLLoader:
    """Loads flow records from a MySQL database through pymysql."""

    # Hard-coded size used to pick a random starting offset; presumably the
    # row count of the `flows` table -- TODO confirm against the database.
    FLOW_TABLE_SIZE = 14164163

    def __init__(self):
        from flow import Flows
        self.flow = Flows()
        # Query templates; the two parameters fill the LIMIT clause.
        self.cmd = "SELECT * FROM `flows` LIMIT %s, %s"
        self.cmd_total = "SELECT * FROM `flows` INNER JOIN `flow_alert` ON flows.id = flow_alert.flowid INNER JOIN `alerts` ON flow_alert.alertid = alerts.id INNER JOIN `alert_type` ON alerts.type = alert_type.id LIMIT %s, %s"
        self.cmd_good = "SELECT * FROM `flows` WHERE `id` NOT IN (SELECT `flowid` FROM `flow_alert`) LIMIT %s, %s"

    # Load rows from the database
    def load(self, host, user, password, db, amount, good=False, classify=False):
        """Fetch ``amount`` rows starting at a random offset into ``self.flow``.

        Args:
            host, user, password, db: connection settings passed to
                ``pymysql.connect``.
            amount: number of rows requested (second LIMIT parameter).
            good: when True (and ``classify`` is False) use ``self.cmd_good``.
            classify: when True use ``self.cmd_total``; takes precedence
                over ``good``.

        Returns:
            True when all fetched rows were added, False when querying or
            converting a row raised (the error is printed).

        Raises:
            Whatever ``pymysql.connect`` raises; the connection is opened
            before the guarded section.
        """
        import random

        import pymysql.cursors

        # Connect to the database
        connection = pymysql.connect(host=host,
                                     user=user,
                                     password=password,
                                     db=db,
                                     charset='utf8mb4',
                                     cursorclass=pymysql.cursors.DictCursor)

        try:
            with connection.cursor() as cursor:
                if classify:
                    query = self.cmd_total
                elif good:
                    query = self.cmd_good
                else:
                    query = self.cmd

                # Clamp the upper bound so an `amount` larger than the
                # assumed table size starts at offset 0 instead of raising.
                ran = random.randint(0, max(0, self.FLOW_TABLE_SIZE - amount))
                cursor.execute(query, (ran, amount))

                for row in cursor:
                    item = self.load_single(row, good, classify)
                    self.flow.add_record(item)

            ret = True
        except Exception as e:
            print("Error: " + str(e))
            ret = False
        finally:
            connection.close()
        return ret

    @staticmethod
    def _flow_duration(items):
        """Return end minus start for one row, in the unit of ``*_time``.

        ``start_msec`` / ``end_msec`` are divided by 1000.0 and added to the
        matching ``*_time`` value before subtracting.
        """
        start = items['start_time'] + items['start_msec'] / 1000.0
        end = items['end_time'] + items['end_msec'] / 1000.0
        return end - start

    # Load a single row
    def load_single(self, items, good, classify):
        """Build a FlowRecord from one result row.

        Args:
            items: mapping with the keys ``start_time``, ``start_msec``,
                ``end_time``, ``end_msec``, ``prot``, ``src_ip``,
                ``src_port``, ``dst_ip``, ``dst_port``, ``packets``,
                ``octets``, ``tcp_flags`` and, when ``classify`` is True,
                ``description``.
            good: selects the fixed label used when ``classify`` is False.
            classify: when True the label is taken from ``description``.

        Returns:
            The populated FlowRecord.
        """
        import ipaddress
        from flow import FlowRecord
        rec = FlowRecord()
        rec.start_time = items['start_time']
        # End minus start, so the duration is not negative for a flow that
        # ends after it starts.
        rec.duration = self._flow_duration(items)
        rec.protocol = items['prot']
        rec.src_ip = str(ipaddress.ip_address(items['src_ip']))
        rec.src_port = items['src_port']
        rec.dest_ip = str(ipaddress.ip_address(items['dst_ip']))
        rec.dest_port = items['dst_port']
        rec.total_pckts = items['packets']
        rec.total_bytes = items['octets']

        # Label strings are kept exactly as they were; other code may compare
        # against them.
        if classify:
            rec.label = items['description']
        elif good:
            rec.label = 'non-malicous'
        else:
            rec.label = 'malicous'
        rec.tcp_flags = items['tcp_flags']
        return rec

    # Get the netflow
    def get_netflow(self):
        """Return the flow collection filled by ``load``."""
        return self.flow
# Example #11
# 0
class Trainer(object):
    """Dispatches training to a named trainer class and collects the flows used."""

    def __init__(self):
        from flow import Flows
        self.flow = Flows()

    # Train supervised with samples and targets
    def train(self, algorithm, data, feature, good_labels, config):
        """Train ``algorithm`` with the trainer named by ``data["type"]``.

        ``data["type"]`` is set to "DefaultTrainer" when missing. The flows
        returned by the selected trainer are merged into ``self.flow``.

        Returns:
            True when the trainer returned a truthy flow object, else False.
        """
        key = self.check_keys(data, "type", "DefaultTrainer")
        trainer = Trainer.get_trainer(key, DefaultTrainer())
        print("Loaded training algorithm: " + str(key) + ".")
        fl = trainer.train(algorithm, data, feature, good_labels, config)
        if not fl:
            return False
        self.flow.addFlows(fl)
        return True

    def check_keys(self, dic, key, val):
        """Return ``dic[key]``, first storing ``val`` there if the key is absent."""
        if key not in dic:
            dic[key] = val
        return dic[key]

    @staticmethod
    def get_trainer(name, default=None):
        """Instantiate the class called ``name`` from this module.

        Returns ``default`` (after printing a notice) when the lookup or the
        construction raises, including when ``name`` is not a string.
        """
        import sys

        try:
            return getattr(sys.modules[__name__], name)()
        except Exception:
            # %-formatting keeps the message identical for string names and
            # no longer raises while reporting a non-string name.
            print("Trainer \"%s\"does not exist." % (name,))
            return default

    def trainAll(self, feature, algorithm, good_labels, manager, is_binary):
        """Train ``algorithm`` on all collected flows and report class counts.

        Nothing happens unless there are more than two samples. Targets found
        in ``good_labels`` are counted as negative, all others as positive,
        and both counts are passed to ``manager.add_new_result``.
        """
        samples = self.flow.get_sample_data_complete(feature)
        targets = self.flow.get_target_data(is_binary, good_labels)
        if len(samples) > 2:
            algorithm.train(samples, targets)
            is_negative = [target in good_labels for target in targets]
            neg_train = sum(is_negative)
            pos_train = len(is_negative) - neg_train
            manager.add_new_result(pos_train, neg_train)

    def default(self, algorithm, loader, feature, file):
        """Check the size of the loader's flows and return them.

        ``algorithm`` and ``feature`` are accepted but not used here.

        Returns:
            ``loader.get_netflow()``, or None when it holds two records or
            fewer.
        """
        print("Training size is " + str(loader.get_netflow().get_size()) + ".")

        if loader.get_netflow().get_size() <= 2:
            print("Training set too small.")
            return None

        print("Training set \"" + file + "\"  done.")
        return loader.get_netflow()