예제 #1
0
    def __init__(self, exp, l2btools):
        """
        Store the experiment configuration and build the training pipeline.

        Creates the classifier and the trainer, derives the base name of the
        analysis log file for this run, then immediately processes the logs
        and marks the bots.

        INPUT:
            exp: mapping describing the experiment; the keys 'id' and
                 'kernel_type' are read here.
            l2btools: helper object; construct_svm_classifier() and the
                      analyser_results_dir attribute are used here.
        """
        self.expr_dict = exp
        self.id = self.expr_dict['id']
        self.l2btools = l2btools

        self.ip_sieve = IPSieve()
        self.ip_feature_db = {}

        #Create classifier, currently only SVM supported
        #but trainer is agnostic of classifier used provided it supports fit and predict
        self.experiment_classifier = self.l2btools.construct_svm_classifier(
            self.expr_dict['kernel_type'])
        #Create the trainer around that classifier
        self.trainer = Train2Ban(self.experiment_classifier)
        #Setup base data set
        #the base filename we are going to associate to the result of this experiment
        #strftime() returns a new string and leaves the datetime untouched,
        #so its result has to be kept; str(datetime) would put spaces,
        #colons and microseconds into the file name
        utc_timestamp = datetime.datetime.utcnow().strftime("%Y-%m-%d-%H%MZ")
        self.base_analyse_log_file = (self.l2btools.analyser_results_dir +
                                      'base_analyse_' + utc_timestamp)
        #the logs are processed and the bots marked at construction time
        self._process_logs()
        self._mark_bots()
예제 #2
0
class BasicTests(unittest.TestCase):
    """
    Smoke tests that parse the sample logs and compute every feature.

    The sieve and the feature database are class attributes, so they are
    shared by every test method and by every instance of this class.
    """
    log_files = (
        src_dir + "/test/deflect_test.log",
        src_dir + "/test/deflect.log_cool1.20120810_five_percent.log"
    )  #src_dir+"/test/deflect.log_cool1.20120810.23h59m50s-20120812.00h00m00s.old" )
    #log_files = (src_dir+"/tests/deflect_test.log", src_dir+"/tests/deflect_test.log")
    test_ip_sieve = IPSieve()
    test_ip_feature_db = {}

    def __init__(self, methodName='run_tests'):
        """
        Initialise the test case.

        unittest loaders instantiate a TestCase with the name of the test
        method to run, so the constructor has to accept it and hand it to
        the base class. The default names run_tests so that the class can
        still be built without arguments, e.g. to feed it to a profiler.

        INPUT:
            methodName: name of the method this instance runs as its test
        """
        super(BasicTests, self).__init__(methodName)

    def test_ip_sieve_parse(self):
        """
        Add every sample log file to the shared sieve and parse it.
        """
        for cur_log_file in self.log_files:
            self.test_ip_sieve.add_log_file(cur_log_file)
            self.test_ip_sieve.parse_log()

    def test_all_features(self):
        """
        Parse every sample log file and compute each Learn2BanFeature
        subclass over it, then print the resulting feature database.
        """
        for cur_log_file in self.log_files:
            self.test_ip_sieve.add_log_file(cur_log_file)
            self.test_ip_sieve.parse_log()

            for CurrentFeatureType in Learn2BanFeature.__subclasses__():
                cur_feature_tester = CurrentFeatureType(
                    self.test_ip_sieve, self.test_ip_feature_db)
                cur_feature_tester.compute()

        #single-argument parenthesised form prints the same on Python 2 and 3
        print(self.test_ip_feature_db)

    def run_tests(self):
        """
        needs a function to feed to the profiler
        """
        self.test_ip_sieve_parse()
        self.test_all_features()
예제 #3
0
class Learn2BanTools():
    """
    Helper object bundling the database access, the configuration loading,
    the log sieving / feature gathering and the classifier construction.

    Typical use: call load_train2ban_config() first; it reads the config
    file, connects to the database and fills in data_dir and
    analyser_results_dir. The remaining database methods rely on the
    connection (self.db) and the cursor (self.cur) it sets up.
    """
    #Class-level defaults are kept so that attribute access on the class
    #itself keeps working; __init__ gives every instance its own objects.
    ip_sieve = IPSieve()
    ip_feature_db = {}
    log_files = list()

    def __init__(self):
        """
        Create the per-instance state. Nothing is loaded from the database.
        """
        #we would like people to able to use the tool object even
        #if they don't have a db so we have no reason to load the
        #database config in the constructor
        #
        #mutable class attributes are shared by all instances, so features
        #gathered through one instance would leak into every other one;
        #give each instance its own sieve, feature db and log list instead
        self.ip_sieve = IPSieve()
        self.ip_feature_db = {}
        self.log_files = list()

    def _with_trailing_slash(self, path):
        """
        Return path with a final "/" appended unless it already ends with one.
        """
        if path.endswith("/"):
            return path
        return path + "/"

    def load_data_logs(self):
        """
        Retrieve every entry found in the data directory (self.data_dir),
        store the list in self.log_files and return it.
        """
        self.log_files = glob.glob(self.data_dir + '*')
        return self.log_files

    def connect_to_db(self):
        """
        This connection to the db will live for the life time of the
        learn2bantools instance and will be used to save data back to the db.

        Requires db_host, db_user, db_password and db_name to be set
        (see load_database_config).
        """
        #A first, database-less connection is only needed to make sure the
        #database exists; close it (and its cursor) once that is done
        #instead of leaking it when self.db is rebound below.
        #NOTE(review): the name 'learn2ban' is hard-coded here while the
        #second connection uses self.db_name -- confirm they always agree.
        bootstrap_db = MySQLdb.connect(self.db_host, self.db_user,
                                       self.db_password)
        try:
            bootstrap_cur = bootstrap_db.cursor(MySQLdb.cursors.DictCursor)
            try:
                bootstrap_cur.execute(
                    'CREATE DATABASE IF NOT EXISTS learn2ban')
            finally:
                bootstrap_cur.close()
        finally:
            bootstrap_db.close()

        #Connect directly to DB
        self.db = MySQLdb.connect(self.db_host, self.db_user, self.db_password,
                                  self.db_name)
        #Create cursor object to allow query execution
        self.cur = self.db.cursor(MySQLdb.cursors.DictCursor)

    def disconnect_from_db(self):
        """
        Close the cursor and the connection to the database
        """
        self.cur.close()
        self.db.close()

    def save_experiment_result(self, experiment_result):
        """
        Saves the result of an experimental run, including testing proportion used and score

        INPUT:
            experiment_result: mapping providing the keys named in the
                               INSERT statement below
        """
        add_experiment_result = (
            "INSERT INTO experiment_results( experiment_id, result_file, proportion, score, active_features, pca_ratios, mrmr_score) VALUES (%(experiment_id)s,%(result_file)s,%(proportion)s,%(score)s,%(active_features)s,%(pca_ratios)s,%(mrmr_score)s)"
        )
        self.cur.execute(add_experiment_result, experiment_result)
        self.db.commit()

    def retrieve_experiments_results(self):
        """
        Retrieve the results of the experiments already run
        """
        self.cur.execute("SELECT * FROM experiment_results")
        return self.cur.fetchall()

    def delete_all_experiments_results(self):
        """
        Remove every row of the experiment_results table (TRUNCATE);
        the table itself is kept.
        """
        self.cur.execute("TRUNCATE TABLE experiment_results")

    def retrieve_experiments(self):
        """
        Retrieve the set of enabled experiments to run from the database,
        store it in self.experiment_set and return it.
        """
        self.cur.execute("SELECT * FROM experiments where enabled=TRUE")
        self.experiment_set = self.cur.fetchall()
        return self.experiment_set

    def retrieve_experiment_logs(self, experiment_id):
        """
        Read the experiment_logs table and retrieve the name
        of logs associated to the experiment id

        INPUT:
           experiment_id: the id of the experiment whose logs are sought

        OUTPUT:
           the fetched rows, each with 'file_name' prefixed by self.data_dir
        """
        #the id is passed as a query parameter rather than concatenated
        #into the statement, so the driver does the quoting
        self.cur.execute(
            "SELECT experiment_logs.log_id, logs.file_name FROM experiment_logs, logs WHERE experiment_logs.log_id = logs.id AND experiment_logs.experiment_id = %s",
            (experiment_id, ))
        log_set = self.cur.fetchall()
        #add the full path to log files
        for cur_log in log_set:
            cur_log['file_name'] = self.data_dir + cur_log['file_name']
        return log_set

    def load_database_config(self):
        """
        Get the database connection parameters and the config profile name
        from the [db_params] section of the train2ban config file.
        """
        config = ConfigParser.ConfigParser()
        #the with-block closes the config file once it has been parsed
        with open(src_dir + '/config/train2ban.cfg') as config_file:
            config.readfp(config_file)
        self.db_user = config.get('db_params', 'db_user')
        self.db_password = config.get('db_params', 'db_password')
        self.db_host = config.get('db_params', 'db_host')
        self.db_name = config.get('db_params', 'db_name')
        self.config_profile = config.get('db_params', 'config_profile')

    def load_train2ban_config(self):
        """
        Load the config file, connect to the database and read the
        directory settings from the config table.

        Sets data_dir and analyser_results_dir, and filter_dir /
        filter_file when they are configured.

        RAISES:
            IOError: the config table holds no row at all
            ValueError: the training or the results directory is blank
        """
        #Get database connection params
        self.load_database_config()
        #Establish database connection object
        self.connect_to_db()
        #Get basic config parameters
        #first try to see if there is a config specific to this host
        if not self.config_profile:
            self.config_profile = "default"

        self.cur.execute("SELECT * from config where profile_name=%s",
                         (self.config_profile, ))
        config_row = self.cur.fetchone()

        #otherwise we read first row of the database (the one with minimum id)
        if not config_row:
            self.cur.execute("SELECT * from config ORDER BY id ASC")
            config_row = self.cur.fetchone()

        if not config_row:
            raise IOError("No configuration record in the database")

        #relative paths are resolved against src_dir
        cur_dir = "" if config_row["absolute_paths"] else src_dir

        training_dir = config_row["training_directory"]
        results_dir = config_row["analyser_results_directory"]
        if not training_dir or not results_dir:
            raise ValueError(
                "Data and Result directory can not be left blank")
        self.data_dir = cur_dir + self._with_trailing_slash(training_dir)
        self.analyser_results_dir = cur_dir + self._with_trailing_slash(
            results_dir)

        #deprecated for now, we are entering the regexes directly into the db
        if config_row["regex_filter_directory"]:
            self.filter_dir = cur_dir + self._with_trailing_slash(
                config_row["regex_filter_directory"])

        if config_row["default_filter_file"]:
            #this is a file, not a directory: no trailing "/" is added,
            #otherwise the resulting path could not be opened
            #NOTE(review): self.filter_dir is only assigned above when a
            #filter directory is configured -- confirm a filter file is
            #never configured without one
            self.filter_file = self.filter_dir + config_row[
                "default_filter_file"]

    def add_data_log(self, log):
        """
        Make log the only entry of self.log_files (the previous list is
        discarded).

        INPUT:
            log: file name of the log, relative to the training directory
        """
        #train_dir is never assigned by this class; the training directory
        #read from the config is stored as data_dir. Honour train_dir when
        #a caller did set it, otherwise fall back to data_dir.
        base_dir = getattr(self, "train_dir", None)
        if base_dir is None:
            base_dir = self.data_dir
        self.log_files = [base_dir + log]

    def load_bad_filters_from_db(self, experiment_id):
        """
        Retrieve the (log_id, regex) rows assigned to the logs of an
        experiment.

        INPUT:
            experiment_id: the id of the experiment whose filters are sought
        """
        #TODO: ensure cur is live
        self.cur.execute(
            "SELECT regex_assignment.log_id, regex_filters.regex from regex_assignment, regex_filters, experiment_logs WHERE regex_assignment.regex_filter_id = regex_filters.id AND regex_assignment.log_id = experiment_logs.log_id AND experiment_logs.experiment_id = %s",
            (experiment_id, ))
        return self.cur.fetchall()

    def load_bad_filters(self):
        """
        Load set of regex filters from the default filter file.
        This is to allow expression of an individual filter file, rather
        than a set or by experiment.

        OUTPUT:
            list with the text of every child of the XML root element
        """
        root = ET.parse(self.filter_file).getroot()
        return [child.text for child in root]

    def sieve_the_ip(self):
        """
        This was used when all experiment were using all of log files
        but in new model each experiment has its own file
        """
        #NOTE(review): the other methods call ip_sieve.parse_log(); confirm
        #that parse_log_file() still exists on IPSieve
        for cur_log_file in self.log_files:
            self.ip_sieve.add_log_file(cur_log_file)
            self.ip_sieve.parse_log_file()

    def clear_data(self):
        """
        Replace the sieve and the feature database with fresh, empty ones.
        """
        self.ip_sieve = IPSieve()
        self.ip_feature_db = {}

    def gather_all_features(self, log_files):
        """
        Parse each log file and compute every Learn2BanFeature subclass
        over the sieve.

        INPUT:
            log_files: the logs to go through

        OUTPUT:
            self.ip_feature_db, the database handed to every feature
        """
        for cur_log_file in log_files:
            self.ip_sieve.add_log_file(cur_log_file)
            self.ip_sieve.parse_log()
            for CurrentFeatureType in Learn2BanFeature.__subclasses__():
                cur_feature_tester = CurrentFeatureType(
                    self.ip_sieve, self.ip_feature_db)
                cur_feature_tester.compute()

        return self.ip_feature_db

    def construct_svm_classifier(self, kernel_mode='linear'):
        """
        Creates an instance of the SVM classifier with a given mode

        INPUT:
            kernel_mode: passed to svm.SVC as its kernel argument
        """
        return svm.SVC(kernel=kernel_mode)

    def random_slicer(self, data_size, train_portion=0.5):
        """
        Return two boolean selectors with random true and false, each the
        complement of the other, used for slicing a set into training and
        testing

        INPUT:
            data_size: size of the selectors to return
            train_portion: between 0,1 indicate the portion for the True
                           entry

        OUTPUT:
            (random_selector, complement_selector): the first is a plain
            list of bools, the second the numpy array np.logical_not
            builds from it
        """
        from random import random
        random_selector = [
            random() < train_portion for _ in range(data_size)
        ]
        complement_selector = np.logical_not(random_selector)

        return random_selector, complement_selector
예제 #4
0
 def clear_data(self):
     """
     Replace the sieve and the feature database with fresh, empty ones.
     """
     self.ip_sieve, self.ip_feature_db = IPSieve(), {}
예제 #5
0
    def process_incident(self, incident):
        """
        Compute the session features of one incident and store them.

        The IP records come from the ES handler when the incident has no
        file name, otherwise from the log file the incident names. Every
        Learn2BanFeature subclass is computed over those records, the
        result is post-processed, and the sessions stored for the incident
        are deleted and replaced by the new ones.

        INPUT:
            incident: mapping describing the incident; the keys 'id',
                      'file_name', 'start', 'stop' and 'target' are read.
                      None is accepted and ignored.

        OUTPUT:
            the post-processed feature database, or None when incident
            is None.
        """
        if incident is None:
            return None

        ip_sieve = IPSieve()
        banned_ips = []

        if not incident["file_name"]:
            # no log file is attached (None or empty name):
            # get the banned IPs and the logs from ES
            banned_ips = self.es_handler.get_banjax(incident['start'],
                                                    incident['stop'],
                                                    incident['target'])
            ats_records = self.es_handler.get(incident['start'],
                                              incident['stop'],
                                              incident['target'])

            # calculate IP dictionary with ATS records
            ip_records = ip_sieve.process_ats_records(ats_records)
        else:
            # read the sessions from the log file
            ip_sieve.add_log_file(incident["file_name"])
            ip_records = ip_sieve.parse_log("nginx")

        # calculate features
        ip_feature_db = {}
        feature_classes = Learn2BanFeature.__subclasses__()

        #At this stage it is only a preliminary list, we might lose features
        #due to 0 variance
        self._active_feature_list = []
        #do a dry run on all features just to gather the indices of all
        #available features
        for CurrentFeature in feature_classes:
            feature = CurrentFeature(ip_records, ip_feature_db)
            self._active_feature_list.append(feature._FEATURE_INDEX)

        for CurrentFeature in feature_classes:
            feature = CurrentFeature(ip_records, ip_feature_db)
            #single-argument parenthesised print behaves the same on
            #Python 2 and Python 3
            print("Computing feature %i..." % feature._FEATURE_INDEX)
            feature.compute()

        # post process the features
        ip_feature_db = self.bothound_tools.post_process(ip_feature_db)

        # delete the old sessions for this incident before adding the new ones
        self.bothound_tools.delete_sessions(incident['id'])
        self.bothound_tools.add_sessions(incident['id'], ip_feature_db,
                                         banned_ips)
        # NOTE(review): presumably clears the incident's "to be processed"
        # flag -- confirm against bothound_tools
        self.bothound_tools.set_incident_process(incident['id'], False)
        print("Incident {} processed.".format(incident['id']))
        return ip_feature_db