Exemple #1
0
class ClassificationServer(object):
    def __init__(self):
        log.info("Listener started.")
        #self.bayes_classifier = None

        # Init bayes
        #self._init_classifier()

        self.database = Database()

        self.sessions = {}

    def init_session(self, timestamp, tag):
        session_id = self.database.createSession(timestamp, tag)
        log.debug("New Session: %s " % session_id)
        self.sessions[session_id] = {"bayes": self._init_classifier()}

        return session_id

    def stop_session(self, timestamp, session_id):
        self.database.stopSession(timestamp, session_id)
        self.sessions.pop(session_id)
        log.debug("Session %s stopped." % session_id)

    def _init_classifier(self):
        return BayesOneClass()
    
    def set_baseline(self, baseline_data, session_id):
        log.info("New baseline data received.")
        #log.info(baseline_data)
        self.sessions[session_id]["bl_data"] = json.loads(baseline_data)

        bayes_classifier = self.sessions[session_id]["bayes"]
        log.info("Calculating baseline summary.")
        # Split into training and testing datasets according to split ratio
        train, test, validate = bayes_classifier.split_dataset(self.sessions[session_id]["bl_data"], TRAIN_RATIO, TEST_RATIO)
        log.info("Split %d rows into train with %s, test with %s and validate with %s." % (len(self.sessions[session_id]["bl_data"]), len(train), len(test), len(validate)))

        ### Train
        summary, counted = bayes_classifier.summarize_by_class(train)
        self.sessions[session_id]["bl_summary"] = summary
        log.info('Summary by class value: %s' % self.sessions[session_id]["bl_summary"])

        ### Test
        '''
        stream_probabilities = bayes_classifier.get_stream_probabilities(self.sessions[session_id]["bl_summary"], test) 

        ## Protocol probabilities
        mean_probabilities = bayes_classifier.get_mean_probabilities(stream_probabilities, counted)
        log.info("Mean probabilities: %s" % mean_probabilities)
        self.sessions[session_id]["bl_probs"] = mean_probabilities

        # Calculate thresholds based on mean probabilities for each stream
        for dkey, dval in mean_probabilities.iteritems():
            bayes_classifier.thresholds[dkey] = dval ** CLASS_THRESHOLD_WEIGHT
        print ""
        log.info("New thresholds: %s" % bayes_classifier.thresholds)
        '''

        ## Calculate thresholds based on feature probabilities
        # Feature probabilities first
        feature_probabilities = bayes_classifier.get_feature_probabilities(self.sessions[session_id]["bl_summary"], test)

        # Calculate feature thresholds, protocol thresholds, and protocol probabilities
        self.sessions[session_id]["bl_fthreshold"], bayes_classifier.thresholds, self.sessions[session_id]["bl_probs"] = bayes_classifier.get_thresholds(feature_probabilities)

        # Split threshold exponent and mantissa
        self.sessions[session_id]["bl_fthreshold_exp"], bayes_classifier.exp_thresholds = bayes_classifier.split_thresholds(self.sessions[session_id]["bl_fthreshold"])

        #log.info("PROTOCOL EXP: %s" % protocol_exp) 

        #log.info("Feature thresholds: %s" % self.sessions[session_id]["bl_fthreshold"])
        #log.info("Protocol probabilities: %s" % self.sessions[session_id]["bl_probs"])

        ### Validate
        predictions = bayes_classifier.get_predictions(self.sessions[session_id]["bl_summary"], validate)
        accuracy = bayes_classifier.get_accuracy(validate, predictions)
        self.sessions[session_id]["bl_accuracy"] = accuracy

        # Calculate threshold based on mean of predicted test values
        mean_predictions = bayes_classifier.get_mean_predictions(predictions, counted)

        log.info('Mean Predictions: %s' % mean_predictions)
        log.info('Accuracy: %s' % accuracy)

        self.database.addBaseline(session_id,
                                  self.sessions[session_id]["bl_summary"], # Baseline
                                  self.sessions[session_id]["bl_accuracy"], # Baseline self check
                                  bayes_classifier.thresholds, # Calculated protocol thresholds
                                  self.sessions[session_id]["bl_fthreshold"], # Calculated feature thresholds
                                  self.sessions[session_id]["bl_fthreshold_exp"], # Extracted feature threshold exponents
                                  bayes_classifier.exp_thresholds) # Extracted protocol threshold exponents

        print "Feature Thresholds"
        print self.sessions[session_id]["bl_fthreshold_exp"]

        #print self.sessions
        #print self.sessions[-1].keys()

        return len(self.sessions[session_id]["bl_data"])

    def classify_streams(self, stream_data, session_id):
        log.info("New stream data received.")

        stream_data = json.loads(stream_data)

        bayes_classifier = self.sessions[session_id]["bayes"]

        log.info("EXP THRESHOLDS")
        log.info(bayes_classifier.exp_thresholds)

        classifications = bayes_classifier.get_predictions(self.sessions[session_id]["bl_summary"], stream_data)

        ## Database update
        hosts = []
        for stream in stream_data.values():

            # Filter streams with very low probability
            try:
                protocol = stream["meta"]["type"]
                small_threshold = self.sessions[session_id]["bl_probs"][protocol] ** (CLASS_THRESHOLD_WEIGHT[protocol]/2.0)
                if True:
                #if stream["classify"]["prob"] < small_threshold:
                    # Hosts
                    if stream["meta"]["h_ip"] not in hosts:
                        host_id = self.database.addHost(stream["meta"]["h_ip"], stream["meta"]["h_id"], session_id)
                        hosts.append(stream["meta"]["h_ip"])
                    else:
                        host_id = self.database.getHost(stream["meta"]["h_ip"])

                    # Streams
                    stream_id = self.database.addStream(stream["meta"]["type"])
                    self.database.addStreamToHost(session_id, stream_id, host_id, stream)
            except KeyError:
                log.warning("No threshold specified for %s protocol." % type)
        return len(stream_data)