key = None

    # extra check in case running the first time
    if ((split_path[-1] != 'miscellaneous' and key_address == source_ip)
            or (split_path[-1] != 'miscellaneous' and key_address == None)):
        # Initialize and load the model
        if len(sys.argv) > 2:
            load_path = sys.argv[2]
        else:
            load_path = os.path.join('models', 'RandomForestModel.pkl')

        # Compute model hash
        with open(load_path, 'rb') as handle:
            model_hash = hashlib.md5(handle.read()).hexdigest()

        model = RandomForestModel(duration=None)
        model.load(load_path)
        logger.debug("Loaded model from %s", load_path)

        # Get representations from the model
        reps, source_ip, timestamps, preds, others = model.get_representation(
            pcap_path, source_ip=source_ip, mean=False)

        if preds is not None:
            logger.debug("Generating predictions")
            last_update, prev_rep = get_previous_state(source_ip,
                                                       timestamps[0])
            _, mean_rep = average_representation(reps,
                                                 timestamps,
                                                 prev_representation=prev_rep,
                                                 last_update=last_update)
Exemple #2
0
data directory specified by the first argument.  The model is saved to the
location specified by the second argument.
'''

import sys
import json
from utils.RandomForestModel import RandomForestModel

if __name__ == '__main__':
    # Usage: <script> [data_dir] [save_path]
    #
    # Load model params from config. Only the JSON parsing needs the open
    # file, so the handle is released before anything else runs.
    with open('config.json') as config_file:
        config = json.load(config_file)
    duration = config['duration']
    labels = config['labels']

    # Get the data directory: first CLI argument, else the default location.
    data_dir = sys.argv[1] if len(sys.argv) > 1 else "/pcaps"

    # Get the save path: second CLI argument whenever one is present.
    # The check is "> 2" rather than "== 3" so that extra trailing
    # arguments do not silently discard a save path the user supplied.
    if len(sys.argv) > 2:
        save_path = sys.argv[2]
    else:
        save_path = "/models/RandomForestModel.pkl"

    # Initialize the model
    model = RandomForestModel(duration=duration, labels=labels)
    # Train the model
    model.train(data_dir)
    # Save the model to the specified path
    model.save(save_path)
Exemple #3
0
def test_RandomForestModel():
    """Smoke test: ``RandomForestModel(15)`` can be constructed without raising."""
    model = RandomForestModel(15)
Exemple #4
0
    logger = logging.getLogger(__name__)

    if len(sys.argv) < 2:
        data_dir = "/pcaps"
    else:
        data_dir = sys.argv[1]
    # Load model from specified path
    if len(sys.argv) > 2:
        load_path = sys.argv[2]
    else:
        load_path = os.path.join('models', 'RandomForestModel.pkl')
    if len(sys.argv) > 3:
        save_path = sys.argv[3]
    else:
        save_path = "/models/RandomForestModel.pkl"
    model = RandomForestModel(duration=None, hidden_size=None)
    logger.info("Loading model from %s", load_path)
    model.load(load_path)

    # Initialize results dictionary
    results = {}
    results['labels'] = model.labels

    # Get the true label assignments
    logger.info("Getting label assignments")
    with open(os.path.join(data_dir, 'label_assignments.json')) as handle:
        label_assignments = json.load(handle)

    # Walk through testing directory and get all the pcaps
    logger.info("Getting pcaps")
    pcaps = []
Exemple #5
0
def eval_rnn(pcap, input_label=None):
    '''
    Evaluate the RNN model on a single pcap.

    Loads the random forest model from models/RandomForestModel.pkl, gets
    representations for the pcap, averages them with any previously stored
    state, classifies the averaged representation, and then prints one
    score per cleaned session using the AbnormalDetector loaded from
    models/AbnormalRNN.

    Args:
        pcap: The pcap to evaluate; handed to model.get_representation.
        input_label: Optional label. When given, it is weighted 1 and every
            other label 0, instead of using the classifier's predictions.

    Returns:
        None. Scores are printed to stdout. Nothing is printed when the
        model yields no predictions.

    NOTE(review): `logger`, `labels` and `model_hash` are not defined in
    this function; presumably they are module-level names -- confirm.
    '''
    load_path = os.path.join('models', 'RandomForestModel.pkl')
    model = RandomForestModel(duration=None)
    model.load(load_path)

    # Get representations from the model. The `pcap` argument is used here;
    # the previous code read an undefined name `pcap_path` and ignored the
    # parameter entirely.
    reps, source_ip, timestamps, preds, others = model.get_representation(
        pcap, source_ip=None, mean=False)

    # Nothing to score without predictions.
    if preds is None:
        return

    logger.debug("Generating predictions")
    last_update, prev_rep = get_previous_state(source_ip, timestamps[0])
    _, mean_rep = average_representation(reps,
                                         timestamps,
                                         prev_representation=prev_rep,
                                         last_update=last_update)
    mean_preds = model.classify_representation(mean_rep)
    if len(sys.argv) > 2:
        for p in mean_preds:
            logger.debug(p)

    # Update the stored representation. The return value is not needed
    # here; the call is made for its effect on the stored data.
    if reps is not None and is_private(source_ip):
        logger.debug("Updating stored data")
        update_data(source_ip, reps, timestamps, preds, others, model_hash)

    # Clean the sessions that the model looked at
    clean_sessions, _ = clean_session_dict(model.sessions,
                                           source_address=None)

    # Build the (label, weight) list fed to the RNN
    pred_labels = {p[0]: p[1] for p in mean_preds}
    if input_label is None:
        # Labels the classifier did not report get weight 0
        L_in = [(label, pred_labels.get(label, 0)) for label in labels]
    else:
        L_in = [(label, 0) for label in labels if label != input_label]
        L_in.append((input_label, 1))

    # Use the RNN model to compute abnormality scores
    rnnmodel = AbnormalDetector(num_labels=len(labels))
    rnnpath = os.path.join('models', 'AbnormalRNN')
    rnnmodel.load(rnnpath)

    for session_dict in clean_sessions:
        for k, session in session_dict.items():
            X, L = create_inputs(L_in, session, 116)
            score = rnnmodel.get_output(X, L)
            print(k, score[0, 0])