Example #1
0
    def __init__(self):
        """
        Set up logging, arguments, configuration, input files and the model,
        then run the operation selected by ``self.args.operation``.

        Supported operations are ``eval``, ``train`` and ``test``; each one
        branches again on ``self.args.algorithm`` (``onelayer``,
        ``randomforest`` or ``sos``). Any other combination does nothing.
        """
        ## Logger for this instance; the root logger is set to INFO
        self.logger = logging.getLogger(__name__)
        logging.basicConfig(level=logging.INFO)

        ## Command line arguments
        self.args = None
        self.read_args()

        ## Configuration file values
        self.get_config()
        self.common = Common(config=self.config)

        ## Swap in the logger prepared by Common to aid debugging
        self.logger = Common().setup_logger(self.logger)

        ## Network traffic files to parse, and the model that consumes them
        self.get_files()
        self.model_hash = None
        self.model = Model(duration=self.duration,
                           hidden_size=None,
                           model_type=self.args.algorithm)

        ## Algorithms that are driven through BaseAlgorithm
        classifier_algorithms = ('onelayer', 'randomforest')

        def build_algorithm():
            ## Built on demand so it sees the state left by load_model()
            return BaseAlgorithm(files=self.files,
                                 config=self.config,
                                 model=self.model,
                                 model_hash=self.model_hash,
                                 model_path=self.args.trained_model,
                                 sos_model=self.args.sos_model)

        def run_eval():
            ## Evaluation: produce predictions with an already trained model
            self.load_model()
            if self.args.algorithm in classifier_algorithms:
                build_algorithm().eval(self.args.algorithm)
            elif self.args.algorithm == 'sos':
                ## SOS refers to statistical outlier selection model
                from networkml.algorithms.sos.eval_SoSModel import eval_pcap
                eval_pcap(self.args.path, self.args.sos_model,
                          self.conf_labels, self.time_const)

        def run_train():
            ## Train: fit a new model on specific packet captures
            if self.args.algorithm == 'sos':
                ## SOS refers to statistical outlier selection model
                from networkml.algorithms.sos.train_SoSModel import train
                train(self.args.path, self.args.sos_model, self.time_const,
                      self.rnn_size, self.conf_labels, self.args.save)
                return

            if self.args.algorithm == 'onelayer':
                ## onelayer refers to a one-layer neural network
                classifier = MLPClassifier(self.state_size,
                                           alpha=0.1,
                                           activation='relu',
                                           max_iter=1000)
            elif self.args.algorithm == 'randomforest':
                ## Random forests refers to a decision tree-based model
                classifier = RandomForestClassifier(n_estimators=100,
                                                    min_samples_split=5,
                                                    class_weight='balanced')
            else:
                return

            build_algorithm().train(self.args.path, self.args.save,
                                    classifier, self.args.algorithm)

        def run_test():
            ## Test: benchmark a trained model on the classification task
            self.load_model()
            if self.args.algorithm in classifier_algorithms:
                build_algorithm().test(self.args.path, self.args.save)
            elif self.args.algorithm == 'sos':
                self.logger.info(
                    'There is no testing operation for the SoSModel.')

        operations = {
            'eval': run_eval,
            'train': run_train,
            'test': run_test,
        }
        selected = operations.get(self.args.operation)
        if selected is not None:
            selected()
Example #2
0
    def eval(self, algorithm):
        """
        Run the model over every file in ``self.files`` and publish one
        message per file, followed by a final empty closing message.

        Files whose name ``parse_pcap_name`` does not recognise are skipped.
        When the model yields no predictions a ``{'valid': False}`` message is
        published for the file. Otherwise the stored data is updated, an
        abnormality score is requested from the SoS model when ``has_avx()``
        allows it, a decision is built with ``common.basic_decision``,
        written to Redis when enabled, and published.

        Args:
            algorithm: model type name; forwarded to ``eval_pcap`` as
                ``model_type``.
        """
        if self.files:
            self.model.sessionize_pcaps(self.files)

        for fi in self.files:
            self.logger.info('Processing {0}...'.format(fi))
            base_pcap = os.path.basename(fi)
            pcap_key, pcap_labels = self.parse_pcap_name(base_pcap)
            if pcap_key is None:
                self.logger.debug('Ignoring unknown pcap name %s', base_pcap)
                continue

            ## Get representations from the model
            (reps, source_mac, timestamps, preds, others,
             capture_ip_source) = self.model.get_representation(
                 str(fi), source_ip=None, mean=False)

            ## If no predictions are made, send a message with explanation
            if preds is None:
                message = {
                    'data': {
                        pcap_key: {'valid': False, 'pcap': base_pcap}
                    }
                }
                self.logger.info('Not enough sessions in file \'%s\'', str(fi))
                self.publish_message(message)
                continue

            ## A prediction was made: build and send the decision message
            self.logger.debug('Generating predictions')

            ## Reset per file so a key from an earlier file is never reused
            r_key = None
            if reps is not None:
                self.logger.debug('Updating stored data')
                r_key = self.common.update_data(source_mac, reps,
                                                timestamps, preds, others,
                                                self.model_hash)

            ## Clean the sessions the model looked at; only the MAC inferred
            ## from the last session is kept, as a fallback source address
            inferred_mac = None
            for session_dict in self.model.sessions:
                _, inferred_mac = clean_session_dict(
                    session_dict, source_address=source_mac)

            if source_mac is None:
                source_mac = inferred_mac

            ## Make simple decisions based on vector differences and update
            ## times
            timestamp = timestamps[0].timestamp()
            labels, confs = zip(*preds)
            abnormality = 0.0
            if self.has_avx():
                ## Imported only when needed, after the AVX check
                from networkml.algorithms.sos.eval_SoSModel import eval_pcap
                try:
                    abnormality = eval_pcap(str(fi),
                                            self.sos_model,
                                            self.conf_labels,
                                            self.time_const,
                                            label=labels[0],
                                            rnn_size=self.rnn_size,
                                            model_path=self.model_path,
                                            model_type=algorithm)
                except ValueError:
                    self.logger.warning(
                        "Can't run abnormality detection because not a big enough sample size"
                    )
            else:
                self.logger.warning(
                    "Can't run abnormality detection because this CPU doesn't support AVX"
                )

            prev_s = self.common.get_address_info(source_mac, timestamp)
            decision = self.common.basic_decision(pcap_key, source_mac,
                                                  prev_s, timestamp,
                                                  labels, confs,
                                                  abnormality)
            sources = {
                'source_ip': capture_ip_source,
                'source_mac': source_mac,
                'pcap_labels': pcap_labels,
            }
            if pcap_key in decision:
                decision[pcap_key].update(sources)
            elif source_mac in decision:
                decision[source_mac].update(sources)
            self.logger.debug('Created message')

            ## Log at most the first three predictions; slicing avoids an
            ## IndexError when the model returns fewer than three
            for label, conf in zip(labels[:3], confs[:3]):
                self.logger.info(label + ' : ' + str(round(conf, 3)))

            # update Redis with decision
            if self.common.use_redis:
                if r_key is None:
                    self.logger.warning(
                        'Skipping Redis update for %s because no data key '
                        'is available', base_pcap)
                else:
                    redis_decision = {k: str(v) for k, v in decision.items()}
                    try:
                        self.common.r.hmset(r_key, redis_decision)
                    except Exception as e:  # pragma: no cover
                        self.logger.error(
                            'Failed to update keys in Redis because: {0}'.
                            format(str(e)))

            ## 'pcap' is added after the Redis write, so it is only part of
            ## the published message
            decision['pcap'] = base_pcap
            self.publish_message({'data': decision})

        ## Final empty message; close=True is passed through to
        ## publish_message
        self.publish_message({'data': ''}, close=True)
Example #3
0
    def eval(self, algorithm):
        """
        Run the model over every file in ``self.files`` and publish one JSON
        message per file to RabbitMQ (when enabled), followed by a final
        message with empty data.

        Files whose parsed name ends in ``miscellaneous`` are skipped. When
        the model yields no predictions a ``{'valid': False}`` message is
        published for the file. Otherwise the stored data is updated, an
        abnormality score is requested from the SoS model when the CPU
        reports AVX support, a decision is built with
        ``common.basic_decision``, written to Redis when enabled, and
        published.

        Args:
            algorithm: model type name; forwarded to ``eval_pcap`` as
                ``model_type``.
        """

        def envelope(data):
            # Wrap a payload in the metadata envelope and serialise it
            return json.dumps({
                'id': os.getenv('id', 'None'),
                'type': 'metadata',
                'file_path': os.getenv('file_path', 'None'),
                'data': data,
                'results': {
                    'tool': 'networkml',
                    'version': networkml.__version__
                }
            })

        def publish(body):
            # Send a serialised message if RabbitMQ is in use
            if self.common.use_rabbit:
                self.common.channel.basic_publish(
                    exchange=self.common.exchange,
                    routing_key=self.common.routing_key,
                    body=body,
                    properties=pika.BasicProperties(delivery_mode=2, ))

        # CPU flags do not change between files, so they are looked up at
        # most once, and only if a file actually reaches that step
        has_avx = None

        for fi in self.files:
            self.logger.info('Processing {0}...'.format(fi))
            key = None
            name_parts = ['None']
            try:
                # Take the text before the first '.', split it on '-', and
                # read the key from the second '_' field of the first part
                name_parts = os.path.split(fi)[-1]
                name_parts = name_parts.split('.')
                name_parts = name_parts[0].split('-')
                key = name_parts[0].split('_')[1]
            except Exception as e:  # pragma: no cover
                self.logger.debug('Could not get key because %s', str(e))

            # ignore misc files
            if name_parts[-1] == 'miscellaneous':
                continue

            # Get representations from the model
            (reps, source_mac, timestamps, preds, others,
             capture_ip_source) = self.model.get_representation(
                 str(fi), source_ip=None, mean=False)

            if preds is None:
                body = envelope(
                    {key: {'valid': False, 'pcap': os.path.split(fi)[-1]}})
                self.logger.info('Not enough sessions in file \'%s\'', str(fi))
                publish(body)
                continue

            self.logger.debug('Generating predictions')
            last_update, prev_rep = self.common.get_previous_state(
                source_mac, timestamps[0])

            # TODO are these calls actually needed???
            # Their results are not used below; the calls are kept in case
            # they have side effects - confirm before removing.
            _, mean_rep = self.common.average_representation(
                reps,
                timestamps,
                prev_representation=prev_rep,
                last_update=last_update)
            self.model.classify_representation(mean_rep)

            # Reset per file so a key from an earlier file is never reused
            r_key = None
            if reps is not None:
                self.logger.debug('Updating stored data')
                r_key = self.common.update_data(source_mac, reps,
                                                timestamps, preds, others,
                                                self.model_hash)

            # Clean the sessions the model looked at; only the MAC inferred
            # from the last session is kept, as a fallback source address
            inferred_mac = None
            for session_dict in self.model.sessions:
                _, inferred_mac = clean_session_dict(
                    session_dict, source_address=source_mac)

            if source_mac is None:
                source_mac = inferred_mac

            # Make simple decisions based on vector differences and update times
            timestamp = timestamps[0].timestamp()
            labels, confs = zip(*preds)
            abnormality = 0.0
            if has_avx is None:
                cpu_info = get_cpu_info()
                has_avx = 'flags' in cpu_info and (
                    'avx' in cpu_info['flags']
                    or 'avx2' in cpu_info['flags'])
            if has_avx:
                # Imported only when needed, after the AVX check
                from networkml.algorithms.sos.eval_SoSModel import eval_pcap
                abnormality = eval_pcap(str(fi),
                                        self.conf_labels,
                                        self.time_const,
                                        label=labels[0],
                                        rnn_size=self.rnn_size,
                                        model_path=self.model_path,
                                        model_type=algorithm)
            else:
                self.logger.warning(
                    "Can't run abnormality detection because this CPU doesn't support AVX"
                )

            prev_s = self.common.get_address_info(source_mac, timestamp)
            decision = self.common.basic_decision(key, source_mac, prev_s,
                                                  timestamp, labels, confs,
                                                  abnormality)
            if key in decision:
                decision[key]['source_ip'] = capture_ip_source
                decision[key]['source_mac'] = source_mac
            elif source_mac in decision:
                decision[source_mac]['source_ip'] = capture_ip_source
                decision[source_mac]['source_mac'] = source_mac
            self.logger.debug('Created message')

            # Log at most the first three predictions; slicing avoids an
            # IndexError when the model returns fewer than three
            for label, conf in zip(labels[:3], confs[:3]):
                self.logger.info(label + ' : ' + str(round(conf, 3)))

            # update Redis with decision
            if self.common.use_redis:
                if r_key is None:
                    self.logger.warning(
                        'Skipping Redis update for %s because no data key '
                        'is available', str(fi))
                else:
                    redis_decision = {k: str(v) for k, v in decision.items()}
                    try:
                        self.common.r.hmset(r_key, redis_decision)
                    except Exception as e:  # pragma: no cover
                        self.logger.error(
                            'Failed to update keys in Redis because: {0}'.
                            format(str(e)))

            # 'pcap' is added after the Redis write, so it is only part of
            # the published message
            decision['pcap'] = os.path.split(fi)[-1]
            body = envelope(decision)
            self.logger.info('Message: ' + body)
            publish(body)

        # Final message with empty data, then close the RabbitMQ connection
        publish(envelope(''))
        if self.common.use_rabbit:
            try:
                self.common.connection.close()
            except Exception as e:  # pragma: no cover
                self.logger.error(
                    'Unable to close rabbit connection because: {0}'.format(
                        str(e)))
Example #4
0
    def __init__(self):
        """
        Set up logging, arguments, configuration, input files and the model,
        then run the operation selected by ``self.args.operation``.

        Supported operations are ``eval``, ``train`` and ``test``; each one
        branches again on ``self.args.algorithm`` (``onelayer``,
        ``randomforest`` or ``sos``). Any other combination does nothing.
        """
        # Logger for this instance; the root logger is set to INFO
        self.logger = logging.getLogger(__name__)
        logging.basicConfig(level=logging.INFO)

        # Command line arguments, then configuration file values
        self.args = None
        self.read_args()
        self.get_config()
        self.common = Common(config=self.config)
        self.logger = Common().setup_logger(self.logger)

        # Input files and the model that consumes them
        self.get_files()
        self.model_hash = None
        self.model = Model(duration=self.duration,
                           hidden_size=None,
                           model_type=self.args.algorithm)

        # Algorithms that are driven through BaseAlgorithm
        classifier_algorithms = ('onelayer', 'randomforest')

        def new_algorithm():
            # Built on demand so it sees the state left by load_model()
            return BaseAlgorithm(files=self.files,
                                 config=self.config,
                                 model=self.model,
                                 model_hash=self.model_hash,
                                 model_path=self.args.trained_model)

        if self.args.operation == 'eval':
            self.load_model()
            if self.args.algorithm in classifier_algorithms:
                new_algorithm().eval(self.args.algorithm)
            elif self.args.algorithm == 'sos':
                from networkml.algorithms.sos.eval_SoSModel import eval_pcap
                eval_pcap(self.args.path, self.conf_labels, self.time_const)

        elif self.args.operation == 'train':
            if self.args.algorithm == 'sos':
                from networkml.algorithms.sos.train_SoSModel import train
                train(self.args.path, self.time_const, self.rnn_size,
                      self.conf_labels, self.args.save)
            else:
                # The classifier is created before the algorithm wrapper
                classifier = None
                if self.args.algorithm == 'onelayer':
                    classifier = MLPClassifier(self.state_size,
                                               alpha=0.1,
                                               activation='relu',
                                               max_iter=1000)
                elif self.args.algorithm == 'randomforest':
                    classifier = RandomForestClassifier(
                        n_estimators=100,
                        min_samples_split=5,
                        class_weight='balanced')
                if classifier is not None:
                    new_algorithm().train(self.args.path, self.args.save,
                                          classifier, self.args.algorithm)

        elif self.args.operation == 'test':
            self.load_model()
            if self.args.algorithm in classifier_algorithms:
                new_algorithm().test(self.args.path, self.args.save)
            elif self.args.algorithm == 'sos':
                self.logger.info(
                    'There is no testing operation for the SoSModel.')
Example #5
0
    def eval(self, algorithm):
        """
        Run the model over every file in ``self.files`` and publish one JSON
        message per file to RabbitMQ (when enabled), followed by a final
        message with empty data.

        Files whose name ``parse_pcap_name`` does not recognise are skipped.
        When the model yields no predictions a ``{'valid': False}`` message is
        published for the file. Otherwise the stored data is updated, an
        abnormality score is requested from the SoS model when the CPU
        reports AVX support, a decision is built with
        ``common.basic_decision``, written to Redis when enabled, and
        published.

        Args:
            algorithm: model type name; forwarded to ``eval_pcap`` as
                ``model_type``.
        """

        def envelope(data):
            ## Wrap a payload in the metadata envelope and serialise it
            return json.dumps({
                'id': os.getenv('id', 'None'),
                'type': 'metadata',
                'file_path': os.getenv('file_path', 'None'),
                'data': data,
                'results': {
                    'tool': 'networkml',
                    'version': networkml.__version__
                }
            })

        def publish(body):
            ## Send a serialised message if RabbitMQ is in use
            if self.common.use_rabbit:
                self.common.channel.basic_publish(
                    exchange=self.common.exchange,
                    routing_key=self.common.routing_key,
                    body=body,
                    properties=pika.BasicProperties(delivery_mode=2, ))

        ## CPU flags do not change between files, so they are looked up at
        ## most once, and only if a file actually reaches that step
        has_avx = None

        for fi in self.files:
            self.logger.info('Processing {0}...'.format(fi))
            base_pcap = os.path.basename(fi)
            key = self.parse_pcap_name(base_pcap)
            if key is None:
                self.logger.debug('Ignoring unknown pcap name %s', base_pcap)
                continue

            ## Get representations from the model
            (reps, source_mac, timestamps, preds, others,
             capture_ip_source) = self.model.get_representation(
                 str(fi), source_ip=None, mean=False)

            ## If no predictions are made, send a message with explanation
            if preds is None:
                body = envelope({key: {'valid': False, 'pcap': base_pcap}})
                self.logger.info('Not enough sessions in file \'%s\'', str(fi))
                publish(body)
                continue

            ## A prediction was made: build and send the decision message
            self.logger.debug('Generating predictions')

            ## The result is not used below; the call is kept in case it has
            ## side effects - TODO confirm before removing
            self.common.get_previous_state(source_mac, timestamps[0])

            ## Reset per file so a key from an earlier file is never reused
            r_key = None
            if reps is not None:
                self.logger.debug('Updating stored data')
                r_key = self.common.update_data(source_mac, reps,
                                                timestamps, preds, others,
                                                self.model_hash)

            ## Clean the sessions the model looked at; only the MAC inferred
            ## from the last session is kept, as a fallback source address
            inferred_mac = None
            for session_dict in self.model.sessions:
                _, inferred_mac = clean_session_dict(
                    session_dict, source_address=source_mac)

            if source_mac is None:
                source_mac = inferred_mac

            ## Make simple decisions based on vector differences and update
            ## times
            timestamp = timestamps[0].timestamp()
            labels, confs = zip(*preds)
            abnormality = 0.0

            ## Check if CPU supports AVX (advanced vector extension)
            if has_avx is None:
                cpu_info = get_cpu_info()
                has_avx = 'flags' in cpu_info and (
                    'avx' in cpu_info['flags']
                    or 'avx2' in cpu_info['flags'])
            if has_avx:
                ## Imported only when needed, after the AVX check
                from networkml.algorithms.sos.eval_SoSModel import eval_pcap
                abnormality = eval_pcap(str(fi),
                                        self.conf_labels,
                                        self.time_const,
                                        label=labels[0],
                                        rnn_size=self.rnn_size,
                                        model_path=self.model_path,
                                        model_type=algorithm)
            else:
                self.logger.warning(
                    "Can't run abnormality detection because this CPU doesn't support AVX"
                )

            prev_s = self.common.get_address_info(source_mac, timestamp)
            decision = self.common.basic_decision(key, source_mac, prev_s,
                                                  timestamp, labels, confs,
                                                  abnormality)
            if key in decision:
                decision[key]['source_ip'] = capture_ip_source
                decision[key]['source_mac'] = source_mac
            elif source_mac in decision:
                decision[source_mac]['source_ip'] = capture_ip_source
                decision[source_mac]['source_mac'] = source_mac
            self.logger.debug('Created message')

            ## Log at most the first three predictions; slicing avoids an
            ## IndexError when the model returns fewer than three
            for label, conf in zip(labels[:3], confs[:3]):
                self.logger.info(label + ' : ' + str(round(conf, 3)))

            # update Redis with decision
            if self.common.use_redis:
                if r_key is None:
                    self.logger.warning(
                        'Skipping Redis update for %s because no data key '
                        'is available', base_pcap)
                else:
                    redis_decision = {k: str(v) for k, v in decision.items()}
                    try:
                        self.common.r.hmset(r_key, redis_decision)
                    except Exception as e:  # pragma: no cover
                        self.logger.error(
                            'Failed to update keys in Redis because: {0}'.
                            format(str(e)))

            ## 'pcap' is added after the Redis write, so it is only part of
            ## the published message
            decision['pcap'] = base_pcap
            body = envelope(decision)
            self.logger.info('Message: ' + body)
            publish(body)

        ## Final message with empty data, then close the RabbitMQ connection
        publish(envelope(''))
        if self.common.use_rabbit:
            try:
                self.common.connection.close()
            except Exception as e:  # pragma: no cover
                self.logger.error(
                    'Unable to close rabbit connection because: {0}'.format(
                        str(e)))