Python load_data Examples, netml.utils.tool.load_data Python Examples

Example #1

0

Show file

    def test_extract_only(self):
        """extract file-labeled & flag-labeled features to path"""
        feature_path = TEMPDIR_PATH / 'netml-test-features-a'

        self.try_execute(
            'learn',
            'extract',

            '-p', DATA_PATH / 'demo.pcap',
            '-l', DATA_PATH / 'demo.csv',

            '--pcap-normal', DATA_PATH / 'srcIP_10.42.0.1' / 'srcIP_10.42.0.1_normal.pcap',
            '--pcap-abnormal', DATA_PATH / 'srcIP_10.42.0.1' / 'srcIP_10.42.0.119_anomaly.pcap',

            '-f', feature_path,
        )

        (features, labels) = load_data(feature_path)

        # The assertions treat the extracted rows as three consecutive segments:
        # a mixed-label segment, an all-normal segment and an all-abnormal segment.
        # (presumably: demo.pcap/demo.csv, --pcap-normal, --pcap-abnormal -- confirm)
        (n_mixed, n_normal, n_abnormal) = (88, 4979, 310)
        n_total = n_mixed + n_normal + n_abnormal
        normal_end = n_mixed + n_normal

        self.assertEqual(len(features), n_total)
        self.assertEqual(len(labels), n_total)

        # 0 is normal; 1 is abnormal
        # first segment: contains both classes
        self.assertTrue(labels[:n_mixed].any())
        self.assertFalse(labels[:n_mixed].all())

        # second segment: entirely normal
        # (the slice must end at n_mixed + n_normal, not at n_normal, or the
        # last n_mixed rows of this segment go unchecked)
        self.assertFalse(labels[n_mixed:normal_end].any())

        # third segment: entirely abnormal
        self.assertTrue(labels[normal_end:].all())

Example #2

0

Show file

File: ndm_demo.py Project: shinan6/netml

def main(data_file=DATA_FILE):
    """Train and evaluate an 'OCSVM' detection model on the data in ``data_file``.

    The data is split into train and test sets, the model is trained on the
    training features and evaluated on the test set. The model together with
    ``ndm.history`` is then dumped to ``<ndm.model_name>-results.dat`` in the
    directory of ``data_file``.

    Args:
        data_file: path of the data to load; must provide ``.parent``
            (e.g. a ``pathlib.Path``).
    """
    features, labels = load_data(data_file)

    # hold out 33% of the samples for testing
    X_train, X_test, y_train, y_test = train_test_split(
        features,
        labels,
        test_size=0.33,
        random_state=RANDOM_STATE,
    )
    print(
        f'X_train.shape: {X_train.shape}, X_test.shape: {X_test.shape}, y_train.shape: {y_train.shape}, '
        f'y_test.shape: {y_test.shape}'
    )

    # model_name in ['OCSVM', 'KDE','IF', 'AE', 'GMM', 'PCA']
    model_name = 'OCSVM'
    print(f'model_name: {model_name}')

    # build the detection model and wrap it for training/evaluation
    detector = generate_model(model_name)
    ndm = MODEL(detector, score_metric='auc', verbose=10, random_state=RANDOM_STATE)

    # fit on the training features only (y_train is not passed to train)
    ndm.train(X_train)

    # score the fitted model against the held-out set
    ndm.test(X_test, y_test)

    # persist the model and its history next to the input data
    results_path = data_file.parent / f'{ndm.model_name}-results.dat'
    dump_data((detector, ndm.history), out_file=results_path)

    print(ndm.train.tot_time, ndm.test.tot_time, ndm.score)

Example #3

0

Show file

File: cli.py Project: mveerara/netml

    def __call__(self, args, parser):
        """Classify the flows of a pcap with a stored model and print one line per reported flow.

        The pcap is read from ``args.pcap`` or, when that is ``None``, from
        standard input. The model is loaded from ``args.model``.

        Flows classified as ``CLASS_NORMAL`` are printed only when
        ``args.report_all`` is truthy or, if ``args.report_all`` is ``None``,
        when ``args.verbosity`` is greater than 1. All other flows are always
        printed.
        """
        source = args.pcap

        if source is None:
            # no path given: the capture must be piped in, not typed at a terminal
            if sys.stdin.isatty():
                parser.error("the following arguments are required "
                             "when standard input is not specified: -p/--pcap")

            source = sys.stdin.buffer

        pcap = self.extract(source)

        # TODO: catch model file issues such as EOFError
        (model, _train_history) = load_data(args.model)

        # flow_key[4] values that get a readable protocol label in the output
        protocol_labels = {6: 'TCP', 17: 'UDP'}

        predictions = model.predict(pcap.features)

        for ((flow_key, flow_packets), prediction) in zip(pcap.flows, predictions):
            if prediction == CLASS_NORMAL:
                # an explicit report_all setting wins; otherwise verbosity decides
                if args.report_all is None:
                    suppressed = args.verbosity <= 1
                else:
                    suppressed = not args.report_all

                if suppressed:
                    continue

                class_tag = 'NORMAL'
            elif prediction == CLASS_ABNORMAL:
                class_tag = 'ANOMALY' | colors.red | colors.bold
            else:
                class_tag = '[unclassified]'

            packet_type = protocol_labels.get(flow_key[4], '[protocol-other]')

            # time span of the flow, taken from its first and last packets
            first_seen = datetime.datetime.fromtimestamp(flow_packets[0].time)
            last_seen = datetime.datetime.fromtimestamp(flow_packets[-1].time)

            # the date shown is that of the first packet only
            print(
                f'[{first_seen.date()}] [{first_seen.time()} – {last_seen.time()}]',
                f'{flow_key[0]}:{flow_key[2]} → {flow_key[1]}:{flow_key[3]} [{packet_type}]',
                class_tag,
            )

Example #4

0

Show file

    def test_extract_train(self):
        """extract features and train model"""
        output_path = TEMPDIR_PATH / 'netml-test-output-a'
        command = (
            'learn',
            '-p', DATA_PATH / 'demo.pcap',
            '-o', output_path,
        )

        self.try_execute(*command)

        # the output file is expected to hold a (model, training history) pair
        trained_model, history = load_data(output_path)

        self.assertTrue(hasattr(trained_model, 'predict'))

        # no label file was given, so the history is expected to be falsy
        self.assertFalse(history)

Example #5

0

Show file

    def test_extract_only(self):
        """extract features to path"""
        output_path = TEMPDIR_PATH / 'netml-test-features-a'
        command = (
            'learn',
            'extract',
            '-p', DATA_PATH / 'demo.pcap',
            '-f', output_path,
        )

        self.try_execute(*command)

        # the feature file is expected to hold a (features, labels) pair
        extracted, targets = load_data(output_path)

        # something was extracted, and with no label source the labels are None
        self.assertGreater(len(extracted), 0)
        self.assertIsNone(targets)

Example #6

0

Show file

    def test_extract_train(self):
        """extract features & labels and train & test model"""
        output_path = TEMPDIR_PATH / 'netml-test-output-a'
        command = (
            'learn',
            '-p', DATA_PATH / 'demo.pcap',
            '-l', DATA_PATH / 'demo.csv',
            '-o', output_path,
        )

        self.try_execute(*command)

        # the output file is expected to hold a (model, training history) pair
        trained_model, history = load_data(output_path)

        self.assertTrue(hasattr(trained_model, 'predict'))

        # labels were supplied, so a positive score must have been recorded
        self.assertIsNotNone(history)
        self.assertIn('score', history)
        self.assertGreater(history['score'], 0)

Example #7

0

Show file

    def test_extract_multiple(self):
        """extract features from multiple unlabeled pcap files"""
        output_path = TEMPDIR_PATH / 'netml-test-features-a'
        capture_dir = DATA_PATH / 'srcIP_10.42.0.1'

        # (filenames reflect what's in them but here we specify no labels for testing)
        command = (
            'learn',
            'extract',
            '-f', output_path,
            '-p', capture_dir / 'srcIP_10.42.0.119_anomaly.pcap',
            '-p', capture_dir / 'srcIP_10.42.0.1_normal.pcap',
        )

        self.try_execute(*command)

        # the feature file is expected to hold a (features, labels) pair
        extracted, targets = load_data(output_path)

        # both captures contribute rows; without a label source labels are None
        self.assertEqual(len(extracted), 5_289)
        self.assertIsNone(targets)

Example #8

0

Show file

    def test_extract_train(self):
        """extract flag-labeled features and train & test model"""
        output_path = TEMPDIR_PATH / 'netml-test-output-a'
        capture_dir = DATA_PATH / 'srcIP_10.42.0.1'
        command = (
            'learn',
            '--pcap-normal', capture_dir / 'srcIP_10.42.0.1_normal.pcap',
            '--pcap-abnormal', capture_dir / 'srcIP_10.42.0.119_anomaly.pcap',
            '-o', output_path,
        )

        self.try_execute(*command)

        # the output file is expected to hold a (model, training history) pair
        trained_model, history = load_data(output_path)

        self.assertTrue(hasattr(trained_model, 'predict'))

        # the flags supply labels, so a positive score must have been recorded
        self.assertIsNotNone(history)
        self.assertIn('score', history)
        self.assertGreater(history['score'], 0)

Example #9

0

Show file

    def test_extract_multiple(self):
        """extract features & labels from multiple pcap & label files"""
        output_path = TEMPDIR_PATH / 'netml-test-features-a'
        capture = DATA_PATH / 'demo.pcap'
        label_file = DATA_PATH / 'demo.csv'

        # the same capture/label pair is deliberately passed twice
        command = (
            'learn',
            'extract',
            '-f', output_path,
            '-p', capture,
            '-p', capture,
            '-l', label_file,
            '-l', label_file,
        )

        self.try_execute(*command)

        # the feature file is expected to hold a (features, labels) pair
        extracted, targets = load_data(output_path)

        # features and labels must line up row for row
        expected_rows = 176
        self.assertEqual(len(extracted), expected_rows)
        self.assertEqual(len(targets), expected_rows)

Example #10

0

Show file

    def test_extract_only(self):
        """extract flag-labeled features to path"""
        output_path = TEMPDIR_PATH / 'netml-test-features-a'
        capture_dir = DATA_PATH / 'srcIP_10.42.0.1'
        command = (
            'learn',
            'extract',
            '--pcap-normal', capture_dir / 'srcIP_10.42.0.1_normal.pcap',
            '--pcap-abnormal', capture_dir / 'srcIP_10.42.0.119_anomaly.pcap',
            '-f', output_path,
        )

        self.try_execute(*command)

        # the feature file is expected to hold a (features, labels) pair
        extracted, targets = load_data(output_path)

        expected_rows = 5289
        self.assertEqual(len(extracted), expected_rows)
        self.assertEqual(len(targets), expected_rows)

        # rows before this index must all be normal, the rest all abnormal
        boundary = 4979

        # 0 is normal
        self.assertFalse(targets[:boundary].any())

        # 1 is abnormal
        self.assertTrue(targets[boundary:].all())

Example #11

0

Show file

File: cli.py Project: mveerara/netml

    def perform_train(self, feature_file, output_file):
        """Train a detection model on stored features and dump it to ``output_file``.

        Features (and optional labels) are loaded from ``feature_file``. When
        labels are present the data is split into train and test sets and the
        trained model is also evaluated; without labels the model is trained on
        all features and no evaluation takes place.

        The ``(model, ndm.history)`` pair is written to ``output_file`` and a
        summary table is emitted through ``self.vtable``.

        Raises:
            ValueError: if the model-training params (together with the
                ``random_state`` argument this method always passes) cannot be
                bound to the signature of the selected model class.
        """
        (features, labels) = load_data(feature_file)

        # train (and test split)
        #
        # TODO: might make sense to split testing into a separate "action," such that
        # TODO: it's done by default, but also can be applied modularly (on its own)
        #
        if labels is None:
            # unlabeled data: nothing to evaluate against, so train on everything
            features_train = features
            features_test = labels_test = _labels_train = None

            self.vtable(2, [
                ('', 'features', 'labels'),
                ('train', features_train.shape, 'n/a'),
                ('test', 'n/a', 'n/a'),
            ], title='data shapes')
        else:
            (
                features_train,
                features_test,
                _labels_train,
                labels_test,
            ) = sklearn.model_selection.train_test_split(features,
                                                         labels,
                                                         test_size=self.args.test_size,
                                                         random_state=self.args.random_state)

            # training labels are not passed to the model, hence 'n/a' in the train row
            self.vtable(2, [
                ('', 'features', 'labels'),
                ('train', features_train.shape, 'n/a'),
                ('test', features_test.shape, labels_test.shape),
            ], title='data shapes')

        self.vprint(2, f'model name: {self.args.algorithm}')

        # param may be:
        # * unspecified (None)
        # * just a dict for this model
        # * dict with sub-dicts for many models
        params = self.args.param or {}
        if isinstance(params.get(self.args.algorithm), dict):
            params = params[self.args.algorithm]

        self.vprint(2, f'param override: {params}')

        model_class = self.load_algorithmic_model(self.args.algorithm)

        # Validate against exactly the argument set the constructor call below
        # uses. Including random_state here means a conflicting "random_state"
        # override is reported as a param-binding error rather than surfacing
        # later as a bare TypeError from the constructor call.
        try:
            inspect.signature(model_class).bind(
                random_state=self.args.random_state,
                **params
            )
        except TypeError as exc:
            raise ValueError(
                f"model-training params failed to bind for {self.args.algorithm}:\n" +
                textwrap.indent(yaml.dump(params).strip(), '  ')
            ) from exc

        # create detection model
        model = model_class(
            random_state=self.args.random_state,
            **params
        )
        model.name = self.args.algorithm  # FIXME
        ndm = netml.ndm.model.MODEL(
            model,
            score_metric='auc',
            verbose=self.args.verbosity,
            random_state=self.args.random_state,
        )

        # train the model from the train set
        ndm.train(features_train)
        time_train = ndm.train.tot_time

        # evaluate the model
        if features_test is not None and labels_test is not None:
            ndm.test(features_test, labels_test)
            time_test = ndm.test.tot_time
        else:
            time_test = None

        # dump data to disk
        dump_data((model, ndm.history), out_file=output_file)

        # ndm.score may be absent (e.g. when no evaluation ran), hence the getattr default
        self.vtable(1, [
            ('train time (m)', 'test time (m)', 'model score (auc)'),
            (
                time_train,
                'n/a' if time_test is None else time_test,
                getattr(ndm, 'score', 'n/a'),
            ),
        ], title='training performance')