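# The snippets in this listing assume roughly the following imports.
# CICFlowADDataset, DeepSVDD, and get_ratio_anomalies are project-local; the
# commented module paths are assumptions, not confirmed by the listing.
import os
import pickle
from itertools import tee

import numpy as np
from ray import tune
from sklearn.metrics import auc, precision_recall_curve, roc_auc_score

# from datasets.cicflow import CICFlowADDataset   # assumed location
# from deepSVDD import DeepSVDD                   # assumed location
# from utils.metrics import get_ratio_anomalies   # assumed location


# Fragment: a cross-validated _setup variant; the enclosing tune.Trainable
# subclass is not part of this listing.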
    def _setup(self, cfg):
        # self.training_iteration = 0
        self.test_labels = None
        self.val_labels = None
        self.val_scores = None
        self.test_scores = None

        # Each Ray Tune trial picks its own (train, val) date pair via the
        # injected __trial_index__; the test dates are shared across trials.
        trial_idx = cfg['__trial_index__']
        train, val = cfg['train_dates'][trial_idx]
        test = cfg['test_dates']

        self.dataset = CICFlowADDataset(root=os.path.abspath(cfg['data_path']),
                                        n_known_outlier_classes=1,
                                        train_dates=cfg['period'][train],
                                        val_dates=cfg['period'][val],
                                        test_dates=test,
                                        shuffle=True)

        self.model = DeepSVDD(cfg['objective'], cfg['nu'])
        self.model.set_trainer(optimizer_name=cfg['optimizer_name'],
                               lr=cfg['lr'],
                               n_epochs=cfg['n_epochs'],
                               lr_milestones=cfg['lr_milestone'],
                               batch_size=cfg['batch_size'],
                               weight_decay=cfg['weight_decay'],
                               device=cfg['device'],
                               n_jobs_dataloader=cfg["n_jobs_dataloader"])
        self.model.setup(self.dataset, cfg['net_name'])

        if cfg['pretrain']:
            self.model = self.model.pretrain(
                self.dataset,
                optimizer_name=cfg['optimizer_name'],
                lr=cfg['lr'],
                n_epochs=cfg['ae_n_epochs'],
                lr_milestones=cfg['ae_lr_milestone'],
                batch_size=cfg['ae_batch_size'],
                weight_decay=cfg['ae_weight_decay'],
                device=cfg['device'],
                n_jobs_dataloader=cfg["n_jobs_dataloader"])
class CICFLOWExp():
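    """Load a pre-trained Deep SVDD model on the CICFlow dataset and pickle its
    raw outputs (labels, scores, hypersphere center c and radius R)."""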
    def _setup(self, params):
        # self.training_iteration = 0
        self.test_labels = None
        self.val_labels = None
        self.val_scores = None
        self.test_scores = None
        self.params = params
        self.cfg = params['cfg']

        # self.dataset = NSLKDDADDataset(root=os.path.abspath(params['data_path']),
        #                                n_known_outlier_classes=1,
        #                                shuffle=True)
        self.dataset = CICFlowADDataset(root=os.path.abspath(params['data_path']),
                                        n_known_outlier_classes=1,
                                        shuffle=True)

        self.model = DeepSVDD(self.cfg['objective'], self.cfg['nu'])
        self.model.set_trainer(optimizer_name=self.cfg['optimizer_name'],
                               lr=self.cfg['lr'],
                               n_epochs=self.cfg['n_epochs'],
                               lr_milestones=self.cfg['lr_milestone'],
                               batch_size=self.cfg['batch_size'],
                               weight_decay=self.cfg['weight_decay'],
                               device=self.params['device'],
                               n_jobs_dataloader=self.cfg["n_jobs_dataloader"])
        self.model.setup(self.dataset, self.cfg['net_name'])
        self.model.load_model(params['model_path'])

        return self

    def _get_output(self):
        labels, outputs = self.model.get_output(self.dataset)

        # Hypersphere parameters learned by the Deep SVDD trainer.
        c = self.model.trainer.c
        R = self.model.trainer.R
        print((c, R))

        # Snapshot the local variables (labels, outputs, c, R) and pickle them.
        results = locals().copy()
        del results["self"]

        with open('model_output.pkl', 'wb') as pfile:
            pickle.dump(results, pfile)
class OneDaySVDDCICFlowExp(tune.Trainable):
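    """Evaluate (and optionally AE-pretrain) a Deep SVDD model on a single day
    of CICFlow data, reporting ROC/PR metrics for the train/val/test splits."""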
    def _setup(self, cfg):
        # self.training_iteration = 0
        self.test_labels = None
        self.val_labels = None
        self.val_scores = None
        self.test_scores = None

        # Wrap the single configured day so the dataset receives an array of
        # test dates.
        dates = np.array([cfg['dates']])

        self.dataset = CICFlowADDataset(root=os.path.abspath(cfg['data_path']),
                                        n_known_outlier_classes=1,
                                        test_dates=dates,
                                        shuffle=True,
                                        split=True)

        self.model = DeepSVDD(cfg['objective'], cfg['nu'])
        self.model.set_trainer(optimizer_name=cfg['optimizer_name'],
                               lr=cfg['lr'],
                               n_epochs=cfg['n_epochs'],
                               lr_milestones=cfg['lr_milestone'],
                               batch_size=cfg['batch_size'],
                               weight_decay=cfg['weight_decay'],
                               device=cfg['device'],
                               n_jobs_dataloader=cfg["n_jobs_dataloader"])
        self.model.setup(self.dataset, cfg['net_name'])
        self.model.load_model(cfg['model_path'])


        if cfg['pretrain']:
            self.model = self.model.pretrain(
                self.dataset,
                optimizer_name=cfg['optimizer_name'],
                lr=cfg['lr'],
                n_epochs=cfg['ae_n_epochs'],
                lr_milestones=cfg['ae_lr_milestone'],
                batch_size=cfg['ae_batch_size'],
                weight_decay=cfg['ae_weight_decay'],
                device=cfg['device'],
                n_jobs_dataloader=cfg["n_jobs_dataloader"])

    def _train(self):
        # One tune iteration: score every split with the current model.
        self.model.test(self.dataset, set_split="train")
        self.model.test(self.dataset, set_split="val")
        self.model.test(self.dataset, set_split="test")

        val_labels, val_scores, _ = self.model.trainer.get_results("val")
        test_labels, test_scores, _ = self.model.trainer.get_results("test")
        train_labels, train_scores, _ = self.model.trainer.get_results("train")

        results = locals().copy()
        del results["self"]

        self.results = results

        # Per-split metrics: ROC AUC, anomaly ratio, PR AUC, and the precision
        # and recall attained at the maximum-F1 point on the PR curve.
        rocs = {
            phase + '_auc_roc': roc_auc_score(labels, scores)
            for phase in ["val", "test", "train"]
            for labels, scores, _ in [self.model.trainer.get_results(phase)]
        }
        
        ratios = {
            phase + '_ratio_anomalies': get_ratio_anomalies(labels)
            for phase in ["val", "test", "train"]
            for labels, _, _ in [self.model.trainer.get_results(phase)]
        }

        prc = {
            phase + '_auc_pr': auc(recall, precision)
            for phase in ["val", "test", "train"]
            for labels, scores, _ in [self.model.trainer.get_results(phase)]
            for precision, recall, _ in
            [precision_recall_curve(labels, scores)]
        }

        # F1 across the PR curve; nanmax/nanargmax skip points where precision
        # and recall are both zero (0/0 -> NaN), and nanargmax keeps the index
        # aligned with the precision/recall arrays.
        get_f1 = lambda pr, rec: 2 * (pr * rec) / (pr + rec)
        max_f1 = lambda pr, rec: np.nanmax(get_f1(pr, rec))
        idx_max_f1 = lambda pr, rec: np.nanargmax(get_f1(pr, rec))

        f1s = {
            phase + '_max_f1': max_f1(precision, recall)
            for phase in ["val", "test", "train"]
            for labels, scores, _ in [self.model.trainer.get_results(phase)]
            for precision, recall, _ in
            [precision_recall_curve(labels, scores)]
        }
        prs = {
            phase + '_precision_max_f1':
            precision[idx_max_f1(precision, recall)]
            for phase in ["val", "test", "train"]
            for labels, scores, _ in [self.model.trainer.get_results(phase)]
            for precision, recall, _ in
            [precision_recall_curve(labels, scores)]
        }

        recs = {
            phase + '_recall_max_f1': recall[idx_max_f1(precision, recall)]
            for phase in ["val", "test", "train"]
            for labels, scores, _ in [self.model.trainer.get_results(phase)]
            for precision, recall, _ in
            [precision_recall_curve(labels, scores)]
        }

        return {**rocs, **ratios, **prc, **prs, **recs, **f1s}


    def _save(self, checkpoint_dir):
        checkpoint_path = os.path.join(checkpoint_dir,
                                       str(self.trial_id) + "_model.pth")
        self.model.save_model(checkpoint_path)
        # Persist the latest per-split results next to the model checkpoint.
        with open(os.path.join(checkpoint_dir, 'results.pkl'), "wb") as pfile:
            pickle.dump(self.results, pfile)
        return checkpoint_path

    def _restore(self, checkpoint_path):
        self.model.load_model(checkpoint_path)
class DriftCICFlowExp(tune.Trainable):
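    """Walk through consecutive CICFlow days, optionally fine-tuning the loaded
    Deep SVDD model on each day and testing on the following one (drift study)."""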
    def _setup(self, params):
        # self.training_iteration = 0
        self.test_labels = None
        self.val_labels = None
        self.val_scores = None
        self.test_scores = None
        self.params = params
        self.cfg = params['cfg']
        self.incremental = params['incremental']
        self.dates = self._get_train_test(params['dates'])

        self.dataset = CICFlowADDataset(
            root=os.path.abspath(self.params['data_path']),
            n_known_outlier_classes=1,
            train_dates=[params['dates'][0]],
            val_dates=[params['dates'][0]],
            test_dates=[params['dates'][0]],
            shuffle=True)

        self.model = DeepSVDD(self.cfg['objective'], self.cfg['nu'])
        self.model.set_trainer(optimizer_name=self.cfg['optimizer_name'],
                               lr=self.cfg['lr'],
                               n_epochs=self.cfg['n_epochs'],
                               lr_milestones=self.cfg['lr_milestone'],
                               batch_size=self.cfg['batch_size'],
                               weight_decay=self.cfg['weight_decay'],
                               device=self.params['device'],
                               n_jobs_dataloader=self.cfg["n_jobs_dataloader"])
        self.model.setup(self.dataset, self.cfg['net_name'])
        self.model.load_model(params['model_path'])
        self.model.test(self.dataset)

    def _get_train_test(self, dates):
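        # Pairwise iterator over the dates: (d0, d1), (d1, d2), ... so each day
        # is trained on and the following day is tested on.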
        train, test = tee(dates)
        next(test, None)
        return zip(train, test)

    def _train(self):
        # Advance to the next (train, test) day pair; tell Tune the trial is
        # done once every pair has been consumed.
        try:
            train, test = next(self.dates)
        except StopIteration:
            return {'done': True}

        self.dataset = CICFlowADDataset(
            root=os.path.abspath(self.params['data_path']),
            n_known_outlier_classes=1,
            train_dates=[train],
            val_dates=[train],
            test_dates=[test],
            shuffle=True)

        # Incremental mode: fine-tune the current model for a single epoch on
        # the new day before evaluating.
        if self.incremental:
            self.model.train(dataset=self.dataset,
                             optimizer_name=self.cfg['optimizer_name'],
                             lr=self.cfg['lr'],
                             n_epochs=1,
                             lr_milestones=self.cfg['lr_milestone'],
                             batch_size=self.cfg['batch_size'],
                             weight_decay=self.cfg['weight_decay'],
                             device=self.params['device'],
                             n_jobs_dataloader=self.cfg["n_jobs_dataloader"])

        self.model.test(self.dataset, set_split="test")
        self.model.test(self.dataset, set_split="train")

        test_labels, test_scores, _ = self.model.trainer.get_results("test")

        results = locals().copy()
        del results["self"]

        self.results = results

        rocs = {
            phase + '_auc_roc': roc_auc_score(labels, scores)
            for phase in ["test"]
            for labels, scores, _ in [self.model.trainer.get_results(phase)]
        }

        prs = {
            phase + '_auc_pr': auc(recall, precision)
            for phase in ["test"]
            for labels, scores, _ in [self.model.trainer.get_results(phase)]
            for precision, recall, _ in
            [precision_recall_curve(labels, scores)]
        }

        return {**rocs, **prs}

    def _save(self, checkpoint_dir):
        checkpoint_path = os.path.join(checkpoint_dir,
                                       str(self.trial_id) + "_model.pth")
        self.model.save_model(checkpoint_path)
        # Persist the latest results next to the model checkpoint.
        with open(os.path.join(checkpoint_dir, 'results.pkl'), "wb") as pfile:
            pickle.dump(self.results, pfile)
        return checkpoint_path

    def _restore(self, checkpoint_path):
        self.model.load_model(checkpoint_path)
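

# Usage sketch (an assumption, not part of the original listing): the classes
# above implement the legacy Ray Tune Trainable API (_setup/_train/_save/
# _restore) and would typically be launched with tune.run. All config values
# below are illustrative placeholders; 'cicflow_mlp' and the file paths are
# hypothetical names.
if __name__ == "__main__":
    tune.run(
        OneDaySVDDCICFlowExp,
        config={
            'dates': '2017-07-04',             # placeholder day
            'data_path': './data/cicflow',     # placeholder dataset root
            'model_path': './svdd_model.pth',  # placeholder checkpoint
            'net_name': 'cicflow_mlp',         # placeholder network name
            'objective': 'one-class',
            'nu': 0.1,
            'pretrain': False,
            'optimizer_name': 'adam',
            'lr': 1e-4,
            'n_epochs': 50,
            'lr_milestone': [25],
            'batch_size': 128,
            'weight_decay': 1e-6,
            'device': 'cuda',
            'n_jobs_dataloader': 0,
        },
        stop={'training_iteration': 1})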