def _evaluate(params):
    """Build precision/recall/F1 ``Evaluation`` rows for one binarizer setting.

    Args:
        params: A 4-tuple ``(e, base, heuristic, strategy)``. ``e`` must
            expose ``model_file.name``, ``binarizer``, ``dataset.attribute_keys``
            and ``ad_.supports_attributes``; the remaining three values are
            forwarded unchanged to ``e.binarizer.binarize`` and stored on every
            ``Evaluation``.

    Returns:
        A list of ``Evaluation`` objects: for each axis in ``(0, 1, 2)`` and
        each attribute key, one row labelled ``'Normal'`` followed by one row
        labelled ``'Anomaly'``.

    Note:
        If no ``Model`` row matches the file name, ``model`` is presumably
        ``None`` (assuming SQLAlchemy ``Query.first()`` semantics -- TODO
        confirm) and the first attribute access on it raises
        ``AttributeError``, as before.
    """
    e, base, heuristic, strategy = params

    # Always release the session, even when the lookup itself fails.
    session = Session(get_engine())
    try:
        model = session.query(Model).filter_by(file_name=e.model_file.name).first()
    finally:
        session.close()

    # Generate evaluation frames
    y_pred = e.binarizer.binarize(base=base, heuristic=heuristic,
                                  strategy=strategy, go_backwards=False)
    y_true = e.binarizer.get_targets()

    def make_evaluation(label, perspective, attribute_name, axis,
                        precision, recall, f1):
        # Everything that varies per row is an explicit argument, so the
        # factory does not depend on loop variables captured by closure.
        return Evaluation(model_id=model.id, file_name=model.file_name,
                          label=label, perspective=perspective,
                          attribute_name=attribute_name, axis=axis, base=base,
                          heuristic=heuristic, strategy=strategy,
                          precision=precision, recall=recall, f1=f1)

    evaluations = []
    for axis in (0, 1, 2):
        for i, attribute_name in enumerate(e.dataset.attribute_keys):
            # The first attribute is reported as control flow, all others as data.
            perspective = 'Control Flow' if i == 0 else 'Data'

            if i > 0 and not e.ad_.supports_attributes:
                # Detector cannot score non-first attributes: record zeros.
                p = r = f = (0.0, 0.0)
            else:
                # Slice with i:i + 1 to keep the last axis while selecting
                # only attribute i.
                yp = label_collapse(y_pred[:, :, i:i + 1], axis=axis).compressed()
                yt = label_collapse(y_true[:, :, i:i + 1], axis=axis).compressed()
                p, r, f, _ = metrics.precision_recall_fscore_support(
                    yt, yp, labels=[0, 1])

            # Index 0 of each metric belongs to label 0 ('Normal'),
            # index 1 to label 1 ('Anomaly').
            for idx, label in enumerate(('Normal', 'Anomaly')):
                evaluations.append(
                    make_evaluation(label, perspective, attribute_name, axis,
                                    p[idx], r[idx], f[idx]))

    return evaluations
def evaluate(y_true, y_pred):
    """Compute macro-averaged precision/recall/F1 per axis.

    Args:
        y_true: Ground-truth labels, passed to ``label_collapse``.
        y_pred: Predicted labels; its ``ndim`` decides how many entries of
            ``Axis.keys()`` are evaluated (the first ``ndim + 1``).

    Returns:
        A dict keyed by axis. Each value is a dict with the keys
        ``precision``, ``recall``, ``f1`` and ``support``, holding the values
        returned by ``metrics.precision_recall_fscore_support`` with
        ``average='macro'``.
    """
    def score_axis(axis):
        # Collapse both label arrays along the axis, then compare only the
        # values left after ``compressed()``.
        truth = label_collapse(y_true, axis=axis)
        guess = label_collapse(y_pred, axis=axis)
        precision, recall, f1, support = metrics.precision_recall_fscore_support(
            truth.compressed(), guess.compressed(), average='macro')
        return dict(precision=precision, recall=recall, f1=f1, support=support)

    selected_axes = Axis.keys()[:y_pred.ndim + 1]
    return {axis: score_axis(axis) for axis in selected_axes}
def threshold_binarize(self, tau, scores, axis=0):
    """Binarize ``scores`` with threshold ``tau`` and keep the input mask.

    Args:
        tau: Threshold; an entry becomes 1 when its score is strictly greater
            than ``tau`` and 0 otherwise.
        scores: Anomaly scores. May be a ``numpy.ma.MaskedArray`` (its mask is
            carried over to the result) or a plain array (treated as having
            no masked entries).
        axis: When 0 or 1, the binarized result is additionally passed
            through ``label_collapse`` along that axis; any other value
            returns the element-wise result unchanged.

    Returns:
        A masked integer array of 0/1 predictions, or whatever
        ``label_collapse`` returns when ``axis`` is 0 or 1.
    """
    # getdata/getmask accept both masked and plain arrays, whereas
    # ``scores.data`` on a plain ndarray is a memoryview that cannot be
    # compared against ``tau``.
    raw_scores = np.ma.getdata(scores)
    score_mask = np.ma.getmask(scores)

    # Apply the threshold function (Theta in the paper)
    predictions = np.array(raw_scores > tau, dtype=int)

    # Apply mask
    predictions = np.ma.array(predictions, mask=score_mask)

    # Positive axis flatten predictions
    if axis in [0, 1]:
        predictions = label_collapse(predictions, axis=axis)

    return predictions
def __init__(self, result, mask, features, targets=None):
    """Store the detector output and align the mask with the score array.

    Args:
        result: Object exposing a ``scores`` array (only ``scores.shape`` is
            read here).
        mask: Array-like with a ``shape`` attribute. Kept untouched in
            ``self._mask``; a possibly broadcast copy is kept in
            ``self.mask_``.
        features: Stored as-is on ``self.features``.
        targets: Optional ground-truth labels. When given, ``self.targets``
            maps each axis in ``0, 1, 2`` to
            ``self.mask(label_collapse(targets, axis=axis))``; otherwise
            ``self.targets`` is ``None``.
    """
    self.result = result
    self._mask = mask   # mask exactly as supplied by the caller
    self.mask_ = mask   # working mask, possibly expanded below
    self.features = features
    self._targets = targets

    # Try to fix dimensions
    scores_shape = self.result.scores.shape
    if self.mask_.shape != scores_shape:
        # Compare the number of axes, not ``len(self.mask_)``: the latter is
        # the size of the first axis and only equals the rank by accident.
        if len(self.mask_.shape) != len(scores_shape):
            # Append a trailing axis and repeat the mask across it so the
            # mask gains the same last dimension as the scores.
            self.mask_ = np.expand_dims(self.mask_, axis=-1)
            self.mask_ = np.repeat(self.mask_, scores_shape[-1], axis=-1)

    self.targets = None
    if self._targets is not None:
        self.targets = {
            a: self.mask(label_collapse(self._targets, axis=a))
            for a in [0, 1, 2]
        }
def get_tau(self, scores, heuristic=Heuristic.DEFAULT, strategy=Strategy.SINGLE, axis=0, taus=None):
    """Choose the threshold(s) tau for ``scores`` with the given heuristic.

    Args:
        scores: Anomaly scores. Wrapped with ``self.mask`` when not already
            a ``numpy.ma.MaskedArray``.
        heuristic: ``Heuristic`` constant selecting how tau is derived.
        strategy: ``Strategy`` constant passed to ``self.split_by_strategy``
            and ``self.correct_shape``.
        axis: Axis forwarded to the collapse helpers and heuristics.
        taus: Optional candidate thresholds shared by every split. When
            ``None``, candidates come from ``self.get_candidate_taus`` per
            split.

    Returns:
        ``np.array([0.5])`` for ``Heuristic.DEFAULT``; otherwise the result
        of ``self.correct_shape`` applied to the per-split thresholds.
    """
    # The default heuristic is a fixed threshold and needs no data.
    if heuristic == Heuristic.DEFAULT:
        return np.array([0.5])

    masked_scores = scores if isinstance(scores, np.ma.MaskedArray) else self.mask(scores)
    parts = self.split_by_strategy(masked_scores, strategy)

    # Mean/median work directly on the max-collapsed scores, ignoring
    # entries that round (one decimal) to zero or below.
    if heuristic in (Heuristic.MEAN, Heuristic.MEDIAN):
        aggregate = np.mean if heuristic == Heuristic.MEAN else np.median
        collapsed = [max_collapse(part, axis=axis) for part in parts]
        return self.correct_shape(
            [aggregate(c[np.round(c, 1) > 0]) for c in collapsed], strategy)

    # All remaining heuristics search over candidate thresholds.
    if taus is None:
        candidates = [self.get_candidate_taus(part, axis=axis) for part in parts]
    else:
        candidates = [taus] * len(parts)

    def per_split(fn, pick_by_heuristic=False):
        # Run one heuristic function on every (scores, candidates) pair;
        # optionally index each result by the requested heuristic.
        chosen = []
        for part, cand in zip(parts, candidates):
            outcome = fn(taus=cand, scores=part,
                         theta=self.threshold_binarize, axis=axis)
            chosen.append(outcome[heuristic] if pick_by_heuristic else outcome)
        return chosen

    selected = None
    if heuristic == Heuristic.BEST:
        # BEST needs ground truth, split and collapsed like the scores.
        truth_parts = self.split_by_strategy(self.get_targets(axis=2), strategy)
        truth_parts = [label_collapse(t, axis=axis) for t in truth_parts]
        selected = [
            best_heuristic(taus=cand, theta=self.threshold_binarize,
                           y_true=truth, scores=part, axis=axis)
            for part, cand, truth in zip(parts, candidates, truth_parts)
        ]
    elif heuristic == Heuristic.RATIO:
        selected = per_split(ratio_heuristic)
    elif heuristic in (Heuristic.ELBOW_DOWN, Heuristic.ELBOW_UP):
        selected = per_split(elbow_heuristic, pick_by_heuristic=True)
    elif heuristic in (Heuristic.LP_LEFT, Heuristic.LP_MEAN, Heuristic.LP_RIGHT):
        selected = per_split(lowest_plateau_heuristic, pick_by_heuristic=True)

    return self.correct_shape(selected, strategy)