Example #1
0
def _evaluate(params):
    """Build precision/recall/F1 ``Evaluation`` rows for one binarizer setting.

    Args:
        params: A 4-tuple ``(e, base, heuristic, strategy)``. ``e`` must
            expose ``model_file.name``, ``binarizer``,
            ``dataset.attribute_keys`` and ``ad_.supports_attributes``. The
            other three values are forwarded to ``e.binarizer.binarize`` and
            stored on every returned ``Evaluation``.

    Returns:
        A list of ``Evaluation`` objects. For every axis in ``[0, 1, 2]`` and
        every attribute key there is one entry labelled ``'Normal'`` followed
        by one labelled ``'Anomaly'``.
    """
    e, base, heuristic, strategy = params

    # Release the session even when the lookup raises, so it is never leaked.
    session = Session(get_engine())
    try:
        model = session.query(Model).filter_by(file_name=e.model_file.name).first()
    finally:
        session.close()
    # NOTE(review): presumably ``first()`` yields None when no row matches, in
    # which case the ``model.id`` access below fails -- confirm against callers.

    # Generate evaluation frames
    y_pred = e.binarizer.binarize(base=base, heuristic=heuristic, strategy=strategy, go_backwards=False)
    y_true = e.binarizer.get_targets()

    def get_evaluation(label, perspective, attribute_name, axis, precision, recall, f1):
        # Everything that varies per iteration is passed in explicitly instead
        # of being captured from the enclosing loops.
        return Evaluation(model_id=model.id, file_name=model.file_name,
                          label=label, perspective=perspective, attribute_name=attribute_name,
                          axis=axis, base=base, heuristic=heuristic, strategy=strategy,
                          precision=precision, recall=recall, f1=f1)

    evaluations = []
    for axis in [0, 1, 2]:
        for i, attribute_name in enumerate(e.dataset.attribute_keys):
            # The first attribute key is reported as control flow, the rest as data.
            perspective = 'Control Flow' if i == 0 else 'Data'
            if i > 0 and not e.ad_.supports_attributes:
                # Detector cannot score data attributes: emit zeroed placeholders.
                evaluations.append(get_evaluation('Normal', perspective, attribute_name, axis, 0.0, 0.0, 0.0))
                evaluations.append(get_evaluation('Anomaly', perspective, attribute_name, axis, 0.0, 0.0, 0.0))
            else:
                # Slice with i:i + 1 to keep the attribute dimension in place.
                yp = label_collapse(y_pred[:, :, i:i + 1], axis=axis).compressed()
                yt = label_collapse(y_true[:, :, i:i + 1], axis=axis).compressed()
                p, r, f, _ = metrics.precision_recall_fscore_support(yt, yp, labels=[0, 1])
                evaluations.append(get_evaluation('Normal', perspective, attribute_name, axis, p[0], r[0], f[0]))
                evaluations.append(get_evaluation('Anomaly', perspective, attribute_name, axis, p[1], r[1], f[1]))

    return evaluations
 def evaluate(y_true, y_pred):
     """Score predictions against targets once per axis.

     The axes are the first ``y_pred.ndim + 1`` entries of ``Axis.keys()``.
     For each of them both arrays are passed through ``label_collapse``,
     compressed, and handed to ``metrics.precision_recall_fscore_support``
     with ``average='macro'``.

     Returns:
         A dict mapping each axis to a dict with the keys ``precision``,
         ``recall``, ``f1`` and ``support``.
     """
     results = {}
     for current_axis in Axis.keys()[:y_pred.ndim + 1]:
         collapsed_true = label_collapse(y_true, axis=current_axis)
         collapsed_pred = label_collapse(y_pred, axis=current_axis)
         precision, recall, f_score, support = metrics.precision_recall_fscore_support(
             collapsed_true.compressed(),
             collapsed_pred.compressed(),
             average='macro')
         results[current_axis] = {
             'precision': precision,
             'recall': recall,
             'f1': f_score,
             'support': support,
         }
     return results
    def threshold_binarize(self, tau, scores, axis=0):
        """Binarize ``scores`` against the threshold ``tau``, keeping the mask.

        Args:
            tau: Threshold. Entries strictly greater than it become 1, all
                other entries become 0.
            scores: Score array. A ``np.ma.MaskedArray`` keeps its mask on
                the result; a plain array or array-like is treated as
                unmasked.
            axis: When 0 or 1 the predictions are additionally passed through
                ``label_collapse`` with this axis. Any other value returns
                them uncollapsed.

        Returns:
            A masked integer array of 0/1 predictions, or whatever
            ``label_collapse`` returns for it when ``axis`` is 0 or 1.
        """
        # Apply the threshold function (Theta in the paper). getdata/getmask
        # behave like .data/.mask for masked arrays but also accept plain
        # arrays, whose .data attribute is a raw buffer that cannot be compared.
        predictions = np.array(np.ma.getdata(scores) > tau, dtype=int)

        # Apply mask
        predictions = np.ma.array(predictions, mask=np.ma.getmask(scores))

        # Collapse the predictions along the requested axis
        if axis in [0, 1]:
            predictions = label_collapse(predictions, axis=axis)

        return predictions
    def __init__(self, result, mask, features, targets=None):
        """Store the inputs and expand the mask to the shape of the scores.

        Args:
            result: Object whose ``scores`` attribute is an array; its shape
                is the shape the mask is expanded to.
            mask: Array mask. Kept untouched in ``self._mask``; the version
                matched to the score shape is kept in ``self.mask_``.
            features: Stored as-is in ``self.features``.
            targets: Optional targets, stored in ``self._targets``. When
                given, ``self.targets`` maps each axis in ``[0, 1, 2]`` to
                ``self.mask(label_collapse(targets, axis=axis))``; otherwise
                ``self.targets`` is ``None``.
        """
        self.result = result
        self._mask = mask
        self.mask_ = mask
        self.features = features
        self._targets = targets

        # Try to fix dimensions
        score_shape = self.result.scores.shape
        if self.mask_.shape != score_shape:
            # Compare the NUMBER of dimensions. Comparing len(mask) (the size
            # of the first axis) with the score rank skipped the new axis
            # whenever the first axis happened to have that many entries.
            if len(self.mask_.shape) != len(score_shape):
                self.mask_ = np.expand_dims(self.mask_, axis=-1)
            self.mask_ = np.repeat(self.mask_, score_shape[-1], axis=-1)

        self.targets = None

        if self._targets is not None:
            self.targets = {
                a: self.mask(label_collapse(self._targets, axis=a))
                for a in [0, 1, 2]
            }
    def get_tau(self,
                scores,
                heuristic=Heuristic.DEFAULT,
                strategy=Strategy.SINGLE,
                axis=0,
                taus=None):
        """Choose the threshold(s) for ``scores`` with the given heuristic.

        Args:
            scores: Score array; wrapped with ``self.mask`` when it is not
                already a ``np.ma.MaskedArray``.
            heuristic: Selects how the threshold is chosen.
            strategy: Forwarded to ``self.split_by_strategy`` and
                ``self.correct_shape``.
            axis: Forwarded to the collapse helpers and the heuristics.
            taus: Optional candidate thresholds shared by every split. When
                ``None`` they come from ``self.get_candidate_taus`` per split.

        Returns:
            ``np.array([0.5])`` for ``Heuristic.DEFAULT``; otherwise the
            result of ``self.correct_shape`` applied to the per-split
            thresholds. If no heuristic branch matches, ``None`` is what gets
            passed to ``self.correct_shape``.
        """
        if heuristic == Heuristic.DEFAULT:
            return np.array([0.5])

        if not isinstance(scores, np.ma.MaskedArray):
            scores = self.mask(scores)

        parts = self.split_by_strategy(scores, strategy)

        # Mean/median need no candidate thresholds and return right away.
        if heuristic in [Heuristic.MEAN, Heuristic.MEDIAN]:
            parts = [max_collapse(part, axis=axis) for part in parts]
            if heuristic == Heuristic.MEAN:
                means = [np.mean(part[np.round(part, 1) > 0]) for part in parts]
                return self.correct_shape(means, strategy)
            elif heuristic == Heuristic.MEDIAN:
                medians = [
                    np.median(part[np.round(part, 1) > 0]) for part in parts
                ]
                return self.correct_shape(medians, strategy)

        if taus is None:
            candidates = [
                self.get_candidate_taus(part, axis=axis) for part in parts
            ]
        else:
            candidates = [taus] * len(parts)

        def run_per_split(heuristic_fn, index_by_heuristic=False):
            # Call one heuristic per (split, candidates) pair; optionally pick
            # the entry keyed by ``heuristic`` from each result as it arrives.
            chosen = []
            for part, candidate in zip(parts, candidates):
                outcome = heuristic_fn(taus=candidate,
                                       scores=part,
                                       theta=self.threshold_binarize,
                                       axis=axis)
                if index_by_heuristic:
                    outcome = outcome[heuristic]
                chosen.append(outcome)
            return chosen

        tau = None

        if heuristic == Heuristic.BEST:
            # This heuristic also needs the targets, split the same way.
            split_targets = self.split_by_strategy(self.get_targets(axis=2),
                                                   strategy)
            split_targets = [
                label_collapse(target, axis=axis) for target in split_targets
            ]
            tau = []
            for part, candidate, target in zip(parts, candidates,
                                               split_targets):
                tau.append(
                    best_heuristic(taus=candidate,
                                   theta=self.threshold_binarize,
                                   y_true=target,
                                   scores=part,
                                   axis=axis))

        if heuristic == Heuristic.RATIO:
            tau = run_per_split(ratio_heuristic)

        if heuristic in [Heuristic.ELBOW_DOWN, Heuristic.ELBOW_UP]:
            tau = run_per_split(elbow_heuristic, index_by_heuristic=True)

        if heuristic in [
                Heuristic.LP_LEFT, Heuristic.LP_MEAN, Heuristic.LP_RIGHT
        ]:
            tau = run_per_split(lowest_plateau_heuristic,
                                index_by_heuristic=True)

        return self.correct_shape(tau, strategy)