Exemple #1
0
class IABOD(ABOD):
    """Incrementally fitted variant of ``ABOD`` backed by a data cluster.

    Training samples are fed through ``partial_fit``. In the default (online)
    mode they are handed to ``data_cluster`` and the reduced data returned by
    the cluster is mirrored into ``self.data``. In offline-test mode the raw
    samples are accumulated in ``self.data`` directly and the cluster is not
    updated.

    NOTE(review): ``self.data`` is presumably what the ``ABOD`` superclass
    reads when scoring - confirm against ``ABOD``.
    """

    # todo: refactor - avoid copying data from HullCluster to class
    # (originally needed for superclass to access it)
    data = []
    __verbose = False
    __test_offline = False
    data_cluster = None

    # todo: remove test online
    def __init__(self, test_offline=False, cluster=None):
        """Create the model.

        Args:
            test_offline: When exactly ``True``, ``partial_fit`` buffers raw
                samples instead of updating the cluster.
            cluster: Cluster used to store/reduce the training data. May be
                ``None`` (a ``MeanShiftCluster`` is created), a
                ``ClusterBase`` instance, or a ``ClusterBase`` subclass
                (instantiated without arguments).

        Raises:
            TypeError: If ``cluster`` is neither ``None``, a ``ClusterBase``
                instance, nor a ``ClusterBase`` subclass.
        """
        ABOD.__init__(self)
        self.__test_offline = test_offline
        if cluster is None:
            self.data_cluster = MeanShiftCluster()
        elif isinstance(cluster, ClusterBase):
            # The cluster is used as an instance below (update/get_data), so
            # an instance check is the one that matches actual usage.
            self.data_cluster = cluster
        elif isinstance(cluster, type) and issubclass(cluster, ClusterBase):
            # A bare class was the only thing the previous issubclass() check
            # let through; instantiate it so instance methods are callable.
            self.data_cluster = cluster()
        else:
            raise TypeError(
                "cluster must be a ClusterBase instance or subclass, got %r"
                % (cluster,)
            )

    def fit(self, data, dim_reduction=False):
        """Unsupported: this model is only trained incrementally.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("Use 'partial_fit' instead of 'fit'")

    def partial_fit(self, samples):
        """Add ``samples`` to the training data.

        Offline-test mode: the first batch becomes ``self.data``; later
        batches are appended only while fewer than 40 rows are stored (the
        size is checked before appending, so the buffer may end up larger
        than 40). Batches arriving after that are ignored.

        Online mode: the cluster is updated and ``self.data`` is replaced by
        the cluster's current data.
        """
        if self.__test_offline is True:
            if len(self.data) == 0:
                self.data = samples
            elif len(self.data) < 40:
                self.data = np.concatenate((self.data, samples))
        else:
            self.data_cluster.update(samples)
            self.data = self.data_cluster.get_data()

    def mean_dist(self, samples, metric='cosine'):
        """Return ``data_cluster.mean_dist(samples, metric)``."""
        return self.data_cluster.mean_dist(samples, metric)

    def class_mean_dist(self, samples, metric='cosine'):
        """Return ``data_cluster.class_mean_dist(samples, metric)``."""
        return self.data_cluster.class_mean_dist(samples, metric)
Exemple #2
0
class ISVM:
    """Incrementally refitted one-vs-random SVM classifier.

    New samples are pushed into ``data_cluster``; the data the cluster
    returns is labelled ``1`` and ``random_data`` is labelled ``-1``, and a
    linear ``SVC`` with probability estimates is refitted on both.
    Predictions are ``1`` (class), ``-1`` (not the class) or ``0``
    (uncertain).
    """

    __verbose = False

    clf = None
    # A sample is "uncertain" when both class probabilities are below this.
    uncertainty_thresh = 0.7

    random_data = None
    data_cluster = None

    # prediction
    # Results of the most recent ``predict`` call (None until then).
    prediction = None
    probability = None

    def __init__(self, random_data, cluster=None):
        """Create the classifier.

        Args:
            random_data: Samples used as the negative (``-1``) class on every
                refit.
            cluster: Cluster used to store/reduce the class data. May be
                ``None`` (a ``MeanShiftCluster`` is created), a
                ``ClusterBase`` instance, or a ``ClusterBase`` subclass
                (instantiated without arguments).

        Raises:
            TypeError: If ``cluster`` is neither ``None``, a ``ClusterBase``
                instance, nor a ``ClusterBase`` subclass.
        """
        # load random data
        self.random_data = random_data
        self.clf = SVC(kernel='linear', probability=True, C=1)
        if cluster is None:
            self.data_cluster = MeanShiftCluster()
        elif isinstance(cluster, ClusterBase):
            # The cluster is used as an instance below (update/get_data), so
            # an instance check is the one that matches actual usage.
            self.data_cluster = cluster
        elif isinstance(cluster, type) and issubclass(cluster, ClusterBase):
            # A bare class was the only thing the previous issubclass() check
            # let through; instantiate it so instance methods are callable.
            self.data_cluster = cluster()
        else:
            raise TypeError(
                "cluster must be a ClusterBase instance or subclass, got %r"
                % (cluster,)
            )

    def decision_function(self, samples):
        """Placeholder; not implemented and always returns ``None``."""
        pass

    def get_proba(self):
        """Return the mean class probability of the last prediction.

        Samples predicted ``1`` contribute their column-1 probability,
        samples predicted ``-1`` contribute one minus it, and uncertain
        samples (``0``) are not counted. ``predict`` must have been called
        first.

        Returns:
            The averaged probability, or NaN when no sample was predicted
            with certainty (avoids a division by zero).
        """
        # probability that it is the class (uncertain samples not counted)
        p_class = self.probability[:, 1]
        n_certain = len(p_class[self.prediction != 0])
        if n_certain == 0:
            return np.float64(np.nan)
        total = np.sum(p_class[self.prediction == 1])
        total += np.sum(1 - p_class[self.prediction == -1])
        return total / n_certain

    def mean_dist(self, samples, metric='cosine'):
        """Return ``data_cluster.mean_dist(samples, metric)``."""
        return self.data_cluster.mean_dist(samples, metric)

    def class_mean_dist(self, samples, metric='cosine'):
        """Return ``data_cluster.class_mean_dist(samples, metric)``."""
        return self.data_cluster.class_mean_dist(samples, metric)

    def predict(self, samples):
        """Classify ``samples`` and cache the result on the instance.

        Stores the classifier's probabilities in ``self.probability`` and the
        labels in ``self.prediction``.

        Returns:
            Array with ``-1`` where column 0 of the probabilities exceeds
            0.5, ``1`` otherwise, and ``0`` where both columns are below
            ``uncertainty_thresh``.
        """
        proba = self.clf.predict_proba(samples)
        self.probability = proba
        # Uncertain: neither of the two class probabilities is confident.
        uncertain = np.sum(proba < self.uncertainty_thresh, axis=1) == 2
        # Column 0 is treated as the "-1" class, matching the labels used in
        # __fit_vs_random.
        pred = np.where(proba[:, 0] > 0.5, -1, 1)
        pred[uncertain] = 0
        self.prediction = pred
        return pred

    def __fit_vs_random(self, class_data):
        """Refit the SVM with ``class_data`` as ``1`` and random data as ``-1``."""
        label_class = np.repeat(1, np.shape(class_data)[0])
        label_unknown = np.repeat(-1, np.shape(self.random_data)[0])
        training_embeddings = np.concatenate((class_data, self.random_data))
        training_labels = np.concatenate((label_class, label_unknown))
        self.clf.fit(training_embeddings, training_labels)

    def partial_fit(self, samples):
        """Add ``samples`` to the cluster and refit the SVM on its data."""
        self.data_cluster.update(samples)
        reduced_data = self.data_cluster.get_data()
        # refit SVM one vs random
        self.__fit_vs_random(reduced_data)