Beispiel #1
0
    def initialDBScanSciLearn(self):
        """Seed the potential micro-clusters from a DBSCAN pass over the buffer.

        Fits ``DBSCAN`` (``eps=8``, ``min_samples=self.minPts``, brute-force
        neighbour search) on ``self.buffer``, stores the resulting labels in
        a new ``'clusters'`` column of the buffer, and builds one
        ``MicroCluster`` per non-noise label.  Each micro-cluster is inserted
        into ``self.pMicroCluster``.

        Side effects:
            ``self.buffer`` gains a ``'clusters'`` column and
            ``self.pMicroCluster`` receives one entry per detected cluster.
        """
        labels = DBSCAN(eps=8, min_samples=self.minPts,
                        algorithm='brute').fit(self.buffer).labels_
        self.buffer['clusters'] = labels

        for clusterId in np.unique(labels):
            # Label -1 is skipped; scikit-learn's DBSCAN uses it for noise
            # points (assuming ``DBSCAN`` here is scikit-learn's).
            if clusterId == -1:
                continue

            members = self.buffer[self.buffer['clusters'] == clusterId]
            members = members.drop('clusters', axis=1)

            # The first member is handed to the constructor ...
            mc = MicroCluster(Sample(members.iloc[0].tolist()),
                              self.currentTimestamp, self.lamb)

            # ... so only the remaining members are inserted here.  The
            # previous loop walked positions 0..n-2, which read row 0 a
            # second time and never reached the last row of the cluster.
            for position in range(1, len(members)):
                mc.insertSample(Sample(members.iloc[position].tolist()),
                                self.currentTimestamp)

            self.pMicroCluster.insert(mc)
Beispiel #2
0
    def initWithoutDBScan(self):
        """Initialise the model with one micro-cluster holding the whole buffer.

        Every row of ``self.buffer`` is wrapped in a ``Sample`` (row ``i``
        gets timestamp ``i + 1``) and inserted into a single
        ``MicroCluster``, which is then stored in ``self.pMicroCluster``.
        The largest radius the cluster reaches while being filled is tracked
        along the way.

        If ``self.epsilon`` is the string ``'auto'`` it ends up set to that
        largest observed radius.
        """
        # NOTE(review): this sample is never inserted -- the loop below
        # rebuilds row 0 itself.  Kept so behaviour is unchanged.
        firstSample = Sample(self.buffer.iloc[0].values, 0)
        firstSample.set_timestamp(1)

        microCluster = MicroCluster(1, self.lamb, self.pMicroCluster.N + 1)
        largestRadius = 0

        for position in range(len(self.buffer)):
            rowSample = Sample(self.buffer.iloc[position].values, position)
            rowSample.set_timestamp(position + 1)
            microCluster.insert_sample(rowSample)
            # print 'New max: {}'.format(mc.radius)
            largestRadius = max(largestRadius, microCluster.radius)

        self.pMicroCluster.insert(microCluster)

        if isinstance(self.epsilon, str) and self.epsilon == 'auto':
            # NOTE(review): the radius * radiusFactor value is overwritten
            # on the very next line; confirm which of the two is intended.
            firstCluster = self.pMicroCluster.clusters[0]
            self.epsilon = firstCluster.radius * self.radiusFactor
            self.epsilon = largestRadius
Beispiel #3
0
    def initWithoutDBScan(self):
        """Initialise the model with one micro-cluster holding the whole buffer.

        Every row of ``self.buffer`` is wrapped in a ``Sample`` (row ``i``
        gets timestamp ``i + 1``) and inserted, at ``self.currentTimestamp``,
        into a single ``MicroCluster`` that is then stored in
        ``self.pMicroCluster``.

        If ``self.epsilon`` is the string ``'auto'`` it is replaced by the
        radius of the first stored cluster multiplied by
        ``self.radiusFactor``.
        """
        # NOTE(review): this sample is never inserted -- the loop below
        # rebuilds row 0 itself.  Kept so behaviour is unchanged.
        firstSample = Sample(self.buffer.iloc[0].values, 0)
        firstSample.setTimestamp(1)

        microCluster = MicroCluster(1, self.lamb)

        for position in range(len(self.buffer)):
            rowSample = Sample(self.buffer.iloc[position].values, position)
            rowSample.setTimestamp(position + 1)
            microCluster.insertSample(rowSample, self.currentTimestamp)

        self.pMicroCluster.insert(microCluster)

        if isinstance(self.epsilon, str) and self.epsilon == 'auto':
            firstCluster = self.pMicroCluster.clusters[0]
            self.epsilon = firstCluster.radius * self.radiusFactor
Beispiel #4
0
    def initWithoutDBScan(self):
        """Initialise the model with one micro-cluster holding the whole buffer.

        Every element of ``self.buffer`` is wrapped in a ``Sample`` (element
        ``i`` gets timestamp ``i + 1``) and inserted, at
        ``self.currentTimestamp``, into a single ``MicroCluster`` that is
        then stored in ``self.pMicroCluster``.  The largest radius the
        cluster reaches while being filled is tracked along the way.

        If ``self.epsilon`` is the string ``'auto'`` it ends up set to that
        largest observed radius.
        """
        # NOTE(review): this sample is never inserted -- the loop below
        # rebuilds element 0 itself.  Kept so behaviour is unchanged.
        firstSample = Sample(self.buffer[0], 0)
        firstSample.setTimestamp(1)

        microCluster = MicroCluster(1, self.lamb, self.pMicroCluster.N + 1)
        largestRadius = 0

        for position in range(len(self.buffer)):
            current = Sample(self.buffer[position], position)
            current.setTimestamp(position + 1)
            microCluster.insertSample(current, self.currentTimestamp)
            largestRadius = max(largestRadius, microCluster.radius)

        self.pMicroCluster.insert(microCluster)

        if isinstance(self.epsilon, str) and self.epsilon == 'auto':
            # NOTE(review): the radius * radiusFactor value is overwritten
            # on the very next line; confirm which of the two is intended.
            firstCluster = self.pMicroCluster.clusters[0]
            self.epsilon = firstCluster.radius * self.radiusFactor
            self.epsilon = largestRadius
Beispiel #5
0
    def runOnNewSample(self, sample):
        """Feed one sample to the clusterer and report whether it is an outlier.

        The current timestamp is advanced first: by one tick when the
        module-level ``simulation`` flag is truthy (the tick is also stamped
        onto ``sample``), otherwise it is set to ``time.time()``.

        Until the model is initialised the sample is only buffered; once
        ``self.numberInitialSamples`` samples have been collected the learner
        is reset and ``initialDBScanSciLearn`` builds the initial clusters.

        After initialisation the sample is merged into the nearest potential
        micro-cluster if that keeps its radius within ``self.epsilon``, else
        into the nearest outlier micro-cluster under the same condition, else
        it starts a new micro-cluster.  Whenever the timestamp is a multiple
        of ``self.tp`` both cluster lists are pruned.

        Args:
            sample: the incoming sample; must offer ``setTimestamp`` when
                running in simulation mode.

        Returns:
            ``None`` while the model is still initialising.  Afterwards the
            outlier flag (``True`` when the sample was not absorbed by, or
            placed next to, a potential micro-cluster), or, when
            ``self.exportVariables`` is truthy, a dict with the keys
            ``'pMicroClusters'``, ``'oMicroClusters'`` and ``'result'``.
        """
        if simulation:
            self.currentTimestamp += 1
            sample.setTimestamp(self.currentTimestamp)
        else:
            self.currentTimestamp = time.time()

        ### INITIALIZATION PHASE ###
        if not self.inizialized:
            self.buffer.append(sample)
            if len(self.buffer) >= self.numberInitialSamples:
                self.resetLearningImpl()
                self.initialDBScanSciLearn()
                self.inizialized = True
            return None

        ### MERGING PHASE ###
        merged = False
        returnOutlier = True

        if len(self.pMicroCluster.clusters) != 0:
            closestMicroCluster = self.nearestCluster(
                sample, self.currentTimestamp, kind='cluster')

            # Try the insertion on a deep copy first so the real cluster is
            # only touched when the resulting radius is acceptable.
            backupClosestCluster = copy.deepcopy(closestMicroCluster)
            backupClosestCluster.insertSample(sample, self.currentTimestamp)

            if backupClosestCluster.radius <= self.epsilon:
                closestMicroCluster.insertSample(sample,
                                                 self.currentTimestamp)
                merged = True
                returnOutlier = False
                self.updateAll(closestMicroCluster)

        if not merged and len(self.oMicroCluster.clusters) != 0:
            closestMicroCluster = self.nearestCluster(
                sample, self.currentTimestamp, kind='outlier')

            backupClosestCluster = copy.deepcopy(closestMicroCluster)
            backupClosestCluster.insertSample(sample, self.currentTimestamp)

            if backupClosestCluster.radius <= self.epsilon:
                closestMicroCluster.insertSample(sample,
                                                 self.currentTimestamp)
                merged = True

                # An outlier cluster that has grown heavy enough is moved
                # to the potential clusters.  The sample itself is still
                # reported as an outlier (returnOutlier stays True).
                if closestMicroCluster.weight > self.beta * self.mu:
                    self.oMicroCluster.clusters.pop(
                        self.oMicroCluster.clusters.index(
                            closestMicroCluster))
                    self.pMicroCluster.insert(closestMicroCluster)

                self.updateAll(closestMicroCluster)

        if not merged:
            newOutlierMicroCluster = MicroCluster(1, self.lamb)
            newOutlierMicroCluster.insertSample(sample,
                                                self.currentTimestamp)

            # The new cluster counts as a true outlier only when no
            # potential cluster centre lies within 2 * epsilon of it.
            nearPotential = any(
                np.linalg.norm(clusterTest.center -
                               newOutlierMicroCluster.center)
                < 2 * self.epsilon
                for clusterTest in self.pMicroCluster.clusters)

            if nearPotential:
                self.pMicroCluster.insert(newOutlierMicroCluster)
                returnOutlier = False
            else:
                self.oMicroCluster.insert(newOutlierMicroCluster)
            self.updateAll(newOutlierMicroCluster)

        ### PRUNING PHASE ###
        if self.currentTimestamp % self.tp == 0:
            # Both loops iterate over a snapshot: removing from the list
            # that is being iterated skips the element following each
            # removal, which let stale clusters survive the pruning pass.
            minimumWeight = self.beta * self.mu
            for cluster in list(self.pMicroCluster.clusters):
                if cluster.weight < minimumWeight:
                    self.pMicroCluster.clusters.pop(
                        self.pMicroCluster.clusters.index(cluster))

            # Denominator of xsi does not depend on the cluster.
            xs2 = math.pow(2, -self.lamb * self.tp) - 1
            for cluster in list(self.oMicroCluster.clusters):
                creationTimestamp = cluster.creationTimeStamp

                # xsi = (2^(-lamb * (t - t0 + tp)) - 1) / (2^(-lamb * tp) - 1)
                xs1 = math.pow(
                    2, -self.lamb * (self.currentTimestamp -
                                     creationTimestamp + self.tp)) - 1
                xsi = xs1 / xs2

                if cluster.weight < xsi:
                    self.oMicroCluster.clusters.pop(
                        self.oMicroCluster.clusters.index(cluster))

        if self.exportVariables:
            return {
                'pMicroClusters': self.pMicroCluster.clusters,
                'oMicroClusters': self.oMicroCluster.clusters,
                'result': returnOutlier
            }

        return returnOutlier