예제 #1
0
    def setUp(self):
        """Create a fresh InfluxBucket and fill it with one week of points.

        The database name embeds the current epoch second. The bucket is
        dropped and re-initialised before any data is written.
        """
        epoch_seconds = int(datetime.datetime.now().timestamp())
        self.db = "test-{}".format(epoch_seconds)

        bucket_config = {
            'name': 'test',
            'addr': ADDR,
            'database': self.db,
            'measurement': 'nosetests',
        }
        self.source = InfluxBucket(bucket_config)
        self.source.drop()
        self.source.init()
        self.storage = TempStorage()

        generator = SinEventGenerator(base=3, sigma=0.05)

        # Data window: the seven days ending now.
        one_week = 7 * 24 * 3600
        self.to_date = datetime.datetime.now().timestamp()
        self.from_date = self.to_date - one_week

        timestamps = generator.generate_ts(
            self.from_date,
            self.to_date,
            step_ms=60000,
        )
        for ts in timestamps:
            # NOTE(review): points are written to 'measure1' although the
            # bucket is configured with measurement 'nosetests' -- confirm
            # this is intended.
            self.source.insert_times_data(
                measurement='measure1',
                ts=ts,
                data={'foo': random.lognormvariate(10, 1)},
            )

        self.source.commit()
예제 #2
0
    def __init__(self, *args, **kwargs):
        """Build the shared fixtures: data source, storage, model and data.

        Three weeks of generated timestamps, ending on a bucket-aligned
        boundary, are inserted into an in-memory data source.
        """
        super().__init__(*args, **kwargs)

        self.source = MemDataSource()
        self.storage = TempStorage()

        model_settings = {
            'name': 'test',
            'offset': 30,
            'span': 24 * 3,
            'bucket_interval': 20 * 60,
            'interval': 60,
            'features': FEATURES,
            'grace_period': "140m",  # = 7 points
            'max_threshold': 99.7,
            'min_threshold': 68,
            'max_evals': 10,
        }
        self.model = DonutModel(model_settings)

        self.generator = SinEventGenerator(base=3, amplitude=3, sigma=0.01)

        # Round "now" down to a multiple of the bucket interval so that the
        # date range is aligned on bucket boundaries.
        now_ts = datetime.datetime.now().timestamp()
        bucket = self.model.bucket_interval
        self.to_date = math.floor(now_ts / bucket) * bucket

        three_weeks = 3 * 7 * 24 * 3600
        self.from_date = self.to_date - three_weeks

        timestamps = self.generator.generate_ts(
            self.from_date,
            self.to_date,
            step_ms=600000,
        )
        for ts in timestamps:
            point = {
                'timestamp': ts,
                'foo': random.normalvariate(10, 1),
            }
            self.source.insert_times_data(point)
예제 #3
0
    def __init__(self, *args, **kwargs):
        """Build seeded fixtures: bucket, storage, model and generated data.

        Raises:
            Exception: if RANDOM_SEED or PYTHONHASHSEED is unset or empty.
        """
        super().__init__(*args, **kwargs)

        # Both variables must be set to a non-empty value before running.
        for required in ('RANDOM_SEED', 'PYTHONHASHSEED'):
            if os.environ.get(required):
                continue
            raise Exception(
                '{} environment variable not set'.format(required))

        # Seed both random number generators before any data is generated.
        seed = int(os.environ['RANDOM_SEED'])
        np.random.seed(seed)
        random.seed(seed)

        self.source = MemBucket()
        self.storage = TempStorage()

        model_settings = {
            'name': 'test',
            'offset': 30,
            'span': 24 * 3,
            'bucket_interval': 20 * 60,
            'interval': 60,
            'features': FEATURES,
            'grace_period': "140m",  # = 7 points
            'max_threshold': 99.7,
            'min_threshold': 68,
            'max_evals': 3,
        }
        self.model = DonutModel(model_settings)

        self.generator = SinEventGenerator(base=3, amplitude=3, sigma=0.01)

        # Round "now" down to a multiple of the bucket interval so that the
        # date range is aligned on bucket boundaries.
        now_ts = datetime.datetime.now().timestamp()
        bucket = self.model.bucket_interval
        self.to_date = math.floor(now_ts / bucket) * bucket

        three_weeks = 3 * 7 * 24 * 3600
        self.from_date = self.to_date - three_weeks

        timestamps = self.generator.generate_ts(
            self.from_date,
            self.to_date,
            step_ms=600000,
        )
        for ts in timestamps:
            point = {
                'timestamp': ts,
                'foo': random.normalvariate(10, 1),
            }
            self.source.insert_times_data(point)
예제 #4
0
    def test_predict_with_nan(self):
        # Train and predict on hourly data that has a one-hour hole every
        # day, then check that prediction and anomaly detection cope with
        # the missing bucket.
        hour = 3600
        day = 24 * hour
        nb_days = 3

        source = MemDataSource()
        storage = TempStorage()

        # History window: the `nb_days` days ending at today's midnight.
        midnight = datetime.datetime.now().replace(
            hour=0,
            minute=0,
            second=0,
            microsecond=0,
        )
        hist_to = midnight.timestamp()
        hist_from = hist_to - day * nb_days

        def insert_hours(start, count):
            # Insert `count` consecutive hourly points (foo = 0..count-1)
            # and return the timestamp that follows the last one.
            cursor = start
            for value in range(count):
                source.insert_times_data({
                    'timestamp': cursor,
                    'foo': value,
                })
                cursor += hour
            return cursor

        ts = hist_from
        for _ in range(nb_days):
            ts = insert_hours(ts, 12)  # [0h-12h[
            ts += hour                 # no data for [12h, 13h[
            ts = insert_hours(ts, 11)  # [13h-0h[

        count_feature = {
            'name': 'count_foo',
            'metric': 'count',
            'field': 'foo',
        }
        model_settings = {
            'name': 'test',
            'offset': 30,
            'span': 24,
            'bucket_interval': 3600,
            'interval': 60,
            'features': [count_feature],
            'max_threshold': 30,
            'min_threshold': 25,
            'max_evals': 10,
        }
        model = DonutModel(model_settings)

        # Train on the whole dataset.
        model.train(source, hist_from, hist_to)
        self.assertTrue(model.is_trained)

        # Predict on the last 24 hours of the history window.
        prediction = model.predict(source, hist_to - day, hist_to)

        # Debugging aid: prediction.plot('count_foo')

        expected_points = 24
        self.assertEqual(len(prediction.timestamps), expected_points)
        self.assertEqual(prediction.observed.shape, (expected_points,))
        self.assertEqual(prediction.predicted.shape, (expected_points,))

        # Called to make sure detect_anomalies() can deal with nan.
        model.detect_anomalies(prediction)

        # Donut does missing data insertion and can fill the gap in the
        # data, so every predicted count should be close to 1.
        for idx in range(24):
            self.assertAlmostEqual(
                1.0,
                prediction.predicted[idx],
                delta=0.22,
            )