Ejemplo n.º 1
0
class TestInfluxLong(unittest.TestCase):
    """Train a DonutModel on a week-long range of data stored in InfluxDB."""

    def setUp(self):
        """Create a fresh database and fill `measure1` with random values.

        The database name embeds the current Unix timestamp. Timestamps of
        the inserted points come from a SinEventGenerator covering the last
        seven days; the stored `foo` values are log-normal random numbers.
        """
        self.db = "test-{}".format(int(datetime.datetime.now().timestamp()))
        self.source = InfluxBucket({
            'name': 'test',
            'addr': ADDR,
            'database': self.db,
            'measurement': 'nosetests',
        })
        # Start from a clean slate in case a database with that name exists.
        self.source.drop()
        self.source.init()
        self.storage = TempStorage()

        generator = SinEventGenerator(base=3, sigma=0.05)

        self.to_date = datetime.datetime.now().timestamp()
        self.from_date = self.to_date - 3600 * 24 * 7  # seven days earlier

        for ts in generator.generate_ts(
                self.from_date,
                self.to_date,
                step_ms=60000,
        ):
            self.source.insert_times_data(
                measurement='measure1',
                ts=ts,
                data={'foo': random.lognormvariate(10, 1)},
            )
        self.source.commit()

    def tearDown(self):
        """Drop the per-test database so test runs do not leave data behind."""
        self.source.drop()

    def test_train(self):
        """Training over the inserted date range must mark the model trained."""
        model = DonutModel(
            dict(
                name='test',
                offset=30,
                span=5,
                bucket_interval=20 * 60,
                interval=60,
                features=FEATURES[0:1],
                max_evals=1,
            ))

        # Train
        model.train(self.source,
                    from_date=self.from_date,
                    to_date=self.to_date)

        # Check
        self.assertTrue(model.is_trained)
Ejemplo n.º 2
0
class TestInfluxQuick(unittest.TestCase):
    """Query-building and data-retrieval tests on a tiny InfluxDB data set."""

    def setUp(self):
        """Create a per-test database, a model and five hand-picked points."""
        bucket_interval = 3

        t0 = int(datetime.datetime.now().timestamp())

        # XXX Bucket returned by InfluxDB are aligned on
        # modulo(bucket_interval), that's why
        # timestamp must be aligned for unit tests.
        t0 -= t0 % bucket_interval

        self.t0 = t0

        self.db = 'test-{}'.format(t0)
        logging.info("creating database %s", self.db)
        self.source = InfluxBucket({
            'name': 'test',
            'addr': ADDR,
            'database': self.db,
            'measurement': 'nosetests',
        })
        self.source.drop()
        self.source.init()

        self.model = Model(
            dict(
                name="test-model",
                offset=30,
                span=300,
                # Same value used to align t0 above.
                bucket_interval=bucket_interval,
                interval=60,
                features=FEATURES,
            ))

        data = [
            # (foo, bar, timestamp)
            (1, 33, t0 - 1),  # excluded
            (2, 120, t0),
            (3, 312, t0 + 1),
            # empty
            (4, 18, t0 + 7),
            (5, 78, t0 + 9),  # excluded
        ]
        for foo, bar, ts in data:
            self.source.insert_times_data(measurement='measure1',
                                          ts=ts,
                                          data={'foo': foo})
            self.source.insert_times_data(measurement='measure2',
                                          ts=ts,
                                          data={'bar': bar})
            # Two tagged series in `measure3`: `bar` under the first tag set
            # and its negation under the second.
            tagged_series = (
                ({'tag_kw': 'tag1', 'tag_int': 9, 'tag_bool': False}, bar),
                ({'tag_kw': 'tag2', 'tag_int': 7, 'tag_bool': True}, -bar),
            )
            for tags, baz in tagged_series:
                self.source.insert_times_data(measurement='measure3',
                                              ts=ts,
                                              tags=tags,
                                              data={'baz': baz})

        self.source.commit()

    def tearDown(self):
        """Drop the per-test database."""
        self.source.drop()

    def test_validation(self):
        """Settings with only `addr` or only `database` must be rejected."""
        with self.assertRaises(errors.Invalid):
            InfluxBucket({
                'addr': 'localhost',
            })
        with self.assertRaises(errors.Invalid):
            InfluxBucket({
                'database': 'foo',
            })

    def test_build_time_predicates(self):
        """Check the time predicates built from float and ISO string dates."""
        self.assertEqual(
            _build_time_predicates(),
            [],
        )
        self.assertEqual(
            _build_time_predicates(
                from_date=1515404366.1234,
                to_date="2018-01-08T14:59:25.456Z",
            ),
            [
                "time >= 1515404366123400000",
                "time < 1515423565456000000",
            ],
        )

    def test_build_tags_predicates(self):
        """Check quoting of tag names/values and handling of int/bool values."""
        self.assertEqual(
            _build_tags_predicates(),
            [],
        )
        self.assertEqual(
            _build_tags_predicates([
                {
                    'tag': 'foo',
                    'value': 'bar'
                },
                {
                    'tag': 'a "',
                    'value': 'b \''
                },
                {
                    'tag': 'int',
                    'value': 42
                },
                {
                    'tag': 'bool',
                    'value': True
                },
            ]), [
                "\"foo\"='bar'",
                "\"a \\\"\"='b \\''",
                "(\"int\"='42' OR \"int\"=42)",
                "(\"bool\"='True' OR \"bool\"=True)",
            ])

    def test_build_times_queries(self):
        """Check generated queries with and without a retention policy."""
        where = "time >= 1515404366123400000 and time < 1515423565456000000"

        def expected_queries(from_prefix):
            # `from_prefix` is inserted right before each quoted measurement;
            # it is empty when no retention policy is configured.
            return [
                "select MEAN(\"foo\") as \"avg_foo\" from {}\"measure1\" "
                "where {} group by time(3000ms);".format(from_prefix, where),
                "select COUNT(\"bar\") as \"count_bar\" from {}\"measure2\" "
                "where {} group by time(3000ms);".format(from_prefix, where),
                "select MEAN(\"baz\") as \"avg_baz\" from {}\"measure1\" "
                "where {} and \"mytag\"='myvalue' group by time(3000ms);".
                format(from_prefix, where),
            ]

        def build_queries(source):
            return list(
                source._build_times_queries(
                    bucket_interval=self.model.bucket_interval,
                    features=self.model.features,
                    from_date=1515404366.1234,
                    to_date="2018-01-08T14:59:25.456Z",
                ))

        self.assertEqual(build_queries(self.source), expected_queries(''))

        source = InfluxBucket({
            'name': 'test',
            'addr': ADDR,
            'database': self.db,
            'retention_policy': 'custom',
            'measurement': 'nosetests',
        })
        self.assertEqual(
            build_queries(source),
            expected_queries('"{}"."custom".'.format(self.db)),
        )

    def test_get_times_data(self):
        """Check the per-bucket aggregates returned for [t0, t0 + 8)."""
        from_date = self.t0
        to_date = self.t0 + 8
        # Log the queried range (both bounds, not the start twice).
        logging.info("[%d %d]", from_date, to_date)
        res = self.source.get_times_data(
            bucket_interval=self.model.bucket_interval,
            features=self.model.features,
            from_date=from_date,
            to_date=to_date,
        )

        foo_avg = []
        bar_count = []

        # Single pass over the result; element 1 of each line holds the
        # feature values, foo average first and bar count second.
        for line in res:
            values = line[1]
            foo_avg.append(nan_to_none(values[0]))
            bar_count.append(nan_to_none(values[1]))

        self.assertEqual(foo_avg, [2.5, None, 4.0])
        self.assertEqual(bar_count, [2.0, 0, 1.0])

    def test_get_times_data2(self):
        """Round-trip aggregated buckets through a MemBucket and compare."""
        res = self.source.get_times_data(
            bucket_interval=self.model.bucket_interval,
            features=self.model.features,
            from_date=self.t0,
            to_date=self.t0 + 8,
        )

        # _source to write aggregate data to RAM
        _source = MemBucket()
        # Keep a pristine copy: the features are switched to 'avg' below and
        # put back once the in-memory query has been made.
        _features = copy.deepcopy(self.model.features)
        for feature in self.model.features:
            feature.metric = 'avg'

        for _, val, timeval in res:
            bucket = {
                feature.field: val[pos]
                for pos, feature in enumerate(self.model.features)
            }
            bucket.update({'timestamp': make_ts(timeval)})
            _source.insert_times_data(bucket)

        res2 = _source.get_times_data(
            bucket_interval=self.model.bucket_interval,
            features=self.model.features,
            from_date=self.t0,
            to_date=self.t0 + 8,
        )
        self.model.features = _features

        for i, (_, val2, _) in enumerate(res2):
            (_, val, _) = res[i]
            np.testing.assert_allclose(val, val2)

    def _assert_baz_avg(self, features, expected):
        """Query [t0, t0 + 8) with `features` and compare the first feature.

        `expected` lists the first feature value of every returned line, in
        order; the comparison is exact (zero tolerance).
        """
        model = Model(
            dict(
                name="test-model",
                offset=30,
                span=300,
                bucket_interval=3,
                interval=60,
                features=features,
            ))
        res = self.source.get_times_data(
            bucket_interval=model.bucket_interval,
            features=model.features,
            from_date=self.t0,
            to_date=self.t0 + 8,
        )
        baz_avg = [line[1][0] for line in res]

        np.testing.assert_allclose(
            np.array(baz_avg),
            np.array(expected),
            rtol=0,
            atol=0,
        )

    def test_match_all(self):
        """Each tag-filtered feature set must only see its own series."""
        self._assert_baz_avg(FEATURES_MATCH_ALL_TAG1, [216.0, np.nan, 18.0])
        self._assert_baz_avg(FEATURES_MATCH_ALL_TAG2, [-216.0, np.nan, -18.0])
Ejemplo n.º 3
0
class AppTests(unittest.TestCase):
    """Tests that drive a server over HTTP, backed by an InfluxDB bucket.

    The server address is read from the LOUDML_ADDR environment variable and
    the InfluxDB address from INFLUXDB_ADDR; both have localhost defaults.
    """

    @classmethod
    def setUpClass(cls):
        pass

    @classmethod
    def tearDownClass(cls):
        pass

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Bearer token added to every request when set.
        self._jwt = None

    def setUp(self):
        """Create a per-test database and fill measurement `bar` with data."""
        self.loudml_addr = os.environ.get('LOUDML_ADDR', 'localhost:8077')

        self.bucket_interval = 20 * 60

        t0 = int(datetime.datetime.now().timestamp())
        t0 -= t0 % self.bucket_interval
        self.t0 = t0

        self.db = 'test-{}'.format(t0)
        logging.info("creating database %s", self.db)
        addr = os.environ.get('INFLUXDB_ADDR', 'localhost')

        self.source = InfluxBucket({
            'name': 'nosetests',
            'addr': addr,
            'database': self.db,
        })
        self.source.drop()
        self.source.init()

        self.generator = SinEventGenerator(base=3, amplitude=3, sigma=0.01)

        to_date = datetime.datetime.now().timestamp()

        # Be sure that date range is aligned
        self.to_date = math.floor(
            to_date / self.bucket_interval) * self.bucket_interval  # noqa E501
        self.from_date = self.to_date - 3600 * 24 * 7 * 3  # three weeks

        for ts in self.generator.generate_ts(self.from_date,
                                             self.to_date,
                                             step_ms=600000):  # noqa E501
            self.source.insert_times_data(
                measurement='bar',
                ts=ts,
                data={'foo': random.normalvariate(10, 1)},
            )

        self.source.commit()

    def tearDown(self):
        """Drop the per-test database."""
        self.source.drop()

    def _send(self, send, url, data, kwargs, content_type=None):
        """Issue one HTTP request through `send` (a `requests` verb function).

        `kwargs` is the keyword dict collected by the calling verb helper.
        Its optional `headers` entry is copied before the Authorization and
        Content-Type headers are added, so a dict owned by the caller is
        never modified. Remaining entries are forwarded to `send` unchanged.
        """
        headers = dict(kwargs.pop('headers', None) or {})
        if self._jwt:
            headers['Authorization'] = 'Bearer {}'.format(self._jwt)
        if content_type:
            headers['Content-Type'] = content_type
        return send(self.get_url(url),
                    data=data,
                    headers=headers,
                    **kwargs)

    def get(self, url, data=None, **kwargs):
        """Send a GET request to `url` (a path relative to the server)."""
        return self._send(requests.get, url, data, kwargs)

    def post(self, url, data=None, content_type=None, **kwargs):
        """Send a POST request, optionally setting the Content-Type header."""
        return self._send(requests.post, url, data, kwargs,
                          content_type=content_type)

    def patch(self, url, data=None, **kwargs):
        """Send a PATCH request to `url`."""
        return self._send(requests.patch, url, data, kwargs)

    def delete(self, url, data=None, **kwargs):
        """Send a DELETE request to `url`."""
        return self._send(requests.delete, url, data, kwargs)

    def put(self, url, data=None, content_type=None, **kwargs):
        """Send a PUT request, optionally setting the Content-Type header."""
        return self._send(requests.put, url, data, kwargs,
                          content_type=content_type)

    def get_url(self, url):
        """Return the absolute URL for `url`; https when USE_SSL is set."""
        scheme = 'https://' if 'USE_SSL' in os.environ else 'http://'
        return scheme + self.loudml_addr + url

    def _wait_job(self, job_id, poll_interval=5, timeout=None):
        """Poll `/jobs/<job_id>` until its state is done and return the state.

        `poll_interval` is the number of seconds slept before each poll.
        `timeout`, when given, is the maximum number of seconds to keep
        polling before failing the test; `None` waits indefinitely.
        """
        deadline = None if timeout is None else time.monotonic() + timeout
        state = None
        while not state_is_done(state):
            if deadline is not None and time.monotonic() > deadline:
                self.fail('job {} not done after {} seconds (state: {})'.format(
                    job_id, timeout, state))
            time.sleep(poll_interval)
            response = self.get('/jobs/{}'.format(job_id))
            state = response.json()['state']
        return state

    def _get_models(self):
        """Return the decoded JSON body of `GET /models` (must answer 200)."""
        response = self.get('/models')
        self.assertEqual(response.status_code, 200)
        return response.json()

    def _get_sources(self):
        """Return the decoded JSON body of `GET /buckets` (must answer 200)."""
        response = self.get('/buckets')
        self.assertEqual(response.status_code, 200)
        return response.json()

    def _require_source(self):
        """Make sure a bucket named after the test database is registered."""
        sources = {entry['name']: entry for entry in self._get_sources()}
        if self.db in sources:
            return

        addr = os.environ.get('INFLUXDB_ADDR', 'localhost:8086')

        source = {
            'name': self.db,
            'type': 'influxdb',
            'addr': addr,
            'database': self.db,
            'create_database': 'true',
            'retention_policy': 'autogen',
            'max_series_per_request': 2000,
        }
        response = self.post(
            '/buckets',
            content_type='application/json',
            data=json.dumps(source),
        )
        self.assertEqual(response.status_code, 201)
        sources = {entry['name']: entry for entry in self._get_sources()}
        self.assertIn(self.db, sources)

    def _del_model(self, model_name):
        """Delete `model_name`; a 404 (nothing to delete) is accepted too."""
        response = self.delete('/models/{}'.format(model_name))
        self.assertIn(response.status_code, [200, 404])

    def _require_model(self):
        """Return the `test-model` description, creating the model if needed."""
        self._require_source()
        models = {
            entry['settings']['name']: entry
            for entry in self._get_models()
        }
        if 'test-model' in models:
            return models['test-model']

        model = dict(
            name='test-model',
            default_bucket=self.db,
            offset=30,
            span=24 * 3,
            bucket_interval=self.bucket_interval,
            interval=60,
            features=FEATURES,
            grace_period="140m",  # = 7 points
            max_threshold=99.7,
            min_threshold=68,
            max_evals=1,
        )
        model['type'] = 'donut'

        response = self.post(
            '/models',
            content_type='application/json',
            data=json.dumps(model),
        )
        self.assertEqual(response.status_code, 201)
        models = {
            entry['settings']['name']: entry
            for entry in self._get_models()
        }
        self.assertIn('test-model', models)
        return models['test-model']

    def _require_training(self):
        """Train `test-model` over the inserted range unless already trained."""
        model = self._require_model()
        if model['state']['trained']:
            return
        response = self.post(
            '/models/{}/_train?from={}&to={}'.format(
                'test-model',
                str(self.from_date),
                str(self.to_date),
            ))
        job_id = read_job_id(response.text)
        status = self._wait_job(job_id)
        self.assertEqual(status, 'done')

    def test_training(self):
        """Deleting then re-creating the model must end with a done job."""
        self._del_model('test-model')
        self._require_training()

    def test_home(self):
        """`GET /` must answer 200 with `host_id` and `version` fields."""
        response = self.get('/')
        self.assertEqual(response.status_code, 200)
        home = response.json()
        self.assertIsNotNone(home.get('host_id'))
        self.assertIsNotNone(home.get('version'))