def test_trough_antisegments(self):
    data_val = [
        9.0, 9.0, 9.0, 9.0, 7.0, 4.0, 7.0, 9.0, 9.0, 9.0,
        5.0, 1.0, 5.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0
    ]
    dataframe = create_dataframe(data_val)
    segments = [{
        '_id': 'Esl7uetLhx4lCqHa',
        'analyticUnitId': 'opnICRJwOmwBELK8',
        'from': 1523889000010,
        'to': 1523889000012,
        'labeled': True,
        'deleted': False
    }, {
        '_id': 'Esl7uetLhx4lCqHa',
        'analyticUnitId': 'opnICRJwOmwBELK8',
        'from': 1523889000003,
        'to': 1523889000005,
        'labeled': False,
        'deleted': True
    }]
    segments = [Segment.from_json(segment) for segment in segments]

    try:
        model = models.TroughModel()
        model_name = model.__class__.__name__
        model.state = model.get_state(None)
        model.fit(dataframe, segments, 'test')
    except ValueError:
        self.fail('Model {} raised unexpectedly'.format(model_name))
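# The tests in this module rely on a create_dataframe helper. Below is a minimal
# sketch of what such a helper could look like, assuming timestamps count up in
# 1 ms steps from 1523889000000 (the same convention test_only_negative_segments
# builds by hand); the real helper may additionally convert the timestamp column
# with pd.to_datetime. pandas is assumed to be imported as pd in this module.
def create_dataframe(data_val: list) -> pd.DataFrame:
    data_ind = [1523889000000 + i for i in range(len(data_val))]
    data = {'timestamp': data_ind, 'value': data_val}
    return pd.DataFrame(data=data)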
def test_jump_model_for_cache(self):
    cache = {
        'patternCenter': [2, 6],
        'patternModel': [5, 0.5, 4],
        'confidence': 2,
        'convolveMax': 8,
        'convolveMin': 7,
        'window_size': 1,
        'convDelMin': 0,
        'convDelMax': 0,
    }
    data_val = [
        1.0, 1.0, 1.0, 4.0, 4.0, 0.0, 0.0, 5.0,
        5.0, 0.0, 0.0, 4.0, 4.0, 4.0, 4.0
    ]
    dataframe = create_dataframe(data_val)
    segments = [{
        '_id': 'Esl7uetLhx4lCqHa',
        'analyticUnitId': 'opnICRJwOmwBELK8',
        'from': 1523889000009,
        'to': 1523889000013,
        'labeled': True,
        'deleted': False
    }]
    segments = [Segment.from_json(segment) for segment in segments]

    model = models.JumpModel()
    model.state = model.get_state(cache)
    result = model.fit(dataframe, segments, 'test')
    self.assertEqual(len(result.pattern_center), 3)
def test_models_for_pattern_model_cache(self):
    cache = {
        'patternCenter': [4, 12],
        'patternModel': [],
        'confidence': 2,
        'convolveMax': 8,
        'convolveMin': 7,
        'window_size': 2,
        'convDelMin': 0,
        'convDelMax': 0,
    }
    data_val = [
        5.0, 5.0, 5.0, 5.0, 1.0, 1.0, 1.0, 1.0, 9.0, 9.0, 9.0, 9.0, 0,
        0, 0, 0, 0, 0, 6.0, 6.0, 6.0, 1.0, 1.0, 1.0, 1.0, 1.0
    ]
    dataframe = create_dataframe(data_val)
    segments = [{
        '_id': 'Esl7uetLhx4lCqHa',
        'analyticUnitId': 'opnICRJwOmwBELK8',
        'from': 1523889000019,
        'to': 1523889000024,
        'labeled': True,
        'deleted': False
    }]
    segments = [Segment.from_json(segment) for segment in segments]

    try:
        model = models.DropModel()
        model_name = model.__class__.__name__
        model.state = model.get_state(cache)
        model.fit(dataframe, segments, 'test')
    except ValueError:
        self.fail('Model {} raised unexpectedly'.format(model_name))
def test_three_value_segment(self):
    data_val = [
        1.0, 1.0, 1.0, 1.0, 1.0, 5.0, 2.0, 5.0, 5.0, 1.0, 1.0, 1.0, 1.0,
        9.0, 9.0, 9.0, 9.0, 2.0, 3.0, 4.0, 5.0, 4.0, 2.0, 1.0, 3.0, 4.0
    ]
    dataframe = create_dataframe(data_val)
    segments = [{
        '_id': 'Esl7uetLhx4lCqHa',
        'analyticUnitId': 'opnICRJwOmwBELK8',
        'from': 1523889000004,
        'to': 1523889000006,
        'labeled': True,
        'deleted': False
    }]
    segments = [Segment.from_json(segment) for segment in segments]

    model_instances = [
        models.GeneralModel(),
        models.PeakModel(),
    ]
    try:
        for model in model_instances:
            model_name = model.__class__.__name__
            model.state = model.get_state(None)
            model.fit(dataframe, segments, 'test')
    except ValueError:
        self.fail('Model {} raised unexpectedly'.format(model_name))
def test_positive_and_negative_segments(self):
    data_val = [
        1.0, 1.0, 1.0, 2.0, 3.0, 2.0, 1.0, 1.0, 1.0, 1.0,
        5.0, 7.0, 5.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
    ]
    dataframe = create_dataframe(data_val)
    segments = [{
        '_id': 'Esl7uetLhx4lCqHa',
        'analyticUnitId': 'opnICRJwOmwBELK8',
        'from': 1523889000004,
        'to': 1523889000006,
        'labeled': True,
        'deleted': False
    }, {
        '_id': 'Esl7uetLhx4lCqHa',
        'analyticUnitId': 'opnICRJwOmwBELK8',
        'from': 1523889000001,
        'to': 1523889000003,
        'labeled': False,
        'deleted': False
    }]
    segments = [Segment.from_json(segment) for segment in segments]

    cache = {}
    detector = pattern_detector.PatternDetector('PEAK', 'test_id')
    try:
        detector.train(dataframe, segments, cache)
    except Exception as e:
        self.fail('detector.train fail with error {}'.format(e))
def test_only_negative_segments(self):
    data_val = [0, 1, 2, 1, 2, 10, 1, 2, 1]
    data_ind = [1523889000000 + i for i in range(len(data_val))]
    data = {'timestamp': data_ind, 'value': data_val}
    dataframe = pd.DataFrame(data=data)
    segments = [{
        '_id': 'Esl7uetLhx4lCqHa',
        'analyticUnitId': 'opnICRJwOmwBELK8',
        'from': 1523889000019,
        'to': 1523889000025,
        'labeled': False,
        'deleted': False
    }, {
        '_id': 'Esl7uetLhx4lCqHa',
        'analyticUnitId': 'opnICRJwOmwBELK8',
        'from': 1523889000002,
        'to': 1523889000008,
        'labeled': False,
        'deleted': False
    }]
    segments = [Segment.from_json(segment) for segment in segments]

    cache = {}
    detector = pattern_detector.PatternDetector('PEAK', 'test_id')
    expected_error_message = 'test_id has no positive labeled segments. Pattern detector needs at least 1 positive labeled segment'
    try:
        detector.train(dataframe, segments, cache)
    except ValueError as e:
        self.assertEqual(str(e), expected_error_message)
def detect(self, dataframe: pd.DataFrame, cache: Optional[ModelCache]) -> DetectionResult:
    logger.debug('Unit {} got {} data points for detection'.format(self.analytic_unit_id, len(dataframe)))
    # TODO: split and sleep (https://github.com/hastic/hastic-server/pull/124#discussion_r214085643)

    if cache is None:
        msg = f'{self.analytic_unit_id} detection got invalid cache, skip detection'
        logger.error(msg)
        raise ValueError(msg)

    self.model.state = self.model.get_state(cache)
    window_size = self.model.state.window_size

    if window_size is None:
        message = '{} got cache without window_size for detection'.format(self.analytic_unit_id)
        logger.error(message)
        raise ValueError(message)

    if len(dataframe) < window_size * 2:
        message = f'{self.analytic_unit_id} skip detection: dataset length {len(dataframe)} points less than minimal length {window_size * 2} points'
        logger.error(message)
        raise ValueError(message)

    detected = self.model.detect(dataframe, self.analytic_unit_id)

    segments = [Segment(segment[0], segment[1]) for segment in detected['segments']]
    new_cache = detected['cache'].to_json()
    last_dataframe_time = dataframe.iloc[-1]['timestamp']
    last_detection_time = convert_pd_timestamp_to_ms(last_dataframe_time)
    return DetectionResult(new_cache, segments, last_detection_time)
def detections_generator(
    self,
    dataframe: pd.DataFrame,
    upper_bound: pd.DataFrame,
    lower_bound: pd.DataFrame,
    enabled_bounds: Bound
) -> Generator[Segment, None, Segment]:
    in_segment = False
    segment_start = 0
    bound: Bound = None
    for idx, val in enumerate(dataframe['value'].values):
        # point above the upper bound: open (or extend) an UPPER segment
        if val > upper_bound.values[idx]:
            if enabled_bounds == Bound.UPPER or enabled_bounds == Bound.ALL:
                if not in_segment:
                    in_segment = True
                    segment_start = dataframe['timestamp'][idx]
                    bound = Bound.UPPER
                continue

        # point below the lower bound: open (or extend) a LOWER segment
        if val < lower_bound.values[idx]:
            if enabled_bounds == Bound.LOWER or enabled_bounds == Bound.ALL:
                if not in_segment:
                    in_segment = True
                    segment_start = dataframe['timestamp'][idx]
                    bound = Bound.LOWER
                continue

        # point back inside the bounds: close the open segment at the previous point
        if in_segment:
            segment_end = dataframe['timestamp'][idx - 1]
            yield Segment(
                utils.convert_pd_timestamp_to_ms(segment_start),
                utils.convert_pd_timestamp_to_ms(segment_end),
                message=f'{val} out of {str(bound.value)} bound'
            )
            in_segment = False
    else:
        # for-else: runs when the loop finishes; a segment still open at the last
        # point is returned (not yielded), so it arrives as the generator's
        # StopIteration value rather than through normal iteration
        if in_segment:
            segment_end = dataframe['timestamp'][idx]
            return Segment(
                utils.convert_pd_timestamp_to_ms(segment_start),
                utils.convert_pd_timestamp_to_ms(segment_end),
                message=f'{val} out of {str(bound.value)} bound'
            )
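# Hedged usage sketch for detections_generator. The detector instance, the constant
# bound series and the dataframe are placeholders, not part of the original code.
# Because a segment still open at the last data point is returned rather than
# yielded, a plain for-loop will not see it; it has to be read from StopIteration.
upper_bound = pd.Series([10.0] * len(dataframe))
lower_bound = pd.Series([1.0] * len(dataframe))
gen = detector.detections_generator(dataframe, upper_bound, lower_bound, Bound.ALL)
try:
    while True:
        segment = next(gen)
        print(segment.from_timestamp, segment.to_timestamp, segment.message)
except StopIteration as stop:
    if stop.value is not None:  # trailing segment that reached the end of the data
        print(stop.value.from_timestamp, stop.value.to_timestamp, stop.value.message)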
def detect(self, dataframe: pd.DataFrame, cache: ModelCache) -> DetectionResult:
    if cache is None or cache == {}:
        raise ValueError('Threshold detector error: cannot detect before learning')
    if len(dataframe) == 0:
        return None

    value = cache['value']
    condition = cache['condition']

    segments = []
    for index, row in dataframe.iterrows():
        current_value = row['value']
        current_timestamp = utils.convert_pd_timestamp_to_ms(row['timestamp'])
        segment = Segment(current_timestamp, current_timestamp)
        # TODO: merge segments
        if pd.isnull(current_value):
            if condition == 'NO_DATA':
                segment.message = 'NO_DATA detected'
                segments.append(segment)
            continue

        comparators = {
            '>': operator.gt,
            '<': operator.lt,
            '=': operator.eq,
            '>=': operator.ge,
            '<=': operator.le
        }

        assert condition in comparators.keys(), f'condition {condition} not allowed'

        if comparators[condition](current_value, value):
            segment.message = f"{current_value} {condition} threshold's value {value}"
            segments.append(segment)

    last_entry = dataframe.iloc[-1]
    last_detection_time = utils.convert_pd_timestamp_to_ms(last_entry['timestamp'])
    return DetectionResult(cache, segments, last_detection_time)
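# Illustrative call of the threshold detect above. The cache shape ('value' and
# 'condition') follows the reads in the method; the detector and dataframe names
# and the DetectionResult.segments attribute access are assumptions for this sketch.
cache = {'value': 95.0, 'condition': '>='}
result = detector.detect(dataframe, cache)
for segment in result.segments:
    print(segment.message)  # e.g. "97.0 >= threshold's value 95.0"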
async def __handle_analytic_task(self, task: object) -> dict:
    """
    returns payload or None
    """
    analytic_unit_id: AnalyticUnitId = task['analyticUnitId']
    log.debug('Analytics get task with type: {} for unit: {}'.format(task['type'], analytic_unit_id))
    if task['type'] == 'CANCEL':
        if analytic_unit_id in self.analytic_workers:
            self.analytic_workers[analytic_unit_id].cancel()
        return

    payload = task['payload']
    worker = self.__ensure_worker(analytic_unit_id, payload['detector'], payload['analyticUnitType'])
    data = payload.get('data')
    if task['type'] == 'PUSH':
        # TODO: do it a better way
        res = await worker.consume_data(data, payload['cache'])
        if res:
            res.update({'analyticUnitId': analytic_unit_id})
        return res
    elif task['type'] == 'LEARN':
        if 'segments' in payload:
            segments = payload['segments']
            segments = [Segment.from_json(segment) for segment in segments]
            return await worker.do_train(segments, data, payload['cache'])
        elif 'threshold' in payload:
            return await worker.do_train(payload['threshold'], data, payload['cache'])
        elif 'anomaly' in payload:
            return await worker.do_train(payload['anomaly'], data, payload['cache'])
        else:
            raise ValueError('No segments or threshold in LEARN payload')
    elif task['type'] == 'DETECT':
        return await worker.do_detect(data, payload['cache'])
    elif task['type'] == 'PROCESS':
        return await worker.process_data(data, payload['cache'])

    raise ValueError('Unknown task type "%s"' % task['type'])
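# Shape of a LEARN task this handler accepts, inferred from the field accesses
# above; every concrete value here is a placeholder, not the server's actual
# payload format.
task = {
    'analyticUnitId': 'opnICRJwOmwBELK8',
    'type': 'LEARN',
    'payload': {
        'detector': 'pattern',          # assumed detector name
        'analyticUnitType': 'PEAK',     # assumed unit type
        'cache': None,
        'data': [[1523889000000, 1.0], [1523889000001, 5.0]],  # assumed data layout
        'segments': [{
            '_id': 'Esl7uetLhx4lCqHa', 'analyticUnitId': 'opnICRJwOmwBELK8',
            'from': 1523889000000, 'to': 1523889000001,
            'labeled': True, 'deleted': False
        }],
    },
}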
def test_general_for_two_labeling(self):
    data_val = [1.0, 2.0, 5.0, 2.0, 1.0, 1.0, 3.0, 6.0, 4.0, 2.0, 1.0, 0, 0]
    dataframe = create_dataframe(data_val)
    segments = [{
        '_id': 'Esl7uetLhx4lCqHa',
        'analyticUnitId': 'opnICRJwOmwBELK8',
        'from': 1523889000001,
        'to': 1523889000003,
        'labeled': True,
        'deleted': False
    }]
    segments = [Segment.from_json(segment) for segment in segments]

    model = models.GeneralModel()
    model.state = model.get_state(None)
    model.fit(dataframe, segments, 'test')
    result = len(data_val) + 1
    for _ in range(2):
        model.do_detect(dataframe)
        max_pattern_index = max(model.do_detect(dataframe))
        self.assertLessEqual(max_pattern_index[0], result)
def test_value_error_dataset_input_should_have_multiple_elements(self):
    data_val = [
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 5.0, 5.0, 4.0, 5.0, 5.0, 6.0, 5.0,
        1.0, 2.0, 3.0, 4.0, 5.0, 3.0, 3.0, 2.0, 7.0, 8.0, 9.0, 8.0, 7.0, 6.0
    ]
    dataframe = create_dataframe(data_val)
    segments = [{
        '_id': 'Esl7uetLhx4lCqHa',
        'analyticUnitId': 'opnICRJwOmwBELK8',
        'from': 1523889000007,
        'to': 1523889000011,
        'labeled': True,
        'deleted': False
    }]
    segments = [Segment.from_json(segment) for segment in segments]

    try:
        model = models.JumpModel()
        model.state = model.get_state(None)
        model_name = model.__class__.__name__
        model.fit(dataframe, segments, 'test')
    except ValueError:
        self.fail('Model {} raised unexpectedly'.format(model_name))
def test_serialize(self):
    segment_list = [Segment(100, 200)]
    serialize_list = utils.meta.SerializableList(segment_list)
    meta_result = utils.meta.serialize(serialize_list)
    expected_result = [{'from': 100, 'to': 200}]
    self.assertEqual(meta_result, expected_result)
def test_merge_intersecting_segments(self):
    test_cases = [
        {
            'index': [Segment(10, 20), Segment(30, 40)],
            'result': [[10, 20], [30, 40]],
            'step': 0,
        },
        {
            'index': [Segment(10, 20), Segment(13, 23), Segment(15, 17), Segment(20, 40)],
            'result': [[10, 40]],
            'step': 0,
        },
        {
            'index': [],
            'result': [],
            'step': 0,
        },
        {
            'index': [Segment(10, 20)],
            'result': [[10, 20]],
            'step': 0,
        },
        {
            'index': [Segment(10, 20), Segment(13, 23), Segment(25, 30), Segment(35, 40)],
            'result': [[10, 23], [25, 30], [35, 40]],
            'step': 0,
        },
        {
            'index': [Segment(10, 50), Segment(5, 40), Segment(15, 25), Segment(6, 50)],
            'result': [[5, 50]],
            'step': 0,
        },
        {
            'index': [Segment(5, 10), Segment(10, 20), Segment(25, 50)],
            'result': [[5, 20], [25, 50]],
            'step': 0,
        },
        {
            'index': [Segment(20, 40), Segment(10, 15), Segment(50, 60)],
            'result': [[10, 15], [20, 40], [50, 60]],
            'step': 0,
        },
        {
            'index': [Segment(20, 40), Segment(10, 20), Segment(50, 60)],
            'result': [[10, 40], [50, 60]],
            'step': 0,
        },
        {
            'index': [Segment(10, 10), Segment(20, 20), Segment(30, 30)],
            'result': [[10, 30]],
            'step': 10,
        },
    ]

    for case in test_cases:
        utils_result = utils.merge_intersecting_segments(case['index'], case['step'])
        # check lengths first: zip alone would silently ignore missing or extra segments
        self.assertEqual(len(utils_result), len(case['result']))
        for got, expected in zip(utils_result, case['result']):
            self.assertEqual(got.from_timestamp, expected[0])
            self.assertEqual(got.to_timestamp, expected[1])
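# A minimal sketch of the merge behaviour the test above exercises, not the
# project's actual utils.merge_intersecting_segments: sort by start, then fold in
# any segment whose start is within `step` of the previously merged segment's end.
# Mutating Segment.to_timestamp in place is an assumption made for brevity.
from typing import List

def merge_intersecting_segments_sketch(segments: List[Segment], step: int) -> List[Segment]:
    if len(segments) == 0:
        return []
    segments = sorted(segments, key=lambda s: s.from_timestamp)
    merged = [segments[0]]
    for segment in segments[1:]:
        last = merged[-1]
        if segment.from_timestamp <= last.to_timestamp + step:
            # overlaps or falls within `step` of the previous segment: extend it
            last.to_timestamp = max(last.to_timestamp, segment.to_timestamp)
        else:
            merged.append(segment)
    return merged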