Exemple #1
0
def find_interval(dataframe: pd.DataFrame) -> int:
    """Return the time step between the first two rows of ``dataframe``.

    The step is the ``timestamp`` of the second row minus the ``timestamp``
    of the first row, each converted with
    ``utils.convert_pd_timestamp_to_ms``.

    Args:
        dataframe: Data with a ``timestamp`` column and at least two rows.

    Returns:
        Difference between the two converted timestamps.

    Raises:
        ValueError: If ``dataframe`` has fewer than two rows.
    """
    if len(dataframe) < 2:
        raise ValueError(
            'Can`t find interval: length of data must be at least 2')
    # Positional access: the first two rows are wanted no matter how the
    # frame is indexed. Looking up the labels 0 and 1 fails on a frame whose
    # index does not start at 0 (for example a slice of a bigger frame).
    timestamps = dataframe.timestamp
    first_ms = utils.convert_pd_timestamp_to_ms(timestamps.iloc[0])
    second_ms = utils.convert_pd_timestamp_to_ms(timestamps.iloc[1])
    return second_ms - first_ms
Exemple #2
0
 def detect(self, dataframe: pd.DataFrame, id: AnalyticUnitId) -> dict:
     """Run ``do_detect`` on ``dataframe`` and report segments as timestamps.

     Every item produced by ``self.do_detect`` supplies two keys (item ``[0]``
     and item ``[1]``) that are looked up in the ``timestamp`` column; both
     timestamps are converted with ``utils.convert_pd_timestamp_to_ms``.

     Args:
         dataframe: Data with a ``timestamp`` column.
         id: Analytic unit identifier, used only in log messages.

     Returns:
         Dict with ``'segments'`` (list of ``(from, to)`` tuples) and
         ``'cache'`` (the current ``self.state``).
     """
     logging.debug('Start method detect for analytic unit {}'.format(id))
     detected = self.do_detect(dataframe)

     to_ms = utils.convert_pd_timestamp_to_ms
     segments = []
     for bounds in detected:
         start_ms = to_ms(dataframe['timestamp'][bounds[0]])
         end_ms = to_ms(dataframe['timestamp'][bounds[1]])
         segments.append((start_ms, end_ms))

     # An empty state is still returned, but it is worth a warning.
     if not self.state:
         logging.warning('Return empty self.state after detect')
     logging.debug('Method detect complete successful for analytic unit {}'.format(id))

     response = {}
     response['segments'] = segments
     response['cache'] = self.state
     return response
Exemple #3
0
    def detect(self, dataframe: pd.DataFrame,
               cache: Optional[ModelCache]) -> DetectionResult:
        """Validate the cache and data size, then delegate to ``self.model``.

        The model state is restored from ``cache`` via
        ``self.model.get_state``; detection is refused unless the state has a
        ``window_size`` and ``dataframe`` holds at least twice that many rows.

        Args:
            dataframe: Data with a ``timestamp`` column.
            cache: Previously stored model cache; must not be ``None``.

        Returns:
            ``DetectionResult`` holding the model cache serialized with
            ``to_json()``, the detected segments and the converted timestamp
            of the last row of ``dataframe``.

        Raises:
            ValueError: If ``cache`` is ``None``, the restored state has no
                ``window_size``, or ``dataframe`` is shorter than
                ``window_size * 2``.
        """
        def fail(reason: str) -> None:
            # Every rejection is logged and raised with the same text.
            logger.error(reason)
            raise ValueError(reason)

        logger.debug('Unit {} got {} data points for detection'.format(
            self.analytic_unit_id, len(dataframe)))
        # TODO: split and sleep (https://github.com/hastic/hastic-server/pull/124#discussion_r214085643)

        if cache is None:
            fail(f'{self.analytic_unit_id} detection got invalid cache, skip detection')

        self.model.state = self.model.get_state(cache)
        window_size = self.model.state.window_size

        if window_size is None:
            fail('{} got cache without window_size for detection'.format(
                self.analytic_unit_id))

        if len(dataframe) < window_size * 2:
            fail(f'{self.analytic_unit_id} skip detection: dataset length {len(dataframe)} points less than minimal length {window_size * 2} points')

        detected = self.model.detect(dataframe, self.analytic_unit_id)

        segments = []
        for bounds in detected['segments']:
            segments.append(Segment(bounds[0], bounds[1]))
        new_cache = detected['cache'].to_json()

        last_row = dataframe.iloc[-1]
        last_detection_time = convert_pd_timestamp_to_ms(last_row['timestamp'])
        return DetectionResult(new_cache, segments, last_detection_time)
Exemple #4
0
    def detect(self, dataframe: pd.DataFrame,
               cache: Optional[ModelCache]) -> DetectionResult:
        """Detect values of ``dataframe`` that leave the confidence bounds.

        The ``value`` column is smoothed with ``utils.exponential_smoothing``
        using ``cache.alpha``; the lower and upper bounds are the smoothed
        data minus / plus ``cache.confidence``. For every segment stored in
        the cache both bounds are adjusted with ``add_season_to_data``. The
        detections are whatever ``detections_generator`` yields for the final
        bounds.

        Args:
            dataframe: Data with ``timestamp`` and ``value`` columns.
            cache: Serialized cache accepted by ``AnomalyCache.from_json``.

        Returns:
            ``DetectionResult`` built from ``cache.to_json()``, the detected
            segments and the converted timestamp of the last row.

        Raises:
            ValueError: If ``cache`` is ``None``.
        """
        if cache is None:
            # Raising a plain string is itself a TypeError in Python 3, which
            # hid the real message; raise a proper exception instead.
            raise ValueError(
                f'Analytic unit {self.analytic_unit_id} got empty cache')
        data = dataframe['value']

        cache = AnomalyCache.from_json(cache)
        segments = cache.segments
        enabled_bounds = cache.get_enabled_bounds()

        smoothed_data = utils.exponential_smoothing(data, cache.alpha)

        lower_bound = smoothed_data - cache.confidence
        upper_bound = smoothed_data + cache.confidence

        if len(segments) > 0:
            # First row by position, mirroring the `iloc[-1]` used for the
            # last row below; a label lookup of 0 depends on the index.
            data_start_time = utils.convert_pd_timestamp_to_ms(
                dataframe['timestamp'].iloc[0])
            # Same for every segment, so computed once.
            seasonality_index = cache.seasonality // cache.time_step

            for segment in segments:
                seasonality_offset = self.get_seasonality_offset(
                    segment.from_timestamp, cache.seasonality, data_start_time,
                    cache.time_step)
                segment_data = pd.Series(segment.data)

                lower_bound = self.add_season_to_data(lower_bound,
                                                      segment_data,
                                                      seasonality_offset,
                                                      seasonality_index,
                                                      Bound.LOWER)
                upper_bound = self.add_season_to_data(upper_bound,
                                                      segment_data,
                                                      seasonality_offset,
                                                      seasonality_index,
                                                      Bound.UPPER)

        detected_segments = list(
            self.detections_generator(dataframe, upper_bound, lower_bound,
                                      enabled_bounds))

        last_dataframe_time = dataframe.iloc[-1]['timestamp']
        last_detection_time = utils.convert_pd_timestamp_to_ms(
            last_dataframe_time)

        return DetectionResult(cache.to_json(), detected_segments,
                               last_detection_time)
    def detections_generator(
        self,
        dataframe: pd.DataFrame,
        upper_bound: pd.DataFrame,
        lower_bound: pd.DataFrame,
        enabled_bounds: Bound
    ) -> Generator[Segment, None, None]:
        """Yield one ``Segment`` per run of consecutive out-of-bound values.

        A value counts as out of bounds when it is above ``upper_bound`` (and
        ``enabled_bounds`` is ``Bound.UPPER`` or ``Bound.ALL``) or below
        ``lower_bound`` (and ``enabled_bounds`` is ``Bound.LOWER`` or
        ``Bound.ALL``). Values and bounds are matched by row position. A run
        ends at the row before the first value that is not out of bounds, or
        at the last row when the data ends while a run is still open.

        Args:
            dataframe: Data with ``timestamp`` and ``value`` columns.
            upper_bound: Upper bound per row, read through ``.values``.
            lower_bound: Lower bound per row, read through ``.values``.
            enabled_bounds: Which bounds are allowed to open a segment.

        Yields:
            ``Segment`` with the converted start and end timestamps of a run
            and a message naming the bound that opened it.
        """
        in_segment = False
        segment_start = 0
        bound: Bound = None

        # Fetched once; positional access keeps the timestamps in step with
        # the positional `.values` arrays used for the comparison.
        timestamps = dataframe['timestamp']
        upper_values = upper_bound.values
        lower_values = lower_bound.values
        upper_enabled = enabled_bounds == Bound.UPPER or enabled_bounds == Bound.ALL
        lower_enabled = enabled_bounds == Bound.LOWER or enabled_bounds == Bound.ALL

        for idx, val in enumerate(dataframe['value'].values):
            violated = None
            if upper_enabled and val > upper_values[idx]:
                violated = Bound.UPPER
            elif lower_enabled and val < lower_values[idx]:
                violated = Bound.LOWER

            if violated is not None:
                # Only the first point of a run records the start and the
                # bound; later points just keep the run open.
                if not in_segment:
                    in_segment = True
                    segment_start = timestamps.iloc[idx]
                    bound = violated
                continue

            if in_segment:
                segment_end = timestamps.iloc[idx - 1]
                # NOTE(review): `val` here is the first value that is no
                # longer out of bounds, not one of the values inside the
                # segment - confirm this is the intended message.
                yield Segment(
                    utils.convert_pd_timestamp_to_ms(segment_start),
                    utils.convert_pd_timestamp_to_ms(segment_end),
                    message=f'{val} out of {str(bound.value)} bound'
                )
                in_segment = False

        if in_segment:
            # The data ended inside a run. This segment must be yielded: a
            # value `return`ed from a generator ends up in
            # StopIteration.value and is dropped by `list(...)` / `for`.
            segment_end = timestamps.iloc[idx]
            yield Segment(
                utils.convert_pd_timestamp_to_ms(segment_start),
                utils.convert_pd_timestamp_to_ms(segment_end),
                message=f'{val} out of {str(bound.value)} bound'
            )
Exemple #6
0
    def process_data(self, dataframe: pd.DataFrame,
                     cache: ModelCache) -> ProcessingResult:
        """Build the confidence-bound time series for ``dataframe``.

        The ``value`` column is smoothed with ``utils.exponential_smoothing``
        using ``cache.alpha``; the lower and upper bounds are the smoothed
        data minus / plus ``cache.confidence``. For every segment stored in
        the cache both bounds are adjusted with ``add_season_to_data``. Each
        bound is then paired with the timestamps produced by
        ``utils.convert_series_to_timestamp_list``.

        Args:
            dataframe: Data with ``timestamp`` and ``value`` columns.
            cache: Serialized cache accepted by ``AnomalyCache.from_json``.

        Returns:
            ``ProcessingResult`` with both bounds for ``Bound.ALL``, only the
            upper bound for ``Bound.UPPER``, only the lower bound for
            ``Bound.LOWER``; ``None`` for any other value of the enabled
            bounds.
        """
        cache = AnomalyCache.from_json(cache)
        segments = cache.segments
        enabled_bounds = cache.get_enabled_bounds()

        # TODO: exponential_smoothing should return dataframe with related timestamps
        smoothed_data = utils.exponential_smoothing(dataframe['value'],
                                                    cache.alpha)

        lower_bound = smoothed_data - cache.confidence
        upper_bound = smoothed_data + cache.confidence

        if len(segments) > 0:
            # First row by position; a label lookup of 0 fails on a frame
            # whose index does not start at 0.
            data_start_time = utils.convert_pd_timestamp_to_ms(
                dataframe['timestamp'].iloc[0])
            # Same for every segment, so computed once.
            seasonality_index = cache.seasonality // cache.time_step

            for segment in segments:
                # TODO: move it to utils and add tests
                seasonality_offset = self.get_seasonality_offset(
                    segment.from_timestamp, cache.seasonality, data_start_time,
                    cache.time_step)
                segment_data = pd.Series(segment.data)

                lower_bound = self.add_season_to_data(lower_bound,
                                                      segment_data,
                                                      seasonality_offset,
                                                      seasonality_index,
                                                      Bound.LOWER)
                upper_bound = self.add_season_to_data(upper_bound,
                                                      segment_data,
                                                      seasonality_offset,
                                                      seasonality_index,
                                                      Bound.UPPER)

                # TODO: support multiple segments

        timestamps = utils.convert_series_to_timestamp_list(
            dataframe.timestamp)
        lower_bound_timeseries = list(
            zip(timestamps, lower_bound.values.tolist()))
        upper_bound_timeseries = list(
            zip(timestamps, upper_bound.values.tolist()))

        if enabled_bounds == Bound.ALL:
            return ProcessingResult(lower_bound_timeseries,
                                    upper_bound_timeseries)
        if enabled_bounds == Bound.UPPER:
            return ProcessingResult(upper_bound=upper_bound_timeseries)
        if enabled_bounds == Bound.LOWER:
            return ProcessingResult(lower_bound=lower_bound_timeseries)
        # No known bound is enabled: nothing to report (previously an
        # implicit fall-through).
        return None
Exemple #7
0
    def detect(self, dataframe: pd.DataFrame, cache: ModelCache) -> DetectionResult:
        """Compare every row of ``dataframe`` with the threshold in ``cache``.

        ``cache['condition']`` is either ``'NO_DATA'`` or one of the
        comparison signs ``>``, ``<``, ``=``, ``>=``, ``<=``. With
        ``'NO_DATA'`` a segment is produced for every row whose value is
        null; with a comparison sign a segment is produced for every non-null
        row whose value satisfies the comparison against ``cache['value']``.
        Each segment starts and ends at the timestamp of its row.

        Args:
            dataframe: Data with ``timestamp`` and ``value`` columns.
            cache: Mapping with the keys ``'value'`` and ``'condition'``.

        Returns:
            ``DetectionResult`` with the unchanged ``cache``, the segments and
            the converted timestamp of the last row; ``None`` when
            ``dataframe`` is empty.

        Raises:
            ValueError: If ``cache`` is ``None`` or empty, or if a non-null
                row is met while the condition is neither ``'NO_DATA'`` nor a
                supported comparison sign.
        """
        if cache is None or cache == {}:
            raise ValueError('Threshold detector error: cannot detect before learning')
        if len(dataframe) == 0:
            return None

        value = cache['value']
        condition = cache['condition']

        # Built once: the table does not depend on the row being processed.
        comparators = {
            '>': operator.gt,
            '<': operator.lt,
            '=': operator.eq,
            '>=': operator.ge,
            '<=': operator.le
        }

        segments = []
        for _, row in dataframe.iterrows():
            current_value = row['value']
            current_timestamp = utils.convert_pd_timestamp_to_ms(row['timestamp'])
            # TODO: merge segments
            if pd.isnull(current_value):
                if condition == 'NO_DATA':
                    segment = Segment(current_timestamp, current_timestamp)
                    segment.message = 'NO_DATA detected'
                    segments.append(segment)
                continue

            if condition == 'NO_DATA':
                # A present value is simply not a NO_DATA detection. Without
                # this guard the valid 'NO_DATA' condition was rejected as
                # "not allowed" on the first non-null row.
                continue

            if condition not in comparators:
                # Explicit raise instead of `assert`, which is removed when
                # Python runs with -O.
                raise ValueError(f'condition {condition} not allowed')

            if comparators[condition](current_value, value):
                segment = Segment(current_timestamp, current_timestamp)
                segment.message = f"{current_value} {condition} threshold's value {value}"
                segments.append(segment)

        last_entry = dataframe.iloc[-1]
        last_detection_time = utils.convert_pd_timestamp_to_ms(last_entry['timestamp'])
        return DetectionResult(cache, segments, last_detection_time)
Exemple #8
0
    def detect(self, dataframe: pd.DataFrame, cache: Optional[models.ModelCache]) -> dict:
        """Delegate detection to ``self.model`` and repackage its result.

        Args:
            dataframe: Data with a ``timestamp`` column.
            cache: Model cache passed through to ``self.model.detect``.

        Returns:
            Dict with ``'cache'`` (the cache returned by the model),
            ``'segments'`` (list of ``{'from': ..., 'to': ...}`` dicts built
            from items ``[0]`` and ``[1]`` of every detected segment) and
            ``'lastDetectionTime'`` (converted timestamp of the last row).
        """
        logger.debug('Unit {} got {} data points for detection'.format(self.analytic_unit_id, len(dataframe)))
        # TODO: split and sleep (https://github.com/hastic/hastic-server/pull/124#discussion_r214085643)
        detected = self.model.detect(dataframe, cache)

        segments = []
        for bounds in detected['segments']:
            segments.append({'from': bounds[0], 'to': bounds[1]})
        new_cache = detected['cache']

        last_row = dataframe.iloc[-1]
        last_detection_time = convert_pd_timestamp_to_ms(last_row['timestamp'])

        response = {}
        response['cache'] = new_cache
        response['segments'] = segments
        response['lastDetectionTime'] = last_detection_time
        return response
Exemple #9
0
    def detect(self, dataframe: pd.DataFrame, cache: ModelCache) -> dict:
        """Check the last non-null row of ``dataframe`` against the threshold.

        Rows containing nulls are dropped with ``dropna()``. If nothing is
        left, a segment at the current time is reported when
        ``cache['condition']`` is ``'NO_DATA'``. Otherwise the value of the
        last remaining row is compared with ``cache['value']`` using the
        comparison sign in ``cache['condition']``; any other condition
        produces no segment.

        Args:
            dataframe: Data with ``timestamp`` and ``value`` columns.
            cache: Mapping with the keys ``'value'`` and ``'condition'``.

        Returns:
            Dict with ``'cache'`` (unchanged), ``'segments'`` (list of
            ``{'from': ..., 'to': ...}`` dicts) and ``'lastDetectionTime'``
            (the current time in ms); ``None`` when no non-null rows remain
            and the condition is not ``'NO_DATA'``.

        Raises:
            ValueError: If ``cache`` is ``None``.
        """
        if cache is None:
            # Raising a plain string is itself a TypeError in Python 3, which
            # hid the real message; raise a proper exception instead.
            raise ValueError('Threshold detector error: cannot detect before learning')
        value = cache['value']
        condition = cache['condition']

        now = convert_sec_to_ms(time())
        segments = []

        dataframe_without_nans = dataframe.dropna()
        if len(dataframe_without_nans) == 0:
            if condition != 'NO_DATA':
                return None
            segments.append({'from': now, 'to': now})
        else:
            last_entry = dataframe_without_nans.iloc[-1]
            last_time = convert_pd_timestamp_to_ms(last_entry['timestamp'])
            last_value = last_entry['value']

            # Only the comparison selected by `condition` is evaluated.
            if condition == '>':
                triggered = last_value > value
            elif condition == '>=':
                triggered = last_value >= value
            elif condition == '=':
                triggered = last_value == value
            elif condition == '<=':
                triggered = last_value <= value
            elif condition == '<':
                triggered = last_value < value
            else:
                triggered = False

            if triggered:
                segments.append({'from': last_time, 'to': last_time})

        return {'cache': cache, 'segments': segments, 'lastDetectionTime': now}