def calculate_peak(self, file_index=-1, is_horizontal=True):
        """Locate the peak/edge position of a stored profile and cache it.

        :param int file_index: key (stringified) into the stored profiles dict
        :param bool is_horizontal: select the 'horizontal' or 'vertical' ROI
        """
        label = 'horizontal' if is_horizontal else 'vertical'

        _profile = self.roi[label]['profiles'][str(file_index)]
        yaxis = _profile['profile']

        if self.peak_algorithm == 'change_point':
            # PELT change-point detection on the profile values.
            var = np.mean(yaxis)
            result = pelt(normal_var(yaxis, var), len(yaxis))
            # result[0] is the series start; average the remaining change
            # points, skipping one extra leading point when possible.
            if len(result) > 2:
                peak = np.mean(result[2:])
            else:
                peak = np.mean(result[1:])

        else:  # sliding average
            _o_range = MeanRangeCalculation(data=yaxis)
            nbr_pixels = len(yaxis)
            delta_array = []
            for _pixel in np.arange(0, nbr_pixels - 5):
                _o_range.calculate_left_right_mean(pixel=_pixel)
                _o_range.calculate_delta_mean_square()
                delta_array.append(_o_range.delta_square)

            # Pixel with the largest left/right mean-square difference.
            peak = delta_array.index(max(delta_array[0: nbr_pixels - 5]))

        # BUG FIX: the change_point branch assigned `peak` while the final
        # store used `peak_value` (a NameError on that path) — both branches
        # now use `peak`. Also use builtin int(): np.int was removed in
        # NumPy 1.24.
        self.peak[label][file_index] = int(peak)
# --- Example 2 ---
    def test_pelt_normal_var_small(self):
        """Variance change on a 10-point sample is flagged at index 5."""
        sample = np.array([
            -1.82348457, -0.13819782, 1.25618544, -0.54487136, -2.24769311,
            9.82204284, -1.0181088, 3.93764179, -8.73177678, 5.99949843
        ])

        change_points = pelt(normal_var(sample, 0), len(sample))
        self.assertEqual(change_points, [0, 5])
# --- Example 3 ---
    def test_pelt_normal_var_big(self):
        """A variance shift at index 100 of a 200-point series is detected."""
        segment_size = 100
        center = 0.0

        np.random.seed(19348)
        # First half: tight spread (var 1.0); second half: wide spread (var 10.0).
        quiet_half = np.random.normal(center, 1.0, segment_size)
        noisy_half = np.random.normal(center, 10.0, segment_size)
        series = np.append(quiet_half, noisy_half)

        change_points = pelt(normal_var(series, center), len(series))
        self.assertEqual(change_points, [0, 100, 198])
# --- Example 4 ---
    def _pelt_change_point_detection(self,
                                     df=None,
                                     metric=None,
                                     min_ts_length=None,
                                     max_ts_length=None):
        """
        This function computes the significant change points based on PELT and the Kullback-Leibler divergence method.
        :param pandas.dataframe df: A pandas dataframe containing the time series
        :param pandas.dataframe metric: The metric in the dataframe that contains the time series
        :param int min_ts_length: Specifying the minimum required length of the time series for training
        :param int max_ts_length: Specifying the maximum required length of the time series for training.
        The training time series length truncates accordingly based on minimum between max_ts_length and the
        length of the input time series.
        :return: A pandas dataframe containing the time series after the last changepoint

        >>> df
                          raw  interpolated
        2016-01-02  1753421.0     14.377080
        2016-01-03  1879108.0     14.446308
        2016-01-04  1462725.0     14.195812
        2016-01-05  1525162.0     14.237612
        2016-01-06  1424264.0     14.169166
        ...               ...           ...
        2018-10-14  2185230.0     14.597232
        2018-10-15  1825539.0     14.417386
        2018-10-16  1776778.0     14.390313
        2018-10-17  1792899.0     14.399345
        2018-10-18  1738657.0     14.368624

        >>> copy_df, change_point_list
        (                  raw  interpolated
        2016-01-02  1753421.0     14.377080
        2016-01-03  1879108.0     14.446308
        2016-01-04  1462725.0     14.195812
        2016-01-05  1525162.0     14.237612
        2016-01-06  1424264.0     14.169166
        ...               ...           ...
        2018-10-14  2185230.0     14.597232
        2018-10-15  1825539.0     14.417386
        2018-10-16  1776778.0     14.390313
        2018-10-17  1792899.0     14.399345
        2018-10-18  1738657.0     14.368624
        [1021 rows x 2 columns], ['2016-12-26 00:00:00', '2018-09-10 00:00:00'])
        """
        import numpy as np
        import pandas as pd
        from changepy import pelt
        from changepy.costs import normal_var

        def _no_change_point(frame):
            # Fallback used when no (significant) change point exists: keep
            # the series as-is, truncated to at most max_ts_length points;
            # return None when it is too short for training. (This logic was
            # duplicated verbatim in two branches of the original.)
            if frame.shape[0] < min_ts_length:
                return None
            if frame.shape[0] < max_ts_length:
                return frame
            return frame.iloc[-max_ts_length:]

        change_point_threshold = self.change_point_threshold

        df_copy = pd.DataFrame(df[metric])

        counts = df_copy[metric].values
        mean = np.mean(counts)

        # Performing changepoint detection with respect to the data variability shift through PELT
        cdate = pelt(normal_var(counts, mean), len(counts))

        # If PELT detects the first datapoint to be a change point, then we ignore that change point
        if cdate and cdate[0] == 0:
            cdate.remove(0)

        if len(cdate) == 0:
            return _no_change_point(df), None

        # Finding the magnitude of divergence around every change point (detected by PELT) by comparing the
        # distributions of the data points on the left and the right side of the change point
        entrp = self._shift_intensity(change_points=cdate,
                                      df=df_copy,
                                      metric=metric)
        df_change_points = pd.DataFrame({
            'c_point': cdate,
            'entropy': entrp
        })

        # Narrowing down to the change points which satisfies a required lower bound of divergence
        df_change_points = df_change_points[
            df_change_points['entropy'] > change_point_threshold]
        cdate = df_change_points['c_point'].values

        if len(cdate) == 0:
            return _no_change_point(df), None

        # Set the start date of the time series based on the min_ts_length and the max_ts_length and the change
        # points
        df_subset = df_copy.iloc[cdate]
        change_point_list = [str(i) for i in df_subset.index]
        index = df_subset.index[-1]
        copy_df = (df.loc[index:df.last_valid_index()]
                   if self.data_shift_truncate else df)

        if copy_df.shape[0] < min_ts_length:
            # Return None if the time series after the change point contains
            # fewer data points than the minimum required length
            return None, change_point_list
        if copy_df.shape[0] > max_ts_length:
            # Truncate the time series after the change point if it contains
            # more than the required data for training
            copy_df = df.iloc[-max_ts_length:]
        return copy_df, change_point_list