def get_CP_dict(feature_dict, vid_list):
    """
    Function for CP Baseline
    Calculate changepoints for whole epsiode.

    Args:
        features_dict: dictionary of i3d features to calculate changepoints
        vid_list: list of video names

    Returns:
        dictionary of changepoints (key: video names; value: changepoints)
    """
    CP_dict = {}
    model = 'l2'
    pen = 80
    jump = 2

    for vid in vid_list:
        features = feature_dict[vid]
        if len(features) < 2:
            CP_dict[vid] = np.zeros(len(features))
            continue

        algo = rpt.Pelt(model=model, jump=jump).fit(features)
        res = algo.predict(pen=pen)
        res_np = [1 if ix in res else 0 for ix in range(len(features))]

        CP_dict[vid] = np.asarray(res_np)

    return CP_dict
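
# --- Usage sketch (not from the original repo): calling get_CP_dict on
# synthetic stand-ins for i3d features. Assumes the function above is in
# scope together with `import numpy as np` and `import ruptures as rpt`.
import numpy as np

rng = np.random.default_rng(0)
feature_dict = {
    "ep01.mp4": np.concatenate([rng.normal(0, 1, (100, 8)),
                                rng.normal(3, 1, (80, 8))]),
    "ep02.mp4": rng.normal(0, 1, (1, 8)),  # too short: yields all zeros
}
CP_dict = get_CP_dict(feature_dict, ["ep01.mp4", "ep02.mp4"])
print({vid: int(cp.sum()) for vid, cp in CP_dict.items()})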
Example #2
def changepoint_detection_singlecell(df,
                                     cellid,
                                     penalty=12,
                                     fontsize=14,
                                     figsize=(10, 4),
                                     create_plot=False):
    df1 = df.sort_values(by=["date_c"])
    signal = df1.rad_corr.values
    dates = df1.date_c.values
    algo = rpt.Pelt(model="rbf").fit(signal)
    result = algo.predict(pen=penalty)
    # drop the last value of result: predict() always returns len(signal)
    # as the final breakpoint, which is not a real changepoint
    result = result[:-1]
    if create_plot:
        fig, ax = plt.subplots(figsize=figsize, tight_layout=True)
        ax.scatter(dates, signal, s=6)
        ax.vlines(dates[result],
                  signal.min(),
                  signal.max(),
                  linestyles="dashed")
        for i in result:
            date = pd.to_datetime(dates[i])
            date = date.strftime("%d-%b-%Y")
            ax.text(dates[i],
                    signal.max() - 0.5,
                    date,
                    color="red",
                    fontsize=fontsize)
        ax.set_title("{}".format(cellid), fontsize=fontsize)
        ax.set_xlabel("Date", fontsize=fontsize)
        ax.set_ylabel("Nadir Normalized Radiance", fontsize=fontsize)
        ax.tick_params(axis='both', which='major', labelsize=fontsize)
        plt.show()
    return dates[result]
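
# --- Usage sketch (hypothetical data, not from the original repo): a toy
# DataFrame with the expected `date_c` and `rad_corr` columns and a step
# change in radiance halfway through the series.
import numpy as np
import pandas as pd

toy_df = pd.DataFrame({
    "date_c": pd.date_range("2020-01-01", periods=200, freq="D"),
    "rad_corr": np.r_[np.random.normal(1.0, 0.1, 100),
                      np.random.normal(2.0, 0.1, 100)],
})
change_dates = changepoint_detection_singlecell(toy_df, cellid="cell_42",
                                                penalty=12, create_plot=False)
print(change_dates)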
Example #3
    def _clean_and_compute_changes(self):

        tmp = []

        # now we go through and background-subtract the light curves

        for lc, poly in zip(self._light_curves, self._polys):

            n = len(lc.counts)

            bkg_counts = np.empty(n)
            bkg_errs = np.empty(n)

            for i, (a, b) in enumerate(zip(lc.time_bins[:-1],
                                           lc.time_bins[1:])):

                bkg_counts[i] = poly.integral(a, b)
                bkg_errs[i] = poly.integral_error(a, b)

            clean_counts = lc.counts - bkg_counts

            tmp.append(clean_counts)

        # look for the change points in the cleaned light curves

        tmp = np.vstack(tmp).T

        angles = angle_mapping(tmp)

        penalty = 2 * np.log(len(angles))
        algo = rpt.Pelt().fit(angles)
        cpts_seg = algo.predict(pen=penalty)

        self._all_change_points = np.array(cpts_seg) - 1
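
# --- Standalone sketch (hypothetical data): the `2 * np.log(n)` penalty used
# above is a BIC-style choice. The same idea on a plain 1-D signal, without
# the angle_mapping step.
import numpy as np
import ruptures as rpt

signal = np.r_[np.random.normal(0, 1, 150), np.random.normal(3, 1, 150)]
penalty = 2 * np.log(len(signal))
cpts = rpt.Pelt().fit(signal).predict(pen=penalty)
print(np.array(cpts) - 1)  # shifted to 0-based indices, as in the method above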
Example #4
def pelt(data):
    '''
    data: EURUSD asset values (DataFrame with a Close column).
    '''
    datos = np.array(data.Close)

    n = len(datos)  # Number of observations in the array.
    sigma = datos.std()  # Standard deviation of the data.
    p = np.log(n) * sigma**2  # Penalty used in the model.

    # Fit PELT and predict the breakpoints.
    algo = rpt.Pelt().fit(datos)
    my_bkps = algo.predict(pen=p)
    senal = pd.DataFrame(my_bkps)

    mean = senal.drop([
        len(my_bkps) - 1
    ])  # Drop the last value of the series; it is len(datos), not a breakpoint.
    mean = np.array(mean)  # Values obtained from the model, brought into an array.

    changes = mean.astype(
        int)  # Values of the previous array converted to integers.

    feature = boolean_change_point(datos, changes)

    # The function returns the change indices and the boolean change-point feature.
    return changes, feature
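
# --- Standalone sketch (synthetic prices, not real EURUSD data): the
# log(n) * sigma**2 penalty heuristic from the function above, end to end.
import numpy as np
import pandas as pd
import ruptures as rpt

close = np.r_[np.random.normal(1.10, 0.002, 300),
              np.random.normal(1.12, 0.002, 300)]
datos = np.array(pd.DataFrame({"Close": close}).Close)
p = np.log(len(datos)) * datos.std() ** 2
print(rpt.Pelt().fit(datos).predict(pen=p))  # last entry is always len(datos)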
Example #5
def extract_CP(args, features_dict):
    """
    Calculate and save Changepoints.
    Saves one .pkl label file per episode/ video.

    Args:
        args: parser arguments
        features_dict: dictionary of i3d features to calculate changepoints
    """
    pen = 80
    for vid in tqdm(features_dict.keys()):
        features = features_dict[vid]
        changepoints = []

        # check if CP for this episode with given setting (pen,..) already exists
        if not os.path.exists(
                f"data/pseudo_labels/CP/{args.test_data}/{vid.split('.')[0]}/pen_{pen}.pkl"
        ):
            algo = rpt.Pelt(model=args.merge_model,
                            jump=args.merge_jump).fit(features)
            res = algo.predict(pen=pen)
            CP = [1 if ix in res else 0 for ix in range(len(features))]
            if not os.path.exists(
                    f"data/pseudo_labels/CP/{args.test_data}/{vid.split('.')[0]}"
            ):
                os.makedirs(
                    f"data/pseudo_labels/CP/{args.test_data}/{vid.split('.')[0]}"
                )
            pickle.dump(
                np.asarray(CP),
                open(
                    f"data/pseudo_labels/CP/{args.test_data}/{vid.split('.')[0]}/pen_{pen}.pkl",
                    "wb"))
Example #6
def segment_array(data: np.ndarray) -> List[np.ndarray]:
    """
    Split up data into segments.
    :param data:
    :return:
    """

    if data is None or len(data) == 0:
        return []

    if len(data) == 1:
        return [np.array([1])]

    try:
        algo = rpt.Pelt().fit(data)
        segment_idxs = algo.predict(pen=1)

        segments: List[np.ndarray] = []
        start = 0
        for idx in segment_idxs:
            segments.append(data[start:idx])
            start = idx

        return segments
    except Exception as e:
        logging.error(str(e))
        return []
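
# --- Usage sketch (hypothetical data): assumes segment_array and its imports
# (numpy, ruptures, typing.List, logging) are in scope.
import numpy as np

data = np.r_[np.zeros(50), np.full(50, 5.0)]
segments = segment_array(data)
print([len(s) for s in segments])  # e.g. [50, 50] if the changepoint is found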
Example #7
    def needs_refinement_pelt(self, signals):
        import ruptures

        count = 0
        for signal in signals:
            if len(signal) < 100:
                continue

            algo = ruptures.Pelt(model=self.model,
                                 jump=len(signal) // 100,
                                 min_size=self.min_dist)
            algo = algo.fit(self.norm_signal(signal))

            # Empirically, most sub-state detection results use a penalty
            # in the range 30 to 60. If there are no changepoints with a
            # penalty of 20, there are also no changepoints with any penalty
            # > 20, so we can safely skip changepoint detection altogether.
            changepoints = algo.predict(pen=20)

            if not changepoints:
                continue

            if len(changepoints) and changepoints[-1] == len(signal):
                changepoints.pop()
            if len(changepoints) and changepoints[0] == 0:
                changepoints.pop(0)

            if changepoints:
                count += 1

        refinement_ratio = count / len(signals)
        return refinement_ratio > 0.3
Example #8
def actvTrnsAutoDetect(Data,
                       Interval,
                       Time='YYYY/MM/DD_HH:MM:SS',
                       Model="mahalanobis",
                       Penalty=50):
    # Interval is in minutes.
    ### Not recommended to use. The current detection algorithm is not very robust.
    import ruptures
    algo = ruptures.Pelt(model=Model).fit(Data)
    result = algo.predict(pen=Penalty)
    times = []
    Ttimes = []
    #plt.plot(Data)
    #plt.xticks(rotation=45)
    #plt.show()
    ruptures.display(Data, result)
    plt.show()
    for entry in result:
        time = Data.index[entry - 1]
        print("Time is: {}".format(time))
        #if entry is not 1 and entry is not len(self.data):
        if True:
            ### Filtering out results that are first and last (usually meaningless) is currently disabled.
            times.append(time)
            Ttimes.append([
                time - pandas.Timedelta(minutes=Interval / 2),
                time + pandas.Timedelta(minutes=Interval / 2)
            ])
    return Ttimes
Example #9
def ada_preprocessing(timeseries, delay_correction=0, transition_size=5):
    '''
    Complete preprocessing for later forecasting.

    Calculates breakpoints using the rbf kernel with optimal parameters.
    delay_correction specifies by how much found breakpoints are moved backwards.
    transition_size specifies how large the window around a breakpoint should be,
    inside which the transition-period feature equals 1.
    Returns the timeseries with categorical concept features, a transition-period
    feature, and steps since/to the nearest breakpoint.
    '''
    series = timeseries.copy()
    #series = create_simdata.linear1_abrupt()
    series = preprocess_timeseries(series)  #cuts out the first 10 observations
    signal = series.loc[:, [
        "t", 'pacf1', 'pacf2', 'pacf3', 'acf1', 'acf2', 'acf3', 'acf4', 'acf5',
        'var', 'kurt', 'skew', 'osc', 'mi_lag1', 'mi_lag2', 'mi_lag3'
    ]].to_numpy()
    algo = rpt.Pelt(model="rbf", min_size=2, jump=1).fit(signal[:, 1:])
    bkps = algo.predict(pen=12)
    #print(bkps)
    bkps = bkps[:-1]

    series = series.reset_index(drop=True)
    series = transform_bkps_to_features(bkps, series, delay_correction,
                                        transition_size)
    series = steps_to_from_bkps(series, bkps, delay_correction)
    series = series.loc[:, [
        "t", "t-1", "t-2", "t-3", "t-4", "t-5", "concept", "transition",
        "steps_since_bkp", "steps_to_bkp"
    ]]

    return series
Example #10
def get_change_points(log):
    attr_datetime = pm4py.get_attribute_values(log, 'time:timestamp')
    start_date = min(attr_datetime).date()
    end_date = max(attr_datetime).date()
    delta = datetime.timedelta(days=1)
    print("Start date: ", start_date, "\nEnd date: ", end_date)

    event_counts = {}
    i = start_date
    while i <= end_date:
        event_counts[i.strftime('%Y-%m-%d')] = 0
        #print(i)
        i += delta

    #print(event_counts)

    for t in attr_datetime:
        event_counts[t.date().strftime('%Y-%m-%d')] += 1

    counts = np.array(list(event_counts.values()))

    # detection
    algo = rpt.Pelt(model=MODEL).fit(counts)
    detect_result = algo.predict(pen=PENALTY)

    # display
    rpt.display(counts, detect_result, detect_result)
    plt.savefig('change_points.png')
    plt.show()
    print('Change point plot is saved as "change_points.png"')

    return event_counts, detect_result
Example #11
def grafik_baseline():
    if request.method == 'POST':
        if 'flink' in session:
            flink = session['flink']
            ftype = session['ftype']

            # baseline
            baseline = session['baseline']
            pilih_baseline = int(baseline)

            if (ftype == 'sp2') or (ftype == 'sp28'):
                a_file = urllib.request.urlopen(flink)  #Read raw file
                list_of_lists = []
                # Convert the raw file into a list of lists
                for line in a_file:
                    stripped_line = line.strip()
                    line_list = stripped_line.split()
                    list_of_lists.append(line_list)

                a_file.close()

                # Turn the list of lists into a dataframe
                data = pd.DataFrame(list_of_lists)
                data.columns = [
                    "time", "ch1", "ch2", "subsidiary", "difference"
                ]
                # Convert from dataframe to array
                data_numpy = data.to_numpy().transpose()
                data_y = data_numpy[1][0:len(data_numpy[1])]
                data_y = np.asfarray(data_y, float)
                data_x = data_numpy[0][0:len(data_numpy[0])]
                data_x = np.asfarray(data_x, float)

                # remove outliers
                dspk_y = despike(data_y, 50)

                # moving average filter
                ma_y = ma(dspk_y, 9)

                # savitzky-golay filter
                svg_y = savgol_filter(ma_y, window_length=41, polyorder=2)

                # change points detection
                chgpts_y = rpt.Pelt(model='l2').fit(svg_y)
                result_y = chgpts_y.predict(pen=5000)

                if pilih_baseline == 1:
                    base_y = baseline_poly_manual(data_x, svg_y)
                    #return render_template('grafik_baseline.html', **baseline_poly_manual.kwargs)
                elif pilih_baseline == 2:
                    base_y = baseline_prediction(data_x, svg_y, result_y)
                    #return render_template('grafik_baseline.html', **baseline_prediction.kwargs)
            elif (ftype == 'csv') or (ftype == 'xls') or (ftype == 'xlsx'):
                # NOTE: data_x, svg_y and result_y are only computed in the
                # sp2/sp28 branch above; this branch would need its own loading
                # and smoothing steps before the baseline can be fitted.
                if pilih_baseline == 1:
                    base_y = baseline_poly_manual(data_x, svg_y)
                elif pilih_baseline == 2:
                    base_y = baseline_prediction(data_x, svg_y, result_y)

    return redirect(url_for('content'))
Example #12
def get_change_point_dates(df, label, from_date, to_date):
    signal = df[label].loc[from_date:to_date].values
    timeline = df.loc[from_date:to_date].index

    algo = rpt.Pelt(model="rbf").fit(signal)
    result = algo.predict(pen=10)

    return np.take(timeline, [x - 1 for x in result])
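
# --- Usage sketch (hypothetical data): a date-indexed DataFrame with a level
# shift, assuming the fixed function above reads the signal from `df`.
import numpy as np
import pandas as pd

idx = pd.date_range("2021-01-01", periods=120, freq="D")
df = pd.DataFrame({"load": np.r_[np.random.normal(10, 1, 60),
                                 np.random.normal(20, 1, 60)]}, index=idx)
print(get_change_point_dates(df, "load", "2021-01-01", "2021-04-30"))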
Example #13
def get_changepoints(arr):
    """Helper function to find and return the changepoints in a wind speed
    time series array."""
    algo = rpt.Pelt(jump=1).fit(arr)
    # A penalty of 260 was determined to be the smallest penalty for which
    # the most changepoints detected across all stations was 2.
    return algo.predict(pen=260)
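
# --- Usage sketch (hypothetical data): a synthetic wind-speed series with a
# single regime shift; with pen=260 the shift should dominate the penalty.
import numpy as np

speeds = np.r_[np.random.normal(5.0, 1.0, 500),
               np.random.normal(9.0, 1.0, 500)]
print(get_changepoints(speeds))  # e.g. [500, 1000]; the last index is len(arr)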
Example #14
    def run(self):
        """
        Run optimization and find change points.

        Returns:
            self
        """
        # Convert the dataset, index: Recovered, column: log10(Susceptible)
        sr_df = self.sr_df.copy()
        sr_df[self.S] = np.log10(sr_df[self.S].astype(np.float64))
        df = sr_df.pivot_table(index=self.R, values=self.S, aggfunc="last")
        # Convert index to serial numbers
        serial_df = pd.DataFrame(np.arange(1, df.index.max() + 1, 1))
        serial_df.index += 1
        df = pd.merge(df,
                      serial_df,
                      left_index=True,
                      right_index=True,
                      how="outer")
        series = df.reset_index(drop=True).iloc[:, 0]
        series = series.interpolate(limit_direction="both")
        # Sampling to reduce run-time of Ruptures
        samples = np.linspace(0,
                              series.index.max(),
                              len(self.sr_df),
                              dtype=np.int64)
        series = series[samples]
        # Detection with Ruptures
        algorithm = rpt.Pelt(model="rbf", jump=1, min_size=self.min_size)
        results = algorithm.fit_predict(series.values, pen=0.5)
        # Convert index values to Susceptible values
        reset_series = series.reset_index(drop=True)
        reset_series.index += 1
        susceptible_df = reset_series[results].reset_index()
        # Convert Susceptible values to dates
        df = pd.merge_asof(susceptible_df.sort_values(self.S),
                           sr_df.reset_index().sort_values(self.S),
                           on=self.S,
                           direction="nearest")
        found_list = df[self.DATE].sort_values()[:-1]
        # Only use dates when the previous phase has more than {min_size + 1} days
        delta_days = timedelta(days=self.min_size)
        first_obj = self.date_obj(self.dates[0])
        last_obj = self.date_obj(self.dates[-1])
        effective_list = [first_obj]
        for found in found_list:
            if effective_list[-1] + delta_days < found:
                effective_list.append(found)
        # The last change date must be at least {min_size} days before the last date of records
        if effective_list[-1] >= last_obj - delta_days:
            effective_list = effective_list[:-1]
        # Set change points
        self._change_dates = [
            date.strftime(self.DATE_FORMAT) for date in effective_list[1:]
        ]
        return self
Example #15
def analysis_linear(penalization, iterations, data_creation_function,
                    size_concepts, obs_amount_beyond_window):
    identified_bkps_total = 0
    not_detected_bkps_total = 0
    miss_detected_bkps_total = 0
    delays_score_total = 0

    for i in range(0, iterations, 1):
        print(i)
        data = data_creation_function()
        data = pd.DataFrame({"t": data})

        #data = preprocess_timeseries(data) #cuts out the first 10 observations

        lags = pd.concat([
            data["t"].shift(1), data["t"].shift(2), data["t"].shift(3),
            data["t"].shift(4), data["t"].shift(5)
        ],
                         axis=1)
        data["t-1"] = lags.iloc[:, 0]
        data["t-2"] = lags.iloc[:, 1]
        data["t-3"] = lags.iloc[:, 2]
        data["t-4"] = lags.iloc[:, 3]
        data["t-5"] = lags.iloc[:, 4]
        data = mutual_info(10, data)
        data = data[10:]

        signal = data.loc[:,
                          ["t", 't-1', 't-2', 't-3', 't-4', 't-5']].to_numpy()
        algo = rpt.Pelt(model="linear", min_size=2, jump=1).fit(signal)
        bkps = algo.predict(pen=penalization)

        result = bkps_stats(bkps, signal, size_concepts,
                            obs_amount_beyond_window)
        identified_bkps = result[0]
        not_detected_bkps = result[1]
        miss_detected_bkps = result[2]
        list_delays = result[3]

        identified_bkps_total += identified_bkps
        not_detected_bkps_total += not_detected_bkps
        miss_detected_bkps_total += miss_detected_bkps
        delays_score_total += sum(list_delays)

    if (identified_bkps_total + miss_detected_bkps_total) != 0:
        precision = identified_bkps_total / (identified_bkps_total +
                                             miss_detected_bkps_total)
    else:
        precision = 0
    recall = identified_bkps_total / (iterations * 3)
    if identified_bkps_total != 0:
        average_delay = delays_score_total / identified_bkps_total
    else:
        average_delay = 0

    return [precision, recall, average_delay]
Example #16
    def data(self, source):
        """
        Loads data from source and performs changepoint detection

        :param source: timeseries array
        """
        self._signal = np.array(source).flatten()
        algo = rpt.Pelt(model=self._model).fit(self._signal)
        self._bkpts = algo.predict(pen=self._penalty)
        return self
Example #17
def stability_analysis_long_term(penalization,
                                 iterations,
                                 data_creation_function,
                                 size_concepts,
                                 windowsize_preprocessing=10):
    # data_creation_function = create_simdata.linear1_abrupt
    # penalization = 12
    # iterations = 1
    # size_concepts = 200
    # windowsize_preprocessing = 10

    #size_concepts*2 + 100 - windowsize_preprocessing
    standard_deviations = []
    for i in range(0, iterations, 1):
        print("iteration: ", i)
        data = data_creation_function()
        indices_bkp1 = []
        indices_bkp2 = []
        discard = False
        for j in range(
                int(size_concepts * 2 + (size_concepts / 2)) + 50 - 1,
            (size_concepts * 4), 1):
            print("observation: ", j)
            temp_data = data[0:j]
            temp_data = preprocess_timeseries(
                temp_data, windowsize_preprocessing
            )  #cuts out the first "windowsize_preprocessing" observations

            signal = temp_data.loc[:, [
                "t", 'pacf1', 'pacf2', 'pacf3', 'acf1', 'acf2', 'acf3', 'acf4',
                'acf5', 'var', 'kurt', 'skew', 'osc', 'mi_lag1', 'mi_lag2',
                'mi_lag3'
            ]].to_numpy()
            algo = rpt.Pelt(model="rbf", min_size=2, jump=1).fit(signal[:, 1:])
            bkps = algo.predict(pen=penalization)

            bkps = bkps[:-1]
            filtered_bkps = [
                bkp for bkp in bkps
                if bkp < (size_concepts * 2 + size_concepts / 2) -
                windowsize_preprocessing
            ]

            if len(filtered_bkps) == 2:
                indices_bkp1.append(filtered_bkps[0])
                indices_bkp2.append(filtered_bkps[1])
            else:
                discard = True
                break
        if not discard:
            bkp1_sd = stdev(indices_bkp1)
            bkp2_sd = stdev(indices_bkp2)
            standard_deviations.append(bkp1_sd)
            standard_deviations.append(bkp2_sd)
    return standard_deviations
Example #18
def estimate_power(activity, rider_setup, pen=5, temp=None):
    """Estimate the power output for a ride .

    Args:
        activity (Activity): target activity
        rider_setup (RiderSetup): target rider setup
        pen (int, optional): regularization penalty. Defaults to 5.
        temp (float, optional): temperature in °C. By default, temperature will be taken from activity data.
    """
    activity.resample(pd.Timedelta(seconds=1))
    values = np.array(activity["speed"].rolling(
        pd.Timedelta(seconds=3)).mean(),
                      dtype=np.float64)
    values = np.gradient(values)
    values[np.isnan(values)] = 0.0

    activity["acceleration"] = values

    if temp is None:
        temp = 20
    algo = rpt.Pelt(model="rbf").fit(values)
    pen = pen * np.log(len(values)) * np.std(values)**2
    indices = algo.predict(pen=pen)
    print("n = {0}".format(len(indices)))
    key_est = "pwr"
    activity[key_est] = 0

    for i in range(1, len(indices) - 1):
        altitude = activity["alt"][indices[i - 1]]
        dist = activity["dist"][indices[i]] - activity["dist"][indices[i - 1]]
        delta_t = (activity.index[indices[i]] -
                   activity.index[indices[i - 1]]).total_seconds()
        grade = 100 * (activity["alt"][indices[i]] -
                       activity["alt"][indices[i - 1]]) / (dist + 1)
        acceleration_power = 0.5 * rider_setup.total_mass * (
            activity["speed"][indices[i]]**2 -
            activity["speed"][indices[i - 1]]**2) / delta_t
        if "temp" in activity.keys():
            temp = activity["temp"][indices[i - 1]]
        if "headwind" in activity.keys():
            headwind = activity["headwind"][indices[i - 1]]
        else:
            headwind = 0.0
        speed = activity["speed"][indices[i - 1]:indices[i]].mean()
        steady_power = power_from_speed(rider_setup,
                                        grade,
                                        speed,
                                        altitude=altitude,
                                        temp=temp,
                                        headwind=headwind)
        pwr = steady_power + acceleration_power
        if (pwr < 0):
            pwr = 0
        activity.loc[activity.index[indices[i - 1]:indices[i]], key_est] = pwr
Example #19
def analysis_linear_final(penalization,
                          iterations,
                          dataset,
                          size_concepts,
                          obs_amount_beyond_window,
                          windowsize_preprocessing=10):
    identified_bkps_total = 0
    not_detected_bkps_total = 0
    miss_detected_bkps_total = 0
    delays_score_total = 0

    for i in range(0, iterations, 1):
        print(i)
        with open("data_final_detection/" + dataset + "_" + str(i) + ".data",
                  'rb') as filehandle:
            data = pickle.load(filehandle)
        data = pd.DataFrame({"t": data})

        #data = preprocess_timeseries(data) #cuts out the first 10 observations

        lags = pd.concat([
            data["t"].shift(1), data["t"].shift(2), data["t"].shift(3),
            data["t"].shift(4), data["t"].shift(5)
        ],
                         axis=1)
        data["t-1"] = lags.iloc[:, 0]
        data["t-2"] = lags.iloc[:, 1]
        data["t-3"] = lags.iloc[:, 2]
        data["t-4"] = lags.iloc[:, 3]
        data["t-5"] = lags.iloc[:, 4]
        data = mutual_info(10, data)
        data = data[10:]

        signal = data.loc[:,
                          ["t", 't-1', 't-2', 't-3', 't-4', 't-5']].to_numpy()
        algo = rpt.Pelt(model="linear", min_size=2, jump=1).fit(signal)
        bkps = algo.predict(pen=penalization)

        result = bkps_stats(bkps, signal, size_concepts,
                            obs_amount_beyond_window)
        identified_bkps = result[0]
        not_detected_bkps = result[1]
        miss_detected_bkps = result[2]
        list_delays = result[3]

        identified_bkps_total += identified_bkps
        not_detected_bkps_total += not_detected_bkps
        miss_detected_bkps_total += miss_detected_bkps
        delays_score_total += sum(list_delays)

    return [
        identified_bkps_total, not_detected_bkps_total,
        miss_detected_bkps_total, delays_score_total
    ]
Example #20
def analysis_rbf_final(penalization,
                       iterations,
                       dataset,
                       size_concepts,
                       obs_amount_beyond_window,
                       windowsize_preprocessing=10):
    identified_bkps_total = 0
    not_detected_bkps_total = 0
    miss_detected_bkps_total = 0
    delays_score_total = 0

    for i in range(0, iterations, 1):
        print(i)
        #data = data_creation_function()
        with open("data_final_detection/" + dataset + "_" + str(i) + ".data",
                  'rb') as filehandle:
            data = pickle.load(filehandle)

        #with open("data_final_detection/" + "linear1" + "_" + str(1) + ".data", 'rb') as filehandle:
        #    data = pickle.load(filehandle)

        data = preprocess_timeseries(
            data, windowsize_preprocessing
        )  #cuts out the first "windowsize_preprocessing" observations
        signal = data.loc[:, [
            "t", 'pacf1', 'pacf2', 'pacf3', 'acf1', 'acf2', 'acf3', 'acf4',
            'acf5', 'var', 'kurt', 'skew', 'osc', 'mi_lag1', 'mi_lag2',
            'mi_lag3'
        ]].to_numpy()
        algo = rpt.Pelt(model="rbf", min_size=2, jump=1).fit(signal[:, 1:])
        bkps = algo.predict(pen=penalization)

        result = bkps_stats(bkps,
                            signal,
                            size_concepts,
                            obs_amount_beyond_window,
                            windowsize_preproc=windowsize_preprocessing)
        identified_bkps = result[0]
        not_detected_bkps = result[1]
        miss_detected_bkps = result[2]
        list_delays = result[3]

        identified_bkps_total += identified_bkps
        not_detected_bkps_total += not_detected_bkps
        miss_detected_bkps_total += miss_detected_bkps
        delays_score_total += sum(list_delays)

    return [
        identified_bkps_total, not_detected_bkps_total,
        miss_detected_bkps_total, delays_score_total
    ]
Example #21
def plot_change_points(df, label, from_date, to_date):
    signal = df[label].loc[from_date:to_date].values
    timeline = df.loc[from_date:to_date].index

    algo = rpt.Pelt(model="rbf").fit(signal)
    result = algo.predict(pen=10)

    plt.plot(timeline, signal, 'b-')
    for xc in np.take(timeline, [x - 1 for x in result]):
        plt.axvline(x=xc, color='black', linestyle='--')

    plt.show()
Example #22
def analysis_rbf(penalization,
                 iterations,
                 data_creation_function,
                 size_concepts,
                 obs_amount_beyond_window,
                 windowsize_preprocessing=10):
    identified_bkps_total = 0
    not_detected_bkps_total = 0
    miss_detected_bkps_total = 0
    delays_score_total = 0

    for i in range(0, iterations, 1):
        print(i)
        data = data_creation_function()
        data = preprocess_timeseries(
            data, windowsize_preprocessing
        )  #cuts out the first "windowsize_preprocessing" observations
        signal = data.loc[:, [
            "t", 'pacf1', 'pacf2', 'pacf3', 'acf1', 'acf2', 'acf3', 'acf4',
            'acf5', 'var', 'kurt', 'skew', 'osc', 'mi_lag1', 'mi_lag2',
            'mi_lag3'
        ]].to_numpy()
        algo = rpt.Pelt(model="rbf", min_size=2, jump=1).fit(signal[:, 1:])
        bkps = algo.predict(pen=penalization)

        result = bkps_stats(bkps,
                            signal,
                            size_concepts,
                            obs_amount_beyond_window,
                            windowsize_preproc=windowsize_preprocessing)
        identified_bkps = result[0]
        not_detected_bkps = result[1]
        miss_detected_bkps = result[2]
        list_delays = result[3]

        identified_bkps_total += identified_bkps
        not_detected_bkps_total += not_detected_bkps
        miss_detected_bkps_total += miss_detected_bkps
        delays_score_total += sum(list_delays)

    if (identified_bkps_total + miss_detected_bkps_total) != 0:
        precision = identified_bkps_total / (identified_bkps_total +
                                             miss_detected_bkps_total)
    else:
        precision = 0
    recall = identified_bkps_total / (iterations * 3)
    if identified_bkps_total != 0:
        average_delay = delays_score_total / identified_bkps_total
    else:
        average_delay = 0

    return [precision, recall, average_delay]
Example #23
def find_regimes(data, first=19):
    # find the break points for each flight
    change_points = {}

    # for idx, flight in enumerate(test_range):
    for idx, flight in enumerate(list(data.keys())):

        theta = data[flight]['theta'].values[first:]
        algo = rpt.Pelt(model="l2").fit(theta)
        result = algo.predict(pen=5)
        change_points[flight] = [0] + result

    return change_points
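
# --- Usage sketch (hypothetical data): two flights whose pitch angle `theta`
# jumps to a new regime halfway through.
import numpy as np
import pandas as pd

flights = {
    f"flight_{k}": pd.DataFrame(
        {"theta": np.r_[np.random.normal(0, 0.5, 200),
                        np.random.normal(5, 0.5, 200)]})
    for k in range(2)
}
print(find_regimes(flights))  # per-flight breakpoints, each list starting at 0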
Example #24
def f(x, y):
    pen = float(display['pen'])
    algo = rpt.Pelt(model="rbf").fit(y)
    breakpoint_index = algo.predict(pen=pen)
    
    breakpoints = []
    
    for b in breakpoint_index[:-1]:
        breakpoints.append(b-1)
        breakpoints.append(b)
    
    return breakpoint_index, breakpoints
    
Example #25
def get_ball_velocity(json_file_path):
    """
	Using this to test out a methodology for programatically getting hike and dead ball time marks
	In theory, the ball should be stationary at the hike, and maybe it reaches another stopped point at, or nearly after the dead ball whistle
	"""
    #print (json_file_path)
    df_all_tracking, times = ngs.extract_player_tracking_data(json_file_path)
    #print ('  ',list(df_all_tracking['description'])[0])
    df_filtered = df_all_tracking[df_all_tracking['player_id'] ==
                                  'ball'].sort_values(by='time')
    df_filtered['time_val'] = df_filtered['time'].apply(
        lambda x: convert_time_to_seconds(x))

    all_velocities = {}
    xpos = np.array(df_filtered['x'])
    ypos = np.array(df_filtered['y'])
    positions = list(zip(xpos, ypos))
    timesteps = np.array(df_filtered['time_val'])
    velocities = get_velocity_list(positions, timesteps)
    indices = list(df_filtered.index)
    all_velocities = {**all_velocities, **dict(zip(indices, velocities))}

    #  Compile the velocity data structures of all players and add it to the dataframe
    df_filtered['velocity'] = df_filtered.index.to_series().map(all_velocities)
    df_filtered['speed'] = df_filtered['velocity'].apply(
        lambda x: (x[0]**2 + x[1]**2)**0.5)

    #print (min(df_filtered['speed']), '\t', max(df_filtered['speed']))
    # ball speed ranged from 0 to 75.1, and [0,45] is ~90% of the data values
    #  so going to make 45 the max and 0 the min for the color mapper

    #print (df_filtered.head(100))
    #df_filtered.to_csv('./ball_velocity.csv')

    ## Using sklearn discretizer
    #enc = KBinsDiscretizer(n_bins=10, encode='onehot')
    #ball_speed_binned = enc.fit_transform(np.array(list(df_filtered['speed'])).reshape(-1, 1))
    #print (ball_speed_binned)

    ## Using sklearn.cluster.MeanShift (https://scikit-learn.org/stable/modules/generated/sklearn.cluster.MeanShift.html)
    #clustering = MeanShift().fit()

    ## Using ruptures library
    speed_signal = np.array(df_filtered['speed']).reshape(-1, 1)
    speed_breakpts = 2
    speed_detection = rpt.Pelt(model="rbf").fit(speed_signal)
    speed_result = speed_detection.predict(pen=10)

    return speed_result[:-1], [
        list(df_filtered['time'])[x] for x in speed_result[:-1]
    ], list(df_filtered['time'])[-1]
Example #26
def rpt_changepoint(f, window=25):
    # changepoint detection based on ruptures
    # NOT recommended: no guidance for penalty (pen)
    # f, window = pre_changepoint(ts_, window=window)
    # ycol = 'dtrend_diff'                # used to detect changes in trend
    # ts = f[ycol].values
    ts = f['dtrend_diff'].values
    if f['period'].isnull().sum() == 0:
        window = f.loc[f.index[0], 'period']
    algo = rpt.Pelt(model="rbf").fit(ts)
    for p in [0, 5, 10, 20, 50, 100]:
        result = algo.predict(pen=p)
        print(str(p) + ' ' + str(result))
    return None
Example #27
def rpt_pelt(series, pen=3):
    '''Applies the PELT algorithm with the provided penalty.
    args:
        series: (Reduced) time series, retrieved when applying dimensionality reduction
        pen: penalty value for classifying change points
    returns:
        list of change points
    '''
    algo = rpt.Pelt(model="rbf", min_size=1, jump=1).fit(series)
    result = algo.predict(pen=pen)
    # display
    #rpt.display(series, result)
    #plt.show()
    return result[:-1]
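
# --- Usage sketch (hypothetical data): applying rpt_pelt to a toy "reduced"
# series with one mean shift.
import numpy as np

series = np.r_[np.random.normal(0, 1, 120), np.random.normal(4, 1, 120)]
print(rpt_pelt(series, pen=3))  # change points; the trailing len() entry is removed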
Example #28
def merge_PL_CP(args, features_dict, PL_dict):
    """
    Calculate the "merge" fusion strategy and save new pseudo-labels.
    Changepoint detection for the whole episode, then take the union of PL and CP.
    Saves one .pkl label file per episode/video.

    Args:
        args: parser arguments
        features_dict: dictionary of i3d features to calculate changepoints
        PL_dict: pre-extracted pseudo-labels as startpoint
    """
    for vid in features_dict.keys():
        features = features_dict[vid]
        PL = PL_dict[vid]
        changepoints = []

        # check if CP for this episode with given setting (pen,..) already exists
        if os.path.exists(
                f"data/pseudo_labels/CP/{args.test_data}/{vid.split('.')[0]}/pen_{args.merge_pen}.pkl"
        ):
            cp = pickle.load(
                open(
                    f"data/pseudo_labels/CP/{args.test_data}/{vid.split('.')[0]}/pen_{args.merge_pen}.pkl",
                    "rb"))
            changepoints.append(cp)
        else:
            algo = rpt.Pelt(model=args.merge_model,
                            jump=args.merge_jump).fit(features)
            res = algo.predict(pen=args.merge_pen)
            CP = [1 if ix in res else 0 for ix in range(len(features))]
            changepoints.append(np.asarray(CP))
            if not os.path.exists(
                    f"data/pseudo_labels/CP/{args.test_data}/{vid.split('.')[0]}"
            ):
                os.makedirs(
                    f"data/pseudo_labels/CP/{args.test_data}/{vid.split('.')[0]}"
                )
            pickle.dump(
                np.asarray(CP),
                open(
                    f"data/pseudo_labels/CP/{args.test_data}/{vid.split('.')[0]}/pen_{args.merge_pen}.pkl",
                    "wb"))
        CP = np.asarray(dilate_boundaries([list(changepoints[0])])[0])

        merges = CP | PL

        save_path = f"data/pseudo_labels/PL_CP_merge/{args.test_data}/{args.i3d_training}/merge_{args.merge_pen}/seed_{args.seed}/{vid.split('.')[0]}"
        if not os.path.exists(save_path):
            Path(save_path).mkdir(parents=True, exist_ok=True)
        pickle.dump(merges, open(f"{save_path}/preds.pkl", "wb"))
Example #29
def f(x, y):
    pen = float(display['pen'])
    algo = rpt.Pelt(model="rbf").fit(y)
    breakpoint_index = algo.predict(pen=pen)

    knots = []

    for b in breakpoint_index[:-1]:
        knots.append(b - 1)
        knots.append(b)

    k = 1
    tck = interpolate.splrep(x, y, s=0, t=knots, task=-1, k=k)
    ynew = interpolate.splev(x, tck, der=0)

    return ynew
Example #30
def FindChangePoints(BitScores, penalty, minimumSize, Display=False):
    # This function uses the ruptures module to detect change points in the bit score map.
    algo = rpt.Pelt(min_size=minimumSize).fit(np.array(BitScores))
    result = algo.predict(pen=penalty)
    # After the change points are found using ruptures, the program makes sure that the last
    # index in the MSA is included as a change point
    if len(BitScores) - 1 not in result:
        result.append(len(BitScores) - 1)
    # Because python starts its counting at 0, if the length of the MSA is included as
    # a result, it will cause an indexing error if it's used. This is why it's removed
    # if it exists.
    if len(BitScores) in result:
        result.remove(len(BitScores))
    # It also makes sure the starting index is in the results
    if 0 not in result:
        result.append(0)
    result = sorted(result)

    # The average bit score value in each partitioned section of the bit score map
    # is then determined
    averages = []
    for changePoint in result:
        # If the last change point is found, then there will not be a change point after it,
        # so the loop ends
        if result.index(changePoint) == len(result) - 1:
            break
        # otherwise, the change point after the change point selected is chosen
        else:
            changePoint2 = result[result.index(changePoint) + 1]
        # The total bit score is set to zero
        total = 0
        # and then each bit score between the two change points is added to the total
        for score in BitScores[changePoint:changePoint2]:
            total += score
        # The average is then found by dividing the total by the length of the interval
        average = total / (changePoint2 - changePoint)
        # and the average is added to the list that will be returned to the user
        averages.append(average)
    # If the user has specified that they would like to see the change points
    # illustrated on the change point map, then the plot is printed.
    if Display:
        rpt.display(np.array(BitScores), result)
        plt.show()

    # The program then returns the list of change points and the average bit score values
    # between them to the user.
    return result, averages
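
# --- Usage sketch (hypothetical values): a two-level bit score map. Assumes
# the function above is in scope with numpy and ruptures imported.
import numpy as np

BitScores = [10.0] * 60 + [25.0] * 60
result, averages = FindChangePoints(BitScores, penalty=5, minimumSize=3)
print(result)    # sorted change points, including 0 and len(BitScores) - 1
print(averages)  # average bit score within each partitioned section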