def change_point(connect_df, pen=None, epsilon=None, nbkp=1):
    '''
    Detect and plot change points for the connectivity method or graph metrics.

    The input is converted with ``np.array`` and segmented with ruptures
    binary segmentation using the ``rbf`` (radial basis function) cost.
    Other cost names mentioned in the original notes: least squares, l1, l2.

    Example (from the original notes)::

        a.create_posjac()
        t = connectivity(a,inorganics,plot=2)
        r= np.log10(t[t.sum().sort_values(ascending = False).index])
        change_point(np.array(r).T[:,::50])

    Args:
        connect_df: array-like signal to segment.
        pen: currently unused; kept so existing callers keep working.
        epsilon: currently unused; kept so existing callers keep working.
        nbkp: number of breakpoints requested from the detector.

    Returns:
        The breakpoint list produced by ``predict`` (earlier versions of this
        function returned ``None``; the value is also printed).
    '''
    import ruptures as rpt
    import matplotlib.pyplot as plt

    print('calculating change points')

    # detection
    signal = np.array(connect_df)
    # Fit exactly once: the previous code fitted here and then fitted again
    # through fit_predict(), doing the same work twice.
    algo = rpt.Binseg(model='rbf').fit(signal)
    result = algo.predict(n_bkps=nbkp)
    print(result)

    # display: the breakpoints shade the regimes; the halved indexes are drawn
    # as the "computed" dashed lines, exactly as before.
    rpt.display(signal, result, np.array(result) / 2)
    plt.show()
    return result
def actvTrnsAutoDetect(Data, Interval, Time='YYYY/MM/DD_HH:MM:SS', Model="mahalanobis", Penalty=50):
    """
    Detect change points with PELT and return a time window around each one.

    Not recommended to use: the current detection algorithm is not very robust.

    Args:
        Data: signal to segment; it must expose ``.index`` because each
            breakpoint ``b`` is mapped to ``Data.index[b - 1]``.
        Interval: total window width in minutes; each window spans
            ``Interval / 2`` minutes on either side of the change time.
        Time: unused; kept for backward compatibility.
        Model: ruptures cost model name passed to ``Pelt``.
        Penalty: penalty value passed to ``predict``.

    Returns:
        A list of ``[start, end]`` pairs, one per breakpoint returned by the
        detector. No breakpoint is filtered out (the first/last exclusion that
        the original comments mention was disabled).
    """
    import ruptures

    algo = ruptures.Pelt(model=Model).fit(Data)
    result = algo.predict(pen=Penalty)

    ruptures.display(Data, result)
    plt.show()

    # The half-width does not depend on the breakpoint, so build it once.
    half_window = pandas.Timedelta(minutes=Interval / 2)

    Ttimes = []
    for entry in result:
        # Breakpoints are positions one past the end of a segment, hence the -1.
        time = Data.index[entry - 1]
        print("Time is: {}".format(time))
        Ttimes.append([time - half_window, time + half_window])
    return Ttimes
def change_point(connect_df, pen=None, epsilon=None, nbkp=1):
    '''
    Detect and plot change points for the connectivity method or graph metrics.

    The input is converted with ``np.array`` and segmented with ruptures
    binary segmentation using the ``rbf`` (radial basis function) cost.
    Other cost names mentioned in the original notes: least squares, l1, l2.

    Example (from the original notes)::

        a.create_posjac()
        t = connectivity(a,inorganics,plot=2)
        r= np.log10(t[t.sum().sort_values(ascending = False).index])
        change_point(np.array(r).T[:,::50])

    Args:
        connect_df: array-like signal to segment.
        pen: currently unused; kept so existing callers keep working.
        epsilon: currently unused; kept so existing callers keep working.
        nbkp: number of breakpoints requested from the detector.

    Returns:
        The breakpoint list produced by ``predict`` (earlier versions of this
        function returned ``None``; the value is also printed).
    '''
    import ruptures as rpt
    import matplotlib.pyplot as plt

    print('calculating change points')

    # detection
    signal = np.array(connect_df)
    # Fit exactly once: the previous code fitted here and then fitted again
    # through fit_predict(), doing the same work twice.
    algo = rpt.Binseg(model='rbf').fit(signal)
    result = algo.predict(n_bkps=nbkp)
    print(result)

    # display: the breakpoints shade the regimes; the halved indexes are drawn
    # as the "computed" dashed lines, exactly as before.
    rpt.display(signal, result, np.array(result) / 2)
    plt.show()
    return result
def get_change_points(log):
    """
    Count events per calendar day and detect change points in that series.

    Every day between the first and last ``time:timestamp`` value gets an
    entry (0 when nothing happened), the daily counts are segmented with
    PELT using the module-level ``MODEL`` and ``PENALTY``, and the plot is
    saved to ``change_points.png`` and shown.

    Args:
        log: event log handed to ``pm4py.get_attribute_values``.

    Returns:
        ``(event_counts, detect_result)`` where ``event_counts`` maps
        ``'YYYY-MM-DD'`` strings to event counts and ``detect_result`` is the
        breakpoint list returned by ``predict``.
    """
    attr_datetime = pm4py.get_attribute_values(log, 'time:timestamp')
    start_date = min(attr_datetime).date()
    end_date = max(attr_datetime).date()
    delta = datetime.timedelta(days=1)
    print("Start date: ", start_date, "\nEnd date: ", end_date)

    # Pre-fill the whole date range so days without events stay in the series.
    event_counts = {}
    day = start_date
    while day <= end_date:
        event_counts[day.strftime('%Y-%m-%d')] = 0
        day += delta

    # NOTE(review): pm4py documents this call as returning a mapping
    # value -> number of occurrences. Iterating it directly visits each
    # distinct timestamp once, so events sharing a timestamp were counted as
    # one. Use the stored multiplicity when it is available; plain iterables
    # of timestamps keep the old one-per-item behaviour.
    if hasattr(attr_datetime, 'items'):
        occurrences = attr_datetime.items()
    else:
        occurrences = ((t, 1) for t in attr_datetime)
    for t, n_events in occurrences:
        event_counts[t.date().strftime('%Y-%m-%d')] += n_events

    dates = np.array(list(event_counts.values()))

    # detection
    algo = rpt.Pelt(model=MODEL).fit(dates)
    detect_result = algo.predict(pen=PENALTY)

    # display
    rpt.display(dates, detect_result, detect_result)
    plt.savefig('change_points.png')
    plt.show()
    print('Change point plot is saved as "change_points.png"')
    return event_counts, detect_result
def get_change_points(pos, neu, neg, pen=10, jump=1, min_size=2, plot=False):
    """
    Locate change points in the joint positive / neutral / negative counts.

    The ``'count'`` column of each of the three frames becomes one column of
    the signal, which is segmented with ``rpt.KernelCPD`` (rbf kernel).

    Args:
        pos, neu, neg: objects with a ``'count'`` column; ``pos.index`` is
            used to label the result.
        pen: penalty passed to ``fit_predict``.
        jump, min_size: forwarded to ``KernelCPD``.
        plot: when true, show the signal with the detected breakpoints.

    Returns:
        ``pos.index.values`` at every detected breakpoint except the last one.
    """
    per_class_counts = [frame['count'].to_numpy() for frame in (pos, neu, neg)]
    # One row per class after stacking; transpose so each class is a column.
    signal = np.stack(per_class_counts, axis=0).T

    detector = rpt.KernelCPD(kernel="rbf", jump=jump, min_size=min_size)
    cp = detector.fit_predict(signal, pen=pen)

    if plot:
        rpt.display(signal, cp, cp)
        plt.show()

    interior_breakpoints = cp[:-1]
    return pos.index.values[interior_breakpoints]
def FindChangePoints(BitScores, penalty, minimumSize, Display=False):
    """
    Detect change points in a bit score map and average each section.

    Uses ruptures ``Pelt`` on ``np.array(BitScores)``, then normalises the
    breakpoint list so that it starts at 0 and ends at ``len(BitScores) - 1``.

    Args:
        BitScores: sequence of bit scores (must support ``len`` and slicing).
        penalty: penalty value passed to ``predict``.
        minimumSize: ``min_size`` passed to ``Pelt``.
        Display: when truthy, plot the scores with the change points.

    Returns:
        ``(result, averages)``: the sorted change points and the mean bit
        score of each half-open section ``[result[i], result[i + 1])``.
        Because the final change point is ``len(BitScores) - 1``, the very
        last score is not part of any section.
    """
    algo = rpt.Pelt(min_size=minimumSize).fit(np.array(BitScores))
    result = algo.predict(pen=penalty)

    n_scores = len(BitScores)
    # Make sure the last valid index of the MSA is a change point ...
    if n_scores - 1 not in result:
        result.append(n_scores - 1)
    # ... and drop the length itself: it is not a valid index and would cause
    # an indexing error if used as one.
    if n_scores in result:
        result.remove(n_scores)
    # The starting index must be present as well.
    if 0 not in result:
        result.append(0)
    result = sorted(result)

    # Walk consecutive pairs directly instead of calling result.index() for
    # every change point, which rescanned the list on each iteration.
    averages = [
        sum(BitScores[start:stop]) / (stop - start)
        for start, stop in zip(result, result[1:])
    ]

    # Optionally illustrate the change points on the bit score map.
    if Display:
        rpt.display(np.array(BitScores), result)
        plt.show()

    return result, averages
def figure(self):
    """
    Build the figure showing the changepoints.

    The plot is produced by ``rpt.display`` from ``self._signal`` and
    ``self.breakpoints()``; the axes it also returns are discarded.

    :rtype: :class:`matplotlib.figure.Figure`
    """
    displayed = rpt.display(self._signal, self.breakpoints())
    fig, _axes = displayed
    return fig
def test_1():
    """
    Run PELT (rbf cost, penalty 3) on ``data_month['sku_num_sum']`` and plot it.

    A synthetic piecewise-constant signal is still generated first, but only
    the commented-out alternatives below use it.
    """
    # generate signal
    n_samples = 1000
    dim = 3
    sigma = 4
    n_bkps = 4  # number of breakpoints
    signal, bkps = rpt.pw_constant(n_samples, dim, n_bkps, noise_std=sigma)

    # detection
    # alternatives tried: signal[:, 1], data['sku_num_sum'].values, rpt.Binseg
    rpt_data = data_month['sku_num_sum'].values
    detector = rpt.Pelt(model="rbf")
    detector.fit(rpt_data)
    res_kps = detector.predict(pen=3)

    # display
    # rpt.display(signal, bkps, res_kps)
    rpt.display(rpt_data, res_kps)
    plt.show()
def find_change_points(session_vals, penalty=1000):
    """
    Detect change points over concatenated sessions and split them per session.

    All session values are chained into one signal and segmented with PELT
    (``l2`` cost). Every detected change point except the last is assigned to
    the session whose frame range contains it and stored relative to that
    session's first frame. Each session's list always ends with the session
    length.

    Args:
        session_vals: sequence of per-session value sequences.
        penalty: penalty value passed to ``predict``.

    Returns:
        One list of session-relative change points per session.
    """
    vals = np.array(list(itertools.chain.from_iterable(session_vals)))
    session_lens = [len(session) for session in session_vals]
    # Cumulative frame offsets: entry i is the global index where session i starts.
    total_frames = np.concatenate(([0], np.cumsum(session_lens)))

    detector = rpt.Pelt(model='l2')
    detector.fit(vals)
    change_points = detector.predict(pen=penalty)
    logging.debug('Changepoints detected: %s', change_points)
    logging.debug('Total frames per session: %s', total_frames)

    if hasattr(sys, 'ps1'):  # interactive mode
        rpt.display(vals, total_frames, change_points)
        plt.title('Detected change points')
        plt.show(block=True)

    session_change_points = [[] for _ in session_lens]
    for change in change_points[:-1]:
        # Right-bisect so a change exactly on a boundary belongs to the
        # session that starts there.
        session_i = bisect.bisect_right(total_frames, change) - 1
        offset_in_session = change - total_frames[session_i]
        session_change_points[session_i].append(offset_in_session)

    for per_session, session_len in zip(session_change_points, session_lens):
        per_session.append(session_len)
    return session_change_points
def segment_time_series(time_series, bkps=1, gamma=1e-2, display=True):
    """
    Segment a time series with ruptures dynamic programming (rbf cost).

    Implementation of ruptures, python library
    https://github.com/deepcharles/ruptures

    Inputs:
        time_series: time series kinematic data from the ReachMaster
            experimental system
        bkps: int, number of changepoints (reaches), 1 for simple detection
        gamma: value stored under ``"gamma"`` in the cost ``params``
        display: Boolean, show the ruptures plot of the segmentation

    Returns:
        rpt_result: list containing breakpoint indexes and the total length
            of the time-series array
    """
    params = {"gamma": gamma}
    algo = rpt.Dynp(model="rbf", params=params, jump=1, min_size=2).fit(time_series)
    rpt_result = algo.predict(n_bkps=bkps)
    if display:
        # ``bkps`` is a count, not a list of indexes. rpt.display expects an
        # iterable of breakpoints as its second argument, so passing the int
        # made the default display=True path fail. Plot the detected
        # breakpoints as both the shaded regimes and the dashed markers.
        rpt.display(time_series, rpt_result, rpt_result)
        plt.show()
    return rpt_result
def get_change_points(ts, model="rbf", jump=5, pen=1, segments_size=None, figsize=(20, 3), plot=True):
    """
    Return change points for ``ts`` from PELT or from fixed-size segments.

    Args:
        ts: the signal; only ``len(ts)`` is needed for the "constant" model.
        model: ``"constant"`` cuts the series every ``segments_size`` samples;
            any other value is used as the ruptures cost model for ``Pelt``.
        jump: ``jump`` passed to ``Pelt``.
        pen: penalty passed to ``predict``.
        segments_size: segment length for the "constant" model.
        figsize: figure size forwarded to ``rpt.display``.
        plot: when true, display the signal with the change points.

    Returns:
        The list of change points; for the "constant" model it always ends
        with ``len(ts)``.
    """
    if model == "constant":
        n_samples = len(ts)
        change_points = list(
            np.arange(0, n_samples + 1)[segments_size::segments_size])
        # A series shorter than one segment yields no cut at all; indexing
        # [-1] on that empty list used to raise IndexError. Treat it as a
        # single segment ending at len(ts).
        if not change_points or change_points[-1] < n_samples:
            change_points.append(n_samples)
    else:
        algo = rpt.Pelt(model=model, jump=jump).fit(ts)
        change_points = algo.predict(pen=pen)
    if plot:
        rpt.display(ts,
                    true_chg_pts=change_points,
                    computed_chg_pts=change_points,
                    figsize=figsize)
        plt.show()
    return change_points
import matplotlib.pyplot as plt
import ruptures as rpt

# Build a noisy synthetic piecewise-constant signal together with its
# ground-truth breakpoints.
n_samples = 1000
dim = 3
sigma = 4
n_bkps = 4  # number of breakpoints
signal, bkps = rpt.pw_constant(n_samples, dim, n_bkps, noise_std=sigma)
print(type(signal))

# Detect breakpoints with PELT and the rbf cost.
algo = rpt.Pelt(model="rbf")
algo.fit(signal)
result = algo.predict(pen=10)

# Show the true regimes (shaded) against the detected breakpoints.
rpt.display(signal, bkps, result)
plt.show()
def change_point(self, width: int, cut_off: list, custom_cost, jump: int, pen: float, results_show: bool, title=None, save_path=None, fig_name=None):
    '''
    ----------------
    DESCRIPTION
    ----------------
    Check whether the resistance signal shows a sufficiently large sudden
    change inside a selected interval. According to the original notes, such
    a change means that an expulsion ("explosion"/spatter) occurred during
    this weld.

    The detection algorithms are documented at
    https://centre-borelli.github.io/ruptures-docs/index.html#documentation

    For the resistance data (especially MDK2) the method detects whether
    there is a change point in the selected area. If there is one, the mean
    value before and after it are compared --> delta R; otherwise delta R = 0.
    Because of material loss delta R must be greater than 0: a change point
    with delta R < 0 is unrelated to spatter and is reported as 0 (the
    resistance curve normally goes down over time).

    ----------------
    PARAMETER
    ----------------
    width: int
        window width, e.g. 40
    cut_off: list [float, float], each in 0...1
        fractions selecting the analysed slice, e.g. [0.15, 0.45];
        1 means the whole data length
    custom_cost:
        cost object, see https://centre-borelli.github.io/ruptures-docs/costs/index.html
    jump: int
        subsample (one every jump points), e.g. 5
    pen: float
        penalty value (>0), e.g. 2
    results_show: bool
        show an image with the detection result
    title: the image title
    save_path: the path to save the result image
    fig_name: the image name

    ----------------
    RETURN
    ----------------
    delta_R: the variation of the resistance signal before and after the
        first change point, or 0 when there is none or it is negative
    '''
    # The slice bounds are fractions of the number of valleys, applied to R_data.
    n_valleys = len(self.valley_id)
    ab_R = self.R_data[round(n_valleys * cut_off[0]):round(n_valleys * cut_off[1])].values

    # Use the caller's penalty; it was previously ignored in favour of a
    # hard-coded 2.
    bkps = rpt.Window(width=width, custom_cost=custom_cost, jump=jump).fit_predict(ab_R, pen=pen)

    # Two or more entries means at least one change point besides the final
    # entry of the breakpoint list.
    if len(bkps) >= 2:
        delta_R = np.mean(ab_R[:bkps[0]]) - np.mean(ab_R[bkps[0]:])
        if delta_R < 0:
            # delta_R cannot be less than 0 because the resistance curve goes
            # down with time
            delta_R = 0
    else:
        delta_R = 0

    if results_show:
        rpt.display(ab_R, bkps)
        if title is not None:
            plt.title(title)
        if save_path and fig_name is not None:
            save_fig(image_path=save_path, fig_name=fig_name)
        plt.show()
    return delta_R
# Columns pulled from the preprocessed frames: "t" first, then the features
# that are fed to the detector.
feature_columns = ["t", 'pacf1', 'pacf2', 'pacf3', 'acf1', 'acf2', 'acf3',
                   'acf4', 'acf5', 'var', 'kurt', 'skew', 'osc', 'mi_lag1',
                   'mi_lag2', 'mi_lag3']

# --- nonlinear simulation -------------------------------------------------
nonlinear2_abrupt_raw = create_simdata.nonlinear3_abrupt()
nonlinear2_abrupt = functions.preprocess_timeseries(nonlinear2_abrupt_raw, windowsize=20)
plt.plot(nonlinear2_abrupt)

# NOTE: an early ``lin1_abrupt = functions.preprocess_timeseries(lin1_abrupt)``
# used to sit here. In this script ``lin1_abrupt`` is only created further
# down and the result was overwritten before any use, so it has been removed.

signal = nonlinear2_abrupt.loc[:, feature_columns].to_numpy()
# Detect on the feature columns only; column 0 ("t") is kept for plotting.
algo = rpt.Pelt(model="rbf", min_size=2, jump=1).fit(signal[:, 1:])
my_bkps = algo.predict(pen=18)
fig, (ax,) = rpt.display(signal[:, 0], my_bkps, figsize=(10, 6))
plt.show()

# --- linear simulation ------------------------------------------------------
lin1_abrupt = create_simdata.linear1_abrupt()
lin1_abrupt = functions.preprocess_timeseries(lin1_abrupt)  # cuts out the first 10 observations
# (the identical assignment of ``signal`` appeared twice; once is enough)
signal = lin1_abrupt.loc[:, feature_columns].to_numpy()
algo = rpt.Pelt(model="rbf", min_size=2, jump=1).fit(signal[:, 1:])
bkps = algo.predict(pen=30)
# Finish and save the topic moving-average figure started above this fragment.
plt.xlabel("RankedCells")
plt.ylabel("Moving Avg Topic Probability")
plt.savefig(os.path.join(sc.settings.figdir, name + "_TopicMovingAvg.png"))
plt.clf()

# Moving average (window 300) of the ribo and mito percentages, one figure each.
for obs_column, plot_title, file_suffix in (
        ('percent_ribo', "Moving Average Percent Ribo", "_RiboCounts.png"),
        ('percent_mito', "Moving Average Percent Mito", "_MitoCounts.png"),
):
    convolvedSD = moving_average(adata.obs[obs_column].tolist(), 300)
    plt.plot(range(len(convolvedSD)), convolvedSD)
    plt.title(plot_title)
    plt.savefig(os.path.join(sc.settings.figdir, name + file_suffix))
    plt.clf()

# Change point detection on four stacked per-cell columns.
signal = np.column_stack((
    np.std(doc_topic, axis=1),
    adata.obs['percent_ribo'].tolist(),
    adata.obs['percent_mito'].tolist(),
    np.array(list(adata.obs.n_counts)),
))
algo = rpt.Window(width=2500, model="l1").fit(signal)
result = algo.predict(pen=50)

# Cost of the two-breakpoint segmentation [candidate, last breakpoint] for
# every candidate except the last one.
costs = [
    algo.cost.sum_of_costs([candidate, result[-1]])
    for candidate in result[:-1]
]

rpt.display(signal=signal, true_chg_pts=result, computed_chg_pts=result)
plt.title(str(np.argmin(costs)) + ' <- Best cPoint')
plt.savefig(os.path.join(sc.settings.figdir, name + "_Changepoints.png"))
plt.clf()

print_top_words(ldaM, adata.var.index, 15)
top_words_path = os.path.join(sc.settings.figdir, name + "_TopicMarkers.txt")
table_top_words(ldaM, adata.var.index, 25).to_csv(top_words_path)
# Mean of the positive entries of every column of X; columns whose mean is
# not positive (or is NaN) are skipped.
arr = []
for j in range(X.shape[1]):
    # Excluding masked pixels. They are assigned a negative value.
    unmasked_rows = X.iloc[:, j] > 0
    day = X.loc[unmasked_rows]
    value = day.iloc[:, j].mean()
    if not value > 0:
        continue
    arr.append(value)

# Changepoint detection with the Pelt search method
signal = np.array(arr)
algo = rpt.Pelt(model="rbf")
algo.fit(signal)
result = algo.predict(pen=10)
rpt.display(signal, result)
plt.title('Change Point Detection: Pelt Search Method')
plt.show()

# Changepoint detection with the Binary Segmentation search method
model = "l2"
algo = rpt.Binseg(model=model)
algo.fit(signal)
my_bkps = algo.predict(n_bkps=10)
# show results
rpt.show.display(signal, my_bkps)
plt.title('Change Point Detection: Binary Segmentation Search Method')
plt.show()

# Changepoint detection with window-based search method
model = "l2"
algo = rpt.Window(width=40, model=model)
algo.fit(signal)
Fos_pred[loopNo] = F[int(idxo_pred)] if bkps: tbkps = np.zeros(len(bkps)-1) for l in range(0,len(bkps)-1): tbkps[l] = t[bkps[l]-1] bkpsTrue = bkps bkpsCal = bkps bkpsCal = np.zeros_like(bkps) bkpsCal = np.zeros_like(bkps) for x in range(0,len(bkpsTrue)): bkpsTrue[x] = bkps[-1] if segPlt > 0: fig, (ax,) = rpt.display(norm(gradF), bkpsTrue, bkpsCal) fig, (ax,) = rpt.display(F, bkpsTrue, bkpsCal) fig, (ax,) = rpt.display(s, bkpsTrue, bkpsCal) ############-Plot if showPlt > 0: if ruleBased > 0: plt.figure() plt.plot(norm(F[:trim_to])) plt.plot(norm(gradF[:trim_to])) plt.plot(norm(grad2F[:trim_to])) plt.plot(norm(s[:trim_to])) #plt.plot(norm(A[:trim_to])) plt.axvline(idxo, color="red", linestyle = "--") plt.axvline(idxo_pred,color="black", linestyle = "--") else:
https://github.com/deepcharles/ruptures """ import matplotlib.pyplot as plt import matplotlib.dates as mdates import ruptures as rpt import matplotlib import pandas as pd import os import numpy as np from _config import Config matplotlib.use('TkAgg') if __name__ == '__main__': # Read input data from csv input_data = pd.read_csv(os.path.join('data', 'input_data.csv'), header=0, index_col=[0], parse_dates=[0]) print('Shape of input data = ', input_data.shape) # Detection with PELT algo = rpt.Pelt(model="rbf").fit(input_data) result = algo.predict(pen=100) print(','.join([str(input_data.index[i - 1]) for i in result])) # Display rpt.display(input_data, result, figsize=(12, 6)) plt.show()
import yfinance as yf
import matplotlib.pyplot as plt
import ruptures as rpt

# NOTE(review): this ticker handle is not used below (and is named after a
# different symbol); kept in case later code relies on it.
msft = yf.Ticker("SPY")

# Closing prices for the requested date range.
data = yf.download('spy', start='2019-01-01', end='2019-07-11')
data = data.Close.values

# PELT with the rbf cost.
algo = rpt.Pelt(model='rbf').fit(data)
result = algo.predict(pen=10)

# rpt.display takes the breakpoints as its second positional argument
# (true_chg_pts). The previous ``result=result`` keyword left that required
# argument missing, so the call failed before anything was drawn.
rpt.display(data, result)
plt.show()
plt.show()

plt.plot(time_keeper, nbr_communities_over_time)
plt.title('Enron Emails Network: Infomap Community Count by Week')
plt.xlabel('Time')
plt.ylabel('Number of Communities')
plt.show()

# Change point detection
# Passed to rpt.display as the single "true" breakpoint, so the first
# weeks_to_ignore samples are shaded as their own regime.
weeks_to_ignore = 4

# --- average degree centrality ---------------------------------------------
algo = rpt.Pelt(model="rbf").fit(np.array(avg_degree_over_time))
result = algo.predict(pen=4)
# The last entry of ``result`` is left out before indexing time_keeper.
# The label previously said "diameter" (copied from the block below).
print("Change points in avg degree centrality: ",
      [time_keeper[i] for i in result[:len(result) - 1]])
rpt.display(np.array(avg_degree_over_time), [weeks_to_ignore], result)
plt.title("Change Points in Avg Degree Centrality Over Time")
plt.tight_layout()
plt.show()

# --- diameter ----------------------------------------------------------------
algo = rpt.Pelt(model="rbf").fit(np.array(diameter_over_time))
result = algo.predict(pen=4)
print("Change points in diameter: ",
      [time_keeper[i] for i in result[:len(result) - 1]])
rpt.display(np.array(diameter_over_time), [weeks_to_ignore], result)
plt.title("Change Points in Diameter Over Time")
plt.tight_layout()
plt.show()

# --- number of communities ---------------------------------------------------
algo = rpt.Pelt(model="rbf").fit(np.array(nbr_communities_over_time))
result = algo.predict(pen=4)
# Datasets tried so far:
# data = sio.loadmat('./data/shiftcorr%d'%idx)  # poor results
# data = sio.loadmat('./data/shiftlinear%d'%idx)  # poor results
# data = sio.loadmat('./data/singledimshiftfreq%d'%idx)  # poor results
# data = sio.loadmat('./data/agotsshiftmean%d'%idx)
# data = sio.loadmat('./data/agotsshiftvar%d'%idx)
data = sio.loadmat('./data/extreme%d' % idx)  # good results
ts = data['ts']  # .T  # transpose is needed for shiftfreq
print(ts.shape)
bkps = data['bkps'][0]

scaler = StandardScaler()
ts = scaler.fit_transform(ts)

# Cut the standardised series into windows of ``width`` samples taken every
# ``step`` samples, and flatten each window into one row.
width = 10
step = 5
ts = [ts]
segment = SegmentX(width=width, step=step)
x = segment.fit_transform(ts, None)[0]
x = x.reshape([x.shape[0], -1])
x = torch.from_numpy(x).float()

# bkss for break samples: breakpoints expressed in window units. Derived from
# ``step`` instead of repeating the literal 5, so the two cannot drift apart.
bkss = bkps // step

# NOTE(review): input_dim/output_dim are 10, which equals ``width`` only if
# each window flattens to ``width`` values - confirm for multi-column data.
model = AutoEncoder(input_dim=10, latent_dim=1, output_dim=10)
_, pred = model.fit_predict(x)

# Per-window reconstruction error: the largest squared error in the row.
err = (pred - x).detach().numpy()
err = np.max(np.power(err, 2), axis=1)

rpt.display(err, true_chg_pts=bkss)
rpt.display(ts[0], true_chg_pts=bkps)
plt.show()
# Report the share of choices 1/2/3 (A/B/C) for the segment between the first
# two breakpoints. A, B and C for this segment are counted before this
# fragment begins.
total = result[1] - result[0]
print(A/total)
print(B/total)
print(C/total)
print()
# Recount for the remaining segment: from the second breakpoint to the end of
# the sequence.
A = 0
B = 0
C = 0
for i in range(result[1], len(choice_sequence)):
    # Only the first element of each entry is inspected.
    if choice_sequence[i][0] == 1:
        A = A + 1
    if choice_sequence[i][0] == 2:
        B = B + 1
    if choice_sequence[i][0] == 3:
        C = C + 1
total = len(choice_sequence) - result[1]
print(A/total)
print(B/total)
print(C/total)
print()
# NOTE(review): presumably choice_sequence is an ndarray or DataFrame here;
# confirm, since a plain list may not be accepted by rpt.display.
rpt.display(choice_sequence, result)
plt.show()
def find_changepoints_for_time_series(series, modeltype="binary", number_breakpoints=10, plot_flag=True, plot_with_dates=False, show_time_flag=False):
    """
    Find change points in ``series`` with one of several search methods.

    Args:
        series: object exposing ``.values``, ``.shape``, ``.index``,
            ``.plot``, ``.max`` and ``.min`` (all are used below).
        modeltype: one of
            ``"binary"``  - ruptures Binseg, l2 cost, ``number_breakpoints`` breaks;
            ``"pelt"``    - ruptures Pelt, rbf cost, fixed penalty 10;
            ``"window"``  - ruptures Window (width 40), l2 cost;
            ``"Dynamic"`` - ruptures Dynp (min_size 3, jump 5), l1 cost;
            ``"online"``  - changefinder scores; the ``number_breakpoints``
                            positions that sort first by negated score.
        number_breakpoints: number of breakpoints for every method except pelt.
        plot_flag: plot the result.
        plot_with_dates: plot against ``series.index`` with alternating
            coloured bands instead of using ``rpt.display``.
        show_time_flag: print the elapsed detection time.

    Returns:
        The list of breakpoints; ``series.shape[0]`` is appended when it is
        not already present.

    Raises:
        ValueError: if ``modeltype`` is not one of the names above
            (previously this surfaced later as an UnboundLocalError).
    """
    points = series.values
    t0 = time.time()

    # The methods are mutually exclusive, so dispatch with a single chain.
    if modeltype == "binary":
        # RUPTURES PACKAGE
        title = "Change Point Detection: Binary Segmentation Search Method"
        changepoint_model = rpt.Binseg(model="l2").fit(points)
        result = changepoint_model.predict(n_bkps=number_breakpoints)
    elif modeltype == "pelt":
        title = "Change Point Detection: Pelt Search Method"
        changepoint_model = rpt.Pelt(model="rbf").fit(points)
        result = changepoint_model.predict(pen=10)
    elif modeltype == "window":
        title = "Change Point Detection: Window-Based Search Method"
        changepoint_model = rpt.Window(width=40, model="l2").fit(points)
        result = changepoint_model.predict(n_bkps=number_breakpoints)
    elif modeltype == "Dynamic":
        title = "Change Point Detection: Dynamic Programming Search Method"
        changepoint_model = rpt.Dynp(model="l1", min_size=3, jump=5).fit(points)
        result = changepoint_model.predict(n_bkps=number_breakpoints)
    elif modeltype == "online":
        # CHANGEFINDER PACKAGE
        title = "Simulates the working of finding changepoints in online fashion"
        cf = changefinder.ChangeFinder()
        scores = [cf.update(p) for p in points]
        result = (-np.array(scores)).argsort()[:number_breakpoints]
        result = sorted(list(result))
    else:
        raise ValueError(
            "unknown modeltype %r; expected one of 'binary', 'pelt', "
            "'window', 'Dynamic', 'online'" % (modeltype,))

    # The plotting code below pairs consecutive breakpoints, so the series
    # length must close the final segment.
    if series.shape[0] not in result:
        result.append(series.shape[0])

    if show_time_flag:
        elapsed_time = time.time() - t0
        print("[exp msg] elapsed time for process: " + str(time.strftime("%H:%M:%S", time.gmtime(elapsed_time))))

    if plot_flag:
        if not plot_with_dates:
            rpt.display(points, result, figsize=(18, 6))
            plt.title(title)
            plt.show()
        else:
            series.plot(figsize=(18, 6))
            plt.title(title)
            for i in range(len(result) - 1):
                # Alternate the band colour between neighbouring segments.
                current_color = 'xkcd:salmon' if i % 2 == 0 else 'xkcd:sky blue'
                plt.fill_between(series.index[result[i]:result[i + 1]],
                                 y1=series.max() * 1.1,
                                 y2=series.min() * 0.9,
                                 color=current_color,
                                 alpha=0.3)
            plt.show()
    return result
elif sys_platform == 'mac': import matplotlib from matplotlib.font_manager import FontProperties matplotlib.use('TkAgg') import matplotlib.pyplot as plt import matplotlib.ticker as ticker plt.style.use('seaborn') data_path = u'/Users/longguangbin/Work/Documents/SAAS/安踏线下/季节性/sample/duanku_sales.xls' data = pd.read_excel(data_path) import ruptures as rpt # # generate signal n_samples, dim, sigma = 1000, 3, 4 n_bkps = 4 # number of breakpoints signal, bkps = rpt.pw_constant(n_samples, dim, n_bkps, noise_std=sigma) # detection # algo = rpt.Pelt(model="rbf").fit(signal[:,1]) algo = rpt.Pelt(model="rbf").fit(data['sku_num_sum'].values) result = algo.predict(pen=10) # display rpt.display(signal, bkps, result) rpt.display(data['sku_num_sum'].values, result) plt.show()
def destill_profiles(dst,
                     # profile2top = False
                     ):
    """
    Split an altitude time series into vertical profiles via change points.

    Steps, in order:
      1. smooth ``dst.altitude`` (5 s means) and take its gradient;
      2. run PELT (rbf cost) on the gradient to get change points;
      3. between change points fit a line to altitude and classify the
         section as ground / park / up / down from its slope and minimum;
      4. walk the change points and assign profile numbers (positive while
         going up, negative while going down);
      5. build a coarse variant where a profile spans everything between two
         ground contacts, forward-fill, and prune profiles that never reach
         the ground, are too short, or cover too little altitude;
      6. renumber the detailed profiles so the numbering has no gaps.

    NOTE(review): ``ignor_altitudes_smaller_than``, ``pen``, ``mintimedelta``,
    ``threshold_park_slope``, ``threshold_ground_alt``,
    ``mintime2interrupt_profile``, ``has2be_connected2ground``,
    ``minprofile_duration``, ``minprofile_altdiff`` and ``colors`` are read
    here but are not parameters in the visible signature - presumably
    module-level settings; confirm.

    Args:
        dst: object exposing ``altitude.to_dataframe()`` and
            ``datetime.to_pandas()``.

    Returns:
        dict with the keys ``'sectioned_deteiled'`` and ``'sectioned_coarse'``,
        each a DataFrame indexed like ``dst.datetime`` with a ``profile``
        column (among others).
    """

    # precondition the data
    def profile2top(sectioned):
        # Coarse numbering: the counter only advances at ground markers
        # (-8888), so everything between two ground contacts shares one id.
        sect_tmp = sectioned.copy()
        sect_cponly = sect_tmp[~sect_tmp.changepoints.isna()]
        # sect_tmp.classfied[~sect_tmp.changepoints.isna()] == 0
        # sect_cponly.classfied == 0
        profcounter = 1
        for ts, row in sect_cponly.iterrows():
            if row.profile == -8888:
                profcounter += 1
            elif np.isnan(row.profile):
                continue
            else:
                sect_tmp.loc[ts, 'profile'] = profcounter
        return sect_tmp

    def fillit(sectioned_tmp):
        # Forward-fill the ids set at change points, then blank the markers
        # (0 = park, -9999 = unclassified, -8888 = ground).
        sectioned_tmp.profile.ffill(inplace=True)
        sectioned_tmp.profile[sectioned_tmp.profile == 0] = np.nan
        sectioned_tmp.profile[sectioned_tmp.profile == -9999] = np.nan
        sectioned_tmp.profile[sectioned_tmp.profile == -8888] = np.nan

    def remove_cp_when_section2short(df, mintimedelta):
        # Drop a change point when it follows the previous one by less than
        # mintimedelta. The first change point has no predecessor, hence the
        # False put in front of the mask.
        idx = df[~df.changepoints.isna()].index
        idx_delta = idx[1:] - idx[:-1]
        mintimedelta = pd.to_timedelta(mintimedelta)
        where = idx_delta < mintimedelta
        where = np.append(where[::-1], False)[::-1]
        df.loc[idx[where], 'changepoints'] = np.nan

    alt = dst.altitude.to_dataframe().altitude

    ## remove faulty datapoints
    alt[alt < ignor_altitudes_smaller_than] = np.nan

    ## smoothen the altitude and then get the derivative
    ints = 5
    altsmooth = alt.resample(f'{ints}s').mean()
    div = np.gradient(altsmooth)
    altsmooth = pd.DataFrame(altsmooth)
    altsmooth['deriv'] = div

    if 0:
        a = alt.plot()
        altsmooth.altitude.plot(ax=a)
        at = a.twinx()
        altsmooth.deriv.plot(ax=at, color=colors[2])

    # ruptures
    ## make the points for the change point analysis
    altsmooth.interpolate(inplace=True)
    altsmooth.dropna(inplace=True)
    points = altsmooth.deriv.values

    ## do the change point analysis
    model = "rbf"
    algo = rpt.Pelt(model=model).fit(points)
    result = algo.predict(pen=pen)

    ## plot results
    # %matplotlib inline
    # plt.rcParams['figure.dpi']=200
    if 0:
        f, aa = rpt.display(points, result, figsize=(10, 6))
        for a in aa:
            at = a.twinx()
            at.plot(altsmooth.altitude.values, color=colors[1])

    # convert the change point info into profiles
    sectioned = pd.DataFrame(index=dst.datetime.to_pandas().index,
                             columns=[
                                 'changepoints', 'slope', 'classfied',
                                 '_alt_avg', '_alt_at_cp', '_alt_min',
                                 'profile'
                             ])

    ## mark change points
    sectioned.changepoints.loc[altsmooth.iloc[result[:-1]].index] = 1
    sectioned.changepoints.iloc[-1] = 1

    ## remove change points where section is too short
    remove_cp_when_section2short(sectioned, mintimedelta)

    ## add additional parameters to sections between change points
    startime = sectioned.index[0]
    # Series.items() replaces iteritems(), which pandas 2.0 removed.
    for ts, cp in sectioned.changepoints.dropna().items():
        endtime = ts
        sect = alt.loc[startime:endtime]
        df = pd.DataFrame(sect)
        df['s'] = range(df.shape[0])
        dfwn = df.copy()  # a copy with all the nans still in it
        df.dropna(inplace=True)
        if df.shape[0] == 0:
            continue
        res = sp.stats.linregress(df.s, df.altitude)
        sectioned.slope.loc[dfwn.index] = res.slope
        sectioned._alt_avg.loc[dfwn.index] = sect.mean()
        sectioned._alt_min.loc[dfwn.index] = sect.min()
        sectioned._alt_at_cp.loc[ts] = sect.dropna().iloc[-1]
        startime = ts

    ## classify sections between change points
    ground = 0
    park = 1
    up = 2
    down = 3
    sectioned.classfied[
        sectioned.slope.abs() < threshold_park_slope] = park
    sectioned.classfied[sectioned.slope >= threshold_park_slope] = up
    sectioned.classfied[sectioned.slope < -threshold_park_slope] = down
    where = np.logical_and(
        sectioned.classfied == park,
        sectioned._alt_min.interpolate() < threshold_ground_alt)
    sectioned.classfied[where] = ground

    ## combine sections into profiles
    profile_id = 0
    # Sign of the profile id: positive for "up" rows, negative otherwise.
    # Reads the loop variable ``row`` at call time (late binding is intended).
    uod = lambda x: x if row.classfied == 2 else -x
    # profile_id = iter(range(100))
    inside = False
    sect_cponly = sectioned[~sectioned.changepoints.isna()]
    for ts, row in sect_cponly.iterrows():
        # for ts, row in sectioned.iterrows():
        try:
            row_next = sect_cponly.iloc[sect_cponly.index.get_loc(ts) + 1]
            # ts_next = row_next.name
        except IndexError:
            # this happens if we are at the end of the list
            row_next = row.copy()
            row_next.name = pd.to_datetime('2200-01-01 00:00:00')
            row_next.classfied = -9999
        try:
            row_next_next = sect_cponly.iloc[
                sect_cponly.index.get_loc(row_next.name) + 1]
            # ts_next_next = row_next_next.name
        except Exception:
            # Lookup of the placeholder timestamp, or running past the end.
            # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit
            # are no longer swallowed.
            row_next_next = row.copy()
            row_next_next.name = pd.to_datetime('2200-01-01 00:00:00')
            row_next_next.classfied = -9999
            # row_next_next = False
            # ts_next_next = pd.to_timedelta('2200-01-01 00:00:00')

        # if ts.__str__() == '2017-05-23 17:55:45':
        #     break
        if not inside:
            if row.classfied == ground:
                continue
            elif row.classfied == park:
                continue
            elif pd.isna(row.classfied):
                continue
            elif row.classfied in [up, down]:
                profile_id += 1
                sectioned.loc[ts, 'profile'] = uod(profile_id)
                inside = row.classfied
                time_at_classified = ts
            else:
                assert (False)  # should not be possible

        if inside in [up, down]:
            if row.classfied == ground:
                sectioned.loc[ts, 'profile'] = -8888
                inside = False
            elif row.classfied == inside:
                sectioned.loc[ts, 'profile'] = uod(profile_id)
                time_at_classified = ts  # This is the time at which a valid change is happening
            # elif not isinstance(row_next_next, bool) :
            elif ((row_next.name - time_at_classified) <
                  pd.to_timedelta(mintime2interrupt_profile)
                  ) and row_next_next.classfied == inside:
                # Short interruption: the section after next continues the
                # current direction, so leave this change point unmarked.
                # if not isinstance(row_next_next, bool):
                # if row_next_next.classfied == inside:
                sectioned.loc[ts, 'profile'] = np.nan
            elif row.classfied == park:
                # if row_next.classfied == inside:
                #     sectioned.loc[ts, 'profile'] = np.nan # uod(profile_id)
                # if (row_next.name - time_at_classified) < pd.to_timedelta(mintime2interrupt_profile):
                #     sectioned.loc[ts, 'profile'] = np.nan
                # else:
                sectioned.loc[ts, 'profile'] = 0
                inside = False
                # continue
            elif pd.isna(row.classfied):
                inside = False
                sectioned.loc[ts, 'profile'] = -9999
                continue
            elif row.classfied != inside:
                # Direction flipped (up <-> down): start a new profile.
                # if (row_next.name - time_at_classified) < pd.to_timedelta(mintime2interrupt_profile):
                #     sectioned.loc[ts, 'profile'] = np.nan
                # else:
                profile_id += 1
                sectioned.loc[ts, 'profile'] = uod(profile_id)
                inside = row.classfied
            else:
                assert (False)  # noep
        else:
            assert (False)  # should not be possible

    ## combine even further if profiles comprise of everything up to the top and then down again
    sectioned_coarse = profile2top(sectioned)

    ## fill all the gaps inbetween the checkpoints
    fillit(sectioned)
    fillit(sectioned_coarse)

    ## continuation of global profiles (s.o.) distinguish between up and down
    for profcounter in sectioned_coarse.profile.dropna().unique():
        altsec = alt[sectioned_coarse.profile == profcounter]
        tmax = altsec.idxmax()
        # Everything after the altitude maximum is the descending half.
        sectioned_coarse.profile[np.logical_and(
            sectioned_coarse.profile == profcounter,
            sectioned_coarse.index > tmax)] = -profcounter
        # break

    ## test if profile reaches the ground and remove if not
    def test_if_grounded(sectioned_tmp):
        minaltsofprofs = alt.groupby(sectioned_tmp.profile).min()
        # Series.items() replaces iteritems(), which pandas 2.0 removed.
        for prof, minalt in minaltsofprofs[
                minaltsofprofs > threshold_ground_alt].items():
            sectioned_tmp.profile[sectioned_tmp.profile == prof] = np.nan

    if has2be_connected2ground:
        test_if_grounded(sectioned)
        test_if_grounded(sectioned_coarse)

    ## remove sections that are too short or not enough elevation difference
    def remove_profiles_2short_or_heigh(sectioned):
        for pf in sectioned.profile.dropna().unique():
            altsect = alt[sectioned.profile == pf]
            dt = altsect.index[-1] - altsect.index[0]
            dalt = abs(altsect.max() - altsect.min())
            if (dt < pd.to_timedelta(minprofile_duration)
                ) or dalt < minprofile_altdiff:
                sectioned.profile[sectioned.profile == pf] = np.nan

    remove_profiles_2short_or_heigh(sectioned)
    remove_profiles_2short_or_heigh(sectioned_coarse)

    ## if one was deleted we have to shift all successive ones to avoid confusion
    while 1:
        profnos = abs(sectioned.profile.dropna().unique())
        diff = profnos[1:] - profnos[:-1]
        # More than one distinct step size means there is a gap in the ids.
        if len(np.unique(diff)) > 1:
            idx = diff.argmax() + 1
        else:
            break
        for pn in profnos[idx:]:
            where = sectioned.profile.abs() == pn
            if sectioned.profile[where].unique()[0] > 0:
                fct = 1
            elif sectioned.profile[where].unique()[0] < 0:
                fct = -1
            sectioned.profile[where] = fct * (pn - 1)

    ## plot result
    # if 1:
    #     # %matplotlib inline
    #     # plt.rcParams['figure.dpi']=200
    #     a = sectioned.profile.plot()
    #     at = a.twinx()
    #     alt.plot(ax = at, color = colors[1])
    #     for idx in sect_cponly.index:
    #         a.axvline(idx, color = colors[2], lw = 0.5, ls = '--')
    out = {}
    out['sectioned_deteiled'] = sectioned
    out['sectioned_coarse'] = sectioned_coarse
    return out
def make_neighborhood_rank_divergence_plot(rank_df, adj_df):
    """
    Plot how much each county's rank differs from its neighbours' ranks.

    For every county (in rank order) the mean absolute rank difference to its
    adjacent counties is stored in ``rank_df['rank_div']``. A 100-row rolling
    mean of that column is segmented with Pelt, Window and Binseg, the mean of
    their breakpoints is reported as the ensemble breakpoint, and four figures
    are written under ``../output/``.

    Side effects: ``rank_df`` is sorted in place and gains a ``rank_div``
    column; counties with a NaN divergence are printed.
    """

    def _save_detector_figure(computed_bkps, title, out_path, size, y_limits):
        # One figure per detector, restyled to match the scatter plot above.
        rpt.display(
            signal,
            bkps,
            computed_bkps,
            figsize=size,
        )
        plt.ylim(*y_limits)
        plt.gca().get_lines()[0].set_color('darkorange')
        plt.title(title)
        plt.xlabel('Quality of Life Rank')
        plt.ylabel('Local Rank Divergence')
        plt.tight_layout()
        plt.savefig(out_path, dpi=600)
        plt.close('all')

    rank_df.sort_values('rank', inplace=True, ascending=True)

    divergences = np.zeros(len(rank_df.index))
    county_and_rank = zip(rank_df['County'], rank_df['rank'])
    for i, (county, rank) in enumerate(county_and_rank):
        neighbors = adj_df.loc[adj_df.source == county, 'destination']
        if len(neighbors) == 0:
            # Fall back to the rows where the county is the destination.
            neighbors = adj_df.loc[adj_df.destination == county, 'source']
        rank_ind = rank_df.County.isin(neighbors).values
        neighbor_ranks = rank_df.loc[rank_ind, 'rank']
        divergence = np.abs(rank - neighbor_ranks).mean()
        divergences[i] = divergence
        if np.isnan(divergence):
            for item in (county, neighbors, neighbor_ranks):
                print(item)
    rank_df['rank_div'] = divergences

    # Change point detection
    rolling_mean = rank_df['rank_div'].rolling(100).mean()
    signal = rolling_mean.dropna().values
    # model = {'l1', 'l2', 'rbf', 'linear', 'normal', 'ar'}
    pelt_bkps = rpt.Pelt(model='rbf').fit(signal).predict(pen=100)
    window_bkps = rpt.Window(width=1000,
                             model='l2').fit(signal).predict(n_bkps=1)
    bin_bkps = rpt.Binseg(model='l2').fit(signal).predict(n_bkps=1)
    # Drop the last entry of each list before averaging.
    ensemble_bkp = np.mean(
        [*pelt_bkps[:-1], *window_bkps[:-1], *bin_bkps[:-1]])
    print('Identified Breakpoints:'
          f'\n\tPelt Breakpoints: {pelt_bkps[:-1]}'
          f'\n\tWindow Breakpoints: {window_bkps[:-1]}'
          f'\n\tBinary Breakpoints: {bin_bkps[:-1]}'
          f'\n\tEnsemble Breakpoint: {ensemble_bkp}')

    # Scatter of the raw divergences with the rolling mean on top.
    plt.scatter(
        rank_df['rank'].values,
        rank_df['rank_div'].values,
        facecolor='None',
        edgecolor=sns.xkcd_rgb['denim blue'],
        linewidth=2,
        label='Data',
    )
    plt.plot(
        rank_df['rank'].values,
        rank_df['rank_div'].rolling(100).mean(),
        color='darkorange',
        label='Rolling Mean',
    )
    y_min, y_max = divergences.min(), divergences.max()
    y_range = y_max - y_min
    # Vertical dashed line at the ensemble breakpoint, 10% past the data range.
    plt.plot([ensemble_bkp, ensemble_bkp],
             [y_min - 0.1 * y_range, y_max + 0.1 * y_range],
             'k--',
             label='Estimated Breakpoint')
    plt.legend()
    plt.title('Mean Neighborhood Rank Divergence')
    plt.xlabel('Quality of Life Rank (Lower is better)')
    plt.ylabel('Rank Divergence')
    plt.tight_layout()
    # Remember the axis limits and size so the detector figures match.
    ymin, ymax = plt.gca().get_ylim()
    figsize = plt.gcf().get_size_inches()
    plt.savefig('../output/neighborhood_rank_divergence.png', dpi=600)
    plt.close('all')

    # Visualize change points (no "true" breakpoints are known)
    bkps = []
    detector_figures = (
        (pelt_bkps, 'Pelt Change Point Detection',
         '../output/rank_div_change_point_pelt.png'),
        (window_bkps, 'Window Change Point Detection',
         '../output/rank_div_change_point_window.png'),
        (bin_bkps, 'Binary Change Point Detection',
         '../output/rank_div_change_point_binary.png'),
    )
    for computed, figure_title, figure_path in detector_figures:
        _save_detector_figure(computed, figure_title, figure_path, figsize,
                              (ymin, ymax))
def windows(series, window_size=20, pen=2):
    """Detect change points with a sliding-window search and plot them.

    Args:
        series: Signal to segment, in a form accepted by ``rpt.Window.fit``.
        window_size: Width of the sliding window.
        pen: Penalty value handed to ``predict``.

    Returns:
        The breakpoint indices returned by ``predict``.
    """
    algo = rpt.Window(width=window_size, model="l2").fit(series)
    # Honour the caller's penalty; it was previously hard-coded to 2, so the
    # ``pen`` argument had no effect.
    result = algo.predict(pen=pen)
    rpt.display(series, result)
    plt.show()
    return result
except Exception as e: print('failed to plot persist anomalies for %s - %s' % (base_name, e)) timer_start_ruptures = timer() for base_name in found_level_shift_and_persists: timeseries = more_analysis_metrics_timeseries[base_name]['timeseries'] if not timeseries: print('failed to find timeseries for %s' % base_name) continue working_timeseries_timestamps = [int(ts) for ts, value in timeseries] working_timeseries_values = [v for ts, v in timeseries] working_values = np.array(working_timeseries_values) algo_c = rpt.KernelCPD(kernel='linear', min_size=12).fit(working_values) # written in C results = algo_c.predict(pen=2) values = working_values rpt.display(values, results, figsize=(18, 6)) title = '%s' % base_name plt.title(title) plt.show() timer_end_ruptures = timer() print('%s metrics analysed with ruptures, took %.6f seconds' % ( str(len(found_level_shift_and_persists)), (timer_end_ruptures - timer_start_ruptures))) # @added 20210726 - Info #4198: ppscore # ppscore is the best cloudburst candidate algorithm found to data #### return cloudbursts_found, plot_images
#Format the 'Date' column price_df['Date'] = price_df['Date'].astype(str).str[:-3] #Convert the Date column into a date object price_df['Date'] = pd.to_datetime(price_df['Date'], format='%Y %m%d') #Subset to only include data going back to 2014 price_df = price_df[(price_df['Date'] >= '2014-01-01')] #Convert the time series values to a numpy 1D array points = np.array(price_df['WTI_Price']) #RUPTURES PACKAGE #Changepoint detection with the Pelt search method model = "rbf" algo = rpt.Pelt(model=model).fit(points) result = algo.predict(pen=10) rpt.display(points, result, figsize=(10, 6)) plt.title('Change Point Detection: Pelt Search Method') plt.show() #Changepoint detection with the Binary Segmentation search method model = "l2" algo = rpt.Binseg(model=model).fit(points) my_bkps = algo.predict(n_bkps=10) # show results rpt.show.display(points, my_bkps, figsize=(10, 6)) plt.title('Change Point Detection: Binary Segmentation Search Method') plt.show() #Changepoint detection with window-based search method model = "l2" algo = rpt.Window(width=40, model=model).fit(points)
def show(self):
    """Display the stored signal together with its breakpoints."""
    signal = self.signal
    change_points = self.breakpoints()
    rpt.display(signal, change_points)
    plt.show()
def _standardize(values):
    """Return *values* shifted to zero mean and scaled to unit standard deviation."""
    centered = values - np.mean(values)
    return centered / np.std(centered)


def _display_pelt_change_points(values, model, min_size, pen=10):
    """Run a Pelt search on *values*, plot it, and return the new figure's axes."""
    algo = rpt.Pelt(model=model, min_size=min_size).fit(values)
    rpt.display(values, algo.predict(pen=pen))
    return plt.gcf().axes


def main():
    """Fetch one day of Fitbit intraday data and save change point plots.

    Heart rate, calories and steps are standardized; the activity level is
    used as-is.  For each series (and for the three standardized series
    stacked together) a Pelt change point plot is saved under
    ``../data/plots/changepoint/``, followed by a second copy with the
    activity level overlaid in red.
    """
    USER_ID, CLIENT_SECRET, server = instantiate_server()
    ACCESS_TOKEN, REFRESH_TOKEN = get_access_token(server), get_refresh_token(server)
    auth_client = get_auth_client(USER_ID, CLIENT_SECRET, ACCESS_TOKEN, REFRESH_TOKEN)
    user_id = get_fitbit_user_id(get_user_information(server))

    date_str = '2019-02-14'
    heart_data = get_heart_intraday(get_heart_data(auth_client, date_str), user_id)
    calories_data = get_calories_intraday(get_calories_data(auth_client, date_str), user_id)
    steps_data = get_steps_intraday(get_steps_data(auth_client, date_str), user_id)

    heart_beat_vals = _standardize(
        np.array([minute_heart['value'] for minute_heart in heart_data]))
    calories_vals = _standardize(
        np.array([minute_calories['value'] for minute_calories in calories_data]))
    # The activity level is deliberately left unscaled.
    activity_vals = np.array(
        [minute_calories['level'] for minute_calories in calories_data])
    steps_vals = _standardize(
        np.array([minute_steps['value'] for minute_steps in steps_data]))

    # One row per minute, one column per standardized series; zip truncates
    # to the shortest series.
    all_vals = np.array(
        list(zip(heart_beat_vals, calories_vals, steps_vals))).reshape(-1, 3)
    print(all_vals.shape)

    model = 'l2'
    min_size_val = 1
    plot_dir = '../data/plots/changepoint'
    caption = f'model={model} min_size={min_size_val}'
    file_tag = f'model={model}_min_size={min_size_val}'

    # (file-name stem, plot title prefix, values)
    single_series = (
        ('heart', 'Heart Beat', heart_beat_vals),
        ('calories', 'Calories', calories_vals),
        ('steps', 'Steps', steps_vals),
        ('activity', 'Activity', activity_vals),
    )
    for stem, label, values in single_series:
        axes = _display_pelt_change_points(values, model, min_size_val)
        axes[0].set_title(f'{label}: {caption}')
        plt.savefig(f'{plot_dir}/{stem}_{file_tag}.png')
        # Second copy of the same figure with the activity level overlaid.
        axes[0].plot(activity_vals, 'r')
        plt.savefig(f'{plot_dir}/Activity_Overlap_{stem}_{file_tag}.png')

    # Joint detection on the stacked series: one subplot per column.
    axes = _display_pelt_change_points(all_vals, model, min_size_val)
    for column, label in enumerate(('Heart Rate', 'Calories', 'Steps')):
        axes[column].set_title(f'{label}: {caption}')
    plt.savefig(f'{plot_dir}/all_{file_tag}.png')
    for column in range(3):
        axes[column].plot(activity_vals, 'r')
    plt.savefig(
        f'{plot_dir}/Activity-Overlap Data-Date-{date_str} '
        f'All-Attr-Model={model} Min-Size={min_size_val}.png')