def change_point(connect_df, pen=None, epsilon=None, nbkp=1):
    '''
    Change point analysis for either the connectivity method or graph metrics.

    The signal is segmented with ruptures binary segmentation using the
    ``rbf`` (radial basis function) cost. The breakpoints are printed,
    plotted and returned.

    Parameters
    ----------
    connect_df : array-like
        Signal to segment; converted with ``np.array``.
    pen : float, optional
        Penalty stopping criterion, forwarded to ruptures. ruptures only
        consults it when ``nbkp`` is None.
    epsilon : float, optional
        Reconstruction-budget stopping criterion, forwarded to ruptures.
        Only consulted when both ``nbkp`` and ``pen`` are None.
    nbkp : int, optional
        Number of breakpoints to look for (default 1).

    Returns
    -------
    list
        The breakpoints exactly as returned by ruptures.

    Usage sketch carried over from the original notes::

        a.create_posjac()
        t = connectivity(a, inorganics, plot=2)
        r = np.log10(t[t.sum().sort_values(ascending=False).index])
        change_point(np.array(r).T[:, ::50])
    '''
    import ruptures as rpt
    import matplotlib.pyplot as plt

    print('calculating change points')

    # detection
    signal = np.array(connect_df)
    # fit_predict already fits the cost, so a separate fit() beforehand
    # would only repeat that work.
    result = rpt.Binseg(model='rbf').fit_predict(
        signal, n_bkps=nbkp, pen=pen, epsilon=epsilon)
    print(result)

    # display
    # NOTE(review): the halved breakpoints are passed as the "computed"
    # change points of rpt.display, exactly as before -- confirm intent.
    rpt.display(signal, result, np.array(result) / 2)
    plt.show()
    return result
def binary(data):
    '''
    Locate change points in closing prices with binary segmentation.

    data: EURUSD asset values; only its ``Close`` attribute is read.

    Returns ``(changes, feature)``: ``changes`` is an integer array with
    one breakpoint per row (the final reported value removed) and
    ``feature`` is the result of ``boolean_change_point(prices, changes)``.
    '''
    closes = np.array(data.Close)

    # Penalty used by the model: log(n) * sigma**2, where n is the number
    # of samples and sigma their standard deviation.
    penalty = np.log(len(closes)) * closes.std() ** 2

    # Binary segmentation with the ruptures default settings.
    detector = rpt.Binseg()
    detector.fit(closes)
    bkps = detector.predict(pen=penalty)

    # The last reported value is removed because it is not a valid change
    # point (original note). Going through a DataFrame keeps the result as
    # a one-column array, which is then cast to integers.
    trimmed = pd.DataFrame(bkps).drop([len(bkps) - 1])
    changes = np.array(trimmed).astype(int)

    # Per the original note, the helper returns the dates and the numeric
    # changes.
    feature = boolean_change_point(closes, changes)
    return changes, feature
def plot_changePoint(array):
    """Plot the single most prominent change point of ``array``.

    Runs ruptures binary segmentation asking for one breakpoint and shows
    the segmented signal with matplotlib.

    :param array: signal handed to ``rpt.Binseg(...).fit``.
    :return: None (the function only draws).
    """
    # The original passed an empty model name, which ruptures' cost factory
    # rejects. "l2" is the library default and what the sibling helpers in
    # this file use.
    model = "l2"
    algo = rpt.Binseg(model=model).fit(array)
    my_bkps = algo.predict(n_bkps=1)

    # show results
    rpt.show.display(array, my_bkps, figsize=(10, 6))
    plt.title('Change Point Detection: Binary Segmentation Search Method')
    plt.show()
    return None
def detect_change_points(self, ys: np.ndarray, **kwargs) -> Sequence[int]:
    '''
    Segment ``ys`` with ruptures binary segmentation at a fixed penalty of 3.

    @param ys: signal passed to ``ruptures.Binseg(...).fit``
    @param model: optional keyword, one of "l1", "rbf", "linear", "normal",
        "ar" (default is "l2")
    :return: list of estimated change points
    '''
    cost_model = kwargs.get("model", "l2")
    detector = ruptures.Binseg(model=cost_model)
    detector.fit(ys)
    return detector.predict(pen=3)
def cp_detection_binary_segmentation(points):
    """Find two change points in ``points`` and plot the segmentation.

    Uses ruptures binary segmentation with the "l2" cost, displays the
    result with matplotlib and returns the breakpoints list produced by
    ruptures.
    """
    detector = rpt.Binseg(model="l2")
    detector.fit(points)
    breakpoints = detector.predict(n_bkps=2)

    # Show the signal with the detected segments.
    rpt.show.display(points, breakpoints, figsize=(10, 6))
    plt.title('Change Point Detection: Binary Segmentation Search Method')
    plt.show()
    return breakpoints
def find_changepoints(lista_datos):
    """Detect change points in a time series and return them as records.

    :param lista_datos: sequence of time-series values.
    :return: list of dicts of the form ``{'step': <breakpoint>}``, one per
        breakpoint reported by ruptures.
    """
    # Convert the time series values to a numpy 1D array
    points = np.array(lista_datos)

    # Changepoint detection with the Binary Segmentation search method.
    # The penalty is log(n) * 6**2, i.e. the noise level is hard-coded to 6.
    model = "l2"
    algo = rpt.Binseg(model=model).fit(points)
    my_bkps = algo.predict(pen=np.log(len(lista_datos)) * 6 ** 2)

    output = pd.DataFrame(my_bkps, columns=['step'])
    # 'records' is the documented orient name; the abbreviated 'record'
    # used previously is rejected by current pandas releases.
    return(output.to_dict(orient='records'))
def changePoint(value, model='rbf', penalty=1.0, brakepts=None, plot=False):
    """Split ``value`` into sections with ruptures binary segmentation.

    Available models: "rbf", "l1", "l2", "linear", "normal", "ar",
    "mahalanobis".

    :param value: signal, converted with ``np.array``.
    :param model: ruptures cost model name.
    :param penalty: penalty forwarded to ``predict`` as ``pen``.
    :param brakepts: number of breakpoints forwarded as ``n_bkps``.
    :param plot: when true, show the segmentation and print the sections.
    :return: list starting with 0 followed by the breakpoints, with the
        last entry decremented by one.
    """
    signal = np.array(value)
    detector = rpt.Binseg(model=model)
    detector.fit(signal)
    bkps = detector.predict(pen=penalty, n_bkps=brakepts)

    if plot:
        # show results
        rpt.show.display(signal, bkps, figsize=(10, 3))
        plt.show()

    # Define regions from the breaking points: prepend the start index and
    # pull the last boundary back by one.
    sections = [0] + bkps
    sections[-1] -= 1

    if plot:
        print('model = ', model, ' - sections = ', sections)
    return (sections)
def filter_dwelltimes_smpl(N_smpl, t_smpl=[], pen=1, plots=False):
    '''
    Return a filtered version of the sampled trace N_smpl(t).

    The trace is segmented with ruptures (binary segmentation, "l2" cost,
    penalty ``pen``) and every segment is replaced by the value that
    occurs most often inside it.

    N_smpl : sampled trace (sliced and subtracted element-wise below)
    t_smpl : sample times; only used for plotting, defaults to the sample
             index when empty
    pen    : ruptures penalty
    plots  : when true, draw the original/filtered traces, their
             difference and the dwell-time histograms
    '''
    import ruptures as rpt

    t0 = time.time()
    # find rupture points in N_smpl (can be slow):
    # (alternative kept from the original: rpt.Pelt(model='rbf', jump=1))
    segmenter = rpt.Binseg(model='l2', jump=1)
    bkpts = segmenter.fit_predict(N_smpl, pen=pen)
    print(f'filter_dwelltimes_smpl(): rupture p.ts done in {time.time()-t0:.1f}s')

    bkpts = np.append(0, bkpts)
    N_filt = np.zeros(len(N_smpl))
    # between rupture points, choose for N_filt the most frequent value of
    # N_smpl:
    for lo, hi in zip(bkpts[:-1], bkpts[1:]):
        chunk = N_smpl[lo:hi]
        levels = set(chunk)
        samples = list(chunk)
        # rows of (level, number of occurrences of that level)
        tally = np.array(list(zip(levels, [samples.count(lv) for lv in levels])))
        N_filt[lo:hi] = tally[np.argmax(tally[:, 1])][0]

    if plots:
        if len(t_smpl) == 0:
            t_smpl = np.arange(len(N_smpl))
        ts_orig, dwts_orig = find_dwelltimes_smpl(t_smpl, N_smpl)
        ts_filt, dwts_filt = find_dwelltimes_smpl(t_smpl, N_filt)
        bins_orig = np.logspace(np.log10(np.min(dwts_orig)),
                                np.log10(np.max(dwts_orig)), 50)
        bins_filt = np.logspace(np.log10(np.min(dwts_filt)),
                                np.log10(np.max(dwts_filt)), 50)
        bins = np.max([bins_filt, bins_orig], axis=0)

        fig = plt.figure('filter_dwelltimes_smpl', clear=True)
        ax1 = fig.add_subplot(311)
        ax2 = fig.add_subplot(312)
        ax3 = fig.add_subplot(313)

        ax1.plot(t_smpl, N_smpl, '-', lw=3, label='orig')
        # NOTE(review): Nmax is not defined in this function; presumably a
        # module-level value -- confirm, otherwise this line raises.
        ax1.vlines(t_smpl[bkpts[:-1]], 0, Nmax, ls='--', alpha=0.2)
        ax1.plot(t_smpl, N_filt, label='filt')
        ax1.legend()
        ax1.set_xlabel('time or index')

        ax2.plot(t_smpl, N_smpl-N_filt, label='orig-filt')
        ax2.set_title(f'|err| = {np.sum(np.abs(N_smpl-N_filt))}', fontsize=10)
        ax2.legend()
        ax2.set_xlabel('time or index')

        ax3.hist([dwts_orig, dwts_filt], bins, label=['orig', 'filt'])
        ax3.legend()
        ax3.set_xscale('log')
        ax3.set_xlabel('dwell times')
        ax3.set_ylabel('counts')
        fig.tight_layout()
    return N_filt
def R_breakouts_detection(points):
    """Plot change points of ``points`` found by Pelt and by binary segmentation.

    Two figures are shown one after the other:

    * Pelt search with the "rbf" cost and a penalty of 10;
    * binary segmentation with the "l2" cost asking for 10 breakpoints.

    :param points: signal handed to the ruptures estimators.
    :return: None.
    """
    # Changepoint detection with the Pelt search method
    model = "rbf"
    algo = rpt.Pelt(model=model).fit(points)
    result = algo.predict(pen=10)
    # ruptures exposes its plotting helper as `display`; the
    # `display_breakouts` name used before is not part of its API.
    rpt.display(points, result, figsize=(10, 6))
    plt.title('Change Point Detection: Pelt Search Method')
    plt.tight_layout()
    plt.show()

    # Changepoint detection with the Binary Segmentation search method
    model = "l2"
    algo = rpt.Binseg(model=model).fit(points)
    my_bkps = algo.predict(n_bkps=10)
    # show results
    rpt.show.display(points, my_bkps, figsize=(10, 6))
    plt.title('Change Point Detection: Binary Segmentation Search Method')
    plt.tight_layout()
    plt.show()
def changePointDetection(glacier, attr, startdate=None, enddate=None,
        n_breakpoints=1, method='window', model='l1', wwidth=5):
    """Use ruptures package to identify change points in glacier time series.

    Acceptable methods are 'window' (sliding window), 'binseg' (binary
    segmentation), and 'bottomup' (bottom-up).
    See https://centre-borelli.github.io/ruptures-docs/user-guide for
    further information.

    :param glacier: object providing ``filterDates(attr, startdate, enddate)``
        which returns ``(attrs, dates)``.
    :param attr: attribute name forwarded to ``filterDates``.
    :param startdate: forwarded to ``filterDates``.
    :param enddate: forwarded to ``filterDates``.
    :param n_breakpoints: number of breakpoints requested from ruptures.
    :param method: search method name, see above.
    :param model: ruptures cost model name.
    :param wwidth: window width, only used by the 'window' method.
    :return: ``(breakpoint_dates, signal, breakpoints)``.
    :raises ValueError: if ``method`` is not one of the supported names
        (previously this surfaced as an unbound-variable error).
    """
    if method not in ('window', 'binseg', 'bottomup'):
        raise ValueError(
            "Unknown method {!r}; expected 'window', 'binseg' or "
            "'bottomup'".format(method))

    attrs, dates = glacier.filterDates(attr, startdate, enddate)
    signal = attrs.values

    if method == 'window':
        detector = rpt.Window(width=wwidth, model=model)
    elif method == 'binseg':
        detector = rpt.Binseg(model=model)
    else:
        detector = rpt.BottomUp(model=model)
    breakpoints = detector.fit(signal).predict(n_bkps=n_breakpoints)

    # remove breakpoints at beginning/end of time series
    for edge in (dates.index[0] - 1, dates.index[-1]):
        if edge in breakpoints:
            breakpoints.remove(edge)

    breakpoint_dates = dates[breakpoints]
    return breakpoint_dates, signal, breakpoints
def get_change_point(series, jump=5, n_bkps=5, pen=10):
    """
    Return the change points most often agreed on by several detectors.

    series: object exposing ``.values`` (the original note asks for a numpy
        array, but ``.values`` is what the code reads)
    jump: sample size (``jump`` argument of the ruptures estimators)
    n_bkps: number of breakpoints requested from each estimator
    pen: penalty for Pelt

    Returns the list of indices that received the highest number of votes,
    in order of first appearance; an empty list when no candidate remains
    after the series boundaries are discarded.
    """
    from collections import Counter

    values = series.values
    alg_dynp = rpt.Dynp(jump=jump).fit_predict(values, n_bkps=n_bkps)
    alg_pelt = rpt.Pelt(jump=jump).fit_predict(values, pen=pen)
    alg_bin = rpt.Binseg(jump=jump).fit_predict(values, n_bkps=n_bkps)
    alg_bot = rpt.BottomUp(jump=jump).fit_predict(values, n_bkps=n_bkps)
    alg_win = rpt.Window(jump=jump).fit_predict(values, n_bkps=n_bkps)
    alg_cumsum = change_point_detection(values.tolist())

    # We now have change points from several algorithms; count how many
    # times each index was predicted.
    votes = Counter(
        alg_dynp + alg_pelt + alg_bin + alg_bot + alg_win + alg_cumsum)

    # The series boundaries are not real change points. pop() with a
    # default tolerates a boundary that no algorithm reported, which the
    # previous unconditional `del` did not.
    votes.pop(0, None)
    votes.pop(len(values), None)
    if not votes:
        return []

    top_count = max(votes.values())
    return [index for index, count in votes.items() if count == top_count]
def get_breakpoints(df: pd.DataFrame, model: str = "rbf", min_size: int = 5, jump: int = 1, pen: int = 2) -> List[int]:
    """
    Find the breakpoints of one or several time series with binary segmentation.

    Each column is standardised (column mean subtracted, divided by the
    column standard deviation) before being handed to ruptures.
    For more info
    http://ctruong.perso.math.cnrs.fr/ruptures-docs/build/html/detection/binseg.html.

    :param df: DataFrame containing the target time series as columns.
    :param model: segment model, ["l1", "l2", "rbf", ...].
    :param min_size: minimum segment length. Defaults to 5 samples.
    :param jump: subsample (one every jump points). Defaults to 1 sample.
    :param pen: penalty value (>0).
    :return: list containing the indexes where breakpoints happen.
    """
    values = df.values
    centered = values - values.mean(axis=0)
    signal = centered / values.std(axis=0)

    detector = ruptures.Binseg(model=model, min_size=min_size, jump=jump)
    detector.fit(signal)
    return detector.predict(pen=pen)
def _write_mapping_csv(path, mapping):
    """Write every ``key, value`` pair of *mapping* as one CSV row to *path*."""
    # The csv module asks for files opened with newline=''; without it,
    # platforms that translate line endings emit an empty row after each
    # record. The `with` block also closes the file, so no explicit close.
    with open(path, 'w', newline='') as f:
        writer = csv.writer(f)
        for k, v in mapping.items():
            writer.writerow([k, v])


def breakpoint_detection(raw_data,fname,estimated_breaks,n_feat=feat):
    """Run several change-point detectors per feature and save the results.

    For each of the first ``n_feat`` columns of ``raw_data`` the function
    runs, times and prints the duration of:

    * ruptures Pelt ("rbf", penalty 5) and Binseg ("rbf",
      ``estimated_breaks`` breakpoints), stored as the union of both;
    * BOCPD with a constant hazard of 200 and of 400;
    * the online exact Bayesian detector (constant hazard 250, Student-T
      observation model).

    The per-feature results are written to ``<fname>_ruptures.csv``,
    ``<fname>_bocpdl1.csv``, ``<fname>_bocpdl2.csv`` and
    ``<fname>_onexo.csv``. The Dynp and offline-EXO variants were disabled
    (commented out) in the original and are not run.

    :param raw_data: 2-D array indexed as ``raw_data[:, feature]``.
    :param fname: path prefix of the output CSV files.
    :param estimated_breaks: number of breakpoints requested from Binseg.
    :param n_feat: number of feature columns to process.
    :return: None.
    """
    ruptures_cpts={}   # PELT, BinSeg
    bocpd_l1={}        # BOCPD l=200
    bocpd_l2={}        # BOCPD l=400
    exo_cpd_online={}  # EXO CPD online

    for ind in range(n_feat):
        temp_data=raw_data[:,ind]  # Going per feature/column

        # Ruptures
        start=time.time()
        pelt_result=rpt.Pelt(model='rbf').fit(temp_data).predict(pen=5)
        print("Pelt: ",time.time()-start)

        start=time.time()
        bin_result=rpt.Binseg(model='rbf').fit(temp_data).predict(n_bkps=estimated_breaks)
        print("Binseg: ",time.time()-start)

        ruptures_cpts[ind]=list(set().union(pelt_result,bin_result))

        # BOCPD, once per hazard rate
        for hazard_lambda,label,store in ((200,"BOCPD_l1: ",bocpd_l1),
                                          (400,"BOCPD_l2: ",bocpd_l2)):
            start=time.time()
            hazard_func=partial(bcp.constant_hazard,_lambda=hazard_lambda)
            beliefs,run_maxes=bcp.inference(temp_data,hazard_func)
            log_bel=-np.log(beliefs)
            index_changes=np.where(np.diff(run_maxes.T[0])<0)[0]
            print(label,time.time()-start)
            store[ind]=[index_changes,log_bel]

        # Online Exact and Efficient Bayesian Inference
        start=time.time()
        Nw=10
        R,maxes=oncd.online_changepoint_detection(
            temp_data,
            partial(oncd.constant_hazard, 250),
            oncd.StudentT(0.1, .01, 1, 0))
        online_cpts=R[Nw,Nw:-1]
        online_peaks=sig.find_peaks(online_cpts)
        print("Online EXO: ",time.time()-start)
        exo_cpd_online[ind]=[online_peaks,online_cpts]

        print("\t\t\t Breakpoint detection Index {} done.".format(ind))

    _write_mapping_csv(fname+'_ruptures.csv',ruptures_cpts)
    _write_mapping_csv(fname+'_bocpdl1.csv',bocpd_l1)
    _write_mapping_csv(fname+'_bocpdl2.csv',bocpd_l2)
    _write_mapping_csv(fname+'_onexo.csv',exo_cpd_online)
    print("\t\t Saved changepoint detection")
# Map every breakpoint except the last entry of `result` to its index label.
# NOTE(review): `x` is created before this fragment; it is assumed to be a list.
x.extend(indexes[idx] for idx in result[:-1])
# Value of p1_current at each flagged label.
y = [df.loc[df.index == idx]['p1_current'].values[0] for idx in x]

category_rows = df.loc[df['category_column'] == category]
plt.plot(category_rows.index, category_rows['p1_current'], label='normal')
plt.scatter(x, y, label='outlier', color='red', marker='o')
plt.title("Change Finder Window Segmentation p1_current")
plt.xlabel('Date Time')
plt.ylabel('p1_current')
plt.savefig(ofn + "_Window_p1_current.png")
plt.show()
plt.close()

# Same plot, this time with binary segmentation ("l2" cost).
algo = rpt.Binseg(model="l2")
result = algo.fit_predict(X, n_bkps=n_bkps)
x = [indexes[idx] for idx in result[:-1]]
y = [df.loc[df.index == idx]['p1_current'].values[0] for idx in x]

category_rows = df.loc[df['category_column'] == category]
plt.plot(category_rows.index, category_rows['p1_current'], label='normal')
plt.scatter(x, y, label='outlier', color='red', marker='o')
plt.title("Change Finder Binseg p1_current")
plt.xlabel('Date Time')
plt.ylabel('p1_current')
plt.savefig(ofn + "_BinarySeg_p1_current.png")
plt.show()
# Convert the time series values to a numpy 1D array
points = np.array(price_df['WTI_Price'])

# RUPTURES PACKAGE
# Changepoint detection with the Pelt search method ("rbf" cost, penalty 10)
model = "rbf"
algo = rpt.Pelt(model=model)
algo.fit(points)
result = algo.predict(pen=10)
rpt.display(points, result, figsize=(10, 6))
plt.title('Change Point Detection: Pelt Search Method')
plt.show()

# Changepoint detection with the Binary Segmentation search method
# ("l2" cost, 10 breakpoints)
model = "l2"
algo = rpt.Binseg(model=model)
algo.fit(points)
my_bkps = algo.predict(n_bkps=10)
# show results
rpt.show.display(points, my_bkps, figsize=(10, 6))
plt.title('Change Point Detection: Binary Segmentation Search Method')
plt.show()

# Changepoint detection with window-based search method
# ("l2" cost, window width 40, 10 breakpoints)
model = "l2"
algo = rpt.Window(width=40, model=model)
algo.fit(points)
my_bkps = algo.predict(n_bkps=10)
rpt.show.display(points, my_bkps, figsize=(10, 6))
plt.title('Change Point Detection: Window-Based Search Method')
plt.show()

#Changepoint detection with dynamic programming search method
# Column names starting with Home/Away, minus their last two characters,
# then reduced to the part returned first by split_at(..., '_', 2).
playerids = np.unique([c[:-2] for c in team.columns if c[:4] in ['Home', 'Away']])
playerids = np.unique([split_at(pid, '_', 2)[0] for pid in playerids])

#for player in playerids:
player = 'Home_6'
acc_column = team[player + '_Acc']
# Metabolic cost per acceleration sample, looked up by labels 1..len.
mc_temp = [metabolic_cost(acc_column[label]) for label in range(1, len(acc_column) + 1)]
#team[player+'_MP'] = mc_temp * team[player+'_speed']
mp_temp = mc_temp * team[player+'_speed']
test_mp = mp_temp.rolling(7500, min_periods=1).apply(lambda x: np.nansum(x))

#Use Changepoint Detection Here
plt.plot(test_mp)
plt.title('Metabolic Power Output [5 min Rolling Window]')

# Drop the first 7500 samples and shape the rest as a single-column signal.
tail = test_mp[7500:len(test_mp)]
signal = np.array(tail).reshape((len(tail), 1))

##Potentially pacing strategy or identifying moments in the game that are slower
algo = rpt.Pelt(model="l2", min_size=7500).fit(signal)
pelt_penalty = np.log(len(signal)) * 1 * np.std(signal) ** 2
result = algo.predict(pen=pelt_penalty)

##potentially finding spot where substitution should happen
# NOTE(review): the Pelt result above is overwritten here before it is used.
algo = rpt.Binseg(model="l2").fit(signal)
result = algo.predict(n_bkps=1)  #big_seg
rpt.show.display(signal, result, figsize=(10, 6))
plt.title('Metabolic Power Output [5 min Rolling Window]')

#SPI and Measure the minute after
home_spi_list = []
for player in home_players:
    print(player)
    speed_sum = tracking_home['Home_'+player+'_speed'].rolling(1500, min_periods=1).apply(lambda x: np.nansum(x))
    test_spi = speed_sum / 25.
    xcoords = sp.signal.find_peaks(test_spi, distance=1500)
    spi_values = [test_spi[peak] for peak in xcoords[0]]
    # positions of the three largest peak values
    spi_values_index = np.argsort(spi_values)[-3:]
def _ruptures_flagged_slices( kernel_distance_seq, policy_params, algo, method_label ):
    """Fit a ruptures estimator on the per-slice distances and return flagged slice indices."""
    n_change_points = policy_params[ "n_change_points" ]
    penalty = policy_params[ "penalty" ]
    epsilon = policy_params[ "epsilon" ]

    # Get list of distance distributions, one per slice
    distance_distribution_seq = [ get_flat_distances( m ) for m in kernel_distance_seq ]
    if not distance_distribution_seq:
        raise ValueError("Cannot detect change-points in an empty kernel distance sequence")

    # Get some properties about the distances needed by Ruptures
    n_distributions = len( distance_distribution_seq )
    dim = len( distance_distribution_seq[-1] )
    all_distances = []
    for d in distance_distribution_seq:
        all_distances += d
    sigma = np.std( all_distances )

    # Make into ndarray for ruptures
    signal = np.array( [ np.array(d) for d in distance_distribution_seq ] )
    algo.fit( signal )

    # Find change-points
    if n_change_points == "unknown":
        # The equality tests (rather than truthiness) are kept on purpose so
        # that the accepted flag values stay the same as before.
        if penalty == True and epsilon == False:
            penalty_value = np.log( n_distributions ) * dim * sigma**2
            change_points = algo.predict( pen=penalty_value )
        elif penalty == False and epsilon == True:
            threshold = 3 * n_distributions * sigma**2
            change_points = algo.predict( epsilon=threshold )
        else:
            raise ValueError("Invalid policy for {} change-point detection: {}".format(method_label, policy_params))
    else:
        change_points = algo.predict( n_bkps=n_change_points )
    # Each change-point is shifted back by one to obtain a slice index
    return [ cp-1 for cp in change_points ]


def detect_anomalies( kernel_distance_seq, policy ):
    """Return the indices of the slices flagged by an anomaly detection policy.

    :param kernel_distance_seq: sequence with one kernel distance matrix per
        slice.
    :param policy: dict with a ``"name"`` and a ``"params"`` dict. Supported
        names and the parameters they read:

        * ``"naive_max"`` -- the single slice holding the largest distance.
        * ``"increasing_median"`` (``threshold``) -- slices whose median
          distance exceeds the previous slice's median by more than
          ``threshold``.
        * ``"kolmogorov_smirnov"`` -- interior slices whose distribution
          differs from both neighbours (two-sample KS, p < 0.0001).
        * ``"median_exceeds_threshold"`` (``threshold``).
        * ``"random"`` (``n_samples``) -- distinct random slice indices.
        * ``"all"`` -- every slice index.
        * ``"ruptures_binary_segmentation"`` (``model``, ``n_change_points``,
          ``penalty``, ``epsilon``).
        * ``"ruptures_window_based"`` (same, plus ``width``).
    :return: list of flagged slice indices.
    :raises ValueError: for an inconsistent ruptures stopping policy, an
        empty sequence with a ruptures policy, or ``n_samples`` larger than
        the number of slices.
    :raises NotImplementedError: for an unknown policy name.
    """
    # Unpack policy
    policy_name = policy["name"]
    policy_params = policy["params"]

    # Do a truly naive anomaly detection policy where we just define the slice
    # containing the max kernel distance as anomalous and all others as not
    # anomalous. This is not really "anomaly detection" in any meaningful sense
    # But it suffices for testing the basic workflow
    if policy_name == "naive_max":
        max_dist_slice_idx = 0
        max_dist = 0
        for slice_idx,distance_mat in enumerate( kernel_distance_seq ):
            slice_max = max( get_flat_distances( distance_mat ) )
            # The comparison previously read an undefined name.
            if slice_max > max_dist:
                max_dist = slice_max
                max_dist_slice_idx = slice_idx
        return [ max_dist_slice_idx ]

    # Detect anomalies based on whether the median kernel distance increases
    # from slice to slice or not
    if policy_name == "increasing_median":
        threshold = policy_params["threshold"]
        flagged_slice_indices = []
        prev_median_distance = 0
        for slice_idx,distance_mat in enumerate( kernel_distance_seq ):
            curr_median_distance = np.median( get_flat_distances( distance_mat ) )
            if curr_median_distance - prev_median_distance > threshold:
                flagged_slice_indices.append( slice_idx )
            prev_median_distance = curr_median_distance
        return flagged_slice_indices

    if policy_name == "kolmogorov_smirnov":
        flagged_slice_indices = []
        thresh = 0.0001
        # First and last slice lack one neighbour and are never flagged.
        # NOTE(review): this branch flattens with flatten_distance_matrix
        # while the others use get_flat_distances -- confirm both exist.
        for slice_idx in range( 1, len(kernel_distance_seq) - 1 ):
            prev_dist = flatten_distance_matrix(kernel_distance_seq[ slice_idx - 1 ])
            curr_dist = flatten_distance_matrix(kernel_distance_seq[ slice_idx ])
            next_dist = flatten_distance_matrix(kernel_distance_seq[ slice_idx + 1 ])
            _, p_val_prev = ks_2samp( prev_dist, curr_dist )
            _, p_val_next = ks_2samp( next_dist, curr_dist )
            if p_val_prev < thresh and p_val_next < thresh:
                flagged_slice_indices.append( slice_idx )
        return flagged_slice_indices

    # Flag slices if the median kernel distance exceeds a user-supplied
    # threshold
    if policy_name == "median_exceeds_threshold":
        threshold = policy_params[ "threshold" ]
        flagged_slice_indices = []
        for slice_idx,distance_mat in enumerate( kernel_distance_seq ):
            if np.median( get_flat_distances( distance_mat ) ) > threshold:
                flagged_slice_indices.append( slice_idx )
        return flagged_slice_indices

    # Randomly choose slices. This isn't really an anomaly detection policy, but
    # we use it to check whether the distribution of callstacks from a random
    # sample of slices looks different than the distribution of callstacks from
    # the flagged slices
    if policy_name == "random":
        n_samples = policy_params["n_samples"]
        n_slices = len(kernel_distance_seq)
        # Asking for more distinct indices than exist could never finish.
        if n_samples > n_slices:
            raise ValueError("Cannot sample {} distinct slices out of {}".format(n_samples, n_slices))
        flagged_slice_indices = set()
        while len( flagged_slice_indices ) < n_samples:
            # generate uniform random number between 0 and n_slices-1
            flagged_slice_indices.add( np.random.randint( 0, n_slices, size=1 )[0] )
        return list( flagged_slice_indices )

    if policy_name == "all":
        return list( range( len(kernel_distance_seq) ) )

    if policy_name == "ruptures_binary_segmentation":
        algo = rpt.Binseg( model=policy_params[ "model" ] )
        return _ruptures_flagged_slices( kernel_distance_seq, policy_params, algo, "binary-segmentation" )

    if policy_name == "ruptures_window_based":
        algo = rpt.Window( width=policy_params[ "width" ], model=policy_params[ "model" ] )
        return _ruptures_flagged_slices( kernel_distance_seq, policy_params, algo, "window-based" )

    raise NotImplementedError("Anomaly detection policy: {} is not implemented".format(policy_name))
def make_neighborhood_rank_divergence_plot(rank_df, adj_df):
    """Plot how much each county's rank differs from its neighbours' ranks.

    ``rank_df`` is sorted by ``rank`` in place and gains a ``rank_div``
    column holding the mean absolute difference between a county's rank and
    the ranks of its adjacent counties (taken from ``adj_df``). A rolling
    mean (window 100) of that column is segmented with three ruptures
    methods; the mean of their breakpoints is drawn as the estimated
    breakpoint. Four PNG files are written under ``../output/``.

    :param rank_df: DataFrame with ``County`` and ``rank`` columns.
    :param adj_df: DataFrame with ``source`` and ``destination`` columns.
    :return: None.
    """
    rank_df.sort_values('rank', inplace=True, ascending=True)

    divergences = np.zeros(len(rank_df.index))
    for pos, (county, rank) in enumerate(zip(rank_df['County'], rank_df['rank'])):
        neighbors = adj_df.loc[adj_df.source == county, 'destination']
        if len(neighbors) == 0:
            # Fall back to the reverse direction of the adjacency table.
            neighbors = adj_df.loc[adj_df.destination == county, 'source']
        is_neighbor = rank_df.County.isin(neighbors).values
        neighbor_ranks = rank_df.loc[is_neighbor, 'rank']
        divergence = np.abs(rank - neighbor_ranks).mean()
        divergences[pos] = divergence
        if np.isnan(divergence):
            # Diagnostic output for counties without usable neighbours.
            print(county)
            print(neighbors)
            print(neighbor_ranks)
    rank_df['rank_div'] = divergences

    # Change point detection
    rolling_mean = rank_df['rank_div'].rolling(100).mean()
    signal = rolling_mean.dropna().values
    # model = {'l1', 'l2', 'rbf', 'linear', 'normal', 'ar'}
    pelt_bkps = rpt.Pelt(model='rbf').fit(signal).predict(pen=100)
    window_bkps = rpt.Window(width=1000, model='l2').fit(signal).predict(n_bkps=1)
    bin_bkps = rpt.Binseg(model='l2').fit(signal).predict(n_bkps=1)
    # Average every breakpoint except each method's final entry.
    ensemble_bkp = np.mean(
        [*pelt_bkps[:-1], *window_bkps[:-1], *bin_bkps[:-1]])
    print('Identified Breakpoints:'
          f'\n\tPelt Breakpoints: {pelt_bkps[:-1]}'
          f'\n\tWindow Breakpoints: {window_bkps[:-1]}'
          f'\n\tBinary Breakpoints: {bin_bkps[:-1]}'
          f'\n\tEnsemble Breakpoint: {ensemble_bkp}')

    ranks = rank_df['rank'].values
    plt.scatter(
        ranks,
        rank_df['rank_div'].values,
        facecolor='None',
        edgecolor=sns.xkcd_rgb['denim blue'],
        linewidth=2,
        label='Data',
    )
    plt.plot(
        rank_df['rank'].values,
        rank_df['rank_div'].rolling(100).mean(),
        color='darkorange',
        label='Rolling Mean',
    )
    y_min, y_max = divergences.min(), divergences.max()
    y_range = y_max - y_min
    plt.plot([ensemble_bkp, ensemble_bkp],
             [y_min - 0.1 * y_range, y_max + 0.1 * y_range],
             'k--', label='Estimated Breakpoint')
    plt.legend()
    plt.title('Mean Neighborhood Rank Divergence')
    plt.xlabel('Quality of Life Rank (Lower is better)')
    plt.ylabel('Rank Divergence')
    plt.tight_layout()
    # Remember the axis limits and figure size so that the per-method
    # figures below share them.
    ymin, ymax = plt.gca().get_ylim()
    figsize = plt.gcf().get_size_inches()
    plt.savefig('../output/neighborhood_rank_divergence.png', dpi=600)
    plt.close('all')

    # Visualize change points, one figure per method
    bkps = []
    per_method = (
        (rpt.display, pelt_bkps, 'Pelt Change Point Detection',
         '../output/rank_div_change_point_pelt.png'),
        (rpt.show.display, window_bkps, 'Window Change Point Detection',
         '../output/rank_div_change_point_window.png'),
        (rpt.show.display, bin_bkps, 'Binary Change Point Detection',
         '../output/rank_div_change_point_binary.png'),
    )
    for show, found_bkps, title, out_path in per_method:
        show(
            signal,
            bkps,
            found_bkps,
            figsize=figsize,
        )
        plt.ylim(ymin, ymax)
        plt.gca().get_lines()[0].set_color('darkorange')
        plt.title(title)
        plt.xlabel('Quality of Life Rank')
        plt.ylabel('Local Rank Divergence')
        plt.tight_layout()
        plt.savefig(out_path, dpi=600)
        plt.close('all')
signal = numpy.array(ll)
# alignlen = 5000
# mean = numpy.mean(signal)
# std = numpy.std(signal)

# change point detection
model = "l1"  # "l1", "rbf", "linear", "normal", "ar"

# Alternative search methods tried earlier, kept for reference:
# search_method = 'dynamic programming'
# my_bkps = rpt.Dynp(model=model, min_size=100).fit_predict(signal,n_bkps=5)
# search_method = 'Window-based change point detection'
# my_bkps = rpt.Window(model=model, width= 5).fit_predict(signal,pen=1000)
# search_method = 'Exact segmentation: Pelt'
# my_bkps = rpt.Pelt(model = model, min_size=5).fit_predict(signal,pen=10)
# search_method = 'Bottom-up segmentation'
# my_bkps = rpt.BottomUp(model = model).fit_predict(signal,pen=5)

# Active choice: binary segmentation with a penalty of 30.
search_method = 'Binary segmentation'
segmenter = rpt.Binseg(model=model)
my_bkps = segmenter.fit_predict(signal, pen=30)
print(my_bkps)

# show results
rpt.show.display(signal, my_bkps, figsize=(15, 7))
plt.title(search_method)
plt.show()
value = day.iloc[:, j].mean() if value > 0: arr.append(value) #Changepoint detection with the Pelt search method signal = np.array(arr) algo = rpt.Pelt(model="rbf").fit(signal) result = algo.predict(pen=10) rpt.display(signal, result) plt.title('Change Point Detection: Pelt Search Method') plt.show() #Changepoint detection with the Binary Segmentation search method model = "l2" algo = rpt.Binseg(model=model).fit(signal) my_bkps = algo.predict(n_bkps=10) # show results rpt.show.display(signal, my_bkps) plt.title('Change Point Detection: Binary Segmentation Search Method') plt.show() #Changepoint detection with window-based search method model = "l2" algo = rpt.Window(width=40, model=model).fit(signal) my_bkps = algo.predict(n_bkps=10) rpt.show.display(signal, my_bkps) plt.title('Change Point Detection: Window-Based Search Method') plt.show() #Changepoint detection with dynamic programming search method
def find_changepoints_for_time_series(series, modeltype="binary", number_breakpoints=10, plot_flag=True, plot_with_dates=False, show_time_flag=False):
    """Detect change points in a time series with a selectable method.

    :param series: pandas-like series; ``.values``, ``.shape``, ``.index``,
        ``.plot``, ``.max`` and ``.min`` are used.
    :param modeltype: one of

        * ``"binary"``  -- ruptures binary segmentation, "l2" cost;
        * ``"pelt"``    -- ruptures Pelt, "rbf" cost, penalty 10;
        * ``"window"``  -- ruptures window search, "l2" cost, width 40;
        * ``"Dynamic"`` -- ruptures dynamic programming, "l1" cost;
        * ``"online"``  -- ChangeFinder scores, keeping the
          ``number_breakpoints`` lowest-scoring positions.
    :param number_breakpoints: number of breakpoints requested (ignored by
        ``"pelt"``).
    :param plot_flag: draw the segmentation.
    :param plot_with_dates: plot against the series index with alternating
        shaded segments instead of ``rpt.display``.
    :param show_time_flag: print the elapsed detection time.
    :return: list of breakpoints; the series length is appended when it is
        not already present.
    :raises ValueError: if ``modeltype`` is not recognised (previously this
        surfaced as an unbound-variable error).
    """
    known_types = ("binary", "pelt", "window", "Dynamic", "online")
    if modeltype not in known_types:
        raise ValueError(
            "Unknown modeltype {!r}; expected one of {}".format(modeltype, known_types))

    #RUPTURES PACKAGE
    points = series.values
    t0 = time.time()

    if modeltype == "binary":
        title = "Change Point Detection: Binary Segmentation Search Method"
        changepoint_model = rpt.Binseg(model="l2").fit(points)
        result = changepoint_model.predict(n_bkps=number_breakpoints)
    elif modeltype == "pelt":
        title = "Change Point Detection: Pelt Search Method"
        changepoint_model = rpt.Pelt(model="rbf").fit(points)
        result = changepoint_model.predict(pen=10)
    elif modeltype == "window":
        title = "Change Point Detection: Window-Based Search Method"
        changepoint_model = rpt.Window(width=40, model="l2").fit(points)
        result = changepoint_model.predict(n_bkps=number_breakpoints)
    elif modeltype == "Dynamic":
        title = "Change Point Detection: Dynamic Programming Search Method"
        changepoint_model = rpt.Dynp(model="l1", min_size=3, jump=5).fit(points)
        result = changepoint_model.predict(n_bkps=number_breakpoints)
    else:  # "online"
        # CHANGEFINDER PACKAGE
        title = "Simulates the working of finding changepoints in online fashion"
        cf = changefinder.ChangeFinder()
        scores = [cf.update(p) for p in points]
        result = (-np.array(scores)).argsort()[:number_breakpoints]
        result = sorted(list(result))

    # Make sure the end of the series closes the last segment.
    if series.shape[0] not in result:
        result.append(series.shape[0])

    if show_time_flag:
        elapsed_time = time.time() - t0
        print("[exp msg] elapsed time for process: " + str(time.strftime("%H:%M:%S", time.gmtime(elapsed_time))))

    if plot_flag:
        if not plot_with_dates:
            rpt.display(points, result, figsize=(18, 6))
            plt.title(title)
            plt.show()
        else:
            series.plot(figsize=(18, 6))
            plt.title(title)
            # Shade consecutive segments with alternating colours.
            for i, (seg_start, seg_end) in enumerate(zip(result[:-1], result[1:])):
                current_color = 'xkcd:salmon' if i % 2 == 0 else 'xkcd:sky blue'
                plt.fill_between(series.index[seg_start:seg_end],
                                 y1=series.max() * 1.1,
                                 y2=series.min() * 0.9,
                                 color=current_color,
                                 alpha=0.3)
            plt.show()
    return (result)
def get_decomp_plus_cp(self,signal, dates, decomp_algo='STL', cp_algo='bayes', config=None):
    '''
    task function description: applies decomposition, and gets the change
    points

    signal: array-like of observations.
    dates: index assigned to the signal for trend extraction.
    decomp_algo: 'STL' to extract the trend via
        ``self.extract_climate_trend``; None to use the raw signal.
        Overridden by ``config['decomp_algo']``.
    cp_algo: 'bayes', 'pelt', 'binseg' or 'window'.
    config: optional dict. Keys read: 'decomp_algo'; for 'bayes'
        'distribution' (default 'log_normal') and 'log_odds_threshold'
        (default 0); for 'pelt' 'model' (default 'rbf') and 'pen'; for
        'window' 'width' (default 10).

    Returns the predicted breakpoints.

    Raises ValueError for an unknown ``cp_algo``, or when a ruptures
    algorithm is requested but no penalty is available (``self.pen`` is
    neither 'aic' nor 'bic' and, for 'pelt', ``config`` has no 'pen').
    Both cases previously surfaced as unbound-variable errors.
    '''
    if cp_algo not in ('bayes', 'pelt', 'binseg', 'window'):
        raise ValueError(
            "Unknown cp_algo {!r}; expected 'bayes', 'pelt', 'binseg' or "
            "'window'".format(cp_algo))
    cfg = config if config else {}

    #formatting the np.array to dataframe for trend extraction
    signal = pd.DataFrame({'signal': signal})
    signal.index = dates

    #trend extraction
    decomp_algo = cfg.get('decomp_algo', decomp_algo)
    if decomp_algo == 'STL':
        signal_trend = self.extract_climate_trend(signal, 'STL')
        signal = np.array(signal_trend['signal'])
    if decomp_algo is None:
        signal = np.array(signal)

    #change point detection
    #bayesian change point detection
    if cp_algo == 'bayes':
        # Missing config keys fall back to log normal and a 0 threshold,
        # the same values used when no config is given at all.
        distribution = cfg.get('distribution', 'log_normal')
        log_odds = cfg.get('log_odds_threshold', 0)
        detector = cpDetector([signal], distribution=distribution, log_odds_threshold=log_odds)
        detector.detect_cp()
        #gets the breakpoints via idx from the detector
        predicted_breaks = detector.change_points['traj_0']['ts'].values
        predicted_breaks = np.append(predicted_breaks, len(signal))

    # Penalty for the ruptures algorithms, chosen by the instance setting.
    pen = None
    if self.pen == 'aic':
        pen = self.aic_penalty(signal)
    elif self.pen == 'bic':
        pen = self.bic_penalty(signal)

    if cp_algo == 'pelt':
        pen = cfg.get('pen', pen)
    if cp_algo in ('pelt', 'binseg', 'window') and pen is None:
        raise ValueError(
            "No penalty available for cp_algo {!r}: set self.pen to 'aic' "
            "or 'bic' (or pass config['pen'] for 'pelt')".format(cp_algo))

    if cp_algo == 'pelt':
        algo = rpt.Pelt(model=cfg.get('model', 'rbf')).fit(signal)
        predicted_breaks = algo.predict(pen=pen)

    if cp_algo == 'binseg':
        algo = rpt.Binseg(model='rbf').fit(signal)
        predicted_breaks = algo.predict(pen=pen)

    if cp_algo == 'window':
        width = cfg.get('width', 10)
        # NOTE(review): the original also read 'model' (default 'rbf') and
        # 'std' (default 0.045) here and derived 3*len(signal)*std**2, but
        # none of them reached ruptures. Window still runs with its default
        # cost so that results are unchanged -- confirm the intent.
        algo = rpt.Window(width=width).fit(signal)
        predicted_breaks = algo.predict(pen=pen)

    return predicted_breaks