def onlineCD(sessionHistory, chunk_when_last_chd, interval, playerVisibleBW):
    """Look for a new change point in the player-visible bandwidth samples.

    The samples are trimmed with ``trimPlayerVisibleBW`` (threshold 1000)
    and fed to ``oncd.online_changepoint_detection`` with a constant hazard
    of 250 and a ``StudentT(0.1, 0.01, 1, 0)`` observation model.  Row
    ``interval`` of the returned matrix ``R``, columns ``interval`` up to
    (but excluding) the last one, is then scanned from the end towards the
    start, and the first entry that qualifies is reported.

    An entry at position ``i`` qualifies when all of the following hold:

    * its value is greater than 0.01,
    * ``i + cutoff`` is greater than ``chunk_when_last_chd``,
    * it is not position 0 while ``chunk_when_last_chd > -1``.

    Args:
        sessionHistory: Not used by this function; kept so existing callers
            keep working.
        chunk_when_last_chd: Index of the previously reported change; it is
            returned unchanged when nothing new is found.
        interval: Row/column offset into ``R``; clamped to the number of
            trimmed samples.
        playerVisibleBW: Sequence of samples to analyse.

    Returns:
        tuple: ``(chd_detected, chd_index)`` where ``chd_index`` is
        ``i + cutoff`` for the qualifying entry, or ``chunk_when_last_chd``
        when no entry qualifies.
    """
    trim_thresh = 1000
    # NOTE(review): `cutoff` is presumably the number of leading samples
    # dropped by the trim, so `i + cutoff` is an index into the untrimmed
    # series -- confirm against trimPlayerVisibleBW.
    playerVisibleBW, cutoff = trimPlayerVisibleBW(playerVisibleBW, trim_thresh)
    R, _ = oncd.online_changepoint_detection(
        np.asanyarray(playerVisibleBW),
        partial(oncd.constant_hazard, 250),
        oncd.StudentT(0.1, 0.01, 1, 0))

    interval = min(interval, len(playerVisibleBW))
    change_probs = R[interval, interval:-1]
    had_previous_change = chunk_when_last_chd > -1

    # Walk backwards so the latest qualifying position wins.
    for i in range(len(change_probs) - 1, -1, -1):
        if (change_probs[i] > 0.01
                and i + cutoff > chunk_when_last_chd
                and not (i == 0 and had_previous_change)):
            return True, i + cutoff
    return False, chunk_when_last_chd
def identify_change_points(feature_by_time_matrix):
    """Flag time steps at which the most likely run length jumps.

    Runs ``oncd.online_changepoint_detection`` on the transposed matrix
    with a constant hazard of 250 and an ``oncd.MV_Norm`` observation model
    (zero mean, identity ``Sigma``, ``n = [1.0]``).  For every column of
    the returned matrix ``R`` the row index of the maximum is taken; a time
    step is marked as a change point when that index differs from the next
    column's by more than 5.

    Args:
        feature_by_time_matrix: 2-D array; ``shape[0]`` sizes the
            observation model and ``shape[1]`` sizes the output vector.

    Returns:
        dict: ``{'change_points': array}`` where ``array`` has length
        ``feature_by_time_matrix.shape[1]`` and holds 1.0 at flagged
        positions and 0.0 elsewhere.
    """
    n_features = feature_by_time_matrix.shape[0]
    n_times = feature_by_time_matrix.shape[1]

    R, _ = oncd.online_changepoint_detection(
        feature_by_time_matrix.T,
        partial(oncd.constant_hazard, 250),
        oncd.MV_Norm(mu=np.zeros(n_features),
                     Sigma=np.diag(np.ones(n_features)),
                     n=np.array([1.0])))

    # Absolute change, column to column, of the arg-max row of R.
    run_length_jump = np.abs(np.diff(np.argmax(R, axis=0)))

    change_points = np.zeros(n_times)
    change_points[run_length_jump > 5] = 1.0
    return {'change_points': change_points}
def get_cp_prob(self, ts):
    """Build a time series of change-point probabilities for ``ts``.

    ``oncd.online_changepoint_detection`` is run on ``ts.y`` with a
    constant hazard of ``self.hazard_lambda`` and a
    ``StudentT(0.1, .01, 1, 0)`` observation model.  Row
    ``self.future_win_len`` of the resulting matrix, from column
    ``self.future_win_len`` up to (but excluding) the last column, is taken
    as the probability vector.  Its first entry is forced to 0.0, and all
    entries except the last are copied into a new ``time_series.dist_ts``
    paired with the leading values of ``ts.x``.

    Args:
        ts: Object exposing ``x`` and ``y`` sequences.

    Returns:
        The object created by ``time_series.dist_ts(ts)`` with the
        probabilities appended to its ``x``/``y`` lists.  Nothing is
        appended when the matrix is too small to contain row
        ``self.future_win_len``.
    """
    f_hazard = partial(oncd.constant_hazard, self.hazard_lambda)
    prob_length, _ = oncd.online_changepoint_detection(
        np.asarray(ts.y), f_hazard, oncd.StudentT(0.1, .01, 1, 0))

    win = self.future_win_len
    # Strict comparison: row/column `win` only exists when the dimension is
    # larger than `win` (a dimension equal to `win` would raise IndexError).
    if prob_length.shape[0] > win and prob_length.shape[1] > win:
        cp_prob = prob_length[win, win:-1]
    else:
        cp_prob = []

    if len(cp_prob):
        cp_prob[0] = 0.0

    ts_cp_prob = time_series.dist_ts(ts)
    # The final probability is intentionally left out, as in the original.
    for i in range(len(cp_prob) - 1):
        ts_cp_prob.x.append(ts.x[i])
        ts_cp_prob.y.append(cp_prob[i])
    return ts_cp_prob
def test_univariate():
    """A mean shift halfway through a 1-D series must reset the run length.

    The series is 50 draws from a normal with location 0 followed by 50
    draws with location 2 (seeded for reproducibility).
    """
    np.random.seed(seed=34)

    # Draw order matters for reproducibility: first segment, then second.
    first_half = norm.rvs(0, size=50)
    second_half = norm.rvs(2, size=50)
    dataset = np.hstack((first_half, second_half))

    hazard = partial(online.constant_hazard, 20)
    observation_model = online.StudentT(0.1, .01, 1, 0)
    r, maxes = online.online_changepoint_detection(
        dataset, hazard, observation_model)

    drop_at_shift = maxes[50] - maxes[51]
    assert drop_at_shift > 40
def findonchangepoint(self, data):
    '''
    Run online change-point detection on ``data``.

    Uses a constant hazard built from ``self.mean_runlen`` and a
    ``StudentT(0.1, .01, 1, 0)`` observation model, and returns the two
    values produced by ``oncd.online_changepoint_detection``: the
    run-length probability matrix and the indexes of the maximum
    run-length probabilities.
    '''
    hazard = partial(oncd.constant_hazard, self.mean_runlen)
    observation_model = oncd.StudentT(0.1, .01, 1, 0)
    run_length_probs, max_indexes = oncd.online_changepoint_detection(
        data, hazard, observation_model)
    return run_length_probs, max_indexes
def test_multivariate():
    """Mean shifts in a 10-dimensional series must shorten the run length.

    The series is four consecutive segments of 50 samples each whose mean
    vectors are all 0, all 4, all 0 and all -4, so the mean shifts at
    t=50, 100 and 150.
    """
    np.random.seed(seed=34)

    # Segments are drawn in order so the seeded stream is consumed
    # identically on every run.
    segment_means = (0, 4, 0, -4)
    segments = [multivariate_normal.rvs([mean] * 10, size=50)
                for mean in segment_means]
    dataset = np.vstack(segments)

    hazard = partial(online.constant_hazard, 50)
    r, maxes = online.online_changepoint_detection(
        dataset, hazard, online.MultivariateT(dims=10))

    # Just after each shift the value in `maxes` must be lower than just
    # before it.
    for brkpt in (50, 100, 150):
        before, after = maxes[brkpt - 1], maxes[brkpt + 1]
        assert after < before
def onlineCD(chunk_when_last_chd, interval, cwnd):
    """Look for a new change point in the ``cwnd`` samples.

    The samples are trimmed with ``trimPlayerVisibleBW`` (threshold 1000)
    and fed to ``oncd.online_changepoint_detection`` with a constant hazard
    of 250 and a ``StudentT(0.1, 0.01, 1, 0)`` observation model.  Row
    ``interval`` of the returned matrix ``R``, columns ``interval`` up to
    (but excluding) the last one, is scanned from the end towards the
    start, and the first entry that qualifies is reported.

    An entry at position ``i`` qualifies when its value exceeds 0.01,
    ``i + cutoff`` is greater than ``chunk_when_last_chd``, and it is not
    position 0 while ``chunk_when_last_chd > -1``.

    Args:
        chunk_when_last_chd: Index of the previously reported change; it is
            returned unchanged when nothing new is found.
        interval: Row/column offset into ``R``; clamped to the number of
            trimmed samples.
        cwnd: Sequence of samples to analyse.

    Returns:
        tuple: ``(chd_detected, chd_index)`` where ``chd_index`` is
        ``i + cutoff`` for the qualifying entry, or ``chunk_when_last_chd``
        when no entry qualifies.
    """
    trim_thresh = 1000
    # NOTE(review): `cutoff` is presumably the number of leading samples
    # dropped by the trim -- confirm against trimPlayerVisibleBW.
    cwnd, cutoff = trimPlayerVisibleBW(cwnd, trim_thresh)
    R, _ = oncd.online_changepoint_detection(
        np.asanyarray(cwnd),
        partial(oncd.constant_hazard, 250),
        oncd.StudentT(0.1, 0.01, 1, 0))

    interval = min(interval, len(cwnd))
    change_probs = R[interval, interval:-1]
    had_previous_change = chunk_when_last_chd > -1

    # Walk backwards so the latest qualifying position wins.
    for i in range(len(change_probs) - 1, -1, -1):
        if (change_probs[i] > 0.01
                and i + cutoff > chunk_when_last_chd
                and not (i == 0 and had_previous_change)):
            return True, i + cutoff
    return False, chunk_when_last_chd
def onlineCD(chunk_when_last_chd, interval, playerVisibleBW):
    """Look for a new change point and report the outcome on stdout.

    The samples are trimmed with ``trimPlayerVisibleBW`` (threshold 100)
    and fed to ``oncd.online_changepoint_detection`` with a constant hazard
    of 250 and a ``StudentT(0.1, 0.01, 1, 0)`` observation model.  Row
    ``interval`` of the returned matrix ``R``, columns ``interval`` up to
    (but excluding) the last one, is scanned from the end towards the
    start, and the first entry that qualifies is reported.

    An entry at position ``i`` qualifies when its value exceeds 0.01,
    ``i + cutoff`` is greater than ``chunk_when_last_chd``, and it is not
    position 0 while ``chunk_when_last_chd > -1``.

    One line is printed per call.  It is built as a single string and
    passed to ``print(...)`` as one argument, which is valid on both
    Python 2 and Python 3 and reproduces the text of the former Python 2
    ``print`` statements; the line is now written once the scan has
    finished.

    Args:
        chunk_when_last_chd: Index of the previously reported change; it is
            returned unchanged when nothing new is found.
        interval: Row/column offset into ``R``; clamped to the number of
            trimmed samples.
        playerVisibleBW: Sequence of samples to analyse.

    Returns:
        tuple: ``(chd_detected, chd_index)``.
    """
    chd_detected = False
    chd_index = chunk_when_last_chd

    # Threshold for the amount of samples to consider for change point.
    trim_thresh = 100
    lenarray = len(playerVisibleBW)  # untrimmed length, reported below
    playerVisibleBW, cutoff = trimPlayerVisibleBW(playerVisibleBW, trim_thresh)
    R, _ = oncd.online_changepoint_detection(
        np.asanyarray(playerVisibleBW),
        partial(oncd.constant_hazard, 250),
        oncd.StudentT(0.1, 0.01, 1, 0))

    interval = min(interval, len(playerVisibleBW))
    change_probs = R[interval, interval:-1]

    outcome = "no change"
    # Walk backwards so the latest qualifying position wins.
    for i in range(len(change_probs) - 1, -1, -1):
        if (change_probs[i] > 0.01
                and i + cutoff > chunk_when_last_chd
                and not (i == 0 and chunk_when_last_chd > -1)):
            chd_index = i + cutoff
            chd_detected = True
            # Spacing mirrors what the comma-separated print statement
            # produced (one space inserted between every item).
            outcome = (
                "change detected i =  {0}  chd_index =  {1}"
                "  chunk_when_last_chd = {2}  len =  {3}"
            ).format(i, chd_index, chunk_when_last_chd, lenarray)
            break

    print("evaluating change:  " + outcome)
    return chd_detected, chd_index
def identify_change_points(self):
    """Compute ``self.change_points`` with the configured detection method.

    The method name is read from ``self.extra_inputs['cp_params']['method']``
    when ``'cp_params'`` is present, and defaults to ``'Online'`` otherwise.

    * ``'Online'``: runs ``oncd.online_changepoint_detection`` on
      ``self.feature_by_time_matrix_reduced`` with a constant hazard of 250
      and an ``oncd.MV_Norm`` observation model (zero mean, ``Sigma`` equal
      to 5.0 times the identity, ``n = [1.0]``).  Stores ``self.R``,
      ``self.diff_in_max`` and ``self.expected_run_len``, and sets
      ``self.change_points`` to a 0/1 float vector that is 1.0 wherever
      ``diff_in_max`` exceeds 5.
    * ``'Offline'``: runs ``offcd.offline_changepoint_detection`` with a
      constant prior and ``offcd.fullcov_obs_log_likelihood``
      (``truncate=-20``).  Stores ``self.cp_prob`` and sets
      ``self.change_points`` to the boolean mask ``cp_prob > 0.7``.

    Any other method name leaves the instance untouched.

    Returns:
        None.  Results are stored on ``self``.
    """
    if 'cp_params' in self.extra_inputs:
        method = self.extra_inputs['cp_params']['method']
    else:
        # Default.  This used to be the comparison `method == 'Online'`,
        # which left `method` unassigned and raised UnboundLocalError.
        method = 'Online'

    if method == 'Online':
        features = self.feature_by_time_matrix_reduced
        n_features = features.shape[1]
        R, _ = oncd.online_changepoint_detection(
            features,
            partial(oncd.constant_hazard, 250),
            oncd.MV_Norm(mu=np.zeros(n_features),
                         Sigma=5.0 * np.diag(np.ones(n_features)),
                         n=np.array([1.0])))

        # Absolute change, column to column, of the arg-max row of R.
        diff_in_max = np.abs(np.diff(np.argmax(R, axis=0)))
        # Each column of R weighted by the row index and summed.
        expected_run_len = np.dot(R.T, np.arange(len(R)))

        self.R = R
        self.diff_in_max = diff_in_max
        self.expected_run_len = expected_run_len

        self.change_points = np.zeros(features.shape[0])
        self.change_points[diff_in_max > 5] = 1.0
    elif method == 'Offline':
        features = self.feature_by_time_matrix_reduced
        Q, P, Pcp = offcd.offline_changepoint_detection(
            features,
            partial(offcd.const_prior, l=(features.shape[0] + 1)),
            offcd.fullcov_obs_log_likelihood,
            truncate=-20)
        self.cp_prob = np.exp(Pcp).sum(0)
        self.change_points = self.cp_prob > 0.7
def determine_concept_drift(df):
    """Plot the negative log of the change-point detection matrix.

    Online change-point detection (constant hazard 250,
    ``StudentT(0.1, .01, 1, 0)``) is run on the second column of ``df``
    (``df.iloc[:, 1]``).  Every fifth row and column of the resulting
    matrix is kept, its negative log is drawn with ``plt.pcolor`` and the
    figure is shown.

    Args:
        df: DataFrame whose column at position 1 holds the series.
    """
    series = df.iloc[:, 1]
    hazard = partial(oncd.constant_hazard, 250)
    R1, maxes = oncd.online_changepoint_detection(
        series, hazard, oncd.StudentT(0.1, .01, 1, 0))

    sparsity = 5  # only plot every fifth data for faster display
    neg_log_probs = -np.log(R1[0:-1:sparsity, 0:-1:sparsity])

    # The colour scale tops out at the 5th percentile of -log(prob), i.e.
    # the 95th percentile of the probabilities themselves.
    chosen_vmax = int(np.percentile(neg_log_probs.flatten(), 5))

    grid = np.arange(0, len(R1[:, 0]), sparsity)
    plt.pcolor(grid, grid, neg_log_probs,
               cmap=cm.gray, vmin=0, vmax=chosen_vmax)
    plt.xlabel("time steps")
    plt.ylabel("run length")

    # black = highest prob, white = lowest prob.  The colours mean the same
    # as in the arxiv paper; only the bar direction is reversed.
    cbar = plt.colorbar(label="P(run)")
    cbar.set_ticks([0, chosen_vmax])
    cbar.set_ticklabels([1, 0])
    plt.show()
def concept_drift(vals, plot=False, verbose=False):
    """Run online change-point detection and derive drift thresholds.

    Args:
        vals: Series to analyse.
        plot: When true, draw the negative-log matrix with ``plt.pcolor``
            and show the figure.
        verbose: When true, print the derived thresholds.

    Returns:
        tuple: ``(neg_log_probs, chosen_vmax, epsilon, thresh_run)`` where

        * ``neg_log_probs`` is ``-log`` of the detection matrix without its
          last row and column,
        * ``chosen_vmax`` is the integer part of the 5th percentile of
          those values,
        * ``epsilon`` is twice the distance between the matrix entry at
          ``[1][1]`` and ``chosen_vmax``,
        * ``thresh_run`` is 10% of ``len(vals)``.
    """
    # https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.pcolor.html#matplotlib.pyplot.pcolor
    # https://stackoverflow.com/questions/42687454/pcolor-data-plot-in-python
    hazard = partial(oncd.constant_hazard, 250)
    R1, maxes = oncd.online_changepoint_detection(
        vals, hazard, oncd.StudentT(0.1, .01, 1, 0))

    sparsity = 1  # 5, increase this if time is too long
    neg_log_probs = -np.log(R1[0:-1:sparsity, 0:-1:sparsity])
    chosen_vmax = int(np.percentile(neg_log_probs.flatten(), 5))

    if plot:
        grid = np.arange(0, len(R1[:, 0]), sparsity)
        plt.pcolor(grid, grid, neg_log_probs,
                   cmap=cm.gray, vmin=0, vmax=chosen_vmax)
        plt.xlabel("time steps")
        plt.ylabel("run length")
        cbar = plt.colorbar(label="P(run)")
        cbar.set_ticks([0, chosen_vmax])
        cbar.set_ticklabels([1, 0])
        plt.show()

    # A run consists of a diagonal of values that must be within this
    # epsilon from chosen_vmax.
    epsilon = 2 * abs(neg_log_probs[1][1] - chosen_vmax)
    # A run must be at least 10% the length of the time series before a
    # concept drift can occur.
    thresh_run = .1 * len(vals)

    if verbose:
        print("vmax: ", chosen_vmax)
        print("epsilon: ", epsilon)
        print("threshold length for a run: ", thresh_run)

    return neg_log_probs, chosen_vmax, epsilon, thresh_run
def online_changepoint_det(data, lamda_gap, alpha, beta, M_):
    """Run ``oncd.online_changepoint_detection`` with caller-supplied settings.

    Args:
        data: Series handed to the detector unchanged.
        lamda_gap: Argument bound to ``oncd.constant_hazard``.
        alpha: First argument of ``oncd.StudentT``.
        beta: Second argument of ``oncd.StudentT``.
        M_: Fourth argument of ``oncd.StudentT`` (the third is fixed at 1).

    Returns:
        Whatever ``oncd.online_changepoint_detection`` returns, unmodified.
    """
    hazard = partial(oncd.constant_hazard, lamda_gap)
    observation_model = oncd.StudentT(alpha, beta, 1, M_)
    detection = oncd.online_changepoint_detection(
        data, hazard, observation_model)
    return detection
for p in partition: mean = np.random.randn() * 10 var = np.random.randn() * 1 if var < 0: var = var * -1 tdata = np.random.normal(mean, var, p) data = np.concatenate((data, tdata)) return data # Generate Sample data #data = generate_normal_time_series(7, 50, 200) # Draw sample data plt.figure(1, figsize=(28, 12), dpi=80, facecolor='w', edgecolor='k') plt.subplot(211) plt.title("Raw data") plt.plot(data) # Main functions import bayesian_changepoint_detection.online_changepoint_detection as oncd from functools import partial R, maxes = oncd.online_changepoint_detection( data, partial(oncd.constant_hazard, 250), oncd.StudentT(0.1, .01, 1, 0)) plt.subplot(212) plt.title("Detection") Nw = 10 plt.plot(R[Nw, Nw:-1]) plt.show()