Ejemplo n.º 1
0
def onlineCD(sessionHistory, chunk_when_last_chd, interval, playerVisibleBW):
    """Look for a change point newer than the last one already reported.

    The samples are passed through ``trimPlayerVisibleBW`` (defined
    elsewhere), online change-point detection is run on the result, and row
    ``interval`` of the returned matrix ``R`` is scanned from its last entry
    to its first.  The first entry found that exceeds 0.01 and maps to an
    index later than ``chunk_when_last_chd`` is reported.

    Args:
        sessionHistory: Not used by this function; kept so existing callers
            keep working.
        chunk_when_last_chd: Index of the previously reported change.  Only
            strictly later indices are reported.
        interval: Row (and starting column) of ``R`` to inspect.  Clamped to
            the number of samples left after trimming.
        playerVisibleBW: Sequence of samples to analyse.

    Returns:
        tuple: ``(chd_detected, chd_index)``.  ``chd_index`` is the window
        index plus ``cutoff`` when a change is found, otherwise it is
        ``chunk_when_last_chd`` unchanged.
    """
    chd_detected = False
    chd_index = chunk_when_last_chd
    trimThresh = 1000
    # ``cutoff`` is added back to window indices below -- presumably the
    # number of leading samples the trim dropped (TODO confirm against
    # trimPlayerVisibleBW).
    playerVisibleBW, cutoff = trimPlayerVisibleBW(playerVisibleBW, trimThresh)
    R, _ = oncd.online_changepoint_detection(
        np.asanyarray(playerVisibleBW), partial(oncd.constant_hazard, 250),
        oncd.StudentT(0.1, 0.01, 1, 0))
    interval = min(interval, len(playerVisibleBW))
    changeArray = R[interval, interval:-1]
    # Walk from the last entry to the first so the latest qualifying index
    # is the one reported.
    for i in reversed(range(len(changeArray))):
        v = changeArray[i]
        # Window index 0 is only accepted while no earlier change has been
        # recorded (chunk_when_last_chd <= -1).
        if v > 0.01 and i + cutoff > chunk_when_last_chd and not (
                i == 0 and chunk_when_last_chd > -1):
            chd_index = i + cutoff
            chd_detected = True
            break
    return chd_detected, chd_index
def identify_change_points(feature_by_time_matrix):
    """Flag time steps where the most likely run length jumps.

    Online change-point detection is run on the transpose of
    ``feature_by_time_matrix`` with a constant hazard of 250 and an
    ``oncd.MV_Norm`` model (zero mean, identity ``Sigma``).  For every
    column of the returned matrix ``R`` the row index of the maximum is
    taken; a step is marked as a change point when that index differs from
    the previous column's by more than 5.

    Args:
        feature_by_time_matrix: 2-D array; ``shape[0]`` sizes the model
            parameters and ``shape[1]`` sizes the output vector.

    Returns:
        dict: ``{'change_points': array}`` where the array has
        ``feature_by_time_matrix.shape[1]`` entries, 1.0 at flagged steps
        and 0.0 elsewhere.
    """
    n_features = feature_by_time_matrix.shape[0]
    n_steps = feature_by_time_matrix.shape[1]

    R, _ = oncd.online_changepoint_detection(
        feature_by_time_matrix.T, partial(oncd.constant_hazard, 250),
        oncd.MV_Norm(mu=np.zeros(n_features),
                     Sigma=np.diag(np.ones(n_features)),
                     n=np.array([1.0])))

    # Absolute change, column to column, of the row index holding the
    # largest value in R.
    diff_in_max = np.abs(np.diff(np.argmax(R, axis=0)))

    change_points = np.zeros(n_steps)
    change_points[diff_in_max > 5] = 1.0
    return {'change_points': change_points}
    def get_cp_prob(self, ts):
        """Build a time series of change-point probabilities for ``ts``.

        Online change-point detection is run on ``ts.y`` with a constant
        hazard of ``self.hazard_lambda``.  Row ``self.future_win_len`` of the
        returned matrix, from column ``self.future_win_len`` up to (but not
        including) the last column, is used as the probability trace.

        Args:
            ts: Object exposing ``x`` and ``y`` sequences.

        Returns:
            The object created by ``time_series.dist_ts(ts)`` with one
            ``(ts.x[i], probability)`` pair appended to its ``x`` / ``y``
            lists per trace entry except the last.  Nothing is appended when
            the matrix is too small to contain the requested row.
        """
        f_hazard = partial(oncd.constant_hazard, self.hazard_lambda)
        prob_length, _ = oncd.online_changepoint_detection(
            np.asarray(ts.y), f_hazard, oncd.StudentT(0.1, .01, 1, 0))

        win = self.future_win_len
        # Index ``win`` only exists when the dimension is strictly larger
        # than ``win``; testing with ``>=`` let the equal case through and
        # raised IndexError on the row lookup.
        if prob_length.shape[0] > win and prob_length.shape[1] > win:
            cp_prob = prob_length[win, win:-1]
        else:
            cp_prob = []

        if len(cp_prob):
            # The first entry of the trace is always forced to zero.
            cp_prob[0] = 0.0
        ts_cp_prob = time_series.dist_ts(ts)
        # NOTE(review): the final trace entry is never copied; kept as in the
        # original -- confirm whether that is intentional.
        for i in range(len(cp_prob) - 1):
            ts_cp_prob.x.append(ts.x[i])
            ts_cp_prob.y.append(cp_prob[i])
        return ts_cp_prob
Ejemplo n.º 4
0
def test_univariate():
    """Check that a shift in a 1-D series at sample 50 shows up in ``maxes``.

    The series is 50 draws from ``norm`` with loc 0 followed by 50 draws with
    loc 2, generated under a fixed seed so the run is reproducible.
    """
    np.random.seed(34)
    before_shift = norm.rvs(0, size=50)
    after_shift = norm.rvs(2, size=50)
    series = np.hstack((before_shift, after_shift))

    hazard = partial(online.constant_hazard, 20)
    likelihood = online.StudentT(0.1, .01, 1, 0)
    _, maxes = online.online_changepoint_detection(series, hazard, likelihood)

    # The value at index 51 must be more than 40 below the value at index 50.
    drop = maxes[50] - maxes[51]
    assert drop > 40
Ejemplo n.º 5
0
 def findonchangepoint(self, data):
     """Run online change-point detection over ``data``.

     Uses a constant hazard built from ``self.mean_runlen`` and a
     ``StudentT(0.1, .01, 1, 0)`` observation model.

     Returns the run-length probability matrix and the indexes of the
     maximum run-length probability, in that order.
     """
     hazard = partial(oncd.constant_hazard, self.mean_runlen)
     observation_model = oncd.StudentT(0.1, .01, 1, 0)
     run_length_probs, max_indexes = oncd.online_changepoint_detection(
         data, hazard, observation_model)
     return run_length_probs, max_indexes
Ejemplo n.º 6
0
def test_multivariate():
    """Check that mean shifts at t=50, 100 and 150 are detected.

    The data set stacks four 50-sample segments of a 10-dimensional
    multivariate normal whose mean vectors are all-0, all-4, all-0 and
    all-(-4), generated under a fixed seed.
    """
    np.random.seed(34)
    dims = 10
    segment_means = [0, 4, 0, -4]
    # Segments are drawn in order so the random stream matches a
    # one-after-another construction.
    segments = [
        multivariate_normal.rvs([level] * dims, size=50)
        for level in segment_means
    ]
    series = np.vstack(segments)

    hazard = partial(online.constant_hazard, 50)
    _, maxes = online.online_changepoint_detection(
        series, hazard, online.MultivariateT(dims=dims))

    # Around every break the value just after must be below the one just
    # before.
    for brkpt in (50, 100, 150):
        after, before = maxes[brkpt + 1], maxes[brkpt - 1]
        assert after < before
Ejemplo n.º 7
0
def onlineCD(chunk_when_last_chd, interval, cwnd):
    """Report whether ``cwnd`` shows a change newer than the last known one.

    ``cwnd`` is passed through ``trimPlayerVisibleBW`` (defined elsewhere),
    online change-point detection is run on the result, and row ``interval``
    of the returned matrix is searched backwards for an entry above 0.01
    whose index (plus ``cutoff``) lies after ``chunk_when_last_chd``.

    Args:
        chunk_when_last_chd: Index of the previously reported change.
        interval: Row (and starting column) of the matrix to inspect.
            Clamped to the number of samples left after trimming.
        cwnd: Sequence of samples to analyse.

    Returns:
        tuple: ``(chd_detected, chd_index)``.  ``chd_index`` equals
        ``chunk_when_last_chd`` when nothing is detected.
    """
    chd_detected = False
    chd_index = chunk_when_last_chd
    trimThresh = 1000
    # ``cutoff`` is added to window indices below -- presumably the offset of
    # the trimmed window within the full series (TODO confirm).
    cwnd, cutoff = trimPlayerVisibleBW(cwnd, trimThresh)
    R, _ = oncd.online_changepoint_detection(
        np.asanyarray(cwnd), partial(oncd.constant_hazard, 250),
        oncd.StudentT(0.1, 0.01, 1, 0))
    interval = min(interval, len(cwnd))
    changeArray = R[interval, interval:-1]
    # Search from the end so the latest qualifying entry is picked.
    for i in reversed(range(len(changeArray))):
        v = changeArray[i]
        # Window index 0 only counts while no change has been recorded yet
        # (chunk_when_last_chd <= -1).
        if v > 0.01 and i + cutoff > chunk_when_last_chd and not (
                i == 0 and chunk_when_last_chd > -1):
            chd_index = i + cutoff
            chd_detected = True
            break
    return chd_detected, chd_index
Ejemplo n.º 8
0
def onlineCD(chunk_when_last_chd, interval, playerVisibleBW):
  # Report whether the series contains a change point newer than
  # chunk_when_last_chd, printing progress to stdout along the way.
  # Returns (chd_detected, chd_index); chd_index stays equal to
  # chunk_when_last_chd when nothing is detected.
  # NOTE: this block uses Python 2 print statements.
  print "evaluating change: ",
  chd_detected = False
  chd_index = chunk_when_last_chd
  # threshold for the number of samples to consider for change-point detection
  trimThresh = 100
  # original length, used only in the diagnostic print below
  lenarray = len(playerVisibleBW)
  # cutoff is added back to window indices below -- presumably the offset of
  # the trimmed window within the full series (TODO confirm)
  playerVisibleBW, cutoff = trimPlayerVisibleBW(playerVisibleBW, trimThresh)
  R, maxes = oncd.online_changepoint_detection(np.asanyarray(playerVisibleBW), partial(oncd.constant_hazard, 250), oncd.StudentT(0.1,0.01,1,0))
  # clamp so the row/column index never exceeds the trimmed sample count
  interval = min(interval, len(playerVisibleBW))
  changeArray = R[interval,interval:-1]
  # scan from the last entry to the first so the latest qualifying index wins
  for i,v in reversed(list(enumerate(changeArray))):
    # window index 0 is only accepted while no change has been recorded yet
    # (chunk_when_last_chd <= -1)
    if v > 0.01 and i + cutoff > chunk_when_last_chd and not (i == 0 and chunk_when_last_chd > -1) :
      chd_index = i + cutoff
      chd_detected = True
      print "change detected i = ", i, " chd_index = ", chd_index, " chunk_when_last_chd =", chunk_when_last_chd, " len = ", lenarray
      break
  if chd_detected == False:
    print "no change"
  return chd_detected, chd_index
Ejemplo n.º 9
0
    def identify_change_points(self):
        """Estimate change points and store them on the instance.

        The method is read from ``self.extra_inputs['cp_params']['method']``
        when ``'cp_params'`` is present and defaults to ``'Online'``
        otherwise.  Any other method name leaves the instance untouched.

        ``'Online'`` sets ``self.R``, ``self.diff_in_max``,
        ``self.expected_run_len`` and ``self.change_points`` (1.0 where the
        most likely run length jumps by more than 5 between consecutive
        steps, 0.0 elsewhere).

        ``'Offline'`` sets ``self.cp_prob`` and ``self.change_points``
        (boolean, ``self.cp_prob > 0.7``).

        Raises:
            KeyError: If ``'cp_params'`` is supplied without a ``'method'``
                entry.
        """
        if 'cp_params' in self.extra_inputs:
            method = self.extra_inputs['cp_params']['method']
        else:
            # Default.  This used to be written ``method == 'Online'``, a
            # comparison against an unbound name that raised instead of
            # assigning.
            method = 'Online'

        data = self.feature_by_time_matrix_reduced

        if method == 'Online':
            n_features = data.shape[1]
            R, _ = oncd.online_changepoint_detection(
                data,
                partial(oncd.constant_hazard, 250),
                oncd.MV_Norm(
                    mu=np.zeros(n_features),
                    Sigma=5.0 * np.diag(np.ones(n_features)),
                    n=np.array([1.0])))

            # Column-to-column change of the row index holding the largest
            # value in R.
            diff_in_max = np.abs(np.diff(np.argmax(R, axis=0)))
            expected_run_len = np.dot(R.T, np.arange(len(R)))
            self.R = R
            self.diff_in_max = diff_in_max
            self.expected_run_len = expected_run_len

            self.change_points = np.zeros(data.shape[0])
            self.change_points[diff_in_max > 5] = 1.0
        elif method == 'Offline':
            Q, P, Pcp = offcd.offline_changepoint_detection(
                data,
                partial(offcd.const_prior, l=(data.shape[0] + 1)),
                offcd.fullcov_obs_log_likelihood,
                truncate=-20)
            self.cp_prob = np.exp(Pcp).sum(0)
            self.change_points = self.cp_prob > 0.7
def determine_concept_drift(df):
    """Plot the negative log of the detection matrix for ``df``'s second column.

    Online change-point detection is run on ``df.iloc[:, 1]``; every fifth
    row and column of ``-log(R1)`` is drawn with ``plt.pcolor`` and the
    figure is shown.  Nothing is returned.

    Args:
        df: Object supporting ``.iloc[:, 1]`` (for example a pandas
            DataFrame with at least two columns).
    """
    R1, _ = oncd.online_changepoint_detection(
        df.iloc[:, 1], partial(oncd.constant_hazard, 250),
        oncd.StudentT(0.1, .01, 1, 0))
    sparsity = 5  # only plot every fifth data point for faster display

    # Computed once and reused for both the colour limit and the plot.
    unflattened_post_probs = -np.log(R1[0:-1:sparsity, 0:-1:sparsity])
    # vmax is the 5th percentile of -log(P), which corresponds to the 95th
    # percentile of P.  np.percentile without ``axis`` works on the
    # flattened array.
    chosen_vmax = int(np.percentile(unflattened_post_probs, 5))

    axis_ticks = np.array(range(0, len(R1[:, 0]), sparsity))
    plt.pcolor(axis_ticks,
               axis_ticks,
               unflattened_post_probs,
               cmap=cm.gray, vmin=0, vmax=chosen_vmax)
    plt.xlabel("time steps")
    plt.ylabel("run length")
    cbar = plt.colorbar(label="P(run)")
    cbar.set_ticks([0, chosen_vmax])
    # The plotted quantity is -log(P), so 0 is labelled as probability 1 and
    # vmax as probability 0:
    # black = highest prob
    # white = lowest prob
    # the colors mean the same as in arxiv paper
    # the bar direction is just reversed
    cbar.set_ticklabels([1, 0])
    plt.show()
Ejemplo n.º 11
0
def concept_drift(vals, plot=False, verbose=False):
    """Compute the negative-log detection matrix and drift thresholds for ``vals``.

    Args:
        vals: Sequence of observations passed to online change-point
            detection.
        plot: When true, draw the matrix with ``plt.pcolor`` and show it.
        verbose: When true, print the derived thresholds.

    Returns:
        tuple: ``(unflattened_post_probs, chosen_vmax, epsilon, thresh_run)``
        where ``unflattened_post_probs`` is ``-log`` of the detection matrix
        without its last row and column, ``chosen_vmax`` is the integer 5th
        percentile of those values, ``epsilon`` is twice the distance of
        entry ``[1][1]`` from ``chosen_vmax`` and ``thresh_run`` is 10% of
        ``len(vals)``.
    """
    # https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.pcolor.html#matplotlib.pyplot.pcolor
    # https://stackoverflow.com/questions/42687454/pcolor-data-plot-in-python
    R1, _ = oncd.online_changepoint_detection(
        vals, partial(oncd.constant_hazard, 250),
        oncd.StudentT(0.1, .01, 1, 0))
    sparsity = 1  # 5, increase this if time is too long

    # Computed once and reused; np.percentile without ``axis`` already works
    # on the flattened array.
    unflattened_post_probs = -np.log(R1[0:-1:sparsity, 0:-1:sparsity])
    chosen_vmax = int(np.percentile(unflattened_post_probs, 5))

    if plot:
        axis_ticks = np.array(range(0, len(R1[:, 0]), sparsity))
        plt.pcolor(axis_ticks,
                   axis_ticks,
                   unflattened_post_probs,
                   cmap=cm.gray,
                   vmin=0,
                   vmax=chosen_vmax)
        plt.xlabel("time steps")
        plt.ylabel("run length")
        cbar = plt.colorbar(label="P(run)")
        cbar.set_ticks([0, chosen_vmax])
        cbar.set_ticklabels([1, 0])
        plt.show()

    # a run consists of a diagonal of values that must be within this
    # epsilon from chosen_vmax
    epsilon = 2 * abs(unflattened_post_probs[1][1] - chosen_vmax)
    # a run must be at least 10% the length of the time series before a
    # concept drift can occur
    thresh_run = .1 * len(vals)
    if verbose:
        print("vmax: ", chosen_vmax)
        print("epsilon: ", epsilon)
        print("threshold length for a run: ", thresh_run)

    return unflattened_post_probs, chosen_vmax, epsilon, thresh_run
Ejemplo n.º 12
0
def online_changepoint_det(data, lamda_gap, alpha, beta, M_):
    """Run online change-point detection on ``data``.

    The hazard is ``oncd.constant_hazard`` bound to ``lamda_gap`` and the
    observation model is ``oncd.StudentT(alpha, beta, 1, M_)`` (third
    positional argument fixed at 1).  Whatever
    ``oncd.online_changepoint_detection`` returns is passed straight back.
    """
    hazard_func = partial(oncd.constant_hazard, lamda_gap)
    observation_model = oncd.StudentT(alpha, beta, 1, M_)
    return oncd.online_changepoint_detection(data, hazard_func,
                                             observation_model)
    for p in partition:
        mean = np.random.randn() * 10
        var = np.random.randn() * 1
        if var < 0:
            var = var * -1
        tdata = np.random.normal(mean, var, p)
        data = np.concatenate((data, tdata))
    return data


# Generate sample data.
# NOTE(review): the generator call below is commented out and no other
# assignment to `data` is visible in this section, so `plt.plot(data)` and the
# detection call rely on `data` being defined elsewhere -- confirm.
#data = generate_normal_time_series(7, 50, 200)
# Draw the sample data in the top panel of a two-row figure.
plt.figure(1, figsize=(28, 12), dpi=80, facecolor='w', edgecolor='k')

plt.subplot(211)
plt.title("Raw data")
plt.plot(data)

# Main functions
import bayesian_changepoint_detection.online_changepoint_detection as oncd
from functools import partial

# Online change-point detection with a constant hazard of 250 and a
# StudentT(0.1, .01, 1, 0) observation model.
R, maxes = oncd.online_changepoint_detection(
    data, partial(oncd.constant_hazard, 250), oncd.StudentT(0.1, .01, 1, 0))

# Bottom panel: row Nw of R, from column Nw up to (not including) the last.
plt.subplot(212)
plt.title("Detection")
Nw = 10
plt.plot(R[Nw, Nw:-1])
plt.show()