def get_cp_prob(self, ts):
        """Build a series of change-point probabilities for ``ts``.

        Runs ``oncd.online_changepoint_detection`` on ``ts.y`` with a constant
        hazard built from ``self.hazard_lambda`` and a
        ``StudentT(0.1, .01, 1, 0)`` observation model. From the returned
        matrix it reads row ``self.future_win_len``, columns
        ``self.future_win_len`` up to (but excluding) the last column.

        Args:
            ts: object exposing indexable ``x`` and ``y``; it is also handed
                to ``time_series.dist_ts`` to create the result container.

        Returns:
            The object created by ``time_series.dist_ts(ts)``, with one
            ``(ts.x[i], probability)`` pair appended to its ``x`` / ``y`` for
            each selected probability except the last one. Nothing is
            appended when the matrix is too small to contain the requested
            row.
        """
        win = self.future_win_len
        f_hazard = partial(oncd.constant_hazard, self.hazard_lambda)
        prob_length, _ = oncd.online_changepoint_detection(
            np.asarray(ts.y), f_hazard, oncd.StudentT(0.1, .01, 1, 0))

        # Row ``win`` exists only when there are strictly more than ``win``
        # rows; the previous ``>=`` test let the equal case through and the
        # integer index below then raised IndexError. The column slice cannot
        # go out of bounds, so its check is left as it was.
        n_rows, n_cols = prob_length.shape[0], prob_length.shape[1]
        if n_rows > win and n_cols >= win:
            cp_prob = prob_length[win, win:-1]
        else:
            cp_prob = []

        if len(cp_prob):
            # The first selected probability is always forced to zero.
            cp_prob[0] = 0.0

        ts_cp_prob = time_series.dist_ts(ts)
        # NOTE(review): the loop stops one short of len(cp_prob), so the last
        # probability is never copied. Kept as in the original - confirm
        # whether that is intended.
        for i in range(len(cp_prob) - 1):
            ts_cp_prob.x.append(ts.x[i])
            ts_cp_prob.y.append(cp_prob[i])
        return ts_cp_prob
Esempio n. 2
0
def onlineCD(sessionHistory, chunk_when_last_chd, interval, playerVisibleBW):
    """Search ``playerVisibleBW`` for a change point newer than the last one.

    The samples are first passed through ``trimPlayerVisibleBW`` with a
    threshold of 1000; it returns the samples to analyse and a ``cutoff``
    that is added to every index found here (presumably the number of
    leading samples that were trimmed away - confirm against that helper).
    ``oncd.online_changepoint_detection`` is then run with a constant hazard
    of 250 and a ``StudentT(0.1, 0.01, 1, 0)`` observation model, and row
    ``interval`` of its result matrix is scanned from the newest column to
    the oldest.

    Args:
        sessionHistory: not used by this function; kept so existing callers
            keep working.
        chunk_when_last_chd: index compared against each candidate; it is
            also the index returned when nothing new is detected.
        interval: row of the result matrix to inspect, clamped to the number
            of trimmed samples.
        playerVisibleBW: sequence of samples to analyse.

    Returns:
        Tuple ``(chd_detected, chd_index)``. ``chd_detected`` is True when a
        candidate qualified; ``chd_index`` is then that candidate's index
        plus ``cutoff``, otherwise ``chunk_when_last_chd``.
    """
    trim_thresh = 1000
    playerVisibleBW, cutoff = trimPlayerVisibleBW(playerVisibleBW, trim_thresh)
    R, _ = oncd.online_changepoint_detection(
        np.asanyarray(playerVisibleBW), partial(oncd.constant_hazard, 250),
        oncd.StudentT(0.1, 0.01, 1, 0))

    interval = min(interval, len(playerVisibleBW))
    changeArray = R[interval, interval:-1]

    chd_detected = False
    chd_index = chunk_when_last_chd
    # Walk from the most recent sample backwards so the newest qualifying
    # change point wins.
    for i in reversed(range(len(changeArray))):
        v = changeArray[i]
        # Position 0 is ignored once a previous change has been recorded
        # (chunk_when_last_chd > -1).
        if v > 0.01 and i + cutoff > chunk_when_last_chd and not (
                i == 0 and chunk_when_last_chd > -1):
            chd_index = i + cutoff
            chd_detected = True
            break
    return chd_detected, chd_index
Esempio n. 3
0
def test_univariate():
    """Check that the detector reacts to a shift between two 50-sample halves."""
    np.random.seed(seed=34)
    # One-dimensional series of 100 samples: 50 drawn with norm.rvs(0, ...)
    # followed by 50 drawn with norm.rvs(2, ...).
    first_half = norm.rvs(0, size=50)
    second_half = norm.rvs(2, size=50)
    dataset = np.hstack((first_half, second_half))

    hazard = partial(online.constant_hazard, 20)
    r, maxes = online.online_changepoint_detection(
        dataset, hazard, online.StudentT(0.1, .01, 1, 0))

    # The value at index 50 must exceed the one at index 51 by more than 40.
    assert maxes[50] - maxes[51] > 40
Esempio n. 4
0
 def findonchangepoint(self, data):
     '''
     Run online changepoint detection on ``data``.

     A constant hazard parameterised by ``self.mean_runlen`` and a
     ``StudentT(0.1, .01, 1, 0)`` observation model are used. Returns the
     run length probability matrix and the indexes of maximum run length
     probability, in that order.
     '''
     hazard = partial(oncd.constant_hazard, self.mean_runlen)
     observation_model = oncd.StudentT(0.1, .01, 1, 0)
     run_length_probs, max_indexes = oncd.online_changepoint_detection(
         data, hazard, observation_model)
     return run_length_probs, max_indexes
Esempio n. 5
0
def onlineCD(chunk_when_last_chd, interval, cwnd):
    """Search ``cwnd`` for a change point newer than ``chunk_when_last_chd``.

    ``cwnd`` is first passed through ``trimPlayerVisibleBW`` with a threshold
    of 1000; the helper returns the samples to analyse and a ``cutoff`` that
    is added to every index found here (presumably the count of samples
    trimmed from the front - confirm against that helper). The detector is
    run with a constant hazard of 250 and a ``StudentT(0.1, 0.01, 1, 0)``
    observation model; row ``interval`` of its result matrix is scanned from
    the newest column to the oldest.

    Args:
        chunk_when_last_chd: index compared against each candidate; also the
            index returned when nothing new is detected.
        interval: row of the result matrix to inspect, clamped to the number
            of trimmed samples.
        cwnd: sequence of samples to analyse.

    Returns:
        Tuple ``(chd_detected, chd_index)``: whether a candidate qualified,
        and either its index plus ``cutoff`` or ``chunk_when_last_chd``.
    """
    trim_thresh = 1000
    cwnd, cutoff = trimPlayerVisibleBW(cwnd, trim_thresh)
    R, _ = oncd.online_changepoint_detection(
        np.asanyarray(cwnd), partial(oncd.constant_hazard, 250),
        oncd.StudentT(0.1, 0.01, 1, 0))

    interval = min(interval, len(cwnd))
    changeArray = R[interval, interval:-1]

    # Newest candidate first: the first hit is the most recent change point.
    for i in range(len(changeArray) - 1, -1, -1):
        exceeds_threshold = changeArray[i] > 0.01
        is_newer = i + cutoff > chunk_when_last_chd
        # Position 0 does not count once a previous change has been recorded.
        is_stale_origin = i == 0 and chunk_when_last_chd > -1
        if exceeds_threshold and is_newer and not is_stale_origin:
            return True, i + cutoff
    return False, chunk_when_last_chd
Esempio n. 6
0
def onlineCD(chunk_when_last_chd, interval, playerVisibleBW):
    """Search ``playerVisibleBW`` for a change point newer than the last one.

    The samples are trimmed with ``trimPlayerVisibleBW`` (threshold 100),
    which also returns ``cutoff``; ``cutoff`` is added to every index found
    here. The detector is run with a constant hazard of 250 and a
    ``StudentT(0.1, 0.01, 1, 0)`` observation model, and row ``interval`` of
    its result matrix is scanned from the newest column to the oldest.

    Returns ``(chd_detected, chd_index)``; ``chd_index`` stays equal to
    ``chunk_when_last_chd`` when nothing qualifies. A diagnostic line is
    printed when a change is detected.

    NOTE(review): the ``print`` statement below is Python 2 syntax.
    """
    chd_detected = False
    chd_index = chunk_when_last_chd
    # threshold for the amount of samples to consider for change point
    trimThresh = 100
    # Length before trimming; only used in the diagnostic print below.
    lenarray = len(playerVisibleBW)
    playerVisibleBW, cutoff = trimPlayerVisibleBW(playerVisibleBW, trimThresh)
    R, maxes = oncd.online_changepoint_detection(
        np.asanyarray(playerVisibleBW), partial(oncd.constant_hazard, 250),
        oncd.StudentT(0.1, 0.01, 1, 0))
    # Clamp the requested row to the number of trimmed samples.
    interval = min(interval, len(playerVisibleBW))
    changeArray = R[interval, interval:-1]
    for i, v in reversed(
            list(enumerate(changeArray))
    ):  # newest candidate first, so the most recent qualifying index wins
        # Position 0 is ignored once a previous change has been recorded
        # (chunk_when_last_chd > -1).
        if v > 0.01 and i + cutoff > chunk_when_last_chd and not (
                i == 0 and chunk_when_last_chd > -1):
            chd_index = i + cutoff
            chd_detected = True
            # Python 2 print statement; the trailing comma suppresses the newline.
            print "change detected i = ", i, " cutoff = ", cutoff, " chd_index = ", chd_index, " chunk_when_last_chd =", chunk_when_last_chd, " len = ", lenarray,
            break
    return chd_detected, chd_index
Esempio n. 7
0
def concept_drift(vals, plot=False, verbose=False):
    """Compute run-length statistics used to look for concept drift in ``vals``.

    Runs ``oncd.online_changepoint_detection`` on ``vals`` with a constant
    hazard of 250 and a ``StudentT(0.1, .01, 1, 0)`` observation model, takes
    the negative log of the returned matrix (last row and column dropped) and
    derives a colour-scale ceiling and two thresholds from it.

    Args:
        vals: sequence of observations; its ``len`` sets the run threshold.
        plot: when True, show the negative-log matrix with ``plt.pcolor``.
        verbose: when True, print the three derived scalars.

    Returns:
        Tuple ``(unflattened_post_probs, chosen_vmax, epsilon, thresh_run)``:
          * ``unflattened_post_probs`` - the negative-log matrix;
          * ``chosen_vmax`` - its 5th percentile, truncated to ``int``;
          * ``epsilon`` - twice the absolute gap between entry ``[1][1]`` of
            the matrix and ``chosen_vmax``;
          * ``thresh_run`` - 10% of ``len(vals)``.
    """
    # pcolor references:
    # https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.pcolor.html#matplotlib.pyplot.pcolor
    # https://stackoverflow.com/questions/42687454/pcolor-data-plot-in-python
    R1, _ = oncd.online_changepoint_detection(
        vals, partial(oncd.constant_hazard, 250),
        oncd.StudentT(0.1, .01, 1, 0))

    sparsity = 1  # 5, increase this if time is too long
    # Computed once and reused; np.percentile flattens its input by default,
    # so no separate flattened copy of the matrix is needed.
    unflattened_post_probs = -np.log(R1[0:-1:sparsity, 0:-1:sparsity])
    chosen_vmax = int(np.percentile(unflattened_post_probs, 5))

    if plot:
        axis_values = np.arange(0, R1.shape[0], sparsity)
        plt.pcolor(axis_values,
                   axis_values,
                   unflattened_post_probs,
                   cmap=cm.gray,
                   vmin=0,
                   vmax=chosen_vmax)
        plt.xlabel("time steps")
        plt.ylabel("run length")
        cbar = plt.colorbar(label="P(run)")
        # Values are negative logs, so 0 on the colour scale corresponds to
        # probability 1; the ceiling is labelled as probability 0.
        cbar.set_ticks([0, chosen_vmax])
        cbar.set_ticklabels([1, 0])
        plt.show()

    # a run consists of a diagonal of values that must be within this epsilon
    # from chosen_vmax
    epsilon = 2 * abs(unflattened_post_probs[1][1] - chosen_vmax)
    # a run must be at least 10% the length of the time series before a
    # concept drift can occur
    thresh_run = .1 * len(vals)

    if verbose:
        print("vmax: ", chosen_vmax)
        print("epsilon: ", epsilon)
        print("threshold length for a run: ", thresh_run)

    return unflattened_post_probs, chosen_vmax, epsilon, thresh_run
Esempio n. 8
0
def HDonline_changepoint_det(data, lamda_gap, alpha, beta, M):
    """Call ``oncd.HDSonline_changepoint_detection`` on ``data``.

    The hazard is ``oncd.constant_hazard`` bound to ``lamda_gap`` and the
    observation model is ``oncd.StudentT(alpha, beta, 1, M)``. The detector's
    result is returned unchanged.
    """
    hazard = partial(oncd.constant_hazard, lamda_gap)
    observation_model = oncd.StudentT(alpha, beta, 1, M)
    return oncd.HDSonline_changepoint_detection(data, hazard, observation_model)
def determine_concept_drift(df):
    """Plot the negative-log run-length matrix for the second column of ``df``.

    Runs ``oncd.online_changepoint_detection`` on ``df.iloc[:,1]`` with a
    constant hazard of 250 and a ``StudentT(0.1, .01, 1, 0)`` observation
    model, then displays every fifth row/column of ``-log`` of the result
    with ``plt.pcolor``.

    NOTE(review): the statements after ``plt.show()`` use ``partition`` and
    ``data``, neither of which is defined in this function; they look like
    the tail of a different function that was merged in here. As written,
    ``data`` is assigned inside this function, so reading it in the loop
    fails. Confirm what this function should return.
    """
    # presumably df is a pandas DataFrame; .iloc[:,1] selects its second column by position
    R1, maxes = oncd.online_changepoint_detection(df.iloc[:,1], partial(oncd.constant_hazard, 250), oncd.StudentT(0.1, .01, 1, 0))
    # choose 95th percentile for vmax
    # NOTE(review): the code below takes the 5th percentile of the negative-log
    # values - presumably what "95th percentile" refers to; confirm.
    sparsity = 5  # only plot every fifth data for faster display
    unflattened_post_probs = -np.log(R1[0:-1:sparsity, 0:-1:sparsity])
    # Same matrix as above, recomputed and flattened for the percentile.
    post_probs = (-np.log(R1[0:-1:sparsity, 0:-1:sparsity])).flatten()
    chosen_vmax = int(np.percentile(post_probs, 5))

    plt.pcolor(np.array(range(0, len(R1[:, 0]), sparsity)),
               np.array(range(0, len(R1[:, 0]), sparsity)),
               unflattened_post_probs,
               cmap=cm.gray, vmin=0, vmax=chosen_vmax)
    plt.xlabel("time steps")
    plt.ylabel("run length")
    cbar = plt.colorbar(label="P(run)")
    cbar.set_ticks([0, chosen_vmax])
    cbar.set_ticklabels([1, 0])
    # black = highest prob
    # white = lowest prob
    # the colors mean the same as in arxiv paper
    # the bar direction is just reversed
    plt.show()
    # NOTE(review): from here on the code appends randomly generated normal
    # segments to `data`; `partition` and `data` are not defined above.
    for p in partition:
        mean = np.random.randn() * 10
        var = np.random.randn() * 1
        # Flip negative draws so the value passed as the scale is non-negative.
        if var < 0:
            var = var * -1
        tdata = np.random.normal(mean, var, p)
        data = np.concatenate((data, tdata))
    return data


# Generate Sample data
#data = generate_normal_time_series(7, 50, 200)
# Draw sample data
plt.figure(num=1, figsize=(28, 12), dpi=80, facecolor='w', edgecolor='k')

# Top panel of a two-row layout: the unprocessed series.
plt.subplot(2, 1, 1)
plt.title("Raw data")
plt.plot(data)

# Main functions
import bayesian_changepoint_detection.online_changepoint_detection as oncd
from functools import partial

# Detector run with a constant hazard of 250 and a StudentT(0.1, .01, 1, 0)
# observation model.
R, maxes = oncd.online_changepoint_detection(
    data,
    partial(oncd.constant_hazard, 250),
    oncd.StudentT(0.1, .01, 1, 0),
)

# Bottom panel: row Nw of the result matrix, from column Nw up to (but
# excluding) the last column.
plt.subplot(2, 1, 2)
plt.title("Detection")
Nw = 10
plt.plot(R[Nw][Nw:-1])
plt.show()