import numpy as np

# Helper routines referenced below (gpr1step5, logistic_logh, logistic_h2,
# studentpdf, studentlogpdf, rmult, logsumexp, MoTrnd, isKosher) are assumed
# to be provided elsewhere in this package.


def dbocpdGP(theta, X, model, num_hazard_params, dt):
    """Negative log marginal likelihood of the GP-based BOCPD model, and its
    gradient w.r.t. the hyperparameters theta = [theta_h; theta_m; log(alpha0)]."""
    beta0 = 1
    num_scale_params = 1

    # Maximum number of points considered for predicting the next one, regardless
    # of the run length and cov function. Set to Inf if we don't care about speed.
    maxPossibleLen = 500

    theta_h = theta[:num_hazard_params]  # num_hazard x 1
    theta_m = theta[num_hazard_params:-1]  # num_model x 1
    alpha0 = np.exp(theta[-1])  # Use exp to ensure it is positive. 1 x 1
    num_model_params = len(theta_m)  # 1 x 1

    assert dt > 0
    (T, D) = X.shape  # Number of time points observed
    assert D == 1

    # Never need to consider more than T points in the past.
    maxPossibleLen = min(T, maxPossibleLen)

    # Evaluate the hazard function for this interval.
    # H(r) = P(runlength_t = 0|runlength_t-1 = r-1)
    # Pre-compute the hazard in preparation for steps 4 & 5, alg 1, of [RPA].
    # logH = log(H), logmH = log(1-H)
    (logH, logmH, dlogH, dlogmH) = logistic_logh(np.arange(1, T + 1), theta_h)
    assert isKosher(dlogH)
    assert isKosher(dlogmH)

    # R(r, t) = P(runlength_t-1 = r-1|X_1:t-1).
    # P(runlength_0 = 0|nothing) = 1 => logR(1, 1) = 0
    logR = np.zeros((T + 1, 1))  # Pre-allocate the run length distribution. [P]
    dlogR_h = np.zeros((T + 1, num_hazard_params))
    dlogR_m = np.zeros((T + 1, num_model_params))
    dlogR_s = np.zeros((T + 1, num_scale_params))
    SSE = np.zeros((T + 1, D))  # This will change with higher D
    dSSE = np.zeros((T + 1, num_model_params))
    SSE[0, 0] = 2 * beta0  # 1 x 1

    # Pre-compute GP stuff:
    (alpha, sigma2, dalpha, dsigma2) = gpr1step5(theta_m, model,
                                                 maxPossibleLen, dt)
    maxLen = alpha.shape[0]
    # Extend sigma2 (and its derivative), since we may index past maxLen.
    # t - maxLen x 1
    sigma2 = np.concatenate((sigma2,
                             sigma2[-1, 0] * np.ones((T - sigma2.shape[0], 1))))
    dsigma2 = np.concatenate((dsigma2,
                              np.tile(dsigma2[-1, :], (T - maxLen, 1))))
    ddf = 2  # d(df)/d(alpha0), since df = 2 * alpha0 + r

    for t in range(1, T + 1):
        MRC = min(maxLen, t)  # How many points back to look when predicting
        mu = np.dot(alpha[:MRC, :MRC - 1],
                    X[t - MRC:t - 1, 0][::-1])  # MRC x 1. [x]
        # Extend the mu (mean) prediction for the older (> MRC) run length
        # hypotheses.
        if MRC < t:
            mu = np.append(mu, mu[-1] * np.ones(t - mu.shape[0]))  # t - MRC x 1. [x]

        df = 2 * alpha0 + np.arange(t)
        pred_var = sigma2[:t, 0] * SSE[:t, 0] / df
        dpredvar_s = np.atleast_2d(-ddf * sigma2[:t, 0] * SSE[:t, 0] / df ** 2).T
        (logpredprobs, dlogpredprobs) = studentlogpdf(X[t - 1, 0], mu,
                                                      pred_var, df, 2)

        # Now do the derivatives. [t x 1, t x 1]
        dmu = np.zeros((t, num_model_params))
        dpredvar = np.zeros((t, num_model_params))
        for ii in range(num_model_params):
            # MRC x 1. [x/theta_m]
            dmu[:MRC, ii] = np.dot(dalpha[:MRC, :MRC - 1, ii],
                                   X[t - MRC:t - 1, 0][::-1])
            # Extend the derivative the same way as mu for the older (> MRC)
            # run length hypotheses.
            if MRC < t:
                dmu[MRC:, ii] = dmu[MRC - 1, ii]
            # Use the product rule. t x 1. [x^2/theta_m]
            dpredvar[:, ii] = (dsigma2[:t, ii] * SSE[:t, 0]
                               + sigma2[:t, 0] * dSSE[:t, ii]) / df
            # Use the quotient rule. t x 1. [1/theta_m]
            dSSE[1:t + 1, ii] = dSSE[:t, ii] \
                + 2 * (mu - X[t - 1, 0]) / sigma2[:t, 0] * dmu[:, ii] \
                - (mu - X[t - 1, 0]) ** 2 / sigma2[:t, 0] ** 2 * dsigma2[:t, ii]
            dSSE[0, ii] = 0

        dlogpredprobs_m = rmult(dmu, dlogpredprobs[:, 0]) \
            + rmult(dpredvar[:t, :], dlogpredprobs[:, 1])
        # mu has zero dependence on alpha (scale). t x 1. [log(P/x)]
        dlogpredprobs_s = np.atleast_2d(dpredvar_s[:t, 0] * dlogpredprobs[:, 1]
                                        + ddf * dlogpredprobs[:, 2]).T

        # Update with the Mahalanobis error of predicting the next point.
        # t x 1. []
        SSE[1:t + 1, 0] = SSE[:t, 0] + (mu - X[t - 1, 0]) ** 2 / sigma2[:t, 0]
        SSE[0, 0] = 2 * beta0  # 1 x 1. []

        # Update the run length distributions and their derivatives.
        logMsg = logR[:t, 0] + logpredprobs + logH[:t, 0]  # t x 1
        dlogMsg_h = dlogR_h[:t, :] + dlogH[:t, :]  # t x num_hazard
        logR[1:t + 1, 0] = logR[:t, 0] + logpredprobs + logmH[:t, 0]  # t x 1. [P]
        dlogR_h[1:t + 1, :] = dlogR_h[:t, :] + dlogmH[:t, :]  # t x num_hazard
        dlogR_m[1:t + 1, :] = dlogR_m[:t, :] + dlogpredprobs_m  # t x num_model
        dlogR_s[1:t + 1, :] = dlogR_s[:t, :] + dlogpredprobs_s  # t x num_scale
        (logR[0, 0], normMsg, Z) = logsumexp(logMsg)  # 1 x 1. [P]
        # 1 x num_hazard
        dlogR_h[0, :] = rmult(dlogMsg_h, normMsg).sum(axis=0) / Z
        # 1 x num_model
        dlogR_m[0, :] = rmult(dlogR_m[1:t + 1, :], normMsg).sum(axis=0) / Z
        # 1 x num_scale
        dlogR_s[0, :] = rmult(dlogR_s[1:t + 1, :], normMsg).sum(axis=0) / Z
    # end t loop

    # Get the log marginal likelihood of the data, X(1:end), under the model
    # = P(X_1:T), integrating out all the runlengths. 1 x 1. [log P]
    nlml = -1.0 * logsumexp(logR)[0]

    # Do the derivatives of nlml.
    normR = np.exp(logR - logR.max())  # (T + 1) x 1
    dnlml_h = -rmult(dlogR_h, normR).sum(axis=0) / normR.sum()  # 1 x num_hazard
    dnlml_m = -rmult(dlogR_m, normR).sum(axis=0) / normR.sum()  # 1 x num_model
    dnlml_s = -rmult(dlogR_s, normR).sum(axis=0) / normR.sum()  # 1 x num_scale
    # Correct for the fact that the input is log(alpha0). 1 x num_scale.
    dnlml_s = alpha0 * dnlml_s
    # (num_hazard + num_model + num_scale) x 1
    dnlml = np.append(np.append(dnlml_h, dnlml_m), dnlml_s)

    assert isKosher(nlml)
    assert isKosher(dnlml)
    return (nlml, dnlml)
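

# Usage sketch (an illustration, not part of the original module): because
# dbocpdGP returns the negative log marginal likelihood together with its
# gradient, the hyperparameters can be fit with any gradient-based optimizer.
# The names `theta0`, `model`, and `num_hazard_params` below are placeholders
# for whatever gpr1step5 and logistic_logh expect in your setup.
def learn_bocpdGP_hyperparams(theta0, X, model, num_hazard_params, dt):
    from scipy.optimize import minimize

    # jac=True tells the optimizer the objective returns (value, gradient).
    res = minimize(lambda th: dbocpdGP(th, X, model, num_hazard_params, dt),
                   theta0, jac=True, method='L-BFGS-B')
    return res.x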
def bocpdGPT_trunc(X, model, theta_m, theta_h, scalePrior, dt):
    """Run-length-truncated BOCPD with a GP predictive model. Returns the run
    length posterior R, the negative log marginal likelihood, the per-step
    evidence Z, and the predictive means/medians."""
    # Maximum number of points considered for predicting the next one, regardless
    # of the run length and cov function. Set to Inf if we don't care about speed.
    maxPossibleLen = 500

    num_hazard_params = len(theta_h)
    num_model_params = len(theta_m)

    assert isKosher(X)
    assert dt > 0
    (T, D) = X.shape  # Number of time points observed. 1 x 1. [s]
    # TODO extend to higher D
    assert D == 1

    # Never need to consider more than T points in the past. 1 x 1. [points]
    maxPossibleLen = min(T, maxPossibleLen)

    # Ensure the gamma prior parameters are positive (as required). 2 x 1. []
    scalePrior = np.exp(scalePrior)
    alpha0 = scalePrior[0]
    beta0 = scalePrior[1]

    # Precompute all the gpr aspects of the algorithm.
    # [maxLen x maxLen, maxLen x 1]
    (alpha, sigma2, dalpha, dsigma2) = gpr1step5(theta_m, model,
                                                 maxPossibleLen, dt)
    maxLen = alpha.shape[0]
    assert maxLen >= 1

    # Evaluate the hazard function:
    # H(r) = P(runlength_t = 0|runlength_t-1 = r-1)
    # Pre-compute the hazard in preparation for steps 4 & 5, alg 1, of [RPA].
    (H, dH) = logistic_h2(np.arange(1, maxLen + 1), theta_h)

    R = np.zeros((maxLen + 1, T + 1))
    # The standardized square error for each runlength.
    SSE = np.zeros((maxLen, D))
    # The evidence at each time step => Z(t) = P(X_t|X_1:t-1).
    Z = np.zeros((T, 1))
    predMeans = np.zeros((T, 1))
    predMed = np.zeros((T, 1))

    # At time t = 1, we have complete knowledge about the run length. This
    # assumes there was surely a change point right before the first data point,
    # not at the first data point. Implements step 1, alg 1, of [RPA].
    # => P(runlength_0 = 0|nothing) = 1
    R[0, 0] = 1
    # Initialize first SSE to contribution from gamma prior.
    SSE[0] = 2 * beta0
    # How many degrees of freedom in the prediction for each run length.
    df = 2 * alpha0 + np.arange(maxLen)

    for t in range(1, T + 1):
        # Implicitly implements step 2, alg 1, of [RPA]: observe the new datum,
        # simply by incrementing the loop index.

        # Evaluate the predictive distribution for the new datum under each of
        # the parameters. Implements step 3, alg 1, of [RPA].
        # predprobs(r) = p(X(t)|X(1:t-1), runlength_t-1 = r-1). t x 1. [P]
        predprobs = np.zeros(maxLen)
        if t < maxLen:
            mu = np.dot(alpha[:t, :t], X[:t, 0][::-1])
            # The predictive variance for each prediction.
            pred_var = sigma2[:t, 0] * SSE[:t, 0] / df[:t]
            # Get the posterior predictive probability for each run length.
            predprobs[:t] = studentpdf(X[t - 1, 0], mu, pred_var, df[:t], 1)
            # Update the SSE for each run length.
            SSE[1:t + 1, 0] = SSE[:t, 0] + (mu - X[t - 1, 0]) ** 2 / sigma2[:t, 0]
            SSE[0, 0] = 2 * beta0  # 1 x 1. []
        else:
            mu = np.dot(alpha, X[t - maxLen + 1:t, 0][::-1])
            # The predictive variance for each prediction.
            pred_var = sigma2[:, 0] * SSE[:, 0] / df
            # Get the posterior predictive probability for each run length.
            predprobs = studentpdf(X[t - 1, 0], mu, pred_var, df, 1)
            # Update the SSE for each run length.
            SSE[1:maxLen, 0] = SSE[:maxLen - 1, 0] + \
                (mu[:maxLen - 1] - X[t - 1, 0]) ** 2 / sigma2[:maxLen - 1, 0]
            SSE[0, 0] = 2 * beta0

        predMeans[t - 1] = np.dot(R[:mu.shape[0], t - 1].T, mu)
        predMed[t - 1] = np.median(MoTrnd(R[:mu.shape[0], t - 1], mu,
                                          pred_var[:mu.shape[0]],
                                          df[:mu.shape[0]], 1000))

        # Evaluate the growth probabilities - shift the probabilities up and to
        # the right, scaled by the hazard function and the predictive
        # probabilities.
        R[1:, t] = R[:maxLen, t - 1] * predprobs * (1 - H[:maxLen])
        # Evaluate the probability that there *was* a changepoint and we're
        # accumulating the mass back down at r = 0.
        R[0, t] = (R[:maxLen, t - 1] * predprobs * H[:maxLen]).sum()

        # Renormalize the run length probabilities for improved numerical
        # stability. Note that unlike in [RPA], which keeps track of
        # P(r_t, X_1:t), we keep track of P(r_t|X_1:t) => unnormalized
        # R(i, t+1) = P(runlength_t = i-1|X_1:t) * P(X_t|X_1:t-1)
        # => normalization const Z(t) = P(X_t|X_1:t-1). Sort of implements
        # step 6, alg 1, of [RPA].
        Z[t - 1] = R[:, t].sum()
        R[:, t] /= Z[t - 1]
        # Fold the (normalized) overflow bin back into the last tracked run
        # length so the truncated distribution still sums to one.
        R[maxLen - 1, t] = R[maxLen - 1, t] + R[maxLen, t]
        R[maxLen, t] = 0
    # end t loop

    # Get the negative log marginal likelihood of the data, X(1:end), under
    # the model = P(X_1:T), integrating out all the runlengths. 1 x 1. [log P]
    nlml = -np.sum(np.log(Z))
    return (R, nlml, Z, predMeans, predMed)
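

# Usage sketch (hypothetical helper, not in the original code): R[r, t] holds
# P(runlength_t-1 = r | X_1:t-1), so the r = 0 row of the returned matrix is
# the posterior probability that a changepoint just occurred at each step.
def changepoint_probs_trunc(X, model, theta_m, theta_h, scalePrior, dt):
    (R, nlml, Z, predMeans, predMed) = bocpdGPT_trunc(X, model, theta_m,
                                                      theta_h, scalePrior, dt)
    return R[0, 1:]  # 1 x T. [P]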
def bocpdGPT(X, model, theta_m, theta_h, scalePrior, dt):
    """Full (untruncated) BOCPD with a GP predictive model. Returns the run
    length posterior R, the smoothing matrix S, the negative log marginal
    likelihood, the per-step evidence Z, and the predictive means/medians."""
    # Maximum number of points considered for predicting the next one, regardless
    # of the run length and cov function. Set to Inf if we don't care about speed.
    maxPossibleLen = 500

    num_hazard_params = len(theta_h)
    num_model_params = len(theta_m)

    assert isKosher(X)
    assert dt > 0
    (T, D) = X.shape  # Number of time points observed. 1 x 1. [s]
    # TODO extend to higher D
    assert D == 1

    # Never need to consider more than T points in the past. 1 x 1. [points]
    maxPossibleLen = min(T, maxPossibleLen)

    # Ensure the gamma prior parameters are positive (as required). 2 x 1. []
    scalePrior = np.exp(scalePrior)
    alpha0 = scalePrior[0]
    beta0 = scalePrior[1]

    # Evaluate the hazard function:
    # H(r) = P(runlength_t = 0|runlength_t-1 = r-1)
    # Pre-compute the hazard in preparation for steps 4 & 5, alg 1, of [RPA].
    (H, dH) = logistic_h2(np.arange(1, T + 1), theta_h)

    R = np.zeros((T + 1, T + 1))
    S = np.zeros((T, T))
    # The standardized square error (SSE) for each runlength.
    SSE = np.zeros((T + 1, D))
    # The evidence at each time step => Z(t) = P(X_t|X_1:t-1).
    Z = np.zeros((T, 1))
    predMeans = np.zeros((T, 1))
    predMed = np.zeros((T, 1))

    # At time t = 1, we have complete knowledge about the run length. This
    # assumes there was surely a change point right before the first data point,
    # not at the first data point. Implements step 1, alg 1, of [RPA].
    # => P(runlength_0 = 0|nothing) = 1
    R[0, 0] = 1
    # Initialize first SSE to contribution from gamma prior.
    SSE[0] = 2 * beta0

    # Precompute all the gpr aspects of the algorithm.
    (alpha, sigma2, dalpha, dsigma2) = gpr1step5(theta_m, model,
                                                 maxPossibleLen, dt)
    maxLen = alpha.shape[0]
    # Extend sigma2, since we may index past maxLen.
    sigma2 = np.concatenate((sigma2,
                             sigma2[-1, 0] * np.ones((T - sigma2.shape[0], 1))))

    for t in range(1, T + 1):
        # Implicitly implements step 2, alg 1, of [RPA]: observe the new datum,
        # simply by incrementing the loop index.

        # Evaluate the predictive distribution for the new datum under each of
        # the parameters. Implements step 3, alg 1, of [RPA].
        # predprobs(r) = p(X(t)|X(1:t-1), runlength_t-1 = r-1). t x 1. [P]
        MRC = min(maxLen, t)  # How many points back to look when predicting
        mu = np.dot(alpha[:MRC, :MRC - 1],
                    X[t - MRC:t - 1, 0][::-1])  # MRC x 1. [x]
        # Extend the mu (mean) prediction for the older (> MRC) run length
        # hypotheses.
        if MRC < t:
            mu = np.append(mu, mu[-1] * np.ones(t - mu.shape[0]))  # t - MRC x 1. [x]

        df = 2 * alpha0 + np.arange(t)
        pred_var = sigma2[:t, 0] * SSE[:t, 0] / df
        predprobs = studentpdf(X[t - 1, 0], mu, pred_var, df, 1)

        # Update the SSE for each run length.
        SSE[1:t + 1, 0] = SSE[:t, 0] + (mu - X[t - 1, 0]) ** 2 / sigma2[:t, 0]
        SSE[0, 0] = 2 * beta0  # 1 x 1. []

        predMeans[t - 1] = np.dot(R[:mu.shape[0], t - 1].T, mu)
        # The following is pretty slow:
        # predMed[t - 1] = np.median(MoTrnd(R[:mu.shape[0], t - 1], mu,
        #                                   pred_var[:mu.shape[0]],
        #                                   df[:mu.shape[0]], 1000))
        predMed[t - 1] = 0

        # Evaluate the growth probabilities - shift the probabilities up and to
        # the right, scaled by the hazard function and the predictive
        # probabilities.
        R[1:t + 1, t] = R[:t, t - 1] * predprobs * (1 - H[:t])
        # Evaluate the probability that there *was* a changepoint and we're
        # accumulating the mass back down at r = 0.
        R[0, t] = (R[:t, t - 1] * predprobs * H[:t]).sum()

        # Renormalize the run length probabilities for improved numerical
        # stability. Note that unlike in [RPA], which keeps track of
        # P(r_t, X_1:t), we keep track of P(r_t|X_1:t) => unnormalized
        # R(i, t+1) = P(runlength_t = i-1|X_1:t) * P(X_t|X_1:t-1)
        # => normalization const Z(t) = P(X_t|X_1:t-1). Sort of implements
        # step 6, alg 1, of [RPA].
        Z[t - 1] = R[:t + 1, t].sum()
        R[:t + 1, t] /= Z[t - 1]

        # Get the S matrix.
        S[:t, t - 1] = R[:t, t - 1] * predprobs
        S[:, t - 1] = S[:, t - 1] / S[:, t - 1].sum()
    # end t loop

    # Get the negative log marginal likelihood of the data, X(1:end), under
    # the model = P(X_1:T), integrating out all the runlengths. 1 x 1. [log P]
    nlml = -np.sum(np.log(Z))
    return (R, S, nlml, Z, predMeans, predMed)
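

# Usage sketch (an assumption, not from the original code): the column-wise
# argmax of the run length posterior R returned by bocpdGPT is a common way to
# read off the MAP run length at each time step and hence locate changepoints
# (times where the MAP run length drops back toward zero).
def map_run_lengths(R):
    # Skip column 0, which encodes the prior before any data are observed.
    return R[:, 1:].argmax(axis=0)  # length-T array of MAP run lengths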