def test_studentpdf():
    x = asarray([0.0608528, 0.1296728, -0.2238741, 0.79862108])
    mu = asarray([-0.85759774, 0.70178911, -0.29351646, 1.60215909])
    var = asarray([0.82608497, 0.75882319, 0.86101641, 0.73113357])
    nu = asarray([0.71341641, 0.52532607, 0.20685246, 0.02304925])

    p = studentpdf(x, mu, var, nu, nargout=1)
    assert allclose(p, asarray([0.1521209, 0.1987373, 0.21214484, 0.01335992]))

    (p, dp) = studentpdf(x, mu, var, nu, nargout=2)
    assert allclose(p, asarray([0.1521209, 0.1987373, 0.21214484, 0.01335992]))
    assert allclose(
        dp,
        asarray([[1.67068098e-01, 8.00695192e-04, 9.07088043e-02],
                 [-2.38903047e-01, -4.08902709e-02, 1.76043126e-01],
                 [9.74584714e-02, -1.19253012e-01, 4.08675818e-01],
                 [-1.65769327e-02, -2.71641034e-05, 5.45223728e-01]]))
    print('studentpdf Test PASSED')
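# A minimal sketch (an assumption for illustration, not the repo's studentpdf)
# of the density the test above exercises: a location-scale Student-t with
# location mu, squared scale var and nu degrees of freedom, vectorized over
# its inputs. The real studentpdf's nargout=2 path additionally returns the
# derivatives w.r.t. (mu, var, nu), which this sketch omits.
from numpy import exp, pi, sqrt
from scipy.special import gammaln


def studentpdf_value_only(x, mu, var, nu):
    # Normalizing constant Gamma((nu+1)/2) / (Gamma(nu/2) * sqrt(nu*pi*var)),
    # computed via gammaln for numerical stability.
    c = exp(gammaln(nu / 2.0 + 0.5) - gammaln(nu / 2.0)) / sqrt(nu * pi * var)
    return c * (1.0 + (x - mu) ** 2 / (nu * var)) ** (-(nu + 1.0) / 2.0)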
def predict(self, X, needDerivatives=False):
    N = self.post_params.shape[0]  # 1 x 1
    mus = self.post_params[0]      # N x 1. [x]
    kappas = self.post_params[1]   # N x 1. [points]
    alphas = self.post_params[2]   # N x 1. [points]
    betas = self.post_params[3]    # N x 1. [x^2]

    # TODO verify this is correct by citing a reference for the posterior
    # predictive. However, it is probably correct since we get the same lml
    # under random permutations of the data => coherence.
    predictive_variance = betas * (kappas + 1) / (alphas * kappas)  # N x 1. [x^2]
    df = 2.0 * alphas  # N x 1. [points]

    if not needDerivatives:
        self.predprobs = studentpdf(X, mus, predictive_variance, df)  # N x 1. [P/x]
    else:
        self.predprobs, dtpdf = studentpdf(X, mus, predictive_variance, df,
                                           nargout=2)  # N x 1. [P/x]

        dmu_dtheta = np.transpose(self.dpost_params[0, :, :], axes=[2, 1, 0])     # N x 4
        dkappa_dtheta = np.transpose(self.dpost_params[1, :, :], axes=[2, 1, 0])  # N x 4
        dalpha_dtheta = np.transpose(self.dpost_params[2, :, :], axes=[2, 1, 0])  # N x 4
        dbeta_dtheta = np.transpose(self.dpost_params[3, :, :], axes=[2, 1, 0])   # N x 4
        dnu_dtheta = 2.0 * dalpha_dtheta  # N x 4

        # TODO use rmult and eliminate the for loop
        dpv_dtheta = np.zeros((N, 4))
        for ii in range(4):
            QRpart = (dbeta_dtheta[:, ii] * alphas
                      - betas * dalpha_dtheta[:, ii]) / alphas ** 2  # N x 1
            dpv_dtheta[:, ii] = (-(betas / (alphas * kappas ** 2)) * dkappa_dtheta[:, ii]
                                 + (1 + 1 / kappas) * QRpart)  # N x 1

        # TODO use rmult and eliminate the for loop
        dp_dtheta = np.zeros((N, 4))
        for ii in range(4):
            # dp/dtheta_i = dp/dmu * dmu/dtheta_i
            #             + dp/dsigma2 * dsigma2/dtheta_i
            #             + dp/dnu * dnu/dtheta_i
            dp_dtheta[:, ii] = (dtpdf[:, 0] * dmu_dtheta[:, ii]
                                + dtpdf[:, 1] * dpv_dtheta[:, ii]
                                + dtpdf[:, 2] * dnu_dtheta[:, ii])  # N x 1

        self.dpredprobs = dp_dtheta
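# Standalone Monte Carlo sanity check (hypothetical, not part of the module)
# for the TODO above: under a Normal-Gamma posterior with parameters
# (mu0, kappa, alpha, beta), the posterior predictive is Student-t with
# df = 2*alpha, location mu0 and squared scale beta*(kappa+1)/(alpha*kappa),
# which is exactly the parameterization fed to studentpdf in predict().
# All variable names below are local to this check.
import numpy as np

rng = np.random.default_rng(0)
mu0, kappa, alpha, beta = 0.3, 2.0, 3.0, 1.5

# Sample the generative hierarchy: precision lam ~ Gamma(alpha, rate=beta),
# mean ~ N(mu0, 1/(kappa*lam)), x ~ N(mean, 1/lam).
lam = rng.gamma(alpha, 1.0 / beta, size=200_000)
mean = rng.normal(mu0, np.sqrt(1.0 / (kappa * lam)))
x = rng.normal(mean, np.sqrt(1.0 / lam))

scale2 = beta * (kappa + 1) / (alpha * kappa)
df = 2 * alpha
# For df > 2 the variance of a Student-t with squared scale scale2 is
# scale2 * df / (df - 2); the empirical variance should agree closely.
print(np.var(x), scale2 * df / (df - 2))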
def bocpdGPT(
    X,
    model,
    theta_m,
    theta_h,
    scalePrior,
    dt,
):

    # Maximum number of points considered for predicting the next one, regardless
    # of the run length and cov function. Set to Inf if we don't care about speed.
    maxPossibleLen = 500

    num_hazard_params = len(theta_h)
    num_model_params = len(theta_m)

    assert isKosher(X)
    assert dt > 0

    (T, D) = X.shape  # Number of time points observed. 1 x 1. [s]

    # TODO extend to higher D
    assert D == 1

    # Never need to consider more than T points in the past. 1 x 1. [points]
    maxPossibleLen = min(T, maxPossibleLen)

    # Ensure the gamma prior parameters are positive (as required). 2 x 1. []
    scalePrior = np.exp(scalePrior)
    alpha0 = scalePrior[0]
    beta0 = scalePrior[1]

    # Evaluate the hazard function:
    # H(r) = P(runlength_t = 0 | runlength_t-1 = r - 1)
    # Pre-compute the hazard in preparation for steps 4 & 5, alg 1, of [RPA].
    (H, dH) = logistic_h2(np.asarray(range(1, T + 1)), theta_h)

    R = np.zeros((T + 1, T + 1))
    S = np.zeros((T, T))

    # The standardized square error (SSE) for each run length.
    SSE = np.zeros((T + 1, D))

    # The evidence at each time step => Z(t) = P(X_t | X_1:t-1).
    Z = np.zeros((T, 1))

    predMeans = np.zeros((T, 1))
    predMed = np.zeros((T, 1))

    # At time t = 1, we have complete knowledge about the run length. This assumes
    # there was surely a change point right before the first data point, not at the
    # first data point. Implements step 1, alg 1, of [RPA].
    # => P(runlength_0 = 0 | nothing) = 1
    R[0, 0] = 1

    # Initialize first SSE to contribution from gamma prior.
    SSE[0] = 2 * beta0

    # Precompute all the gpr aspects of the algorithm.
    (alpha, sigma2, dalpha, dsigma2) = gpr1step5(theta_m, model, maxPossibleLen, dt)
    maxLen = alpha.shape[0]
    sigma2 = np.concatenate((sigma2,
                             sigma2[-1, 0] * np.ones((T - sigma2.shape[0], 1))))

    for t in range(1, T + 1):
        # Implicitly implements step 2, alg 1, of [RPA]: observe new datum, simply
        # by incrementing the loop index.

        # Evaluate the predictive distribution for the new datum under each of the
        # parameters. Implements step 3, alg 1, of [RPA].
        # predprobs(r) = p(X(t) | X(1:t-1), runlength_t-1 = r - 1). t x 1. [P]
        MRC = min(maxLen, t)  # How many points back to look when predicting.
        mu = np.dot(alpha[:MRC, :MRC - 1], X[t - MRC:t - 1, 0][::-1])  # MRC x 1. [x]

        # Extend the mu (mean) prediction for the older (> MRC) run length
        # hypotheses.
        if MRC < t:
            mu = np.append(mu, mu[-1] * np.ones(t - mu.shape[0]))  # t - MRC x 1. [x]

        df = np.asarray([2 * alpha0]) + np.asarray(range(t))
        pred_var = sigma2[:t, 0] * SSE[:t, 0] / df
        predprobs = studentpdf(X[t - 1, 0], mu, pred_var, df, 1)

        # Update the SSE for each run length.
        SSE[1:t + 1, 0] = SSE[:t, 0] + (mu - X[t - 1, 0]) ** 2 / sigma2[:t, 0]
        SSE[0, 0] = 2 * beta0  # 1 x 1. []

        predMeans[t - 1] = np.dot(R[:mu.shape[0], t - 1].T, mu)
        # The following is pretty slow:
        # np.median(MoTrnd(R[:mu.shape[0], t - 1], mu, pred_var[:mu.shape[0]],
        #                  df[:mu.shape[0]], 1000))
        predMed[t - 1] = 0

        # Evaluate the growth probabilities - shift the probabilities up and to the
        # right, scaled by the hazard function and the predictive probabilities.
        R[1:t + 1, t] = R[:t, t - 1] * predprobs * (1 - H[:t])

        # Evaluate the probability that there *was* a changepoint and we're
        # accumulating the mass back down at r = 0.
        R[0, t] = (R[:t, t - 1] * predprobs * H[:t]).sum()

        # Renormalize the run length probabilities for improved numerical stability.
        # Note that unlike [RPA], which keeps track of P(r_t, X_1:t), we keep track
        # of P(r_t | X_1:t) => unnormalized R(i, t + 1) = P(runlength_t = i - 1 | X_1:t)
        # * P(X_t | X_1:t-1) => normalization const Z(t) = P(X_t | X_1:t-1). Sort of
        # implements step 6, alg 1, of [RPA].
        Z[t - 1] = R[:t + 1, t].sum()
        R[:t + 1, t] /= Z[t - 1]

        # Get the S matrix.
        S[:t, t - 1] = R[:t, t - 1] * predprobs
        S[:, t - 1] = S[:, t - 1] / S[:, t - 1].sum()
    # endTloop

    # Get the negative log marginal likelihood of the data, X(1:end), under
    # the model = P(X_1:T), integrating out all the run lengths. 1 x 1. [log P]
    nlml = -sum(np.log(Z))

    return (R, S, nlml, Z, predMeans, predMed)
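# A minimal, self-contained sketch (illustrative only; function name and
# signature are hypothetical) of one run-length update as performed inside
# the loop above, i.e. steps 4-6 of alg 1 in [RPA] using the normalized
# representation kept in R: given r_prev = P(r_{t-1} | x_{1:t-1}), the
# predictive probabilities and the hazard values, produce the next column
# and the evidence Z(t) = P(x_t | x_{1:t-1}).
import numpy as np


def runlength_update(r_prev, predprobs, hazard):
    t = r_prev.shape[0]
    r_new = np.zeros(t + 1)
    r_new[1:] = r_prev * predprobs * (1.0 - hazard)  # growth probabilities
    r_new[0] = np.sum(r_prev * predprobs * hazard)   # changepoint mass at r = 0
    evidence = r_new.sum()                           # Z(t) = P(x_t | x_{1:t-1})
    return r_new / evidence, evidence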
def bocpdGPT_trunc(
    X,
    model,
    theta_m,
    theta_h,
    scalePrior,
    dt,
):

    # Maximum number of points considered for predicting the next one, regardless
    # of the run length and cov function. Set to Inf if we don't care about speed.
    maxPossibleLen = 500

    num_hazard_params = len(theta_h)
    num_model_params = len(theta_m)

    assert isKosher(X)
    assert dt > 0

    (T, D) = X.shape  # Number of time points observed. 1 x 1. [s]

    # TODO extend to higher D
    assert D == 1

    # Never need to consider more than T points in the past. 1 x 1. [points]
    maxPossibleLen = min(T, maxPossibleLen)

    # Ensure the gamma prior parameters are positive (as required). 2 x 1. []
    scalePrior = np.exp(scalePrior)
    alpha0 = scalePrior[0]
    beta0 = scalePrior[1]

    # Precompute all the gpr aspects of the algorithm. [maxLen x maxLen, maxLen x 1]
    (alpha, sigma2, dalpha, dsigma2) = gpr1step5(theta_m, model, maxPossibleLen, dt)
    maxLen = alpha.shape[0]
    assert maxLen >= 1

    # Evaluate the hazard function:
    # H(r) = P(runlength_t = 0 | runlength_t-1 = r - 1)
    # Pre-compute the hazard in preparation for steps 4 & 5, alg 1, of [RPA].
    (H, dH) = logistic_h2(np.asarray(range(1, maxLen + 1)), theta_h)

    R = np.zeros((maxLen + 1, T + 1))

    # The standardized square error for each run length.
    SSE = np.zeros((maxLen, D))

    # The evidence at each time step => Z(t) = P(X_t | X_1:t-1).
    Z = np.zeros((T, 1))

    predMeans = np.zeros((T, 1))
    predMed = np.zeros((T, 1))

    # At time t = 1, we have complete knowledge about the run length. This assumes
    # there was surely a change point right before the first data point, not at the
    # first data point. Implements step 1, alg 1, of [RPA].
    # => P(runlength_0 = 0 | nothing) = 1
    R[0, 0] = 1

    # Initialize first SSE to contribution from gamma prior.
    SSE[0] = 2 * beta0

    # How many degrees of freedom in the prediction for each run length.
    df = np.asarray([2 * alpha0]) + np.asarray(range(maxLen))

    for t in range(1, T + 1):
        # Implicitly implements step 2, alg 1, of [RPA]: observe new datum, simply
        # by incrementing the loop index.

        # Evaluate the predictive distribution for the new datum under each of the
        # parameters. Implements step 3, alg 1, of [RPA].
        # predprobs(r) = p(X(t) | X(1:t-1), runlength_t-1 = r - 1). t x 1. [P]
        predprobs = np.zeros(maxLen)
        if t < maxLen:
            mu = np.dot(alpha[:t, :t], X[:t, 0][::-1])

            # The predictive variance for each prediction.
            pred_var = sigma2[:t, 0] * SSE[:t, 0] / df[:t]

            # Get the posterior predictive probability for each run length.
            predprobs[:t] = studentpdf(X[t - 1, 0], mu, pred_var, df[:t], 1)

            # Update the SSE for each run length.
            SSE[1:t + 1, 0] = SSE[:t, 0] + \
                (mu - X[t - 1, 0]) ** 2 / sigma2[:t, 0]
            SSE[0, 0] = 2 * beta0  # 1 x 1. []
        else:
            mu = np.dot(alpha, X[t - maxLen + 1:t, 0][::-1])

            # The predictive variance for each prediction.
            pred_var = sigma2[:, 0] * SSE[:, 0] / df

            # Get the posterior predictive probability for each run length.
            predprobs = studentpdf(X[t - 1, 0], mu, pred_var, df, 1)

            # Update the SSE for each run length, using the newly observed datum.
            SSE[1:maxLen, 0] = SSE[:maxLen - 1, 0] + \
                (mu[:maxLen - 1] - X[t - 1, 0]) ** 2 / \
                sigma2[:maxLen - 1, 0]
            SSE[0, 0] = 2 * beta0
        # endif

        predMeans[t - 1] = np.dot(R[:mu.shape[0], t - 1].T, mu)
        predMed[t - 1] = np.median(
            MoTrnd(R[:mu.shape[0], t - 1], mu, pred_var[:mu.shape[0]],
                   df[:mu.shape[0]], 1000))

        # Evaluate the growth probabilities - shift the probabilities up and to the
        # right, scaled by the hazard function and the predictive probabilities.
        R[1:, t] = R[:maxLen, t - 1] * predprobs * (1 - H[:maxLen])

        # Evaluate the probability that there *was* a changepoint and we're
        # accumulating the mass back down at r = 0.
        R[0, t] = (R[:maxLen, t - 1] * predprobs * H[:maxLen]).sum()

        # Renormalize the run length probabilities for improved numerical stability.
        # Note that unlike [RPA], which keeps track of P(r_t, X_1:t), we keep track
        # of P(r_t | X_1:t) => unnormalized R(i, t + 1) = P(runlength_t = i - 1 | X_1:t)
        # * P(X_t | X_1:t-1) => normalization const Z(t) = P(X_t | X_1:t-1). Sort of
        # implements step 6, alg 1, of [RPA].
        Z[t - 1] = R[:, t].sum()
        R[:maxLen, t] /= Z[t - 1]

        # Fold any mass beyond the truncation point back into the last retained
        # run length hypothesis.
        R[maxLen - 1, t] = R[maxLen - 1, t] + R[maxLen, t]
        R[maxLen, t] = 0
    # endTloop

    # Get the negative log marginal likelihood of the data, X(1:end), under
    # the model = P(X_1:T), integrating out all the run lengths. 1 x 1. [log P]
    nlml = -sum(np.log(Z))

    return (R, nlml, Z, predMeans, predMed)
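# Hypothetical post-processing sketch (not part of the original code): given
# the run-length posterior R returned by bocpdGPT_trunc, the most probable
# run length at each time step, a common way to visualize detected
# changepoints, is the argmax over each column of R.
import numpy as np


def map_runlength(R):
    # R has shape (maxLen + 1, T + 1); column t holds P(r_t | x_{1:t}).
    # Skip column 0, which encodes the prior before any data is seen.
    return np.argmax(R[:, 1:], axis=0)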