import numpy as np

# Helper routines referenced below (gpr1step5, logistic_logh, logistic_h2,
# studentpdf, studentlogpdf, rmult, logsumexp, MoTrnd, isKosher) are assumed
# to be provided elsewhere in this package.


def dbocpdGP(theta, X, model, num_hazard_params, dt):
    """Negative log marginal likelihood of the GP-based BOCPD model, and its
    gradient w.r.t. the hyperparameters theta = [theta_h; theta_m; log(alpha0)]."""
    beta0 = 1
    num_scale_params = 1

    # Maximum number of points considered for predicting the next one, regardless
    # of the run length and cov function. Set to Inf if we don't care about speed.
    maxPossibleLen = 500

    theta_h = theta[:num_hazard_params]  # num_hazard x 1
    theta_m = theta[num_hazard_params:-1]  # num_model x 1
    alpha0 = np.exp(theta[-1])  # Use exp to ensure it is positive. 1 x 1
    num_model_params = len(theta_m)  # 1 x 1

    assert dt > 0
    (T, D) = X.shape  # Number of time points observed
    assert D == 1

    # Never need to consider more than T points in the past.
    maxPossibleLen = min(T, maxPossibleLen)

    # Evaluate the hazard function for this interval.
    # H(r) = P(runlength_t = 0|runlength_t-1 = r-1)
    # Pre-compute the hazard in preparation for steps 4 & 5, alg 1, of [RPA].
    # logH = log(H), logmH = log(1-H)
    (logH, logmH, dlogH, dlogmH) = logistic_logh(np.arange(1, T + 1), theta_h)
    assert isKosher(dlogH)
    assert isKosher(dlogmH)

    # R(r, t) = P(runlength_t-1 = r-1|X_1:t-1).
    # P(runlength_0 = 0|nothing) = 1 => logR(1, 1) = 0
    logR = np.zeros((T + 1, 1))  # Pre-allocate the run length distribution. [P]
    dlogR_h = np.zeros((T + 1, num_hazard_params))
    dlogR_m = np.zeros((T + 1, num_model_params))
    dlogR_s = np.zeros((T + 1, num_scale_params))
    SSE = np.zeros((T + 1, D))  # This will change with higher D
    dSSE = np.zeros((T + 1, num_model_params))
    SSE[0, 0] = 2 * beta0  # 1 x 1

    # Pre-compute GP stuff:
    (alpha, sigma2, dalpha, dsigma2) = gpr1step5(theta_m, model,
                                                 maxPossibleLen, dt)
    maxLen = alpha.shape[0]
    # Extend sigma2 (and its derivative), since we may index past maxLen.
    # t - maxLen x 1
    sigma2 = np.concatenate((sigma2,
                             sigma2[-1, 0] * np.ones((T - sigma2.shape[0], 1))))
    dsigma2 = np.concatenate((dsigma2,
                              np.tile(dsigma2[-1, :], (T - maxLen, 1))))
    ddf = 2  # d(df)/d(alpha0), since df = 2 * alpha0 + r

    for t in range(1, T + 1):
        MRC = min(maxLen, t)  # How many points back to look when predicting
        mu = np.dot(alpha[:MRC, :MRC - 1],
                    X[t - MRC:t - 1, 0][::-1])  # MRC x 1. [x]
        # Extend the mu (mean) prediction for the older (> MRC) run length
        # hypotheses.
        if MRC < t:
            mu = np.append(mu, mu[-1] * np.ones(t - mu.shape[0]))  # t - MRC x 1. [x]

        df = 2 * alpha0 + np.arange(t)
        pred_var = sigma2[:t, 0] * SSE[:t, 0] / df
        dpredvar_s = np.atleast_2d(-ddf * sigma2[:t, 0] * SSE[:t, 0] / df ** 2).T
        (logpredprobs, dlogpredprobs) = studentlogpdf(X[t - 1, 0], mu,
                                                      pred_var, df, 2)

        # Now do the derivatives. [t x 1, t x 1]
        dmu = np.zeros((t, num_model_params))
        dpredvar = np.zeros((t, num_model_params))
        for ii in range(num_model_params):
            # MRC x 1. [x/theta_m]
            dmu[:MRC, ii] = np.dot(dalpha[:MRC, :MRC - 1, ii],
                                   X[t - MRC:t - 1, 0][::-1])
            # Extend the derivative the same way as mu for the older (> MRC)
            # run length hypotheses.
            if MRC < t:
                dmu[MRC:, ii] = dmu[MRC - 1, ii]
            # Use the product rule. t x 1. [x^2/theta_m]
            dpredvar[:, ii] = (dsigma2[:t, ii] * SSE[:t, 0]
                               + sigma2[:t, 0] * dSSE[:t, ii]) / df
            # Use the quotient rule. t x 1. [1/theta_m]
            dSSE[1:t + 1, ii] = dSSE[:t, ii] \
                + 2 * (mu - X[t - 1, 0]) / sigma2[:t, 0] * dmu[:, ii] \
                - (mu - X[t - 1, 0]) ** 2 / sigma2[:t, 0] ** 2 * dsigma2[:t, ii]
            dSSE[0, ii] = 0

        dlogpredprobs_m = rmult(dmu, dlogpredprobs[:, 0]) \
            + rmult(dpredvar[:t, :], dlogpredprobs[:, 1])
        # mu has zero dependence on alpha (scale). t x 1. [log(P/x)]
        dlogpredprobs_s = np.atleast_2d(dpredvar_s[:t, 0] * dlogpredprobs[:, 1]
                                        + ddf * dlogpredprobs[:, 2]).T

        # Update with the Mahalanobis error of predicting the next point.
        # t x 1. []
        SSE[1:t + 1, 0] = SSE[:t, 0] + (mu - X[t - 1, 0]) ** 2 / sigma2[:t, 0]
        SSE[0, 0] = 2 * beta0  # 1 x 1. []

        # Update the run length distributions and their derivatives.
        logMsg = logR[:t, 0] + logpredprobs + logH[:t, 0]  # t x 1
        dlogMsg_h = dlogR_h[:t, :] + dlogH[:t, :]  # t x num_hazard
        logR[1:t + 1, 0] = logR[:t, 0] + logpredprobs + logmH[:t, 0]  # t x 1. [P]
        dlogR_h[1:t + 1, :] = dlogR_h[:t, :] + dlogmH[:t, :]  # t x num_hazard
        dlogR_m[1:t + 1, :] = dlogR_m[:t, :] + dlogpredprobs_m  # t x num_model
        dlogR_s[1:t + 1, :] = dlogR_s[:t, :] + dlogpredprobs_s  # t x num_scale
        (logR[0, 0], normMsg, Z) = logsumexp(logMsg)  # 1 x 1. [P]
        # 1 x num_hazard
        dlogR_h[0, :] = rmult(dlogMsg_h, normMsg).sum(axis=0) / Z
        # 1 x num_model
        dlogR_m[0, :] = rmult(dlogR_m[1:t + 1, :], normMsg).sum(axis=0) / Z
        # 1 x num_scale
        dlogR_s[0, :] = rmult(dlogR_s[1:t + 1, :], normMsg).sum(axis=0) / Z
    # end t loop

    # Get the log marginal likelihood of the data, X(1:end), under the model
    # = P(X_1:T), integrating out all the runlengths. 1 x 1. [log P]
    nlml = -1.0 * logsumexp(logR)[0]

    # Do the derivatives of nlml.
    normR = np.exp(logR - logR.max())  # (T + 1) x 1
    dnlml_h = -rmult(dlogR_h, normR).sum(axis=0) / normR.sum()  # 1 x num_hazard
    dnlml_m = -rmult(dlogR_m, normR).sum(axis=0) / normR.sum()  # 1 x num_model
    dnlml_s = -rmult(dlogR_s, normR).sum(axis=0) / normR.sum()  # 1 x num_scale
    # Correct for the fact that the input is log(alpha0). 1 x num_scale.
    dnlml_s = alpha0 * dnlml_s
    # (num_hazard + num_model + num_scale) x 1
    dnlml = np.append(np.append(dnlml_h, dnlml_m), dnlml_s)

    assert isKosher(nlml)
    assert isKosher(dnlml)
    return (nlml, dnlml)
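

# Usage sketch (an illustration, not part of the original module): because
# dbocpdGP returns the negative log marginal likelihood together with its
# gradient, the hyperparameters can be fit with any gradient-based optimizer.
# The names `theta0`, `model`, and `num_hazard_params` below are placeholders
# for whatever gpr1step5 and logistic_logh expect in your setup.
def learn_bocpdGP_hyperparams(theta0, X, model, num_hazard_params, dt):
    from scipy.optimize import minimize

    # jac=True tells the optimizer the objective returns (value, gradient).
    res = minimize(lambda th: dbocpdGP(th, X, model, num_hazard_params, dt),
                   theta0, jac=True, method='L-BFGS-B')
    return res.x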
def bocpdGPT_trunc(X, model, theta_m, theta_h, scalePrior, dt):
    """Run-length-truncated BOCPD with a GP predictive model. Returns the run
    length posterior R, the negative log marginal likelihood, the per-step
    evidence Z, and the predictive means/medians."""
    # Maximum number of points considered for predicting the next one, regardless
    # of the run length and cov function. Set to Inf if we don't care about speed.
    maxPossibleLen = 500

    num_hazard_params = len(theta_h)
    num_model_params = len(theta_m)

    assert isKosher(X)
    assert dt > 0
    (T, D) = X.shape  # Number of time points observed. 1 x 1. [s]
    # TODO extend to higher D
    assert D == 1

    # Never need to consider more than T points in the past. 1 x 1. [points]
    maxPossibleLen = min(T, maxPossibleLen)

    # Ensure the gamma prior parameters are positive (as required). 2 x 1. []
    scalePrior = np.exp(scalePrior)
    alpha0 = scalePrior[0]
    beta0 = scalePrior[1]

    # Precompute all the gpr aspects of the algorithm.
    # [maxLen x maxLen, maxLen x 1]
    (alpha, sigma2, dalpha, dsigma2) = gpr1step5(theta_m, model,
                                                 maxPossibleLen, dt)
    maxLen = alpha.shape[0]
    assert maxLen >= 1

    # Evaluate the hazard function:
    # H(r) = P(runlength_t = 0|runlength_t-1 = r-1)
    # Pre-compute the hazard in preparation for steps 4 & 5, alg 1, of [RPA].
    (H, dH) = logistic_h2(np.arange(1, maxLen + 1), theta_h)

    R = np.zeros((maxLen + 1, T + 1))
    # The standardized square error for each runlength.
    SSE = np.zeros((maxLen, D))
    # The evidence at each time step => Z(t) = P(X_t|X_1:t-1).
    Z = np.zeros((T, 1))
    predMeans = np.zeros((T, 1))
    predMed = np.zeros((T, 1))

    # At time t = 1, we have complete knowledge about the run length. This
    # assumes there was surely a change point right before the first data point,
    # not at the first data point. Implements step 1, alg 1, of [RPA].
    # => P(runlength_0 = 0|nothing) = 1
    R[0, 0] = 1
    # Initialize first SSE to contribution from gamma prior.
    SSE[0] = 2 * beta0
    # How many degrees of freedom in the prediction for each run length.
    df = 2 * alpha0 + np.arange(maxLen)

    for t in range(1, T + 1):
        # Implicitly implements step 2, alg 1, of [RPA]: observe the new datum,
        # simply by incrementing the loop index.

        # Evaluate the predictive distribution for the new datum under each of
        # the parameters. Implements step 3, alg 1, of [RPA].
        # predprobs(r) = p(X(t)|X(1:t-1), runlength_t-1 = r-1). t x 1. [P]
        predprobs = np.zeros(maxLen)
        if t < maxLen:
            mu = np.dot(alpha[:t, :t], X[:t, 0][::-1])
            # The predictive variance for each prediction.
            pred_var = sigma2[:t, 0] * SSE[:t, 0] / df[:t]
            # Get the posterior predictive probability for each run length.
            predprobs[:t] = studentpdf(X[t - 1, 0], mu, pred_var, df[:t], 1)
            # Update the SSE for each run length.
            SSE[1:t + 1, 0] = SSE[:t, 0] + (mu - X[t - 1, 0]) ** 2 / sigma2[:t, 0]
            SSE[0, 0] = 2 * beta0  # 1 x 1. []
        else:
            mu = np.dot(alpha, X[t - maxLen + 1:t, 0][::-1])
            # The predictive variance for each prediction.
            pred_var = sigma2[:, 0] * SSE[:, 0] / df
            # Get the posterior predictive probability for each run length.
            predprobs = studentpdf(X[t - 1, 0], mu, pred_var, df, 1)
            # Update the SSE for each run length.
            SSE[1:maxLen, 0] = SSE[:maxLen - 1, 0] + \
                (mu[:maxLen - 1] - X[t - 1, 0]) ** 2 / sigma2[:maxLen - 1, 0]
            SSE[0, 0] = 2 * beta0

        predMeans[t - 1] = np.dot(R[:mu.shape[0], t - 1].T, mu)
        predMed[t - 1] = np.median(MoTrnd(R[:mu.shape[0], t - 1], mu,
                                          pred_var[:mu.shape[0]],
                                          df[:mu.shape[0]], 1000))

        # Evaluate the growth probabilities - shift the probabilities up and to
        # the right, scaled by the hazard function and the predictive
        # probabilities.
        R[1:, t] = R[:maxLen, t - 1] * predprobs * (1 - H[:maxLen])
        # Evaluate the probability that there *was* a changepoint and we're
        # accumulating the mass back down at r = 0.
        R[0, t] = (R[:maxLen, t - 1] * predprobs * H[:maxLen]).sum()

        # Renormalize the run length probabilities for improved numerical
        # stability. Note that unlike in [RPA], which keeps track of
        # P(r_t, X_1:t), we keep track of P(r_t|X_1:t) => unnormalized
        # R(i, t+1) = P(runlength_t = i-1|X_1:t) * P(X_t|X_1:t-1)
        # => normalization const Z(t) = P(X_t|X_1:t-1). Sort of implements
        # step 6, alg 1, of [RPA].
        Z[t - 1] = R[:, t].sum()
        R[:, t] /= Z[t - 1]
        # Fold the (normalized) overflow bin back into the last tracked run
        # length so the truncated distribution still sums to one.
        R[maxLen - 1, t] = R[maxLen - 1, t] + R[maxLen, t]
        R[maxLen, t] = 0
    # end t loop

    # Get the negative log marginal likelihood of the data, X(1:end), under
    # the model = P(X_1:T), integrating out all the runlengths. 1 x 1. [log P]
    nlml = -np.sum(np.log(Z))
    return (R, nlml, Z, predMeans, predMed)
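

# Usage sketch (hypothetical helper, not in the original code): R[r, t] holds
# P(runlength_t-1 = r | X_1:t-1), so the r = 0 row of the returned matrix is
# the posterior probability that a changepoint just occurred at each step.
def changepoint_probs_trunc(X, model, theta_m, theta_h, scalePrior, dt):
    (R, nlml, Z, predMeans, predMed) = bocpdGPT_trunc(X, model, theta_m,
                                                      theta_h, scalePrior, dt)
    return R[0, 1:]  # 1 x T. [P]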
def bocpdGPT(X, model, theta_m, theta_h, scalePrior, dt):
    """Full (untruncated) BOCPD with a GP predictive model. Returns the run
    length posterior R, the smoothing matrix S, the negative log marginal
    likelihood, the per-step evidence Z, and the predictive means/medians."""
    # Maximum number of points considered for predicting the next one, regardless
    # of the run length and cov function. Set to Inf if we don't care about speed.
    maxPossibleLen = 500

    num_hazard_params = len(theta_h)
    num_model_params = len(theta_m)

    assert isKosher(X)
    assert dt > 0
    (T, D) = X.shape  # Number of time points observed. 1 x 1. [s]
    # TODO extend to higher D
    assert D == 1

    # Never need to consider more than T points in the past. 1 x 1. [points]
    maxPossibleLen = min(T, maxPossibleLen)

    # Ensure the gamma prior parameters are positive (as required). 2 x 1. []
    scalePrior = np.exp(scalePrior)
    alpha0 = scalePrior[0]
    beta0 = scalePrior[1]

    # Evaluate the hazard function:
    # H(r) = P(runlength_t = 0|runlength_t-1 = r-1)
    # Pre-compute the hazard in preparation for steps 4 & 5, alg 1, of [RPA].
    (H, dH) = logistic_h2(np.arange(1, T + 1), theta_h)

    R = np.zeros((T + 1, T + 1))
    S = np.zeros((T, T))
    # The standardized square error (SSE) for each runlength.
    SSE = np.zeros((T + 1, D))
    # The evidence at each time step => Z(t) = P(X_t|X_1:t-1).
    Z = np.zeros((T, 1))
    predMeans = np.zeros((T, 1))
    predMed = np.zeros((T, 1))

    # At time t = 1, we have complete knowledge about the run length. This
    # assumes there was surely a change point right before the first data point,
    # not at the first data point. Implements step 1, alg 1, of [RPA].
    # => P(runlength_0 = 0|nothing) = 1
    R[0, 0] = 1
    # Initialize first SSE to contribution from gamma prior.
    SSE[0] = 2 * beta0

    # Precompute all the gpr aspects of the algorithm.
    (alpha, sigma2, dalpha, dsigma2) = gpr1step5(theta_m, model,
                                                 maxPossibleLen, dt)
    maxLen = alpha.shape[0]
    # Extend sigma2, since we may index past maxLen.
    sigma2 = np.concatenate((sigma2,
                             sigma2[-1, 0] * np.ones((T - sigma2.shape[0], 1))))

    for t in range(1, T + 1):
        # Implicitly implements step 2, alg 1, of [RPA]: observe the new datum,
        # simply by incrementing the loop index.

        # Evaluate the predictive distribution for the new datum under each of
        # the parameters. Implements step 3, alg 1, of [RPA].
        # predprobs(r) = p(X(t)|X(1:t-1), runlength_t-1 = r-1). t x 1. [P]
        MRC = min(maxLen, t)  # How many points back to look when predicting
        mu = np.dot(alpha[:MRC, :MRC - 1],
                    X[t - MRC:t - 1, 0][::-1])  # MRC x 1. [x]
        # Extend the mu (mean) prediction for the older (> MRC) run length
        # hypotheses.
        if MRC < t:
            mu = np.append(mu, mu[-1] * np.ones(t - mu.shape[0]))  # t - MRC x 1. [x]

        df = 2 * alpha0 + np.arange(t)
        pred_var = sigma2[:t, 0] * SSE[:t, 0] / df
        predprobs = studentpdf(X[t - 1, 0], mu, pred_var, df, 1)

        # Update the SSE for each run length.
        SSE[1:t + 1, 0] = SSE[:t, 0] + (mu - X[t - 1, 0]) ** 2 / sigma2[:t, 0]
        SSE[0, 0] = 2 * beta0  # 1 x 1. []

        predMeans[t - 1] = np.dot(R[:mu.shape[0], t - 1].T, mu)
        # The following is pretty slow:
        # predMed[t - 1] = np.median(MoTrnd(R[:mu.shape[0], t - 1], mu,
        #                                   pred_var[:mu.shape[0]],
        #                                   df[:mu.shape[0]], 1000))
        predMed[t - 1] = 0

        # Evaluate the growth probabilities - shift the probabilities up and to
        # the right, scaled by the hazard function and the predictive
        # probabilities.
        R[1:t + 1, t] = R[:t, t - 1] * predprobs * (1 - H[:t])
        # Evaluate the probability that there *was* a changepoint and we're
        # accumulating the mass back down at r = 0.
        R[0, t] = (R[:t, t - 1] * predprobs * H[:t]).sum()

        # Renormalize the run length probabilities for improved numerical
        # stability. Note that unlike in [RPA], which keeps track of
        # P(r_t, X_1:t), we keep track of P(r_t|X_1:t) => unnormalized
        # R(i, t+1) = P(runlength_t = i-1|X_1:t) * P(X_t|X_1:t-1)
        # => normalization const Z(t) = P(X_t|X_1:t-1). Sort of implements
        # step 6, alg 1, of [RPA].
        Z[t - 1] = R[:t + 1, t].sum()
        R[:t + 1, t] /= Z[t - 1]

        # Get the S matrix.
        S[:t, t - 1] = R[:t, t - 1] * predprobs
        S[:, t - 1] = S[:, t - 1] / S[:, t - 1].sum()
    # end t loop

    # Get the negative log marginal likelihood of the data, X(1:end), under
    # the model = P(X_1:T), integrating out all the runlengths. 1 x 1. [log P]
    nlml = -np.sum(np.log(Z))
    return (R, S, nlml, Z, predMeans, predMed)
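

# Usage sketch (an assumption, not from the original code): the column-wise
# argmax of the run length posterior R returned by bocpdGPT is a common way to
# read off the MAP run length at each time step and hence locate changepoints
# (times where the MAP run length drops back toward zero).
def map_run_lengths(R):
    # Skip column 0, which encodes the prior before any data are observed.
    return R[:, 1:].argmax(axis=0)  # length-T array of MAP run lengths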