Example 1
# Shared imports assumed throughout these snippets (not part of the originals):
import numpy as np
from scipy.special import logsumexp

def log_add_by_margs(probability_distr):
    # Total log mass of a 2-D array of log probabilities: the sum over rows
    # of logsumexp(row), i.e. the log of the product of the per-row masses.
    tot = 0

    for row in probability_distr:
        tot += logsumexp(row)

    return tot
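
A quick sanity check (toy data of my own; logsumexp assumed to be SciPy's): if every row of the input is already a normalized log distribution, each logsumexp(row) is 0 and so is the total.

import numpy as np
from scipy.special import logsumexp

# Two rows of log probabilities, each row normalized to sum to 1.
dist = np.log([[0.2, 0.3, 0.5],
               [0.1, 0.6, 0.3]])
print(log_add_by_margs(dist))  # ~0.0: the log of the product of per-row masses
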
Example 2
    def __init__(self, hist_array, labels, phasing=None, ignore_nan=False):
        conv = 1e-40

        # We need to normalize the array to 1. This is hard.
        # This step does the following:
        # convert to log space after adding a small convolution parameter so we don't get INF and NAN
        # for each mutation
        # normalize the row to 1 in log space.

        hist = np.asarray(hist_array, dtype=np.float32) + conv
        if (~(hist > 0)).any():
            logging.error("Negative histogram bin or NAN mutation!")
            if ignore_nan:
                logging.warning(
                    "Caught ignore_nan flag, not exiting; resetting invalid values to the convolution floor"
                )
                hist[np.logical_not(hist > 0)] = conv
            else:
                sys.exit(1)

        hist[:, :, 0] = conv  # reset the zero bins (the slice covers every sample)
        # normalize along the bin axis (axis 2) in log space: row - logsumexp(row)
        self._hist_array = np.apply_over_axes(
            lambda x, y: np.apply_along_axis(lambda z: z - logsumexp(z), y, x),
            np.log(hist), 2)

        self._labels = labels
        self._label_ids = dict([[y, x] for x, y in enumerate(labels)])
        self._phasing = {} if phasing is None else phasing
        self.n_samples = np.shape(hist_array)[1]
        self.n_bins = np.shape(hist_array)[-1]
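
The apply_over_axes call above is dense. A toy check (my own data, not from the original project) showing that it matches subtracting logsumexp along the bin axis with keepdims:

import numpy as np
from scipy.special import logsumexp

logh = np.log(np.random.rand(4, 2, 8).astype(np.float32))  # (mutations, samples, bins)

via_apply = np.apply_over_axes(
    lambda x, y: np.apply_along_axis(lambda z: z - logsumexp(z), y, x),
    logh, 2)
via_keepdims = logh - logsumexp(logh, axis=2, keepdims=True)

assert np.allclose(via_apply, via_keepdims, atol=1e-5)
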
Example 3
def sselogsum(v):
    """
    Fastest log sum exp on v array:
    https://github.com/rmcgibbo/logsumexp

    :param v: must be np.type32
    """
    return sselogsumexp.logsumexp(v)
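
sselogsumexp here refers to the compiled extension from the repository linked above. Since it is an optional dependency, a guarded import with a SciPy fallback is a reasonable pattern (a sketch based only on the call shown in the snippet; the float32 requirement comes from its docstring):

import numpy as np

try:
    import sselogsumexp  # SSE-accelerated extension, see the URL above

    def fast_logsumexp(v):
        # the extension expects a contiguous float32 array
        return sselogsumexp.logsumexp(np.ascontiguousarray(v, dtype=np.float32))
except ImportError:
    from scipy.special import logsumexp as fast_logsumexp
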
Example 4
def get_norm_marg_hist(log_sum, log_prior):
    # Combine log-likelihood and log-prior, then normalize each row in log space
    log_f_LL = log_sum + log_prior

    log_f_post = []

    for row in log_f_LL:
        log_f_post.append(row - logsumexp(row))

    return np.array(log_f_post, dtype=np.float32)
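
A quick property check for get_norm_marg_hist (toy arrays of my own): every row of the returned log posterior should exponentiate to a distribution that sums to 1.

import numpy as np
from scipy.special import logsumexp

log_sum = np.log(np.random.rand(3, 5)).astype(np.float32)  # toy log-likelihoods
log_prior = np.full(5, -np.log(5), dtype=np.float32)       # flat log prior

log_f_post = get_norm_marg_hist(log_sum, log_prior)
assert np.allclose(np.exp(log_f_post).sum(axis=1), 1.0, atol=1e-5)
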
Example 5
def change_of_variables(x_in, y_in, x_out):
    ####################################################################
    # Function to do a change of variables for a prob density function #
    ####################################################################
    # x_in  -- list, original domain
    # y_in  -- list, original pdf (in log space)
    # x_out -- list, transformed domain (must have the same dim as x_in)

    # bin x_in
    delta_x_in = bin_x_and_calculate_delta(x_in)

    # calculate the integral of the function over all bins and renormalize
    y_in = np.array(y_in, dtype=np.float32)
    # y_in = y_in - logsumexp(y_in + np.log( delta_x_in,dtype=np.float32))
    # for each bin, keep its integral consistent while transforming the bin
    # widths according to the change of variables
    delta_x_out = bin_x_and_calculate_delta(x_out)

    y_out = np.array(y_in + np.log(delta_x_in) - np.log(delta_x_out), dtype=np.float32)
    return y_out - logsumexp(y_out)
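
The helper bin_x_and_calculate_delta is not included in this snippet. A plausible minimal reconstruction (hypothetical, not the original implementation) assigns each grid point the width between the midpoints of its neighbours:

import numpy as np

def bin_x_and_calculate_delta(x):
    """Hypothetical stand-in: per-point bin widths for an ordered grid x.

    Interior bins span the midpoints between neighbouring points; the edge
    bins take the half-width on their open side.
    """
    x = np.asarray(x, dtype=np.float32)
    edges = np.concatenate(([x[0]], (x[:-1] + x[1:]) / 2.0, [x[-1]]))
    return np.diff(edges)
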
Example 6
def offline_changepoint_detection(data,
                                  prior_func,
                                  observation_log_likelihood_function,
                                  truncate=-np.inf):
    """Compute the likelihood of changepoints on data.

    Keyword arguments:
    data                                -- the time series data
    prior_func                          -- a function giving the probability of a
                                           changepoint given the distance to the last one
    observation_log_likelihood_function -- a function giving the log likelihood
                                           of a data part
    truncate                            -- stop the inner sum once a summand contributes
                                           less than exp(truncate) of the running total

    Returns:
    Q   -- the log-likelihood of the data
    P   -- P[t, s] is the log-likelihood of data[t:s+1] given no changepoint between t and s
    Pcp -- Pcp[i, t] is the log-likelihood that the i-th changepoint is at time step t
    """

    n = len(data)
    Q = np.zeros((n, ))
    g = np.zeros((n, ))
    G = np.zeros((n, ))
    P = np.ones((n, n)) * -np.inf

    # save everything in log representation
    for t in range(n):
        g[t] = np.log(prior_func(t))
        if t == 0:
            G[t] = g[t]
        else:
            G[t] = np.logaddexp(G[t - 1], g[t])

    P[n - 1, n - 1] = observation_log_likelihood_function(data, n - 1, n)
    Q[n - 1] = P[n - 1, n - 1]

    for t in reversed(range(n - 1)):
        P_next_cp = -np.inf  # == log(0)
        for s in range(t, n - 1):
            P[t, s] = observation_log_likelihood_function(data, t, s + 1)

            # compute recursion
            summand = P[t, s] + Q[s + 1] + g[s + 1 - t]
            P_next_cp = np.logaddexp(P_next_cp, summand)

            # truncate sum to become approx. linear in time (see
            # Fearnhead, 2006, eq. (3))
            if summand - P_next_cp < truncate:
                break

        P[t, n - 1] = observation_log_likelihood_function(data, t, n)

        # (1 - G) is numerically stable until G becomes numerically 1
        if G[n - 1 - t] < -1e-15:  # exp(-1e-15) = .99999...
            antiG = np.log(1 - np.exp(G[n - 1 - t]))
        else:
            # (1 - G) is approx. -log(G) for G close to 1
            antiG = np.log(-G[n - 1 - t])

        Q[t] = np.logaddexp(P_next_cp, P[t, n - 1] + antiG)

    Pcp = np.ones((n - 1, n - 1)) * -np.inf
    for t in range(n - 1):
        Pcp[0, t] = P[0, t] + Q[t + 1] + g[t] - Q[0]
        if np.isnan(Pcp[0, t]):
            Pcp[0, t] = -np.inf
    for j in range(1, n - 1):
        for t in range(j, n - 1):
            tmp_cond = Pcp[j - 1, j -
                           1:t] + P[j:t + 1,
                                    t] + Q[t + 1] + g[0:t - j + 1] - Q[j:t + 1]
            Pcp[j, t] = logsumexp(tmp_cond.astype(np.float32))
            if np.isnan(Pcp[j, t]):
                Pcp[j, t] = -np.inf

    return Q, P, Pcp
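
A minimal way to drive this function on synthetic data (the flat prior and the crude Gaussian segment score below are stand-ins of my own; the original project supplies proper priors and marginal likelihoods):

import numpy as np
from functools import partial
from scipy.stats import norm

def const_prior(t, l):
    # flat prior over gap lengths; typically l = len(data) + 1
    return 1.0 / l

def gaussian_obs_log_likelihood(data, t, s):
    # crude stand-in: score data[t:s] under N(segment mean, 1)
    seg = np.asarray(data[t:s])
    return norm.logpdf(seg, loc=seg.mean(), scale=1.0).sum()

data = np.concatenate([np.random.randn(50), np.random.randn(50) + 3.0])
Q, P, Pcp = offline_changepoint_detection(
    data, partial(const_prior, l=len(data) + 1),
    gaussian_obs_log_likelihood, truncate=-20)

# marginal changepoint probability at each step: sum over changepoint indices
p_cp = np.exp(Pcp).sum(axis=0)
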
Example 7
    def normalize_loghist_with_prior(self, loghist):
        # Normalize in each dimension
        return np.apply_along_axis(lambda x: x - logsumexp(x), 1, loghist + self.logprior)
Example 8
def logsum_of_marginals_per_sample(loghist):
    return np.apply_along_axis(lambda x: logsumexp(x), 1, np.array(loghist, dtype=np.float32))
Example 9
def logsum_of_marginals(loghist):
    return np.sum(np.apply_along_axis(lambda x: logsumexp(x), 1, loghist))
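
The two helpers above are related: summing the per-sample values from logsum_of_marginals_per_sample reproduces logsum_of_marginals. A quick check on toy data:

import numpy as np
from scipy.special import logsumexp

loghist = np.log(np.random.rand(4, 6)).astype(np.float32)  # toy log histogram
per_sample = logsum_of_marginals_per_sample(loghist)
assert np.isclose(logsum_of_marginals(loghist), per_sample.sum())
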
Example 10
    def _normalize_loghist_with_prior(self, loghist):
        """ Normalize in each dimension in log space """
        loghist = np.asarray(loghist, dtype=np.float32)
        return np.apply_along_axis(lambda x: x - logsumexp(x), 1,
                                   loghist + self._logprior)
Example 11
def offline_changepoint_detection(data,
                                  prior_function,
                                  log_likelihood_class,
                                  truncate: int = -40):
    """
    Compute the likelihood of changepoints on data.

    Parameters:
    data                 -- the time series data
    prior_function       -- a function returning the log probability of a
                            changepoint given the distance to the last one
    log_likelihood_class -- an object whose pdf(data, t, s) method returns the
                            log likelihood of the data segment [t, s)
    truncate             -- stop the inner sum once a summand contributes less
                            than exp(truncate) of the running total

    Outputs:
        P   -- the log-likelihood of a data sequence [t, s], given there is no changepoint between t and s
        Q   -- the log-likelihood of the data
        Pcp -- the log-likelihood that the i-th changepoint is at time step t. To get the probability of a changepoint at time step t, sum the probabilities over i.
    """

    # Set up the placeholders for each parameter
    n = len(data)
    Q = np.zeros((n, ))
    g = np.zeros((n, ))
    G = np.zeros((n, ))
    P = np.ones((n, n)) * -np.inf

    # save everything in log representation
    for t in range(n):
        g[t] = prior_function(t)
        if t == 0:
            G[t] = g[t]
        else:
            G[t] = np.logaddexp(G[t - 1], g[t])

    P[n - 1, n - 1] = log_likelihood_class.pdf(data, t=n - 1, s=n)
    Q[n - 1] = P[n - 1, n - 1]

    for t in reversed(range(n - 1)):
        P_next_cp = -np.inf  # == log(0)
        for s in range(t, n - 1):
            P[t, s] = log_likelihood_class.pdf(data, t=t, s=s + 1)

            # compute recursion
            summand = P[t, s] + Q[s + 1] + g[s + 1 - t]
            P_next_cp = np.logaddexp(P_next_cp, summand)

            # truncate sum to become approx. linear in time (see
            # Fearnhead, 2006, eq. (3))
            if summand - P_next_cp < truncate:
                break

        P[t, n - 1] = log_likelihood_class.pdf(data, t=t, s=n)

        # (1 - G) is numerically stable until G becomes numerically 1
        if G[n - 1 - t] < -1e-15:  # exp(-1e-15) = .99999...
            antiG = np.log(1 - np.exp(G[n - 1 - t]))
        else:
            # (1 - G) is approx. -log(G) for G close to 1
            antiG = np.log(-G[n - 1 - t])

        Q[t] = np.logaddexp(P_next_cp, P[t, n - 1] + antiG)

    Pcp = np.ones((n - 1, n - 1)) * -np.inf
    for t in range(n - 1):
        Pcp[0, t] = P[0, t] + Q[t + 1] + g[t] - Q[0]
        if np.isnan(Pcp[0, t]):
            Pcp[0, t] = -np.inf
    for j in range(1, n - 1):
        for t in range(j, n - 1):
            tmp_cond = (Pcp[j - 1, j - 1:t] + P[j:t + 1, t] + Q[t + 1] +
                        g[0:t - j + 1] - Q[j:t + 1])
            Pcp[j, t] = logsumexp(tmp_cond.astype(np.float32))
            if np.isnan(Pcp[j, t]):
                Pcp[j, t] = -np.inf

    return Q, P, Pcp
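
Unlike the earlier variant, prior_function here is stored directly into the log array g, so it must already return log probabilities, and the observation model is an object with a pdf(data, t, s) method. A minimal stand-in (my own sketch, not one of the original project's model classes):

import numpy as np
from scipy.stats import norm

class GaussianSegmentLikelihood:
    """Hypothetical observation model standing in for the project's own
    likelihood classes: scores data[t:s] under N(segment mean, 1)."""

    def pdf(self, data, t, s):
        seg = np.asarray(data[t:s])
        return norm.logpdf(seg, loc=seg.mean(), scale=1.0).sum()

data = np.concatenate([np.random.randn(40), np.random.randn(40) + 2.0])

def log_flat_prior(t):
    # log of a flat prior over gap lengths
    return -np.log(len(data) + 1)

Q, P, Pcp = offline_changepoint_detection(
    data, log_flat_prior, GaussianSegmentLikelihood(), truncate=-40)
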