コード例 #1
0
 def _approximate_dataprob_correction(self, sample_size):
     '''
     Return an ad hoc correction term built from the logarithms of
     sample_size and self.dataset_size.

     With n = log(sample_size) and N = log(self.dataset_size) the value
     is 0.061 * n * (n - N) * (n + N) ** 0.75, which vanishes when
     sample_size equals self.dataset_size.

     see `python derivations/clustering.py dataprob`
     see `python derivations/clustering.py approximations`
     '''
     log_sample = log(sample_size)
     log_total = log(self.dataset_size)
     gap = log_sample - log_total
     spread = (log_sample + log_total) ** 0.75
     return 0.061 * log_sample * gap * spread
コード例 #2
0
 def score_data(self, shared):
     """
     Return the log of the joint density p(q, Y): the Beta(alpha, beta)
     log-prior of the explicit parameter self.p plus the Bernoulli
     log-likelihood of the self.heads / self.tails counts.

     A zero count contributes nothing even when its probability is
     zero (0 * log(0) is taken to be 0), and a positive count with zero
     probability gives -inf, so p == 0 and p == 1 are scored without
     ever evaluating log(0).  Any p outside [0, 1] has likelihood -inf.
     """
     prior = sp.stats.beta.logpdf(self.p, shared.alpha, shared.beta)
     if self.p >= 0. and self.p <= 1.:
         likelihood = 0.
         terms = ((self.heads, self.p), (self.tails, 1. - self.p))
         for count, prob in terms:
             if count == 0:
                 # skip: 0 * log(0) would be nan or a math domain error
                 continue
             if prob == 0.:
                 # an outcome with probability zero was observed
                 likelihood = -np.inf
                 break
             likelihood += count * log(prob)
     else:
         likelihood = -np.inf
     return prior + likelihood
コード例 #3
0
ファイル: bbnc.py プロジェクト: datamicroscopes/common
 def score_data(self, shared):
     """
     Return the log of the joint density p(q, Y): the Beta(alpha, beta)
     log-prior of the explicit parameter self.p plus the Bernoulli
     log-likelihood of the self.heads / self.tails counts.

     A zero count contributes nothing even when its probability is
     zero (0 * log(0) is taken to be 0), and a positive count with zero
     probability gives -inf, so p == 0 and p == 1 are scored without
     ever evaluating log(0).  Any p outside [0, 1] has likelihood -inf.
     """
     prior = sp.stats.beta.logpdf(self.p, shared.alpha, shared.beta)
     if self.p >= 0. and self.p <= 1.:
         likelihood = 0.
         terms = ((self.heads, self.p), (self.tails, 1. - self.p))
         for count, prob in terms:
             if count == 0:
                 # skip: 0 * log(0) would be nan or a math domain error
                 continue
             if prob == 0.:
                 # an outcome with probability zero was observed
                 likelihood = -np.inf
                 break
             likelihood += count * log(prob)
     else:
         likelihood = -np.inf
     return prior + likelihood
コード例 #4
0
 def _approximate_dataprob_correction(self, sample_size):
     '''
     Return an ad hoc correction term built from the logarithms of
     sample_size and self.dataset_size.

     With n = log(sample_size) and N = log(self.dataset_size) the value
     is 0.061 * n * (n - N) * (n + N) ** 0.75, which vanishes when
     sample_size equals self.dataset_size.

     see `python derivations/clustering.py dataprob`
     see `python derivations/clustering.py approximations`
     '''
     log_sample = log(sample_size)
     log_total = log(self.dataset_size)
     gap = log_sample - log_total
     spread = (log_sample + log_total) ** 0.75
     return 0.061 * log_sample * gap * spread
コード例 #5
0
ファイル: nich.py プロジェクト: ericmjonas/distributions
 def score_group(self, group):
     """
     Score the data summarized by `group` against this prior, using the
     posterior parameters returned by self.plus_group(group).

     Reference: murphy2007conjugate, Eq. 171 (presumably the log
     marginal likelihood of the group's data; confirm against the
     paper).
     """
     log_pi = 1.1447298858493991  # approximately log(pi)
     posterior = self.plus_group(group)
     score = gammaln(posterior.nu / 2.) - gammaln(self.nu / 2.)
     score += 0.5 * log(self.kappa / posterior.kappa)
     score += (0.5 * self.nu) * log(self.nu * self.sigmasq)
     score -= (0.5 * posterior.nu) * log(posterior.nu * posterior.sigmasq)
     score -= group.count / 2. * log_pi
     return score
コード例 #6
0
ファイル: nich.py プロジェクト: akkinenirajesh/distributions
 def score_data(self, shared):
     """
     Score the data summarized by this group against the prior
     `shared`, using the posterior parameters returned by
     shared.plus_group(self).

     Reference: murphy2007conjugate, Eq. 171 (presumably the log
     marginal likelihood of the group's data; confirm against the
     paper).
     """
     log_pi = 1.1447298858493991  # approximately log(pi)
     posterior = shared.plus_group(self)
     score = gammaln(posterior.nu / 2.) - gammaln(shared.nu / 2.)
     score += 0.5 * log(shared.kappa / posterior.kappa)
     score += (0.5 * shared.nu) * log(shared.nu * shared.sigmasq)
     score -= (0.5 * posterior.nu) * log(posterior.nu * posterior.sigmasq)
     score -= self.count / 2. * log_pi
     return score
コード例 #7
0
ファイル: nich.py プロジェクト: akkinenirajesh/distributions
def score_student_t(x, nu, mu, sigmasq):
    """
    Return the log density at x of a Student-t distribution with nu
    degrees of freedom, location mu and squared scale sigmasq.

    Reference: murphy2007conjugate, Eq. 304.
    """
    half_nu_plus = .5 * (nu + 1.)
    # log normalizing constant
    log_norm = gammaln(half_nu_plus) - gammaln(.5 * nu)
    log_norm -= .5 * log(nu * pi * sigmasq)
    # squared, scale-normalized distance from the location
    offset = (x - mu)
    scaled_sq = offset * offset / sigmasq
    return log_norm + -half_nu_plus * log(1. + scaled_sq / nu)
コード例 #8
0
 def score_data(self, shared):
     """
     Score the data summarized by this group against the prior
     `shared`, using the posterior parameters returned by
     shared.plus_group(self).

     Reference: murphy2007conjugate, Eq. 171 (presumably the log
     marginal likelihood of the group's data; confirm against the
     paper).
     """
     log_pi = 1.1447298858493991  # approximately log(pi)
     posterior = shared.plus_group(self)
     score = gammaln(posterior.nu / 2.) - gammaln(shared.nu / 2.)
     score += 0.5 * log(shared.kappa / posterior.kappa)
     score += (0.5 * shared.nu) * log(shared.nu * shared.sigmasq)
     score -= (0.5 * posterior.nu) * log(posterior.nu * posterior.sigmasq)
     score -= self.count / 2. * log_pi
     return score
コード例 #9
0
def score_student_t(x, nu, mu, sigmasq):
    """
    Return the log density at x of a Student-t distribution with nu
    degrees of freedom, location mu and squared scale sigmasq.

    Reference: murphy2007conjugate, Eq. 304.
    """
    half_nu_plus = .5 * (nu + 1.)
    # log normalizing constant
    log_norm = gammaln(half_nu_plus) - gammaln(.5 * nu)
    log_norm -= .5 * log(nu * pi * sigmasq)
    # squared, scale-normalized distance from the location
    offset = (x - mu)
    scaled_sq = offset * offset / sigmasq
    return log_norm + -half_nu_plus * log(1. + scaled_sq / nu)
コード例 #10
0
ファイル: dd.py プロジェクト: jesserobertson/distributions
 def score_value(self, shared, value):
     """
     Return the log of the smoothed relative frequency of `value`:

         (counts[value] + alphas[value]) / (counts.sum() + alphas.sum())

     Reference: wallach2009rethinking, Eqn 4
     (McCallum et al., 'Rethinking LDA: Why Priors Matter').
     """
     weight = self.counts[value] + shared.alphas[value]
     normalizer = self.counts.sum() + shared.alphas.sum()
     ratio = weight / normalizer
     return log(ratio)
コード例 #11
0
 def score_value(self, shared, value):
     """
     Return the log of the smoothed relative frequency of `value`:

         (counts[value] + alphas[value]) / (counts.sum() + alphas.sum())

     Reference: wallach2009rethinking, Eqn 4
     (McCallum et al., 'Rethinking LDA: Why Priors Matter').
     """
     weight = self.counts[value] + shared.alphas[value]
     normalizer = self.counts.sum() + shared.alphas.sum()
     ratio = weight / normalizer
     return log(ratio)
コード例 #12
0
ファイル: bb.py プロジェクト: zbxzc35/distributions
 def score_value(self, shared, value):
     """
     Return the log predictive probability of a binary `value`.

     The heads weight is shared.alpha + self.heads and the tails weight
     is shared.beta + self.tails; a truthy `value` selects heads, a
     falsy one tails, and the selected weight is normalized by the sum
     of both.

     Reference: wallach2009rethinking, Eqn 4
     (McCallum et al., 'Rethinking LDA: Why Priors Matter').
     """
     heads_weight = shared.alpha + self.heads
     tails_weight = shared.beta + self.tails
     if value:
         chosen = heads_weight
     else:
         chosen = tails_weight
     return log(chosen / (heads_weight + tails_weight))
コード例 #13
0
ファイル: bb.py プロジェクト: jesserobertson/distributions
 def score_value(self, shared, value):
     """
     Return the log predictive probability of a binary `value`.

     The heads weight is shared.alpha + self.heads and the tails weight
     is shared.beta + self.tails; a truthy `value` selects heads, a
     falsy one tails, and the selected weight is normalized by the sum
     of both.

     Reference: wallach2009rethinking, Eqn 4
     (McCallum et al., 'Rethinking LDA: Why Priors Matter').
     """
     heads_weight = shared.alpha + self.heads
     tails_weight = shared.beta + self.tails
     if value:
         chosen = heads_weight
     else:
         chosen = tails_weight
     return log(chosen / (heads_weight + tails_weight))
コード例 #14
0
ファイル: dpd.py プロジェクト: ericmjonas/distributions
 def score_value(self, group, value):
     """
     Return the log of the weight of `value` divided by
     self.alpha + group.total.

     For the OTHER sentinel the weight is self.beta0 * self.alpha; for
     any other value it is self.betas[value] * self.alpha plus the
     group's count for that value (0 when the value is absent).

     Adapted from dd.py, which was adapted from:
     McCallum, et. al, 'Rethinking LDA: Why Priors Matter' eqn 4
     """
     normalizer = self.alpha + group.total
     if value == OTHER:
         return log(self.beta0 * self.alpha / normalizer)
     observed = group.counts.get(value, 0)
     weight = self.betas[value] * self.alpha + observed
     return log(weight / normalizer)
コード例 #15
0
    def score_add_value(
            self,
            group_size,
            nonempty_group_count,
            sample_size,
            empty_group_count=1):
        '''
        Return the log posterior predictive probability of the next
        assignment, given sufficient statistics of a partial assignments
        vector [X_0,...,X_{n-1}]:

            log P[ X_n = k | X_0=x_0, ..., X_{n-1}=x_{n-1} ]

        Arguments:

            group_size = #{i | x_i = k, i in {0,...,n-1}}

            nonempty_group_count = #{x_i | i in {0,...,n-1}}
                (accepted but not used by this implementation)

            sample_size = n

            empty_group_count = number of empty groups uniformly
                competing for the assignment.  Typically 1, but several
                empty "ephemeral" groups are used in e.g. Radford Neal's
                Algorithm-8 (cite key: neal2000markov).

        Requires sample_size < self.dataset_size and
        empty_group_count > 0 (checked with assert).
        '''
        assert sample_size < self.dataset_size
        assert 0 < empty_group_count

        if group_size != 0:
            # Nonempty group.
            # see `python derivations/clustering.py fastlog`
            successor = 1.0 + group_size
            log_successor = log(successor)
            if group_size > 10000:
                # very large group: the first term below is replaced by 1.0
                return 1.0 + log_successor
            return log(successor / group_size) * group_size + log_successor

        # Empty group: split uniformly among the competing empty groups.
        next_size = sample_size + 1
        empty_score = -log(empty_group_count)
        if next_size < self.dataset_size:
            empty_score += self._approximate_postpred_correction(next_size)
        return empty_score
コード例 #16
0
    def score_add_value(
            self,
            group_size,
            nonempty_group_count,
            sample_size,
            empty_group_count=1):
        '''
        Return the log posterior predictive probability of the next
        assignment, given sufficient statistics of a partial assignments
        vector [X_0,...,X_{n-1}]:

            log P[ X_n = k | X_0=x_0, ..., X_{n-1}=x_{n-1} ]

        Arguments:

            group_size = #{i | x_i = k, i in {0,...,n-1}}

            nonempty_group_count = #{x_i | i in {0,...,n-1}}
                (accepted but not used by this implementation)

            sample_size = n

            empty_group_count = number of empty groups uniformly
                competing for the assignment.  Typically 1, but several
                empty "ephemeral" groups are used in e.g. Radford Neal's
                Algorithm-8 (cite key: neal2000markov).

        Requires sample_size < self.dataset_size and
        empty_group_count > 0 (checked with assert).
        '''
        assert sample_size < self.dataset_size
        assert 0 < empty_group_count

        if group_size != 0:
            # Nonempty group.
            # see `python derivations/clustering.py fastlog`
            successor = 1.0 + group_size
            log_successor = log(successor)
            if group_size > 10000:
                # very large group: the first term below is replaced by 1.0
                return 1.0 + log_successor
            return log(successor / group_size) * group_size + log_successor

        # Empty group: split uniformly among the competing empty groups.
        next_size = sample_size + 1
        empty_score = -log(empty_group_count)
        if next_size < self.dataset_size:
            empty_score += self._approximate_postpred_correction(next_size)
        return empty_score
コード例 #17
0
    def _approximate_postpred_correction(self, sample_size):
        '''
        Return an ad hoc correction term:

            log(dataset_size / sample_size)
                * (0.45 - 0.1 / sample_size - 0.1 / dataset_size)

        Requires 0 < sample_size < self.dataset_size (checked with
        assert).

        see `python derivations/clustering.py postpred`
        see `python derivations/clustering.py approximations`
        '''
        assert 0 < sample_size
        assert sample_size < self.dataset_size

        total = self.dataset_size
        power = 0.45 - 0.1 / sample_size - 0.1 / total
        log_ratio = log(total / sample_size)
        return log_ratio * power
コード例 #18
0
    def _approximate_postpred_correction(self, sample_size):
        '''
        Return an ad hoc correction term:

            log(dataset_size / sample_size)
                * (0.45 - 0.1 / sample_size - 0.1 / dataset_size)

        Requires 0 < sample_size < self.dataset_size (checked with
        assert).

        see `python derivations/clustering.py postpred`
        see `python derivations/clustering.py approximations`
        '''
        assert 0 < sample_size
        assert sample_size < self.dataset_size

        total = self.dataset_size
        power = 0.45 - 0.1 / sample_size - 0.1 / total
        log_ratio = log(total / sample_size)
        return log_ratio * power
コード例 #19
0
ファイル: dpd.py プロジェクト: datamicroscopes/distributions
 def score_value(self, shared, value):
     """
     Return the log of the weight of `value` divided by
     shared.alpha + self.total.

     For the OTHER sentinel the weight is shared.beta0 * shared.alpha;
     for any other value it is shared.betas[value] * shared.alpha plus
     this group's count for that value (0 when the value is absent).
     A negative stored count trips an assertion.

     Adapted from dd.py, which was adapted from:
     McCallum, et. al, 'Rethinking LDA: Why Priors Matter' eqn 4
     """
     normalizer = shared.alpha + self.total
     if value == OTHER:
         return log(shared.beta0 * shared.alpha / normalizer)
     observed = self.counts.get(value, 0)
     assert observed >= 0, "cannot score while in debt"
     weight = shared.betas[value] * shared.alpha + observed
     return log(weight / normalizer)
コード例 #20
0
ファイル: dpd.py プロジェクト: vishalbelsare/distributions
 def score_value(self, shared, value):
     """
     Return the log of the weight of `value` divided by
     shared.alpha + self.total.

     For the OTHER sentinel the weight is shared.beta0 * shared.alpha;
     for any other value it is shared.betas[value] * shared.alpha plus
     this group's count for that value (0 when the value is absent).
     A negative stored count trips an assertion.

     Adapted from dd.py, which was adapted from:
     McCallum, et. al, 'Rethinking LDA: Why Priors Matter' eqn 4
     """
     normalizer = shared.alpha + self.total
     if value == OTHER:
         return log(shared.beta0 * shared.alpha / normalizer)
     observed = self.counts.get(value, 0)
     assert observed >= 0, "cannot score while in debt"
     weight = shared.betas[value] * shared.alpha + observed
     return log(weight / normalizer)
コード例 #21
0
    def log_partition_function(self, sample_size):
        '''
        Return

            log_sum_exp(
                sum(n * log(n) for n in partition)
                for partition in partitions(sample_size)
            )

        Sample sizes below 48 are looked up in
        LowEntropy.log_partition_function_table (exact values); larger
        ones use the closed-form approximation
        n * log(n) * (1 + 0.28269584 * n ** -0.75).
        '''
        # TODO incorporate dataset_size for higher accuracy
        if sample_size < 48:
            return LowEntropy.log_partition_function_table[sample_size]
        leading_term = sample_size * log(sample_size)
        return leading_term * (1.0 + 0.28269584 * sample_size ** -0.75)
コード例 #22
0
    def log_partition_function(self, sample_size):
        '''
        Return

            log_sum_exp(
                sum(n * log(n) for n in partition)
                for partition in partitions(sample_size)
            )

        Sample sizes below 48 are looked up in
        LowEntropy.log_partition_function_table (exact values); larger
        ones use the closed-form approximation
        n * log(n) * (1 + 0.28269584 * n ** -0.75).
        '''
        # TODO incorporate dataset_size for higher accuracy
        if sample_size < 48:
            return LowEntropy.log_partition_function_table[sample_size]
        leading_term = sample_size * log(sample_size)
        return leading_term * (1.0 + 0.28269584 * sample_size ** -0.75)
コード例 #23
0
    def score_counts(self, counts):
        '''
        Return the log probability of data, given sufficient statistics
        (the group sizes `counts`) of a partial assignment vector
        [X_0,...,X_{n-1}]

            log P[ X_0=x_0, ..., X_{n-1}=x_{n-1} ]

        The sum of `counts` must not exceed self.dataset_size (checked
        with assert).  When it is smaller, two approximate corrections
        are added before the log partition function is subtracted.
        '''
        total = 0
        entropy_term = 0.0
        for group_size in counts:
            total += group_size
            # sizes of 0 or 1 add nothing: the guard skips them
            if group_size > 1:
                entropy_term += group_size * log(group_size)
        assert total <= self.dataset_size

        if total == self.dataset_size:
            # full dataset: no partial-sample corrections
            return entropy_term - self.log_partition_function(total)

        per_group = self._approximate_postpred_correction(total)
        entropy_term += per_group * (len(counts) - 1)
        entropy_term += self._approximate_dataprob_correction(total)
        return entropy_term - self.log_partition_function(total)
コード例 #24
0
    def score_counts(self, counts):
        '''
        Return the log probability of data, given sufficient statistics
        (the group sizes `counts`) of a partial assignment vector
        [X_0,...,X_{n-1}]

            log P[ X_0=x_0, ..., X_{n-1}=x_{n-1} ]

        The sum of `counts` must not exceed self.dataset_size (checked
        with assert).  When it is smaller, two approximate corrections
        are added before the log partition function is subtracted.
        '''
        total = 0
        entropy_term = 0.0
        for group_size in counts:
            total += group_size
            # sizes of 0 or 1 add nothing: the guard skips them
            if group_size > 1:
                entropy_term += group_size * log(group_size)
        assert total <= self.dataset_size

        if total == self.dataset_size:
            # full dataset: no partial-sample corrections
            return entropy_term - self.log_partition_function(total)

        per_group = self._approximate_postpred_correction(total)
        entropy_term += per_group * (len(counts) - 1)
        entropy_term += self._approximate_dataprob_correction(total)
        return entropy_term - self.log_partition_function(total)
コード例 #25
0
 def score_value(self, shared, value):
     """
     Return the log probability of a binary `value` under the explicit
     parameter self.p: log(p) when `value` is truthy, log(1 - p)
     otherwise.  `shared` is not used.
     """
     if value:
         return log(self.p)
     return log(1. - self.p)
コード例 #26
0
ファイル: gp.py プロジェクト: jesserobertson/distributions
 def remove_value(self, shared, value):
     """
     Remove one observation of `value` from the running statistics:
     count drops by 1, sum by int(value), and log_prod by
     log(value!).  `shared` is not used.
     """
     self.count -= 1
     self.sum -= int(value)
     log_factorial = log(factorial(value))
     self.log_prod -= log_factorial
コード例 #27
0
ファイル: gp.py プロジェクト: jesserobertson/distributions
 def add_repeated_value(self, shared, value, count):
     """
     Add `count` observations of the same `value` to the running
     statistics: count grows by `count`, sum by int(count * value),
     and log_prod by count * log(value!).  `shared` is not used.
     """
     self.count += count
     self.sum += int(count * value)
     log_factorial = log(factorial(value))
     self.log_prod += count * log_factorial
コード例 #28
0
ファイル: gp.py プロジェクト: jesserobertson/distributions
 def add_value(self, shared, value):
     """
     Add one observation of `value` to the running statistics: count
     grows by 1, sum by int(value), and log_prod by log(value!).
     `shared` is not used.
     """
     self.count += 1
     self.sum += int(value)
     log_factorial = log(factorial(value))
     self.log_prod += log_factorial
コード例 #29
0
ファイル: gp.py プロジェクト: jesserobertson/distributions
 def score_data(self, shared):
     """
     Score this group's data against the prior `shared`, using the
     posterior parameters returned by shared.plus_group(self):

         gammaln(post.alpha) - gammaln(shared.alpha)
         - post.alpha * log(post.inv_beta)
         + shared.alpha * log(shared.inv_beta)
         - self.log_prod
     """
     posterior = shared.plus_group(self)
     score = gammaln(posterior.alpha) - gammaln(shared.alpha)
     score -= posterior.alpha * log(posterior.inv_beta)
     score += shared.alpha * log(shared.inv_beta)
     score -= self.log_prod
     return score
コード例 #30
0
 def score_data(self, shared):
     """
     Score this group's data against the prior `shared`, using the
     posterior parameters returned by shared.plus_group(self):

         gammaln(post.alpha) - gammaln(shared.alpha)
         - post.alpha * log(post.inv_beta)
         + shared.alpha * log(shared.inv_beta)
         - self.log_prod
     """
     posterior = shared.plus_group(self)
     score = gammaln(posterior.alpha) - gammaln(shared.alpha)
     score -= posterior.alpha * log(posterior.inv_beta)
     score += shared.alpha * log(shared.inv_beta)
     score -= self.log_prod
     return score
コード例 #31
0
 def remove_value(self, shared, value):
     """
     Remove one observation of `value` from the running statistics:
     count drops by 1, sum by int(value), and log_prod by
     log(value!).  `shared` is not used.
     """
     self.count -= 1
     self.sum -= int(value)
     log_factorial = log(factorial(value))
     self.log_prod -= log_factorial
コード例 #32
0
 def add_repeated_value(self, shared, value, count):
     """
     Add `count` observations of the same `value` to the running
     statistics: count grows by `count`, sum by int(count * value),
     and log_prod by count * log(value!).  `shared` is not used.
     """
     self.count += count
     self.sum += int(count * value)
     log_factorial = log(factorial(value))
     self.log_prod += count * log_factorial
コード例 #33
0
 def add_value(self, shared, value):
     """
     Add one observation of `value` to the running statistics: count
     grows by 1, sum by int(value), and log_prod by log(value!).
     `shared` is not used.
     """
     self.count += 1
     self.sum += int(value)
     log_factorial = log(factorial(value))
     self.log_prod += log_factorial
コード例 #34
0
ファイル: gp.py プロジェクト: jesserobertson/distributions
 def score_value(self, shared, value):
     """
     Score a single `value` using the posterior parameters returned by
     shared.plus_group(self):

         gammaln(post.alpha + value) - gammaln(post.alpha)
         + post.alpha * log(post.inv_beta)
         - (post.alpha + value) * log(1 + post.inv_beta)
         - log(value!)
     """
     posterior = shared.plus_group(self)
     shape_after = posterior.alpha + value
     score = gammaln(shape_after) - gammaln(posterior.alpha)
     score += posterior.alpha * log(posterior.inv_beta)
     score -= shape_after * log(1. + posterior.inv_beta)
     score -= log(factorial(value))
     return score
コード例 #35
0
ファイル: bbnc.py プロジェクト: datamicroscopes/common
 def score_value(self, shared, value):
     """
     Return the log probability of a binary `value` under the explicit
     parameter self.p: log(p) when `value` is truthy, log(1 - p)
     otherwise.  `shared` is not used.
     """
     if value:
         return log(self.p)
     return log(1. - self.p)
コード例 #36
0
 def score_value(self, shared, value):
     """
     Score a single `value` using the posterior parameters returned by
     shared.plus_group(self):

         gammaln(post.alpha + value) - gammaln(post.alpha)
         + post.alpha * log(post.inv_beta)
         - (post.alpha + value) * log(1 + post.inv_beta)
         - log(value!)
     """
     posterior = shared.plus_group(self)
     shape_after = posterior.alpha + value
     score = gammaln(shape_after) - gammaln(posterior.alpha)
     score += posterior.alpha * log(posterior.inv_beta)
     score -= shape_after * log(1. + posterior.inv_beta)
     score -= log(factorial(value))
     return score