def test_logseries_convergence(self):
     # Test for ticket #923
     N = 1000
     random.seed(0)
     rvsn = random.logseries(0.8, size=N)
     # these two frequency counts should be close to theoretical
     # numbers with this large sample
     # theoretical large N result is 0.49706795
     freq = np.sum(rvsn == 1) / float(N)
     msg = "Frequency was %f, should be > 0.45" % freq
     assert_(freq > 0.45, msg)
     # theoretical large N result is 0.19882718
     freq = np.sum(rvsn == 2) / float(N)
     msg = "Frequency was %f, should be < 0.23" % freq
     assert_(freq < 0.23, msg)
Code example #2
(score: 0)
 def logarithmic(self, p):
     """Draw samples from a logarithmic (log-series) distribution.

     Parameters:
     p: float, must be in range (0, 1).
     """
     sample_count = self.size
     return r.logseries(p, sample_count)
Code example #3
(score: 0)
File: _discrete_distns.py — Project: sugiki/scipy
 def _rvs(self, p):
     # looks wrong for p>0.5, too few k=1
     # trying to use generic is worse, no k=1 at all
     return mtrand.logseries(p, size=self._size)
Code example #4
(score: 0)
File: _discrete_distns.py — Project: danaon/scipy
 def _rvs(self, p):
     # looks wrong for p>0.5, too few k=1
     # trying to use generic is worse, no k=1 at all
     return mtrand.logseries(p, size=self._size)
Code example #5
(score: 0)
File: isa.py — Project: afcarl/isa
	def loglikelihood(self, X, num_samples=10, method='biased', sampling_method=('ais', {'num_steps': 10}), **kwargs):
		"""
		Computes the log-likelihood (in nats) for a set of data samples. If the model is overcomplete,
		the log-likelihood is estimated using one of two importance sampling methods. The biased method
		tends to underestimate the log-likelihood. To get rid of the bias, use more samples.
		The unbiased method oftentimes suffers from extremely high variance and should be used with
		caution.

		@type  X: array_like
		@param X: a number of visible states stored in columns

		@type  method: string
		@param method: whether to use the 'biased' or 'unbiased' method

		@type  num_samples: integer
		@param num_samples: number of generated importance weights

		@type  sampling_method: tuple
		@param sampling_method: method and parameters to generate importance weights

		@type  return_all: boolean
		@param return_all: if true, return all importance weights and don't average (default: False)

		@rtype: ndarray
		@return: the log-probability of each data point
		"""

		return_all = kwargs.get('return_all', False)

		if self.num_hiddens == self.num_visibles:
			# complete model: exact log-likelihood via the change-of-variables formula
			return self.prior_loglikelihood(dot(inv(self.A), X)) - slogdet(self.A)[1]

		else:
			if method == 'biased':
				# sample importance weights in parallel, one row per sample
				log_is_weights = asshmarray(empty([num_samples, X.shape[1]]))
				def parfor(i):
					log_is_weights[i] = self.sample_posterior_ais(X, **sampling_method[1])[1]
				mapp(parfor, range(num_samples))

				if return_all:
					return asarray(log_is_weights)
				else:
					# average importance weights to get log-likelihoods
					return logmeanexp(log_is_weights, 0)

			elif method == 'unbiased':
				loglik = empty(X.shape[1])

				# pilot run: sample importance weights to estimate their moments
				log_is_weights = asshmarray(empty([num_samples, X.shape[1]]))
				def parfor(i):
					log_is_weights[i] = self.sample_posterior_ais(X, **sampling_method[1])[1]
				mapp(parfor, range(num_samples))

				# obtain an initial first guess using the biased method
				is_weights = exp(log_is_weights)
				is_mean = mean(is_weights, 0)
				is_var = var(is_weights, 0, ddof=1)

				# Taylor series expansion points
				c = (is_var + square(is_mean)) / is_mean

				# logarithmic series distribution parameters
				p = sqrt(is_var / (is_var + square(is_mean)))

				# sample "number of importance samples" for each data point
				num_samples = array([logseries(p_) for p_ in p], dtype='uint32')

				for k in range(1, max(num_samples) + 1):
					# data points for which to generate k importance weights
					indices = where(num_samples == k)[0]

					# sample importance weights
					if len(indices) > 0:
						log_is_weights = asshmarray(empty([k, len(indices)]))

						def parfor(i):
							# FIX: the original referenced an undefined name `num_steps`
							# (NameError on this path); pass the sampler parameters the
							# same way as the pilot runs above.
							log_is_weights[i] = self.sample_posterior_ais(X[:, indices], **sampling_method[1])[1]
						mapp(parfor, range(k))

						# hyperparameters used for selected data points
						c_ = c[indices]
						p_ = p[indices]

						# unbiased estimate of log-likelihood
						loglik[indices] = log(c_) + log(1. - p_) * prod((c_ - exp(log_is_weights)) / (c_ * p_), 0)

				if return_all:
					return loglik
				else:
					return mean(loglik, 0).reshape(1, -1)

			else:
				raise NotImplementedError('Unknown method \'{0}\'.'.format(method))
Code example #6
(score: 0)
File: utils.py — Project: kunlegiwa/MANGO
def logseries(size, params):
    """Draw samples from a logarithmic-series distribution.

    Parameters:
    size: int or tuple of ints, output shape passed through to numpy.
    params: dict, must contain key 'p' — the shape parameter in (0, 1).

    Returns an ndarray of samples, or exits the process with numpy's
    error message if 'p' is out of range.
    """
    try:
        return random.logseries(params['p'], size)
    except ValueError as e:
        # Use sys.exit rather than the bare `exit` builtin: `exit` is
        # injected by the `site` module and is not guaranteed to exist
        # (e.g. under `python -S` or in frozen applications).
        import sys
        sys.exit(e)