def step(self):
    proposal_object = self.current_proposal.propose()
    proposal_sample = proposal_object.sample
    proposal_new = self.construct_proposal(proposal_sample)

    if not self.current_proposal.is_symmetric:
        log_current_given_proposal = proposal_new.log_pdf(self.current.sample)
        log_proposal_given_current = self.current_proposal.log_pdf(proposal_sample)
    else:
        log_proposal_given_current = 0
        log_current_given_proposal = 0

    log_lik_proposal = self.target.log_pdf(proposal_sample)
    log_ratio = log_lik_proposal - self.log_lik_current \
                + log_current_given_proposal - log_proposal_given_current
    log_ratio = min(log(1), log_ratio)

    accepted = log_ratio > log(rand(1))
    if accepted:
        sample_object = proposal_object
        self.log_lik_current = log_lik_proposal
        self.current_proposal = proposal_new
    else:
        sample_object = self.current

    # adapt state: position and proposal
    self.current = sample_object

    return MHStepOutput(sample_object, proposal_object, self.log_lik_current,
                        log_ratio, accepted)
def test_idf():
    a, b = idf(0, [1, 2, 3, 4], 2), log(3.0 / 2)
    print(a, b)
    assert a == b
    a, b = idf(5, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 4), log(8.0 / 4)
    print(a, b)
    assert a == b
from bisect import bisect
from numpy import log

def classify(classifier, frec, document):
    dictionary, classes, p_c, p_c_count = classifier
    words = document.split(';')
    score = []
    for ck in range(0, len(classes)):
        score.append(log(p_c[ck]))
        # words[0] is never scored (presumably a document label/id)
        for i in range(1, len(words)):
            index = bisect(dictionary, words[i], 0, len(dictionary)) - 1
            # Laplace smoothing
            arg = float(frec[index][ck + 1] + 1) / (p_c_count[ck] + len(dictionary))
            score[ck] += log(arg)
    value = max(score)
    return classes[score.index(value)]
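# Hypothetical usage sketch for classify() above; all names and counts are
# invented for illustration. Rows of `frec` are [word, count_in_class_0,
# count_in_class_1, ...], and the dictionary must be sorted for bisect.
dictionary = ["apple", "banana", "cherry"]
classes = ["fruit", "other"]
p_c = [0.5, 0.5]          # class priors
p_c_count = [10, 10]      # total word counts per class
frec = [[w, 3, 1] for w in dictionary]
classifier = (dictionary, classes, p_c, p_c_count)
# the first ';'-separated token is treated as a document id and is not scored:
label = classify(classifier, frec, "doc1;apple;banana")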
from numpy import log
from numpy.ma import masked_values

def execute(Tlo, Plo, Thi, Phi):
    C = 0.034167
    Tratio = Thi / Tlo
    Pratio = Phi / Plo
    logTratio = log(Tratio)
    logPratio = log(Pratio)
    # Guard against divide-by-zero errors, again
    logPratio = masked_values(logPratio, 0, copy=False)
    result = C * logTratio / logPratio
    return result
def setup(self, examples, extraction_time_limit, setup_time_limit):
    '''
    Prepares a dictionary of inverse document frequencies (IDFs) for each
    word, and selects the terms with the highest IDFs as the features.

    @param examples: A list of raw data examples.
    @param extraction_time_limit: The time allocated for each example.
    @param setup_time_limit: The time limit for setting up this agent.
    '''
    self.extraction_time_limit = extraction_time_limit
    doc_count = float(len(examples))

    tf_examples = []
    for raw_example in examples:
        tf_examples += [self._countTermFrequency(raw_example)]
    self.idf = self._countInverseDocumentFrequency(tf_examples)

    # sort terms by descending document frequency and keep the top features
    self.order = sorted(self.idf.items(), key=lambda item: item[1], reverse=True)
    self.order = self.order[:self.num_features]
    self.idf = dict(self.order)
    self.order = [x[0] for x in self.order]

    for word in self.idf.keys():
        self.idf[word] = log(doc_count / self.idf[word])
def test_log_det_exact_toy_large_shogun(self):
    n = int(1e6)  # randn and spdiags require an integer size
    d = abs(randn(n))
    Q = spdiags(d, 0, n, n)
    self.assertAlmostEqual(OzonePosterior.log_det_shogun_exact(Q),
                           sum(log(d)), delta=1e-5)
def test_log_det_exact_toy_small_scikits(self):
    n = 3
    d = abs(randn(n))
    Q = spdiags(d, 0, n, n)
    self.assertAlmostEqual(OzonePosterior.log_det_scikits(Q),
                           sum(log(d)), delta=1e-15)
def log_mean_exp(X):
    r"""
    Computes log( 1/n sum_i exp(X_i) ).

    Useful if you want to solve log \int f(x)p(x) dx where you have samples
    from p(x) and can compute log f(x).
    """
    return GPTools.log_sum_exp(X) - log(len(X))
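# A quick sanity check of log_mean_exp on well-scaled inputs (assumes the
# GPTools class above is importable; `naive` is the direct computation):
from numpy import asarray, exp, log, mean

X = asarray([-1.0, 0.5, 2.0])
naive = log(mean(exp(X)))
assert abs(GPTools.log_mean_exp(X) - naive) < 1e-10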
def log_pdf_multiple_points(self, X):
    assert len(shape(X)) == 2
    assert shape(X)[1] == self.dimension

    log_determinant_part = -sum(log(diag(self.L)))

    quadratic_parts = zeros(len(X))
    for i in range(len(X)):
        x = X[i] - self.mu

        # solve y = K^(-1) x = L^(-T) L^(-1) x via two triangular solves
        y = solve_triangular(self.L, x.T, lower=True)
        y = solve_triangular(self.L.T, y, lower=False)

        quadratic_parts[i] = -0.5 * x.dot(y)

    const_part = -0.5 * len(self.L) * log(2 * pi)

    return const_part + log_determinant_part + quadratic_parts
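# Sketch of an independent check for the Cholesky-based Gaussian log-pdf
# above, using scipy.stats.multivariate_normal as a reference (mu, Sigma and
# x are invented toy values, not taken from the original source):
from numpy import asarray
from numpy.linalg import cholesky
from scipy.stats import multivariate_normal

mu = asarray([0.0, 0.0])
Sigma = asarray([[2.0, 0.3], [0.3, 1.0]])
L = cholesky(Sigma)  # plays the role of self.L above
x = asarray([[0.5, -0.2]])
ref = multivariate_normal(mu, Sigma).logpdf(x)
# log_pdf_multiple_points(x) with self.mu = mu and self.L = L should match ref.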
def adapt(self, mcmc_chain, step_output):
    # this is an extension of the base adapt call
    KameleonWindow.adapt(self, mcmc_chain, step_output)

    iter_no = mcmc_chain.iteration
    if iter_no > self.sample_discard and iter_no < self.stop_adapt:
        learn_scale = 1.0 / sqrt(iter_no - self.sample_discard + 1.0)
        self.nu2 = exp(log(self.nu2)
                       + learn_scale * (exp(step_output.log_ratio) - self.accstar))
def generateInt(self, k):
    ''' generates k numbers from the geometric distribution and counts them '''
    T = [0] * k
    for i in range(k):
        U = uniform(low=0, high=1)
        # inverse-CDF draw from the geometric distribution
        X = log(U) / log(1 - self.p)
        idx = int(floor(X))
        if idx < k:  # guard: a draw can exceed the histogram length
            T[idx] = T[idx] + 1
    return T

# Example usage:
# A = Geometric(0.1)
# C = A.showIntGenAndCount(A.generateInt(100))
# P = A.probabilityChart(C)
# A = PlotHist()
# A.plotHistgram("Binomial generator", C, P, 1, 'count', 'numbers')
# A.showHistogram()
def generateInt(self, k):
    ''' generates k numbers from the exponential distribution '''
    T = []  # starting from [0]*k would prepend k spurious zeros
    for i in range(k):
        U = uniform(low=0, high=1)
        # inverse-CDF draw: X ~ Exp(lamb)
        X = -log(U) / self.lamb
        T.append(X)
    return T
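# Standalone check of the inverse-CDF exponential draw used above (lamb is an
# invented rate; the sample mean should approach 1/lamb for large k):
from numpy import log
from numpy.random import uniform

lamb, k = 2.0, 100000
samples = [-log(uniform(low=0, high=1)) / lamb for _ in range(k)]
print(sum(samples) / k)  # ~ 0.5 = 1/lamb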
def log_sum_exp(X):
    r"""
    Computes log sum_i exp(X_i).

    Useful if you want to solve log \int f(x)p(x) dx where you have samples
    from p(x) and can compute log f(x).
    """
    # shift by the maximum so every exponent is <= 0, which avoids overflow
    X0 = X.max()
    X_without_X0 = delete(X, X.argmax())
    return X0 + log(1 + sum(exp(X_without_X0 - X0)))
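# Why the max-shift matters (standalone sketch; scipy.special.logsumexp is an
# independent reference implementation, not part of this code base):
from numpy import asarray
from scipy.special import logsumexp

X = asarray([1000.0, 1000.5, 999.0])
print(logsumexp(X))  # finite; naive log(sum(exp(X))) would overflow to inf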
def idf(num_folds, data, num_of_features):
    """
    Calculates the IDF value.

    @param num_folds: number of folds as sent to the agent
    @param data: data sent to the agent
    @param num_of_features: number of features of the bag of words
    """
    if num_folds < 2:
        num_folds = len(data)
    fold_size = len(data) // num_folds
    doc_count = len(data) - fold_size
    idf = log(float(doc_count) / num_of_features)
    return idf
def log_likelihood_logdet(self, tau, kappa):
    logging.debug("Entering")
    y, A = OzonePosterior.load_ozone_data()
    AtA = A.T.dot(A)
    Q = self.create_Q_matrix(kappa)
    n = len(y)
    M = Q + tau * AtA

    logdet1 = self.log_det_method(Q)
    logdet2 = self.log_det_method(M)

    log_det_part = 0.5 * logdet1 + 0.5 * n * log(tau) - 0.5 * logdet2

    logging.debug("Leaving")
    return log_det_part
def log_pdf(self, X, component_index_given=None):
    """
    If component_index_given is given, condition on it;
    otherwise compute the overall log_pdf.
    """
    if component_index_given is None:
        rez = zeros([len(X)])
        for ii in range(len(X)):
            logpdfs = zeros([self.num_components])
            for jj in range(self.num_components):
                logpdfs[jj] = self.components[jj].log_pdf([X[ii]])
            lmax = max(logpdfs)
            # log-sum-exp over components, weighted by the mixing proportions
            rez[ii] = lmax + log(sum(self.mixing_proportion.omega * exp(logpdfs - lmax)))
        return rez
    else:
        assert component_index_given < self.num_components
        return self.components[component_index_given].log_pdf(X)
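# Toy illustration of the weighted log-sum-exp used in log_pdf above, for a
# two-component 1D Gaussian mixture (scipy.stats.norm and all constants are
# invented for the example):
from numpy import asarray, exp, log
from scipy.stats import norm

omega = asarray([0.3, 0.7])  # mixing proportions
logpdfs = asarray([norm(0, 1).logpdf(0.5), norm(3, 1).logpdf(0.5)])
lmax = logpdfs.max()
mix_logpdf = lmax + log(sum(omega * exp(logpdfs - lmax)))
print(mix_logpdf)  # log(0.3*N(0.5; 0,1) + 0.7*N(0.5; 3,1))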
def test_log_mean_exp(self):
    X = asarray([-1, 1])
    X = reshape(X, (len(X), 1))
    y = asarray([+1. if x >= 0 else -1. for x in X])
    covariance = SquaredExponentialCovariance(sigma=1, scale=1)
    likelihood = LogitLikelihood()
    gp = GaussianProcess(y, X, covariance, likelihood)
    laplace = LaplaceApproximation(gp, newton_start=asarray([3, 3]))
    proposal = laplace.get_gaussian()
    n = 200
    prior = gp.get_gp_prior()
    samples = proposal.sample(n).samples

    log_likelihood = asarray([gp.log_likelihood(f) for f in samples])
    log_prior = prior.log_pdf(samples)
    log_proposal = proposal.log_pdf(samples)

    X = log_likelihood + log_prior - log_proposal

    a = log(mean(exp(X)))
    b = GPTools.log_mean_exp(X)

    self.assertLessEqual(a - b, 1e-5)
def log_likelihood_without_logdet(self, tau, kappa):
    logging.debug("Entering")
    y, A = OzonePosterior.load_ozone_data()
    AtA = A.T.dot(A)
    Q = self.create_Q_matrix(kappa)
    n = len(y)
    M = Q + tau * AtA

    second_a = -0.5 * tau * (y.T.dot(y))

    second_b = A.T.dot(y)
    second_b = self.solve_sparse_linear_system(M, second_b)
    second_b = A.dot(second_b)
    second_b = y.T.dot(second_b)
    second_b = 0.5 * (tau ** 2) * second_b

    quadratic_part = second_a + second_b
    const_part = -0.5 * n * log(2 * pi)

    result = const_part + quadratic_part

    logging.debug("Leaving")
    return result
from numpy import exp, log

def inverseOfIncreasingExponentialFunction(p, y):
    ''' Inverse exponential function: x = e^(log(y/p[0])/p[1]) '''
    return exp(log(y / p[0]) / p[1])
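# Round-trip check; the forward model y = p[0] * x**p[1] is inferred from the
# inverse formula above, not taken from the original source:
p, x = [2.0, 0.5], 1.7
y = p[0] * x ** p[1]
print(inverseOfIncreasingExponentialFunction(p, y))  # ~ 1.7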
def log_det_scikits(Q):
    try:
        from scikits.sparse.cholmod import cholesky
    except ImportError:
        raise Exception("cholmod not installed")
    # log-determinant from the Cholesky factor: log|Q| = 2 * sum(log(diag(L)))
    d = cholesky(csc_matrix(Q)).L().diagonal()
    return 2 * sum(log(d))
def exponential(self, estimates):
    logging.debug("Entering")

    # find a strict lower bound on the estimates and remove it from the list
    bound = estimates.min()
    bound_idx = estimates.argmin()
    estimates = delete(estimates, bound_idx)
    estimates = estimates - bound

    # find an integer close to the mean of the transformed estimates and divide
    E = max(int(round(abs(mean(estimates)))), 1)
    estimates = estimates / E

    logging.info("Using %f as lower bound on estimates" % bound)
    logging.info("Computing product of E=%d RR estimates" % E)
    logging.info("Std-deviation after scaling is %f" % std(estimates))

    # index for iterating through the used estimates (they might be averaged,
    # so this might be lower than the number of available estimates if the
    # block size is greater than one)
    estimate_idx = 0

    samples = zeros(E)
    for iteration in range(E):
        weight = 1

        # start with x^0, which is 1
        samples[iteration] = 1
        term = 1

        # index for computed series terms
        series_term_idx = 1

        while weight > 0:
            # update current term of the infinite series, averaging over a block
            x_inner = self.get_estimate(estimates, estimate_idx)
            term *= (x_inner / series_term_idx)

            # if the summation has reached the threshold, update the weight
            if abs(term) < self.threshold:
                q = term / self.threshold
                if rand() < q:
                    # continue and update the weight
                    weight = weight / q
                else:
                    # stop the summation
                    weight = 0

            samples[iteration] += weight * term
            estimate_idx += 1
            series_term_idx += 1

        logging.info("RR estimate %d/%d with threshold %.2f is %.4f and took %d series terms"
                     % (iteration + 1, E, self.threshold, samples[iteration], series_term_idx))

    # Now put things together. Note that samples contains an unbiased estimate
    # which might be quite small. However, due to the removal of the bound,
    # this will not cause an underflow, and we can just take the log.
    logging.debug("Leaving")
    return bound + sum(log(samples))
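# Minimal standalone sketch of the Russian-roulette idea used above, applied
# to the series exp(x) = sum_k x^k / k! (threshold and x are invented; this
# illustrates the technique, not the class's exact estimator):
from numpy import exp
from numpy.random import rand

def rr_exp(x, threshold=1e-3):
    estimate, term, weight, k = 1.0, 1.0, 1.0, 1
    while weight > 0:
        term *= x / k
        if abs(term) < threshold:
            q = abs(term) / threshold
            if rand() < q:
                weight /= q   # survive: reweight to keep the estimate unbiased
            else:
                weight = 0.0  # killed: truncate the series here
        estimate += weight * term
        k += 1
    return estimate

print(rr_exp(1.5), exp(1.5))  # unbiased estimate vs. exact value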
def precompute_likelihood_estimates(self, tau, kappa):
    logging.debug("Entering")

    # submit all jobs for the log-determinant of Q
    aggregators_Q = []
    for _ in range(self.num_estimates):
        job = OzoneLogDetJob(ScalarResultAggregator(), self, tau, kappa, "Q")
        aggregators_Q.append(self.computation_engine.submit_job(job))

    # submit all jobs for the log-determinant of M
    aggregators_M = []
    for _ in range(self.num_estimates):
        job = OzoneLogDetJob(ScalarResultAggregator(), self, tau, kappa, "M")
        aggregators_M.append(self.computation_engine.submit_job(job))

    # submit job for the remainder of the likelihood
    job = OzoneLikelihoodWithoutLogDetJob(ScalarResultAggregator(), self, tau, kappa)
    aggregator_remainder = self.computation_engine.submit_job(job)

    # grab a coffee
    self.computation_engine.wait_for_all()

    # collect results from all aggregators
    log_dets_Q = zeros(self.num_estimates)
    log_dets_M = zeros(self.num_estimates)
    for i in range(self.num_estimates):
        aggregators_Q[i].finalize()
        aggregators_M[i].finalize()
        log_dets_Q[i] = aggregators_Q[i].get_final_result().result
        log_dets_M[i] = aggregators_M[i].get_final_result().result
        aggregators_Q[i].clean_up()
        aggregators_M[i].clean_up()

    aggregator_remainder.finalize()
    result_remainder = aggregator_remainder.get_final_result().result
    aggregator_remainder.clean_up()

    # load n since it is needed for the likelihood
    y, _ = OzonePosterior.load_ozone_data()
    n = len(y)

    # construct all likelihood estimates
    log_det_parts = 0.5 * log_dets_Q + 0.5 * n * log(tau) - 0.5 * log_dets_M
    estimates = log_det_parts + result_remainder

    # crude check for an overflow, to print error details
    limit = 1e100
    indices = where(abs(estimates) > limit)[0]
    if len(indices) > 0:
        logging.info("Log-likelihood estimate overflow occurred at the following indices:")
        for idx in indices:
            logging.info("At index %d. Details are: " % idx)
            logging.info("log-det Q: " + aggregators_Q[idx].job_name
                         + ". Result is %f" % log_dets_Q[idx])
            logging.info("log-det M: " + aggregators_M[idx].job_name
                         + ". Result is %f" % log_dets_M[idx])
            logging.info("log-lik-without-log-det: " + aggregator_remainder.job_name
                         + ". Result is %f" % result_remainder[idx])

        logging.info("Removing mentioned estimates from list")
        estimates = estimates[abs(estimates) < limit]
        logging.info("New number of estimates is %d, old was %d"
                     % (len(estimates), self.num_estimates))

    logging.debug("Leaving")
    return estimates
def step(self):
    """
    Performs one Metropolis-Hastings step, updates internal state and returns
    sample_object, proposal_2d, accepted, log_lik, log_ratio, where

    sample_object - new or old sample_object (row-vector)
    accepted      - boolean whether accepted
    log_lik       - log-likelihood of the returned sample_object
    log_ratio     - log probability of acceptance
    """
    # create proposal around the current point in the first step only
    dim = self.distribution.dimension
    if self.Q is None:
        current_1d = reshape(self.current_sample_object.samples, (dim,))
        self.Q = self.construct_proposal(current_1d)

    # propose sample_object and construct new Q centred at proposal_2d
    proposal_object = self.Q.sample(1)
    proposal_2d = proposal_object.samples
    proposal_1d = reshape(proposal_2d, (dim,))
    Q_new = self.construct_proposal(proposal_1d)

    # 2d view of the current point
    current_2d = reshape(self.current_sample_object.samples, (1, dim))

    # First find out whether this sampler is Gibbs (which has a full target)
    # or MH (otherwise).
    if isinstance(self.distribution, FullConditionals):
        log_lik_proposal = self.distribution.full_target.log_pdf(
            self.distribution.get_current_state_array())
        accepted = True
        log_ratio = log(1)
    else:
        # do a normal MH step and compute the acceptance ratio,
        # evaluating both proposal densities unless Q is symmetric
        if not self.is_symmetric:
            log_Q_proposal_given_current = self.Q.log_pdf(proposal_2d)
            log_Q_current_given_proposal = Q_new.log_pdf(current_2d)
        else:
            log_Q_proposal_given_current = 0
            log_Q_current_given_proposal = 0

        log_lik_proposal = self.distribution.log_pdf(proposal_2d)
        log_ratio = log_lik_proposal - self.log_lik_current \
                    + log_Q_current_given_proposal - log_Q_proposal_given_current
        log_ratio = min(log(1), log_ratio)

        accepted = log_ratio > log(rand(1))

    if accepted:
        self.log_lik_current = log_lik_proposal
        sample_object = proposal_object
        self.Q = Q_new
    else:
        sample_object = self.current_sample_object

    # adapt state: position and proposal_2d
    self.current_sample_object = sample_object

    return StepOutput(sample_object, proposal_object, accepted,
                      self.log_lik_current, log_ratio)
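# Self-contained random-walk Metropolis-Hastings sketch of the accept/reject
# logic in step() above (symmetric proposal, so the Q terms vanish; the
# standard-normal target is invented for illustration):
from numpy import log
from numpy.random import rand, randn

def target_log_pdf(x):
    return -0.5 * x * x  # unnormalised standard normal

x = 0.0
log_lik = target_log_pdf(x)
samples = []
for _ in range(1000):
    proposal = x + randn()
    log_lik_proposal = target_log_pdf(proposal)
    log_ratio = min(log(1), log_lik_proposal - log_lik)
    if log_ratio > log(rand()):
        x, log_lik = proposal, log_lik_proposal
    samples.append(x)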
def log_lik_vector(self, y, f):
    # numerically stable -log(1 + exp(-y*f)): shifting by max(s, 0) keeps both
    # exponents non-positive (shifting by min overflows for large |y*f|)
    s = -y * f
    ps = asarray([max(x, 0) for x in s])
    lp = -(ps + log(exp(-ps) + exp(s - ps)))
    return lp
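# Demonstration of the stable shift on an extreme input (values invented);
# the naive -log(1 + exp(-y*f)) would overflow here:
from numpy import exp, log

y, f = 1.0, 1000.0
s = -y * f          # -1000
ps = max(s, 0)      # 0
print(-(ps + log(exp(-ps) + exp(s - ps))))  # ~ 0.0, i.e. -log(1 + exp(-1000))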
def test_masked_unary_operations(self):
    # Tests masked_unary_operation
    (x, mx) = self.data
    with np.errstate(divide='ignore'):
        self.assertTrue(isinstance(log(mx), mmatrix))
        assert_equal(log(x), np.log(x))
from numpy import log

def logFunction(p, x):
    ''' Log function: p[0] + log(p[1]*x) '''
    return p[0] + log(p[1] * x)
def test_log_sum_exp(self):
    X = asarray([0.1, 0.2, 0.3, 0.4])
    direct = log(sum(exp(X)))
    indirect = GPTools.log_sum_exp(X)
    self.assertLessEqual(norm(direct - indirect), 1e-10)
def log_lik_vector_multiple(self, y, F):
    # vectorised version of log_lik_vector, with the same max(s, 0) shift
    # for numerical stability
    S = -y * F
    PS = asarray([asarray([max(x, 0) for x in s]) for s in S])
    LP = -(PS + log(exp(-PS) + exp(S - PS)))
    return LP
def scale_adapt(self, learn_scale, step_output):
    which_component = step_output.sample.which_component
    # Robbins-Monro update of the per-component scale, done on the log scale
    self.dwscale[which_component] = exp(log(self.dwscale[which_component])
                                        + learn_scale * (exp(step_output.log_ratio) - self.accstar))
def test_masked_unary_operations(self):
    # Tests masked_unary_operation
    (x, mx) = self.data
    with np.errstate(divide='ignore'):
        assert_(isinstance(log(mx), MMatrix))
        assert_equal(log(x), np.log(x))
from numpy import exp, log

def inverseOfDecreasingExponentialFunction(p, y):
    """ Inverse exponential function: x = e^-(log(y/p[0])/p[1]) """
    return exp(-1 * log(y / p[0]) / p[1])
def scale_adapt(self, learn_scale, step_output):
    # learn_scale follows a 1/iteration schedule and acts as the learning rate
    self.globalscale = exp(log(self.globalscale)
                           + learn_scale * (exp(step_output.log_ratio) - self.accstar))
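# Standalone sketch of this Robbins-Monro scale adaptation: the log-scale is
# nudged towards the target acceptance rate accstar (all values invented):
from numpy import exp, log

scale, accstar = 1.0, 0.234
for it, acc_prob in enumerate([0.1, 0.5, 0.3, 0.2], start=1):
    learn_scale = 1.0 / it
    scale = exp(log(scale) + learn_scale * (acc_prob - accstar))
print(scale)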