def UpdateModelParamsFromPrior(state):
    alpha = state.hypers['alpha']
    alpha_token = state.hypers['alpha_token']
    state.topic_sub_weights = (
        cDists.SampleDirichletArray(alpha * np.ones((state.n_topics, state.n_subs))))
    state.topic_rel_weights = (
        cDists.SampleDirichletArray(alpha * np.ones((state.n_topics, state.n_rels))))
    state.type_sub_weights = (
        cDists.SampleDirichletArray(alpha * np.ones((state.n_types, state.n_subs))))
    try:
        state.sub_token_weights = (
            cDists.SampleDirichletArray(alpha_token * np.ones((state.n_subs, state.n_tokens))))
        state.rel_token_weights = (
            cDists.SampleDirichletArray(alpha_token * np.ones((state.n_rels, state.n_tokens))))
    except ZeroDivisionError:
        # If alpha_token is extremely small, approximate the Dirichlet draw
        # with a (near-)delta function on a randomly chosen token.
        logging.warning('Token alpha %r very small. Approximating the Dirichlet draw '
                        'with a delta function.', alpha_token)
        eps = 1e-4  # Small constant to prevent numeric instability.
        sub_assign = random.randint(state.n_tokens, size=state.n_subs)
        sub_token_weights = np.zeros((state.n_subs, state.n_tokens)) + eps
        sub_token_weights[np.arange(state.n_subs), sub_assign] = 1. - eps
        state.sub_token_weights = sub_token_weights
        # The assignments index into the token dimension (n_tokens), not the
        # relation dimension.
        rel_assign = random.randint(state.n_tokens, size=state.n_rels)
        rel_token_weights = np.zeros((state.n_rels, state.n_tokens)) + eps
        rel_token_weights[np.arange(state.n_rels), rel_assign] = 1. - eps
        state.rel_token_weights = rel_token_weights
    state.type_names = pandas.Series({0: 'ref', 1: 'real'})
    state.topic_weights = random.dirichlet(np.repeat(alpha, state.n_topics))
    state.type_weights = random.dirichlet(np.repeat(alpha, state.n_types))
def sample_topic_parameters(self):
    for i in xrange(self.N):
        z_sum = 0
        for j in self.I_U[i]:
            z_sum += self.z_U[i, j]
        self.theta_U[i] = dirichlet(self.alpha / self.K_U + z_sum)
    for j in xrange(self.M):
        z_sum = 0
        for i in self.I_V[j]:
            z_sum += self.z_V[i, j]
        self.theta_V[j] = dirichlet(self.alpha / self.K_V + z_sum)
def compute_logp_independent_block_mc(N, alpha_row=None, alpha_col=None, iterations=1e5):
    """Compute the Monte Carlo log-likelihood of a matrix under the
    assumption of independence.
    """
    if N.size == 1:
        return 0
    if alpha_row is None:
        alpha_row = np.ones(N.shape[1])
    if alpha_col is None:
        alpha_col = np.ones(N.shape[0])
    theta_row = dirichlet(alpha_row, size=int(iterations)).T
    theta_col = dirichlet(alpha_col, size=int(iterations)).T
    Theta = theta_row[:, None, :] * theta_col
    logp_ibs = (gammaln(N.sum() + 1) - gammaln(N + 1).sum()
                + (np.log(Theta) * N[:, :, None]).sum(0).sum(0))
    return logmean(logp_ibs)
def _initialize_gibbs_sampler(self):
    """
    Initialize the Gibbs sampler

    This sets the initial values of the C{labels} and C{thetas} parameters.
    """
    pi = log(dirichlet(self.hyp_pi, 1)[0])
    categories = self._categories()
    documents = self._documents()
    self.thetas = empty(self.hyp_thetas.shape)
    for category_index in xrange(categories):
        self.thetas[category_index] = log(dirichlet(self.hyp_thetas[category_index], 1)[0])
    self.labels = array([multinomial_sample(pi) for _ in xrange(documents)])
def sample_topic_parameters(self):
    # user topic assignment
    for i in xrange(self.N):
        z_sum = 0
        for j in self.I_U[i]:
            z_sum += self.z_U[i, j]
        self.theta_U[i] = dirichlet(self.alpha / self.K_U + z_sum)
    # item topic assignment
    for j in xrange(self.M):
        z_sum = 0
        for i in self.I_V[j]:
            z_sum += self.z_V[i, j]
        self.theta_V[j] = dirichlet(self.alpha / self.K_V + z_sum)
def simulate_combat(self, allowed_time, ant_0_scoring=ConservativeScore,
                    ant_1_scoring=ConservativeScore, log=None):
    start = time.time()
    score_0 = ant_0_scoring(self, 0)
    score_1 = ant_1_scoring(self, 1)
    self.allowed_policies()
    init_poses = dict((a, a.pos) for a in self.ants)
    killed = []
    steps = 0
    while (time.time() - start) < allowed_time:
        steps += 1
        action = {}
        # Restore any ants killed in the previous rollout and reset positions.
        for k in killed:
            self.add_ant(k)
        for a, p in init_poses.iteritems():
            a.pos = p
        # Sample one action per ant from its current policy weights.
        for ant in self.ants:
            ps = dirichlet(self.policy[ant])
            i = multinomial(1, ps).nonzero()[0][0]
            if not self.move_direction(ant, self.actions[i]):
                print "CAZZZ"
            action[ant] = i
        killed = self.step_turn()
        # Reinforce the chosen actions by each player's score.
        for a, p in self.policy.iteritems():
            if a.owner == 0:
                p[action[a]] += score_0(self)
            else:
                p[action[a]] += score_1(self)
    for k in killed:
        self.add_ant(k)
    for a, p in init_poses.iteritems():
        a.pos = p
    retpolicy = {}
    for a, p in self.policy.iteritems():
        ps = dirichlet(p)
        i = multinomial(1, ps).nonzero()[0][0]
        retpolicy[a] = self.actions[i]
    if log is not None:
        log.info("Number of steps: %d", steps)
    else:
        print "Number of steps: ", steps
    return retpolicy
def _sampleFromModel(self, D=200, T=100, K=10, avgWordsPerDoc=500):
    '''
    Create a test dataset according to the model

    Params:
        D              - Sample documents (each with associated features)
        T              - Vocabulary size, the number of "terms". Must be a square number
        K              - Observed topics
        avgWordsPerDoc - average number of words per document generated (Poisson)

    Returns:
        tpcs    - the D x K matrix of per-document topic distributions
        vocab   - the K x T matrix of per-topic word distributions
        docLens - the vector of document lengths
        W       - the D x T word-count matrix
    '''
    # Generate vocab
    beta = 0.1
    betaVec = np.ndarray((T,))
    betaVec.fill(beta)
    vocab = rd.dirichlet(betaVec, size=K)

    # Generate the shared covariance matrix
    # ...no real structure in this.
    sigT = rd.random((K, K))
    sigT = sigT.dot(sigT)

    # Generate topic mean
    alpha = 1
    alphaVec = np.ndarray((K,))
    alphaVec.fill(alpha)
    topicMean = rd.dirichlet(alphaVec)

    # Generate the actual topics.
    tpcs = rd.multivariate_normal(topicMean, sigT, size=D)
    tpcs = rowwise_softmax(tpcs)

    # Generate the corpus
    docLens = rd.poisson(avgWordsPerDoc, (D,)).astype(np.float32)
    W = tpcs.dot(vocab)
    W *= docLens[:, np.newaxis]
    W = np.array(W, dtype=np.int32)  # truncate word counts to integers
    W = ssp.csr_matrix(W)

    # Return the true parameter values and the generated observations
    return tpcs, vocab, docLens, W
def generate_corpus():
    beta = np.zeros((NUM_TOPICS, VOCAB_SIZE))
    corpus = np.zeros((NUM_DOCS, VOCAB_SIZE), dtype='int64')
    # Draw per-topic word distributions
    for k in range(NUM_TOPICS):
        beta[k, :] = nprand.dirichlet(eta)
    for m in range(NUM_DOCS):
        # Draw per-document topic distribution
        theta = nprand.dirichlet(alpha)
        for i in range(DOC_LENGTH):
            topic = lda.sample(theta)
            word = lda.sample(beta[topic, :])
            corpus[m, word] += 1
    return (corpus, beta)
def sample_pi(alpha, zz):
    r"""
    Sample mixing weights from the posterior distribution p(\pi|Z).

    Inputs
    ------
    alpha: array [nr_clusters]
        The parameters of the prior of the mixing weights Dirichlet(\pi|alpha).
    zz: array [nr_points]
        The observed cluster assignments of each of the N points.

    Output
    ------
    pi: array [nr_clusters]
        Sample for the mixing weights.
    """
    K = len(alpha)
    # Count how many times each state appears.
    counts = np.zeros(K)
    for state in zz:
        counts[state] += 1
    alpha_new = np.array(alpha) + counts
    pi = dirichlet(alpha_new)
    return pi
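# Hypothetical usage sketch (not part of the original source): a quick call to
# sample_pi above with made-up toy data, assuming numpy is imported as np and
# numpy.random.dirichlet as dirichlet, as that snippet expects.
toy_alpha = np.ones(3)                     # symmetric Dirichlet prior over 3 clusters
toy_zz = np.array([0, 0, 1, 2, 2, 2])      # toy cluster assignments for 6 points
toy_pi = sample_pi(toy_alpha, toy_zz)      # one posterior draw; toy_pi sums to 1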
def _init_latent_params(self):
    V = self.n_actors
    A = self.n_actions
    T = self.n_timesteps
    R = self.n_regimes
    C = self.n_communities
    K = self.n_topics

    if self.gam is None:
        self.gam = (0.1 ** (1. / 4)) * (R + K + C + C)
        print 'Setting gam to: %f' % self.gam

    self.zeta = 1.
    self.delta = 1.
    self.rho_R = sample_gamma(self.gam / (R + K + C + C), 1. / self.zeta, size=R)
    self.nu_K = sample_gamma(self.gam / (R + K + C + C), 1. / self.zeta, size=K)
    self.eta_d_C = sample_gamma(self.gam / (R + K + C + C), 1. / self.zeta, size=C)
    self.eta_a_C = sample_gamma(self.gam / (R + K + C + C), 1. / self.zeta, size=C)

    self.d = 1.
    shp_RKCC = np.ones((R, K, C, C))
    shp_RKCC[:] = np.outer(self.eta_d_C, self.eta_d_C)
    shp_RKCC[:, :, np.identity(C).astype(bool)] = self.eta_a_C * self.eta_d_C
    shp_RKCC *= self.nu_K[None, :, None, None]
    shp_RKCC *= self.rho_R[:, None, None, None]
    self.Lambda_RKCC = sample_gamma(shp_RKCC, 1. / self.d)

    self.Psi_TR = sample_gamma(self.e, 1. / self.f, size=(T, R))

    self.Phi_AK = np.ones((A, K))
    self.Phi_AK[:, :] = rn.dirichlet(self.e * np.ones(A), size=K).T

    self.alpha_V = np.ones(V) * self.e
    self.beta = 1.
    self.Theta_VC = np.ones((V, C))
def bayesian_bootstrap(self, par=1):
    weight = npr.dirichlet([par] * self.n, (self.times, self.num_samples))
    # Loop over `times` first so the flat list matches the reshape below.
    samples = [npr.choice(self.data[j], size=self.n, p=weight[j][i])
               for j in xrange(self.times)
               for i in xrange(self.num_samples)]
    samples = np.array(samples).reshape(self.times, self.num_samples, self.n)
    return samples
def sample(self, observations_by_state):
    """
    Sample a new set of distribution parameters given a sample of observations
    from the given state. The internal parameters are updated.

    Parameters
    ----------
    observations_by_state : [ numpy.array with shape (N_k,) ] with nstates elements
        observations_by_state[k] are all observations associated with hidden state k

    Examples
    --------

    initialize output model

    >>> B = np.array([[0.5, 0.5], [0.1, 0.9]])
    >>> output_model = DiscreteOutputModel(B)

    sample given observation

    >>> obs = [[0, 0, 0, 1, 1, 1], [1, 1, 1, 1, 1, 1]]
    >>> output_model.sample(obs)

    """
    from numpy.random import dirichlet
    N, M = self._output_probabilities.shape  # nstates, nsymbols
    for i in range(len(observations_by_state)):
        # count symbols found in data
        count = np.bincount(observations_by_state[i], minlength=M).astype(float)
        # if there are no counts at all we cannot sample, so leave the
        # output probabilities for this state as they are
        if count.sum() > 0:
            # sample from the Dirichlet posterior: prior pseudo-counts + observed counts
            self._output_probabilities[i, :] = dirichlet(self.prior[i] + count)
def newQueryState(data, modelState):
    '''
    Creates a new LDA QueryState object. This contains all parameters and
    random variables tied to individual datapoints.

    Param:
        data       - the dataset, of which only the DxT document-term matrix
                     data.words is used for training or querying
        modelState - the model state object

    Return:
        A QueryState object
    '''
    docLens = np.squeeze(np.asarray(data.words.sum(axis=1)))

    # Initialise the per-token assignments at random according to the dirichlet hyper
    # This is super-slow
    dist = modelState.topicPrior.copy()
    dist /= dist.sum()

    topicDists = rd.dirichlet(dist, size=data.doc_count).astype(modelState.dtype)
    topicDists *= docLens[:, np.newaxis]
    topicDists += modelState.topicPrior[np.newaxis, :]

    return QueryState(docLens, topicDists, False)
def test_case_generator():
    sys.path.append('solutions/hw2')
    sys.path.append('suppl/hw2')
    from assignment_two_adaboost import weak_learner as wl
    sys.path.pop()
    sys.path.pop()
    from kernels import rbf

    seed(1)
    instances = normal(size=(50, 5))
    labels = binomial(1, 0.5, 50)
    dist = dirichlet(uniform(size=50))
    ker = rbf(1)
    mat = uniform(size=(5, 5))
    mat = (mat / np.sum(mat, axis=1)).T
    test_cases = {'assignment_two_adaboost': {
                      'compute_error': [lambda x: x[3] < 0.2, instances, labels, dist],
                      'run_adaboost': [instances, labels, wl],
                      'update_dist': [lambda x: x[2] > -0.2, instances, labels, dist, normal()],
                      'weak_learner': [instances, labels, dist]},
                  'assignment_two_pagerank': {
                      'compute_pageranks': [mat],
                      'main': []},
                  'assignment_two_svm': {
                      'evaluate_classifier': [lambda x: norm(x) > 5, instances, labels],
                      'svm_train': [instances, labels, ker]}}
    return test_cases
def newModelAtRandom(data, K, topicPrior=None, vocabPrior=None, dtype=DTYPE):
    '''
    Creates a new LDA ModelState for the given training set and the given number
    of topics. Everything is instantiated purely at random. This contains all
    parameters independent of the dataset (e.g. learnt priors)

    Param:
        data       - the dataset of words, features and links of which only words
                     are used in this model
        K          - the number of topics
        topicPrior - the prior over topics, either a scalar or a K-dimensional vector
        vocabPrior - the prior over vocabs, either a scalar or a T-dimensional vector
        dtype      - the datatype to be used throughout.

    Return:
        A ModelState object
    '''
    assert K > 1, "There must be at least two topics"

    T = data.words.shape[1]

    if topicPrior is None:
        topicPrior = constantArray((K,), 50.0 / K, dtype)  # From Griffiths and Steyvers 2004
    if vocabPrior is None:
        vocabPrior = 1.1  # Also from G&S

    vocabPriorVec = constantArray((T,), vocabPrior, dtype)
    wordDists = rd.dirichlet(vocabPriorVec, size=K).astype(dtype)

    # Perturb to avoid zero probabilities
    wordDists += 1. / T
    wordDists /= (wordDists.sum(axis=1))[:, np.newaxis]

    return ModelState(K, topicPrior, vocabPrior, wordDists, dtype, MODEL_NAME)
def sample(self):
    '''
    Draw a single sample from the Dirichlet distribution.

    alpha : array
        Parameter of the distribution (k dimension for a sample of dimension k).
    '''
    return npr.dirichlet(self.alpha, size=None)
def sample(self, eta, size=1):
    """
    @param eta: the natural parameters
    @param size: the size of the sample
    @return: A sample of sufficient statistics
    """
    from numpy.random import dirichlet
    return self.T(dirichlet(self.theta(eta), size=size))
def compute_logp_independent_block_mc(X, alpha=None, iterations=1e5):
    """Compute the Monte Carlo log-likelihood of a matrix under the
    assumption of independence.
    """
    if alpha is None:
        alpha = np.ones(X.shape[1])
    Theta = dirichlet(alpha, size=int(iterations)).T
    # log(\prod(one Multinomial pdf for each row))
    logp_ibs = (gammaln(X.sum(1) + 1).sum() - gammaln(X + 1).sum(1).sum()
                + (np.log(Theta[:, None, :]) * X[:, :, None]).sum(1).sum(0))
    return logmean(logp_ibs)
def _random_population(self):
    """
    Generate a random population on the unit simplex of appropriate
    dimensionality.
    """
    rand.seed()
    return rand.dirichlet([1] * len(self.types))
def node_prob(tran_prior, node_tran_sample, num_node):
    node_prob_mat = tran_prior + node_tran_sample
    # Draw each row of the transition matrix from its Dirichlet posterior.
    for i in range(num_node):
        node_prob_mat[i, :] = npr.dirichlet(node_prob_mat[i, :])
    node_prob_arr = np.reshape(node_prob_mat, num_node * num_node)
    # print(node_prob_arr, 'node_prob_arr')
    # print(np.sum(node_prob_arr[0:9]), 'np.sum(node_prob_arr[0:9])')
    return node_prob_arr
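# Hypothetical usage sketch (not part of the original source): calls node_prob
# above with made-up prior pseudo-counts and observed transition counts, assuming
# numpy is imported as np and numpy.random as npr, as that snippet expects.
toy_prior = np.ones((3, 3))                        # flat Dirichlet prior per row
toy_counts = np.array([[4., 1., 0.],
                       [0., 5., 2.],
                       [1., 1., 1.]])              # toy observed transition counts
toy_probs = node_prob(toy_prior, toy_counts, 3)    # flat vector; each row block sums to 1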
def apply(self, agents):
    alpha = dirichlet(ones(len(agents)))
    values = {}
    for param in self.params:
        aux = [agent.__dict__[param] for agent in agents]
        values[param] = real_crossover.recombinate(alpha, aux)
    _cls = agents[0].__class__
    return [_cls(**values)]
def initialize_uniformly(data, D, K):
    mus = npr.uniform(low=0.7, high=1.3, size=[K, D])
    sigmas = np.zeros((K, D, D))
    for k in range(K):
        sigmas[k, :, :] = np.eye(D, D)
    pis = npr.dirichlet(np.ones(K))  # mixture coefficients
    return mus, sigmas, pis
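# Hypothetical usage sketch (not part of the original source): initializes a toy
# mixture with initialize_uniformly above; the data array is made up and, as in
# that snippet, is not actually inspected during initialization.
toy_data = npr.normal(size=(100, 2))              # 100 points in 2 dimensions
mus0, sigmas0, pis0 = initialize_uniformly(toy_data, D=2, K=3)
assert np.isclose(pis0.sum(), 1.0)                # the mixture weights lie on the simplex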
def _randomise(self):
    """
    Randomise the variational parameters.
    """
    from numpy.random import dirichlet
    self.phi = dirichlet(ones(self.K), size=self.N)
    self.tau = outer(ones(self.K), self._lambda)
    self.gamma[:, 0] = 1.
    self.gamma[:, 1] = self.alpha
def posterior_samples(Nlx, Nly, nsamps=100, verbose=False, clipthresh=1e-6):
    estimates = []
    if verbose:
        misc.pnn("\n %d samples to produce:" % nsamps)
    for i in range(nsamps):
        if verbose:
            misc.pnn(i)
        p = np.array([npr.dirichlet(x + 1) for x in Nlx])
        htilde = np.array([npr.dirichlet(x + 1) for x in Nly])
        qtilde = estimation.train_mlm_fixedp(htilde, p)
        # get the central q, for uniqueness
        q = polytopes.find_central(p, qtilde, clipthresh=clipthresh)
        estimates.append((p, q))
    return estimates
def gibbssample1(bn, likeexp):
    bn1 = copybn(bn)
    for node in bn1.keys():
        for parstates in bn1[node]['cp']:
            p1 = tuple(likeexp[node]['cp'][parstates].values())
            newparams = random.dirichlet(p1).tolist()
            skeys = bn[node]['cp'][parstates].keys()
            bn1[node]['cp'][parstates] = dict(zip(skeys, newparams))
    return bn1
def gen_synthetic_graph(N, nc):
    graph = nx.Graph(name='synthesized author graph')
    cluster_sizes = [int(round(cs)) for cs in dirichlet([7] * nc) * N]
    ph_s = dirichlet([1] * nc)
    pr_s = dirichlet([1] * nc)
    pv_s = dirichlet([1] * nc)
    SIGMA = 0.6
    TAU = 0.9
    AVG_PER_CLASS_PROD = 5
    mus = normal(loc=5.5, scale=3, size=nc)
    all_products = range(nc * AVG_PER_CLASS_PROD)
    pi_s = []
    for ci in range(nc):
        pi_s.append(dirichlet([0.5] * len(all_products)))
    author_prod_map = {}
    # generate nodes
    for ci in range(nc):
        for ni in range(cluster_sizes[ci]):
            graph.add_node(len(graph), acluster=ci,
                           revLen=normal(loc=mus[ci], scale=SIGMA),
                           isRealName=binomial(1, pr_s[ci]) == 1,
                           hlpful_fav_unfav=binomial(1, ph_s[ci]) == 1,
                           vrf_prchs_fav_unfav=binomial(1, pv_s[ci]) == 1)
    # generate edges
    for a, b in itertools.combinations(graph.nodes(), 2):
        if not binomial(1, min(15.0 / len(graph), 1.0)):
            continue
        if graph.node[a]['acluster'] == graph.node[b]['acluster']:
            if binomial(1, TAU):
                graph.add_edge(a, b, weight=np.clip(normal(1, scale=0.25), 0, 1), denom=5)
        else:
            if binomial(1, 1 - TAU):
                graph.add_edge(a, b, weight=np.clip(normal(0.5, scale=0.25), 0, 1), denom=5)
    # keep only the largest component
    # components = nx.connected_components(graph)
    # largest_component_i = np.argmax([len(c) for c in components])
    # largest_component = set(components[largest_component_i])
    # graph.remove_nodes_from([n for n in graph if n not in largest_component])
    # generate author_prod_map
    for n in graph:
        ci = graph.node[n]['acluster']
        nprods = randint(1, len(all_products) / 2)
        author_prod_map[n] = list(np.nonzero(multinomial(nprods, pi_s[ci]))[0])
    return graph, author_prod_map, cluster_sizes
def jitter(self, concentration=100):
    pi = self.params[0]
    # Propose new mixture weights from a Dirichlet centred on the current ones.
    new_pi = npr.dirichlet(concentration * pi) + 1e-8
    new_pi /= new_pi.sum()
    # Forward and reverse proposal log-densities; this `dirichlet` must provide
    # logpdf (e.g. scipy.stats.dirichlet), unlike npr.dirichlet above.
    fwd_lp = dirichlet(concentration * pi).logpdf(new_pi)
    rev_lp = dirichlet(concentration * new_pi).logpdf(pi)
    new_cluster = copy.deepcopy(self)
    new_cluster._params = (new_pi,)
    return new_cluster, fwd_lp, rev_lp
def test_multiplicative_replacement(self):
    x1 = dirichlet(self.a)
    y1 = insert(x1, 3, 0)
    u = multiplicative_replacement(y1)
    assert allclose(y1, u, atol=1e-2), \
        "Multiplicative replacement perturbation is too large."
    assert isclose(sum(u), 1), \
        "Multiplicative replacement does not yield a composition."
def random_initialize_t(self):
    """
    Initialize the translations by drawing a Categorical distribution
    (t(f_1|e),...,t(f_{V_f_size}|e)) for each e from a Dirichlet distribution:

        (t(f_1|e),...,t(f_{V_f_size}|e)) ~ Dir(0.1,...,0.1).
    """
    print("Initializing t randomly")
    self.t = dirichlet((0.1, ) * self.V_f_size, size=self.V_e_size).T
def main_plot_multi_bar():
    from numpy import ones
    from numpy.random import dirichlet
    n_states = 8
    n_cat = 3
    pi = dirichlet(ones(n_states), size=n_cat).T
    print(pi)
    pm = PlotModels(1, 1, 1)
    pm.multi_bar((0, 0), pi)
    embed(header='main_plot_multi_bar')
def log_multivariate_polya_mc(X, alpha, iterations=1e5):
    """Monte Carlo estimation of the log-likelihood of the Dirichlet compound
    multinomial (DCM) distribution, a.k.a. the multivariate Polya distribution.
    """
    Theta = dirichlet(alpha, size=int(iterations))
    logp_Hs = gammaln(X.sum() + 1) - gammaln(X + 1).sum()
    logp_Hs += (X * np.log(Theta)).sum(1)
    return logmean(logp_Hs)
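# Hypothetical usage sketch (not part of the original source): estimates the DCM
# log-likelihood of a made-up count vector with log_multivariate_polya_mc above,
# assuming numpy, scipy.special.gammaln, numpy.random.dirichlet and logmean are
# imported as that snippet expects.
X_toy = np.array([3, 1, 6])        # toy counts over 3 categories
alpha_toy = np.ones(3)             # symmetric Dirichlet parameter
print(log_multivariate_polya_mc(X_toy, alpha_toy, iterations=1e4))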
def parameterSample(self, allData):
    '''
    Given the document labels, resample the parameters for each topic.
    '''
    allData.sort(key=lambda a: a[0])
    grouped = [(k, [thing[1] for thing in g])
               for k, g in groupby(allData, lambda s: s[0])]
    group_counts = [(group[0],
                     reduce(lambda a, b: [a[i] + b[i] for i in range(len(a))], group[1]))
                    for group in grouped]
    group_probs = [(k, r.dirichlet(c).tolist()) for k, c in group_counts]
    return group_probs
def generate_corpus(categories, vocabulary, documents, hyp_pi=None, hyp_thetas=None):
    """Create true parameters and sample data for a corpus of labeled documents.

    @param categories: number of categories
    @type categories: integer
    @param vocabulary: vocabulary size
    @type vocabulary: integer
    @param documents: number of documents in the corpus
    @type documents: integer
    @param hyp_pi: optional category hyperparameter, default uninformative
    @type hyp_pi: list or None
    @param hyp_thetas: optional word count hyperparameter, default uninformative
    @type hyp_thetas: list or None
    @return: word distributions per category, documents, document labels
    @rtype: tuple
    """
    # Set up the hyperparameters.
    if hyp_pi is None:
        hyp_pi = [1] * categories
    if len(hyp_pi) != categories:
        raise Exception()
    if hyp_thetas is None:
        hyp_thetas = [1] * vocabulary
    if len(hyp_thetas) != vocabulary:
        raise Exception()
    # Note: the hard-coded values below override the arguments validated above.
    categories = 2        # number of categories L
    vocabulary = 499      # number of words V
    documents = 10        # number of documents W
    hyp_pi = ones(categories, int)
    hyp_thetas = ones(vocabulary, int)  # 1-D so it can be passed to dirichlet below
    pi = log(dirichlet(hyp_pi, 1)[0])
    thetas = dirichlet(hyp_thetas, categories)
    corpus = empty((documents, vocabulary), int)
    labels = empty(documents, int)
    for document_index in range(documents):
        category = multinomial_sample(pi)
        labels[document_index] = category
        corpus[document_index] = multinomial(vocabulary * 100, thetas[category])
    return log(thetas), corpus, labels
def init_expt(self, data_len):
    '''
    qz.init_expt(data_len)

    @argvs
    data_len: int

    @self
    expt: (n_states, data_len)
    '''
    alpha_pi = ones(self.n_cat)
    expt = dirichlet(alpha_pi, size=data_len).T
    self.set_expt(expt)
def gibbssample2(bn, prior, likeexp):
    bn1 = copybn(bn)
    for node in bn1.keys():
        for parstates in bn1[node]['cp']:
            p0 = prior[node]['cp'][parstates].values()
            p1 = likeexp[node]['cp'][parstates].values()
            p2 = tuple([x + y for x, y in zip(p0, p1)])
            newparams = random.dirichlet(p2).tolist()
            skeys = bn[node]['cp'][parstates].keys()
            bn1[node]['cp'][parstates] = dict(zip(skeys, newparams))
    return bn1
def time_to_mutation_rate(tree):
    if not hasattr(GC, "NUMPY_SEEDED"):
        from numpy.random import seed as numpy_seed
        numpy_seed(seed=GC.random_number_seed)
        GC.random_number_seed += 1
        GC.NUMPY_SEEDED = True
    t = read_tree_newick(tree)
    for node in t.traverse_preorder():
        if node.edge_length is not None:
            node.edge_length *= dirichlet(alpha=GC.tree_rate_alpha)
    return str(t)
def gen_data(means, precis, n):
    weight = dirichlet(np.ones(means.shape[0]))
    count = multinomial(n, weight)
    data = np.zeros((n, means.shape[1]))
    start = 0
    for i in range(len(count)):
        data[start:start + count[i], :] = normal(
            means[i], np.diag(precis[i] * np.ones(means.shape[1])), count[i])
        start = start + count[i]
    s = np.arange(n)
    np.random.shuffle(s)
    return data[s]
def _process_cfp(self, cfp: "CFP"):
    if not cfp.is_buy:
        return
    if self.awi.is_bankrupt(cfp.publisher) or not self.can_expect_agreement(cfp=cfp, margin=0):
        return
    profile = self.profiles.get(cfp.product, None)
    if profile is None:
        return
    if profile.cv == 0:
        alpha_u, alpha_q, alpha_t = (
            profile.alpha_u,
            profile.alpha_q,
            profile.alpha_t,
        )
    else:
        alpha_u, alpha_q, alpha_t = tuple(
            dirichlet((profile.alpha_u, profile.alpha_q, profile.alpha_t), size=1)[0])
    beta_u = pos_gauss(profile.beta_u, profile.cv)
    beta_t = pos_gauss(profile.beta_t, profile.cv)
    beta_q = pos_gauss(profile.beta_q, profile.cv)
    tau_u = pos_gauss(profile.tau_u, profile.cv)
    tau_t = pos_gauss(profile.tau_t, profile.cv)
    tau_q = pos_gauss(profile.tau_q, profile.cv)
    ufun = LinearUtilityAggregationFunction(
        issue_utilities={
            "time": lambda x: x ** tau_t / beta_t,
            "quantity": lambda x: x ** tau_q / beta_q,
            "unit_price": lambda x: x ** tau_u / beta_u,
        },
        weights={"time": alpha_t, "quantity": alpha_q, "unit_price": alpha_u},
    )
    ufun.reserved_value = ufun({
        "time": cfp.max_time,
        "quantity": cfp.max_quantity,
        "unit_price": cfp.money_resolution if cfp.money_resolution is not None else 0.0,
    })
    # ufun = normalize(, outcomes=cfp.outcomes, infeasible_cutoff=-1)
    negotiator = self.negotiator_type(name=self.name + "*" + cfp.publisher[:4], ufun=ufun)
    self.n_neg_trials[cfp.id] += 1
    self.request_negotiation(cfp=cfp, negotiator=negotiator)
def generate_corpus(categories, vocabulary, documents, hyp_pi=None, hyp_thetas=None):
    """Create true parameters and sample data for a corpus of labeled documents.

    @param categories: number of categories
    @type categories: integer
    @param vocabulary: vocabulary size
    @type vocabulary: integer
    @param documents: number of documents in the corpus
    @type documents: integer
    @param hyp_pi: optional category hyperparameter, default uninformative
    @type hyp_pi: list or None
    @param hyp_thetas: optional word count hyperparameter, default uninformative
    @type hyp_thetas: list or None
    @return: word distributions per category, documents, document labels
    @rtype: tuple
    """
    # Set up the hyperparameters.
    if hyp_pi is None:
        hyp_pi = [1] * categories
    if len(hyp_pi) != categories:
        raise Exception()
    if hyp_thetas is None:
        hyp_thetas = [1] * vocabulary
    if len(hyp_thetas) != vocabulary:
        raise Exception()
    # Generate the true model parameters.
    pi = log(dirichlet(hyp_pi, 1)[0])
    thetas = dirichlet(hyp_thetas, categories)
    # Generate the corpus and the true labels.
    corpus = empty((documents, vocabulary), int)
    labels = empty(documents, int)
    for document_index in range(documents):
        category = multinomial_sample(pi)
        labels[document_index] = category
        corpus[document_index] = multinomial(vocabulary * 100, thetas[category])
    return log(thetas), corpus, labels
def _initialize_sub_word_parameter(self):
    sub_word_parameter = np.zeros((self.n_sub_topics, self.V))
    self.V_grid = np.array(range(self.V)).reshape(self.V_sqrt, self.V_sqrt)
    dim, index = None, None
    sampled = {(dim, index)}
    for sub_topic in range(self.n_sub_topics):
        # Pick a previously unused row (dim == 0) or column (dim == 1) of the grid.
        while (dim, index) in sampled:
            dim, index = int(self.rng.rand() + 0.5), self.rng.randint(0, self.V_sqrt)
        sampled.add((dim, index))
        words = self.V_grid[index, :] if dim < 0.5 else self.V_grid[:, index]
        sub_word_parameter[sub_topic, words] = dirichlet(self.sub_word_dirichlet_parameter)
    return sub_word_parameter
def init_emission(self, Obs):
    exists = [False] * self.num_symbols
    for obs in Obs:
        for o in obs:
            exists[o] = True
    no = exists.count(True)
    # Draw emission probabilities only over the symbols that actually occur.
    p = dirichlet([1.0] * no, self.num_states)
    i = 0
    for idx, e in enumerate(exists):
        if e:
            self.B[idx, :] = p[:, i]
            i += 1
def generate_class_prob(classes: int) -> List[float]:
    """
    Generate a random (non-uniform) class distribution for an instance in the classifier.

    :param classes: The number of classes in the dataset
    :return: The randomly generated class probabilities for an instance in the data table
    """
    # Call on the dirichlet randomiser to generate probabilities, then convert
    # the numpy array into a regular non-nested list.
    ls: List[float] = ((random.dirichlet(np.ones(classes), size=1)).tolist())[0]
    return ls
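# Hypothetical usage sketch (not part of the original source): draws one class
# distribution with generate_class_prob above, assuming numpy is imported as np,
# numpy.random as random, and List comes from typing, as that snippet expects.
probs = generate_class_prob(4)           # e.g. [0.31, 0.05, 0.42, 0.22]
assert abs(sum(probs) - 1.0) < 1e-9      # the probabilities form a distribution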
def __init__(self, data_dim=1, pi=None, concentration=1):
    self.data_dim = data_dim
    self.concentration = concentration
    assert pi is not None or concentration is not None, \
        "Either pi or concentration must be specified."
    if pi is not None:
        assert pi.ndim == 1 and pi.size == data_dim and np.all(pi >= 0) \
            and np.allclose(pi.sum(), 1)
    else:
        # Draw pi from a symmetric Dirichlet when it is not given explicitly.
        pi = npr.dirichlet(concentration * np.ones(data_dim))
    self.pi = pi
def simulate_pf(mean_ret, cov):
    perf, weights = [], []
    for i in range(N_PORTFOLIOS):
        if i % 50000 == 0:
            print(i)
        weights = dirichlet([.08] * n_assets)
        weights /= np.sum(weights)
        r, sd = pf_performance(weights, mean_ret, cov)
        perf.append([r, sd, (r - RF_RATE) / sd])
    perf_df = pd.DataFrame(perf, columns=['ret', 'vol', 'sharpe'])
    # Note: only the weights of the final simulated portfolio are returned.
    return perf_df, weights
def init_gibbs(rho0, alpha0, gamma0, lam_a_pri, lam_b_pri, T, yt):
    K = 1
    zt = np.zeros(T, dtype='int')
    beta_vec = dirichlet(np.array([1, gamma0]), size=1)[0]
    beta_new = beta_vec[-1]
    beta_vec = beta_vec[:-1]
    n_mat = np.array([[0]])

    # t = 0 count as wt=0, don't need to infer wt
    ysum = np.array([yt[0]])
    ycnt = np.array([np.ones(len(yt[0]))])

    zt, n_mat, ysum, ycnt, beta_vec, beta_new, K = sample_one_step_ahead(
        zt, yt, n_mat, ysum, ycnt, beta_vec, beta_new,
        alpha0, gamma0, lam_a_pri, lam_b_pri, rho0, K)

    return (rho0, alpha0, gamma0, lam_a_pri, lam_b_pri, K, zt,
            beta_vec, beta_new, n_mat, ysum, ycnt)
def sample(self, data_len=1):
    ndim = self.alpha.ndim
    if ndim == 1:
        dst = dirichlet(self.alpha, size=data_len).T
        if data_len == 1:
            dst = dst[:, 0]
    elif ndim == 2:
        dst = zeros((self.n_states, self.len_2d, data_len))
        if self.alpha.shape[0] == self.alpha.shape[1]:
            for k in range(self.alpha.shape[0]):
                dst[:, k] = dirichlet(self.alpha[:, k], size=data_len).T
        else:
            for k in range(self.len_2d):
                dst[:, k, :] = dirichlet(self.alpha[:, k], size=data_len).T
        if data_len == 1:
            dst = dst[:, :, 0]
    else:
        logger.error('data dim %d is not supported' % ndim)
        dst = None
    return dst
def generate_corpus():
    corpus = np.zeros((NUM_DOCS, vocab_size), dtype='int64')
    beta = generate_beta()
    for m in range(NUM_DOCS):
        # Get topic distribution for current document
        theta = nprand.dirichlet(alpha)
        for i in range(DOC_LENGTH):
            # Sample topic
            zi = lda.sample(theta)
            w = lda.sample(beta[zi, :])
            corpus[m, w] += 1
    return corpus
def test_sample_dirichlet(self):
    N = 100
    for K in [2, 5, 10]:
        for alpha in [.1, .5, 1., 4., 50.]:
            samples0 = dirichlet(zeros(K) + alpha, size=N).T
            samples1 = sample_dirichlet(K, N, alpha)
            p = ks_2samp(samples0.ravel(), samples1.ravel())[1]
            self.assertGreater(p, 1e-6)
            self.assertLess(max(abs(1. - samples1.sum(0))), 1e-6)
def generateExpressionLevels(transcripts, distribution):
    expressionLevels = {}
    if distribution == UNIFORM_ARG:
        for transcript in transcripts.values():
            expressionLevels[transcript.name] = 1.0 / len(transcripts)
    elif distribution == DIRICHLET_ARG:
        ones = numpy.ones(len(transcripts))
        probabilities = dirichlet(ones, 1)[0]
        for i, transcript in enumerate(transcripts.values()):
            expressionLevels[transcript.name] = probabilities[i]
    print "Source expression: " + str(expressionLevels)
    return expressionLevels
def __init__(self, data):
    # observation parameters
    mu = np.mean(data)
    sigma = np.var(data) ** 0.5
    self.obs_params = np.array([normal(loc=mu, scale=sigma, size=5),
                                normal(loc=mu, scale=sigma, size=5),
                                normal(loc=mu, scale=sigma, size=5),
                                normal(loc=mu, scale=sigma, size=5),
                                normal(loc=mu, scale=sigma, size=5)])
    # pi[0] is initial state distribution
    self.pi = np.array([dirichlet([1, 1, 1, 1, 1]),
                        dirichlet([1, 1, 1, 1, 1]),
                        dirichlet([1, 1, 1, 1, 1]),
                        dirichlet([1, 1, 1, 1, 1]),
                        dirichlet([1, 1, 1, 1, 1]),
                        dirichlet([1, 1, 1, 1, 1])])
    # pi[1:] becomes transition matrix
    self.A = self.pi[1:]
    # state sequence
    self.x = np.zeros((data.size))
    # observation sequence
    self.y_t = np.zeros((data.size))
    # likelihood potentials
    self.L = np.zeros((5, data.size))
    # standard message passing
    self.forward_messages = hf.compute_forward_messages(data.size, self.pi[0], self.A, self.L)
    self.backward_messages = hf.compute_backward_messages(data.size, self.A, self.L)
def simulate_portfolios(mean_ret, cov, rf_rate=rf_rate, short=True):
    alpha = np.full(shape=n_assets, fill_value=.05)
    weights = dirichlet(alpha=alpha, size=NUM_PF)
    if short:
        weights *= choice([-1, 1], size=weights.shape)

    returns = weights @ mean_ret.values + 1
    returns = returns ** periods_per_year - 1
    std = (weights @ weekly_returns.T).std(1)
    std *= np.sqrt(periods_per_year)
    sharpe = (returns - rf_rate) / std

    return pd.DataFrame({'Annualized std': std,
                         'Annualized Returns': returns,
                         'Sharpe Ratio': sharpe}), weights
def generate_artificial_data(self, D, N, noise_threshold=0.0):
    N = int(N)
    self.super_Theta = dirichlet(self.super_dirichlet_parameter, size=D)
    self.sub_Theta = dirichlet(self.super_sub_dirichlet_parameter,
                               size=(D, self.n_super_topics))
    if noise_threshold > 0.0:
        sub_Theta_noise = dirichlet(self.super_sub_noise_dirichlet_parameter,
                                    size=(D, self.n_super_topics))
        self.sub_Theta = np.concatenate((self.sub_Theta * (1 - noise_threshold),
                                         sub_Theta_noise * noise_threshold), axis=2)
    self.doc_super = np.array([np.random.multinomial(N, self.super_Theta[i])
                               for i in range(D)])
    self.doc_sub = np.zeros((D, self.sub_Theta.shape[2]))
    for d, super_counts in enumerate(self.doc_super):
        for super_topic, num in enumerate(super_counts):
            if num > 0:
                self.doc_sub[d] += np.random.multinomial(num, self.sub_Theta[d][super_topic])
    X = np.zeros((D, self.V), dtype=int)
    for d, sub_counts in enumerate(self.doc_sub):
        for sub_topic, num in enumerate(sub_counts):
            if num > 0:
                # doc_sub holds float counts, so cast before drawing.
                X[d] += np.random.multinomial(int(num), self.sub_word_parameter[sub_topic])
    return X
def sample_node_prob():
    """Step 8 of update_network_model: sample new node probability beta."""
    ro = []
    for node in node_ids:
        if node == -1:
            continue
        ro.append(sample_outlink_size(node) + sample_inlink_size(node))
    print(ro)
    ro.append(fixed['gamma_H'])
    beta = dirichlet(alpha=ro)
    for i, node in enumerate(node_ids):
        state['beta'][node] = beta[i]
def __init__(self, returns, method, swarm=30):
    self.returns = returns
    self.swarm = swarm
    self.corr = returns.corr()
    self.n = len(self.returns.columns)
    self.method = method
    self.pbest_portfolios = []
    self.xbest_portfolios = []
    for i in range(self.swarm):
        weights = nrand.dirichlet(numpy.ones(self.n), 1)[0]
        self.xbest_portfolios.append(Portfolio(self.returns, weights))
        self.pbest_portfolios.append(Portfolio(self.returns, weights))
def sample_dirichlet(W, beta, memberships, out=None):
    K, T = memberships.shape[1], W.shape[1]
    prior = np.ndarray((T,), dtype=np.float64)
    if out is None:
        out = np.ndarray((K, T), dtype=np.float64)
    for k in range(K):
        # Posterior pseudo-counts: weighted word counts for component k plus the prior beta.
        prior[:] = W.T.dot(memberships[:, k])
        prior += beta
        out[k, :] = rd.dirichlet(prior)
    return out
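# Hypothetical usage sketch (not part of the original source): draws topic-word
# distributions with sample_dirichlet above from made-up counts, assuming numpy
# is imported as np and numpy.random as rd, as that snippet expects.
W_toy = np.array([[2., 0., 1.],
                  [0., 3., 1.]])               # 2 documents x 3 terms
memberships_toy = np.array([[0.9, 0.1],
                            [0.2, 0.8]])       # 2 documents x 2 components
word_dists = sample_dirichlet(W_toy, beta=0.1, memberships=memberships_toy)
print(word_dists.shape)                        # (2, 3); each row sums to 1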