def fit_advi(self, n=3, method='advi', n_type='restart'):
    r"""Find posterior using ADVI (maximising likelihood of the data and
    minimising KL-divergence of posterior to prior)

    :param n: number of independent initialisations
    :param method: to allow for potential use of SVGD or MCMC (currently only ADVI implemented).
    :param n_type: type of repeated initialisation:
        'restart' to pick different initial value,
        'cv' for molecular cross-validation - splits counts into n datasets,
            for now, only n=2 is implemented,
        'bootstrap' for fitting the model to multiple downsampled datasets.
            Run `mod.bootstrap_data()` to generate variants of data
    :return: self.mean_field dictionary with MeanField pymc3 objects.
    """

    if not np.isin(n_type, ['restart', 'cv', 'bootstrap']):
        raise ValueError(
            "n_type should be one of ['restart', 'cv', 'bootstrap']")

    self.mean_field = {}
    self.samples = {}
    self.node_samples = {}

    self.n_type = n_type

    if np.isin(n_type, ['bootstrap']):
        if self.X_data_sample is None:
            self.bootstrap_data(n=n)
    elif np.isin(n_type, ['cv']):
        self.generate_cv_data(n=n)  # cv data added to self.X_data_sample

    init_names = ['init_' + str(i + 1) for i in np.arange(n)]

    with self.model:
        for i, name in enumerate(init_names):
            # when type is molecular cross-validation or bootstrap,
            # replace self.x_data tensor with new data
            if np.isin(n_type, ['cv', 'bootstrap']):
                more_replacements = {
                    self.x_data: self.X_data_sample[i].astype(self.data_type)
                }
            else:
                more_replacements = {}

            # train the model
            self.mean_field[name] = pm.fit(
                self.n_iter, method='advi',
                callbacks=[CheckParametersConvergence()],
                obj_optimizer=pm.adam(learning_rate=self.learning_rate),
                total_grad_norm_constraint=self.total_grad_norm_constraint,
                more_replacements=more_replacements)

            # plot training history
            if self.verbose:
                plt.plot(np.log10(self.mean_field[name].hist[15000:]))
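# Minimal usage sketch for `fit_advi` (hypothetical: assumes `mod` is an
# already-constructed instance of the model class above, with data attached):
#
#     mod.fit_advi(n=2, n_type='restart')
#     posterior_draws = mod.mean_field['init_1'].sample(1000)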
def _sample(self, num_epochs=None, num_draws=None):
    if not num_epochs:
        num_epochs = self.num_epochs
    if not num_draws:
        num_draws = self.num_draws

    with self.model:
        approx = pm.fit(n=num_epochs,
                        obj_optimizer=pm.adam(learning_rate=self.learning_rate))
        self.trace = approx.sample(draws=num_draws)
def fit(self, X, Y, samples=500, advi_n=50000, advi_n_mc=1,
        advi_obj_optimizer=pm.adam(learning_rate=.1)):
    self.num_samples = samples
    self._build_model(X, Y)
    with self.model:
        if self.inference_method == 'advi':
            mean_field = pm.fit(
                n=advi_n, method='advi', obj_n_mc=advi_n_mc,
                obj_optimizer=advi_obj_optimizer
            )  # TODO: how to determine hyperparameters?
            self.trace = mean_field.sample(draws=samples)
        elif self.inference_method == 'mcmc':
            self.trace = pm.sample(samples, tune=samples)
        else:
            raise Exception(
                "Unknown inference_method value: %s. Choose among 'advi', 'mcmc'."
                % self.inference_method)
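# Hypothetical usage sketch (the class name is illustrative; it only assumes
# a wrapper exposing the `fit` method above with `inference_method` set in
# its constructor):
#
#     model = BayesianRegressor(inference_method='advi')
#     model.fit(X_train, Y_train, samples=500, advi_n=50000)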
def test_hh_flow():
    cov = pm.floatX([[2, -1], [-1, 3]])
    with pm.Model():
        pm.MvNormal('mvN', mu=pm.floatX([0, 1]), cov=cov, shape=2)
        nf = NFVI('scale-hh*2-loc')
        nf.fit(25000, obj_optimizer=pm.adam(learning_rate=0.001))
        trace = nf.approx.sample(10000)
        cov2 = pm.trace_cov(trace)
    np.testing.assert_allclose(cov, cov2, rtol=0.07)
def fit_model_LN(N, J, D, R, T, Sigmas, featvar_id, filename, c, normalize,
                 batch=False):
    model = pm.Model()
    with model:
        """hyperparameters"""
        theta_prior = stickbreak_prior('theta', 1., T)
        alpha = .1
        """priors"""
        theta = pm.Dirichlet('theta', theta_prior, shape=T)
        psi = [[
            pm.MvNormal('psi_{}_{}'.format(t, d),
                        mu=tt.zeros(R[d]),
                        cov=tt.exp(-Sigmas[d]),
                        shape=R[d]) for d in range(D)
        ] for t in range(T)]
        phi = tt.stack([
            tt.concatenate([
                pm.Deterministic('phi_{}_{}'.format(t, d),
                                 tt.nnet.softmax(psi[t][d]))[0]
                for d in range(D)
            ]) for t in range(T)
        ])
        """likelihood"""
        target = pm.DensityDist('target',
                                loglik(theta=theta, phi=phi),
                                observed=dict(featvar_id=featvar_id))
        """fit model"""
        inference = pm.ADVI()
        inference.fit(100000,
                      obj_optimizer=pm.adam(learning_rate=.01, beta1=.8),
                      callbacks=[pm.callbacks.CheckParametersConvergence()])
    trace = inference.approx.sample()
    posterior = {
        k: trace[k]
        for k in trace.varnames if not k.endswith('__')
    }
    posterior['ELBO'] = inference.hist
    if batch == False:
        f = open(
            'posterior_LN_{}_{}_{}.pkl'.format(
                filename.split('.')[0], c, normalize), 'wb')
    else:
        f = open(
            'posterior_LN_{}_{}_{}_holdout_{}.pkl'.format(
                filename.split('.')[0], c, normalize, batch), 'wb')
    pkl.dump(posterior, f)
    f.close()
def main():
    if len(sys.argv) < 2 or len(sys.argv) > 3:
        print('usage: python3 inference_dir.py [chain no] [optional output no]')
        sys.exit()
    elif len(sys.argv) == 2:
        c = int(sys.argv[1])
        d = int(sys.argv[1])
    if len(sys.argv) == 3:
        c = int(sys.argv[1])
        d = int(sys.argv[2])
    np.random.seed(c)
    np.random.shuffle(lang_ind)
    np.random.shuffle(sound_ind)
    lang_minibatch = pm.Minibatch(lang_ind, 500)
    sound_minibatch = pm.Minibatch(sound_ind, 500)
    model_ln = pm.Model()
    with model_ln:
        beta = pm.HalfFlat('beta')
        "theta = language-level prior over components"
        theta = tt.stack([
            pm.Dirichlet('theta_{}'.format(l), a=tt.ones(K) * beta, shape=K)
            for l in range(L)
        ])
        psi = [
            pm.MvNormal('psi_{}'.format(k), mu=[0] * S, cov=Sigma, shape=S)
            for k in range(K)
        ]
        "phi = component-level collection of distributions over sound change"
        phi = tt.stack([
            tt.concatenate([
                pm.Deterministic(
                    'phi_{}_{}'.format(k, x),
                    tt.nnet.softmax(psi[k][s_breaks[x][0]:s_breaks[x][1]])[0])
                for x in range(X)
            ]) for k in range(K)
        ])
        target = pm.DensityDist('target',
                                logprob(theta=theta, phi=phi),
                                observed=dict(lang_array=lang_minibatch,
                                              sound_array=sound_minibatch),
                                total_size=N)
        inference_ln = pm.ADVI()
        inference_ln.fit(50000,
                         obj_optimizer=pm.adam(learning_rate=.01,
                                               beta1=uniform(.7, .9)),
                         callbacks=[pm.callbacks.CheckParametersConvergence()])
    trace_ln = inference_ln.approx.sample()
    posterior = {
        k: trace_ln[k]
        for k in trace_ln.varnames if not k.endswith('__')
    }
    posterior['ELBO'] = inference_ln.hist
    f = open('posterior_ln_shuffle_{}.pkl'.format(d), 'wb')
    pkl.dump(posterior, f)
    f.close()
def main():
    if len(sys.argv) < 2 or len(sys.argv) > 3:
        print('usage: python3 inference_dir.py [chain no] [optional output no]')
        sys.exit()
    elif len(sys.argv) == 2:
        c = int(sys.argv[1])
        d = int(sys.argv[1])
    if len(sys.argv) == 3:
        c = int(sys.argv[1])
        d = int(sys.argv[2])
    np.random.seed(c)
    lang_minibatch = pm.Minibatch(lang_ind, 500)
    sound_minibatch = pm.Minibatch(sound_ind, 500)
    model_dir = pm.Model()
    with model_dir:
        beta = pm.HalfFlat('beta')
        "theta = language-level prior over components"
        theta = tt.stack([
            pm.Dirichlet('theta_{}'.format(l), a=tt.ones(K) * beta, shape=K)
            for l in range(L)
        ])
        phi = tt.stack([
            tt.concatenate([
                pm.Dirichlet('phi_{}_{}'.format(k, x),
                             a=tt.ones(R[x]) * alpha,
                             shape=R[x]) for x in range(X)
            ]) for k in range(K)
        ])
        target = pm.DensityDist('target',
                                logprob(theta=theta, phi=phi),
                                observed=dict(lang_array=lang_minibatch,
                                              sound_array=sound_minibatch),
                                total_size=N)
        inference_dir = pm.ADVI()
        inference_dir.fit(
            50000,
            obj_optimizer=pm.adam(learning_rate=.01, beta1=uniform(.7, .9)),
            callbacks=[pm.callbacks.CheckParametersConvergence()])
    trace_dir = inference_dir.approx.sample()
    posterior = {
        k: trace_dir[k]
        for k in trace_dir.varnames if not k.endswith('__')
    }
    posterior['ELBO'] = inference_dir.hist
    f = open('posterior_dir_{}.pkl'.format(d), 'wb')
    pkl.dump(posterior, f)
    f.close()
def train(neural_network, inference_file, model_file, hypers):
    set_tt_rng(MRG_RandomStreams(42))
    with neural_network:
        inference = pm.ADVI()
        approx = pm.fit(n=hypers['n_sample'],
                        method=inference,
                        obj_optimizer=pm.adam(learning_rate=hypers['lr']))
        # approx = pm.fit(n=50000, method=inference, obj_optimizer=pm.adam(learning_rate=0.01))
    with open(inference_file, "wb") as f:
        pickle.dump(inference, f, pickle.HIGHEST_PROTOCOL)
    with open(model_file, "wb") as f:
        pickle.dump(approx, f, pickle.HIGHEST_PROTOCOL)
    return inference, approx
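# Hypothetical usage sketch (assumes `bnn_model` is a pm.Model built
# elsewhere; the hyperparameter keys mirror those read in `train` above):
#
#     hypers = {'n_sample': 30000, 'lr': 0.01}
#     inference, approx = train(bnn_model, 'inference.pkl', 'model.pkl', hypers)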
ax2.hist(returns)
ax2.set_title('Real returns')
plt.show()

#%%
# 3. now let's relax the normal distribution assumption: let's fit a Cauchy distribution.
with pm.Model() as model2:
    beta = pm.HalfNormal('beta', sd=10.)
    pm.Cauchy('returns', alpha=0.0, beta=beta, observed=returns)
    mean_field = pm.fit(n=150000, method='advi',
                        obj_optimizer=pm.adam(learning_rate=.001))
    trace2 = mean_field.sample(draws=10000)

preds2 = pm.sample_ppc(trace2, samples=10000, model=model2)
y2 = np.reshape(np.mean(preds2['returns'], axis=0), [-1])

fig, (ax1, ax2) = plt.subplots(1, 2)
ax1.hist(y2)
ax1.set_title('Cauchy distribution returns')
ax2.hist(returns)
ax2.set_title('Real returns')
plt.show()
def deconvolute(data, expr, meta=None, sample_deviation=False,
                samp_scale=False, method='combination', obj_n_mc=8,
                nf_obj_n_mc=100, svgd_kwargs=dict(n_particles=4000),
                n_increments=3, c_type=None, norm=None, progress=True,
                init_iter=32e3, max_iter=1e5,
                nfvi_formula='scale-loc-radial*8', sample_background=False):
    chars = data['chars']
    features = data['features']
    var = data['variations']
    is_multico = 'multico' in data
    cell_types = list(chars.keys())
    if 'backgrounds' in data and sample_background is True:
        background = data['backgrounds']['all']
        if c_type is not None:
            for key in data['backgrounds'].keys():
                if key.upper() == c_type.upper():
                    background = data[key]
                    break
        cell_types += ['background']
    n_types = len(cell_types)
    if meta is not None:
        save_function = saveFunction(meta, cell_types)
    else:
        save_function = None
    test_features = list(expr.index)
    index = np.array([f in test_features for f in features])
    n_features = sum(index)
    filtered_features = [i for (i, v) in zip(features, index) if v]
    sample_float = expr.loc[filtered_features].values
    #sample = sample_float.astype(int)
    #seq_depth = np.sum(sample)
    ct_prior = np.ones(n_types)
    ct_start = ct_prior / np.sum(ct_prior)
    for i, ct in enumerate(cell_types):
        if ct == 'background':
            ct_prior[i] = 5e-1
            ct_start[i] = 1 - ct_start[i]
            ct_start = ct_start / np.sum(ct_start)
            break
    mess = '... using {} of {} features with {} available ...'
    print(mess.format(n_features, str(len(index)), str(len(test_features))))

    def combine_and_embed(samp, deviation, A, Ainv, b):
        base = samp - tt.dot(deviation, A)
        return tt.dot(base, Ainv) + deviation + b

    def mix(components, decomp):
        return tt.dot(decomp[None, :], tt.nnet.softmax(components))

    def reduce(samp, A, b):
        return np.dot(samp - b, A)

    def embedd(samp, Ainv, b):
        return np.dot(samp, Ainv) + b

    def project(sample, A, Ainv, b):
        mapping = reduce(sample, A, b)
        co_projection = sample - embedd(mapping, Ainv, b)
        return mapping, co_projection

    l_alphas = sample_float + 1
    t_samp = tau_inv(np.log(l_alphas) - np.log(l_alphas.sum()))
    dist = pm.Dirichlet.dist(l_alphas)

    def make_model(scale=1e-3, dims=slice(None), prior=10):
        if is_multico is False:
            A = data['A'][index, dims]
            Ainv = data['Ainv'][dims, index]
            b = data['b'][index]
            s_Ainv = theano.shared(Ainv)
            s_A = theano.shared(A)
            s_b = theano.shared(b)
            samp_mapping, dev_start = project(t_samp, A, Ainv, b)
        with pm.Model() as model:
            decomp = pm.Dirichlet('decomp', ct_prior * prior,
                                  shape=ct_prior.shape,
                                  testval=ct_start)  #, transform=StickBreaking5(ct_prior.shape[0]))
            ct_expr = list()
            if samp_scale is True:
                scale = pm.Lognormal('scale', testval=10)
            for i, cell_type in enumerate(cell_types):
                if cell_type == 'background':
                    dev_samp = pm.Normal('comb ' + cell_type,
                                         mu=background['mean'][index],
                                         sigma=background['std'][index] / scale,
                                         shape=(1, n_features),
                                         testval=t_samp)
                    ct_expr.append(dev_samp)
                    continue
                if is_multico is True:
                    A = chars[cell_type]['A'][index, dims]
                    Ainv = chars[cell_type]['Ainv'][dims, index]
                    b = chars[cell_type]['b'][index]
                    s_Ainv = theano.shared(Ainv)
                    s_A = theano.shared(A)
                    s_b = theano.shared(b)
                    samp_mapping, dev_start = project(t_samp, A, Ainv, b)
                    n = A.shape[1]
                    samp = pm.Normal(cell_type, sigma=scale, shape=(1, n))
                else:
                    samp = pm.MvNormal(cell_type,
                                       chars[cell_type]['mean'][dims],
                                       cov=chars[cell_type]['sigma'][dims, dims] * scale,
                                       shape=(1, A.shape[1]),
                                       testval=chars[cell_type]['mean'][dims])
                if sample_deviation is True:
                    deviation = pm.Normal('deviation ' + cell_type,
                                          mu=var[cell_type]['mean'][index],
                                          sigma=var[cell_type]['std'][index] * scale,
                                          shape=(1, n_features),
                                          testval=dev_start)
                else:
                    deviation = theano.shared(dev_start)
                dev_samp = pm.Deterministic(
                    'comb ' + cell_type,
                    combine_and_embed(samp, deviation, s_A, s_Ainv, s_b))
                ct_expr.append(dev_samp)
            ct_expr = tt.concatenate(ct_expr, axis=0)
            transcriptome = pm.Deterministic('trans', mix(ct_expr, decomp))
            pot = pm.Potential('obs', dist.logp(transcriptome))
            #obs = pm.Multinomial('obs', seq_depth, transcriptome, observed=sample, dtype='int64')
        return model

    sf = CheckAndSave(save_function=save_function)
    if method != 'increment':
        if mode == 'debug':
            print('Compiling model ...')
        model = make_model()
        if mode == 'debug':
            print('Starting inference ...')
    if method == 'increment':
        if is_multico is True:
            message = ('The method increment is not implemented for '
                       'multico characterisations.')
            raise NotImplementedError(message)
        maxdim = np.min([data['A'].shape[1], 50])
        start = np.min([n_types, maxdim])
        steps = np.unique(np.geomspace(start, maxdim,
                                       num=n_increments, dtype=int))
        if mode == 'debug':
            print('Doing increments {} ...'.format(steps))
        lastparam = None
        for dims in steps:
            print('Increment {}'.format(dims))
            if mode == 'debug':
                print('Compiling model ...')
            start_compile = time.time()
            model = make_model(dims=slice(dims))
            compile_time = time.time() - start_compile
            if mode == 'debug':
                print('Starting inference ...')
            with model:
                advi = pm.ADVI()
                if lastparam is not None:
                    rmap = advi.approx.groups[0].bij.rmap
                    newpars = {param.name: rmap(param.eval())
                               for param in advi.approx.params}
                    for ct in cell_types:
                        if ct == 'background':
                            continue
                        mus = lastparam['mu'][ct]
                        ind = np.indices(lastparam['mu'][ct].shape, sparse=True)
                        lastparam['mu'][ct] = newpars['mu'][ct]
                        lastparam['mu'][ct][ind] = mus
                        rohs = lastparam['rho'][ct]
                        ind = np.indices(lastparam['rho'][ct].shape, sparse=True)
                        lastparam['rho'][ct] = newpars['rho'][ct]
                        lastparam['rho'][ct][ind] = rohs
                    fmap = advi.approx.groups[0].bij.map
                    advi.approx.params[0].set_value(fmap(lastparam['mu']))
                    advi.approx.params[1].set_value(fmap(lastparam['rho']))
                approx = advi.fit(n=int(max_iter), progressbar=progress,
                                  callbacks=[sf], obj_n_mc=obj_n_mc)
            if sf.musst_stop(buffer=compile_time + 60):
                print('Stopping: Not enough time for next increment ...')
                break
            rmap = approx.groups[0].bij.rmap
            lastparam = {param.name: rmap(param.eval())
                         for param in approx.params}
            lastdims = dims
        decomp = lastparam['mu']['decomp_stickbreaking__']
    elif method == 'advi':
        approx = pm.fit(model=model, method='advi', n=int(max_iter),
                        progressbar=progress, callbacks=[sf],
                        obj_n_mc=obj_n_mc)
        vals = approx.bij.rmap(approx.mean.get_value())
        if 'scale_log__' in vals:
            print('Scale {}'.format(np.exp(vals['scale_log__'])))
        decomp = sf.get_decomp(approx)
    elif method == 'decrate':
        approx = pm.fit(model=model, method='advi', n=int(max_iter),
                        obj_optimizer=pm.adam(), progressbar=progress,
                        callbacks=[sf], obj_n_mc=obj_n_mc)
        vals = approx.bij.rmap(approx.mean.get_value())
        if 'scale_log__' in vals:
            print('Scale {}'.format(np.exp(vals['scale_log__'])))
        decomp = sf.get_decomp(approx)
    elif method == 'svgd':
        sf.every = 20
        approx = pm.fit(model=model, method='svgd', inf_kwargs=svgd_kwargs,
                        n=int(max_iter), progressbar=progress,
                        callbacks=[sf], obj_n_mc=obj_n_mc)
        vals = approx.params[0].eval()
        vals = np.mean(vals, axis=0)
        vals = approx.bij.rmap(vals)
        if 'scale_log__' in vals:
            print('Scale {}'.format(np.exp(vals['scale_log__'])))
        decomp = vals['decomp_stickbreaking__']
    elif method == 'nfvi':
        with model:
            nfvi = pm.NFVI(nfvi_formula)
            approx = nfvi.fit(n=int(max_iter), progressbar=progress,
                              callbacks=[sf], obj_n_mc=nf_obj_n_mc)
        decomps = approx.sample(1000)['decomp_stickbreaking__']
        decomp = np.mean(decomps, axis=0)
    elif method == 'nuts':
        approx = pm.sample(model=model, draws=int(max_iter),
                           progressbar=progress, init='advi',
                           n_init=int(init_iter), chains=1)
        decomp = decomp_from_trace(approx)
    elif method == 'combination':
        with model:
            approx = pm.fit(method='advi', n=int(init_iter),
                            progressbar=progress, callbacks=[sf],
                            obj_n_mc=obj_n_mc)
            print('Starting SVGD ...')
            n = svgd_kwargs.get('n_particles', 100)
            rmap = approx.bij.rmap
            svgd = pm.SVGD(**svgd_kwargs)
            fmap = svgd.approx.bij.map
            means = fmap(rmap(approx.mean.get_value()))
            stds = fmap(rmap(approx.std.eval()))
            start = np.random.normal(means, stds, size=(n, len(means)))
            svgd.approx.params[0].set_value(start)
            sf.every = 20
            sf.last_time = None
            approx = svgd.fit(n=int(max_iter), progressbar=progress,
                              callbacks=[sf], obj_n_mc=obj_n_mc)
        decomp = sf.get_decomp(approx)
    elif method == 'nfcomb':
        assert re.match('scale-loc', nfvi_formula), \
            ('The nfvi formula needs to start with `scale-loc` in order '
             + f'to be initiated with advi. Instead it is `{nfvi_formula}`.')
        with model:
            approx = pm.fit(method='advi', n=int(init_iter),
                            progressbar=progress, callbacks=[sf],
                            obj_n_mc=obj_n_mc)
            print('Starting NFVI ...')
            rmap = approx.bij.rmap
            startpars = {param.name: rmap(param.eval())
                         for param in approx.params}
            nfvi = pm.NFVI(nfvi_formula)
            fmap = nfvi.approx.bij.map
            nfvi.approx.params[-1].set_value(fmap(startpars['rho']))
            nfvi.approx.params[-2].set_value(fmap(startpars['mu']))
            approx = nfvi.fit(n=int(max_iter), progressbar=progress,
                              callbacks=[sf], obj_n_mc=nf_obj_n_mc)
        decomp = sf.get_decomp(approx)
    else:
        message = 'The method {} is not implemented.'
        raise NotImplementedError(message.format(method))
    if save_function is not None:
        save_function(decomp)
    return decomp, approx
# Plot the ELBO to make sure you have converged.
# Print summaries and traceplots for the means, σ's and probabilities.

# Number of iterations for ADVI fit
num_iters: int = 50000

# Fit the model using ADVI
# Tried to fit using FullRankADVI as well; results were horrible
try:
    advi = vartbl['advi']
    print(f'Loaded ADVI fit for Gaussian Mixture Model.')
except:
    print(f'Running ADVI fit for Gaussian Mixture Model...')
    advi = pm.ADVI(model=model)
    advi.fit(n=num_iters, obj_optimizer=pm.adam(),
             callbacks=[CheckParametersConvergence()])
    vartbl['advi'] = advi
    save_vartbl(vartbl, fname)


def plot_elbo(elbo, plot_step, title):
    """Generate the ELBO plot"""
    fig, ax = plt.subplots(figsize=[12, 8])
    ax.set_title(title)
    ax.set_xlabel('Iteration')
    ax.set_ylabel('ELBO')
    n = len(elbo)
    plot_x = np.arange(0, n, plot_step)
    plot_y = elbo[::plot_step]
    ax.plot(plot_x, plot_y, color='b')
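# Usage sketch for plot_elbo: `advi.hist` holds the objective (negative ELBO)
# history recorded by pm.ADVI during fitting, so it can be passed directly:
#
#     plot_elbo(advi.hist, plot_step=100, title='ELBO for Gaussian Mixture Model')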
def train(self, fol_path='../data/*'):
    fol_list = glob.glob(fol_path)
    print(fol_list)
    seq_list = []
    for fol in fol_list:
        f_list = glob.glob(fol + '/*.jpg')
        im_list = []
        for f in sorted(f_list):
            # Crop to ultrasound active area
            im = np.mean(cv2.resize(
                cv2.imread(f)[180:700, 500:1020, :], (self.w, self.h)),
                         axis=-1)
            im_list.append(im)
        seq_list.append(np.array(im_list))

    # Get latent states
    self.latent_list = []
    self.bins_list = []
    count = 0
    for s in seq_list[:-1]:
        self.latent_list.append(
            self.vae_model.encoder.predict(
                s.reshape(-1, self.w, self.h, 1) / 255.0)[0])
        self.bins_list.append(np.arange(s.shape[0]) + count)
        count = self.bins_list[-1][-1]
    self.latent = np.vstack(self.latent_list)
    np.savetxt(self.log_path + 'latent.txt', self.latent)

    Xt = tt.as_tensor(self.latent)
    bins_t = []
    for t in self.bins_list:
        bins_t.append(tt.as_tensor(t))

    def exp(reward):
        traj_reward = pm.math.sum(reward)
        return traj_reward

    with pm.Model() as reward_model:
        l = pm.Gamma("l", alpha=2.0, beta=0.5)
        cov_func = pm.gp.cov.Matern32(self.latent.shape[1], ls=l)
        Xu = pm.gp.util.kmeans_inducing_points(self.Ni, self.latent)
        sig = pm.HalfCauchy("sig",
                            beta=np.ones((self.latent.shape[0], )),
                            shape=self.latent.shape[0])
        gp = pm.gp.MarginalSparse(cov_func=cov_func)
        f = gp.marginal_likelihood('reward', Xt, Xu,
                                   shape=self.latent.shape[0],
                                   y=None, noise=sig, is_observed=False)
        exp_list = []
        for i, bins in enumerate(bins_t):
            exp_list.append(
                pm.DensityDist('me_%d' % i, exp,
                               observed={'reward': f[bins]}))
        inference = pm.ADVI()
        approx = inference.fit(1000,
                               obj_optimizer=pm.adam(learning_rate=0.1))
    trace = approx.sample(5000)
    l = np.mean(trace['l'])
    sig = np.mean(trace['sig'])
    reward = np.mean(trace['reward'], axis=0)
    np.savetxt('./logs/l_me.txt', np.array([l]))
    np.savetxt('./logs/sig_me.txt', np.array([sig]))
    np.savetxt('./logs/reward_me.txt', reward)
    print('Saved trained reward parameters')
    return l, sig, reward
def infer(data, model_args={}, pymc3_args={}):
    model_args = {**MODEL_ARGS, **model_args}    # model arguments
    pymc3_args = {**PYMC3_ARGS, **pymc3_args}    # sampler arguments

    data = aux.prepare_data(data)    # validate and pre-process data

    # choose and run model
    model = modelfct.get_model(data, **model_args)
    if pymc3_args['method'] == 'nuts':
        fit = None
        trace = pmc.sample(model=model,
                           draws=pymc3_args['draws'],
                           tune=pymc3_args['tune'],
                           chains=1,
                           compute_convergence_checks=False,
                           target_accept=pymc3_args['target_accept'],
                           random_seed=pymc3_args['random_seed'])
    elif pymc3_args['method'] == 'nfvi':
        fit = pmc.NFVI(model=model,
                       flow=pymc3_args['flow'],
                       random_seed=pymc3_args['random_seed']).fit(
                           n=pymc3_args['niters'],
                           obj_optimizer=pmc.adam(
                               learning_rate=pymc3_args['learning_rate']))
        trace = fit.sample(pymc3_args['draws'])
    else:
        fit = pmc.fit(
            model=model,
            n=pymc3_args['niters'],
            method=pymc3_args['method'],
            obj_optimizer=pmc.adam(learning_rate=pymc3_args['learning_rate']),
            random_seed=pymc3_args['random_seed'])
        trace = fit.sample(pymc3_args['draws'])

    # post-processing
    logger.info('Calculating posterior cluster weights and centres.')
    weights = sts.calculate_cluster_weights(trace, model_args['threshold'],
                                            model_args['ci_alpha'])
    centres = sts.calculate_cluster_centres(data, trace,
                                            model_args['ci_alpha'])

    logger.info('Calculating posterior CCF values.')
    posts, lppd = sts.calculate_ccf_and_hard_clusters(
        data, trace, model_args['threshold'], model_args['ci_alpha'])

    logger.info('Calculating posterior predictive distribution.')
    ppd = sts.calculate_ppd(data, trace, model_args['threshold'],
                            model_args['ci_alpha'], model_args['npoints'])

    if model_args['prior'] in ['GP0', 'GP1', 'GP2', 'GP3']:
        logger.info('Calculating GP-related quantities.')
        try:
            centres_gp = sts.calculate_cluster_centres_gp(
                data, trace,
                prior=model_args['prior'],
                cov=model_args['cov'],
                npoints=model_args['npoints'],
                alpha=model_args['ci_alpha'])
        except:    # ExpQ sometimes throws a singular matrix error
            logger.error(
                'Exception occurred while calculating GP-related quantities.')
            centres_gp = None
        l, h2 = sts.calculate_scales(trace, model_args['ci_alpha'])
    else:
        centres_gp, l, h2 = None, None, None

    if model_args['lik'] == 'BBin':
        logger.info('Calculating dispersion(s).')
        disps = sts.calculate_dispersions(data, trace, model_args['ci_alpha'])
    else:
        disps = None

    # return tidy data
    data = aux.pivot_longer(data)
    data = pnd.merge(data, posts)

    # logger.info('Finished.')

    return {
        'model': model,
        'fit': fit,
        'trace': trace,
        'data': data,
        'weights': weights,
        'centres': centres,
        'centres_gp': centres_gp,
        'PPD': ppd,
        'LPPD': lppd,
        'disps': disps,
        'lengths': l,
        'amplitudes': h2,
        'model_args': model_args,
        'pymc3_args': pymc3_args
    }
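# Hypothetical call sketch (MODEL_ARGS/PYMC3_ARGS supply the defaults; the
# override keys shown mirror those read inside `infer` above):
#
#     res = infer(df, pymc3_args={'method': 'advi', 'niters': 20000,
#                                 'learning_rate': 0.01, 'draws': 1000})
#     res['weights'], res['centres']    # posterior cluster summaries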
def run_lda(args):
    tf_vectorizer, docs_tr, docs_te = prepare_sparse_matrix_nonlabel(
        args.n_tr, args.n_te, args.n_word)
    feature_names = tf_vectorizer.get_feature_names()
    doc_tr_minibatch = pm.Minibatch(docs_tr.toarray(), args.bsz)
    doc_tr = shared(docs_tr.toarray()[:args.bsz])

    def log_prob(beta, theta):
        """Returns the log-likelihood function for given documents.

        K : number of topics in the model
        V : number of words (size of vocabulary)
        D : number of documents (in a mini-batch)

        Parameters
        ----------
        beta : tensor (K x V)
            Word distributions.
        theta : tensor (D x K)
            Topic distributions for documents.
        """

        def ll_docs_f(docs):
            dixs, vixs = docs.nonzero()
            vfreqs = docs[dixs, vixs]
            ll_docs = (vfreqs * pmmath.logsumexp(
                tt.log(theta[dixs]) + tt.log(beta.T[vixs]), axis=1).ravel())
            return tt.sum(ll_docs) / (tt.sum(vfreqs) + 1e-9)

        return ll_docs_f

    with pm.Model() as model:
        beta = Dirichlet("beta",
                         a=pm.floatX((1. / args.n_topic) *
                                     np.ones((args.n_topic, args.n_word))),
                         shape=(args.n_topic, args.n_word))
        theta = Dirichlet("theta",
                          a=pm.floatX((10. / args.n_topic) *
                                      np.ones((args.bsz, args.n_topic))),
                          shape=(args.bsz, args.n_topic),
                          total_size=args.n_tr)
        doc = pm.DensityDist("doc", log_prob(beta, theta), observed=doc_tr)

    encoder = ThetaEncoder(n_words=args.n_word, n_hidden=100,
                           n_topics=args.n_topic)
    local_RVs = OrderedDict([(theta, encoder.encode(doc_tr))])
    encoder_params = encoder.get_params()

    s = shared(args.lr)

    def reduce_rate(a, h, i):
        s.set_value(args.lr / ((i / args.bsz) + 1) ** 0.7)

    with model:
        approx = pm.MeanField(local_rv=local_RVs)
        approx.scale_cost_to_minibatch = False
        inference = pm.KLqp(approx)
    inference.fit(args.n_iter,
                  callbacks=[reduce_rate,
                             pm.callbacks.CheckParametersConvergence(diff="absolute")],
                  obj_optimizer=pm.adam(learning_rate=s),
                  more_obj_params=encoder_params,
                  total_grad_norm_constraint=200,
                  more_replacements={doc_tr: doc_tr_minibatch})

    doc_tr.set_value(docs_tr.toarray())
    inp = tt.matrix(dtype="int64")
    sample_vi_theta = theano.function(
        [inp],
        approx.sample_node(approx.model.theta, args.n_sample,
                           more_replacements={doc_tr: inp}))

    test = docs_te.toarray()
    test_n = test.sum(1)

    beta_pymc3 = pm.sample_approx(approx, draws=args.n_sample)['beta']
    theta_pymc3 = sample_vi_theta(test)

    assert beta_pymc3.shape == (args.n_sample, args.n_topic, args.n_word)
    assert theta_pymc3.shape == (args.n_sample, args.n_te, args.n_topic)

    beta_mean = beta_pymc3.mean(0)
    theta_mean = theta_pymc3.mean(0)
    pred_rate = theta_mean.dot(beta_mean)
    pp_test = (test * np.log(pred_rate)).sum(1) / test_n

    posteriors = {'theta': theta_pymc3, 'beta': beta_pymc3}

    log_top_words(beta_pymc3.mean(0), feature_names,
                  n_top_words=args.n_top_word)
    save_elbo(approx.hist)
    save_pp(pp_test)
    save_draws(posteriors)
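# Hypothetical invocation sketch for run_lda (assumes an argparse-style
# namespace carrying the fields referenced in the function above):
#
#     args = argparse.Namespace(n_tr=5000, n_te=500, n_word=2000, n_topic=20,
#                               bsz=128, n_iter=10000, lr=0.01, n_sample=100,
#                               n_top_word=10)
#     run_lda(args)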
#%%
def cust_logp(z):
    return -pot3(z)
    #return bound(-pot4(z), z>-5, z<5)


with pm.Model() as pot3m:
    pm.DensityDist('pot_func', logp=cust_logp, shape=(2, ))

with pot3m:
    traceNUTS = pm.sample(2500, init=None, njobs=2)

formula = 'planar*16'
with pot3m:
    inference = pm.NFVI(formula, jitter=1.)
inference.fit(25000, obj_optimizer=pm.adam(learning_rate=.01), obj_n_mc=200)
traceNF = inference.approx.sample(5000)

fig, ax = plt.subplots(1, 3, figsize=(18, 6))
contour_pot(pot3f, ax[0], 'pot3')
ax[1].scatter(traceNUTS['pot_func'][:, 0],
              traceNUTS['pot_func'][:, 1],
              c='r', alpha=.02)
ax[1].set_xlim(-5, 5)
ax[1].set_ylim(-5, 5)
ax[1].set_title('NUTS')
ax[2].scatter(traceNF['pot_func'][:, 0],
              traceNF['pot_func'][:, 1],
              c='b', alpha=.02)
def fit_advi_iterative(self, n=3, method='advi', n_type='restart',
                       n_iter=None, learning_rate=None, reducing_lr=False,
                       progressbar=True, scale_cost_to_minibatch=True):
    """Find posterior using pm.ADVI() method directly (allows continuing
    training through the `refine` method), maximising likelihood of the data
    and minimising KL-divergence of posterior to prior - the ELBO loss.

    Parameters
    ----------
    n :
        number of independent initialisations (Default value = 3)
    method :
        'advi', to allow for potential use of SVGD, MCMC, custom (currently only ADVI implemented).
        (Default value = 'advi')
    n_type :
        type of repeated initialisation:

        * **'restart'** to pick different initial value,
        * **'cv'** for molecular cross-validation - splits counts into n datasets,
          for now, only n=2 is implemented
        * **'bootstrap'** for fitting the model to multiple downsampled datasets.
          Run `mod.bootstrap_data()` to generate variants of data

        (Default value = 'restart')
    n_iter :
        number of iterations, supersedes self.n_iter specified when creating model instance.
        (Default value = None)
    learning_rate :
        learning rate, supersedes self.learning_rate specified when creating model instance.
        (Default value = None)
    reducing_lr :
        boolean, use decaying learning rate? (Default value = False)
    progressbar :
        boolean, show progress bar? (Default value = True)
    scale_cost_to_minibatch :
        when using training in minibatches, scale cost function appropriately?
        See discussion https://discourse.pymc.io/t/effects-of-scale-cost-to-minibatch/1429
        to understand the effects. (Default value = True)

    Returns
    -------
    None
        self.mean_field dictionary with MeanField pymc3 objects,
        and self.advi dictionary with ADVI objects for each initialisation.
    """

    self.n_type = n_type
    self.scale_cost_to_minibatch = scale_cost_to_minibatch

    if n_iter is None:
        n_iter = self.n_iter

    if learning_rate is None:
        learning_rate = self.learning_rate

    ### Initialise optimiser ###
    if reducing_lr:
        # initialise the function for adaptive learning rate
        s = theano.shared(np.array(learning_rate).astype(self.data_type))

        def reduce_rate(a, h, i):
            s.set_value(np.array(learning_rate / ((i / self.n_obs) + 1) ** .7)
                        .astype(self.data_type))

        optimiser = pm.adam(learning_rate=s)
        callbacks = [reduce_rate, CheckParametersConvergence()]
    else:
        optimiser = pm.adam(learning_rate=learning_rate)
        callbacks = [CheckParametersConvergence()]

    if np.isin(n_type, ['bootstrap']):
        if self.X_data_sample is None:
            self.bootstrap_data(n=n)
    elif np.isin(n_type, ['cv']):
        self.generate_cv_data()  # cv data added to self.X_data_sample

    init_names = ['init_' + str(i + 1) for i in np.arange(n)]

    for i, name in enumerate(init_names):

        with self.model:
            self.advi[name] = pm.ADVI()

        # when type is molecular cross-validation or bootstrap,
        # replace self.x_data tensor with new data
        if np.isin(n_type, ['cv', 'bootstrap']):

            # defining minibatch
            if self.minibatch_size is not None:
                # minibatch main data - expression matrix
                self.x_data_minibatch = pm.Minibatch(
                    self.X_data_sample[i].astype(self.data_type),
                    batch_size=[self.minibatch_size, None],
                    random_seed=self.minibatch_seed[i])
                more_replacements = {self.x_data: self.x_data_minibatch}

                # if any other data inputs should be minibatched add them too
                if self.extra_data is not None:
                    # for each parameter in the dictionary add it to more_replacements
                    for k in self.extra_data.keys():
                        more_replacements[self.extra_data_tt[k]] = \
                            pm.Minibatch(self.extra_data[k].astype(self.data_type),
                                         batch_size=[self.minibatch_size, None],
                                         random_seed=self.minibatch_seed[i])

            # or using all data
            else:
                more_replacements = {
                    self.x_data: self.X_data_sample[i].astype(self.data_type)}
                # if any other data inputs should be added
                if self.extra_data is not None:
                    # for each parameter in the dictionary add it to more_replacements
                    for k in self.extra_data.keys():
                        more_replacements[self.extra_data_tt[k]] = \
                            self.extra_data[k].astype(self.data_type)

        else:

            # defining minibatch
            if self.minibatch_size is not None:
                # minibatch main data - expression matrix
                self.x_data_minibatch = pm.Minibatch(
                    self.X_data.astype(self.data_type),
                    batch_size=[self.minibatch_size, None],
                    random_seed=self.minibatch_seed[i])
                more_replacements = {self.x_data: self.x_data_minibatch}

                # if any other data inputs should be minibatched add them too
                if self.extra_data is not None:
                    # for each parameter in the dictionary add it to more_replacements
                    for k in self.extra_data.keys():
                        more_replacements[self.extra_data_tt[k]] = \
                            pm.Minibatch(self.extra_data[k].astype(self.data_type),
                                         batch_size=[self.minibatch_size, None],
                                         random_seed=self.minibatch_seed[i])
            else:
                more_replacements = {}

        self.advi[name].scale_cost_to_minibatch = scale_cost_to_minibatch

        # train the model
        self.mean_field[name] = self.advi[name].fit(
            n_iter, callbacks=callbacks, obj_optimizer=optimiser,
            total_grad_norm_constraint=self.total_grad_norm_constraint,
            progressbar=progressbar, more_replacements=more_replacements)

        # plot training history
        if self.verbose:
            plt.plot(np.log10(self.mean_field[name].hist[15000:]))
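# Minimal usage sketch (hypothetical `mod` instance of the model class;
# mirrors the docstring above):
#
#     mod.fit_advi_iterative(n=2, n_type='restart', n_iter=20000)
#     elbo_history = mod.mean_field['init_1'].hist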
xs.tag.test_value = numpy.zeros((batch_size, 1, 28, 28)).astype('float32')

logger.info("building model")
with pm.Model() as model:
    zs = pm.Normal("zs", mu=0, sd=1, shape=(batch_size, n_latent),
                   dtype=theano.config.floatX, total_size=len(data))
    xs_ = pm.Normal("xs_", mu=vae.decode(zs), sd=0.1, observed=xs,
                    dtype=theano.config.floatX, total_size=len(data))

local_RVs = OrderedDict({zs: vae.encode(xs)})
xs_t_minibatch = pm.Minibatch(data, batch_size)

logger.info("fitting model")
with model:
    approx = pm.fit(15000,
                    local_rv=local_RVs,
                    more_obj_params=list(vae.get_params()),
                    obj_optimizer=pm.adam(learning_rate=1e-3),
                    more_replacements={xs: xs_t_minibatch})

plt.plot(approx.hist)

# evaluate analogy
dec_zs = tt.matrix()
dec_fun = theano.function([dec_zs], theano.clone(vae.decode(zs), {zs: dec_zs}))


def test():
    nn = 10
    zs = numpy.array([(z1, z2) for z1 in numpy.linspace(-2, 2, nn)
                      for z2 in numpy.linspace(-2, 2, nn)]).astype('float32')
    xs = dec_fun(zs)[:, 0, :, :]
    xs = numpy.bmat([[xs[i + j * nn] for i in range(nn)]
                     for j in range(nn)])
def variational_inference(X_train, Y_train, X_test, Y_test, m, k):
    import numpy as np
    import pymc3 as pm
    from sklearn.preprocessing import MinMaxScaler
    import theano
    import matplotlib.pyplot as plt
    import numpy
    import random

    # Load the input data and normalise it
    n, p = np.shape(X_train)
    Y_train = np.reshape(Y_train, (len(Y_train), 1))
    Y_test = np.reshape(Y_test, (len(Y_test), 1))
    scaler_x = MinMaxScaler(feature_range=(-1, 1))
    X_train = scaler_x.fit_transform(X_train)
    X_test = scaler_x.transform(X_test)
    scaler_y = MinMaxScaler(feature_range=(0, 1))
    Y_train = scaler_y.fit_transform(Y_train)
    Y_test = scaler_y.transform(Y_test)
    X_train = theano.shared(X_train)

    # Add noise
    #sigma=0.1
    #rd_num=int(sigma*len(Y_train))
    #rd=random.sample(range(len(Y_train)),rd_num)
    #sm=np.random.uniform(-0.1,0,size=rd_num)
    #Y_train=np.ravel(Y_train)
    #Y_train[rd]=sm

    # Define the model
    basic_model = pm.Model()
    with basic_model:
        b = pm.Normal('b', mu=0, tau=1)
        A = pm.Normal('A', mu=0, tau=1, shape=(p, m))
        gamma_0 = pm.Gamma('gamma_0', alpha=10**(-5), beta=10**(-5))
        gamma_1 = pm.Gamma('gamma_1', alpha=10**(-5), beta=10**(-5))
        beta = pm.Normal('beta', mu=0, tau=gamma_0, shape=(m, 1))
        Y_obs = pm.Normal('Y_obs',
                          mu=sigmoid_kernel(X_train, beta, A, b),
                          tau=gamma_1,
                          observed=Y_train)
        start = pm.find_MAP()
        #approx=pm.fit(k,start=start,obj_optimizer=pm.adam(),callbacks=[tracker])
        approx = pm.fit(k, start=start, obj_optimizer=pm.adam())

    # Sample the parameters z={beta, A, b, gamma_0, gamma_1} from the fitted model
    trace = pm.sample_approx(approx=approx, draws=5000)
    #pm.traceplot(trace)
    #pm.summary(trace)

    # Take the mean of 5000 posterior predictive draws as the final result.
    post_pred = pm.sample_ppc(trace, samples=5000, model=basic_model)
    y_train_pred = np.mean(post_pred['Y_obs'], axis=0)

    # Compare the predictions with the actual values.
    mse_train = (((y_train_pred - Y_train)**2).sum()) / np.size(Y_train, 0)
    X_train.set_value(X_test)
    post_pred = pm.sample_ppc(trace, samples=5000, model=basic_model)
    y_test_pred = np.mean(post_pred['Y_obs'], axis=0)
    mse_test = (((y_test_pred - Y_test)**2).sum()) / np.size(Y_test, 0)

    Y_mean = np.ones_like(Y_test) * np.mean(Y_test)
    r2 = 1 - (((y_test_pred - Y_test)**2).sum()) / (((Y_test - Y_mean)**2).sum())

    n = len(Y_test)
    err = Y_test - y_test_pred
    err_mean = np.ones_like(err) * np.mean(err)
    err_var = (((err - err_mean)**2).sum()) / (n - 1)
    y_var = (((Y_test - Y_mean)**2).sum()) / (n - 1)
    Evar = 1 - err_var / y_var
    #print('mse_train=',mse_train,'\n mse_test=',mse_test,'\n r2=',r2,'\n Evar=',Evar,'\n m=',m)
    return mse_train, mse_test, r2, Evar, m
def deconvolute(data, expr, meta=None, sample_deviation=False, c_type=None,
                norm=None, progress=True, max_iter=1e7):
    chars = data['chars']
    features = data['features']
    var = data['variations']
    is_multico = 'multico' in data
    cell_types = list(chars.keys())
    if 'backgrounds' in data:
        background = data['backgrounds']['all']
        if c_type is not None:
            for key in data['backgrounds'].keys():
                if key.upper() == c_type.upper():
                    background = data[key]
                    break
        cell_types += ['background']
    n_types = len(cell_types)
    if meta is not None:
        save_function = saveFunction(meta, cell_types)
    else:
        save_function = None
    test_features = list(expr.index)
    index = np.array([f in test_features for f in features])
    n_features = sum(index)
    filtered_features = [i for (i, v) in zip(features, index) if v]
    sample_float = expr.loc[filtered_features].values
    sample = sample_float.astype(int)
    seq_depth = np.sum(sample)
    ct_prior = np.ones(n_types)
    ct_start = ct_prior / np.sum(ct_prior)
    for i, ct in enumerate(cell_types):
        if ct == 'background':
            ct_prior[i] = 1e-1
            ct_start[i] = 1 - ct_start[i]
            ct_start = ct_start / np.sum(ct_start)
            break
    if mode == 'debug':
        mess = '... using {} of {} features with {} available ...'
        print(mess.format(n_features, str(len(index)),
                          str(len(test_features))))
        print('{} reads in total.'.format(sum(expr)))

    def combine_and_embed(samp, deviation, A, Ainv, b):
        base = samp - tt.dot(deviation, A)
        return tt.dot(base, Ainv) + deviation + b

    def mix(components, decomp):
        return tt.dot(decomp[None, :], tt.nnet.softmax(components))

    def reduce(samp, A, b):
        return np.dot(samp - b, A)

    def embedd(samp, Ainv, b):
        return np.dot(samp, Ainv) + b

    def project(deviation, A, Ainv, b):
        mapping = reduce(deviation, A, b)
        co_projection = deviation - embedd(mapping, Ainv, b)
        return mapping, co_projection

    l_alphas = sample_float + 1
    t_samp = tau_inv(np.log(l_alphas) - np.log(l_alphas.sum()))
    if is_multico is False:
        A = data['A'][index, :]
        Ainv = data['Ainv'][:, index]
        b = data['b'][index]
        s_Ainv = theano.shared(Ainv)
        s_A = theano.shared(A)
        s_b = theano.shared(b)
        samp_mapping, dev_start = project(t_samp, A, Ainv, b)
    del data
    s = 1
    with pm.Model() as model:
        decomp = pm.Dirichlet('decomp', ct_prior, shape=ct_prior.shape,
                              testval=ct_start)
        cert = (1 / (1 - decomp) - 1)**2
        ct_expr = list()
        scale = pm.Lognormal('scale', testval=10)
        i = 0
        for cell_type in cell_types:
            if cell_type == 'background':
                dev_samp = pm.Normal('comb ' + cell_type,
                                     mu=background['mean'][index],
                                     sigma=background['std'][index] / scale,
                                     shape=(1, n_features),
                                     testval=t_samp)
                ct_expr.append(dev_samp)
                continue
            if is_multico is True:
                A = chars[cell_type]['A'][index, :]
                Ainv = chars[cell_type]['Ainv'][:, index]
                b = chars[cell_type]['b'][index]
                s_Ainv = theano.shared(Ainv)
                s_A = theano.shared(A)
                s_b = theano.shared(b)
                samp_mapping, dev_start = project(dev_start, A, Ainv, b)
                n = A.shape[1]
                samp = pm.Normal(cell_type, sigma=scale / cert[i],
                                 shape=(1, n))
            else:
                samp = pm.MvNormal(cell_type,
                                   chars[cell_type]['mean'],
                                   cov=chars[cell_type]['sigma'] * scale / cert[i],
                                   shape=(1, A.shape[1]),
                                   testval=chars[cell_type]['mean'])
            if sample_deviation is True:
                deviation = pm.Normal('deviation ' + cell_type,
                                      mu=var[cell_type]['mean'][index],
                                      sigma=var[cell_type]['std'][index] * scale / cert[i],
                                      shape=(1, n_features),
                                      testval=dev_start)
            else:
                deviation = theano.shared(dev_start)
            dev_samp = pm.Deterministic(
                'comb ' + cell_type,
                combine_and_embed(samp, deviation, s_A, s_Ainv, s_b))
            ct_expr.append(dev_samp)
            i += 1
        ct_expr = tt.concatenate(ct_expr, axis=0)
        transcriptome = pm.Deterministic('trans', mix(ct_expr, decomp))
        obs = pm.Multinomial('obs', seq_depth, transcriptome,
                             observed=sample)
        mean_field = pm.fit(
            method='advi', n=int(max_iter), progressbar=progress,
            callbacks=[CheckAndSave(save_function=save_function)],
            obj_optimizer=pm.adam())
    vals = mean_field.bij.rmap(mean_field.mean.get_value())
    if 'scale_log__' in vals:
        print('Scale {}'.format(np.exp(vals['scale_log__'])))
    decomp = vals['decomp_stickbreaking__']
    return decomp, mean_field
print(pmodel.check_test_point())

if args.saveModel is True:
    print("Saving the model...")
    with open("data/mc_model_{}.pkl".format(args.tag), "wb") as buff:
        pickle.dump(model, buff)
    print("Saving the pca...")
    with open("data/mc_model_{}_pca.pkl".format(args.tag), "wb") as buff:
        pickle.dump(model.pca, buff)

print("Fitting...")
save = SaveCallback(file_base="data/mc_model_{}".format(args.tag),
                    note=vars(args))
save.note["gene_ids"] = list(model.counts.index)
save.note["sample_ids"] = list(model.counts.columns)
if args.learnrate:
    if args.optimizer == "adam":
        obj_optimizer = pm.adam(learning_rate=args.learnrate)
    elif args.optimizer == "adagrad_window":
        obj_optimizer = pm.adagrad_window(learning_rate=args.learnrate,
                                          n_win=args.nwin)
    elif args.optimizer == "nesterov_momentum":
        obj_optimizer = pm.nesterov_momentum(learning_rate=args.learnrate)
    elif args.optimizer == "adagrad":
        obj_optimizer = pm.adagrad(learning_rate=args.learnrate)
    elif args.optimizer == "momentum":
        obj_optimizer = pm.momentum(learning_rate=args.learnrate)
    else:
        raise ValueError(
            f'The given optimizer "{args.optimizer}" is unknown.')
else:
    if args.optimizer == "adam":
        obj_optimizer = pm.adam()
        return pm.Normal.dist(mu=expected, sd=0.1).logp(y_obs)

    corr = pm.Uniform('corr', lower=-1., upper=1., shape=1000)
    corr = tt.repeat(corr, 10)
    pm.DensityDist('obs', custom_likelihood,
                   observed={
                       'x_diffs': (x[:-1] - x[1:]),
                       'y_obs_last': y[:-1],
                       'y_obs': y[1:]
                   })
    mean_field = pm.fit(n=5000, method='advi',
                        obj_optimizer=pm.adam(learning_rate=0.01))
    trace = mean_field.sample(1000)

estimated_corrs = np.mean(trace['corr'], axis=0)
plt.plot(estimated_corrs)
plt.show()

# Now we model the V2 data, and examine the stability of the correlation
with pm.Model() as model2:

    def custom_likelihood(x_diffs, y_obs_last, y_obs):
        expected = y_obs_last - corr * x_diffs
        return pm.Normal.dist(mu=expected, sd=0.1).logp(y_obs)

    corr = pm.Uniform('corr', lower=-10., upper=10., shape=1000)
def train(self, fol_path='../data/*'):
    fol_list = glob.glob(fol_path)
    print(fol_list)
    seq_list = []
    for fol in fol_list:
        f_list = glob.glob(fol + '/*.jpg')
        im_list = []
        for f in sorted(f_list):
            # Crop to ultrasound active area
            im = np.mean(cv2.resize(
                cv2.imread(f)[180:700, 500:1020, :], (self.w, self.h)),
                         axis=-1)
            im_list.append(im)
        seq_list.append(np.array(im_list))

    # Get latent states
    self.latent_list = []
    for s in seq_list[:-1]:
        self.latent_list.append(
            self.vae_model.encoder.predict(
                s.reshape(-1, self.w, self.h, 1) / 255.0)[0])
    self.latent = np.vstack(self.latent_list)
    np.savetxt(self.log_path + 'latent.txt', self.latent)

    # Generate training pairs
    print('Generating training pairs')
    G = self.generate_pairs(self.latent_list)
    W = np.arange(self.latent.shape[0]).astype(int)
    Gt = tt.as_tensor(G)
    W = W.astype(int)
    Xt = tt.as_tensor(self.latent)

    with pm.Model() as reward_model:
        l = pm.Gamma("l", alpha=2.0, beta=0.5)
        cov_func = pm.gp.cov.Matern32(self.latent.shape[1], ls=l)
        Xu = pm.gp.util.kmeans_inducing_points(self.Ni, self.latent)
        sig = pm.HalfCauchy("sig",
                            beta=np.ones((self.latent.shape[0], )),
                            shape=self.latent.shape[0])
        gp = pm.gp.MarginalSparse(cov_func=cov_func)
        f = gp.marginal_likelihood('reward', Xt, Xu,
                                   shape=self.latent.shape[0],
                                   y=None, noise=sig, is_observed=False)
        diff = f[Gt[:, 0]] - f[Gt[:, 1]]
        p = pm.math.sigmoid(diff)
        wl = pm.Bernoulli('observed wl', p=p,
                          observed=np.ones((G.shape[0], )),
                          total_size=self.latent.shape[0])
        inference = pm.ADVI()
        train_probs = inference.approx.sample_node(p)
        train_accuracy = (train_probs > 0.5).mean(-1)
        eval_tracker = pm.callbacks.Tracker(train_accuracy=train_accuracy.eval)
        approx = inference.fit(1000,
                               obj_optimizer=pm.adam(learning_rate=0.1),
                               callbacks=[eval_tracker])
    trace = approx.sample(5000)
    l = np.mean(trace['l'])
    sig = np.mean(trace['sig'])
    reward = np.mean(trace['reward'], axis=0)
    np.savetxt('./logs/l.txt', np.array([l]))
    np.savetxt('./logs/sig.txt', np.array([sig]))
    np.savetxt('./logs/reward.txt', reward)
    print('Saved trained reward parameters')
    return l, sig, reward
def fit_advi_refine(self, n_iter=10000, learning_rate=None,
                    progressbar=True, reducing_lr=False):
    """Refine posterior using ADVI - continue training after `.fit_advi_iterative()`

    Parameters
    ----------
    n_iter :
        number of additional iterations (Default value = 10000)
    learning_rate :
        same as in `.fit_advi_iterative()` (Default value = None)
    progressbar :
        same as in `.fit_advi_iterative()` (Default value = True)
    reducing_lr :
        same as in `.fit_advi_iterative()` (Default value = False)

    Returns
    -------
    dict
        update the self.mean_field dictionary with MeanField pymc3 objects.
    """

    self.n_iter = self.n_iter + n_iter

    if learning_rate is None:
        learning_rate = self.learning_rate

    ### Initialise optimiser ###
    if reducing_lr:
        # initialise the function for adaptive learning rate
        s = theano.shared(np.array(learning_rate).astype(self.data_type))

        def reduce_rate(a, h, i):
            s.set_value(np.array(learning_rate / ((i / self.n_obs) + 1) ** .7)
                        .astype(self.data_type))

        optimiser = pm.adam(learning_rate=s)
        callbacks = [reduce_rate, CheckParametersConvergence()]
    else:
        optimiser = pm.adam(learning_rate=learning_rate)
        callbacks = [CheckParametersConvergence()]

    for i, name in enumerate(self.advi.keys()):

        # when type is molecular cross-validation or bootstrap,
        # replace self.x_data tensor with new data
        if np.isin(self.n_type, ['cv', 'bootstrap']):

            # defining minibatch
            if self.minibatch_size is not None:
                # minibatch main data - expression matrix
                self.x_data_minibatch = pm.Minibatch(
                    self.X_data_sample[i].astype(self.data_type),
                    batch_size=[self.minibatch_size, None],
                    random_seed=self.minibatch_seed[i])
                more_replacements = {self.x_data: self.x_data_minibatch}

                # if any other data inputs should be minibatched add them too
                if self.extra_data is not None:
                    # for each parameter in the dictionary add it to more_replacements
                    for k in self.extra_data.keys():
                        more_replacements[self.extra_data_tt[k]] = \
                            pm.Minibatch(self.extra_data[k].astype(self.data_type),
                                         batch_size=[self.minibatch_size, None],
                                         random_seed=self.minibatch_seed[i])

            # or using all data
            else:
                more_replacements = {
                    self.x_data: self.X_data_sample[i].astype(self.data_type)}
                # if any other data inputs should be added
                if self.extra_data is not None:
                    # for each parameter in the dictionary add it to more_replacements
                    for k in self.extra_data.keys():
                        more_replacements[self.extra_data_tt[k]] = \
                            self.extra_data[k].astype(self.data_type)

        else:

            # defining minibatch
            if self.minibatch_size is not None:
                # minibatch main data - expression matrix
                self.x_data_minibatch = pm.Minibatch(
                    self.X_data.astype(self.data_type),
                    batch_size=[self.minibatch_size, None],
                    random_seed=self.minibatch_seed[i])
                more_replacements = {self.x_data: self.x_data_minibatch}

                # if any other data inputs should be minibatched add them too
                if self.extra_data is not None:
                    # for each parameter in the dictionary add it to more_replacements
                    for k in self.extra_data.keys():
                        more_replacements[self.extra_data_tt[k]] = \
                            pm.Minibatch(self.extra_data[k].astype(self.data_type),
                                         batch_size=[self.minibatch_size, None],
                                         random_seed=self.minibatch_seed[i])
            else:
                more_replacements = {}

        with self.model:
            # train for more iterations & export trained model by
            # overwriting the initial mean field object
            self.mean_field[name] = self.advi[name].fit(
                n_iter, callbacks=callbacks, obj_optimizer=optimiser,
                total_grad_norm_constraint=self.total_grad_norm_constraint,
                progressbar=progressbar, more_replacements=more_replacements)

        if self.verbose:
            plt.plot(np.log10(self.mean_field[name].hist[15000:]))
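# Usage sketch (hypothetical `mod` previously fitted via fit_advi_iterative):
#
#     mod.fit_advi_refine(n_iter=20000, reducing_lr=True)
#     # the refined approximations overwrite the entries in mod.mean_field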
    theta = pm.Dirichlet("theta", a=alpha, shape=(D, K),
                         transform=t_stick_breaking(1e-9))  #.astype('float32')
    # Kth topic is fixed as ambient distribution, therefore not learned
    phi = pm.Dirichlet("phi", a=beta, shape=(K - 1, V),
                       transform=t_stick_breaking(1e-9))  #.astype('float32')
    doc = pm.DensityDist(
        'doc', log_lda,
        observed=dict(theta=theta,
                      phi=phi,
                      value=sparse_array,
                      phiAmbient=np.matrix(
                          [phiAmbientDict[x] for x in feature_names]),
                      rowsums=rowsums,
                      sumall=sumall))

eta = .3
s = shared(eta)


def reduce_rate(a, h, i):
    s.set_value(eta / ((i / 200) + 1)**.4)


with model1:
    #inference = pm.ADVI()
    #inference = pm.FullRankADVI()
    inference = pm.variational.NFVI()
    approx = pm.fit(n=n_iterations, method=inference,
                    obj_optimizer=pm.adam(learning_rate=s),
                    callbacks=[reduce_rate])
    tr1 = approx.sample(draws=1000)

advi_elbo = pd.DataFrame({
    'log-ELBO': -np.log(approx.hist),
    'n': np.arange(approx.hist.shape[0])
})
plt.clf()
sns.lineplot(y='log-ELBO', x='n', data=advi_elbo)
plt.savefig(os.path.join(sc.settings.figdir, 'ELBO.png'))

theta = tr1['theta'].mean(0)
theta = anndata.AnnData(
    theta,
    var=pd.DataFrame(index=['lda_' + str(i) for i in range(K)]),
    obs=pd.DataFrame(index=list(adata.obs.index)))
for i in range(theta.shape[1]):
    freshadata.obs['lda_' + str(i)] = theta[:, i].X

# In[15]:

phi = tr1['phi'].mean(0)