def fit(self, X, y, cats, inference_type='advi', minibatch_size=None, inference_args=None):
    """
    Train the Hierarchical Logistic Regression model

    Parameters
    ----------
    X : numpy array, shape [n_samples, n_features]

    y : numpy array, shape [n_samples, ]
        Squeezed to one dimension if it is not already 1-D.

    cats : numpy array, shape [n_samples, ]

    inference_type : string, specifies which inference method to call.
        Defaults to 'advi'. Currently, only 'advi' and 'nuts' are supported

    minibatch_size : number of samples to include in each minibatch for ADVI,
        defaults to None, so minibatch is not run by default

    inference_args : dict, arguments to be passed to the inference methods.
        Check the PyMC3 docs for permissable values. If no arguments are
        specified, default values will be set. The dict passed in is never
        modified; a copy is used internally.

    Returns
    -------
    self
    """
    self.num_cats = len(np.unique(cats))
    self.num_training_samples, self.num_pred = X.shape
    self.inference_type = inference_type

    if y.ndim != 1:
        y = np.squeeze(y)

    # Copy so that adding 'more_replacements' below cannot leak into the
    # caller's dictionary (which may be reused for another fit).
    inference_args = dict(inference_args or self._set_default_inference_args())

    if self.cached_model is None:
        self.cached_model = self.create_model()

    if minibatch_size:
        with self.cached_model:
            minibatches = {
                self.shared_vars['model_input']:
                    pm.Minibatch(X, batch_size=minibatch_size),
                self.shared_vars['model_output']:
                    pm.Minibatch(y, batch_size=minibatch_size),
                self.shared_vars['model_cats']:
                    pm.Minibatch(cats, batch_size=minibatch_size),
            }
            inference_args['more_replacements'] = minibatches
    else:
        # Full-batch training: load the complete data into the shared variables.
        self._set_shared_vars({
            'model_input': X,
            'model_output': y,
            'model_cats': cats,
        })

    self._inference(inference_type, inference_args)
    return self
def fit(self, X, y, inference_type='advi', minibatch_size=None, inference_args=None):
    """
    Train the Naive Bayes model.

    Parameters
    ----------
    X : numpy array, shape [num_training_samples, num_pred]. Contains the data points.

    y : numpy array, shape [num_training_samples,]. Contains the category of the data points.

    inference_type : string, specifies which inference method to call.
        Default is 'advi'. Currently, only 'advi' and 'nuts' are implemented.

    minibatch_size : int, number of samples to include in each minibatch for ADVI.
        Defaults to None so minibatch is not run by default.

    inference_args : dict, arguments to be passed to the inference methods.
        Check the PyMC3 documentation. The dict passed in is never modified;
        a copy is used internally.

    Returns
    -------
    The current instance of the GaussianNB class.
    """
    self.num_training_samples, self.num_pred = X.shape
    self.num_cats = len(np.unique(y))
    self.inference_type = inference_type

    # Copy so that adding 'more_replacements' below cannot leak into the
    # caller's dictionary.
    inference_args = dict(inference_args or self._set_default_inference_args())

    # Explicit None check: only a missing model should trigger a rebuild,
    # regardless of how the model object behaves in a boolean context.
    if self.cached_model is None:
        self.cached_model = self.create_model()

    if minibatch_size:
        with self.cached_model:
            minibatches = {
                self.shared_vars['model_input']:
                    pm.Minibatch(X, batch_size=minibatch_size),
                self.shared_vars['model_output']:
                    pm.Minibatch(y, batch_size=minibatch_size),
            }
            inference_args['more_replacements'] = minibatches
    else:
        # Full-batch training: load the complete data into the shared variables.
        self._set_shared_vars({'model_input': X, 'model_output': y})

    self._inference(inference_type, inference_args)
    return self
def main():
    """Fit the softmax-of-MvNormal variant with minibatch ADVI and pickle the result.

    Command line: ``[chain no] [optional output no]``. The chain number seeds
    NumPy's RNG; the output number (defaulting to the chain number) is used in
    the output file name ``posterior_ln_shuffle_<output no>.pkl``.

    Relies on names defined outside this function (``lang_ind``, ``sound_ind``,
    ``K``, ``L``, ``S``, ``X``, ``N``, ``Sigma``, ``s_breaks``, ``logprob``,
    ``uniform``). ``lang_ind`` and ``sound_ind`` are shuffled in place,
    independently of each other.
    """
    if len(sys.argv) < 2 or len(sys.argv) > 3:
        # NOTE(review): the usage text names inference_dir.py although this
        # variant writes "posterior_ln_shuffle_*" - confirm the script name.
        print(
            'usage: python3 inference_dir.py [chain no] [optional output no]')
        sys.exit()

    c = int(sys.argv[1])
    d = int(sys.argv[2]) if len(sys.argv) == 3 else c

    np.random.seed(c)
    np.random.shuffle(lang_ind)
    np.random.shuffle(sound_ind)
    lang_minibatch = pm.Minibatch(lang_ind, 500)
    sound_minibatch = pm.Minibatch(sound_ind, 500)

    model_ln = pm.Model()
    with model_ln:
        beta = pm.HalfFlat('beta')
        # theta = language-level prior over components
        theta = tt.stack([
            pm.Dirichlet('theta_{}'.format(l), a=tt.ones(K) * beta, shape=K)
            for l in range(L)
        ])
        psi = [
            pm.MvNormal('psi_{}'.format(k), mu=[0] * S, cov=Sigma, shape=S)
            for k in range(K)
        ]
        # phi = component-level collection of distributions over sound change
        phi = tt.stack([
            tt.concatenate([
                pm.Deterministic(
                    'phi_{}_{}'.format(k, x),
                    tt.nnet.softmax(psi[k][s_breaks[x][0]:s_breaks[x][1]])[0])
                for x in range(X)
            ]) for k in range(K)
        ])
        # Registered with the model for its likelihood term; the returned
        # variable itself is not needed afterwards.
        pm.DensityDist('target',
                       logprob(theta=theta, phi=phi),
                       observed=dict(lang_array=lang_minibatch,
                                     sound_array=sound_minibatch),
                       total_size=N)
        inference_ln = pm.ADVI()
        inference_ln.fit(50000,
                         obj_optimizer=pm.adam(learning_rate=.01,
                                               beta1=uniform(.7, .9)),
                         callbacks=[pm.callbacks.CheckParametersConvergence()])
        trace_ln = inference_ln.approx.sample()

    # Keep only the user-facing variables (names not ending in '__').
    posterior = {
        k: trace_ln[k]
        for k in trace_ln.varnames if not k.endswith('__')
    }
    posterior['ELBO'] = inference_ln.hist

    # Context manager guarantees the file is closed even if pickling fails.
    with open('posterior_ln_shuffle_{}.pkl'.format(d), 'wb') as f:
        pkl.dump(posterior, f)
def main():
    """Fit the Dirichlet variant with minibatch ADVI and pickle the result.

    Command line: ``[chain no] [optional output no]``. The chain number seeds
    NumPy's RNG; the output number (defaulting to the chain number) is used in
    the output file name ``posterior_dir_<output no>.pkl``.

    Relies on names defined outside this function (``lang_ind``, ``sound_ind``,
    ``K``, ``L``, ``R``, ``X``, ``N``, ``alpha``, ``logprob``, ``uniform``).
    """
    if len(sys.argv) < 2 or len(sys.argv) > 3:
        print(
            'usage: python3 inference_dir.py [chain no] [optional output no]')
        sys.exit()

    c = int(sys.argv[1])
    d = int(sys.argv[2]) if len(sys.argv) == 3 else c

    np.random.seed(c)
    lang_minibatch = pm.Minibatch(lang_ind, 500)
    sound_minibatch = pm.Minibatch(sound_ind, 500)

    model_dir = pm.Model()
    with model_dir:
        beta = pm.HalfFlat('beta')
        # theta = language-level prior over components
        theta = tt.stack([
            pm.Dirichlet('theta_{}'.format(l), a=tt.ones(K) * beta, shape=K)
            for l in range(L)
        ])
        phi = tt.stack([
            tt.concatenate([
                pm.Dirichlet('phi_{}_{}'.format(k, x),
                             a=tt.ones(R[x]) * alpha,
                             shape=R[x]) for x in range(X)
            ]) for k in range(K)
        ])
        # Registered with the model for its likelihood term; the returned
        # variable itself is not needed afterwards.
        pm.DensityDist('target',
                       logprob(theta=theta, phi=phi),
                       observed=dict(lang_array=lang_minibatch,
                                     sound_array=sound_minibatch),
                       total_size=N)
        inference_dir = pm.ADVI()
        inference_dir.fit(
            50000,
            obj_optimizer=pm.adam(learning_rate=.01, beta1=uniform(.7, .9)),
            callbacks=[pm.callbacks.CheckParametersConvergence()])
        trace_dir = inference_dir.approx.sample()

    # Keep only the user-facing variables (names not ending in '__').
    posterior = {
        k: trace_dir[k]
        for k in trace_dir.varnames if not k.endswith('__')
    }
    posterior['ELBO'] = inference_dir.hist

    # Context manager guarantees the file is closed even if pickling fails.
    with open('posterior_dir_{}.pkl'.format(d), 'wb') as f:
        pkl.dump(posterior, f)
def test_cloning_available(self):
    """A Minibatch node can be swapped for a shared variable via theano.clone."""
    minibatch = pm.Minibatch(np.arange(100), 1)
    squared = minibatch ** 2
    replacement = theano.shared(np.array([10]))
    # Substitute the minibatch with a fixed value and compile the cloned graph.
    cloned = theano.clone(squared, {minibatch: replacement})
    evaluate = theano.function([], cloned)
    assert evaluate() == np.array([100])
def construct_bayes_model(nems_model, signals, pred_name, resp_name,
                          batches=None):
    '''
    Build a probabilistic model from a NEMS model with a Poisson likelihood.

    Priors are obtained from `nems_model.get_priors`, converted with
    `construct_priors`, and fed to `nems_model.generate_tensor` to produce the
    symbolic tensors. The tensor named `pred_name` is used as the Poisson mean
    and the tensor named `resp_name` as the observed data.

    `signals` is copied first, so the caller's mapping is not modified. When
    `batches` is given, every signal is wrapped in a minibatch of that size.

    Returns the constructed model.
    '''
    batched_signals = signals.copy()
    nems_priors = nems_model.get_priors(batched_signals)

    # Batching has to happen *after* the priors are computed, because the
    # get_priors code typically doesn't work with batched tensors.
    if batches is not None:
        for key, value in batched_signals.items():
            batched_signals[key] = mc.Minibatch(value, batch_size=batches)

    with mc.Model() as mc_model:
        mc_priors = construct_priors(nems_priors)
        tensors = nems_model.generate_tensor(batched_signals, mc_priors)
        mc.Poisson('likelihood',
                   mu=tensors[pred_name],
                   observed=tensors[resp_name])
    return mc_model
def generate_groups_data_matrix_minibatch(groups, n_mi, s_mi):
    """Replace the training arrays in `groups` with minibatch tensors.

    `groups['train']` is modified in place: 'n_series_idx' and every entry of
    'groups_idx' become minibatches of size `s_mi`, and 'data' becomes a
    minibatch of size `(n_mi, s_mi)`.

    Returns the (mutated) `groups` and a column-shaped minibatch of size
    `n_mi` drawn from `arange(groups['train']['n'])`.
    """
    train = groups['train']

    train['n_series_idx'] = pm.Minibatch(train['n_series_idx'], s_mi)

    group_indices = train['groups_idx']
    for group_name in group_indices.keys():
        group_indices[group_name] = pm.Minibatch(group_indices[group_name],
                                                 s_mi)

    train['data'] = pm.Minibatch(train['data'], (n_mi, s_mi))

    positions = np.arange(train['n']).reshape(-1, 1)
    X_mi = pm.Minibatch(positions.ravel(), n_mi).reshape((-1, 1))
    return groups, X_mi
def test_vae():
    """Smoke test: one step of pm.fit with a local RV whose parameters come
    from an encoder, with the observed input replaced by a minibatch."""
    minibatch_size = 10
    data = pm.floatX(np.random.rand(100))
    x_mini = pm.Minibatch(data, minibatch_size)

    # Symbolic stand-in for the observations; swapped for x_mini during fit.
    x_inp = tt.vector()
    x_inp.tag.test_value = data[:minibatch_size]

    # Encoder (ae, be) and decoder (ad, bd) parameters, trained with the model.
    ae = theano.shared(pm.floatX([.1, .1]))
    be = theano.shared(pm.floatX(1.))
    ad = theano.shared(pm.floatX(1.))
    bd = theano.shared(pm.floatX(1.))

    encoded = x_inp.dimshuffle(0, 'x') * ae.dimshuffle('x', 0) + be
    mu = encoded[:, 0]
    rho = encoded[:, 1]

    with pm.Model():
        # Hidden variables
        zs = pm.Normal('zs', mu=0, sigma=1, shape=minibatch_size)
        decoded = zs * ad + bd
        # Observation model
        pm.Normal('xs_', mu=decoded, sigma=0.1, observed=x_inp)
        pm.fit(1,
               local_rv={zs: dict(mu=mu, rho=rho)},
               more_replacements={x_inp: x_mini},
               more_obj_params=[ae, be, ad, bd])
def predict_proba(self, X):
    """Return predictions for `X`, averaged over the inference samples.

    Raises
    ------
    NotFittedError
        If `self.sample` is None, i.e. the model has not been fitted.
    """
    sampler = self.sample
    if sampler is None:
        raise NotFittedError("Please call model.fit(X, y) first")

    batched_input = pm.Minibatch(X, batch_size=self.batch_size)
    draws = sampler(batched_input, self.inf_samples)
    # Average over inf_samples dimension
    return draws.mean(0)
def test_align(self):
    """Two identically seeded minibatches drift apart when one is evaluated
    separately, and agree again after pm.align_minibatches covers both."""
    first = pm.Minibatch(np.arange(1000), 1, random_seed=1)
    second = pm.Minibatch(np.arange(1000), 1, random_seed=1)
    draw_pair = theano.function([], [first, second])

    def collect():
        # 1000 joint draws, transposed into (draws of first, draws of second).
        return zip(*(draw_pair() for _ in range(1000)))

    second.eval()  # not aligned
    a, b = collect()
    assert a != b

    pm.align_minibatches()
    a, b = collect()
    assert a == b

    second.eval()  # not aligned
    # Aligning only one of the two is not enough.
    pm.align_minibatches([first])
    a, b = collect()
    assert a != b

    pm.align_minibatches([first, second])
    a, b = collect()
    assert a == b
def fit(self, X, y, n=200000, batch_size=10):
    """
    Train the Bayesian NN model.

    Parameters
    ----------
    X : 2-D array; its second dimension is stored as `self.num_pred`
    y : array of targets
    n : passed through to `self._inference` (defaults to 200000)
    batch_size : number of samples in each minibatch (defaults to 10)

    Returns
    -------
    self
    """
    _, self.num_pred = X.shape

    if self.cached_model is None:
        self.cached_model = self.create_model()

    with self.cached_model:
        input_batch = pm.Minibatch(X, batch_size=batch_size)
        output_batch = pm.Minibatch(y, batch_size=batch_size)
        minibatches = {
            self.shared_vars['model_input']: input_batch,
            self.shared_vars['model_output']: output_batch,
        }
        self._inference(minibatches, n)

    return self
def fit(self, x, y, epochs=30000, method='advi', batch_size=128, n_models=1, **sample_kwargs):
    """
    Fit the model `n_models` times with NUTS sampling or minibatch variational
    inference, appending the results to `self.trace` (and `self.approx` for the
    variational methods).

    :param x: input data; stored on `self.train_x`
    :param y: target data (only used by the variational methods)
    :param epochs: number of draws for 'nuts', or number of optimisation
        iterations for 'advi' / 'svgd'
    :param method: one of 'nuts', 'advi' or 'svgd'
    :param batch_size: int or array. For hierarchical models, batch along the second dimension (e.g., [None, 128])
    :param n_models: number of repeated fits
    :param sample_kwargs: forwarded to `pm.sample` ('nuts') or `pm.fit`
    :return: None
    :raises ValueError: if `method` is not one of the supported names
    """
    # Reject unknown methods up front; otherwise the variational branch would
    # reach pm.fit with no inference object and fail with a NameError.
    if method not in ('nuts', 'advi', 'svgd'):
        raise ValueError(
            "method must be one of 'nuts', 'advi' or 'svgd', not {!r}".format(method))

    self.train_x = x
    with self.model:
        if method == 'nuts':
            # NOTE(review): x and y are not loaded into self.x / self.y here
            # (the set_value calls were commented out in the original), so NUTS
            # samples with whatever data the model currently holds - confirm.
            for _ in range(n_models):
                self.trace.append(pm.sample(epochs, **sample_kwargs))
        else:
            mini_x = pm.Minibatch(x, batch_size=batch_size, dtype=floatX)
            mini_y = pm.Minibatch(y, batch_size=batch_size, dtype=floatX)

            inference = pm.ADVI() if method == 'advi' else pm.SVGD()

            # NOTE(review): the same inference object is reused for every
            # repetition - verify that repeated fits are meant to share it.
            for _ in range(n_models):
                approx = pm.fit(n=epochs,
                                method=inference,
                                more_replacements={
                                    self.x: mini_x,
                                    self.y: mini_y
                                },
                                **sample_kwargs)
                self.trace.append(approx.sample(draws=20000))
                self.approx.append(approx)
def __init__(self, df, true_prior=None, mini_batch=0):
    """Store the data, standardisation statistics and the model built from them.

    Parameters
    ----------
    df : object exposing `df.ppv.predictors` and `df.ppv.target`
        (presumably a DataFrame with a custom `ppv` accessor - TODO confirm)
    true_prior : stored unchanged on the instance
    mini_batch : int, minibatch size; 0 (the default) disables minibatching

    Raises
    ------
    ValueError
        If `mini_batch` is not an integer or is negative.
    """
    # Validate before touching any data. 0 is a legal value ("no minibatch"),
    # so the requirement is non-negative rather than positive.
    if not isinstance(mini_batch, int) or mini_batch < 0:
        raise ValueError("mini_batch must be a non-negative integer, not {}".format(mini_batch))

    self.df = df
    self.true_prior = true_prior
    self.predictors = df.ppv.predictors
    self.target = df.ppv.target
    self.mini_batch = mini_batch

    # scale data
    self.meanx = self.predictors.mean()
    self.scalex = self.predictors.std()
    zX, y = self._prep_data()

    if self.mini_batch:
        zX = pm.Minibatch(zX, batch_size=self.mini_batch)
        y = pm.Minibatch(y, batch_size=self.mini_batch)

    self.model = self._create_model(zX, y)

    # inferred from trace
    self.trace = self.intercept = self.parameters = None
def fit(self, X, y, cats, n=200000, batch_size=100):
    """
    Train the HLR model

    Parameters
    ----------
    X : numpy array, shape [n_samples, n_features]

    y : numpy array, shape [n_samples, ]

    cats: numpy array, shape [n_samples, ]

    n: number of iterations for ADVI fit, defaults to 200000

    batch_size: number of samples to include in each minibatch for ADVI, defaults to 100

    Returns
    -------
    self
    """
    self.num_cats = len(np.unique(cats))
    _, self.num_pred = X.shape

    if self.cached_model is None:
        self.cached_model = self.create_model()

    with self.cached_model:
        input_batch = pm.Minibatch(X, batch_size=batch_size)
        output_batch = pm.Minibatch(y, batch_size=batch_size)
        cats_batch = pm.Minibatch(cats, batch_size=batch_size)
        minibatches = {
            self.shared_vars['model_input']: input_batch,
            self.shared_vars['model_output']: output_batch,
            self.shared_vars['model_cats']: cats_batch,
        }
        self._inference(minibatches, n)

    return self
def simple_model_data(using_minibatch):
    """Generate 1000 normal draws plus the closed-form quantities used to
    check a normal model with known sd.

    `d` is computed as ``n / sd**2 + 1 / sd0**2`` and `mu_post` as
    ``(n * mean(data) / sd**2 + mu0 / sd0**2) / d``. When `using_minibatch`
    is truthy the returned 'data' entry is wrapped in `pm.Minibatch`;
    `mu_post` is always computed from the full sample.
    """
    n = 1000
    sd0, mu0 = 2., 4.
    sd, mu = 3., -5.

    samples = sd * np.random.randn(n) + mu
    d = n / sd**2 + 1 / sd0**2
    mu_post = (n * np.mean(samples) / sd**2 + mu0 / sd0**2) / d

    data = pm.Minibatch(samples) if using_minibatch else samples
    return {
        'n': n,
        'data': data,
        'mu_post': mu_post,
        'd': d,
        'mu0': mu0,
        'sd0': sd0,
        'sd': sd,
    }
def simple_model_data(use_minibatch):
    """Generate 1000 normal draws plus the closed-form quantities used to
    check a normal model with known sigma.

    `d` is computed as ``n / sigma**2 + 1 / sigma0**2`` and `mu_post` as
    ``(n * mean(data) / sigma**2 + mu0 / sigma0**2) / d``. When
    `use_minibatch` is truthy the returned 'data' entry is wrapped in
    `pm.Minibatch`; `mu_post` is always computed from the full sample.
    """
    n = 1000
    sigma0, mu0 = 2., 4.
    sigma, mu = 3., -5.

    samples = sigma * np.random.randn(n) + mu
    d = n / sigma ** 2 + 1 / sigma0 ** 2
    mu_post = (n * np.mean(samples) / sigma ** 2 + mu0 / sigma0 ** 2) / d

    data = pm.Minibatch(samples) if use_minibatch else samples
    return {
        'n': n,
        'data': data,
        'mu_post': mu_post,
        'd': d,
        'mu0': mu0,
        'sigma0': sigma0,
        'sigma': sigma,
    }
def test_1d(self):
    """An integer batch size only batches the first axis of `self.data`."""
    batch = pm.Minibatch(self.data, 20)
    drawn_shape = batch.eval().shape
    assert drawn_shape == (20, 10, 40, 10, 50)
# Left panel: kernel density estimate of the point locations with the raw
# points overlaid; right panel: the binned histogram image.
sns.kdeplot(xloc, yloc, bw=lengthscale_, cmap="viridis", shade=True, ax=ax[0])
ax[0].scatter(xloc, yloc, color='r', alpha=.25)
ax[0].set_xlim(0, 100)
ax[0].set_ylim(0, 100)
ax[1].imshow(hist, cmap='viridis', origin='lower')
ax[1].axis('off')
#%%
# input/output
# Flatten the bin-centre grid into one (y, x) row per cell, with the matching
# histogram value as the target.
xv, yv = np.meshgrid(xcenters, ycenters)
x_data = np.vstack((yv.flatten(), xv.flatten())).T
y_data = hist.flatten()
#%% pymc3 minibatch setup
# Not suitable for 2D mapping problem, overestimated lengthscale
batchsize = 10
# Random subsets of batchsize**2 flattened cells.
Xbatch = pm.Minibatch(x_data, batchsize**2)
Ybatch = pm.Minibatch(y_data, batchsize**2)
#%% set up minibatch
# Hand-rolled alternative: pick a random start index per axis so that a
# contiguous z1-by-z2 window can be cut out of the 2-D grid.
data = hist
batchsize = 10
z1, z2 = batchsize, batchsize  # window size along each axis
s1, s2 = np.shape(data)  # full grid size along each axis
yshared = theano.shared(data)
# NOTE(review): the repeat count 64 is hard-coded - presumably the number of
# bins per axis; confirm it matches the shape of `hist`.
x1shared = theano.shared(ycenters[:, np.newaxis].repeat(64, axis=1))
x2shared = theano.shared(xcenters[:, np.newaxis].T.repeat(64, axis=0))
# Uniform draw strictly below (size - window), truncated to an integer start
# index so that start + window stays inside the grid.
ixs1 = pm.tt_rng().uniform(size=(1, ), low=0, high=s1 - z1 - 1e-10).astype('int64')
ixs2 = pm.tt_rng().uniform(size=(1, ), low=0, high=s2 - z2 - 1e-10).astype('int64')
# Row indices covered by the window along the first axis.
range1 = tt.arange(ixs1.squeeze(), (ixs1 + z1).squeeze())
def fit_advi_refine(self, n_iter=10000, learning_rate=None,
                    progressbar=True, reducing_lr=False):
    """Refine posterior using ADVI - continue training after `.fit_advi_iterative()`

    Every ADVI object stored in `self.advi` is trained for `n_iter` more
    iterations and the matching entry of `self.mean_field` is overwritten
    with the result. `self.n_iter` is increased by `n_iter`.

    Parameters
    ----------
    n_iter :
        number of additional iterations (Default value = 10000)
    learning_rate :
        same as in `.fit_advi_iterative()` (Default value = None)
    progressbar :
        same as in `.fit_advi_iterative()` (Default value = True)
    reducing_lr :
        same as in `.fit_advi_iterative()` (Default value = False)

    Returns
    -------
    None
        `self.mean_field` is updated in place.
    """
    self.n_iter = self.n_iter + n_iter

    if learning_rate is None:
        learning_rate = self.learning_rate

    ### Initialise optimiser ###
    if reducing_lr:
        # the learning rate lives in a shared variable so that the callback
        # can lower it as the iteration count grows
        s = theano.shared(np.array(learning_rate).astype(self.data_type))

        def reduce_rate(a, h, i):
            s.set_value(np.array(learning_rate / ((i / self.n_obs) + 1) ** .7).astype(self.data_type))

        optimiser = pm.adam(learning_rate=s)
        callbacks = [reduce_rate, CheckParametersConvergence()]
    else:
        optimiser = pm.adam(learning_rate=learning_rate)
        callbacks = [CheckParametersConvergence()]

    def _as_minibatch(array, index):
        # minibatch along the first axis only, seeded per initialisation
        return pm.Minibatch(array.astype(self.data_type),
                            batch_size=[self.minibatch_size, None],
                            random_seed=self.minibatch_seed[index])

    for i, name in enumerate(self.advi.keys()):

        # when type is molecular cross-validation or bootstrap,
        # self.x_data is replaced with the i-th resampled dataset
        resampled = np.isin(self.n_type, ['cv', 'bootstrap'])

        if self.minibatch_size is not None:
            # minibatch main data - expression matrix
            main_data = self.X_data_sample[i] if resampled else self.X_data
            self.x_data_minibatch = _as_minibatch(main_data, i)
            more_replacements = {self.x_data: self.x_data_minibatch}

            # any other data inputs are minibatched too
            if self.extra_data is not None:
                for k in self.extra_data.keys():
                    more_replacements[self.extra_data_tt[k]] = \
                        _as_minibatch(self.extra_data[k], i)
        elif resampled:
            # resampled data without minibatching: use all of it
            more_replacements = {self.x_data: self.X_data_sample[i].astype(self.data_type)}

            if self.extra_data is not None:
                for k in self.extra_data.keys():
                    more_replacements[self.extra_data_tt[k]] = \
                        self.extra_data[k].astype(self.data_type)
        else:
            more_replacements = {}

        with self.model:
            # train for more iterations & export trained model by overwriting the initial mean field object
            self.mean_field[name] = self.advi[name].fit(
                n_iter,
                callbacks=callbacks,
                obj_optimizer=optimiser,
                total_grad_norm_constraint=self.total_grad_norm_constraint,
                progressbar=progressbar,
                more_replacements=more_replacements)

            if self.verbose:
                print(plt.plot(np.log10(self.mean_field[name].hist[15000:])))
def fit_advi_iterative(self, n=3, method='advi', n_type='restart',
                       n_iter=None, learning_rate=None, reducing_lr=False,
                       progressbar=True, scale_cost_to_minibatch=True):
    """Find posterior using pm.ADVI() method directly (allows continuing training through `refine` method.
    (maximising likelihood of the data and minimising KL-divergence of posterior to prior - ELBO loss)

    Parameters
    ----------
    n :
        number of independent initialisations (Default value = 3)
    method :
        'advi', to allow for potential use of SVGD, MCMC, custom (currently only ADVI implemented).
        (Default value = 'advi')
    n_type :
        type of repeated initialisation:

        * **'restart'** to pick different initial value,
        * **'cv'** for molecular cross-validation - splits counts into n datasets, for now, only n=2 is implemented
        * **'bootstrap'** for fitting the model to multiple downsampled datasets.
          Run `mod.bootstrap_data()` to generate variants of data (Default value = 'restart')

    n_iter :
        number of iterations, supersedes self.n_iter specified when creating model instance. (Default value = None)
    learning_rate :
        learning rate, supersedes self.learning_rate specified when creating model instance. (Default value = None)
    reducing_lr :
        boolean, use decaying learning rate? (Default value = False)
    progressbar :
        boolean, show progress bar? (Default value = True)
    scale_cost_to_minibatch :
        when using training in minibatches, scale cost function appropriately?
        See discussion https://discourse.pymc.io/t/effects-of-scale-cost-to-minibatch/1429 to understand the effects.
        (Default value = True)

    Returns
    -------
    None
        self.mean_field dictionary with MeanField pymc3 objects,
        and self.advi dictionary with ADVI objects for each initialisation.
    """
    self.n_type = n_type
    self.scale_cost_to_minibatch = scale_cost_to_minibatch

    if n_iter is None:
        n_iter = self.n_iter

    if learning_rate is None:
        learning_rate = self.learning_rate

    ### Initialise optimiser ###
    if reducing_lr:
        # the learning rate lives in a shared variable so that the callback
        # can lower it as the iteration count grows
        s = theano.shared(np.array(learning_rate).astype(self.data_type))

        def reduce_rate(a, h, i):
            s.set_value(np.array(learning_rate / ((i / self.n_obs) + 1) ** .7).astype(self.data_type))

        optimiser = pm.adam(learning_rate=s)
        callbacks = [reduce_rate, CheckParametersConvergence()]
    else:
        optimiser = pm.adam(learning_rate=learning_rate)
        callbacks = [CheckParametersConvergence()]

    # prepare the resampled datasets where the initialisation type needs them
    if np.isin(n_type, ['bootstrap']):
        if self.X_data_sample is None:
            self.bootstrap_data(n=n)
    elif np.isin(n_type, ['cv']):
        self.generate_cv_data()  # cv data added to self.X_data_sample

    def _as_minibatch(array, index):
        # minibatch along the first axis only, seeded per initialisation
        return pm.Minibatch(array.astype(self.data_type),
                            batch_size=[self.minibatch_size, None],
                            random_seed=self.minibatch_seed[index])

    init_names = ['init_' + str(i + 1) for i in np.arange(n)]

    for i, name in enumerate(init_names):

        with self.model:

            self.advi[name] = pm.ADVI()

            # when type is molecular cross-validation or bootstrap,
            # self.x_data is replaced with the i-th resampled dataset
            resampled = np.isin(n_type, ['cv', 'bootstrap'])

            if self.minibatch_size is not None:
                # minibatch main data - expression matrix
                main_data = self.X_data_sample[i] if resampled else self.X_data
                self.x_data_minibatch = _as_minibatch(main_data, i)
                more_replacements = {self.x_data: self.x_data_minibatch}

                # any other data inputs are minibatched too
                if self.extra_data is not None:
                    for k in self.extra_data.keys():
                        more_replacements[self.extra_data_tt[k]] = \
                            _as_minibatch(self.extra_data[k], i)
            elif resampled:
                # resampled data without minibatching: use all of it
                more_replacements = {self.x_data: self.X_data_sample[i].astype(self.data_type)}

                if self.extra_data is not None:
                    for k in self.extra_data.keys():
                        more_replacements[self.extra_data_tt[k]] = \
                            self.extra_data[k].astype(self.data_type)
            else:
                more_replacements = {}

            self.advi[name].scale_cost_to_minibatch = scale_cost_to_minibatch

            # train the model
            self.mean_field[name] = self.advi[name].fit(
                n_iter,
                callbacks=callbacks,
                obj_optimizer=optimiser,
                total_grad_norm_constraint=self.total_grad_norm_constraint,
                progressbar=progressbar,
                more_replacements=more_replacements)

            # plot training history
            if self.verbose:
                print(plt.plot(np.log10(self.mean_field[name].hist[15000:])))
def sgd_optimization(NNInput):
    """Train (or reload) the ADVI approximation and write posterior summaries.

    Steps, in order:
      1. load the datasets and record layer/sample counts on `NNInput`;
      2. optionally evaluate the model with stored initial parameters and
         save the result per angle;
      3. either build/train the model (`NNInput.TrainFlg`) or unpickle a
         previously saved approximation;
      4. save parameter posteriors under ``<output>/ParamsPosts/``;
      5. sample the predicted output per angle and save mean, standard
         deviation and a +/- `SigmaIntCoeff` band under
         ``<output>/OutputPosts/``, optionally repeated with sampled noise.

    `NNInput` is read and mutated throughout (NOut, NLayers, NTrain,
    NBatchTrain, NMiniBatch). Nothing is returned.
    """
    RandomSeed = 42
    set_tt_rng(MRG_RandomStreams(RandomSeed))
    NSigmaSamples = 1000
    SigmaIntCoeff = 2  # half-width of the reported band, in standard deviations

    ##########################################################################
    ### LOADING DATA
    ##########################################################################
    print('\nLoading Data ... \n')
    if (NNInput.TryNNFlg):
        datasets, datasetsPlot, RDataOrig, yDataOrig, yDataDiatOrig = load_data(
            NNInput)
    else:
        datasets, RDataOrig, yDataOrig, yDataDiatOrig = load_data(NNInput)
    RSetTrain, ySetTrain, ySetTrainDiat, ySetTrainTriat = datasets[0]
    # NOTE(review): datasetsPlot is only bound when NNInput.TryNNFlg is true,
    # yet it is used unconditionally from here on - confirm.
    RSetPlot, ySetPlot, ySetPlotDiat, ySetPlotTriat = datasetsPlot[0]
    RSetPlotTemp = RSetPlot
    #NNInput.NIn = xSetTrain.get_value(borrow=True).shape[1]
    NNInput.NOut = ySetTrain.get_value(borrow=True).shape[1]
    print((' Nb of Input: %i') % NNInput.NIn)
    print((' Nb of Output: %i \n') % NNInput.NOut)
    # NOTE(review): NLayers is bound to the same object as NHid, so the
    # insert/append below also modify NNInput.NHid - confirm this is intended.
    NNInput.NLayers = NNInput.NHid
    NNInput.NLayers.insert(0, NNInput.NIn)
    NNInput.NLayers.append(NNInput.NOut)
    NNInput.NTrain = RSetTrain.get_value(borrow=True).shape[0]
    print((' Nb of Training Examples: %i') % NNInput.NTrain)
    # compute number of minibatches for training, validation and testing
    if (NNInput.NMiniBatch != 0):
        NNInput.NBatchTrain = NNInput.NTrain // NNInput.NMiniBatch
        print((' Nb of Training Batches: %i') % NNInput.NBatchTrain)
    else:
        print(' No-BATCH Version')

    ##########################################################################
    ### TESTING REAL PARAMETERS
    ##########################################################################
    if (NNInput.ReadIniParamsFlg):
        if (NNInput.Model == 'PIP'):
            LambdaVec = NNInput.LambdaVec
            reVec = NNInput.reVec
            # weights and biases of every layer after the first
            WIni = [
                load_parameters(NNInput.PathToWeightFldr +
                                NNInput.LayersName[iLayer] + '/')[0]
                for iLayer in range(1, len(NNInput.LayersName))
            ]
            bIni = [
                load_parameters(NNInput.PathToWeightFldr +
                                NNInput.LayersName[iLayer] + '/')[1]
                for iLayer in range(1, len(NNInput.LayersName))
            ]
        if (NNInput.Model == 'ModPIP'):
            LambdaIni = load_parameters_PIP(NNInput.PathToWeightFldr +
                                            NNInput.LayersName[1] + '/')[0]
            reIni = load_parameters_PIP(NNInput.PathToWeightFldr +
                                        NNInput.LayersName[1] + '/')[1]
            LambdaVec = numpy.array([1.0, 1.0, 1.0]) * LambdaIni
            reVec = numpy.array([1.0, 1.0, 1.0]) * reIni
            #print('Lambda = ', LambdaVec)
            #print('re = ', reVec)
            # weights and biases of the layers from index 3 onwards
            WIni = [
                load_parameters(NNInput.PathToWeightFldr +
                                NNInput.LayersName[iLayer] + '/')[0]
                for iLayer in range(3, len(NNInput.LayersName))
            ]
            bIni = [
                load_parameters(NNInput.PathToWeightFldr +
                                NNInput.LayersName[iLayer] + '/')[1]
                for iLayer in range(3, len(NNInput.LayersName))
            ]
        elif (NNInput.Model == 'LEPS'):
            DeVec = NNInput.DeVec
            betaVec = NNInput.betaVec
            reVec = NNInput.reVec
            k = NNInput.k
        i = -1
        for Ang in NNInput.AngVector:
            i = i + 1
            RSetPlot, ySetPlot, ySetPlotDiat, ySetPlotTriat = datasetsPlot[i]
            if (NNInput.Model == 'PIP') or (NNInput.Model == 'ModPIP'):
                yPredInitial = try_model_PIP(NNInput,
                                             RSetPlot.get_value(borrow=True),
                                             LambdaVec, reVec, WIni, bIni)
            elif (NNInput.Model == 'LEPS'):
                # NOTE(review): DeiVec, betaiVec, reiVec and ki are not
                # assigned anywhere in this function (the LEPS branch above
                # sets DeVec, betaVec, reVec and k) - verify these names.
                yPredInitial = try_model_LEPS(NNInput,
                                              RSetPlot.get_value(borrow=True),
                                              DeiVec, betaiVec, reiVec, ki)
            yPredInitial = InverseTransformation(NNInput, yPredInitial,
                                                 ySetPlotDiat.get_value())
            PathToPlotLabels = NNInput.PathToOutputFldr + '/REInitial.csv.' + str(
                int(numpy.floor(Ang)))
            ySetPlot = T.cast(ySetPlot, 'float64')
            ySetPlot = ySetPlot.eval()
            ySetPlot = InverseTransformation(NNInput, ySetPlot,
                                             ySetPlotDiat.get_value())
            save_to_plot(
                PathToPlotLabels, 'Initial',
                numpy.column_stack(
                    [RSetPlot.get_value(), ySetPlot, yPredInitial]))
            print(' Initial Evaluation Saved in File: ', PathToPlotLabels, '\n')
        RSetPlotTemp = RSetPlot

    ##########################################################################
    # BUILD ACTUAL MODEL
    ##########################################################################
    ### COMPUTING / UPDATING INFERENCE
    # print(RSetTrain.get_value())
    # print(ySetTrain.get_value())
    # time.sleep(5)
    if (NNInput.TrainFlg):
        if (NNInput.NMiniBatch > 0):
            RSetTrainTemp = pymc3.Minibatch(RSetTrain.get_value(),
                                            batch_size=NNInput.NMiniBatch,
                                            dtype='float64')
            ySetTrainTemp = pymc3.Minibatch(ySetTrain.get_value(),
                                            batch_size=NNInput.NMiniBatch,
                                            dtype='float64')
        else:
            # no minibatching: train on the full shared variables
            RSetTrainTemp = RSetTrain
            ySetTrainTemp = ySetTrain
            NNInput.NMiniBatch = NNInput.NTrain
        #ADVIApprox, ADVIInference, ADVITracker, SVGDApprox, NUTSTrace, model, yPred, Sigma, Layers = construct_model(NNInput, RSetTrainTemp, ySetTrainTemp, GaussWeightsW, GaussWeightsb)
        # NOTE(review): GaussWeightsW and GaussWeightsb are not assigned in
        # this function - presumably module-level names; confirm.
        ADVIApprox, ADVIInference, SVGDApprox, NUTSTrace, Params, yPred = construct_model(
            NNInput, RSetTrain, ySetTrain, RSetTrainTemp, ySetTrainTemp,
            GaussWeightsW, GaussWeightsb)
        # plot_ADVI_ELBO(NNInput, ADVIInference)
        #
        if (NNInput.SaveInference):
            PathToModTrace = NNInput.PathToOutputFldr + '/Approx&Preds.pkl'
            with open(PathToModTrace, 'wb') as buff:
                #pickle.dump({'model': model, 'trace': ADVITrace, 'tracker': ADVITracker, 'inference': ADVIInference, 'approx': ADVIApprox, 'yLike': yLike}, buff)
                pickle.dump(
                    {
                        'ADVIApprox': ADVIApprox,
                        'Params': Params,
                        'yPred': yPred
                    }, buff)
    #
    else:
        # NOTE(review): this loads '/Model&Trace.pkl' while the training
        # branch saves to '/Approx&Preds.pkl' - confirm which file is meant.
        # NOTE(review): pickle.load must only be used on trusted files.
        PathToWeightFldr = NNInput.PathToOutputFldr + '/Model&Trace.pkl'
        with open(PathToWeightFldr, 'rb') as buff:
            data = pickle.load(buff)
        #model, ADVITrace, ADVITracker, ADVIInference, ADVIApprox, yPred = data['model'], data['trace'], data['tracker'], data['inference'], data['approx'], data['yPred']
        ADVIApprox, Params, yPred = data['ADVIApprox'], data['Params'], data[
            'yPred']
    RSetPlot, ySetPlot, ySetPlotDiat, ySetPlotTriat = datasetsPlot[0]
    RSetPlotTemp = RSetPlot

    if (NNInput.NTraceADVI > 0):
        ADVITrace = ADVIApprox.sample(draws=NNInput.NTraceADVI)
        plot_ADVI_trace(NNInput, ADVITrace)
    else:
        ADVITrace = 1  # placeholder; not read again in this function

    ##########################################################################
    ### SAMPLING PARAMETERS POSTERIOR
    ##########################################################################
    PathToADVI = NNInput.PathToOutputFldr + '/ParamsPosts/'
    if not os.path.exists(PathToADVI):
        os.makedirs(PathToADVI)
    if (NNInput.Model == 'PIP') or (NNInput.Model == 'ModPIP'):
        save_ADVI_reconstruction_PIP(NNInput, PathToADVI, ADVIApprox, Params)
        save_ADVI_sample_PIP(NNInput, PathToADVI, ADVIApprox, Params)
    elif (NNInput.Model == 'LEPS'):
        save_ADVI_reconstruction_LEPS(PathToADVI, ADVIApprox, Params)

    ##########################################################################
    ### RECONSTRUCTING MOMENTS / RUNNING NUTS / COMPUTING MAX POSTERIOR
    ##########################################################################
    # These three sections contained only commented-out code in the original
    # (moment reconstruction plots, NUTS trace plots, MAP-estimate evaluation)
    # and perform no work.

    ##########################################################################
    ### SAMPLING OUTPUT POSTERIOR
    ##########################################################################
    PathToADVI = NNInput.PathToOutputFldr + '/OutputPosts/'
    if not os.path.exists(PathToADVI):
        os.makedirs(PathToADVI)
    # Symbolic inputs: x replaces the training input, n is the sample count.
    x = T.dmatrix('X')
    n = T.iscalar('n')
    x.tag.test_value = numpy.empty_like(RSetPlotTemp)
    # overwrites the test value assigned on the previous line
    x.tag.test_value = numpy.random.randint(100, size=(100, 3))
    n.tag.test_value = 100
    # NOTE(review): RSetTrainTemp is only bound in the TrainFlg branch above;
    # when the approximation is loaded from disk this name looks unbound.
    _sample_proba_yPred = ADVIApprox.sample_node(
        yPred, size=n, more_replacements={RSetTrainTemp: x})
    sample_proba_yPred = theano.function([x, n], _sample_proba_yPred)
    m = T.iscalar('m')
    _sample_proba_SigmaPred = ADVIApprox.sample_node(Params.get('Sigma'),
                                                     size=n * m)
    sample_proba_SigmaPred = theano.function([n, m], _sample_proba_SigmaPred)
    SigmaPred = sample_proba_SigmaPred(NNInput.NOutPostSamples, NSigmaSamples)
    SigmaPred = numpy.reshape(SigmaPred,
                              (NNInput.NOutPostSamples, NSigmaSamples))

    i = -1
    for Ang in NNInput.AngVector:
        # reseed per angle so every angle uses the same random stream
        numpy.random.seed(RandomSeed)
        pymc3.set_tt_rng(RandomSeed)
        i = i + 1
        RSetPlot, ySetPlot, ySetPlotDiat, ySetPlotTriat = datasetsPlot[i]
        ySetPlot = T.cast(ySetPlot, 'float64')
        ySetPlot = ySetPlot.eval()
        #ySetPlot = InverseTransformation(NNInput, ySetPlot, ySetPlotDiat.get_value())
        yPredPlot = sample_proba_yPred(RSetPlot.get_value(borrow=True),
                                       NNInput.NOutPostSamples)
        # zero accumulators with the same shape as ySetPlot
        yPredSum = ySetPlot * 0.0
        yPredSumSqr = ySetPlot * 0.0
        for j in range(NNInput.NOutPostSamples):
            yPredTemp = numpy.array(yPredPlot[j, :])
            yPredTemp = InverseTransformation(NNInput, yPredTemp,
                                              ySetPlotDiat.get_value())
            yPredSum = yPredSum + yPredTemp
            yPredSumSqr = yPredSumSqr + numpy.square(yPredTemp)
        #
        # mean and standard deviation from the running sums: sqrt(E[y^2] - E[y]^2)
        yMean = yPredSum / NNInput.NOutPostSamples
        yStD = numpy.sqrt(yPredSumSqr / NNInput.NOutPostSamples -
                          numpy.square(yMean))
        yPlus = yMean + SigmaIntCoeff * yStD
        yMinus = yMean - SigmaIntCoeff * yStD
        PathToPlotLabels = NNInput.PathToOutputFldr + '/OutputPosts/yPred' + str(
            int(numpy.floor(Ang))) + '.csv'
        save_moments(
            PathToPlotLabels, 'yPred',
            numpy.column_stack(
                [RSetPlot.get_value(), ySetPlot, yMean, yStD, yMinus, yPlus]))
        print(' Wrote Sampled yPred for Angle ', Ang, '\n')

    if (NNInput.AddNoiseToPredsFlg):
        i = -1
        for Ang in NNInput.AngVector:
            numpy.random.seed(RandomSeed)
            pymc3.set_tt_rng(RandomSeed)
            i = i + 1
            RSetPlot, ySetPlot, ySetPlotDiat, ySetPlotTriat = datasetsPlot[i]
            ySetPlot = T.cast(ySetPlot, 'float64')
            ySetPlot = ySetPlot.eval()
            #ySetPlot = InverseTransformation(NNInput, ySetPlot, ySetPlotDiat.get_value())
            yPredPlot = sample_proba_yPred(RSetPlot.get_value(borrow=True),
                                           NNInput.NOutPostSamples)
            yPostSum = ySetPlot * 0.0
            yPostSumSqr = ySetPlot * 0.0
            for j in range(NNInput.NOutPostSamples):
                yPredTemp = numpy.array(yPredPlot[j, :])
                if (NNInput.AddNoiseToPredsFlg):
                    for k in range(NSigmaSamples):
                        # NOTE(review): yPostTemp is read here before any
                        # assignment in this function (first iteration), and
                        # the noise is applied multiplicatively below -
                        # verify the intended order and operator.
                        yPostTemp = InverseTransformation(
                            NNInput, yPostTemp, ySetPlotDiat.get_value())
                        SigmaTemp = SigmaPred[j, k]
                        RandNum = numpy.random.normal(loc=0.0, scale=SigmaTemp)
                        yPostTemp = yPredTemp * RandNum
                        yPostSum = yPostSum + yPostTemp
                        yPostSumSqr = yPostSumSqr + numpy.square(yPostTemp)
            #
            # NOTE(review): the sums appear to accumulate over both j and k
            # but are divided by NOutPostSamples only - confirm the divisor.
            yMean = yPostSum / NNInput.NOutPostSamples
            yStD = numpy.sqrt(yPostSumSqr / NNInput.NOutPostSamples -
                              numpy.square(yMean))
            yPlus = yMean + SigmaIntCoeff * yStD
            yMinus = yMean - SigmaIntCoeff * yStD
            PathToPlotLabels = NNInput.PathToOutputFldr + '/OutputPosts/yPost' + str(
                int(numpy.floor(Ang))) + '.csv'
            save_moments(
                PathToPlotLabels, 'yPost',
                numpy.column_stack([
                    RSetPlot.get_value(), ySetPlot, yMean, yStD, yMinus, yPlus
                ]))
            print(' Wrote Sampled yPost for Angle ', Ang, '\n')
_ = ax.set(xlim=(-3, 3), ylim=(-3, 3), xlabel='X', ylabel='Y')
cbar.ax.set_ylabel('Uncertainty (posterior predictive standard deviation)')

# We can see that very close to the decision boundary, our uncertainty as to which label to predict is highest. You can imagine that associating predictions with uncertainty is a critical property for many applications like health care. To further maximize accuracy, we might want to train the model primarily on samples from that high-uncertainty region.

# It is also clear that the uncertainty is large in the region where there is no training data. That is what should be expected, and it is good that our network shows this explicitly. The normal neural network would not give any such signals.

# ## Mini-batch ADVI
#
# So far, we have trained our model on all data at once. Obviously this won't scale to something like ImageNet. Moreover, training on mini-batches of data (stochastic gradient descent) avoids local minima and can lead to faster convergence.
#
# Fortunately, ADVI can be run on mini-batches as well. It just requires some setting up:

# In[22]:

# Wrap the training arrays so that every optimisation step sees a random
# batch of 50 rows instead of the full data set.
minibatch_x = pm.Minibatch(X_train, batch_size=50)
minibatch_y = pm.Minibatch(Y_train, batch_size=50)
neural_network_minibatch = construct_nn(minibatch_x, minibatch_y)
with neural_network_minibatch:
    # Keep a handle on the ADVI object that is actually fitted here: the plot
    # below reads `inference.hist`, which would otherwise refer to whatever
    # `inference` object an earlier cell left behind (or to nothing at all).
    inference = pm.ADVI()
    approx = pm.fit(40000, method=inference)

# In[23]:

fig, ax = plt.subplots(figsize=(8, 6))
# The history is negated so that the curve matches the 'ELBO' axis label.
ax.plot(-inference.hist)
ax.set_ylabel('ELBO')
ax.set_xlabel('iteration')

# As you can see, mini-batch ADVI's running time is much lower. It also seems to converge faster.
#
def fit(
    self,
    X,
    y,
    inference_type='advi',
    num_advi_sample_draws=10000,
    minibatch_size=None,
    inference_args=None,
):
    """
    Train the Linear Regression model

    Parameters
    ----------
    X : numpy array
        shape [num_training_samples, num_pred]

    y : numpy array
        shape [num_training_samples, ]
        Any other shape is passed through ``np.squeeze`` first, so a
        column vector of shape [num_training_samples, 1] is accepted.

    inference_type : str (defaults to 'advi')
        specifies which inference method to call
        Currently, only 'advi' and 'nuts' are supported.

    num_advi_sample_draws : int (defaults to 10000)
        Number of samples to draw from ADVI approximation after it has been fit;
        not used if inference_type != 'advi'

    minibatch_size : int (defaults to None)
        number of samples to include in each minibatch for ADVI
        If None, minibatch is not run.

    inference_args : dict (defaults to None)
        arguments to be passed to the inference methods.
        Check the PyMC3 docs for permissible values.
        If None (or empty), default values will be set.
        The dict is copied before use, so the caller's object is never
        modified by this method.

    Returns
    -------
    self
        The fitted instance, to allow call chaining.
    """
    self.num_training_samples, self.num_pred = X.shape

    self.inference_type = inference_type

    if y.ndim != 1:
        y = np.squeeze(y)

    if not inference_args:
        inference_args = self._set_default_inference_args()
    else:
        # Work on a copy: the minibatch branch below adds a
        # 'more_replacements' entry, which must not leak into a dict the
        # caller may reuse for a later (non-minibatch) fit.
        inference_args = dict(inference_args)

    # Build the model lazily and reuse it across calls to fit().
    if self.cached_model is None:
        self.cached_model = self.create_model()

    if minibatch_size:
        # The minibatch tensors are created inside the model context and
        # handed to the inference routine as replacements for the shared
        # input/output variables.
        with self.cached_model:
            minibatches = {
                self.shared_vars['model_input']: pm.Minibatch(X, batch_size=minibatch_size),
                self.shared_vars['model_output']: pm.Minibatch(y, batch_size=minibatch_size),
            }

        inference_args['more_replacements'] = minibatches
    else:
        # Full-batch training: load the whole data set into the shared variables.
        self._set_shared_vars({'model_input': X, 'model_output': y})

    self._inference(inference_type, inference_args, num_advi_sample_draws=num_advi_sample_draws)

    return self
def train_pymc3(docs_te, docs_tr, n_samples_te, n_samples_tr, n_words, n_topics, n_tokens):
    """
    Fit an LDA topic model with PyMC3 (mini-batch variational inference with
    an encoder network for the per-document topic proportions) and evaluate
    it on held-out words of the test documents.

    Return:
        dict produced by ``eval_lda`` with the keys
        'lp' (log predictive probability per held-out token), 'thetass',
        'beta' and 'wss'.

    Parameters:
        docs_te: testing documents (processed); must provide ``.toarray()``
        docs_tr: training documents (processed); must provide ``.toarray()``
        n_samples_te: number of testing docs (not used in the body)
        n_samples_tr: number of training docs
        n_words: size of vocabulary
        n_topics: number of topics to learn
        n_tokens: number of non-zero datapoints in processed training tf matrix
    """

    # Log-likelihood of documents for LDA
    def logp_lda_doc(beta, theta):
        """
        Returns the log-likelihood function for given documents.

        K : number of topics in the model
        V : number of words (size of vocabulary)
        D : number of documents (in a mini-batch)

        Parameters
        ----------
        beta : tensor (K x V)
            Word distribution.
        theta : tensor (D x K)
            Topic distributions for the documents.
        """

        def ll_docs_f(docs):
            # Only the non-zero (document, word) cells contribute.
            dixs, vixs = docs.nonzero()
            vfreqs = docs[dixs, vixs]
            ll_docs = vfreqs * pmmath.logsumexp(
                tt.log(theta[dixs]) + tt.log(beta.T[vixs]), axis=1).ravel()

            # Per-word log-likelihood times no. of tokens in the whole dataset
            # (the 1e-9 guards against an all-zero mini-batch).
            return tt.sum(ll_docs) / (tt.sum(vfreqs) + 1e-9) * n_tokens

        return ll_docs_f

    # fit the pymc3 LDA

    # We have a sparse dataset. It's better to have a dense batch so that all
    # words occur there.
    minibatch_size = 128

    # defining minibatch
    doc_t_minibatch = pm.Minibatch(docs_tr.toarray(), minibatch_size)
    # Placeholder shared variable; it is swapped for the minibatch tensor
    # during fitting and overwritten with the full corpus afterwards.
    doc_t = shared(docs_tr.toarray()[:minibatch_size])

    with pm.Model() as model:
        theta = Dirichlet(
            'theta',
            a=pm.floatX((1.0 / n_topics) * np.ones((minibatch_size, n_topics))),
            shape=(minibatch_size, n_topics),
            transform=t_stick_breaking(1e-9),
            # do not forget scaling
            total_size=n_samples_tr)
        beta = Dirichlet(
            'beta',
            a=pm.floatX((1.0 / n_topics) * np.ones((n_topics, n_words))),
            shape=(n_topics, n_words),
            transform=t_stick_breaking(1e-9))
        # Note, that we defined likelihood with scaling, so here we need no additional `total_size` kwarg
        doc = pm.DensityDist('doc', logp_lda_doc(beta, theta), observed=doc_t)

    # Encoder
    class LDAEncoder:
        """Encode (term-frequency) document vectors to variational means and
        (log-transformed) stds.
        """

        def __init__(self, n_words, n_hidden, n_topics, p_corruption=0, random_seed=1):
            rng = np.random.RandomState(random_seed)
            self.n_words = n_words
            self.n_hidden = n_hidden
            self.n_topics = n_topics
            # Weights are stored flattened and reshaped inside encode().
            self.w0 = shared(0.01 * rng.randn(n_words, n_hidden).ravel(), name='w0')
            self.b0 = shared(0.01 * rng.randn(n_hidden), name='b0')
            self.w1 = shared(0.01 * rng.randn(n_hidden, 2 * (n_topics - 1)).ravel(), name='w1')
            self.b1 = shared(0.01 * rng.randn(2 * (n_topics - 1)), name='b1')
            self.rng = MRG_RandomStreams(seed=random_seed)
            self.p_corruption = p_corruption

        def encode(self, xs):
            """Map document vectors ``xs`` to a dict with 'mu' and 'rho' tensors."""
            if 0 < self.p_corruption:
                # Randomly zero out non-zero entries with probability p_corruption.
                dixs, vixs = xs.nonzero()
                mask = tt.set_subtensor(
                    tt.zeros_like(xs)[dixs, vixs],
                    self.rng.binomial(size=dixs.shape, n=1, p=1 - self.p_corruption))
                xs_ = xs * mask
            else:
                xs_ = xs

            w0 = self.w0.reshape((self.n_words, self.n_hidden))
            w1 = self.w1.reshape((self.n_hidden, 2 * (self.n_topics - 1)))
            hs = tt.tanh(xs_.dot(w0) + self.b0)
            zs = hs.dot(w1) + self.b1
            # First half of the output columns is the mean, second half is rho.
            zs_mean = zs[:, :(self.n_topics - 1)]
            zs_rho = zs[:, (self.n_topics - 1):]
            return {'mu': zs_mean, 'rho': zs_rho}

        def get_params(self):
            """Return the trainable shared variables of the encoder."""
            return [self.w0, self.b0, self.w1, self.b1]

    # call Encoder
    encoder = LDAEncoder(n_words=n_words, n_hidden=100, n_topics=n_topics, p_corruption=0.0)
    local_RVs = OrderedDict([(theta, encoder.encode(doc_t))])

    # get parameters
    encoder_params = encoder.get_params()

    # Train pymc3 Model
    η = .1
    s = shared(η)

    def reduce_rate(a, h, i):
        # Fit callback: decay the SGD learning rate as the iteration counter
        # `i` grows (the first two callback arguments are not used).
        s.set_value(η / ((i / minibatch_size) + 1)**.7)

    with model:
        approx = pm.MeanField(local_rv=local_RVs)
        approx.scale_cost_to_minibatch = False
        inference = pm.KLqp(approx)
    inference.fit(10000,
                  callbacks=[reduce_rate],
                  obj_optimizer=pm.sgd(learning_rate=s),
                  more_obj_params=encoder_params,
                  total_grad_norm_constraint=200,
                  more_replacements={doc_t: doc_t_minibatch})

    # Extracting characteristic words
    doc_t.set_value(docs_tr.toarray())
    samples = pm.sample_approx(approx, draws=100)
    beta_pymc3 = samples['beta'].mean(axis=0)

    # Predictive distribution
    def calc_pp(ws, thetas, beta, wix):
        """
        Parameters
        ----------
        ws: ndarray (N,)
            Number of times the held-out word appeared in N documents.
        thetas: ndarray, shape=(N, K)
            Topic distributions for N documents.
        beta: ndarray, shape=(K, V)
            Word distributions for K topics.
        wix: int
            Index of the held-out word

        Return
        ------
        Log probability of held-out words.
        """
        return ws * np.log(thetas.dot(beta[:, wix]))

    def eval_lda(transform, beta, docs_te, wixs):
        """Evaluate LDA model by log predictive probability.

        Parameters
        ----------
        transform: Python function
            Transform document vectors to posterior mean of topic proportions.
        beta: ndarray, shape=(K, V)
            Word distributions for K topics.
        docs_te: ndarray
            Dense test document-term matrix; it is copied, not modified.
        wixs: iterable of int
            Word indices to be held-out.

        Return
        ------
        dict with 'lp' (summed log predictive probability divided by the
        number of held-out tokens), 'thetass', 'beta' and 'wss'. Entries of
        'thetass' and 'wss' are None for words that never occur in docs_te.
        """
        lpss = []
        docs_ = deepcopy(docs_te)
        thetass = []
        wss = []
        total_words = 0
        for wix in wixs:
            ws = docs_te[:, wix].ravel()
            if 0 < ws.sum():
                # Hold-out
                docs_[:, wix] = 0

                # Topic distributions
                thetas = transform(docs_)

                # Predictive log probability
                lpss.append(calc_pp(ws, thetas, beta, wix))

                # Restore the held-out column before the next word.
                docs_[:, wix] = ws
                thetass.append(thetas)
                wss.append(ws)
                total_words += ws.sum()
            else:
                thetass.append(None)
                wss.append(None)

        # Log-probability
        lp = np.sum(np.hstack(lpss)) / total_words

        return {'lp': lp, 'thetass': thetass, 'beta': beta, 'wss': wss}

    inp = tt.matrix(dtype='int64')
    sample_vi_theta = theano.function([inp],
                                      approx.sample_node(
                                          approx.model.theta,
                                          100,
                                          more_replacements={
                                              doc_t: inp
                                          }).mean(0))

    def transform_pymc3(docs):
        return sample_vi_theta(docs)

    docs_te_dense = docs_te.toarray()
    # Hold out the first 100 word indices, clamped to the number of columns
    # so that a vocabulary smaller than 100 words does not index out of range.
    n_heldout = min(100, docs_te_dense.shape[1])
    result_pymc3 = eval_lda(transform_pymc3, beta_pymc3, docs_te_dense, np.arange(n_heldout))
    print('Predictive log prob (pm3) = {}'.format(result_pymc3['lp']))

    return result_pymc3
def __init__(self, time, event, x, rs, minibatch=1, labels=None, priors=None, vars=None, name='', model=None):
    """
    Set up the parameters of the frailty model: one regression coefficient
    per (covariate, outcome dimension) pair, one ``lam`` and one ``rho`` per
    outcome dimension, and a single frailty parameter ``theta``.

    Parameters
    ----------
    time : shared variable or 2-D array, shape [n, k]
        Times for n observations and k outcome dimensions.
    event : shared variable or 2-D array, shape [n, k]
        Event indicators; row sums are used as the number of events.
    x : shared variable or 2-D array / data frame, shape [n, p]
        Covariates; converted with ``any_to_tensor_and_labels``.
    rs : not used in this method
    minibatch : number (defaults to 1)
        Batch size. Values >= 2 switch on minibatching of x, time, event
        and the derived event-change matrix.
    labels : list of str, optional
        Covariate labels, forwarded to ``any_to_tensor_and_labels``.
    priors : dict, optional
        Priors keyed by variable name, with 'Regressor', 'lam', 'rho' and
        'theta'/'Theta' as fall-back keys.
    vars : dict, optional
        Pre-built values keyed by coefficient name; these are wrapped in a
        ``Deterministic`` instead of getting a prior.
    name, model :
        Forwarded to the parent class constructor.
    """
    super(FrailtyIndependentComponent_Fix, self).__init__(name, model)
    if priors is None:
        priors = {}
    if vars is None:
        vars = {}

    ## Debugging
    print(str(time))

    def _raw(value):
        # Shared variables expose their array through get_value(); plain
        # arrays are used as they are.
        return value.get_value() if hasattr(value, 'get_value') else value

    # Detect shared variables by capability rather than by comparing
    # str(time) with one specific repr, which fails for named or
    # non-float64 shared variables.
    data_tensor = hasattr(time, 'get_value')
    time_values = _raw(time)
    event_values = _raw(event)
    # Grab the raw covariates before x is rebound to a tensor below.
    x_values = _raw(x)

    self.k = k = time_values.shape[1]  # outcome dimensionality
    self.n = time_values.shape[0]  # total number of observations
    self.p = x_values.shape[1]  # number of covariates

    x, labels = any_to_tensor_and_labels(x, labels)  # might need to do this for the other variables

    ## now for secondary delta for the gamma frac
    # Row i holds s_i ones followed by k - s_i zeros, where s_i is the number
    # of events in row i. The comparison form also works when the counts are
    # floats, which np.repeat does not accept as a repeat count.
    events_per_row = np.asarray(np.sum(event_values, axis=1)).ravel()
    event_change_values = (np.arange(k) < events_per_row[:, None]).astype(int)
    if data_tensor:
        # Create tensor variable for the gamma_frac component of the likelihood
        event_change = theano.shared(event_change_values, borrow=True)
    else:
        event_change = event_change_values
    self.event_change = event_change

    ## Keep track of total size of the dataset, for minibatching
    ## new 10.10.2018
    if minibatch >= 2:  # kinda hacky but whatever, we can fix this later
        print("We're Mini batching")
        minibatch = int(minibatch)  # guard against a float batch size

        # Minibatches are built from the raw arrays, so this branch works for
        # both shared-variable and plain-array inputs.
        self.x = pm.Minibatch(data=np.asarray(x_values), batch_size=minibatch)
        self.time = pm.Minibatch(data=np.asarray(time_values), batch_size=minibatch)
        self.event = pm.Minibatch(data=np.asarray(event_values), batch_size=minibatch)
        self.event_change = pm.Minibatch(data=event_change_values, batch_size=minibatch)
    else:
        # if not minibatching, just pass the tensors as they are
        self.x = x
        self.time = time
        self.event = event
        self.event_change = event_change

    # now we have x, shape and labels
    # init lists to store all of the parameters that go into our likelihood
    coeffs_all = []
    lams = []
    rhos = []

    for level in range(k):
        # for each dimension, instantiate a covariate effect for each predictor
        labels_this = [s + "_" + str(level) for s in labels]

        coeffs_this = []
        for coef_name in labels_this:
            if coef_name in vars:
                v = Deterministic(coef_name, vars[coef_name])
            else:
                v = self.Var(name=coef_name,
                             dist=priors.get(
                                 coef_name,
                                 priors.get('Regressor', self.default_regressor_prior)))
            coeffs_this.append(v)
        coeffs_all.append(tt.stack(coeffs_this, axis=0))

        ### Now for the baseline hazard portions
        lam_name = 'lam_' + str(level)
        lam = self.Var(name=lam_name,
                       dist=priors.get(
                           lam_name,
                           priors.get('lam', self.default_lambda_prior)))
        lams.append(lam)

        # rhos
        rho_name = 'rho_' + str(level)
        rho = self.Var(name=rho_name,
                       dist=priors.get(
                           rho_name,
                           priors.get('rho', self.default_rho_prior)))
        rhos.append(rho)

    # frailty parameter
    theta = self.Var(name='theta',
                     dist=priors.get(
                         'theta',
                         priors.get('Theta', self.default_theta_prior)))

    # make self attributes for the parameters
    self.coeffs_all = coeffs_all
    self.theta = theta
    self.lams = tt.stack(lams, axis=0)
    self.rhos = tt.stack(rhos, axis=0)
N, D = X.shape # Out-path out_path = "out/" + img_name + "_{0:d}".format(test_idx) os.mkdir(out_path) plt.imshow(img) plt.grid(None) plt.savefig(out_path + "/" + img_name + ".jpg") print("defining model...") # Define model X_shared = theano.shared(X) minibatch_size = 500 X_minibatch = pm.Minibatch(X, minibatch_size) # set up model with pm.Model() as model: pi = pm.Dirichlet('pi', np.ones(K)) comp_dist = [] mu = [] packed_chol = [] chol = [] for i in range(K): temp_mean = np.random.randint(low=50, high=200, size=D) mu.append(pm.Normal('mu%i' % i, temp_mean, 20, shape=D)) packed_chol.append( pm.LKJCholeskyCov('chol_cov_%i' % i, eta=2, n=D,
def test_special4(self):
    """Batch spec mixing an int, ``None``, ``Ellipsis`` and a tuple entry."""
    batch_spec = [10, None, Ellipsis, (4, 42)]
    sampled = pm.Minibatch(self.data, batch_spec).eval()
    # First axis cut to 10, last axis cut to 4, the axes in between untouched.
    assert sampled.shape == (10, 10, 40, 10, 4)
def test_special1(self):
    """Batch spec with a ``None`` entry between two tuple entries."""
    batch_spec = [(10, 42), None, (4, 42)]
    sampled = pm.Minibatch(self.data, batch_spec).eval()
    # Axes 0 and 2 are cut to 10 and 4; the ``None`` axis keeps its size.
    assert sampled.shape == (10, 10, 4, 10, 50)
def test_2d(self):
    """Batch spec with tuple entries for the first two axes."""
    batch_spec = [(10, 42), (4, 42)]
    sampled = pm.Minibatch(self.data, batch_spec).eval()
    # Only the two leading axes are cut (to 10 and 4).
    assert sampled.shape == (10, 4, 40, 10, 50)
def test_mixed2(self):
    """Observed Normal on a partially minibatched 5-D array with a per-axis ``total_size``.

    The test has no assertion; it passes when building the variable raises nothing.
    """
    full_data = np.random.rand(10, 20, 30, 40, 50)
    with pm.Model():
        minibatch = pm.Minibatch(full_data, [2, None, 20])
        Normal('n', observed=minibatch, total_size=(10, None, 30))