# Draw the moment parameters of the test distribution(s).  The oracle case
# and the independent case draw (S1, m1) identically, so they share a branch;
# the second distribution is drawn only when it is actually needed.
if oracle or indep_distr:
    S1 = random_cov(d)
    m1 = np.random.randn(d) + 0.8 * np.random.randn()
    if not oracle:
        S2 = random_cov(d)
        m2 = np.random.randn(d) + 0.8 * np.random.randn()
else:
    # Related pair: the second distribution is a perturbation of the first,
    # with `diff` controlling how far apart they are.
    S1, S2 = random_cov(d, diff=diff)
    m1 = np.random.randn(d) + 0.6 * np.random.randn()
    m2 = m1 + diff * np.random.randn(d)

# Frozen sampling distribution, built before any in-place conversion below.
N1 = multivariate_normal(mean=m1, cov=S1)

# Natural parameters of both distributions.
Q1, r1 = invert_normal_params(S1, m1)
if not oracle:
    # NOTE(review): "in_place" presumably makes the helper reuse the buffers
    # of S2 and m2 for its outputs -- confirm against invert_normal_params.
    Q2, r2 = invert_normal_params(
        S2, m2, out_A="in_place", out_b="in_place"
    )

# Per-repetition result buffers (last axis indexes the N repetitions).
Q_hats = np.empty((d, d, N), order="F")
Q_samps = np.empty((d, d, N), order="F")
r_hats = np.empty((d, N), order="F")
r_samps = np.empty((d, N), order="F")

# Buffers for the distributions themselves, needed only when a new
# distribution is drawn on every repetition.
if rand_distr_every_iter:
    r1s = np.empty((d, N), order="F")
    Q1s = np.empty((d, d, N), order="F")
    if not oracle:
        r2s = np.empty((d, N), order="F")
        Q2s = np.empty((d, d, N), order="F")
# Choose the moment parameters of the test distribution(s).
if not oracle and not indep_distr:
    # Two related distributions whose distance is governed by `diff`.
    S1, S2 = random_cov(d, diff=diff)
    m1 = np.random.randn(d) + 0.6 * np.random.randn()
    m2 = m1 + diff * np.random.randn(d)
else:
    # First distribution is drawn the same way with or without an oracle.
    S1 = random_cov(d)
    m1 = np.random.randn(d) + 0.8 * np.random.randn()
    if not oracle:
        # Independent second distribution.
        S2 = random_cov(d)
        m2 = np.random.randn(d) + 0.8 * np.random.randn()

# Frozen distribution object for the first set of moments.
N1 = multivariate_normal(mean=m1, cov=S1)

# Convert to natural parameters.
Q1, r1 = invert_normal_params(S1, m1)
if not oracle:
    # NOTE(review): 'in-place' presumably writes the result over S2 / m2 --
    # confirm against invert_normal_params.
    Q2, r2 = invert_normal_params(
        S2, m2, out_A='in-place', out_b='in-place')

# Output arrays, one slice along the last axis per repetition.
hat_and_samp_mats = (d, d, N)
hat_and_samp_vecs = (d, N)
Q_hats = np.empty(hat_and_samp_mats, order='F')
r_hats = np.empty(hat_and_samp_vecs, order='F')
Q_samps = np.empty(hat_and_samp_mats, order='F')
r_samps = np.empty(hat_and_samp_vecs, order='F')
del hat_and_samp_mats, hat_and_samp_vecs

# Storage for the per-repetition distribution, if it is redrawn every time.
if rand_distr_every_iter:
    r1s = np.empty((d, N), order='F')
    Q1s = np.empty((d, d, N), order='F')
def run(self, niter, calc_moments=True, save_last_fits=True, verbose=True):
    """Run the distributed EP algorithm.

    Every iteration first forms damped site proposals and the resulting
    global approximation, shrinking the damping factor until both the global
    precision matrix and every cavity distribution are positive definite.
    It then asks each worker to process its tilted distribution, which
    yields the site updates used by the next iteration.

    Parameters
    ----------
    niter : int
        Number of iterations to run.

    calc_moments : bool, optional
        If True, the moment parameters (mean and covariance) of the
        posterior approximation are calculated every iteration and
        returned. Default is True.

    save_last_fits : bool
        If True (default), the workers are asked to keep their fit object
        on the last iteration (``save_fit=True`` is passed to
        ``Worker.tilted``).

    verbose : bool, optional
        If true, some progress information is printed. Default is True.

    Returns
    -------
    m_phi, cov_phi : ndarray
        Mean (``niter x dphi``) and covariance (``niter x dphi x dphi``) of
        the posterior approximation at every iteration. Returned only if
        `calc_moments` is True. Rows of iterations that were not reached
        because of an early failure stay zero. Both are None when
        ``niter < 1``.

    info : int
        Return code. Zero if all ok. See variables Master.INFO_*.

    """
    m_phi_s = None
    cov_phi_s = None

    def finish(info):
        # Shape the return value as promised by `calc_moments`.
        if calc_moments:
            return m_phi_s, cov_phi_s, info
        return info

    if niter < 1:
        if verbose:
            print("Nothing to do here as provided arg. `niter` is {}"
                  .format(niter))
        return finish(self.INFO_OK)

    # Localise some instance variables
    # Mean and cov of the posterior approximation
    S = self.S
    m = self.m
    # Natural parameters of the approximation
    Q = self.Q
    r = self.r
    # Natural site parameters
    Qi = self.Qi
    ri = self.ri
    # Natural site proposal parameters
    Qi2 = self.Qi2
    ri2 = self.ri2
    # Site parameter updates
    dQi = self.dQi
    dri = self.dri

    # Array for positive definiteness checking of each cavity distribution
    posdefs = np.empty(self.K, dtype=bool)

    if calc_moments:
        # Allocate memory for results
        m_phi_s = np.zeros((niter, self.dphi))
        cov_phi_s = np.zeros((niter, self.dphi, self.dphi))

    # Monitor sampling times
    stimes = np.zeros(niter)

    # Iterate niter rounds
    for cur_iter in range(niter):
        self.iter += 1

        # Initial damping factor
        if self.iter > 1:
            df = self.df0(self.iter)
        else:
            # At the first round (round zero) there is nothing to damp yet
            df = 1
        if verbose:
            print("Iter {}, starting df {:.3g}".format(self.iter, df))
            # These flags track whether a "\r"-style status line is pending,
            # so that it can be terminated with a newline later.
            fail_printline_pos = False
            fail_printline_cov = False

        while True:
            # Try to update the global posterior approximation

            # These 4 lines could be run in parallel also
            np.add(Qi, np.multiply(df, dQi, out=Qi2), out=Qi2)
            np.add(ri, np.multiply(df, dri, out=ri2), out=ri2)
            np.add(Qi2.sum(2, out=Q), self.Q0, out=Q)
            np.add(ri2.sum(1, out=r), self.r0, out=r)
            # N.B. In the first iteration Q=Q0, r=r0 (if zero initialised)

            # Check for positive definiteness.  The buffer of S doubles as
            # the Cholesky workspace; the return value of cho_factor is
            # discarded, so this relies on the in-place overwrite.
            cho_Q = S
            np.copyto(cho_Q, Q)
            try:
                linalg.cho_factor(cho_Q, overwrite_a=True)
            except linalg.LinAlgError:
                # Not positive definite -> reduce damping factor
                df *= self.df_decay
                if verbose:
                    fail_printline_pos = True
                    sys.stdout.write(
                        "\rNon pos. def. posterior cov, " +
                        "reducing df to {:.3}".format(df) +
                        " " * 5 + "\b" * 5
                    )
                    sys.stdout.flush()
                if self.iter == 1:
                    # Nothing but the prior contributes on the first round.
                    if verbose:
                        print("\nInvalid prior.")
                    return finish(self.INFO_INVALID_PRIOR)
                if df < self.df_treshold:
                    if verbose:
                        print("\nDamping factor reached minimum.")
                    return finish(self.INFO_DF_TRESHOLD_REACHED_GLOBAL)
                continue

            # Cavity distributions (parallelisable)
            # -------------------------------------
            # Check positive definiteness for each cavity distribution
            for k in range(self.K):
                posdefs[k] = \
                    self.workers[k].cavity(Q, r, Qi2[:, :, k], ri2[:, k])
                # Early stopping criterion (when in serial)
                if not posdefs[k]:
                    break

            if np.all(posdefs):
                # All cavity distributions are positive definite.
                # Accept step (switch Qi-Qi2 and ri-ri2)
                Qi, Qi2 = Qi2, Qi
                ri, ri2 = ri2, ri
                self.Qi = Qi
                self.Qi2 = Qi2
                self.ri = ri
                self.ri2 = ri2
                break

            # Not all cavity distributions are positive definite ...
            # reduce the damping factor
            df *= self.df_decay
            if verbose:
                if fail_printline_pos:
                    fail_printline_pos = False
                    # Terminate the pending status line.
                    print("")
                fail_printline_cov = True
                sys.stdout.write(
                    "\rNon pos. def. cavity, " +
                    "(first encountered in site {}), ".format(
                        np.nonzero(~posdefs)[0][0]) +
                    "reducing df to {:.3}".format(df) +
                    " " * 5 + "\b" * 5
                )
                sys.stdout.flush()
            if df < self.df_treshold:
                if verbose:
                    print("\nDamping factor reached minimum.")
                return finish(self.INFO_DF_TRESHOLD_REACHED_CAVITY)

        if verbose and (fail_printline_pos or fail_printline_cov):
            # Terminate the pending status line.
            print("")

        if calc_moments:
            # Invert Q (chol was already calculated)
            # N.B. The following inversion could be done while
            # parallel jobs are running, thus saving time.
            invert_normal_params(cho_Q, r, out_A='in-place', out_b=m,
                                 cho_form=True)
            # Store the approximation moments
            np.copyto(m_phi_s[cur_iter], m)
            np.copyto(cov_phi_s[cur_iter], S.T)
            if verbose:
                print("Mean and std of phi[0]: {:.3}, {:.3}"
                      .format(m_phi_s[cur_iter, 0],
                              np.sqrt(cov_phi_s[cur_iter, 0, 0])))

        # Tilted distributions (parallelisable)
        # -------------------------------------
        if verbose:
            print("Process tilted distributions")
        is_last_round = (cur_iter == niter - 1)
        for k in range(self.K):
            if verbose:
                sys.stdout.write("\r site {}".format(k + 1) +
                                 ' ' * 10 + '\b' * 9)
                # Force flush here as it is not done automatically
                sys.stdout.flush()
            # Process the site
            posdefs[k] = self.workers[k].tilted(
                dQi[:, :, k],
                dri[:, k],
                save_fit=(save_last_fits and is_last_round)
            )
            if verbose and not posdefs[k]:
                sys.stdout.write("fail\n")
        if verbose:
            if np.all(posdefs):
                print("\rAll sites ok")
            elif np.any(posdefs):
                print("\rSome sites failed and are not updated")
            else:
                print("\rEvery site failed")
        if not np.any(posdefs):
            return finish(self.INFO_ALL_SITES_FAIL)

        # Store max sampling time
        stimes[cur_iter] = max(w.last_time for w in self.workers)

        if verbose and calc_moments:
            print("Iter {} done, max sampling time {}".format(
                self.iter, stimes[cur_iter]))

    if verbose:
        print("{} iterations done\nTotal limiting sampling time: {}".format(
            niter, stimes.sum()))

    return finish(self.INFO_OK)
if not rand_distr_every_iter:
    # One fixed pair of distributions is used for every repetition.
    if not use_pre_defined:
        # Generate random distr
        if indep_distr:
            S1 = random_cov(d)
            m1 = np.random.randn(d) + 0.8*np.random.randn()
            S2 = random_cov(d)
            m2 = np.random.randn(d) + 0.8*np.random.randn()
        else:
            S1, S2 = random_cov(d, diff=diff)
            m1 = np.random.randn(d) + 0.6*np.random.randn()
            m2 = m1 + diff*np.random.randn(d)
    # Frozen distr
    N1 = multivariate_normal(mean=m1, cov=S1)
    # Convert S2,m2 to natural parameters
    Q2, r2 = invert_normal_params(S2, m2)
    # Calc half log-det of Q2 (sum of the log-diagonal of its Cholesky factor)
    ldet_Q_tilde = np.sum(np.log(np.diag(linalg.cho_factor(Q2)[0])))

# Output arrays
# Number of free elements in a symmetric d x d matrix.  Floor division keeps
# this an int, which np.empty requires for a dimension; d*(d+1) is always
# even, so the result is exact.
d2 = (d*(d+1))//2
S_hats = np.empty((d,d,N), order='F')
m_hats = np.empty((d,N), order='F')
S_samps = np.empty((d,d,N), order='F')
m_samps = np.empty((d,N), order='F')
a_Ss = np.empty((d2,d2,N), order='F')
a_ms = np.empty((d,d,N), order='F')
tresh = np.empty(N, dtype=bool)
if rand_distr_every_iter:
    # Storage for the distribution drawn on each repetition.
    m1s = np.empty((d,N), order='F')
    S1s = np.empty((d,d,N), order='F')
def __init__(self, site_model, X, y, **kwargs):
    """Set up the master: validate the data, split it into sites, create
    one Worker per site and allocate the working arrays.

    Parameters
    ----------
    site_model : str or model object
        If a string, it is passed to ``load_stan`` (together with the
        ``overwrite_model`` option); anything else is used as the model
        as is.

    X : ndarray
        One or two dimensional explanatory data; the first axis indexes
        the samples.

    y : ndarray
        One dimensional target data with the same length as `X`.

    **kwargs
        Options listed in ``self.DEFAULT_KWARGS`` (missing ones get their
        default), plus any option listed in ``Worker.DEFAULT_OPTIONS`` or
        ``Worker.DEFAULT_STAN_PARAMS``, which are forwarded to every
        worker. The options read here are:

        * ``site_sizes`` / ``site_ind_ord`` / ``site_ind`` -- site
          definition, checked in this order: sizes of consecutive sites,
          a sorted array of 0-based site indices, or an unsorted array of
          0-based site indices (data are then stably sorted by site).
        * ``A`` -- dict of additional data given to every site.
        * ``A_n`` -- dict of per-sample additional arrays (first axis of
          length N), sliced per site.
        * ``A_k`` -- dict of per-site additional data (length K each).
        * ``prior`` -- dict with keys ``Q``, ``r`` (natural form) or
          ``S``, ``m`` (moment form); ``None`` gives identity precision
          and zero ``r`` of size ``dphi``.
        * ``dphi`` -- dimension of the shared parameter.
        * ``df0``, ``df0_start``, ``df0_end``, ``df0_iter``, ``df_decay``,
          ``df_treshold`` -- damping factor configuration.
        * ``init_site`` -- ``None``, an ndarray copied into every site
          precision, or a scalar ``s`` giving diagonal site precisions
          ``K / s**2``.

    Raises
    ------
    TypeError
        For an unrecognised keyword argument or a `prior` of wrong type.
    ValueError
        For inconsistent shapes, an invalid site definition, clashing
        additional data names, an unusable prior or an out-of-range
        constant ``df0``.
    NotImplementedError
        If no site definition is given.

    """
    # Parse keyword arguments
    self.worker_options = {}
    for kw, val in kwargs.items():
        if (kw in Worker.DEFAULT_OPTIONS
                or kw in Worker.DEFAULT_STAN_PARAMS):
            self.worker_options[kw] = val
        elif kw not in self.DEFAULT_KWARGS:
            # Unrecognised keyword argument
            raise TypeError("Unexpected keyword argument '{}'".format(kw))
    # Set missing kwargs to defaults
    for kw, default in self.DEFAULT_KWARGS.items():
        kwargs.setdefault(kw, default)
    # Set missing worker options to defaults
    for kw, default in Worker.DEFAULT_OPTIONS.items():
        self.worker_options.setdefault(kw, default)
    for kw, default in Worker.DEFAULT_STAN_PARAMS.items():
        self.worker_options.setdefault(kw, default)

    # Validate X
    self.N = X.shape[0]
    if len(X.shape) == 2:
        self.D = X.shape[1]
    elif len(X.shape) == 1:
        self.D = None
    else:
        raise ValueError("Argument `X` should be one or two dimensional")
    self.X = X

    # Validate y
    if len(y.shape) != 1:
        raise ValueError("Argument `y` should be one dimensional")
    if y.shape[0] != self.N:
        raise ValueError("The shapes of `y` and `X` does not match")
    self.y = y

    # Process site indices
    # K     : number of sites
    # Nk    : number of samples per site
    # k_ind : site index of each sample
    # k_lim : sample index limits
    # k_sort: permutation that groups the samples by site, or None when the
    #         data is already ordered by site
    k_sort = None
    if kwargs['site_sizes'] is not None:
        # Size of each site provided
        self.Nk = kwargs['site_sizes']
        self.K = len(self.Nk)
        self.k_lim = np.concatenate(([0], np.cumsum(self.Nk)))
        self.k_ind = np.empty(self.N, dtype=np.int64)
        for k in range(self.K):
            self.k_ind[self.k_lim[k]:self.k_lim[k + 1]] = k
    elif kwargs['site_ind_ord'] is not None:
        # Sorted array of site indices provided
        self.k_ind = kwargs['site_ind_ord']
        self.Nk = np.bincount(self.k_ind)
        self.K = len(self.Nk)
        self.k_lim = np.concatenate(([0], np.cumsum(self.Nk)))
    elif kwargs['site_ind'] is not None:
        # Unsorted array of site indices provided
        k_ind = kwargs['site_ind']
        k_sort = k_ind.argsort(kind='mergesort')  # Stable sort
        self.k_ind = k_ind[k_sort]
        self.Nk = np.bincount(self.k_ind)
        self.K = len(self.Nk)
        self.k_lim = np.concatenate(([0], np.cumsum(self.Nk)))
        # Copy X and y to a new sorted array
        self.X = self.X[k_sort]
        self.y = self.y[k_sort]
    else:
        raise NotImplementedError("Auto clustering not yet implemented")
    if self.k_lim[-1] != self.N:
        raise ValueError("Site definition does not match with `X`")
    if np.any(self.Nk == 0):
        raise ValueError(
            "Empty sites: {}. Index the sites from 0 to K-1".format(
                np.nonzero(self.Nk == 0)[0]))
    if self.K < 2:
        raise ValueError("Distributed EP should be run with at least "
                         "two sites.")

    # Ensure that X and y are C contiguous
    self.X = np.ascontiguousarray(self.X)
    self.y = np.ascontiguousarray(self.y)

    # Process A
    self.A = kwargs['A']
    # Check for name clashes
    for key in self.A:
        if key in Worker.RESERVED_STAN_PARAMETER_NAMES:
            raise ValueError(
                "Additional data name {} clashes.".format(key))

    # Process A_n (shallow copy, so the caller's dict is left untouched)
    self.A_n = kwargs['A_n'].copy()
    for key, val in kwargs['A_n'].items():
        if val.shape[0] != self.N:
            raise ValueError("The shapes of `A_n[{}]` and `X` does not "
                             "match".format(repr(key)))
        # Check for name clashes
        if (key in Worker.RESERVED_STAN_PARAMETER_NAMES
                or key in self.A):
            raise ValueError(
                "Additional data name {} clashes.".format(key))
        if k_sort is not None:
            # Per-sample data has to follow the same site ordering as X
            # and y, otherwise the per-site slices below would pick rows
            # that belong to other samples.
            val = val[k_sort]
            self.A_n[key] = val
        # Ensure C-contiguous
        if not val.flags['CARRAY']:
            self.A_n[key] = np.ascontiguousarray(val)

    # Process A_k
    self.A_k = kwargs['A_k']
    for key, val in self.A_k.items():
        # Check for length
        if len(val) != self.K:
            raise ValueError("Array-like length mismatch in `A_k` "
                             "(should be: {}, found: {})".format(
                                 self.K, len(val)))
        # Check for name clashes
        if (key in Worker.RESERVED_STAN_PARAMETER_NAMES
                or key in self.A
                or key in self.A_n):
            raise ValueError(
                "Additional data name {} clashes.".format(key))

    # Initialise prior
    prior = kwargs['prior']
    self.dphi = kwargs['dphi']
    if prior is None:
        # Use default prior
        if self.dphi is None:
            raise ValueError("If arg. `prior` is not provided, "
                             "arg. `dphi` has to be given")
        self.Q0 = np.eye(self.dphi).T  # Transposed for F contiguous
        self.r0 = np.zeros(self.dphi)
    else:
        # Use provided prior (any dict-like object)
        if not (hasattr(prior, 'has_key') or hasattr(prior, 'keys')):
            raise TypeError("Argument `prior` is of wrong type")
        if 'Q' in prior and 'r' in prior:
            # In a natural form already
            self.Q0 = np.asfortranarray(prior['Q'])
            self.r0 = prior['r']
        elif 'S' in prior and 'm' in prior:
            # Convert into natural format
            self.Q0, self.r0 = invert_normal_params(prior['S'],
                                                    prior['m'])
        else:
            raise ValueError("Argument `prior` is not appropriate")
        if self.dphi is None:
            self.dphi = self.Q0.shape[0]
        if self.Q0.shape[0] != self.dphi or self.r0.shape[0] != self.dphi:
            raise ValueError("Arg. `dphi` does not match with `prior`")

    # Damping factor
    self.df_decay = kwargs['df_decay']
    self.df_treshold = kwargs['df_treshold']
    if kwargs['df0'] is None:
        # Use default sinusoidal function
        df0_start = kwargs['df0_start']
        if df0_start is None:
            df0_start = 1.0 / self.K
        df0_end = kwargs['df0_end']
        if df0_end is None:
            df0_end = ((self.K - 1) * 0.5 + 1) / self.K
        df0_iter = kwargs['df0_iter']

        def default_df0(i):
            # Fraction of the ramp completed at iteration i, in [0, 1].
            # The float() guards against integer (floor) division, which
            # would turn the smooth ramp into a single step.
            # N.B. df0_iter has to be larger than one.
            progress = (max(0, min(i - 2, df0_iter - 1))
                        / float(df0_iter - 1))
            return (df0_start + (df0_end - df0_start) * 0.5
                    * (1 + np.sin(np.pi * (progress - 0.5))))

        self.df0 = default_df0
    elif isinstance(kwargs['df0'], (float, int)):
        # Use constant initial damping factor
        if kwargs['df0'] <= 0 or kwargs['df0'] > 1:
            raise ValueError("Constant initial damping factor has to be "
                             "in (0,1]")
        self.df0 = lambda i: kwargs['df0']
    else:
        # Use provided initial damping factor function
        self.df0 = kwargs['df0']

    # Get Stan model
    if isinstance(site_model, basestring):
        # From file
        self.site_model = load_stan(site_model,
                                    overwrite=kwargs['overwrite_model'])
    else:
        self.site_model = site_model

    # Process seed in worker options (one shared RandomState for all sites)
    if not isinstance(self.worker_options['seed'], np.random.RandomState):
        self.worker_options['seed'] = \
            np.random.RandomState(seed=self.worker_options['seed'])

    # Initialise the workers
    self.workers = []
    for k in range(self.K):
        lo = self.k_lim[k]
        hi = self.k_lim[k + 1]
        A = dict((key, val[lo:hi]) for key, val in self.A_n.items())
        A.update(self.A)
        for key, val in self.A_k.items():
            A[key] = val[k]
        # Use the site-ordered, C-contiguous copies held by the instance,
        # not the caller's original arrays.
        self.workers.append(
            Worker(
                k,
                self.site_model,
                self.dphi,
                self.X[lo:hi],
                self.y[lo:hi],
                A=A,
                **self.worker_options
            )
        )

    # Allocate space for calculations
    # Mean and cov of the approximation
    self.S = np.empty((self.dphi, self.dphi), order='F')
    self.m = np.empty(self.dphi)
    # Natural parameters of the approximation
    self.Q = self.Q0.copy(order='F')
    self.r = self.r0.copy()
    # Natural site parameters
    self.Qi = np.zeros((self.dphi, self.dphi, self.K), order='F')
    self.ri = np.zeros((self.dphi, self.K), order='F')
    # Natural site proposal parameters
    self.Qi2 = np.zeros((self.dphi, self.dphi, self.K), order='F')
    self.ri2 = np.zeros((self.dphi, self.K), order='F')
    # Site parameter updates
    self.dQi = np.zeros((self.dphi, self.dphi, self.K), order='F')
    self.dri = np.zeros((self.dphi, self.K), order='F')

    if kwargs['init_site'] is not None:
        # Config initial site distributions
        if isinstance(kwargs['init_site'], np.ndarray):
            for k in range(self.K):
                np.copyto(self.Qi[:, :, k], kwargs['init_site'])
        else:
            # float() avoids floor division for an integer `init_site`.
            diag_elem = float(self.K) / (kwargs['init_site']**2)
            for k in range(self.K):
                # Stride dphi + 1 over the flattened matrix hits exactly
                # the diagonal elements.
                self.Qi[:, :, k].flat[::self.dphi + 1] = diag_elem

    # Track iterations
    self.iter = 0
def tilted(self, dQi, dri, save_fit=False):
    """Estimate the tilted distribution parameters.

    This method estimates the tilted distribution parameters and calculates
    the resulting site parameter updates into the given arrays. The cavity
    distribution has to be calculated before this method is called, i.e.
    the method cavity has to be run before this.

    After calling this method the instance variables self.Mat and self.vec
    hold the tilted distribution moment parameters (note however that the
    covariance matrix is unnormalised and the number of samples
    contributing to this matrix is stored in the instance variable
    self.nsamp).
    NOTE(review): this holds as written for the 'olse' branch; in the
    'sample' branch self.Mat receives the leading rows of the QR-processed
    sample array and in the 'glassocv' branch self.Mat is not written at
    all -- confirm the statement above.

    Parameters
    ----------
    dQi, dri : ndarray
        Output arrays where the site parameter updates are placed. They are
        zero-filled if the precision estimate fails.

    save_fit : bool, optional
        If True, the Stan fit-object is saved into the instance variable
        `fit` for later use. Default is False.

    Returns
    -------
    pos_def
        True if the estimated tilted distribution covariance matrix is
        positive definite. False otherwise.

    Raises
    ------
    RuntimeError
        If the worker is not in phase 1 (cavity not calculated).
    ValueError
        If the option `prec_estim` has an unknown value.

    """

    if self.phase != 1:
        raise RuntimeError('Cavity has to be calculated before tilted.')

    # FIXME: Temp fix for RandomState problem in 32-bit Python
    if self.fix32bit:
        self.stan_params['seed'] = self.rstate.randint(2**31 - 1)

    # Sample from the model and record how long the sampling call took
    with suppress_stdout():
        time_start = timer()
        fit = self.stan_model.sampling(
            data=self.data,
            **self.stan_params
        )
        time_end = timer()
        self.last_time = (time_end - time_start)

    if self.verbose:
        # Mean stepsize
        steps = [
            np.mean(p['stepsize__'])
            for p in fit.get_sampler_params()
        ]
        print '\n mean stepsize: {:.4}'.format(np.mean(steps))
        # Max Rhat (from all but last row in the last column)
        print ' max Rhat: {:.4}'.format(
            np.max(fit.summary()['summary'][:-1, -1]))

    if self.init_prev:
        # Store the last sample of each chain so that the next sampling
        # run can be initialised from it
        if isinstance(self.stan_params['init'], basestring):
            # No samples stored before ... initialise list of dicts
            self.stan_params['init'] = get_last_fit_sample(fit)
        else:
            # Reuse the existing container
            get_last_fit_sample(fit, out=self.stan_params['init'])

    # Extract samples
    # TODO: preallocate space for samples
    samp = copy_fit_samples(fit, self.fit_pnames)
    self.nsamp = samp.shape[0]

    if save_fit:
        # Save fit
        self.fit = fit
    else:
        # Dereference fit here so that it can be garbage collected
        fit = None

    # Estimate precision matrix
    try:
        # Basic sample estimate (also used while prec_estim_skip > 0,
        # regardless of the configured estimator)
        if self.prec_estim == 'sample' or self.prec_estim_skip > 0:
            # Mean
            mt = np.mean(samp, axis=0, out=self.vec)
            # Center samples (in place)
            samp -= mt
            # Use QR-decomposition for obtaining Cholesky of the scatter
            # matrix (only R needed, Q-less algorithm would be nice)
            # N.B. the result is read back from `samp` itself below, so
            # this relies on the routine overwriting its input.
            _, _, _, info = dgeqrf_routine(samp, overwrite_a=True)
            if info:
                raise linalg.LinAlgError(
                    "dgeqrf LAPACK routine failed with error code {}".
                    format(info))
            # Copy the relevant part of the array into contiguous memory
            np.copyto(self.Mat, samp[:self.dphi, :])
            invert_normal_params(self.Mat, mt, out_A=dQi, out_b=dri,
                                 cho_form=True)
            # Unbiased (for normal distr.) natural parameter estimates
            unbias_k = (self.nsamp - self.dphi - 2)
            dQi *= unbias_k
            dri *= unbias_k
            # Count down the remaining forced sample estimates
            if self.prec_estim_skip > 0:
                self.prec_estim_skip -= 1

        # Optimal linear shrinkage estimate
        elif self.prec_estim == 'olse':
            # Mean
            mt = np.mean(samp, axis=0, out=self.vec)
            # Center samples (in place)
            samp -= mt
            # Sample covariance (unnormalised scatter matrix)
            np.dot(samp.T, samp, out=self.Mat.T)
            # Normalise self.Mat into dQi
            np.divide(self.Mat, self.nsamp, out=dQi)
            # Estimate
            olse(dQi, self.nsamp, P=self.Q, out='in-place')
            np.dot(dQi, mt, out=dri)

        # Graphical lasso with cross validation
        elif self.prec_estim == 'glassocv':
            # Mean
            mt = np.mean(samp, axis=0, out=self.vec)
            # Center samples (in place)
            samp -= mt
            # Fit
            self.glassocv.fit(samp)
            if self.verbose:
                print ' glasso alpha: {:.4}'.format(
                    self.glassocv.alpha_)
            np.copyto(dQi, self.glassocv.precision_.T)
            # Calculate corresponding r
            np.dot(dQi, mt, out=dri)

        else:
            # Not a LinAlgError, so this propagates to the caller
            raise ValueError("Invalid value for option `prec_estim`")

        # Calculate the difference into the output arrays
        # (tilted natural parameters minus self.Q / self.r)
        np.subtract(dQi, self.Q, out=dQi)
        np.subtract(dri, self.r, out=dri)

    except linalg.LinAlgError:
        # Precision estimate failed: report failure with a zero update
        pos_def = False
        self.phase = 0
        dQi.fill(0)
        dri.fill(0)
        if self.init_prev:
            # Reset initialisation method
            # NOTE(review): the stored samples live in
            # self.stan_params['init'], while this assigns self.init --
            # confirm that this is the attribute read by the sampler setup.
            self.init = self.init_orig
    else:
        # Set return and phase flag
        pos_def = True
        self.phase = 2

    # Counted for failed attempts as well
    self.iteration += 1
    return pos_def
def run(self, niter, calc_moments=True, verbose=True):
    """Run the distributed EP algorithm.

    Every iteration forms damped site proposals and the resulting global
    approximation, shrinking the damping factor until the global precision
    matrix and every cavity distribution are positive definite, and then
    lets each worker estimate its tilted distribution.

    Parameters
    ----------
    niter : int
        Number of iterations to run.

    calc_moments : bool, optional
        If True, the moment parameters (mean and covariance) of the
        posterior approximation are calculated every iteration and
        returned. Default is True.

    verbose : bool, optional
        If true, some progress information is printed. Default is True.

    Returns
    -------
    m_phi, var_phi : ndarray
        Mean and variance of the posterior approximation at every
        iteration. Returned only if `calc_moments` is True and all
        iterations were completed; if `calc_moments` is False, None is
        returned on success.

    code : int
        On failure a single status code is returned instead, regardless
        of `calc_moments`: ``self.INVALID_PRIOR``,
        ``self.DF_TRESHOLD_REACHED_GLOBAL`` or
        ``self.DF_TRESHOLD_REACHED_CAVITY``.

    """

    # Localise some instance variables
    # Mean and cov of the posterior approximation
    S = self.S
    m = self.m
    # Natural parameters of the approximation
    Q = self.Q
    r = self.r
    # Natural site parameters
    Qi = self.Qi
    ri = self.ri
    # Natural site proposal parameters
    Qi2 = self.Qi2
    ri2 = self.ri2
    # Site parameter updates
    dQi = self.dQi
    dri = self.dri

    # Array for positive definiteness checking of each cavity distribution
    posdefs = np.empty(self.K, dtype=bool)

    if calc_moments:
        # Allocate memory for results
        m_phi_s = np.zeros((niter, self.dphi))
        var_phi_s = np.zeros((niter, self.dphi))

    # Iterate niter rounds
    for cur_iter in range(niter):
        self.iter += 1
        # Initial damping factor
        if self.iter > 1:
            df = self.df0(self.iter)
        else:
            # At the first round (round zero) there is nothing to damp yet
            df = 1
        if verbose:
            print('Iter {}, starting df {:.3g}.'.format(self.iter, df))

        while True:
            # Try to update the global posterior approximation

            # These 4 lines could be run in parallel also
            np.add(Qi, np.multiply(df, dQi, out=Qi2), out=Qi2)
            np.add(ri, np.multiply(df, dri, out=ri2), out=ri2)
            np.add(Qi2.sum(2, out=Q), self.Q0, out=Q)
            np.add(ri2.sum(1, out=r), self.r0, out=r)
            # N.B. In the first iteration Q=Q0 and r=r0

            # Check for positive definiteness.  The buffer of S doubles as
            # the Cholesky workspace; the return value of cho_factor is
            # discarded, so this relies on the in-place overwrite.
            cho_Q = S
            np.copyto(cho_Q, Q)
            try:
                linalg.cho_factor(cho_Q, overwrite_a=True)
            except linalg.LinAlgError:
                # Not positive definite -> reduce damping factor
                df *= self.df_decay
                if verbose:
                    print('Neg def posterior cov, '
                          'reducing df to {:.3}'.format(df))
                if self.iter == 1:
                    # Nothing but the prior contributes on the first round.
                    if verbose:
                        print('Invalid prior.')
                    return self.INVALID_PRIOR
                if df < self.df_treshold:
                    if verbose:
                        print('Damping factor reached minimum.')
                    return self.DF_TRESHOLD_REACHED_GLOBAL
                continue

            # Cavity distributions (parallelisable)
            # -------------------------------
            # Check positive definiteness for each cavity distribution
            for k in range(self.K):
                posdefs[k] = \
                    self.workers[k].cavity(Q, r, Qi2[:, :, k], ri2[:, k])
                # Early stopping criterion (when in serial)
                if not posdefs[k]:
                    break

            if np.all(posdefs):
                # All cavity distributions are positive definite.
                # Accept step (switch Qi-Qi2 and ri-ri2)
                Qi, Qi2 = Qi2, Qi
                ri, ri2 = ri2, ri
                self.Qi = Qi
                self.Qi2 = Qi2
                self.ri = ri
                self.ri2 = ri2
                break

            # Not all cavity distributions are positive definite ...
            # reduce the damping factor
            df *= self.df_decay
            if verbose:
                print('Neg.def. cavity '
                      '(first encountered in site {}), '
                      'reducing df to {:.3}.'
                      .format(np.nonzero(~posdefs)[0][0], df))
            if df < self.df_treshold:
                if verbose:
                    print('Damping factor reached minimum.')
                return self.DF_TRESHOLD_REACHED_CAVITY

        if calc_moments:
            # Invert Q (chol was already calculated)
            # N.B. The following inversion could be done while
            # parallel jobs are running, thus saving time.
            invert_normal_params(cho_Q, r, out_A='in_place', out_b=m,
                                 cho_form=True)
            # Store the approximation moments
            np.copyto(m_phi_s[cur_iter], m)
            np.copyto(var_phi_s[cur_iter], np.diag(S))

        # Tilted distributions (parallelisable)
        # -------------------------------
        for k in range(self.K):
            posdefs[k] = self.workers[k].tilted(dQi[:, :, k], dri[:, k])
        if verbose and not np.all(posdefs):
            print('Neg.def. tilted in site(s) {}.'
                  .format(np.nonzero(~posdefs)[0]))

        if verbose and calc_moments:
            print('Iter {} done, std of phi[0]: {}'
                  .format(self.iter, np.sqrt(var_phi_s[cur_iter, 0])))

    if calc_moments:
        return m_phi_s, var_phi_s
def tilted(self, dQi, dri):
    """Estimate the tilted distribution parameters.

    This method estimates the tilted distribution parameters and calculates
    the resulting site parameter updates into the given arrays. The cavity
    distribution has to be calculated before this method is called, i.e.
    the method cavity has to be run before this.

    After calling this method the instance variables self.Mat and self.vec
    hold the tilted distribution moment parameters (note however that the
    covariance matrix is unnormalised and the number of samples
    contributing to this matrix is stored in the instance variable
    self.nsamp).

    Parameters
    ----------
    dQi, dri : ndarray
        Output arrays where the site parameter updates are placed. They are
        zero-filled if the precision estimate fails.

    Returns
    -------
    pos_def
        True if the estimated tilted distribution covariance matrix is
        positive definite. False otherwise.

    Raises
    ------
    RuntimeError
        If the worker is not in phase 1 (cavity not calculated).
    ValueError
        If the sampling call raises ValueError (re-raised after dumping the
        sampler inputs to disk), or if the option `prec_estim` has an
        unknown value.

    """

    if self.phase != 1:
        raise RuntimeError('Cavity has to be calculated before tilted.')

    # FIXME: Temp fix for RandomState problem in 32-bit Python
    if self.fix32bit:
        self.stan_params['seed'] = self.rstate.randint(2**31 - 1)

    # Sample from the model
    try:
        with suppress_stdout():
            fit = self.stan_model.sampling(
                data=self.data,
                pars='phi',
                **self.stan_params
            )
    except ValueError:
        # Debugging aid: dump the sampler inputs into the current working
        # directory so that the failure can be reproduced offline.
        print('Worker {} failed'.format(self.index))
        with open('stan_params.pkl', 'wb') as f:
            pickle.dump(self.stan_params, f)
        with open('data.pkl', 'wb') as f:
            pickle.dump(self.data, f)
        raise ValueError('Jaahast')

    if self.init_prev:
        # Store the last sample of each chain so that the next sampling
        # run can be initialised from it
        if isinstance(self.stan_params['init'], basestring):
            # No samples stored before ... initialise list of dicts
            self.stan_params['init'] = get_last_sample(fit)
        else:
            # Reuse the existing container
            get_last_sample(fit, out=self.stan_params['init'])

    # TODO: Make a non-copying extract
    samp = fit.extract(pars='phi')['phi']
    self.nsamp = samp.shape[0]

    # Assign arrays
    St = self.Mat
    mt = self.vec

    # Sample mean and (unnormalised) covariance
    np.mean(samp, axis=0, out=mt)
    samp -= mt
    np.dot(samp.T, samp, out=St.T)

    if self.smooth is not None:
        # Smoothen the distribution (use dri and dQi as temp arrays)
        St, mt = self._apply_smooth(dri, dQi)

    # Estimate precision matrix
    try:
        # Basic sample estimate (also used while prec_estim_skip > 0)
        # NOTE(review): prec_estim_skip is not decremented in this method
        # -- confirm that it is counted down elsewhere.
        if self.prec_estim == 'sample' or self.prec_estim_skip > 0:
            # Normalise St unbiased into dQi
            np.divide(St, self.nsamp - 1, out=dQi)
            # Convert moment params to natural params
            invert_normal_params(dQi, mt, out_A='in_place', out_b=dri)
            # Unbiased natural parameter estimates.  Both operands are
            # integers, so force true division: floor division would give
            # zero here and wipe out the whole estimate.
            unbias_k = (float(self.nsamp - self.dphi - 2)
                        / (self.nsamp - 1))
            dQi *= unbias_k
            dri *= unbias_k

        # Optimal linear shrinkage estimate
        elif self.prec_estim == 'olse':
            # Normalise St into dQi
            np.divide(St, self.nsamp, out=dQi)
            # Estimate
            olse(dQi, self.nsamp, P=self.Q, out='in_place')
            np.dot(dQi, mt, out=dri)

        else:
            # Not a LinAlgError, so this propagates to the caller
            raise ValueError("Invalid value for option `prec_estim`")

        # Calculate the difference into the output arrays
        # (tilted natural parameters minus self.Q / self.r)
        np.subtract(dQi, self.Q, out=dQi)
        np.subtract(dri, self.r, out=dri)

    except linalg.LinAlgError:
        # Precision estimate failed: report failure with a zero update
        pos_def = False
        self.phase = 0
        dQi.fill(0)
        dri.fill(0)
        if self.smooth is not None:
            # Reset tilted memory
            self.prev_stored = 0
        if self.init_prev:
            # Reset initialisation method
            # NOTE(review): the stored samples live in
            # self.stan_params['init'], while this assigns self.init --
            # confirm that this is the attribute read by the sampler setup.
            self.init = self.init_orig
    else:
        # Set return and phase flag
        pos_def = True
        self.phase = 2

    # Counted for failed attempts as well
    self.iteration += 1
    return pos_def
if not rand_distr_every_iter:
    # One fixed pair of distributions is used for every repetition.
    if not use_pre_defined:
        # Generate random distr
        if indep_distr:
            S1 = random_cov(d)
            m1 = np.random.randn(d) + 0.8 * np.random.randn()
            S2 = random_cov(d)
            m2 = np.random.randn(d) + 0.8 * np.random.randn()
        else:
            S1, S2 = random_cov(d, diff=diff)
            m1 = np.random.randn(d) + 0.6 * np.random.randn()
            m2 = m1 + diff * np.random.randn(d)
    # Frozen distr
    N1 = multivariate_normal(mean=m1, cov=S1)
    # Convert S2,m2 to natural parameters
    Q2, r2 = invert_normal_params(S2, m2)
    # Calc half log-det of Q2 (sum of the log-diagonal of its Cholesky factor)
    ldet_Q_tilde = np.sum(np.log(np.diag(linalg.cho_factor(Q2)[0])))

# Output arrays
# Number of free elements in a symmetric d x d matrix.  Floor division keeps
# this an int, which np.empty requires for a dimension; d * (d + 1) is always
# even, so the result is exact.
d2 = (d * (d + 1)) // 2
S_hats = np.empty((d, d, N), order='F')
m_hats = np.empty((d, N), order='F')
S_samps = np.empty((d, d, N), order='F')
m_samps = np.empty((d, N), order='F')
a_Ss = np.empty((d2, d2, N), order='F')
a_ms = np.empty((d, d, N), order='F')
tresh = np.empty(N, dtype=bool)
if rand_distr_every_iter:
    # Storage for the distribution drawn on each repetition.
    m1s = np.empty((d, N), order='F')
    S1s = np.empty((d, d, N), order='F')