def plot_autocorr(sol, save=False, draw=True, save_as_png=False, dpi=None,
                  ignore=subplots_to_ignore):
    """Plots autocorrelations"""
    ext = 'png' if save_as_png else 'pdf'
    MDL = sol.MDL
    keys = [k for k in sol.var_dict.keys() if k not in ignore]
    for (i, k) in enumerate(keys):
        vect = old_div(MDL.trace(k)[:].size, len(MDL.trace(k)[:]))
        if vect > 1:
            keys[i] = [k + "%d" % n for n in range(1, vect + 1)]
    keys = list(flatten(keys))
    ncols = 2
    nrows = int(ceil(len(keys) * 1.0 / ncols))
    fig, ax = plt.subplots(nrows, ncols, figsize=(10, nrows * 2))
    plt.ticklabel_format(style='sci', axis='both', scilimits=(0, 0))
    for (a, k) in zip(ax.flat, keys):
        if k[-1] not in ["%d" % d for d in range(1, 8)] or k == "R0":
            data = sorted(MDL.trace(k)[:].ravel())
        else:
            data = sorted(MDL.trace(k[:-1])[:][:, int(k[-1]) - 1].ravel())
        plt.sca(a)
        plt.gca().get_yaxis().get_major_formatter().set_useOffset(False)
        plt.gca().get_xaxis().get_major_formatter().set_useOffset(False)
        plt.yticks(fontsize=12)
        plt.xticks(fontsize=12)
        plt.ylabel(k, fontsize=12)
        to_thin = old_div(len(data), 50)
        if to_thin != 0:
            plt.xlabel("Lags / %d" % to_thin, fontsize=12)
        else:
            plt.xlabel("Lags", fontsize=12)
        max_lags = None
        if len(data) > 50:
            data = data[::to_thin]
        plt.acorr(data, usevlines=True, maxlags=max_lags,
                  detrend=plt.mlab.detrend_mean)
        plt.grid(None)
    fig.tight_layout()
    for a in ax.flat[ax.size - 1:len(keys) - 1:-1]:
        a.set_visible(False)
    if save:
        fn = 'AC-%s-%s.%s' % (sol.model_type_str, sol.filename, ext)
        save_figure(fig, subfolder='Autocorrelations', fname=fn, dpi=dpi)
    plt.close(fig)
    if draw:
        return fig
    else:
        return None
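
# A minimal usage sketch (hypothetical settings; assumes `sol` is a fitted
# mcmcinv object with populated MDL and var_dict attributes):
#
#     fig = plot_autocorr(sol, save=True, save_as_png=True, dpi=144)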
def start(self):
    #==========================================================================
    """Cole-Cole Bayesian Model"""
    #==========================================================================
    def ColeColeModel(cc_modes):
        # Initial guesses
        p0 = {'R0': 1.0,
              'm': None,
              'log_tau': None,
              'c': None,
              }
        # Stochastic variables
        R0 = pymc.Uniform('R0', lower=0.7, upper=1.3, value=p0["R0"])
        m = pymc.Uniform('m', lower=0.0, upper=1.0, value=p0["m"],
                         size=cc_modes)
        log_tau = pymc.Uniform('log_tau', lower=-7.0, upper=4.0,
                               value=p0['log_tau'], size=cc_modes)
        c = pymc.Uniform('c', lower=0.0, upper=1.0, value=p0['c'],
                         size=cc_modes)

        # Deterministic variables
        @pymc.deterministic()
        def zmod(cc_modes=cc_modes, R0=R0, m=m, lt=log_tau, c=c):
            return ColeCole_cyth1(w, R0, m, lt, c)

        @pymc.deterministic()
        def NRMSE_r(zmod=zmod, data=self.data["zn"]):
            return np.sqrt(np.mean((zmod[0] - data[0])**2))/abs(max(data[0]) - min(data[0]))

        @pymc.deterministic()
        def NRMSE_i(zmod=zmod, data=self.data["zn"]):
            return np.sqrt(np.mean((zmod[1] - data[1])**2))/abs(max(data[1]) - min(data[1]))

        # Likelihood
        obs = pymc.Normal('obs', mu=zmod,
                          tau=old_div(1.0, (self.data["zn_err"]**2)),
                          value=self.data["zn"], size=(2, len(w)),
                          observed=True)
        return locals()

    #==========================================================================
    """Shin Bayesian Model"""
    #==========================================================================
    def ShinModel():
        # Initial guesses
        p0 = {'R': [0.5, 0.5],
              'log_Q': [0, -4],
              'n': [0.5, 0.5],
              'log_tau': None,
              'm': None,
              }
        # Stochastics
        R = pymc.Uniform('R', lower=0.0, upper=1.0, value=p0["R"], size=2)
        log_Q = pymc.Uniform('log_Q', lower=-7, upper=2, value=p0["log_Q"],
                             size=2)
        n = pymc.Uniform('n', lower=0.0, upper=1.0, value=p0["n"], size=2)

        # Deterministics
        @pymc.deterministic(plot=False)
        def zmod(R=R, log_Q=log_Q, n=n):
            return Shin_cyth(w, R, log_Q, n)

        @pymc.deterministic(plot=False)
        def log_tau(R=R, log_Q=log_Q, n=n):
            return np.log10((R*(10**log_Q))**(old_div(1., n)))

        @pymc.deterministic(plot=False)
        def R0(R=R):
            return R[0] + R[1]

        @pymc.deterministic(plot=False)
        def m(R=R):
            return seigle_m*(old_div(max(R), (max(R) + min(R))))

        @pymc.deterministic(plot=False)
        def NRMSE_r(zmod=zmod, data=self.data["zn"]):
            return np.sqrt(np.mean((zmod[0] - data[0])**2))/abs(max(data[0]) - min(data[0]))

        @pymc.deterministic(plot=False)
        def NRMSE_i(zmod=zmod, data=self.data["zn"]):
            return np.sqrt(np.mean((zmod[1] - data[1])**2))/abs(max(data[1]) - min(data[1]))

        # Likelihood
        obs = pymc.Normal('obs', mu=zmod,
                          tau=old_div(1.0, (self.data["zn_err"]**2)),
                          value=self.data["zn"], size=(2, len(w)),
                          observed=True)
        return locals()

    #==========================================================================
    """Dias Bayesian Model"""
    #==========================================================================
    def DiasModel():
        # Initial guesses
        p0 = {'R0': 1.0,
              'm': seigle_m,
              'log_tau': None,
              'eta': None,
              'delta': None,
              }
        # Stochastics
        R0 = pymc.Uniform('R0', lower=0.9, upper=1.1, value=1)
        m = pymc.Uniform('m', lower=0.0, upper=1.0, value=p0['m'])
        log_tau = pymc.Uniform('log_tau', lower=-7.0, upper=0.0,
                               value=p0['log_tau'])
        eta = pymc.Uniform('eta', lower=0.0, upper=50.0, value=p0['eta'])
        delta = pymc.Uniform('delta', lower=0.0, upper=1.0, value=p0['delta'])

        # Deterministics
        @pymc.deterministic(plot=False)
        def zmod(R0=R0, m=m, lt=log_tau, eta=eta, delta=delta):
            return Dias_cyth(w, R0, m, lt, eta, delta)

        # Likelihood
        obs = pymc.Normal('obs', mu=zmod,
                          tau=old_div(1.0, (self.data["zn_err"]**2)),
                          value=self.data["zn"], size=(2, len(w)),
                          observed=True)

        @pymc.deterministic(plot=False)
        def NRMSE_r(zmod=zmod, data=self.data["zn"]):
            return np.sqrt(np.mean((zmod[0] - data[0])**2))/abs(max(data[0]) - min(data[0]))

        @pymc.deterministic(plot=False)
        def NRMSE_i(zmod=zmod, data=self.data["zn"]):
            return np.sqrt(np.mean((zmod[1] - data[1])**2))/abs(max(data[1]) - min(data[1]))
        return locals()

    def regularize(obj):
        # Stochastic variables
        # norm = pymc.Uniform('norm', lower=1, upper=1.1)
        log_f_lambda = pymc.Uniform('log_f_lambda', lower=1, upper=6)

        @pymc.deterministic(plot=False)
        def zmod(obj=obj, f_lambda=10**log_f_lambda):
            obj.config['norm'] = 10
            obj.config['fixed_lambda'] = f_lambda
            obj.fit_data()
            l_it = obj.results[-1].iterations[-1]
            z = l_it.Model.F(l_it.m)[::-1].T
            z[1, :] *= -1
            return z/obj.config['norm']
            # return -10**l_it.m[1:]

        # Likelihood function
        obs = pymc.Normal('obs', mu=zmod, tau=1./(2*self.data["zn_err"]**2),
                          value=self.data["zn"], size=(2, len(w)),
                          observed=True)
        # obs = pymc.Normal('obs', mu=zmod,
        #                   tau=1./(2*np.mean(self.data["pha_err"])**2),
        #                   value=self.interp_pha, observed=True)
        return locals()

    def stoCCD(c_exp, ccd_priors):
        # Stochastic variables (noise on CCDtools output)
        # The only assumption we make is that the RTD noise is
        # assumed to be equal to 0 and below 20% with 1 standard deviation
        noise_tau = pymc.Normal('log_noise_tau', mu=0, tau=1/(0.2**2))
        noise_m = pymc.Normal('log_noise_m', mu=0, tau=1/(0.2**2))
        noise_rho = pymc.Normal('log_noise_rho', mu=0, tau=1/(0.2**2))

        # Deterministic variables of CCD
        @pymc.deterministic(plot=False)
        def log_m_i(logm=ccd_priors['log_m'], dm=noise_m):
            # log chargeability array
            return logm + dm

        @pymc.deterministic(plot=False)
        def log_tau_i(logt=ccd_priors['log_tau'], dt=noise_tau):
            # log tau array
            return logt + dt

        @pymc.deterministic(plot=False)
        def R0(R=ccd_priors['R0'], dR=noise_rho):
            # DC resistivity (normalized)
            return R + dR

        @pymc.deterministic(plot=False)
        def cond(log_tau=log_tau_i):
            # Condition on log_tau to compute integrating parameters
            log_tau_min = np.log10(1./w.max())
            log_tau_max = np.log10(1./w.min())
            return (log_tau >= log_tau_min) & (log_tau <= log_tau_max)

        @pymc.deterministic(plot=False)
        def log_total_m(m=10**log_m_i[cond]):
            # Total chargeability
            return np.log10(np.nansum(m))

        @pymc.deterministic(plot=False)
        def log_half_tau(m_i=10**log_m_i[cond], log_tau=log_tau_i[cond]):
            # Tau 50
            return log_tau[np.where(np.cumsum(m_i)/np.nansum(m_i) > 0.5)[0][0]]

        @pymc.deterministic(plot=False)
        def log_U_tau(m_i=10**log_m_i[cond], log_tau=log_tau_i[cond]):
            tau_60 = log_tau[np.where(np.cumsum(m_i)/np.nansum(m_i) > 0.6)[0][0]]
            tau_10 = log_tau[np.where(np.cumsum(m_i)/np.nansum(m_i) > 0.1)[0][0]]
            return np.log10(10**tau_60 / 10**tau_10)

        @pymc.deterministic(plot=False)
        def log_peak_tau(m_i=log_m_i, log_tau=log_tau_i):
            # Tau peaks
            # peak_cond = np.r_[True, m_i[1:] > m_i[:-1]] & np.r_[m_i[:-1] > m_i[1:], True]
            peak_cond = argrelextrema(m_i, np.greater)
            return np.squeeze(log_tau[peak_cond])

        @pymc.deterministic(plot=False)
        def log_peak_m(log_m=log_m_i):
            peak_cond = argrelextrema(log_m, np.greater)
            # Peak chargeability
            return np.squeeze(log_m[peak_cond])

        @pymc.deterministic(plot=False)
        def log_mean_tau(m_i=10**log_m_i[cond], log_tau=log_tau_i[cond]):
            # Tau logarithmic average
            return np.log10(np.exp(np.nansum(m_i*np.log(10**log_tau)) / np.nansum(m_i)))

        @pymc.deterministic(plot=False)
        def zmod(R0=R0, m=10**log_m_i, tau=10**log_tau_i):
            Z = R0 * (1 - np.sum(m*(1 - 1.0/(1 + ((1j*w[:, np.newaxis]*tau)**c_exp))), axis=1))
            return np.array([Z.real, Z.imag])
        @pymc.deterministic(plot=False)
        def NRMSE_r(zmod=zmod, data=self.data["zn"]):
            return np.sqrt(np.mean((zmod[0] - data[0])**2))/abs(max(data[0]) - min(data[0]))

        @pymc.deterministic(plot=False)
        def NRMSE_i(zmod=zmod, data=self.data["zn"]):
            return np.sqrt(np.mean((zmod[1] - data[1])**2))/abs(max(data[1]) - min(data[1]))

        # Likelihood function
        obs = pymc.Normal('obs', mu=zmod, tau=1./(2*self.data["zn_err"]**2),
                          value=self.data["zn"], size=(2, len(w)),
                          observed=True)
        return locals()

    #==========================================================================
    """Debye, Warburg, Cole-Cole decomposition Bayesian Model"""
    #==========================================================================
    def PolyDecompModel(decomp_poly, c_exp, ccd_priors):
        # Initial guesses
        p0 = {'R0': 1.0,
              'a': None,
              # 'a': ([0.01, -0.01, -0.01, 0.001, 0.001]+[0.0]*(decomp_poly-4))[:(decomp_poly+1)],
              'a_mu': np.array([0.00590622364129, -0.00259869937567,
                                -0.00080727429007, 0.00051369743841,
                                0.000176048226508]),
              'a_sd': np.array([0.00448686724083, 0.00354717249566,
                                0.00153254695967, 0.00109002742145,
                                0.000189386869372]),
              'log_tau_hi': -5.0,
              'm_hi': 0.5,
              'TotalM': None,
              'log_MeanTau': None,
              'U': None,
              }
        # Stochastics
        R0 = pymc.Uniform('R0', lower=0.7, upper=1.3, value=p0['R0'])
        # R0 = pymc.Normal('R0', mu=0.989222579813, tau=1./(0.0630422467962**2))
        # R0 = pymc.Normal('R0', mu=ccd_priors['R0'], tau=1./(1e-10**2))
        # m_hi = pymc.Uniform('m_hi', lower=0.0, upper=1.0, value=p0['m_hi'])
        # log_tau_hi = pymc.Uniform('log_tau_hi', lower=-8.0, upper=-3.0, value=p0['log_tau_hi'])
        # a = pymc.Uniform('a', lower=0.9*np.array([-0.0018978657, -0.01669747315, -0.00507228575, -0.0058924686, -0.0008685198]),
        #                  upper=1.1*np.array([0.0222362157, 0.00528944015, 0.00767281475, 0.0052059286, 0.0009839638]),
        #                  size=decomp_poly+1)
        # a = pymc.MvNormal('a', mu=p0['a_mu']*np.ones(decomp_poly+1), tau=(1./(2*p0['a_sd'])**2)*np.eye(decomp_poly+1))
        # a = pymc.MvNormal('a', mu=ccd_priors['a'], tau=(1./(1e-10)**2)*np.eye(decomp_poly+1))
        a = pymc.Normal('a', mu=0, tau=1./(0.01**2), value=p0["a"],
                        size=decomp_poly+1)
        # noise = pymc.Uniform('noise', lower=0., upper=1.)
        if self.guess_noise:
            noise_r = pymc.Uniform('noise_real', lower=0., upper=1.)
            noise_i = pymc.Uniform('noise_imag', lower=0., upper=1.)
        # noises = pymc.Lambda('noises', lambda noise=noise: np.reshape(noise, (2, 1)))

        # Deterministics
        # @pymc.deterministic(plot=False)
        # def m_hi(mp_hi=mp_hi):
        #     return 10**mp_hi / (1 + 10**mp_hi)
        # @pymc.deterministic(plot=False)
        # def cond(log_tau=log_tau):
        #     # Condition on log_tau to compute integrating parameters
        #     log_tau_min = np.log10(1./w.max())
        #     log_tau_max = np.log10(1./w.min())
        ##     log_tau_min = np.log10(self.ccdt_last_it.Data.obj.tau_data_min)
        ##     log_tau_max = np.log10(self.ccdt_last_it.Data.obj.tau_data_max)
        #     return (log_tau >= log_tau_min)&(log_tau <= log_tau_max)
        @pymc.deterministic(plot=False)
        def zmod(R0=R0, a=a):
            return Decomp_cyth(w, tau_10, log_taus, c_exp, R0, a)

        @pymc.deterministic(plot=False)
        def m_i(a=a):
            return np.sum((a*log_taus.T).T, axis=0)
            # return np.poly1d(a)(ccd_priors['log_tau'])

        @pymc.deterministic(plot=False)
        def total_m(m=m_i[cond]):
            return np.nansum(m)
            # return np.sum(m_i[(log_tau >= self.log_min_tau)&(m_i >= 0)&(log_tau <= 0)])

        @pymc.deterministic(plot=False)
        def log_half_tau(m=m_i[cond], log_tau=log_tau[cond]):
            # Tau 50
            return log_tau[np.where(np.cumsum(m)/np.nansum(m) > 0.5)[0][0]]

        @pymc.deterministic(plot=False)
        def log_mean_tau(m=m_i[cond], log_tau=log_tau[cond]):
            return np.log10(np.exp(old_div(np.sum(m*np.log(10**log_tau)), np.sum(m))))

        @pymc.deterministic(plot=False)
        def log_U_tau(m=m_i[cond], log_tau=log_tau[cond]):
            tau_60 = log_tau[np.where(np.cumsum(m)/np.nansum(m) > 0.6)[0][0]]
            tau_10 = log_tau[np.where(np.cumsum(m)/np.nansum(m) > 0.1)[0][0]]
            return np.log10(10**tau_60 / 10**tau_10)

        # @pymc.deterministic(plot=False)
        # def log_peak_tau(m=m_i[cond], log_tau=log_tau[cond]):
        #     # Tau peaks
        #     return log_tau[argrelextrema(m, np.greater)]
        # @pymc.deterministic(plot=False)
        # def peak_m(m=m_i[cond], log_tau=log_tau[cond]):
        #     # Peak chargeability
        #     return m[argrelextrema(m, np.greater)]

        @pymc.deterministic(plot=False)
        def NRMSE_r(zmod=zmod, data=self.data["zn"]):
            return np.sqrt(np.mean((zmod[0] - data[0])**2))/abs(max(data[0]) - min(data[0]))

        @pymc.deterministic(plot=False)
        def NRMSE_i(zmod=zmod, data=self.data["zn"]):
            return np.sqrt(np.mean((zmod[1] - data[1])**2))/abs(max(data[1]) - min(data[1]))

        if self.guess_noise:
            obs_r = pymc.Normal('obs_r', mu=zmod[0], tau=1./((noise_r)**2),
                                value=self.data["zn"][0], size=len(w),
                                observed=True)
            obs_i = pymc.Normal('obs_i', mu=zmod[1], tau=1./((noise_i)**2),
                                value=self.data["zn"][1], size=len(w),
                                observed=True)
        else:
            obs = pymc.Normal('obs', mu=zmod, tau=1./(self.data["zn_err"]**2),
                              value=self.data["zn"], size=(2, len(w)),
                              observed=True)
        return locals()

    #==========================================================================
    """ Main section """
    #==========================================================================
    # Importing data
    self.data = get_data(self.filepath, self.headers, self.ph_units)
    data_ccd = np.hstack((self.data['amp'][::-1], 1000*self.data['pha'][::-1]))
    frequencies_ccd = self.data['freq'][::-1]

    # generate a ccd object
    self.obj = ccd_single.ccd_single(cfg_single.cfg_single())
    self.obj.config['frequency_file'] = frequencies_ccd
    self.obj.config['data_file'] = data_ccd

    if (self.data["pha_err"] == 0).all():
        self.guess_noise = True

    # Estimating Seigel chargeability
    seigle_m = old_div((self.data["amp"][-1] - self.data["amp"][0]),
                       self.data["amp"][-1])
    w = 2*np.pi*self.data["freq"]  # Frequencies measured in rad/s
    # n_freq = len(w)
    # n_decades = np.ceil(max(np.log10(old_div(1.0, w)))) - np.floor(min(np.log10(old_div(1.0, w))))

    # Relaxation times associated with the measured frequencies (Debye
    # decomposition only)
    # log_tau = self.ccd_priors['log_tau']
    if self.model == "PDecomp":
        log_tau = np.linspace(np.floor(min(np.log10(old_div(1.0, w))) - 1),
                              np.floor(max(np.log10(old_div(1.0, w))) + 1),
                              50)
        cond = (log_tau >= min(log_tau) + 1) & (log_tau <= max(log_tau) - 1)
        # Polynomial approximation for the RTD
        log_taus = np.array([log_tau**i for i in
                             list(reversed(range(0, self.decomp_poly + 1)))])
        tau_10 = 10**log_tau  # Accelerates sampling
        self.data["tau"] = tau_10  # Put relaxation times in data dictionary

    # Time and date (for saving traces)
    sample_name = self.filepath.replace("\\", "/").split("/")[-1].split(".")[0]
    # actual_path = str(path.dirname(path.realpath(argv[0])))
    working_path = getcwd().replace("\\", "/") + "/"
    now = datetime.now()
    save_time = now.strftime('%Y%m%d_%H%M%S')
    save_date = now.strftime('%Y%m%d')
    out_path = '%s/Txt traces/%s/%s/%s-%s-%s/' % (working_path, save_date,
                                                  sample_name, self.model,
                                                  sample_name, save_time)

    """
    #==========================================================================
    Call to run_MCMC function
    #==========================================================================
    """
    # "ColeCole", "Dias", "Debye" or "Shin"
    sim_dict = {"ColeCole": {"func": ColeColeModel, "args": [self.cc_modes]},
                "Dias": {"func": DiasModel, "args": []},
                "PDecomp": {"func": PolyDecompModel,
                            "args": [self.decomp_poly, self.c_exp,
                                     self.ccd_priors]},
                "Shin": {"func": ShinModel, "args": []},
                # "Custom": {"func": YourModel, "args": [opt_args]},
                "CCD": {"func": stoCCD, "args": [self.c_exp, self.ccd_priors]},
                "lam": {"func": regularize, "args": [self.obj]},
                }
    simulation = sim_dict[self.model]  # Pick entries for the selected model
    # Run MCMC simulation with selected model and arguments
    self.MDL = run_MCMC(simulation["func"](*simulation["args"]), self.mcmc,
                        save_traces=self.keep_traces, save_where=out_path)
    # if not keep_traces: rmtree(out_path)  # Deletes the traces if not wanted

    """
    #==========================================================================
    Results
    #==========================================================================
    """
    self.pm = format_results(self.MDL, self.data["Z_max"])  # Format output
    zmodstats = self.MDL.stats(chain=-1)["zmod"]  # Take last chain
    zn_avg = zmodstats["mean"]
    zn_l95 = zmodstats["95% HPD interval"][0]
    zn_u95 = zmodstats["95% HPD interval"][1]
    # (In complex notation, de-normalized)
    avg = self.data["Z_max"]*(zn_avg[0] + 1j*zn_avg[1])
    l95 = self.data["Z_max"]*(zn_l95[0] + 1j*zn_l95[1])
    u95 = self.data["Z_max"]*(zn_u95[0] + 1j*zn_u95[1])
    self.fit = {"best": avg, "lo95": l95, "up95": u95}  # Best fit dict with 95% HPD
    self.model_type = {"log_min_tau": self.log_min_tau, "c_exp": self.c_exp,
                       "decomp_polyn": self.decomp_poly,
                       "cc_modes": self.cc_modes}
    self.model_type_str = get_model_type(self)
    # Get names of parameters for save file
    self.var_dict = dict([(x.__name__, x) for x in self.MDL.deterministics]
                         + [(x.__name__, x) for x in self.MDL.stochastics])
    # Get all stochastic and deterministic variables
    pm_names = [x for x in sorted(self.var_dict.keys())]
    trl = [self.var_dict[x] for x in pm_names]
    # Concatenate all traces in 1 matrix
    trace_mat = np.hstack([t.trace().reshape(-1, var_depth(t)) for t in trl])
    # Get numbers for each subheader
    num_names = [var_depth(v) for v in trl]
    # Make list of headers
    headers = flatten([['%s%d' % (pm_names[p], x + 1)
                        for x in range(num_names[p])]
                       if num_names[p] > 1 else [pm_names[p]]
                       for p in range(len(pm_names))])
    self.trace_dict = {k: t for k, t in zip(headers, trace_mat.T)}
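
# For reference, every model above scores misfit with the same normalized
# RMSE deterministics (NRMSE_r, NRMSE_i): the RMSE of each part of the
# complex impedance divided by the span of the observed values. A standalone
# sketch of that metric (illustrative only; assumes `zmod` and `data` are
# 2 x n_freq arrays with real and imaginary rows, like self.data["zn"]):
def _nrmse_sketch(zmod, data):
    import numpy as np
    # RMSE of each row, normalized by the range of the observations
    return [np.sqrt(np.mean((zmod[i] - data[i])**2))
            / abs(max(data[i]) - min(data[i]))
            for i in (0, 1)]  # [NRMSE_r, NRMSE_i]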
def plot_summary(sol, save=False, draw=True, save_as_png=False, dpi=None,
                 ignore=subplots_to_ignore, fig_nb=""):
    """Plots a parameter summary and Gelman-Rubin R-hat for multiple chains"""
    ext = 'png' if save_as_png else 'pdf'
    ch_nb = sol.mcmc["nb_chain"]
    keys = sorted([k for k in sol.var_dict.keys() if k not in ignore])
    trac = [[sol.var_dict[x].trace(chain=n).mean(axis=0) for x in keys]
            for n in range(ch_nb)]
    deps = [var_depth(sol.var_dict[x]) for x in keys]
    lbls = list(reversed(flatten([[k + '%s' % (x + 1) for x in range(d)]
                                  if d > 1 else k
                                  for k, d in zip(keys, deps)])))
    if ch_nb >= 2:
        rhat = [gelman_rubin([sol.MDL.trace(var, -x)[:]
                              for x in range(sol.mcmc['nb_chain'])])
                for var in keys]
        R = np.array(flatten(rhat))
        R[R > 5] = 5
    else:
        print("\nTwo or more chains of equal length required for "
              "Gelman-Rubin convergence")
        R = len(lbls) * [None]
    fig, axes = plt.subplots(figsize=(6, 4))
    gs2 = gridspec.GridSpec(3, 3)
    ax1 = plt.subplot(gs2[:, :-1])
    ax2 = plt.subplot(gs2[:, -1], sharey=ax1)
    for i in range(len(lbls)):
        for c in range(ch_nb):
            val_m = np.array(flatten(trac[c]))
            ax1.scatter(val_m[i], len(val_m) - (i + 1), color="C0",
                        marker=".", s=50, facecolor='k', edgecolors='k',
                        alpha=1)
        ax2.scatter(R[i], i, color="C3", marker="<", s=50, alpha=1)
    ax1.set_ylim([-1, len(lbls)])
    ax1.set_yticks(list(range(0, len(lbls))))
    ax1.set_yticklabels([parlbl_dic[l] for l in lbls])
    ax1.set_axisbelow(True)
    ax1.yaxis.grid(True)
    ax1.xaxis.grid(False)
    ax1.set_xlim(ax1.get_xlim())
    ax1.set_xlabel(r'Parameter value')
    plt.setp(ax2.get_yticklabels(), visible=False)
    ax2.set_xlim([0.5, 5.5])
    ax2.set_xticks([0.5, 1, 2, 3, 4, 5])
    ax2.set_xticklabels(["", "1", "2", "3", "4", "5+"])
    ax2.set_axisbelow(True)
    ax2.yaxis.grid(True)
    ax2.xaxis.grid(False)
    ax2.set_xlabel(r'$\hat{R}$')
    ax2.axvline(1, ls='--', color='C0', zorder=0)
    plt.tight_layout()
    plt.close(fig)
    if save:
        fn = '%sSUM-%s-%s.%s' % (fig_nb, sol.model_type_str, sol.filename, ext)
        save_figure(fig, subfolder='Summaries', fname=fn, dpi=dpi)
    if draw:
        return fig
    else:
        return None
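
# For context, the Gelman-Rubin diagnostic plotted above compares
# between-chain and within-chain variance; values near 1 indicate
# convergence. A minimal sketch of the classic estimator (illustrative
# only, not the gelman_rubin implementation imported by this module):
def _rhat_sketch(chains):
    import numpy as np
    x = np.asarray(chains)               # shape (n_chains, n_samples)
    n = x.shape[1]
    W = x.var(axis=1, ddof=1).mean()     # mean within-chain variance
    B = n * x.mean(axis=1).var(ddof=1)   # between-chain variance
    var_hat = (n - 1.) / n * W + B / n   # pooled posterior variance estimate
    return np.sqrt(var_hat / W)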
def plot_traces(sol, save=False, draw=True, save_as_png=False, dpi=None,
                ignore=subplots_to_ignore):
    """
    Plots the traces of stochastic and deterministic parameters in a
    mcmcinv object (sol). Ignores the ones in list argument ignore.
    """
    ext = 'png' if save_as_png else 'pdf'
    MDL = sol.MDL
    sampler = MDL.get_state()["sampler"]
    keys = [k for k in sol.var_dict.keys() if k not in ignore]
    for (i, k) in enumerate(keys):
        vect = old_div(MDL.trace(k)[:].size, len(MDL.trace(k)[:]))
        if vect > 1:
            keys[i] = [k + "%d" % n for n in range(1, vect + 1)]
    keys = list(flatten(keys))
    ncols = 2
    nrows = int(ceil(len(keys) * 1.0 / ncols))
    fig, ax = plt.subplots(nrows, ncols, figsize=(8, nrows * 1.5), sharex=True)
    for c, (a, k) in enumerate(zip(ax.flat, keys)):
        if k == "R0":
            stoc = "R0"
        else:
            stoc = ''.join([i for i in k if not i.isdigit()])
            stoc_num = [int(i) for i in k if i.isdigit()]
        try:
            data = MDL.trace(stoc)[:][:, stoc_num[0] - 1]
        except:
            data = MDL.trace(stoc)[:]
        x = np.arange(sampler["_burn"] + 1, sampler["_iter"] + 1,
                      sampler["_thin"])
        plt.sca(a)
        plt.ticklabel_format(style='sci', axis='both', scilimits=(0, 0))
        plt.ylabel(parlbl_dic[k])
        plt.plot(x, data, '-', alpha=0.8)
        plt.plot(x, np.mean(data) * np.ones(len(x)), color='k',
                 linestyle='--', linewidth=2)
        # plt.plot(x, np.median(data)*np.ones(len(x)), color='k', linestyle=':', linewidth=2)
        if sampler["_burn"] == 0:
            plt.xscale('log')
        else:
            plt.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
        plt.grid(False)
    plt.tight_layout(pad=0, w_pad=0.5, h_pad=0)
    for a in ax[-1]:
        a.set_xlabel("Iteration number")
    if save:
        fn = 'TRA-%s-%s.%s' % (sol.model_type_str, sol.filename, ext)
        save_figure(fig, subfolder='Traces', fname=fn, dpi=dpi)
    plt.close(fig)
    if draw:
        return fig
    else:
        return None
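
# Note on the x-axis above: np.arange(_burn + 1, _iter + 1, _thin)
# reproduces the iteration indices of the retained samples. For example,
# with _burn=1000, _iter=10000 and _thin=10:
#
#     >>> np.arange(1000 + 1, 10000 + 1, 10)[:3]
#     array([1001, 1011, 1021])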
def plot_histo(sol, save=False, draw=True, save_as_png=False, dpi=None,
               ignore=subplots_to_ignore):
    """
    Plots histograms of stochastic and deterministic parameters in a
    mcmcinv object (sol). Ignores the ones in list argument ignore.
    """
    ext = 'png' if save_as_png else 'pdf'
    MDL = sol.MDL
    keys = [k for k in sol.var_dict.keys() if k not in ignore]
    for (i, k) in enumerate(keys):
        vect = old_div(MDL.trace(k)[:].size, len(MDL.trace(k)[:]))
        if vect > 1:
            keys[i] = [k + "%d" % n for n in range(1, vect + 1)]
    keys = list(flatten(keys))
    ncols = 2
    nrows = int(ceil(len(keys) * 1.0 / ncols))
    fig, ax = plt.subplots(nrows, ncols, figsize=(8, nrows * 1.8))
    for c, (a, k) in enumerate(zip(ax.flat, keys)):
        if k == "R0":
            stoc = "R0"
        else:
            stoc = ''.join([i for i in k if not i.isdigit()])
            stoc_num = [int(i) for i in k if i.isdigit()]
        try:
            data = sorted(MDL.trace(stoc)[:][:, stoc_num[0] - 1])
        except:
            data = sorted(MDL.trace(stoc)[:])
        plt.sca(a)
        plt.xlabel(parlbl_dic[k])
        try:
            hist = plt.hist(data, bins=20, histtype='stepfilled',
                            density=False, linewidth=1.0, color='0.95',
                            alpha=1)
            plt.hist(data, bins=20, histtype='step', density=False,
                     linewidth=1.0, alpha=1)
            fit = norm.pdf(data, np.mean(data), np.std(data))
            xh = [0.5 * (hist[1][r] + hist[1][r + 1])
                  for r in range(len(hist[1]) - 1)]
            binwidth = old_div((max(xh) - min(xh)), len(hist[1]))
            fit *= len(data) * binwidth
            plt.plot(data, fit, "-", color='k', linewidth=1)
        except:
            print("File %s: failed to plot %s histogram. "
                  "Parameter not mobile enough (see traces)."
                  % (sol.filename, k))
        plt.grid(False)
        plt.ticklabel_format(style='sci', axis='both', scilimits=(0, 0))
    for c in range(nrows):
        ax[c][0].set_ylabel("Frequency")
    plt.tight_layout(pad=1, w_pad=1, h_pad=0)
    for a in ax.flat[ax.size - 1:len(keys) - 1:-1]:
        a.set_visible(False)
    if save:
        fn = 'HST-%s-%s.%s' % (sol.model_type_str, sol.filename, ext)
        save_figure(fig, subfolder='Histograms', fname=fn, dpi=dpi)
    plt.close(fig)
    if draw:
        return fig
    else:
        return None
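
# Note on the normal overlay above: norm.pdf integrates to 1 while the
# histogram shows raw counts (density=False), so the pdf is rescaled by
# len(data) * binwidth to sit on the count axis. The same idea in
# isolation (illustrative only):
#
#     counts, edges = np.histogram(data, bins=20)
#     scale = len(data) * (edges[1] - edges[0])
#     overlay = scale * norm.pdf(data, np.mean(data), np.std(data))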
def save_resul(sol):
    # TODO: rewrite with new mcmcinv object attributes
    # Function to save the results
    MDL, pm = sol.MDL, sol.pm
    model = sol.model_type_str
    sample_name = sol.filename
    save_where = '/Results/'
    working_path = getcwd().replace("\\", "/") + "/"
    save_path = working_path + save_where + "%s/" % sample_name
    print("\nSaving csv file in:\n", save_path)
    if not path.exists(save_path):
        makedirs(save_path)
    if sol.model == 'PDecomp':
        tag = 0
    else:
        tag = 1
    A = []
    B = []
    headers = []
    keys = sorted(pm.keys())
    if sol.model in ["CCD", "PDecomp"]:
        keys += [keys.pop(keys.index("peak_tau"))]  # Move to end
        keys += [keys.pop(keys.index("peak_m"))]  # Move to end
    keys = [k for k in keys if "_std" not in k]
    for c, key in enumerate(keys):
        A.append(list(np.array(pm[key]).ravel()))
        B.append(list(np.array(pm[key + "_std"]).ravel()))
        length = len(np.atleast_1d(pm[key]))
        if length > 1:
            for i in range(len(A[c])):
                headers.append(model + "_" + key + "_%d" % (i + tag))
                headers.append(model + "_" + key + ("_%d" % (i + tag)) + "_std")
        else:
            if (key == "peak_tau") | (key == "peak_m"):
                headers.append(model + "_" + key + "_1")
                headers.append(model + "_" + key + "_1" + "_std")
            else:
                headers.append(model + "_" + key)
                headers.append(model + "_" + key + "_std")
    A = flatten(A)
    B = flatten(B)
    results = [None] * (len(A) + len(B))
    results[::2] = A
    results[1::2] = B
    headers = ','.join(headers)
    results = np.array(results)
    if sol.model == 'PDecomp':
        tau_ = sol.data["tau"]
        add = ["%s_tau" % model + "%d" % i for i in range(len(tau_))]
        add = ','.join(add) + ','
        headers = add + headers
        results = np.concatenate((tau_, results))
    headers = "Z_max,Input_c_exponent," + headers
    results = np.concatenate((np.array([sol.data["Z_max"]]),
                              np.array([sol.c_exp]), results))
    np.savetxt(save_path + 'INV_%s-%s_%s.csv' % (sol.model, model, sample_name),
               results[None], header=headers, comments='', delimiter=',')
    vars_ = (["%s" % x for x in MDL.stochastics]
             + ["%s" % x for x in MDL.deterministics])
    if "zmod" in vars_:
        vars_.remove("zmod")
    MDL.write_csv(save_path + 'STATS_%s-%s_%s.csv' % (sol.model, model, sample_name),
                  variables=vars_)
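
# Output sketch (sample name hypothetical): save_resul(sol) writes two
# files under ./Results/<sample_name>/ in the current working directory:
#
#     INV_<model>-<model_type_str>_<sample_name>.csv    (estimates and _std pairs)
#     STATS_<model>-<model_type_str>_<sample_name>.csv  (PyMC chain statistics)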