def stats(self, alpha=0.05, start=0, batches=100, chain=None,
          quantiles=(2.5, 25, 50, 75, 97.5)):
    """
    Generate posterior statistics for node.

    :Parameters:
    name : string
      The name of the tallyable object.

    alpha : float
      The alpha level for generating posterior intervals. Defaults to 0.05.

    start : int
      The starting index from which to summarize (each) chain. Defaults
      to zero.

    batches : int
      Batch size for calculating standard deviation for non-independent
      samples. Defaults to 100.

    chain : int
      The index for which chain to summarize. Defaults to None (all chains).

    quantiles : tuple or list
      The desired quantiles to be calculated. Defaults to
      (2.5, 25, 50, 75, 97.5).
    """
    try:
        trace = np.squeeze(
            np.array(self.db.trace(self.name)(chain=chain), float))[start:]

        n = len(trace)
        if not n:
            print_('Cannot generate statistics for zero-length trace in',
                   self.__name__)
            return

        return {
            'n': n,
            'standard deviation': trace.std(0),
            'mean': trace.mean(0),
            '%s%s HPD interval' % (int(100 * (1 - alpha)), '%'):
                utils.hpd(trace, alpha),
            # Cap the batch count at the trace length so batchsd never
            # gets more batches than samples; this also matches the other
            # copy of this method in the file.
            'mc error': batchsd(trace, min(n, batches)),
            'quantiles': utils.quantiles(trace, qlist=quantiles)
        }
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; statistics remain best-effort on failure.
        print_('Could not generate output statistics for', self.name)
        return
def analyze(parameters, datasets):
    """Post-process an MCMC run: save autocorrelation plots, a corner plot,
    a CSV parameter summary, and comparison figures.

    All artifacts are written under ``Data/<sumatra_label>/`` and their
    relative paths are appended to the module-level ``output_files`` list.

    :Parameters:
    parameters : mapping
      Run parameters; must provide 'sumatra_label', 'input_database',
      'compare_databases', an ``as_dict()`` view with a 'parameters' key,
      and index access by string key.
    datasets : object
      Passed through to ``plot_results``.
    """
    image_path = os.path.join('Data', parameters['sumatra_label'])

    # Load traces saved by the sampler (chain 0 only), skipping the
    # step-method bookkeeping columns and the deviance trace.
    trace_file = str(
        os.path.join('Data', parameters['sumatra_label'], 'traces.h5'))
    data_dict = OrderedDict()
    os.makedirs(os.path.join(image_path, 'acf'))
    with tables.open_file(trace_file, mode='r') as data:
        parnames = [x for x in data.root.chain0.PyMCsamples.colnames
                    if not x.startswith('Metropolis') and x != 'deviance']
        for param in sorted(parnames):
            data_dict[param] = np.asarray(
                data.root.chain0.PyMCsamples.read(field=param), dtype='float')

    # One autocorrelation figure per parameter.
    for param, trace in data_dict.items():
        figure = plt.figure()
        figure.gca().plot(autocorr(trace))
        figure.gca().set_title(param + ' Autocorrelation')
        figure.savefig(str(os.path.join(image_path, 'acf', param + '.png')))
        plt.close(figure)
        output_files.append(str(
            os.path.join(parameters['sumatra_label'], 'acf', param + '.png')))

    # Corner plot of the joint posterior, with 'compare' values (if any)
    # drawn as truth lines.
    data = np.vstack(list(data_dict.values())).T
    data_truths = [parameters.as_dict()['parameters'][key].get('compare', None)
                   for key in data_dict.keys()]
    figure = corner(data, labels=list(data_dict.keys()),
                    quantiles=[0.16, 0.5, 0.84], truths=data_truths,
                    show_titles=True, title_args={"fontsize": 40},
                    rasterized=True)
    figure.savefig(str(os.path.join(image_path, 'cornerplot.png')))
    output_files.append(str(
        os.path.join(parameters['sumatra_label'], 'cornerplot.png')))
    plt.close(figure)

    # Write CSV file with parameter summary (should be close to pymc's format)
    # newline='' is required when handing a file to the csv module;
    # without it the writer emits blank lines between rows on Windows.
    with open(str(os.path.join(image_path, 'parameters.csv')), 'w',
              newline='') as csvfile:
        fieldnames = ['Parameter', 'Mean', 'SD', 'Lower 95% HPD',
                      'Upper 95% HPD', 'MC error', 'q2.5', 'q25', 'q50',
                      'q75', 'q97.5']
        writer = csv.DictWriter(csvfile, fieldnames)
        writer.writeheader()
        for parname, trace in data_dict.items():
            qxx = utils.quantiles(trace, qlist=(2.5, 25, 50, 75, 97.5))
            q2d5, q25, q50, q75, q975 = (qxx[2.5], qxx[25], qxx[50],
                                         qxx[75], qxx[97.5])
            lower_hpd, upper_hpd = utils.hpd(trace, 0.05)
            row = {
                'Parameter': parname,
                'Mean': trace.mean(0),
                'SD': trace.std(0),
                'Lower 95% HPD': lower_hpd,
                'Upper 95% HPD': upper_hpd,
                # Cap batches at the trace length, as in stats().
                'MC error': batchsd(trace, min(len(trace), 100)),
                'q2.5': q2d5, 'q25': q25, 'q50': q50, 'q75': q75,
                'q97.5': q975,
            }
            writer.writerow(row)
    output_files.append(str(
        os.path.join(parameters['sumatra_label'], 'parameters.csv')))

    # Generate comparison figures
    os.makedirs(os.path.join(image_path, 'results'))
    input_database = Database(parameters['input_database'])
    compare_databases = {
        key: Database(value)
        for key, value in parameters['compare_databases'].items()
    }
    idx = 1
    for fig in plot_results(input_database, datasets, data_dict,
                            databases=compare_databases):
        fig.savefig(str(
            os.path.join(image_path, 'results', 'Figure{}.png'.format(idx))))
        output_files.append(str(
            os.path.join(parameters['sumatra_label'], 'results',
                         'Figure{}.png'.format(idx))))
        plt.close(fig)
        idx += 1
def stats(self, alpha=0.05, start=0, batches=100, chain=None,
          quantiles=(2.5, 25, 50, 75, 97.5)):
    """
    Generate posterior statistics for node.

    :Parameters:
    name : string
      The name of the tallyable object.

    alpha : float
      The alpha level for generating posterior intervals. Defaults to 0.05.

    start : int
      The starting index from which to summarize (each) chain. Defaults
      to zero.

    batches : int
      Batch size for calculating standard deviation for non-independent
      samples. Defaults to 100.

    chain : int
      The index for which chain to summarize. Defaults to None (all chains).

    quantiles : tuple or list
      The desired quantiles to be calculated. Defaults to
      (2.5, 25, 50, 75, 97.5).
    """
    try:
        trace = np.squeeze(
            np.array(self.db.trace(self.name)(chain=chain), float))[start:]

        n = len(trace)
        if not n:
            print_('Cannot generate statistics for zero-length trace in',
                   self.__name__)
            return

        return {
            'n': n,
            'standard deviation': trace.std(0),
            'mean': trace.mean(0),
            '%s%s HPD interval' % (int(100 * (1 - alpha)), '%'):
                utils.hpd(trace, alpha),
            # min(n, batches) keeps the batch count <= sample count.
            'mc error': batchsd(trace, min(n, batches)),
            'quantiles': utils.quantiles(trace, qlist=quantiles)
        }
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; statistics remain best-effort on failure.
        print_('Could not generate output statistics for', self.name)
        return
def precipProxy(scores, flag):
    """Reconstruct a climate variable from proxy scores by Bayesian linear
    regression (PyMC 2), and write the reconstruction quantiles to CSV.

    Notes: *scores* is assumed to be a pandas Series (proxy chronology
    indexed by year). *flag* selects the target: 'pdsi' or anything else
    (treated as precipitation). Relies on module-level names: base_path,
    pandas, np, the pymc API (Normal, Uniform, deterministic, observed,
    rnormal, normal_like, MCMC, Metropolis, quantiles) and zeros.
    """
    # Calibration window: overlap of the proxy with the instrumental
    # record, 1901-1981.  NOTE(review): .ix is long-deprecated in pandas;
    # .loc is the modern equivalent — confirm the pinned pandas version.
    scores_late = scores.ix[1901:1981]
    if flag == 'pdsi':
        pdsi = pandas.read_csv(base_path + 'csv/jjPdsi.csv', index_col=[0])
        climVar = pdsi['p'].ix[1901:1981].values
    else:
        precip = pandas.read_csv(base_path + 'csv/mjPrecip.csv', index_col=[0])
        # Anomalies are taken against the 1961-1990 reference mean.
        ref_mean = np.mean(precip['precip'].ix[1961:1990].values)
        climVar = precip['precip'].ix[1901:1981].values
        climVar_anom = climVar - ref_mean*np.ones(np.shape(climVar)[0])
        # NOTE(review): ref_mean/climVar_anom are only defined on this
        # branch, but climVar_anom.std() is used unconditionally below for
        # the rescaled prediction — the 'pdsi' path would raise NameError
        # there. Confirm whether pdsi runs are expected to reach that code.
    #center the climate variable
    climVar_cent = climVar - climVar.mean()
    years = range(1901,1981+1)

    # define priors
    beta = Normal('beta', mu=zeros(2), tau=.001, value=zeros(2))
    sigma = Uniform('sigma', lower=0., upper=100., value=1.)

    # define predictions
    @deterministic
    def mu(beta=beta, chron=scores_late):
        # Simple linear model: climate = beta0 + beta1 * proxy.
        return beta[0] + beta[1]*chron

    @deterministic
    def predicted(mu=mu, sigma=sigma):
        # Posterior-predictive draw; pymc normals take precision tau,
        # hence sigma**-2.
        return rnormal(mu, sigma**-2.)

    # define likelihood
    @observed
    def y(value=climVar_cent, mu=mu, sigma=sigma):
        return normal_like(value, mu, sigma**-2.)

    # generate MCMC samples
    vars = [beta, sigma, mu, predicted, y]
    mc = MCMC(vars)
    mc.use_step_method(Metropolis, beta)
    mc.sample(iter=20000, thin=10, burn=10000, verbose=1)

    betas = beta.trace.gettrace()
    sigmas = sigma.trace.gettrace()
    # Predict over the FULL chronology (scores.values), not just the
    # calibration window, by re-evaluating the deterministic nodes for
    # each retained posterior sample.
    chron = scores.values
    pred = zeros((betas.shape[0], chron.shape[0]))
    for i in range(betas.shape[0]):
        pred[i, :] = predicted._eval_fun(mu=mu._eval_fun(beta=betas[i], chron=chron), sigma=sigmas[i])

    # plotting setup
    #t = range(1845, 1981+1)
    #t = range(1750, 1981+1)
    t = scores.index
    # 5/50/95 posterior-predictive quantiles per year.
    plot_vals = quantiles(pred, (5, 50, 95))
    recon = pandas.DataFrame(plot_vals[50], index = t, columns=['recon'])
    # Median reconstruction replicated into one identical column per month.
    # NOTE(review): to_csv(cols=...) is a removed pandas keyword (now
    # 'columns') — confirm the pinned pandas version.
    reconMonthly = pandas.DataFrame({
        x: plot_vals[50] for x in ['Jan', 'Feb', 'Mar', 'April', 'May',
                                   'June', 'July', 'Aug', 'Sep', 'Oct',
                                   'Nov', 'Dec']
    }, index=t)
    if flag == 'precip':
        recon.to_csv('csv/reconPrecip.csv')
        reconMonthly.to_csv('csv/reconPrecipMonthly.csv', cols=['Jan', 'Feb', 'Mar', 'April', 'May', 'June', 'July', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
    else:
        recon.to_csv('csv/reconPdsi.csv')
        reconMonthly.to_csv('csv/reconPdsiMonthly.csv', cols=['Jan', 'Feb', 'Mar', 'April', 'May', 'June', 'July', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
    #print recon.ix[1750:1850]

    # why do we need to rescale variance? i find this weird
    # Rescale predictions so their spread matches the instrumental
    # anomalies, then shift back to absolute units via climVar.mean().
    pred_std = zeros((betas.shape[0], chron.shape[0]))
    for i in range(betas.shape[0]):
        pred_std[i, :] = pred[i, :]/plot_vals[50].std()*climVar_anom.std() + climVar.mean()

    plot_vals = pandas.DataFrame({'pred5': plot_vals[5], 'pred50': plot_vals[50], 'pred95': plot_vals[95]}, index = t)#, columns=['pred5', 'pred50', 'pred95'])
    plot_vals.to_csv('csv/plot_vals.csv')
    plot_vals_std = quantiles(pred_std, (5, 50, 95))
    plot_vals_std = pandas.DataFrame({'pred5': plot_vals_std[5], 'pred50': plot_vals_std[50], 'pred95': plot_vals_std[95]}, index = t)#, columns=['pred5', 'pred50', 'pred95'])
    plot_vals_std.to_csv('csv/plot_vals_std.csv')
def analyze(parameters, datasets):
    """Post-process an MCMC run: autocorrelation plots, corner plot,
    CSV parameter summary, and comparison figures.

    Artifacts go under ``Data/<sumatra_label>/``; each relative path is
    appended to the module-level ``output_files`` list.

    :Parameters:
    parameters : mapping
      Run parameters; must provide 'sumatra_label', 'input_database',
      'compare_databases', and an ``as_dict()`` view with a 'parameters'
      key.
    datasets : object
      Passed through to ``plot_results``.
    """
    image_path = os.path.join('Data', parameters['sumatra_label'])

    # Save traces (chain 0), skipping step-method bookkeeping columns and
    # the deviance trace.
    trace_file = str(
        os.path.join('Data', parameters['sumatra_label'], 'traces.h5'))
    data_dict = OrderedDict()
    os.makedirs(os.path.join(image_path, 'acf'))
    with tables.open_file(trace_file, mode='r') as data:
        parnames = [
            x for x in data.root.chain0.PyMCsamples.colnames
            if not x.startswith('Metropolis') and x != 'deviance'
        ]
        for param in sorted(parnames):
            data_dict[param] = np.asarray(
                data.root.chain0.PyMCsamples.read(field=param), dtype='float')

    # One autocorrelation figure per parameter.
    for param, trace in data_dict.items():
        figure = plt.figure()
        figure.gca().plot(autocorr(trace))
        figure.gca().set_title(param + ' Autocorrelation')
        figure.savefig(str(os.path.join(image_path, 'acf', param + '.png')))
        plt.close(figure)
        output_files.append(
            str(
                os.path.join(parameters['sumatra_label'], 'acf',
                             param + '.png')))

    # Corner plot of the joint posterior; 'compare' values (when present)
    # become truth lines.
    data = np.vstack(list(data_dict.values())).T
    data_truths = [
        parameters.as_dict()['parameters'][key].get('compare', None)
        for key in data_dict.keys()
    ]
    figure = corner(data,
                    labels=list(data_dict.keys()),
                    quantiles=[0.16, 0.5, 0.84],
                    truths=data_truths,
                    show_titles=True,
                    title_args={"fontsize": 40},
                    rasterized=True)
    figure.savefig(str(os.path.join(image_path, 'cornerplot.png')))
    output_files.append(
        str(os.path.join(parameters['sumatra_label'], 'cornerplot.png')))
    plt.close(figure)

    # Write CSV file with parameter summary (should be close to pymc's format)
    # newline='' is required when handing a file to the csv module;
    # without it the writer emits blank lines between rows on Windows.
    with open(str(os.path.join(image_path, 'parameters.csv')), 'w',
              newline='') as csvfile:
        fieldnames = [
            'Parameter', 'Mean', 'SD', 'Lower 95% HPD', 'Upper 95% HPD',
            'MC error', 'q2.5', 'q25', 'q50', 'q75', 'q97.5'
        ]
        writer = csv.DictWriter(csvfile, fieldnames)
        writer.writeheader()
        for parname, trace in data_dict.items():
            qxx = utils.quantiles(trace, qlist=(2.5, 25, 50, 75, 97.5))
            q2d5, q25, q50, q75, q975 = qxx[2.5], qxx[25], qxx[50], qxx[
                75], qxx[97.5]
            lower_hpd, upper_hpd = utils.hpd(trace, 0.05)
            row = {
                'Parameter': parname,
                'Mean': trace.mean(0),
                'SD': trace.std(0),
                'Lower 95% HPD': lower_hpd,
                'Upper 95% HPD': upper_hpd,
                # Cap batches at the trace length, as in stats().
                'MC error': batchsd(trace, min(len(trace), 100)),
                'q2.5': q2d5,
                'q25': q25,
                'q50': q50,
                'q75': q75,
                'q97.5': q975
            }
            writer.writerow(row)
    output_files.append(
        str(os.path.join(parameters['sumatra_label'], 'parameters.csv')))

    # Generate comparison figures
    os.makedirs(os.path.join(image_path, 'results'))
    input_database = Database(parameters['input_database'])
    compare_databases = {
        key: Database(value)
        for key, value in parameters['compare_databases'].items()
    }
    idx = 1
    for fig in plot_results(input_database,
                            datasets,
                            data_dict,
                            databases=compare_databases):
        fig.savefig(
            str(os.path.join(image_path, 'results',
                             'Figure{}.png'.format(idx))))
        output_files.append(
            str(
                os.path.join(parameters['sumatra_label'], 'results',
                             'Figure{}.png'.format(idx))))
        plt.close(fig)
        idx += 1