def plot_result():
    """Draw the two-rotation result figure and save it as ``two_euler_poles.pdf``.

    Panel (a), an orthographic map, shows the sampled Euler pole directions
    as density distributions, every entry of ``hidden_euler_poles`` as a
    black star, synthetic paths split at a sampled changepoint (dark red
    before the switch index, dark green after it) and every pole in
    ``pole_list``.

    Panel (b) shows, for each of the two rotations, a histogram of the
    sampled rates with the median (solid line), the ``hpd(..., 0.05)``
    interval (dashed lines) and the matching ``hidden_euler_rates`` entry
    (dotted black line).

    Relies on the module-level names ``path``, ``ages``, ``pole_list``,
    ``hidden_euler_poles``, ``hidden_euler_rates``, ``cmap_red`` and
    ``cmap_green``.
    """
    fig = plt.figure(figsize=(8, 4))

    # ---- Panel (a): map view ------------------------------------------
    ax = fig.add_subplot(1, 2, 1, projection=ccrs.Orthographic(0., 30.))
    ax.gridlines()
    ax.set_global()

    colors = itertools.cycle([cmap_red, cmap_green])
    direction_samples = path.euler_directions()
    for directions in direction_samples:
        mcplates.plot.plot_distribution(ax, directions[:, 0], directions[:, 1],
                                        resolution=60, cmap=next(colors))

    for hidden_euler_pole in hidden_euler_poles:
        euler_lon = hidden_euler_pole[0]
        euler_lat = hidden_euler_pole[1]
        ax.plot(euler_lon, euler_lat, 'k*',
                transform=ccrs.Geodetic(), markersize=10)

    n_paths = 100
    # Thin the changepoint trace with the stride implied by n_paths.
    interval = max(1, int(len(path.mcmc.db.trace('rate_0')[:]) / n_paths))
    pathlons, pathlats = path.compute_synthetic_paths(n=n_paths)
    changepoints = path.changepoints()[0][::interval]
    for pathlon, pathlat, change in zip(pathlons, pathlats, changepoints):
        # Index at which the colour changes: the changepoint as a fraction
        # of the age span, scaled to the number of points on the path.
        switch = int(float(len(pathlon)) * change / (max(ages) - min(ages)))
        ax.plot(pathlon[:switch], pathlat[:switch],
                transform=ccrs.PlateCarree(), color='darkred', alpha=0.05)
        ax.plot(pathlon[switch:], pathlat[switch:],
                transform=ccrs.PlateCarree(), color='darkgreen', alpha=0.05)

    for p in pole_list:
        p.plot(ax)
    ax.set_title('(a)')

    # ---- Panel (b): rotation-rate posteriors ---------------------------
    ax = fig.add_subplot(1, 2, 2)
    rate_samples = path.euler_rates()
    for idx, c in enumerate(('darkred', 'darkgreen')):
        rates = rate_samples[idx]
        ax.hist(rates, bins=15, normed=True, edgecolor='none',
                color=c, alpha=0.5)

        # plot median, credible interval
        credible_interval = hpd(rates, 0.05)
        # The median must come from this rotation's samples only; taking
        # it over all of rate_samples would pool both rotations.
        median = np.median(rates)
        print("Rotation %i: median %f, credible interval " % (idx, median),
              credible_interval)
        ax.axvline(median, lw=2, color=c)
        ax.axvline(credible_interval[0], lw=2, color=c, linestyle='dashed')
        ax.axvline(credible_interval[1], lw=2, color=c, linestyle='dashed')
        ax.axvline(hidden_euler_rates[idx], lw=2, color='black',
                   linestyle='dotted')

    ax.set_title('(b)')
    ax.set_xlabel(r'Rotation rate $\,^\circ / \mathrm{Myr}$')
    ax.set_ylabel(r'Posterior probability density')

    plt.tight_layout()
    plt.savefig("two_euler_poles.pdf")
def stats(self, alpha=0.05, start=0, batches=100, chain=None,
          quantiles=(2.5, 25, 50, 75, 97.5)):
    """
    Generate posterior statistics for node.

    :Parameters:
    alpha : float
      The alpha level for generating posterior intervals. Defaults to
      0.05.

    start : int
      The starting index from which to summarize (each) chain. Defaults
      to zero.

    batches : int
      Batch size for calculating standard deviation for non-independent
      samples. Defaults to 100. Clamped to the trace length so that a
      trace shorter than ``batches`` can still be batched.

    chain : int
      The index for which chain to summarize. Defaults to None (all
      chains).

    quantiles : tuple or list
      The desired quantiles to be calculated. Defaults to
      (2.5, 25, 50, 75, 97.5).

    :Returns:
    A dict with the keys ``'n'``, ``'standard deviation'``, ``'mean'``,
    ``'<P>% HPD interval'``, ``'mc error'`` and ``'quantiles'``, or None
    when the trace is empty or the statistics could not be computed (a
    message is printed in both cases).
    """
    try:
        trace = np.squeeze(
            np.array(self.db.trace(self.name)(chain=chain), float))[start:]

        n = len(trace)
        if not n:
            print_('Cannot generate statistics for zero-length trace in',
                   self.__name__)
            return

        return {
            'n': n,
            'standard deviation': trace.std(0),
            'mean': trace.mean(0),
            '%s%s HPD interval' % (int(100 * (1 - alpha)), '%'):
                utils.hpd(trace, alpha),
            # Never ask for more batches than there are samples.
            'mc error': batchsd(trace, min(n, batches)),
            'quantiles': utils.quantiles(trace, qlist=quantiles)
        }
    except Exception:
        # Best effort: report the failure instead of propagating it, but
        # let KeyboardInterrupt / SystemExit through.
        print_('Could not generate output statistics for', self.name)
        return
def plot_HDI(sampleVec, y_value, axis=None, vert=False, marker='o', **kwargs):
    """Plot the mean and HPD interval of MCMC samples as a single error bar.

    Only plots one error bar at the moment.

    Args:
        sampleVec: A vector of MCMC samples.
        y_value: Position of the bar on the other axis (the y coordinate
            for a horizontal bar, the x coordinate when ``vert`` is true).
        axis: Matplotlib axes to draw on; defaults to ``plt.gca()``.
        vert: If true the interval is drawn vertically (``yerr``),
            otherwise horizontally (``xerr``).
        marker: Marker used for the mean.
        **kwargs: Forwarded to ``axis.errorbar``. ``color`` defaults to
            ``'k'`` but may be overridden here.
    """
    if axis is None:
        axis = plt.gca()

    theta_mean = np.mean(sampleVec)
    interval = hpd(sampleVec, 0.05)
    # errorbar wants distances from the centre, shaped (2, N):
    # row 0 = distance below the mean, row 1 = distance above it.
    errors = np.array((theta_mean - interval[0],
                       interval[1] - theta_mean)).reshape(2, 1)

    # A hard-coded color= next to **kwargs raises TypeError when the
    # caller supplies a colour; make black the default instead.
    kwargs.setdefault('color', 'k')

    if vert:
        axis.errorbar(y_value, theta_mean, yerr=errors, marker=marker, **kwargs)
    else:
        axis.errorbar(theta_mean, y_value, xerr=errors, marker=marker, **kwargs)
def plot_result():
    """Draw the single-rotation result figure and save ``one_euler_pole.pdf``.

    Panel (a), an orthographic map, shows the sampled Euler pole directions
    as a density distribution, ``hidden_euler_pole`` as a black star, 200
    synthetic paths and every pole in ``pole_list``.  Panel (b) shows a
    histogram of the sampled rotation rate with its median (solid), the
    ``hpd(..., 0.05)`` interval (dashed) and ``hidden_euler_rate`` (dotted).

    Relies on the module-level names ``path``, ``pole_list``,
    ``hidden_euler_pole``, ``hidden_euler_rate``, ``cmap_red`` and
    ``cmap_green``.
    """
    fig = plt.figure(figsize=(8, 4))

    # ---- Panel (a): map view ------------------------------------------
    ax = fig.add_subplot(1, 2, 1, projection=ccrs.Orthographic(0., 15.))
    ax.gridlines()
    ax.set_global()

    colors = itertools.cycle([cmap_red, cmap_green])
    direction_samples = path.euler_directions()
    for directions in direction_samples:
        mcplates.plot.plot_distribution(ax, directions[:, 0], directions[:, 1],
                                        resolution=60, cmap=next(colors))

    euler_lon = hidden_euler_pole[0]
    euler_lat = hidden_euler_pole[1]
    ax.plot(euler_lon, euler_lat, 'k*',
            transform=ccrs.Geodetic(), markersize=10)

    pathlons, pathlats = path.compute_synthetic_paths(n=200)
    for pathlon, pathlat in zip(pathlons, pathlats):
        ax.plot(pathlon, pathlat, transform=ccrs.PlateCarree(),
                color='darkred', alpha=0.05)

    for p in pole_list:
        p.plot(ax)
    ax.set_title('(a)')

    # ---- Panel (b): rotation-rate posterior ----------------------------
    ax = fig.add_subplot(1, 2, 2)
    c = 'darkred'
    rate_samples = path.euler_rates()
    # Use the first rotation's samples for the histogram, the interval AND
    # the median, so the three summaries always describe the same data.
    rates = rate_samples[0]
    ax.hist(rates, bins=15, normed=True, edgecolor='none', color=c, alpha=0.5)

    # plot median, credible interval
    credible_interval = hpd(rates, 0.05)
    median = np.median(rates)
    print("Median %f, credible interval " % (median), credible_interval)
    ax.axvline(median, lw=2, color=c)
    ax.axvline(credible_interval[0], lw=2, color=c, linestyle='dashed')
    ax.axvline(credible_interval[1], lw=2, color=c, linestyle='dashed')
    ax.axvline(hidden_euler_rate, lw=2, color='black', linestyle='dotted')

    ax.set_title('(b)')
    ax.set_xlabel(r'Rotation rate $\,^\circ / \mathrm{Myr}$')
    ax.set_ylabel(r'Posterior probability density')

    plt.tight_layout()
    plt.savefig("one_euler_pole.pdf")
def analyze(parameters, datasets):
    """Post-process a finished MCMC run stored under ``Data/<sumatra_label>``.

    Reads every sampled column of ``traces.h5`` (chain0) except the
    ``Metropolis*`` columns and ``deviance``, then writes

    * one autocorrelation plot per parameter into ``acf/``,
    * a corner plot (``cornerplot.png``),
    * a per-parameter summary table (``parameters.csv``),
    * the figures yielded by ``plot_results`` into ``results/``.

    The run-relative path of each generated file is appended to the
    module-level ``output_files`` list.
    """
    label = parameters['sumatra_label']
    image_path = os.path.join('Data', label)
    trace_file = str(os.path.join('Data', label, 'traces.h5'))

    # --- Load traces ------------------------------------------------------
    data_dict = OrderedDict()
    os.makedirs(os.path.join(image_path, 'acf'))
    with tables.open_file(trace_file, mode='r') as h5:
        samples_table = h5.root.chain0.PyMCsamples
        wanted = []
        for column in samples_table.colnames:
            if column.startswith('Metropolis') or column == 'deviance':
                continue
            wanted.append(column)
        for param in sorted(wanted):
            data_dict[param] = np.asarray(
                h5.root.chain0.PyMCsamples.read(field=param), dtype='float')

    # --- Autocorrelation plots -------------------------------------------
    for param, trace in data_dict.items():
        png_name = param + '.png'
        acf_figure = plt.figure()
        acf_figure.gca().plot(autocorr(trace))
        acf_figure.gca().set_title(param + ' Autocorrelation')
        acf_figure.savefig(str(os.path.join(image_path, 'acf', png_name)))
        plt.close(acf_figure)
        output_files.append(str(os.path.join(label, 'acf', png_name)))

    # --- Corner plot ------------------------------------------------------
    # One column per parameter, one row per sample.
    stacked = np.vstack(list(data_dict.values())).T
    truths = []
    for key in data_dict.keys():
        truths.append(
            parameters.as_dict()['parameters'][key].get('compare', None))
    corner_figure = corner(stacked,
                           labels=list(data_dict.keys()),
                           quantiles=[0.16, 0.5, 0.84],
                           truths=truths,
                           show_titles=True,
                           title_args={"fontsize": 40},
                           rasterized=True)
    corner_figure.savefig(str(os.path.join(image_path, 'cornerplot.png')))
    output_files.append(str(os.path.join(label, 'cornerplot.png')))
    plt.close(corner_figure)

    # --- CSV parameter summary (should be close to pymc's format) ---------
    header = ['Parameter', 'Mean', 'SD', 'Lower 95% HPD', 'Upper 95% HPD',
              'MC error', 'q2.5', 'q25', 'q50', 'q75', 'q97.5']
    with open(str(os.path.join(image_path, 'parameters.csv')), 'w') as csvfile:
        writer = csv.DictWriter(csvfile, header)
        writer.writeheader()
        for parname, trace in data_dict.items():
            qxx = utils.quantiles(trace, qlist=(2.5, 25, 50, 75, 97.5))
            q2d5 = qxx[2.5]
            q25 = qxx[25]
            q50 = qxx[50]
            q75 = qxx[75]
            q975 = qxx[97.5]
            lower_hpd, upper_hpd = utils.hpd(trace, 0.05)
            writer.writerow({
                'Parameter': parname,
                'Mean': trace.mean(0),
                'SD': trace.std(0),
                'Lower 95% HPD': lower_hpd,
                'Upper 95% HPD': upper_hpd,
                'MC error': batchsd(trace, min(len(trace), 100)),
                'q2.5': q2d5,
                'q25': q25,
                'q50': q50,
                'q75': q75,
                'q97.5': q975,
            })
    output_files.append(str(os.path.join(label, 'parameters.csv')))

    # --- Comparison figures -------------------------------------------------
    os.makedirs(os.path.join(image_path, 'results'))
    input_database = Database(parameters['input_database'])
    compare_databases = {}
    for key, value in parameters['compare_databases'].items():
        compare_databases[key] = Database(value)
    result_figures = plot_results(input_database, datasets, data_dict,
                                  databases=compare_databases)
    for idx, fig in enumerate(result_figures, 1):
        figure_name = 'Figure{}.png'.format(idx)
        fig.savefig(str(os.path.join(image_path, 'results', figure_name)))
        output_files.append(str(os.path.join(label, 'results', figure_name)))
        plt.close(fig)
def plot_plate_speeds(ax=None, title=''):
    """Histogram the plate speed implied by each sampled Euler rotation.

    For every rotation the speed at the module-level point ``uluru`` is
    evaluated for each (direction, rate) sample, then drawn as a histogram
    with its median (solid) and ``hpd(..., 0.05)`` interval (dashed).  When
    ``ax`` is None a new figure is created and saved to
    ``prefix + "_speeds.pdf"``; otherwise the given axes are drawn on and
    nothing is saved.

    Relies on the module-level names ``path``, ``poles``,
    ``n_euler_rotations``, ``dist_colors_short``, ``uluru`` and ``prefix``.
    """
    standalone = ax is None
    if standalone:
        fig = plt.figure()
        myax = fig.add_subplot(111)
    else:
        myax = ax

    euler_directions = path.euler_directions()
    euler_rates = path.euler_rates()

    # Age boundaries of the rotation intervals: oldest pole age, the median
    # of each changepoint (only with more than one rotation), youngest age.
    changepoints = []
    if n_euler_rotations > 1:
        for samples in path.changepoints():
            changepoints.append(np.median(samples))
    age_list = [p.age for p in poles]
    changepoints = [max(age_list)] + changepoints + [min(age_list)]

    myax.set_xlabel('Plate speed (cm/yr)')
    myax.set_ylabel('Probability density')

    xmin, xmax = 1000., 0.
    colorcycle = itertools.cycle(dist_colors_short)
    for i, (directions, rates) in enumerate(zip(euler_directions, euler_rates)):
        # Compute one plate speed per posterior sample.
        speed_samples = np.empty_like(rates)
        for j, rate in enumerate(rates):
            pole = mcplates.EulerPole(directions[j, 0], directions[j, 1], rate)
            speed_samples[j] = pole.speed_at_point(uluru)

        c = next(colorcycle)

        # Histogram labelled with the age interval of this rotation.
        interval_label = '%i - %i Ma' % (changepoints[i], changepoints[i + 1])
        myax.hist(speed_samples, bins=30, normed=True, alpha=0.5,
                  color=c, label=interval_label)

        # Median and credible interval.
        credible_interval = hpd(speed_samples, 0.05)
        median = np.median(speed_samples)
        print("Rotation %i: median %f, credible interval " % (i, median),
              credible_interval)
        myax.axvline(median, lw=2, color=c)
        for bound in (credible_interval[0], credible_interval[1]):
            myax.axvline(bound, lw=2, color=c, linestyle='dashed')

        # Grow the x range to twice the interval half-widths around the
        # median, never going below zero.
        candidate_low = median - 2. * (median - credible_interval[0])
        candidate_high = median + 2. * (credible_interval[1] - median)
        xmin = max(0., min(xmin, candidate_low))
        xmax = max(xmax, candidate_high)

    if n_euler_rotations > 1:
        myax.legend(loc='upper right')
    myax.set_xlim(xmin, xmax)

    if title != '':
        myax.set_title(title)

    if standalone:
        plt.savefig(prefix + "_speeds.pdf")
def mean_confidence_interval(phi_trace, alpha):
    """Return the mean of a trace and the bounds of its HPD interval.

    Despite the name, the interval comes from ``hpd(phi_trace, alpha)``
    rather than from a t-based confidence interval.

    Args:
        phi_trace: Sequence/array of samples.
        alpha: Alpha level passed straight to ``hpd``.

    Returns:
        Tuple ``(mean, lower, upper)``, each rounded to 4 decimals.
    """
    m = np.mean(phi_trace)
    phi_hpd = hpd(phi_trace, alpha)
    return round(m, 4), round(phi_hpd[0], 4), round(phi_hpd[1], 4)
def stats(self, alpha=0.05, start=0, batches=100, chain=None,
          quantiles=(2.5, 25, 50, 75, 97.5)):
    """
    Summarise the stored trace of this object.

    :Parameters:
    alpha : float
      Alpha level of the posterior (HPD) interval. Defaults to 0.05.

    start : int
      Index of the first sample to include. Defaults to zero.

    batches : int
      Batch count handed to ``batchsd`` for the Monte Carlo error; it is
      capped at the number of samples. Defaults to 100.

    chain : int
      Chain to summarise. Defaults to None (all chains).

    quantiles : tuple or list
      Quantiles to compute. Defaults to (2.5, 25, 50, 75, 97.5).

    Returns a dict of statistics, or None (after printing a message) when
    the trace is empty or anything goes wrong.
    """
    try:
        raw_samples = self.db.trace(self.name)(chain=chain)
        as_float = np.array(raw_samples, float)
        trace = np.squeeze(as_float)[start:]

        n = len(trace)
        if n:
            n_batches = min(n, batches)
            return {
                'n': n,
                'standard deviation': trace.std(0),
                'mean': trace.mean(0),
                '%s%s HPD interval' % (int(100 * (1 - alpha)), '%'):
                    utils.hpd(trace, alpha),
                'mc error': batchsd(trace, n_batches),
                'quantiles': utils.quantiles(trace, qlist=quantiles)
            }

        print_('Cannot generate statistics for zero-length trace in',
               self.__name__)
        return
    except:
        print_('Could not generate output statistics for', self.name)
        return
def updateFitsHeader(model, hdr, clobber=False, conf=0.9):
    """
    Update the delivered fits-header with the parameters of the model.

    Parameters:
      - `model` - Model whose root name and parameters are written.
      - `hdr` - Pyfits header which should be updated with the model parameters.
      - `conf` - Confidence level for MCMC errors.
      - `clobber` - Allows to overwrite parameters which might be already
                    present in the header.

    Returns the updated header.

    Raises `PE.PyARequiredImport` if pyfits is not available and
    `PE.pyaOtherErrors` if `clobber` is False and ``PA_model`` or one of the
    model parameters is already present in the header.  The existence
    checks run before anything is written, so a rejected header is left
    untouched.
    """
    if not ic.check["pyfits"]:
        raise PE.PyARequiredImport("pyfits required to use fits file.",
                                   where="Params::updateFitsHeader")

    def _present(key):
        # A header lookup raises KeyError for a missing keyword.
        try:
            hdr[key]
        except KeyError:
            return False
        return True

    # Previously every `raise` below lived inside `try ... except: pass`,
    # so it was swallowed and `clobber=False` never protected anything.
    if not clobber:
        if _present('PA_model'):
            raise PE.pyaOtherErrors(
                " Keyword PA_model already present in fits-header, aborting...",
                where="Params::updateFitsHeader")
        for p in model.parameters():
            if _present(p):
                raise PE.pyaOtherErrors(
                    " Parameter " + str(p) + " already present in fits-header",
                    where="Params::updateFitsHeader")

    hdr.update('PA_model', model.naming.getRoot(), 'PyAstronomy model type')
    for p in model.parameters():
        hdr.update(p, model[p])

    if ic.check["pymc"]:
        from pymc.utils import hpd
        hdr.update('Conf', conf, "Error Confidence Level")
        for p in model.parameters():
            p0, p1 = str(p) + '_e0', str(p) + '_e1'
            if len(p0) > 8:
                # The error keyword would be longer than 8 characters; as
                # before, the errors of this parameter are simply not saved.
                continue
            try:
                v_err = hpd(model.MCMC.trace(p)[:], 1.0 - conf)
                hdr.update(p0, v_err[0], "Lower confidence boundary")
                hdr.update(p1, v_err[1], "Upper confidence boundary")
            except Exception:
                # Best effort: a parameter without a usable MCMC trace just
                # gets no error keywords.
                pass
    return hdr
def histogram(data, name, nbins=None, datarange=(None, None), format='png',
              suffix='', path='./', rows=1, columns=1, num=1, last=True,
              fontmap={1: 10, 2: 8, 3: 6, 4: 5, 5: 4}, verbose=1):
    """Draw a histogram of `data` with its median and 95% HPD interval.

    Internal histogram specification for handling nested arrays.

    When `rows`, `columns` and `num` are all 1 the plot is stand-alone: a
    new figure is created and saved as ``<path><name><suffix>.<format>``
    (creating `path` if needed).  Otherwise the histogram is drawn into
    subplot `num` of a `rows` x `columns` grid and nothing is saved.

    `nbins` defaults to the number of unique values when there are at most
    25 of them, else to ``int(4 + 1.5 * log(len(data)))``.  `fontmap` maps
    `rows` to the tick-label font size; it is only read, never modified.
    An OverflowError raised while plotting is reported and swallowed.
    """
    try:
        # Stand-alone plot or subplot?
        standalone = rows == 1 and columns == 1 and num == 1

        if standalone:
            if verbose > 0:
                # Single-argument form prints the same text on Python 2 and 3.
                print('Generating histogram of %s' % (name,))
            figure()

        subplot(rows, columns, num)

        # Specify number of bins
        uniquevals = len(unique(data))
        nbins = (nbins
                 or uniquevals * (uniquevals <= 25)
                 or int(4 + 1.5 * log(len(data))))

        # Generate histogram
        hist(data.tolist(), nbins, histtype='stepfilled')

        xlim(datarange)

        # Plot options
        title('\n\n   %s hist' % name, x=0., y=1., ha='left', va='top',
              fontsize='medium')
        ylabel("Frequency", fontsize='x-small')

        # Plot vertical lines for median and 95% HPD interval
        quant = calc_quantiles(data)
        axvline(x=quant[50], linewidth=2, color='black')
        for q in hpd(data, 0.05):
            axvline(x=q, linewidth=2, color='grey', linestyle='dotted')

        # Smaller tick labels
        tlabels = gca().get_xticklabels()
        setp(tlabels, 'fontsize', fontmap[rows])
        tlabels = gca().get_yticklabels()
        setp(tlabels, 'fontsize', fontmap[rows])

        if standalone:
            if not os.path.exists(path):
                os.mkdir(path)
            if not path.endswith('/'):
                path += '/'
            # Save to file
            savefig("%s%s%s.%s" % (path, name, suffix, format))

    except OverflowError:
        print('... cannot generate histogram')
def plot_changepoints(ax=None, title=''):
    """Histogram the sampled changepoints of the module-level ``path``.

    Each changepoint is drawn as a histogram with its median (solid line)
    and ``hpd(..., 0.05)`` interval (dashed lines).  When ``ax`` is None a
    new figure is created and saved as
    ``keweenawan_changepoints_<n_euler_rotations>.pdf``; otherwise the
    given axes are used and nothing is saved.
    """
    standalone = ax is None
    if standalone:
        fig = plt.figure()
        myax = fig.add_subplot(111)
    else:
        myax = ax

    myax.set_xlabel('Changepoint (Ma)')
    myax.set_ylabel('Probability density')

    xmin, xmax = 1.e10, 0.0
    colorcycle = itertools.cycle(dist_colors_short)
    for i, change in enumerate(path.changepoints()):
        c = next(colorcycle)

        # Histogram of this changepoint's samples.
        myax.hist(change, bins=30, normed=True, alpha=0.5, color=c,
                  label='Changepoint %i' % (i))

        # Median and credible interval.
        credible_interval = hpd(change, 0.05)
        median = np.median(change)
        print("Changepoint %i: median %f, credible interval " % (i, median),
              credible_interval)
        myax.axvline(median, lw=2, color=c)
        for bound in (credible_interval[0], credible_interval[1]):
            myax.axvline(bound, lw=2, color=c, linestyle='dashed')

        # Widen the x range to twice the interval half-widths around the
        # median, clamped at zero on the low side.
        low_edge = median - 2. * (median - credible_interval[0])
        high_edge = median + 2. * (credible_interval[1] - median)
        xmin = max(0., min(xmin, low_edge))
        xmax = max(xmax, high_edge)

    if n_euler_rotations > 2:
        myax.legend(loc='upper right')
    myax.set_xlim(xmin, xmax)

    if title != '':
        myax.set_title(title)

    if standalone:
        plt.savefig("keweenawan_changepoints_" + str(n_euler_rotations) + ".pdf")
def updateFitsHeader(model, hdr, clobber=False, conf=0.9):
    """
    Update the delivered fits-header with the parameters of the model.

    Parameters:
      - `model` - Model whose root name and parameters are written.
      - `hdr` - Pyfits header which should be updated with the model parameters.
      - `conf` - Confidence level for MCMC errors.
      - `clobber` - Allows to overwrite parameters which might be already
                    present in the header.

    Returns the updated header.

    Raises `PE.PyARequiredImport` if pyfits is not available and
    `PE.pyaOtherErrors` if `clobber` is False and ``PA_model`` or one of the
    model parameters is already present in the header.  All existence
    checks happen before the first write, so a rejected header is not
    partially modified.
    """
    if not ic.check["pyfits"]:
        raise PE.PyARequiredImport("pyfits required to use fits file.",
                                   where="Params::updateFitsHeader")

    def _has_keyword(key):
        # Looking up a missing keyword raises KeyError.
        try:
            hdr[key]
        except KeyError:
            return False
        return True

    # The original raised inside `try ... except: pass`, which swallowed the
    # exception and made `clobber=False` a no-op.
    if not clobber:
        if _has_keyword('PA_model'):
            raise PE.pyaOtherErrors(
                " Keyword PA_model already present in fits-header, aborting...",
                where="Params::updateFitsHeader")
        for p in model.parameters():
            if _has_keyword(p):
                raise PE.pyaOtherErrors(
                    " Parameter " + str(p) + " already present in fits-header",
                    where="Params::updateFitsHeader")

    hdr.update('PA_model', model.naming.getRoot(), 'PyAstronomy model type')
    for p in model.parameters():
        hdr.update(p, model[p])

    if ic.check["pymc"]:
        from pymc.utils import hpd
        hdr.update('Conf', conf, "Error Confidence Level")
        for p in model.parameters():
            p0, p1 = str(p) + '_e0', str(p) + '_e1'
            if len(p0) > 8:
                # Error keyword would exceed 8 characters; as before, the
                # errors of this parameter are silently not stored.
                continue
            try:
                v_err = hpd(model.MCMC.trace(p)[:], 1.0 - conf)
                hdr.update(p0, v_err[0], "Lower confidence boundary")
                hdr.update(p1, v_err[1], "Upper confidence boundary")
            except Exception:
                # Best effort: skip parameters without a usable MCMC trace.
                pass
    return hdr
def plot_changepoints(path, ax, title=''):
    """Histogram the changepoint samples of ``path`` on the axes ``ax``.

    Every changepoint gets a histogram, a solid line at its median and
    dashed lines at the bounds of ``hpd(..., 0.05)``.  A legend is added
    only when ``path.n_euler_rotations`` exceeds 2, and x tick labels are
    formatted as integers.  Nothing is saved or returned.
    """
    ax.set_xlabel('Changepoint (Ma)')
    ax.set_ylabel('Probability density')

    xmin, xmax = 1.e10, 0.0
    colorcycle = itertools.cycle(dist_colors_short)
    for i, change in enumerate(path.changepoints()):
        c = next(colorcycle)

        # Histogram of the samples.
        ax.hist(change, bins=30, normed=True, alpha=0.5, color=c,
                label='Changepoint %i' % (i))

        # Median and credible interval.
        credible_interval = hpd(change, 0.05)
        lower, upper = credible_interval[0], credible_interval[1]
        median = np.median(change)
        print("Changepoint %i: median %f, credible interval " % (i, median),
              credible_interval)
        ax.axvline(median, lw=2, color=c)
        ax.axvline(lower, lw=2, color=c, linestyle='dashed')
        ax.axvline(upper, lw=2, color=c, linestyle='dashed')

        # Track an x range of twice the interval half-widths around the
        # median (lower edge never below zero).
        xmin = max(0., min(xmin, median - 2. * (median - lower)))
        xmax = max(xmax, median + 2. * (upper - median))

    if path.n_euler_rotations > 2:
        ax.legend(loc='upper right')
    ax.set_xlim(xmin, xmax)
    ax.xaxis.set_major_formatter(ticker.FormatStrFormatter('%i'))

    if title != '':
        ax.set_title(title)
def plot_changepoints(ax=None, title=''):
    """Plot posterior histograms of the changepoints of the global ``path``.

    For each changepoint: a histogram, the median as a solid vertical line
    and the ``hpd(..., 0.05)`` bounds as dashed lines.  With ``ax=None`` the
    function makes its own figure and writes it to
    ``prefix + '_changepoints.pdf'``; with an explicit ``ax`` it only draws.
    """
    if ax is None:
        myax = plt.figure().add_subplot(111)
    else:
        myax = ax

    samples_per_changepoint = path.changepoints()

    myax.set_xlabel('Changepoint (Ma)')
    myax.set_ylabel('Probability density')

    xmin = 1.e10
    xmax = 0.0
    colorcycle = itertools.cycle(dist_colors_short)
    for i, change in enumerate(samples_per_changepoint):
        c = next(colorcycle)

        # Histogram.
        hist_label = 'Changepoint %i' % (i)
        myax.hist(change, bins=30, normed=True, alpha=0.5, color=c,
                  label=hist_label)

        # Median and credible interval.
        credible_interval = hpd(change, 0.05)
        median = np.median(change)
        print("Changepoint %i: median %f, credible interval " % (i, median),
              credible_interval)
        for position, style in ((median, None),
                                (credible_interval[0], 'dashed'),
                                (credible_interval[1], 'dashed')):
            if style is None:
                myax.axvline(position, lw=2, color=c)
            else:
                myax.axvline(position, lw=2, color=c, linestyle=style)

        # Running x limits: twice the interval half-widths around the
        # median, floored at zero.
        below = median - 2. * (median - credible_interval[0])
        above = median + 2. * (credible_interval[1] - median)
        xmin = max(0., min(xmin, below))
        xmax = max(xmax, above)

    if n_euler_rotations > 2:
        myax.legend(loc='upper right')
    myax.set_xlim(xmin, xmax)

    if title != '':
        myax.set_title(title)

    if ax is None:
        plt.savefig(prefix + '_changepoints.pdf')
def trace_stats(trace, alpha=0.05, batches=100,
                quantiles=(2.5, 25, 50, 75, 97.5)):
    """
    Generate posterior statistics for the trace ala pymc
    (this was adapted from pymc.database.base)

    :Parameters:
    trace : ndarray
      The samples to summarize; singleton dimensions are squeezed away.

    alpha : float
      The alpha level for generating posterior intervals. Defaults to
      0.05.

    batches : int
      Batch size for calculating standard deviation for non-independent
      samples. Defaults to 100. Clamped to the number of samples so short
      traces can still be batched.

    quantiles : tuple or list
      The desired quantiles to be calculated. Defaults to
      (2.5, 25, 50, 75, 97.5).

    :Returns:
    A dict with the keys ``'n'``, ``'standard deviation'``, ``'mean'``,
    ``'<P>% HPD interval'``, ``'mc error'``, ``'quantiles'`` and
    ``'acorr'``.
    """
    trace = np.squeeze(trace)
    n = len(trace)
    # Same clamp as pymc's own stats(): never more batches than samples.
    # An empty trace keeps the caller's value so no zero batch count is
    # passed on.
    n_batches = min(n, batches) if n else batches

    return {
        'n': n,
        'standard deviation': trace.std(0),
        'mean': trace.mean(0),
        '%s%s HPD interval' % (int(100 * (1 - alpha)), '%'): hpd(trace, alpha),
        'mc error': batchsd(trace, n_batches),
        'quantiles': pymc.utils.quantiles(trace, qlist=quantiles),
        'acorr': standardized_autocorrelation(trace)
    }
def plot_result():
    """Draw the scenario-b figure and save ``one_euler_pole_scenario_b.pdf``.

    Panel (a), an orthographic map, shows the sampled Euler pole directions
    as a density distribution, 200 synthetic paths, ``hidden_euler_pole``
    as a black star and every pole in ``pole_list``.  Panel (b) shows a
    histogram of the sampled rotation rate with its median (solid), the
    ``hpd(..., 0.05)`` interval (dashed) and ``hidden_euler_rate`` (dotted).

    Relies on the module-level names ``path``, ``pole_list``,
    ``hidden_euler_pole``, ``hidden_euler_rate``, ``cmap_red`` and
    ``cmap_green``.
    """
    fig = plt.figure(figsize=(8, 4))

    # ---- Panel (a): map view ------------------------------------------
    ax = fig.add_subplot(1, 2, 1, projection=ccrs.Orthographic(-30., 15.))
    ax.gridlines()
    ax.set_global()

    colors = itertools.cycle([cmap_red, cmap_green])
    direction_samples = path.euler_directions()
    for directions in direction_samples:
        mcplates.plot.plot_distribution(ax, directions[:, 0], directions[:, 1],
                                        resolution=60, cmap=next(colors))

    pathlons, pathlats = path.compute_synthetic_paths(n=200)
    for pathlon, pathlat in zip(pathlons, pathlats):
        ax.plot(pathlon, pathlat, transform=ccrs.PlateCarree(),
                color='darkred', alpha=0.05)

    euler_lon = hidden_euler_pole[0]
    euler_lat = hidden_euler_pole[1]
    ax.plot(euler_lon, euler_lat, 'k*',
            transform=ccrs.Geodetic(), markersize=10)

    for p in pole_list:
        p.plot(ax)
    ax.set_title('(a)')

    # ---- Panel (b): rotation-rate posterior ----------------------------
    ax = fig.add_subplot(1, 2, 2)
    c = 'darkred'
    rate_samples = path.euler_rates()
    # Histogram, interval and median all use the first rotation's samples
    # so they always describe the same data.
    rates = rate_samples[0]
    ax.hist(rates, bins=15, normed=True, edgecolor='none', color=c, alpha=0.5)

    # plot median, credible interval
    credible_interval = hpd(rates, 0.05)
    median = np.median(rates)
    print("Median %f, credible interval " % (median), credible_interval)
    ax.axvline(median, lw=2, color=c)
    ax.axvline(credible_interval[0], lw=2, color=c, linestyle='dashed')
    ax.axvline(credible_interval[1], lw=2, color=c, linestyle='dashed')
    ax.axvline(hidden_euler_rate, lw=2, color='black', linestyle='dotted')

    ax.set_title('(b)')
    ax.set_xlabel(r'Rotation rate $\,^\circ / \mathrm{Myr}$')
    ax.set_ylabel(r'Posterior probability density')

    plt.tight_layout()
    plt.savefig("one_euler_pole_scenario_b.pdf")
def plot_plate_speeds(path, poles, ax, title=''):
    """Histogram the RMS plate speed implied by each sampled rotation.

    For every posterior sample of every rotation (optionally preceded by
    the TPW rotation when ``path.include_tpw`` is set) the speed is
    evaluated at each point read from ``Laurentia_lon_lat.csv`` and
    combined into a root-mean-square value.  Each rotation is drawn as a
    histogram with its median (solid) and ``hpd(..., 0.05)`` interval
    (dashed).  The x axis is fixed to 0-40 with a major tick every 5.

    Args:
        path: Fitted path object providing ``n_euler_rotations``,
            ``include_tpw``, ``euler_directions()``, ``euler_rates()``,
            ``tpw_poles()``, ``tpw_rates()`` and ``changepoints()``.
        poles: Iterable of objects with an ``age`` attribute; the oldest
            and youngest ages bound the interval labels.
        ax: Matplotlib axes to draw on.
        title: Optional axes title.

    Relies on the module-level names ``lon_shift`` and
    ``dist_colors_short``.
    """
    # Load a series of points for Laurentia
    laurentia_data = np.loadtxt('Laurentia_lon_lat.csv', skiprows=1,
                                delimiter=',')
    laurentia_lon = laurentia_data[:, 2] - lon_shift
    laurentia_lat = laurentia_data[:, 1]
    n_points = len(laurentia_lon)
    # The sample locations do not depend on the posterior sample, so build
    # them once instead of once per sample.
    # NOTE(review): assumes speed_at_point() does not modify its argument.
    locations = [mcplates.PlateCentroid(slon, slat)
                 for slon, slat in zip(laurentia_lon, laurentia_lat)]

    direction_samples = []
    rate_samples = []
    if path.n_euler_rotations > 0:
        direction_samples = path.euler_directions()
        rate_samples = path.euler_rates()
    if path.include_tpw:
        direction_samples.insert(0, path.tpw_poles())
        rate_samples.insert(0, path.tpw_rates())

    # Get a list of intervals for the rotations
    if path.n_euler_rotations > 1:
        changepoints = [np.median(c) for c in path.changepoints()]
    else:
        changepoints = []
    age_list = [p.age for p in poles]
    changepoints.insert(0, max(age_list))
    changepoints.append(min(age_list))

    ax.set_xlabel('Plate speed (cm/yr)')
    ax.set_ylabel('Probability density')

    colorcycle = itertools.cycle(dist_colors_short)
    if not path.include_tpw:
        # Skip the first colour when there is no TPW histogram.
        next(colorcycle)

    for i, (directions, rates) in enumerate(zip(direction_samples,
                                                rate_samples)):
        # Compute the RMS plate speed of every posterior sample.
        speed_samples = np.empty_like(rates)
        for j, rate in enumerate(rates):
            euler = mcplates.EulerPole(directions[j, 0], directions[j, 1], rate)
            speed_sample = 0.
            for loc in locations:
                speed = euler.speed_at_point(loc)
                speed_sample += speed * speed / n_points
            speed_samples[j] = np.sqrt(speed_sample)

        c = next(colorcycle)

        # With TPW inserted at index 0 the Euler rotations are shifted by
        # one relative to the changepoint intervals.
        if path.include_tpw and i == 0:
            hist_label = 'TPW'
        elif path.include_tpw:
            hist_label = '%i - %i Ma' % (changepoints[i - 1], changepoints[i])
        else:
            hist_label = '%i - %i Ma' % (changepoints[i], changepoints[i + 1])
        ax.hist(speed_samples, bins=30, normed=True, alpha=0.5, color=c,
                label=hist_label)

        # plot median, credible interval
        credible_interval = hpd(speed_samples, 0.05)
        median = np.median(speed_samples)
        print("Rotation %i: median %f, credible interval " % (i, median),
              credible_interval)
        ax.axvline(median, lw=2, color=c)
        ax.axvline(credible_interval[0], lw=2, color=c, linestyle='dashed')
        ax.axvline(credible_interval[1], lw=2, color=c, linestyle='dashed')

    if len(rate_samples) > 1:
        ax.legend(loc='upper right')
    ax.set_xlim(0, 40)
    ax.xaxis.set_major_formatter(ticker.FormatStrFormatter('%i'))
    tick_interval = 5
    ax.xaxis.set_major_locator(ticker.MultipleLocator(tick_interval))

    if title != '':
        ax.set_title(title)
def plot_plate_speeds(ax=None, title=''):
    """Plot posterior histograms of plate speed for each Euler rotation.

    The speed at the global point ``uluru`` is computed for every
    (direction, rate) sample of every rotation of the global ``path``.
    Each rotation gets a histogram labelled with its age interval, a solid
    line at the median and dashed lines at the ``hpd(..., 0.05)`` bounds.
    With ``ax=None`` a new figure is created and written to
    ``prefix + "_speeds.pdf"``; with an explicit ``ax`` nothing is saved.
    """
    if ax is None:
        myax = plt.figure().add_subplot(111)
    else:
        myax = ax

    euler_directions = path.euler_directions()
    euler_rates = path.euler_rates()

    # Interval boundaries: oldest pole age, changepoint medians (only when
    # there is more than one rotation), youngest pole age.
    if n_euler_rotations > 1:
        medians = [np.median(c) for c in path.changepoints()]
    else:
        medians = []
    age_list = [p.age for p in poles]
    changepoints = [max(age_list)]
    changepoints.extend(medians)
    changepoints.append(min(age_list))

    myax.set_xlabel('Plate speed (cm/yr)')
    myax.set_ylabel('Probability density')

    xmin = 1000.
    xmax = 0.
    colorcycle = itertools.cycle(dist_colors_short)
    rotations = zip(euler_directions, euler_rates)
    for i, (directions, rates) in enumerate(rotations):
        # One speed per posterior sample.
        speed_samples = np.empty_like(rates)
        for j in range(len(rates)):
            lon, lat = directions[j, 0], directions[j, 1]
            speed_samples[j] = mcplates.EulerPole(
                lon, lat, rates[j]).speed_at_point(uluru)

        c = next(colorcycle)

        # Histogram.
        myax.hist(speed_samples, bins=30, normed=True, alpha=0.5, color=c,
                  label='%i - %i Ma' % (changepoints[i], changepoints[i + 1]))

        # Median and credible interval.
        credible_interval = hpd(speed_samples, 0.05)
        lower, upper = credible_interval[0], credible_interval[1]
        median = np.median(speed_samples)
        print("Rotation %i: median %f, credible interval " % (i, median),
              credible_interval)
        myax.axvline(median, lw=2, color=c)
        myax.axvline(lower, lw=2, color=c, linestyle='dashed')
        myax.axvline(upper, lw=2, color=c, linestyle='dashed')

        # Running x limits, floored at zero on the low side.
        xmin = max(0., min(xmin, median - 2. * (median - lower)))
        xmax = max(xmax, median + 2. * (upper - median))

    if n_euler_rotations > 1:
        myax.legend(loc='upper right')
    myax.set_xlim(xmin, xmax)

    if title != '':
        myax.set_title(title)

    if ax is None:
        plt.savefig(prefix + "_speeds.pdf")
def my_summary(stoch, i, li, ui, factor=0.001):
    """Return ``[mean, hpd_lower, hpd_upper]`` for column ``i`` of a trace.

    The mean is taken over all rows of ``trace[stoch][:, i]``; the interval
    is ``hpd(..., 0.05)`` of the two rows ``li`` and ``ui`` of that column.
    All three values are multiplied by ``factor``.

    NOTE(review): the interval uses only the two rows ``[li, ui]`` (fancy
    indexing), not the range ``li:ui`` - confirm that this is intended.
    """
    scaled_mean = mean(trace[stoch][:, i]) * factor
    scaled_interval = hpd(trace[stoch][[li, ui], i], 0.05) * factor
    return [scaled_mean] + list(scaled_interval)
def iqr(X):
    """Format the ``hpd(X, .4)`` interval of ``X`` as ``'(lower, upper)'``.

    The lower bound is shifted down by 0.5 and the upper bound up by 0.5
    before rounding; both are printed without decimals.
    """
    from pymc.utils import hpd

    lb, ub = hpd(X, .4)
    lower_text = round(lb - .5)
    upper_text = round(ub + .5)
    return '(%.0f, %.0f)' % (lower_text, upper_text)
def bodelike_plot(pbproject=HS_PROJECT,
                  model_id='gpa3',
                  varname='phase',
                  control_genotype='VT37804_TNTin',
                  blocked_genotype='VT37804_TNTE',
                  num_chains=4,
                  takelast=10000,
                  alpha=0.05,
                  plot_control=True,
                  plot_silenced=True,
                  img_format='png',
                  show=False):
    """Plot mixed MCMC traces and HPD bounds of one variable per frequency.

    For each frequency the last ``takelast`` samples of every chain of the
    hierarchical variable ``varname`` are concatenated, for the control and
    for the blocked genotype.  The traces of all frequencies of interest are
    plotted side by side, with the ``hpd(..., alpha)`` bounds as horizontal
    lines and the Gelman-Rubin statistic of both genotypes in the tick
    labels.  The figure is saved under
    ``<plots_dir>/bbode/<model_id>/<control>-vs-<blocked>/`` and optionally
    shown.

    Raises KeyError if one of the hard-coded frequencies of interest has no
    computed result.
    """

    def varnames(result, varname):
        # Names of the hierarchical variable and of the per-fly variables
        # (None when the variable does not exist at that level).
        hyperfly, hyperfly_postfix, hyperfly_variables, flies, flies_variables = \
            flies_and_variables(result)
        hvar = varname + hyperfly_postfix if varname in set(hyperfly_variables) else None
        fvars = [varname + '_' + fly for fly in flies] if varname in set(flies_variables) else None
        return hvar, fvars

    def mix_chains(chains):
        # Concatenate the last `takelast` samples of each chain; slots of
        # chains that are not supplied stay NaN.
        # assert len(chains) >= num_chains
        mixed = np.array([np.nan] * (num_chains * takelast))
        for i, chain in enumerate(chains):
            mixed[i * takelast: (i + 1) * takelast] = chain[-takelast:]
        return mixed

    # Available results
    results = all_computed_results(pbproject.mcmc_dir)
    results = results[results.model_id == model_id]
    ctraces = {}
    straces = {}
    results.genotype = results.genotype.apply(lambda gen: gen.partition('__')[0])

    # Collect and mix traces for all frequencies.
    # A list (not a tuple) of keys is what pandas documents for grouping by
    # several columns; the group's model id is not rebound so that the
    # `model_id` parameter is never shadowed.
    for (_, freq), data in results.groupby(['model_id', 'freq']):
        print('\t\t\tCollecting traces for frequency %g' % freq)
        control = MCMCRunManager(data[data.genotype == control_genotype].iloc[0]['path'])  # ad-hoc
        silenced = MCMCRunManager(data[data.genotype == blocked_genotype].iloc[0]['path'])  # ad-hoc
        chvar, _ = varnames(control, varname)   # control hierarchical var, fly vars
        shvar, _ = varnames(silenced, varname)  # silenced hierarchical var, fly vars
        ctraces[freq] = mix_chains(control.traces(chvar))
        straces[freq] = mix_chains(silenced.traces(shvar))

    # The frequencies we are interested in...
    freqs = (0.5, 1, 2, 4, 8, 16, 32, 40)

    # Compute HPDs. Compute the rope too, see Kruschke.
    chpds = [hpd(ctraces[freq], alpha) for freq in freqs]
    shpds = [hpd(straces[freq], alpha) for freq in freqs]

    # Plot the traces
    if plot_control:
        plt.plot(np.hstack([ctraces[freq] for freq in freqs]),
                 color='b', label=control_genotype.replace('_', 'x'))
    if plot_silenced:
        plt.plot(np.hstack([straces[freq] for freq in freqs]),
                 color='r', label=blocked_genotype.replace('_', 'x'))

    # Plot the HPD regions + setup ticks
    xticklocations = []
    xticklabels = []
    for i, freq in enumerate(freqs):
        xmin = num_chains * takelast * i
        xmax = num_chains * takelast * (i + 1)
        plt.axvline(x=xmax, color='k')
        plt.plot((xmin, xmax), [chpds[i][0]] * 2, color='c', linewidth=4)
        plt.plot((xmin, xmax), [chpds[i][1]] * 2, color='c', linewidth=4)
        plt.plot((xmin, xmax), [shpds[i][0]] * 2, color='m', linewidth=4)
        plt.plot((xmin, xmax), [shpds[i][1]] * 2, color='m', linewidth=4)
        # Gelman-Rubin R^2 (might interest: Geweke, autocorr, put graphically in the plot)
        cgr = gelman_rubin(ctraces[freq].reshape(num_chains, -1))
        print('\t%s %s control freq %.1f; GR=%.2f' % (model_id, varname, freq, cgr))
        sgr = gelman_rubin(straces[freq].reshape(num_chains, -1))
        print('\t%s %s blocked freq %.1f; GR=%.2f' % (model_id, varname, freq, sgr))
        # xticks
        xticklocations.append(xmin + (xmax - xmin) / 2.)
        xticklabels.append('%g\nbgr=%.2f\ncgr=%.2f' % (freq, sgr, cgr))

    plt.title('Model: %s; Variable: %s' % (model_id, varname))
    # Raw string: '\o' is not a valid escape sequence.
    plt.xlabel(r'$\omega$')
    plt.ylabel('%s' % varname)
    plt.tick_params(axis='x',           # changes apply to the x-axis
                    which='both',       # both major and minor ticks are affected
                    top='off',          # ticks along the top edge are off
                    bottom='on',        # ticks along the bottom edge are on
                    labelbottom='on')   # labels along the bottom edge are on
    plt.xticks(xticklocations, xticklabels)
    plt.legend()
    plt.tight_layout()

    # Save (the original referenced an undefined name `model` here)
    dest_dir = op.join(pbproject.plots_dir, 'bbode', model_id,
                       '%s-vs-%s' % (control_genotype, blocked_genotype))
    ensure_dir(dest_dir)
    plt.savefig(op.join(dest_dir, '%s-vs-%s-%s-%s.%s' % (control_genotype,
                                                         blocked_genotype,
                                                         model_id,
                                                         varname,
                                                         img_format)))

    # Show
    if show:
        plt.show()
def histogram(data, name, nbins=None, datarange=(None, None), format='png',
              suffix='', path='./', rows=1, columns=1, num=1, last=True,
              fontmap=None, verbose=1):
    """Draw a histogram of ``data`` marked with its median and HPD bounds.

    The plot is drawn into subplot ``num`` of a ``rows`` x ``columns`` grid.
    When ``rows``, ``columns`` and ``num`` are all 1 the plot is treated as
    stand-alone: a new figure is opened first and the result is written to
    ``<path>/<name><suffix>.<format>``.

    Parameters
    ----------
    data : array-like
        Values to plot; must support ``len()`` and ``.tolist()``.
    name : str
        Used in the plot title and in the output file name.
    nbins : int, optional
        Number of bins. When falsy it is chosen automatically: the number of
        unique values if there are between 1 and 25 of them, otherwise
        ``int(4 + 1.5 * log(len(data)))``.
    datarange : tuple
        Passed unchanged to ``xlim``.
    format, suffix, path : str
        File extension, file-name suffix and output directory used when the
        plot is stand-alone.
    rows, columns, num : int
        Subplot grid shape and position.
    last : bool
        Not used by this function.
    fontmap : dict, optional
        Maps ``rows`` to the tick-label font size. Defaults to
        ``{1: 10, 2: 8, 3: 6, 4: 5, 5: 4}``.
    verbose : int
        When greater than 0, a progress message is printed for stand-alone
        plots.

    Notes
    -----
    Relies on plotting/numeric names (``figure``, ``subplot``, ``hist``,
    ``unique``, ``log``, ...) and on ``calc_quantiles`` / ``hpd`` being
    available at module scope. An ``OverflowError`` raised anywhere while
    plotting is reported with a message instead of propagating.
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    if fontmap is None:
        fontmap = {1: 10, 2: 8, 3: 6, 4: 5, 5: 4}

    # Internal histogram specification for handling nested arrays
    try:
        # Stand-alone plot or subplot?
        standalone = rows == 1 and columns == 1 and num == 1

        if standalone:
            if verbose > 0:
                # Single-argument print() behaves the same on Python 2 and 3.
                print('Generating histogram of %s' % (name,))
            figure()

        subplot(rows, columns, num)

        # Choose the number of bins when the caller did not supply one.
        if not nbins:
            uniquevals = len(unique(data))
            if 0 < uniquevals <= 25:
                nbins = uniquevals
            else:
                nbins = int(4 + 1.5 * log(len(data)))

        # Generate histogram
        hist(data.tolist(), nbins, histtype='stepfilled')
        xlim(datarange)

        # Plot options
        title('\n\n   %s hist' % name, x=0., y=1., ha='left', va='top',
              fontsize='medium')
        ylabel("Frequency", fontsize='x-small')

        # Vertical lines for the median (key 50 of calc_quantiles) and for
        # each bound returned by hpd(data, 0.05).
        quant = calc_quantiles(data)
        axvline(x=quant[50], linewidth=2, color='black')
        for q in hpd(data, 0.05):
            axvline(x=q, linewidth=2, color='grey', linestyle='dotted')

        # Smaller tick labels
        tick_size = fontmap[rows]
        setp(gca().get_xticklabels(), 'fontsize', tick_size)
        setp(gca().get_yticklabels(), 'fontsize', tick_size)

        if standalone:
            # makedirs (not mkdir) so nested output directories also work.
            if not os.path.exists(path):
                os.makedirs(path)
            if not path.endswith('/'):
                path += '/'
            # Save to file
            savefig("%s%s%s.%s" % (path, name, suffix, format))

    except OverflowError:
        print('... cannot generate histogram')
# 3 x 7 grid of panels; panels in the same column share their x axis.
fig, ax = plt.subplots(3, 7, figsize=(8.6, 3.8), sharex='col')

# Load the sample table, using its first column as the index.
samples = pd.read_csv("..//MCMC_samples//maths_samples.csv", index_col=0)

# Collect every column whose name contains 'beta', then drop the first of
# them (the intercept, according to the original author's note).
beta_names = [col for col in samples.columns if 'beta' in col]
beta = samples[beta_names].values[:, 1:]
sigma = samples['sigma']

# Top row: one density panel per entry of x_names.
for i, var in enumerate(x_names):
    panel = ax[0, i]
    beta_val = beta[:, i]
    sns.kdeplot(beta_val, ax=panel, color=beta_colors[i], shade=True)

    # Interval from hpd(beta_val, 0.1); indexed below as (lower, upper).
    hdi = hpd(beta_val, 0.1)

    # Convert the axes-fraction height 0.08 into data coordinates so the
    # interval bar sits at the same relative height in every panel.
    axis_to_data = panel.transAxes + panel.transData.inverted()
    y_pos = axis_to_data.transform((0, 0.08))[1]

    # Bar centred on the interval midpoint, reaching out to both bounds,
    # with a dot at the sample mean.
    hdi_centre = hdi.mean()
    panel.errorbar(hdi_centre, y_pos, xerr=hdi_centre - hdi[0],
                   color='k', elinewidth=2)
    panel.plot(beta_val.mean(), y_pos, 'ok')

    # Zero reference line on every panel except the first.
    if i > 0:
        panel.axvline(0, linestyle='--', color="0")

    panel.get_yaxis().set_ticks([])
def error_plot(x=None, y=None, hue=None, order=None, hue_order=None, ax=None,
               estimator=np.mean, hpd_alpha=0.05, data=None, stride=0.8,
               **kwargs):
    """Draw a point plot with error bars from a long-format data frame.

    Rows of ``data`` are grouped by ``x`` (and by ``hue`` when given). For
    each group an interval is computed and drawn with ``ax.errorbar``.

    Parameters
    ----------
    x, y : column labels
        Grouping column and value column of ``data``. Both are required.
    hue : column label, optional
        Second grouping column; each hue level is drawn at its own
        horizontal offset obtained from ``hue_offsets(n_hue, width=stride)``.
    order, hue_order : sequence, optional
        Category order for ``x`` / ``hue``. The column is converted to a
        ``pd.Categorical`` with these categories and the frame is sorted.
    ax : matplotlib axes, optional
        Target axes; defaults to ``plt.gca()``.
    estimator : callable
        Accepted but not used: the plotted centre is the midpoint of the
        interval (or ``np.mean`` when ``hpd_alpha`` is callable).
    hpd_alpha : float or callable
        If callable (only honoured when ``hue`` is given) it is applied to
        each group's values and the result is drawn as an upper-only error
        bar around the group mean. Otherwise it is passed to
        ``hpd(values, hpd_alpha)``, whose two bounds define the bar.
    data : pandas.DataFrame
        Source data. It is never modified; re-ordering works on a copy.
    stride : float
        Forwarded to ``hue_offsets`` as ``width``.
    **kwargs
        Forwarded to ``ax.errorbar``.

    Raises
    ------
    TypeError
        If ``x``, ``y`` or ``data`` is missing.

    Notes
    -----
    With ``hue``, the per-group results are reshaped to
    ``(n_x, n_hue)``, so every (x, hue) combination is expected to be
    present in ``data``.
    """
    if x is None:
        raise TypeError("Missing x label")
    if y is None:
        raise TypeError("Missing y label")
    if data is None:
        raise TypeError("Missing data")
    if ax is None:
        ax = plt.gca()

    if hue is None:
        # Order data by x (on a copy, so the caller's frame is untouched).
        if order:
            data = data.copy()
            data[x] = pd.Categorical(data[x], order)
            data = data.sort_values(x)

        groups = data.groupby([x])
        bounds = np.empty((2, len(groups)))
        for col, (_, group) in enumerate(groups):
            bounds[:, col] = hpd(group[y], hpd_alpha)

        centre = bounds.mean(0)
        # Half-width must be non-negative: midpoint minus lower bound.
        ax.errorbar(x=np.arange(len(groups)), y=centre,
                    yerr=centre - bounds[0], **kwargs)
        return

    offsets = hue_offsets(len(data[hue].unique()), width=stride)

    if order or hue_order:
        # Copy once before any column is re-typed.
        data = data.copy()
    if order:
        data[x] = pd.Categorical(data[x], order)
        data = data.sort_values(x)
    if hue_order:
        data[hue] = pd.Categorical(data[hue], hue_order)
        data = data.sort_values([x, hue])

    groups = data.groupby([x, hue])
    n_groups = len(groups)
    n_x = len(data[x].unique())
    n_hue = len(data[hue].unique())
    # An array (not a range) so that adding a scalar offset always works.
    x_pos = np.arange(n_x)

    if callable(hpd_alpha):
        spread = np.empty(n_groups)
        trend = np.empty(n_groups)
        for k, (_, group) in enumerate(groups):
            spread[k] = hpd_alpha(group[y])
            trend[k] = np.mean(group[y])
        spread = spread.reshape((n_x, n_hue))
        trend = trend.reshape((n_x, n_hue))

        for j in range(n_hue):
            # Row 0 (downward error) stays zero; row 1 is the upward error.
            one_sided = np.zeros((2, n_x))
            one_sided[1] = spread[:, j]
            ax.errorbar(x_pos + offsets[j], trend[:, j], yerr=one_sided,
                        **kwargs)
    else:
        bounds = np.empty((2, n_groups))
        for k, (_, group) in enumerate(groups):
            bounds[:, k] = hpd(group[y], hpd_alpha)
        bounds = bounds.reshape((2, n_x, n_hue))

        centre = bounds.mean(0)
        # Half-width must be non-negative: midpoint minus lower bound.
        half_width = centre - bounds[0]
        for j in range(n_hue):
            ax.errorbar(x_pos + offsets[j], centre[:, j],
                        yerr=half_width[:, j], **kwargs)
def errorplot(x, y, hue=None, ax=None, estimator=np.mean, alpha=0.05,
              color='k', label_rotation='horizontal', marker='o', ls='--',
              **kwargs):
    """Plot a central estimate with asymmetric error bars per x position.

    Parameters
    ----------
    x : sequence
        X values or X labels; one tick is drawn per entry.
    y : ndarray
        2-D ``(samples, x factor)`` or 3-D ``(samples, x factor, hue level)``
        array. The first axis is reduced by ``estimator`` and by ``hpd``.
    hue : sequence, optional
        Legend label for each hue level (3-D input only). When omitted the
        lines are drawn without labels.
    ax : matplotlib axes, optional
        Axes to draw on; defaults to ``plt.gca()``.
    estimator : callable
        A numpy-style reduction accepting an ``axis`` argument.
    alpha : float
        Passed to ``hpd(y, alpha)``; the result is indexed as
        ``[lower, upper]`` along its first axis.
    color : str or list
        A single colour, or (3-D input) a list with one colour per hue
        level. Any other value is used as-is for every hue level.
    label_rotation
        Rotation passed to ``ax.set_xticklabels``.
    marker, ls
        Marker and line style of the line joining the central estimates.
    **kwargs
        Any other ``ax.errorbar`` arguments.

    Raises
    ------
    AttributeError
        If ``y`` is neither 2-D nor 3-D.
    """
    # Validate before touching pyplot so a bad input creates no figure.
    if y.ndim not in (2, 3):
        raise AttributeError("Input has too many dimensions")
    if ax is None:
        ax = plt.gca()

    x_val = np.arange(len(x))
    central_tendancy = estimator(y, axis=0)
    hpd_vals = hpd(y, alpha)

    if y.ndim == 2:
        # Distances from the estimate down to the lower and up to the
        # upper bound.
        error = np.vstack((central_tendancy - hpd_vals[0],
                           hpd_vals[1] - central_tendancy))
        ax.errorbar(x_val, central_tendancy, yerr=error, ecolor=color,
                    fmt="None", **kwargs)
        ax.plot(x_val, central_tendancy, color=color, marker=marker, ls=ls)
        ax.set_xticks(x_val)
        ax.set_xticklabels(x, rotation=label_rotation)
        ax.set_xlim([-1, x_val.max() + 1])
        return

    # 3-D input: spread the hue levels over half a unit, centred on each x.
    n_hue_levels = y.shape[-1]
    hue_width = 0.5 / n_hue_levels
    offsets = np.linspace(0, 0.5 - hue_width, n_hue_levels)
    offsets -= offsets.mean()

    for i in range(central_tendancy.shape[1]):
        centre = central_tendancy[:, i]
        error = np.vstack((centre - hpd_vals[0, :, i],
                           hpd_vals[1, :, i] - centre))
        label = hue[i] if hue is not None else None
        col = color[i] if isinstance(color, list) else color
        ax.errorbar(x_val + offsets[i], centre, yerr=error, ecolor=col,
                    fmt="None", **kwargs)
        ax.plot(x_val + offsets[i], centre, color=col, marker=marker,
                ls=ls, label=label)

    ax.set_xticks(x_val)
    ax.set_xticklabels(x, rotation=label_rotation)
    # offsets are centred on zero, so the last one is the largest shift.
    ax.set_xlim([x_val[0] - offsets[-1] - 0.5,
                 x_val[-1] + offsets[-1] + 0.5])
def my_summary(stoch, i, li, ui, factor=.001):
    """Return a scaled summary row for column ``i`` of ``trace[stoch]``.

    The row starts with ``mean`` of the whole column, followed by the
    values returned by ``hpd(..., .05)``; every entry is multiplied by
    ``factor``. ``trace``, ``mean`` and ``hpd`` are read from module scope.

    NOTE(review): the interval is computed from exactly the two rows
    ``li`` and ``ui`` (list indexing), not from the slice ``li:ui`` --
    confirm this is intended.
    """
    centre = mean(trace[stoch][:, i]) * factor
    interval = hpd(trace[stoch][[li, ui], i], .05) * factor
    return [centre] + list(interval)
def analyze(parameters, datasets):
    """Produce diagnostic plots and a parameter summary for a stored run.

    Reads ``Data/<sumatra_label>/traces.h5`` (table
    ``root.chain0.PyMCsamples``), skipping columns that start with
    ``'Metropolis'`` and the ``'deviance'`` column, then writes under
    ``Data/<sumatra_label>/``:

    * ``acf/<param>.png``      -- autocorrelation plot per parameter
    * ``cornerplot.png``       -- corner plot of all parameters
    * ``parameters.csv``       -- per-parameter summary statistics
    * ``results/Figure<N>.png`` -- one file per figure yielded by
      ``plot_results``

    The label-relative path of every written file is appended to the
    module-level ``output_files`` list. ``os.makedirs`` is called without
    ``exist_ok``, so the ``acf`` and ``results`` directories must not
    exist yet.

    Parameters
    ----------
    parameters : mapping
        Must provide ``'sumatra_label'``, ``'input_database'``,
        ``'compare_databases'`` and an ``as_dict()`` method whose
        ``'parameters'`` entry is keyed by parameter name.
    datasets
        Passed through unchanged to ``plot_results``.
    """
    label = parameters['sumatra_label']
    image_path = os.path.join('Data', label)
    # Save traces
    trace_file = str(os.path.join('Data', label, 'traces.h5'))
    data_dict = OrderedDict()
    os.makedirs(os.path.join(image_path, 'acf'))

    # Load every sampled parameter, in sorted name order.
    with tables.open_file(trace_file, mode='r') as h5:
        wanted = sorted(
            col for col in h5.root.chain0.PyMCsamples.colnames
            if not (col.startswith('Metropolis') or col == 'deviance'))
        for param in wanted:
            data_dict[param] = np.asarray(
                h5.root.chain0.PyMCsamples.read(field=param), dtype='float')

    # One autocorrelation plot per parameter.
    for param, trace in data_dict.items():
        acf_file = param + '.png'
        figure = plt.figure()
        figure.gca().plot(autocorr(trace))
        figure.gca().set_title(param + ' Autocorrelation')
        figure.savefig(str(os.path.join(image_path, 'acf', acf_file)))
        plt.close(figure)
        output_files.append(str(os.path.join(label, 'acf', acf_file)))

    # Corner plot: one column of samples per parameter.
    sample_matrix = np.vstack(list(data_dict.values())).T
    data_truths = [
        parameters.as_dict()['parameters'][key].get('compare', None)
        for key in data_dict
    ]
    figure = corner(sample_matrix,
                    labels=list(data_dict.keys()),
                    quantiles=[0.16, 0.5, 0.84],
                    truths=data_truths,
                    show_titles=True,
                    title_args={"fontsize": 40},
                    rasterized=True)
    figure.savefig(str(os.path.join(image_path, 'cornerplot.png')))
    output_files.append(str(os.path.join(label, 'cornerplot.png')))
    plt.close(figure)

    # Write CSV file with parameter summary (should be close to pymc's format)
    fieldnames = [
        'Parameter', 'Mean', 'SD', 'Lower 95% HPD', 'Upper 95% HPD',
        'MC error', 'q2.5', 'q25', 'q50', 'q75', 'q97.5'
    ]
    with open(str(os.path.join(image_path, 'parameters.csv')), 'w') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames)
        writer.writeheader()
        for parname, trace in data_dict.items():
            qxx = utils.quantiles(trace, qlist=(2.5, 25, 50, 75, 97.5))
            lower_hpd, upper_hpd = utils.hpd(trace, 0.05)
            writer.writerow({
                'Parameter': parname,
                'Mean': trace.mean(0),
                'SD': trace.std(0),
                'Lower 95% HPD': lower_hpd,
                'Upper 95% HPD': upper_hpd,
                'MC error': batchsd(trace, min(len(trace), 100)),
                'q2.5': qxx[2.5],
                'q25': qxx[25],
                'q50': qxx[50],
                'q75': qxx[75],
                'q97.5': qxx[97.5],
            })
    output_files.append(str(os.path.join(label, 'parameters.csv')))

    # Generate comparison figures
    os.makedirs(os.path.join(image_path, 'results'))
    input_database = Database(parameters['input_database'])
    compare_databases = {
        key: Database(value)
        for key, value in parameters['compare_databases'].items()
    }
    result_figures = plot_results(input_database, datasets, data_dict,
                                  databases=compare_databases)
    # Figures are numbered from 1 in the order they are yielded.
    for idx, fig in enumerate(result_figures, start=1):
        figure_file = 'Figure{}.png'.format(idx)
        fig.savefig(str(os.path.join(image_path, 'results', figure_file)))
        output_files.append(str(os.path.join(label, 'results', figure_file)))
        plt.close(fig)