def rendHist3D(x, y, z, imageBounds, pixelSize, zb, sliceSize=100):
    """Bin 3D point coordinates into a voxel-count histogram.

    Args:
        x, y, z: equal-length sequences with the coordinates of each point.
        imageBounds: object exposing ``x0``, ``x1``, ``y0`` and ``y1``.
        pixelSize: spacing of the bin edges along x and y.
        zb: pair ``(z_start, z_stop)``; z edges start at ``zb[0]`` and are
            generated up to (but excluding) ``zb[1] + sliceSize``.
        sliceSize: spacing of the bin edges along z.

    Returns:
        The count array returned by ``numpy.histogramdd``; points outside the
        outermost edges are not counted.

    Note:
        ``numpy.arange`` excludes its stop value, so the x/y edges end before
        ``imageBounds.x1`` / ``imageBounds.y1``.
    """
    x_edges = numpy.arange(imageBounds.x0, imageBounds.x1, pixelSize)
    y_edges = numpy.arange(imageBounds.y0, imageBounds.y1, pixelSize)
    z_edges = numpy.arange(zb[0], zb[1] + sliceSize, sliceSize)

    # ``scipy.histogramdd`` was only a deprecated re-export of the NumPy
    # function; call NumPy directly so this keeps working on current SciPy.
    counts, _ = numpy.histogramdd([x, y, z], bins=(x_edges, y_edges, z_edges))
    return counts
def rendHist3D(x, y, z, imageBounds, pixelSize, zb, sliceSize=100):
    """Render points as a 3D histogram of counts per voxel.

    Args:
        x, y, z: equal-length sequences of point coordinates.
        imageBounds: object with attributes ``x0``, ``x1``, ``y0``, ``y1``.
        pixelSize: edge spacing used for both the x and the y axis.
        zb: two-element sequence; z edges run from ``zb[0]`` in steps of
            ``sliceSize`` and stop before ``zb[1] + sliceSize``.
        sliceSize: edge spacing along z.

    Returns:
        Array of per-voxel counts with one axis per coordinate (x, y, z).
    """
    bins = (
        numpy.arange(imageBounds.x0, imageBounds.x1, pixelSize),
        numpy.arange(imageBounds.y0, imageBounds.y1, pixelSize),
        numpy.arange(zb[0], zb[1] + sliceSize, sliceSize),
    )
    # Use NumPy's histogramdd: the ``scipy.histogramdd`` name was a
    # deprecated alias of it and is absent from current SciPy releases.
    return numpy.histogramdd([x, y, z], bins=bins)[0]
def volWeightBeam3d(beam, xgrid, ygrid, zgrid, trace=0, ds=2e-3, **kwargs):
    r"""Accumulate the etendue-weighted path of one or more beams on a 3D grid.

    A single beam is sampled every ``ds`` between ``beam.norm.s[trace]`` and
    ``beam.norm.s[-1]`` (end excluded); every sample adds
    ``beam.etendue * ds`` to the grid cell that contains it.  Anything that
    lacks the beam attributes is treated as an iterable of beams and the
    contributions of its members are summed.

    Args:
        beam: a beam-like object, or an (arbitrarily nested) iterable of
            them.  A beam must be callable with an array of positions and
            return an object whose ``x()`` yields one row per axis, and must
            expose ``norm.s`` and ``etendue``.
        xgrid, ygrid, zgrid: bin edges along the three axes.
        trace: index into ``beam.norm.s`` where sampling starts.
        ds: sampling step along the beam.
        **kwargs: forwarded unchanged to the recursive calls.

    Returns:
        Array of shape ``(len(xgrid)-1, len(ygrid)-1, len(zgrid)-1)`` holding
        the summed weights.
    """
    # Function-scope import: the ``scipy.zeros`` / ``scipy.mgrid`` /
    # ``scipy.ones`` / ``scipy.histogramdd`` aliases previously used here were
    # deprecated re-exports of these NumPy names.
    import numpy

    out = numpy.zeros((len(xgrid) - 1, len(ygrid) - 1, len(zgrid) - 1))
    try:
        temp = beam(numpy.mgrid[beam.norm.s[trace]:beam.norm.s[-1]:ds]).x()
        out += numpy.histogramdd(
            temp.T,
            bins=[xgrid, ygrid, zgrid],
            weights=numpy.ones(temp[0].shape) * beam.etendue * ds,
        )[0]
    except AttributeError:
        # Not a single beam: treat it as a collection and recurse.
        for i in beam:
            try:
                # Recurse into THIS function.  The previous code called
                # ``volWeightBeam`` here, so nested beams never reached the
                # 3D histogram.
                out += volWeightBeam3d(i, xgrid, ygrid, zgrid,
                                       trace=trace, ds=ds, **kwargs)
            except TypeError:
                # Best effort: skip members that are neither a beam nor
                # iterable.
                pass
    return out
def rendHist3D(x, y, z, imageBounds, pixelSize, sliceSize=100):
    """Bin 3D point coordinates into a voxel-count histogram.

    All three axes take their limits from ``imageBounds``.  The stop value of
    each edge sequence is padded by 1.01 steps so that the upper bound is
    always covered by a bin even though ``numpy.arange`` excludes its stop.

    Args:
        x, y, z: equal-length sequences with the coordinates of each point.
        imageBounds: object exposing ``x0``, ``x1``, ``y0``, ``y1``, ``z0``
            and ``z1``.
        pixelSize: spacing of the bin edges along x and y.
        sliceSize: spacing of the bin edges along z.

    Returns:
        The count array returned by ``numpy.histogramdd``.
    """
    x_edges = numpy.arange(imageBounds.x0,
                           imageBounds.x1 + 1.01 * pixelSize, pixelSize)
    y_edges = numpy.arange(imageBounds.y0,
                           imageBounds.y1 + 1.01 * pixelSize, pixelSize)
    z_edges = numpy.arange(imageBounds.z0,
                           imageBounds.z1 + 1.01 * sliceSize, sliceSize)

    # ``scipy.histogramdd`` was only a deprecated re-export of the NumPy
    # function; call NumPy directly so this keeps working on current SciPy.
    counts, _ = numpy.histogramdd([x, y, z], bins=(x_edges, y_edges, z_edges))
    return counts
def bovy_dens2d(x,y, *args,**kwargs):
    '''
    histogram (x, y) on a 2D grid and hand the result to
    bovy_plot.bovy_dens2d for display

    hist_2d = bovy_dens2d(x,y, *args,**kwargs)

    keywords consumed here (everything else in kwargs is passed on to
    bovy_plot.bovy_dens2d; *args is accepted but not used):
       xrange, yrange - histogram/plot limits (default: data min/max)
       bins           - passed to histogramdd
                        (default: 0.3*sqrt(len(x)), rounded)
       aspect         - default: x extent divided by y extent
       weights        - per-point weights for the histogram
       levels         - contour levels
                        (default: special.erf(0.5*sc.arange(1,4)))

    returns the 2D histogram (x along the first axis)
    '''
    try:
        import bovy_plot
    except ImportError:
        # Nothing below can work without the module, so report and stop here
        # instead of failing later on an unbound name.
        print('Import bovyplot failed')
        raise

    # ``in`` / ``pop`` replace dict.has_key, which no longer exists in
    # Python 3; defaults that touch the data stay lazy so they are only
    # computed when the caller did not supply the keyword.
    if 'xrange' in kwargs:
        x_range= kwargs.pop('xrange')
    else:
        x_range= [x.min(),x.max()]
    if 'yrange' in kwargs:
        y_range= kwargs.pop('yrange')
    else:
        y_range= [y.min(),y.max()]
    ndata= len(x)
    if 'bins' in kwargs:
        bins= kwargs.pop('bins')
    else:
        # a bin count has to be an integer; round() may return a float
        bins= int(round(0.3*sc.sqrt(ndata)))
    if 'aspect' in kwargs:
        aspect= kwargs.pop('aspect')
    else:
        aspect= (x_range[1]-x_range[0])/(y_range[1]-y_range[0])
    weights= kwargs.pop('weights',None)
    if 'levels' in kwargs:
        levels= kwargs.pop('levels')
    else:
        levels= special.erf(0.5*sc.arange(1,4))

    # ``weights`` used to be read from kwargs and then silently dropped.
    hh_2d, edges= sc.histogramdd(sc.array([x, y]).T,
                                 bins=bins, range=[x_range ,y_range],
                                 weights=weights)
    bovy_plot.bovy_dens2d(hh_2d.T,
                          contours=True,levels=levels,cntrmass=True,
                          cmap='gist_yarg',origin='lower',
                          xrange=x_range, yrange=y_range, aspect=aspect,
                          interpolation='nearest', retCumImage=True,
                          **kwargs)
    return hh_2d
def volWeightBeam3d(beam, xgrid, ygrid, zgrid, trace=0, ds=2e-3, **kwargs):
    r"""Histogram beam samples, weighted by etendue and step, on a 3D grid.

    For a single beam, points are generated every ``ds`` from
    ``beam.norm.s[trace]`` up to (excluding) ``beam.norm.s[-1]`` and binned on
    the grid; each point carries the weight ``beam.etendue * ds``.  An input
    without the beam attributes is iterated and the results for its members
    are added together.

    Args:
        beam: beam-like object or a (possibly nested) iterable of them.  A
            beam is called with the array of sample positions and must return
            an object whose ``x()`` gives an array with one row per axis; it
            must also provide ``norm.s`` and ``etendue``.
        xgrid, ygrid, zgrid: bin edges for the three axes.
        trace: index into ``beam.norm.s`` selecting the first sample position.
        ds: distance between consecutive samples.
        **kwargs: passed through to the recursive calls only.

    Returns:
        Array of shape ``(len(xgrid)-1, len(ygrid)-1, len(zgrid)-1)``.
    """
    # Local import because the deprecated ``scipy.<numpy name>`` aliases used
    # before (zeros, mgrid, ones, histogramdd) are NumPy functions.
    import numpy

    shape = (len(xgrid) - 1, len(ygrid) - 1, len(zgrid) - 1)
    out = numpy.zeros(shape)
    try:
        positions = numpy.mgrid[beam.norm.s[trace]:beam.norm.s[-1]:ds]
        temp = beam(positions).x()
        weights = numpy.ones(temp[0].shape) * beam.etendue * ds
        out += numpy.histogramdd(temp.T, bins=[xgrid, ygrid, zgrid],
                                 weights=weights)[0]
    except AttributeError:
        # ``beam`` is a container, not a beam: sum over its members.
        for member in beam:
            try:
                # Must recurse into volWeightBeam3d itself; calling
                # ``volWeightBeam`` (as before) never produced a 3D result.
                out += volWeightBeam3d(member, xgrid, ygrid, zgrid,
                                       trace=trace, ds=ds, **kwargs)
            except TypeError:
                # Member is neither a beam nor iterable: ignore it.
                pass
    return out
def from_samples(self,data,params,weight=None,bins=30,method='cic',bw_method='scott'):
    """Build ``self.mesh`` (normalised to a maximum of 1) from samples.

    Args:
        data: list or array with one entry (row) of samples per parameter;
            any other object is indexed as ``data[par]`` for each ``par`` in
            ``params``.
        params: parameter names, stored as ``self.parameters``.
        weight: optional per-sample weights.
        bins: one value applied to every parameter, or a list with one entry
            per parameter.  A scalar entry ``b`` produces ``b`` evenly spaced
            nodes between the sample minimum and maximum; a non-scalar entry
            is used as the nodes directly.
        method: ``'gaussian_kde'`` evaluates a ``scipy.stats.gaussian_kde``
            at ``self.mnodes()``; ``'cic'`` delegates to ``sample_cic``; any
            other value builds a density histogram with the nodes as bin
            edges and then replaces ``self.nodes`` by the bin centres.
        bw_method: bandwidth option forwarded to ``gaussian_kde``.
    """
    # Function-scope import: the ``scipy.ndarray`` / ``linspace`` /
    # ``isscalar`` / ``histogramdd`` names used before were deprecated
    # re-exports of NumPy.
    import numpy

    self.parameters = params
    if not isinstance(data,(list,numpy.ndarray)):
        data = [data[par] for par in params]
    if not isinstance(bins,list):
        bins = [bins]*len(data)
    self.nodes = [numpy.linspace(d.min(),d.max(),b) if numpy.isscalar(b) else b
                  for d,b in zip(data,bins)]
    if method == 'gaussian_kde':
        density = scipy.stats.gaussian_kde(data,weights=weight,bw_method=bw_method)
        self.mesh = density(self.mnodes())
    elif method == 'cic':
        self.mesh = sample_cic(self.nodes,data,weight=weight)
    else:
        # histogramdd reads an ndarray as (n_samples, n_dims) but a list as
        # one sequence per dimension.  Transposing gives the (n_samples,
        # n_dims) layout for both, so an array with one row per parameter is
        # no longer misread.
        self.mesh = numpy.histogramdd(numpy.transpose(data),weights=weight,
                                      bins=self.nodes,density=True)[0]
        self.nodes = [(nodes[:-1] + nodes[1:])/2. for nodes in self.nodes]
    self.mesh /= self.mesh.max()
def HistP(nodes, FeatPoints, NC, VNORM, KDTnc, radius, dd):
    """Fill and return the polar histograms of the requested nodes.

    For every entry ``inc`` of ``nodes`` the point ``i = FeatPoints[inc]`` is
    looked up, a local frame (ud, vd, wd) is built from ``VNORM[i]`` and
    ``dd``, the neighbours of ``NC[i]`` within ``radius`` are expressed in
    that frame, and the triples (theta, phi, log(rho)) are histogrammed into
    the module-level ``PolarHist[inc]``.

    Args:
        nodes: indices into ``FeatPoints`` / ``PolarHist`` to process.
        FeatPoints: maps a node index to a row index of ``NC`` / ``VNORM``.
        NC: point coordinates, one row of three values per point.
        VNORM: per-point vectors; ``VNORM[i]`` is reshaped to (1, 3).
        KDTnc: object providing ``query_ball_point(point, radius)``
            (presumably a KD-tree built on ``NC`` -- confirm).
        radius: neighbourhood radius passed to ``query_ball_point``.
        dd: reference direction used to build the in-plane axis ``ud``.

    Returns:
        ``PolarHist[nodes, ]``.

    Relies on the module-level names ``PolarHist``, ``thetaB``, ``phiB`` and
    ``rhoB``.
    """
    # NOTE(review): ``[] * 6`` is just ``[]``, so PHW is an EMPTY array and
    # the multiplication below cannot broadcast against a 3D histogram unless
    # its last axis has length 1.  The intended weights are not visible here;
    # left unchanged -- please confirm what PHW should contain.
    PHW = np.array([] * 6)
    for inc in nodes:
        i = FeatPoints[inc]
        # local frame: wd from VNORM, ud from dd made perpendicular to wd
        wd = VNORM[i].reshape((1, 3))
        ud = np.cross(wd, np.cross(dd, wd))
        vd = np.cross(wd, ud)
        neighb = np.array(KDTnc.query_ball_point(NC[i], radius), int)
        # offsets of the neighbours (the point itself excluded); ``!=``
        # replaces the Python 2-only ``<>`` operator
        NNC = NC[neighb[neighb != i]] - NC[i]
        NNC_u = np.dot(NNC, ud.reshape((3, )))
        NNC_v = np.dot(NNC, vd.reshape((3, )))
        NNC_w = np.dot(NNC, wd.reshape((3, )))
        rho = cdist(NNC, np.array([[0, 0, 0]]), 'euclidean')[:, 0]
        theta, phi = np.arctan(NNC_v / NNC_u), np.arccos(NNC_w / rho)
        Logrho = np.log(rho)
        # np.histogramdd is the function the deprecated ``sp.histogramdd``
        # alias referred to
        PolarHist[inc] = np.histogramdd(np.c_[theta, phi, Logrho],
                                        bins=(thetaB, phiB, rhoB))[0] * PHW
    return PolarHist[nodes, ]
def density_contours(x,y, levels=[0.95],colors=['k'], bins=[20,20], xrange=None, yrange=None, cmap=None):
    '''
    overplot density contours on the current matplotlib axes

    optional input:
       - xrange, yrange : histogram limits (default: min/max of the data)
       - bins=[20,20]
       - levels=[0.95], levels are probability masses contained within
         the contour
       - colors=['k']
       - cmap=None : if len(levels) != len(colors), we use the
         cubehelix_r colormap

    snippets taken from Jo Bovy's bovy_plot.py
    '''
    # fall back to the data extent when no limits are given
    if xrange is None:
        xrange = [min(x),max(x)]
    if yrange is None:
        yrange = [min(y),max(y)]
    # accept a bare colour string / a bare float level
    if type(colors) is str:
        colors = [colors]
    if type(levels) is float:
        levels = [levels]
    if cmap is None and len(colors) != len(levels):
        colors = None
        cmap = 'cubehelix_r'

    counts, edges = sc.histogramdd(sc.array([x,y]).T,
                                   bins=bins,range=[xrange,yrange])
    image = counts.T
    extent = [edges[0][0], edges[0][-1], edges[1][0], edges[1][-1]]
    aspect = (xrange[1]-xrange[0])/(yrange[1]-yrange[0])

    # cumulative mass: visit the cells from fullest to emptiest and give
    # each cell the fraction of all counts accumulated up to and
    # including it
    flat = image.flatten()
    order = sc.argsort(flat)[::-1]
    cumulative = sc.cumsum(flat[order])/sc.sum(flat)
    mass = sc.zeros(flat.size)
    mass[order] = cumulative
    mass = sc.reshape(mass,image.shape)

    # plot
    plt.contour(mass, levels, colors=colors, cmap=cmap,
                extent=extent, aspect=aspect)
def scatterplot(x,y,*args,**kwargs):
    """
    NAME:

       scatterplot

    PURPOSE:

       make a 'smart' scatterplot that is a density plot in high-density
       regions and a regular scatterplot for outliers

    INPUT:

       x, y - data arrays

       xlabel - (raw string!) x-axis label, LaTeX math mode, no $s needed

       ylabel - (raw string!) y-axis label, LaTeX math mode, no $s needed

       xrange

       yrange

       bins - number of bins to use in each dimension

       weights - data-weights

       levels - contour levels; points whose interpolated cumulative
                value lies above levels[-1] are plotted individually

       aspect - aspect ratio

       onedhists - if True, make one-d histograms on the sides

       onedhisttype, onedhistcolor, onedhistfc, onedhistec

       *args and any other keyword are passed on to bovy_plot when the
       individual points are drawn

    OUTPUT:

       plot to output device

    HISTORY:

       2010-04-15 - Written - Bovy (NYU)

    """
    # ``in`` / ``pop`` replace dict.has_key (gone in Python 3); defaults that
    # touch the data are only computed when the keyword is absent.
    xlabel= kwargs.pop('xlabel',None)
    ylabel= kwargs.pop('ylabel',None)
    if 'xrange' in kwargs:
        x_range= kwargs.pop('xrange')
    else:
        x_range= [x.min(),x.max()]
    if 'yrange' in kwargs:
        y_range= kwargs.pop('yrange')
    else:
        y_range= [y.min(),y.max()]
    ndata= len(x)
    # a bin count has to be an integer; round() may return a float
    bins= kwargs.pop('bins',int(round(0.3*sc.sqrt(ndata))))
    weights= kwargs.pop('weights',None)
    levels= kwargs.pop('levels',special.erf(0.5*sc.arange(1,4)))
    if 'aspect' in kwargs:
        aspect= kwargs.pop('aspect')
    else:
        aspect= (x_range[1]-x_range[0])/(y_range[1]-y_range[0])
    onedhists= kwargs.pop('onedhists',False)
    onedhisttype= kwargs.pop('onedhisttype','step')
    onedhistcolor= kwargs.pop('onedhistcolor','k')
    onedhistfc= kwargs.pop('onedhistfc','w')
    onedhistec= kwargs.pop('onedhistec','k')
    if onedhists:
        fig= pyplot.figure()
        nullfmt   = NullFormatter()         # no labels
        # definitions for the axes
        left, width = 0.1, 0.65
        bottom, height = 0.1, 0.65
        bottom_h = left_h = left+width
        rect_scatter = [left, bottom, width, height]
        rect_histx = [left, bottom_h, width, 0.2]
        rect_histy = [left_h, bottom, 0.2, height]
        axScatter = pyplot.axes(rect_scatter)
        axHistx = pyplot.axes(rect_histx)
        axHisty = pyplot.axes(rect_histy)
        # no labels
        axHistx.xaxis.set_major_formatter(nullfmt)
        axHistx.yaxis.set_major_formatter(nullfmt)
        axHisty.xaxis.set_major_formatter(nullfmt)
        axHisty.yaxis.set_major_formatter(nullfmt)
        fig.sca(axScatter)
    data= sc.array([x,y]).T
    hist, edges= sc.histogramdd(data,bins=bins,range=[x_range,y_range],
                                weights=weights)
    cumimage= bovy_dens2d(hist.T,contours=True,levels=levels,cntrmass=True,
                          cntrcolors='k',cmap=cm.gist_yarg,origin='lower',
                          xrange=x_range,yrange=y_range,xlabel=xlabel,
                          ylabel=ylabel,interpolation='nearest',
                          retCumImage=True,aspect=aspect,
                          overplot=onedhists)
    # bin centres, needed to interpolate the cumulative image at the data
    xedge= edges[0]
    binxs= sc.array([(xedge[ii]+xedge[ii+1])/2.
                     for ii in range(len(xedge)-1)])
    yedge= edges[1]
    binys= sc.array([(yedge[ii]+yedge[ii+1])/2.
                     for ii in range(len(yedge)-1)])
    cumInterp= interpolate.RectBivariateSpline(binxs,binys,cumimage.T,
                                               kx=1,ky=1)
    cums= sc.array([cumInterp(x[ii],y[ii])[0,0] for ii in range(len(x))])
    # points beyond the last contour level are drawn one by one
    outliers= cums > levels[-1]
    plotx= x[outliers]
    ploty= y[outliers]
    # identity test: ``weights == None`` compares element-wise for arrays
    if weights is not None:
        w8= weights[outliers]
        for ii in range(len(plotx)):
            bovy_plot(plotx[ii],ploty[ii],overplot=True,
                      color='%.2f'%(1.-w8[ii]),*args,**kwargs)
    else:
        bovy_plot(plotx,ploty,overplot=True,*args,**kwargs)
    #Add onedhists
    if not onedhists:
        return
    axHistx.hist(x, bins=bins,normed=True,histtype=onedhisttype,range=x_range,
                 color=onedhistcolor,fc=onedhistfc,ec=onedhistec)
    axHisty.hist(y, bins=bins, orientation='horizontal',normed=True,
                 histtype=onedhisttype,range=y_range,
                 color=onedhistcolor,fc=onedhistfc,ec=onedhistec)
    axHistx.set_xlim( axScatter.get_xlim() )
    axHisty.set_ylim( axScatter.get_ylim() )
def scatterplot(x,y,*args,**kwargs):
    """
    NAME:

       scatterplot

    PURPOSE:

       make a 'smart' scatterplot that is a density plot in high-density
       regions and a regular scatterplot for outliers

    INPUT:

       x, y - data (arrays or lists)

       xlabel - (raw string!) x-axis label, LaTeX math mode, no $s needed

       ylabel - (raw string!) y-axis label, LaTeX math mode, no $s needed

       xrange

       yrange

       bins - number of bins to use in each dimension

       weights - data-weights

       aspect - aspect ratio

       conditional - normalize each column separately (for probability
                     densities, i.e., cntrmass=True)

       contours - if False, don't plot contours

       justcontours - if True, only draw contours, no density

       cntrcolors - color of contours (can be array as for bovy_dens2d)

       cntrlw, cntrls - linewidths and linestyles for contour

       cntrSmooth - use ndimage.gaussian_filter to smooth before contouring

       levels - contour-levels; data points outside of the last level will
                be individually shown (so, e.g., if this list is descending,
                contours and data points will be overplotted)

       onedhists - if True, make one-d histograms on the sides

       onedhistx - if True, make one-d histograms on the side of the
                   x distribution

       onedhisty - if True, make one-d histograms on the side of the
                   y distribution

       onedhisttype, onedhistcolor, onedhistfc, onedhistec, onedhistls,
       onedhistlw

       onedhistsbins - number of bins of the one-d histograms

       onedhistxnormed, onedhistynormed - normed keyword for one-d histograms

       onedhistxweights, onedhistyweights - weights keyword for one-d
                                            histograms (default: weights)

       overplot - if True, draw in the current figure

       cmap= cmap for density plot

       hist= and edges= - you can supply the histogram of the data yourself,
                          this can be useful if you want to censor the data,
                          both need to be set and calculated using
                          scipy.histogramdd with the given range

       retAxes= return all Axes instances

    OUTPUT:

       plot to output device, Axes instance(s) or not, depending on input

    HISTORY:

       2010-04-15 - Written - Bovy (NYU)

    """
    xlabel= kwargs.pop('xlabel',None)
    ylabel= kwargs.pop('ylabel',None)
    if 'xrange' in kwargs:
        x_range= kwargs.pop('xrange')
    else:
        if isinstance(x,list): x_range=[sc.amin(x),sc.amax(x)]
        else: x_range=[x.min(),x.max()]
    if 'yrange' in kwargs:
        y_range= kwargs.pop('yrange')
    else:
        if isinstance(y,list): y_range=[sc.amin(y),sc.amax(y)]
        else: y_range=[y.min(),y.max()]
    ndata= len(x)
    # a bin count has to be an integer; round() may return a float
    default_nbins= int(round(0.3*sc.sqrt(ndata)))
    bins= kwargs.pop('bins',default_nbins)
    weights= kwargs.pop('weights',None)
    levels= kwargs.pop('levels',special.erf(sc.arange(1,4)/sc.sqrt(2.)))
    aspect= kwargs.pop('aspect',(x_range[1]-x_range[0])/(y_range[1]-y_range[0]))
    conditional= kwargs.pop('conditional',False)
    contours= kwargs.pop('contours',True)
    justcontours= kwargs.pop('justcontours',False)
    cntrcolors= kwargs.pop('cntrcolors','k')
    cntrlw= kwargs.pop('cntrlw',None)
    cntrls= kwargs.pop('cntrls',None)
    cntrSmooth= kwargs.pop('cntrSmooth',None)
    onedhists= kwargs.pop('onedhists',False)
    onedhistx= kwargs.pop('onedhistx',onedhists)
    onedhisty= kwargs.pop('onedhisty',onedhists)
    onedhisttype= kwargs.pop('onedhisttype','step')
    onedhistcolor= kwargs.pop('onedhistcolor','k')
    onedhistfc= kwargs.pop('onedhistfc','w')
    onedhistec= kwargs.pop('onedhistec','k')
    onedhistls= kwargs.pop('onedhistls','solid')
    onedhistlw= kwargs.pop('onedhistlw',None)
    onedhistsbins= kwargs.pop('onedhistsbins',default_nbins)
    overplot= kwargs.pop('overplot',False)
    cmap= kwargs.pop('cmap',cm.gist_yarg)
    onedhistxnormed= kwargs.pop('onedhistxnormed',True)
    onedhistynormed= kwargs.pop('onedhistynormed',True)
    onedhistxweights= kwargs.pop('onedhistxweights',weights)
    onedhistyweights= kwargs.pop('onedhistyweights',weights)
    retAxes= kwargs.pop('retAxes',False)
    anyonedhist= onedhists or onedhistx or onedhisty
    if anyonedhist:
        if overplot: fig= pyplot.gcf()
        else: fig= pyplot.figure()
        nullfmt   = NullFormatter()         # no labels
        # definitions for the axes
        left, width = 0.1, 0.65
        bottom, height = 0.1, 0.65
        bottom_h = left_h = left+width
        rect_scatter = [left, bottom, width, height]
        rect_histx = [left, bottom_h, width, 0.2]
        rect_histy = [left_h, bottom, 0.2, height]
        axScatter = pyplot.axes(rect_scatter)
        if onedhistx:
            axHistx = pyplot.axes(rect_histx)
            # no labels
            axHistx.xaxis.set_major_formatter(nullfmt)
            axHistx.yaxis.set_major_formatter(nullfmt)
        if onedhisty:
            axHisty = pyplot.axes(rect_histy)
            # no labels
            axHisty.xaxis.set_major_formatter(nullfmt)
            axHisty.yaxis.set_major_formatter(nullfmt)
        fig.sca(axScatter)
    data= sc.array([x,y]).T
    if 'hist' in kwargs and 'edges' in kwargs:
        hist= kwargs.pop('hist')
        edges= kwargs.pop('edges')
    else:
        hist, edges= sc.histogramdd(data,bins=bins,range=[x_range,y_range],
                                    weights=weights)
    if contours:
        cumimage= bovy_dens2d(hist.T,contours=contours,levels=levels,
                              cntrmass=contours,cntrSmooth=cntrSmooth,
                              cntrcolors=cntrcolors,cmap=cmap,origin='lower',
                              xrange=x_range,yrange=y_range,xlabel=xlabel,
                              ylabel=ylabel,interpolation='nearest',
                              retCumImage=True,aspect=aspect,
                              conditional=conditional,
                              cntrlw=cntrlw,cntrls=cntrls,
                              justcontours=justcontours,zorder=5*justcontours,
                              overplot=(onedhists or overplot or onedhistx or onedhisty))
    else:
        cumimage= bovy_dens2d(hist.T,contours=contours,
                              cntrcolors=cntrcolors,
                              cmap=cmap,origin='lower',
                              xrange=x_range,yrange=y_range,xlabel=xlabel,
                              ylabel=ylabel,interpolation='nearest',
                              conditional=conditional,
                              retCumImage=True,aspect=aspect,
                              cntrlw=cntrlw,cntrls=cntrls,
                              overplot=(onedhists or overplot or onedhistx or onedhisty))
    #Set axes and labels
    pyplot.axis(list(x_range)+list(y_range))
    if not overplot:
        _add_axislabels(xlabel,ylabel)
        _add_ticks()
    # bin centres (edges may be user-supplied, so no array arithmetic here)
    xedge= edges[0]
    binxs= sc.array([(xedge[ii]+xedge[ii+1])/2.
                     for ii in range(len(xedge)-1)])
    yedge= edges[1]
    binys= sc.array([(yedge[ii]+yedge[ii+1])/2.
                     for ii in range(len(yedge)-1)])
    cumInterp= interpolate.RectBivariateSpline(binxs,binys,cumimage.T,
                                               kx=1,ky=1)
    cums= sc.array([cumInterp(x[ii],y[ii])[0,0] for ii in range(len(x))])
    # Lists are accepted above for the range computation, but a list cannot
    # be indexed with a boolean mask; select the outliers from array copies.
    outliers= cums > levels[-1]
    plotx= sc.array(x)[outliers]
    ploty= sc.array(y)[outliers]
    if not len(plotx) == 0:
        # identity test: ``weights == None`` compares element-wise for arrays
        if weights is not None:
            w8= sc.array(weights)[outliers]
            for ii in range(len(plotx)):
                bovy_plot(plotx[ii],ploty[ii],overplot=True,
                          color='%.2f'%(1.-w8[ii]),*args,**kwargs)
        else:
            bovy_plot(plotx,ploty,overplot=True,zorder=1,*args,**kwargs)
    #Add onedhists
    if not anyonedhist:
        if retAxes:
            return pyplot.gca()
        else:
            return None
    if onedhistx:
        histx, edges, patches= axHistx.hist(x,bins=onedhistsbins,
                                            normed=onedhistxnormed,
                                            weights=onedhistxweights,
                                            histtype=onedhisttype,
                                            range=sorted(x_range),
                                            color=onedhistcolor,fc=onedhistfc,
                                            ec=onedhistec,ls=onedhistls,
                                            lw=onedhistlw)
    if onedhisty:
        histy, edges, patches= axHisty.hist(y,bins=onedhistsbins,
                                            orientation='horizontal',
                                            weights=onedhistyweights,
                                            normed=onedhistynormed,
                                            histtype=onedhisttype,
                                            range=sorted(y_range),
                                            color=onedhistcolor,fc=onedhistfc,
                                            ec=onedhistec,ls=onedhistls,
                                            lw=onedhistlw)
    if onedhistx and not overplot:
        axHistx.set_xlim( axScatter.get_xlim() )
        axHistx.set_ylim( 0, 1.2*sc.amax(histx))
    if onedhisty and not overplot:
        axHisty.set_ylim( axScatter.get_ylim() )
        axHisty.set_xlim( 0, 1.2*sc.amax(histy))
    if not onedhistx: axHistx= None
    if not onedhisty: axHisty= None
    if retAxes:
        return (axScatter,axHistx,axHisty)
    else:
        return None
def ex10(exclude=sc.array([1,2,3,4]),
         plotfilenameA='ex10a.png',
         plotfilenameB='ex10b.png',
         nburn=1000,nsamples=200000,
         parsigma=[5,.075,0.1],
         bovyprintargs={}):
    """ex10: solve exercise 10 using MCMC sampling
    Input:
       exclude        - ID numbers to exclude from the analysis (can be None)
       plotfilenameA  - filename for the plot of the overall best fit
       plotfilenameB  - filename for the plot of the marginalized best fit
       nburn          - number of burn-in samples
       nsamples       - number of samples to take after burn-in
       parsigma       - proposal distribution width (Gaussian), one entry
                        per sampled parameter (intercept, slope, log S)
       bovyprintargs  - keyword arguments passed to plot.bovy_print
    Output:
       plot
    History:
       2010-05-07 - Written - Bovy (NYU)
    """
    # NOTE(review): current NumPy releases reject seeds outside
    # [0, 2**32 - 1]; left as is because changing it changes the chain.
    sc.random.seed(-1) #In the interest of reproducibility (if that's a word)
    #Read the data
    data= read_data('data_yerr.dat')
    ndata= len(data)
    # identity test: ``exclude == None`` compares element-wise for arrays
    if exclude is not None:
        nsample= ndata- len(exclude)
    else:
        nsample= ndata
    #First find the chi-squared solution, which we will use as an
    #initial guess
    #Put the data in the appropriate arrays and matrices
    Y= sc.zeros(nsample)
    X= sc.zeros(nsample)
    A= sc.ones((nsample,2))
    C= sc.zeros((nsample,nsample))
    yerr= sc.zeros(nsample)
    jj= 0
    for ii in range(ndata):
        if exclude is not None and sc.any(exclude == data[ii][0]):
            continue
        Y[jj]= data[ii][1][1]
        X[jj]= data[ii][1][0]
        A[jj,1]= data[ii][1][0]
        C[jj,jj]= data[ii][2]**2.
        yerr[jj]= data[ii][2]
        jj= jj+1
    #Now compute the best fit and the uncertainties
    bestfit= sc.dot(linalg.inv(C),Y.T)
    bestfit= sc.dot(A.T,bestfit)
    bestfitvar= sc.dot(linalg.inv(C),A)
    bestfitvar= sc.dot(A.T,bestfitvar)
    bestfitvar= linalg.inv(bestfitvar)
    bestfit= sc.dot(bestfitvar,bestfit)
    # The columns of A are (1, x), so the order is (b,m,logS): intercept
    # first, then slope
    initialguess= sc.array([bestfit[0],bestfit[1],0.])#(b,m,logS)
    #With this initial guess start off the sampling procedure
    initialX= objective(initialguess,X,Y,yerr)
    currentX= initialX
    bestX= initialX
    bestfit= initialguess
    currentguess= initialguess
    naccept= 0
    samples= []
    samples.append(currentguess)
    for jj in range(nburn+nsamples):
        #Draw a sample from the proposal distribution
        newsample= sc.zeros(3)
        newsample[0]= currentguess[0]+stats.norm.rvs()*parsigma[0]
        newsample[1]= currentguess[1]+stats.norm.rvs()*parsigma[1]
        newsample[2]= currentguess[2]+stats.norm.rvs()*parsigma[2]
        #Calculate the objective function for the newsample
        newX= objective(newsample,X,Y,yerr)
        #Accept or reject
        #Reject with the appropriate probability
        u= stats.uniform.rvs()
        accept=False
        try:
            test= m.exp(newX-currentX)
            if u < test:
                accept= True
        except OverflowError:
            # exp overflowed: the new point is overwhelmingly better
            accept= True
        if accept:
            #Accept
            currentX= newX
            currentguess= newsample
            naccept= naccept+1
        if currentX > bestX:
            bestfit= currentguess
            bestX= currentX
        samples.append(currentguess)
    acceptratio= double(naccept)/(nburn+nsamples)
    if acceptratio < .5 or acceptratio > .8:
        print("Acceptance ratio was "+str(acceptratio))

    # drop the burn-in samples; rows are (b, m, logS)
    samples= sc.array(samples).T[:,nburn:-1]
    # single-string print calls give the same output under Python 2 and 3
    print("Best-fit, overall")
    print("%s %s %s" % (bestfit, sc.mean(samples[2,:]),
                        sc.median(samples[2,:])))

    # a bin count has to be an integer; round() may return a float
    nbins= int(round(sc.sqrt(nsamples)/2.))
    histmb,edges= sc.histogramdd(samples.T[:,0:2],bins=nbins)
    indxi= sc.argmax(sc.amax(histmb,axis=1))
    indxj= sc.argmax(sc.amax(histmb,axis=0))
    print("Best-fit, marginalized")
    print("%s %s" % (edges[0][indxi-1], edges[1][indxj-1]))
    print("%s %s" % (edges[0][indxi], edges[1][indxj]))
    print("%s %s" % (edges[0][indxi+1], edges[1][indxj+1]))

    print("Best-fit for S marginalized")
    histS,edgesS= sc.histogram(samples.T[:,2],bins=nbins)
    indx= sc.argmax(histS)

    #Data with MAP line and sampling
    plot.bovy_print(**bovyprintargs)
    bestb= bestfit[0]
    bestm= bestfit[1]
    x_range=[0,300]
    y_range=[0,700]
    plot.bovy_plot(x_range,bestm*sc.array(x_range)+bestb,'k-',
                   xrange=x_range,yrange=y_range,
                   xlabel=r'$x$',ylabel=r'$y$',zorder=2)
    errorbar(X,Y,sc.exp(bestfit[2]/2.),
             marker='o',color='k',linestyle='None',zorder=1)
    plot.bovy_text(r'$\mathrm{MAP}\ :\ y = %4.2f \,x+ %4.0f' % (bestfit[1], bestfit[0])+r'$'+'\n'+r'$\mathrm{MAP}\ :\ \sqrt{S} = %3.1f$' % (sc.exp(bestfit[2]/2.)),
                   bottom_right=True)
    plot.bovy_end_print(plotfilenameA)

    #Data with the marginalized best-fit line
    plot.bovy_print(**bovyprintargs)
    bestb= edges[0][indxi]
    bestm= edges[1][indxj]
    bestS= edgesS[indx]
    x_range=[0,300]
    y_range=[0,700]
    plot.bovy_plot(x_range,bestm*sc.array(x_range)+bestb,'k-',
                   xrange=x_range,yrange=y_range,
                   xlabel=r'$x$',ylabel=r'$y$',zorder=2)
    errorbar(X,Y,sc.exp(bestS/2.),
             marker='o',color='k',linestyle='None',zorder=1)
    plot.bovy_text(r'$\mathrm{marginalized\ over\ S}\ :\ y = %4.2f \,x+ %4.0f' % (bestm, bestb)+r'$'+'\n'+r'$\mathrm{marginalized\ over}\ (m,b)\ :\ \sqrt{S} = %3.1f$' % (sc.exp(bestS/2.)),
                   bottom_right=True)
    plot.bovy_end_print(plotfilenameB)

    return
def scatterplot(x,y,*args,**kwargs):
    """
    NAME:

       scatterplot

    PURPOSE:

       make a 'smart' scatterplot that is a density plot in high-density
       regions and a regular scatterplot for outliers

    INPUT:

       x, y - data (arrays or lists)

       xlabel - (raw string!) x-axis label, LaTeX math mode, no $s needed

       ylabel - (raw string!) y-axis label, LaTeX math mode, no $s needed

       xrange

       yrange

       bins - number of bins to use in each dimension

       weights - data-weights

       levels - contour levels; points whose interpolated cumulative
                value lies above levels[-1] are plotted individually

       aspect - aspect ratio

       contours - if False, don't plot contours

       cntrcolors - color of contours (can be array as for bovy_dens2d)

       onedhists - if True, make one-d histograms on the sides

       onedhisttype, onedhistcolor, onedhistfc, onedhistec, onedhistls,
       onedhistlw

       onedhistxnormed, onedhistynormed - normed keyword for one-d histograms

       onedhistxweights, onedhistyweights - weights keyword for one-d
                                            histograms

       overplot - if True, draw in the current figure

       cmap= cmap for density plot

       hist= and edges= - you can supply the histogram of the data yourself,
                          this can be useful if you want to censor the data,
                          both need to be set and calculated using
                          scipy.histogramdd with the given range

    OUTPUT:

       plot to output device

    HISTORY:

       2010-04-15 - Written - Bovy (NYU)

    """
    # ``in`` / ``pop`` replace dict.has_key (gone in Python 3); defaults that
    # touch the data are only computed when the keyword is absent.
    xlabel= kwargs.pop('xlabel',None)
    ylabel= kwargs.pop('ylabel',None)
    if 'xrange' in kwargs:
        x_range= kwargs.pop('xrange')
    else:
        if isinstance(x,list): x_range=[sc.amin(x),sc.amax(x)]
        else: x_range=[x.min(),x.max()]
    if 'yrange' in kwargs:
        y_range= kwargs.pop('yrange')
    else:
        if isinstance(y,list): y_range=[sc.amin(y),sc.amax(y)]
        else: y_range=[y.min(),y.max()]
    ndata= len(x)
    # a bin count has to be an integer; round() may return a float
    bins= kwargs.pop('bins',int(round(0.3*sc.sqrt(ndata))))
    weights= kwargs.pop('weights',None)
    levels= kwargs.pop('levels',special.erf(0.5*sc.arange(1,4)))
    if 'aspect' in kwargs:
        aspect= kwargs.pop('aspect')
    else:
        aspect= (x_range[1]-x_range[0])/(y_range[1]-y_range[0])
    contours= kwargs.pop('contours',True)
    cntrcolors= kwargs.pop('cntrcolors','k')
    onedhists= kwargs.pop('onedhists',False)
    onedhisttype= kwargs.pop('onedhisttype','step')
    onedhistcolor= kwargs.pop('onedhistcolor','k')
    onedhistfc= kwargs.pop('onedhistfc','w')
    onedhistec= kwargs.pop('onedhistec','k')
    onedhistls= kwargs.pop('onedhistls','solid')
    onedhistlw= kwargs.pop('onedhistlw',None)
    overplot= kwargs.pop('overplot',False)
    cmap= kwargs.pop('cmap',cm.gist_yarg)
    onedhistxnormed= kwargs.pop('onedhistxnormed',True)
    onedhistynormed= kwargs.pop('onedhistynormed',True)
    onedhistxweights= kwargs.pop('onedhistxweights',None)
    onedhistyweights= kwargs.pop('onedhistyweights',None)
    if onedhists:
        if overplot: fig= pyplot.gcf()
        else: fig= pyplot.figure()
        nullfmt   = NullFormatter()         # no labels
        # definitions for the axes
        left, width = 0.1, 0.65
        bottom, height = 0.1, 0.65
        bottom_h = left_h = left+width
        rect_scatter = [left, bottom, width, height]
        rect_histx = [left, bottom_h, width, 0.2]
        rect_histy = [left_h, bottom, 0.2, height]
        axScatter = pyplot.axes(rect_scatter)
        axHistx = pyplot.axes(rect_histx)
        axHisty = pyplot.axes(rect_histy)
        # no labels
        axHistx.xaxis.set_major_formatter(nullfmt)
        axHistx.yaxis.set_major_formatter(nullfmt)
        axHisty.xaxis.set_major_formatter(nullfmt)
        axHisty.yaxis.set_major_formatter(nullfmt)
        fig.sca(axScatter)
    data= sc.array([x,y]).T
    if 'hist' in kwargs and 'edges' in kwargs:
        hist= kwargs.pop('hist')
        edges= kwargs.pop('edges')
    else:
        hist, edges= sc.histogramdd(data,bins=bins,range=[x_range,y_range],
                                    weights=weights)
    if contours:
        cumimage= bovy_dens2d(hist.T,contours=contours,levels=levels,
                              cntrmass=contours,
                              cntrcolors=cntrcolors,cmap=cmap,origin='lower',
                              xrange=x_range,yrange=y_range,xlabel=xlabel,
                              ylabel=ylabel,interpolation='nearest',
                              retCumImage=True,aspect=aspect,
                              overplot=(onedhists or overplot))
    else:
        cumimage= bovy_dens2d(hist.T,contours=contours,
                              cntrcolors=cntrcolors,
                              cmap=cmap,origin='lower',
                              xrange=x_range,yrange=y_range,xlabel=xlabel,
                              ylabel=ylabel,interpolation='nearest',
                              retCumImage=True,aspect=aspect,
                              overplot=(onedhists or overplot))
    # bin centres (edges may be user-supplied, so no array arithmetic here)
    xedge= edges[0]
    binxs= sc.array([(xedge[ii]+xedge[ii+1])/2.
                     for ii in range(len(xedge)-1)])
    yedge= edges[1]
    binys= sc.array([(yedge[ii]+yedge[ii+1])/2.
                     for ii in range(len(yedge)-1)])
    cumInterp= interpolate.RectBivariateSpline(binxs,binys,cumimage.T,
                                               kx=1,ky=1)
    cums= sc.array([cumInterp(x[ii],y[ii])[0,0] for ii in range(len(x))])
    # Lists are accepted above for the range computation, but a list cannot
    # be indexed with a boolean mask; select the outliers from array copies.
    outliers= cums > levels[-1]
    plotx= sc.array(x)[outliers]
    ploty= sc.array(y)[outliers]
    if not len(plotx) == 0:
        # identity test: ``weights == None`` compares element-wise for arrays
        if weights is not None:
            w8= sc.array(weights)[outliers]
            for ii in range(len(plotx)):
                bovy_plot(plotx[ii],ploty[ii],overplot=True,
                          color='%.2f'%(1.-w8[ii]),*args,**kwargs)
        else:
            bovy_plot(plotx,ploty,overplot=True,*args,**kwargs)
    #Add onedhists
    if not onedhists:
        return
    histx, edges, patches= axHistx.hist(x, bins=bins,normed=onedhistxnormed,
                                        weights=onedhistxweights,
                                        histtype=onedhisttype,
                                        range=sorted(x_range),
                                        color=onedhistcolor,fc=onedhistfc,
                                        ec=onedhistec,ls=onedhistls,
                                        lw=onedhistlw)
    histy, edges, patches= axHisty.hist(y, bins=bins,
                                        orientation='horizontal',
                                        weights=onedhistyweights,
                                        normed=onedhistynormed,
                                        histtype=onedhisttype,
                                        range=sorted(y_range),
                                        color=onedhistcolor,fc=onedhistfc,
                                        ec=onedhistec,ls=onedhistls,
                                        lw=onedhistlw)
    axHistx.set_xlim( axScatter.get_xlim() )
    axHisty.set_ylim( axScatter.get_ylim() )
    axHistx.set_ylim( 0, 1.2*sc.amax(histx))
    axHisty.set_xlim( 0, 1.2*sc.amax(histy))
def _load_xd_fit(filename):
    """Unpickle the (amp, mean, covar) triple of an XD fit stored in filename.

    Prints the same diagnostics as the caller used to and returns None when
    the file does not exist.
    """
    if not os.path.exists(filename):
        print(filename + " does not exist ...")
        print("Returning ...")
        return None
    # NOTE(review): pickle.load can execute arbitrary code contained in the
    # file; only run this on save files you produced or trust.
    with open(filename, "rb") as savefile:
        amp = pickle.load(savefile)
        mean = pickle.load(savefile)
        covar = pickle.load(savefile)
    return amp, mean, covar


def _xd_plot_coordinates(samples, options):
    """Convert XD samples into the (x, y) quantities that are plotted.

    Applies the exp / divide options to columns d1 and d2 and, for the DRW
    type, the logA conversion.  Returns None (after printing a message) when
    the DRW conversion is requested with an unsupported (d1, d2) pair.
    """
    xs = samples[:, options.d1]
    if options.expd1:
        xs = nu.exp(xs)
    elif options.divided1 is not None:
        xs = xs / options.divided1
    ys = samples[:, options.d2]
    if options.expd2:
        ys = nu.exp(ys)
    elif options.divided2 is not None:
        ys = ys / options.divided2
    if options.type == "DRW":
        # plot logA, logA = 0
        if options.d1 == 0 and options.d2 == 1:
            # Convert to logA
            xs = (nu.log(2.0) + xs + nu.log(1.0 - nu.exp(-1.0 / nu.exp(ys)))) / 2.0
        elif options.d1 == 1 and options.d2 == 0:
            # Convert to logA
            ys = (nu.log(2.0) + ys + nu.log(1.0 - nu.exp(-1.0 / nu.exp(xs)))) / 2.0
        else:
            print("d1 and d2 have to be 0 or 1 (and not the same!) ...")
            print("Returning ...")
            return None
    return xs, ys


def plotXDall(parser):
    """Plot samples from three XD fits (QSOs, stars, RR Lyrae) in one figure.

    parser must provide parse_args() returning (options, args) and
    print_help().  args[0], args[1] and args[2] are the pickled XD fits for
    the QSO, star and RR Lyrae populations, in that order.  The figure is
    written to options.plotfilename.  Returns None.
    """
    nu.random.seed(1)
    (options, args) = parser.parse_args()
    # All three save files are indexed below, so fewer than three arguments
    # is a usage error (the old "== 0" test let 1 or 2 through to an
    # IndexError).
    if len(args) < 3:
        parser.print_help()
        return
    fits = []
    for filename in args[:3]:
        fit = _load_xd_fit(filename)
        if fit is None:
            return
        fits.append(fit)
    if options.nsamplesstar is None:
        options.nsamplesstar = options.nsamples
    if options.nsamplesrrlyrae is None:
        options.nsamplesrrlyrae = options.nsamples
    # Load XD object in xdtarget and sample; the order (QSO, star, RR Lyrae)
    # is kept so the seeded random stream is consumed as before.
    nsamples = (options.nsamples, options.nsamplesstar, options.nsamplesrrlyrae)
    draws = []
    for (amp, mean, covar), nsample in zip(fits, nsamples):
        xdt = xdtarget.xdtarget(amp=amp, mean=mean, covar=covar)
        draws.append(xdt.sample(nsample=nsample))
    # Prepare for plotting
    coords = []
    for draw in draws:
        xy = _xd_plot_coordinates(draw, options)
        if xy is None:
            return
        coords.append(xy)
    (xs, ys), (starxs, starys), (rrxs, rrys) = coords
    # Plot
    xrange = [options.xmin, options.xmax]
    yrange = [options.ymin, options.ymax]
    data = sc.array([xs, ys]).T
    bins = int(round(0.3 * sc.sqrt(options.nsamples)))
    hist, edges = sc.histogramdd(data, bins=bins, range=[xrange, yrange])
    # Censor hist ASSUMES gamma=[0.,1.2], logA=[-9.21/2.,0.] for powerlawSF
    # x varies along axis 0 and y along axis 1, like the histogram itself.
    x = nu.zeros((bins, bins))
    y = nu.zeros((bins, bins))
    for bb in range(bins):
        x[:, bb] = nu.linspace(options.xmin, options.xmax, bins)
        y[bb, :] = nu.linspace(options.ymin, options.ymax, bins)
    # mask
    # Default keeps the name bound for types other than the two handled
    # below (previously a NameError); None is passed through to scatterplot.
    onedhistyweights = None
    if options.type == "powerlawSF":
        hist[(y < 0.1) * (x > -3.0) * (x < -1.5)] = nu.nan
        hist[(x < -3.0)] = nu.nan
        hist[(x > -2.0) * (y < (0.25 * x + 0.6))] = nu.nan
        onedhistyweights = nu.ones(len(ys)) / 100.0
    elif options.type == "DRW":
        hist[(y < (-4.223 * (x + 2) - 10))] = nu.nan
        hist[(y < -4.153) * (y < (58.47 * (x + 2.1) - 10.0))] = nu.nan
        hist[(y > -4.153) * (y < (2.93 * x + 2.0) - 1.0)] = nu.nan
        onedhistyweights = nu.ones(len(ys)) / 2500.0
    bovy_plot.bovy_print()
    # First just plot contours (all-white colormap hides the density image)
    cdict = {
        "red": ((0.0, 1.0, 1.0), (1.0, 1.0, 1.0)),
        "green": ((0.0, 1.0, 1.0), (1.0, 1.0, 1.0)),
        "blue": ((0.0, 1.0, 1.0), (1.0, 1.0, 1.0)),
    }
    allwhite = matplotlib.colors.LinearSegmentedColormap("allwhite", cdict, 256)
    bovy_plot.scatterplot(
        xs,
        ys,
        "b,",
        onedhists=True,
        bins=bins,
        cmap=allwhite,
        onedhistynormed=False,
        onedhistyweights=onedhistyweights,
        xrange=xrange,
        yrange=yrange,
        onedhistec="b",
        xlabel=options.xlabel,
        ylabel=options.ylabel,
    )
    bovy_plot.scatterplot(
        starxs, starys, "k,", onedhists=True, bins=bins, cmap=allwhite, xrange=xrange, yrange=yrange, overplot=True
    )
    bovy_plot.scatterplot(
        rrxs,
        rrys,
        "r,",
        onedhists=True,
        bins=bins,
        cmap=allwhite,
        onedhistec="r",
        xrange=xrange,
        yrange=yrange,
        overplot=True,
    )
    hist /= nu.nansum(hist)
    # Custom colormap (white to blue) for the QSO density
    cdict = {
        "red": ((0.0, 1.0, 1.0), (1.0, 0.0, 0.0)),
        "green": ((0.0, 1.0, 1.0), (1.0, 0.0, 0.0)),
        "blue": ((0.0, 1.0, 1.0), (1.0, 1.0, 1.0)),
    }
    my_cmap = matplotlib.colors.LinearSegmentedColormap("my_colormap", cdict, 256)
    bovy_plot.scatterplot(
        xs,
        ys,
        "b,",
        onedhists=False,
        contours=False,
        levels=[1.01],
        bins=bins,
        cmap=my_cmap,
        hist=hist,
        edges=edges,
        onedhistynormed=False,
        onedhistyweights=onedhistyweights,
        xrange=xrange,
        yrange=yrange,
        overplot=True,
    )
    # Stars
    data = sc.array([starxs, starys]).T
    hist, edges = sc.histogramdd(data, bins=bins, range=[xrange, yrange])
    if options.type == "powerlawSF":
        hist[(x > -2.5)] = nu.nan
        hist[(x < -2.5) * (y > (-0.19 * (x + 2.5)))] = nu.nan
    elif options.type == "DRW":
        hist[(y >= (-4.223 * (x + 2) - 10))] = nu.nan
    hist /= nu.nansum(hist)
    bovy_plot.scatterplot(
        starxs,
        starys,
        "k,",
        onedhists=True,
        contours=False,
        levels=[1.01],  # HACK such that outliers aren't plotted
        bins=bins,
        hist=hist,
        edges=edges,
        xrange=xrange,
        yrange=yrange,
        overplot=True,
    )
    # RR Lyrae
    data = sc.array([rrxs, rrys]).T
    hist, edges = sc.histogramdd(data, bins=bins, range=[xrange, yrange])
    if options.type == "powerlawSF":
        hist[(x < -2.5)] = nu.nan
        hist[(x > -2.5) * (y > ((x + 2.5) / 1.5) ** 9.0 * 1.15 + 0.1)] = nu.nan
    elif options.type == "DRW":
        hist[(y < -4.153) * (y >= (58.47 * (x + 2.1) - 10.0)) * (y > -7.5)] = nu.nan
        hist[(y < -7.5) * (x < -2.1)] = nu.nan
        hist[(y > -4.153) * (y >= (2.93 * x + 2.0 - 1.5))] = nu.nan
    # Custom colormap (white to red) for the RR Lyrae density
    cdict = {
        "red": ((0.0, 1.0, 1.0), (1.0, 1.0, 1.0)),
        "green": ((0.0, 1.0, 1.0), (1.0, 0.0, 0.0)),
        "blue": ((0.0, 1.0, 1.0), (1.0, 0.0, 0.0)),
    }
    my_cmap = matplotlib.colors.LinearSegmentedColormap("my_colormap", cdict, 256)
    hist /= nu.nansum(hist)
    bovy_plot.scatterplot(
        rrxs,
        rrys,
        "r,",
        onedhists=False,
        contours=False,
        levels=[1.01],  # HACK such that outliers aren't plotted
        bins=bins,
        cmap=my_cmap,
        hist=hist,
        edges=edges,
        xrange=xrange,
        yrange=yrange,
        overplot=True,
    )
    # Label
    if options.type == "powerlawSF":
        bovy_plot.bovy_text(-4.4, 1.15, r"$\mathrm{F/G\ stars}$", color="k")
        bovy_plot.bovy_text(-4.4, 1.05, r"$\mathrm{QSOs}$", color="b")
        bovy_plot.bovy_text(-4.4, 0.95, r"$\mathrm{RR\ Lyrae}$", color="r")
    elif options.type == "DRW":
        bovy_plot.bovy_text(-4.4, 2.88, r"$\mathrm{F/G\ stars}$", color="k")
        bovy_plot.bovy_text(-4.4, 1.76, r"$\mathrm{QSOs}$", color="b")
        bovy_plot.bovy_text(-4.4, 0.64, r"$\mathrm{RR\ Lyrae}$", color="r")
    bovy_plot.bovy_end_print(options.plotfilename)
def scatterplot(x, y, *args, **kwargs):
    """
    NAME:
       scatterplot
    PURPOSE:
       make a 'smart' scatterplot that is a density plot in high-density
       regions and a regular scatterplot for outliers
    INPUT:
       x, y - data arrays (must support boolean-mask indexing)
       xlabel - (raw string!) x-axis label, LaTeX math mode, no $s needed
       ylabel - (raw string!) y-axis label, LaTeX math mode, no $s needed
       xrange - default: [x.min(), x.max()]
       yrange - default: [y.min(), y.max()]
       bins - number of bins to use in each dimension
              (default: int(round(0.3*sqrt(len(x)))))
       weights - data-weights
       levels - contour levels; points whose interpolated cumulative value
                exceeds levels[-1] are drawn individually
       aspect - aspect ratio
       onedhists - if True, make one-d histograms on the sides
       onedhisttype - histtype of the one-d histograms (default 'step')
       onedhistcolor, onedhistfc, onedhistec
       remaining *args and **kwargs are passed on to bovy_plot
    OUTPUT:
       plot to output device; returns None
    HISTORY:
       2010-04-15 - Written - Bovy (NYU)
    """
    xlabel = kwargs.pop('xlabel', None)
    ylabel = kwargs.pop('ylabel', None)
    # Data-derived defaults are only evaluated when the keyword is absent,
    # exactly as before (x.min() etc. are not touched otherwise).
    if 'xrange' in kwargs:
        xrange = kwargs.pop('xrange')
    else:
        xrange = [x.min(), x.max()]
    if 'yrange' in kwargs:
        yrange = kwargs.pop('yrange')
    else:
        yrange = [y.min(), y.max()]
    if 'bins' in kwargs:
        bins = kwargs.pop('bins')
    else:
        # histogramdd needs an integer bin count; round() alone yields a
        # float under Python 2.
        bins = int(round(0.3 * sc.sqrt(len(x))))
    weights = kwargs.pop('weights', None)
    if 'levels' in kwargs:
        levels = kwargs.pop('levels')
    else:
        levels = special.erf(0.5 * sc.arange(1, 4))
    if 'aspect' in kwargs:
        aspect = kwargs.pop('aspect')
    else:
        aspect = (xrange[1] - xrange[0]) / (yrange[1] - yrange[0])
    onedhists = kwargs.pop('onedhists', False)
    onedhisttype = kwargs.pop('onedhisttype', 'step')
    onedhistcolor = kwargs.pop('onedhistcolor', 'k')
    onedhistfc = kwargs.pop('onedhistfc', 'w')
    onedhistec = kwargs.pop('onedhistec', 'k')
    if onedhists:
        fig = pyplot.figure()
        nullfmt = NullFormatter()  # no labels
        # definitions for the axes
        left, width = 0.1, 0.65
        bottom, height = 0.1, 0.65
        bottom_h = left_h = left + width
        rect_scatter = [left, bottom, width, height]
        rect_histx = [left, bottom_h, width, 0.2]
        rect_histy = [left_h, bottom, 0.2, height]
        axScatter = pyplot.axes(rect_scatter)
        axHistx = pyplot.axes(rect_histx)
        axHisty = pyplot.axes(rect_histy)
        # no labels
        axHistx.xaxis.set_major_formatter(nullfmt)
        axHistx.yaxis.set_major_formatter(nullfmt)
        axHisty.xaxis.set_major_formatter(nullfmt)
        axHisty.yaxis.set_major_formatter(nullfmt)
        fig.sca(axScatter)
    data = sc.array([x, y]).T
    hist, edges = sc.histogramdd(data, bins=bins, range=[xrange, yrange],
                                 weights=weights)
    cumimage = bovy_dens2d(hist.T, contours=True, levels=levels,
                           cntrmass=True, cntrcolors='k', cmap=cm.gist_yarg,
                           origin='lower', xrange=xrange, yrange=yrange,
                           xlabel=xlabel, ylabel=ylabel,
                           interpolation='nearest', retCumImage=True,
                           aspect=aspect, overplot=onedhists)
    # Bin centres are the midpoints of consecutive edges.
    xedge = edges[0]
    yedge = edges[1]
    binxs = (xedge[:-1] + xedge[1:]) / 2.
    binys = (yedge[:-1] + yedge[1:]) / 2.
    cumInterp = interpolate.RectBivariateSpline(binxs, binys, cumimage.T,
                                                kx=1, ky=1)
    # Evaluate the cumulative image at every data point in one call instead
    # of one spline call per point.
    cums = cumInterp.ev(x, y)
    outliers = cums > levels[-1]
    plotx = x[outliers]
    ploty = y[outliers]
    if len(plotx) > 0:
        # "is not None": comparing an array with == None is element-wise and
        # has no single truth value.
        if weights is not None:
            w8 = weights[outliers]
            for ii in range(len(plotx)):
                bovy_plot(plotx[ii], ploty[ii], overplot=True,
                          color='%.2f' % (1. - w8[ii]), *args, **kwargs)
        else:
            bovy_plot(plotx, ploty, overplot=True, *args, **kwargs)
    #Add onedhists
    if not onedhists:
        return
    # NOTE(review): 'normed' is the keyword this file's matplotlib era uses;
    # newer matplotlib releases call it 'density' -- confirm target version.
    axHistx.hist(x, bins=bins, normed=True, histtype=onedhisttype,
                 range=xrange, color=onedhistcolor, fc=onedhistfc,
                 ec=onedhistec)
    axHisty.hist(y, bins=bins, orientation='horizontal', normed=True,
                 histtype=onedhisttype, range=yrange, color=onedhistcolor,
                 fc=onedhistfc, ec=onedhistec)
    axHistx.set_xlim(axScatter.get_xlim())
    axHisty.set_ylim(axScatter.get_ylim())
Description of Usage: scottnla@faraday-cage:~/$ python readSerial.py [filename] Reads serial information from an arduino circuit, writes it to file. """ import scipy import pylab import sys filename = sys.argv[1] width = int(sys.argv[2]) height = int(sys.argv[3]) #load list of 2d points raw_data = scipy.loadtxt(filename) #scale the data scale = scipy.array([width/2.0, height/2.0]) output = scipy.multiply(raw_data, scale) #bin the data output = scipy.floor(output) #histogram it nx = range(-width/2,width/2) ny = range(-height/2,height/2) output = scipy.histogramdd(output,[nx,ny])[0] output = scipy.transpose(output) outputFile = filename.rsplit(".",1)[0] + '.dat' scipy.savetxt(outputFile, output)
def ex13(exclude=sc.array([1, 2, 3, 4]),
         plotfilename='ex13.png',
         nburn=1000,
         nsamples=100000,
         parsigma=[1, m.pi / 200., .01, .5, 1., .05, .1, .005],
         bovyprintargs={}):
    """ex13: solve exercise 13 by MCMC
    Input:
       exclude - ID numbers to exclude from the analysis
       plotfilename - filename for the output plot
       nburn - number of burn-in samples
       nsamples - number of samples to take after burn-in
       parsigma - proposal distribution width (Gaussian), one per parameter
       bovyprintargs - keyword arguments passed on to plot.bovy_print
    Output:
       plot
    History:
       2010-05-06 - Written - Bovy (NYU)
    """
    #Read the data
    data = read_data('data_allerr.dat', allerr=True)
    ndata = len(data)
    nsample = ndata - len(exclude)
    #First find the chi-squared solution, which we will use as an
    #initial guess
    #Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    Z = sc.zeros((nsample, 2))
    ycovar = sc.zeros((2, nsample, 2))  #Makes the sc.dot easier
    jj = 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        Y[jj] = data[ii][1][1]
        X[jj] = data[ii][1][0]
        Z[jj, 0] = X[jj]
        Z[jj, 1] = Y[jj]
        A[jj, 1] = data[ii][1][0]
        C[jj, jj] = data[ii][2]**2.
        ycovar[0, jj, 0] = data[ii][3]**2.
        ycovar[1, jj, 1] = data[ii][2]**2.
        ycovar[0, jj, 1] = data[ii][4] * m.sqrt(
            ycovar[0, jj, 0] * ycovar[1, jj, 1])
        ycovar[1, jj, 0] = ycovar[0, jj, 1]
        jj = jj + 1
    #Now compute the best fit and the uncertainties
    bestfit = sc.dot(linalg.inv(C), Y.T)
    bestfit = sc.dot(A.T, bestfit)
    bestfitvar = sc.dot(linalg.inv(C), A)
    bestfitvar = sc.dot(A.T, bestfitvar)
    bestfitvar = linalg.inv(bestfitvar)
    bestfit = sc.dot(bestfitvar, bestfit)
    #Now sample
    #The line is parametrized by its angle theta (slope = tan(theta)); a
    #negative chi-squared slope maps to theta in (pi/2, pi)
    inittheta = m.acos(1. / m.sqrt(1. + bestfit[1]**2.))
    if bestfit[1] < 0.:
        inittheta = m.pi - inittheta
    # NOTE(review): element 0 is later read back as b*cos(theta) but starts
    # at cos(theta), and elements 5 and 6 both start at log(var(X)) --
    # var(Y) may have been intended for one of them. Left unchanged because
    # it only affects the starting point of the chain.
    initialguess = sc.array([
        m.cos(inittheta), inittheta, 0.,
        sc.mean(X),
        sc.mean(Y),
        m.log(sc.var(X)),
        m.log(sc.var(X)), 0.
    ])
    #With this initial guess start off the sampling procedure
    initialX = objective(initialguess, Z, ycovar)
    currentX = initialX
    bestX = initialX
    bestfit = initialguess
    currentguess = initialguess
    naccept = 0
    samples = []
    samples.append(currentguess)
    for jj in range(nburn + nsamples):
        #Draw a sample from the proposal distribution, one Gaussian step
        #per parameter (drawn in parameter order)
        newsample = sc.zeros(8)
        for kk in range(8):
            newsample[kk] = currentguess[kk] + stats.norm.rvs() * parsigma[kk]
        #Calculate the objective function for the newsample
        newX = objective(newsample, Z, ycovar)
        #Accept or reject
        #Reject with the appropriate probability
        u = stats.uniform.rvs()
        try:
            test = m.exp(newX - currentX)
        except OverflowError:
            #Overwhelmingly better: always accept
            test = 2.
        if u < test:
            #Accept
            currentX = newX
            currentguess = newsample
            naccept = naccept + 1
            if currentX > bestX:
                bestfit = currentguess
                bestX = currentX
        samples.append(currentguess)
    if double(naccept) / (nburn + nsamples) < .5 or double(naccept) / (
            nburn + nsamples) > .8:
        print("Acceptance ratio was " + str(
            double(naccept) / (nburn + nsamples)))
    samples = sc.array(samples).T[:, nburn:-1]
    print("Best-fit, overall")
    print("%s %s %s" % (bestfit, sc.mean(samples[2, :]),
                        sc.median(samples[2, :])))
    #histogramdd needs an integer number of bins
    nbins = int(round(sc.sqrt(nsamples) / 2.))
    histmb, edges = sc.histogramdd(samples.T[:, 0:2], bins=nbins)
    indxi = sc.argmax(sc.amax(histmb, axis=1))
    indxj = sc.argmax(sc.amax(histmb, axis=0))
    print("Best-fit, marginalized")
    #Clamp the lower neighbour so index 0 does not wrap around to the
    #last edge
    print("%s %s" % (edges[0][max(indxi - 1, 0)],
                     edges[1][max(indxj - 1, 0)]))
    print("%s %s" % (edges[0][indxi], edges[1][indxj]))
    print("%s %s" % (edges[0][indxi + 1], edges[1][indxj + 1]))
    t = edges[1][indxj]
    bcost = edges[0][indxi]
    #slope = tan(theta); the former sqrt(1/cos^2 - 1) is |tan(theta)| and
    #lost the sign of negative slopes
    mf = m.tan(t)
    b = bcost / m.cos(t)
    print("%s %s" % (b, mf))
    #Plot result
    plot.bovy_print(**bovyprintargs)
    xrange = [0, 300]
    yrange = [0, 700]
    plot.bovy_plot(sc.array(xrange),
                   mf * sc.array(xrange) + b,
                   'k-',
                   xrange=xrange,
                   yrange=yrange,
                   xlabel=r'$x$',
                   ylabel=r'$y$',
                   zorder=2)
    for ii in range(10):
        #Random sample
        ransample = sc.floor((stats.uniform.rvs() * nsamples)).astype('int')
        ransample = samples.T[ransample, 0:2]
        bestm = m.tan(ransample[1])
        bestb = ransample[0] / m.cos(ransample[1])
        plot.bovy_plot(sc.array(xrange),
                       bestm * sc.array(xrange) + bestb,
                       overplot=True,
                       color='0.75',
                       zorder=0)
    #Add labels: Pbad averaged over the kept samples, per data point
    nkept = samples.shape[1]
    for ii in range(nsample):
        Pb = 0.
        for jj in range(nkept):
            Pb += Pbad(samples[:, jj], Z[ii, :], ycovar[:, ii, :])
        Pb /= nkept
        text(Z[ii, 0] + 5, Z[ii, 1] + 5, '%.1f' % Pb, color='0.5', zorder=3)
    #Plot the data. Positions (Z) and per-point covariances (ycovar) were
    #already assembled above from the same file, so they are reused here
    #instead of re-reading it.
    ellipses = []
    for jj in range(nsample):
        #Calculate the eigenvalues and the rotation angle
        eigs = linalg.eig(ycovar[:, jj, :])
        angle = m.atan(-eigs[1][0, 1] / eigs[1][1, 1]) / m.pi * 180.
        thisellipse = Ellipse(sc.array([Z[jj, 0], Z[jj, 1]]),
                              2 * m.sqrt(eigs[0][0]),
                              2 * m.sqrt(eigs[0][1]), angle)
        ellipses.append(thisellipse)
    #Add the error ellipses
    ax = gca()
    for e in ellipses:
        ax.add_artist(e)
        e.set_facecolor('none')
    ax.plot(Z[:, 0], Z[:, 1], color='k', marker='o', linestyle='None')
    plot.bovy_end_print(plotfilename)
def ex10(exclude=sc.array([1, 2, 3, 4]),
         plotfilenameA='ex10a.png',
         plotfilenameB='ex10b.png',
         nburn=1000,
         nsamples=200000,
         parsigma=[5, .075, 0.1],
         bovyprintargs={}):
    """ex10: solve exercise 10 using MCMC sampling
    Input:
       exclude - ID numbers to exclude from the analysis (can be None)
       plotfilenameA - filename for the plot of the overall best fit
       plotfilenameB - filename for the plot of the marginalized best fit
       nburn - number of burn-in samples
       nsamples - number of samples to take after burn-in
       parsigma - proposal distribution width (Gaussian), one per parameter
       bovyprintargs - keyword arguments passed on to plot.bovy_print
    Output:
       plot
    History:
       2010-05-07 - Written - Bovy (NYU)
    """
    #In the interest of reproducibility (if that's a word).
    #numpy rejects negative seeds with a ValueError, so the former seed(-1)
    #is replaced by the largest valid 32-bit seed.
    # NOTE(review): chains will only match runs made with the old seed(-1)
    # if legacy numpy wrapped -1 to this value -- unverified.
    sc.random.seed(2**32 - 1)
    #Read the data
    data = read_data('data_yerr.dat')
    ndata = len(data)
    #"is not None": comparing an array with == None is element-wise and has
    #no single truth value
    if exclude is not None:
        nsample = ndata - len(exclude)
    else:
        nsample = ndata
    #First find the chi-squared solution, which we will use as an
    #initial guess
    #Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    yerr = sc.zeros(nsample)
    jj = 0
    for ii in range(ndata):
        if exclude is not None and sc.any(exclude == data[ii][0]):
            continue
        Y[jj] = data[ii][1][1]
        X[jj] = data[ii][1][0]
        A[jj, 1] = data[ii][1][0]
        C[jj, jj] = data[ii][2]**2.
        yerr[jj] = data[ii][2]
        jj = jj + 1
    #Now compute the best fit and the uncertainties
    bestfit = sc.dot(linalg.inv(C), Y.T)
    bestfit = sc.dot(A.T, bestfit)
    bestfitvar = sc.dot(linalg.inv(C), A)
    bestfitvar = sc.dot(A.T, bestfitvar)
    bestfitvar = linalg.inv(bestfitvar)
    bestfit = sc.dot(bestfitvar, bestfit)
    #Order is (b,m,logS): column 0 of A is the constant term, and index 0 is
    #used as the intercept when plotting below
    initialguess = sc.array([bestfit[0], bestfit[1], 0.])
    #With this initial guess start off the sampling procedure
    initialX = objective(initialguess, X, Y, yerr)
    currentX = initialX
    bestX = initialX
    bestfit = initialguess
    currentguess = initialguess
    naccept = 0
    samples = []
    samples.append(currentguess)
    for jj in range(nburn + nsamples):
        #Draw a sample from the proposal distribution, one Gaussian step
        #per parameter (drawn in parameter order)
        newsample = sc.zeros(3)
        for kk in range(3):
            newsample[kk] = currentguess[kk] + stats.norm.rvs() * parsigma[kk]
        #Calculate the objective function for the newsample
        newX = objective(newsample, X, Y, yerr)
        #Accept or reject
        #Reject with the appropriate probability
        u = stats.uniform.rvs()
        try:
            accept = u < m.exp(newX - currentX)
        except OverflowError:
            #Overwhelmingly better: always accept
            accept = True
        if accept:
            currentX = newX
            currentguess = newsample
            naccept = naccept + 1
            if currentX > bestX:
                bestfit = currentguess
                bestX = currentX
        samples.append(currentguess)
    if double(naccept) / (nburn + nsamples) < .5 or double(naccept) / (
            nburn + nsamples) > .8:
        print("Acceptance ratio was " + str(
            double(naccept) / (nburn + nsamples)))
    samples = sc.array(samples).T[:, nburn:-1]
    print("Best-fit, overall")
    print("%s %s %s" % (bestfit, sc.mean(samples[2, :]),
                        sc.median(samples[2, :])))
    #The histogram routines need an integer number of bins
    nbins = int(round(sc.sqrt(nsamples) / 2.))
    histmb, edges = sc.histogramdd(samples.T[:, 0:2], bins=nbins)
    indxi = sc.argmax(sc.amax(histmb, axis=1))
    indxj = sc.argmax(sc.amax(histmb, axis=0))
    print("Best-fit, marginalized")
    #Clamp the lower neighbour so index 0 does not wrap around to the
    #last edge
    print("%s %s" % (edges[0][max(indxi - 1, 0)],
                     edges[1][max(indxj - 1, 0)]))
    print("%s %s" % (edges[0][indxi], edges[1][indxj]))
    print("%s %s" % (edges[0][indxi + 1], edges[1][indxj + 1]))
    print("Best-fit for S marginalized")
    histS, edgesS = sc.histogram(samples.T[:, 2], bins=nbins)
    indx = sc.argmax(histS)
    #Data with MAP line and sampling
    plot.bovy_print(**bovyprintargs)
    bestb = bestfit[0]
    bestm = bestfit[1]
    xrange = [0, 300]
    yrange = [0, 700]
    plot.bovy_plot(xrange,
                   bestm * sc.array(xrange) + bestb,
                   'k-',
                   xrange=xrange,
                   yrange=yrange,
                   xlabel=r'$x$',
                   ylabel=r'$y$',
                   zorder=2)
    errorbar(X,
             Y,
             sc.exp(bestfit[2] / 2.),
             marker='o',
             color='k',
             linestyle='None',
             zorder=1)
    plot.bovy_text(r'$\mathrm{MAP}\ :\ y = %4.2f \,x+ %4.0f' %
                   (bestfit[1], bestfit[0]) + r'$' + '\n' +
                   r'$\mathrm{MAP}\ :\ \sqrt{S} = %3.1f$' %
                   (sc.exp(bestfit[2] / 2.)),
                   bottom_right=True)
    plot.bovy_end_print(plotfilenameA)
    #Data with the marginalized best-fit line
    plot.bovy_print(**bovyprintargs)
    bestb = edges[0][indxi]
    bestm = edges[1][indxj]
    bestS = edgesS[indx]
    xrange = [0, 300]
    yrange = [0, 700]
    plot.bovy_plot(xrange,
                   bestm * sc.array(xrange) + bestb,
                   'k-',
                   xrange=xrange,
                   yrange=yrange,
                   xlabel=r'$x$',
                   ylabel=r'$y$',
                   zorder=2)
    errorbar(X,
             Y,
             sc.exp(bestS / 2.),
             marker='o',
             color='k',
             linestyle='None',
             zorder=1)
    plot.bovy_text(
        r'$\mathrm{marginalized\ over\ S}\ :\ y = %4.2f \,x+ %4.0f' %
        (bestm, bestb) + r'$' + '\n' +
        r'$\mathrm{marginalized\ over}\ (m,b)\ :\ \sqrt{S} = %3.1f$' %
        (sc.exp(bestS / 2.)),
        bottom_right=True)
    plot.bovy_end_print(plotfilenameB)
    return
def runSampler(X, Y, A, C, yerr, nburn, nsamples, parsigma, mbrange):
    """Runs the MCMC sampler, and returns the summary quantities that will
    be plotted.

    Input:
       X, Y, yerr - data passed on to objective()
       A, C - design matrix and data covariance used for the chi-squared
              starting point
       nburn - number of burn-in samples
       nsamples - number of samples to take after burn-in
       parsigma - proposal distribution width (Gaussian), one per parameter
       mbrange - range= argument for the 2D histogram of parameters 0 and 1
    Output:
       (histmb, edges, mbsamples, pbhist, pbedges): the 2D histogram of the
       first two parameters with its edges, ten randomly chosen
       (parameter 1, parameter 0) pairs, and the histogram of parameter 2
       over [0, 1] with its edges
    """
    # Now compute the best fit and the uncertainties
    bestfit = sc.dot(linalg.inv(C), Y.T)
    bestfit = sc.dot(A.T, bestfit)
    bestfitvar = sc.dot(linalg.inv(C), A)
    bestfitvar = sc.dot(A.T, bestfitvar)
    bestfitvar = linalg.inv(bestfitvar)
    bestfit = sc.dot(bestfitvar, bestfit)
    # Starting point: chi-squared fit, then 0, mean(Y), log(var(Y))
    initialguess = sc.array([bestfit[0], bestfit[1], 0.0, sc.mean(Y), m.log(sc.var(Y))])
    # With this initial guess start off the sampling procedure
    initialX = objective(initialguess, X, Y, yerr)
    currentX = initialX
    bestX = initialX
    bestfit = initialguess
    currentguess = initialguess
    naccept = 0
    samples = []
    samples.append(currentguess)
    for jj in range(nburn + nsamples):
        # Draw a sample from the proposal distribution, one Gaussian step
        # per parameter (drawn in parameter order)
        newsample = sc.zeros(5)
        for kk in range(5):
            newsample[kk] = currentguess[kk] + stats.norm.rvs() * parsigma[kk]
        # Calculate the objective function for the newsample
        newX = objective(newsample, X, Y, yerr)
        # Accept or reject
        # Reject with the appropriate probability
        u = stats.uniform.rvs()
        try:
            accept = u < m.exp(newX - currentX)
        except OverflowError:
            # exp() overflows only for an overwhelmingly better proposal:
            # accept it, as the sibling samplers ex10/ex13 do.
            accept = True
        if accept:
            currentX = newX
            currentguess = newsample
            naccept = naccept + 1
            if currentX > bestX:
                bestfit = currentguess
                bestX = currentX
        samples.append(currentguess)
    if double(naccept) / (nburn + nsamples) < 0.5 or double(naccept) / (nburn + nsamples) > 0.8:
        print("Acceptance ratio was " + str(double(naccept) / (nburn + nsamples)))
    samples = sc.array(samples).T[:, nburn:-1]
    print("Best-fit, overall")
    print("%s %s %s" % (bestfit, sc.mean(samples[2, :]), sc.median(samples[2, :])))
    # The histogram routines need an integer number of bins
    nbins = int(round(sc.sqrt(nsamples) / 5.0))
    histmb, edges = sc.histogramdd(samples.T[:, 0:2], bins=nbins, range=mbrange)
    mbsamples = []
    for ii in range(10):
        # Random sample; cast to int because a float is not a valid index
        ransample = sc.floor((stats.uniform.rvs() * nsamples)).astype('int')
        ransample = samples.T[ransample, 0:2]
        bestb = ransample[0]
        bestm = ransample[1]
        mbsamples.append((bestm, bestb))
    (pbhist, pbedges) = histogram(samples[2, :], bins=nbins, range=[0, 1])
    return (histmb, edges, mbsamples, pbhist, pbedges)
def _load_xd_fit(filename):
    """Unpickle the (amp, mean, covar) triple of an XD fit stored in filename.

    Prints the same diagnostics as the caller used to and returns None when
    the file does not exist.
    """
    if not os.path.exists(filename):
        print(filename + " does not exist ...")
        print("Returning ...")
        return None
    # NOTE(review): pickle.load can execute arbitrary code contained in the
    # file; only run this on save files you produced or trust.
    with open(filename, 'rb') as savefile:
        amp = pickle.load(savefile)
        mean = pickle.load(savefile)
        covar = pickle.load(savefile)
    return amp, mean, covar


def _xd_plot_coordinates(samples, options):
    """Convert XD samples into the (x, y) quantities that are plotted.

    Applies the exp / divide options to columns d1 and d2 and, for the DRW
    type, the logA conversion.  Returns None (after printing a message) when
    the DRW conversion is requested with an unsupported (d1, d2) pair.
    """
    xs = samples[:, options.d1]
    if options.expd1:
        xs = nu.exp(xs)
    elif options.divided1 is not None:
        xs = xs / options.divided1
    ys = samples[:, options.d2]
    if options.expd2:
        ys = nu.exp(ys)
    elif options.divided2 is not None:
        ys = ys / options.divided2
    if options.type == 'DRW':
        #plot logA, logA = 0
        if options.d1 == 0 and options.d2 == 1:
            #Convert to logA
            xs = (nu.log(2.) + xs + nu.log(1. - nu.exp(-1. / nu.exp(ys)))) / 2.
        elif options.d1 == 1 and options.d2 == 0:
            #Convert to logA
            ys = (nu.log(2.) + ys + nu.log(1. - nu.exp(-1. / nu.exp(xs)))) / 2.
        else:
            print("d1 and d2 have to be 0 or 1 (and not the same!) ...")
            print("Returning ...")
            return None
    return xs, ys


def plotXDall(parser):
    """Plot samples from three XD fits (QSOs, stars, RR Lyrae) in one figure.

    parser must provide parse_args() returning (options, args) and
    print_help().  args[0], args[1] and args[2] are the pickled XD fits for
    the QSO, star and RR Lyrae populations, in that order.  The figure is
    written to options.plotfilename.  Returns None.
    """
    nu.random.seed(1)
    (options, args) = parser.parse_args()
    #All three save files are indexed below, so fewer than three arguments
    #is a usage error (the old "== 0" test let 1 or 2 through to an
    #IndexError)
    if len(args) < 3:
        parser.print_help()
        return
    fits = []
    for filename in args[:3]:
        fit = _load_xd_fit(filename)
        if fit is None:
            return
        fits.append(fit)
    if options.nsamplesstar is None:
        options.nsamplesstar = options.nsamples
    if options.nsamplesrrlyrae is None:
        options.nsamplesrrlyrae = options.nsamples
    #Load XD object in xdtarget and sample; the order (QSO, star, RR Lyrae)
    #is kept so the seeded random stream is consumed as before
    nsamples = (options.nsamples, options.nsamplesstar,
                options.nsamplesrrlyrae)
    draws = []
    for (amp, mean, covar), nsample in zip(fits, nsamples):
        xdt = xdtarget.xdtarget(amp=amp, mean=mean, covar=covar)
        draws.append(xdt.sample(nsample=nsample))
    #Prepare for plotting
    coords = []
    for draw in draws:
        xy = _xd_plot_coordinates(draw, options)
        if xy is None:
            return
        coords.append(xy)
    (xs, ys), (starxs, starys), (rrxs, rrys) = coords
    #Plot
    xrange = [options.xmin, options.xmax]
    yrange = [options.ymin, options.ymax]
    data = sc.array([xs, ys]).T
    bins = int(round(0.3 * sc.sqrt(options.nsamples)))
    hist, edges = sc.histogramdd(data, bins=bins, range=[xrange, yrange])
    #Censor hist ASSUMES gamma=[0.,1.2], logA=[-9.21/2.,0.] for powerlawSF
    #x varies along axis 0 and y along axis 1, like the histogram itself
    x = nu.zeros((bins, bins))
    y = nu.zeros((bins, bins))
    for bb in range(bins):
        x[:, bb] = nu.linspace(options.xmin, options.xmax, bins)
        y[bb, :] = nu.linspace(options.ymin, options.ymax, bins)
    #mask
    #Default keeps the name bound for types other than the two handled
    #below (previously a NameError); None is passed through to scatterplot
    onedhistyweights = None
    if options.type == 'powerlawSF':
        hist[(y < 0.1) * (x > -3.) * (x < -1.5)] = nu.nan
        hist[(x < -3.)] = nu.nan
        hist[(x > -2.) * (y < (0.25 * x + 0.6))] = nu.nan
        onedhistyweights = nu.ones(len(ys)) / 100.
    elif options.type == 'DRW':
        hist[(y < (-4.223 * (x + 2) - 10))] = nu.nan
        hist[(y < -4.153) * (y < (58.47 * (x + 2.1) - 10.))] = nu.nan
        hist[(y > -4.153) * (y < (2.93 * x + 2.) - 1.)] = nu.nan
        onedhistyweights = nu.ones(len(ys)) / 2500.
    bovy_plot.bovy_print()
    #First just plot contours (all-white colormap hides the density image)
    cdict = {
        'red': ((.0, 1.0, 1.0), (1.0, 1.0, 1.0)),
        'green': ((.0, 1.0, 1.0), (1.0, 1.0, 1.0)),
        'blue': ((.0, 1.0, 1.0), (1.0, 1.0, 1.0))
    }
    allwhite = matplotlib.colors.LinearSegmentedColormap(
        'allwhite', cdict, 256)
    bovy_plot.scatterplot(xs,
                          ys,
                          'b,',
                          onedhists=True,
                          bins=bins,
                          cmap=allwhite,
                          onedhistynormed=False,
                          onedhistyweights=onedhistyweights,
                          xrange=xrange,
                          yrange=yrange,
                          onedhistec='b',
                          xlabel=options.xlabel,
                          ylabel=options.ylabel)
    bovy_plot.scatterplot(starxs,
                          starys,
                          'k,',
                          onedhists=True,
                          bins=bins,
                          cmap=allwhite,
                          xrange=xrange,
                          yrange=yrange,
                          overplot=True)
    bovy_plot.scatterplot(rrxs,
                          rrys,
                          'r,',
                          onedhists=True,
                          bins=bins,
                          cmap=allwhite,
                          onedhistec='r',
                          xrange=xrange,
                          yrange=yrange,
                          overplot=True)
    hist /= nu.nansum(hist)
    #Custom colormap (white to blue) for the QSO density
    cdict = {
        'red': ((.0, 1.0, 1.0), (1.0, .0, .0)),
        'green': ((0.0, 1.0, 1.0), (1.0, .0, .0)),
        'blue': ((0.0, 1.0, 1.0), (1.0, 1.0, 1.0))
    }
    my_cmap = matplotlib.colors.LinearSegmentedColormap(
        'my_colormap', cdict, 256)
    bovy_plot.scatterplot(xs,
                          ys,
                          'b,',
                          onedhists=False,
                          contours=False,
                          levels=[1.01],
                          bins=bins,
                          cmap=my_cmap,
                          hist=hist,
                          edges=edges,
                          onedhistynormed=False,
                          onedhistyweights=onedhistyweights,
                          xrange=xrange,
                          yrange=yrange,
                          overplot=True)
    #Stars
    data = sc.array([starxs, starys]).T
    hist, edges = sc.histogramdd(data, bins=bins, range=[xrange, yrange])
    if options.type == 'powerlawSF':
        hist[(x > -2.5)] = nu.nan
        hist[(x < -2.5) * (y > (-.19 * (x + 2.5)))] = nu.nan
    elif options.type == 'DRW':
        hist[(y >= (-4.223 * (x + 2) - 10))] = nu.nan
    hist /= nu.nansum(hist)
    bovy_plot.scatterplot(
        starxs,
        starys,
        'k,',
        onedhists=True,
        contours=False,
        levels=[1.01],  #HACK such that outliers aren't plotted
        bins=bins,
        hist=hist,
        edges=edges,
        xrange=xrange,
        yrange=yrange,
        overplot=True)
    #RR Lyrae
    data = sc.array([rrxs, rrys]).T
    hist, edges = sc.histogramdd(data, bins=bins, range=[xrange, yrange])
    if options.type == 'powerlawSF':
        hist[(x < -2.5)] = nu.nan
        hist[(x > -2.5) * (y > ((x + 2.5) / 1.5)**9. * 1.15 + 0.1)] = nu.nan
    elif options.type == 'DRW':
        hist[(y < -4.153) * (y >= (58.47 * (x + 2.1) - 10.)) *
             (y > -7.5)] = nu.nan
        hist[(y < -7.5) * (x < -2.1)] = nu.nan
        hist[(y > -4.153) * (y >= (2.93 * x + 2. - 1.5))] = nu.nan
    #Custom colormap (white to red) for the RR Lyrae density
    cdict = {
        'red': ((.0, 1.0, 1.0), (1.0, 1.0, 1.0)),
        'green': ((0.0, 1.0, 1.0), (1.0, .0, .0)),
        'blue': ((0.0, 1.0, 1.0), (1.0, .0, .0))
    }
    my_cmap = matplotlib.colors.LinearSegmentedColormap(
        'my_colormap', cdict, 256)
    hist /= nu.nansum(hist)
    bovy_plot.scatterplot(
        rrxs,
        rrys,
        'r,',
        onedhists=False,
        contours=False,
        levels=[1.01],  #HACK such that outliers aren't plotted
        bins=bins,
        cmap=my_cmap,
        hist=hist,
        edges=edges,
        xrange=xrange,
        yrange=yrange,
        overplot=True)
    #Label
    if options.type == 'powerlawSF':
        bovy_plot.bovy_text(-4.4, 1.15, r'$\mathrm{F/G\ stars}$', color='k')
        bovy_plot.bovy_text(-4.4, 1.05, r'$\mathrm{QSOs}$', color='b')
        bovy_plot.bovy_text(-4.4, 0.95, r'$\mathrm{RR\ Lyrae}$', color='r')
    elif options.type == 'DRW':
        bovy_plot.bovy_text(-4.4, 2.88, r'$\mathrm{F/G\ stars}$', color='k')
        bovy_plot.bovy_text(-4.4, 1.76, r'$\mathrm{QSOs}$', color='b')
        bovy_plot.bovy_text(-4.4, .64, r'$\mathrm{RR\ Lyrae}$', color='r')
    bovy_plot.bovy_end_print(options.plotfilename)
def scatterplot(x, y, *args, **kwargs):
    """
    NAME:

       scatterplot

    PURPOSE:

       make a 'smart' scatterplot that is a density plot in high-density
       regions and a regular scatterplot for outliers

    INPUT:

       x, y - data coordinates (arrays or lists of equal length)

       xlabel - (raw string!) x-axis label, LaTeX math mode, no $s needed

       ylabel - (raw string!) y-axis label, LaTeX math mode, no $s needed

       xrange

       yrange

       bins - number of bins to use in each dimension

       weights - data-weights

       aspect - aspect ratio

       conditional - normalize each column separately (for probability
                     densities, i.e., cntrmass=True)

       gcf=True does not start a new figure (does change the ranges and
                labels)

       overplot - if True, draw into the current figure and leave the
                  axis labels, ticks and one-d histogram limits untouched

       contours - if False, don't plot contours

       justcontours - if True, only draw contours, no density

       cntrcolors - color of contours (can be array as for bovy_dens2d)

       cntrlw, cntrls - linewidths and linestyles for contour

       cntrSmooth - use ndimage.gaussian_filter to smooth before contouring

       levels - contour-levels; data points outside of the last level will
                be individually shown (so, e.g., if this list is
                descending, contours and data points will be overplotted)

       onedhists - if True, make one-d histograms on the sides

       onedhistx - if True, make one-d histograms on the side of the x
                   distribution

       onedhisty - if True, make one-d histograms on the side of the y
                   distribution

       onedhistcolor, onedhistfc, onedhistec

       onedhisttype, onedhistls, onedhistlw - histtype, linestyle and
                   linewidth of the one-d histograms

       onedhistsbins - number of bins of the one-d histograms

       onedhistxnormed, onedhistynormed - normed keyword for one-d
                   histograms

       onedhistxweights, onedhistyweights - weights keyword for one-d
                   histograms (default: weights)

       cmap= cmap for density plot

       hist= and edges= - you can supply the histogram of the data yourself,
                          this can be useful if you want to censor the
                          data, both need to be set and calculated using
                          scipy.histogramdd with the given range

       retAxes= return all Axes instances

       any remaining positional/keyword arguments are passed on to
       bovy_plot when the outliers are drawn

    OUTPUT:

       plot to output device, Axes instance(s) or not, depending on input

    HISTORY:

       2010-04-15 - Written - Bovy (NYU)

    """
    xlabel = kwargs.pop('xlabel', None)
    ylabel = kwargs.pop('ylabel', None)
    if 'xrange' in kwargs:
        xrange = kwargs.pop('xrange')
    elif isinstance(x, list):
        xrange = [sc.amin(x), sc.amax(x)]
    else:
        xrange = [x.min(), x.max()]
    if 'yrange' in kwargs:
        yrange = kwargs.pop('yrange')
    elif isinstance(y, list):
        yrange = [sc.amin(y), sc.amax(y)]
    else:
        yrange = [y.min(), y.max()]
    ndata = len(x)
    # round() may hand back a float; the histogram routines need an
    # integer number of bins
    defaultbins = int(round(0.3 * sc.sqrt(ndata)))
    bins = kwargs.pop('bins', defaultbins)
    weights = kwargs.pop('weights', None)
    levels = kwargs.pop('levels',
                        special.erf(sc.arange(1, 4) / sc.sqrt(2.)))
    aspect = kwargs.pop('aspect',
                        (xrange[1] - xrange[0]) / (yrange[1] - yrange[0]))
    conditional = kwargs.pop('conditional', False)
    contours = kwargs.pop('contours', True)
    justcontours = kwargs.pop('justcontours', False)
    cntrcolors = kwargs.pop('cntrcolors', 'k')
    cntrlw = kwargs.pop('cntrlw', None)
    cntrls = kwargs.pop('cntrls', None)
    cntrSmooth = kwargs.pop('cntrSmooth', None)
    onedhists = kwargs.pop('onedhists', False)
    onedhistx = kwargs.pop('onedhistx', onedhists)
    onedhisty = kwargs.pop('onedhisty', onedhists)
    onedhisttype = kwargs.pop('onedhisttype', 'step')
    onedhistcolor = kwargs.pop('onedhistcolor', 'k')
    onedhistfc = kwargs.pop('onedhistfc', 'w')
    onedhistec = kwargs.pop('onedhistec', 'k')
    onedhistls = kwargs.pop('onedhistls', 'solid')
    onedhistlw = kwargs.pop('onedhistlw', None)
    onedhistsbins = kwargs.pop('onedhistsbins', defaultbins)
    overplot = kwargs.pop('overplot', False)
    gcf = kwargs.pop('gcf', False)
    cmap = kwargs.pop('cmap', cm.gist_yarg)
    onedhistxnormed = kwargs.pop('onedhistxnormed', True)
    onedhistynormed = kwargs.pop('onedhistynormed', True)
    onedhistxweights = kwargs.pop('onedhistxweights', weights)
    onedhistyweights = kwargs.pop('onedhistyweights', weights)
    retAxes = kwargs.pop('retAxes', False)
    anyonedhist = onedhists or onedhistx or onedhisty
    if anyonedhist:
        if overplot or gcf:
            fig = pyplot.gcf()
        else:
            fig = pyplot.figure()
        nullfmt = NullFormatter()  # no labels
        # definitions for the axes
        left, width = 0.1, 0.65
        bottom, height = 0.1, 0.65
        bottom_h = left_h = left + width
        rect_scatter = [left, bottom, width, height]
        rect_histx = [left, bottom_h, width, 0.2]
        rect_histy = [left_h, bottom, 0.2, height]
        axScatter = pyplot.axes(rect_scatter)
        if onedhistx:
            axHistx = pyplot.axes(rect_histx)
            # no labels
            axHistx.xaxis.set_major_formatter(nullfmt)
            axHistx.yaxis.set_major_formatter(nullfmt)
        if onedhisty:
            axHisty = pyplot.axes(rect_histy)
            # no labels
            axHisty.xaxis.set_major_formatter(nullfmt)
            axHisty.yaxis.set_major_formatter(nullfmt)
        fig.sca(axScatter)
    data = sc.array([x, y]).T
    # A user-supplied histogram is only honored when both pieces are given
    if 'hist' in kwargs and 'edges' in kwargs:
        hist = kwargs.pop('hist')
        edges = kwargs.pop('edges')
    else:
        hist, edges = sc.histogramdd(data, bins=bins,
                                     range=[xrange, yrange],
                                     weights=weights)
    # Keywords shared by the contour and no-contour density calls
    denskwargs = dict(contours=contours,
                      cntrcolors=cntrcolors,
                      cmap=cmap,
                      origin='lower',
                      xrange=xrange, yrange=yrange,
                      xlabel=xlabel, ylabel=ylabel,
                      interpolation='nearest',
                      conditional=conditional,
                      retCumImage=True,
                      aspect=aspect,
                      cntrlw=cntrlw, cntrls=cntrls,
                      overplot=(gcf or onedhists or overplot
                                or onedhistx or onedhisty))
    if contours:
        denskwargs.update(levels=levels,
                          cntrmass=contours,
                          cntrSmooth=cntrSmooth,
                          justcontours=justcontours,
                          zorder=5 * justcontours)
    cumimage = bovy_dens2d(hist.T, **denskwargs)
    #Set axes and labels
    pyplot.axis(list(xrange) + list(yrange))
    if not overplot:
        _add_axislabels(xlabel, ylabel)
        _add_ticks()
    # Bin centers, on which the cumulative image is interpolated
    xedge = sc.array(edges[0])
    yedge = sc.array(edges[1])
    binxs = (xedge[:-1] + xedge[1:]) / 2.
    binys = (yedge[:-1] + yedge[1:]) / 2.
    cumInterp = interpolate.RectBivariateSpline(binxs, binys, cumimage.T,
                                                kx=1, ky=1)
    cums = sc.array([cumInterp(x[ii], y[ii])[0, 0]
                     for ii in range(len(x))])
    # Points beyond the last contour level are drawn individually;
    # sc.array() so that list inputs can be masked as well
    outliers = cums > levels[-1]
    plotx = sc.array(x)[outliers]
    ploty = sc.array(y)[outliers]
    if len(plotx) > 0:
        # 'is not None': comparing an array of weights to None with ==
        # is element-wise and cannot be truth-tested
        if weights is not None:
            w8 = sc.array(weights)[outliers]
            for ii in range(len(plotx)):
                bovy_plot(plotx[ii], ploty[ii], overplot=True,
                          color='%.2f' % (1. - w8[ii]),
                          *args, **kwargs)
        else:
            bovy_plot(plotx, ploty, overplot=True, zorder=1,
                      *args, **kwargs)
    #Add onedhists
    if not anyonedhist:
        if retAxes:
            return pyplot.gca()
        return None
    if onedhistx:
        histx, edges, patches = axHistx.hist(x, bins=onedhistsbins,
                                             normed=onedhistxnormed,
                                             weights=onedhistxweights,
                                             histtype=onedhisttype,
                                             range=sorted(xrange),
                                             color=onedhistcolor,
                                             fc=onedhistfc,
                                             ec=onedhistec,
                                             ls=onedhistls,
                                             lw=onedhistlw)
    if onedhisty:
        histy, edges, patches = axHisty.hist(y, bins=onedhistsbins,
                                             orientation='horizontal',
                                             weights=onedhistyweights,
                                             normed=onedhistynormed,
                                             histtype=onedhisttype,
                                             range=sorted(yrange),
                                             color=onedhistcolor,
                                             fc=onedhistfc,
                                             ec=onedhistec,
                                             ls=onedhistls,
                                             lw=onedhistlw)
    # When overplotting, the limits set by the first call are kept
    if onedhistx and not overplot:
        axHistx.set_xlim(axScatter.get_xlim())
        axHistx.set_ylim(0, 1.2 * sc.amax(histx))
    if onedhisty and not overplot:
        axHisty.set_ylim(axScatter.get_ylim())
        axHisty.set_xlim(0, 1.2 * sc.amax(histy))
    if not onedhistx:
        axHistx = None
    if not onedhisty:
        axHisty = None
    if retAxes:
        return (axScatter, axHistx, axHisty)
    return None
def ex13(exclude=sc.array([]),plotfilename='ex13.png',
         nburn=1000,nsamples=10000,
         parsigma=[1,m.pi/200.,.01,.5,1.,.05,.1,.005],
         bovyprintargs={}):
    """ex13: solve exercise 13 by MCMC

    Input:
       exclude  - ID numbers to exclude from the analysis
       plotfilename - filename for the output plot
       nburn - number of burn-in samples
       nsamples - number of samples to take after burn-in
       parsigma - proposal distribution width (Gaussian), one entry per
                  sampled parameter (8 in total)
       bovyprintargs - keyword arguments passed on to plot.bovy_print

    Output:
       plot

    History:
       2010-05-06 - Written - Bovy (NYU)

    """
    #Read the data
    print('reading')
    print('reading')
    data = read_data('data_allerr_backup.dat', allerr=True)
    print(data)
    ndata = len(data)
    nsample = ndata - len(exclude)
    #First find the chi-squared solution, which we will use as an
    #initial guess
    #Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    Z = sc.zeros((nsample, 2))
    ycovar = sc.zeros((2, nsample, 2))  #Makes the sc.dot easier
    jj = 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        # The y-values must be loaded here: Y feeds the chi-squared
        # solution, Z[:,1] and the initial guess. (This assignment was
        # commented out, which left all of them identically zero.)
        Y[jj] = data[ii][1][1]
        X[jj] = data[ii][1][0]
        Z[jj, 0] = X[jj]
        Z[jj, 1] = Y[jj]
        A[jj, 1] = data[ii][1][0]
        C[jj, jj] = data[ii][2]**2.
        ycovar[0, jj, 0] = data[ii][3]**2.
        ycovar[1, jj, 1] = data[ii][2]**2.
        ycovar[0, jj, 1] = data[ii][4] * m.sqrt(ycovar[0, jj, 0]
                                                * ycovar[1, jj, 1])
        ycovar[1, jj, 0] = ycovar[0, jj, 1]
        jj = jj + 1
    print(data[:][4])
    #Now compute the best fit and the uncertainties
    bestfit = sc.dot(linalg.inv(C), Y.T)
    bestfit = sc.dot(A.T, bestfit)
    bestfitvar = sc.dot(linalg.inv(C), A)
    bestfitvar = sc.dot(A.T, bestfitvar)
    bestfitvar = linalg.inv(bestfitvar)
    bestfit = sc.dot(bestfitvar, bestfit)
    #Now sample
    inittheta = m.acos(1. / m.sqrt(1. + bestfit[1]**2.))
    if bestfit[1] < 0.:
        inittheta = m.pi - inittheta
    # The first two entries are (b cos(theta), theta): that is how they
    # are turned back into a slope and intercept further down. The meaning
    # of the remaining six is fixed by objective() -- presumably the
    # bad-point probability and the background mean/variance parameters.
    # NOTE(review): log(var(X)) appears twice; confirm that the second
    # one is not meant to be log(var(Y)).
    initialguess = sc.array([m.cos(inittheta), inittheta, 0.,
                             sc.mean(X), sc.mean(Y),
                             m.log(sc.var(X)), m.log(sc.var(X)), 0.])
    #With this initial guess start off the sampling procedure
    initialX = objective(initialguess, Z, ycovar)
    currentX = initialX
    bestX = initialX
    bestfit = initialguess
    currentguess = initialguess
    naccept = 0
    samples = [currentguess]
    nparams = 8
    for jj in range(nburn + nsamples):
        #Draw a sample from the proposal distribution
        newsample = sc.zeros(nparams)
        for kk in range(nparams):
            newsample[kk] = (currentguess[kk]
                             + stats.norm.rvs() * parsigma[kk])
        #Calculate the objective function for the newsample
        newX = objective(newsample, Z, ycovar)
        #Accept or reject
        #Reject with the appropriate probability
        u = stats.uniform.rvs()
        try:
            test = m.exp(newX - currentX)
        except OverflowError:
            # Overwhelmingly better proposal: always accept
            test = 2.
        if u < test:
            #Accept
            currentX = newX
            currentguess = newsample
            naccept = naccept + 1
            if currentX > bestX:
                bestfit = currentguess
                bestX = currentX
        samples.append(currentguess)
    accratio = double(naccept) / (nburn + nsamples)
    if accratio < .5 or accratio > .8:
        print("Acceptance ratio was " + str(accratio))
    #Drop the burn-in (and the final sample)
    samples = sc.array(samples).T[:, nburn:-1]
    print("Best-fit, overall")
    print("%s %s %s" % (bestfit, sc.mean(samples[2, :]),
                        sc.median(samples[2, :])))
    # int(): the number of bins has to be an integer
    histmb, edges = sc.histogramdd(samples.T[:, 0:2],
                                   bins=int(round(sc.sqrt(nsamples) / 2.)))
    indxi = sc.argmax(sc.amax(histmb, axis=1))
    indxj = sc.argmax(sc.amax(histmb, axis=0))
    print("Best-fit, marginalized")
    print("%s %s" % (edges[0][indxi - 1], edges[1][indxj - 1]))
    print("%s %s" % (edges[0][indxi], edges[1][indxj]))
    print("%s %s" % (edges[0][indxi + 1], edges[1][indxj + 1]))
    #Convert (b cos(theta), theta) to intercept and slope
    t = edges[1][indxj]
    bcost = edges[0][indxi]
    mf = m.sqrt(1. / m.cos(t)**2. - 1.)
    b = bcost / m.cos(t)
    print("%s %s" % (b, mf))
    #Plot result
    plot.bovy_print(**bovyprintargs)
    xrange = [0, 300]
    yrange = [0, 700]
    plot.bovy_plot(sc.array(xrange), mf * sc.array(xrange) + b,
                   'k-', xrange=xrange, yrange=yrange,
                   xlabel=r'$x$', ylabel=r'$y$', zorder=2)
    for ii in range(10):
        #Random sample; int() because a float cannot be used as an index
        ranindx = int(sc.floor(stats.uniform.rvs() * nsamples))
        ransample = samples.T[ranindx, 0:2]
        ranm = m.sqrt(1. / m.cos(ransample[1])**2. - 1.)
        ranb = ransample[0] / m.cos(ransample[1])
        plot.bovy_plot(sc.array(xrange), ranm * sc.array(xrange) + ranb,
                       overplot=True, color='0.75', zorder=0)
    #Add labels: value of Pbad averaged over the samples, per data point
    nsamples = samples.shape[1]
    for ii in range(nsample):
        Pb = 0.
        for jj in range(nsamples):
            Pb += Pbad(samples[:, jj], Z[ii, :], ycovar[:, ii, :])
        Pb /= nsamples
        text(Z[ii, 0] + 5, Z[ii, 1] + 5, '%.1f' % Pb,
             color='0.5', zorder=3)
    #Plot the data OMG straight from plot_data.py
    data = read_data('data_allerr_backup.dat', True)
    ndata = len(data)
    #Create the ellipses and the data points
    x = sc.zeros(nsample)
    y = sc.zeros(nsample)
    ellipses = []
    jj = 0
    for ii in range(ndata):
        if sc.any(exclude == data[ii][0]):
            continue
        x[jj] = data[ii][1][0]
        y[jj] = data[ii][1][1]
        #Calculate the eigenvalues and the rotation angle
        cov = sc.zeros((2, 2))
        cov[0, 0] = data[ii][3]**2.
        cov[1, 1] = data[ii][2]**2.
        cov[0, 1] = data[ii][4] * m.sqrt(cov[0, 0] * cov[1, 1])
        cov[1, 0] = cov[0, 1]
        eigs = linalg.eig(cov)
        angle = m.atan(-eigs[1][0, 1] / eigs[1][1, 1]) / m.pi * 180.
        ellipses.append(Ellipse(sc.array([x[jj], y[jj]]),
                                2 * m.sqrt(eigs[0][0]),
                                2 * m.sqrt(eigs[0][1]), angle))
        jj = jj + 1
    #Add the error ellipses
    ax = gca()
    for e in ellipses:
        ax.add_artist(e)
        e.set_facecolor('none')
    ax.plot(x, y, color='k', marker='o', linestyle='None')
    print("%s %s" % (plotfilename, 'plot'))
    plot.bovy_end_print(plotfilename)
def scatterplot(x, y, *args, **kwargs):
    """
    NAME:

       scatterplot

    PURPOSE:

       make a 'smart' scatterplot that is a density plot in high-density
       regions and a regular scatterplot for outliers

    INPUT:

       x, y - data coordinates (arrays or lists of equal length)

       xlabel - (raw string!) x-axis label, LaTeX math mode, no $s needed

       ylabel - (raw string!) y-axis label, LaTeX math mode, no $s needed

       xrange

       yrange

       bins - number of bins to use in each dimension (also used for the
              one-d histograms)

       weights - data-weights

       aspect - aspect ratio

       levels - contour-levels; data points beyond the last level are
                plotted individually

       overplot - if True, draw into the current figure

       contours - if False, don't plot contours

       cntrcolors - color of contours (can be array as for bovy_dens2d)

       cntrlw, cntrls - linewidths and linestyles for contour

       onedhists - if True, make one-d histograms on the sides

       onedhistx - if True, make one-d histograms on the side of the x
                   distribution

       onedhisty - if True, make one-d histograms on the side of the y
                   distribution

       onedhistcolor, onedhistfc, onedhistec

       onedhisttype, onedhistls, onedhistlw - histtype, linestyle and
                   linewidth of the one-d histograms

       onedhistxnormed, onedhistynormed - normed keyword for one-d
                   histograms

       onedhistxweights, onedhistyweights - weights keyword for one-d
                   histograms

       cmap= cmap for density plot

       hist= and edges= - you can supply the histogram of the data yourself,
                          this can be useful if you want to censor the
                          data, both need to be set and calculated using
                          scipy.histogramdd with the given range

       retAxes= return all Axes instances

       any remaining positional/keyword arguments are passed on to
       bovy_plot when the outliers are drawn

    OUTPUT:

       plot to output device; with retAxes=True the Axes instance, or the
       tuple (axScatter, axHistx, axHisty) when one-d histograms are drawn

    HISTORY:

       2010-04-15 - Written - Bovy (NYU)

    """
    xlabel = kwargs.pop('xlabel', None)
    ylabel = kwargs.pop('ylabel', None)
    if 'xrange' in kwargs:
        xrange = kwargs.pop('xrange')
    elif isinstance(x, list):
        xrange = [sc.amin(x), sc.amax(x)]
    else:
        xrange = [x.min(), x.max()]
    if 'yrange' in kwargs:
        yrange = kwargs.pop('yrange')
    elif isinstance(y, list):
        yrange = [sc.amin(y), sc.amax(y)]
    else:
        yrange = [y.min(), y.max()]
    ndata = len(x)
    # round() may hand back a float; the histogram routines need an
    # integer number of bins
    bins = kwargs.pop('bins', int(round(0.3 * sc.sqrt(ndata))))
    weights = kwargs.pop('weights', None)
    levels = kwargs.pop('levels', special.erf(0.5 * sc.arange(1, 4)))
    if 'aspect' in kwargs:
        aspect = kwargs.pop('aspect')
    else:
        aspect = (xrange[1] - xrange[0]) / (yrange[1] - yrange[0])
    contours = kwargs.pop('contours', True)
    cntrcolors = kwargs.pop('cntrcolors', 'k')
    # Always bind these two: they are handed to bovy_dens2d even when
    # contours=False, and used to be undefined in that case
    cntrlw = kwargs.pop('cntrlw', None)
    cntrls = kwargs.pop('cntrls', None)
    onedhists = kwargs.pop('onedhists', False)
    onedhistx = kwargs.pop('onedhistx', bool(onedhists))
    onedhisty = kwargs.pop('onedhisty', bool(onedhists))
    onedhisttype = kwargs.pop('onedhisttype', 'step')
    onedhistcolor = kwargs.pop('onedhistcolor', 'k')
    onedhistfc = kwargs.pop('onedhistfc', 'w')
    onedhistec = kwargs.pop('onedhistec', 'k')
    onedhistls = kwargs.pop('onedhistls', 'solid')
    onedhistlw = kwargs.pop('onedhistlw', None)
    overplot = kwargs.pop('overplot', False)
    cmap = kwargs.pop('cmap', cm.gist_yarg)
    onedhistxnormed = kwargs.pop('onedhistxnormed', True)
    onedhistynormed = kwargs.pop('onedhistynormed', True)
    onedhistxweights = kwargs.pop('onedhistxweights', None)
    onedhistyweights = kwargs.pop('onedhistyweights', None)
    retAxes = kwargs.pop('retAxes', False)
    anyonedhist = onedhists or onedhistx or onedhisty
    if anyonedhist:
        if overplot:
            fig = pyplot.gcf()
        else:
            fig = pyplot.figure()
        nullfmt = NullFormatter()  # no labels
        # definitions for the axes
        left, width = 0.1, 0.65
        bottom, height = 0.1, 0.65
        bottom_h = left_h = left + width
        rect_scatter = [left, bottom, width, height]
        rect_histx = [left, bottom_h, width, 0.2]
        rect_histy = [left_h, bottom, 0.2, height]
        axScatter = pyplot.axes(rect_scatter)
        if onedhistx:
            axHistx = pyplot.axes(rect_histx)
            # no labels
            axHistx.xaxis.set_major_formatter(nullfmt)
            axHistx.yaxis.set_major_formatter(nullfmt)
        if onedhisty:
            axHisty = pyplot.axes(rect_histy)
            # no labels
            axHisty.xaxis.set_major_formatter(nullfmt)
            axHisty.yaxis.set_major_formatter(nullfmt)
        fig.sca(axScatter)
    data = sc.array([x, y]).T
    # A user-supplied histogram is only honored when both pieces are given
    if 'hist' in kwargs and 'edges' in kwargs:
        hist = kwargs.pop('hist')
        edges = kwargs.pop('edges')
    else:
        hist, edges = sc.histogramdd(data, bins=bins,
                                     range=[xrange, yrange],
                                     weights=weights)
    # Keywords shared by the contour and no-contour density calls
    denskwargs = dict(contours=contours,
                      cntrcolors=cntrcolors,
                      cmap=cmap,
                      origin='lower',
                      xrange=xrange, yrange=yrange,
                      xlabel=xlabel, ylabel=ylabel,
                      interpolation='nearest',
                      retCumImage=True,
                      aspect=aspect,
                      cntrlw=cntrlw, cntrls=cntrls,
                      overplot=(onedhists or overplot
                                or onedhistx or onedhisty))
    if contours:
        denskwargs.update(levels=levels, cntrmass=contours)
    cumimage = bovy_dens2d(hist.T, **denskwargs)
    # Bin centers, on which the cumulative image is interpolated
    xedge = sc.array(edges[0])
    yedge = sc.array(edges[1])
    binxs = (xedge[:-1] + xedge[1:]) / 2.
    binys = (yedge[:-1] + yedge[1:]) / 2.
    cumInterp = interpolate.RectBivariateSpline(binxs, binys, cumimage.T,
                                                kx=1, ky=1)
    cums = sc.array([cumInterp(x[ii], y[ii])[0, 0]
                     for ii in range(len(x))])
    # Points beyond the last contour level are drawn individually;
    # sc.array() so that list inputs can be masked as well
    outliers = cums > levels[-1]
    plotx = sc.array(x)[outliers]
    ploty = sc.array(y)[outliers]
    if len(plotx) > 0:
        # 'is not None': comparing an array of weights to None with ==
        # is element-wise and cannot be truth-tested
        if weights is not None:
            w8 = sc.array(weights)[outliers]
            for ii in range(len(plotx)):
                bovy_plot(plotx[ii], ploty[ii], overplot=True,
                          color='%.2f' % (1. - w8[ii]),
                          *args, **kwargs)
        else:
            bovy_plot(plotx, ploty, overplot=True, *args, **kwargs)
    #Add onedhists
    if not anyonedhist:
        if retAxes:
            return pyplot.gca()
        return None
    if onedhistx:
        histx, edges, patches = axHistx.hist(x, bins=bins,
                                             normed=onedhistxnormed,
                                             weights=onedhistxweights,
                                             histtype=onedhisttype,
                                             range=sorted(xrange),
                                             color=onedhistcolor,
                                             fc=onedhistfc,
                                             ec=onedhistec,
                                             ls=onedhistls,
                                             lw=onedhistlw)
    if onedhisty:
        histy, edges, patches = axHisty.hist(y, bins=bins,
                                             orientation='horizontal',
                                             weights=onedhistyweights,
                                             normed=onedhistynormed,
                                             histtype=onedhisttype,
                                             range=sorted(yrange),
                                             color=onedhistcolor,
                                             fc=onedhistfc,
                                             ec=onedhistec,
                                             ls=onedhistls,
                                             lw=onedhistlw)
    if onedhistx:
        axHistx.set_xlim(axScatter.get_xlim())
        axHistx.set_ylim(0, 1.2 * sc.amax(histx))
    if onedhisty:
        axHisty.set_ylim(axScatter.get_ylim())
        axHisty.set_xlim(0, 1.2 * sc.amax(histy))
    if not onedhistx:
        axHistx = None
    if not onedhisty:
        axHisty = None
    if retAxes:
        return (axScatter, axHistx, axHisty)
    return None
def exNew(exclude=sc.array([1,2,3,4]),
          plotfilename='exNew.png',nburn=20000,nsamples=200000,
          parsigma=[5,.075,.01,1,.1],dsigma=1.):
    """exNew: solve the new exercise using MCMC sampling

    Input:
       exclude  - ID numbers to exclude from the analysis (can be None)
       plotfilename - filename for the output plot (currently unused: the
                      three figures are written to exNew1[abc].png when
                      dsigma == 1 and to exNew2[abc].png otherwise)
       nburn - number of burn-in samples
       nsamples - number of samples to take after burn-in
       parsigma - proposal distribution width (Gaussian), one entry per
                  sampled parameter (5 in total)
       dsigma - divide uncertainties by this amount

    Output:
       plot

    History:
       2010-04-28 - Written - Bovy (NYU)

    """
    sc.random.seed(1) #In the interest of reproducibility (if that's a word)
    #Read the data
    data = read_data('data_yerr.dat')
    ndata = len(data)
    # 'is not None': == on an array is element-wise and cannot be
    # truth-tested
    if exclude is not None:
        nsample = ndata - len(exclude)
    else:
        nsample = ndata
    #First find the chi-squared solution, which we will use as an
    #initial guess
    #Put the data in the appropriate arrays and matrices
    Y = sc.zeros(nsample)
    X = sc.zeros(nsample)
    A = sc.ones((nsample, 2))
    C = sc.zeros((nsample, nsample))
    yerr = sc.zeros(nsample)
    jj = 0
    for ii in range(ndata):
        if exclude is not None and sc.any(exclude == data[ii][0]):
            continue
        Y[jj] = data[ii][1][1]
        X[jj] = data[ii][1][0]
        A[jj, 1] = data[ii][1][0]
        C[jj, jj] = data[ii][2]**2. / dsigma**2.
        yerr[jj] = data[ii][2] / dsigma
        jj = jj + 1
    #Now compute the best fit and the uncertainties
    bestfit = sc.dot(linalg.inv(C), Y.T)
    bestfit = sc.dot(A.T, bestfit)
    bestfitvar = sc.dot(linalg.inv(C), A)
    bestfitvar = sc.dot(A.T, bestfitvar)
    bestfitvar = linalg.inv(bestfitvar)
    bestfit = sc.dot(bestfitvar, bestfit)
    # Parameter order is (b,m,Pb,Yb,Vb): A's first column is the constant
    # term, so bestfit[0] is the intercept and bestfit[1] the slope
    initialguess = sc.array([bestfit[0], bestfit[1], 0.,
                             sc.mean(Y), m.log(sc.var(Y))])
    #With this initial guess start off the sampling procedure
    initialX = objective(initialguess, X, Y, yerr)
    currentX = initialX
    bestX = initialX
    bestfit = initialguess
    currentguess = initialguess
    naccept = 0
    samples = [currentguess]
    nparams = 5
    for jj in range(nburn + nsamples):
        #Draw a sample from the proposal distribution
        newsample = sc.zeros(nparams)
        for kk in range(nparams):
            newsample[kk] = (currentguess[kk]
                             + stats.norm.rvs() * parsigma[kk])
        #Calculate the objective function for the newsample
        newX = objective(newsample, X, Y, yerr)
        #Accept or reject
        #Reject with the appropriate probability
        u = stats.uniform.rvs()
        try:
            test = m.exp(newX - currentX)
        except OverflowError:
            # Overwhelmingly better proposal: always accept
            test = 2.
        if u < test:
            #Accept
            currentX = newX
            currentguess = newsample
            naccept = naccept + 1
            if currentX > bestX:
                bestfit = currentguess
                bestX = currentX
        samples.append(currentguess)
    accratio = double(naccept) / (nburn + nsamples)
    if accratio < .2 or accratio > .6:
        print("Acceptance ratio was " + str(accratio))
    #Drop the burn-in (and the final sample)
    samples = sc.array(samples).T[:, nburn:-1]
    print("Best-fit, overall")
    print("%s %s %s" % (bestfit, sc.mean(samples[2, :]),
                        sc.median(samples[2, :])))
    # int(): the number of bins has to be an integer
    nbins = int(round(sc.sqrt(nsamples) / 5.))
    histmb, edges = sc.histogramdd(samples.T[:, 0:2], bins=nbins)
    indxi = sc.argmax(sc.amax(histmb, axis=1))
    indxj = sc.argmax(sc.amax(histmb, axis=0))
    print("Best-fit, marginalized")
    print("%s %s" % (edges[0][indxi - 1], edges[1][indxj - 1]))
    print("%s %s" % (edges[0][indxi], edges[1][indxj]))
    print("%s %s" % (edges[0][indxi + 1], edges[1][indxj + 1]))
    # Remember the MAP values now: indxi/indxj refer to *this* binning,
    # and 'edges' is replaced by the fixed-range histogram below
    mapb = edges[0][indxi]
    mapm = edges[1][indxj]
    # The same annotation and file-name tag are used for all three figures
    if dsigma == 1.:
        annotation = r'$\mathrm{using\ correct\ data\ uncertainties}$'
        tag = '1'
    else:
        annotation = r'$\mathrm{using\ data\ uncertainties\ /\ 2}$'
        tag = '2'
    #2D histogram
    plot.bovy_print()
    levels = special.erf(0.5 * sc.arange(1, 4))
    xrange = [-120, 120]
    yrange = [1.5, 3.2]
    # Re-bin on a fixed range, scaling the number of bins so that the bin
    # width stays that of the first histogram
    histmb, edges = sc.histogramdd(
        samples.T[:, 0:2],
        range=[xrange, yrange],
        bins=(int(nbins / (edges[0][-1] - edges[0][0])
                  * (xrange[1] - xrange[0])),
              int(nbins / (edges[1][-1] - edges[1][0])
                  * (yrange[1] - yrange[0]))))
    aspect = (xrange[1] - xrange[0]) / (yrange[1] - yrange[0])
    plot.bovy_dens2d(histmb.T, origin='lower', cmap='gist_yarg',
                     contours=True, cntrmass=True,
                     xrange=xrange, yrange=yrange,
                     levels=levels,
                     aspect=aspect,
                     xlabel=r'$b$', ylabel=r'$m$')
    plot.bovy_text(annotation, top_right=True)
    plot.bovy_end_print('exNew' + tag + 'a.png')
    #Data with MAP line and sampling
    plot.bovy_print()
    xrange = [0, 300]
    yrange = [0, 700]
    plot.bovy_plot(xrange, mapm * sc.array(xrange) + mapb, 'k-',
                   xrange=xrange, yrange=yrange,
                   xlabel=r'$x$', ylabel=r'$y$', zorder=2)
    errorbar(X, Y, yerr, color='k', marker='o', linestyle='None', zorder=1)
    for ii in range(10):
        #Random sample; int() because a float cannot be used as an index
        ranindx = int(sc.floor(stats.uniform.rvs() * nsamples))
        ransample = samples.T[ranindx, 0:2]
        ranb = ransample[0]
        ranm = ransample[1]
        plot.bovy_plot(xrange, ranm * sc.array(xrange) + ranb,
                       overplot=True, xrange=xrange, yrange=yrange,
                       xlabel=r'$x$', ylabel=r'$y$',
                       color='0.75', zorder=1)
    plot.bovy_text(annotation, top_right=True)
    plot.bovy_end_print('exNew' + tag + 'b.png')
    #Pb plot
    plot.bovy_print()
    plot.bovy_hist(samples.T[:, 2], color='k', bins=nbins,
                   xlabel=r'$P_\mathrm{b}$', normed=True, histtype='step',
                   range=[0, 1])
    plot.bovy_text(annotation, top_right=True)
    plot.bovy_end_print('exNew' + tag + 'c.png')
    return
def runSampler(X, Y, A, C, yerr, nburn, nsamples, parsigma, mbrange):
    '''Runs the MCMC sampler, and returns the summary quantities that
    will be plotted.

    Input:
       X, Y, yerr - data passed on to objective()
       A, C - design matrix and data covariance matrix of the weighted
              least-squares fit that provides the initial guess
       nburn - number of burn-in samples
       nsamples - number of samples to take after burn-in
       parsigma - proposal distribution width (Gaussian), one entry per
                  sampled parameter (5 in total)
       mbrange - range= of the 2D histogram of the first two parameters

    Output:
       (histmb, edges, mbsamples, pbhist, pbedges):
          histmb, edges - 2D histogram of the first two parameters
          mbsamples - list of 10 randomly drawn (m, b) pairs
          pbhist, pbedges - histogram of the third parameter on [0, 1]
    '''
    #Now compute the best fit and the uncertainties
    bestfit = sc.dot(linalg.inv(C), Y.T)
    bestfit = sc.dot(A.T, bestfit)
    bestfitvar = sc.dot(linalg.inv(C), A)
    bestfitvar = sc.dot(A.T, bestfitvar)
    bestfitvar = linalg.inv(bestfitvar)
    bestfit = sc.dot(bestfitvar, bestfit)
    # Parameter order is (b,m,Pb,Yb,Vb), going by how the samples are
    # unpacked into bestb/bestm below
    initialguess = sc.array(
        [bestfit[0], bestfit[1], 0., sc.mean(Y), m.log(sc.var(Y))])
    #With this initial guess start off the sampling procedure
    initialX = objective(initialguess, X, Y, yerr)
    currentX = initialX
    bestX = initialX
    bestfit = initialguess
    currentguess = initialguess
    naccept = 0
    samples = [currentguess]
    nparams = 5
    for jj in range(nburn + nsamples):
        #Draw a sample from the proposal distribution
        newsample = sc.zeros(nparams)
        for kk in range(nparams):
            newsample[kk] = (currentguess[kk]
                             + stats.norm.rvs() * parsigma[kk])
        #Calculate the objective function for the newsample
        newX = objective(newsample, X, Y, yerr)
        #Accept or reject
        #Reject with the appropriate probability
        u = stats.uniform.rvs()
        try:
            test = m.exp(newX - currentX)
        except OverflowError:
            # Overwhelmingly better proposal: always accept
            test = 2.
        if u < test:
            #Accept
            currentX = newX
            currentguess = newsample
            naccept = naccept + 1
            if currentX > bestX:
                bestfit = currentguess
                bestX = currentX
        samples.append(currentguess)
    accratio = double(naccept) / (nburn + nsamples)
    if accratio < .5 or accratio > .8:
        print("Acceptance ratio was " + str(accratio))
    #Drop the burn-in (and the final sample)
    samples = sc.array(samples).T[:, nburn:-1]
    print("Best-fit, overall")
    print("%s %s %s" % (bestfit, sc.mean(samples[2, :]),
                        sc.median(samples[2, :])))
    # int(): the number of bins has to be an integer
    nbins = int(round(sc.sqrt(nsamples) / 5.))
    histmb, edges = sc.histogramdd(samples.T[:, 0:2],
                                   bins=nbins,
                                   range=mbrange)
    mbsamples = []
    for ii in range(10):
        #Random sample; int() because a float cannot be used as an index
        ranindx = int(sc.floor(stats.uniform.rvs() * nsamples))
        ransample = samples.T[ranindx, 0:2]
        bestb = ransample[0]
        bestm = ransample[1]
        mbsamples.append((bestm, bestb))
    (pbhist, pbedges) = histogram(samples[2, :],
                                  bins=nbins,
                                  range=[0, 1])
    return (histmb, edges, mbsamples, pbhist, pbedges)