def check(runpath='./',outpath='./',itmin=0,itmax=None,kTthresh=0.17):
    """
    Name:
        check
    Author:
        Kari A. Frank
    Date:
        November 23, 2015
    Purpose:
        Make and display some diagnostic plots to check ongoing
        xmc runs. Plots chi2, norm image, spectrum, traces, scatter
        plots and histograms.

    Usage:
        import xmcinter.diagnostics as xd
        xd.check(runpath='./',itmin=0,itmax=None)

    Input:

        runpath:  string of path to the deconvolution files

        outpath:  string of path to store output files

        itmin/itmax: minimum/maximum iteration to use in check. if
                  itmax is None, the largest iteration found in the
                  frame returned by xplt.chi2() is used.

        kTthresh: float to optionally specify a minimum kT. if set,
                  then all blobs with kT<kTthresh will not be included
                  in the map. A second histogram grid with the same
                  threshold applied is also written; all other plots
                  use every blob. None is treated as 0.0.

    Output:
        - Displays plots and writes them (and the map) under outpath.
        - Returns the tuple (dfall,sf): the DataFrame of blob
          parameters returned by clean(), and the iteration-filtered
          frame originally returned by xplt.chi2().

    Usage Notes:
        - clean() is called on the merged output, so it also writes
          its merged text file as a side effect.

    Example:
    """

    # -- import modules --
    import os
    import xmcmap as xm

    if kTthresh is None:
        kTthresh = 0.0

    # -- plot chi2 --
    print("\nPlotting chi2 ...\n")
    sf = xplt.chi2(runpath,itmax=itmax,
                   outfile=os.path.join(outpath,'chi2_vs_iteration.html'))

    # -- calculate median chi2 --
    if itmax is None:
        itmax = np.max(sf.iteration)
    sf = xw.filterblobs(sf,'iteration',minvals=itmin,maxvals=itmax)
    medchi2 = xw.weighted_median(sf['redchi2'])

    # -- read deconvolution files --
    print("\nReading deconvolution files ...\n")
    dfall = merge_output(runpath,save=False)

    # -- add derivative columns --
    dfall = clean(dfall,itmin=itmin,itmax=itmax)
    print('\nIterations '+str(itmin)+' - '+str(itmax)+' : ')
    print("Total Number of Blobs =  "+str(len(dfall.index)))
    print('Median chi2/dof = '+str(medchi2)+'\n')

    # -- plot model and data spectra --
    print("\nPlotting spectrum ...\n")
    # iteration limits divided by 100; floor division is written
    # explicitly (this is what '/' did for integers under Python 2).
    # itmax can no longer be None here, it was resolved above.
    smin = itmin // 100
    smax = itmax // 100
    xplt.spectrum(runpath=runpath,smin=smin,smax=smax,bins=0.03,
                  ylog=True,xlog=True,
                  outfile=os.path.join(outpath,'spectrum.html'),
                  lines=True,nlines=100,energy_range=(0.5,10.0))

    # -- make median traceplots --
    print("\nPlotting traces ...\n")
    xplt.trace(dfall,weights=None,
               outfile=os.path.join(outpath,'trace_plots.html'))

    # -- make histograms --
    print("\nPlotting posteriors ...\n")
    nbins = 75
    w = 500
    h = 200
    xplt.histogram_grid([dfall,dfall],weights=[None,'blob_em'],
                        bins=nbins,ncols=2,norm=True,
                        outfile=os.path.join(outpath,'histogram_grid.html'),
                        legends=['Unweighted','EM weighted'],
                        width=w,height=h,iterations='iteration')

    print("\nPlotting posteriors with kT threshold ...\n")
    # filter once and reuse the frame for both (un)weighted histograms
    dfhot = xw.filterblobs(dfall,'blob_kT',minvals=kTthresh)
    xplt.histogram_grid([dfhot,dfhot],weights=[None,'blob_em'],
                        bins=nbins,ncols=2,norm=True,
                        outfile=os.path.join(outpath,
                                             'histogram_grid_kTthresh.html'),
                        legends=['Unweighted','EM weighted'],
                        width=w,height=h,iterations='iteration')

    # -- scatter plots--
    print("\nPlotting scatter plots ...\n")
    blobcols = [c for c in dfall.columns if 'blob' in c]
    xplt.scatter_grid(dfall[blobcols],agg=None,sampling=2000)

    # -- make norm map from most recent iteration --
    print("\nMaking blob em map ...\n")
    # make_map() is called with its default x column ('blob_phi'), so
    # derive the pixel size from that column; fall back to 'phi' for
    # frames that only carry the unprefixed name.
    xcol = 'blob_phi' if 'blob_phi' in dfall.columns else 'phi'
    pixelsize = (dfall[xcol].max()-dfall[xcol].min())/50.0
    img1file = os.path.join(outpath,
                            'bin'+str(int(pixelsize))+'_iter'+str(itmax))
    # two limits are given (kT threshold, iteration==itmax), so two
    # columns must be named; the 'iteration' column was missing.
    dflast = xw.filterblobs(dfall,['blob_kT','iteration'],
                            minvals=[kTthresh,itmax],
                            maxvals=[None,itmax])
    xm.make_map(dflast,paramname='blob_em',
                paramweights=None,iteration_type='total',
                binsize=pixelsize,nlayers=1,
                withsignificance=True,nproc=2,
                outfile=img1file,clobber=True)

    return (dfall,sf)
def clean(runpath='./',itmin=0,itmax=None,distance=8.0):
    """
    Name:
        clean
    Author:
        Kari A. Frank
    Date:
        November 1, 2015
    Purpose:
        Create a dataframe and associated saved file that includes an
        iteration and emission measure column, and only includes
        iterations after convergence

    Usage:
        import xmcinter.diagnostics as xd
        xd.clean(runpath='./',itmin=0,itmax=None,distance=8.0)

    Input:

        runpath:  string of path to the deconvolution files, or a
                  dataframe resulting from a previous call to
                  xw.merge_output()

        itmin:    minimum iteration to keep

        itmax:    maximum iteration to keep. if None, the largest
                  value in the 'iteration' column is used.

        distance: distance to the object in kpc (default=8.0), used to
                  calculate the emission measure

    Output:

        Returns the dataframe of deconvolution parameters, filtered by
        iteration, with the derived columns added when their inputs
        are present:
          blob_sigma         (from blob_lnsigma)
          blob_tau           (from blob_logtau)
          blob_em            (from blob_norm)
          blob_volume        (from blob_sigma)
          blob_numberdensity,
          blob_mass          (from blob_em and blob_volume)
        Also writes the filtered frame, tab-separated, to
        'deconvolution_merged_iter<itmin>-<itmax>.txt' in the current
        working directory.

    Usage Notes:
        - typically this is run after xplt.chi2, to determine the
          minimum iteration
        - assumes that relevant column names begin with 'blob'. if not
          found, will skip adding the new column.
        - if a dataframe is passed as runpath, the derived columns are
          added to that dataframe in place.

    Example:
    """

    # -- import modules --
    import astro_utilities as astro

    # -- read deconvolution files --
    if isinstance(runpath,str):
        df = merge_output(runpath,save=False)
    else:
        df = runpath

    # -- add blob size in arcsec column --
    if 'blob_lnsigma' in df.columns:
        df['blob_sigma'] = np.exp(df['blob_lnsigma'])

    # -- add tau column, if used lvpshock --
    # (read the log column; 'blob_tau' does not exist yet at this point)
    if 'blob_logtau' in df.columns:
        df['blob_tau'] = 10.0**(df['blob_logtau'])

    # -- add emission measure column --
    if 'blob_norm' in df.columns:
        df['blob_em'] = astro.norm_to_em(df['blob_norm'],
                                         astro.convert_distance(distance,
                                                                'kpc',
                                                                'cm'))

    # -- add hydrogen number densities of blobs in cm^-3, hydrogen mass --
    if 'blob_sigma' in df.columns:
        df['blob_volume'] = xw.gaussian_volume(
            astro.convert_arcsec(df['blob_sigma'],distance,'kpc','cm'))
        # density and mass need the emission measure, which is only
        # available when 'blob_norm' was present
        if 'blob_em' in df.columns:
            df['blob_numberdensity'] = astro.em_to_density(
                df['blob_em'],df['blob_volume'],density_type='number')
            df['blob_mass'] = astro.em_to_mass(df['blob_em'],
                                               df['blob_volume'],
                                               tounit='sol')

    # -- remove iterations before convergence --
    if itmax is None:
        itmax = np.max(df['iteration'])
    df = xw.filterblobs(df,'iteration',minvals=itmin,maxvals=itmax)

    # -- save as file --
    outfile = ('deconvolution_merged_iter'
               +str(int(itmin))+'-'+str(int(itmax))+'.txt')
    df.to_csv(outfile,sep='\t')

    return df
def _as_option_list(value,nparams):
    """Return a new per-parameter list: copy a list, else repeat value."""
    if isinstance(value,list):
        return list(value)
    return [value]*nparams


def _image_sizes(df,paramx,paramy,imagesize):
    """Return (ximagesize,yimagesize) from imagesize or the blob extent."""
    if imagesize is None:
        # default: 10% larger than the extent of the blob positions
        return (1.1*(df[paramx].max()-df[paramx].min()),
                1.1*(df[paramy].max()-df[paramy].min()))
    if isinstance(imagesize,(tuple,list)):
        if len(imagesize) == 0:
            raise ValueError("imagesize must contain at least one element")
        if len(imagesize) > 2:
            print("calculate_map: Warning: imagesize has too many"
                  " elements, using first two only")
        if len(imagesize) >= 2:
            return (imagesize[0],imagesize[1])
        return (imagesize[0],imagesize[0])
    return (imagesize,imagesize)


def _history_header(history_lines):
    """Return a fits.Header with one HISTORY card per given string."""
    hdr = fits.Header()
    for line in history_lines:
        hdr['HISTORY'] = line
    return hdr


def make_map(indata,outfile=None,paramname='blob_kT',paramweights=None,
             binsize=10.0,itmod=100,paramshape='gauss',ctype='median',
             x0=None,y0=None,imagesize=None,witherror=True,sigthresh=0.0,
             sigthreshparam=None,imgthresh=None,imgthreshparam=None,
             paramx='blob_phi',
             paramy='blob_psi',paramsize='blob_sigma',exclude_region=None,
             iteration_type='median',clobber=False,nlayers=None,
             parallel=True,nproc=3,cint=True,movie=False,moviedir=None,
             cumulativemovie=False,withsignificance=False,rotation=0.0):
    """
    Author: Kari A. Frank
    Date: November 19, 2015
    Purpose: Read a file containing blob parameters and create an
             associated map.

    Input:

      indata (string or DataFrame):   name of file containing the blob
                      parameters (string), formatted as output from
                      the python function
                      xmcinter.xmcfiles.merge_output() OR
                      a pandas dataframe of blob parameters in same format.

      outfile (string):  name of the output fits file. The saved
                      files are named
                      <outfile base>_<ctype>_<paramname>.fits

      paramname (string or list of str) : name of the column
                      (parameter name) to be mapped (default='blob_kT')

      paramx,paramy (strings) : names of the column of the x and y
                      blob positions (default='blob_phi',
                      'blob_psi')

      paramshape (string) : shape of the blobs, 'gauss','sphere', or
                      'points'. 'points' simply assumes every blob is
                      a point, even if the model was not
                      (default='gauss')

      paramsize : string name of the column containing the size of
                      the blobs (default='blob_sigma')

      paramweights : string name of the column containing the weights
                      (default=None, no weighting). if paramname is a
                      list, then can also give paramweights as a list
                      of the same length, specifying a different
                      weights column for each map -- this is important
                      if, e.g. one of the paramnames is 'blob_em',
                      which is typically also used as the weights for
                      the other parameters. passing a value of None
                      will result in an unweighted map.

      itmod :         set to an integer > 1 to use only every
                      it_mod'th iteration (default=100)

      nlayers:        optionally set number of layers (number of
                      iterations) to use rather than itmod. if set,
                      will override itmod. values are limited to the
                      range 1 .. number of iterations. default=None

      binsize :       size of a pixel, in same units as paramx and
                      paramy, default=10.0

      iteration_type (string) : 'median', 'average', or 'total'.
                      Determines how to combine the blobs within each
                      iteration to create the iteration image.
                      (default is 'median', but note that it should
                      be set to 'total' if making emission measure
                      map) if paramname is a list, then can also give
                      iteration_type as a list of the same length,
                      specifying a different iteration_type for each
                      map -- this is important if, e.g. one of the
                      paramnames is 'blob_em', which should generally
                      use iteration_type='total'.

      ctype (string) : 'median', 'average', 'total', or 'error' to
                      specify how to combine the different iteration
                      images when making the final image. error will
                      produce a map of the parameter error
                      (default='median'). if paramname is a list,
                      then can also give ctype as a list of the same
                      length, specifying a different ctype for each
                      map. ('max' also passes input validation.)

      witherror (bool) : switch to also return a map of the error in
                      each pixel (standard deviation)

      withsignificance (bool) : switch to also return a map of the
                      significance (in #sigma, i.e. img/errimg) in
                      each pixel. If True, then will set
                      witherror=True, regardless of whether the
                      witherror argument was explicitly set.

      imagesize (float) : optionally specify the size of output image
                      (length of one side of square image) in same
                      units as paramx,y. May also be a 1- or
                      2-element list/tuple (x size, y size). if
                      paramnames is list, then all maps will have the
                      same image size as the first one in the list.

      x0,y0 (floats): center coordinates of the desired output image,
                      in same units as paramx,paramy. if paramname is
                      a list, then all maps will have the same x0,y0
                      coordinates as the first map.

      exclude_region (3D float tuple): tuple of form (x0,y0,radius)
                      specifying a circular region to mask (set image
                      values to zero). x0,y0 are the center
                      coordinates of the circle. all units are in the
                      same units as paramx and paramy.

      sigthresh (float): optionally specify a significance threshold
                      (in number of sigma) for the final map. all
                      pixels that do not meet this significance
                      threshold are set to nan.

      sigthreshparam (string): specify which parameter should be used
                      to calculate the significance for the
                      significance threshold. Ignored if
                      sigthresh=0.0. Most commonly,
                      sigthreshparam=None (default) or
                      sigthreshparam='blob_em'. The latter will then
                      only map the regions (on a per pixel basis) for
                      which the emission measure significance was
                      greater than sigthresh. If not None, then
                      sigthreshparam must be an element of paramname
                      list.

      imgthresh (float) : similar to sigthresh, except the threshold
                      is set as a minimum pixel value in the
                      specified image (imgthreshparam map). If this
                      minimum is greater than the maximum pixel value
                      in the imgthreshparam map, then it will be
                      ignored.

      imgthreshparam (string) : same as sigthreshparam, but
                      associated with the imgthresh argument.
                      typically, this should be either None (no
                      thresholding, default), or 'blob_em'.

      clobber (bool) : specify whether any existing fits file of the
                      same name as outfile should be overwritten.
                      If False, parameters whose output file already
                      exists are skipped (with a warning).

      parallel (bool) : switch to specify if the iteration images
                      should be computed in serial or in parallel
                      using multiprocessing (default=True)

      nproc (int) :   if parallel=True, then nproc sets the number of
                      processors to use (default=3). ignored if
                      parallel=False

      cint (bool) :   turn on/off the use of ctypes for integration
                      (default=True). set cint=False if gaussian.c is
                      not compiled on your machine.

      movie (bool) :  save each layer image individually in order to
                      create a movie from the images. Number of
                      frames=nlayers. (default=False). If paramname is
                      a list, then a movie will created for each
                      parameter map, or can pass a list of bool to
                      movie specifying which maps in paramname should
                      get an associated movie.

      moviedir (str) : optionally specify the folder in which to save
                      the frames for the movie. ignored if
                      movie=False (default=outfile_base_parname_movie/).
                      Be careful - if moviedir already exists, any
                      frames in it will be overwritten!
                      NOTE: this argument is currently not read by
                      the code; the default folder is always used.

      cumulativemovie (bool) : create the movie using cumulative
                      images, i.e. recreate the image using all
                      available iterations each time. ignored if
                      movie=False (default=False)

      rotation (numeric) : number of degrees to rotate the final
                      images. If not a multiple of 90, then the
                      output image size will be greater than the
                      imagesize parameter (but with empty corners),
                      to avoid dropping any pixels.

    Output:

      Saves one fits file per mapped parameter, containing the
      calculated map (plus error and significance extensions if
      requested).

      Returns the list of final image arrays, one per mapped
      parameter (empty if every parameter was skipped).

    Usage Notes:

     - The implementation for shape='sphere' is not yet functional.
     - If given multiple parameters to map, then all will mapped on the
       same x,y grid (imagesize, binsizes, and x0,y0 will be the same)
     - The image does not have to be square, but each pixel is always
       square.
     - If the input dataframe has no columns 'iteration', then all
       blobs will be assumed to come from a single iteration (the
       column is added to the passed dataframe in place).
     - If both sigthresh and imgthresh are used, then sigthresh will
       be applied first.
     - List arguments (paramname, paramweights, ...) are copied, so
       the caller's lists are never modified.
     - The iterations used as layers are chosen at random
       (np.random.choice), so repeated calls can differ.

    """

    #----Import Modules----
    from wrangle import filterblobs
    import time

    #----Set any defaults----
    if withsignificance is True:
        witherror = True

    #----Check if lists (always work on private copies)----
    if isinstance(paramname,list):
        paramname = list(paramname)
    else:
        paramname = [paramname]
    nparams = len(paramname)
    paramweights = _as_option_list(paramweights,nparams)
    iteration_type = _as_option_list(iteration_type,nparams)
    ctype = _as_option_list(ctype,nparams)
    movie = _as_option_list(movie,nparams)

    #----Verify inputs----
    valid_types = ['median','average','total','error','max']
    for i in range(nparams):
        if ctype[i] not in valid_types:
            print("Warning: Unrecognized ctype. Using ctype='median'")
            ctype[i] = 'median'
        if iteration_type[i] not in valid_types:
            print("Warning: Unrecognized iteration_type. "
                  "Using iteration_type='median'")
            iteration_type[i] = 'median'
    if paramshape not in ('gauss','sphere','points'):
        print("Warning: Unrecognized paramshape. Using paramshape='gauss'")
        paramshape = 'gauss'
    if (sigthreshparam is not None) and (sigthreshparam not in paramname):
        print("Warning: "+sigthreshparam+" is not in paramname. "
              "Resetting sigthreshparam=None.")
        sigthreshparam = None
    if (imgthreshparam is not None) and (imgthreshparam not in paramname):
        print("Warning: "+imgthreshparam+" is not in paramname. "
              "Resetting imgthreshparam=None.")
        imgthreshparam = None

    #----Store blob information in DataFrame and set output file----
    if outfile is not None:
        outfile_base,ext = os.path.splitext(outfile)
        if not outfile_base.endswith('_'):
            outfile_base = outfile_base+'_'
    if isinstance(indata,str):
        df = pd.read_table(indata,sep='\t',index_col=0)
        if outfile is None:
            (fname,ext) = os.path.splitext(indata)
            outfile_base = fname+'_bin'+str(int(binsize))+'_'
        indatastr = indata
    else:
        df = indata
        if outfile is None:
            outfile_base = 'bin'+str(int(binsize))+'_'
        indatastr = 'DataFrame'
    if 'iteration' not in df.columns:
        df['iteration'] = np.zeros_like(df[paramname[0]])

    #--set output file names and moviedirs--
    outfiles = []
    moviedirs = []
    keep = []  # indices of the parameters that will actually be mapped
    for p in range(nparams):
        stem = outfile_base+ctype[p]+'_'+paramname[p]
        outfiles.append(stem+'.fits')
        moviedirs.append(stem+'_movie/')

        #--check if output file already exists--
        if os.path.isfile(outfiles[p]) and clobber is not True:
            # report the actual per-parameter file (outfile may be None)
            print("Warning: "+outfiles[p]+" exists and clobber=False. "
                  "Not mapping "+paramname[p]+".")
            #-check if sigthreshparam is being removed-
            if paramname[p] == sigthreshparam:
                print("Warning: sigthreshparam is not being mapped. "
                      "Resetting sigthresh=0.0")
                sigthreshparam = None
                sigthresh = 0.0
            #-check if imgthreshparam is being removed-
            if paramname[p] == imgthreshparam:
                print("Warning: imgthreshparam is not being mapped. "
                      "Resetting imgthresh=None")
                imgthreshparam = None
                imgthresh = None
        else:
            keep.append(p)

    #--remove parameters that would be clobbered if clobber=False--
    # select by index: removing by value (list.remove) would drop the
    # first equal entry and could misalign the per-parameter lists
    outfiles = [outfiles[p] for p in keep]
    moviedirs = [moviedirs[p] for p in keep]
    paramname = [paramname[p] for p in keep]
    iteration_type = [iteration_type[p] for p in keep]
    ctype = [ctype[p] for p in keep]
    paramweights = [paramweights[p] for p in keep]
    movie = [movie[p] for p in keep]
    nparams = len(paramname)
    if nparams == 0:
        # nothing left to map, skip the expensive image construction
        return []

    #----Set default image size and center----
    ximagesize,yimagesize = _image_sizes(df,paramx,paramy,imagesize)
    if x0 is None:
        x0 = (df[paramx].max()-df[paramx].min())/2.0+df[paramx].min()
    if y0 is None:
        y0 = (df[paramy].max()-df[paramy].min())/2.0+df[paramy].min()

    # float division so that integer image sizes are not truncated
    ximageradius = ximagesize/2.0
    yimageradius = yimagesize/2.0

    xmin = x0 - ximageradius
    xmax = x0 + ximageradius
    ymin = y0 - yimageradius
    ymax = y0 + yimageradius

    ximageradius = (xmax-xmin)/2.0
    yimageradius = (ymax-ymin)/2.0

    ximagesize = ximageradius*2.0
    yimagesize = yimageradius*2.0

    print('x,yimagesize,x0,y0,xmin,ymin =  '
          +' '.join(str(v) for v in
                    (ximagesize,yimagesize,x0,y0,xmin,ymin)))

    #-number of map layers (one per iteration) and number of pixels-
    niter = np.unique(df['iteration']).size
    if nlayers is None:
        nlayers = int(niter // itmod)
        if nlayers == 0:
            nlayers = 1
    else:
        # max nlayers = number iterations, min nlayers = 1
        nlayers = max(1,min(nlayers,niter))
        itmod = niter // nlayers
    nbins_x = int(np.floor((xmax - xmin)/binsize))
    nbins_y = int(np.floor((ymax - ymin)/binsize))
    print('nbins_x, nbins_y, nlayers =  '
          +' '.join(str(v) for v in (nbins_x,nbins_y,nlayers)))

    #--Remove iterations according to itmod--

    #-make list of iterations to use-
    # randomly chooses the required number of iterations
    #  from iterations which exist in the dataframe
    its = np.random.choice(df['iteration'].unique(),size=nlayers,
                           replace=False)
    itstr = ['iteration']*len(its)

    #-keep only matching iterations-
    df = filterblobs(df,itstr,minvals=its,maxvals=its,logic='or')

    #----Calculate Blob Volumes----
    if 'blob_volume' not in df.columns:
        if paramshape == 'gauss':
            df['blob_volume'] = (2.0*np.pi*np.square(df[paramsize]))**1.5
        elif paramshape == 'sphere':
            df['blob_volume'] = (4.0/3.0)*np.pi*df[paramsize]**3.0
        elif paramshape == 'points':
            # set to much smaller than pixel
            df['blob_volume'] = (0.1*binsize)**3.0

    #----Group by Iteration----
    layers = df.groupby('iteration')

    #----Iterate over groups (i.e. iterations)----
    if parallel is False:
        # create iteration images in serial
        image_stacks = np.zeros((nbins_x,nbins_y,nparams,nlayers))
        # i=iteration number, group = subset of dataframe
        for layer,(i,group) in enumerate(layers):
            print('layer =  '+str(layer))
            image_stacks[:,:,:,layer] = iteration_image(
                group,paramname,paramweights,
                nbins_x,nbins_y,binsize,xmin,ymin,
                iteration_type,paramshape,paramx,paramy,
                paramsize,cint,fast=True)
    else:
        # construct argument lists for multiprocessing
        imgargs = [[group,paramname,paramweights,nbins_x,nbins_y,
                    binsize,xmin,ymin,iteration_type,paramshape,
                    paramx,paramy,paramsize,cint]
                   for i,group in layers]
        pool = Pool(nproc)
        try:
            image_stacks = np.array(pool.map(iteration_image_star,
                                             imgargs))
        finally:
            # always release the worker processes
            pool.close()
            pool.join()
        # reorder axes to match the serial layout (x,y,param,layer)
        image_stacks = (image_stacks.swapaxes(0,2).swapaxes(0,1)
                        .swapaxes(2,3))

    #----Loop through parameters to create and manipulate final images----
    imgs = []     # final image arrays (one per parameter)
    errimgs = []  # error image arrays, or None (one per parameter)
    for p in range(nparams):
        #--Collapse Image Stack (combine iterations)--
        themap = collapse_stack(image_stacks[:,:,p,:],ctype=ctype[p])

        #--Create Error Maps--
        if (sigthresh != 0.0) or (witherror is True):
            # - compute error (standard deviation) map -
            errmap = collapse_stack(image_stacks[:,:,p,:],ctype='error')
        else:
            errmap = None

        #--Mask Region--
        # not yet functional
        if exclude_region is not None:
            msk = circle_mask(df,paramx,paramy,exclude_region,binsize,
                              imagesize,x0,y0)
            themap = themap*msk
            if errmap is not None:
                errmap = errmap*msk

        #--Rotate Image--
        if rotation != 0.0:
            themap = rotate(themap,rotation,axes=(0,1))
            if errmap is not None:
                errmap = rotate(errmap,rotation,axes=(0,1))
            #after testing, add cval=np.nan argument

        #--Save images to list--
        imgs.append(themap)
        errimgs.append(errmap)

        #--Make movie--
        # NOTE(review): the collapsed image (not the per-layer stack)
        # is what gets passed here -- confirm against movie_from_stack
        if movie[p] is True:
            movie_from_stack(themap,moviedirs[p],
                             cumulativemovie=cumulativemovie,
                             parallel=parallel)

    #--Loop through images, apply thresholds, and save to fits--
    #  (must be separate loop to allow for sigthreshparam!=param[p])

    sigmap = None
    if (sigthreshparam is not None) and (sigthreshparam in paramname):
        #-sigthreshparam has just been mapped-
        sigp = paramname.index(sigthreshparam)
        if errimgs[sigp] is not None:
            sigmap = abs(imgs[sigp])/errimgs[sigp]

    imgthmap = None
    if (imgthreshparam is not None) and (imgthreshparam in paramname):
        #-imgthreshparam has just been mapped-
        # copy: the images are thresholded in place below, and the
        # reference image must stay unmasked for the later parameters
        imgthmap = imgs[paramname.index(imgthreshparam)].copy()

    for p in range(nparams):
        themap = imgs[p]
        errmap = errimgs[p]

        #--Apply significance threshold--
        if sigthreshparam is None:
            # errmap is only None when neither error maps nor a
            # significance threshold were requested
            sigmap = None if errmap is None else abs(themap)/errmap
        # - set pixels with significance < threshold to Nan -
        if (sigthresh != 0.0) and (sigmap is not None):
            themap[sigmap < sigthresh] = np.nan

        #--Apply img threshold--
        if imgthresh is not None:
            if imgthreshparam is None:
                imgthmap = themap
            # - set pixels with value < threshold to Nan -
            if np.nanmax(imgthmap) > imgthresh:
                themap[imgthmap < imgthresh] = np.nan
            else:
                print("Warning: imgthresh > max imgthreshparam image. "
                      "Not applying imgthresh.")

        #--Save map to fits file--

        #--write history--
        history1 = ('make_map,'+indatastr+',outfile='+nstr(outfiles[p])
                    +',paramname='+nstr(paramname[p])
                    +',paramweights='+nstr(paramweights[p])
                    +',paramx='+nstr(paramx)+',paramy='+nstr(paramy)
                    +',paramsize='+nstr(paramsize)+',binsize='
                    +nstr(binsize)+',itmod='+nstr(itmod)+',paramshape='
                    +nstr(paramshape)+',ctype='+nstr(ctype[p])
                    +',iteration_type='
                    +nstr(iteration_type[p])+',x0='+nstr(x0)+',y0='
                    +nstr(y0)
                    +',imagesize='+nstr(imagesize)+',sigthresh='
                    +nstr(sigthresh)+',sigthreshparam='
                    +nstr(sigthreshparam)+',imgthresh='
                    +nstr(imgthresh)+',imgthreshparam='
                    +nstr(imgthreshparam)+',movie='
                    +str(movie[p])
                    +',moviedir='+moviedirs[p])
        history3 = 'ximagesize = '+nstr(ximagesize)
        history4 = 'yimagesize = '+nstr(yimagesize)
        history2 = 'Created '+str(time.strftime("%x %H:%M:%S"))
        history = [history2,history1,history3,history4]

        #--write file--
        hdu = fits.PrimaryHDU(themap,header=_history_header(history))
        hdu.writeto(outfiles[p],clobber=clobber)

        if witherror is True:
            fits.append(outfiles[p],errmap,
                        _history_header(history+['error map']))

        if withsignificance is True:
            fits.append(outfiles[p],sigmap,
                        _history_header(
                            history+['significance (img/errimg) map']))

    return imgs