def run_emcee(self,Nthreads_galario,Nthreads_emcee,nwalkers,starting_point,Nsteps,output_filename='temporary_model',burn_in=10):
    '''
    Run emcee to produce MCMC samples from the probability evaluated by
    self.lnprob.

    The sampler first advances 100 steps from the scattered starting
    positions and then runs int(Nsteps/100) further batches of 100 steps.
    After every batch the whole chain accumulated so far, minus the first
    `burn_in` steps of each walker, is flattened to (n_samples, ndim) and
    written with np.save, overwriting the previous save.

    Parameters
    Nthreads_galario, Nthreads_emcee: (int,int)
        Number of threads to be used by Galario and by emcee, respectively
    nwalkers: int
        Number of emcee walkers, see emcee documentation
    starting_point: list of float
        Parameters used as the centre of the initial walker positions. The
        sampler is built with ndim=10, so 10 values are expected.
    Nsteps: int
        Number of MCMC steps requested; int(Nsteps/100) batches of 100 steps
        are run in addition to the initial 100 steps.
    output_filename: string
        Filename passed to np.save for storing the samples
    burn_in: int
        Number of leading steps of each walker left out of the saved
        samples. Defaults to 10, the value that used to be hard-coded.
    '''
    ndim = 10
    n_batches = int(Nsteps/100)

    from galario import double
    double.threads(Nthreads_galario)

    # Scatter the walkers around the starting point: every parameter is
    # multiplied by (1 + 0.1 * standard normal deviate).
    centre = np.array(starting_point)
    pos = [centre*(1+.1*np.random.randn(ndim)) for _ in range(nwalkers)]

    # NOTE(review): the `threads` keyword looks like the emcee 2.x API -
    # confirm against the emcee version in use.
    sampler = emcee.EnsembleSampler(nwalkers, ndim, picklable_boundmethod(self.lnprob), threads=Nthreads_emcee)
    sampler.run_mcmc(pos, 100)

    for idx in range(n_batches):
        # Passing None continues the chain from where the last call stopped.
        sampler.run_mcmc(None, 100)
        samples = sampler.chain[:, burn_in:, :].reshape((-1, ndim))
        np.save(output_filename, samples)
        print(idx,' of ',n_batches)
def run_Multinest(self,Nthreads,output_filename='temporary_model'):
    '''
    Run MultiNest on self.loglike_multinest with the priors in self.my_prior.

    self.my_prior needs to be set up as a MultiNest-type prior before this
    is called.

    Parameters
    Nthreads: int
        Number of threads handed to Galario (double.threads) for computing
        the model visibilities
    output_filename: string
        MultiNest output base file name. See the pymultinest docs for how to
        read and analyze the output files.
    '''
    from galario import double
    double.threads(Nthreads)

    # Number of free parameters handed to the sampler.
    n_params = 10

    run_options = dict(
        outputfiles_basename=output_filename,
        resume=True,
        verbose=True,
        sampling_efficiency='model',
    )
    pymultinest.run(self.loglike_multinest, self.my_prior, n_params, **run_options)
def interpolate_model(u, v, freq, model, nthreads=1, dRA=0., dDec=0.):
    """
    Sample every channel of a model image at the given (u, v) points.

    For each entry of model.freq the plane model.image[:, :, i, 0] is passed
    to galario's sampleImage; the real and imaginary parts are gathered as
    columns of two (u.size, n_channels) arrays.

    Parameters
    u, v: arrays
        Baseline coordinates handed to sampleImage.
    freq:
        Stored unchanged in the returned Visibilities object.
    model:
        Object providing `x`, `freq` and `image` attributes.
    nthreads: int
        Number of threads galario is told to use.
    dRA, dDec: float
        Offsets passed to sampleImage after multiplication by `arcsec`.

    Returns
    A Visibilities object built from u, v, freq, the real and imaginary
    parts, and an array of ones of the same shape in the last argument
    (presumably the weights - confirm against the Visibilities signature).
    """
    double.threads(nthreads)

    # Pixel size from the spacing of the first two grid points of model.x.
    pixel_size = (model.x[1] - model.x[0])*arcsec

    real_columns = []
    imag_columns = []
    for channel in range(len(model.freq)):
        # sampleImage is given a C-ordered copy of the channel plane.
        plane = model.image[:,:,channel,0].copy(order='C')
        sampled = double.sampleImage(plane, pixel_size, u, v,
                                     dRA=dRA*arcsec, dDec=dDec*arcsec)
        column_shape = (u.size, 1)
        real_columns.append(sampled.real.reshape(column_shape))
        imag_columns.append(sampled.imag.reshape(column_shape))

    real = numpy.concatenate(real_columns, axis=1)
    imag = numpy.concatenate(imag_columns, axis=1)
    unit_weights = numpy.ones(real.shape)

    return Visibilities(u, v, freq, real, imag, unit_weights)
def find_max_prob(self, starting_guess, Nthreads_galario):
    '''
    Search for the parameter values that maximise self.lnprob.

    The negative of self.lnprob is minimised with scipy's Nelder-Mead
    optimizer. The result may be useful for ML fitting or as a starting
    point for an MCMC sampler such as emcee.

    Parameters
    starting_guess: list of float
        Disk fitting parameters used as the starting point of the optimizer.
    Nthreads_galario: int
        Number of threads to be used by Galario

    Returns
    list of float
        The `x` entry of the optimizer result, i.e. the parameters at which
        the optimizer stopped. The full result is also printed.
    '''
    from galario import double
    double.threads(Nthreads_galario)

    def negative_log_prob(*params):
        # The optimizer minimises, so flip the sign of the log-probability.
        return -self.lnprob(*params)

    from scipy.optimize import minimize
    fit = minimize(negative_log_prob, starting_guess, method='Nelder-Mead')
    print(fit)
    return fit['x']
def compare_vis_galario(datfile='data/HD163296.CO32.regridded.cen15',modfile='model/testpy_alma',new_weight=[1,],systematic=False,isgas=True,plot_resid=False):
    '''Calculate the raw chi-squared based on the difference between the model and data visibilities.

    The model image is read from a FITS file and sampled at the (u,v)
    points of the data with galario; the weighted squared residuals of the
    real and imaginary parts are then summed.

    :param datfile: (default = 'data/HD163296.CO32.regridded.cen15')
         The base name for the data file. The code reads in the visibilities from datfile+'.vis.fits'

    :param modfile: (default='model/testpy_alma')
         The base name for the model file. The code reads in the model image from modfile+'.fits'

    :param new_weight:
         An array containing the weights to be used in the chi-squared calculation. This should have the same dimensions as the real and imaginary part of the visibilities (ie Nbas x Nchan). It is only used when it has more than one element; otherwise the weights stored in the data file are used. The array is copied, so the caller's array is not modified. The same copy serves for the real and imaginary parts, so a sample is dropped from both when either part of the data is exactly zero.

    :param systematic:
         The systematic weight to be applied. The value sent with this keyword is used to scale the absolute flux level of the model. It is defined such that a value >1 decreases the model and a value <1 increases the model (the model visibilities are divided by the systematic parameter). It is meant to mimic a true flux in the data which is larger or smaller by the fraction systematic (e.g. specifying systematic=1.2 is equivalent to saying that the true flux of the data is 20% brighter than what has been observed, with this scaling applied to the model instead of changing the data)

    :param isgas:
         If the data is line emission then the data has an extra dimension covering the >1 channels. Set this keyword to ensure that the data is read in properly. Conversely, if you are comparing continuum data then set this keyword to False.

    :param plot_resid:
         If True, plot a histogram of the weighted residuals together with a Gaussian fit, and print the fit parameters.

    :returns: The chi-squared value.

    :raises ValueError: If the data layout is not one of the supported combinations of telescope and polarization axis, or if no weights are available.
    '''

    def _swap_byteorder(image):
        # Same operation as the former `image.byteswap().newbyteorder()`;
        # ndarray.newbyteorder was removed in NumPy 2.0, the dtype method
        # is still available.
        return image.byteswap().view(image.dtype.newbyteorder())

    #Limit the multi-threading of Galario (necessary on the computing cluster)
    gdouble.threads(1)

    # - Read in object visibilities
    real_obj = imag_obj = weight_real = weight_imag = None
    with fits.open(datfile+'.vis.fits') as obj:
        header = obj[0].header
        freq0 = header['crval4']
        # u,v are multiplied by the reference frequency before sampling.
        u_obj = (obj[0].data['UU']*freq0).astype(np.float64)
        v_obj = (obj[0].data['VV']*freq0).astype(np.float64)
        vis_obj = (obj[0].data['data']).squeeze()
        # NOTE(review): the nesting below was reconstructed from a source
        # without indentation; the `else` branch is assumed to be the
        # non-ALMA fallback - confirm against the version history.
        if isgas:
            if header['telescop'] == 'ALMA':
                if header['naxis3'] == 2:
                    # Average the two entries of the third axis.
                    real_obj = (vis_obj[:,:,0,0]+vis_obj[:,:,1,0])/2.
                    imag_obj = (vis_obj[:,:,0,1]+vis_obj[:,:,1,1])/2.
                    weight_real = vis_obj[:,:,0,2]
                    weight_imag = vis_obj[:,:,1,2]
            else:
                real_obj = vis_obj[::2,:,0]
                imag_obj = vis_obj[::2,:,1]
        else:
            if header['telescop'] == 'ALMA':
                if header['naxis3'] == 2:
                    real_obj = (vis_obj[:,0,0]+vis_obj[:,1,0])/2.
                    imag_obj = (vis_obj[:,0,1]+vis_obj[:,1,1])/2.
                    weight_real = vis_obj[:,0,2]
                    weight_imag = vis_obj[:,1,2]

    if real_obj is None or imag_obj is None:
        raise ValueError('Unsupported visibility layout in '+datfile+'.vis.fits')

    #Generate model visibilities
    with fits.open(modfile+'.fits') as model_fits:
        model = model_fits[0].data.squeeze()
        dxy = np.radians(np.abs(model_fits[0].header['cdelt1']))

    if isgas:
        real_model = np.zeros(real_obj.shape)
        imag_model = np.zeros(imag_obj.shape)
        for i in range(real_obj.shape[1]):
            vis = gdouble.sampleImage(_swap_byteorder(np.flipud(model[i,:,:])),dxy,u_obj,v_obj)
            real_model[:,i] = vis.real
            imag_model[:,i] = vis.imag
    else:
        # NOTE(review): unlike the line-emission branch, the continuum
        # image is not flipped before sampling - confirm this is intended.
        vis = gdouble.sampleImage(_swap_byteorder(model),dxy,u_obj,v_obj)
        real_model = vis.real
        imag_model = vis.imag

    if systematic:
        real_model = real_model/systematic
        imag_model = imag_model/systematic

    if len(new_weight) > 1:
        # Work on a private copy so the caller's weights are not zeroed.
        shared_weight = np.array(new_weight)
        weight_real = shared_weight
        weight_imag = shared_weight

    if weight_real is None or weight_imag is None:
        raise ValueError('No weights available: the data file provides none for this layout and new_weight was not given')

    # Samples whose data value is exactly zero get zero weight.
    weight_real[real_obj==0] = 0.
    weight_imag[imag_obj==0] = 0.
    print('Removed data %i' % ((weight_real ==0).sum()+(weight_imag==0).sum()))

    if plot_resid:
        #Code to plot, and fit, residuals
        #If errors are Gaussian, then residuals should have gaussian shape
        #If error size is correct, residuals will have std=1
        use = (weight_real > .05) & (weight_imag>.05)
        diff = np.concatenate((((real_model[use]-real_obj[use])*np.sqrt(weight_real[use])),((imag_model[use]-imag_obj[use])*np.sqrt(weight_imag[use]))))
        diff = diff.flatten()
        # `density=True` replaces the `normed` keyword removed from Matplotlib.
        n,bins,_ = plt.hist(diff,10000,density=True,histtype='step',color='k',label='Data',lw=3)
        popt,pcov = curve_fit(gaussian,bins[1:],n)
        y=gaussian(bins,popt[0],popt[1],popt[2])
        print('Gaussian fit parameters (amp,width,center): ',popt)
        print('If errors are properly scaled, then width should be close to 1')
        plt.plot(bins,y,'r--',lw=6,label='gaussuian')
        #slight deviations from gaussian, but gaussian is still the best...
        plt.xlabel(r'(Model-Data)/$\sigma$',fontweight='bold',fontsize=20)
        ax=plt.gca()
        for axis in (ax.xaxis, ax.yaxis):
            for tick in axis.get_major_ticks():
                tick.label1.set_fontsize(20)
                tick.label1.set_fontweight('bold')
        plt.show()

    chi = ((real_model-real_obj)**2*weight_real).sum() + ((imag_model-imag_obj)**2*weight_imag).sum()
    return chi
def do_timing(options, input_data, gpu=False, tpb=0, omp_num_threads=0):
    """
    Time one chi2 kernel with timeit and append the result to a log file.

    Parameters
    ----------
    options:
        Parsed options; the attributes read here are `output_header`,
        `output`, `cycles`, `image`, `use_py`, `dtype` and `no_verbose`.
    input_data:
        Not used directly: the timed statement imports `input_data` from
        `__main__` and unpacks it as the kernel's arguments.
    gpu: bool
        If True the CUDA library is timed, otherwise the CPU library.
    tpb: int
        Value passed to acc_lib_cuda.threads when `gpu` is set.
    omp_num_threads: int
        Value passed to acc_lib_cpu.threads when `gpu` is not set.

    If `options.output_header` is set, only the column header line is
    written to `options.output` and the process exits with status 0.
    The result line also reads `size` and `nsamples`, which are not
    parameters (presumably module-level globals - confirm).
    """
    # Function-local import: `sys.exit` does not depend on the `exit` helper
    # that the `site` module injects into builtins.
    import sys

    if gpu:
        acc_lib_cuda.threads(tpb)
    else:
        acc_lib_cpu.threads(omp_num_threads)

    str_headers = "\t".join([
        "size", "nsamples", "real", "OMP", "tpb", "Ttot", "Tavg", "Tstd",
        "Tmin"
    ])

    if options.output_header:
        with open(options.output, 'w') as f:
            f.write(str_headers + "\n")
        sys.exit(0)

    cycles = options.cycles
    number = 1

    # Build the statement timeit executes; it pulls its names from __main__.
    acc_lib = 'acc_lib_cuda' if gpu else 'acc_lib_cpu'
    kernel = 'chi2Image' if options.image else 'chi2Profile'
    if options.use_py:
        statement = 'from __main__ import input_data, py_{0}; py_{0}(*input_data)'.format(kernel)
    else:
        statement = 'from __main__ import input_data, {0}; {0}.{1}(*input_data)'.format(acc_lib, kernel)
    t = timeit.Timer(statement)

    if options.output:
        filename = options.output
    else:
        filename = "timings_"
        if gpu:
            filename += "GPU_{}".format(tpb)
        else:
            filename += "CPU_OMP_NUM_THREADS_{}".format(omp_num_threads)
        filename += "_{}.txt".format(
            datetime.datetime.now().strftime('%Y%m%d-%H%M%S'))

    t_results = t.repeat(cycles, number)

    # drop 1st call: involves lots of overhead
    # (the minimum is still taken over all calls)
    timed = t_results[1:]
    str_results = "{}\t{:e}\t{}\t{}\t{}\t{:e}\t{:e}\t{:e}\t{:e}".format(
        size, nsamples, options.dtype, omp_num_threads, tpb,
        np.sum(timed), np.average(timed), np.std(timed),
        np.min(t_results))

    with open(filename, 'a') as f:
        # f.write(str_headers + "\n")
        f.write(str_results + "\n")
        f.write("# |--> timings: {}".format(t_results) + "\n")

    if not options.no_verbose:
        print(str_headers)
        print(str_results)
        print(t_results)
        print("Log saved in {}".format(filename))
import emcee import scipy.optimize import matplotlib.pyplot as plt import pymultinest as pmn import corner import galario.double as gd from galario import arcsec import alma.image #get_ipython().run_line_magic('matplotlib', 'notebook') # In[2]: # this may be needed to avoid emcee hanging when using multiple threads gd.threads(num=1) # ### Extract the visibilities # ```python # statwt(vis='hd202682.ms', datacolumn='DATA') # import uvplot # uvplot.io.export_uvtable('uv.txt', tb, vis='hd202628.ms', # datacolumn='DATA', # channel='all') # ``` # In[3]: # import the data, this assumes we're getting the output from uvplot uv_file = 'uv.txt' u, v, Re, Im, w = np.require(np.loadtxt(uv_file, unpack=True),