def monitor_progress(runname, idx):
    """Plot the sampler columns of the progress CSV files of one bin.

    Every file matching
    ``../results/<runname>/<runname>_progress_bin<idx>*.csv`` is read and
    over-plotted (one line per file, one panel per sampler column) against
    the row index. The figure is saved in the same directory as
    ``<runname>_progress_bin<idx>.png``.

    Parameters
    ----------
    runname : str
        Name of the run; used to build the results directory and file names.
    idx : int or str
        Bin identifier, converted with ``str`` when building file names.

    Raises
    ------
    SystemExit
        If no progress file matches the pattern.
    """
    # Local import: ``exit()`` is a helper injected by the ``site`` module for
    # interactive use; ``sys.exit()`` is the programmatic equivalent.
    import sys

    file_root = "../results/" + runname + "/" + runname + "_progress_bin" + str(idx)

    # Sorted so that the line/colour assignment is reproducible between calls
    progress_files = sorted(glob.glob(file_root + "*.csv"))
    if not progress_files:
        misc.printFAILED("Cannot find any progress files for " + runname +
                         " - Bin: " + str(idx))
        sys.exit()

    print("- Checking...")

    # (panel label, CSV column, log-scaled y axis?, plot the negated values?)
    panels = [
        ('lp', 'lp__', True, True),
        ('accept', 'accept_stat__', False, False),
        ('stepsize', 'stepsize__', True, False),
        ('treedepth', 'treedepth__', False, False),
        ('n_leap', 'n_leapfrog__', True, False),
        ('diver.', 'divergent__', False, False),
        ('energy', 'energy__', True, False),
    ]

    fig, ax = plt.subplots(nrows=len(panels), ncols=1, sharex=True,
                           figsize=(12, 15))
    ax = ax.ravel()
    fig.subplots_adjust(hspace=0.0, left=0.1, right=0.95, bottom=0.05,
                        top=0.95)
    alpha = 0.5

    ax[-1].set_xlabel("Iterations")
    for axis, panel in zip(ax, panels):
        axis.set_ylabel(panel[0])

    for infile in progress_files:
        tab = ascii.read(infile)
        for axis, (_, column, log_scale, negate) in zip(ax, panels):
            values = tab[column]
            if negate:
                # lp__ is plotted with its sign flipped so it can be log-scaled
                values = -1.0 * values
            draw = axis.semilogy if log_scale else axis.plot
            draw(values, alpha=alpha)

    fig.savefig(file_root + ".png")
    plt.close('all')

    return
def load_hdf5(filename, verbose=True):
    """Load the contents of a results HDF5 file into a plain dictionary.

    Every entry of the ``in`` group is stored under its own key. If the file
    has an ``out`` group, each of its sub-groups (one per bin) is stored
    under ``int(<bin name>)`` as a nested dictionary of arrays.

    Parameters
    ----------
    filename : str
        Path to the HDF5 file.
    verbose : bool, optional
        If True, print the name of every item while it is loaded.

    Returns
    -------
    dict
        Dictionary of NumPy arrays (and per-bin dictionaries of arrays).

    Raises
    ------
    SystemExit
        If ``filename`` does not exist.
    """
    misc.printRUNNING("Loading " + filename + " data")

    # Checking file exists
    if not os.path.exists(filename):
        misc.printFAILED("Cannot find file " + filename)
        sys.exit()

    # Opening file
    if verbose:
        print("# Opening file")
        print("")

    # Defining output dictionary
    struct = {}

    # The context manager guarantees the file handle is released, also on
    # errors. np.array() copies each dataset into memory, so the returned
    # dictionary stays valid after the file is closed.
    with h5py.File(filename, 'r') as f:

        # Filling up dictionary
        if verbose:
            print("# Loading input data:")
        for key, values in f['in'].items():
            if verbose:
                print(' - ' + key)
            struct[key] = np.array(values)

        if f.get("out") is not None:
            if verbose:
                print("")
                print("# Loading Stan results:")
            for idx in list(f['out'].keys()):
                bin_results = {}
                for key, values in f['out/' + idx].items():
                    if verbose:
                        print(' - [' + idx + '] ' + key)
                    bin_results[key] = np.array(values)
                struct[int(idx)] = bin_results

    misc.printDONE()

    return struct
def run_inspect_fits(filename, idx, losvd_file=None, save=0):
    """Plot the recovered LOSVD and the spectral fit of one bin.

    The figure contains (when ``in/ndim`` > 1) a map of the bin positions
    with the selected bin highlighted, the LOSVD with its shaded percentile
    bands, and the observed spectrum together with the best fit, the
    polynomial band, the residuals and the masked regions.

    Parameters
    ----------
    filename : str
        Path to the results HDF5 file.
    idx : int
        Bin to inspect; must exist as ``out/<idx>/bestfit`` in the file.
    losvd_file : str, optional
        ASCII table with columns ``col1`` (velocity) and ``col2`` (LOSVD)
        over-plotted in red after normalising ``col2`` to unit sum.
    save : int, optional
        If 1, the figure is saved as ``<basename>_bin<idx>.png`` in the
        directory of ``filename``; otherwise it is shown on screen.

    Raises
    ------
    SystemExit
        If the requested bin is not present in the file.
    """
    stridx = str(idx)
    out_root = 'out/' + stridx

    # Everything needed is copied into memory inside the context manager so
    # that the HDF5 handle is always released (also on the early exit).
    with h5py.File(filename, "r") as f:

        # Checking bin exists in dataset
        if f.get(out_root + '/bestfit') is None:
            misc.printFAILED("ERROR: Bin " + stridx +
                             " does not exist in file")
            sys.exit()

        # --- Input data ----------
        xbin = np.array(f['in/xbin'])
        ybin = np.array(f['in/ybin'])
        wave_obs = np.exp(np.array(f['in/wave_obs']))
        spec_obs = np.array(f['in/spec_obs'][:, idx])
        xvel = np.array(f['in/xvel'])
        mask = np.array(f['in/mask'])
        ndim = np.array(f['in/ndim'])

        # --- Output results ---------
        bestfit = np.array(f[out_root + '/bestfit'])
        losvd = np.array(f[out_root + '/losvd'])
        poly = np.array(f[out_root + '/poly']) + 1.0

    nbins = len(xbin)

    # Reading input LOSVD if requested
    if losvd_file is not None:
        tab = ascii.read(losvd_file)
        input_xvel = tab['col1']
        input_losvd = tab['col2'] / np.sum(tab['col2'])

    # Normalizing LOSVDs: the first five rows are scaled by the sum of row 2
    losvd[:5, :] /= np.sum(losvd[2, :])

    # Making plot ----------------------------------------------------------
    fig = plt.figure(figsize=(10, 7))
    fig.suptitle("BinID: " + str(idx), fontsize=14, fontweight='bold')
    plt.subplots_adjust(left=0.07, bottom=0.10, right=0.98, top=0.925,
                        wspace=0.0, hspace=0.3)

    # Bin map -----------
    if ndim > 1:
        ax0 = plt.subplot2grid((2, 4), (0, 0), colspan=1)
        ax0.set_title("BinID map")
        ax0.plot(xbin, ybin, 'k+', zorder=0)
        ax0.plot(xbin[idx], ybin[idx], 'r.', markersize=15.0)
        ax0.set_aspect('equal')
        for i in range(nbins):
            ax0.text(xbin[i], ybin[i], i, fontsize=5,
                     horizontalalignment='right',
                     verticalalignment='center', zorder=1)

    # LOSVD -----------
    ax1 = plt.subplot2grid((2, 4), (0, 2), colspan=2)
    ax1.fill_between(xvel, losvd[0, :], losvd[4, :], color='blue',
                     alpha=0.15, step='mid')
    ax1.fill_between(xvel, losvd[1, :], losvd[3, :], color='blue',
                     alpha=0.50, step='mid')
    ax1.plot(xvel, losvd[2, :], 'k.-', ds='steps-mid')
    if losvd_file is not None:
        ax1.plot(input_xvel, input_losvd, 'r.-', ds='steps-mid')
    ax1.axhline(y=0.0, color='k', linestyle='--')
    ax1.axvline(x=0.0, color='k', linestyle=":")
    ax1.set_xlabel("Velocity (km s$^{-1}$)")

    # Spectral fit
    mx = 1.1 * np.amax(spec_obs)
    mn0 = 0.7 * np.amin(spec_obs)
    ax2 = plt.subplot2grid((2, 4), (1, 0), colspan=4)
    ax2.fill_between(wave_obs, poly[1, :], poly[3, :], facecolor='yellow',
                     zorder=0, alpha=0.50, label="Leg. polynomial")
    ax2.plot(wave_obs, poly[1, :], color='gray', linestyle='--',
             linewidth=1, zorder=0)
    ax2.plot(wave_obs, poly[3, :], color='gray', linestyle='--',
             linewidth=1, zorder=0)
    ax2.plot(wave_obs, spec_obs, 'k', zorder=1, label="Obs. data")
    ax2.fill_between(wave_obs, bestfit[1, :], bestfit[3, :],
                     facecolor='orange', zorder=2, alpha=0.75)
    ax2.plot(wave_obs, bestfit[2, :], color='red', zorder=3, label="Bestfit")

    # Residuals are shifted to sit just above the lower edge of the panel
    res = spec_obs - bestfit[2, :] + mn0 + 0.1
    ax2.plot(wave_obs, res, color='green', label="Residuals")
    ax2.set_ylim([mn0, mx])
    ax2.axhline(y=mn0 + 0.1, color='k', linestyle='--')
    ax2.axvline(x=wave_obs[mask[0]], color='k', linestyle=":")
    ax2.axvline(x=wave_obs[mask[-1]], color='k', linestyle=":")

    # Gaps in the list of fitted pixels are shaded as masked regions
    for wj in np.flatnonzero(np.diff(mask) > 1):
        l0 = wave_obs[mask[wj]]
        l1 = wave_obs[mask[wj + 1]]
        ax2.axvspan(l0, l1, alpha=0.25, color='gray')

    ax2.set_ylabel("Norm. flux")
    ax2.set_xlabel("Wavelength ($\\mathrm{\\AA}$)")

    # Inverse scatter of the (un-shifted) residuals over the fitted pixels
    print(1.0 / np.std(res[mask] - mn0 - 0.1))

    if (save == 1):
        dirname, inputname = os.path.split(filename)
        basename = os.path.splitext(inputname)[0]
        # os.path.join keeps the output relative when filename has no
        # directory part (plain concatenation produced "/<name>.png").
        outpng = os.path.join(dirname, basename + '_bin' + stridx + '.png')
        plt.savefig(outpng)
    else:
        plt.show()

    return
default=None, help="(Optional) Filename of the input LOSVD") parser.add_option("-s", "--save", dest="save", type="int", default=0, help="(Optional) Save figure") parser.add_option("-d", "--dir", dest="dir", type="string", default='../results/', help="(Optional) The directory with results") (options, args) = parser.parse_args() runname = options.runname binID = options.binID losvd_file = options.losvd save = options.save dir = options.dir filename = dir + runname + "/" + runname + "_results.hdf5" if not os.path.exists(filename): misc.printFAILED(filename + " does not exist.") sys.exit() run_inspect_fits(filename, binID, losvd_file, save=save) misc.printDONE(runname + " - Bin: " + str(binID))
def run_inspect_ghfit(filename, idx, losvd_file=None, save=0):
    """Plot the LOSVD of one bin together with its Gauss-Hermite model.

    Both ``out/<idx>/losvd`` and ``out/<idx>/losvd_gh_mod`` are normalised
    by the trapezoidal integral of their row 2 and drawn with shaded bands.
    When ``in/ndim`` > 1 a map of the bin positions is drawn as well.

    Parameters
    ----------
    filename : str
        Path to the HDF5 file with the GH-fit results.
    idx : int
        Bin to inspect; must exist as ``out/<idx>/offset`` in the file.
    losvd_file : str, optional
        ASCII table with columns ``col1`` (velocity) and ``col2`` (LOSVD)
        over-plotted in red after normalising ``col2`` to unit sum.
    save : int, optional
        If 1, the figure is saved as ``<basename>_bin<idx>.png`` in the
        directory of ``filename``; otherwise it is shown on screen.

    Raises
    ------
    SystemExit
        If the requested bin is not present in the file.
    """
    stridx = str(idx)
    out_root = 'out/' + stridx

    # Data are copied into memory inside the context manager so that the
    # HDF5 handle is always released (also on the early exit).
    with h5py.File(filename, "r") as f:

        # Checking bin exists in dataset
        if f.get(out_root + '/offset') is None:
            misc.printFAILED("Bin " + stridx + " does not exist in file")
            sys.exit()

        # --- Input data ----------
        xbin = np.array(f['in/xbin'])
        ybin = np.array(f['in/ybin'])
        xvel = np.array(f['in/xvel'])
        ndim = np.array(f['in/ndim'])

        # --- Output results ---------
        losvd = np.array(f[out_root + '/losvd'])
        losvd_gh_mod = np.array(f[out_root + '/losvd_gh_mod'])

    nbins = len(xbin)

    # Reading input LOSVD if requested
    if losvd_file is not None:
        tab = ascii.read(losvd_file)
        input_xvel = tab['col1']
        input_losvd = tab['col2'] / np.sum(tab['col2'])

    # Normalizing LOSVDs ----------------------------------------------------
    # The integration abscissa is -xvel, exactly as in the original code.
    losvd[:5, :] /= np.trapz(losvd[2, :], -xvel)
    losvd_gh_mod[:5, :] /= np.trapz(losvd_gh_mod[2, :], -xvel)

    # Making plot -----------------------------------------------------------
    if ndim > 1:
        fig = plt.figure(figsize=(10, 4))
        plt.subplots_adjust(left=0.07, bottom=0.15, right=0.98, top=0.925,
                            wspace=0.0, hspace=0.3)
        fig.suptitle("BinID: " + str(idx), fontsize=14, fontweight='bold')

        # Bin map -----------
        ax0 = plt.subplot2grid((1, 4), (0, 0), colspan=1)
        ax0.set_title("BinID map")
        ax0.plot(xbin, ybin, 'k+', zorder=0)
        ax0.plot(xbin[idx], ybin[idx], 'r.', markersize=15.0)
        ax0.set_aspect('equal')
        for i in range(nbins):
            ax0.text(xbin[i], ybin[i], i, fontsize=5,
                     horizontalalignment='right',
                     verticalalignment='center', zorder=1)

        # The LOSVD panel gets its own cells next to the bin map instead of
        # a full-figure axes that would collide with it.
        # NOTE(review): placement mirrors run_inspect_fits - confirm layout.
        ax1 = plt.subplot2grid((1, 4), (0, 2), colspan=2)
    else:
        fig = plt.figure(figsize=(5, 4))
        plt.subplots_adjust(left=0.01, bottom=0.13, right=0.99, top=0.925)
        fig.suptitle("BinID: " + str(idx), fontsize=14, fontweight='bold')
        ax1 = plt.subplot2grid((1, 1), (0, 0))

    # LOSVD -----------
    ax1.fill_between(xvel, losvd[0, :], losvd[4, :], color='gray',
                     alpha=0.15, step='mid')
    ax1.fill_between(xvel, losvd[1, :], losvd[3, :], color='gray',
                     alpha=0.50, step='mid')
    ax1.plot(xvel, losvd[2, :], '.--', color='black', ds='steps-mid',
             label='BAYES-LOSVD fit')

    ax1.fill_between(xvel, losvd_gh_mod[0, :], losvd_gh_mod[4, :],
                     color='red', alpha=0.25, step='mid')
    ax1.fill_between(xvel, losvd_gh_mod[1, :], losvd_gh_mod[3, :],
                     color='red', alpha=0.50, step='mid')
    ax1.plot(xvel, losvd_gh_mod[2, :], 'r.-', ds='steps-mid', label='GH fit')

    if losvd_file is not None:
        ax1.plot(input_xvel, input_losvd, 'r.-', ds='steps-mid')
    ax1.axhline(y=0.0, color='k', linestyle='--')
    ax1.axvline(x=0.0, color='k', linestyle=":")
    ax1.set_xlabel("Velocity (km s$^{-1}$)")
    ax1.legend()
    ax1.set_yticks([])

    if (save == 1):
        dirname, inputname = os.path.split(filename)
        basename = os.path.splitext(inputname)[0]
        # os.path.join keeps the output relative when filename has no
        # directory part (plain concatenation produced "/<name>.png").
        outpng = os.path.join(dirname, basename + '_bin' + stridx + '.png')
        plt.savefig(outpng)
    else:
        plt.show()

    return
verbose = True if (save_chains == 0): save_chains = False else: save_chains = True if (save_plots == 0): save_plots = False else: save_plots = True # Checking the file exists results_file = "../results/" + runname + "/" + runname + "_results.hdf5" if not os.path.exists(results_file): misc.printFAILED(results_file + " does not exist.") sys.exit() # Loading input information from the results file f = h5py.File(results_file, 'r') nbins = np.array(f['in/nbins']) f.close() # Defining the list of bins to be analysed if (bin == "all"): bin_list = list(np.arange(nbins)) print("# ENTIRE list of bins selected") elif (bin == "odd"): bin_list = list(np.arange(0, nbins, 2)) print("# ODD bins selected") elif (bin == "even"):
def run(i, bin_list, runname, niter, nchain, adapt_delta, max_treedepth,
        verbose, save_chains, save_plots):
    """Run the Gauss-Hermite Stan fit on the LOSVD of one bin.

    The LOSVD stored in ``../results/<runname>/<runname>_results.hdf5``
    under ``out/<bin>/losvd`` is fitted with the model in
    ``stan_model/bayes-losvd_ghfit.stan``. The Stan summary, the processed
    results and, optionally, the chains and diagnostic plots are written to
    ``../results/<runname>/`` using the ``<runname>_gh_*_bin<bin>`` names.

    Parameters
    ----------
    i : int
        Position in ``bin_list`` of the bin to process.
    bin_list : sequence
        Bin identifiers.
    runname : str
        Name of the run.
    niter, nchain : int
        Passed to Stan as ``iter`` and ``chains``.
    adapt_delta, max_treedepth
        Passed to Stan through the ``control`` dictionary.
    verbose : bool
        Forwarded to ``misc.save_stan_summary``.
    save_chains : bool
        If true, save the chains in NetCDF format through ArviZ.
    save_plots : bool
        If true, (re)create the diagnostics PDF.

    Returns
    -------
    str
        ``'OK'`` on success, ``'ERROR'`` otherwise (the traceback of any
        exception is printed).
    """
    idx = bin_list[i]
    stridx = str(idx)
    misc.printRUNNING(runname + " - Bin: " + stridx)

    try:
        input_file = "../results/" + runname + "/" + runname + "_results.hdf5"

        # The context managers release the HDF5 handles and the temporary
        # file on every exit path (early return, exception or success).
        with h5py.File(input_file, 'r+') as struct:

            # Checking the desired bin exists
            if struct.get('out/' + stridx) is None:
                misc.printFAILED("Bin " + stridx + " does not exist in " +
                                 input_file)
                return 'ERROR'

            # Defining the version of the code to use
            codefile = 'stan_model/bayes-losvd_ghfit.stan'
            if not os.path.exists(codefile):
                misc.printFAILED(codefile + " does not exist.")
                # Report the failure through the return value: exiting the
                # interpreter from here was intercepted by the handler below
                # and ended up as 'ERROR' anyway.
                return 'ERROR'

            # Defining output names and directories
            out_root = "../results/" + runname + "/" + runname
            pdf_filename = out_root + "_gh_diagnostics_bin" + stridx + ".pdf"
            summary_filename = out_root + "_gh_Stan_summary_bin" + stridx + ".txt"
            arviz_filename = out_root + "_gh_chains_bin" + stridx + ".netcdf"
            sample_filename = out_root + "_gh_progress_bin" + stridx + ".csv"
            outhdf5 = out_root + "_gh_results_bin" + stridx + ".hdf5"

            # Creating the structure with the data for Stan
            # -------
            # NOTE: losvd_obs, sigma_losvd is what goes into the GH fit
            #       losvd_obs is row 2 of the stored LOSVD
            #       sigma_losvd is the mean of the absolute differences
            #       between rows 1 and 3 and row 2
            # -------
            losvd_all = np.array(struct['out/' + stridx + '/losvd'])
            losvd = losvd_all[2, :]
            sigma = np.zeros((len(losvd), 2))
            sigma[:, 0] = np.fabs(losvd_all[1, :] - losvd)
            sigma[:, 1] = np.fabs(losvd_all[3, :] - losvd)
            sigma_losvd = np.mean(sigma, axis=1)

            # Datasets are converted to in-memory arrays before being handed
            # to Stan, as done by the main fitting routine.
            data = {
                'nvel': np.array(struct['in/nvel']),
                'xvel': np.array(struct['in/xvel']),
                'losvd_obs': losvd,
                'sigma_losvd': sigma_losvd
            }

            # Creating a temporary file adding the input data to the input
            # HDF5 file info
            with tempfile.NamedTemporaryFile() as temp, \
                    h5py.File(temp.name, 'w') as struct2:
                struct.copy('in', struct2)
                struct2.create_dataset("out/" + stridx + "/losvd",
                                       data=losvd_all,
                                       compression="gzip")

                # Running the model
                with open(codefile, 'r') as myfile:
                    code = myfile.read()
                model = stan_cache(model_code=code, codefile=codefile)
                fit = model.sampling(data=data,
                                     iter=niter,
                                     chains=nchain,
                                     control={
                                         'adapt_delta': adapt_delta,
                                         'max_treedepth': max_treedepth
                                     },
                                     sample_file=sample_filename,
                                     check_hmc_diagnostics=True)
                samples = fit.extract(permuted=True)
                diag_pars = fit.get_sampler_params()

                # If requested, saving sample chains
                if save_chains:
                    print("")
                    print("# Saving chains in Arviz (NETCDF) format: " +
                          arviz_filename)
                    arviz_data = az.from_pystan(posterior=fit)
                    az.to_netcdf(arviz_data, arviz_filename)

                # Saving Stan's summary of main parameters on disk
                print("")
                print("# Saving Stan summary: " + summary_filename)
                unwanted = {'losvd_mod'}
                misc.save_stan_summary(fit,
                                       unwanted=unwanted,
                                       verbose=verbose,
                                       summary_filename=summary_filename)

                # Processing output and saving results
                print("")
                print("# Processing and saving results: " + outhdf5)
                misc.process_stan_output_hdp(struct2, samples, outhdf5,
                                             stridx)

                # Creating diagnostic plots
                if save_plots:
                    if os.path.exists(pdf_filename):
                        os.remove(pdf_filename)
                    print("")
                    print("# Saving diagnostic plots: " + pdf_filename)
                    create_diagnostic_plots(idx, pdf_filename, fit,
                                            diag_pars, niter, nchain)

                # Removing progress files
                print("")
                print("# Deleting progress files")
                misc.delete_files(sample_filename, 'csv')
                misc.delete_files(sample_filename, 'png')

        # If we are here, we are DONE!
        misc.printDONE(runname + " - Bin: " + stridx)

        return 'OK'

    except Exception:
        misc.printFAILED()
        traceback.print_exc()

        return 'ERROR'
def load_data(struct):
    """Read, select, bin, log-rebin and normalise the observed spectra.

    Steps, in order: the instrument-specific reader listed in
    ``../config_files/instruments.properties`` loads the data; wavelengths
    are divided by ``1 + redshift``; the spectra are cut to
    ``[lmin, lmax]`` (clipped to the data range); spaxels with non-finite
    or non-positive signal/noise are dropped; spaxels are selected by
    signal level using ``snr_min``; the data are optionally Voronoi binned
    (``snr`` > 0); every spectrum is log-rebinned to ``velscale`` and
    divided by its median; finally the pixel mask is built.

    Parameters
    ----------
    struct : dict
        Configuration. Keys read here: ``filename``, ``instrument``,
        ``lmin``, ``lmax``, ``redshift``, ``snr_min``, ``snr``,
        ``velscale``, ``mask_file`` and ``porder``. ``struct['filename']``
        is modified in place: ``"../data/"`` is prepended to it.

    Returns
    -------
    dict
        Data structure with the keys ``binID``, ``x``, ``y``, ``flux``,
        ``xbin``, ``ybin``, ``bin_flux``, ``spec_obs``, ``sigma_obs``,
        ``wave_obs``, ``wave``, ``velscale``, ``mask``, ``nmask``,
        ``bin_snr``, ``npix``, ``npix_obs``, ``nspec``, ``porder``,
        ``nbins``, ``snr``, ``lmin``, ``lmax`` and ``ndim``.

    Raises
    ------
    SystemExit
        If the data file, the instrument entry, the instrument reader or
        the mask file cannot be found.
    """
    # Adding the relative path to input filename and check file exists
    data_path = "../data/" + struct['filename']
    if not os.path.exists(data_path):
        misc.printFAILED("File '" + struct['filename'] +
                         "' not found in 'data' directory")
        sys.exit()
    struct['filename'] = data_path

    # Reading instruments config file
    instr_config = toml.load("../config_files/instruments.properties")
    if struct['instrument'] not in instr_config:
        misc.printFAILED("Instrument '" + struct['instrument'] +
                         "' not found in instruments configuration file")
        sys.exit()
    read_file = instr_config[struct['instrument']]['read_file']
    reader_path = "../config_files/instruments/" + read_file
    if not os.path.exists(reader_path):
        misc.printFAILED("Instrument read file '" + read_file +
                         "' not found in instruments directory")
        sys.exit()

    # Reading instrument specific data and info
    print(" - Reading the data and basic info")
    instr = importlib.util.spec_from_file_location("", reader_path)
    module = importlib.util.module_from_spec(instr)
    instr.loader.exec_module(module)
    # struct['filename'] already carries the "../data/" prefix; prefixing it
    # a second time only worked because "../data/.." resolves back to "..".
    data = module.read_data(struct['filename'])

    # Creating variables for convenience
    wave = data['wave']
    spec = data['spec']
    espec = data['espec']
    x = data['x']
    y = data['y']
    npix = data['npix']
    nspec = data['nspax']
    psize = data['psize']
    ndim = data['ndim']
    lmin = struct['lmin']
    lmax = struct['lmax']

    # Correcting the data for redshift
    print(" - Correcting data for redshift")
    wave /= (1.0 + struct['redshift'])

    # Checking the desired wavelength range is within data wavelength limits
    if (wave[0] > lmin):
        lmin = wave[0]
    if (wave[-1] < lmax):
        lmax = wave[-1]

    # Cutting the data to the desired wavelength range
    print(" - Cutting data to desired wavelength range")
    idx = (wave >= lmin) & (wave <= lmax)
    wave = wave[idx]
    spec = spec[idx, :]
    espec = espec[idx, :]
    npix = np.sum(idx)

    # Computing the SNR in each spaxel
    print(" - Computing the SNR of each spaxel")
    signal = np.nanmedian(spec, axis=0)
    noise = np.abs(np.nanmedian(espec, axis=0))

    # Filtering out those spectra with NaN estimates for SNR
    snr = signal / noise
    good = np.isfinite(snr) & (snr > 0.0)
    if np.sum(good) > 0:
        signal = signal[good]
        noise = noise[good]
        spec = spec[:, good]
        espec = espec[:, good]
        x = x[good]
        y = y[good]
        nspec = np.sum(good)

    # Selecting those spaxels above SNR_min: the mean signal of the spaxels
    # whose SNR lies within 3.0 of snr_min is used as a signal threshold
    print(" - Selecting spaxels above SNR_min")
    delta = np.abs((signal / noise) - struct['snr_min'])
    idx = (delta <= 3.0)
    if np.sum(idx) > 0:
        isof = np.mean(signal[idx])
        idx = (signal >= isof)
        spec = spec[:, idx]
        espec = espec[:, idx]
        signal = signal[idx]
        noise = noise[idx]
        x, y = x[idx], y[idx]
        nspec = np.sum(idx)

    # IF requested, Voronoi binning the data
    if struct['snr'] > 0.0:

        # Determining Voronoi binning to the data
        print(" - Computing the Voronoi binning")
        binNum, xbin, ybin, xBar, yBar, bin_snr, nPixels, scale = \
            cap.voronoi_2d_binning(x, y, signal, noise, struct['snr'],
                                   plot=False, quiet=True, pixelsize=psize)

        print(" - " + str(len(xbin)) + " Voronoi bins created")

        # Applying the Voronoi binning to the data
        print(" - Applying the Voronoi binning")
        ubins = np.unique(binNum)
        nbins = len(ubins)
        bin_spec = np.zeros([npix, nbins])
        bin_espec = np.zeros([npix, nbins])
        bin_flux = np.zeros(nbins)

        for i in trange(nbins, ascii=True, leave=False):
            k = np.where(binNum == ubins[i])[0]
            if len(k) == 1:
                av_spec = spec[:, k]
                av_err_spec = espec[:, k]
            else:
                av_spec = np.nansum(spec[:, k], axis=1)
                av_err_spec = np.sqrt(np.sum(espec[:, k]**2, axis=1))

            # A full mean always yields a scalar; with axis=0 the
            # single-spaxel case produced a length-1 array instead.
            bin_flux[i] = np.mean(av_spec)
            bin_spec[:, i] = np.ravel(av_spec)
            bin_espec[:, i] = np.ravel(av_err_spec)

    else:
        bin_snr = signal / noise
        binNum = np.arange(nspec)
        bin_flux = np.mean(spec, axis=0)
        bin_spec = spec
        bin_espec = espec
        nbins = nspec
        xbin = x
        ybin = y

        print(" - " + str(len(xbin)) + " spectra in file")

    # Log-rebinning the data to the input Velscale
    print(" - Log-rebinning and normalizing the spectra")
    lamRange = np.array([np.amin(wave), np.amax(wave)])
    dummy, lwave, _ = cap.log_rebin(lamRange, bin_spec[:, 0],
                                    velscale=struct['velscale'])
    npix_log = len(dummy)
    lspec = np.zeros([npix_log, nbins])
    lespec = np.zeros([npix_log, nbins])

    for i in trange(nbins, ascii=True, leave=False):

        # Log-rebinning the spectra
        lspec[:, i], dummy, dummy = cap.log_rebin(
            lamRange, bin_spec[:, i], velscale=struct['velscale'])
        lespec[:, i], dummy, dummy = cap.log_rebin(
            lamRange, bin_espec[:, i], velscale=struct['velscale'])

        # Normalizing the observed and error spectra respecting the SNR of
        # each bin (both are divided by the same median)
        norm = np.nanmedian(lspec[:, i])
        lespec[:, i] /= norm
        lspec[:, i] /= norm

    # Defining the data mask
    print(" - Defining the data mask")
    if (struct['mask_file'] == "None"):
        # Masking edges only. The builtin int replaces the np.int alias,
        # which no longer exists in recent NumPy releases.
        mn = int(0.01 * npix_log)
        mask = np.arange(mn, npix_log - mn)
    else:
        mask_path = "../config_files/" + struct['mask_file']
        if not os.path.exists(mask_path):
            misc.printFAILED(
                "Cannot find mask file in 'config_files' directory")
            sys.exit()
        mask = misc.spectralMasking(mask_path, lwave, struct['redshift'])

    # Storing all the info in a data structure
    print(" - Storing everything in data structure")
    print("")
    data_struct = {
        'binID': binNum,
        'x': x,
        'y': y,
        'flux': signal,
        'xbin': xbin,
        'ybin': ybin,
        'bin_flux': bin_flux,
        'spec_obs': lspec,
        'sigma_obs': lespec,
        'wave_obs': lwave,
        'wave': wave,
        'velscale': struct['velscale'],
        'mask': np.ravel(mask),
        'nmask': len(mask),
        'bin_snr': bin_snr,
        'npix': npix,
        'npix_obs': npix_log,
        'nspec': nspec,
        'porder': struct['porder'],
        'nbins': nbins,
        'snr': struct['snr'],
        'lmin': lmin,
        'lmax': lmax,
        'ndim': ndim
    }

    return data_struct
def load_testdata(struct):
    """Read and normalise test spectra that are already log-sampled.

    Unlike ``load_data`` no redshift correction, SNR selection, Voronoi
    binning or log-rebinning is applied: the spectra are cut to the
    requested range (compared in natural-log wavelength), divided by their
    median and stored one spectrum per bin.

    Parameters
    ----------
    struct : dict
        Configuration. Keys read here: ``filename``, ``instrument``,
        ``lmin``, ``lmax``, ``redshift``, ``velscale``, ``mask_file``,
        ``porder`` and ``snr``. ``struct['filename']`` is modified in
        place: ``"../data/"`` is prepended to it.

    Returns
    -------
    dict
        Data structure with the same keys as the one built by
        ``load_data``; ``lmin``/``lmax`` are converted back with ``exp``
        and ``bin_snr`` is set to 0.

    Raises
    ------
    SystemExit
        If the data file, the instrument entry, the instrument reader or
        the mask file cannot be found.
    """
    # Adding the relative path to input filename and check file exists
    data_path = "../data/" + struct['filename']
    if not os.path.exists(data_path):
        misc.printFAILED("File '" + struct['filename'] +
                         "' not found in 'data' directory")
        sys.exit()
    struct['filename'] = data_path

    # Reading instruments config file
    instr_config = toml.load("../config_files/instruments.properties")
    if struct['instrument'] not in instr_config:
        misc.printFAILED("Instrument '" + struct['instrument'] +
                         "' not found in instruments configuration file")
        sys.exit()
    read_file = instr_config[struct['instrument']]['read_file']
    reader_path = "../config_files/instruments/" + read_file
    if not os.path.exists(reader_path):
        misc.printFAILED("Instrument read file '" + read_file +
                         "' not found in instruments directory")
        sys.exit()

    # Reading instrument specific data and info
    print(" - Reading the data and basic info")
    instr = importlib.util.spec_from_file_location("", reader_path)
    module = importlib.util.module_from_spec(instr)
    instr.loader.exec_module(module)
    # struct['filename'] already carries the "../data/" prefix; prefixing it
    # a second time only worked because "../data/.." resolves back to "..".
    data = module.read_data(struct['filename'])

    # Creating variables for convenience
    wave = data['wave']
    spec = data['spec']
    espec = data['espec']
    x = data['x']
    y = data['y']
    npix = data['npix']
    nspec = data['nspax']
    psize = data['psize']
    ndim = data['ndim']
    lmin = np.log(struct['lmin'])
    lmax = np.log(struct['lmax'])

    # NOTE: the test data is already in log wavelength, so no redshift
    # correction is applied here.

    # Checking the desired wavelength range is within data wavelength limits
    if (wave[0] > lmin):
        lmin = wave[0]
    if (wave[-1] < lmax):
        lmax = wave[-1]

    # Cutting the data to the desired wavelength range
    print(" - Cutting data to desired wavelength range")
    idx = (wave >= lmin) & (wave <= lmax)
    wave = wave[idx]
    spec = spec[idx, :]
    espec = espec[idx, :]
    npix = np.sum(idx)
    flux = np.median(spec)

    npix_log = npix
    nbins = nspec

    print(" - Normalising the spectra")
    for i in trange(nbins, ascii=True, leave=False):
        # Normalizing the observed and error spectra respecting the SNR of
        # each bin (both are divided by the same median)
        norm = np.nanmedian(spec[:, i])
        espec[:, i] /= norm
        spec[:, i] /= norm

    # Defining the data mask
    print(" - Defining the data mask")
    if (struct['mask_file'] == "None"):
        # Masking edges only. The builtin int replaces the np.int alias,
        # which no longer exists in recent NumPy releases.
        mn = int(0.01 * npix_log)
        mask = np.arange(mn, npix_log - mn)
    else:
        mask_path = "../config_files/" + struct['mask_file']
        if not os.path.exists(mask_path):
            misc.printFAILED(
                "Cannot find mask file in 'config_files' directory")
            sys.exit()
        mask = misc.spectralMasking(mask_path, wave, struct['redshift'])

    # Storing all the info in a data structure
    print(" - Storing everything in data structure")
    print("")
    data_struct = {
        'binID': np.arange(nbins),
        'x': x,
        'y': y,
        'flux': flux,
        'xbin': x,
        'ybin': y,
        'bin_flux': flux,
        'spec_obs': spec,
        'sigma_obs': espec,
        'wave_obs': wave,
        'wave': wave,
        'velscale': struct['velscale'],
        'mask': np.ravel(mask),
        'nmask': len(mask),
        'bin_snr': 0,
        'npix': npix,
        'npix_obs': npix_log,
        'nspec': nspec,
        'porder': struct['porder'],
        'nbins': nbins,
        'snr': struct['snr'],
        'lmin': np.exp(lmin),
        'lmax': np.exp(lmax),
        'ndim': ndim
    }

    return data_struct
def load_templates(struct, data_struct):
    """Load the template library and prepare it for fitting.

    Steps, in order: every file in ``../templates/<template_lib>/`` is read
    and divided by its mean; optionally (``npca`` > 0) the library is
    replaced by its first ``npca`` PCA components, each detrended with a
    first-order polynomial and divided by its standard deviation; the
    templates are smoothed with a Gaussian of per-pixel width derived from
    the data and template LSF files; they are log-rebinned to ``velscale``
    and, if their wavelength grid differs from ``data_struct['wave_obs']``,
    resampled onto it.

    Parameters
    ----------
    struct : dict
        Configuration. Keys read here: ``template_lib``, ``velscale``,
        ``npca``, ``instrument`` and ``redshift``.
    data_struct : dict
        Data structure; ``lmin``, ``lmax`` and ``wave_obs`` are read.

    Returns
    -------
    dict
        Keys ``lwave_temp``, ``mean_template``, ``templates``,
        ``npix_temp`` and ``ntemp``.

    Raises
    ------
    SystemExit
        If an LSF file is missing, no template is found, or there are
        fewer pixels than templates.
    """
    # Reading relevant info from config file
    temp_name = struct['template_lib']
    velscale = struct['velscale']
    npca = struct['npca']
    instr = struct['instrument']
    redshift = struct['redshift']
    lmin = data_struct['lmin']
    lmax = data_struct['lmax']
    wave_obs = data_struct['wave_obs']

    # Getting the appropriate LSF files
    instr_config = toml.load("../config_files/instruments.properties")
    lsf_data_file = "../config_files/instruments/" + instr_config[instr]['lsf_file']
    lsf_temp_file = "../config_files/instruments/" + temp_name + '.lsf'
    if not os.path.exists(lsf_data_file):
        misc.printFAILED(
            "Data lsf file not found in 'config_files/instruments' directory")
        sys.exit()
    if not os.path.exists(lsf_temp_file):
        misc.printFAILED(
            "Templates lsf file not found in 'config_files/instruments' directory"
        )
        sys.exit()

    # Loading SSP models and defining some basic parameters
    template_files = glob.glob("../templates/" + temp_name + "/*")
    ntemp = len(template_files)
    print(" - " + str(ntemp) + " templates found in " + temp_name + " library")
    if ntemp == 0:
        # Fail with a clear message instead of an IndexError further down
        misc.printFAILED("No templates found in '../templates/" + temp_name +
                         "' directory")
        sys.exit()

    # The wavelength solution is taken from the header of the first file;
    # the context manager closes the FITS file once the values are read.
    with fits.open(template_files[0]) as hdu:
        npix = np.ravel(hdu[0].data).size
        hdr = hdu[0].header
        dwav = hdr['CDELT1']
        wave = hdr['CRVAL1'] + np.arange(npix) * dwav

    # Loading templates into the final array, each scaled by its own mean
    print(" - Loading and preparing the templates...")
    temp = np.zeros((npix, ntemp))
    for i in trange(ntemp, ascii=True, leave=False):
        # Closing every file avoids exhausting the open-file limit on
        # large libraries; the data are copied into temp before that.
        with fits.open(template_files[i]) as hdu:
            temp[:, i] = np.ravel(hdu[0].data)
        temp[:, i] /= np.mean(temp[:, i])

    # Running PCA on the input models
    if npix < ntemp:
        misc.printFAILED("The number of pixels in the spectra (" + str(npix) +
                         ") has to be larger than the number of templates (" +
                         str(ntemp) + ") to run PCA.")
        sys.exit()

    if npca > 0:
        print(" - Running PCA on the templates...")
        mean_temp = np.mean(temp, axis=1)
        pca = PCA(n_components=ntemp)
        PC_tmp = pca.fit_transform(temp)

        # Extracting the desired number of PCA components.
        # The variance explained by the first npca components is element
        # npca - 1 of the cumulative sum (element npca counts one more and
        # does not exist when npca == ntemp).
        cumsum_pca_variance = np.cumsum(pca.explained_variance_ratio_)
        print(" " + str(npca) + " PCA components explain {:7.3f}".format(
            cumsum_pca_variance[npca - 1] * 100) +
              "% of the variance in the input library")
        templates = PC_tmp[:, 0:npca]
        ntemplates = npca

        # Continuum and Z-score Normalization to aid in the minimization
        for i in range(npca):
            coef = np.polyfit(wave, templates[:, i], 1)
            pfit = np.polyval(coef, wave)
            templates[:, i] -= pfit
            templates[:, i] /= np.std(templates[:, i])

    else:
        mean_temp = np.zeros(npix)
        templates = temp
        ntemplates = ntemp

    # Convolving the templates to match the data's LSF
    print(" - Convolving the templates to match the data's LSF")
    data_lsf = misc.read_lsf(wave, lsf_data_file)
    data_lsf /= (1.0 + redshift)
    temp_lsf = misc.read_lsf(wave, lsf_temp_file)
    fwhm_diff = np.sqrt(data_lsf**2 - temp_lsf**2)  # in angstroms
    bad_pix = np.isnan(fwhm_diff)
    if np.sum(bad_pix) > 0:
        misc.printWARNING(
            "Some values of the data LSF are below the templates values")
        # Fixing the FWHM_diff to a tiny value if there are NaNs
        fwhm_diff[bad_pix] = 1E-2
    sigma_diff = fwhm_diff / 2.355 / dwav

    mean_temp = cap.gaussian_filter1d(mean_temp, sigma_diff)
    for i in trange(ntemplates, ascii=True, leave=False):
        # convolution with variable sigma
        templates[:, i] = cap.gaussian_filter1d(templates[:, i], sigma_diff)

    # Log-rebinning the PCA spectra using the data's velscale
    print(" - Log-rebinning the templates")
    lamRange = np.array([np.amin(wave), np.amax(wave)])
    mean_temp, lwave, dummy = cap.log_rebin(lamRange, mean_temp,
                                            velscale=velscale)
    npix_temp = mean_temp.shape[0]
    tmp_temp = np.zeros((npix_temp, ntemplates))
    for i in range(ntemplates):
        tmp_temp[:, i], dummy, dummy = cap.log_rebin(lamRange,
                                                     templates[:, i],
                                                     velscale=velscale)
    templates = tmp_temp

    # Checking the wavelength solution for the templates is identical to
    # the data. If not, the templates are resampled.
    # NOTE: this is important to have a centered LOSVD on xvel=0.0
    good = (lwave >= np.log(lmin)) & (lwave <= np.log(lmax))
    if not np.array_equal(lwave[good], wave_obs):
        print(
            " - Resampling the templates to match the wavelength of the observed data (if needed)"
        )
        mean_temp = misc.spectres(wave_obs, lwave, mean_temp, fill=np.nan)
        npix_temp = len(mean_temp)
        new_temp = np.zeros((npix_temp, ntemplates))
        for i in range(ntemplates):
            new_temp[:, i] = misc.spectres(wave_obs, lwave, templates[:, i],
                                           fill=np.nan)
        lwave = wave_obs
        templates = new_temp
    else:
        mean_temp = mean_temp[good]
        templates = templates[good, :]
        lwave = lwave[good]
        npix_temp = len(lwave)

    # Normalizing the mean template to 1.0 and adjusting the other templates
    # so that the mean is around 0.0
    mean_temp /= np.mean(mean_temp)
    for i in range(ntemplates):
        templates[:, i] -= np.mean(templates[:, i])

    # Storing everything into a dictionary
    print(" - Storing everything in templates structure")
    temp_struct = {
        'lwave_temp': lwave,
        'mean_template': mean_temp,
        'templates': templates,
        'npix_temp': npix_temp,
        'ntemp': ntemplates
    }

    return temp_struct
def run(i, bin_list, runname, niter, nchain, adapt_delta, max_treedepth,
        verbose=False, save_chains=False, save_plots=False, fit_type=None):
    """Run the Stan LOSVD fit of the requested type on one bin.

    The input data are read from ``../preproc_data/<runname>.hdf5`` and the
    outputs (Stan summary, processed results and, optionally, chains and
    diagnostic plots) are written to ``../results/<runname>-<fit_type>/``.

    Parameters
    ----------
    i : int
        Position in ``bin_list`` of the bin to process.
    bin_list : sequence
        Bin identifiers; the selected one indexes the columns of
        ``in/spec_obs`` and ``in/sigma_obs``.
    runname : str
        Name of the run.
    niter, nchain : int
        Passed to Stan as ``iter`` and ``chains``.
    adapt_delta, max_treedepth
        Passed to Stan through the ``control`` dictionary.
    verbose : bool, optional
        Forwarded to ``misc.save_stan_summary``.
    save_chains : bool, optional
        If true, save the chains in NetCDF format through ArviZ.
    save_plots : bool, optional
        If true, (re)create the diagnostics PDF.
    fit_type : str
        Name of the fit, resolved with ``misc.read_code``.

    Returns
    -------
    str
        ``'OK'`` on success, ``'ERROR'`` otherwise (the traceback of any
        exception is printed).
    """
    idx = bin_list[i]
    stridx = str(idx)
    # str() keeps the banner from raising outside the try block when
    # fit_type is left at its None default; the failure is then reported
    # through the 'ERROR' return value like any other.
    misc.printRUNNING(runname + " - Bin: " + stridx + " - Fit type: " +
                      str(fit_type))

    try:

        # Defining the version of the code to use
        codefile, extrapars = misc.read_code(fit_type)

        # Defining output names and directories
        rootname = runname + "-" + fit_type
        out_root = "../results/" + rootname + "/" + rootname
        pdf_filename = out_root + "_diagnostics_bin" + stridx + ".pdf"
        summary_filename = out_root + "_Stan_summary_bin" + stridx + ".txt"
        arviz_filename = out_root + "_chains_bin" + stridx + ".netcdf"
        sample_filename = out_root + "_progress_bin" + stridx + ".csv"
        outhdf5 = out_root + "_results_bin" + stridx + ".hdf5"

        # The context manager closes the input file on every exit path; it
        # stays open while the results are processed because the handle is
        # passed on to misc.process_stan_output_hdp.
        with h5py.File("../preproc_data/" + runname + ".hdf5", "r") as struct:

            # Creating the basic structure with the data for Stan
            scalar_or_full = ('npix_obs', 'ntemp', 'nvel', 'npix_temp',
                              'mask', 'nmask', 'porder', 'templates',
                              'mean_template', 'velscale', 'xvel')
            data = {key: np.array(struct['in/' + key])
                    for key in scalar_or_full}
            # Only the column of the selected bin is needed for the spectra
            data['spec_obs'] = np.array(struct['in/spec_obs'][:, idx])
            data['sigma_obs'] = np.array(struct['in/sigma_obs'][:, idx])

            # Adding any extra parameter needed for that particular fit_type
            for key, val in extrapars.items():
                data[key] = val

            # Running the model
            with open(codefile, 'r') as myfile:
                code = myfile.read()
            model = stan_cache(model_code=code, codefile=codefile)
            fit = model.sampling(data=data, iter=niter, chains=nchain,
                                 control={'adapt_delta': adapt_delta,
                                          'max_treedepth': max_treedepth},
                                 sample_file=sample_filename,
                                 check_hmc_diagnostics=True)
            samples = fit.extract(permuted=True)  # Extracting parameter samples
            diag_pars = fit.get_sampler_params()  # Getting sampler diagnostic params

            # If requested, saving sample chains
            if save_chains:
                print("")
                print("# Saving chains in Arviz (NETCDF) format: " +
                      arviz_filename)
                arviz_data = az.from_pystan(
                    posterior=fit,
                    observed_data=['mask', 'spec_obs', 'sigma_obs'])
                az.to_netcdf(arviz_data, arviz_filename)

            # Saving Stan's summary of main parameters on disk
            print("")
            print("# Saving Stan summary: " + summary_filename)
            unwanted = {'spec', 'conv_spec', 'poly', 'bestfit', 'a', 'losvd_'}
            misc.save_stan_summary(fit, unwanted=unwanted, verbose=verbose,
                                   summary_filename=summary_filename)

            # Processing output and saving results
            print("")
            print("# Processing and saving results: " + outhdf5)
            misc.process_stan_output_hdp(struct, samples, outhdf5, stridx)

            # Creating diagnostic plots
            if save_plots:
                if os.path.exists(pdf_filename):
                    os.remove(pdf_filename)
                print("")
                print("# Saving diagnostic plots: " + pdf_filename)
                create_diagnostic_plots(idx, pdf_filename, fit, diag_pars,
                                        niter, nchain)

            # Removing progress files
            print("")
            print("# Deleting progress files")
            misc.delete_files(sample_filename, 'csv')
            misc.delete_files(sample_filename, 'png')

        # If we are here, we are DONE!
        misc.printDONE(runname + " - Bin: " + stridx + " - Fit type: " +
                       fit_type)

        return 'OK'

    except Exception:
        misc.printFAILED()
        traceback.print_exc()

        return 'ERROR'
else: save_chains = True if (save_plots == 0): save_plots = False else: save_plots = True if (restart == 0): restart = False else: restart = True # Checking the file exists if not os.path.exists(preproc_file): misc.printFAILED(preproc_file+" does not exist.") sys.exit() # Checking fit_type is a valid one misc.check_codes(fit_type) # Defining rootnames for output files tmpname = os.path.basename(preproc_file) runname = os.path.splitext(tmpname)[0] outdir = "../results/"+runname+"-"+fit_type if not os.path.exists("../results"): os.mkdir("../results") if not os.path.exists(outdir): os.mkdir(outdir)