Example #1
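# Imports assumed by this and the following snippets (inferred from usage; the
# code appears to come from the BAYES-LOSVD pipeline, which also provides the
# local helper modules "misc" and "cap" used throughout):
import glob
import numpy as np
import matplotlib.pyplot as plt
from astropy.io import ascii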
def monitor_progress(runname, idx):

    flist = glob.glob("../results/" + runname + "/" + runname +
                      "_progress_bin" + str(idx) + "*.csv")
    nlist = len(flist)
    if (nlist == 0):
        misc.printFAILED("Cannot find any progress files for " + runname +
                         " - Bin: " + str(idx))
        sys.exit()

    print("- Checking...")

    fig, ax = plt.subplots(nrows=7, ncols=1, sharex=True, figsize=(12, 15))
    ax = ax.ravel()
    fig.subplots_adjust(hspace=0.0,
                        left=0.1,
                        right=0.95,
                        bottom=0.05,
                        top=0.95)
    alpha = 0.5

    params = [
        'lp', 'accept', 'stepsize', 'treedepth', 'n_leap', 'diver.', 'energy'
    ]

    npar = len(params)
    ax[6].set_xlabel("Iterations")
    for i in range(npar):
        ax[i].set_ylabel(params[i])

    for infile in flist:

        tab = ascii.read(infile)

        ax[0].semilogy(-1.0 * tab['lp__'], alpha=alpha)
        ax[1].plot(tab['accept_stat__'], alpha=alpha)
        ax[2].semilogy(tab['stepsize__'], alpha=alpha)
        ax[3].plot(tab['treedepth__'], alpha=alpha)
        ax[4].semilogy(tab['n_leapfrog__'], alpha=alpha)
        ax[5].plot(tab['divergent__'], alpha=alpha)
        ax[6].semilogy(tab['energy__'], alpha=alpha)

    fig.savefig("../results/" + runname + "/" + runname + "_progress_bin" +
                str(idx) + ".png")
    plt.close('all')

    return
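
# Minimal usage sketch (hypothetical run name; assumes the Stan progress CSVs
# written during sampling exist under ../results/<runname>/):
if __name__ == "__main__":
    monitor_progress("NGC0000", 0)  # writes NGC0000_progress_bin0.png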
Example #2
def load_hdf5(filename, verbose=True):

    misc.printRUNNING("Loading " + filename + " data")

    # Checking file exists
    if not os.path.exists(filename):
        misc.printFAILED("Cannot find file " + filename)
        sys.exit()

    # Opening file
    if verbose:
        print("# Opening file")
        print("")
    f = h5py.File(filename, 'r')

    # Defining output dictionary
    struct = {}

    # Filling up dictionary
    if verbose:
        print("# Loading input data:")
    input_data = f['in']
    for key, values in input_data.items():
        if verbose:
            print(' - ' + key)
        struct[key] = np.array(values)

    if f.get("out") != None:
        if verbose:
            print("")
            print("# Loading Stan results:")
        output_data = f['out']
        bins_list = list(output_data.keys())
        for idx in bins_list:
            tmp = f['out/' + idx]
            struct[int(idx)] = {}
            for key, values in tmp.items():
                if verbose:
                    print(' - [' + idx + '] ' + key)
                struct[int(idx)][key] = np.array(values)

    misc.printDONE()

    return struct
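
# Sketch of the HDF5 layout load_hdf5() expects, built as a toy file
# (hypothetical contents; real files are written by the preprocessing and
# fitting scripts, and the misc helper module must be importable):
if __name__ == "__main__":
    with h5py.File("toy_results.hdf5", "w") as tf:
        tf.create_dataset("in/xvel", data=np.linspace(-500.0, 500.0, 25))
        tf.create_dataset("out/0/losvd", data=np.zeros((5, 25)))
    toy = load_hdf5("toy_results.hdf5")
    # 'in/*' datasets land at the top level of the dict; each 'out/<bin>/*'
    # dataset is stored under the integer bin key
    print(toy['xvel'].shape, toy[0]['losvd'].shape)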
Example #3
def run_inspect_fits(filename, idx, losvd_file=None, save=0):

    # Checking bin exists in dataset
    stridx = str(idx)
    f = h5py.File(filename, "r")
    dummy = f.get('out/' + stridx + '/bestfit')
    if dummy is None:
        misc.printFAILED("Bin " + stridx + " does not exist in file")
        sys.exit()

    # Reading input LOSVD if requested
    if losvd_file is not None:
        tab = ascii.read(losvd_file)
        input_xvel = tab['col1']
        input_losvd = tab['col2'] / np.sum(tab['col2'])

    # Reading the results
    # --- Input data ----------
    xbin = np.array(f['in/xbin'])
    ybin = np.array(f['in/ybin'])
    wave_obs = np.exp(np.array(f['in/wave_obs']))
    spec_obs = np.array(f['in/spec_obs'][:, idx])
    xvel = np.array(f['in/xvel'])
    mask = np.array(f['in/mask'])
    ndim = np.array(f['in/ndim'])
    # --- Output results ---------
    bestfit = np.array(f['out/' + stridx + '/bestfit'])
    losvd = np.array(f['out/' + stridx + '/losvd'])
    poly = np.array(f['out/' + stridx + '/poly']) + 1.0
    nbins = len(xbin)

    # Normalizing LOSVDs ----------------------------------------------------------
    norm_factor = np.sum(losvd[2, :])
    for i in range(5):
        losvd[i, :] /= norm_factor

    # Making plot ----------------------------------------------------------
    fig = plt.figure(figsize=(10, 7))
    fig.suptitle("BinID: " + str(idx), fontsize=14, fontweight='bold')
    plt.subplots_adjust(left=0.07,
                        bottom=0.10,
                        right=0.98,
                        top=0.925,
                        wspace=0.0,
                        hspace=0.3)

    # Bin map -----------
    if ndim > 1:
        ax0 = plt.subplot2grid((2, 4), (0, 0), colspan=1)
        ax0.set_title("BinID map")
        ax0.plot(xbin, ybin, 'k+', zorder=0)
        ax0.plot(xbin[idx], ybin[idx], 'r.', markersize=15.0)
        ax0.set_aspect('equal')
        for i in range(nbins):
            ax0.text(xbin[i],
                     ybin[i],
                     i,
                     fontsize=5,
                     horizontalalignment='right',
                     verticalalignment='center',
                     zorder=1)

    # LOSVD -----------
    ax1 = plt.subplot2grid((2, 4), (0, 2), colspan=2)
    ax1.fill_between(xvel,
                     losvd[0, :],
                     losvd[4, :],
                     color='blue',
                     alpha=0.15,
                     step='mid')
    ax1.fill_between(xvel,
                     losvd[1, :],
                     losvd[3, :],
                     color='blue',
                     alpha=0.50,
                     step='mid')
    ax1.plot(xvel, losvd[2, :], 'k.-', ds='steps-mid')
    if losvd_file is not None:
        ax1.plot(input_xvel, input_losvd, 'r.-', ds='steps-mid')
    ax1.axhline(y=0.0, color='k', linestyle='--')
    ax1.axvline(x=0.0, color='k', linestyle=":")
    ax1.set_xlabel("Velocity (km s$^{-1}$)")

    # Spectral fit
    mx = 1.1 * np.amax(spec_obs)
    mn0 = 0.7 * np.amin(spec_obs)
    ax2 = plt.subplot2grid((2, 4), (1, 0), colspan=4)
    ax2.fill_between(wave_obs,
                     poly[1, :],
                     poly[3, :],
                     facecolor='yellow',
                     zorder=0,
                     alpha=0.50,
                     label="Leg. polynomial")
    ax2.plot(wave_obs,
             poly[1, :],
             color='gray',
             linestyle='--',
             linewidth=1,
             zorder=0)
    ax2.plot(wave_obs,
             poly[3, :],
             color='gray',
             linestyle='--',
             linewidth=1,
             zorder=0)
    ax2.plot(wave_obs, spec_obs, 'k', zorder=1, label="Obs. data")
    ax2.fill_between(wave_obs,
                     bestfit[1, :],
                     bestfit[3, :],
                     facecolor='orange',
                     zorder=2,
                     alpha=0.75)
    ax2.plot(wave_obs, bestfit[2, :], color='red', zorder=3, label="Bestfit")
    # Residuals, offset so that zero falls at mn0 + 0.1 (just above the lower y-limit)
    res = spec_obs - bestfit[2, :] + mn0 + 0.1
    ax2.plot(wave_obs, res, color='green', label="Residuals")
    ax2.set_ylim([mn0, mx])
    ax2.axhline(y=mn0 + 0.1, color='k', linestyle='--')
    ax2.axvline(x=wave_obs[mask[0]], color='k', linestyle=":")
    ax2.axvline(x=wave_obs[mask[-1]], color='k', linestyle=":")

    w = np.flatnonzero(np.diff(mask) > 1)
    if w.size > 0:
        for wj in w:
            l0 = wave_obs[mask[wj]]
            l1 = wave_obs[mask[wj + 1]]
            ax2.axvspan(l0, l1, alpha=0.25, color='gray')

    ax2.set_ylabel("Norm. flux")
    ax2.set_xlabel("Wavelength ($\\mathrm{\\AA}$)")

    # Empirical S/N estimate: inverse RMS of the residuals in the fitted region
    print(1.0 / np.std(res[mask] - mn0 - 0.1))

    if (save == 1):
        dirname, inputname = os.path.split(filename)
        basename = os.path.splitext(inputname)[0]
        outpng = dirname + '/' + basename + '_bin' + stridx + '.png'
        plt.savefig(outpng)
    else:
        plt.show()

    return
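
# Hypothetical inspection call (run name and bin number illustrative; the file
# is the HDF5 written by the fitting step):
#   run_inspect_fits("../results/NGC0000/NGC0000_results.hdf5", 5, save=1)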
Example #4
                      default=None,
                      help="(Optional) Filename of the input LOSVD")
    parser.add_option("-s",
                      "--save",
                      dest="save",
                      type="int",
                      default=0,
                      help="(Optional) Save figure")
    parser.add_option("-d",
                      "--dir",
                      dest="dir",
                      type="string",
                      default='../results/',
                      help="(Optional) The directory with results")

    (options, args) = parser.parse_args()
    runname = options.runname
    binID = options.binID
    losvd_file = options.losvd
    save = options.save
    results_dir = options.dir
    filename = results_dir + runname + "/" + runname + "_results.hdf5"

    if not os.path.exists(filename):
        misc.printFAILED(filename + " does not exist.")
        sys.exit()

    run_inspect_fits(filename, binID, losvd_file, save=save)

    misc.printDONE(runname + " - Bin: " + str(binID))
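
# Hypothetical command-line invocation of the script above (-s/--save and
# -d/--dir as defined here; the run-name and bin options are defined earlier
# in the script, outside this snippet):
#   python bayes_losvd_inspect_fits.py ... -s 1 -d ../results/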
def run_inspect_ghfit(filename, idx, losvd_file=None, save=0):

    # Checking bin exists in dataset
    stridx = str(idx)
    f = h5py.File(filename, "r")
    dummy = f.get('out/' + stridx + '/offset')
    if dummy is None:
        misc.printFAILED("Bin " + stridx + " does not exist in file")
        sys.exit()

    # Reading input LOSVD if requested
    if losvd_file is not None:
        tab = ascii.read(losvd_file)
        input_xvel = tab['col1']
        input_losvd = tab['col2'] / np.sum(tab['col2'])

    # Reading the results
    # --- Input data ----------
    xbin = np.array(f['in/xbin'])
    ybin = np.array(f['in/ybin'])
    xvel = np.array(f['in/xvel'])
    nbins = len(xbin)
    ndim = np.array(f['in/ndim'])
    # --- Output results ---------
    losvd = np.array(f['out/' + stridx + '/losvd'])
    losvd_gh_mod = np.array(f['out/' + stridx + '/losvd_gh_mod'])

    # Normalizing LOSVDs ----------------------------------------------------------
    norm_factor = np.trapz(losvd[2, :], -xvel)
    for i in range(5):
        losvd[i, :] /= norm_factor
    norm_factor = np.trapz(losvd_gh_mod[2, :], -xvel)
    for i in range(5):
        losvd_gh_mod[i, :] /= norm_factor

    # Making plot ----------------------------------------------------------

    # Bin map -----------
    if ndim > 1:
        fig = plt.figure(figsize=(10, 4))
        plt.subplots_adjust(left=0.07,
                            bottom=0.15,
                            right=0.98,
                            top=0.925,
                            wspace=0.0,
                            hspace=0.3)
        fig.suptitle("BinID: " + str(idx), fontsize=14, fontweight='bold')
        ax0 = plt.subplot2grid((1, 4), (0, 0), colspan=1)
        ax0.set_title("BinID map")
        ax0.plot(xbin, ybin, 'k+', zorder=0)
        ax0.plot(xbin[idx], ybin[idx], 'r.', markersize=15.0)
        ax0.set_aspect('equal')
        for i in range(nbins):
            ax0.text(xbin[i],
                     ybin[i],
                     i,
                     fontsize=5,
                     horizontalalignment='right',
                     verticalalignment='center',
                     zorder=1)
    else:
        fig = plt.figure(figsize=(5, 4))
        plt.subplots_adjust(left=0.01, bottom=0.13, right=0.99, top=0.925)
        fig.suptitle("BinID: " + str(idx), fontsize=14, fontweight='bold')
        ax1 = plt.subplot2grid((1, 1), (0, 0))

    # LOSVD -----------
    ax1.fill_between(xvel,
                     losvd[0, :],
                     losvd[4, :],
                     color='gray',
                     alpha=0.15,
                     step='mid')
    ax1.fill_between(xvel,
                     losvd[1, :],
                     losvd[3, :],
                     color='gray',
                     alpha=0.50,
                     step='mid')
    ax1.plot(xvel,
             losvd[2, :],
             '.--',
             color='black',
             ds='steps-mid',
             label='BAYES-LOSVD fit')

    ax1.fill_between(xvel,
                     losvd_gh_mod[0, :],
                     losvd_gh_mod[4, :],
                     color='red',
                     alpha=0.25,
                     step='mid')
    ax1.fill_between(xvel,
                     losvd_gh_mod[1, :],
                     losvd_gh_mod[3, :],
                     color='red',
                     alpha=0.50,
                     step='mid')
    ax1.plot(xvel, losvd_gh_mod[2, :], 'r.-', ds='steps-mid', label='GH fit')

    if losvd_file is not None:
        ax1.plot(input_xvel, input_losvd, 'r.-', ds='steps-mid')
    ax1.axhline(y=0.0, color='k', linestyle='--')
    ax1.axvline(x=0.0, color='k', linestyle=":")
    ax1.set_xlabel("Velocity (km s$^{-1}$)")
    ax1.legend()
    ax1.set_yticks([])

    if (save == 1):
        dirname, inputname = os.path.split(filename)
        basename = os.path.splitext(inputname)[0]
        outpng = dirname + '/' + basename + '_bin' + stridx + '.png'
        plt.savefig(outpng)
    else:
        plt.show()

    return
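
# The normalisation above rescales each LOSVD so that its median curve
# integrates to one over velocity; a toy check of the np.trapz idiom follows
# (illustrative grid; the -xvel sign convention in the code depends on the
# ordering of the velocity array):
if __name__ == "__main__":
    v = np.linspace(-500.0, 500.0, 51)
    p = np.exp(-0.5 * (v / 150.0)**2)
    p /= np.trapz(p, v)
    print(np.trapz(p, v))  # ~1.0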
Example #6
        verbose = True

    if (save_chains == 0):
        save_chains = False
    else:
        save_chains = True

    if (save_plots == 0):
        save_plots = False
    else:
        save_plots = True

    # Checking the file exists
    results_file = "../results/" + runname + "/" + runname + "_results.hdf5"
    if not os.path.exists(results_file):
        misc.printFAILED(results_file + " does not exist.")
        sys.exit()

    # Loading input information from the results file
    f = h5py.File(results_file, 'r')
    nbins = np.array(f['in/nbins'])
    f.close()

    # Defining the list of bins to be analysed
    if (bin == "all"):
        bin_list = list(np.arange(nbins))
        print("# ENTIRE list of bins selected")
    elif (bin == "odd"):
        bin_list = list(np.arange(0, nbins, 2))
        print("# ODD bins selected")
    elif (bin == "even"):
Example #7
def run(i, bin_list, runname, niter, nchain, adapt_delta, max_treedepth,
        verbose, save_chains, save_plots):

    idx = bin_list[i]
    stridx = str(idx)
    misc.printRUNNING(runname + " - Bin: " + stridx)

    try:

        # Checking the desired bin exists
        input_file = "../results/" + runname + "/" + runname + "_results.hdf5"

        struct = h5py.File(input_file, 'r+')
        check_bin = struct.get('out/' + stridx)
        if check_bin is None:
            misc.printFAILED("Bin " + stridx + " does not exist in " +
                             input_file)
            return 'ERROR'

        # Defining the version of the code to use
        codefile = 'stan_model/bayes-losvd_ghfit.stan'
        if not os.path.exists(codefile):
            misc.printFAILED(codefile + " does not exist.")
            sys.exit()

        # Defining output names and directories
        outdir = "../results/" + runname
        pdf_filename = outdir + "/" + runname + "_gh_diagnostics_bin" + stridx + ".pdf"
        summary_filename = outdir + "/" + runname + "_gh_Stan_summary_bin" + stridx + ".txt"
        arviz_filename = outdir + "/" + runname + "_gh_chains_bin" + stridx + ".netcdf"
        sample_filename = outdir + "/" + runname + "_gh_progress_bin" + stridx + ".csv"
        outhdf5 = outdir + "/" + runname + "_gh_results_bin" + stridx + ".hdf5"

        # Creating the structure with the data for Stan
        # -------
        # NOTE: losvd_obs, sigma_losvd is what goes into the GH fit
        #       losvd is the processed output of bayes_losvd_run.py
        #       losvd_obs = losvd[2,:]
        #       sigma_losvd is an averaged version of the true 1sigma uncertainties from the bayes_losvd_run.py fit
        # -------
        losvd = struct['out/' + stridx + '/losvd'][2, :]
        sigma = np.zeros((len(losvd), 2))
        sigma[:, 0] = np.fabs(struct['out/' + stridx + '/losvd'][1, :] - losvd)
        sigma[:, 1] = np.fabs(struct['out/' + stridx + '/losvd'][3, :] - losvd)
        sigma_losvd = np.mean(sigma, axis=1)

        data = {
            'nvel': struct['in/nvel'],
            'xvel': struct['in/xvel'],
            'losvd_obs': losvd,
            'sigma_losvd': sigma_losvd
        }

        # Creating a temporary file adding the input data to the input HDF5 file info
        temp = tempfile.NamedTemporaryFile()
        struct2 = h5py.File(temp.name, 'w')
        struct.copy('in', struct2)
        struct2.create_dataset("out/" + stridx + "/losvd",
                               data=np.array(struct['out/' + stridx +
                                                    '/losvd']),
                               compression="gzip")

        # Running the model
        with open(codefile, 'r') as myfile:
            code = myfile.read()
        model = stan_cache(model_code=code, codefile=codefile)
        fit = model.sampling(data=data,
                             iter=niter,
                             chains=nchain,
                             control={
                                 'adapt_delta': adapt_delta,
                                 'max_treedepth': max_treedepth
                             },
                             sample_file=sample_filename,
                             check_hmc_diagnostics=True)
        samples = fit.extract(permuted=True)
        diag_pars = fit.get_sampler_params()

        # If requested, saving sample chains
        if save_chains:
            print("")
            print("# Saving chains in Arviz (NETCDF) format: " +
                  arviz_filename)
            arviz_data = az.from_pystan(posterior=fit)
            az.to_netcdf(arviz_data, arviz_filename)

        # Saving Stan's summary of main parameters on disk
        print("")
        print("# Saving Stan summary: " + summary_filename)
        unwanted = {'losvd_mod'}
        misc.save_stan_summary(fit,
                               unwanted=unwanted,
                               verbose=verbose,
                               summary_filename=summary_filename)

        # Processing output and saving results
        print("")
        print("# Processing and saving results: " + outhdf5)
        misc.process_stan_output_hdp(struct2, samples, outhdf5, stridx)

        # Creating diagnostic plots
        if save_plots:
            if os.path.exists(pdf_filename):
                os.remove(pdf_filename)
            print("")
            print("# Saving diagnostic plots: " + pdf_filename)
            create_diagnostic_plots(idx, pdf_filename, fit, diag_pars, niter,
                                    nchain)

        # Removing progress files
        print("")
        print("# Deleting progress files")
        misc.delete_files(sample_filename, 'csv')
        misc.delete_files(sample_filename, 'png')

        # If we are here, we are DONE!
        struct.close()
        struct2.close()
        misc.printDONE(runname + " - Bin: " + stridx)

        return 'OK'

    except Exception:

        misc.printFAILED()
        traceback.print_exc()

        return 'ERROR'
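
# Sketch of the h5py group-copy idiom used above to build the temporary input
# file (toy file names; Group.copy recursively copies datasets and subgroups):
if __name__ == "__main__":
    with h5py.File("src.hdf5", "w") as src:
        src.create_dataset("in/xvel", data=np.arange(5.0))
    with h5py.File("src.hdf5", "r") as src, h5py.File("dst.hdf5", "w") as dst:
        src.copy("in", dst)  # dst now holds an 'in' group with its datasets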
Example #8
def load_data(struct):

    # Adding the relative path to input filename and check file exists
    if not os.path.exists("../data/" + struct['filename']):
        misc.printFAILED("File '" + struct['filename'] +
                         "' not found in 'data' directory")
        sys.exit()
    struct['filename'] = "../data/" + struct['filename']

    # Reading instruments config file
    instr_config = toml.load("../config_files/instruments.properties")
    instr_list = list(instr_config.keys())

    if struct['instrument'] not in instr_list:
        misc.printFAILED("Instrument '" + struct['instrument'] +
                         "' not found in instruments configuration file")
        sys.exit()
    if not os.path.exists("../config_files/instruments/" +
                          instr_config[struct['instrument']]['read_file']):
        misc.printFAILED("Instrument read file '" +
                         instr_config[struct['instrument']]['read_file'] +
                         "' not found in instruments directory")
        sys.exit()

    # Reading instrument specific data and info
    print(" - Reading the data and basic info")
    instr = importlib.util.spec_from_file_location(
        "", "../config_files/instruments/" +
        instr_config[struct['instrument']]['read_file'])
    module = importlib.util.module_from_spec(instr)
    instr.loader.exec_module(module)
    data = module.read_data("../data/" + struct['filename'])

    # Creating variables for convenience
    wave = data['wave']
    spec = data['spec']
    espec = data['espec']
    x = data['x']
    y = data['y']
    npix = data['npix']
    nspec = data['nspax']
    psize = data['psize']
    ndim = data['ndim']
    lmin = struct['lmin']
    lmax = struct['lmax']

    # Correcting the data for redshift
    print(" - Correcting data for redshift")
    wave /= (1.0 + struct['redshift'])

    # Checking the desired wavelength range is within data wavelength limits
    if (wave[0] > lmin):
        lmin = wave[0]
    if (wave[-1] < lmax):
        lmax = wave[-1]

    # Cutting the data to the desired wavelength range
    print(" - Cutting data to desired wavelength range")
    idx = (wave >= lmin) & (wave <= lmax)
    wave = wave[idx]
    spec = spec[idx, :]
    espec = espec[idx, :]
    npix = np.sum(idx)

    # Computing the SNR in each spaxel
    print(" - Computing the SNR of each spaxel")
    signal = np.nanmedian(spec, axis=0)
    noise = np.abs(np.nanmedian(espec, axis=0))

    # Filtering out those spectra with NaN estimates for SNR
    good = np.isfinite(signal / noise) & (signal / noise > 0.0)
    if np.sum(good) > 0:
        signal = signal[good]
        noise = noise[good]
        spec = spec[:, good]
        espec = espec[:, good]
        x = x[good]
        y = y[good]
        nspec = np.sum(good)

    # Selecting those spaxels above SNR_min
    print(" - Selecting spaxels above SNR_min")
    delta = np.abs((signal / noise) - struct['snr_min'])
    idx = (delta <= 3.0)
    if np.sum(idx) > 0:
        isof = np.mean(signal[idx])
        idx = (signal >= isof)
        spec = spec[:, idx]
        espec = espec[:, idx]
        signal = signal[idx]
        noise = noise[idx]
        x, y = x[idx], y[idx]
        nspec = np.sum(idx)

    # IF requested, Voronoi binning the data
    if struct['snr'] > 0.0:

        # Determining Voronoi binning to the data
        print(" - Computing the Voronoi binning")
        binNum, xbin, ybin, xBar, yBar, bin_snr, nPixels, scale = cap.voronoi_2d_binning(x, y, \
                signal, noise, struct['snr'], plot=False, quiet=True, pixelsize=psize)

        print("   - " + str(len(xbin)) + " Voronoi bins created")

        # Applying the Voronoi binning to the data
        print("   - Applying the Voronoi binning")
        ubins = np.unique(binNum)
        nbins = len(ubins)
        bin_spec = np.zeros([npix, nbins])
        bin_espec = np.zeros([npix, nbins])
        bin_flux = np.zeros(nbins)

        for i in trange(nbins, ascii=True, leave=False):
            k = np.where(binNum == ubins[i])[0]
            valbin = len(k)
            if valbin == 1:
                av_spec = spec[:, k]
                av_err_spec = espec[:, k]
            else:
                av_spec = np.nansum(spec[:, k], axis=1)
                av_err_spec = np.sqrt(np.sum(espec[:, k]**2, axis=1))

            bin_flux[i] = np.mean(av_spec, axis=0)
            bin_spec[:, i] = np.ravel(av_spec)
            bin_espec[:, i] = np.ravel(av_err_spec)

    else:

        bin_snr = signal / noise
        binNum = np.arange(nspec)
        bin_flux = np.mean(spec, axis=0)
        bin_spec = spec
        bin_espec = espec
        nbins = nspec
        xbin = x
        ybin = y
        print(" - " + str(len(xbin)) + " spectra in file")

    # Log-rebinning the data to the input Velscale
    print(" - Log-rebinning and normalizing the spectra")
    lamRange = np.array([np.amin(wave), np.amax(wave)])
    dummy, lwave, _ = cap.log_rebin(lamRange,
                                    bin_spec[:, 0],
                                    velscale=struct['velscale'])
    npix_log = len(dummy)
    lspec, lespec = np.zeros([npix_log, nbins]), np.zeros([npix_log, nbins])
    for i in trange(nbins, ascii=True, leave=False):

        #Log-rebinning the spectra
        lspec[:, i], dummy, dummy = cap.log_rebin(lamRange,
                                                  bin_spec[:, i],
                                                  velscale=struct['velscale'])
        lespec[:, i], dummy, dummy = cap.log_rebin(lamRange,
                                                   bin_espec[:, i],
                                                   velscale=struct['velscale'])

        # Normalizing the observed and error spectra respecting the SNR of each bin
        lespec[:, i] /= np.nanmedian(lspec[:, i])
        lspec[:, i] /= np.nanmedian(lspec[:, i])

    # Defining the data mask
    print(" - Defining the data mask")
    if (struct['mask_file'] == "None"):
        mn = int(0.01 * npix_log)  # Masking edges only
        mask = np.arange(mn, npix_log - mn)
    else:
        if not os.path.exists("../config_files/" + struct['mask_file']):
            misc.printFAILED(
                "Cannot find mask file in 'config_files' directory")
            sys.exit()
        mask = misc.spectralMasking("../config_files/" + struct['mask_file'],
                                    lwave, struct['redshift'])

    # Storing all the info in a data structure
    print(" - Storing everything in data structure")
    print("")
    data_struct = {
        'binID': binNum,
        'x': x,
        'y': y,
        'flux': signal,
        'xbin': xbin,
        'ybin': ybin,
        'bin_flux': bin_flux,
        'spec_obs': lspec,
        'sigma_obs': lespec,
        'wave_obs': lwave,
        'wave': wave,
        'velscale': struct['velscale'],
        'mask': np.ravel(mask),
        'nmask': len(mask),
        'bin_snr': bin_snr,
        'npix': npix,
        'npix_obs': npix_log,
        'nspec': nspec,
        'porder': struct['porder'],
        'nbins': nbins,
        'snr': struct['snr'],
        'lmin': lmin,
        'lmax': lmax,
        'ndim': ndim
    }

    return data_struct
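
# Hypothetical configuration sketch for load_data() (key names inferred from
# the lookups above; all values are illustrative only):
example_cfg = {
    'filename': 'NGC0000.fits',      # looked up relative to ../data/
    'instrument': 'MUSE-WFM',        # must be a key in instruments.properties
    'redshift': 0.005,
    'lmin': 4750.0, 'lmax': 5500.0,  # desired wavelength range (Angstrom)
    'snr_min': 3.0,                  # spaxel S/N threshold
    'snr': 50.0,                     # Voronoi target S/N (<= 0 skips binning)
    'velscale': 60.0,                # km/s per pixel for the log-rebinning
    'porder': 5,                     # Legendre polynomial order
    'mask_file': "None",             # or a file under ../config_files/
}
# data_struct = load_data(example_cfg)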
Example #9
def load_testdata(struct):

    # Adding the relative path to input filename and check file exists
    if not os.path.exists("../data/" + struct['filename']):
        misc.printFAILED("File '" + struct['filename'] +
                         "' not found in 'data' directory")
        sys.exit()
    struct['filename'] = "../data/" + struct['filename']

    # Reading instruments config file
    instr_config = toml.load("../config_files/instruments.properties")
    instr_list = list(instr_config.keys())

    if struct['instrument'] not in instr_list:
        misc.printFAILED("Instrument '" + struct['instrument'] +
                         "' not found in instruments configuration file")
        sys.exit()
    if not os.path.exists("../config_files/instruments/" +
                          instr_config[struct['instrument']]['read_file']):
        misc.printFAILED("Instrument read file '" +
                         instr_config[struct['instrument']]['read_file'] +
                         "' not found in instruments directory")
        sys.exit()

    # Reading instrument specific data and info
    print(" - Reading the data and basic info")
    instr = importlib.util.spec_from_file_location(
        "", "../config_files/instruments/" +
        instr_config[struct['instrument']]['read_file'])
    module = importlib.util.module_from_spec(instr)
    instr.loader.exec_module(module)
    data = module.read_data("../data/" + struct['filename'])

    # Creating variables for convenience
    wave = data['wave']
    spec = data['spec']
    espec = data['espec']
    x = data['x']
    y = data['y']
    npix = data['npix']
    nspec = data['nspax']
    psize = data['psize']
    ndim = data['ndim']
    lmin = np.log(struct['lmin'])
    lmax = np.log(struct['lmax'])

    # Correcting the data for redshift
    # NOTE: remember that testdata is already in log
    #    print(" - Correcting data for redshift")
    #    wave -= (1.0 + struct['redshift'])

    # Checking the desired wavelength range is within data wavelength limits
    if (wave[0] > lmin):
        lmin = wave[0]
    if (wave[-1] < lmax):
        lmax = wave[-1]

    # Cutting the data to the desired wavelength range
    print(" - Cutting data to desired wavelength range")
    idx = (wave >= lmin) & (wave <= lmax)
    wave = wave[idx]
    spec = spec[idx, :]
    espec = espec[idx, :]
    npix = np.sum(idx)
    flux = np.median(spec)
    npix_log = npix
    nbins = nspec

    print(" - Normalising the spectra")
    for i in trange(nbins, ascii=True, leave=False):

        # Normalizing the observed and error spectra respecting the SNR of each bin
        espec[:, i] /= np.nanmedian(spec[:, i])
        spec[:, i] /= np.nanmedian(spec[:, i])

    # Defining the data mask
    print(" - Defining the data mask")
    if (struct['mask_file'] == "None"):
        mn = int(0.01 * npix_log)  # Masking edges only
        mask = np.arange(mn, npix_log - mn)
    else:
        if not os.path.exists("../config_files/" + struct['mask_file']):
            misc.printFAILED(
                "Cannot find mask file in 'config_files' directory")
            sys.exit()
        mask = misc.spectralMasking("../config_files/" + struct['mask_file'],
                                    wave, struct['redshift'])

    # Storing all the info in a data structure
    print(" - Storing everything in data structure")
    print("")
    data_struct = {
        'binID': np.arange(nbins),
        'x': x,
        'y': y,
        'flux': flux,
        'xbin': x,
        'ybin': y,
        'bin_flux': flux,
        'spec_obs': spec,
        'sigma_obs': espec,
        'wave_obs': wave,
        'wave': wave,
        'velscale': struct['velscale'],
        'mask': np.ravel(mask),
        'nmask': len(mask),
        'bin_snr': 0,
        'npix': npix,
        'npix_obs': npix_log,
        'nspec': nspec,
        'porder': struct['porder'],
        'nbins': nbins,
        'snr': struct['snr'],
        'lmin': np.exp(lmin),
        'lmax': np.exp(lmax),
        'ndim': ndim
    }

    return data_struct
Example #10
def load_templates(struct, data_struct):

    # Reading relevant info from config file
    temp_name = struct['template_lib']
    velscale = struct['velscale']
    npca = struct['npca']
    instr = struct['instrument']
    redshift = struct['redshift']
    vmax = struct['vmax']
    lmin = data_struct['lmin']
    lmax = data_struct['lmax']

    # Getting the appropriate LSF files
    instr_config = toml.load("../config_files/instruments.properties")
    lsf_data_file = "../config_files/instruments/" + instr_config[instr][
        'lsf_file']
    lsf_temp_file = "../config_files/instruments/" + temp_name + '.lsf'
    if not os.path.exists(lsf_data_file):
        misc.printFAILED(
            "Data lsf file not found in 'config_files/instruments' directory")
        sys.exit()
    if not os.path.exists(lsf_temp_file):
        misc.printFAILED(
            "Templates lsf file not found in 'config_files/instruments' directory"
        )
        sys.exit()

    # Loading SSP models and defining some basic parameters
    flist = glob.glob("../templates/" + temp_name + "/*")
    ntemp = len(flist)
    print(" - " + str(ntemp) + " templates found in " + temp_name + " library")

    hdu = fits.open(flist[0])
    tmp = np.ravel(hdu[0].data)
    hdr = hdu[0].header
    wave = hdr['CRVAL1'] + np.arange(len(tmp)) * hdr['CDELT1']
    dwav = hdr['CDELT1']
    npix = len(wave)

    # Defining output arrays
    temp = np.zeros((npix, ntemp))
    scale = np.zeros(ntemp)

    # Loading templates into final arrays
    # NOTE: this loop already cuts the spectra to the Lmin,Lmax limits
    print(" - Loading and preparing the templates...")
    for i in trange(ntemp, ascii=True, leave=False):

        # Reading, trimming and scaling the spectra
        hdu = fits.open(flist[i])
        temp[:, i] = np.ravel(hdu[0].data)
        scale[i] = np.mean(temp[:, i])
        temp[:, i] /= scale[i]

    # Running PCA on the input models
    if npix < ntemp:
        misc.printFAILED("The number of pixels in the spectra (" + str(npix) +
                         ") has to be larger than the number of templates (" +
                         str(ntemp) + ") to run PCA.")
        sys.exit()

    if npca > 0:
        print(" - Running PCA on the templates...")
        mean_temp = np.mean(temp, axis=1)
        pca = PCA(n_components=ntemp)
        PC_tmp = pca.fit_transform(temp)

        # Extracting the desired number of PCA components
        cumsum_pca_variance = np.cumsum(pca.explained_variance_ratio_)
        print("    " + str(npca) + " PCA components explain {:7.3f}".format(
            cumsum_pca_variance[npca] * 100) +
              "% of the variance in the input library")
        templates = np.zeros((npix, npca))
        templates = PC_tmp[:, 0:npca]
        ntemplates = npca

        # Continuum and Z-score Normalization to aid in the minimization
        for i in range(npca):
            coef = np.polyfit(wave, templates[:, i], 1)
            pfit = np.polyval(coef, wave)
            templates[:, i] -= pfit
            templates[:, i] /= np.std(templates[:, i])
    else:
        mean_temp = np.zeros(npix)
        templates = temp
        ntemplates = ntemp

    # Convolving the templates to match the data's LSF
    print(" - Convolving the templates to match the data's LSF")
    data_lsf = misc.read_lsf(wave, lsf_data_file)
    data_lsf /= (1.0 + redshift)
    temp_lsf = misc.read_lsf(wave, lsf_temp_file)
    fwhm_diff = np.sqrt(data_lsf**2 - temp_lsf**2)  # in angstroms
    bad_pix = np.isnan(fwhm_diff)
    if np.sum(bad_pix) > 0:
        misc.printWARNING(
            "Some values of the data LSF are below the templates values")
    fwhm_diff[bad_pix] = 1E-2  # Fixing FWHM_diff to a tiny value where NaN
    sigma_diff = fwhm_diff / 2.355 / dwav

    mean_temp = cap.gaussian_filter1d(mean_temp, sigma_diff)
    for i in trange(ntemplates, ascii=True, leave=False):
        templates[:, i] = cap.gaussian_filter1d(
            templates[:, i], sigma_diff)  # convolution with variable sigma

    # Log-rebinning the PCA spectra using the data's velscale
    print(" - Log-rebinning the templates")
    lamRange = np.array([np.amin(wave), np.amax(wave)])
    mean_temp, lwave, dummy = cap.log_rebin(lamRange,
                                            mean_temp,
                                            velscale=velscale)
    npix_temp = mean_temp.shape[0]
    tmp_temp = np.zeros((npix_temp, ntemplates))
    for i in range(ntemplates):
        tmp_temp[:, i], dummy, dummy = cap.log_rebin(lamRange,
                                                     templates[:, i],
                                                     velscale=velscale)
    templates = tmp_temp

    # Checking the wavelength solution for the templates is identical to the data
    # If not, the templates are resampled
    # NOTE: this is important to have a centered LOSVD on xvel=0.0
    good = (lwave >= np.log(data_struct['lmin'])) & (lwave <= np.log(
        data_struct['lmax']))
    check = np.array_equal(lwave[good], data_struct['wave_obs'])
    if not check:
        print(
            " - Resampling the templates to match the wavelength of the observed data (if needed)"
        )
        mean_temp = misc.spectres(data_struct['wave_obs'],
                                  lwave,
                                  mean_temp,
                                  fill=np.nan)
        npix_temp = len(mean_temp)
        new_temp = np.zeros((npix_temp, ntemplates))
        for i in range(ntemplates):
            new_temp[:, i] = misc.spectres(data_struct['wave_obs'],
                                           lwave,
                                           templates[:, i],
                                           fill=np.nan)
        lwave = data_struct['wave_obs']
        templates = new_temp
    else:
        mean_temp = mean_temp[good]
        templates = templates[good, :]
        lwave = lwave[good]
        npix_temp = len(lwave)

    # Normalizing the mean template to 1.0 and adjusting the other templates so that the mean is around 0.0
    mean_temp /= np.mean(mean_temp)
    for i in range(ntemplates):
        templates[:, i] -= np.mean(templates[:, i])

    # Storing everything into a dictionary
    print(" - Storing everything in templates structure")
    struct = {
        'lwave_temp': lwave,
        'mean_template': mean_temp,
        'templates': templates,
        'npix_temp': npix_temp,
        'ntemp': ntemplates
    }

    return struct
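
# The LSF matching above broadens each template by the quadrature FWHM
# difference converted to a Gaussian dispersion in pixels; a toy check with
# illustrative numbers (Angstrom and Angstrom/pixel):
if __name__ == "__main__":
    fwhm_data, fwhm_temp, cdelt = 2.65, 2.51, 0.90
    sigma_pix = np.sqrt(fwhm_data**2 - fwhm_temp**2) / 2.355 / cdelt
    print(sigma_pix)  # the dispersion fed to cap.gaussian_filter1d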
Example #11
def run(i, bin_list, runname, niter, nchain, adapt_delta, max_treedepth, 
        verbose=False, save_chains=False, save_plots=False, fit_type=None):

    idx = bin_list[i]
    stridx = str(idx)
    misc.printRUNNING(runname+" - Bin: "+stridx+" - Fit type: "+fit_type) 

    try:

        # Defining the version of the code to use
        codefile, extrapars = misc.read_code(fit_type)
     
        # Defining output names and directories
        rootname         = runname+"-"+fit_type
        outdir           = "../results/"+rootname
        pdf_filename     = outdir+"/"+rootname+"_diagnostics_bin"+str(idx)+".pdf"
        summary_filename = outdir+"/"+rootname+"_Stan_summary_bin"+str(idx)+".txt"
        arviz_filename   = outdir+"/"+rootname+"_chains_bin"+str(idx)+".netcdf"
        sample_filename  = outdir+"/"+rootname+"_progress_bin"+str(idx)+".csv"
        outhdf5          = outdir+"/"+rootname+"_results_bin"+str(idx)+".hdf5"

        # Creating the basic structure with the data for Stan
        struct = h5py.File("../preproc_data/"+runname+".hdf5","r")
        data   = {'npix_obs':      np.array(struct['in/npix_obs']), 
                  'ntemp':         np.array(struct['in/ntemp']), 
                  'nvel':          np.array(struct['in/nvel']),
                  'npix_temp':     np.array(struct['in/npix_temp']),
                  'mask':          np.array(struct['in/mask']), 
                  'nmask':         np.array(struct['in/nmask']), 
                  'porder':        np.array(struct['in/porder']),
                  'spec_obs':      np.array(struct['in/spec_obs'][:,idx]), 
                  'sigma_obs':     np.array(struct['in/sigma_obs'][:,idx]), 
                  'templates':     np.array(struct['in/templates']),
                  'mean_template': np.array(struct['in/mean_template']),
                  'velscale':      np.array(struct['in/velscale']),
                  'xvel':          np.array(struct['in/xvel'])}

        # Adding any extra parameter needed for that particular fit_type
        for key, val in extrapars.items():
            data[key] = val
            
        # Running the model
        with open(codefile, 'r') as myfile:
           code = myfile.read()
        model   = stan_cache(model_code=code, codefile=codefile) 
        fit     = model.sampling(data=data, iter=niter, chains=nchain, 
                  control={'adapt_delta':adapt_delta, 'max_treedepth':max_treedepth}, 
                  sample_file=sample_filename, check_hmc_diagnostics=True)

        samples   = fit.extract(permuted=True) # Extracting parameter samples
        diag_pars = fit.get_sampler_params()   # Getting sampler diagnostic params
        
        # If requested, saving sample chains
        if save_chains:
           print("")
           print("# Saving chains in Arviz (NETCDF) format: "+arviz_filename) 
           arviz_data = az.from_pystan(posterior=fit, observed_data=['mask','spec_obs','sigma_obs'])
           az.to_netcdf(arviz_data,arviz_filename)

        # Saving Stan's summary of main parameters on disk
        print("")
        print("# Saving Stan summary: "+summary_filename)         
        unwanted = {'spec','conv_spec','poly','bestfit','a','losvd_'}
        misc.save_stan_summary(fit, unwanted=unwanted, verbose=verbose, summary_filename=summary_filename)

        # Processing output and saving results
        print("")
        print("# Processing and saving results: "+outhdf5)
        misc.process_stan_output_hdp(struct,samples,outhdf5,stridx)

        # Creating diagnostic plots
        if save_plots:
            if os.path.exists(pdf_filename):
              os.remove(pdf_filename)    
            print("")
            print("# Saving diagnostic plots: "+pdf_filename) 
            create_diagnostic_plots(idx, pdf_filename, fit, diag_pars, niter, nchain)
    
        # Removing progress files
        print("")
        print("# Deleting progress files")
        misc.delete_files(sample_filename,'csv')
        misc.delete_files(sample_filename,'png')

        # If we are here, we are DONE!
        struct.close()
        misc.printDONE(runname+" - Bin: "+stridx+" - Fit type: "+fit_type)

        return 'OK'
    
    except Exception:

        misc.printFAILED()
        traceback.print_exc()            
          
        return 'ERROR'
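
# Hypothetical driver sketch (argument values illustrative; in the pipeline
# they come from the command-line options of the calling script, and fit_type
# must be one of the codes known to misc.read_code):
#   status = run(0, [0, 1, 2], "NGC0000", niter=1000, nchain=4,
#                adapt_delta=0.9, max_treedepth=12, fit_type="SP")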
Example #12
    else:
        save_chains = True    

    if (save_plots == 0):
        save_plots = False
    else:
        save_plots = True    

    if (restart == 0):
        restart = False
    else:
        restart = True    

    # Checking the file exists
    if not os.path.exists(preproc_file):
        misc.printFAILED(preproc_file+" does not exist.")
        sys.exit()

    # Checking fit_type is a valid one
    misc.check_codes(fit_type)

    # Defining rootnames for output files
    tmpname = os.path.basename(preproc_file)
    runname = os.path.splitext(tmpname)[0]
    outdir  = "../results/"+runname+"-"+fit_type

    if not os.path.exists("../results"):
        os.mkdir("../results")
    if not os.path.exists(outdir):
        os.mkdir(outdir)
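
# Note: the two-step mkdir above only works because each call is guarded by an
# os.path.exists check; os.makedirs(outdir, exist_ok=True) would be an
# equivalent, race-free single call.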