Example 1
 def input_wrapper(*args,**kwargs):
     spec= args[0]
     specerr= args[1]
     if isinstance(specerr,str): # locID+APOGEE-ID; array
         ispec= apread.aspcapStar(spec,specerr,ext=1,header=False,
                                  aspcapWavegrid=True)
         ispecerr= apread.aspcapStar(spec,specerr,ext=2,header=False,
                                     aspcapWavegrid=True)
         spec= ispec
         specerr= ispecerr
     elif (isinstance(specerr,(list,numpy.ndarray)) \
               and isinstance(specerr[0],str)): # locID+APOGEE-ID; array
         aspcapBlu_start,aspcapGre_start,aspcapRed_start,aspcapTotal = _aspcapPixelLimits(dr=None)
         nspec= len(specerr)
         ispec= numpy.empty((nspec,aspcapTotal))
         ispecerr= numpy.empty((nspec,aspcapTotal))
         for ii in range(nspec):
             ispec[ii]= apread.aspcapStar(spec[ii],specerr[ii],ext=1,
                                          header=False,aspcapWavegrid=True)
             ispecerr[ii]= apread.aspcapStar(spec[ii],specerr[ii],ext=2,
                                             header=False,aspcapWavegrid=True)
         spec= ispec
         specerr= ispecerr
     elif isinstance(specerr,(list,numpy.ndarray)) \
             and isinstance(specerr[0],(float,numpy.float32,
                                        numpy.float64,numpy.ndarray)) \
         and ((len(specerr.shape) == 1 and len(specerr) == 8575)
              or (len(specerr.shape) == 2 and specerr.shape[1] == 8575)): #array on apStar grid
         spec= toAspcapGrid(spec)
         specerr= toAspcapGrid(specerr)
     return func(spec,specerr,*args[2:],**kwargs)
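For context, input_wrapper above is the inner function of a decorator and closes over func, the wrapped function that expects its spectra already on the ASPCAP grid. A minimal sketch of how such a decorator could be assembled (the outer name aspcap_input is hypothetical, not part of the package):

import functools

def aspcap_input(func):
    # Hypothetical enclosing decorator; only the inner wrapper appears in the example above.
    @functools.wraps(func)
    def input_wrapper(*args, **kwargs):
        spec, specerr = args[0], args[1]
        # ... resolve (locID, APOGEE-ID) strings or apStar-grid arrays into
        # ASPCAP-grid spec/specerr exactly as in the example above ...
        return func(spec, specerr, *args[2:], **kwargs)
    return input_wrapper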
Example 2
 def input_wrapper(*args,**kwargs):
     spec= args[0]
     specerr= args[1]
     if isinstance(specerr,str): # locID+APOGEE-ID; array
         ispec= apread.aspcapStar(spec,specerr,ext=1,header=False,
                                  aspcapWavegrid=True)
         ispecerr= apread.aspcapStar(spec,specerr,ext=2,header=False,
                                     aspcapWavegrid=True)
         spec= ispec
         specerr= ispecerr
     elif (isinstance(specerr,(list,numpy.ndarray)) \
               and isinstance(specerr[0],str)): # locID+APOGEE-ID; array
         nspec= len(specerr)
         ispec= numpy.empty((nspec,7214))
         ispecerr= numpy.empty((nspec,7214))
         for ii in range(nspec):
             ispec[ii]= apread.aspcapStar(spec[ii],specerr[ii],ext=1,
                                          header=False,aspcapWavegrid=True)
             ispecerr[ii]= apread.aspcapStar(spec[ii],specerr[ii],ext=2,
                                             header=False,aspcapWavegrid=True)
         spec= ispec
         specerr= ispecerr
     elif isinstance(specerr,(list,numpy.ndarray)) \
             and isinstance(specerr[0],(float,numpy.float32,
                                        numpy.float64,numpy.ndarray)) \
         and ((len(specerr.shape) == 1 and len(specerr) == 8575)
              or (len(specerr.shape) == 2 and specerr.shape[1] == 8575)): #array on apStar grid
         spec= toAspcapGrid(spec)
         specerr= toAspcapGrid(specerr)
     return func(spec,specerr,*args[2:],**kwargs)
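In practice, when the wrapper is handed a location ID and an APOGEE (2MASS) ID it reduces to calls like the following. A minimal sketch assuming the apogee package and its data paths are set up; the identifiers are placeholders:

import apogee.tools.read as apread

# Placeholder IDs; substitute a real location ID and APOGEE ID.
loc_id, apogee_id = 4424, '2M00000000+0000000'

# ext=1 is the flux array, ext=2 the error array (as in the wrapper above);
# aspcapWavegrid=True returns both on the ASPCAP wavelength grid.
spec = apread.aspcapStar(loc_id, apogee_id, ext=1, header=False,
                         aspcapWavegrid=True)
specerr = apread.aspcapStar(loc_id, apogee_id, ext=2, header=False,
                            aspcapWavegrid=True)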
Example 3
def tophat(elem, dr=None, apStarWavegrid=True):
    """
    NAME:
       tophat
    PURPOSE:
       return an array with True in the window of a given element and False otherwise
    INPUT:
       elem - element     
       dr= read the window corresponding to this data release       
       apStarWavegrid= (True) if True, output the window onto the apStar wavelength grid, otherwise just give the ASPCAP version (blue+green+red directly concatenated)
    OUTPUT:
       array on apStar grid (or on the ASPCAP grid if apStarWavegrid is False)
    HISTORY:
       2015-01-26 - Written - Bovy (IAS@KITP)
    """
    out = numpy.zeros(_NLAMBDA, dtype='bool')
    for si, ei in zip(*waveregions(elem, asIndex=True, dr=dr)):
        out[si + 1:ei] = True
    if not apStarWavegrid: return toAspcapGrid(out)
    else: return out
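A short usage sketch for tophat, assuming it is imported from the window module of the apogee package (imported as apwindow in later examples); the spectrum is a placeholder:

import numpy
import apogee.spec.window as apwindow  # module path assumed from the apwindow usage below

mask = apwindow.tophat('Mg')      # boolean mask on the apStar grid, True inside the Mg windows
spec = numpy.ones(8575)           # placeholder spectrum on the apStar wavelength grid
window_flux = spec[mask]          # keep only the pixels inside the element's windows
print(mask.sum(), window_flux.shape)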
Example 4
def tophat(elem,dr=None,apStarWavegrid=True):
    """
    NAME:
       tophat
    PURPOSE:
       return an array with True in the window of a given element and False otherwise
    INPUT:
       elem - element     
       dr= read the window corresponding to this data release       
       apStarWavegrid= (True) if True, output the window onto the apStar wavelength grid, otherwise just give the ASPCAP version (blue+green+red directly concatenated)
    OUTPUT:
       array on apStar grid (or on the ASPCAP grid if apStarWavegrid is False)
    HISTORY:
       2015-01-26 - Written - Bovy (IAS@KITP)
    """
    out= numpy.zeros(_NLAMBDA,dtype='bool')
    for si,ei in zip(*waveregions(elem,asIndex=True,dr=dr)):
        out[si+1:ei]= True
    if not apStarWavegrid: return toAspcapGrid(out)
    else: return out
Example 5
def read(elem,apStarWavegrid=True,dr=None):
    """
    NAME:
       read
    PURPOSE:
       read the window weights for a given element, modified to only return 'good' windows
    INPUT:
       elem - element
       apStarWavegrid= (True) if True, output the window onto the apStar wavelength grid, otherwise just give the ASPCAP version (blue+green+red directly concatenated)
       dr= read the window corresponding to this data release       
    OUTPUT:
       Array with window weights
    HISTORY:
       2015-01-25 - Written - Bovy (IAS)
       2015-09-02 - Modified for only returning 'good' windows - Bovy (UofT)
    """
    out= apwindow.read(elem,apStarWavegrid=True,dr=dr)
    out[bad(elem)]= 0.
    if not apStarWavegrid:
        return toAspcapGrid(out)
    else:
        return out
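A sketch of how such window weights are typically applied, e.g. to form a weight-averaged quantity over an element's windows. The arrays below are placeholders standing in for read(elem) and a per-pixel residual:

import numpy

weights = numpy.random.uniform(size=8575)   # stand-in for read('Al') on the apStar grid
resid = numpy.random.normal(size=8575)      # stand-in for per-pixel residuals on the same grid

# Weighted mean over the element's windows; zero-weight pixels drop out automatically
weighted_mean = numpy.nansum(weights * resid) / numpy.nansum(weights)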
Example 6
def tophat(elem,dr=None,apStarWavegrid=True):
    """
    NAME:
       tophat
    PURPOSE:
       return an array with True in the window of a given element and False otherwise, only for 'good' windows
    INPUT:
       elem - element     
       dr= read the window corresponding to this data release       
       apStarWavegrid= (True) if True, output the window onto the apStar wavelength grid, otherwise just give the ASPCAP version (blue+green+red directly concatenated)
    OUTPUT:
       array on apStar grid (or on the ASPCAP grid if apStarWavegrid is False)
    HISTORY:
       2015-01-26 - Written - Bovy (IAS@KITP)
       2015-09-02 - Modified for only returning 'good' windows - Bovy (UofT)
    """
    out= apwindow.tophat(elem,apStarWavegrid=True,dr=dr)
    out[bad(elem)]= 0.
    if not apStarWavegrid:
        return toAspcapGrid(out)
    else:
        return out
Example 7
def pixels_cannon(*args, **kwargs):
    """
    NAME:
       pixels_cannon
    PURPOSE:
       determine continuum pixels using a Cannon-like technique (Ness et al. 2015)
    INPUT:
       Either:
        a) Input for running the apogee.spec.cannon:
          spec - spectra to fit (nspec,nlambda)
          specerrs - errors on the spectra (nspec,nlambda); assume no covariances
          label1, label2, ... - labels (nspec); best to subtract reference values before running this
          type= ('lin') type of Cannon to run:
             'lin' - linear Cannon
             'quad' - quadratic Cannon
        b) Output from a previous Cannon run:
          coefficients - coefficients from the fit (ncoeffs,nlambda)
          scatter - scatter from the fit (nlambda)
    KEYWORDS:
       baseline_dev= (0.015) maximum deviation from baseline
       label1_max= (10.**-5.) maximum deviation in first linear coefficient
       label2_max= (0.006) similar for the second
       label3_max= (0.012) similar for the third
       labelN_max= same with default 0.03
       ...
       scatter_max= (0.015) maximum scatter of residuals
       dr= (module-wide default) data release
    OUTPUT:
       Boolean index into the wavelength range with True for continuum pixels
    HISTORY:
       2015-02-05 - Written - Bovy (IAS@KITP)
    """
    # Grab kwargs
    type = kwargs.pop('type', 'lin')
    dr = kwargs.pop('dr', path._default_dr())
    # Parse input
    if len(args) == 0:  # Use default fit
        from apogee.spec._train_cannon import load_fit
        coeffs, scatter, baseline_labels = load_fit()
        type = 'quad'
    else:
        spec = args[0]
        specerr = args[1]
        # Determine the type of input
        if len(specerr.shape) == 2:
            # Run the Cannon
            if type.lower() == 'lin':
                coeffs, scatter = cannon.linfit(*args)
            elif type.lower() == 'quad':
                coeffs, scatter = cannon.quadfit(*args)
        else:
            coeffs = spec
            scatter = specerr
    ncoeffs = coeffs.shape[0]
    if type.lower() == 'lin':
        nlabels = ncoeffs - 1
    elif type.lower() == 'quad':
        nlabels = int((-3 + numpy.sqrt(9 + 8 * (ncoeffs - 1)))) // 2
    # Determine continuum pixels
    out = numpy.ones(len(scatter), dtype='bool')
    # Deviation from baseline
    out[numpy.fabs(coeffs[0] - 1.) > kwargs.get('baseline_dev', 0.015)] = False
    # Large dependence on labels
    maxs = numpy.zeros(nlabels)
    maxs[0] = kwargs.get('label1_max', 10.**-5.)
    maxs[1] = kwargs.get('label2_max', 0.006)
    maxs[2] = kwargs.get('label3_max', 0.012)
    for ii in range(nlabels - 3):
        maxs[ii + 3] = kwargs.get('label%i_max' % (ii + 4), 0.03)
    for ii in range(1, nlabels + 1):
        out[numpy.fabs(coeffs[ii]) > maxs[ii - 1]] = False
    # Large residuals
    out[scatter > kwargs.get('scatter_max', 0.015)] = False
    _, _, _, aspcapDR12length = _aspcapPixelLimits(dr='12')
    if int(dr) > 12 and coeffs.shape[1] == aspcapDR12length:
        # Want continuum pixels on >DR12 ASPCAP grid, but using coefficients
        # from <= DR12 grid
        dr_module = path._default_dr()
        path.change_dr(12)
        out = toApStarGrid(out)
        path.change_dr(dr)
        out = toAspcapGrid(out)
        path.change_dr(dr_module)
    return out
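A minimal sketch of the second calling pattern from the docstring (passing coefficients and scatter from an earlier quadratic Cannon fit instead of refitting). The arrays are placeholders sized for the DR12 ASPCAP grid:

import numpy

nlambda = 7214                                      # DR12 ASPCAP grid length
coeffs = numpy.vstack([numpy.ones(nlambda),         # baseline coefficient
                       numpy.zeros((9, nlambda))])  # linear + quadratic terms for 3 labels
scatter = numpy.zeros(nlambda) + 0.005              # per-pixel scatter of the fit

contpix = pixels_cannon(coeffs, scatter, type='quad', dr=12,
                        scatter_max=0.01)           # boolean continuum-pixel mask
print(contpix.sum(), 'continuum pixels')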
Example 8
def fit(spec,
        specerr,
        type='aspcap',
        deg=None,
        niter=10,
        usigma=3.,
        lsigma=0.1,
        cont_pixels=None):
    """
    NAME:
       fit
    PURPOSE:
       fit the continuum (a) with a sigma-clipping rejection method (~ASPCAP) or (b) with a Chebyshev polynomial based on a set of continuum pixels
    INPUT:
       spec - spectra to fit (nspec,nlambda)
       specerr - errors on the spectra (nspec,nlambda); assume no covariances
       type= ('aspcap') type of continuum fitting to do: 'ASPCAP' for the sigma-clipping rejection that ASPCAP uses and 'Cannon' for fitting a Chebyshev polynomial to continuum pixels
       ASPCAP keywords:
          deg= (4) degree of the polynomial
          niter= (10) number of sigma-clipping iterations to perform
          usigma, lsigma= (3., 0.1) upper and lower sigmas for sigma clipping
       Cannon keywords:
          deg= (2) degree of the polynomial
          cont_pixels= (None; loads default) boolean index in the ASPCAP wavelength grid with True for continuum pixels
    OUTPUT:
       continuum (nspec,nlambda)
    HISTORY:
       2015-03-01 - Cannon-style fit written - Bovy (IAS)
       2015-03-01 - ASPCAP-style fit written - Bovy (IAS)
    """
    # Parse input
    if len(spec.shape) == 1:
        tspec = copy.copy(numpy.reshape(spec, (1, len(spec))))
        tspecerr = numpy.reshape(specerr, (1, len(specerr)))
    else:
        tspec = copy.copy(spec)
        tspecerr = specerr
    if tspec.shape[1] == 8575:
        tspec = toAspcapGrid(tspec)
        tspecerr = toAspcapGrid(tspecerr)
    apStarBlu_lo,apStarBlu_hi,\
        apStarGre_lo,apStarGre_hi,\
        apStarRed_lo,apStarRed_hi= _apStarPixelLimits(dr=None)
    aspcapBlu_start, aspcapGre_start, aspcapRed_start, aspcapTotal = _aspcapPixelLimits(
        dr=None)
    if deg is None and type.lower() == 'aspcap': deg = 4
    elif deg is None: deg = 2
    # Fit each detector separately
    cont = numpy.empty_like(tspec)
    # Rescale wavelengths
    bluewav = numpy.arange(aspcapGre_start) / float(aspcapGre_start - 1.) * 2. - 1.
    greenwav = (numpy.arange(aspcapRed_start - aspcapGre_start)
                / float(aspcapRed_start - aspcapGre_start - 1.) * 2. - 1.)
    redwav = (numpy.arange(aspcapTotal - aspcapRed_start)
              / float(aspcapTotal - aspcapRed_start - 1.) * 2. - 1.)
    # Split the continuum pixels
    if type.lower() == 'cannon':
        if cont_pixels is None:
            cont_pixels = pixels_cannon()
        blue_pixels = cont_pixels[:aspcapGre_start]
        green_pixels = cont_pixels[aspcapGre_start:aspcapRed_start]
        red_pixels = cont_pixels[aspcapRed_start:]
    # Loop through the data
    for ii in range(tspec.shape[0]):
        # Blue
        if type.lower() == 'aspcap':
            cont[ii,:aspcapGre_start]=\
                _fit_aspcap(bluewav,
                            tspec[ii,:aspcapGre_start],
                            tspecerr[ii,:aspcapGre_start],
                            deg,
                            niter,usigma,lsigma)
        else:
            cont[ii,:aspcapGre_start]=\
                _fit_cannonpixels(bluewav,
                                  tspec[ii,:aspcapGre_start],
                                  tspecerr[ii,:aspcapGre_start],
                                  deg,
                                  blue_pixels)
        # Green
        if type.lower() == 'aspcap':
            cont[ii,aspcapGre_start:aspcapRed_start]=\
                _fit_aspcap(greenwav,
                            tspec[ii,aspcapGre_start:aspcapRed_start],
                            tspecerr[ii,aspcapGre_start:aspcapRed_start],
                            deg,
                            niter,usigma,lsigma)
        else:
            cont[ii,aspcapGre_start:aspcapRed_start]=\
                _fit_cannonpixels(greenwav,
                                  tspec[ii,aspcapGre_start:aspcapRed_start],
                                  tspecerr[ii,aspcapGre_start:aspcapRed_start],
                                  deg,
                                  green_pixels)
        # Red
        if type.lower() == 'aspcap':
            cont[ii,aspcapRed_start:]=\
                _fit_aspcap(redwav,
                            tspec[ii,aspcapRed_start:],
                            tspecerr[ii,aspcapRed_start:],
                            deg,
                            niter,usigma,lsigma)
        else:
            cont[ii,aspcapRed_start:]=\
                _fit_cannonpixels(redwav,
                                  tspec[ii,aspcapRed_start:],
                                  tspecerr[ii,aspcapRed_start:],
                                  deg,
                                  red_pixels)
    if (len(spec.shape) == 1 and spec.shape[0] == 8575) \
            or (len(spec.shape) == 2 and spec.shape[1] == 8575):
        cont = toApStarGrid(cont)
    if len(spec.shape) == 1: cont = cont[0]
    return cont
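A sketch of the usual normalization step with the ASPCAP-style fit. The spectra below are placeholders on the apStar grid; real ones would come from apread.aspcapStar or similar:

import numpy

spec = numpy.ones((2, 8575))            # placeholder spectra, (nspec, 8575)
specerr = numpy.ones((2, 8575)) * 0.01  # placeholder uncertainties

cont = fit(spec, specerr, type='aspcap')                      # sigma-clipped continuum
normalized = numpy.where(cont != 0., spec / cont, numpy.nan)  # guard unfilled pixels in the detector gaps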
Example 9
def fit(spec,specerr,type='aspcap',
        deg=None,
        niter=10,usigma=3.,lsigma=0.1,
        cont_pixels=None):
    """
    NAME:
       fit
    PURPOSE:
       fit the continuum (a) with a sigma-clipping rejection method (~ASPCAP) or (b) with a Chebyshev polynomial based on a set of continuum pixels
    INPUT:
       spec - spectra to fit (nspec,nlambda)
       specerr - errors on the spectra (nspec,nlambda); assume no covariances
       type= ('aspcap') type of continuum fitting to do: 'ASPCAP' for the sigma-clipping rejection that ASPCAP uses and 'Cannon' for fitting a Chebyshev polynomial to continuum pixels
       ASPCAP keywords:
          deg= (4) degree of the polynomial
          niter= (10) number of sigma-clipping iterations to perform
          usigma, lsigma= (3., 0.1) upper and lower sigmas for sigma clipping
       Cannon keywords:
          deg= (2) degree of the polynomial
          cont_pixels= (None; loads default) boolean index in the ASPCAP wavelength grid with True for continuum pixels
    OUTPUT:
       continuum (nspec,nlambda)
    HISTORY:
       2015-03-01 - Cannon-style fit written - Bovy (IAS)
       2015-03-01 - ASPCAP-style fit written - Bovy (IAS)
    """
    # Parse input
    if len(spec.shape) == 1:
        tspec= copy.copy(numpy.reshape(spec,(1,len(spec))))
        tspecerr= numpy.reshape(specerr,(1,len(specerr)))
    else:
        tspec= copy.copy(spec)
        tspecerr= specerr
    if tspec.shape[1] == 8575:
        tspec= toAspcapGrid(tspec)
        tspecerr= toAspcapGrid(tspecerr)
    if deg is None and type.lower() == 'aspcap': deg= 4
    elif deg is None: deg= 2
    # Fit each detector separately
    cont= numpy.empty_like(tspec)
    # Rescale wavelengths
    bluewav= numpy.arange(2920)/2919.*2.-1.
    greenwav= numpy.arange(2400)/2399.*2.-1.
    redwav= numpy.arange(1894)/1893.*2.-1.
    # Split the continuum pixels
    if type.lower() == 'cannon':
        if cont_pixels is None:
            cont_pixels= pixels_cannon()
        blue_pixels= cont_pixels[:2920]
        green_pixels= cont_pixels[2920:5320]
        red_pixels= cont_pixels[5320:]
    # Loop through the data
    for ii in range(tspec.shape[0]):
        # Blue
        if type.lower() == 'aspcap':
            cont[ii,:2920]= _fit_aspcap(bluewav,
                                        tspec[ii,:2920],
                                        tspecerr[ii,:2920],
                                        deg,
                                        niter,usigma,lsigma)
        else:
            cont[ii,:2920]= _fit_cannonpixels(bluewav,
                                              tspec[ii,:2920],
                                              tspecerr[ii,:2920],
                                              deg,
                                              blue_pixels)
        # Green
        if type.lower() == 'aspcap':
            cont[ii,2920:5320]= _fit_aspcap(greenwav,
                                            tspec[ii,2920:5320],
                                            tspecerr[ii,2920:5320],
                                            deg,
                                            niter,usigma,lsigma)
        else:
            cont[ii,2920:5320]= _fit_cannonpixels(greenwav,
                                                  tspec[ii,2920:5320],
                                                  tspecerr[ii,2920:5320],
                                                  deg,
                                                  green_pixels)
        # Red
        if type.lower() == 'aspcap':
            cont[ii,5320:]= _fit_aspcap(redwav,
                                        tspec[ii,5320:],
                                        tspecerr[ii,5320:],
                                        deg,
                                        niter,usigma,lsigma)
        else:
            cont[ii,5320:]= _fit_cannonpixels(redwav,
                                              tspec[ii,5320:],
                                              tspecerr[ii,5320:],
                                              deg,
                                              red_pixels)
    if (len(spec.shape) == 1 and spec.shape[0] == 8575) \
            or (len(spec.shape) == 2 and spec.shape[1] == 8575):
        cont= toApStarGrid(cont)
    if len(spec.shape) == 1: cont= cont[0]
    return cont
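And a sketch of the Cannon-style variant, combining fit with the continuum pixels from pixels_cannon in Example 7. This assumes the default trained fit that pixels_cannon loads with no arguments is available; a precomputed boolean mask of the right length works as well:

import numpy

spec = numpy.ones((2, 8575))
specerr = numpy.ones((2, 8575)) * 0.01

cont_pixels = pixels_cannon()               # boolean mask of continuum pixels on the ASPCAP grid
cont = fit(spec, specerr, type='cannon', deg=2,
           cont_pixels=cont_pixels)         # Chebyshev fit through continuum pixels only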
Example 10
def psm_data(num_elem, num_stars, apogee_cluster_data, sigma, T, cluster,
             spectra, spectra_errs, run_number, location, elem):
    """Return the residuals (with and without NaNs), errors (with and without NaNs), cumulative distributions,
    and skipped elements for the simulated spectra.

    This function generates synthetic spectra using PSM and a specified sigma value, then fits the simulated
    spectra in the same way as the data.

    Parameters
    ----------
    num_elem : int
        The number of elements in APOGEE
    num_stars : int
        The number of stars in the desired cluster
    apogee_cluster_data : structured array
        All cluster data from APOGEE
    sigma : float
        The value of sigma to create the simulated spectra
    T : tuple
        Array of floats representing the effective temperature of each star in the cluster
    cluster : str
        Name of the desired cluster (e.g. 'PJ_26')
    spectra : tuple
        Array of floats representing the spectra of the desired cluster
    spectra_errs : tuple
        Array of floats representing the spectral uncertainties of the desired cluster
    run_number : int
        Number of the run by which to label files
    location : str
        If running locally, set to 'personal'.  If running on the server, set to 'server'.
    elem : str
        Element being examined (e.g. 'AL')

    Returns
    -------
    fake_res : tuple
        Array of floats representing the residuals of the quadratic fit 
    fake_err : tuple
        Array of floats representing the spectral errors corresponding to the residuals
    y_ax_psm : tuple
        One-dimensional array containing values from 0 to 1, the same size as cdist
    psm_cdists : tuple
        One-dimensional array containing the sorted, normalized fit residuals
    fake_nanless_res : tuple
        Array of floats representing the residuals of the quadratic fit, with NaNs removed 
        (doesn't return fake_nanless_err because they are the same as real_nanless_err)
    final_real_spectra : tuple
        Array of observed spectra masked in the same way as the simulated spectra 
    final_real_spectra_err : tuple
        Array of observed spectral errors masked in the same way as the simulated spectra
    """

    #Abundances WRT H
    num_elem = 15
    num_stars = len(spectra)
    fe_abundance_dict = {
        'element': [
            'C_FE', 'N_FE', 'O_FE', 'NA_FE', 'MG_FE', 'AL_FE', 'SI_FE', 'S_FE',
            'K_FE', 'CA_FE', 'TI_FE', 'V_FE', 'MN_FE', 'NI_FE', 'FE_H'
        ]
    }
    cluster_xh = np.zeros((num_elem, num_stars))
    for i in range(num_elem):
        for j in range(num_stars):
            if fe_abundance_dict['element'][i] == 'FE_H':
                cluster_xh[i] = apogee_cluster_data['FE_H']
            else:
                cluster_xh[i] = apogee_cluster_data[fe_abundance_dict[
                    'element'][i]] + apogee_cluster_data['FE_H']
    cluster_avg_abundance = np.mean(cluster_xh, axis=1)

    cluster_logg = apogee_cluster_data['LOGG']
    elem_number_dict = {
        'C': 0,
        'N': 1,
        'O': 2,
        'NA': 3,
        'MG': 4,
        'AL': 5,
        'SI': 6,
        'S': 7,
        'K': 8,
        'CA': 9,
        'TI': 10,
        'V': 11,
        'MN': 12,
        'FE': 13,
        'NI': 14
    }
    cluster_fake_abundance = np.copy(cluster_xh)
    cluster_fake_abundance[elem_number_dict[elem]] = np.random.normal(
        loc=cluster_avg_abundance[elem_number_dict[elem]],
        scale=float(sigma),
        size=num_stars)

    cluster_gen_spec = np.zeros((num_stars, 7214))
    for i in range(len(spectra)):
        cluster_gen_spec[i] = psm.generate_spectrum(
            Teff=T[i] / 1000,
            logg=cluster_logg[i],
            vturb=psm.vturb,
            ch=cluster_fake_abundance[elem_number_dict['C']][i],
            nh=cluster_fake_abundance[elem_number_dict['N']][i],
            oh=cluster_fake_abundance[elem_number_dict['O']][i],
            nah=cluster_fake_abundance[elem_number_dict['NA']][i],
            mgh=cluster_fake_abundance[elem_number_dict['MG']][i],
            alh=cluster_fake_abundance[elem_number_dict['AL']][i],
            sih=cluster_fake_abundance[elem_number_dict['SI']][i],
            sh=cluster_fake_abundance[elem_number_dict['S']][i],
            kh=cluster_fake_abundance[elem_number_dict['K']][i],
            cah=cluster_fake_abundance[elem_number_dict['CA']][i],
            tih=cluster_fake_abundance[elem_number_dict['TI']][i],
            vh=cluster_fake_abundance[elem_number_dict['V']][i],
            mnh=cluster_fake_abundance[elem_number_dict['MN']][i],
            nih=cluster_fake_abundance[elem_number_dict['NI']][i],
            feh=cluster_fake_abundance[elem_number_dict['FE']][i],
            c12c13=psm.c12c13)

    #Mask spectra outside of boundaries of DR12 detectors
    apStar_cluster_gen_spec = toApStarGrid(cluster_gen_spec, dr='12')
    dr12_d1_left = apStarInds['12']['blue'][0]
    dr12_d1_right = apStarInds['12']['blue'][-1]
    dr12_d2_left = apStarInds['12']['green'][0]
    dr12_d2_right = apStarInds['12']['green'][-1]
    dr12_d3_left = apStarInds['12']['red'][0]
    dr12_d3_right = apStarInds['12']['red'][-1]
    for i in range(len(apStar_cluster_gen_spec)):
        for j in range(len(apStar_cluster_gen_spec.T)):
            if j < dr12_d1_left or (dr12_d1_right < j < dr12_d2_left) or (
                    dr12_d2_right < j < dr12_d3_left) or j > dr12_d3_right:
                apStar_cluster_gen_spec[i][j] = np.nan

    #Pad psm spectra with zeros to make appropriate size for DR14
    cluster_padded_spec = toAspcapGrid(apStar_cluster_gen_spec, dr='14')

    #Create array of nans to mask the psm in the same way as the spectra
    masked_psm = np.empty_like(spectra)
    masked_psm[:] = np.nan

    #Mask the spectra
    for i in range(len(spectra)):
        for j in range(7514):
            if ~np.isnan(spectra[i][j]):
                masked_psm[i][j] = cluster_padded_spec[i][j]

    #Read in repeats residuals
    if location == 'personal':
        file = h5py.File('/Users/chloecheng/Personal/repeats_dr14.hdf5', 'r')
    elif location == 'server':
        file = h5py.File(
            '/geir_data/scr/ccheng/AST425/Personal/repeats_dr14.hdf5', 'r')
    repeat_res = file['residuals'][()]
    file.close()

    #Cut out gaps between detectors for DR14
    repeats_dr14 = toAspcapGrid(repeat_res, dr='14')
    #Calculate 6sigma for repeats
    repeats_mean = np.nanmean(repeats_dr14)
    repeats_std = np.nanstd(repeats_dr14)
    repeats_6sigma_pos = repeats_mean + repeats_std * 6
    repeats_6sigma_neg = repeats_mean - repeats_std * 6

    #Create fake noise to add to the psm
    selected_repeats = []
    for i in range(0, num_stars):
        #Select a random star from the repeats residuals by which to multiply the spectra errors
        random_repeat = np.random.choice(np.arange(0, len(repeats_dr14)))
        selected_repeats.append(repeats_dr14[random_repeat])
    selected_repeats = np.array(selected_repeats)

    #Mask individual |repeats| that are > 6sigma
    for i in range(len(selected_repeats)):
        for j in range(len(selected_repeats.T)):
            if (selected_repeats[i][j] > repeats_6sigma_pos) or (
                    selected_repeats[i][j] < repeats_6sigma_neg):
                #if np.abs(selected_repeats[i][j]) > repeats_6sigma_pos:
                selected_repeats[i][j] = np.nan

    #Multiply the repeats by the spectral errors
    cluster_fake_errs = spectra_errs * selected_repeats
    #Correct the fake errors with zeroes in the same places as the PSM spectra
    cluster_fake_errs[masked_psm == 0] = 0.0

    #Add the noise to the psm
    noise_fake_spec = masked_psm + cluster_fake_errs
    #Mask the real spectra and spectra errors in the same way as the fake spectra
    masked_real_spectra = np.copy(spectra)
    masked_real_spectra_err = np.copy(spectra_errs)
    masked_real_spectra[np.isnan(noise_fake_spec)] = np.nan
    masked_real_spectra_err[np.isnan(noise_fake_spec)] = np.nan

    #Remove empty spectra - assertion
    final_fake_spec = []
    final_real_spectra = []
    final_real_spectra_err = []
    for i in range(len(noise_fake_spec)):
        if any(noise_fake_spec[i, :] != 0):
            final_fake_spec.append(noise_fake_spec[i])
            final_real_spectra.append(masked_real_spectra[i])
            final_real_spectra_err.append(masked_real_spectra_err[i])
    final_fake_spec = np.array(final_fake_spec)
    final_real_spectra = np.array(final_real_spectra)
    final_real_spectra_err = np.array(final_real_spectra_err)

    #Run fitting function on synthetic spectra
    fake_res, fake_err, fake_points, fake_temp, fake_a, fake_b, fake_c, fake_nanless_res, fake_nanless_err, fake_nanless_T, fake_nanless_points, fake_normed_weights = pj.fit_func(
        elem,
        cluster,
        final_fake_spec,
        final_real_spectra_err,
        T,
        dat_type='sim',
        run_number=run_number,
        location=location,
        sigma_val=sigma)

    #Cumulative distributions
    y_ax_psm, psm_cdists = pp.cum_dist(fake_nanless_res, fake_nanless_err)
    return fake_res, fake_err, y_ax_psm, psm_cdists, fake_nanless_res, final_real_spectra, final_real_spectra_err
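The noise-injection step in psm_data can be summarized on its own: draw a random repeat-residual row per star, clip entries beyond 6 sigma, and scale by the spectral errors. A self-contained numpy sketch with placeholder arrays:

import numpy as np

rng = np.random.default_rng(0)
num_stars, npix = 5, 7514
spectra_errs = np.full((num_stars, npix), 0.01)    # placeholder spectral errors
repeats = rng.normal(size=(200, npix))             # placeholder repeat residuals

# 6-sigma bounds of the repeat distribution
lo = np.nanmean(repeats) - 6 * np.nanstd(repeats)
hi = np.nanmean(repeats) + 6 * np.nanstd(repeats)

# One randomly chosen repeat row per star, with outliers masked
picks = repeats[rng.integers(0, len(repeats), size=num_stars)].copy()
picks[(picks < lo) | (picks > hi)] = np.nan

# Scale by the per-pixel errors to get realistic noise for the synthetic spectra
fake_noise = spectra_errs * picks

A second, more heavily commented variant of psm_data follows.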
def psm_data(num_elem, num_stars, apogee_cluster_data, sigma, T, cluster,
             spectra, spectra_errs, run_number, location, elem):
    """Return the residuals (with and without NaNs), errors (with and without NaNs), cumulative distributions,
    and skipped elements for the simulated spectra.

    This function generates synthetic spectra using PSM and a specified sigma value, then fits the simulated
    spectra in the same way as the data.

    Parameters
    ----------
    num_elem : int
        The number of elements in APOGEE
    num_stars : int
        The number of stars in the desired cluster
    apogee_cluster_data : structured array
        All cluster data from APOGEE
    sigma : float
        The value of sigma to create the simulated spectra
    T : tuple
        Array of floats representing the effective temperature of each star in the cluster
    cluster : str
        Name of the desired cluster (e.g. 'NGC 2682')
    spectra : tuple
        Array of floats representing the spectra of the desired cluster
    spectra_errs : tuple
        Array of floats representing the spectral uncertainties of the desired cluster
    run_number : int
        Number of the run by which to label files
    location : str
        If running locally, set to 'personal'.  If running on the server, set to 'server'.
    elem : str
        Element being examined (e.g. 'AL')

    Returns
    -------
    fake_res : tuple
        Array of floats representing the residuals of the quadratic fit 
    fake_err : tuple
        Array of floats representing the spectral errors corresponding to the residuals
    y_ax_psm : tuple
        One-dimensional array containing values from 0 to 1, the same size as cdist
    psm_cdists : tuple
        One-dimensional array containing the sorted, normalized fit residuals
    fake_nanless_res : tuple
        Array of floats representing the residuals of the quadratic fit, with NaNs removed 
        (doesn't return fake_nanless_err because they are the same as real_nanless_err)
    final_real_spectra : tuple
        Array of observed spectra masked in the same way as the simulated spectra 
    final_real_spectra_err : tuple
        Array of observed spectral errors masked in the same way as the simulated spectra
    """

    #Abundances WRT H
    num_elem = 15  ###Number of elements in APOGEE
    num_stars = len(spectra)  ###Number of stars in the cluster
    ###Dictionary for the names of the FE abundances in the allStar file
    fe_abundance_dict = {
        'element': [
            'C_FE', 'N_FE', 'O_FE', 'NA_FE', 'MG_FE', 'AL_FE', 'SI_FE', 'S_FE',
            'K_FE', 'CA_FE', 'TI_FE', 'V_FE', 'MN_FE', 'NI_FE', 'FE_H'
        ]
    }
    ###Make an empty array to add all of the cluster abundances to
    cluster_xh = np.zeros((num_elem, num_stars))
    for i in range(num_elem):  ###Iterate through the elements
        for j in range(num_stars):  ###Iterate through the stars
            ###Get all of the [X/H] abundances for all of the elements X and all of the stars in the
            ###cluster by adding each [X/Fe] abundance to [Fe/H] ([X/H] = [X/Fe] + [Fe/H])
            if fe_abundance_dict['element'][i] == 'FE_H':
                cluster_xh[i] = apogee_cluster_data['FE_H']
            else:
                cluster_xh[i] = (apogee_cluster_data[fe_abundance_dict['element'][i]]
                                 + apogee_cluster_data['FE_H'])
    ###Get the average abundance of each element in the cluster (one number per element)
    cluster_avg_abundance = np.mean(cluster_xh, axis=1)

    ###Get the surface gravities of each star from the allStar file
    cluster_logg = apogee_cluster_data['LOGG']
    ###Create a dictionary to match element names to their order number
    elem_number_dict = {
        'C': 0,
        'N': 1,
        'O': 2,
        'NA': 3,
        'MG': 4,
        'AL': 5,
        'SI': 6,
        'S': 7,
        'K': 8,
        'CA': 9,
        'TI': 10,
        'V': 11,
        'MN': 12,
        'FE': 13,
        'NI': 14
    }
    ###Create a copy of the array of all abundances in the cluster to use to simulate abundances
    cluster_fake_abundance = np.copy(cluster_xh)
    ###Simulate the abundances of the DESIRED ELEMENT (ONE ELEMENT ONLY) by drawing from a random normal distribution centred about the mean of THAT ONE
    ###ELEMENT with a scatter of the chosen value of sigma.  The rest of the abundances in this array will remain the same as the data
    cluster_fake_abundance[elem_number_dict[elem]] = np.random.normal(
        loc=cluster_avg_abundance[elem_number_dict[elem]],
        scale=float(sigma),
        size=num_stars)

    ###Create an empty array to hold the fake spectra
    cluster_gen_spec = np.zeros((num_stars, 7214))
    for i in range(len(spectra)):  ###Iterate through all of the stars - change this to range(num_stars)
        ###Use PSM to make a fake spectrum, using the array of abundances created above (with the element
        ###in question varied by the value of sigma and the remaining abundances the same as the data),
        ###the photometric Teffs calculated previously, the logg of each star, the default PSM vturb
        ###value, and the default PSM c12c13 value.
        cluster_gen_spec[i] = psm.generate_spectrum(
            Teff=T[i] / 1000,
            logg=cluster_logg[i],
            vturb=psm.vturb,
            ch=cluster_fake_abundance[elem_number_dict['C']][i],
            nh=cluster_fake_abundance[elem_number_dict['N']][i],
            oh=cluster_fake_abundance[elem_number_dict['O']][i],
            nah=cluster_fake_abundance[elem_number_dict['NA']][i],
            mgh=cluster_fake_abundance[elem_number_dict['MG']][i],
            alh=cluster_fake_abundance[elem_number_dict['AL']][i],
            sih=cluster_fake_abundance[elem_number_dict['SI']][i],
            sh=cluster_fake_abundance[elem_number_dict['S']][i],
            kh=cluster_fake_abundance[elem_number_dict['K']][i],
            cah=cluster_fake_abundance[elem_number_dict['CA']][i],
            tih=cluster_fake_abundance[elem_number_dict['TI']][i],
            vh=cluster_fake_abundance[elem_number_dict['V']][i],
            mnh=cluster_fake_abundance[elem_number_dict['MN']][i],
            nih=cluster_fake_abundance[elem_number_dict['NI']][i],
            feh=cluster_fake_abundance[elem_number_dict['FE']][i],
            c12c13=psm.c12c13)

    #Pad psm spectra with zeros to make appropriate size for DR14
    ###Put the fake spectra onto the DR12 apStar grid
    apStar_cluster_gen_spec = toApStarGrid(cluster_gen_spec, dr='12')
    ###Put the fake spectra onto the DR14 ASPCAP grid; this pads the spectra with zeroes
    ###to make them the right shape for DR14
    cluster_padded_spec = toAspcapGrid(apStar_cluster_gen_spec, dr='14')

    #Create array of nans to mask the psm in the same way as the spectra
    masked_psm = np.empty_like(spectra)  ###Empty array with the same shape as the spectra
    masked_psm[:] = np.nan  ###Fill the array with NaNs to mask it

    #Mask the spectra
    for i in range(len(spectra)):  ###Iterate through the stars - change this to num_stars
        for j in range(7514):  ###Iterate through the wavelength range - change to len(spectra.T) or a variable = 7514
            if ~np.isnan(spectra[i][j]):  ###If the entry in the real spectra is not a NaN
                ###Copy the corresponding entry of the fake spectra into the masked fake spectra
                masked_psm[i][j] = cluster_padded_spec[i][j]

    #Read in repeats residuals
    if location == 'personal':  ###If running on Mac
        ###Path to repeats file that I made
        file = h5py.File('/Users/chloecheng/Personal/repeats_dr14.hdf5', 'r')
    elif location == 'server':  ###If running on server
        ###Path to repeats file that I made
        file = h5py.File('/geir_data/scr/ccheng/AST425/Personal/repeats_dr14.hdf5', 'r')
    repeat_res = file['residuals'][()]  ###Get the repeats
    file.close()  ###Close the file

    #Cut out gaps between detectors for DR14
    repeats_dr14 = toAspcapGrid(repeat_res, dr='14')  ###Cut the gaps between the detectors in the repeats
    #Calculate 6sigma for repeats
    repeats_mean = np.nanmean(repeats_dr14)  ###Mean of the repeats, ignoring the masked areas that are NaNs
    repeats_std = np.nanstd(repeats_dr14)  ###Standard deviation of the repeats, ignoring the masked NaNs
    repeats_6sigma = repeats_mean + repeats_std * 6  ###Value for 6 sigma of the repeats from the mean (check that this is correct)

    #Create fake noise to add to the psm
    ###Empty list to append the random repeats that I will use to multiply the spectral errors
    selected_repeats = []
    for i in range(0, num_stars):  ###Iterate through the stars
        #Select a random star from the repeats residuals by which to multiply the spectra errors
        random_repeat = np.random.choice(np.arange(0, len(repeats_dr14)))  ###Randomly select one of the stars in the repeats
        selected_repeats.append(repeats_dr14[random_repeat])  ###Get all of the repeats residuals for this star
    selected_repeats = np.array(selected_repeats)  ###Turn the list into an array

    #Mask individual |repeats| that are > 6sigma
    for i in range(len(selected_repeats)):  ###Iterate through the selected repeats (number of stars)
        for j in range(len(selected_repeats.T)):  ###Iterate through the wavelength range
            if np.abs(selected_repeats[i][j]) > repeats_6sigma:  ###If the absolute value of this entry is greater than 6sigma
                selected_repeats[i][j] = np.nan  ###Mask it out

    #Multiply the repeats by the spectral errors
    ###The spectral errors will become masked in the same way as the repeats
    cluster_fake_errs = spectra_errs * selected_repeats
    #Pad the fake errors with zeroes in the same places as the PSM spectra
    ###i.e. where the fake spectra were zero-padded going from the DR12 wavelength range to DR14
    cluster_fake_errs[masked_psm == 0] = 0.0

    #Add the noise to the psm
    ###Add the fake errors to the fake spectra as fake noise to make them more realistic
    noise_fake_spec = masked_psm + cluster_fake_errs
    #Mask the real spectra and spectra errors in the same way as the fake spectra
    masked_real_spectra = np.copy(spectra)  ###Copy of the observed spectra to mask like the fake spectra
    masked_real_spectra_err = np.copy(spectra_errs)  ###Copy of the observed spectral errors to mask like the fake spectra
    masked_real_spectra[np.isnan(noise_fake_spec)] = np.nan  ###Mask the observed spectra like the fake spectra
    masked_real_spectra_err[np.isnan(noise_fake_spec)] = np.nan  ###Mask the observed spectral errors like the fake spectra

    #Remove empty spectra ###I'm not sure if this chunk is necessary but I wrote it in just in case
    final_fake_spec = []  ###Empty list to append the final set of fake spectra
    final_real_spectra = []  ###Empty list to append the final set of real spectra
    final_real_spectra_err = []  ###Empty list to append the final set of real spectral errors
    for i in range(len(noise_fake_spec)):  ###Iterate through the fake spectra
        if any(noise_fake_spec[i, :] != 0):  ###Keep rows that are not completely filled with zeroes
            final_fake_spec.append(noise_fake_spec[i])  ###Append those fake spectra
            final_real_spectra.append(masked_real_spectra[i])  ###Append those real spectra
            final_real_spectra_err.append(masked_real_spectra_err[i])  ###Append those real spectral errors
    final_fake_spec = np.array(final_fake_spec)  ###Make into array
    final_real_spectra = np.array(final_real_spectra)  ###Make into array
    final_real_spectra_err = np.array(final_real_spectra_err)  ###Make into array

    #Run fitting function on synthetic spectra
    fake_res, fake_err, fake_points, fake_temp, fake_a, fake_b, fake_c, fake_nanless_res, fake_nanless_err, fake_nanless_T, fake_nanless_points, fake_normed_weights = oc.fit_func(
        elem,
        cluster,
        final_fake_spec,
        final_real_spectra_err,
        T,
        dat_type='sim',
        run_number=run_number,
        location=location,
        sigma_val=sigma)

    #Cumulative distributions
    y_ax_psm, psm_cdists = pp.cum_dist(fake_nanless_res, fake_nanless_err)
    return fake_res, fake_err, y_ax_psm, psm_cdists, fake_nanless_res, final_real_spectra, final_real_spectra_err