def get_apogee(dr='16', use_astroNN=True):
    """
    dr: data release to select.
    use_astroNN: if True, use astroNN abundances when they are available for the
        chosen data release; otherwise use ASPCAP abundances.
    Returns the APOGEE allStar file with duplicates removed.
    """
    dr = str(dr)
    # Change to relevant data release
    change_dr(dr)
    # Only use astroNN values if they are available for this data release.
    if use_astroNN:
        if int(dr) < 14:
            use_astroNN = False
        elif int(dr) >= 14:
            allStar = apread.allStar(rmcommissioning=True, rmdups=False,
                                     use_astroNN=True)
    if not use_astroNN:
        allStar = apread.allStar(rmcommissioning=True, rmdups=False,
                                 use_astroNN=False)
    # Manually remove duplicates
    apids, inds = np.unique(allStar['APOGEE_ID'], return_index=True)
    return allStar[inds]
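# Illustrative usage sketch (not part of the pipeline): assuming the apogee package's
# data paths are already configured, the deduplicated catalogue can be loaded like this.
# The helper name below is hypothetical and only demonstrates the call signature.
def _example_get_apogee():
    """Load the deduplicated DR16 allStar catalogue with astroNN abundances."""
    allStar = get_apogee(dr='16', use_astroNN=True)
    print('Loaded %i unique stars' % len(allStar))
    return allStar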
def fit_func(elem, name, spectra, spectra_errs, T, dat_type, run_number, location, sigma_val=None): ###Fitting function
    """Return fit residuals from quadratic fit, spectral errors for desired element, fluxes for desired element,
    an appropriately-sized array of effective temperatures, the quadratic fitting parameters, the residuals,
    errors, temperatures, and fluxes with NaNs removed, and the normalized elemental weights.

    Functions:
        Reads in the DR14 windows.
        Obtains the indices of pixels of the absorption lines and saves the flux value and uncertainty for
        each star in these pixels.
        Performs the quadratic fit on each pixel using weight_lsq() and computes the residuals using residuals().
        Obtains the flux values, uncertainties, fits, residuals, and temperatures with NaNs removed.
        Writes the residuals and fit parameters to .hdf5 files.

    Parameters
    ----------
    elem : str
        Element name (i.e. 'AL')
    name : str
        Name of desired cluster (i.e. 'NGC 2682')
    spectra : tuple
        Array of floats representing the spectra of the desired cluster
    spectra_errs : tuple
        Array of floats representing the spectral uncertainties of the desired cluster
    T : tuple
        Array of floats representing the effective temperature of each star in the cluster
    dat_type : str
        Indicates whether the data being examined is the data or a simulation
    run_number : int
        Number of the run by which to label files
    location : str
        If running locally, set to 'personal'.  If running on the server, set to 'server'.
    sigma_val : float, optional
        Indicates the value of sigma being used for the simulation in question, if applicable (default is None)

    Returns
    -------
    elem_res : tuple
        Array of floats representing the fit residuals, with original positioning of points maintained
    final_err : tuple
        Array of floats representing the spectral uncertainties from the lines of the desired element,
        with original positioning of points maintained
    final_points : tuple
        Array of floats representing the fluxes from the lines of the desired element, with original
        positioning of points maintained
    temp_array : tuple
        Array of floats representing the effective temperature of each star in the cluster, with a row
        for each pixel of the desired element
    elem_a : tuple
        Array of floats representing the fitting parameters for the quadratic terms in the fits for each
        pixel of the desired element
    elem_b : tuple
        Array of floats representing the fitting parameters for the linear terms in the fits for each
        pixel of the desired element
    elem_c : tuple
        Array of floats representing the fitting parameters for the constant terms in the fits for each
        pixel of the desired element
    nanless_res : tuple
        Array of floats representing the fit residuals, with NaNs removed
    nanless_err : tuple
        Array of floats representing the spectral uncertainties from the lines of the desired element,
        with NaNs removed
    nanless_T : tuple
        Array of floats representing the effective temperature of each star in the cluster, with a row
        for each pixel of the desired element, with NaNs removed
    nanless_points : tuple
        Array of floats representing the fluxes from the lines of the desired element, with NaNs removed
    normed_weights : tuple
        Array of floats representing the weight of each elemental window, normalized to 1
    """

    change_dr('12') ###Switch data-release to 12
    #Find the DR14 windows from the DR12 windows
    dr12_elem_windows = window.read(elem) ###Read in the DR12 windows for the element in question
    change_dr('14') ###Switch back to DR14
    ###Map the DR12 windows onto DR14 ("hacked" DR14 windows)
    dr14_elem_windows_12 = np.concatenate((dr12_elem_windows[246:3274],
                                           dr12_elem_windows[3585:6080],
                                           dr12_elem_windows[6344:8335]))
    ###Normalize the hacked DR14 windows to 1
    normalized_dr14_elem_windows_12 = (dr14_elem_windows_12 - np.nanmin(dr14_elem_windows_12)) / (
        np.nanmax(dr14_elem_windows_12) - np.nanmin(dr14_elem_windows_12))

    #Get the indices of the lines
    ind_12 = np.argwhere(normalized_dr14_elem_windows_12 > 0) ###Indices of all of the pixels of the absorption lines of the element in question
    ind_12 = ind_12.flatten() ###Get rid of the extra dimension produced by np.argwhere

    #Get the fluxes and errors from spectra
    len_spectra = len(spectra) ###Number of stars
    elem_points_12 = np.zeros((len(ind_12), len_spectra)) ###Values of the points in the spectra at the indices of the elemental lines in DR12
    elem_err_12 = np.zeros((len(ind_12), len_spectra)) ###Values of the errors in the spectra at the indices of the elemental lines in DR12
    for i in range(0, len(ind_12)): ###Iterate through the DR12 elemental indices
        for j in range(0, len_spectra): ###Iterate through the stars
            elem_points_12[i][j] = spectra[j][ind_12[i]] ###Values of the points in the spectra at these indices in DR12
            elem_err_12[i][j] = spectra_errs[j][ind_12[i]] #APOGEE measured errors ###Values of the errors in the spectra at these indices in DR12

    #Use only pixels with more than 5 points
    final_points_12 = [] ###Empty list for the final DR12 points
    final_err_12 = [] ###Empty list for the final DR12 errors
    final_inds_12 = [] ###Empty list for the final DR12 elemental line indices
    for i in range(len(elem_points_12)): ###Iterate through the DR12 flux points
        if np.count_nonzero(~np.isnan(elem_points_12[i])) >= 5: ###If the pixel has at least 5 non-NaN points
            final_points_12.append(elem_points_12[i]) ###Append those points
            final_err_12.append(elem_err_12[i]) ###Append those errors
            final_inds_12.append(ind_12[i]) ###Append those indices
    final_points_12 = np.array(final_points_12) ###Make into array
    final_err_12 = np.array(final_err_12) ###Make into array
    final_inds_12 = np.array(final_inds_12) ###Make into array

    if len(final_inds_12) == 0: ###If no indices are left (i.e. if there are fewer than 5 points in every pixel)
        print('Warning: less than 5 points for every pixel, skipping ', elem) ###Skip and don't finish this element
    else: ###If there are enough points left
        dr12_weights = normalized_dr14_elem_windows_12[final_inds_12] ###All of the weights of the elemental pixels for DR12
        sorted_dr12_weights = np.sort(dr12_weights) ###Sort these weights from smallest to largest

        #Get windows
        if location == 'personal': ###If running on Mac
            window_file = pd.read_hdf('/Users/chloecheng/Personal/dr14_windows.hdf5', 'window_df') ###File made for DR14 windows
        elif location == 'server': ###If running on the server
            window_file = pd.read_hdf('/geir_data/scr/ccheng/AST425/Personal/dr14_windows.hdf5', 'window_df') ###File made for DR14 windows
        dr14_elem_windows_14 = window_file[elem].values ###The DR14 windows for the element in question
        ###Normalize these windows to 1
        normalized_dr14_elem_windows_14 = (dr14_elem_windows_14 - np.min(dr14_elem_windows_14)) / (
            np.max(dr14_elem_windows_14) - np.min(dr14_elem_windows_14))

        #Get the indices of the lines
        if elem == 'C' or elem == 'N' or elem == 'FE': ###If we're looking at one of the elements with of order ~1000 pixels
            ###Get rid of the smallest 70% of the DR12 pixels
            ind = np.argwhere(normalized_dr14_elem_windows_14 > np.min(sorted_dr12_weights[int(len(sorted_dr12_weights) * 0.7):]))
        else: ###For all of the other elements
            ind = np.argwhere(normalized_dr14_elem_windows_14 > 0) ###Get all of the pixels
        ind = ind.flatten() ###Get rid of the extra dimension from argwhere (try to streamline this)

        #Get the fluxes and errors from spectra
        #Limits of DR12 detectors
        dr12_d1_left = 322 ###Left limit of detector 1
        dr12_d1_right = 3242 ###Right limit of detector 1
        dr12_d2_left = 3648 ###Left limit of detector 2
        dr12_d2_right = 6048 ###Right limit of detector 2
        dr12_d3_left = 6412 ###Left limit of detector 3
        dr12_d3_right = 8306 ###Right limit of detector 3

        elem_points = np.zeros((len(ind), len_spectra)) ###Empty array to hold the values of the spectra at the elemental indices
        elem_err = np.zeros((len(ind), len_spectra)) ###Empty array to hold the values of the spectral errors at the elemental indices
        for i in range(0, len(ind)): ###Iterate through the elemental indices
            for j in range(0, len_spectra): ###Iterate through the stars
                ###If the indices are outside of the bounds of the DR12 detectors (these bounds should be right)
                if ind[i] < dr12_d1_left or (dr12_d1_right < ind[i] < dr12_d2_left) or (dr12_d2_right < ind[i] < dr12_d3_left) or ind[i] > dr12_d3_right:
                    elem_points[i][j] = np.nan ###Set the point to NaN and ignore
                    elem_err[i][j] = np.nan ###Set the error to NaN and ignore
                else: ###If the indices are within the bounds of the DR12 detectors
                    elem_points[i][j] = spectra[j][ind[i]] ###Get the corresponding point in the spectra
                    elem_err[i][j] = spectra_errs[j][ind[i]] #APOGEE measured errors ###Get the corresponding point in the spectral errors

        #Use only pixels with more than 5 points
        final_points = [] ###List for the final set of spectral points
        final_err = [] ###List for the final set of spectral error points
        final_inds = [] ###List for the final set of elemental indices
        for i in range(len(elem_points)): ###Iterate through the points we just obtained
            if np.count_nonzero(~np.isnan(elem_points[i])) >= 5: ###If the number of non-NaNs in the pixel is greater than or equal to 5
                final_points.append(elem_points[i]) ###Append the points
                final_err.append(elem_err[i]) ###Append the errors
                final_inds.append(ind[i]) ###Append the indices
        final_points = np.array(final_points) ###Make into array
        final_err = np.array(final_err) ###Make into array
        final_inds = np.array(final_inds) ###Make into array

        if len(final_points) == 0: ###If all pixels have fewer than 5 points
            print('Warning: less than 5 points for every pixel, skipping ', elem) ###Skip the element and end here
        else: ###If there are some pixels remaining
            #Create an appropriately-sized array of temperatures to mask as well
            ###Array of the temperatures that is the same size as the spectral points, with each row the same set of temperatures
            temp_array = np.full(final_points.shape, T)
            for i in range(0, len(final_points)): ###Iterate through the spectral points
                for j in range(0, len_spectra): ###Iterate through the stars
                    if np.isnan(final_points[i][j]): ###If the point is a NaN
                        temp_array[i][j] = np.nan ###Mask the corresponding point in the temperature array

            #Do fits with non-nan numbers
            nanless_inds = np.isfinite(final_points) ###Indices of the non-NaN points
            fits = [] ###Empty list for the fit parameters
            for i in range(len(final_points)): ###Iterate through the spectral points
                ###Fit using the weight_lsq function and all points that are not NaNs
                fits.append(weight_lsq(final_points[i][nanless_inds[i]], temp_array[i][nanless_inds[i]], final_err[i][nanless_inds[i]]))
            for i in range(len(fits)): ###Iterate through the fits
                fits[i] = np.array(fits[i]) ###Make each sub-list into an array
            fits = np.array(fits) ###Make the whole list into an array
            ###Check the order of these as well - it should be fine to change the order in weight_lsq because a, b, c are still returned in the order 0, 1, 2
            elem_a = fits[:, 0] ###The a-parameter
            elem_b = fits[:, 1] ###The b-parameter
            elem_c = fits[:, 2] ###The c-parameter

            elem_fits = np.zeros_like(final_points) ###Array to save the actual fits
            for i in range(0, len(final_points)): ###Iterate through the points
                elem_fits[i] = elem_a[i] * temp_array[i]**2 + elem_b[i] * temp_array[i] + elem_c[i] ###Evaluate the quadratic fit

            #Calculate residuals
            elem_res = residuals(final_points, elem_fits) ###Calculate the fit residuals

            #Remove nans from fits, residuals, errors, and temperatures for plotting and cumulative distribution
            #calculation purposes
            nanless_fits = [] ###Empty list for the nanless fits
            nanless_res = [] ###Empty list for the nanless fit residuals
            nanless_err = [] ###Empty list for the nanless errors
            nanless_T = [] ###Empty list for the nanless temperatures
            nanless_points = [] ###Empty list for the nanless spectral points
            for i in range(len(final_points)): ###Iterate through the spectral points
                nanless_fits.append(elem_fits[i][nanless_inds[i]]) ###Append the nanless fits
                nanless_res.append(elem_res[i][nanless_inds[i]]) ###Append the nanless residuals
                nanless_err.append(final_err[i][nanless_inds[i]]) ###Append the nanless errors
                nanless_T.append(temp_array[i][nanless_inds[i]]) ###Append the nanless temperatures
                nanless_points.append(final_points[i][nanless_inds[i]]) ###Append the nanless points
            for i in range(len(final_points)): ###Turn all sub-lists into arrays
                nanless_fits[i] = np.array(nanless_fits[i])
                nanless_res[i] = np.array(nanless_res[i])
                nanless_err[i] = np.array(nanless_err[i])
                nanless_T[i] = np.array(nanless_T[i])
                nanless_points[i] = np.array(nanless_points[i])
            nanless_fits = np.array(nanless_fits) ###Turn all lists into arrays
            nanless_res = np.array(nanless_res)
            nanless_err = np.array(nanless_err)
            nanless_T = np.array(nanless_T)
            nanless_points = np.array(nanless_points)

            #Get the weights for later
            weights = normalized_dr14_elem_windows_14[final_inds] ###Get the weights of the DR14 lines that we're using
            normed_weights = weights / np.sum(weights) ###Normalize the weights

            #File-saving
            #If we are looking at the data
            timestr = time.strftime("%Y%m%d_%H%M%S") ###date_time string
            name_string = str(name).replace(' ', '') ###cluster name, remove space
            pid = str(os.getpid()) ###PID string
            if sigma_val is None: ###If we are looking at the data
                if location == 'personal': ###If running on Mac
                    path_dat = '/Users/chloecheng/Personal/run_files/' + name_string + '/' + name_string + '_' + str(elem) + '_' + 'fit_res' + '_' + str(dat_type) + '_' + timestr + '_' + pid + '_' + str(run_number) + '.hdf5' ###Use this path
                elif location == 'server': ###If running on server
                    path_dat = '/geir_data/scr/ccheng/AST425/Personal/run_files/' + name_string + '/' + name_string + '_' + str(elem) + '_' + 'fit_res' + '_' + str(dat_type) + '_' + timestr + '_' + pid + '_' + str(run_number) + '.hdf5' ###Use this path

                #If the file exists, output the desired variables
                if glob.glob(path_dat): ###If the file already exists, don't write anything
                    return elem_res, final_err, final_points, temp_array, elem_a, elem_b, elem_c, nanless_res, nanless_err, nanless_T, nanless_points, normed_weights
                #If the file does not exist, create file and output the desired variables
                else: ###If the file does not exist, write all fitting information
                    file = h5py.File(path_dat, 'w')
                    file['points'] = final_points
                    file['residuals'] = elem_res
                    file['err_200'] = final_err
                    file['a_param'] = elem_a
                    file['b_param'] = elem_b
                    file['c_param'] = elem_c
                    file.close()
                    return elem_res, final_err, final_points, temp_array, elem_a, elem_b, elem_c, nanless_res, nanless_err, nanless_T, nanless_points, normed_weights
            #If we are looking at simulations
            else: ###If we are looking at a simulation
                if location == 'personal': ###If running from Mac
                    path_sim = '/Users/chloecheng/Personal/run_files/' + name_string + '/' + name_string + '_' + str(elem) + '_' + 'fit_res' + '_' + str(dat_type) + '_' + timestr + '_' + pid + '_' + str(run_number) + '.hdf5' ###Use this path
                elif location == 'server': ###If running on server
                    path_sim = '/geir_data/scr/ccheng/AST425/Personal/run_files/' + name_string + '/' + name_string + '_' + str(elem) + '_' + 'fit_res' + '_' + str(dat_type) + '_' + timestr + '_' + pid + '_' + str(run_number) + '.hdf5' ###Use this path

                #If the file exists, append to the file
                if glob.glob(path_sim): ###If the file exists
                    file = h5py.File(path_sim, 'a') ###Append to the file
                    #If the group for the particular value of sigma exists, don't do anything
                    if str(sigma_val) in file: ###If this value of sigma has already been tested, don't write anything
                        file.close()
                    #If not, append a new group to the file for the particular value of sigma
                    else: ###If it has not been tested, write all fitting information to a group named after the value of sigma
                        grp = file.create_group(str(sigma_val))
                        grp['points'] = final_points
                        grp['residuals'] = elem_res
                        grp['err_200'] = final_err
                        grp['a_param'] = elem_a
                        grp['b_param'] = elem_b
                        grp['c_param'] = elem_c
                        file.close()
                #If the file does not exist, create a new file
                else: ###If the file does not exist, write all of the fitting information to a group named after the value of sigma
                    file = h5py.File(path_sim, 'w')
                    grp = file.create_group(str(sigma_val))
                    grp['points'] = final_points
                    grp['residuals'] = elem_res
                    grp['err_200'] = final_err
                    grp['a_param'] = elem_a
                    grp['b_param'] = elem_b
                    grp['c_param'] = elem_c
                    file.close()
                return elem_res, final_err, final_points, temp_array, elem_a, elem_b, elem_c, nanless_res, nanless_err, nanless_T, nanless_points, normed_weights
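# Illustrative sketch (not called anywhere in the pipeline): one way fit_func might be
# invoked for a single cluster and element once spectra, spectra_errs, and T have been
# assembled. The helper name and argument values below are hypothetical.
def _example_fit_al(spectra, spectra_errs, T):
    """Fit the Al windows for NGC 2682 on a personal machine and return residuals and weights."""
    results = fit_func('AL', 'NGC 2682', spectra, spectra_errs, T,
                       dat_type='data', run_number=1, location='personal')
    if results is None: #fit_func prints a warning and returns nothing if every pixel has < 5 points
        return None
    elem_res = results[0] #fit residuals, with NaNs left in place
    normed_weights = results[-1] #window weights, normalized to sum to 1
    return elem_res, normed_weights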
-h                   Help file
--cluster=<arg>      Cluster name
--red_clump=<arg>    Whether to exclude red clump stars in rcsample or not
--element=<arg>      Element name
--type=<arg>         Data type
--location=<arg>     Machine where the code is being run
"""

#Imports
#apogee package
import apogee.tools.read as apread ###Read in the allStar data
from apogee.tools.path import change_dr ###Change the data-release
from apogee.spec import window ###Windows in fitting function for DR12
from apogee.tools.read import rcsample ###Remove red clumps
change_dr('14') #use DR14 ###Change to DR14
#astropy helper functions
import astropy.io.fits as afits ###Read OCCAM file
#basic math and plotting
from docopt import docopt ###Run with docopt from terminal
import numpy as np ###Numpy
import pandas as pd ###Read DR14 windows file
import h5py ###Read/write files
import glob ###See whether file/directory exists or not
import matplotlib.pyplot as plt ###Plot for weighted least-squares function if matrix is singular
import os ###Make directory for cluster
import time ###Label files with date and time

fs = 16 ###Plot fontsize
plt.rc('font', family='serif', size=fs) ###Plot fonts
def pixels_cannon(*args, **kwargs):
    """
    NAME:
       pixels_cannon
    PURPOSE:
       determine continuum pixels using a Cannon-like technique (Ness et al. 2015)
    INPUT:
       Either:
        a) Input for running the apogee.spec.cannon:
           spec - spectra to fit (nspec,nlambda)
           specerrs - errors on the spectra (nspec,nlambda); assume no covariances
           label1, label2, ... - labels (nspec); best to subtract reference values before running this
           type= ('lin') type of Cannon to run:
              'lin' - linear Cannon
              'quad' - quadratic Cannon
        b) Output from a previous Cannon run:
           coefficients - coefficients from the fit (ncoeffs,nlambda)
           scatter - scatter from the fit (nlambda)
    KEYWORDS:
       baseline_dev= (0.015) maximum deviation from baseline
       label1_max= (10.**-5.) maximum deviation in first linear coefficient
       label2_max= (0.006) similar for the second
       label3_max= (0.012) similar for the third
       labelN_max= same with default 0.03
       ...
       scatter_max= (0.015) maximum scatter of residuals
       dr= (module-wide default) data release
    OUTPUT:
       Boolean index into the wavelength range with True for continuum pixels
    HISTORY:
       2015-02-05 - Written - Bovy (IAS@KITP)
    """
    # Grab kwargs
    type = kwargs.pop('type', 'lin')
    dr = kwargs.pop('dr', path._default_dr())
    # Parse input
    if len(args) == 0:
        # Use default fit
        from apogee.spec._train_cannon import load_fit
        coeffs, scatter, baseline_labels = load_fit()
        type = 'quad'
    else:
        spec = args[0]
        specerr = args[1]
        # Determine the type of input
        if len(specerr.shape) == 2:
            # Run the Cannon
            if type.lower() == 'lin':
                coeffs, scatter = cannon.linfit(*args)
            elif type.lower() == 'quad':
                coeffs, scatter = cannon.quadfit(*args)
        else:
            coeffs = spec
            scatter = specerr
    ncoeffs = coeffs.shape[0]
    if type.lower() == 'lin':
        nlabels = ncoeffs - 1
    elif type.lower() == 'quad':
        nlabels = int((-3 + numpy.sqrt(9 + 8 * (ncoeffs - 1)))) // 2
    # Determine continuum pixels
    out = numpy.ones(len(scatter), dtype='bool')
    # Deviation from baseline
    out[numpy.fabs(coeffs[0] - 1.) > kwargs.get('baseline_dev', 0.015)] = False
    # Large dependence on labels
    maxs = numpy.zeros(nlabels)
    maxs[0] = kwargs.get('label1_max', 10.**-5.)
    maxs[1] = kwargs.get('label2_max', 0.006)
    maxs[2] = kwargs.get('label3_max', 0.012)
    for ii in range(nlabels - 3):
        maxs[ii + 3] = kwargs.get('label%i_max' % (ii + 4), 0.03)
    for ii in range(1, nlabels + 1):
        out[numpy.fabs(coeffs[ii]) > maxs[ii - 1]] = False
    # Large residuals
    out[scatter > kwargs.get('scatter_max', 0.015)] = False
    _, _, _, aspcapDR12length = _aspcapPixelLimits(dr='12')
    if int(dr) > 12 and coeffs.shape[1] == aspcapDR12length:
        # Want continuum pixels on >DR12 ASPCAP grid, but using coefficients
        # from <= DR12 grid
        dr_module = path._default_dr()
        path.change_dr(12)
        out = toApStarGrid(out)
        path.change_dr(dr)
        out = toAspcapGrid(out)
        path.change_dr(dr_module)
    return out
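# Illustrative sketch (hypothetical helper, not part of the apogee package): per the
# docstring above, calling pixels_cannon() with no positional arguments uses the packaged
# default Cannon fit; the keyword values shown are just the documented defaults.
def _example_continuum_mask():
    """Return a boolean mask of continuum pixels from the default Cannon fit."""
    return pixels_cannon(baseline_dev=0.015, scatter_max=0.015)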
def fit_func(elem, name, spectra, spectra_errs, T, dat_type, run_number, location, sigma_val=None):
    """Return fit residuals from quadratic fit, spectral errors for desired element, fluxes for desired element,
    an appropriately-sized array of effective temperatures, the quadratic fitting parameters, the residuals,
    errors, temperatures, and fluxes with NaNs removed, and the normalized elemental weights.

    Functions:
        Reads in the DR14 windows.
        Obtains the indices of pixels of the absorption lines and saves the flux value and uncertainty for
        each star in these pixels.
        Performs the quadratic fit on each pixel using weight_lsq() and computes the residuals using residuals().
        Obtains the flux values, uncertainties, fits, residuals, and temperatures with NaNs removed.
        Writes the residuals and fit parameters to .hdf5 files.

    Parameters
    ----------
    elem : str
        Element name (i.e. 'AL')
    name : str
        Name of desired cluster (i.e. 'NGC 2682')
    spectra : tuple
        Array of floats representing the spectra of the desired cluster
    spectra_errs : tuple
        Array of floats representing the spectral uncertainties of the desired cluster
    T : tuple
        Array of floats representing the effective temperature of each star in the cluster
    dat_type : str
        Indicates whether the data being examined is the data or a simulation
    run_number : int
        Number of the run by which to label files
    location : str
        If running locally, set to 'personal'.  If running on the server, set to 'server'.
    sigma_val : float, optional
        Indicates the value of sigma being used for the simulation in question, if applicable (default is None)

    Returns
    -------
    elem_res : tuple
        Array of floats representing the fit residuals, with original positioning of points maintained
    final_err : tuple
        Array of floats representing the spectral uncertainties from the lines of the desired element,
        with original positioning of points maintained
    final_points : tuple
        Array of floats representing the fluxes from the lines of the desired element, with original
        positioning of points maintained
    temp_array : tuple
        Array of floats representing the effective temperature of each star in the cluster, with a row
        for each pixel of the desired element
    elem_a : tuple
        Array of floats representing the fitting parameters for the quadratic terms in the fits for each
        pixel of the desired element
    elem_b : tuple
        Array of floats representing the fitting parameters for the linear terms in the fits for each
        pixel of the desired element
    elem_c : tuple
        Array of floats representing the fitting parameters for the constant terms in the fits for each
        pixel of the desired element
    nanless_res : tuple
        Array of floats representing the fit residuals, with NaNs removed
    nanless_err : tuple
        Array of floats representing the spectral uncertainties from the lines of the desired element,
        with NaNs removed
    nanless_T : tuple
        Array of floats representing the effective temperature of each star in the cluster, with a row
        for each pixel of the desired element, with NaNs removed
    nanless_points : tuple
        Array of floats representing the fluxes from the lines of the desired element, with NaNs removed
    normed_weights : tuple
        Array of floats representing the weight of each elemental window, normalized to 1
    """

    change_dr('12')
    #Find the DR14 windows from the DR12 windows
    dr12_elem_windows = window.read(elem)
    change_dr('14')
    dr14_elem_windows_12 = np.concatenate((dr12_elem_windows[246:3274],
                                           dr12_elem_windows[3585:6080],
                                           dr12_elem_windows[6344:8335]))
    normalized_dr14_elem_windows_12 = (dr14_elem_windows_12 - np.nanmin(dr14_elem_windows_12)) / (
        np.nanmax(dr14_elem_windows_12) - np.nanmin(dr14_elem_windows_12))

    #Get the indices of the lines
    ind_12 = np.argwhere(normalized_dr14_elem_windows_12 > 0)
    ind_12 = ind_12.flatten()

    #Get the fluxes and errors from spectra
    len_spectra = len(spectra)
    elem_points_12 = np.zeros((len(ind_12), len_spectra))
    elem_err_12 = np.zeros((len(ind_12), len_spectra))
    for i in range(0, len(ind_12)):
        for j in range(0, len_spectra):
            elem_points_12[i][j] = spectra[j][ind_12[i]]
            elem_err_12[i][j] = spectra_errs[j][ind_12[i]] #APOGEE measured errors

    #Use only pixels with more than 5 points
    final_points_12 = []
    final_err_12 = []
    final_inds_12 = []
    for i in range(len(elem_points_12)):
        if np.count_nonzero(~np.isnan(elem_points_12[i])) >= 5:
            final_points_12.append(elem_points_12[i])
            final_err_12.append(elem_err_12[i])
            final_inds_12.append(ind_12[i])
    final_points_12 = np.array(final_points_12)
    final_err_12 = np.array(final_err_12)
    final_inds_12 = np.array(final_inds_12)

    if len(final_inds_12) == 0:
        print('Warning: less than 5 points for every pixel, skipping ', elem)
    else:
        dr12_weights = normalized_dr14_elem_windows_12[final_inds_12]
        sorted_dr12_weights = np.sort(dr12_weights)

        #Get windows
        if location == 'personal':
            window_file = pd.read_hdf('/Users/chloecheng/Personal/dr14_windows.hdf5', 'window_df')
        elif location == 'server':
            window_file = pd.read_hdf('/geir_data/scr/ccheng/AST425/Personal/dr14_windows.hdf5', 'window_df')
        dr14_elem_windows_14 = window_file[elem].values
        normalized_dr14_elem_windows_14 = (dr14_elem_windows_14 - np.min(dr14_elem_windows_14)) / (
            np.max(dr14_elem_windows_14) - np.min(dr14_elem_windows_14))

        #Get the indices of the lines
        if elem == 'C' or elem == 'N' or elem == 'FE':
            ind = np.argwhere(normalized_dr14_elem_windows_14 > np.min(sorted_dr12_weights[int(len(sorted_dr12_weights) * 0.7):]))
        else:
            ind = np.argwhere(normalized_dr14_elem_windows_14 > 0)
        ind = ind.flatten()

        #Get the fluxes and errors from spectra
        elem_points = np.zeros((len(ind), len_spectra))
        elem_err = np.zeros((len(ind), len_spectra))
        for i in range(0, len(ind)):
            for j in range(0, len_spectra):
                elem_points[i][j] = spectra[j][ind[i]]
                elem_err[i][j] = spectra_errs[j][ind[i]] #APOGEE measured errors

        #Use only pixels with more than 5 points
        final_points = []
        final_err = []
        final_inds = []
        for i in range(len(elem_points)):
            if np.count_nonzero(~np.isnan(elem_points[i])) >= 5:
                final_points.append(elem_points[i])
                final_err.append(elem_err[i])
                final_inds.append(ind[i])
        final_points = np.array(final_points)
        final_err = np.array(final_err)
        final_inds = np.array(final_inds)

        if len(final_points) == 0:
            print('Warning: less than 5 points for every pixel, skipping ', elem)
        else:
            #Create an appropriately-sized array of temperatures to mask as well
            temp_array = np.full(final_points.shape, T)
            for i in range(0, len(final_points)):
                for j in range(0, len_spectra):
                    if np.isnan(final_points[i][j]):
                        temp_array[i][j] = np.nan

            #Do fits with non-nan numbers
            nanless_inds = np.isfinite(final_points)
            fits = []
            for i in range(len(final_points)):
                fits.append(weight_lsq(final_points[i][nanless_inds[i]], temp_array[i][nanless_inds[i]], final_err[i][nanless_inds[i]]))
            for i in range(len(fits)):
                fits[i] = np.array(fits[i])
            fits = np.array(fits)
            elem_a = fits[:, 0]
            elem_b = fits[:, 1]
            elem_c = fits[:, 2]
            elem_fits = np.zeros_like(final_points)
            for i in range(0, len(final_points)):
                elem_fits[i] = elem_a[i] * temp_array[i]**2 + elem_b[i] * temp_array[i] + elem_c[i]

            #Calculate residuals
            elem_res = residuals(final_points, elem_fits)

            #Remove nans from fits, residuals, errors, and temperatures for plotting and cumulative distribution
            #calculation purposes
            nanless_fits = []
            nanless_res = []
            nanless_err = []
            nanless_T = []
            nanless_points = []
            for i in range(len(final_points)):
                nanless_fits.append(elem_fits[i][nanless_inds[i]])
                nanless_res.append(elem_res[i][nanless_inds[i]])
                nanless_err.append(final_err[i][nanless_inds[i]])
                nanless_T.append(temp_array[i][nanless_inds[i]])
                nanless_points.append(final_points[i][nanless_inds[i]])
            for i in range(len(final_points)):
                nanless_fits[i] = np.array(nanless_fits[i])
                nanless_res[i] = np.array(nanless_res[i])
                nanless_err[i] = np.array(nanless_err[i])
                nanless_T[i] = np.array(nanless_T[i])
                nanless_points[i] = np.array(nanless_points[i])
            nanless_fits = np.array(nanless_fits)
            nanless_res = np.array(nanless_res)
            nanless_err = np.array(nanless_err)
            nanless_T = np.array(nanless_T)
            nanless_points = np.array(nanless_points)

            #Get the weights for later
            weights = normalized_dr14_elem_windows_14[final_inds]
            normed_weights = weights / np.sum(weights)

            #File-saving
            #If we are looking at the data
            timestr = time.strftime("%Y%m%d_%H%M%S")
            name_string = str(name).replace(' ', '')
            pid = str(os.getpid())
            if sigma_val is None:
                if location == 'personal':
                    path_dat = '/Users/chloecheng/Personal/run_files_' + name_string + '_' + str(elem) + '/' + name_string + '/' + name_string + '_' + str(elem) + '_' + 'fit_res' + '_' + str(dat_type) + '_' + timestr + '_' + pid + '_' + str(run_number) + '.hdf5'
                elif location == 'server':
                    path_dat = '/geir_data/scr/ccheng/AST425/Personal/run_files_' + name_string + '_' + str(elem) + '/' + name_string + '/' + name_string + '_' + str(elem) + '_' + 'fit_res' + '_' + str(dat_type) + '_' + timestr + '_' + pid + '_' + str(run_number) + '.hdf5'

                #If the file exists, output the desired variables
                if glob.glob(path_dat):
                    return elem_res, final_err, final_points, temp_array, elem_a, elem_b, elem_c, nanless_res, nanless_err, nanless_T, nanless_points, normed_weights
                #If the file does not exist, create file and output the desired variables
                else:
                    file = h5py.File(path_dat, 'w')
                    file['points'] = final_points
                    file['residuals'] = elem_res
                    file['err_200'] = final_err
                    file['a_param'] = elem_a
                    file['b_param'] = elem_b
                    file['c_param'] = elem_c
                    file.close()
                    return elem_res, final_err, final_points, temp_array, elem_a, elem_b, elem_c, nanless_res, nanless_err, nanless_T, nanless_points, normed_weights
            #If we are looking at simulations
            else:
                if location == 'personal':
                    path_sim = '/Users/chloecheng/Personal/run_files_' + name_string + '_' + str(elem) + '/' + name_string + '/' + name_string + '_' + str(elem) + '_' + 'fit_res' + '_' + str(dat_type) + '_' + timestr + '_' + pid + '_' + str(run_number) + '.hdf5'
                elif location == 'server':
                    path_sim = '/geir_data/scr/ccheng/AST425/Personal/run_files_' + name_string + '_' + str(elem) + '/' + name_string + '/' + name_string + '_' + str(elem) + '_' + 'fit_res' + '_' + str(dat_type) + '_' + timestr + '_' + pid + '_' + str(run_number) + '.hdf5'

                #If the file exists, append to the file
                if glob.glob(path_sim):
                    file = h5py.File(path_sim, 'a')
                    #If the group for the particular value of sigma exists, don't do anything
                    if str(sigma_val) in file:
                        file.close()
                    #If not, append a new group to the file for the particular value of sigma
                    else:
                        grp = file.create_group(str(sigma_val))
                        grp['points'] = final_points
                        grp['residuals'] = elem_res
                        grp['err_200'] = final_err
                        grp['a_param'] = elem_a
                        grp['b_param'] = elem_b
                        grp['c_param'] = elem_c
                        file.close()
                #If the file does not exist, create a new file
                else:
                    file = h5py.File(path_sim, 'w')
                    grp = file.create_group(str(sigma_val))
                    grp['points'] = final_points
                    grp['residuals'] = elem_res
                    grp['err_200'] = final_err
                    grp['a_param'] = elem_a
                    grp['b_param'] = elem_b
                    grp['c_param'] = elem_c
                    file.close()
                return elem_res, final_err, final_points, temp_array, elem_a, elem_b, elem_c, nanless_res, nanless_err, nanless_T, nanless_points, normed_weights
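# Illustrative sketch (hypothetical helper; the path and sigma value are placeholders):
# reading back the fit information that the simulation branch above writes, where each
# HDF5 group is named after the sigma value used for that run.
def _example_read_sim_results(path_sim, sigma_val):
    """Return the residuals and quadratic-fit parameters stored for one value of sigma."""
    with h5py.File(path_sim, 'r') as f:
        grp = f[str(sigma_val)]
        res = grp['residuals'][()]
        a, b, c = grp['a_param'][()], grp['b_param'][()], grp['c_param'][()]
    return res, a, b, c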
-h                   Help file
--cluster=<arg>      Cluster name
--red_clump=<arg>    Whether to exclude red clump stars in rcsample or not
--element=<arg>      Element name
--type=<arg>         Data type
--location=<arg>     Machine where the code is being run
"""

#Imports
#apogee package
import apogee.tools.read as apread
from apogee.tools.path import change_dr
from apogee.spec import window
from apogee.tools.read import rcsample
change_dr('14') #use DR14
#astropy helper functions
import astropy.io.fits as afits
#basic math and plotting
from docopt import docopt
import numpy as np
import pandas as pd
import h5py
import glob
import matplotlib.pyplot as plt
import os
import time

fs = 16
plt.rc('font', family='serif', size=fs)