def run_everything(cluster, num_sigma, red_clump, run_number, location, elem):
    """Return the covariance matrix statistics and KS distances for every element in APOGEE in the
    desired cluster, for every simulation run.  Function also saves all final summary statistics
    and values of sigma to file.

    Parameters
    ----------
    cluster : str
        Name of the desired cluster (e.g. 'NGC 2682')
    num_sigma : int
        Number of simulations to run
    red_clump : str
        If the red clump stars in rcsample are to be removed, set to True.  If all stars are to be
        used, set to False.
    run_number : int
        Number of the run by which to label files.
    location : str
        If running locally, set to 'personal'.  If running on the server, set to 'server'.
    elem : str
        Element being analyzed.

    Returns
    -------
    D_cov_all : tuple
        All covariance matrix summary statistics for all simulations
    ks_all : tuple
        All KS distances for all simulations
    """

    #Create cluster directory, if doesn't exist already
    cluster_dir = oc.make_directory(cluster)

    #Get APOGEE and spectral data
    apogee_cluster_data, spectra, spectra_errs, T, bitmask = oc.get_spectra(cluster, red_clump, location)
    num_elem = 15
    num_stars = len(spectra)

    #Create synthetic spectra for each value of sigma and fit
    sigma_vals = np.random.uniform(0, 0.1, int(num_sigma))
    fake_res = []
    fake_err = []
    y_ax_psm = []
    psm_cdists = []
    fake_nanless_res = []
    final_real_spectra = []
    final_real_spectra_err = []
    for i in range(len(sigma_vals)):
        fake_dat = ABC.psm_data(num_elem, num_stars, apogee_cluster_data, sigma_vals[i], T, cluster,
                                spectra, spectra_errs, run_number, location, elem)
        fake_res.append(fake_dat[0])
        fake_err.append(fake_dat[1])
        y_ax_psm.append(fake_dat[2])
        psm_cdists.append(fake_dat[3])
        fake_nanless_res.append(fake_dat[4])
        final_real_spectra.append(fake_dat[5])
        final_real_spectra_err.append(fake_dat[6])

    #Fit the data
    real_res = []
    real_err = []
    real_nanless_res = []
    real_nanless_err = []
    real_weights = []
    for i in range(len(sigma_vals)):
        real_dat = oc.fit_func(elem, cluster, final_real_spectra[i], final_real_spectra_err[i], T,
                               dat_type='data', run_number=run_number, location=location, sigma_val=None)
        real_res.append(real_dat[0])
        real_err.append(real_dat[1])
        real_nanless_res.append(real_dat[7])
        real_nanless_err.append(real_dat[8])
        real_weights.append(real_dat[11])

    #Get the cumulative distributions for the data
    y_ax_real = []
    real_cdists = []
    for i in range(len(sigma_vals)):
        real_cdist_dat = pp.cum_dist(real_nanless_res[i], real_nanless_err[i])
        y_ax_real.append(real_cdist_dat[0])
        real_cdists.append(real_cdist_dat[1])

    #Calculate summary statistics
    D_cov_all = []
    ks_all = []
    for i in range(len(sigma_vals)):
        D_cov_all.append(ABC.d_cov(cluster, real_weights[i], real_res[i], real_err[i], fake_res[i],
                                   fake_err[i], num_stars, sigma_vals[i], elem, location, run_number))
        ks_all.append(ABC.KS(cluster, y_ax_real[i], real_cdists[i], y_ax_psm[i], psm_cdists[i],
                             sigma_vals[i], elem, location, run_number))
    D_cov_all = np.array(D_cov_all)
    ks_all = np.array(ks_all)

    #Write to file
    timestr = time.strftime("%Y%m%d_%H%M%S")  #Date and time by which to identify file
    name_string = str(cluster).replace(' ', '')  #Remove spaces from name of cluster
    pid = str(os.getpid())
    if location == 'personal':
        path = ('/Users/chloecheng/Personal/run_files_' + name_string + '_' + str(elem) + '/' +
                name_string + '/' + name_string + '_' + elem + '_' + timestr + '_' + pid + '_' +
                str(run_number) + '.hdf5')
    elif location == 'server':
        path = ('/geir_data/scr/ccheng/AST425/Personal/run_files_' + name_string + '_' + str(elem) +
                '/' + name_string + '/' + name_string + '_' + elem + '_' + timestr + '_' + pid +
                '_' + str(run_number) + '.hdf5')  #Server path
    file = h5py.File(path, 'w')
    file['D_cov'] = D_cov_all
    file['KS'] = ks_all
    file['sigma'] = sigma_vals
    file.close()

    return D_cov_all, ks_all, sigma_vals
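#A minimal usage sketch (hypothetical driver code, not part of the module): run the full algorithm
#for one cluster and one element and inspect the returned summary statistics.  The cluster, element,
#and argument values below are illustrative assumptions only.
import numpy as np

np.random.seed(16)  #Optional: make the randomly drawn sigma values reproducible

D_cov_all, ks_all, sigma_vals = run_everything(
    cluster='NGC 2682',   #Example cluster from the docstring
    num_sigma=100,        #Number of simulations to run
    red_clump='False',    #Keep all stars (string flag, per the docstring)
    run_number=1,         #Label for the output files
    location='personal',  #Running locally
    elem='AL')            #Element to analyze

print(D_cov_all.shape, ks_all.shape, sigma_vals.shape)  #One summary statistic per simulation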
def run_everything(cluster, num_sigma, red_clump, run_number, location, elem):  ###Function to run the entire algorithm
    """Return the covariance matrix statistics and KS distances for every element in APOGEE in the
    desired cluster, for every simulation run.  Function also saves all final summary statistics
    and values of sigma to file.

    Parameters
    ----------
    cluster : str
        Name of the desired cluster (e.g. 'NGC 2682')
    num_sigma : int
        Number of simulations to run
    red_clump : str
        If the red clump stars in rcsample are to be removed, set to True.  If all stars are to be
        used, set to False.
    run_number : int
        Number of the run by which to label files.
    location : str
        If running locally, set to 'personal'.  If running on the server, set to 'server'.
    elem : str
        Element being analyzed.

    Returns
    -------
    D_cov_all : tuple
        All covariance matrix summary statistics for all simulations
    ks_all : tuple
        All KS distances for all simulations
    """

    #Create cluster directory, if doesn't exist already
    cluster_dir = oc.make_directory(cluster)  ###Make a directory named after the cluster

    #Get APOGEE and spectral data
    apogee_cluster_data, spectra, spectra_errs, T, bitmask = oc.get_spectra(cluster, red_clump, location)  ###Get the allStar data and spectra
    num_elem = 15  ###Number of elements in APOGEE
    num_stars = len(spectra)  ###Number of stars in the cluster

    #Create synthetic spectra for each value of sigma and fit
    ###Create an array of sigma values between 0 and 0.1 dex that are randomly drawn from a uniform
    ###distribution, the size of the number of simulations that you want to run at once
    sigma_vals = np.random.uniform(0, 0.1, int(num_sigma))
    fake_res = []  ###Empty list for the fake residuals
    fake_err = []  ###Empty list for the fake errors
    y_ax_psm = []  ###Empty list for the y-axis for the fake cumulative distributions
    psm_cdists = []  ###Empty list for the fake cumulative distributions
    fake_nanless_res = []  ###Empty list for the fake residuals with NaNs removed
    final_real_spectra = []  ###Empty list for the observed spectra that are masked in the same way as the fake spectra
    final_real_spectra_err = []  ###Empty list for the observed spectral errors that are masked in the same way as the fake spectra
    for i in range(len(sigma_vals)):  ###Iterate through the number of simulations you want to run
        ###Run the psm_data function from ABC.py to get the fake fits, etc.
        fake_dat = ABC.psm_data(num_elem, num_stars, apogee_cluster_data, sigma_vals[i], T, cluster,
                                spectra, spectra_errs, run_number, location, elem)
        fake_res.append(fake_dat[0])  ###Get the fake residuals
        fake_err.append(fake_dat[1])  ###Get the fake errors
        y_ax_psm.append(fake_dat[2])  ###Get the y-axis for the fake cumulative distributions
        psm_cdists.append(fake_dat[3])  ###Get the fake cumulative distributions
        fake_nanless_res.append(fake_dat[4])  ###Get the fake residuals with no NaNs
        final_real_spectra.append(fake_dat[5])  ###Get the observed spectra that are masked in the same way as the fake spectra
        final_real_spectra_err.append(fake_dat[6])  ###Get the observed spectral errors that are masked in the same way as the fake spectra

    #Fit the data
    real_res = []  ###Empty list for the real residuals
    real_err = []  ###Empty list for the real errors
    real_nanless_res = []  ###Empty list for the real residuals with no NaNs
    real_nanless_err = []  ###Empty list for the real errors with no NaNs
    real_weights = []  ###Empty list for the weights of the windows for the element
    for i in range(len(sigma_vals)):  ###Iterate through the number of simulations
        ###Run the fit_func function from occam_clusters_input.py to get fits for real data, using the
        ###observed spectra and errors masked in the same way as the simulations
        real_dat = oc.fit_func(elem, cluster, final_real_spectra[i], final_real_spectra_err[i], T,
                               dat_type='data', run_number=run_number, location=location, sigma_val=None)
        real_res.append(real_dat[0])  ###Get the real residuals
        real_err.append(real_dat[1])  ###Get the real errors
        real_nanless_res.append(real_dat[7])  ###Get the real residuals with no NaNs
        real_nanless_err.append(real_dat[8])  ###Get the real errors with no NaNs
        real_weights.append(real_dat[11])  ###Get the weights of the windows for the element

    #Get the cumulative distributions for the data
    y_ax_real = []  ###Empty list for y-axis for real cumulative distributions
    real_cdists = []  ###Empty list for real cumulative distributions
    for i in range(len(sigma_vals)):  ###Iterate through the number of simulations
        ###Compute the cumulative distributions using the cum_dist function from occam_clusters_post_process.py
        real_cdist_dat = pp.cum_dist(real_nanless_res[i], real_nanless_err[i])
        y_ax_real.append(real_cdist_dat[0])  ###Get the y-axes for the real cumulative distributions
        real_cdists.append(real_cdist_dat[1])  ###Get the real cumulative distributions

    #Calculate summary statistics
    D_cov_all = []  ###Empty list for the delta covariance statistics
    ks_all = []  ###Empty list for the KS distance statistics
    for i in range(len(sigma_vals)):  ###Iterate through the simulations
        ###Compute the delta covariance statistics
        D_cov_all.append(ABC.d_cov(cluster, real_weights[i], real_res[i], real_err[i], fake_res[i],
                                   fake_err[i], num_stars, sigma_vals[i], elem, location, run_number))
        ###Compute the KS distance statistics
        ks_all.append(ABC.KS(cluster, y_ax_real[i], real_cdists[i], y_ax_psm[i], psm_cdists[i],
                             sigma_vals[i], elem, location, run_number))
    D_cov_all = np.array(D_cov_all)  ###Make into array
    ks_all = np.array(ks_all)  ###Make into array

    #Write to file
    timestr = time.strftime("%Y%m%d_%H%M%S")  #Date and time by which to identify file
    name_string = str(cluster).replace(' ', '')  #Remove spaces from name of cluster
    pid = str(os.getpid())  ###PID for file labelling
    if location == 'personal':  ###If running on Mac
        path = ('/Users/chloecheng/Personal/run_files/' + name_string + '/' + name_string + '_' +
                elem + '_' + timestr + '_' + pid + '_' + str(run_number) + '.hdf5')
    elif location == 'server':  ###If running on server
        path = ('/geir_data/scr/ccheng/AST425/Personal/run_files/' + name_string + '/' + name_string +
                '_' + elem + '_' + timestr + '_' + pid + '_' + str(run_number) + '.hdf5')  #Server path
    file = h5py.File(path, 'w')  ###Write file
    file['D_cov'] = D_cov_all
    file['KS'] = ks_all
    file['sigma'] = sigma_vals
    file.close()

    return D_cov_all, ks_all, sigma_vals
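#A minimal sketch (not part of the pipeline) of how the HDF5 file written by run_everything can be
#read back with h5py; the filename below is a placeholder for the path constructed above.
import h5py

with h5py.File('NGC2682_AL_<timestr>_<pid>_<run_number>.hdf5', 'r') as f:
    D_cov_all = f['D_cov'][()]   #Delta covariance statistic for each simulation
    ks_all = f['KS'][()]         #KS distance for each simulation
    sigma_vals = f['sigma'][()]  #Value of sigma drawn for each simulation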
    pid = str(os.getpid())
    if location == 'personal':
        path = ('/Users/chloecheng/Personal/run_files_' + name_string + '_' + str(elem) + '/' +
                name_string + '/' + name_string + '_' + str(elem) + '_' + 'KS' + '_' + timestr +
                '_' + pid + '_' + str(run_number) + '.hdf5')
    elif location == 'server':
        path = ('/geir_data/scr/ccheng/AST425/Personal/run_files_' + name_string + '_' + str(elem) +
                '/' + name_string + '/' + name_string + '_' + str(elem) + '_' + 'KS' + '_' + timestr +
                '_' + pid + '_' + str(run_number) + '.hdf5')

    #If file exists, append to file
    if glob.glob(path):
        file = h5py.File(path, 'a')
        grp = file.create_group(str(sigma))
        grp['KS'] = dist
        file.close()
    #Else create a new file
    else:
        file = h5py.File(path, 'w')
        grp = file.create_group(str(sigma))
        grp['KS'] = dist
        file.close()
    return dist


if __name__ == '__main__':
    arguments = docopt(__doc__)

    apogee_cluster_data, spectra, spectra_errs, T, bitmask = oc.get_spectra(
        arguments['--cluster'], arguments['--red_clump'], arguments['--location'])
    num_elem = 15
    num_stars = len(spectra)
    (fake_res, fake_err, y_ax_psm, psm_cdists, fake_nanless_res, final_real_spectra,
     final_real_spectra_err) = psm_data(num_elem, num_stars, apogee_cluster_data,
                                        arguments['--sigma'], T, arguments['--cluster'], spectra,
                                        spectra_errs, run_number, arguments['--location'],
                                        arguments['--elem'])
    (real_res, real_err, real_points, real_temp, real_a, real_b, real_c, real_nanless_res,
     real_nanless_err, real_nanless_T, real_nanless_points, real_normed_weights) = oc.fit_func(
        arguments['--elem'], arguments['--cluster'], final_real_spectra, final_real_spectra_err, T,
        arguments['--dat_type'], run_number, arguments['--location'], arguments['--sigma'])
    y_ax_real, real_cdists = pp.cum_dist(real_nanless_res, real_nanless_err)
    D_cov = d_cov(arguments['--cluster'], real_normed_weights, real_res, real_err, fake_res, fake_err,
                  num_stars, arguments['--sigma'], arguments['--elem'], arguments['--location'],
                  run_number)
    ks = KS(arguments['--cluster'], y_ax_real, real_cdists, y_ax_psm, psm_cdists, arguments['--sigma'],
            arguments['--elem'], arguments['--location'], run_number)
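#A minimal sketch of how the per-sigma groups written by the KS function above can be read back;
#the filename below is a placeholder for the 'KS' path constructed above.
import h5py

with h5py.File('NGC2682_AL_KS_<timestr>_<pid>_<run_number>.hdf5', 'r') as f:
    for sigma_key in f.keys():            #One group per simulated value of sigma
        ks_dist = f[sigma_key]['KS'][()]  #KS distance(s) stored for that sigma
        print(sigma_key, ks_dist)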
def psm_data(num_elem, num_stars, apogee_cluster_data, sigma, T, cluster, spectra, spectra_errs,
             run_number, location, elem):
    """Return the residuals (with and without NaNs), errors (with and without NaNs), and cumulative
    distributions for the simulated spectra.

    This function generates synthetic spectra using PSM and a specified sigma value, then fits the
    simulated spectra in the same way as the data.

    Parameters
    ----------
    num_elem : int
        The number of elements in APOGEE
    num_stars : int
        The number of stars in the desired cluster
    apogee_cluster_data : structured array
        All cluster data from APOGEE
    sigma : float
        The value of sigma to create the simulated spectra
    T : tuple
        Array of floats representing the effective temperature of each star in the cluster
    cluster : str
        Name of the desired cluster (e.g. 'NGC 2682')
    spectra : tuple
        Array of floats representing the spectra of the desired cluster
    spectra_errs : tuple
        Array of floats representing the spectral uncertainties of the desired cluster
    run_number : int
        Number of the run by which to label files
    location : str
        If running locally, set to 'personal'.  If running on the server, set to 'server'.
    elem : str
        Element being examined (e.g. 'AL')

    Returns
    -------
    fake_res : tuple
        Array of floats representing the residuals of the quadratic fit
    fake_err : tuple
        Array of floats representing the spectral errors corresponding to the residuals
    y_ax_psm : tuple
        One-dimensional array containing values from 0 to 1, the same size as cdist
    psm_cdists : tuple
        One-dimensional array containing the sorted, normalized fit residuals
    fake_nanless_res : tuple
        Array of floats representing the residuals of the quadratic fit, with NaNs removed
        (doesn't return fake_nanless_err because they are the same as real_nanless_err)
    final_real_spectra : tuple
        Array of observed spectra masked in the same way as the simulated spectra
    final_real_spectra_err : tuple
        Array of observed spectral errors masked in the same way as the simulated spectra
    """

    #Abundances WRT H
    num_elem = 15
    num_stars = len(spectra)
    fe_abundance_dict = {'element': ['C_FE', 'N_FE', 'O_FE', 'NA_FE', 'MG_FE', 'AL_FE', 'SI_FE',
                                     'S_FE', 'K_FE', 'CA_FE', 'TI_FE', 'V_FE', 'MN_FE', 'NI_FE',
                                     'FE_H']}
    cluster_xh = np.zeros((num_elem, num_stars))
    for i in range(num_elem):
        for j in range(num_stars):
            if fe_abundance_dict['element'][i] == 'FE_H':
                cluster_xh[i] = apogee_cluster_data['FE_H']
            else:
                cluster_xh[i] = apogee_cluster_data[fe_abundance_dict['element'][i]] + apogee_cluster_data['FE_H']
    cluster_avg_abundance = np.mean(cluster_xh, axis=1)

    cluster_logg = apogee_cluster_data['LOGG']
    elem_number_dict = {'C': 0, 'N': 1, 'O': 2, 'NA': 3, 'MG': 4, 'AL': 5, 'SI': 6, 'S': 7, 'K': 8,
                        'CA': 9, 'TI': 10, 'V': 11, 'MN': 12, 'FE': 13, 'NI': 14}
    cluster_fake_abundance = np.copy(cluster_xh)
    cluster_fake_abundance[elem_number_dict[elem]] = np.random.normal(
        loc=cluster_avg_abundance[elem_number_dict[elem]], scale=float(sigma), size=num_stars)

    cluster_gen_spec = np.zeros((num_stars, 7214))
    for i in range(len(spectra)):
        cluster_gen_spec[i] = psm.generate_spectrum(
            Teff=T[i]/1000, logg=cluster_logg[i], vturb=psm.vturb,
            ch=cluster_fake_abundance[elem_number_dict['C']][i],
            nh=cluster_fake_abundance[elem_number_dict['N']][i],
            oh=cluster_fake_abundance[elem_number_dict['O']][i],
            nah=cluster_fake_abundance[elem_number_dict['NA']][i],
            mgh=cluster_fake_abundance[elem_number_dict['MG']][i],
            alh=cluster_fake_abundance[elem_number_dict['AL']][i],
            sih=cluster_fake_abundance[elem_number_dict['SI']][i],
            sh=cluster_fake_abundance[elem_number_dict['S']][i],
            kh=cluster_fake_abundance[elem_number_dict['K']][i],
            cah=cluster_fake_abundance[elem_number_dict['CA']][i],
            tih=cluster_fake_abundance[elem_number_dict['TI']][i],
            vh=cluster_fake_abundance[elem_number_dict['V']][i],
            mnh=cluster_fake_abundance[elem_number_dict['MN']][i],
            nih=cluster_fake_abundance[elem_number_dict['NI']][i],
            feh=cluster_fake_abundance[elem_number_dict['FE']][i],
            c12c13=psm.c12c13)

    #Mask spectra outside of boundaries of DR12 detectors
    apStar_cluster_gen_spec = toApStarGrid(cluster_gen_spec, dr='12')
    dr12_d1_left = apStarInds['12']['blue'][0]
    dr12_d1_right = apStarInds['12']['blue'][-1]
    dr12_d2_left = apStarInds['12']['green'][0]
    dr12_d2_right = apStarInds['12']['green'][-1]
    dr12_d3_left = apStarInds['12']['red'][0]
    dr12_d3_right = apStarInds['12']['red'][-1]
    for i in range(len(apStar_cluster_gen_spec)):
        for j in range(len(apStar_cluster_gen_spec.T)):
            if j < dr12_d1_left or (dr12_d1_right < j < dr12_d2_left) or (dr12_d2_right < j < dr12_d3_left) or j > dr12_d3_right:
                apStar_cluster_gen_spec[i][j] = np.nan

    #Pad psm spectra with zeros to make appropriate size for DR14
    cluster_padded_spec = toAspcapGrid(apStar_cluster_gen_spec, dr='14')

    #Create array of nans to mask the psm in the same way as the spectra
    masked_psm = np.empty_like(spectra)
    masked_psm[:] = np.nan

    #Mask the spectra
    for i in range(len(spectra)):
        for j in range(7514):
            if ~np.isnan(spectra[i][j]):
                masked_psm[i][j] = cluster_padded_spec[i][j]

    #Read in repeats residuals
    if location == 'personal':
        file = h5py.File('/Users/chloecheng/Personal/repeats_dr14.hdf5', 'r')
    elif location == 'server':
        file = h5py.File('/geir_data/scr/ccheng/AST425/Personal/repeats_dr14.hdf5', 'r')
    repeat_res = file['residuals'][()]
    file.close()

    #Cut out gaps between detectors for DR14
    repeats_dr14 = toAspcapGrid(repeat_res, dr='14')

    #Calculate 6sigma for repeats
    repeats_mean = np.nanmean(repeats_dr14)
    repeats_std = np.nanstd(repeats_dr14)
    repeats_6sigma_pos = repeats_mean + repeats_std*6
    repeats_6sigma_neg = repeats_mean - repeats_std*6

    #Create fake noise to add to the psm
    selected_repeats = []
    for i in range(0, num_stars):
        #Select a random star from the repeats residuals by which to multiply the spectra errors
        random_repeat = np.random.choice(np.arange(0, len(repeats_dr14)))
        selected_repeats.append(repeats_dr14[random_repeat])
    selected_repeats = np.array(selected_repeats)

    #Mask individual |repeats| that are > 6sigma
    for i in range(len(selected_repeats)):
        for j in range(len(selected_repeats.T)):
            if (selected_repeats[i][j] > repeats_6sigma_pos) or (selected_repeats[i][j] < repeats_6sigma_neg):
                #if np.abs(selected_repeats[i][j]) > repeats_6sigma_pos:
                selected_repeats[i][j] = np.nan

    #Multiply the repeats by the spectral errors
    cluster_fake_errs = spectra_errs*selected_repeats
    #Correct the fake errors with zeroes in the same places as the PSM spectra
    cluster_fake_errs[masked_psm == 0] = 0.0

    #Add the noise to the psm
    noise_fake_spec = masked_psm + cluster_fake_errs

    #Mask the real spectra and spectra errors in the same way as the fake spectra
    masked_real_spectra = np.copy(spectra)
    masked_real_spectra_err = np.copy(spectra_errs)
    masked_real_spectra[np.isnan(noise_fake_spec)] = np.nan
    masked_real_spectra_err[np.isnan(noise_fake_spec)] = np.nan

    #Remove empty spectra - assertion
    final_fake_spec = []
    final_real_spectra = []
    final_real_spectra_err = []
    for i in range(len(noise_fake_spec)):
        if any(noise_fake_spec[i, :] != 0):
            final_fake_spec.append(noise_fake_spec[i])
            final_real_spectra.append(masked_real_spectra[i])
            final_real_spectra_err.append(masked_real_spectra_err[i])
    final_fake_spec = np.array(final_fake_spec)
    final_real_spectra = np.array(final_real_spectra)
    final_real_spectra_err = np.array(final_real_spectra_err)

    #Run fitting function on synthetic spectra
    fake_res, fake_err, fake_points, fake_temp, fake_a, fake_b, fake_c, fake_nanless_res, fake_nanless_err, fake_nanless_T, fake_nanless_points, fake_normed_weights = oc.fit_func(
        elem, cluster, final_fake_spec, final_real_spectra_err, T, dat_type='sim',
        run_number=run_number, location=location, sigma_val=sigma)

    #Cumulative distributions
    y_ax_psm, psm_cdists = pp.cum_dist(fake_nanless_res, fake_nanless_err)

    return fake_res, fake_err, y_ax_psm, psm_cdists, fake_nanless_res, final_real_spectra, final_real_spectra_err
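#Usage sketch for psm_data (hypothetical driver code, not part of the module).  The module alias oc
#is assumed to be occam_clusters_input, as referenced elsewhere in this file, and the cluster,
#element, and sigma values are illustrative assumptions only.
import occam_clusters_input as oc

apogee_cluster_data, spectra, spectra_errs, T, bitmask = oc.get_spectra('NGC 2682', 'False', 'personal')

(fake_res, fake_err, y_ax_psm, psm_cdists, fake_nanless_res,
 final_real_spectra, final_real_spectra_err) = psm_data(
    num_elem=15, num_stars=len(spectra),
    apogee_cluster_data=apogee_cluster_data,
    sigma=0.05,  #Example intrinsic-scatter value in dex (run_everything draws these from U(0, 0.1))
    T=T, cluster='NGC 2682', spectra=spectra, spectra_errs=spectra_errs,
    run_number=1, location='personal', elem='AL')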
def psm_data(num_elem, num_stars, apogee_cluster_data, sigma, T, cluster, spectra, spectra_errs,
             run_number, location, elem):
    """Return the residuals (with and without NaNs), errors (with and without NaNs), and cumulative
    distributions for the simulated spectra.

    This function generates synthetic spectra using PSM and a specified sigma value, then fits the
    simulated spectra in the same way as the data.

    Parameters
    ----------
    num_elem : int
        The number of elements in APOGEE
    num_stars : int
        The number of stars in the desired cluster
    apogee_cluster_data : structured array
        All cluster data from APOGEE
    sigma : float
        The value of sigma to create the simulated spectra
    T : tuple
        Array of floats representing the effective temperature of each star in the cluster
    cluster : str
        Name of the desired cluster (e.g. 'NGC 2682')
    spectra : tuple
        Array of floats representing the spectra of the desired cluster
    spectra_errs : tuple
        Array of floats representing the spectral uncertainties of the desired cluster
    run_number : int
        Number of the run by which to label files
    location : str
        If running locally, set to 'personal'.  If running on the server, set to 'server'.
    elem : str
        Element being examined (e.g. 'AL')

    Returns
    -------
    fake_res : tuple
        Array of floats representing the residuals of the quadratic fit
    fake_err : tuple
        Array of floats representing the spectral errors corresponding to the residuals
    y_ax_psm : tuple
        One-dimensional array containing values from 0 to 1, the same size as cdist
    psm_cdists : tuple
        One-dimensional array containing the sorted, normalized fit residuals
    fake_nanless_res : tuple
        Array of floats representing the residuals of the quadratic fit, with NaNs removed
        (doesn't return fake_nanless_err because they are the same as real_nanless_err)
    final_real_spectra : tuple
        Array of observed spectra masked in the same way as the simulated spectra
    final_real_spectra_err : tuple
        Array of observed spectral errors masked in the same way as the simulated spectra
    """

    #Abundances WRT H
    num_elem = 15  ###Number of elements in APOGEE
    num_stars = len(spectra)  ###Number of stars in the cluster
    ###Dictionary for the names of the FE abundances in the allStar file
    fe_abundance_dict = {'element': ['C_FE', 'N_FE', 'O_FE', 'NA_FE', 'MG_FE', 'AL_FE', 'SI_FE',
                                     'S_FE', 'K_FE', 'CA_FE', 'TI_FE', 'V_FE', 'MN_FE', 'NI_FE',
                                     'FE_H']}
    cluster_xh = np.zeros((num_elem, num_stars))  ###Make an empty array to add all of the cluster abundances to
    for i in range(num_elem):  ###Iterate through the elements
        for j in range(num_stars):  ###Iterate through the stars
            ###Get all of the [X/H] abundances for all of the elements X and all of the stars in the
            ###cluster by adding [Fe/H] to each [X/Fe] abundance
            if fe_abundance_dict['element'][i] == 'FE_H':
                cluster_xh[i] = apogee_cluster_data['FE_H']
            else:
                cluster_xh[i] = apogee_cluster_data[fe_abundance_dict['element'][i]] + apogee_cluster_data['FE_H']
    cluster_avg_abundance = np.mean(cluster_xh, axis=1)  ###Get the average abundances of all the elements in the cluster (so should be just 1 number for each element)

    cluster_logg = apogee_cluster_data['LOGG']  ###Get the surface gravities of each star from the allStar file
    elem_number_dict = {'C': 0,  ###Create a dictionary to match element names to their order number
                        'N': 1,
                        'O': 2,
                        'NA': 3,
                        'MG': 4,
                        'AL': 5,
                        'SI': 6,
                        'S': 7,
                        'K': 8,
                        'CA': 9,
                        'TI': 10,
                        'V': 11,
                        'MN': 12,
                        'FE': 13,
                        'NI': 14}

    cluster_fake_abundance = np.copy(cluster_xh)  ###Create a copy of the array of all abundances in the cluster to use to simulate abundances
    ###Simulate the abundances of the DESIRED ELEMENT (ONE ELEMENT ONLY) by drawing from a random normal distribution centred about the mean of THAT ONE
    ###ELEMENT with a scatter of the chosen value of sigma.  The rest of the abundances in this array will remain the same as the data
    cluster_fake_abundance[elem_number_dict[elem]] = np.random.normal(
        loc=cluster_avg_abundance[elem_number_dict[elem]], scale=float(sigma), size=num_stars)

    cluster_gen_spec = np.zeros((num_stars, 7214))  ###Create an empty array to add the fake spectra
    for i in range(len(spectra)):  ###Iterate through all of the stars - change this to range(num_stars)
        ###Use PSM to make a fake spectrum, using the array of abundances created above (with the element in question varied by the value of sigma and the
        ###remaining values of abundances the same as the data), using the photometric Teffs calculated previously, the logg of each star, the default
        ###PSM vturb value, and the default psm c12c13 value.
        cluster_gen_spec[i] = psm.generate_spectrum(
            Teff=T[i]/1000, logg=cluster_logg[i], vturb=psm.vturb,
            ch=cluster_fake_abundance[elem_number_dict['C']][i],
            nh=cluster_fake_abundance[elem_number_dict['N']][i],
            oh=cluster_fake_abundance[elem_number_dict['O']][i],
            nah=cluster_fake_abundance[elem_number_dict['NA']][i],
            mgh=cluster_fake_abundance[elem_number_dict['MG']][i],
            alh=cluster_fake_abundance[elem_number_dict['AL']][i],
            sih=cluster_fake_abundance[elem_number_dict['SI']][i],
            sh=cluster_fake_abundance[elem_number_dict['S']][i],
            kh=cluster_fake_abundance[elem_number_dict['K']][i],
            cah=cluster_fake_abundance[elem_number_dict['CA']][i],
            tih=cluster_fake_abundance[elem_number_dict['TI']][i],
            vh=cluster_fake_abundance[elem_number_dict['V']][i],
            mnh=cluster_fake_abundance[elem_number_dict['MN']][i],
            nih=cluster_fake_abundance[elem_number_dict['NI']][i],
            feh=cluster_fake_abundance[elem_number_dict['FE']][i],
            c12c13=psm.c12c13)

    #Pad psm spectra with zeros to make appropriate size for DR14
    apStar_cluster_gen_spec = toApStarGrid(cluster_gen_spec, dr='12')  ###Put the fake spectra onto the DR12 grid
    ###Put the fake spectra onto the DR14 grid.  This will pad the spectra with zeroes to make it the right shape for DR14
    cluster_padded_spec = toAspcapGrid(apStar_cluster_gen_spec, dr='14')

    #Create array of nans to mask the psm in the same way as the spectra
    masked_psm = np.empty_like(spectra)  ###Create an empty array that is the same shape as the spectra
    masked_psm[:] = np.nan  ###Fill the array with NaNs to mask it

    #Mask the spectra
    for i in range(len(spectra)):  ###Iterate through the stars - change this to num_stars
        for j in range(7514):  ###Iterate through the wavelength range - change to len(spectra.T) or make a variable = 7514 so I can stop hardcoding
            if ~np.isnan(spectra[i][j]):  ###If the entry in the real spectra is not a NaN
                masked_psm[i][j] = cluster_padded_spec[i][j]  ###Make the value in the masked fake spectra the corresponding entry in the fake spectra

    #Read in repeats residuals
    if location == 'personal':  ###If running on Mac
        file = h5py.File('/Users/chloecheng/Personal/repeats_dr14.hdf5', 'r')  ###Path to repeats file that I made
    elif location == 'server':  ###If running on server
        file = h5py.File('/geir_data/scr/ccheng/AST425/Personal/repeats_dr14.hdf5', 'r')  ###Path to repeats file that I made
    repeat_res = file['residuals'][()]  ###Get the repeats
    file.close()  ###Close the file

    #Cut out gaps between detectors for DR14
    repeats_dr14 = toAspcapGrid(repeat_res, dr='14')  ###Cut the gaps between the detectors in the repeats

    #Calculate 6sigma for repeats
    repeats_mean = np.nanmean(repeats_dr14)  ###Get the mean of the repeats, avoiding the masked areas that are NaNs
    repeats_std = np.nanstd(repeats_dr14)  ###Get the standard deviation of the repeats, avoiding the masked areas that are NaNs
    repeats_6sigma = repeats_mean + repeats_std*6  ###Get the value for 6 sigma of the repeats from the mean (check that this is correct)

    #Create fake noise to add to the psm
    selected_repeats = []  ###Empty list to append the random repeats that I will use to multiply the spectral errors
    for i in range(0, num_stars):  ###Iterate through the stars
        #Select a random star from the repeats residuals by which to multiply the spectra errors
        random_repeat = np.random.choice(np.arange(0, len(repeats_dr14)))  ###Randomly select one of the stars in the repeats
        selected_repeats.append(repeats_dr14[random_repeat])  ###Get all of the repeats residuals for this star
    selected_repeats = np.array(selected_repeats)  ###Turn the list into an array

    #Mask individual |repeats| that are > 6sigma
    for i in range(len(selected_repeats)):  ###Iterate through the number of selected repeats (number of stars)
        for j in range(len(selected_repeats.T)):  ###Iterate through the wavelength range
            if np.abs(selected_repeats[i][j]) > repeats_6sigma:  ###If the absolute value of one of the entries in the selected repeats is greater than 6sigma
                selected_repeats[i][j] = np.nan  ###Mask it out

    #Multiply the repeats by the spectral errors
    ###Multiply the spectral errors by these randomly selected repeats.  The spectral errors will become masked in the same way as the repeats
    cluster_fake_errs = spectra_errs*selected_repeats

    #Pad the fake errors with zeroes in the same places as the PSM spectra
    cluster_fake_errs[masked_psm == 0] = 0.0  ###Pad the fake errors with zeroes in the same places as the fake spectra from modifying the DR12 wavelength range to fit DR14

    #Add the noise to the psm
    noise_fake_spec = masked_psm + cluster_fake_errs  ###Add the fake errors that I've created to the fake spectra as fake noise to make more realistic

    #Mask the real spectra and spectra errors in the same way as the fake spectra
    masked_real_spectra = np.copy(spectra)  ###Make a copy of the observed spectra to mask in the same way as the fake spectra
    masked_real_spectra_err = np.copy(spectra_errs)  ###Make a copy of the observed spectral errors to mask in the same way as the fake spectra
    masked_real_spectra[np.isnan(noise_fake_spec)] = np.nan  ###Mask the observed spectra in the same way as the fake spectra
    masked_real_spectra_err[np.isnan(noise_fake_spec)] = np.nan  ###Mask the observed spectral errors in the same way as the fake spectra

    #Remove empty spectra
    ###I'm not sure if this chunk is necessary but I wrote it in just in case
    final_fake_spec = []  ###Empty list to append the final set of fake spectra
    final_real_spectra = []  ###Empty list to append the final set of real spectra
    final_real_spectra_err = []  ###Empty list to append the final set of real spectral errors
    for i in range(len(noise_fake_spec)):  ###Iterate through the fake spectra
        if any(noise_fake_spec[i, :] != 0):  ###If there are rows that are not completely filled with zeroes
            final_fake_spec.append(noise_fake_spec[i])  ###Append those fake spectra
            final_real_spectra.append(masked_real_spectra[i])  ###Append those real spectra
            final_real_spectra_err.append(masked_real_spectra_err[i])  ###Append those real spectral errors
    final_fake_spec = np.array(final_fake_spec)  ###Make into array
    final_real_spectra = np.array(final_real_spectra)  ###Make into array
    final_real_spectra_err = np.array(final_real_spectra_err)  ###Make into array

    #Run fitting function on synthetic spectra
    fake_res, fake_err, fake_points, fake_temp, fake_a, fake_b, fake_c, fake_nanless_res, fake_nanless_err, fake_nanless_T, fake_nanless_points, fake_normed_weights = oc.fit_func(
        elem, cluster, final_fake_spec, final_real_spectra_err, T, dat_type='sim',
        run_number=run_number, location=location, sigma_val=sigma)

    #Cumulative distributions
    y_ax_psm, psm_cdists = pp.cum_dist(fake_nanless_res, fake_nanless_err)

    return fake_res, fake_err, y_ax_psm, psm_cdists, fake_nanless_res, final_real_spectra, final_real_spectra_err
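#Illustrative stand-in for pp.cum_dist (an assumption, NOT the actual implementation in
#occam_clusters_post_process.py), showing what the cumulative-distribution outputs represent per the
#docstring above: the error-normalized residuals are sorted and paired with a 0-to-1 y-axis.
import numpy as np

def cum_dist_sketch(nanless_res, nanless_err):
    cdist = np.sort(np.asarray(nanless_res) / np.asarray(nanless_err))  #Sorted, normalized residuals
    y_ax = np.linspace(0, 1, len(cdist))  #Cumulative fraction from 0 to 1
    return y_ax, cdist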