def find_peaks(freq_arr,int_arr,res,min_sep,is_sim=False,sigma=3,kms=True):
    '''
    Find peaks in a spectrum and return their indices into freq_arr / int_arr.

    Parameters
    ----------
    freq_arr : array-like
        Frequency axis of the spectrum.
    int_arr : array-like
        Intensities, same length as freq_arr.
    res : float
        Channel spacing of freq_arr, in the same units as freq_arr.
    min_sep : float
        Minimum separation required between two peaks. Interpreted as a
        velocity if kms is true (divided by the velocity resolution), otherwise
        in the same units as res.
    is_sim : bool
        If true, every peak found is returned and no noise cut is applied.
    sigma : float
        When is_sim is false, only peaks with int_arr > sigma * get_rms(int_arr)
        are kept.
    kms : bool
        If true, the spectrum is first resampled onto a grid that is uniform in
        velocity about the band center, peaks are found there, and each one is
        mapped back to the nearest channel of freq_arr.

    Returns
    -------
    numpy.ndarray
        Integer indices of the peaks (empty if none were found).
    '''

    if kms:
        max_f = np.amax(freq_arr)
        min_f = np.amin(freq_arr)
        cfreq = (max_f + min_f)/2
        #finest velocity spacing (ckm is presumably the speed of light in km/s -- defined elsewhere)
        v_res = res*ckm/max_f
        #total velocity range spanned, setting cfreq at v=0.
        v_span = (max_f - min_f)*ckm/cfreq
        #create a uniformly spaced velocity array
        v_samp = np.arange(-v_span/2,v_span/2+v_res,v_res)
        #convert it back to frequency
        freq_new = v_samp*cfreq/ckm + cfreq
        int_new = np.interp(freq_new,freq_arr,int_arr,left=0.,right=0.)
        chan_sep = min_sep/v_res
    else:
        freq_new = freq_arr
        int_new = int_arr
        chan_sep = min_sep/res

    #scipy rejects distance < 1; one channel is the smallest meaningful separation
    chan_sep = max(chan_sep,1)

    #signal.find_peaks returns (peak_indices, properties); only the indices are wanted
    peak_chans = signal.find_peaks(int_new,distance=chan_sep)[0]

    if kms:
        #we had to re-sample things, so map each peak back onto the input grid
        indices = [find_nearest(freq_arr,freq_new[x]) for x in peak_chans]
    else:
        indices = peak_chans

    if is_sim:
        return np.asarray(indices,dtype=int)

    rms = get_rms(int_arr)
    indices = [x for x in indices if int_arr[x] > sigma*rms]

    #explicit integer dtype so that an empty result can still be used as an index array
    return np.asarray(indices,dtype=int)
def matched_filter(data_x,data_y,filter_y,name='mf'):
    '''
    Perform a matched filter analysis on data_x,data_y using filter_y.

    data_y is cross-correlated with filter_y (numpy 'valid' mode) and the
    response is divided by its own rms, as computed by get_rms. The x axis is
    cut down to a window centered on the middle of data_x so that it has the
    same length as the response.

    Returns a Spectrum called `name` with the trimmed x axis stored in
    .velocity and the normalized response stored in .snr.
    '''

    #filter response, scaled in place by its rms
    response = np.correlate(data_y,filter_y,mode='valid')
    response /= get_rms(response)

    #central window of the x axis, half the response length on either side of the middle
    half_width = round(len(response)/2)
    mid = round(len(data_x)/2)
    x_axis = np.copy(data_x[mid-half_width:mid+half_width])

    #rounding can leave the two arrays one channel apart; trim whichever is longer
    n_x = len(x_axis)
    n_y = len(response)
    if abs(n_x - n_y) == 1:
        if n_x > n_y:
            x_axis = x_axis[:-1]
        else:
            response = response[:-1]

    #package the result
    result = Spectrum(name=name)
    result.velocity = np.copy(x_axis)
    result.snr = np.copy(response)

    return result
def set_upper_limit(sim, obs, params=None):
    '''
    Automatically finds an upper limit for a simulation in an observation.

    Peaks are located in the simulated spectrum, the noise of the observation
    is measured in a window around each one, and the peak with the largest
    ratio of simulated intensity to observed rms is selected. The simulation's
    column density (sim.source.column) is then rescaled, calling sim.update()
    after each step, until the simulated intensity at that line agrees with
    sigma * rms to within `tolerance`. sim is modified in place.

    params dictionary can contain:

        vel_widths  :   float
                        Number of FWHM on each side of line to calculate RMS
                        values (defaults to 40.)

        tolerance   :   float
                        Fractional agreement required between the line and the
                        scaled rms (defaults to 0.01)

        sigma       :   float
                        Multiple of the rms to scale the line to (defaults to 1.0)

    Other keys (e.g. 'plot_name') are ignored.

    Returns None. If the observation has no usable data around any peak, the
    selected rms is NaN and the simulation is left unchanged.

    NOTE(review): a second definition of set_upper_limit appears later in this
    file and would shadow this one -- confirm which is intended to survive.
    '''

    #load in options from the params dictionary, and any defaults
    params = {} if params is None else params
    vel_widths = params.get('vel_widths', 40.)
    tolerance = params.get('tolerance', 0.01)
    sigma = params.get('sigma', 1.0)

    #find the indices of the peaks in the simulation
    peak_indices = find_peaks(sim.spectrum.freq_profile,
                              sim.spectrum.int_profile,
                              _get_res(sim.spectrum.freq_profile),
                              sim.source.dV,
                              is_sim=True)

    #get the frequencies and absolute values of the intensities in these regions
    peak_freqs = np.copy(sim.spectrum.freq_profile[peak_indices])
    peak_ints = np.copy(abs(sim.spectrum.int_profile[peak_indices]))

    #sort the arrays based on the intensity, and create some new ones to hold more info
    sort_idx = peak_ints.argsort()[::-1]
    peak_ints = peak_ints[sort_idx]
    peak_freqs = peak_freqs[sort_idx]
    peak_rms = peak_ints * 0.
    peak_snr = peak_ints * 0.

    #Go through and calculate RMS values, looking vel_widths on either side of the line for the RMS
    for i, line_freq in enumerate(peak_freqs):
        half_width = vel_widths * sim.source.dV * line_freq / ckm
        ll_idx = find_nearest(obs.spectrum.frequency, line_freq - half_width)
        ul_idx = find_nearest(obs.spectrum.frequency, line_freq + half_width)
        rms = get_rms(obs.spectrum.Tb[ll_idx:ul_idx])
        #if the rms is NaN because there's no data in that region, make sure this
        #line can never be selected.  np.isnan returns a numpy bool, so it has to be
        #tested by truth value; an `is True` identity check never succeeds.
        if np.isnan(rms):
            peak_rms[i] = np.nan
            peak_snr[i] = 0.
        else:
            peak_rms[i] = rms
            peak_snr[i] = peak_ints[i] / rms

    #now find the maximum snr value and get the corresponding line frequency, rms, and intensity
    best_idx = np.argmax(peak_snr)
    best_freq = peak_freqs[best_idx]
    best_rms = sigma * peak_rms[best_idx]
    best_int = peak_ints[best_idx]

    #now continuously adjust the simulation column density until it matches the rms
    #(a NaN best_rms makes the comparison False, so the loop is skipped entirely)
    while abs(best_int - best_rms) / best_rms > tolerance:
        sim.source.column *= best_rms / best_int
        sim.update()
        best_int = np.nanmax(sim.spectrum.int_profile[find_nearest(
            sim.spectrum.freq_profile, best_freq)])

    return
def set_upper_limit(sim, obs, params=None):
    '''
    Automatically finds an upper limit for a simulation in an observation.

    Peaks are located in the simulated spectrum, the noise of the observation
    is measured in a window around each one, and the peak with the largest
    ratio of simulated intensity to observed rms is selected. The simulation's
    column density (sim.source.column) is then rescaled, calling sim.update()
    after each step, until the simulated intensity at that line agrees with
    sigma * rms to within `tolerance`. sim is modified in place.

    params dictionary can contain:

        plot_name       :   str
                            Name of the output plot, defaults to None
                            (plotting not implemented yet; the key is ignored)

        vel_widths      :   float
                            Number of FWHM on each side of line to calculate RMS
                            values (defaults to 40.)

        tolerance       :   float
                            How close to require the match between the best line
                            and the rms to be (defaults to 0.01 or 1%)

        sigma           :   float
                            What confidence level is desired on the upper limit
                            (defaults to 1.0 sigma)

        return_result   :   bool
                            Whether to return an upper limit results object that
                            stores metadata and prints reports (defaults to False)

    Returns an Ulim_Result (from molsim.classes) holding the line frequency,
    final line intensity, rms, sigma, sim and obs when return_result is true;
    otherwise returns None. If the observation has no usable data around any
    peak, the selected rms is NaN and the simulation is left unchanged.
    '''

    #load in options from the params dictionary, and any defaults
    params = {} if params is None else params
    vel_widths = params.get('vel_widths', 40.)
    tolerance = params.get('tolerance', 0.01)
    sigma = params.get('sigma', 1.0)
    return_result = params.get('return_result', False)

    #find the indices of the peaks in the simulation
    peak_indices = find_peaks(sim.spectrum.freq_profile,
                              sim.spectrum.int_profile,
                              _get_res(sim.spectrum.freq_profile),
                              sim.source.dV,
                              is_sim=True)

    #get the frequencies and absolute values of the intensities in these regions
    peak_freqs = np.copy(sim.spectrum.freq_profile[peak_indices])
    peak_ints = np.copy(abs(sim.spectrum.int_profile[peak_indices]))

    #sort the arrays based on the intensity, and create some new ones to hold more info
    sort_idx = peak_ints.argsort()[::-1]
    peak_ints = peak_ints[sort_idx]
    peak_freqs = peak_freqs[sort_idx]
    peak_rms = peak_ints * 0.
    peak_snr = peak_ints * 0.

    #Go through and calculate RMS values, looking vel_widths on either side of the line for the RMS
    for i, line_freq in enumerate(peak_freqs):
        half_width = vel_widths * sim.source.dV * line_freq / ckm
        ll_idx = find_nearest(obs.spectrum.frequency, line_freq - half_width)
        ul_idx = find_nearest(obs.spectrum.frequency, line_freq + half_width)
        rms = get_rms(obs.spectrum.Tb[ll_idx:ul_idx])
        #if the rms is NaN because there's no data in that region, make sure this
        #line can never be selected.  np.isnan returns a numpy bool, so it has to be
        #tested by truth value; an `is True` identity check never succeeds.
        if np.isnan(rms):
            peak_rms[i] = np.nan
            peak_snr[i] = 0.
        else:
            peak_rms[i] = rms
            peak_snr[i] = peak_ints[i] / rms

    #now find the maximum snr value and get the corresponding line frequency, rms, and intensity
    best_idx = np.argmax(peak_snr)
    best_freq = peak_freqs[best_idx]
    best_rms = sigma * peak_rms[best_idx]
    best_int = peak_ints[best_idx]

    #now continuously adjust the simulation column density until it matches the rms
    #(a NaN best_rms makes the comparison False, so the loop is skipped entirely)
    while abs(best_int - best_rms) / best_rms > tolerance:
        sim.source.column *= best_rms / best_int
        sim.update()
        best_int = np.nanmax(sim.spectrum.int_profile[find_nearest(
            sim.spectrum.freq_profile, best_freq)])

    if not return_result:
        return

    # Get the result class (imported here, as in the original, rather than at module level)
    from molsim.classes import Ulim_Result

    # Make one and start storing results
    result = Ulim_Result()
    result.line_frequency = best_freq
    result.line_intensity = best_int
    # best_rms carries the sigma scaling; store the bare rms
    result.rms = best_rms / sigma
    result.sigma = sigma
    result.sim = sim
    result.obs = obs

    return result
def velocity_stack(params):
    '''
    Perform a velocity stack.

    Windows of the observation (and of the simulation) around each selected
    line are converted to velocity about the line center, weighted by relative
    line intensity and inverse variance, resampled onto a common velocity grid,
    summed, and finally divided by the rms of the stack so the result is on an
    SNR scale.

    Requires a params catalog for all the various options. Here they are, noted
    as required, or otherwise have defaults:

        name: a name for this spectrum object. String. Default: 'stack'
        selection : 'peaks' or 'lines'. Default: 'lines'
        freq_arr : the array of frequencies. Required
        int_arr : the array of intensities. Required
        freq_sim: the array of simulated frequencies. Required
        int_sim: the array of simulated intensities. Required
        res_inp : resolution of input data [MHz]. Calculates if not given
        dV : FWHM of lines [km/s]. Required.
        dV_ext : How many dV to integrate over. Required if 'lines' selected
                 (and whenever return_snr is True).
        vlsr: vlsr [km/s]. Default: 0.0. Accepted but not currently used by this function.
        vel_width : how many km/s of spectra on either side of a line to stack [km/s]. Required.
        v_res: desired velocity resolution [km/s]. Default: 0.1*dV
        drops: id's of any chunks to exclude. List. Default: []
        blank_lines : True or False. Default: False
        blank_keep_range: range over which not to blank lines. List [a,b]. Default: [-3*dV,3*dV]
        flag_lines: True or False. Default: False
        flag_sigma : number of sigma over which to consider a line an interloper. Float. Default: 5.
        n_strongest: stack the strongest x lines. Integer. Default: All lines.
        n_snr: stack the x highest snr lines. Integer. Default: All lines.
        return_snr: output arrays of the snrs stacked plus the snr of the stack itself. True or False. Default: False

    Returns the stacked Spectrum (with .velocity, .snr and .int_sim set), or the
    tuple (stacked Spectrum, per-line snr array, stack snr) if return_snr is true.

    Raises ValueError if selection is neither 'peaks' nor 'lines'.
    '''

    #define an obs_chunk class to hold chunks of data to stack
    class ObsChunk(object):

        def __init__(self,freq_obs,int_obs,freq_sim,int_sim,peak_int,chunk_id,cfreq):
            self.freq_obs = freq_obs #frequency array to be stacked
            self.int_obs = int_obs #intensity array to be stacked
            self.freq_sim = freq_sim #simulated frequency array to be stacked
            self.int_sim = int_sim #simulated intensity array to be stacked
            self.peak_int = peak_int #peak intensity for this chunk
            self.id = chunk_id #id of this chunk
            self.cfreq = cfreq #center frequency of the chunk
            self.flag = False #flagged as not to be used
            self.rms = None #rms of the chunk
            self.velocity = None #to hold the velocity array
            self.test = False
            self.check_data()
            if self.flag is False:
                self.set_rms()
                self.set_velocity()
                self.set_sim_velocity()

        def check_data(self):
            #check if we have enough data here or if we ended up near an edge or a bunch of nans
            if len(self.freq_obs) < 2:
                self.flag = True
                return
            #check if we have more nans than not, and if so, skip it
            n_nans = np.count_nonzero(np.isnan(self.int_obs))
            if np.count_nonzero(~np.isnan(self.int_obs)) < n_nans:
                self.flag = True
                return
            #check if peak_int is 0.0, in which case skip it
            if self.peak_int == 0:
                self.flag = True

        def set_rms(self):
            self.rms = get_rms(self.int_obs)

        def set_velocity(self):
            #velocity of each observed channel relative to the chunk center
            self.velocity = (self.freq_obs - self.cfreq)*ckm/self.cfreq

        def set_sim_velocity(self):
            #velocity of each simulated channel relative to the chunk center
            self.sim_velocity = (self.freq_sim - self.cfreq)*ckm/self.cfreq

    def _window_bounds(freqs,centers,half_widths):
        #indices in freqs nearest to center-half_width and center+half_width for every line
        lls = np.asarray([find_nearest(freqs,c-w) for c,w in zip(centers,half_widths)])
        uls = np.asarray([find_nearest(freqs,c+w) for c,w in zip(centers,half_widths)])
        return lls,uls

    #unpacking the dictionary into local variables for ease of use
    name = params.get('name','stack')
    selection = params.get('selection','lines')
    freq_arr = np.copy(params['freq_arr'])
    int_arr = np.copy(params['int_arr'])
    freq_sim = np.copy(params['freq_sim'])
    int_sim = np.copy(params['int_sim'])
    res_inp = params['res_inp'] if 'res_inp' in params else _get_res(freq_arr)
    dV = params['dV']
    dV_ext = params.get('dV_ext')
    vel_width = params['vel_width']
    v_res = params['v_res'] if 'v_res' in params else 0.1*dV
    drops = params.get('drops',[])
    blank_lines = params.get('blank_lines',False)
    blank_keep_range = params['blank_keep_range'] if 'blank_keep_range' in params else [-3*dV,3*dV]
    flag_lines = params.get('flag_lines',False)
    flag_sigma = params.get('flag_sigma',5.)
    n_strongest = params.get('n_strongest')
    n_snr = params.get('n_snr')
    return_snr = params.get('return_snr',False)

    #initialize a spectrum object to hold the stack and name it
    stacked_spectrum = Spectrum(name=name)

    #determine the locations to stack and their intensities, either with peaks or lines
    if selection == 'peaks':
        peak_indices = find_peaks(freq_sim,int_sim,res_inp,dV,is_sim=True)
        peak_freqs = freq_sim[peak_indices]
        peak_ints = int_sim[peak_indices]
    elif selection == 'lines':
        peak_indices = find_peaks(freq_sim,int_sim,res_inp,dV*dV_ext,is_sim=True)
        peak_freqs = freq_sim[peak_indices]
        #integrate the simulation over dV*dV_ext centered on each line
        freq_widths = dV*dV_ext*peak_freqs/ckm
        lls,uls = _window_bounds(freq_sim,peak_freqs,freq_widths/2)
        peak_ints = np.asarray([np.nansum(int_sim[x:y]) for x,y in zip(lls,uls)])
    else:
        raise ValueError("selection must be 'peaks' or 'lines', got {!r}".format(selection))

    #choose the n strongest lines, if that is specified (ignored if more are asked for than exist)
    if n_strongest is not None and n_strongest <= len(peak_ints):
        sort_idx = np.flip(np.argsort(peak_ints))
        peak_ints = peak_ints[sort_idx][:n_strongest]
        peak_freqs = peak_freqs[sort_idx][:n_strongest]

    #choose the n highest snr lines, if that is instead specified
    line_snr = None
    if n_snr is not None and n_snr <= len(peak_ints):
        freq_widths = vel_width*peak_freqs/ckm
        lls_obs,uls_obs = _window_bounds(freq_arr,peak_freqs,freq_widths)
        line_noise = np.asarray([get_rms(int_arr[x:y]) for x,y in zip(lls_obs,uls_obs)])
        line_snr = peak_ints/line_noise
        sort_idx = np.flip(np.argsort(line_snr))
        peak_ints = peak_ints[sort_idx][:n_snr]
        peak_freqs = peak_freqs[sort_idx][:n_snr]
        line_snr = line_snr[sort_idx][:n_snr]

    #split out the data to use, first finding the appropriate indices for the width range we want
    freq_widths = vel_width*peak_freqs/ckm
    lls_obs,uls_obs = _window_bounds(freq_arr,peak_freqs,freq_widths)
    lls_sim,uls_sim = _window_bounds(freq_sim,peak_freqs,freq_widths)

    #if the snrs were requested but no snr-based selection was made, compute them for
    #the lines actually being stacked so there is something to return
    if return_snr and line_snr is None:
        line_noise = np.asarray([get_rms(int_arr[x:y]) for x,y in zip(lls_obs,uls_obs)])
        line_snr = peak_ints/line_noise

    obs_chunks = [
        ObsChunk(np.copy(freq_arr[x:y]),np.copy(int_arr[x:y]),
                 np.copy(freq_sim[a:b]),np.copy(int_sim[a:b]),
                 peak_int,c,d)
        for x,y,a,b,peak_int,c,d
        in zip(lls_obs,uls_obs,lls_sim,uls_sim,peak_ints,range(len(uls_sim)),peak_freqs)
    ]

    #flagging
    for obs in obs_chunks:
        #already flagged, move on
        if obs.flag:
            continue
        #make sure there's data at all.
        if len(obs.freq_obs) == 0:
            obs.flag = True
            continue
        #drop anything in drops
        if obs.id in drops:
            obs.flag = True
            continue
        #blank out lines not in the center to be stacked
        if blank_lines:
            #Find the indices corresponding to the safe range
            ll_obs = find_nearest(obs.freq_obs,obs.cfreq - blank_keep_range[1]*obs.cfreq/ckm)
            ul_obs = find_nearest(obs.freq_obs,obs.cfreq - blank_keep_range[0]*obs.cfreq/ckm)
            threshold = flag_sigma*obs.rms
            mask = np.concatenate((
                np.where(abs(obs.int_obs[:ll_obs]) > threshold)[0],
                np.where(abs(obs.int_obs[ul_obs:]) > threshold)[0]+ul_obs,
            ))
            obs.int_obs[mask] = np.nan
            obs.set_rms()
            #blank the simulation over the same frequency ranges.  The edges of the
            #blanked regions are translated through *frequency*: the indices refer to
            #the observed chunk, whose channels need not line up with the simulated
            #chunk.  (Looking them up via the intensity arrays cannot locate them.)
            obs_nans_lls,obs_nans_uls = _find_nans(obs.int_obs)
            obs_nans_freqs_lls = obs.freq_obs[obs_nans_lls]
            obs_nans_freqs_uls = obs.freq_obs[obs_nans_uls]
            sim_nans_lls = [find_nearest(obs.freq_sim,x) for x in obs_nans_freqs_lls]
            sim_nans_uls = [find_nearest(obs.freq_sim,x) for x in obs_nans_freqs_uls]
            for x,y in zip(sim_nans_lls,sim_nans_uls):
                obs.int_sim[x:y] = np.nan
        #if we're flagging lines in the center, do that now too
        if flag_lines:
            if np.nanmax(obs.int_obs) > flag_sigma*obs.rms:
                obs.flag = True
                continue

    #only unflagged chunks take part from here on
    good_chunks = [obs for obs in obs_chunks if not obs.flag]

    #Generate a velocity array to interpolate everything onto
    velocity_avg = np.arange(-vel_width,vel_width,v_res)

    #set and apply the weights (relative line strength over variance), then resample each
    #chunk, setting anything that is outside the range we asked for to be nans.
    max_int = max(peak_ints)
    for obs in good_chunks:
        obs.weight = obs.peak_int/max_int
        obs.weight /= obs.rms**2
        obs.int_weighted = obs.int_obs*obs.weight
        obs.int_sim_weighted = obs.int_sim*obs.weight
        obs.int_samp = np.interp(velocity_avg,obs.velocity,obs.int_weighted,left=np.nan,right=np.nan)
        obs.int_sim_samp = np.interp(velocity_avg,obs.sim_velocity,obs.int_sim_weighted,left=np.nan,right=np.nan)

    #collect the resampled chunks into arrays, along with the RMS values to allow for proper division.
    interped_ints = np.asarray([obs.int_samp for obs in good_chunks])
    interped_rms = np.asarray([obs.rms for obs in good_chunks],dtype=float)
    interped_sim_ints = np.asarray([obs.int_sim_samp for obs in good_chunks])

    #we need a point by point rms array, so that when we average up and ignore nans, we
    #don't divide by extra values: at each velocity, sum rms**2 over only those chunks
    #that actually have data there.
    n_chunks = len(good_chunks)
    has_data = ~np.isnan(interped_ints.reshape(n_chunks,len(velocity_avg)))
    rms_arr = np.where(has_data,interped_rms.reshape(n_chunks,1)**2,0.).sum(axis=0)
    rms_arr[rms_arr==0] = np.nan

    #add up the interped intensities, then divide that by the rms_array
    int_avg = np.nansum(interped_ints,axis=0)/rms_arr
    int_sim_avg = np.nansum(interped_sim_ints,axis=0)/rms_arr

    #drop some edge channels
    int_avg = int_avg[5:-5]
    int_sim_avg = int_sim_avg[5:-5]
    velocity_avg = velocity_avg[5:-5]

    #Get the final rms, and divide out to get to snr.
    rms_tmp = get_rms(int_avg)
    int_avg /= rms_tmp
    int_sim_avg /= rms_tmp

    #store everything in the spectrum object and return it
    stacked_spectrum.velocity = np.copy(velocity_avg)
    stacked_spectrum.snr = np.copy(int_avg)
    stacked_spectrum.int_sim = np.copy(int_sim_avg)

    if not return_snr:
        return stacked_spectrum

    #integrate the stack over +/- dV*dV_ext about zero velocity
    ll = find_nearest(velocity_avg,-dV*dV_ext)
    ul = find_nearest(velocity_avg,dV*dV_ext)
    stack_int = np.nansum(int_avg[ll:ul])
    #NOTE(review): the reported stack snr is the summed (already snr-scaled) intensity
    #times a fixed 1E5, not a ratio to the noise in this window -- confirm this is intended.
    stack_snr = stack_int*1E5

    return stacked_spectrum,line_snr,stack_snr
def set_rms(self):
    '''Compute the rms of self.int_obs with get_rms and store it in self.rms.'''
    self.rms = get_rms(self.int_obs)