Example #1
def find_peaks(freq_arr,int_arr,res,min_sep,is_sim=False,sigma=3,kms=True):
	'''
	Find the indices of peaks in int_arr.

	If kms is True, the spectrum is first resampled onto a uniform velocity grid
	so that min_sep [km/s] can be converted into a channel separation; otherwise
	min_sep is taken in the same units as res.  Unless is_sim is True, peaks
	weaker than sigma times the rms noise are discarded.
	'''

	if kms is True:
		max_f = np.amax(freq_arr)
		min_f = np.amin(freq_arr)
		cfreq = (max_f + min_f)/2
		v_res = res*ckm/max_f #finest velocity spacing
		v_span = (max_f - min_f) * ckm/(cfreq) #total velocity range spanned, setting cfreq at v=0.
		v_samp = np.arange(-v_span/2,v_span/2+v_res,v_res) #create a uniformly spaced velocity array
		freq_new = v_samp*cfreq/ckm + cfreq #convert it back to frequency
		int_new = np.interp(freq_new,freq_arr,int_arr,left=0.,right=0.)
		chan_sep = min_sep/v_res
	else:
		freq_new = freq_arr
		int_new = int_arr
		chan_sep = min_sep/res
	
	indices = signal.find_peaks(int_new,distance=chan_sep)[0] #keep just the peak indices from scipy's (peaks, properties) tuple

	if kms is True:
		indices = [find_nearest(freq_arr,freq_new[x]) for x in indices] #if we had to re-sample things
		
	if is_sim is True:
		return np.asarray(indices)
		
	rms = get_rms(int_arr)
	indices = [x for x in indices if int_arr[x] > sigma*rms]
	
	return np.asarray(indices)
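
A minimal usage sketch, assuming numpy is imported as np and the module-level helpers used above (scipy's signal, find_nearest, get_rms, and the speed of light ckm in km/s) are in scope; the single noiseless Gaussian line is purely illustrative:

#illustrative only: one synthetic, noiseless Gaussian line near 100050 MHz
freqs = np.linspace(100000., 100100., 10000)     #frequency axis [MHz]
ints = np.exp(-0.5*((freqs - 100050.)/0.05)**2)  #intensity profile
res = freqs[1] - freqs[0]                        #channel spacing [MHz]
peaks = find_peaks(freqs, ints, res, min_sep=1.0, is_sim=True)
print(freqs[peaks])                              #recovers ~100050 MHz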
Example #2
def matched_filter(data_x,data_y,filter_y,name='mf'):
	'''
	Perform a matched filter analysis on (data_x, data_y) using filter_y and
	return the result as a Spectrum with the response normalized to an SNR scale.
	'''
	
	#do the filter and normalization to SNR scale
	mf_y = np.correlate(data_y,filter_y,mode='valid')
	mf_y /= get_rms(mf_y)
	
	#trim off the edges of the velocity data to match the range of the filter response
	nchans = round(len(mf_y)/2)
	c_chan = round(len(data_x)/2)
	mf_x = np.copy(data_x[c_chan-nchans:c_chan+nchans])
	#make sure there's no rounding errors
	if abs(len(mf_x) - len(mf_y)) == 1:
		if len(mf_x) > len(mf_y):
			mf_x = mf_x[:-1]
		else:
			mf_y = mf_y[:-1]	
	
	#load the result into a Spectrum object and return it.
	mf = Spectrum(name=name)
	mf.velocity = np.copy(mf_x)
	mf.snr = np.copy(mf_y)
	
	return mf
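
A short usage sketch, assuming numpy as np plus the module's get_rms helper and Spectrum class are available; the white noise and Gaussian kernel are stand-ins for a real stacked spectrum and its simulated line profile:

#illustrative only: filter a noisy stacked spectrum with a Gaussian kernel
vel = np.arange(-10., 10., 0.1)                          #velocity axis [km/s]
obs = np.random.default_rng(0).normal(scale=1., size=vel.size)
obs += 5.*np.exp(-0.5*(vel/0.5)**2)                      #inject a fake line at 0 km/s
kernel = np.exp(-0.5*(np.arange(-2., 2., 0.1)/0.5)**2)   #simulated line profile
mf = matched_filter(vel, obs, kernel, name='example_mf')
print(np.nanmax(mf.snr))                                 #peak filter response in sigma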
Example #3
def set_upper_limit(sim, obs, params={}):
    '''
	Automatically finds an upper limit for a simulation in an observation.
	'''

    #load in options from the params dictionary, and any defaults
    plot_name = params['plot_name'] if 'plot_name' in params else None
    vel_widths = params['vel_widths'] if 'vel_widths' in params else 40.
    tolerance = params['tolerance'] if 'tolerance' in params else 0.01
    sigma = params['sigma'] if 'sigma' in params else 1.0

    #find the indices of the peaks in the simulation
    peak_indices = find_peaks(sim.spectrum.freq_profile,
                              sim.spectrum.int_profile,
                              _get_res(sim.spectrum.freq_profile),
                              sim.source.dV,
                              is_sim=True)

    #get the frequencies and absolute values of the intensities in these regions
    peak_freqs = np.copy(sim.spectrum.freq_profile[peak_indices])
    peak_ints = np.copy(abs(sim.spectrum.int_profile[peak_indices]))

    #sort the arrays based on the intensity, and create some new ones to hold more info
    sort_idx = peak_ints.argsort()[::-1]
    peak_ints = peak_ints[sort_idx]
    peak_freqs = peak_freqs[sort_idx]
    peak_idx = peak_indices[sort_idx]
    peak_rms = np.copy(peak_ints) * 0.
    peak_snr = np.copy(peak_ints) * 0.

    #Go through and calculate RMS values, looking vel_widths on either side of the line for the RMS
    for i in range(len(peak_freqs)):
        ll_idx = find_nearest(
            obs.spectrum.frequency,
            peak_freqs[i] - vel_widths * sim.source.dV * peak_freqs[i] / ckm)
        ul_idx = find_nearest(
            obs.spectrum.frequency,
            peak_freqs[i] + vel_widths * sim.source.dV * peak_freqs[i] / ckm)
        rms = get_rms(obs.spectrum.Tb[ll_idx:ul_idx])
        #if the rms is NaN because there's no data in that region
        if np.isnan(rms): #np.isnan returns a numpy bool, so comparing with "is True" would never match
            peak_rms[i] = np.nan
            peak_snr[i] = 0.
        else:
            peak_rms[i] = rms
            peak_snr[i] = peak_ints[i] / rms

    #now find the maximum snr value and get the corresponding line frequency, rms, intensity, and index
    best_idx = np.argmax(peak_snr)
    best_freq = peak_freqs[best_idx]
    best_rms = sigma * peak_rms[best_idx]
    best_int = peak_ints[best_idx]

    #now continuously adjust the simulation column density until it matches the rms
    while abs(best_int - best_rms) / best_rms > tolerance:
        sim.source.column *= best_rms / best_int
        sim.update()
        best_int = np.nanmax(sim.spectrum.int_profile[find_nearest(
            sim.spectrum.freq_profile, best_freq)])

    return
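
The while loop above is a simple fixed-point rescaling: because the peak intensity scales close to linearly with column density in the optically thin limit, one or two rescalings usually satisfy the tolerance. A toy illustration of the same update, with a hypothetical linear relation standing in for sim.update():

#toy illustration only: intensity = k*column stands in for the full simulation
k, column, target, tolerance = 2.0e-12, 1.0e13, 5.0e-3, 0.01
best_int = k*column
while abs(best_int - target)/target > tolerance:
    column *= target/best_int     #same rescaling applied to sim.source.column above
    best_int = k*column           #stands in for sim.update() + re-reading the peak
print(column)                     #column density that puts the peak at the target rms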
Example #4
def set_upper_limit(sim, obs, params={}):
    '''
	Automatically finds an upper limit for a simulation in an observation.
	
	params dictionary can contain:
		plot_name : str
			Name of the output plot; defaults to None (plotting is not yet implemented)
		vel_widths : float
			Number of FWHM on each side of line to calculate RMS values (defaults to 40.)
		tolerance : float
			Fractional agreement required between the best line intensity and the rms (defaults to 0.01, i.e. 1%)
		sigma : float
			What confidence level is desired on the upper limit (defaults to 1.0 sigma)
		return_result : bool
			Whether to return an upper limit results object that stores metadata and prints reports (defaults to False)
	'''

    #load in options from the params dictionary, and any defaults
    plot_name = params['plot_name'] if 'plot_name' in params else None
    vel_widths = params['vel_widths'] if 'vel_widths' in params else 40.
    tolerance = params['tolerance'] if 'tolerance' in params else 0.01
    sigma = params['sigma'] if 'sigma' in params else 1.0
    return_result = params['return_result'] if 'return_result' in params else False

    #find the indices of the peaks in the simulation
    peak_indices = find_peaks(sim.spectrum.freq_profile,
                              sim.spectrum.int_profile,
                              _get_res(sim.spectrum.freq_profile),
                              sim.source.dV,
                              is_sim=True)

    #get the frequencies and absolute values of the intensities in these regions
    peak_freqs = np.copy(sim.spectrum.freq_profile[peak_indices])
    peak_ints = np.copy(abs(sim.spectrum.int_profile[peak_indices]))

    #sort the arrays based on the intensity, and create some new ones to hold more info
    sort_idx = peak_ints.argsort()[::-1]
    peak_ints = peak_ints[sort_idx]
    peak_freqs = peak_freqs[sort_idx]
    peak_idx = peak_indices[sort_idx]
    peak_rms = np.copy(peak_ints) * 0.
    peak_snr = np.copy(peak_ints) * 0.

    #Go through and calculate RMS values, looking vel_widths on either side of the line for the RMS
    for i in range(len(peak_freqs)):
        ll_idx = find_nearest(
            obs.spectrum.frequency,
            peak_freqs[i] - vel_widths * sim.source.dV * peak_freqs[i] / ckm)
        ul_idx = find_nearest(
            obs.spectrum.frequency,
            peak_freqs[i] + vel_widths * sim.source.dV * peak_freqs[i] / ckm)
        rms = get_rms(obs.spectrum.Tb[ll_idx:ul_idx])
        #if the rms is NaN because there's no data in that region
        if np.isnan(rms): #np.isnan returns a numpy bool, so comparing with "is True" would never match
            peak_rms[i] = np.nan
            peak_snr[i] = 0.
        else:
            peak_rms[i] = rms
            peak_snr[i] = peak_ints[i] / rms

    #now find the maximum snr value and get the corresponding line frequency, rms, intensity, and index
    best_idx = np.argmax(peak_snr)
    best_freq = peak_freqs[best_idx]
    best_rms = sigma * peak_rms[best_idx]
    best_int = peak_ints[best_idx]

    #now continuously adjust the simulation column density until it matches the rms
    while abs(best_int - best_rms) / best_rms > tolerance:
        sim.source.column *= best_rms / best_int
        sim.update()
        best_int = np.nanmax(sim.spectrum.int_profile[find_nearest(
            sim.spectrum.freq_profile, best_freq)])

    if return_result is True:
        # Get the result class
        from molsim.classes import Ulim_Result
        # Make one
        result = Ulim_Result()
        # Start storing results
        result.line_frequency = best_freq
        result.line_intensity = best_int
        result.rms = best_rms / sigma
        result.sigma = sigma
        result.sim = sim
        result.obs = obs

        return result
    else:
        return
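
A usage sketch, assuming sim and obs are already-constructed molsim Simulation and Observation objects with populated spectra; the parameter values are illustrative:

#illustrative only: sim and obs are pre-built molsim Simulation and Observation objects
result = set_upper_limit(sim, obs, params={'sigma': 3.0,
                                           'tolerance': 0.005,
                                           'return_result': True})
print(result.line_frequency, result.sigma)  #frequency of the constraining line, confidence level
print(sim.source.column)                    #sim has been rescaled to the 3-sigma upper limit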
Example #5
def velocity_stack(params):

	'''
	Perform a velocity stack.  Requires a params dictionary for the various options,
	which are listed below as either required or with their defaults:
	
	name: a name for this spectrum object. String.  Default: 'stack'
	selection : 'peaks' or 'lines'. Default: 'lines'
	freq_arr : the array of frequencies. Required
	int_arr : the array of intensities. Required
	freq_sim: the array of simulated frequencies.  Required
	int_sim: the array of simulated intensities. Required
	res_inp : resolution of input data [MHz].  Calculates if not given
	dV : FWHM of lines [km/s]. Required.
	dV_ext : How many dV to integrate over.  Required if 'lines' selected.
	vlsr: vlsr [km/s]. Default: 0.0 
	vel_width : how many km/s of spectra on either side of a line to stack [km/s].  Required.
	v_res: desired velocity resolution [km/s].  Default: 0.1*dV
	drops: IDs of any chunks to exclude.  List.  Default: []
	blank_lines : True or False.  Default: False
	blank_keep_range: range over which not to blank lines.  List [a,b].  Default: [-3*dV, 3*dV]
	flag_lines: True or False. Default: False
	flag_sigma : number of sigma over which to consider a line an interloper.  Float.  Default: 5.
	n_strongest: stack the strongest x lines.  Integer.  Default: All lines.
	n_snr: stack the x highest SNR lines.  Integer.  Default: All lines.
	return_snr: also return the SNRs of the individual stacked lines and the SNR of the stack itself.  True or False.  Default: False
	'''
	
	#define an obs_chunk class to hold chunks of data to stack
	
	class ObsChunk(object):

		def __init__(self,freq_obs,int_obs,freq_sim,int_sim,peak_int,id,cfreq):
	
			self.freq_obs = freq_obs #frequency array to be stacked
			self.int_obs = int_obs #intensity array to be stacked
			self.freq_sim = freq_sim #simulated frequency array to be stacked
			self.int_sim = int_sim #simulated intensity array to be stacked
			self.peak_int = peak_int #peak intensity for this chunk
			self.id = id #id of this chunk
			self.cfreq = cfreq #center frequency of the chunk
			self.flag = False #flagged as not to be used
			self.rms = None #rms of the chunk
			self.velocity = None #to hold the velocity array
			self.test = False
			
			self.check_data()
			if self.flag is False:
				self.set_rms()
				self.set_velocity()
				self.set_sim_velocity()
			
			return
			
		def check_data(self):
			#check if we have enough data here or if we ended up near an edge or a bunch of nans
			if len(self.freq_obs) < 2:
				self.flag = True
				return
			#check if we have more nans than not, and if so, skip it
			if np.count_nonzero(~np.isnan(self.int_obs)) < np.count_nonzero(np.isnan(self.int_obs)):
				self.flag = True
				return
			#check if peak_int is 0.0, in which case skip it
			if self.peak_int == 0:
				self.flag = True
				return
			return
			
		def set_rms(self):
			self.rms = get_rms(self.int_obs)
			return	
			
		def set_velocity(self):
			vel = np.zeros_like(self.freq_obs)
			vel += (self.freq_obs - self.cfreq)*ckm/self.cfreq
			self.velocity = vel
			return	
			
		def set_sim_velocity(self):
			sim_vel = np.zeros_like(self.freq_sim)
			sim_vel += (self.freq_sim - self.cfreq)*ckm/self.cfreq
			self.sim_velocity = sim_vel
			return				


	#unpacking the dictionary into local variables for ease of use
	options = params.keys()
	name = params['name'] if 'name' in options else 'stack'
	freq_arr = np.copy(params['freq_arr'])
	int_arr = np.copy(params['int_arr'])
	freq_sim = np.copy(params['freq_sim'])
	int_sim = np.copy(params['int_sim'])
	res_inp = params['res_inp'] if 'res_inp' in options else _get_res(freq_arr)
	dV = params['dV']
	dV_ext = params['dV_ext'] if 'dV_ext' in options else None
	vlsr = params['vlsr'] if 'vlsr' in options else 0.0
	vel_width = params['vel_width']
	v_res = params['v_res'] if 'v_res' in options else 0.1*dV
	drops = params['drops'] if 'drops' in options else []
	blank_lines = params['blank_lines'] if 'blank_lines' in options else False
	blank_keep_range = params['blank_keep_range'] if 'blank_keep_range' in options else [-3*dV,3*dV]
	flag_lines = params['flag_lines'] if 'flag_lines' in options else False
	flag_sigma = params['flag_sigma'] if 'flag_sigma' in options else 5.	
	n_strongest = params['n_strongest'] if 'n_strongest' in options else None
	n_snr = params['n_snr'] if 'n_snr' in options else None
	return_snr = params['return_snr'] if 'return_snr' in options else False

	#initialize a spectrum object to hold the stack and name it
	stacked_spectrum = Spectrum(name=name)
	
	#determine the locations to stack and their intensities, either with peaks or lines
	selection = params['selection'] if 'selection' in options else 'lines' #default to 'lines' per the docstring
	if selection == 'peaks':
		peak_indices = find_peaks(freq_sim,int_sim,res_inp,dV,is_sim=True)
		peak_freqs = freq_sim[peak_indices]
		peak_ints = int_sim[peak_indices]

	if selection == 'lines':
		peak_indices = find_peaks(freq_sim,int_sim,res_inp,dV*dV_ext,is_sim=True)
		peak_freqs = freq_sim[peak_indices]
		freq_widths = dV*dV_ext*peak_freqs/ckm
		lls = np.asarray([find_nearest(freq_sim,(x-y/2)) for x,y in zip(peak_freqs,freq_widths)])
		uls = np.asarray([find_nearest(freq_sim,(x+y/2)) for x,y in zip(peak_freqs,freq_widths)])
		peak_ints = np.asarray([np.nansum(int_sim[x:y]) for x,y in zip(lls,uls)])
		
	#choose the n strongest lines, if that is specified
	if n_strongest is not None:
		sort_idx = np.flip(np.argsort(peak_ints))
		if n_strongest > len(peak_ints):
			pass
		else:
			peak_ints = peak_ints[sort_idx][:n_strongest]	
			peak_freqs = peak_freqs[sort_idx][:n_strongest]
			
	#choose the n highest snr lines, if that is instead specified
	if n_snr is not None:
		if n_snr > len(peak_ints):
			pass
		else:		
			freq_widths = vel_width*peak_freqs/ckm
			lls_obs = np.asarray([find_nearest(freq_arr,x-y) for x,y in zip(peak_freqs,freq_widths)])
			uls_obs = np.asarray([find_nearest(freq_arr,x+y) for x,y in zip(peak_freqs,freq_widths)])		
			line_noise = np.asarray([get_rms(int_arr[x:y]) for x,y in zip(lls_obs,uls_obs)])
			line_snr = peak_ints/line_noise
			sort_idx = np.flip(np.argsort(line_snr))
			peak_ints = peak_ints[sort_idx][:n_snr]	
			peak_freqs = peak_freqs[sort_idx][:n_snr]
	
	
	#split out the data to use, first finding the appropriate indices for the width range we want
	freq_widths = vel_width*peak_freqs/ckm
	lls_obs = np.asarray([find_nearest(freq_arr,x-y) for x,y in zip(peak_freqs,freq_widths)])
	uls_obs = np.asarray([find_nearest(freq_arr,x+y) for x,y in zip(peak_freqs,freq_widths)])
	lls_sim = np.asarray([find_nearest(freq_sim,x-y) for x,y in zip(peak_freqs,freq_widths)])
	uls_sim = np.asarray([find_nearest(freq_sim,x+y) for x,y in zip(peak_freqs,freq_widths)])						
		
	obs_chunks = [ObsChunk(np.copy(freq_arr[x:y]),np.copy(int_arr[x:y]),np.copy(freq_sim[a:b]),np.copy(int_sim[a:b]),peak_int,c,d) for x,y,a,b,peak_int,c,d in zip(lls_obs,uls_obs,lls_sim,uls_sim,peak_ints,range(len(uls_sim)),peak_freqs)]

	#flagging
	for obs in obs_chunks:
		#already flagged, move on
		if obs.flag is True:
			continue
		#make sure there's data at all.
		if len(obs.freq_obs) == 0:
			obs.flag = True
			continue	
		#drop anything in drops
		if obs.id in drops:
			obs.flag = True
			continue	
		#blank out lines not in the center to be stacked
		if blank_lines is True:			
			#Find the indices corresponding to the safe range
			ll_obs = find_nearest(obs.freq_obs,obs.cfreq - blank_keep_range[1]*obs.cfreq/ckm)
			ul_obs = find_nearest(obs.freq_obs,obs.cfreq - blank_keep_range[0]*obs.cfreq/ckm)
			mask = np.concatenate((np.where(abs(obs.int_obs[:ll_obs]) > flag_sigma * obs.rms)[0],np.where(abs(obs.int_obs[ul_obs:]) > flag_sigma * obs.rms)[0]+ul_obs))
			obs.int_obs[mask] = np.nan
			obs.set_rms()
			obs_nans_lls,obs_nans_uls = _find_nans(obs.int_obs)
			obs_nans_freqs_lls = obs.freq_obs[obs_nans_lls] #frequencies at the start of each blanked (NaN) region
			obs_nans_freqs_uls = obs.freq_obs[obs_nans_uls] #frequencies at the end of each blanked region
			sim_nans_lls = [find_nearest(obs.freq_sim,x) for x in obs_nans_freqs_lls]
			sim_nans_uls = [find_nearest(obs.freq_sim,x) for x in obs_nans_freqs_uls]
			for x,y in zip(sim_nans_lls,sim_nans_uls):
				obs.int_sim[x:y] = np.nan			
				
		#if we're flagging lines in the center, do that now too
		if flag_lines is True:
			if np.nanmax(obs.int_obs) > flag_sigma*obs.rms:
				obs.flag = True
				continue
				
	#setting and applying the weights
	max_int = max(peak_ints)
	for obs in obs_chunks:
		if obs.flag is False:
			obs.weight = obs.peak_int/max_int
			obs.weight /= obs.rms**2
			obs.int_weighted = obs.int_obs * obs.weight
			obs.int_sim_weighted = obs.int_sim * obs.weight	
			
			
	#Generate a velocity array to interpolate everything onto				
	velocity_avg = np.arange(-vel_width,vel_width,v_res)	
	
	#go through all the chunks and resample them, setting anything that is outside the range we asked for to be nans.
	for obs in obs_chunks:
		if obs.flag is False:
			obs.int_samp = np.interp(velocity_avg,obs.velocity,obs.int_weighted,left=np.nan,right=np.nan)
			obs.int_sim_samp = np.interp(velocity_avg,obs.sim_velocity,obs.int_sim_weighted,left=np.nan,right=np.nan)		
	
	#Now we loop through all the chunks and add them to a list, then convert to an numpy array.  We have to do the same thing w/ RMS values to allow for proper division.
	interped_ints = []
	interped_rms = []
	interped_sim_ints = []
	
	for obs in obs_chunks:
		if obs.flag is False:
			interped_ints.append(obs.int_samp)
			interped_rms.append(obs.rms)
			interped_sim_ints.append(obs.int_sim_samp)
	
	interped_ints = np.asarray(interped_ints)
	interped_rms = np.asarray(interped_rms)
	interped_sim_ints = np.asarray(interped_sim_ints)
	
	#we're going to now need a point by point rms array, so that when we average up and ignore nans, we don't divide by extra values.
	rms_arr = []
	for x in range(len(velocity_avg)):
		rms_sum = 0
		for y in range(len(interped_rms)):
			if np.isnan(interped_ints[y][x]):
				continue
			else:
				rms_sum += interped_rms[y]**2
		rms_arr.append(rms_sum)
	rms_arr	= np.asarray(rms_arr)
	rms_arr[rms_arr==0] = np.nan
	
	#add up the interped intensities, then divide that by the rms_array
	int_avg = np.nansum(interped_ints,axis=0)/rms_arr
	int_sim_avg = np.nansum(interped_sim_ints,axis=0)/rms_arr
	
	#drop some edge channels
	int_avg = int_avg[5:-5]
	int_sim_avg = int_sim_avg[5:-5]
	velocity_avg = velocity_avg[5:-5]
	
	#Get the final rms, and divide out to get to snr.
	rms_tmp = get_rms(int_avg)
	int_avg /= rms_tmp
	int_sim_avg /= rms_tmp
	
	#store everything in the spectrum object and return it
	stacked_spectrum.velocity = np.copy(velocity_avg)
	stacked_spectrum.snr = np.copy(int_avg)
	stacked_spectrum.int_sim = np.copy(int_sim_avg)
						
	if return_snr is False:
		return stacked_spectrum
	if return_snr is True:	
		ll = find_nearest(velocity_avg,-dV*dV_ext)
		ul = find_nearest(velocity_avg,dV*dV_ext)
		stack_int = np.nansum(int_avg[ll:ul])
		stack_rms = get_rms(int_avg[ll:ul])
		stack_snr = stack_int/stack_rms
		return stacked_spectrum,line_snr[sort_idx][:n_snr],stack_snr
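
A usage sketch tying the pieces together, assuming freq_obs/int_obs are the observed arrays and freq_sim/int_sim come from a molsim simulation of the species being stacked (all names and values here are illustrative):

#illustrative only: build the params dictionary and stack
stack_params = {'name'       : 'example_stack',
                'selection'  : 'lines',
                'freq_arr'   : freq_obs,   #observed frequencies [MHz]
                'int_arr'    : int_obs,    #observed intensities
                'freq_sim'   : freq_sim,   #simulated frequencies [MHz]
                'int_sim'    : int_sim,    #simulated intensities
                'dV'         : 0.8,        #FWHM [km/s]
                'dV_ext'     : 2.,         #integrate over +/- 2 dV (required for 'lines')
                'vel_width'  : 40.,        #km/s kept on either side of each line
                'blank_lines': True}
stack = velocity_stack(stack_params)
#the stacked spectrum can then be passed to matched_filter, e.g. using the stacked
#simulation (with NaNs removed) as the filter shape
mf = matched_filter(stack.velocity, stack.snr, stack.int_sim[~np.isnan(stack.int_sim)], name='stack_mf')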