#Example #1
#0
def calc_chisq(datacuts, synobs):
	"""
	Compare the I/F values of a list of real data cuts against the I/F
	values parsed from a synthetic observation, and return an unweighted
	chi-square score between the two.

	'datacuts' is a python list of Datacut objects (observation module),
	each an extracted portion of real data with its attributes (avg. I/F
	in a spectral channel, lat/lon, etc). 'synobs' is a single Synthetic
	object (observation module) holding the parsed output of the fortran
	rad-tran adding/doubling run; one output file describes several
	synthetic observations, kept as list attributes rather than a list
	of objects.

	The harvesting of values from the cuts happens here (not as a method
	on Datacut) because a single cut knows nothing about the manually
	assembled collection, e.g. datacuts = [cut1, cut2, cut3].
	"""

	# One synthetic observation is produced per viewing angle, so the
	# number of real cuts fixes how many entries to pull from synobs.
	obs_count = len(datacuts)

	assert obs_count == synobs.nangles, \
		"Number of observations in datacuts != Number of observations in synthetic data."

	# Flatten both sides into directly comparable arrays.
	measured = observation.concat(datacuts)
	modeled = synobs.concat(obs_count)

	# (historical) real observations were once rescaled here:
	# measured /= 10000.0

	# Pearson-style statistic: sum((model - data)^2 / data).
	# Named 'chisq' (not 'chisquare'/'chisquared') to avoid clashing with
	# scipy.stats.chisquare and similarly named helpers.
	#
	# NOTE (12-Nov-2012): the observational uncertainties ('sigma') are
	# not yet folded into this statistic -- see chisqd.pro for the
	# reference implementation that includes them.
	residuals = (modeled - measured)**2 / measured
	chisq = residuals.sum()

	return chisq
#need to write out outputfile_iter as a text file required by the
#fortran code; a context manager guarantees the handle is flushed and
#closed even if the write raises (the old open/write/close leaked on error)
with open('filenum.dat', 'w') as f:
	f.write('{:0>3d}'.format(outputfile_iter))

#Evaluate model using fortran code to get synthetic observation
interface.exec_rtmod()

#Read result file into python
syn_obs = observation.Synthetic(filename = outputfile)

#below is testing ... remove when find_best_model.py is finished
initial_csq = compare.calc_chisq(datacuts, syn_obs)

#single-argument parenthesized print behaves identically under
#Python 2 and Python 3
print("Chi-square: {}".format(initial_csq))

#diagnostic: keep the flattened vectors around for manual inspection
real = observation.concat(datacuts)
syn = syn_obs.concat(3)


#remove temporary working model files
# os.system('rm -f workmodlb*')
# os.system('rm -f workmodl*')
# os.system('rm -f workrslt.*')



#Example #3
#0
def adj_model_lstsq(data, synobs, atm_model):
	"""
	Method for adjusting atmospheric model based on least-squares fitting of the derivative matrix w/r/t chi-square minimization, given a set of real observational data, a set of synthetic observations, and an atmospheric model

	data      -- list of datacut objects (real observations); each carries
	             an 'er_list' attribute of per-channel uncertainties
	synobs    -- Synthetic observation object exposing 'nangles' and 'concat'
	atm_model -- atmospheric model object; deep-copied here, the caller's
	             instance is never mutated

	Returns the adjusted copy of the model, with every flagged free
	parameter nudged by the corresponding component of the least-squares
	solution of  deriv.T @ x = (synthetic - real).
	"""
	
	logging.info("Adjusting atmospheric model.")
	
#create a working atmospheric model
#must use copy.deepcopy to avoid referencing confusion -- a plain
#assignment would alias atm_model and the adjustments below would
#silently modify the caller's object
	new_model = copy.deepcopy(atm_model)	
	
#determine derivative of model results w/r/t small changes to param
#(i.e. how each synthetic observable responds to each free parameter)
	deriv = calc_deriv(new_model, synobs)
	
#divide derivative vector by the uncertainty
#
#first, expand uncertainty vector by repeating elements within by the number
#of free parameters: each channel uncertainty is repeated once per viewing
#angle so 'sig' lines up element-wise with the concatenated observations
	sig = data[0].er_list  #'er_list' is an attribute of each datacut
						   #'data' is the list of all data cuts
						   #NOTE(review): only data[0] is consulted --
						   #presumably all cuts share one er_list; confirm
						   
	sig = np.asarray([i for i in sig for j in range(synobs.nangles)])
		
#	assert sig.shape == deriv[0,:].shape, \
#		"Error in uncertainty vector / derivative vector shapes."

#porting from IDL code				

#'am' is the derivative matrix, originally divided by the uncertainty:
#it collects how the synthetic observations change w/r/t changes in the
#free parameters; its SVD (done implicitly by lstsq below) exposes where
#those changes are most significant/correlated with one another
#
#24 Nov 2012: for simplicity, removing division by uncertainty for now in light
#of modification to deriv array ('sig' is still built above but unused here)
#	am = deriv / sig
	am = deriv
	
#b is the difference vector, i.e. the difference between the real observed 
#data and the synthetic observations (originally divided by the uncertainty)
#
#the broad objective: figure out what changes to the free parameters
#would reproduce the difference between real and synthetic observations
	real_obs_data = observation.concat(data)
	syn_obs_data = synobs.concat(synobs.nangles)
	
#	b = (syn_obs_data - real_obs_data)/sig
	b = (syn_obs_data - real_obs_data)
	
#source:
#http://stackoverflow.com/questions/12580019/solve-singular-value-decomposition-svd-in-python
#
#use linalg.lstsq (which apparently uses SVD anyway)
#
#underscore is the conventional "throwaway" name for the unused
#residuals / rank / singular-value return values
	param_diff, _, _, _ = np.linalg.lstsq(am.T, b)

#Note: need to use transpose here to match dimensions... explanation of why
#is written in my lab notebook, need to place here.

#introduce changes of free parameters into model

#p indexes into param_diff; it is advanced once per flagged parameter so
#each solution component is applied exactly once. The layer/flag walk below
#mirrors the traversal in calc_deriv, so the ordering of param_diff matches.
	p = 0

	for i in range(new_model.nlayers):

#first, extract said layer (layers are stored as attributes l1, l2, ...)

		l = getattr(new_model, 'l'+str(i+1))
		
#parse layer for flags: a string entry in flaglist marks a free parameter;
#collect the index locations of those entries
		flaglocs = [a for a in range(len(l.flaglist)) \
					if type(l.flaglist[a]) == str]
#source
#http://stackoverflow.com/questions/7270321/finding-the-index-of-elements-based-on-a-condition-using-python-list-comprehensi


#the negative sign for delta is crucial for driving model towards
#convergence (lstsq solved for syn - real, so we step the other way)

		for j in flaglocs:

			old_value = getattr(l, l.param_names[j])
			delta = -param_diff[p]

			logging.info("Adjusting parameter {param} in level {level:d}. Old Value: {old:5.3f}  Delta: {delta:5.3f}".\
				format(param = l.param_names[j], level = i+1, \
				old = old_value, delta = delta))

			#safety valve to prevent negative (unphysical) values:
			if (old_value + delta) < 0:
				logging.info("Delta produces negative values.\
				Dampening adjustment.")
				#instead, implement a +/- 10% adjustment with a
				#randomly chosen sign
				l.adj_param_epsilon(l.param_names[j], \
									epsilon = copysign(0.1, uniform(-1.0, 1.0)))
			else:
				l.adj_param_delta(l.param_names[j], delta = -param_diff[p])

			p += 1
			
			#write the mutated layer back into the model; harmlessly
			#repeated once per flagged parameter in this layer
			new_model.replace_layer(i+1, l)
			
	return new_model