def saveExecutionStatistics(execTime, coreSpeciesCount, coreReactionCount,
                            edgeSpeciesCount, edgeReactionCount, memoryUse,
                            restartSize):
    """
    Write the statistics of the RMG job to an Excel spreadsheet so they can
    be inspected once the run has finished.

    Every argument is an iterable of values and becomes one column of a
    sheet named 'Statistics': a header goes in the first row and the values
    follow beneath it, in iteration order. The workbook is saved as
    `statistics.xls` inside ``settings.outputDirectory``.

    The ``xlwt`` package is used to create the spreadsheet; if it cannot be
    imported a warning is logged and no file is written.
    """
    # xlwt is optional, so import it lazily and give up quietly if it is absent
    try:
        import xlwt
    except ImportError:
        logging.warning('Package xlwt not found. Unable to save execution statistics.')
        return

    # (header, data series) for each column, listed left to right
    columns = (
        ('Execution time (s)', execTime),
        ('Core species', coreSpeciesCount),
        ('Core reactions', coreReactionCount),
        ('Edge species', edgeSpeciesCount),
        ('Edge reactions', edgeReactionCount),
        ('Memory used (MB)', memoryUse),
        ('Restart file size (MB)', restartSize),
    )

    # Create the workbook and the sheet the statistics are placed on
    book = xlwt.Workbook()
    page = book.add_sheet('Statistics')

    for column, (header, series) in enumerate(columns):
        page.write(0, column, header)
        for offset, entry in enumerate(series):
            # Row 0 holds the header, so the data starts on row 1
            page.write(offset + 1, column, entry)

    # Save workbook to file
    book.save(os.path.join(settings.outputDirectory, 'statistics.xls'))
def __efficiency(self, T, E0, alpha, densStates, E):
    """
    Return a collision efficiency, limited to the range 0 to 1, evaluated at
    temperature `T` from the density of states `densStates` tabulated on the
    energy grid `E`.

    `E0` is the energy that separates the lower part of the grid from the
    upper part, and `alpha` is weighed against ``Fe * constants.R * T`` in
    the final expression. The grid spacing is taken from the first two
    entries of `E` only (presumably the grid is evenly spaced -- TODO
    confirm). If no grain above `E0` carries a nonzero weight, 1.0 is
    returned. A warning is logged whenever the unclamped value lies outside
    the range 0 to 1.
    """
    dE = E[1] - E[0]

    def weight(index):
        # Density of states times the exponential factor for one grain
        return densStates[index] * math.exp(-E[index] / constants.R / T)

    # First pass: sum the weights above E0 and normalise by the first
    # nonzero weight found there (scaled by R*T)
    upperSum = 0
    reference = 0
    for r, energy in enumerate(E):
        w = weight(r)
        if not energy > E0:
            continue
        upperSum += w * dE
        if reference == 0:
            reference = w * constants.R * T
    if reference == 0:
        return 1.0
    Fe = upperSum / reference

    # Second pass: plain and exponentially damped sums below E0, plus the
    # sum over the whole grid that normalises both
    lowerSum = 0
    dampedSum = 0
    totalSum = 0
    for r, energy in enumerate(E):
        w = weight(r)
        if energy < E0:
            lowerSum += w * dE
            dampedSum += w * dE * math.exp(-(E0 - energy) / (Fe * constants.R * T))
        totalSum += w * dE
    lowerSum /= totalSum
    dampedSum /= totalSum

    Delta = lowerSum - (Fe * constants.R * T) / (alpha + Fe * constants.R * T) * dampedSum
    beta = (alpha / (alpha + Fe * constants.R * T))**2 / Delta

    # Report values outside the valid range, then clamp them to it
    if beta < 0 or beta > 1:
        logging.warning('Invalid collision efficiency %s calculated at %s K.' % (beta, T))
        beta = 0 if beta < 0 else 1

    return beta
def generateSpectralData(struct, thermoData):
    """
    Generate the spectral data for a :class:`structure.Structure` object
    `struct` with corresponding :class:`thermo.ThermoGAData` object
    `thermoData`. The group frequency method is used to do so; this method
    has two steps:

    1. Search the structure for certain functional groups for which
       characteristic frequencies are known, and use those frequencies.
    2. For any remaining degrees of freedom, fit the parameters such that
       they replicate the heat capacity.

    This method only fits the internal modes (i.e. vibrations and hindered
    rotors).

    Returns the :class:`SpectralData` object holding the characteristic and
    fitted modes, or ``None`` if `struct` has fewer than two atoms.

    Raises :class:`Exception` if a functional group matches a number of
    times that is not a multiple of the first entry of its library record.
    A :class:`fit.SpectralFitException` raised while fitting is re-raised
    after the species is appended to its message.
    """

    # No spectral data for single atoms
    numAtoms = len(struct.atoms())
    if numAtoms < 2:
        return None

    # Determine linearity of structure
    linear = struct.isLinear()

    # Determine the number of internal modes for this structure
    numModes = 3 * numAtoms - (5 if linear else 6)

    # Determine the number of vibrational and hindered rotor modes for this
    # structure
    numRotors = struct.countInternalRotors()
    numVibrations = numModes - numRotors

    # For each group in library, find all subgraph isomorphisms. The first
    # entry of each library record is the number of matches that make up one
    # occurrence of the group; the rest are its characteristic frequencies.
    groupCount = {}
    for node, data in frequencyDatabase.library.iteritems():
        ismatch, map12List, map21List = struct.findSubgraphIsomorphisms(frequencyDatabase.dictionary[node])
        count = len(map12List) if ismatch else 0
        if count % data[0] != 0:
            raise Exception('Incorrect number of matches of node "%s" while estimating frequencies of %s; expected a multiple of %s, got %s.' % (node, struct, data[0], count))
        # Exact because of the check above
        groupCount[node] = count // data[0]

    def characteristicFrequencies():
        # Collect the frequencies generated by every group still counted
        found = []
        for node, count in groupCount.iteritems():
            for charFreq in frequencyDatabase.library[node][1:]:
                found.extend(charFreq.generateFrequencies(count))
        return found

    # Get characteristic frequencies
    frequencies = characteristicFrequencies()

    # Check that we have the right number of degrees of freedom specified
    if len(frequencies) > numVibrations:
        # We have too many vibrational modes
        difference = len(frequencies) - numVibrations
        if numRotors >= difference:
            # First try to remove hindered rotor modes until the proper
            # number of modes remains
            numRotors -= difference
            numVibrations = len(frequencies)
            logging.warning('For %s, more characteristic frequencies were generated than vibrational modes allowed. Removed %i internal rotors to compensate.' % (struct, difference))
        else:
            # If that won't work, turn off functional groups (smallest total
            # degeneracy first) until the problem is underspecified again
            groupsRemoved = 0
            freqsRemoved = 0
            freqCount = len(frequencies)
            while freqCount > numVibrations:
                minDegeneracy, minNode = min([(sum([charFreq.degeneracy for charFreq in frequencyDatabase.library[node][1:]]), node) for node in groupCount if groupCount[node] > 0])
                if groupCount[minNode] > 1:
                    groupCount[minNode] -= 1
                else:
                    del groupCount[minNode]
                groupsRemoved += 1
                freqsRemoved += minDegeneracy
                freqCount -= minDegeneracy
            # Log warning
            logging.warning('For %s, more characteristic frequencies were generated than vibrational modes allowed. Removed %i groups (%i frequencies) to compensate.' % (struct, groupsRemoved, freqsRemoved))
            # Regenerate characteristic frequencies
            frequencies = characteristicFrequencies()

    # Create spectral data object with characteristic frequencies
    spectralData = SpectralData()
    for freq in frequencies:
        spectralData.modes.append(HarmonicOscillator(frequency=freq))

    # Subtract out contributions to heat capacity from the characteristic modes
    import numpy
    Cp = [thermoData.getHeatCapacity(T) for T in thermo.ThermoGAData.CpTlist]
    Cv = numpy.array(Cp) / constants.R
    Tlist = numpy.array(thermo.ThermoGAData.CpTlist)
    for mode in spectralData.modes:
        Cv -= mode.getHeatCapacity(Tlist)
    # Subtract out translational modes
    Cv -= 1.5
    # Subtract out external rotational modes
    Cv -= (1.5 if not linear else 1.0)
    # Subtract out PV term (Cp -> Cv)
    Cv -= 1.0
    # NOTE(review): the remaining Cv is not checked for positivity before
    # fitting -- confirm whether negative values should be rejected here.

    # Fit remaining frequencies and hindered rotors to the heat capacity data
    import fit
    try:
        vib, hind = fit.fitSpectralDataToHeatCapacity(struct, Tlist, Cv, numVibrations - len(frequencies), numRotors)
        for freq, degen in vib:
            spectralData.modes.append(HarmonicOscillator(frequency=freq, degeneracy=degen))
        for freq, barr, degen in hind:
            spectralData.modes.append(HinderedRotor(frequency=freq, barrier=barr, degeneracy=degen))

        # Check: Does the fitted data reproduce the heat capacity data?
        # We use root mean square error per data point as our basis for judging
        # NOTE(review): only the translational and external rotational terms
        # are added back here, not the 1.0 PV term subtracted above --
        # confirm whether spectralData.getHeatCapacity() accounts for it.
        Cp_fit = spectralData.getHeatCapacity(Tlist) + (3.0 if not linear else 2.5)
        Cp_data = [thermoData.getHeatCapacity(T) / constants.R for T in Tlist]
        meanSquare = 0.0
        for i, target in enumerate(Cp_data):
            residual = Cp_fit[i] - target
            meanSquare += residual * residual
        meanSquare /= len(Tlist)
        # Take the root so the reported number really is an RMS; with a
        # threshold of 1.0 this does not change when the warning fires
        rms = numpy.sqrt(meanSquare)
        if rms > 1.0:
            logging.warning('Fitted spectral data may not reproduce heat capacity data to within tolerance. RMS/point = %s' % rms)
    except fit.SpectralFitException as e:
        e.msg += '\nThe species I was fitting spectral data to was %s' % str(struct)
        raise

    return spectralData
def fit_groups(family_names = None):
    """
    Decouple the nested group tree of each reaction family and fit a value
    to every group by linear least squares.

    If `family_names` is given, only those families are processed; otherwise
    every family in ``reaction.kineticsDatabase.families`` is. For each
    family that has library data:

    1. Library entries with a nonzero ``alpha`` are deleted from
       ``family.library`` (the database object itself is modified).
    2. Every remaining rate contributes one matrix row per combination of
       its nodes and their ancestors; each row flags the non-top-level
       groups present in that combination plus a trailing constant column.
       The right-hand side is log10(k) at the temperatures in ``Ts``.
    3. The system is solved with ``numpy.linalg.lstsq`` and error statistics
       are computed per group and per rate.
    4. The results are printed and plotted with ``pylab``; the second plot
       has a pick handler that prints details of the clicked group.

    This is an interactive analysis routine: it prints to standard output,
    calls ``pylab.show()`` and stops in the ``pdb`` debugger. Nothing is
    returned.
    """
    import os
    import math
    import numpy
    import numpy.linalg
    import pylab

    if not family_names:
        family_names = reaction.kineticsDatabase.families.keys()

    for family_name in family_names:
        family = reaction.kineticsDatabase.families[family_name]
        print
        # Skip families without library data
        if not family.library:
            logging.debug("Family '%s' has no data in the library."%family_name)
            if family.reverse.library:
                logging.debug("(but its reverse '%s' does)"%family.reverse.label)
            continue

        logging.info("Fitting groups for reaction family: %s (%s)"%(family_name, os.path.basename(os.path.abspath(family._path))) )

        # Get set of all nodes; a node whose parent entry is false counts as
        # a top-level node
        node_set = family.tree.parent.keys()
        non_top_nodes = [ node for node in node_set if family.tree.parent[node] ]
        top_nodes = [ node for node in node_set if not family.tree.parent[node] ]
        group_names = [node for node in non_top_nodes] # poor man's copy
        # The extra "Constant" group matches the constant column appended to
        # every matrix row below; it is registered in the tree (with no
        # children) so it can be printed like any other node
        group_names.append("Constant")
        family.tree.children['Constant']=[]

        #: a dictionary of lists. key = node, value = list of kinetics items which contributed to that node
        kinetics_used_in={'Constant':[]}
        for node in node_set: # must initialise in loop so each has a separate list instance!
            kinetics_used_in[node] = list()

        # Temperatures at which the rates are evaluated for fitting
        Ts = [300, 500, 1000, 1500]

        def rates_string(k):
            """Return a string of log10 of the rate constant of `k` at each
            temperature in ``Ts``, formatted with two decimals. ``Hrxn`` is
            read from the enclosing scope when the function is called."""
            string = "%5.2f "*len(Ts)
            return string%tuple([ math.log10(k.getRateConstant(T,Hrxn)) for T in Ts ])

        A_list = []
        b_list = []
        # Get available data
        # Entries with a nonzero alpha are collected first and deleted
        # afterwards, so the library is not modified while it is iterated
        to_delete=[]
        for key, kinetics in family.library.iteritems():
            if kinetics.alpha:
                logging.warning("Warning: %s %s has EP alpha = %g"%(kinetics.index, kinetics.label, kinetics.alpha))
                to_delete.append(key)
            #if re.search('O2b',kinetics.label):
            #    logging.warning("Removing %s %s because I don't like O2b"%(kinetics.index, kinetics.label))
            #    to_delete.append(key)
            #
        for key in to_delete:
            del family.library[key]
            logging.warning("Deleting %s from kinetics library!"%key)

        for key, kinetics in family.library.iteritems():
            nodes = key.split(';')
            # example:
            #  nodes = ['A11', 'B11']
            #  kinetics = <rmg.reaction.ArrheniusEPKinetics instance>
            #b_row = [ math.log(kinetics.A),
            #          kinetics.n,
            #          kinetics.alpha,
            #          kinetics.E0 ]
            if kinetics.alpha:
                logging.warning("Warning: %s has EP alpha = %g"%(nodes,kinetics.alpha))
            # Rates are evaluated with a reaction enthalpy argument of zero
            Hrxn=0
            b_row = [ math.log10(kinetics.getRateConstant(T,Hrxn)) for T in Ts ]

            all_ancestors=list()
            kinetics.used_in_groups = list()
            kinetics.used_in_combinations = list()
            for node in nodes:
                # start with the most specific - the node itself
                # then add the ancestors
                ancestors = [node]
                ancestors.extend( family.tree.ancestors(node) )
                # append to the list of lists
                all_ancestors.append(ancestors)
                # add to the list
                kinetics.used_in_groups.extend(ancestors)
                for ancestor in ancestors:
                    kinetics_used_in[ancestor].append(kinetics)
            kinetics_used_in['Constant'].append(kinetics)

            # example
            #  all_ancestors = [['A11','A1','A'], ['B11','B1','B']]
            #  kinetics.used_in_groups = [ 'A11','A1','A','B11','B1','B' ]
            #  kinetics_used_in['A11'] = kinetics_used_in['A1'] ... = [... <kinetics>]

            all_combinations = data.getAllCombinations(all_ancestors)
            # example:
            #  all_combinations =
            #  [['A11', 'B11'], ['A1', 'B11'], ['A', 'B11'], ['A11', 'B1'],
            #   ['A1', 'B1'], ['A', 'B1'], ['A11', 'B'], ['A1', 'B'], ['A', 'B']]

            for combination in all_combinations:
                # Create a row of the A matrix. Each column is for a non_top_node
                # It contains 1 if that node exists in combination, else 0
                A_row = [int(node in combination) for node in non_top_nodes]
                # Add on a column at the end for constant C which is always there
                A_row.append(1)

                # Remember which matrix rows belong to this rate
                kinetics.used_in_combinations.append(len(A_list))
                A_list.append(A_row)
                b_list.append(b_row)

        A = numpy.array(A_list)
        b = numpy.array(b_list)

        logging.info("Library contained %d rates"%len(family.library))
        logging.info("Matrix for inversion is %d x %d"%A.shape)

        # Least-squares solution: one row of x per group, one column per temperature
        x, residues, rank, s = numpy.linalg.lstsq(A,b)

        fitted_b = numpy.dot(A,x)
        errors = fitted_b - b
        #: squared and SUMMED over temperatures, not averaged
        errors_sum_squared = numpy.sum(errors*errors, axis=1)

        group_values=dict()
        group_error=dict()
        group_count=dict()
        group_error_MAD_by_T=dict()

        # Top-level nodes have no matrix column, so they get zero placeholders
        for node in top_nodes:
            group_values[node] = tuple([0 for i in Ts]) # eg. (0 0 0 0 0)
            group_error[node] = 0
            group_count[node] = 0
            group_error_MAD_by_T[node] = tuple([0 for i in Ts]) # eg. (0 0 0 0 0)

        for i in range(len(x)):
            group_values[group_names[i]] = tuple(x[i,:])

        for i in range(len(x)): # for each group
            #: vector of 1s and 0s, one for each rate-group
            rates_in_group = A[:,i]
            #: number of data points training this group (each measured rate may be counted many times)
            group_count[group_names[i]] = sum(rates_in_group)
            #: RMS error for this group (where M = mean over temperatures and rates training the group)
            group_error[group_names[i]] = numpy.sqrt( sum(rates_in_group * errors_sum_squared) / sum(rates_in_group) / len(Ts) )
            #: Mean Absolute Deviation, reported by Temperature (as tuple)
            group_error_MAD_by_T[group_names[i]] = tuple( numpy.dot(rates_in_group, abs(errors)) / sum(rates_in_group) )

        for key, kinetics in family.library.iteritems():
            rows = kinetics.used_in_combinations
            #: RMS error for this rate (where M = mean over temperatures and group combinations it's estimated by)
            kinetics.RMS_error = numpy.sqrt( sum([errors_sum_squared[i] for i in rows]) / len(rows) / len(Ts) )
            kinetics.key = key
        rates = family.library.values()
        rates.sort(cmp=lambda x,y: cmp(x.RMS_error, y.RMS_error))
        print "Rate expressions sorted by how well they are predicted by their group combinations"

        rates_1000 = []
        rates_err = []
        for k in rates:
            print "%-5s %-30s\tRMS error: %.2f Rates: %s %.30s"%(k.index, k.key, k.RMS_error, rates_string(k), k.comment )
            rates_1000.append( math.log10(k.getRateConstant(1000,Hrxn)) )
            rates_err.append( k.RMS_error ) # [Ts.index(T)]
        rates_1000 = numpy.array(rates_1000)
        rates_err = numpy.array(rates_err)

        # First figure: RMS error of each rate against its log10(k) at 1000 K
        fig_number = family_names.index(family_name)
        fig1 = pylab.figure( fig_number )
        pylab.plot(rates_1000, rates_err, 'o')
        pylab.xlabel('log10(k) at 1000K')
        pylab.ylabel('RMSE')
        pylab.show()

        def print_node_tree(node,indent=0):
            # Print one line for `node` (fitted values, RMS error, count and
            # MAD per temperature), then its children in sorted order, one
            # indent level deeper. Sorting is done in place on the tree's
            # own child list.
            print (' '*indent +
                   node.ljust(17-indent) +
                   ("\t%7.2g"*len(group_values[node])) % group_values[node] +
                   "\t%6.2g\t%d"%(group_error[node],group_count[node]) +
                   ("\t%7.3g"*len(group_error_MAD_by_T[node])) % group_error_MAD_by_T[node]
                  )
            children = family.tree.children[node]
            if children:
                children.sort()
                for child in children:
                    # recurse!
                    print_node_tree(child,indent+1)

        print ("Log10(k) at T=   " +
               ("\t%7g"*len(Ts)) % tuple(Ts) +
               '\t RMS\tcount' +
               ("\tMAD @ %d"*len(Ts)) % tuple(Ts)
              )
        print_node_tree('Constant')
        for node in top_nodes:
            print_node_tree(node)
        print

        # Second figure: RMS error of each group against the number of data
        # points that trained it; points can be clicked for details
        fig = pylab.figure( 100 + fig_number )
        xvals = numpy.array([ group_count[group] for group in group_names ])
        yvals = numpy.array([ group_error[group] for group in group_names ])
        pylab.semilogx(xvals,yvals,'o',picker=5) # 5 points tolerance
        pylab.title(family_name)

        def onpick(event):
            # Pick-event handler: for every picked point, print the group's
            # statistics and the rates that contributed to it, sorted by
            # their RMS error (the list in kinetics_used_in is sorted in place)
            thisline = event.artist
            xdata = thisline.get_xdata()
            ydata = thisline.get_ydata()
            for ind in event.ind:
                group_name = group_names[ind]
                print "#%d Name: %s \tRates:%d \tNode-Rates:%d \tRMS error: %g"%(ind, group_name, len(kinetics_used_in[group_name]) , xvals[ind], yvals[ind])
                print "MAD errors:"+(" %.2f"*len(Ts))%group_error_MAD_by_T[group_name]
                print "Kinetics taken from:"
                rates = kinetics_used_in[group_name]
                rates.sort(cmp=lambda x,y: cmp(x.RMS_error, y.RMS_error))
                for k in rates:
                    print "%s\tIndex:%s \t%s "%(k.key,k.index,repr(k))
                    print "RMS error: %.2f"%(k.RMS_error),
                    print "Rates: ",rates_string(k)
                    for combo in k.used_in_combinations:
                        #print "A[%d,%d] ="%(combo,ind),A[combo,ind]
                        if not A[combo,ind]:
                            #print "Rate didn't use the node in question (presumably used an ancestor)"
                            continue
                        print "Using",
                        used_nodes = [ group_names[i] for i in A[combo,:].nonzero()[0] ]
                        used_nodes.remove(group_name)
                        print group_name + ' with ' + ' + '.join(used_nodes) + '\t',
                        rms = numpy.sqrt( errors_sum_squared[combo] / len(Ts) )
                        print "RMSE: %.2f Err(T):"%(rms), errors[combo]
                    print
                #print 'check %g:'%ind, zip(xdata[ind], ydata[ind])

        connection_id = fig.canvas.mpl_connect('pick_event', onpick)
        # disconnect with: fig.canvas.mpl_disconnect(connection_id)
        pylab.show()
        #http://matplotlib.sourceforge.net/users/event_handling.html

        # Stops in the interactive debugger so the fitted data can be inspected.
        # NOTE(review): debugging breakpoint left in the code -- confirm it is intended.
        import pdb; pdb.set_trace()
def calculateRateCoefficients(self, Tlist, Plist, Elist, method, errorCheck=True):
    """
    Calculate the phenomenological rate coefficients for the network.

    `Tlist` and `Plist` are the temperatures and pressures to evaluate at,
    `Elist` is the energy grid passed on to the isomer and reaction
    calculations, `method` names the approximate method handed to
    :meth:`applyApproximateMethod`, and `errorCheck` is forwarded to that
    method unchanged.

    Returns a ``numpy.float64`` array of shape ``(len(Tlist), len(Plist),
    len(self.isomers), len(self.isomers))`` holding the result of
    :meth:`applyApproximateMethod` at each temperature and pressure.

    If a :class:`UnirxnNetworkException` is raised the network is written to
    `unirxn_input.xml`. When `method` is ``'reservoirstate'`` (compared
    case-insensitively) the calculation is then repeated with
    ``'modifiedstrongcollision'`` and that result is returned; for any other
    method the exception is re-raised.
    """

    K = numpy.zeros([len(Tlist), len(Plist),
        len(self.isomers), len(self.isomers)], numpy.float64)

    try:

        for t, T in enumerate(Tlist):

            # Calculate equilibrium distributions
            for isomer in self.isomers:
                if isomer.densStates is not None:
                    isomer.calculateEqDist(Elist, T)

            # Calculate microcanonical rates k(E)
            # It might seem odd that this is dependent on temperature, and it
            # isn't -- unless the Arrhenius expression has a negative n
            for reaction in self.pathReactions:
                reaction.kf, reaction.kb = reaction.calculateMicrocanonicalRate(Elist, T,
                    reaction.reactant.densStates, reaction.product.densStates)

            for p, P in enumerate(Plist):

                # Calculate collision frequencies
                for isomer in self.isomers:
                    if isomer.isUnimolecular():
                        isomer.calculateCollisionFrequency(T, P, self.bathGas)

                # Determine phenomenological rate coefficients using approximate
                # method
                K[t,p,:,:] = self.applyApproximateMethod(T, P, Elist, method, errorCheck)

    except UnirxnNetworkException as e:

        fallback = (method.lower() == 'reservoirstate')

        # A failure we can recover from is only a warning
        if fallback:
            logging.warning(e.msg)
        else:
            logging.error(e.msg)

        # Save network to file for later testing
        fstr = 'unirxn_input.xml'
        logging.info('Troublesome network saved to %s.' % fstr)
        import io
        io.writeInputFile(fstr, self, Tlist, Plist, Elist, method, 'none')

        if fallback:
            logging.info('Falling back to modified strong collision for this network.')
            # NOTE(review): errorCheck is not forwarded, so the fallback
            # always runs with the default (True) -- confirm this is intended.
            return self.calculateRateCoefficients(Tlist, Plist, Elist, 'modifiedstrongcollision')
        else:
            # Bare raise keeps the traceback of the original failure
            raise

    return K
def fitSpectralDataToHeatCapacity(struct, Tlist, Cvlist, Nvib, Nrot):
    """
    For a given set of dimensionless heat capacity data `Cvlist` corresponding
    to temperature list `Tlist` in K, fit `Nvib` harmonic oscillator and `Nrot`
    hindered internal rotor modes. The fitting is done by calling into Fortran
    in order to use the DQED nonlinear constrained optimization code. `struct`
    is only used to identify the species in log messages.

    This function returns two lists: the first contains lists of
    frequency-degeneracy pairs representing fitted harmonic oscillator modes,
    and the second contains lists of frequency-barrier-degeneracy triples
    representing fitted hindered rotor modes. Both are empty if neither
    `Nvib` nor `Nrot` is positive.

    Raises :class:`Exception` if pseudo-oscillators and pseudo-rotors would
    have to be fitted but fewer than 7 heat capacity points were given, and
    :class:`SpectralFitException` if the solution vector contains non-finite
    values.
    """

    # Nothing to fit
    if Nvib <= 0 and Nrot <= 0:
        return [], []

    # Setup the initial guess and the bounds for the solver variables
    # The format of the variables depends on the numbers of oscillators and
    # rotors being fitted:
    #	-	For low values of Nvib and Nrot, we can fit the individual
    #		parameters directly
    #	-	For high values of Nvib and/or Nrot we are limited by the number of
    #		temperatures we are fitting at, and so we can only fit
    #		pseudo-oscillators and/or pseudo-rotors
    # The case is chosen once, so that the solution vector is always
    # post-processed by the counterpart of the routine that set it up
    numPoints = len(Tlist)
    if Nvib + 2 * Nrot < numPoints:
        setup, postprocess = setupCaseDirect, postprocessCaseDirect
    elif Nvib + 2 < numPoints:
        setup, postprocess = setupCasePseudoRot, postprocessCasePseudoRot
    elif numPoints < 7:
        raise Exception('Unable to fit spectral data; you need to specify at least 7 heat capacity points.')
    else:
        setup, postprocess = setupCasePseudo, postprocessCasePseudo
    x0, bl, bu, ind = setup(Nvib, Nrot)

    # Set parameters that are not needed by the solver but are needed to
    # evaluate the objective function and its Jacobian
    # These are stored in a Fortran 90 module called params
    _fit.setparams(
        p_tlist = numpy.array(Tlist),
        p_cvlist = numpy.array(Cvlist),
        p_mcon = 0,
        p_mequa = numPoints,
        p_nvars = len(x0),
        p_nvib = Nvib,
        p_nrot = Nrot
        )

    try:
        # Execute the optimization, passing the initial guess and bounds and
        # other solver options
        x, igo = _fit.fitmodes(
            x0 = numpy.array(x0),
            bl = numpy.array(bl),
            bu = numpy.array(bu),
            ind = numpy.array(ind),
            maxiter = maxIter,
            )
    finally:
        # Clean up the temporary variables stored via _fit.setparams()
        # earlier, even if the solver call failed
        _fit.cleanup()

    if not numpy.isfinite(x).all():
        raise SpectralFitException('Returned solution vector is nonsensical: x = %s.' % (x))
    if igo == 8:
        # The solution is still used; it is only flagged in the log
        logging.warning('Maximum number of iterations reached when fitting spectral data for %s.' % str(struct))

    # Convert the solution vector into sets of oscillators and rotors
    vib, rot = postprocess(Nvib, Nrot, x)

    return vib, rot