Exemple #1
0
    def fillRulesByAveragingUp(self, rootTemplate, alreadyDone, verbose=False):
        """
        Recursively estimate kinetics for `rootTemplate` by averaging the
        kinetics found for its distance-1 child templates.

        `alreadyDone` is a dict used as a memo: it maps a template label (the
        group labels joined with ';') to the kinetics obtained for that
        template, or to ``None`` when nothing could be obtained. It is updated
        in place.

        If `verbose` is True, the comment written on an averaged result quotes
        each child's own kinetics comment (falling back to the child's label
        when that comment is empty); otherwise only the child labels are used.
        Verbose comments can become very long and memory hungry.

        Returns the kinetics for `rootTemplate`, or ``None`` if neither an
        existing rule nor any child kinetics were available.
        """
        def labelOf(groups):
            # Canonical label of a template: its group labels joined by ';'
            return ';'.join([g.label for g in groups])

        def describe(k, t):
            # Text used to name one averaged source inside the comment
            if verbose and k.comment != '':
                return k.comment
            return labelOf(t)

        rootLabel = labelOf(rootTemplate)
        if rootLabel in alreadyDone:
            return alreadyDone[rootLabel]

        # Build the distance-1 templates: for each position whose node has
        # children, keep every other position fixed and substitute each child
        # in turn. For (A,B) this gives (A's children, B) and (A, B's children).
        # Positions whose node has no children contribute nothing.
        childTemplates = []
        for position, node in enumerate(rootTemplate):
            if not node.children:
                continue
            expanded = [[member] for member in rootTemplate]
            expanded[position] = node.children
            childTemplates.extend(getAllCombinations(expanded))

        # Collect the kinetics of every child template, recursing when a child
        # has not been visited yet
        found = []
        for childTemplate in childTemplates:
            childLabel = labelOf(childTemplate)
            if childLabel in alreadyDone:
                childKinetics = alreadyDone[childLabel]
            else:
                childKinetics = self.fillRulesByAveragingUp(childTemplate, alreadyDone, verbose)
            if childKinetics is not None:
                found.append([childKinetics, childTemplate])

        # Only now that the children have been processed do we check for a
        # rule on this exact template. A rule of rank zero is trusted so little
        # that an averaged value is preferred when one is available.
        entry = self.getRule(rootTemplate)
        if entry is not None and entry.rank > 0:
            alreadyDone[rootLabel] = entry.data
            return entry.data

        if not found:
            alreadyDone[rootLabel] = None
            return None

        if len(found) > 1:
            kinetics = self.__getAverageKinetics([k for k, t in found])
            sources = ' + '.join([describe(k, t) for k, t in found])
        else:
            # A single source is still reported as an average: the other
            # distance-1 children simply had no data.
            k, t = found[0]
            kinetics = deepcopy(k)
            sources = describe(k, t)
        kinetics.comment = 'Average of [{0}]'.format(sources)

        entry = Entry(
            index = 0,
            label = rootLabel,
            item = rootTemplate,
            data = kinetics,
            rank = 10, # Indicates this is an averaged estimate
        )
        self.entries[entry.label] = [entry]
        alreadyDone[rootLabel] = entry.data
        return entry.data
Exemple #2
0
    def fillRulesByAveragingUp(self, rootTemplate, alreadyDone):
        """
        Recursively estimate kinetics for `rootTemplate` by averaging the
        kinetics of the templates formed from its child nodes.

        `alreadyDone` is a dict memo from template label (group labels joined
        with ';') to the kinetics obtained for it, or ``None`` when nothing
        was obtained; it is updated in place.

        Returns the kinetics for `rootTemplate`, or ``None`` if neither an
        existing rule nor any child kinetics were available.
        """
        rootLabel = ';'.join([g.label for g in rootTemplate])
        if rootLabel in alreadyDone:
            return alreadyDone[rootLabel]

        # An existing rule of nonzero rank wins outright. A rank of zero means
        # so little confidence that an averaged value is preferred if possible.
        existing = self.getRule(rootTemplate)
        if existing is not None and existing.rank > 0:
            alreadyDone[rootLabel] = existing.data
            return existing.data

        # For every position offer the node's children, or the node itself
        # when it has none, then take all combinations across positions
        options = []
        for node in rootTemplate:
            if len(node.children) > 0:
                options.append(list(node.children))
            else:
                options.append([node])

        found = []
        for template in getAllCombinations(options):
            # When every node is a leaf the only combination is the root
            # template itself; skip it so we do not recurse forever
            if template == rootTemplate:
                continue

            label = ';'.join([g.label for g in template])
            if label in alreadyDone:
                kinetics = alreadyDone[label]
            else:
                kinetics = self.fillRulesByAveragingUp(template, alreadyDone)

            if kinetics is not None:
                found.append([kinetics, template])

        if not found:
            alreadyDone[rootLabel] = None
            return None

        # We found one or more results! Let's average them together
        kinetics = self.__getAverageKinetics([k for k, t in found])
        descriptions = [
            k.comment if k.comment != '' else ';'.join(g.label for g in t)
            for k, t in found
        ]
        if len(found) > 1:
            kinetics.comment += 'Average of ({0})'.format(' + '.join(descriptions))
        else:
            # A single source is appended as-is, without the "Average of" wrapper
            kinetics.comment += descriptions[0]

        entry = Entry(
            index=0,
            label=rootLabel,
            item=rootTemplate,
            data=kinetics,
            rank=10,  # Indicates this is an averaged estimate
        )
        self.entries[entry.label] = [entry]
        alreadyDone[rootLabel] = entry.data
        return entry.data
Exemple #3
0
 def fillRulesByAveragingUp(self, rootTemplate, alreadyDone):
     """
     Recursively estimate kinetics for `rootTemplate` by averaging the
     kinetics of the templates formed from its child nodes.

     `alreadyDone` is a dict memo from template label (group labels joined
     with ';') to the kinetics obtained for it, or ``None`` when nothing was
     obtained; it is updated in place.

     Returns the kinetics for `rootTemplate`, or ``None`` if neither an
     existing rule nor any child kinetics were available.
     """
     rootLabel = ';'.join([g.label for g in rootTemplate])
     if rootLabel in alreadyDone:
         return alreadyDone[rootLabel]

     # An existing rule of nonzero rank wins outright. A rank of zero means
     # so little confidence that an averaged value is preferred if possible.
     existing = self.getRule(rootTemplate)
     if existing is not None and existing.rank > 0:
         alreadyDone[rootLabel] = existing.data
         return existing.data

     # For every position offer the node's children, or the node itself when
     # it has none, then take all combinations across positions
     options = []
     for node in rootTemplate:
         if len(node.children) > 0:
             options.append(list(node.children))
         else:
             options.append([node])

     found = []
     for template in getAllCombinations(options):
         # When every node is a leaf the only combination is the root
         # template itself; skip it so we do not recurse forever
         if template == rootTemplate:
             continue

         label = ';'.join([g.label for g in template])
         if label in alreadyDone:
             kinetics = alreadyDone[label]
         else:
             kinetics = self.fillRulesByAveragingUp(template, alreadyDone)

         if kinetics is not None:
             found.append([kinetics, template])

     if not found:
         alreadyDone[rootLabel] = None
         return None

     # We found one or more results! Let's average them together
     kinetics = self.__getAverageKinetics([k for k, t in found])
     descriptions = []
     for k, t in found:
         if k.comment != '':
             descriptions.append(k.comment)
         else:
             # Note: labels are joined with ',' here, not ';'
             descriptions.append(','.join([g.label for g in t]))
     kinetics.comment += 'Average of ({0}). '.format(' + '.join(descriptions))

     entry = Entry(
         index = 0,
         label = rootLabel,
         item = rootTemplate,
         data = kinetics,
         rank = 10, # Indicates this is an averaged estimate
     )
     self.entries[entry.label] = [entry]
     alreadyDone[rootLabel] = entry.data
     return entry.data
Exemple #4
0
    def fillRulesByAveragingUp(self, rootTemplate, alreadyDone, verbose=False):
        """
        Fill in gaps in the kinetics rate rules by averaging child nodes.

        Every distance-1 child template of `rootTemplate` is visited
        recursively, but only the children whose substituted node has the
        smallest `nodalDistance` contribute to the average for `rootTemplate`.

        `alreadyDone` is a dict memo from template label (group labels joined
        with ';') to the kinetics obtained for it, or ``None`` when nothing was
        obtained; it is updated in place.

        If `verbose` is set to True, then exact sources of kinetics are saved
        in the kinetics comments (warning: this uses up a lot of memory due to
        the extensively long comments).

        Returns the kinetics for `rootTemplate`, or ``None`` if neither an
        existing rule nor any close-child kinetics were available.
        """
        def labelOf(groups):
            # Canonical label of a template: its group labels joined by ';'
            return ';'.join([g.label for g in groups])

        def describe(k, t):
            # Text used to name one averaged source inside the comment
            if verbose and k.comment != '':
                return k.comment
            return labelOf(t)

        rootLabel = labelOf(rootTemplate)

        if rootLabel in alreadyDone:
            return alreadyDone[rootLabel]

        # Generate the distance 1 pairings which must be averaged for this root template.
        # The distance 1 template is created by taking the parent node from one or more trees
        # and creating the combinations with children from a single remaining tree.
        # i.e. for some node (A,B), we want to fetch all combinations for the pairing of (A,B's children) and
        # (A's children, B).  For node (A,B,C), we would retrieve all combinations of (A,B,C's children)
        # (A,B's children,C) etc...
        # If a particular node has no children, it is skipped from the children expansion altogether.

        childrenList = []
        distanceList = []
        for i, parent in enumerate(rootTemplate):
            # Start with the root template, and replace the ith member with its children
            if parent.children:
                childrenSet = [[group] for group in rootTemplate]
                childrenSet[i] = parent.children
                childrenList.extend(getAllCombinations(childrenSet))
                # One distance per child; assumes getAllCombinations yields
                # exactly one template per child, in child order, so that the
                # two lists stay aligned -- TODO confirm against its definition
                distanceList.extend([k.nodalDistance for k in parent.children])

        # Average only the minimum-distance neighbours. The two lists are
        # paired with zip() rather than an xrange() index loop so the code
        # runs unchanged on both Python 2 and Python 3.
        if distanceList:
            minDist = min(distanceList)
            closeChildrenList = [
                template
                for template, distance in zip(childrenList, distanceList)
                if distance == minDist
            ]
        else:
            closeChildrenList = []

        kineticsList = []
        for template in childrenList:
            label = labelOf(template)

            # Every child is still visited (and therefore filled in), even
            # those that are too distant to take part in this average
            if label in alreadyDone:
                kinetics = alreadyDone[label]
            else:
                kinetics = self.fillRulesByAveragingUp(template, alreadyDone, verbose)

            if template in closeChildrenList and kinetics is not None:
                kineticsList.append([kinetics, template])

        # See if we already have a rate rule for this exact template instead
        # and return it now that we have finished searching its children
        entry = self.getRule(rootTemplate)

        if entry is not None and entry.rank > 0:
            # We already have a rate rule for this exact template
            # If the entry has rank of zero, then we have so little faith
            # in it that we'd rather use an averaged value if possible
            # Since this entry does not have a rank of zero, we keep its
            # value
            alreadyDone[rootLabel] = entry.data
            return entry.data

        if not kineticsList:
            alreadyDone[rootLabel] = None
            return None

        if len(kineticsList) > 1:
            # We found several results! Let's average them together
            kinetics = self.__getAverageKinetics([k for k, t in kineticsList])
            sources = ' + '.join([describe(k, t) for k, t in kineticsList])
        else:
            # Even though we are using just a single set of kinetics, it's still considered
            # an average.  It just happens that the other close children had no data.
            k, t = kineticsList[0]
            kinetics = deepcopy(k)
            sources = describe(k, t)
        kinetics.comment = 'Average of [{0}]'.format(sources)

        entry = Entry(
            index = 0,
            label = rootLabel,
            item = rootTemplate,
            data = kinetics,
            rank = 11, # Indicates this is an averaged estimate
        )
        self.entries[entry.label] = [entry]
        alreadyDone[rootLabel] = entry.data
        return entry.data
Exemple #5
0
    def generateGroupAdditivityValues(self,
                                      trainingSet,
                                      kunits,
                                      method='Arrhenius'):
        """
        Generate the group additivity values using the given `trainingSet`,
        a list of 2-tuples of the form ``(template, kinetics)``. You must also
        specify the `kunits` for the family and the `method` to use when
        generating the group values.

        `method` selects the least-squares fit that is performed:

        * ``'KineticsData'`` fits log10 k(T) at a fixed list of temperatures
          and stores a ``KineticsData`` object (with uncertainties when they
          are all finite) on every entry in the tree.
        * ``'Arrhenius'`` fits (ln A, n, Ea) for each group against ln k(T)
          sampled at the same temperatures.
        * ``'Arrhenius2'`` fits (ln A, n, Ea) for each group directly against
          the parameters of the training kinetics; only ``Arrhenius`` objects,
          and ``ArrheniusEP`` objects whose alpha is zero, contribute rows.

        Any other `method` value performs no fit and only runs the change
        check described below.

        Returns ``True`` if the group values have changed significantly since
        the last time they were fitted, or ``False`` otherwise. "Unchanged"
        means every entry has data both before and after and all compared
        values agree to within 1%; an entry that has no data, or had none
        before, counts as changed. If the training set produces no rows to
        fit, a warning is logged and ``None`` is returned.

        Raises ``Exception`` (for the ``'KineticsData'`` and ``'Arrhenius'``
        methods) if a training kinetics object is not an ``Arrhenius``,
        ``KineticsData`` or ``ArrheniusEP``.
        """

        # keep track of previous values so we can detect if they change
        old_entries = dict()
        for label, entry in self.entries.items():
            if entry.data is not None:
                old_entries[label] = entry.data

        # Determine a complete list of the entries in the database, sorted as in the tree
        groupEntries = self.top[:]
        for entry in self.top:
            groupEntries.extend(self.descendants(entry))

        # Determine a unique list of the groups we will be able to fit parameters for
        groupList = []
        for template, kinetics in trainingSet:
            for group in template:
                if group not in self.top:
                    groupList.append(group)
                    # The last ancestor is dropped; presumably it is the top
                    # node, which is fitted separately as the constant term
                    groupList.extend(self.ancestors(group)[:-1])
        groupList = list(set(groupList))
        groupList.sort(key=lambda x: x.index)

        if method == 'KineticsData':
            # Fit a discrete set of k(T) data points by training against k(T) data

            Tdata = numpy.array([300, 400, 500, 600, 800, 1000, 1500, 2000])

            # Initialize dictionaries of fitted group values and uncertainties
            groupValues = {}
            groupUncertainties = {}
            groupCounts = {}
            groupComments = {}
            for entry in groupEntries:
                groupValues[entry] = []
                groupUncertainties[entry] = []
                groupCounts[entry] = []
                groupComments[entry] = set()

            # Generate least-squares matrix and vector
            A = []
            b = []

            kdata = []
            for template, kinetics in trainingSet:

                if isinstance(kinetics, (Arrhenius, KineticsData)):
                    kd = [kinetics.getRateCoefficient(T) for T in Tdata]
                elif isinstance(kinetics, ArrheniusEP):
                    kd = [kinetics.getRateCoefficient(T, 0) for T in Tdata]
                else:
                    raise Exception(
                        'Unexpected kinetics model of type {0} for template {1}.'
                        .format(kinetics.__class__, template))
                kdata.append(kd)

                # Create every combination of each group and its ancestors with each other
                combinations = []
                for group in template:
                    groups = [group]
                    groups.extend(self.ancestors(group))
                    combinations.append(groups)
                combinations = getAllCombinations(combinations)
                # Add a row to the matrix for each combination: one 0/1
                # indicator column per fittable group plus a trailing constant
                # column, against log10 k at every temperature
                for groups in combinations:
                    Arow = [1 if group in groups else 0 for group in groupList]
                    Arow.append(1)
                    brow = [math.log10(k) for k in kd]
                    A.append(Arow)
                    b.append(brow)

                    for group in groups:
                        groupComments[group].add("{0!s}".format(template))

            if len(A) == 0:
                logging.warning(
                    'Unable to fit kinetics groups for family "{0}"; no valid data found.'
                    .format(self.label))
                return
            A = numpy.array(A)
            b = numpy.array(b)
            kdata = numpy.array(kdata)

            x, residues, rank, s = numpy.linalg.lstsq(A, b)

            # Imported once here rather than on every pass of the loop below
            import scipy.stats

            for t, T in enumerate(Tdata):

                # Determine error in each group (on log scale).
                # `stdev` accumulates squared residuals until the sqrt below.
                stdev = numpy.zeros(len(groupList) + 1, numpy.float64)
                # Builtin int is used because the numpy.int alias was removed
                # from NumPy
                count = numpy.zeros(len(groupList) + 1, int)

                for index in range(len(trainingSet)):
                    template, kinetics = trainingSet[index]
                    kd = math.log10(kdata[index, t])
                    km = x[-1, t] + sum([
                        x[groupList.index(group), t]
                        for group in template if group in groupList
                    ])
                    variance = (km - kd)**2
                    for group in template:
                        groups = [group]
                        groups.extend(self.ancestors(group))
                        for g in groups:
                            if g not in self.top:
                                ind = groupList.index(g)
                                stdev[ind] += variance
                                count[ind] += 1
                    # The last slot belongs to the constant term
                    stdev[-1] += variance
                    count[-1] += 1
                stdev = numpy.sqrt(stdev / (count - 1))
                # Scale by the 0.975 quantile of Student's t with count - 1
                # degrees of freedom
                ci = scipy.stats.t.ppf(0.975, count - 1) * stdev

                # Update dictionaries of fitted group values and uncertainties
                for entry in groupEntries:
                    if entry == self.top[0]:
                        groupValues[entry].append(10**x[-1, t])
                        groupUncertainties[entry].append(10**ci[-1])
                        groupCounts[entry].append(count[-1])
                    elif entry in groupList:
                        index = groupList.index(entry)
                        groupValues[entry].append(10**x[index, t])
                        groupUncertainties[entry].append(10**ci[index])
                        groupCounts[entry].append(count[index])
                    else:
                        # Entry was not fitted: mark it so its data is cleared
                        groupValues[entry] = None
                        groupUncertainties[entry] = None
                        groupCounts[entry] = None

            # Store the fitted group values and uncertainties on the associated entries
            for entry in groupEntries:
                if groupValues[entry] is not None:
                    entry.data = KineticsData(Tdata=(Tdata, "K"),
                                              kdata=(groupValues[entry],
                                                     kunits))
                    # Uncertainties are attached only when none of them is NaN
                    if not any(
                            numpy.isnan(numpy.array(
                                groupUncertainties[entry]))):
                        entry.data.kdata.uncertainties = numpy.array(
                            groupUncertainties[entry])
                        entry.data.kdata.uncertaintyType = '*|/'
                    entry.shortDesc = "Group additive kinetics."
                    entry.longDesc = "Fitted to {0} rates.\n".format(
                        groupCounts[entry])
                    entry.longDesc += "\n".join(groupComments[entry])
                else:
                    entry.data = None

        elif method == 'Arrhenius':
            # Fit Arrhenius parameters (A, n, Ea) by training against k(T) data

            Tdata = numpy.array([300, 400, 500, 600, 800, 1000, 1500, 2000])
            logTdata = numpy.log(Tdata)
            # The factor 1000 presumably makes the fitted Ea come out in
            # kJ/mol (assumes constants.R is in J/(mol*K)) -- TODO confirm
            Tinvdata = 1000. / (constants.R * Tdata)

            A = []
            b = []

            kdata = []
            for template, kinetics in trainingSet:

                if isinstance(kinetics, (Arrhenius, KineticsData)):
                    kd = [kinetics.getRateCoefficient(T) for T in Tdata]
                elif isinstance(kinetics, ArrheniusEP):
                    kd = [kinetics.getRateCoefficient(T, 0) for T in Tdata]
                else:
                    raise Exception(
                        'Unexpected kinetics model of type {0} for template {1}.'
                        .format(kinetics.__class__, template))
                kdata.append(kd)

                # Create every combination of each group and its ancestors with each other
                combinations = []
                for group in template:
                    groups = [group]
                    groups.extend(self.ancestors(group))
                    combinations.append(groups)
                combinations = getAllCombinations(combinations)

                # Add a row to the matrix for each combination at each temperature.
                # Each group owns three columns (ln A, n, Ea); the final three
                # columns are the constant term shared by every row.
                for t, T in enumerate(Tdata):
                    logT = logTdata[t]
                    Tinv = Tinvdata[t]
                    for groups in combinations:
                        Arow = []
                        for group in groupList:
                            if group in groups:
                                Arow.extend([1, logT, -Tinv])
                            else:
                                Arow.extend([0, 0, 0])
                        Arow.extend([1, logT, -Tinv])
                        brow = math.log(kd[t])
                        A.append(Arow)
                        b.append(brow)

            if len(A) == 0:
                logging.warning(
                    'Unable to fit kinetics groups for family "{0}"; no valid data found.'
                    .format(self.label))
                return
            A = numpy.array(A)
            b = numpy.array(b)
            kdata = numpy.array(kdata)

            x, residues, rank, s = numpy.linalg.lstsq(A, b)

            # Store the results
            self.top[0].data = Arrhenius(
                A=(math.exp(x[-3]), kunits),
                n=x[-2],
                Ea=(x[-1], "kJ/mol"),
                T0=(1, "K"),
            )
            for i, group in enumerate(groupList):
                group.data = Arrhenius(
                    A=(math.exp(x[3 * i]), kunits),
                    n=x[3 * i + 1],
                    Ea=(x[3 * i + 2], "kJ/mol"),
                    T0=(1, "K"),
                )

        elif method == 'Arrhenius2':
            # Fit Arrhenius parameters (A, n, Ea) by training against (A, n, Ea) values

            A = []
            b = []

            for template, kinetics in trainingSet:

                # Create every combination of each group and its ancestors with each other
                combinations = []
                for group in template:
                    groups = [group]
                    groups.extend(self.ancestors(group))
                    combinations.append(groups)
                combinations = getAllCombinations(combinations)

                # Add a row to the matrix for each parameter.
                # Other kinetics types are silently skipped here.
                if isinstance(kinetics,
                              Arrhenius) or (isinstance(kinetics, ArrheniusEP)
                                             and kinetics.alpha.value_si == 0):
                    for groups in combinations:
                        Arow = []
                        for group in groupList:
                            if group in groups:
                                Arow.append(1)
                            else:
                                Arow.append(0)
                        Arow.append(1)
                        Ea = kinetics.E0.value_si if isinstance(
                            kinetics, ArrheniusEP) else kinetics.Ea.value_si
                        brow = [
                            math.log(kinetics.A.value_si), kinetics.n.value_si,
                            Ea / 1000.
                        ]
                        A.append(Arow)
                        b.append(brow)

            if len(A) == 0:
                logging.warning(
                    'Unable to fit kinetics groups for family "{0}"; no valid data found.'
                    .format(self.label))
                return
            A = numpy.array(A)
            b = numpy.array(b)

            x, residues, rank, s = numpy.linalg.lstsq(A, b)

            # Store the results; row -1 of x is the constant term, and the
            # three columns are ln A, n and Ea
            self.top[0].data = Arrhenius(
                A=(math.exp(x[-1, 0]), kunits),
                n=x[-1, 1],
                Ea=(x[-1, 2], "kJ/mol"),
                T0=(1, "K"),
            )
            for i, group in enumerate(groupList):
                group.data = Arrhenius(
                    A=(math.exp(x[i, 0]), kunits),
                    n=x[i, 1],
                    Ea=(x[i, 2], "kJ/mol"),
                    T0=(1, "K"),
                )

        def withinOnePercent(new, old):
            # Relative comparison of two scalars. A zero reference value
            # cannot be divided by, so in that case only an identical zero
            # counts as unchanged.
            if old == 0:
                return new == 0
            return abs(new / old - 1) < 0.01

        # Decide whether any entry changed by more than 1% relative to the
        # values recorded before fitting; stop at the first change found
        changed = False
        for label, entry in self.entries.items():
            if entry.data is not None and label in old_entries:
                old = old_entries[label]
                if (isinstance(entry.data, KineticsData)
                        and isinstance(old, KineticsData)
                        and len(entry.data.kdata.value_si) == len(
                            old.kdata.value_si) and all(
                                abs(entry.data.kdata.value_si /
                                    old.kdata.value_si -
                                    1) < 0.01)):
                    # New group values within 1% of old.
                    pass
                elif (isinstance(entry.data, Arrhenius)
                      and isinstance(old, Arrhenius)
                      and withinOnePercent(entry.data.A.value_si,
                                           old.A.value_si)
                      and withinOnePercent(entry.data.n.value_si,
                                           old.n.value_si)
                      and withinOnePercent(entry.data.Ea.value_si,
                                           old.Ea.value_si)
                      and withinOnePercent(entry.data.T0.value_si,
                                           old.T0.value_si)):
                    # New group values within 1% of old.
                    pass
                else:
                    changed = True
                    break
            else:
                changed = True
                break

        return changed
Exemple #6
0
    def generateGroupAdditivityValues(self, trainingSet, kunits, method='Arrhenius'):
        """
        Generate the group additivity values using the given `trainingSet`,
        a list of 2-tuples of the form ``(template, kinetics)``. You must also
        specify the `kunits` for the family and the `method` to use when
        generating the group values. Returns ``True`` if the group values have
        changed significantly since the last time they were fitted, or ``False``
        otherwise.
        """
        
        # keep track of previous values so we can detect if they change
        old_entries = dict()
        for label,entry in self.entries.items():
            if entry.data is not None:
                old_entries[label] = entry.data
        
        # Determine a complete list of the entries in the database, sorted as in the tree
        groupEntries = self.top[:]
        for entry in self.top:
            groupEntries.extend(self.descendants(entry))
        
        # Determine a unique list of the groups we will be able to fit parameters for
        groupList = []
        for template, kinetics in trainingSet:
            for group in template:
                if group not in self.top:
                    groupList.append(group)
                    groupList.extend(self.ancestors(group)[:-1])
        groupList = list(set(groupList))
        groupList.sort(key=lambda x: x.index)

        if method == 'KineticsData':
            # Fit a discrete set of k(T) data points by training against k(T) data
            
            Tdata = numpy.array([300,400,500,600,800,1000,1500,2000])
            
            # Initialize dictionaries of fitted group values and uncertainties
            groupValues = {}; groupUncertainties = {}; groupCounts = {}; groupComments = {}
            for entry in groupEntries:
                groupValues[entry] = []
                groupUncertainties[entry] = []
                groupCounts[entry] = []
                groupComments[entry] = set()
            
            # Generate least-squares matrix and vector
            A = []; b = []
            
            kdata = []
            for template, kinetics in trainingSet:
                
                if isinstance(kinetics, (Arrhenius, KineticsData)):
                    kd = [kinetics.getRateCoefficient(T) for T in Tdata]
                elif isinstance(kinetics, ArrheniusEP):
                    kd = [kinetics.getRateCoefficient(T, 0) for T in Tdata]
                else:
                    raise Exception('Unexpected kinetics model of type {0} for template {1}.'.format(kinetics.__class__, template))
                kdata.append(kd)
                    
                # Create every combination of each group and its ancestors with each other
                combinations = []
                for group in template:
                    groups = [group]; groups.extend(self.ancestors(group))
                    combinations.append(groups)
                combinations = getAllCombinations(combinations)
                # Add a row to the matrix for each combination
                for groups in combinations:
                    Arow = [1 if group in groups else 0 for group in groupList]
                    Arow.append(1)
                    brow = [math.log10(k) for k in kd]
                    A.append(Arow); b.append(brow)
                    
                    for group in groups:
                        groupComments[group].add("{0!s}".format(template))
                
            if len(A) == 0:
                logging.warning('Unable to fit kinetics groups for family "{0}"; no valid data found.'.format(self.label))
                return
            A = numpy.array(A)
            b = numpy.array(b)
            kdata = numpy.array(kdata)
            
            x, residues, rank, s = numpy.linalg.lstsq(A, b)
            
            for t, T in enumerate(Tdata):
                
                # Determine error in each group (on log scale)
                stdev = numpy.zeros(len(groupList)+1, numpy.float64)
                count = numpy.zeros(len(groupList)+1, numpy.int)
                
                for index in range(len(trainingSet)):
                    template, kinetics = trainingSet[index]
                    kd = math.log10(kdata[index,t])
                    km = x[-1,t] + sum([x[groupList.index(group),t] for group in template if group in groupList])
                    variance = (km - kd)**2
                    for group in template:
                        groups = [group]; groups.extend(self.ancestors(group))
                        for g in groups:
                            if g not in self.top:
                                ind = groupList.index(g)
                                stdev[ind] += variance
                                count[ind] += 1
                    stdev[-1] += variance
                    count[-1] += 1
                stdev = numpy.sqrt(stdev / (count - 1))
                import scipy.stats
                ci = scipy.stats.t.ppf(0.975, count - 1) * stdev
                
                # Update dictionaries of fitted group values and uncertainties
                for entry in groupEntries:
                    if entry == self.top[0]:
                        groupValues[entry].append(10**x[-1,t])
                        groupUncertainties[entry].append(10**ci[-1])
                        groupCounts[entry].append(count[-1])
                    elif entry in groupList:
                        index = groupList.index(entry)
                        groupValues[entry].append(10**x[index,t])
                        groupUncertainties[entry].append(10**ci[index])
                        groupCounts[entry].append(count[index])
                    else:
                        groupValues[entry] = None
                        groupUncertainties[entry] = None
                        groupCounts[entry] = None
            
            # Store the fitted group values and uncertainties on the associated entries
            for entry in groupEntries:
                if groupValues[entry] is not None:
                    entry.data = KineticsData(Tdata=(Tdata,"K"), kdata=(groupValues[entry],kunits))
                    if not any(numpy.isnan(numpy.array(groupUncertainties[entry]))):
                        entry.data.kdata.uncertainties = numpy.array(groupUncertainties[entry])
                        entry.data.kdata.uncertaintyType = '*|/'
                    entry.shortDesc = "Group additive kinetics."
                    entry.longDesc = "Fitted to {0} rates.\n".format(groupCounts[entry])
                    entry.longDesc += "\n".join(groupComments[entry])
                else:
                    entry.data = None
        
        elif method == 'Arrhenius':
            # Fit Arrhenius parameters (A, n, Ea) by training against k(T) data
            
            Tdata = numpy.array([300,400,500,600,800,1000,1500,2000])
            logTdata = numpy.log(Tdata)
            Tinvdata = 1000. / (constants.R * Tdata)
            
            A = []; b = []
            
            kdata = []
            for template, kinetics in trainingSet:
                
                if isinstance(kinetics, (Arrhenius, KineticsData)):
                    kd = [kinetics.getRateCoefficient(T) for T in Tdata]
                elif isinstance(kinetics, ArrheniusEP):
                    kd = [kinetics.getRateCoefficient(T, 0) for T in Tdata]
                else:
                    raise Exception('Unexpected kinetics model of type {0} for template {1}.'.format(kinetics.__class__, template))
                kdata.append(kd)
                
                # Create every combination of each group and its ancestors with each other
                combinations = []
                for group in template:
                    groups = [group]; groups.extend(self.ancestors(group))
                    combinations.append(groups)
                combinations = getAllCombinations(combinations)
                
                # Add a row to the matrix for each combination at each temperature
                for t, T in enumerate(Tdata):
                    logT = logTdata[t]
                    Tinv = Tinvdata[t]
                    for groups in combinations:
                        Arow = []
                        for group in groupList:
                            if group in groups:
                                Arow.extend([1,logT,-Tinv])
                            else:
                                Arow.extend([0,0,0])
                        Arow.extend([1,logT,-Tinv])
                        brow = math.log(kd[t])
                        A.append(Arow); b.append(brow)
            
            if len(A) == 0:
                logging.warning('Unable to fit kinetics groups for family "{0}"; no valid data found.'.format(self.label))
                return
            A = numpy.array(A)
            b = numpy.array(b)
            kdata = numpy.array(kdata)
            
            x, residues, rank, s = numpy.linalg.lstsq(A, b)
            
            # Store the results
            self.top[0].data = Arrhenius(
                A = (math.exp(x[-3]),kunits),
                n = x[-2],
                Ea = (x[-1],"kJ/mol"),
                T0 = (1,"K"),
            )
            for i, group in enumerate(groupList):
                group.data = Arrhenius(
                    A = (math.exp(x[3*i]),kunits),
                    n = x[3*i+1],
                    Ea = (x[3*i+2],"kJ/mol"),
                    T0 = (1,"K"),
                )
        
        elif method == 'Arrhenius2':
            # Fit Arrhenius parameters (A, n, Ea) by training against (A, n, Ea) values
            
            A = []; b = []
            
            for template, kinetics in trainingSet:
                
                # Create every combination of each group and its ancestors with each other
                combinations = []
                for group in template:
                    groups = [group]; groups.extend(self.ancestors(group))
                    combinations.append(groups)
                combinations = getAllCombinations(combinations)
                        
                # Add a row to the matrix for each parameter
                if isinstance(kinetics, Arrhenius) or (isinstance(kinetics, ArrheniusEP) and kinetics.alpha.value_si == 0):
                    for groups in combinations:
                        Arow = []
                        for group in groupList:
                            if group in groups:
                                Arow.append(1)
                            else:
                                Arow.append(0)
                        Arow.append(1)
                        Ea = kinetics.E0.value_si if isinstance(kinetics, ArrheniusEP) else kinetics.Ea.value_si
                        brow = [math.log(kinetics.A.value_si), kinetics.n.value_si, Ea / 1000.]
                        A.append(Arow); b.append(brow)
            
            if len(A) == 0:
                logging.warning('Unable to fit kinetics groups for family "{0}"; no valid data found.'.format(self.label))
                return
            A = numpy.array(A)
            b = numpy.array(b)
            
            x, residues, rank, s = numpy.linalg.lstsq(A, b)
            
            # Store the results
            self.top[0].data = Arrhenius(
                A = (math.exp(x[-1,0]),kunits),
                n = x[-1,1],
                Ea = (x[-1,2],"kJ/mol"),
                T0 = (1,"K"),
            )
            for i, group in enumerate(groupList):
                group.data = Arrhenius(
                    A = (math.exp(x[i,0]),kunits),
                    n = x[i,1],
                    Ea = (x[i,2],"kJ/mol"),
                    T0 = (1,"K"),
                )
        
        # Add a note to the history of each changed item indicating that we've generated new group values
        changed = False
        for label, entry in self.entries.items():
            if entry.data is not None and old_entries.has_key(label):
                if (isinstance(entry.data, KineticsData) and 
                    isinstance(old_entries[label], KineticsData) and
                    len(entry.data.kdata.value_si) == len(old_entries[label].kdata.value_si) and
                    all(abs(entry.data.kdata.value_si / old_entries[label].kdata.value_si - 1) < 0.01)):
                    #print "New group values within 1% of old."
                    pass
                elif (isinstance(entry.data, Arrhenius) and 
                    isinstance(old_entries[label], Arrhenius) and
                    abs(entry.data.A.value_si / old_entries[label].A.value_si - 1) < 0.01 and
                    abs(entry.data.n.value_si / old_entries[label].n.value_si - 1) < 0.01 and
                    abs(entry.data.Ea.value_si / old_entries[label].Ea.value_si - 1) < 0.01 and
                    abs(entry.data.T0.value_si / old_entries[label].T0.value_si - 1) < 0.01):
                    #print "New group values within 1% of old."
                    pass
                else:
                    changed = True
                    break
            else:
                changed = True
                break
        
        return changed
def generateKineticsGroupValues(family, database, trainingSetLabels, method):
    """
    Evaluate the kinetics group additivity values for the given reaction 
    `family` using the specified lists of depository components 
    `trainingSetLabels` as the training set. The already-loaded RMG database 
    should be given as the `database` parameter.
    
    `method` selects the fitting procedure:
    
    * ``'KineticsData'`` fits log10 k(T) at a fixed set of temperatures and
      stores :class:`KineticsData` objects (with 95% confidence factors when
      they can be computed) on the group entries;
    * ``'Arrhenius'`` fits (ln A, n, Ea) contributions against ln k(T) sampled
      at the same temperatures and stores :class:`Arrhenius` objects;
    * ``'Arrhenius2'`` fits (ln A, n, Ea) contributions directly against the
      Arrhenius parameters of the training entries.
    
    Any other value of `method` skips the fit; only the comparison against
    the previous values is carried out.
    
    Entries that held data before the fit and whose new data is not within 1%
    of the old data get an event appended to their ``history``. Returns
    ``True`` if at least one such entry was found and ``False`` otherwise.
    Returns ``None`` if the training set produced no rows to fit (a warning is
    logged in that case).
    """
    
    kunits = getRateCoefficientUnits(family)
    
    print('Categorizing reactions in training sets for {0}'.format(family.label))
    trainingSet = []
    for label, data in createDataSet(trainingSetLabels, family, database):
        trainingSet.extend(data)
    
    print('Fitting new group additivity values for {0}...'.format(family.label))
    
    # Keep track of previous values so we can detect if they change
    old_entries = {}
    for label, entry in family.groups.entries.items():
        if entry.data is not None:
            old_entries[label] = entry.data
    
    # Determine a complete list of the entries in the database, sorted as in the tree
    groupEntries = family.groups.top[:]
    for entry in family.groups.top:
        groupEntries.extend(family.groups.descendants(entry))
    
    # Determine a unique list of the groups we will be able to fit parameters for.
    # Top-level nodes are left out: they all share the single intercept column
    # of the least-squares matrix, whose fitted value is stored on top[0].
    # (The [:-1] slice drops the last ancestor, presumably the tree's top node.)
    groupList = []
    for reaction, template, entry in trainingSet:
        for group in template:
            if group not in family.groups.top:
                groupList.append(group)
                groupList.extend(family.groups.ancestors(group)[:-1])
    groupList = list(set(groupList))
    groupList.sort(key=lambda x: x.index)
    
    if method == 'KineticsData':
        fitted = _fitKineticsDataGroupValues(family, trainingSet, groupEntries, groupList, kunits)
    elif method == 'Arrhenius':
        fitted = _fitArrheniusGroupValues(family, trainingSet, groupList, kunits)
    elif method == 'Arrhenius2':
        fitted = _fitArrhenius2GroupValues(family, trainingSet, groupList, kunits)
    else:
        # Unrecognized method: nothing to fit, but still compare with the old values
        fitted = True
    if not fitted:
        # No valid training data; callers have always received None here
        return None
    
    # Add a note to the history of each changed item indicating that we've generated new group values
    changed = False
    event = [time.asctime(),user,'action','Generated new group additivity values for this entry.']
    for label, entry in family.groups.entries.items():
        if entry.data is None or label not in old_entries:
            continue
        if not _kineticsWithinOnePercent(entry.data, old_entries[label]):
            changed = True
            entry.history.append(event)
    
    return changed


def _degeneracyCorrectedRates(reaction, Tdata):
    """
    Return the rate coefficients of `reaction` at each temperature in `Tdata`,
    each divided by the reaction degeneracy. Raises an :class:`Exception` for
    kinetics models other than Arrhenius, KineticsData and ArrheniusEP.
    """
    kinetics = reaction.kinetics
    if isinstance(kinetics, (Arrhenius, KineticsData)):
        return [kinetics.getRateCoefficient(T) / reaction.degeneracy for T in Tdata]
    elif isinstance(kinetics, ArrheniusEP):
        # NOTE(review): the second argument is presumably the enthalpy of reaction - confirm
        return [kinetics.getRateCoefficient(T, 0) / reaction.degeneracy for T in Tdata]
    raise Exception('Unexpected kinetics model of type {0} for reaction {1}.'.format(reaction.kinetics.__class__, reaction))


def _groupAncestorCombinations(family, template):
    """
    Return every combination obtained by picking, for each group in
    `template`, either the group itself or one of its ancestors.
    """
    lineages = []
    for group in template:
        lineage = [group]
        lineage.extend(family.groups.ancestors(group))
        lineages.append(lineage)
    return getAllCombinations(lineages)


def _fitKineticsDataGroupValues(family, trainingSet, groupEntries, groupList, kunits):
    """
    Fit a discrete set of k(T) data points by training against k(T) data,
    store the results on the entries in `groupEntries` and print them.
    Returns ``False`` (after logging a warning) if there is nothing to fit,
    ``True`` otherwise.
    """
    Tdata = [300,400,500,600,800,1000,1500,2000]
    top = family.groups.top
    
    # Initialize dictionaries of fitted group values and uncertainties
    groupValues = {}; groupUncertainties = {}; groupCounts = {}; groupComments = {}
    for entry in groupEntries:
        groupValues[entry] = []
        groupUncertainties[entry] = []
        groupCounts[entry] = []
        groupComments[entry] = set()
    
    # Generate least-squares matrix and vector: one row per combination of
    # each template group with its ancestors, one column per fitted group
    # plus a final all-ones column for the intercept
    A = []; b = []
    kdata = []
    for reaction, template, entry in trainingSet:
        kd = _degeneracyCorrectedRates(reaction, Tdata)
        kdata.append(kd)
        brow = [math.log10(k) for k in kd]
        comment = "{0!s}".format(template)
        for groups in _groupAncestorCombinations(family, template):
            Arow = [1 if group in groups else 0 for group in groupList]
            Arow.append(1)
            A.append(Arow); b.append(brow)
            for group in groups:
                groupComments[group].add(comment)
    
    if not A:
        logging.warning('Unable to fit kinetics groups for family "{0}"; no valid data found.'.format(family.groups.label))
        return False
    A = numpy.array(A)
    b = numpy.array(b)
    kdata = numpy.array(kdata)
    
    x, residues, rank, s = numpy.linalg.lstsq(A, b)
    
    # The column lookups below do not depend on temperature, so do them once
    # per training reaction rather than once per reaction per temperature
    templateIndices = []
    for reaction, template, entry in trainingSet:
        # Columns summed to form the model prediction for this template
        direct = [groupList.index(group) for group in template if group in groupList]
        # Columns whose error statistics this reaction contributes to
        lineage = []
        for group in template:
            groups = [group]; groups.extend(family.groups.ancestors(group))
            lineage.extend([groupList.index(g) for g in groups if g not in top])
        templateIndices.append((direct, lineage))
    
    ncolumns = len(groupList) + 1
    for t in range(len(Tdata)):
        
        # Determine error in each group (on log scale)
        stdev = numpy.zeros(ncolumns, numpy.float64)
        # Builtin int: the numpy.int alias has been removed from recent NumPy
        count = numpy.zeros(ncolumns, int)
        
        for index, (direct, lineage) in enumerate(templateIndices):
            kd = math.log10(kdata[index,t])
            km = x[-1,t] + sum([x[i,t] for i in direct])
            variance = (km - kd)**2
            for ind in lineage:
                stdev[ind] += variance
                count[ind] += 1
            stdev[-1] += variance
            count[-1] += 1
        # Groups seen fewer than twice give nan/inf here; such uncertainties
        # are left off the stored data further down
        stdev = numpy.sqrt(stdev / (count - 1))
        ci = scipy.stats.t.ppf(0.975, count - 1) * stdev
        
        # Update dictionaries of fitted group values and uncertainties
        for entry in groupEntries:
            if entry == top[0]:
                index = -1
            elif entry in groupList:
                index = groupList.index(entry)
            else:
                # Not fitted: mark the entry so that its data is cleared below
                groupValues[entry] = None
                groupUncertainties[entry] = None
                groupCounts[entry] = None
                continue
            groupValues[entry].append(10**x[index,t])
            groupUncertainties[entry].append(10**ci[index])
            groupCounts[entry].append(count[index])
    
    # Store the fitted group values and uncertainties on the associated entries
    for entry in groupEntries:
        if groupValues[entry] is not None:
            entry.data = KineticsData(Tdata=(Tdata,"K"), kdata=(groupValues[entry],kunits))
            if not any(numpy.isnan(numpy.array(groupUncertainties[entry]))):
                entry.data.kdata.uncertainties = numpy.array(groupUncertainties[entry])
                entry.data.kdata.uncertaintyType = '*|/'
            entry.shortDesc = "Group additive kinetics."
            entry.longDesc = "Fitted to {0} rates.\n".format(groupCounts[entry])
            entry.longDesc += "\n".join(groupComments[entry])
        else:
            entry.data = None
    
    def printRows(entry, label):
        # One row per temperature; the label is shown on the first row only
        for i in range(len(entry.data.Tdata.values)):
            print('%-31s %-11g %-11.4e %-11.4e %-7i' % (
                label if i == 0 else '',
                Tdata[i],
                groupValues[entry][i],
                groupUncertainties[entry][i],
                groupCounts[entry][i],
            ))
    
    # Print the group values
    print('=============================== =========== =========== =========== =======')
    print('Group                           T (K)       k(T) (SI)   CI (95%)    Count')
    print('=============================== =========== =========== =========== =======')
    printRows(top[0], ', '.join(['%s' % (node.label) for node in top]))
    print('------------------------------- ----------- ----------- ----------- -------')
    for entry in groupEntries:
        if entry.data is not None:
            printRows(entry, entry.label)
    print('=============================== =========== =========== =========== =======')
    
    return True


def _fitArrheniusGroupValues(family, trainingSet, groupList, kunits):
    """
    Fit Arrhenius parameters (A, n, Ea) by training against k(T) data, store
    the results on the groups and print them. Returns ``False`` (after
    logging a warning) if there is nothing to fit, ``True`` otherwise.
    """
    Tdata = [300,400,500,600,800,1000,1500,2000]
    
    A = []; b = []
    for reaction, template, entry in trainingSet:
        kd = _degeneracyCorrectedRates(reaction, Tdata)
        combinations = _groupAncestorCombinations(family, template)
        
        # Add a row to the matrix for each combination at each temperature
        for T, k in zip(Tdata, kd):
            logT = math.log(T)
            Tinv = 1000.0 / (constants.R * T)
            basis = [1,logT,-Tinv]
            brow = math.log(k)
            for groups in combinations:
                Arow = []
                for group in groupList:
                    Arow.extend(basis if group in groups else [0,0,0])
                Arow.extend(basis)
                A.append(Arow); b.append(brow)
    
    if not A:
        logging.warning('Unable to fit kinetics groups for family "{0}"; no valid data found.'.format(family.groups.label))
        return False
    A = numpy.array(A)
    b = numpy.array(b)
    
    x, residues, rank, s = numpy.linalg.lstsq(A, b)
    
    # x holds consecutive (ln A, n, Ea) triples, one per group followed by the
    # intercept triple; view it as one row per triple
    _storeArrheniusGroupValues(family, groupList, x.reshape((-1, 3)), kunits)
    _printArrheniusGroupValues(family, groupList)
    return True


def _fitArrhenius2GroupValues(family, trainingSet, groupList, kunits):
    """
    Fit Arrhenius parameters (A, n, Ea) by training against (A, n, Ea) values,
    store the results on the groups and print them. Only training entries
    whose data is Arrhenius, or ArrheniusEP with alpha equal to zero, are
    used. Returns ``False`` (after logging a warning) if there is nothing to
    fit, ``True`` otherwise.
    """
    A = []; b = []
    for reaction, template, entry in trainingSet:
        data = entry.data
        if not (isinstance(data, Arrhenius) or (isinstance(data, ArrheniusEP) and data.alpha.value == 0)):
            continue
        Ea = data.E0.value if isinstance(data, ArrheniusEP) else data.Ea.value
        brow = [math.log(data.A.value), data.n.value, Ea / 1000.]
        
        # Add a row to the matrix for each combination of groups and ancestors
        for groups in _groupAncestorCombinations(family, template):
            Arow = [1 if group in groups else 0 for group in groupList]
            Arow.append(1)
            A.append(Arow); b.append(brow)
    
    if not A:
        logging.warning('Unable to fit kinetics groups for family "{0}"; no valid data found.'.format(family.groups.label))
        return False
    A = numpy.array(A)
    b = numpy.array(b)
    
    x, residues, rank, s = numpy.linalg.lstsq(A, b)
    
    _storeArrheniusGroupValues(family, groupList, x, kunits)
    _printArrheniusGroupValues(family, groupList)
    return True


def _storeArrheniusGroupValues(family, groupList, x, kunits):
    """
    Store fitted Arrhenius parameters on the groups. Row ``i`` of `x` holds
    (ln A, n, Ea) for ``groupList[i]``; the last row holds the intercept,
    which is stored on the first top-level node of the family.
    """
    def makeArrhenius(row):
        # The fitted Ea coefficient is scaled by 1000 and stored in J/mol
        return Arrhenius(
            A = (math.exp(row[0]),kunits),
            n = row[1],
            Ea = (row[2]*1000.,"J/mol"),
            T0 = (1,"K"),
        )
    
    family.groups.top[0].data = makeArrhenius(x[-1])
    for i, group in enumerate(groupList):
        group.data = makeArrhenius(x[i])


def _printArrheniusGroupValues(family, groupList):
    """
    Print a table of the Arrhenius parameters stored on the first top-level
    node of `family` and on each group in `groupList`.
    """
    def printRow(label, kinetics):
        logA = math.log10(kinetics.A.value)
        n = kinetics.n.value
        Ea = kinetics.Ea.value / 1000.
        print('%-39s %11.3f %11.3f %11.3f' % (label, logA, n, Ea))
    
    print('======================================= =========== =========== ===========')
    print('Group                                   log A (SI)  n           Ea (kJ/mol)   ')
    print('======================================= =========== =========== ===========')
    printRow(', '.join(['%s' % (top.label) for top in family.groups.top]), family.groups.top[0].data)
    print('--------------------------------------- ----------- ----------- -----------')
    for group in groupList:
        printRow(group.label, group.data)
    print('======================================= =========== =========== ===========')


def _withinOnePercent(new, old):
    """
    Return ``True`` if the scalar `new` differs from `old` by less than 1%,
    measured relative to `old`.
    """
    if old == 0:
        # A relative change from zero is undefined (and would divide by zero);
        # only an unchanged zero counts as being within tolerance
        return new == 0
    return abs(new / old - 1) < 0.01


def _kineticsWithinOnePercent(new, old):
    """
    Return ``True`` if `new` and `old` are kinetics of the same kind
    (both KineticsData or both Arrhenius) whose values agree to within 1%.
    """
    if (isinstance(new, KineticsData) and
        isinstance(old, KineticsData) and
        len(new.kdata.values) == len(old.kdata.values) and
        all(abs(new.kdata.values / old.kdata.values - 1) < 0.01)):
        return True
    if (isinstance(new, Arrhenius) and
        isinstance(old, Arrhenius) and
        _withinOnePercent(new.A.value, old.A.value) and
        _withinOnePercent(new.n.value, old.n.value) and
        _withinOnePercent(new.Ea.value, old.Ea.value) and
        _withinOnePercent(new.T0.value, old.T0.value)):
        return True
    return False