def write_reaction_by_index(self, r):
     """
     Return the formula string of the reaction stored in column r of self.S

     Arguments:
        r - the column index of the reaction in the stoichiometric matrix

     Return value:
        whatever KeggReaction.write_formula() returns for a reaction built
        from the non-zero coefficients of that column; the reaction ID is
        attached when self.rids is available
     """
     # collect only the compounds that actually participate in reaction r
     coefficients = {}
     for row, cid in enumerate(self.cids):
         if self.S[row, r] != 0:
             coefficients[cid] = self.S[row, r]

     if self.rids is None:
         rxn = KeggReaction(coefficients)
     else:
         rxn = KeggReaction(coefficients, rid=self.rids[r])
     return rxn.write_formula()
    def from_formulas(reaction_strings,
                      arrow='<=>',
                      has_reaction_ids=False,
                      raise_exception=False):
        """
        parses a list of reactions in KEGG format and builds a model from them

        Arguments:
           reaction_strings - a list of reactions in KEGG format
           arrow            - the string used as the 'arrow' in each reaction (default: '<=>')
           has_reaction_ids - a boolean flag indicating if there is a column of
                              reaction IDs (separated from the reaction with
                              whitespaces)
           raise_exception  - passed on to KeggReaction.is_balanced(); when True,
                              a ValueError raised while building the model is
                              re-raised instead of being logged

        Return value:
           the result of KeggModel.from_kegg_reactions() for the parsed
           reactions, or None if a ValueError was raised and raise_exception
           is False

        Lines that cannot be parsed, or whose reaction is not balanced, are
        logged and replaced by an empty KeggReaction (they are still appended
        to the list of reactions).
        """
        try:
            reactions = []
            not_balanced_count = 0
            for line in reaction_strings:
                rid = None
                reaction = None
                if has_reaction_ids:
                    # split '<ID> <formula>'; a blank line or a line holding
                    # only an ID has no match and is handled like any other
                    # unparsable line instead of raising an IndexError
                    match = re.search(r'(\w+)\s+(.*)', line.strip())
                    if match is None:
                        logging.warning('Cannot find a reaction ID followed '
                                        'by a formula in: ' + line)
                        reaction = KeggReaction({})
                    else:
                        rid, line = match.groups()
                if reaction is None:
                    try:
                        reaction = KeggReaction.parse_formula(line, arrow, rid)
                    except KeggParseException as e:
                        logging.warning(str(e))
                        reaction = KeggReaction({})
                if not reaction.is_balanced(fix_water=True,
                                            raise_exception=raise_exception):
                    not_balanced_count += 1
                    logging.warning('Model contains an unbalanced reaction: ' +
                                    line)
                    reaction = KeggReaction({})
                reactions.append(reaction)
                logging.debug('Adding reaction: ' + reaction.write_formula())

            if not_balanced_count > 0:
                warning_str = '%d out of the %d reactions are not chemically balanced' % \
                              (not_balanced_count, len(reaction_strings))
                logging.debug(warning_str)
            return KeggModel.from_kegg_reactions(reactions, has_reaction_ids)

        except ValueError as e:
            if raise_exception:
                # bare raise keeps the original traceback
                raise
            else:
                logging.debug(str(e))
                return None
    def add_thermo(self, cc):
        """
        Build one KeggReaction per column of self.S and store the values
        returned by cc.get_dG0_r_multi() for them.

        Arguments:
           cc - an object providing get_dG0_r_multi(reactions), which must
                return a pair of values

        Side effects:
           sets self.dG0 and self.cov_dG0 to the two values returned by
           cc.get_dG0_r_multi() (presumably the reaction energies and their
           covariance - confirm against the implementation of cc)
        """
        Nc, Nr = self.S.shape
        reactions = []
        # range() instead of xrange() so the method also runs on Python 3
        for j in range(Nr):
            # keep only the compounds with a non-zero coefficient in column j
            sparse = {
                self.cids[i]: self.S[i, j]
                for i in range(Nc) if self.S[i, j] != 0
            }
            reactions.append(KeggReaction(sparse))

        self.dG0, self.cov_dG0 = cc.get_dG0_r_multi(reactions)
    def read_redox():
        """
        Read the Reduction potential data

        The tab-delimited file named by TrainingData.FNAME_DICT['REDOX'] is
        read row by row. Its fields are: name, CID_ox, nH_ox, charge_ox,
        CID_red, nH_red, charge_red, E'0, pH, I, pMg, T, ref

        Return value:
           a list with one dictionary per row, with the keys:
           reaction, dG'0, T, I, pH, pMg, weight, balance
        """

        fname, weight = TrainingData.FNAME_DICT['REDOX']
        thermo_params = []

        # the 'with' block guarantees that the file is closed, also when a
        # row fails to parse
        with open(fname, 'r') as fp:
            for row in csv.DictReader(fp, delimiter='\t'):
                cid_ox = int(row['CID_ox'])
                cid_red = int(row['CID_red'])
                delta_nH = TrainingData.str2double(row['nH_red']) - \
                           TrainingData.str2double(row['nH_ox'])
                delta_charge = TrainingData.str2double(row['charge_red']) - \
                               TrainingData.str2double(row['charge_ox'])
                # difference in hydrogens minus the difference in charge
                delta_e = delta_nH - delta_charge
                # F is a module-level constant (presumably Faraday's constant)
                dG0_prime = -F * TrainingData.str2double(row['E\'0']) * delta_e

                thermo_params.append({
                    'reaction': KeggReaction({cid_ox: -1, cid_red: 1}),
                    'dG\'0': dG0_prime,
                    'T': TrainingData.str2double(row['T']),
                    'I': TrainingData.str2double(row['I']),
                    'pH': TrainingData.str2double(row['pH']),
                    'pMg': TrainingData.str2double(row['pMg']),
                    'weight': weight,
                    'balance': False
                })

        logging.info('Successfully added %d redox potentials' %
                     len(thermo_params))
        return thermo_params
    def read_formations():
        """
        Read the Formation Energy data

        The tab-delimited file named by TrainingData.FNAME_DICT['FORMATION']
        is read row by row. Its fields are: cid, name, dG'0, pH, I, pMg, T,
        decompose?, compound_ref, remark

        Return values:
           thermo_params            - a list with one dictionary per row that
                                      has a non-empty dG'0 value, with the
                                      keys: reaction, dG'0, T, I, pH, pMg,
                                      weight, balance
           cids_that_dont_decompose - the set of compound IDs whose
                                      'decompose' field is 0 (collected from
                                      all rows, also those without a dG'0)
        """
        fname, weight = TrainingData.FNAME_DICT['FORMATION']
        thermo_params = []
        cids_that_dont_decompose = set()

        # the 'with' block guarantees that the file is closed, also when a
        # row fails to parse
        with open(fname, 'r') as fp:
            for row in csv.DictReader(fp, delimiter='\t'):
                cid = int(row['cid'])
                if int(row['decompose']) == 0:
                    cids_that_dont_decompose.add(cid)
                if row['dG\'0'] != '':
                    thermo_params.append({
                        'reaction': KeggReaction({cid: 1}),
                        'dG\'0': TrainingData.str2double(row['dG\'0']),
                        'T': TrainingData.str2double(row['T']),
                        'I': TrainingData.str2double(row['I']),
                        'pH': TrainingData.str2double(row['pH']),
                        'pMg': TrainingData.str2double(row['pMg']),
                        'weight': weight,
                        'balance': False
                    })

        logging.info('Successfully added %d formation energies' %
                     len(thermo_params))
        return thermo_params, cids_that_dont_decompose
    def balance_reactions(self, rxn_inds_to_balance):
        """
            use the chemical formulas from the InChIs to verify that each and every
            reaction is balanced

            Arguments:
               rxn_inds_to_balance - the column indices (in self.S) of the
                                     reactions that should be checked

            Reactions that involve a compound without a formula (a NaN in
            its row of the element matrix) are never checked. For the
            others, the coefficient of the compound with ID 1 (H2O) is
            adjusted so that oxygen is conserved. Every checked reaction
            that still does not conserve all elements is logged and removed,
            i.e. its column is dropped from self.S, self.dG0_prime, self.T,
            self.I, self.pH, self.pMg and self.weight.
        """
        elements, Ematrix = self.ccache.get_kegg_ematrix(self.cids)
        cpd_inds_without_formula = list(
            np.nonzero(np.any(np.isnan(Ematrix), 1))[0].flat)
        Ematrix[np.isnan(Ematrix)] = 0

        # reactions touching a compound without a formula cannot be checked
        S_without_formula = self.S[cpd_inds_without_formula, :]
        rxn_inds_without_formula = np.nonzero(np.any(S_without_formula != 0,
                                                     0))[0]
        rxn_inds_to_balance = set(rxn_inds_to_balance).difference(
            rxn_inds_without_formula)

        # need to check that all elements are balanced (except H, but including e-)
        # if only O is not balanced, add water molecules
        if 'O' in elements:
            i_H2O = self.cids.index(1)
            j_O = elements.index('O')
            conserved = np.dot(Ematrix.T, self.S)
            for k in rxn_inds_to_balance:
                self.S[i_H2O, k] = self.S[i_H2O, k] - conserved[j_O, k]

        # recalculate conservation matrix; np.dot is a matrix product for
        # both np.matrix and np.ndarray inputs, whereas '*' would be an
        # element-wise product for plain arrays
        conserved = np.dot(Ematrix.T, self.S)

        rxn_inds_to_remove = [
            k for k in rxn_inds_to_balance if np.any(conserved[:, k] != 0, 0)
        ]

        # report each unbalanced reaction and the elements that are off
        for k in rxn_inds_to_remove:
            sprs = {}
            for i in np.nonzero(self.S[:, k])[0]:
                sprs[self.cids[i]] = self.S[i, k]
            reaction = KeggReaction(sprs)
            logging.debug('unbalanced reaction #%d: %s' %
                          (k, reaction.write_formula()))
            for j in np.where(conserved[:, k])[0].flat:
                logging.debug(
                    'there are %d more %s atoms on the right-hand side' %
                    (conserved[j, k], elements[j]))

        rxn_inds_to_keep = \
            set(range(self.S.shape[1])).difference(rxn_inds_to_remove)

        rxn_inds_to_keep = sorted(rxn_inds_to_keep)

        # drop the unbalanced columns from S and from all per-reaction data
        self.S = self.S[:, rxn_inds_to_keep]
        self.dG0_prime = self.dG0_prime[:, rxn_inds_to_keep]
        self.T = self.T[:, rxn_inds_to_keep]
        self.I = self.I[:, rxn_inds_to_keep]
        self.pH = self.pH[:, rxn_inds_to_keep]
        self.pMg = self.pMg[:, rxn_inds_to_keep]
        self.weight = self.weight[:, rxn_inds_to_keep]

        logging.info(
            'After removing %d unbalanced reactions, the stoichiometric '
            'matrix contains: '
            '%d compounds and %d reactions' %
            (len(rxn_inds_to_remove), self.S.shape[0], self.S.shape[1]))