def write_reaction_by_index(self, r):
    """Return the formula string of the reaction stored in column ``r`` of S.

    Arguments:
        r - column index into the stoichiometric matrix ``self.S``

    The non-zero entries of column ``r`` are collected into a
    {compound ID: coefficient} mapping, wrapped in a KeggReaction (tagged
    with ``self.rids[r]`` when reaction IDs are available) and rendered
    with ``write_formula()``.
    """
    stoichiometry = {}
    for row, cid in enumerate(self.cids):
        coefficient = self.S[row, r]
        if coefficient != 0:
            stoichiometry[cid] = coefficient

    if self.rids is None:
        reaction = KeggReaction(stoichiometry)
    else:
        reaction = KeggReaction(stoichiometry, rid=self.rids[r])
    return reaction.write_formula()
def from_formulas(reaction_strings, arrow='<=>', has_reaction_ids=False,
                  raise_exception=False):
    """
    Parse a list of reactions in KEGG format and build a model from them.

    Arguments:
        reaction_strings - a list of reactions in KEGG format
        arrow            - the string used as the 'arrow' in each reaction
                           (default: '<=>')
        has_reaction_ids - a boolean flag indicating if there is a column of
                           reaction IDs (separated from the reaction with
                           whitespaces)
        raise_exception  - if True, a ValueError raised while building the
                           model is propagated; otherwise it is logged at
                           debug level and None is returned. The flag is
                           also forwarded to KeggReaction.is_balanced().

    Return value:
        whatever KeggModel.from_kegg_reactions(reactions, has_reaction_ids)
        returns, or None if a ValueError occurred and raise_exception is
        False.

    Reactions that fail to parse (KeggParseException) or that are not
    balanced are replaced by an empty KeggReaction, so the position of
    every input line is preserved in the resulting reaction list.

    NOTE: when has_reaction_ids is True, a line that does not match
    '<id> <whitespace> <reaction>' raises IndexError, which is not handled
    here.
    """
    try:
        reactions = []
        not_balanced_count = 0
        for line in reaction_strings:
            rid = None
            if has_reaction_ids:
                # raw string: '\w' and '\s' are regex escapes, not string
                # escapes
                tokens = re.findall(r'(\w+)\s+(.*)', line.strip())[0]
                rid = tokens[0]
                line = tokens[1]
            try:
                reaction = KeggReaction.parse_formula(line, arrow, rid)
            except KeggParseException as e:
                logging.warning(str(e))
                reaction = KeggReaction({})
            if not reaction.is_balanced(fix_water=True,
                                        raise_exception=raise_exception):
                not_balanced_count += 1
                logging.warning('Model contains an unbalanced reaction: ' +
                                line)
                reaction = KeggReaction({})
            reactions.append(reaction)
            logging.debug('Adding reaction: ' + reaction.write_formula())

        if not_balanced_count > 0:
            warning_str = (
                '%d out of the %d reactions are not chemically balanced' %
                (not_balanced_count, len(reaction_strings)))
            logging.debug(warning_str)
        return KeggModel.from_kegg_reactions(reactions, has_reaction_ids)

    except ValueError as e:
        if raise_exception:
            # bare raise keeps the original traceback intact
            raise
        else:
            logging.debug(str(e))
            return None
def add_thermo(self, cc):
    """Compute reaction energies for every column of S and store them.

    Arguments:
        cc - an object exposing ``get_dG0_r_multi(reactions)``, which must
             return a pair; the two items are stored as ``self.dG0`` and
             ``self.cov_dG0``.

    Each column of ``self.S`` is converted into a KeggReaction holding only
    its non-zero coefficients, keyed by the matching entry of ``self.cids``.

    NOTE(review): no check is made here that ``cc`` already knows all the
    compounds involved; presumably ``get_dG0_r_multi`` deals with that --
    confirm against its implementation.
    """
    Nc, Nr = self.S.shape
    reactions = []
    # range (not the Python-2-only xrange) so this runs on Python 2 and 3
    for j in range(Nr):
        sparse = {
            self.cids[i]: self.S[i, j]
            for i in range(Nc)
            if self.S[i, j] != 0
        }
        reactions.append(KeggReaction(sparse))

    self.dG0, self.cov_dG0 = cc.get_dG0_r_multi(reactions)
def read_redox():
    """Read the Reduction potential data.

    The file name and weight are taken from
    ``TrainingData.FNAME_DICT['REDOX']``; the file is read as tab-delimited
    text with a header row.

    Returns a list with one dict per row, with the keys: 'reaction',
    "dG'0", 'T', 'I', 'pH', 'pMg', 'weight' and 'balance' (always False).
    The reaction is the half-reaction ``CID_ox -> CID_red``.
    """
    fname, weight = TrainingData.FNAME_DICT['REDOX']
    # columns are: reaction, dG'0, T, I, pH, pMg, weight, balance?
    thermo_params = []

    # fields are: name, CID_ox, nH_ox, charge_ox, CID_red,
    #             nH_red, charge_red, E'0, pH, I, pMg, T, ref
    # the context manager closes the file even if a row fails to parse
    with open(fname, 'r') as fp:
        for row in csv.DictReader(fp, delimiter='\t'):
            cid_ox = int(row['CID_ox'])
            cid_red = int(row['CID_red'])
            delta_nH = TrainingData.str2double(row['nH_red']) - \
                TrainingData.str2double(row['nH_ox'])
            delta_charge = TrainingData.str2double(row['charge_red']) - \
                TrainingData.str2double(row['charge_ox'])
            # electrons gained = hydrogens gained minus the change in charge
            delta_e = delta_nH - delta_charge
            # F comes from module scope (presumably the Faraday constant)
            dG0_prime = -F * TrainingData.str2double(row['E\'0']) * delta_e
            thermo_params.append({
                'reaction': KeggReaction({cid_ox: -1, cid_red: 1}),
                'dG\'0': dG0_prime,
                'T': TrainingData.str2double(row['T']),
                'I': TrainingData.str2double(row['I']),
                'pH': TrainingData.str2double(row['pH']),
                'pMg': TrainingData.str2double(row['pMg']),
                'weight': weight,
                'balance': False
            })

    logging.info('Successfully added %d redox potentials' %
                 len(thermo_params))
    return thermo_params
def read_formations():
    """Read the Formation Energy data.

    The file name and weight are taken from
    ``TrainingData.FNAME_DICT['FORMATION']``; the file is read as
    tab-delimited text with a header row.

    Returns a pair ``(thermo_params, cids_that_dont_decompose)``:
        thermo_params - a list with one dict per row that has a non-empty
            "dG'0" field, with the keys: 'reaction', "dG'0", 'T', 'I',
            'pH', 'pMg', 'weight' and 'balance' (always False)
        cids_that_dont_decompose - the set of compound IDs whose
            'decompose' field equals 0 (collected for every row, whether
            or not it has a "dG'0" value)
    """
    fname, weight = TrainingData.FNAME_DICT['FORMATION']
    # columns are: reaction, dG'0, T, I, pH, pMg, weight, balance?
    thermo_params = []
    cids_that_dont_decompose = set()

    # fields are: cid, name, dG'0, pH, I, pMg, T, decompose?,
    #             compound_ref, remark
    # the context manager closes the file even if a row fails to parse
    with open(fname, 'r') as fp:
        for row in csv.DictReader(fp, delimiter='\t'):
            cid = int(row['cid'])
            if int(row['decompose']) == 0:
                cids_that_dont_decompose.add(cid)
            if row['dG\'0'] != '':
                thermo_params.append({
                    'reaction': KeggReaction({cid: 1}),
                    'dG\'0': TrainingData.str2double(row['dG\'0']),
                    'T': TrainingData.str2double(row['T']),
                    'I': TrainingData.str2double(row['I']),
                    'pH': TrainingData.str2double(row['pH']),
                    'pMg': TrainingData.str2double(row['pMg']),
                    'weight': weight,
                    'balance': False
                })

    logging.info('Successfully added %d formation energies' %
                 len(thermo_params))
    return thermo_params, cids_that_dont_decompose
def balance_reactions(self, rxn_inds_to_balance):
    """
    Use the chemical formulas to verify that the selected reactions are
    balanced, and drop the ones that are not.

    Arguments:
        rxn_inds_to_balance - iterable of column indices of ``self.S`` that
                              should be checked

    Steps:
      1. Fetch the element matrix for ``self.cids`` from ``self.ccache``.
         Compounds whose row contains NaN have no usable formula; every
         reaction involving such a compound is skipped (and kept as is).
      2. If oxygen is one of the elements, the coefficient of compound 1
         (treated as water by this code) is adjusted in every checked
         reaction by its oxygen imbalance.
      3. Checked reactions that still have a non-zero imbalance in any
         element are logged and removed from ``self.S`` and from the
         per-reaction arrays ``dG0_prime``, ``T``, ``I``, ``pH``, ``pMg``
         and ``weight`` (all indexed as 2-D, reactions along axis 1).

    Side effects: modifies ``self.S`` and the per-reaction arrays, and
    zeroes the NaN entries of the element matrix returned by the cache.

    Raises ValueError (from ``list.index``) if 'O' is among the elements
    but compound 1 is not in ``self.cids``.
    """
    elements, Ematrix = self.ccache.get_kegg_ematrix(self.cids)

    # np.asarray + flatnonzero gives plain 1-D indices whether the data is
    # held in ndarrays or in np.matrix objects (whose reductions stay 2-D)
    cpd_inds_without_formula = list(
        np.flatnonzero(np.asarray(np.any(np.isnan(Ematrix), 1))))
    Ematrix[np.isnan(Ematrix)] = 0

    S_without_formula = self.S[cpd_inds_without_formula, :]
    # column (reaction) indices that involve a compound without a formula
    rxn_inds_without_formula = np.flatnonzero(
        np.asarray(np.any(S_without_formula != 0, 0)))
    rxn_inds_to_balance = set(rxn_inds_to_balance).difference(
        rxn_inds_without_formula)

    # need to check that all elements are balanced (except H, but
    # including e-)
    # if only O is not balanced, add water molecules
    if 'O' in elements:
        i_H2O = self.cids.index(1)
        j_O = elements.index('O')
        conserved = np.dot(Ematrix.T, self.S)
        for k in rxn_inds_to_balance:
            self.S[i_H2O, k] = self.S[i_H2O, k] - conserved[j_O, k]

    # recalculate conservation matrix; np.dot is a true matrix product for
    # both ndarray and np.matrix operands ('*' would be element-wise for
    # ndarrays)
    conserved = np.dot(Ematrix.T, self.S)

    rxn_inds_to_remove = [
        k for k in rxn_inds_to_balance
        if bool(np.any(conserved[:, k] != 0))
    ]

    for k in rxn_inds_to_remove:
        sprs = {}
        for i in np.nonzero(self.S[:, k])[0]:
            sprs[self.cids[i]] = self.S[i, k]
        reaction = KeggReaction(sprs)
        logging.debug('unbalanced reaction #%d: %s' %
                      (k, reaction.write_formula()))
        for j in np.where(conserved[:, k])[0].flat:
            logging.debug(
                'there are %d more %s atoms on the right-hand side' %
                (conserved[j, k], elements[j]))

    rxn_inds_to_keep = \
        set(range(self.S.shape[1])).difference(rxn_inds_to_remove)
    rxn_inds_to_keep = sorted(rxn_inds_to_keep)

    self.S = self.S[:, rxn_inds_to_keep]
    self.dG0_prime = self.dG0_prime[:, rxn_inds_to_keep]
    self.T = self.T[:, rxn_inds_to_keep]
    self.I = self.I[:, rxn_inds_to_keep]
    self.pH = self.pH[:, rxn_inds_to_keep]
    self.pMg = self.pMg[:, rxn_inds_to_keep]
    self.weight = self.weight[:, rxn_inds_to_keep]

    logging.info(
        'After removing %d unbalanced reactions, the stoichiometric '
        'matrix contains: '
        '%d compounds and %d reactions' %
        (len(rxn_inds_to_remove), self.S.shape[0], self.S.shape[1]))