def run(self):
    """Compute the dissociation table of one compound and store it.

    Builds the dissociation table for ``self.smiles`` and inserts its rows
    into the table ``self.options.table_name`` of the SQLite database at
    ``self.options.db_file``. Every row is prefixed with ``self.cid`` and
    the compound name returned by ``Kegg.cid2name``. When no dissociation
    table is available, a single row padded with ten ``None`` values is
    inserted instead.

    ``self.semaphore`` is held for the whole run and ``self.db_lock`` for
    the database section; both are released even if an exception is
    raised, so a failing compound cannot block the other workers.
    """
    from toolbox.molecule import Molecule

    self.semaphore.acquire()
    try:
        start_time = time.time()

        logging.debug("SMILES: " + self.smiles)
        diss_table = Molecule._GetDissociationTable(
            self.smiles, fmt='smiles', mid_pH=default_pH,
            min_pKa=0, max_pKa=14, T=default_T)
        # The table may be None (handled below when writing to the
        # database), so only log its attributes when it exists.
        if diss_table is not None:
            logging.debug("Min charge: %d" % diss_table.min_charge)
            logging.debug("Min nH: %d" % diss_table.min_nH)

        elapsed_time = time.time() - start_time

        self.db_lock.acquire()
        try:
            db = SqliteDatabase(self.options.db_file)
            kegg = Kegg.getInstance()
            name = kegg.cid2name(self.cid)

            if diss_table is not None:
                for row in diss_table.ToDatabaseRow():
                    db.Insert(self.options.table_name,
                              [self.cid, name] + row)
            else:
                db.Insert(self.options.table_name,
                          [self.cid, name] + [None] * 10)

            # Drop the handle before releasing the lock.
            # NOTE(review): presumably this closes the connection - confirm
            # against SqliteDatabase.
            del db
        finally:
            self.db_lock.release()

        logging.info("Completed C%05d, elapsed time = %.1f sec" %
                     (self.cid, elapsed_time))
    finally:
        self.semaphore.release()
def run(self):
    """Compute the dissociation table of one compound and store it.

    Builds the dissociation table for ``self.smiles`` and inserts its rows
    into the table ``self.options.table_name`` of the SQLite database at
    ``self.options.db_file``. Every row is prefixed with ``self.cid`` and
    the compound name returned by ``Kegg.cid2name``. When no dissociation
    table is available, a single row padded with ten ``None`` values is
    inserted instead.

    ``self.semaphore`` is held for the whole run and ``self.db_lock`` for
    the database section; both are released even if an exception is
    raised, so a failing compound cannot block the other workers.
    """
    from toolbox.molecule import Molecule

    self.semaphore.acquire()
    try:
        start_time = time.time()

        logging.debug("SMILES: " + self.smiles)
        diss_table = Molecule._GetDissociationTable(
            self.smiles, fmt='smiles', mid_pH=default_pH,
            min_pKa=0, max_pKa=14, T=default_T)
        # The table may be None (handled below when writing to the
        # database), so only log its attributes when it exists.
        if diss_table is not None:
            logging.debug("Min charge: %d" % diss_table.min_charge)
            logging.debug("Min nH: %d" % diss_table.min_nH)

        elapsed_time = time.time() - start_time

        self.db_lock.acquire()
        try:
            db = SqliteDatabase(self.options.db_file)
            kegg = Kegg.getInstance()
            name = kegg.cid2name(self.cid)

            if diss_table is not None:
                for row in diss_table.ToDatabaseRow():
                    db.Insert(self.options.table_name,
                              [self.cid, name] + row)
            else:
                db.Insert(self.options.table_name,
                          [self.cid, name] + [None] * 10)

            # Drop the handle before releasing the lock.
            # NOTE(review): presumably this closes the connection - confirm
            # against SqliteDatabase.
            del db
        finally:
            self.db_lock.release()

        logging.info("Completed C%05d, elapsed time = %.1f sec" %
                     (self.cid, elapsed_time))
    finally:
        self.semaphore.release()
def CalculateThermo():
    """Estimate formation energies for molecules given on the command line.

    Options are parsed from ``sys.argv`` with the parser built by
    ``MakeOpts``. The molecules come from exactly one of: a single SMILES
    string (``options.smiles``), a single InChI string (``options.inchi``)
    or a CSV file (``options.csv_input_filename``) that has an ``ID``
    column plus either an ``InChI`` or a ``smiles`` column.

    For every molecule the group decomposition is computed and, depending
    on ``options.biochemical``, either one row with the transformed energy
    (``dG0'``) at the requested pH, I, pMg and T, or one row per entry of
    the pseudoisomer matrix (``nH``, ``charge``, ``nMg``, ``dG0``) is
    produced. Molecules that cannot be decomposed, or whose groups lack
    training data, yield a row with the ``error`` column filled in.

    The rows are written as CSV to ``options.csv_output_filename`` or, when
    that is ``None``, to standard output.

    Raises:
        Exception: if an input CSV row has neither an ``InChI`` nor a
            ``smiles`` column, or if an input string cannot be converted
            to a molecule.
    """
    parser = MakeOpts()
    options, _ = parser.parse_args(sys.argv)
    pH, I, pMg, T = options.pH, options.I, options.pMg, options.T

    db = SqliteDatabase('../res/gibbs.sqlite')
    G = GroupContribution(db=db)
    G.init()
    ignore_protonations = False

    list_of_mols = []
    if options.smiles:
        list_of_mols.append({'id': options.smiles,
                             'mol': options.smiles,
                             'format': 'smiles'})
    elif options.inchi:
        list_of_mols.append({'id': options.inchi,
                             'mol': options.inchi,
                             'format': 'inchi'})
    elif options.csv_input_filename:
        # Close the input file as soon as all rows have been read.
        with open(options.csv_input_filename, 'r') as in_fp:
            for row in csv.DictReader(in_fp):
                if "InChI" in row:
                    list_of_mols.append({'id': row["ID"],
                                         'mol': row["InChI"],
                                         'format': 'inchi'})
                elif "smiles" in row:
                    list_of_mols.append({'id': row["ID"],
                                         'mol': row["smiles"],
                                         'format': 'smiles'})
                else:
                    raise Exception("There must be one molecular ID "
                                    "column: InChI or smiles")
    else:
        parser.error("must use either -s or -c option")

    if options.biochemical:
        print("Calculating biochemical formation energies for %s compounds"
              " at pH = %.1f, I = %.2f, pMg = %.1f, T = %.2f" %
              (len(list_of_mols), pH, I, pMg, T))
    else:
        print("Calculating chemical formation energies for %s compounds" %
              len(list_of_mols))

    rowdicts = []
    for mol_dict in list_of_mols:
        mol_id = mol_dict['id']
        # Reset per molecule so an error row can never report the group
        # vector of a previously processed molecule.
        groupvec = None
        diss_table = Molecule._GetDissociationTable(mol_dict['mol'],
                                                    fmt=mol_dict['format'])
        if diss_table is None:
            raise Exception("Cannot convert input string to Molecule: " +
                            mol_dict['mol'])
        try:
            mol = diss_table.GetMostAbundantMol(pH, I, pMg, T) or \
                  diss_table.GetAnyMol()
            if mol is None:
                raise Exception("Cannot convert input string to Molecule: " +
                                mol_dict['mol'])

            decomposition = G.Mol2Decomposition(
                mol, ignore_protonations=ignore_protonations)
            groupvec = decomposition.AsVector()
            dG0 = G.groupvec2val(groupvec)
            nH = decomposition.Hydrogens()
            nMg = decomposition.Magnesiums()
            diss_table.SetFormationEnergyByNumHydrogens(dG0, nH, nMg)
            pmap = diss_table.GetPseudoisomerMap()

            if options.biochemical:
                dG0_prime = pmap.Transform(pH, pMg, I, T)
                rowdicts.append({'ID': mol_id,
                                 'pH': pH,
                                 'I': I,
                                 'pMg': pMg,
                                 'dG0\'': "%.1f" % dG0_prime,
                                 'groupvec': str(groupvec)})
            else:
                for p_nH, p_z, p_nMg, p_dG0 in pmap.ToMatrix():
                    rowdicts.append({'ID': mol_id,
                                     'nH': p_nH,
                                     'charge': p_z,
                                     'nMg': p_nMg,
                                     'dG0': "%.1f" % p_dG0,
                                     'groupvec': str(groupvec)})
        except GroupDecompositionError:
            rowdicts.append({'ID': mol_id, 'error': "cannot decompose"})
        except GroupMissingTrainDataError:
            error_row = {'ID': mol_id, 'error': "missing training data"}
            # The group vector is only known if the error was raised after
            # the decomposition had been turned into a vector.
            if groupvec is not None:
                error_row['groupvec'] = str(groupvec)
            rowdicts.append(error_row)

    if options.biochemical:
        titles = ['ID', 'error', 'pH', 'I', 'pMg', 'dG0\'', 'groupvec']
    else:
        titles = ['ID', 'error', 'nH', 'nMg', 'charge', 'dG0', 'groupvec']

    if options.csv_output_filename is not None:
        out_fp = open(options.csv_output_filename, 'w')
        print("writing results to %s ... " % options.csv_output_filename)
    else:
        out_fp = sys.stdout

    try:
        csv_writer = csv.DictWriter(out_fp, titles)
        csv_writer.writeheader()
        csv_writer.writerows(rowdicts)
    finally:
        # Close (and thereby flush) a file opened here; leave stdout alone.
        if out_fp is not sys.stdout:
            out_fp.close()
def CalculateThermo():
    """Estimate formation energies for molecules given on the command line.

    Options are parsed from ``sys.argv`` with the parser built by
    ``MakeOpts``. The molecules come from exactly one of: a single SMILES
    string (``options.smiles``), a single InChI string (``options.inchi``)
    or a CSV file (``options.csv_input_filename``) that has an ``ID``
    column plus either an ``InChI`` or a ``smiles`` column.

    For every molecule the group decomposition is computed and, depending
    on ``options.biochemical``, either one row with the transformed energy
    (``dG0'``) at the requested pH, I, pMg and T, or one row per entry of
    the pseudoisomer matrix (``nH``, ``charge``, ``nMg``, ``dG0``) is
    produced. Molecules that cannot be decomposed, or whose groups lack
    training data, yield a row with the ``error`` column filled in.

    The rows are written as CSV to ``options.csv_output_filename`` or, when
    that is ``None``, to standard output.

    Raises:
        Exception: if an input CSV row has neither an ``InChI`` nor a
            ``smiles`` column, or if an input string cannot be converted
            to a molecule.
    """
    parser = MakeOpts()
    options, _ = parser.parse_args(sys.argv)
    pH, I, pMg, T = options.pH, options.I, options.pMg, options.T

    db = SqliteDatabase('../res/gibbs.sqlite')
    G = GroupContribution(db=db)
    G.init()
    ignore_protonations = False

    list_of_mols = []
    if options.smiles:
        list_of_mols.append({
            'id': options.smiles,
            'mol': options.smiles,
            'format': 'smiles'
        })
    elif options.inchi:
        list_of_mols.append({
            'id': options.inchi,
            'mol': options.inchi,
            'format': 'inchi'
        })
    elif options.csv_input_filename:
        # Close the input file as soon as all rows have been read.
        with open(options.csv_input_filename, 'r') as in_fp:
            for row in csv.DictReader(in_fp):
                if "InChI" in row:
                    list_of_mols.append({
                        'id': row["ID"],
                        'mol': row["InChI"],
                        'format': 'inchi'
                    })
                elif "smiles" in row:
                    list_of_mols.append({
                        'id': row["ID"],
                        'mol': row["smiles"],
                        'format': 'smiles'
                    })
                else:
                    raise Exception(
                        "There must be one molecular ID column: InChI or smiles")
    else:
        parser.error("must use either -s or -c option")

    if options.biochemical:
        print(
            "Calculating biochemical formation energies for %s compounds"
            " at pH = %.1f, I = %.2f, pMg = %.1f, T = %.2f" %
            (len(list_of_mols), pH, I, pMg, T))
    else:
        print("Calculating chemical formation energies for %s compounds" %
              len(list_of_mols))

    rowdicts = []
    for mol_dict in list_of_mols:
        mol_id = mol_dict['id']
        # Reset per molecule so an error row can never report the group
        # vector of a previously processed molecule.
        groupvec = None
        diss_table = Molecule._GetDissociationTable(mol_dict['mol'],
                                                    fmt=mol_dict['format'])
        if diss_table is None:
            raise Exception("Cannot convert input string to Molecule: " +
                            mol_dict['mol'])
        try:
            mol = diss_table.GetMostAbundantMol(pH, I, pMg, T) or \
                  diss_table.GetAnyMol()
            if mol is None:
                raise Exception("Cannot convert input string to Molecule: " +
                                mol_dict['mol'])

            decomposition = G.Mol2Decomposition(
                mol, ignore_protonations=ignore_protonations)
            groupvec = decomposition.AsVector()
            dG0 = G.groupvec2val(groupvec)
            nH = decomposition.Hydrogens()
            nMg = decomposition.Magnesiums()
            diss_table.SetFormationEnergyByNumHydrogens(dG0, nH, nMg)
            pmap = diss_table.GetPseudoisomerMap()

            if options.biochemical:
                dG0_prime = pmap.Transform(pH, pMg, I, T)
                rowdicts.append({
                    'ID': mol_id,
                    'pH': pH,
                    'I': I,
                    'pMg': pMg,
                    'dG0\'': "%.1f" % dG0_prime,
                    'groupvec': str(groupvec)
                })
            else:
                for p_nH, p_z, p_nMg, p_dG0 in pmap.ToMatrix():
                    rowdicts.append({
                        'ID': mol_id,
                        'nH': p_nH,
                        'charge': p_z,
                        'nMg': p_nMg,
                        'dG0': "%.1f" % p_dG0,
                        'groupvec': str(groupvec)
                    })
        except GroupDecompositionError:
            rowdicts.append({'ID': mol_id, 'error': "cannot decompose"})
        except GroupMissingTrainDataError:
            error_row = {'ID': mol_id, 'error': "missing training data"}
            # The group vector is only known if the error was raised after
            # the decomposition had been turned into a vector.
            if groupvec is not None:
                error_row['groupvec'] = str(groupvec)
            rowdicts.append(error_row)

    if options.biochemical:
        titles = ['ID', 'error', 'pH', 'I', 'pMg', 'dG0\'', 'groupvec']
    else:
        titles = ['ID', 'error', 'nH', 'nMg', 'charge', 'dG0', 'groupvec']

    if options.csv_output_filename is not None:
        out_fp = open(options.csv_output_filename, 'w')
        print("writing results to %s ... " % options.csv_output_filename)
    else:
        out_fp = sys.stdout

    try:
        csv_writer = csv.DictWriter(out_fp, titles)
        csv_writer.writeheader()
        csv_writer.writerows(rowdicts)
    finally:
        # Close (and thereby flush) a file opened here; leave stdout alone.
        if out_fp is not sys.stdout:
            out_fp.close()