def run(self):
        """Compute the dissociation table for one compound and store it.

        Holds ``self.semaphore`` for the whole computation, builds the
        dissociation table from ``self.smiles``, and then, while holding
        ``self.db_lock``, inserts the resulting rows into the table
        ``self.options.table_name`` of the sqlite file
        ``self.options.db_file``.  Every inserted row is prefixed with
        ``self.cid`` and the compound name looked up through ``Kegg``.

        Both the semaphore and the lock are released even if the
        computation or the database write raises, so a failure here cannot
        leave them held.
        """
        from toolbox.molecule import Molecule

        self.semaphore.acquire()
        try:
            start_time = time.time()

            logging.debug("SMILES: " + self.smiles)
            diss_table = Molecule._GetDissociationTable(self.smiles,
                                                        fmt='smiles',
                                                        mid_pH=default_pH,
                                                        min_pKa=0,
                                                        max_pKa=14,
                                                        T=default_T)
            # The table is treated as possibly None further down, so it must
            # not be dereferenced unconditionally here (doing so made the
            # placeholder-row branch below unreachable).
            if diss_table is not None:
                logging.debug("Min charge: %d" % diss_table.min_charge)
                logging.debug("Min nH: %d" % diss_table.min_nH)

            elapsed_time = time.time() - start_time

            self.db_lock.acquire()
            try:
                db = SqliteDatabase(self.options.db_file)
                kegg = Kegg.getInstance()
                name = kegg.cid2name(self.cid)

                if diss_table is not None:
                    for row in diss_table.ToDatabaseRow():
                        db.Insert(self.options.table_name,
                                  [self.cid, name] + row)
                else:
                    # No table available: store a single placeholder row of
                    # ten None columns after the compound id and name.
                    db.Insert(self.options.table_name,
                              [self.cid, name] + [None] * 10)
                # Drop the database handle before giving up the lock
                # (presumably this is what closes the connection - confirm
                # against SqliteDatabase).
                del db
            finally:
                self.db_lock.release()

            logging.info("Completed C%05d, elapsed time = %.1f sec" %
                         (self.cid, elapsed_time))
        finally:
            self.semaphore.release()
    def run(self):
        """Compute and persist the dissociation table of ``self.smiles``.

        Steps, all performed while ``self.semaphore`` is held:

        1. Build the dissociation table with
           ``Molecule._GetDissociationTable`` (pKa range 0-14, using the
           module-level ``default_pH`` and ``default_T``).
        2. Under ``self.db_lock``, open ``self.options.db_file`` and insert
           one row per entry of ``diss_table.ToDatabaseRow()`` into
           ``self.options.table_name``; each row starts with ``self.cid``
           and the name returned by ``Kegg.cid2name``.  If no table was
           produced, a single row padded with ten ``None`` values is
           inserted instead.
        3. Log the elapsed computation time.

        The lock and the semaphore are released in ``finally`` clauses so
        that an exception does not leave either of them held.
        """
        from toolbox.molecule import Molecule

        self.semaphore.acquire()
        try:
            start_time = time.time()

            logging.debug("SMILES: " + self.smiles)
            diss_table = Molecule._GetDissociationTable(
                self.smiles, fmt='smiles', mid_pH=default_pH,
                min_pKa=0, max_pKa=14, T=default_T)
            # Only log table details when a table exists; the code below
            # explicitly handles the None case, which used to be
            # unreachable because of an unconditional attribute access.
            if diss_table is not None:
                logging.debug("Min charge: %d" % diss_table.min_charge)
                logging.debug("Min nH: %d" % diss_table.min_nH)

            # Only the table computation is timed, not the database write.
            elapsed_time = time.time() - start_time

            self.db_lock.acquire()
            try:
                db = SqliteDatabase(self.options.db_file)
                name = Kegg.getInstance().cid2name(self.cid)
                prefix = [self.cid, name]

                if diss_table is not None:
                    for row in diss_table.ToDatabaseRow():
                        db.Insert(self.options.table_name, prefix + row)
                else:
                    db.Insert(self.options.table_name, prefix + [None] * 10)
                # Release the handle while the lock is still held
                # (NOTE(review): assumed to close the connection - confirm).
                del db
            finally:
                self.db_lock.release()

            logging.info("Completed C%05d, elapsed time = %.1f sec" %
                         (self.cid, elapsed_time))
        finally:
            self.semaphore.release()
def CalculateThermo():
    """Estimate formation energies for the molecules given on the command line.

    Molecules come from exactly one source, checked in this order: the
    ``smiles`` option, the ``inchi`` option, or a CSV file
    (``csv_input_filename``) that has an ``ID`` column plus either an
    ``InChI`` or a ``smiles`` column.  For each molecule a dissociation
    table is built, the molecule is decomposed into groups with
    ``GroupContribution`` and the resulting energies are collected as CSV
    rows.

    With the ``biochemical`` option one row per molecule is written
    (transformed energy at the given pH, I, pMg, T); otherwise one row per
    pseudoisomer is written.  Molecules that cannot be decomposed, or whose
    groups lack training data, get a row with an ``error`` column instead.

    Output goes to ``csv_output_filename`` when set, else to stdout.

    Raises:
        Exception: if a CSV row has neither an ``InChI`` nor a ``smiles``
            column, or if no molecule can be obtained from an input string.
    """
    parser = MakeOpts()
    options, _ = parser.parse_args(sys.argv)
    pH, I, pMg, T = options.pH, options.I, options.pMg, options.T

    db = SqliteDatabase('../res/gibbs.sqlite')
    G = GroupContribution(db=db)
    G.init()
    ignore_protonations = False

    list_of_mols = []
    if options.smiles:
        list_of_mols.append({'id':options.smiles, 'mol':options.smiles,
            'format':'smiles'})
    elif options.inchi:
        list_of_mols.append({'id':options.inchi, 'mol':options.inchi,
            'format':'inchi'})
    elif options.csv_input_filename:
        # Use a context manager so the input file is closed once read.
        with open(options.csv_input_filename, 'r') as in_fp:
            for row in csv.DictReader(in_fp):
                if "InChI" in row:
                    list_of_mols.append({'id':row["ID"], 'mol':row["InChI"],
                                         'format':'inchi'})
                elif "smiles" in row:
                    list_of_mols.append({'id':row["ID"], 'mol':row["smiles"],
                                         'format':'smiles'})
                else:
                    raise Exception("There must be one molecular ID column: InChI or smiles")
    else:
        # NOTE(review): presumably an optparse parser, whose error() prints
        # the message and exits - confirm against MakeOpts.
        parser.error("must use either -s or -c option")

    if options.biochemical:
        print ("Calculating biochemical formation energies for %s compounds"
               " at pH = %.1f, I = %.2f, pMg = %.1f, T = %.2f" %
               (len(list_of_mols), pH, I, pMg, T))
    else:
        print ("Calculating chemical formation energies for %s compounds" %
               len(list_of_mols))

    rowdicts = []
    for mol_dict in list_of_mols:
        mol_id = mol_dict['id']
        # Reset per molecule so the error handler below can never report a
        # group vector left over from a previous iteration (or hit an
        # unbound name on the first one).
        groupvec = None
        diss_table = Molecule._GetDissociationTable(mol_dict['mol'],
                                                    fmt=mol_dict['format'])
        try:
            mol = diss_table.GetMostAbundantMol(pH, I, pMg, T) or \
                  diss_table.GetAnyMol()
            if mol is None:
                raise Exception("Cannot convert input string to Molecule: " +
                                mol_dict['mol'])

            decomposition = G.Mol2Decomposition(mol,
                ignore_protonations=ignore_protonations)
            groupvec = decomposition.AsVector()
            dG0 = G.groupvec2val(groupvec)
            nH = decomposition.Hydrogens()
            nMg = decomposition.Magnesiums()
            diss_table.SetFormationEnergyByNumHydrogens(dG0, nH, nMg)
            pmap = diss_table.GetPseudoisomerMap()

            if options.biochemical:
                dG0_prime = pmap.Transform(pH, pMg, I, T)
                rowdicts.append({'ID':mol_id, 'pH':pH, 'I':I, 'pMg':pMg,
                                 'dG0\'':"%.1f" % dG0_prime, 'groupvec':str(groupvec)})
            else:
                for p_nH, p_z, p_nMg, p_dG0 in pmap.ToMatrix():
                    rowdicts.append({'ID':mol_id, 'nH':p_nH, 'charge':p_z, 'nMg':p_nMg,
                                     'dG0':"%.1f" % p_dG0, 'groupvec':str(groupvec)})
        except GroupDecompositionError:
            rowdicts.append({'ID':mol_id, 'error':"cannot decompose"})
        except GroupMissingTrainDataError:
            error_row = {'ID':mol_id, 'error':"missing training data"}
            # Only report the group vector if it was computed for THIS
            # molecule before the error occurred.
            if groupvec is not None:
                error_row['groupvec'] = str(groupvec)
            rowdicts.append(error_row)

    if options.csv_output_filename is not None:
        out_fp = open(options.csv_output_filename, 'w')
        # Parenthesised form works as both a Python 2 print statement and a
        # Python 3 print call, matching the other prints in this function.
        print("writing results to %s ... " % options.csv_output_filename)
    else:
        out_fp = sys.stdout

    try:
        if options.biochemical:
            titles = ['ID', 'error', 'pH', 'I', 'pMg', 'dG0\'', 'groupvec']
        else:
            titles = ['ID', 'error', 'nH', 'nMg', 'charge', 'dG0', 'groupvec']
        csv_writer = csv.DictWriter(out_fp, titles)
        csv_writer.writeheader()
        csv_writer.writerows(rowdicts)
    finally:
        # Close the output file we opened, but never close stdout.
        if out_fp is not sys.stdout:
            out_fp.close()
Exemple #4
0
def CalculateThermo():
    """Compute group-contribution formation energies and write them as CSV.

    Input molecules are taken from the first of these options that is set:
    ``smiles`` (a single SMILES string), ``inchi`` (a single InChI string)
    or ``csv_input_filename`` (a CSV file with an ``ID`` column and either
    an ``InChI`` or a ``smiles`` column).  If none is set the option parser
    reports an error.

    For every molecule the function builds a dissociation table, picks the
    most abundant species at the requested pH, I, pMg and T (falling back
    to any species), decomposes it into groups and derives the formation
    energy.  In ``biochemical`` mode one transformed-energy row is produced
    per molecule; otherwise one row per pseudoisomer.  Decomposition
    failures and missing training data are recorded in an ``error`` column
    rather than aborting the run.

    Rows are written to ``csv_output_filename`` if given, otherwise to
    stdout.

    Raises:
        Exception: when a CSV row has neither molecular column, or when an
            input string yields no molecule at all.
    """
    parser = MakeOpts()
    options, _ = parser.parse_args(sys.argv)
    pH, I, pMg, T = options.pH, options.I, options.pMg, options.T

    db = SqliteDatabase('../res/gibbs.sqlite')
    G = GroupContribution(db=db)
    G.init()
    ignore_protonations = False

    list_of_mols = []
    if options.smiles:
        list_of_mols.append({
            'id': options.smiles,
            'mol': options.smiles,
            'format': 'smiles'
        })
    elif options.inchi:
        list_of_mols.append({
            'id': options.inchi,
            'mol': options.inchi,
            'format': 'inchi'
        })
    elif options.csv_input_filename:
        # The context manager guarantees the input file is closed, also when
        # a malformed row raises below.
        with open(options.csv_input_filename, 'r') as in_fp:
            for row in csv.DictReader(in_fp):
                if "InChI" in row:
                    list_of_mols.append({
                        'id': row["ID"],
                        'mol': row["InChI"],
                        'format': 'inchi'
                    })
                elif "smiles" in row:
                    list_of_mols.append({
                        'id': row["ID"],
                        'mol': row["smiles"],
                        'format': 'smiles'
                    })
                else:
                    raise Exception(
                        "There must be one molecular ID column: InChI or smiles")
    else:
        # NOTE(review): assumed to terminate the program, as optparse's
        # OptionParser.error does - confirm against MakeOpts.
        parser.error("must use either -s or -c option")

    if options.biochemical:
        print(
            "Calculating biochemical formation energies for %s compounds"
            " at pH = %.1f, I = %.2f, pMg = %.1f, T = %.2f" %
            (len(list_of_mols), pH, I, pMg, T))
    else:
        print("Calculating chemical formation energies for %s compounds" %
              len(list_of_mols))

    rowdicts = []
    for mol_dict in list_of_mols:
        mol_id = mol_dict['id']
        # Cleared for every molecule: the missing-training-data handler must
        # not pick up a vector from an earlier molecule or an unbound name.
        groupvec = None
        diss_table = Molecule._GetDissociationTable(mol_dict['mol'],
                                                    fmt=mol_dict['format'])
        try:
            mol = diss_table.GetMostAbundantMol(pH, I, pMg, T) or \
                  diss_table.GetAnyMol()
            if mol is None:
                raise Exception("Cannot convert input string to Molecule: " +
                                mol_dict['mol'])

            decomposition = G.Mol2Decomposition(
                mol, ignore_protonations=ignore_protonations)
            groupvec = decomposition.AsVector()
            dG0 = G.groupvec2val(groupvec)
            nH = decomposition.Hydrogens()
            nMg = decomposition.Magnesiums()
            diss_table.SetFormationEnergyByNumHydrogens(dG0, nH, nMg)
            pmap = diss_table.GetPseudoisomerMap()

            if options.biochemical:
                dG0_prime = pmap.Transform(pH, pMg, I, T)
                rowdicts.append({
                    'ID': mol_id,
                    'pH': pH,
                    'I': I,
                    'pMg': pMg,
                    'dG0\'': "%.1f" % dG0_prime,
                    'groupvec': str(groupvec)
                })
            else:
                for p_nH, p_z, p_nMg, p_dG0 in pmap.ToMatrix():
                    rowdicts.append({
                        'ID': mol_id,
                        'nH': p_nH,
                        'charge': p_z,
                        'nMg': p_nMg,
                        'dG0': "%.1f" % p_dG0,
                        'groupvec': str(groupvec)
                    })
        except GroupDecompositionError:
            rowdicts.append({'ID': mol_id, 'error': "cannot decompose"})
        except GroupMissingTrainDataError:
            error_row = {'ID': mol_id, 'error': "missing training data"}
            # Include the vector only when it belongs to this molecule.
            if groupvec is not None:
                error_row['groupvec'] = str(groupvec)
            rowdicts.append(error_row)

    if options.csv_output_filename is not None:
        out_fp = open(options.csv_output_filename, 'w')
        # Written as a call so it parses on Python 3 too and matches the
        # other prints above; output is unchanged on Python 2.
        print("writing results to %s ... " % options.csv_output_filename)
    else:
        out_fp = sys.stdout

    try:
        if options.biochemical:
            titles = ['ID', 'error', 'pH', 'I', 'pMg', 'dG0\'', 'groupvec']
        else:
            titles = ['ID', 'error', 'nH', 'nMg', 'charge', 'dG0', 'groupvec']
        csv_writer = csv.DictWriter(out_fp, titles)
        csv_writer.writeheader()
        csv_writer.writerows(rowdicts)
    finally:
        # Only close what this function opened; stdout stays open.
        if out_fp is not sys.stdout:
            out_fp.close()