def loadThermoDatabase(path):
    """
    Load the RMG thermodynamics database from `path`.

    A new ``ThermoDatabase`` is created, bound to the module-level name
    ``thermoDatabase``, and then populated by calling its ``load`` method
    with `path`. A progress message is written to standard output before
    loading starts. Nothing is returned.
    """
    global thermoDatabase

    # A parenthesised single-argument print produces the same output whether
    # it is parsed as the Python 2 statement or the Python 3 function.
    print('Loading thermodynamics database...')
    thermoDatabase = ThermoDatabase()
    thermoDatabase.load(path)
def load_thermo_database(libraries: Optional[list] = None):
    """
    Create a ``ThermoDatabase`` and load it from the RMG database directory.

    The thermo folder is located at ``<database.directory>/thermo``, where
    ``database.directory`` is read from ``rmg_settings``.

    Args:
        libraries (Optional[list]): Passed through unchanged as the
            ``libraries`` keyword of ``ThermoDatabase.load``. With the default
            ``None`` the selection is left to ``load`` (presumably every
            library is loaded; confirm against ``ThermoDatabase.load``).

    Returns:
        The ``ThermoDatabase`` instance after ``load`` has been called on it.
    """
    thermo_root = os.path.join(rmg_settings['database.directory'], 'thermo')

    database = ThermoDatabase()
    database.load(thermo_root, libraries=libraries)
    return database
def testRemoveGroup(self):
    """
    Test removing groups located near the root of the 'radical' group tree.

    Two removals are exercised: first the 'RJ' entry, then the 'CsJ' entry.
    After each one the tree structure and the ``data`` attributes of the
    affected entries are checked.
    """
    # Load the reduced test database shipped next to the rmgpy package.
    testDatabase = ThermoDatabase()
    testDataDir = os.path.join(os.path.dirname(rmgpy.__file__),'data/test_data/')
    testDatabase.load(os.path.join(testDataDir, 'thermo'), depository = False)

    radicalTree = testDatabase.groups['radical']

    # The first top-level node is the parent of the group being removed
    # (expected to be a LogicOr node).
    rootEntry = radicalTree.top[0]
    removedEntry = radicalTree.entries['RJ']
    # Keep a reference to the child list before the removal happens.
    orphanedChildren = removedEntry.children

    radicalTree.removeGroup(removedEntry)

    # The removed entry must be gone from both the entry table and the root.
    self.assertTrue(removedEntry not in radicalTree.entries.values())
    self.assertTrue(removedEntry not in rootEntry.children)

    # Every former child must now hang directly off the root, both in the
    # root's logic-node components and in its children list.
    for orphan in orphanedChildren:
        self.assertTrue(orphan.label in rootEntry.item.components)
        self.assertTrue(orphan in rootEntry.children)
        self.assertTrue(orphan.parent is rootEntry)

    # The checks below are specific to ThermoDatabase (those above apply to
    # the base Database class): entry.data pointers must be reassigned.

    # The removed entry's data was itself a pointer, so an entry that pointed
    # at the removed entry receives that same pointer.
    self.assertTrue(radicalTree.entries['OJ'].data is removedEntry.data)

    # Now remove an entry that carries an actual ThermoData object.
    secondRemoved = radicalTree.entries['CsJ']
    radicalTree.removeGroup(secondRemoved)
    formerParent = secondRemoved.parent

    # An entry that pointed at the removed entry now points at its parent.
    self.assertTrue(radicalTree.entries['RJ2_triplet'].data == formerParent.label)

    # The parent used to point at the removed entry, so it must now hold a
    # copy of the data object rather than a string label.
    temperatures = [300, 400, 500, 600, 800, 1000, 1500]
    self.assertFalse(isinstance(formerParent.data, basestring))
    self.assertTrue(formerParent.data.getEnthalpy(298) == secondRemoved.data.getEnthalpy(298))
    self.assertTrue(formerParent.data.getEntropy(298) == secondRemoved.data.getEntropy(298))
    heatCapacityMatches = [
        formerParent.data.getHeatCapacity(T) == secondRemoved.data.getHeatCapacity(T)
        for T in temperatures
    ]
    self.assertFalse(False in heatCapacityMatches)
def __init__(self, kernel_type):
    """
    Store the kernel type and build the matching kernel.

    Only ``'GA'`` is handled. For it, a ``ThermoDatabase`` is loaded from
    ``<database.directory>/thermo`` with an empty ``libraries`` list and
    stored on ``self.kernel``; the database directory is printed first.

    Raises:
        Exception: if `kernel_type` is anything other than ``'GA'``.
            ``self.kernel_type`` has already been set when this is raised.
    """
    self.kernel_type = kernel_type

    if kernel_type != 'GA':
        raise Exception(
            'Kernel type {0} not supported yet.'.format(kernel_type))

    # These imports live here so that rmgpy is only imported on the 'GA' path.
    from rmgpy.data.thermo import ThermoDatabase
    from rmgpy import settings

    ga_database = ThermoDatabase()
    database_dir = settings['database.directory']
    print('GA Database directory:\n{0}'.format(database_dir))
    ga_database.load(os.path.join(database_dir, 'thermo'), libraries=[])
    self.kernel = ga_database
def getH298(self, thermo_db=None):
    """
    Compute and return the standard enthalpy of formation of the structure
    in kcal/mol.

    The structure is split with ``self.separateMol()``. Each molecule in
    ``self.mols`` is converted to an RMG species, given thermo data from
    `thermo_db`, and its enthalpy at 298 K (divided by
    ``constants.kcal_to_J``) is added to the total.

    `thermo_db` may be a :class:`rmgpy.data.thermo.ThermoDatabase` instance.
    When it is ``None`` a new one is loaded from the ``thermo`` folder of
    ``settings['database.directory']``.
    """
    if thermo_db is None:
        # No database supplied by the caller: load one from disk.
        thermo_db = ThermoDatabase()
        thermo_db.load(os.path.join(settings['database.directory'], 'thermo'))

    # Populate self.mols before iterating over it.
    self.separateMol()

    total_enthalpy = 0.0
    for fragment in self.mols:
        species = fragment.toRMGSpecies()
        species.thermo = thermo_db.getThermoData(species)
        total_enthalpy += species.getEnthalpy(298.0) / constants.kcal_to_J

    return total_enthalpy
def getH298(self, thermo_db=None):
    """
    Return the combined standard enthalpy of formation, in kcal/mol, of all
    molecules making up this structure.

    A :class:`rmgpy.data.thermo.ThermoDatabase` instance can be passed as
    `thermo_db`; it is used to obtain thermo data for each molecule. If it
    is omitted, a database is loaded from
    ``<settings['database.directory']>/thermo``.
    """
    # Fall back to a freshly loaded database when none was provided.
    if thermo_db is None:
        thermo_root = os.path.join(settings['database.directory'], 'thermo')
        thermo_db = ThermoDatabase()
        thermo_db.load(thermo_root)

    enthalpy_sum = 0.0
    self.separateMol()
    for molecule in self.mols:
        rmg_species = molecule.toRMGSpecies()
        rmg_species.thermo = thermo_db.getThermoData(rmg_species)
        # getEnthalpy's result is divided by kcal_to_J to express it in kcal.
        enthalpy_kcal = rmg_species.getEnthalpy(298.0) / constants.kcal_to_J
        enthalpy_sum += enthalpy_kcal

    return enthalpy_sum
class TestThermoDatabase(unittest.TestCase):
    """
    Contains unit tests of the ThermoDatabase class.
    """

    def setUp(self):
        """
        A function run before each unit test in this class.

        Loads the current database and the old-format database, and defines
        the temperatures and reference values used by the tests.
        """
        self.database = ThermoDatabase()
        self.database.load(os.path.join(settings['database.directory'], 'thermo'))

        self.oldDatabase = ThermoDatabase()
        self.oldDatabase.loadOld(os.path.join(settings['database.directory'], '../output/RMG_database'))

        # Temperatures matching the Cp300 ... Cp1500 columns below.
        self.Tlist = [300, 400, 500, 600, 800, 1000, 1500]

        self.testCases = [
            # SMILES            symm  H298     S298     Cp300  Cp400  Cp500  Cp600  Cp800  Cp1000 Cp1500

            # 1,3-hexadiene decomposition products
            ['C=CC=CCC', 3, 13.5090, 86.5641, 29.49, 37.67, 44.54, 50.12, 58.66, 64.95, 74.71],
            ['[CH]=CC=CCC', 3, 72.6056, 87.9528, 29.30, 36.92, 43.18, 48.20, 55.84, 61.46, 70.18],
            ['C=[C]C=CCC', 3, 61.2064, 87.2754, 29.68, 36.91, 43.03, 48.11, 55.96, 61.78, 71.54],
            ['C=C[C]=CCC', 3, 61.2064, 87.2754, 29.68, 36.91, 43.03, 48.11, 55.96, 61.78, 71.54],
            ['C=CC=[C]CC', 3, 70.4053, 88.3718, 29.15, 36.46, 42.60, 47.60, 55.32, 61.04, 69.95],
            ['C=CC=C[CH]C', 6, 38.2926, 84.5953, 27.79, 35.46, 41.94, 47.43, 55.74, 61.92, 71.86],
            ['C=CC=CC[CH2]', 2, 62.5044, 89.9747, 28.72, 36.31, 42.63, 47.72, 55.50, 61.21, 70.05],
            ['[CH3]', 6, 35.1084, 46.3644, 9.20, 9.98, 10.75, 11.50, 12.86, 14.08, 16.29],
            ['C=CC=C[CH2]', 2, 46.1521, 75.9733, 22.54, 28.95, 34.24, 38.64, 45.14, 49.97, 57.85],
            ['[CH2]C', 6, 28.3580, 59.0565, 12.11, 14.59, 17.08, 19.35, 22.93, 25.78, 30.30],
            ['C=CC=[CH]', 1, 85.2149, 69.4966, 18.93, 23.55, 27.16, 29.92, 34.02, 37.03, 41.81],
            ['C=[CH]', 1, 71.6377, 55.8964, 10.24, 12.03, 13.71, 15.17, 17.35, 19.07, 21.82],
            ['[CH]=CCC', 3, 59.0278, 75.1332, 20.38, 25.34, 29.68, 33.36, 39.14, 43.48, 50.22],

            # Cyclic structures
            ['c1ccccc1', 1, 19.8389, 69.3100, 19.44, 26.64, 32.76, 37.80, 45.24, 50.46, 58.38],
            ['C1CCCCC1', 1, -29.4456, 74.8296, 27.20, 37.60, 46.60, 54.80, 67.50, 76.20, 88.50],
            ['c1ccc2ccccc2c1', 1, 36.0639, 82.4536, 31.94, 42.88, 52.08, 59.62, 70.72, 78.68, 90.24],
            ['C1CCC1', 1, 6.5148, 67.5963, 17.39, 23.91, 29.86, 34.76, 42.40, 47.98, 56.33],
            ['C1C=CC=C1', 1, 32.5363, 67.0035, 18.16, 24.71, 30.25, 34.70, 41.25, 45.83, 52.61],
        ]

    def _checkThermoGeneration(self, database, enthalpyTol, entropyTol, heatCapacityTol):
        """
        Compare thermo data generated by `database` against ``self.testCases``.

        For each test case the resonance isomer with the lowest enthalpy at
        298 K is selected. Its symmetry number must equal the reference, and
        its enthalpy, entropy and heat capacities must agree with the
        reference values to within the given relative tolerances.

        The relative error is compared by absolute value, so over-predictions
        and under-predictions are both caught.
        """
        for smiles, symm, H298, S298, Cp300, Cp400, Cp500, Cp600, Cp800, Cp1000, Cp1500 in self.testCases:
            Cplist = [Cp300, Cp400, Cp500, Cp600, Cp800, Cp1000, Cp1500]
            species = Species(molecule=[Molecule(SMILES=smiles)])
            species.generateResonanceIsomers()

            # Start from the first isomer, then look for a lower-enthalpy one.
            thermoData = database.getThermoData(Species(molecule=[species.molecule[0]]))
            molecule = species.molecule[0]
            for mol in species.molecule[1:]:
                # NOTE(review): the first result is unpacked with [0][0] while
                # the remaining results are used as-is; confirm the element
                # shape returned by getAllThermoData.
                thermoData0 = database.getAllThermoData(Species(molecule=[mol]))[0][0]
                for data in database.getAllThermoData(Species(molecule=[mol]))[1:]:
                    if data.getEnthalpy(298) < thermoData0.getEnthalpy(298):
                        thermoData0 = data
                if thermoData0.getEnthalpy(298) < thermoData.getEnthalpy(298):
                    thermoData = thermoData0
                    molecule = mol

            self.assertEqual(molecule.calculateSymmetryNumber(), symm)

            # The divisors 4184 and 4.184 rescale the computed values to the
            # units of the reference table (presumably J -> kcal and J -> cal).
            self.assertLess(
                abs(1 - thermoData.getEnthalpy(298) / 4184 / H298), enthalpyTol,
                'H298 mismatch for {0}'.format(smiles))
            self.assertLess(
                abs(1 - thermoData.getEntropy(298) / 4.184 / S298), entropyTol,
                'S298 mismatch for {0}'.format(smiles))
            for T, Cp in zip(self.Tlist, Cplist):
                self.assertLess(
                    abs(1 - thermoData.getHeatCapacity(T) / 4.184 / Cp), heatCapacityTol,
                    'Cp({0}) mismatch for {1}'.format(T, smiles))

    def testNewThermoGeneration(self):
        """
        Test that the new ThermoDatabase generates appropriate thermo data.
        """
        self._checkThermoGeneration(self.database, 0.001, 0.001, 0.001)

    def testOldThermoGeneration(self):
        """
        Test that the old ThermoDatabase generates relatively accurate thermo data.
        """
        self._checkThermoGeneration(self.oldDatabase, 0.01, 0.01, 0.1)
class RMGDatabase(object):
    """
    The primary class for working with the RMG database.

    Each component (``thermo``, ``transport``, ``forbidden_structures``,
    ``kinetics``, ``statmech``, ``solvation``, ``surface``) starts out as
    ``None`` and is populated by the matching ``load_*`` method.
    """

    def __init__(self):
        self.thermo = None
        self.transport = None
        self.forbidden_structures = None
        self.kinetics = None
        self.statmech = None
        self.solvation = None
        self.surface = None

        # Store the newly created database in the module.
        global database
        if database is not None:
            logging.warning(
                'An instance of RMGDatabase already exists. Re-initializing it.'
            )
        database = self

    def load(
            self,
            path,
            thermo_libraries=None,
            transport_libraries=None,
            reaction_libraries=None,
            seed_mechanisms=None,
            kinetics_families=None,
            kinetics_depositories=None,
            statmech_libraries=None,
            depository=True,
            solvation=True,
            surface=True,  # on by default, because solvation is also on by default
            testing=False):
        """
        Load the RMG database from the given `path` on disk, where `path`
        points to the top-level folder of the RMG database. If none of the
        optional arguments are provided, then the entire database will be
        loaded. You can use the optional arguments to specify that only
        certain components of the database be loaded.

        Thermo, forbidden structures and kinetics are always loaded.
        Transport and statmech are skipped when `testing` is true, which
        gives a lighter database for unit tests. Solvation and surface are
        loaded only when their flags are true.
        """
        # Thermo is loaded unconditionally; `surface` is only forwarded to
        # ThermoDatabase.load and must not gate the thermo load itself.
        self.load_thermo(os.path.join(path, 'thermo'), thermo_libraries,
                         depository, surface)
        if not testing:
            self.load_transport(os.path.join(path, 'transport'),
                                transport_libraries)
        self.load_forbidden_structures(
            os.path.join(path, 'forbiddenStructures.py'))
        self.load_kinetics(os.path.join(path, 'kinetics'), reaction_libraries,
                           seed_mechanisms, kinetics_families,
                           kinetics_depositories)
        if not testing:
            self.load_statmech(os.path.join(path, 'statmech'),
                               statmech_libraries, depository)
        if solvation:
            self.load_solvation(os.path.join(path, 'solvation'))
        if surface:
            self.load_surface(os.path.join(path, 'surface'))

    def load_thermo(self,
                    path,
                    thermo_libraries=None,
                    depository=True,
                    surface=False):
        """
        Load the RMG thermo database from the given `path` on disk, where
        `path` points to the top-level folder of the RMG thermo database.
        """
        self.thermo = ThermoDatabase()
        self.thermo.load(path, thermo_libraries, depository, surface)

    def load_transport(self, path, transport_libraries=None):
        """
        Load the RMG transport database from the given `path` on disk, where
        `path` points to the top-level folder of the RMG transport database.
        """
        self.transport = TransportDatabase()
        self.transport.load(path, transport_libraries)

    def load_forbidden_structures(self, path=None):
        """
        Load the RMG forbidden structures from the given `path` on disk, where
        `path` points to the forbidden structures file.

        If no path is given, a blank forbidden structures object is created.
        """
        self.forbidden_structures = ForbiddenStructures()
        if path is not None:
            self.forbidden_structures.load(path)

    def load_kinetics(self,
                      path,
                      reaction_libraries=None,
                      seed_mechanisms=None,
                      kinetics_families=None,
                      kinetics_depositories=None):
        """
        Load the RMG kinetics database from the given `path` on disk, where
        `path` points to the top-level folder of the RMG kinetics database.

        Seed mechanisms are listed before reaction libraries, and each name
        is recorded in ``library_order`` tagged 'Seed' or 'Reaction Library'.
        When both lists are ``None``, ``libraries=None`` is passed to
        ``KineticsDatabase.load``.
        """
        kinetics_libraries = []
        library_order = []
        if seed_mechanisms is None and reaction_libraries is None:
            kinetics_libraries = None
        if seed_mechanisms is not None:
            for library in seed_mechanisms:
                kinetics_libraries.append(library)
                library_order.append((library, 'Seed'))
        if reaction_libraries is not None:
            for library in reaction_libraries:
                kinetics_libraries.append(library)
                library_order.append((library, 'Reaction Library'))

        self.kinetics = KineticsDatabase()
        self.kinetics.library_order = library_order
        self.kinetics.load(path,
                           families=kinetics_families,
                           libraries=kinetics_libraries,
                           depositories=kinetics_depositories)

    def load_solvation(self, path):
        """
        Load the RMG solvation database from the given `path` on disk, where
        `path` points to the top-level folder of the RMG solvation database.
        """
        self.solvation = SolvationDatabase()
        self.solvation.load(path)

    def load_surface(self, path):
        """
        Load the RMG metal database from the given `path` on disk, where
        `path` points to the top-level folder of the RMG surface database.
        """
        self.surface = MetalDatabase()
        self.surface.load(path)

    def load_statmech(self, path, statmech_libraries=None, depository=True):
        """
        Load the RMG statmech database from the given `path` on disk, where
        `path` points to the top-level folder of the RMG statmech database.
        """
        self.statmech = StatmechDatabase()
        self.statmech.load(path, statmech_libraries, depository)

    def load_old(self, path):
        """
        Load the old RMG database from the given `path` on disk, where `path`
        points to the top-level folder of the old RMG database.

        Transport and solvation are created but left empty; no old-format
        data is read for them.
        """
        self.thermo = ThermoDatabase()
        self.thermo.load_old(path)
        # Currently no load_old import function available for transport groups
        self.transport = TransportDatabase()
        self.forbidden_structures = ForbiddenStructures()
        self.forbidden_structures.load_old(
            os.path.join(path, 'ForbiddenStructures.txt'))
        self.kinetics = KineticsDatabase()
        self.kinetics.load_old(path)
        self.statmech = StatmechDatabase()
        self.statmech.load_old(path)
        # Not completely implemented: the old solvation data is not read.
        self.solvation = SolvationDatabase()

    def save(self, path):
        """
        Save the RMG database to the given `path` on disk.

        The directory is created if it does not exist. The surface database
        is not saved.
        """
        if not os.path.exists(path):
            os.makedirs(path)
        self.forbidden_structures.save(
            os.path.join(path, 'forbiddenStructures.py'))
        self.thermo.save(os.path.join(path, 'thermo'))
        self.kinetics.save(os.path.join(path, 'kinetics'))
        self.statmech.save(os.path.join(path, 'statmech'))
        self.solvation.save(os.path.join(path, 'solvation'))
        self.transport.save(os.path.join(path, 'transport'))

    def save_old(self, path):
        """
        Save the old RMG database to the given `path` on disk.

        The directory is created if it does not exist. Solvation and surface
        are not saved in the old format.
        """
        if not os.path.exists(path):
            os.makedirs(path)
        self.thermo.save_old(path)
        self.transport.save_old(path)
        self.forbidden_structures.save_old(
            os.path.join(path, 'ForbiddenStructures.txt'))
        self.kinetics.save_old(path)
        self.statmech.save_old(path)
def genNetwork(self, mol_object, **kwargs):
    """
    Execute the automatic reaction discovery procedure.

    Steps, in order:

    1. Record ``kwargs['config_path']`` on the generation-1 config document.
    2. Generate all candidate products of `mol_object` with ``Generate``.
    3. Filter the candidates by heat of reaction using the backend chosen by
       ``self.method`` ('mopac', 'xtb', or anything else for the RMG thermo
       database). In generation 1 the reactant energy is computed and stored
       on the config document; otherwise it is read back from that document.
    4. For every surviving product, generate geometries and insert one
       document into the ``qm_calculate_center`` collection.
    5. Insert one summary document into the ``statistics`` collection.

    `kwargs` must contain ``'config_path'``; all of `kwargs` is also
    forwarded to ``Generate``. Nothing is returned.

    NOTE(review): ``db`` is a module-level object not visible here; the
    ``find`` / ``update_one`` / ``insert_one`` calls look like pymongo
    collections, in which case the third positional ``True`` passed to
    ``update_one`` is ``upsert`` -- confirm.
    """
    # Database
    qm_collection = db['qm_calculate_center']
    config_collection = db['config']
    statistics_collection = db['statistics']
    # NOTE(review): targets[0] raises IndexError if no generation-1 config
    # document exists.
    targets = list(config_collection.find({'generations': 1}))
    config_collection.update_one(
        targets[0], {"$set": {
            'config_path': kwargs['config_path']
        }}, True)
    # Reactant information
    reactant_inchi_key = mol_object.write('inchiKey').strip()  # inchikey
    # Generate all possible products
    gen = Generate(mol_object, **kwargs)
    self.logger.info('Generating all possible products...')
    gen.generateProducts()
    prod_mols = gen.get_prods()
    # add_bonds / break_bonds are indexed in parallel with prod_mols below.
    add_bonds = gen.get_add_bonds()
    break_bonds = gen.get_break_bonds()
    prod_mols_filtered = []
    self.logger.info(f'{len(prod_mols)} possible products are generated\n')

    # Filter reactions based on standard heat of reaction delta H
    if self.method.lower() == 'mopac':
        self.logger.info(
            f'Now use {self.method} to filter the delta H of reactions....\n'
        )
        if self.generations == 1:
            # First generation: create the 'reactions' directory next to
            # ard_path and store the reactant energy for later generations.
            os.mkdir(path.join(path.dirname(self.ard_path), 'reactions'))
            H298_reac = self.get_mopac_H298(mol_object)
            config_collection.update_one(
                targets[0], {
                    "$set": {
                        'reactant_energy': H298_reac,
                        'use_irc': self.use_irc,
                        'use_qmmm': self.use_qmmm
                    }
                }, True)
            mol_object_copy = mol_object.copy()
            for prod_mol in prod_mols:
                # prod_mols.index(...) returns the first equal element, so
                # equal products share the same add/break bond lists.
                if self.filter_dh_mopac(
                        mol_object,
                        self.cluster_bond,
                        prod_mol,
                        add_bonds[prod_mols.index(prod_mol)],
                        break_bonds[prod_mols.index(prod_mol)],
                        len(prod_mols),
                        qm_collection,
                        refH=None):
                    prod_mols_filtered.append(prod_mol)
                # Recovery
                # NOTE(review): this re-copies mol_object instead of restoring
                # it from mol_object_copy as the xtb branch does -- confirm
                # this is intended.
                mol_object_copy = mol_object.copy()
        else:
            # Later generations reuse the stored reactant energy as reference.
            H298_reac = targets[0]['reactant_energy']
            mol_object_copy = mol_object.copy()
            for prod_mol in prod_mols:
                if self.filter_dh_mopac(
                        mol_object,
                        self.cluster_bond,
                        prod_mol,
                        add_bonds[prod_mols.index(prod_mol)],
                        break_bonds[prod_mols.index(prod_mol)],
                        len(prod_mols),
                        qm_collection,
                        refH=H298_reac):
                    prod_mols_filtered.append(prod_mol)
                # Recovery
                # NOTE(review): same pattern as above; mol_object itself is
                # not restored here.
                mol_object_copy = mol_object.copy()
    elif self.method.lower() == 'xtb':
        self.logger.info(
            'Now use {} to filter the delta H of reactions....\n'.format(
                self.method))
        if self.generations == 1:
            os.mkdir(path.join(path.dirname(self.ard_path), 'reactions'))
            H298_reac = self.get_xtb_H298(
                config_path=kwargs['config_path'])
            config_collection.update_one(
                targets[0], {
                    "$set": {
                        'reactant_energy': H298_reac,
                        'use_irc': self.use_irc,
                        'use_qmmm': self.use_qmmm
                    }
                }, True)
            mol_object_copy = mol_object.copy()
            for prod_mol in prod_mols:
                # Note the argument order differs from filter_dh_mopac:
                # prod_mol comes before self.cluster_bond here.
                if self.filter_dh_xtb(
                        mol_object,
                        prod_mol,
                        self.cluster_bond,
                        add_bonds[prod_mols.index(prod_mol)],
                        break_bonds[prod_mols.index(prod_mol)],
                        len(prod_mols),
                        qm_collection,
                        config_path=kwargs['config_path'],
                        refH=None):
                    prod_mols_filtered.append(prod_mol)
                # Restore the reactant coordinates from the saved copy before
                # the next candidate is tested.
                mol_object.setCoordsFromMol(mol_object_copy)
        else:
            H298_reac = targets[0]['reactant_energy']
            mol_object_copy = mol_object.copy()
            for prod_mol in prod_mols:
                if self.filter_dh_xtb(
                        mol_object,
                        prod_mol,
                        self.cluster_bond,
                        add_bonds[prod_mols.index(prod_mol)],
                        break_bonds[prod_mols.index(prod_mol)],
                        len(prod_mols),
                        qm_collection,
                        config_path=kwargs['config_path'],
                        refH=H298_reac):
                    prod_mols_filtered.append(prod_mol)
                mol_object.setCoordsFromMol(mol_object_copy)
    else:
        # Any other method name falls through to the RMG thermo database.
        self.logger.info(
            'Now use {} to filter the delta H of reactions....\n'.format(
                self.method))
        # Load thermo database and choose which libraries to search
        thermo_db = ThermoDatabase()
        thermo_db.load(path.join(settings['database.directory'], 'thermo'))
        thermo_db.libraryOrder = [
            'primaryThermoLibrary',
            'NISTThermoLibrary',
            'thermo_DFT_CCSDTF12_BAC',
            'CBS_QB3_1dHR',
            'DFT_QCI_thermo',
            'BurkeH2O2',
            'GRI-Mech3.0-N',
        ]
        if self.generations == 1:
            H298_reac = mol_object.getH298(thermo_db)
            update_field = {'reactant_energy': H298_reac}
            config_collection.update_one(targets[0], {"$set": update_field}, True)
        else:
            H298_reac = targets[0]['reactant_energy']
        prod_mols_filtered = [
            mol for mol in prod_mols
            if self.filter_dh_rmg(H298_reac, mol, thermo_db)
        ]

    self.logger.info('Generate geometry........\n')
    for mol in prod_mols_filtered:
        # Look up this product's position to fetch its bond changes.
        index = prod_mols.index(mol)
        # Generate geometry and return path
        mol_object.gen3D(self.fixed_atoms,
                         forcefield=self.forcefield,
                         method=self.constraintff_alg,
                         make3D=False)
        reac_mol_copy = mol_object.copy()
        dir_path = self.gen_geometry(mol_object, mol, reac_mol_copy,
                                     add_bonds[index], break_bonds[index])
        product_inchi_key = mol.write('inchiKey').strip()
        self.logger.info(
            f"\nReactant inchi key: {reactant_inchi_key}\nProduct inchi key: {product_inchi_key}\nReactant smiles: {mol_object.write('can').split()}\nProduct smiles: {mol.write('can').split()}\nDirectory path: {dir_path}\n"
        )
        # One document per reaction; it starts with ssm_status 'job_unrun'.
        qm_collection.insert_one({
            'reaction': [reactant_inchi_key, product_inchi_key],
            'reactant_smiles': mol_object.write('can').split()[0],
            'reactant_inchi_key': reactant_inchi_key,
            'product_inchi_key': product_inchi_key,
            'product_smiles': mol.write('can').split()[0],
            'path': dir_path,
            'ssm_status': 'job_unrun',
            'generations': self.generations,
        })

    self.logger.info('After delta H filter {} product remain.\n'.format(
        len(prod_mols_filtered)))
    # Record how many products this reactant contributed in this generation.
    statistics_collection.insert_one({
        'reactant_smiles': mol_object.write('can').split()[0],
        'reactant_inchi_key': reactant_inchi_key,
        'add how many products': len(prod_mols_filtered),
        'generations': self.generations
    })
def execute(self, **kwargs):
    """
    Execute the automatic reaction discovery procedure.

    The reactant is obtained from ``self.initialize()``, every candidate
    product is generated, and candidates are filtered with
    ``self.filterThreshold`` against the reactant's H298. For each
    remaining product, 3D geometries are arranged and an input file is
    written into its own numbered subdirectory of ``reactions``.
    ``self.finalize`` is always called at the end.

    `kwargs` is forwarded to ``filterThreshold`` and ``makeInputFile``; its
    ``'output_dir'`` and ``'name'`` keys are overwritten for every product.
    """
    t_start = time.time()
    reactant_mol = self.initialize()

    generator = Generate(reactant_mol)
    self.logger.info('Generating all possible products...')
    generator.generateProducts(nbreak=self.nbreak, nform=self.nform)
    candidates = generator.prod_mols
    self.logger.info('{} possible products generated\n'.format(
        len(candidates)))

    # Load the thermo database and set the order in which libraries are
    # searched.
    thermo_db = ThermoDatabase()
    thermo_db.load(os.path.join(settings['database.directory'], 'thermo'))
    thermo_db.libraryOrder = [
        'primaryThermoLibrary',
        'NISTThermoLibrary',
        'thermo_DFT_CCSDTF12_BAC',
        'CBS_QB3_1dHR',
        'DFT_QCI_thermo',
        'KlippensteinH2O2',
        'GRI-Mech3.0-N',
    ]

    # Keep only candidates accepted by the heat-of-reaction filter.
    reactant_H298 = reactant_mol.getH298(thermo_db)
    self.logger.info('Filtering reactions...')
    feasible = []
    for candidate in candidates:
        if self.filterThreshold(reactant_H298, candidate, thermo_db, **kwargs):
            feasible.append(candidate)
    self.logger.info('{} products remaining\n'.format(len(feasible)))

    if not feasible:
        self.logger.info('No feasible products found')
    else:
        self.logger.info('Feasible products:\n')
        reactions_dir = util.makeOutputSubdirectory(self.output_dir,
                                                    'reactions')

        # Open Babel reuses the previous molecule's coordinates when the new
        # molecule is isomorphic to it, even if different atom indices take
        # part in the bonds. Setting the force field up on a lone hydrogen
        # atom between molecules forces fresh coordinates; a hydrogen atom
        # can never match any product structure.
        dummy_h = gen3D.readstring('smi', '[H]')
        force_field = pybel.ob.OBForceField.FindForceField(self.forcefield)

        pristine_reactant = reactant_mol.copy()
        for rxn_index, product_mol in enumerate(feasible):
            product_mol.gen3D(forcefield=self.forcefield, make3D=False)

            arranger = gen3D.Arrange3D(reactant_mol, product_mol)
            arrange_msg = arranger.arrangeIn3D()
            if arrange_msg != '':
                self.logger.info(arrange_msg)

            # Reset on the dummy atom before and after each gen3D call.
            force_field.Setup(dummy_h.OBMol)
            reactant_mol.gen3D(make3D=False)
            force_field.Setup(dummy_h.OBMol)
            product_mol.gen3D(make3D=False)
            force_field.Setup(dummy_h.OBMol)

            reactant_node = reactant_mol.toNode()
            product_node = product_mol.toNode()

            rxn_label = '{:04d}'.format(rxn_index)
            kwargs['output_dir'] = util.makeOutputSubdirectory(
                reactions_dir, rxn_label)
            kwargs['name'] = rxn_label

            self.logger.info('Product {}: {}\n{}\n****\n{}\n'.format(
                rxn_index, product_node.toSMILES(), reactant_node,
                product_node))
            self.makeInputFile(reactant_node, product_node, **kwargs)

            # Put the reactant back to its original coordinates for the next
            # product.
            reactant_mol.setCoordsFromMol(pristine_reactant)

    # Finalize
    self.finalize(t_start)