def _load_refdata(cls):
    """
    Populate ``cls.ref_data`` the first time it is needed.

    If ``cls.ref_data`` is already set (not ``None``) nothing happens. Otherwise a
    ``ReferenceDatabase`` is created and loaded with its default arguments, and
    ``cls.ref_data`` becomes a dictionary mapping each label in ``SPECIES_LABELS``
    to the species returned for it by ``get_species_from_label``.
    """
    if cls.ref_data is not None:
        # Already populated by an earlier call; keep the existing data.
        return

    logging.info('Loading reference database')
    database = ReferenceDatabase()
    database.load()

    # Labels and returned species are paired by position.
    cls.ref_data = dict(zip(SPECIES_LABELS, database.get_species_from_label(SPECIES_LABELS)))
def test_extract_level_of_theory(self):
    """
    Check that the species with data at a requested level of theory can be pulled out of a reference database
    """
    # Building blocks for a small throw-away database: two reference values
    # with different uncertainties and two calculated values
    precise_ref = ReferenceDataEntry(ThermoData(H298=(100, 'kJ/mol', '+|-', 2)))
    less_precise_ref = ReferenceDataEntry(ThermoData(H298=(25, 'kcal/mol', '+|-', 1)))
    good_calc = CalculatedDataEntry(ThermoData(H298=(110, 'kJ/mol')))
    bad_calc = CalculatedDataEntry(ThermoData(H298=(120, 'kJ/mol')))

    def both_references():
        # A fresh dictionary per species so that no container is shared
        return {'precise': precise_ref, 'less_precise': less_precise_ref}

    def both_levels():
        # New LevelOfTheory keys every time, so lookups rely on equality rather than identity
        return {LevelOfTheory('good_chem'): good_calc, LevelOfTheory('bad_chem'): bad_calc}

    ethane = ReferenceSpecies(smiles='CC',
                              reference_data=both_references(),
                              calculated_data=both_levels(),
                              preferred_reference='less_precise')
    propane = ReferenceSpecies(smiles='CCC',
                               reference_data=both_references(),
                               calculated_data=both_levels())
    # Butane only has data at the level of theory that is NOT requested below
    butane = ReferenceSpecies(smiles='CCCC',
                              reference_data=both_references(),
                              calculated_data={LevelOfTheory('bad_chem'): bad_calc})

    database = ReferenceDatabase()
    database.reference_sets = {'testing_1': [ethane, butane], 'testing_2': [propane]}

    extracted = database.extract_level_of_theory(LevelOfTheory('good_chem'))

    self.assertEqual(len(extracted), 2)
    self.assertIsInstance(extracted[0], ErrorCancelingSpecies)

    expected_hf298 = {
        # `less_precise` is the source since it was set manually as preferred
        'CC': 25.0*4184.0,
        # `precise` is the source since it has the lowest uncertainty
        'CCC': 100.0*1000.0,
    }
    for species in extracted:
        smiles = species.molecule.to_smiles()
        self.assertNotIn(smiles, ['CCCC'])
        self.assertIn(smiles, ['CC', 'CCC'])
        self.assertAlmostEqual(species.high_level_hf298.value_si, expected_hf298[smiles])
def load_database(cls,
                  paths: Union[str, List[str]] = None,
                  names: Union[str, List[str]] = None,
                  reload: bool = False) -> str:
    """
    Load a reference database, reusing an already loaded one when possible.

    The database is only registered in ``cls.ref_databases`` after it has
    loaded successfully, so a failed load never leaves an unloaded database
    in the cache and never replaces a previously loaded one.

    Args:
        paths: Paths to database folders.
        names: Names of database folders in RMG database.
        reload: Force reload of database.

    Returns:
        Key under which the (newly loaded or previously cached) database is
        stored in ``cls.ref_databases``.

    Raises:
        Whatever ``ReferenceDatabase.load`` raises; the cache is left
        untouched in that case.
    """
    paths = ReferenceDatabase.get_database_paths(paths=paths, names=names)
    key = cls.get_database_key(paths)

    if reload or key not in cls.ref_databases:
        logging.info(f'Loading reference database from {paths}')
        database = ReferenceDatabase()
        database.load(paths=paths)
        # Register only once loading has succeeded; otherwise later calls
        # would find the key and silently hand back an unloaded database.
        cls.ref_databases[key] = database

    return key
def extract_dataset(
        ref_database: ReferenceDatabase,
        level_of_theory: Union[LevelOfTheory, CompositeLevelOfTheory],
        exclude_elements: Union[Sequence[str], Set[str], str] = None,
        charge: Union[Sequence[Union[str, int]], Set[Union[str, int]], str, int] = 'all',
        multiplicity: Union[Sequence[int], Set[int], int, str] = 'all') -> BACDataset:
    """
    Extract species for a given model chemistry from a reference
    database and convert to a BACDataset.

    Args:
        ref_database: Reference database.
        level_of_theory: Level of theory.
        exclude_elements: Element symbol, or sequence of element symbols, to
            exclude. Symbols are matched as whole elements, so excluding 'C'
            does not exclude species that merely contain 'Cl'.
        charge: Allowable charges. Possible values are 'all'; a combination
            of 'neutral', 'positive', and 'negative'; or a sequence of integers.
        multiplicity: Allowable multiplicities. Possible values are 'all'
            or positive integers.

    Returns:
        BACDataset containing species with data available at given level of theory.
    """
    # Function-scope import so this block does not rely on a file-level import.
    import re

    species = ref_database.extract_level_of_theory(level_of_theory, as_error_canceling_species=False)

    if exclude_elements is not None:
        excluded = {exclude_elements} if isinstance(exclude_elements, str) else set(exclude_elements)

        def has_excluded_element(formula) -> bool:
            # NOTE(review): assumes a string formula looks like 'C2H6' (capital
            # letter starts each element symbol) - confirm against
            # ReferenceSpecies. A plain substring test would treat 'C' as
            # present in 'HCl', so string formulas are split into symbols first.
            if isinstance(formula, str):
                return not excluded.isdisjoint(re.findall(r'[A-Z][a-z]*', formula))
            # Any other container keeps the original membership test.
            return any(element in formula for element in excluded)

        species = [spc for spc in species if not has_excluded_element(spc.formula)]

    if charge != 'all':
        charges = {charge} if isinstance(charge, (str, int)) else set(charge)
        # A species is kept if its charge matches one of the requested
        # keywords or is listed explicitly as an integer.
        species = [
            spc for spc in species
            if (spc.charge == 0 and 'neutral' in charges)
            or (spc.charge > 0 and 'positive' in charges)
            or (spc.charge < 0 and 'negative' in charges)
            or spc.charge in charges
        ]

    if multiplicity != 'all':
        multiplicities = {multiplicity} if isinstance(multiplicity, int) else set(multiplicity)
        species = [spc for spc in species if spc.multiplicity in multiplicities]

    return BACDataset([BACDatapoint(spc, level_of_theory=level_of_theory) for spc in species])
def extract_dataset(ref_database: ReferenceDatabase, model_chemistry: str) -> BACDataset:
    """
    Build a BACDataset from the species in a reference database that
    are available at a given model chemistry.

    Args:
        ref_database: Reference database to query.
        model_chemistry: Model chemistry whose species are extracted.

    Returns:
        BACDataset holding one BACDatapoint per extracted species.
    """
    # Request the species without conversion to error canceling species.
    ref_species = ref_database.extract_model_chemistry(model_chemistry,
                                                       as_error_canceling_species=False)
    datapoints = [BACDatapoint(entry, model_chemistry=model_chemistry) for entry in ref_species]
    return BACDataset(datapoints)
def setUpClass(cls):
    """
    Create a ReferenceDatabase, store it as ``cls.database`` and load it with its default arguments
    """
    # The attribute is assigned before loading, exactly as before.
    cls.database = shared_database = ReferenceDatabase()
    shared_database.load()
from collections import Counter import numpy as np import pybel from rmgpy.molecule import Molecule as RMGMolecule import arkane.encorr.data as data from arkane.encorr.data import (Molecule, Stats, BACDatapoint, DatasetProperty, BACDataset, extract_dataset, geo_to_mol, _pybel_to_rmg) from arkane.encorr.reference import ReferenceDatabase from arkane.exceptions import BondAdditivityCorrectionError from arkane.modelchem import LOT, LevelOfTheory DATABASE = ReferenceDatabase() DATABASE.load() LEVEL_OF_THEORY = LevelOfTheory(method='wb97m-v', basis='def2-tzvpd', software='qchem') class TestDataLoading(unittest.TestCase): """ A class for testing that the quantum correction data is loaded correctly from the RMG database. """ def test_contains_data(self): """ Test that the necessary dictionaries are available. """