def test(self,
         species: List[ReferenceSpecies] = None,
         dataset: BACDataset = None,
         db_names: Union[str, List[str]] = None) -> BACDataset:
    """
    Evaluate the bond additivity corrections on a set of data.

    At most one of `species`, `dataset`, or `db_names` may be given.

    Args:
        species: Reference species to evaluate; each one is wrapped in a
            BACDatapoint at this object's level of theory.
        dataset: An existing BACDataset to evaluate. It is updated in place
            and the same object is returned.
        db_names: Name(s) of the reference database(s) to load and evaluate.

    Returns:
        The evaluated BACDataset, with `bac_data` set to `calc_data` plus
        the correction obtained for each datapoint.

    Raises:
        BondAdditivityCorrectionError: If more than one data source is
            given, or if no data is available after resolving the source.
    """
    given_sources = [src for src in (species, dataset, db_names) if src is not None]
    if len(given_sources) > 1:
        raise BondAdditivityCorrectionError('Cannot specify several data sources')

    # Resolve whichever source was supplied into a BACDataset
    if species is not None:
        datapoints = []
        for spc in species:
            datapoints.append(BACDatapoint(spc, level_of_theory=self.level_of_theory))
        dataset = BACDataset(datapoints)
    elif db_names is not None:
        database_key = self.load_database(names=db_names)
        ref_database = self.ref_databases[database_key]
        dataset = extract_dataset(ref_database, self.level_of_theory)

    if dataset is None or len(dataset) == 0:
        raise BondAdditivityCorrectionError('No data available for evaluation')

    # SI value divided by 4184 -- presumably J/mol -> kcal/mol; TODO confirm units
    corrections = []
    for datapoint in dataset:
        correction = self.get_correction(datapoint=datapoint)
        corrections.append(correction.value_si / 4184)

    dataset.bac_data = dataset.calc_data + np.array(corrections)
    return dataset
def get_correction(self,
                   bonds: Dict[str, int] = None,
                   coords: np.ndarray = None,
                   nums: Iterable[int] = None,
                   datapoint: BACDatapoint = None,
                   spc: ReferenceSpecies = None,
                   multiplicity: int = None) -> ScalarQuantity:
    """
    Return the bond additivity correction for a molecule.

    The kind of correction is selected by `self.bac_type`:

    - 'p' (Petersson-type): computed from `bonds`, a dictionary mapping
      bond types to the number of such bonds in the molecule.
    - 'm' (Melius-type): computed from the atomic coordinates in `coords`,
      the atomic numbers in `nums`, and the spin multiplicity.

    In both cases a `datapoint` (or a `spc`, which is converted into a
    datapoint) can be supplied instead of the raw inputs.

    Args:
        bonds: Bond types (e.g., 'C=O') mapped to their counts.
        coords: Numpy array of Cartesian molecular coordinates.
        nums: Sequence of atomic numbers.
        datapoint: BACDatapoint to use instead of bonds/coords/nums.
        spc: ReferenceSpecies to use instead; only consulted when
            `datapoint` is not given.
        multiplicity: Spin multiplicity of the molecule.

    Returns:
        The bond correction to the electronic energy.

    Raises:
        BondAdditivityCorrectionError: If no BAC parameters are loaded
            (`self.bacs` is None).
    """
    if self.bacs is None:
        if self.bac_type == 'm':
            bac_type_str = 'Melius'
        else:
            bac_type_str = 'Petersson'
        raise BondAdditivityCorrectionError(
            f'Missing {bac_type_str}-type BAC parameters for model chemistry {self.model_chemistry}'
        )

    # An explicitly passed datapoint takes precedence over a species
    if spc is not None and datapoint is None:
        datapoint = BACDatapoint(spc, model_chemistry=self.model_chemistry)

    if self.bac_type == 'm':
        return self._get_melius_correction(
            coords=coords,
            nums=nums,
            datapoint=datapoint,
            multiplicity=multiplicity,
        )
    if self.bac_type == 'p':
        return self._get_petersson_correction(bonds=bonds, datapoint=datapoint)
    # NOTE(review): any other bac_type falls through and returns None;
    # presumably bac_type is validated where it is set -- confirm.
def _get_melius_correction(self,
                           coords: np.ndarray = None,
                           nums: Iterable[int] = None,
                           datapoint: BACDatapoint = None,
                           multiplicity: int = None,
                           params: Dict[str, Union[float, Dict[str, float]]] = None) -> ScalarQuantity:
    """
    Compute the total Melius-type BAC from a geometry (or datapoint) and a
    set of BAC parameters.

    Notes:
        A nonzero molecular correction term destroys the size consistency
        of the quantum chemistry method and additionally requires the
        multiplicity of the molecule.

        The value returned is the negative of the total correction
        described in Anantharaman and Melius (JPCA 2005), so that it can
        be added directly to the energy.

    Args:
        coords: Numpy array of Cartesian atomic coordinates.
        nums: Sequence of atomic numbers.
        datapoint: BACDatapoint to use instead of `coords`/`nums`. It is
            only used when at least one of `coords`/`nums` is missing.
        multiplicity: Multiplicity of the molecule (overridden by the
            species multiplicity when the datapoint is used).
        params: Parameters to use instead of those stored in `self.bacs`.

    Returns:
        Melius-type bond additivity correction in kcal/mol.

    Raises:
        BondAdditivityCorrectionError: If the molecular correction
            parameter is nonzero but no multiplicity is available.
    """
    bac_params = self.bacs if params is None else params
    atom_corr = bac_params['atom_corr']
    bond_corr_length = bac_params['bond_corr_length']
    bond_corr_neighbor = bac_params['bond_corr_neighbor']
    mol_corr = bac_params.get('mol_corr', 0.0)

    # Obtain the molecule: from the datapoint when the raw geometry is
    # incomplete, otherwise from the raw geometry itself.
    mol = None
    geometry_given = nums is not None and coords is not None
    if datapoint is not None:
        if geometry_given:
            logging.warning(
                f'Species {datapoint.spc.label} will not be used because `nums` and `coords` were specified'
            )
        else:
            mol = datapoint.to_mol(from_geo=True)
            multiplicity = datapoint.spc.multiplicity  # Species multiplicity wins
    if mol is None:
        mol = geo_to_mol(coords, nums=nums)

    # Molecular contribution
    if multiplicity is None and mol_corr != 0:
        raise BondAdditivityCorrectionError(f'Missing multiplicity for {mol}')
    bac_mol = mol_corr * self._get_mol_coeff(mol, multiplicity=multiplicity)

    # Atomic contribution
    atom_counts = self._get_atom_counts(mol)
    bac_atom = sum(n_atoms * atom_corr[element] for element, n_atoms in atom_counts.items())

    def _length_term(key, weight):
        # Tuple keys combine the two parameters via their geometric mean
        if isinstance(key, tuple):
            return weight * (bond_corr_length[key[0]] * bond_corr_length[key[1]]) ** 0.5
        return weight * bond_corr_length[key]

    # Bond contribution (length-dependent part plus neighbor part)
    length_coeffs = self._get_length_coeffs(mol)
    bac_length = sum(_length_term(key, weight) for key, weight in length_coeffs.items())
    neighbor_coeffs = self._get_neighbor_coeffs(mol)
    bac_neighbor = sum(n_pairs * bond_corr_neighbor[key] for key, n_pairs in neighbor_coeffs.items())
    bac_bond = bac_length + bac_neighbor

    # The sign is flipped so the result can be added to the energy
    total = bac_mol + bac_atom + bac_bond
    return ScalarQuantity(-total, 'kcal/mol')
def test_append(self):
    """
    Appending a datapoint must leave the dataset one entry longer than
    `self.species`.
    """
    extra_datapoint = BACDatapoint(self.species[0])
    self.dataset.append(extra_datapoint)

    expected_size = len(self.species) + 1
    self.assertEqual(len(self.dataset), expected_size)
def setUp(self):
    """
    Create the dataset fixture: one BACDatapoint per entry of
    `self.species`, all at LEVEL_OF_THEORY.
    """
    datapoints = []
    for spc in self.species:
        datapoints.append(BACDatapoint(spc, level_of_theory=LEVEL_OF_THEORY))
    self.dataset = BACDataset(datapoints)
def setUp(self):
    """
    Create the datapoint fixture from `self.spc` at LEVEL_OF_THEORY.
    """
    fixture = BACDatapoint(self.spc, level_of_theory=LEVEL_OF_THEORY)
    self.datapoint = fixture
class TestBACDatapoint(unittest.TestCase):
    """
    Unit tests covering the behavior of the BACDatapoint class.
    """

    @classmethod
    def setUpClass(cls):
        # Shared fixture: first species of the first reference set
        reference_sets = list(DATABASE.reference_sets.values())
        cls.spc = reference_sets[0][0]

    def setUp(self):
        fixture = BACDatapoint(self.spc, level_of_theory=LEVEL_OF_THEORY)
        self.datapoint = fixture

    def test_assert_level_of_theory(self):
        """
        Accessing `calc_data` must raise once the level of theory is unset.
        """
        self.datapoint.level_of_theory = None
        with self.assertRaises(BondAdditivityCorrectionError):
            _ = self.datapoint.calc_data

    def test_weight(self):
        """
        A new datapoint must have a weight of 1.
        """
        weight = self.datapoint.weight
        self.assertEqual(weight, 1)

    def test_mol(self):
        """
        Check conversion to a Molecule from adjacency list and from
        geometry, including caching of the result.
        """
        # `mol` is expected to be unavailable before any conversion
        with self.assertRaises(ValueError):
            _ = self.datapoint.mol

        # Conversion from the adjacency list
        from_adj_first = self.datapoint.to_mol(from_geo=False)
        self.assertIsInstance(from_adj_first, Molecule)
        self.assertIs(from_adj_first, self.datapoint.mol)
        from_adj_second = self.datapoint.to_mol(from_geo=False)
        # A repeated call must hand back the cached molecule
        self.assertIs(from_adj_first, from_adj_second)

        # Conversion from the geometry
        from_geo_first = self.datapoint.to_mol(from_geo=True)
        # Switching the source must NOT reuse the cached molecule
        self.assertIsNot(from_geo_first, from_adj_first)
        mol_coords = np.vstack(tuple(atom.coords for atom in from_geo_first.atoms))
        calc_entry = self.spc.calculated_data[LEVEL_OF_THEORY]
        ref_coords = calc_entry.xyz_dict['coords']
        comparison = np.testing.assert_allclose(ref_coords, mol_coords)
        self.assertIsNone(comparison)
        self.assertIsInstance(from_geo_first, Molecule)
        self.assertIs(from_geo_first, self.datapoint.mol)
        from_geo_second = self.datapoint.to_mol(from_geo=True)
        # A repeated call must hand back the cached molecule
        self.assertIs(from_geo_first, from_geo_second)

    def test_bonds(self):
        """
        `bonds` must be a Counter and must be cached between accesses.
        """
        first_access = self.datapoint.bonds
        self.assertIsInstance(first_access, Counter)
        second_access = self.datapoint.bonds
        self.assertIs(first_access, second_access)

    def test_ref_data(self):
        """
        `ref_data` must be available as a float.
        """
        value = self.datapoint.ref_data
        self.assertIsInstance(value, float)

    def test_calc_data(self):
        """
        `calc_data` must be available as a float.
        """
        value = self.datapoint.calc_data
        self.assertIsInstance(value, float)

    def test_bac_data(self):
        """
        `bac_data` must raise before being set and return a float after.
        """
        with self.assertRaises(ValueError):
            _ = self.datapoint.bac_data

        self.datapoint.bac_data = 1.0
        self.assertIsInstance(self.datapoint.bac_data, float)

    def test_substructs(self):
        """
        `substructs` must be a cached Counter with exactly one charge
        label and exactly one spin label set.
        """
        charge_keys = ('neutral', 'cation', 'anion')
        spin_keys = ('singlet', 'doublet', 'triplet+')

        first_access = self.datapoint.substructs
        self.assertIsInstance(first_access, Counter)

        # The labels within each group are mutually exclusive
        n_charge_labels = sum(first_access[key] for key in charge_keys)
        self.assertEqual(n_charge_labels, 1)
        n_spin_labels = sum(first_access[key] for key in spin_keys)
        self.assertEqual(n_spin_labels, 1)

        second_access = self.datapoint.substructs
        # A repeated access must hand back the cached substructures
        self.assertIs(first_access, second_access)
def setUp(self):
    """
    Create the datapoint fixture from `self.spc` at MODEL_CHEMISTRY.
    """
    fixture = BACDatapoint(self.spc, model_chemistry=MODEL_CHEMISTRY)
    self.datapoint = fixture
def setUp(self):
    """
    Create the dataset fixture: one BACDatapoint per entry of
    `self.species`, all at MODEL_CHEMISTRY.
    """
    datapoints = []
    for spc in self.species:
        datapoints.append(BACDatapoint(spc, model_chemistry=MODEL_CHEMISTRY))
    self.dataset = BACDataset(datapoints)