Ejemplo n.º 1
0
def _contains_any_element(formula, elements):
    """
    Tell whether a species formula contains any of the given element symbols.

    A string formula is split into element symbols (one uppercase letter
    followed by optional lowercase letters) so that whole symbols are
    compared; a plain substring test would let 'C' match 'Cl'. A formula
    that is not a string is tested with ``in`` directly.
    """
    if isinstance(formula, str):
        import re  # Local import: keeps the block independent of file-level imports.
        symbols = set(re.findall(r'[A-Z][a-z]*', formula))
        return not elements.isdisjoint(symbols)
    return any(e in formula for e in elements)


def extract_dataset(
    ref_database: ReferenceDatabase,
    level_of_theory: Union[LevelOfTheory, CompositeLevelOfTheory],
    exclude_elements: Union[Sequence[str], Set[str], str, None] = None,
    charge: Union[Sequence[Union[str, int]], Set[Union[str, int]], str,
                  int] = 'all',
    multiplicity: Union[Sequence[int], Set[int], int,
                        str] = 'all') -> BACDataset:
    """
    Extract species for a given model chemistry from a reference
    database and convert to a BACDataset.

    The species list is obtained from ``ref_database.extract_level_of_theory``
    (with ``as_error_canceling_species=False``) and then narrowed by the
    optional element, charge, and multiplicity filters, applied in that order.

    Args:
         ref_database: Reference database.
         level_of_theory: Level of theory.
         exclude_elements: A single element symbol or a collection of element symbols.
                 Species whose formula contains any of them are dropped. None disables the filter.
         charge: Allowable charges. Possible values are 'all'; a combination of 'neutral', 'positive',
                 and 'negative'; or a sequence of integers. Keywords and integers may be mixed.
         multiplicity: Allowable multiplicities. Possible values are 'all', a positive integer,
                 or a collection of positive integers.

    Returns:
        BACDataset containing species with data available at given level of theory.
    """
    species = ref_database.extract_level_of_theory(
        level_of_theory, as_error_canceling_species=False)

    if exclude_elements is not None:
        # A bare string is one symbol, not an iterable of characters.
        elements = {exclude_elements} if isinstance(
            exclude_elements, str) else set(exclude_elements)
        species = [
            spc for spc in species
            if not _contains_any_element(spc.formula, elements)
        ]

    if charge != 'all':
        # A bare string or integer is a single allowed value.
        charges = {charge} if isinstance(charge, (str, int)) else set(charge)
        species = [
            spc for spc in species
            if (spc.charge == 0 and 'neutral' in charges)
            or (spc.charge > 0 and 'positive' in charges)
            or (spc.charge < 0 and 'negative' in charges)
            or spc.charge in charges  # explicit integer charge
        ]

    if multiplicity != 'all':
        multiplicities = {multiplicity} if isinstance(
            multiplicity, int) else set(multiplicity)
        species = [
            spc for spc in species if spc.multiplicity in multiplicities
        ]

    return BACDataset([
        BACDatapoint(spc, level_of_theory=level_of_theory) for spc in species
    ])
Ejemplo n.º 2
0
    def test_extract_level_of_theory(self):
        """
        Check that the species having data at one level of theory can be pulled out of a reference database.

        Three alkanes are spread over two reference sets; only ethane and propane carry 'good_chem' data,
        so only those two should be returned, each with the expected reference enthalpy.
        """
        # Two reference entries: a kJ/mol value with uncertainty 2 and a kcal/mol value with uncertainty 1
        precise_entry = ReferenceDataEntry(ThermoData(H298=(100, 'kJ/mol', '+|-', 2)))
        less_precise_entry = ReferenceDataEntry(ThermoData(H298=(25, 'kcal/mol', '+|-', 1)))

        # Two calculated entries, one per fictitious level of theory
        good_calc = CalculatedDataEntry(ThermoData(H298=(110, 'kJ/mol')))
        bad_calc = CalculatedDataEntry(ThermoData(H298=(120, 'kJ/mol')))

        # Ethane: data at both levels of theory, with the preferred reference set by hand
        ethane = ReferenceSpecies(
            smiles='CC',
            reference_data={'precise': precise_entry, 'less_precise': less_precise_entry},
            calculated_data={LevelOfTheory('good_chem'): good_calc, LevelOfTheory('bad_chem'): bad_calc},
            preferred_reference='less_precise',
        )

        # Propane: data at both levels of theory, no preferred reference given
        propane = ReferenceSpecies(
            smiles='CCC',
            reference_data={'precise': precise_entry, 'less_precise': less_precise_entry},
            calculated_data={LevelOfTheory('good_chem'): good_calc, LevelOfTheory('bad_chem'): bad_calc},
        )

        # Butane: only 'bad_chem' data, so it must not show up in the 'good_chem' extraction
        butane = ReferenceSpecies(
            smiles='CCCC',
            reference_data={'precise': precise_entry, 'less_precise': less_precise_entry},
            calculated_data={LevelOfTheory('bad_chem'): bad_calc},
        )

        database = ReferenceDatabase()
        database.reference_sets = {'testing_1': [ethane, butane], 'testing_2': [propane]}

        extracted = database.extract_level_of_theory(LevelOfTheory('good_chem'))
        self.assertEqual(len(extracted), 2)
        self.assertIsInstance(extracted[0], ErrorCancelingSpecies)

        # Expected reference enthalpy per species, in the units of `value_si`:
        #   'CC'  -> `less_precise`, because it was set manually as preferred
        #   'CCC' -> `precise`, because it has the lowest uncertainty
        expected_hf298 = {
            'CC': 25.0*4184.0,
            'CCC': 100.0*1000.0,
        }

        for spcs in extracted:
            smiles = spcs.molecule.to_smiles()
            self.assertNotIn(smiles, ['CCCC'])
            self.assertIn(smiles, ['CC', 'CCC'])
            self.assertAlmostEqual(spcs.high_level_hf298.value_si, expected_hf298[smiles])