Beispiel #1
0
def estimated_reference_sets():
    """Build a paired estimated / reference data set for use in tests.

    The estimated set holds a ``Density`` (id ``"1"``) and an
    ``EnthalpyOfMixing`` (id ``"2"``) for a substance built from the
    ``"O"`` and ``"CC=O"`` components. The reference set (id ``"ref"``)
    holds two entries with the integer ids ``1`` and ``2`` describing the
    same property types for a 0.5 / 0.5 mole fraction mixture of the same
    two SMILES.

    Returns
    -------
    tuple
        The estimated ``PhysicalPropertyDataSet`` followed by the reference
        ``DataSet``.
    """

    def _make_state():
        # A fresh state object is built for every property so that no
        # instance is shared between the two estimated properties.
        return ThermodynamicState(
            298.15 * unit.kelvin, pressure=1.0 * unit.atmosphere
        )

    def _make_components():
        # A fresh component list is built for every reference entry.
        return [
            Component(smiles="O", mole_fraction=0.5),
            Component(smiles="CC=O", mole_fraction=0.5),
        ]

    # --- estimated properties ------------------------------------------
    density = Density(
        thermodynamic_state=_make_state(),
        phase=PropertyPhase.Liquid,
        substance=Substance.from_components("O", "CC=O"),
        value=1.0 * unit.kilogram / unit.meter**3,
        uncertainty=0.1 * unit.kilogram / unit.meter**3,
    )
    density.id = "1"

    enthalpy = EnthalpyOfMixing(
        thermodynamic_state=_make_state(),
        phase=PropertyPhase.Liquid,
        substance=Substance.from_components("O", "CC=O"),
        value=1.0 * unit.kilocalorie / unit.mole,
        uncertainty=0.1 * unit.kilojoule / unit.mole,
    )
    enthalpy.id = "2"

    estimated = PhysicalPropertyDataSet()
    estimated.add_properties(density, enthalpy)

    # --- reference entries ---------------------------------------------
    # Fields which are identical for both reference entries.
    common_fields = dict(temperature=298.15, pressure=101.325, doi=" ")

    density_entry = DataSetEntry(
        id=1,
        property_type="Density",
        value=0.001,
        std_error=0.0001,
        components=_make_components(),
        **common_fields,
    )
    enthalpy_entry = DataSetEntry(
        id=2,
        property_type="EnthalpyOfMixing",
        value=4.184,
        std_error=0.1,
        components=_make_components(),
        **common_fields,
    )

    reference = DataSet(
        id="ref",
        description=" ",
        authors=[Author(name=" ", email="*****@*****.**", institute=" ")],
        entries=[density_entry, enthalpy_entry],
    )

    return estimated, reference
def test_reindex_data_set_no_mole_fraction():
    """Check ``reindex_data_set`` on a substance that contains a component
    specified by an exact amount rather than only by mole fractions.

    After re-indexing, the single evaluator property is expected to carry
    the id of the single reference entry (``"1"``).
    """

    setup_timestamp_logging(logging.INFO)

    # Evaluator-side substance: "O" by mole fraction, "CO" as a solute with
    # an exact amount of one.
    solvent = substances.Component(smiles="O")
    solute = substances.Component(
        smiles="CO", role=substances.Component.Role.Solute
    )

    solution = substances.Substance()
    solution.add_component(solvent, amount=substances.MoleFraction(1.0))
    solution.add_component(solute, amount=substances.ExactAmount(1))

    evaluator_data_set = PhysicalPropertyDataSet()

    free_energy_unit = SolvationFreeEnergy.default_unit()
    free_energy = SolvationFreeEnergy(
        thermodynamic_state=ThermodynamicState(
            temperature=298.15 * unit.kelvin,
            pressure=1.0 * unit.atmosphere,
        ),
        phase=PropertyPhase.Liquid,
        substance=solution,
        value=1.0 * free_energy_unit,
        uncertainty=1.0 * free_energy_unit,
    )
    evaluator_data_set.add_properties(free_energy)

    # Reference-side entry describing the same system.
    reference_entry = DataSetEntry(
        id=1,
        property_type="SolvationFreeEnergy",
        temperature=298.15,
        pressure=101.325,
        value=1.0,
        std_error=1.0,
        doi=" ",
        components=[
            Component(smiles="O", mole_fraction=1.0),
            Component(
                smiles="CO", mole_fraction=0.0, exact_amount=1, role="Solute"
            ),
        ],
    )
    data_set = DataSet(
        id="data-set",
        description=" ",
        authors=[Author(name=" ", email="*****@*****.**", institute=" ")],
        entries=[reference_entry],
    )

    reindex_data_set(evaluator_data_set, data_set)

    (reindexed_property,) = evaluator_data_set.properties[:1]
    assert reindexed_property.id == "1"
def test_three_components_to_categories():
    """Check that ``components_to_categories`` raises a
    ``NotImplementedError`` when handed three components."""

    with pytest.raises(NotImplementedError):
        # The same component instance is repeated three times.
        triple = 3 * [Component(smiles="C", mole_fraction=1.0)]
        environments = [ChemicalEnvironment.Alkane]

        components_to_categories(triple, environments)
Beispiel #4
0
    def analyze(
        cls,
        optimization: Optimization,
        target: RechargeTarget,
        target_directory: str,
        result_directory: str,
    ) -> Optional[RechargeTargetResult]:
        """Summarise the residuals stored for a recharge target.

        Parameters
        ----------
        optimization
            The optimization being analysed; its ``analysis_environments``
            are used to categorise each SMILES pattern.
        target
            The target being analysed; its ``weight`` scales the objective
            function of the result.
        target_directory
            Not used by this implementation.
        result_directory
            The directory expected to contain ``residuals.json``.

        Returns
        -------
            ``None`` when ``residuals.json`` is not a file inside
            ``result_directory``; otherwise a result holding one RMSE
            statistic per category (including the ``None`` category, which
            covers every SMILES) and the weighted objective function.
        """

        residuals_path = os.path.join(result_directory, "residuals.json")

        if not os.path.isfile(residuals_path):
            return None

        with open(residuals_path) as file:
            # NOTE(review): presumably a mapping of SMILES -> squared
            # residual; confirm against whatever writes this file.
            squared_residuals = json.load(file)

        # Group the SMILES by category. The ``None`` key collects all of them.
        grouped_smiles = defaultdict(list)
        grouped_smiles[None] = list(squared_residuals)

        for smiles in squared_residuals:

            molecule = Component(
                smiles=smiles, mole_fraction=0.0, exact_amount=1
            )

            for category in components_to_categories(
                [molecule], optimization.analysis_environments
            ):
                grouped_smiles[category].append(smiles)

        # Bootstrap an RMSE (with its confidence interval) for each group.
        statistic_entries = []

        for category, members in grouped_smiles.items():

            # The standard deviation returned by the bootstrap is not used.
            rmse, _, interval = bootstrap_residuals(
                [squared_residuals[member] for member in members]
            )

            statistic_entries.append(
                Statistic(
                    statistic_type=StatisticType.RMSE,
                    category=category,
                    value=rmse,
                    lower_95_ci=interval[0],
                    upper_95_ci=interval[1],
                )
            )

        unweighted_objective = cls._read_objective_function(result_directory)

        return RechargeTargetResult(
            objective_function=target.weight * unweighted_objective,
            statistic_entries=statistic_entries,
        )
Beispiel #5
0
def create_data_set(data_set_id: str, entry_id: Optional[int] = None):
    """Build a one-author data set holding a single density entry.

    The entry has two components: ``"O"`` with the ``"Solvent"`` role
    (mole fraction 1.0, exact amount 0) and ``"CO"`` with the ``"Solute"``
    role (mole fraction 0.0, exact amount 1).

    Parameters
    ----------
    data_set_id: str
        The id to assign to the data set.
    entry_id
        The id to assign to the one data entry.

    Returns
    -------
    DataSet
    """

    solvent = Component(
        smiles="O", mole_fraction=1.0, exact_amount=0, role="Solvent"
    )
    solute = Component(
        smiles="CO", mole_fraction=0.0, exact_amount=1, role="Solute"
    )

    density_entry = DataSetEntry(
        id=entry_id,
        property_type="Density",
        temperature=298.15,
        pressure=101.325,
        value=1.0,
        std_error=0.1,
        doi=" ",
        components=[solvent, solute],
    )

    return DataSet(
        id=data_set_id,
        description=" ",
        authors=[create_author()],
        entries=[density_entry],
    )
Beispiel #6
0
def test_analysed_result_from_evaluator():
    """Tests the ``DataSetResult.from_evaluator`` function.

    1000 estimated densities are drawn from a normal distribution centred
    on the reference value, so the RMSE reported for the un-categorised
    statistics is expected to lie within 10 % of the distribution's
    standard deviation.
    """
    expected_mean = 0.0
    expected_std = numpy.random.rand() + 1.0

    samples = numpy.random.normal(expected_mean, expected_std, 1000)

    estimated_properties = []
    reference_entries = []

    # Ids start at one and are shared between each estimated property
    # (as a string) and its reference entry (as an integer).
    for property_id, sample in enumerate(samples, start=1):

        state = ThermodynamicState(
            298.15 * unit.kelvin, pressure=1.0 * unit.atmosphere
        )

        estimated = Density(
            thermodynamic_state=state,
            phase=PropertyPhase.Liquid,
            substance=Substance.from_components("O"),
            value=sample * Density.default_unit(),
            uncertainty=0.0 * Density.default_unit(),
        )
        estimated.id = str(property_id)
        estimated_properties.append(estimated)

        reference_entries.append(
            DataSetEntry(
                id=property_id,
                property_type="Density",
                temperature=298.15,
                pressure=101.325,
                value=expected_mean,
                std_error=None,
                doi=" ",
                components=[Component(smiles="O", mole_fraction=1.0)],
            )
        )

    estimated_data_set = PhysicalPropertyDataSet()
    estimated_data_set.add_properties(*estimated_properties)

    reference_data_set = DataSet(
        id="ref",
        description=" ",
        authors=[Author(name=" ", email="*****@*****.**", institute=" ")],
        entries=reference_entries,
    )

    analysed_results = DataSetResult.from_evaluator(
        reference_data_set=reference_data_set,
        estimated_data_set=estimated_data_set,
        analysis_environments=[ChemicalEnvironment.Aqueous],
        statistic_types=[StatisticType.RMSE],
        bootstrap_iterations=1000,
    )

    assert len(analysed_results.result_entries) == len(estimated_properties)

    # The statistics computed over every entry carry no category.
    overall = next(
        entry
        for entry in analysed_results.statistic_entries
        if entry.category is None
    )

    assert overall.property_type == "Density"
    assert overall.n_components == 1
    assert overall.statistic_type == StatisticType.RMSE
    assert numpy.isclose(overall.value, expected_std, rtol=0.10)
def test_components_to_categories_empty():
    """Check that ``components_to_categories`` returns an empty list when
    no chemical environments are supplied."""

    single_component = [Component(smiles="C", mole_fraction=1.0)]

    categories = components_to_categories(single_component, [])
    assert categories == []
def test_analyse_functional_groups(smiles, expected_environment):
    """Check that ``analyse_functional_groups`` reports the expected
    environment for the given SMILES pattern."""
    assert expected_environment in analyse_functional_groups(smiles)


def test_analyse_functional_groups_error():
    """Tests that the function returns ``None`` when an unknown
    smiles pattern is passed."""
    moieties = analyse_functional_groups("[Ar]")
    assert moieties is None


@pytest.mark.parametrize(
    "components, expected_categories",
    [
        ([Component(smiles="CC(O)CC", mole_fraction=1.0)], ["Alcohol"]),
        ([Component(smiles="CC(=O)CC", mole_fraction=1.0)], ["Ketone"]),
        ([Component(smiles="C(=O)CC", mole_fraction=1.0)], ["Other"]),
        ([Component(smiles="CC(=O)CO", mole_fraction=1.0)
          ], ["Alcohol", "Ketone"]),
        (
            [
                Component(smiles="CC(O)CC", mole_fraction=0.5),
                Component(smiles="CC(O)C", mole_fraction=0.5),
            ],
            ["Alcohol + Alcohol"],
        ),
        (
            [
                Component(smiles="CC(O)CC", mole_fraction=0.5),
                Component(smiles="CC(=O)CO", mole_fraction=0.5),
def test_reindex_data_set():
    """Check that ``reindex_data_set`` assigns reference entry ids to the
    matching evaluator properties.

    Three densities are created, of which only the first two have a
    counterpart in the reference data; the third is expected to keep its
    original id. The check is made once against a single ``DataSet`` and
    once against a ``DataSetCollection``.
    """

    setup_timestamp_logging(logging.INFO)

    def _density(temperature, *smiles):
        # Evaluator-side density of the given components at 1 atm.
        return Density(
            thermodynamic_state=ThermodynamicState(
                temperature=temperature * unit.kelvin,
                pressure=1.0 * unit.atmosphere,
            ),
            phase=PropertyPhase.Liquid,
            substance=substances.Substance.from_components(*smiles),
            value=1.0 * Density.default_unit(),
            uncertainty=1.0 * Density.default_unit(),
        )

    def _entry(entry_id, *smiles):
        # Reference-side density entry with the mole fraction split
        # evenly between the components (1.0 for one, 0.5 for two).
        mole_fraction = 1.0 / len(smiles)

        return DataSetEntry(
            id=entry_id,
            property_type="Density",
            temperature=298.15,
            pressure=101.325,
            value=1.0,
            std_error=1.0,
            doi=" ",
            components=[
                Component(smiles=pattern, mole_fraction=mole_fraction)
                for pattern in smiles
            ],
        )

    def _data_set(data_set_id, entry):
        # Single-entry reference data set with a placeholder author.
        return DataSet(
            id=data_set_id,
            description=" ",
            authors=[Author(name=" ", email="*****@*****.**", institute=" ")],
            entries=[entry],
        )

    evaluator_data_set = PhysicalPropertyDataSet()
    evaluator_data_set.add_properties(
        _density(298.15, "O"),
        _density(298.15, "C", "O"),
        # Measured at a different temperature, so no reference entry
        # below describes it.
        _density(300.0, "C", "O"),
    )

    data_set = DataSet(
        id="data-set",
        description=" ",
        authors=[Author(name=" ", email="*****@*****.**", institute=" ")],
        entries=[_entry(1, "O", "C"), _entry(2, "O")],
    )

    # Remember the id of the property which should be left untouched.
    un_indexed_id = evaluator_data_set.properties[2].id

    reindex_data_set(evaluator_data_set, data_set)

    for index, expected_id in enumerate(("2", "1", un_indexed_id)):
        assert evaluator_data_set.properties[index].id == expected_id

    # Repeat with the reference entries spread over a collection.
    data_set_collection = DataSetCollection(
        data_sets=[
            _data_set("0", _entry(3, "O", "C")),
            _data_set("1", _entry(4, "O")),
        ]
    )

    reindex_data_set(evaluator_data_set, data_set_collection)

    for index, expected_id in enumerate(("4", "3", un_indexed_id)):
        assert evaluator_data_set.properties[index].id == expected_id