Esempio n. 1
0
def estimated_reference_sets():
    """Build a paired estimated / reference data set for use in tests.

    The estimated set holds a ``Density`` (id ``"1"``) and an
    ``EnthalpyOfMixing`` (id ``"2"``) for a water / acetaldehyde substance.
    The reference set holds two entries with the integer ids ``1`` and ``2``
    for the same two property types and the same pair of SMILES patterns.

    NOTE(review): the reference values (0.001 and 4.184) look like the
    estimated values (1 kg / m^3 and 1 kcal / mol) re-expressed in the
    reference set's own units -- confirm against ``DataSetEntry.default_units``.

    Returns
    -------
    tuple
        The estimated ``PhysicalPropertyDataSet`` followed by the reference
        ``DataSet``.
    """

    def _make_state():
        # Each property receives its own state object rather than a shared one.
        return ThermodynamicState(
            298.15 * unit.kelvin, pressure=1.0 * unit.atmosphere
        )

    def _make_components():
        # Each reference entry receives its own list of component objects.
        return [
            Component(smiles=smiles, mole_fraction=0.5) for smiles in ("O", "CC=O")
        ]

    density_unit = unit.kilogram / unit.meter**3

    density = Density(
        thermodynamic_state=_make_state(),
        phase=PropertyPhase.Liquid,
        substance=Substance.from_components("O", "CC=O"),
        value=1.0 * density_unit,
        uncertainty=0.1 * density_unit,
    )
    density.id = "1"

    # The value and the uncertainty are intentionally given in different
    # (kcal vs kJ) energy units.
    enthalpy = EnthalpyOfMixing(
        thermodynamic_state=_make_state(),
        phase=PropertyPhase.Liquid,
        substance=Substance.from_components("O", "CC=O"),
        value=1.0 * unit.kilocalorie / unit.mole,
        uncertainty=0.1 * unit.kilojoule / unit.mole,
    )
    enthalpy.id = "2"

    estimated_data_set = PhysicalPropertyDataSet()
    estimated_data_set.add_properties(density, enthalpy)

    density_entry = DataSetEntry(
        id=1,
        property_type="Density",
        temperature=298.15,
        pressure=101.325,
        value=0.001,
        std_error=0.0001,
        doi=" ",
        components=_make_components(),
    )
    enthalpy_entry = DataSetEntry(
        id=2,
        property_type="EnthalpyOfMixing",
        temperature=298.15,
        pressure=101.325,
        value=4.184,
        std_error=0.1,
        doi=" ",
        components=_make_components(),
    )

    placeholder_author = Author(name=" ", email="*****@*****.**", institute=" ")

    reference_data_set = DataSet(
        id="ref",
        description=" ",
        authors=[placeholder_author],
        entries=[density_entry, enthalpy_entry],
    )

    return estimated_data_set, reference_data_set
Esempio n. 2
0
def test_pandas_string_id():
    """Check that ``DataSetEntry.from_series`` yields an entry whose id is
    ``None`` when the series' ``"Id"`` field holds the text ``"String"``."""

    first_entry = create_data_set("data-set-1").entries[0]

    series = first_entry.to_series()
    series["Id"] = "String"

    round_tripped = DataSetEntry.from_series(series)
    assert round_tripped.id is None
Esempio n. 3
0
def property_type_to_title(property_type: str, n_components: int):
    """Build a title string for a type of physical property.

    The title has the form ``"$<symbol>$ <unit text>"`` where ``<symbol>`` is
    a short LaTeX abbreviation when one is known for ``property_type`` (and
    the property type name itself otherwise).

    Parameters
    ----------
    property_type
        The name of the property type; it must be a key of
        ``DataSetEntry.default_units()``.
    n_components
        The number of components the property was measured for. When greater
        than one (and the type name does not contain ``"FreeEnergy"``) the
        symbol is suffixed with ``" (x)"``.

    Returns
    -------
    str
        The formatted title.
    """

    latex_symbols = {
        "Density": r"\rho",
        "DielectricConstant": r"\epsilon",
        "EnthalpyOfMixing": r"H_{mix}",
        "EnthalpyOfVaporization": r"H_{vap}",
        "ExcessMolarVolume": r"V_{ex}",
        "SolvationFreeEnergy": r"G_{solv}",
    }

    # The unit registry is an optional dependency; without it the default
    # unit string is used verbatim.
    try:
        from openff.evaluator import unit
    except ImportError:
        unit = None

    unit_string = DataSetEntry.default_units()[property_type]

    if unit is not None:

        parsed_unit = unit.Unit(unit_string)

        if parsed_unit == unit.dimensionless:
            # Dimensionless properties carry no unit label at all.
            unit_string = ""
        else:
            unit_string = f" ({parsed_unit:~P})"

    symbol = latex_symbols.get(property_type, property_type)

    if "FreeEnergy" not in property_type and n_components > 1:
        symbol = f"{symbol} (x)"

    return f"${symbol}$ {unit_string}"
Esempio n. 4
0
def test_reindex_data_set_no_mole_fraction():
    """Check that ``reindex_data_set`` assigns the reference entry's id to a
    property whose substance contains a component given as an exact amount."""

    setup_timestamp_logging(logging.INFO)

    # Evaluator side: a mole-fraction component plus an exact-amount solute.
    substance = substances.Substance()

    water = substances.Component(smiles="O")
    substance.add_component(water, amount=substances.MoleFraction(1.0))

    methanol = substances.Component(
        smiles="CO", role=substances.Component.Role.Solute
    )
    substance.add_component(methanol, amount=substances.ExactAmount(1))

    free_energy = SolvationFreeEnergy(
        thermodynamic_state=ThermodynamicState(
            temperature=298.15 * unit.kelvin, pressure=1.0 * unit.atmosphere
        ),
        phase=PropertyPhase.Liquid,
        substance=substance,
        value=1.0 * SolvationFreeEnergy.default_unit(),
        uncertainty=1.0 * SolvationFreeEnergy.default_unit(),
    )

    evaluator_data_set = PhysicalPropertyDataSet()
    evaluator_data_set.add_properties(free_energy)

    # Reference side: a single entry with id 1 for the same two SMILES.
    reference_entry = DataSetEntry(
        id=1,
        property_type="SolvationFreeEnergy",
        temperature=298.15,
        pressure=101.325,
        value=1.0,
        std_error=1.0,
        doi=" ",
        components=[
            Component(smiles="O", mole_fraction=1.0),
            Component(smiles="CO", mole_fraction=0.0, exact_amount=1, role="Solute"),
        ],
    )

    data_set = DataSet(
        id="data-set",
        description=" ",
        authors=[Author(name=" ", email="*****@*****.**", institute=" ")],
        entries=[reference_entry],
    )

    reindex_data_set(evaluator_data_set, data_set)

    reindexed_property = evaluator_data_set.properties[0]
    assert reindexed_property.id == "1"
Esempio n. 5
0
def create_data_set(data_set_id: str, entry_id: Optional[int] = None):
    """Create a data set with one author and one ``"Density"`` entry.

    The entry lists two components: ``"O"`` with the ``"Solvent"`` role
    (mole fraction 1.0, exact amount 0) and ``"CO"`` with the ``"Solute"``
    role (mole fraction 0.0, exact amount 1).

    Parameters
    ----------
    data_set_id: str
        The id to assign to the data set.
    entry_id
        The id to assign to the single data entry (``None`` by default).

    Returns
    -------
    DataSet
        The newly built data set.
    """

    author = create_author()

    solvent = Component(
        smiles="O", mole_fraction=1.0, exact_amount=0, role="Solvent"
    )
    solute = Component(
        smiles="CO", mole_fraction=0.0, exact_amount=1, role="Solute"
    )

    entry = DataSetEntry(
        id=entry_id,
        property_type="Density",
        temperature=298.15,
        pressure=101.325,
        value=1.0,
        std_error=0.1,
        doi=" ",
        components=[solvent, solute],
    )

    return DataSet(
        id=data_set_id,
        description=" ",
        authors=[author],
        entries=[entry],
    )
Esempio n. 6
0
def test_analysed_result_from_evaluator():
    """Check ``DataSetResult.from_evaluator`` on synthetic density data.

    A thousand estimated densities are drawn from a normal distribution
    centred on the (constant) reference value, so the RMSE over the full
    set should be close to the standard deviation of that distribution.
    """
    expected_mean = 0.0
    expected_std = numpy.random.rand() + 1.0

    samples = numpy.random.normal(expected_mean, expected_std, 1000)

    estimated_properties = []
    reference_entries = []

    # Ids start at one and are shared between each estimate / reference pair.
    for property_id, sample in enumerate(samples, start=1):

        estimate = Density(
            thermodynamic_state=ThermodynamicState(
                298.15 * unit.kelvin, pressure=1.0 * unit.atmosphere
            ),
            phase=PropertyPhase.Liquid,
            substance=Substance.from_components("O"),
            value=sample * Density.default_unit(),
            uncertainty=0.0 * Density.default_unit(),
        )
        estimate.id = str(property_id)
        estimated_properties.append(estimate)

        reference_entries.append(
            DataSetEntry(
                id=property_id,
                property_type="Density",
                temperature=298.15,
                pressure=101.325,
                value=expected_mean,
                std_error=None,
                doi=" ",
                components=[Component(smiles="O", mole_fraction=1.0)],
            )
        )

    estimated_data_set = PhysicalPropertyDataSet()
    estimated_data_set.add_properties(*estimated_properties)

    reference_data_set = DataSet(
        id="ref",
        description=" ",
        authors=[Author(name=" ", email="*****@*****.**", institute=" ")],
        entries=reference_entries,
    )

    analysed_results = DataSetResult.from_evaluator(
        reference_data_set=reference_data_set,
        estimated_data_set=estimated_data_set,
        analysis_environments=[ChemicalEnvironment.Aqueous],
        statistic_types=[StatisticType.RMSE],
        bootstrap_iterations=1000,
    )

    assert len(analysed_results.result_entries) == len(estimated_properties)

    # The first statistic without a category is the one inspected below.
    full_statistics = next(
        statistic
        for statistic in analysed_results.statistic_entries
        if statistic.category is None
    )

    assert full_statistics.property_type == "Density"
    assert full_statistics.n_components == 1
    assert full_statistics.statistic_type == StatisticType.RMSE
    assert numpy.isclose(full_statistics.value, expected_std, rtol=0.10)
Esempio n. 7
0
def test_reindex_data_set():
    """Check that ``reindex_data_set`` copies ids from matching reference
    entries, for both a single ``DataSet`` and a ``DataSetCollection``, and
    leaves the id of a property without a matching entry untouched."""

    def _density(temperature, *smiles):
        # A liquid density at 1 atm with unit value and uncertainty.
        return Density(
            thermodynamic_state=ThermodynamicState(
                temperature=temperature * unit.kelvin,
                pressure=1.0 * unit.atmosphere,
            ),
            phase=PropertyPhase.Liquid,
            substance=substances.Substance.from_components(*smiles),
            value=1.0 * Density.default_unit(),
            uncertainty=1.0 * Density.default_unit(),
        )

    def _entry(entry_id, *fractions):
        # ``fractions`` are ``(smiles, mole_fraction)`` pairs, in order.
        return DataSetEntry(
            id=entry_id,
            property_type="Density",
            temperature=298.15,
            pressure=101.325,
            value=1.0,
            std_error=1.0,
            doi=" ",
            components=[
                Component(smiles=smiles, mole_fraction=mole_fraction)
                for smiles, mole_fraction in fractions
            ],
        )

    def _data_set(set_id, *entries):
        return DataSet(
            id=set_id,
            description=" ",
            authors=[Author(name=" ", email="*****@*****.**", institute=" ")],
            entries=list(entries),
        )

    setup_timestamp_logging(logging.INFO)

    evaluator_data_set = PhysicalPropertyDataSet()
    evaluator_data_set.add_properties(
        _density(298.15, "O"),
        _density(298.15, "C", "O"),
        # Measured at 300 K, whereas every reference entry below is at 298.15.
        _density(300.0, "C", "O"),
    )

    data_set = _data_set(
        "data-set",
        _entry(1, ("O", 0.5), ("C", 0.5)),
        _entry(2, ("O", 1.0)),
    )

    un_indexed_id = evaluator_data_set.properties[2].id

    reindex_data_set(evaluator_data_set, data_set)

    first, second, third = (evaluator_data_set.properties[i] for i in range(3))
    assert first.id == "2"
    assert second.id == "1"
    assert third.id == un_indexed_id

    data_set_collection = DataSetCollection(
        data_sets=[
            _data_set("0", _entry(3, ("O", 0.5), ("C", 0.5))),
            _data_set("1", _entry(4, ("O", 1.0))),
        ]
    )

    reindex_data_set(evaluator_data_set, data_set_collection)

    first, second, third = (evaluator_data_set.properties[i] for i in range(3))
    assert first.id == "4"
    assert second.id == "3"
    assert third.id == un_indexed_id
Esempio n. 8
0
    def _evaluator_to_results_entries(
        cls,
        reference_data_set: Union[DataSet, DataSetCollection],
        estimated_data_set: "PhysicalPropertyDataSet",
        analysis_environments: List[ChemicalEnvironment],
    ) -> Tuple[List[DataSetResultEntry], pandas.DataFrame]:
        """Pair estimated properties with their reference entries.

        Reference entries and estimated properties are matched on the integer
        value of their ids. A reference entry without a matching estimate is
        skipped after logging a warning.

        Parameters
        ----------
        reference_data_set
            The data set (or collection of data sets) of reference entries.
        estimated_data_set
            The estimated properties; iterated directly and keyed by
            ``int(property.id)``.
        analysis_environments
            Forwarded to ``components_to_categories`` to categorise each
            entry.

        Returns
        -------
        tuple
            The list of ``DataSetResultEntry`` objects (one per matched
            reference entry) and a data frame with one row per matched entry
            and category. An entry without categories contributes a single
            row whose ``"Category"`` is ``None``.

        Raises
        ------
        NotImplementedError
            If ``reference_data_set`` is neither a ``DataSet`` nor a
            ``DataSetCollection``.
        AssertionError
            If a matched pair differ in property type or number of
            components.
        """

        from openff.evaluator.datasets import PhysicalProperty

        if isinstance(reference_data_set, DataSet):
            all_reference_entries = reference_data_set.entries
        elif isinstance(reference_data_set, DataSetCollection):
            all_reference_entries = (
                entry
                for data_set in reference_data_set.data_sets
                for entry in data_set.entries
            )
        else:
            raise NotImplementedError()

        # Both look-ups are keyed by the *integer* form of the id.
        references: Dict[int, DataSetEntry] = {
            int(entry.id): entry for entry in all_reference_entries
        }
        estimates: Dict[int, PhysicalProperty] = {
            int(physical_property.id): physical_property
            for physical_property in estimated_data_set
        }

        results_entries = []
        results_rows = []

        internal_units = DataSetEntry.default_units()

        for identifier, reference_entry in references.items():

            if identifier not in estimates:

                logger.warning(
                    f"The property with id={identifier} appears in the reference data "
                    f"set but not in the estimated set."
                )

                continue

            estimated_entry = estimates[identifier]

            # Sanity check that the pair describe the same type of property
            # measured for the same number of components.
            assert reference_entry.property_type == estimated_entry.__class__.__name__
            assert len(reference_entry.components) == len(estimated_entry.substance)

            # Estimated quantities are converted to the default unit for this
            # property type before their magnitudes are stored.
            internal_unit = internal_units[reference_entry.property_type]

            results_entry = DataSetResultEntry(
                reference_id=reference_entry.id,
                estimated_value=estimated_entry.value.to(internal_unit).magnitude,
                estimated_std_error=estimated_entry.uncertainty.to(
                    internal_unit
                ).magnitude,
                categories=components_to_categories(
                    reference_entry.components, analysis_environments
                ),
            )
            results_entries.append(results_entry)

            shared_columns = {
                "Property Type": reference_entry.property_type,
                "N Components": len(reference_entry.components),
                "Reference Value": reference_entry.value,
                "Reference Std": reference_entry.std_error,
                "Estimated Value": results_entry.estimated_value,
                "Estimated Std": results_entry.estimated_std_error,
            }

            # Uncategorised entries still contribute exactly one row.
            row_categories = results_entry.categories

            if len(row_categories) == 0:
                row_categories = [None]

            results_rows.extend(
                {**shared_columns, "Category": category}
                for category in row_categories
            )

        return results_entries, pandas.DataFrame(results_rows)