Example #1
0
def test_validate_data_set():
    """Check that a data set validates properties on insertion and on demand.

    A density at 298 K is accepted, whereas one at -1 K must trigger an
    ``AssertionError`` both when it is added with validation enabled and
    when ``validate`` is called after it was added with ``validate=False``.
    """

    def density_at(temperature):
        # The two entries used below differ only in their temperature.
        return Density(
            ThermodynamicState(temperature, 1 * unit.atmosphere),
            PropertyPhase.Liquid,
            Substance.from_components("O"),
            0.0 * unit.gram / unit.milliliter,
            0.0 * unit.gram / unit.milliliter,
        )

    accepted_entry = density_at(298 * unit.kelvin)

    data_set = PhysicalPropertyDataSet()
    data_set.add_properties(accepted_entry)
    data_set.validate()

    rejected_entry = density_at(-1 * unit.kelvin)

    # With the default arguments the entry is rejected while being added.
    with pytest.raises(AssertionError):
        data_set.add_properties(rejected_entry)

    # Skipping validation lets the entry in, but an explicit validation
    # pass must still reject the data set.
    data_set.add_properties(rejected_entry, validate=False)

    with pytest.raises(AssertionError):
        data_set.validate()
    def test_generate_request_options(self):
        """Check the request options generated for an evaluator target.

        With direct simulation and reweighting both allowed, the options are
        expected to list the reweighting layer ahead of the simulation layer
        and to carry density schemas matching the defaults built from the
        target's ``n_molecules`` and ``n_effective_samples`` values.
        """

        data_set = create_data_set("data-set-1", 1)

        evaluator_target = create_evaluator_target(
            "evaluator-target-1", [data_set.id]
        )
        evaluator_target.allow_direct_simulation = True
        evaluator_target.allow_reweighting = True
        evaluator_target.n_molecules = 512
        evaluator_target.n_effective_samples = 10

        options = OptimizationInputFactory._generate_request_options(
            evaluator_target, data_set.to_evaluator()
        )

        assert options.calculation_layers == ["ReweightingLayer", "SimulationLayer"]
        assert options.calculation_schemas != UNDEFINED

        # The schemas are compared through their JSON representations.
        expected_json_by_layer = {
            "SimulationLayer": Density.default_simulation_schema(
                n_molecules=512
            ).json(),
            "ReweightingLayer": Density.default_reweighting_schema(
                n_effective_samples=10
            ).json(),
        }

        for layer_name, expected_json in expected_json_by_layer.items():
            generated_schema = options.calculation_schemas["Density"][layer_name]
            assert generated_schema.json() == expected_json
def _build_entry(*smiles: str) -> Density:
    """Create a liquid density entry for a system of the given smiles.

    The entry is reported at 298.15 K and 101.325 kPa, and its substance is
    built by handing every smiles pattern to ``Substance.from_components``.

    Parameters
    ----------
    smiles
        The smiles patterns of the components. At least one is required.

    Returns
    -------
        The built density entry.
    """
    assert smiles

    ambient_state = ThermodynamicState(
        temperature=298.15 * unit.kelvin,
        pressure=101.325 * unit.kilopascal,
    )
    system = Substance.from_components(*smiles)

    return Density(
        substance=system,
        thermodynamic_state=ambient_state,
        phase=PropertyPhase.Liquid,
        value=1.0 * Density.default_unit(),
        uncertainty=1.0 * Density.default_unit(),
        source=MeasurementSource(doi=" "),
    )
Example #4
0
def estimated_reference_sets():
    """Build a matching pair of estimated and reference data sets.

    Both sets hold a density (id 1) and an enthalpy of mixing (id 2) for an
    ``O`` / ``CC=O`` system at a temperature of 298.15.

    Returns
    -------
    tuple
        The estimated ``PhysicalPropertyDataSet`` followed by the reference
        ``DataSet``.
    """

    def ambient_state():
        return ThermodynamicState(
            298.15 * unit.kelvin, pressure=1.0 * unit.atmosphere
        )

    def binary_components():
        return [
            Component(smiles=pattern, mole_fraction=0.5)
            for pattern in ("O", "CC=O")
        ]

    # The estimated side, expressed with evaluator objects.
    density_estimate = Density(
        thermodynamic_state=ambient_state(),
        phase=PropertyPhase.Liquid,
        substance=Substance.from_components("O", "CC=O"),
        value=1.0 * unit.kilogram / unit.meter**3,
        uncertainty=0.1 * unit.kilogram / unit.meter**3,
    )
    density_estimate.id = "1"

    enthalpy_estimate = EnthalpyOfMixing(
        thermodynamic_state=ambient_state(),
        phase=PropertyPhase.Liquid,
        substance=Substance.from_components("O", "CC=O"),
        value=1.0 * unit.kilocalorie / unit.mole,
        uncertainty=0.1 * unit.kilojoule / unit.mole,
    )
    enthalpy_estimate.id = "2"

    estimated_data_set = PhysicalPropertyDataSet()
    estimated_data_set.add_properties(density_estimate, enthalpy_estimate)

    # The reference side, expressed as plain data set entries with the
    # same ids as the estimates above.
    density_reference = DataSetEntry(
        id=1,
        property_type="Density",
        temperature=298.15,
        pressure=101.325,
        value=0.001,
        std_error=0.0001,
        doi=" ",
        components=binary_components(),
    )
    enthalpy_reference = DataSetEntry(
        id=2,
        property_type="EnthalpyOfMixing",
        temperature=298.15,
        pressure=101.325,
        value=4.184,
        std_error=0.1,
        doi=" ",
        components=binary_components(),
    )

    reference_data_set = DataSet(
        id="ref",
        description=" ",
        authors=[Author(name=" ", email="*****@*****.**", institute=" ")],
        entries=[density_reference, enthalpy_reference],
    )

    return estimated_data_set, reference_data_set
Example #5
0
def test_physical_property_state_methods():
    """Check that ``__getstate__`` / ``__setstate__`` round trip a property.

    The state of a dummy density is loaded into a fresh ``Density`` and the
    JSON serializations of both states are required to be identical.
    """

    source_state = create_dummy_property(Density).__getstate__()

    clone = Density()
    clone.__setstate__(source_state)
    clone_state = clone.__getstate__()

    # Compare the serialized forms of the two states.
    source_json, clone_json = (
        json.dumps(state, cls=TypedJSONEncoder)
        for state in (source_state, clone_state)
    )

    assert source_json == clone_json
Example #6
0
def create_filterable_data_set():
    """Create a dummy data set whose entries differ in every filterable way.

    The set contains:

        - a liquid density at 298 K and 0.5 atm, 1 component, element C.
        - a gaseous dielectric constant at 288 K and 1 atm, 2 components,
          element N.
        - a solid enthalpy of mixing at 308 K and 1.5 atm, 3 components,
          element O.

    Returns
    -------
    PhysicalPropertyDataSet
        The created data set.
    """

    dummy_source = CalculationSource("Dummy", {})

    # Liquid density.
    carbon_substance = create_dummy_substance(
        number_of_components=1, elements=["C"]
    )
    liquid_state = ThermodynamicState(
        temperature=298 * unit.kelvin, pressure=0.5 * unit.atmosphere
    )
    liquid_density = Density(
        thermodynamic_state=liquid_state,
        phase=PropertyPhase.Liquid,
        substance=carbon_substance,
        value=1 * unit.gram / unit.milliliter,
        uncertainty=0.11 * unit.gram / unit.milliliter,
        source=dummy_source,
    )

    # Gaseous dielectric constant.
    nitrogen_substance = create_dummy_substance(
        number_of_components=2, elements=["N"]
    )
    gas_state = ThermodynamicState(
        temperature=288 * unit.kelvin, pressure=1 * unit.atmosphere
    )
    gas_dielectric = DielectricConstant(
        thermodynamic_state=gas_state,
        phase=PropertyPhase.Gas,
        substance=nitrogen_substance,
        value=1 * unit.dimensionless,
        uncertainty=0.11 * unit.dimensionless,
        source=dummy_source,
    )

    # Solid enthalpy of mixing.
    oxygen_substance = create_dummy_substance(
        number_of_components=3, elements=["O"]
    )
    solid_state = ThermodynamicState(
        temperature=308 * unit.kelvin, pressure=1.5 * unit.atmosphere
    )
    solid_enthalpy = EnthalpyOfMixing(
        thermodynamic_state=solid_state,
        phase=PropertyPhase.Solid,
        substance=oxygen_substance,
        value=1 * unit.kilojoules / unit.mole,
        uncertainty=0.11 * unit.kilojoules / unit.mole,
        source=dummy_source,
    )

    filterable_set = PhysicalPropertyDataSet()
    filterable_set.add_properties(liquid_density, gas_dielectric, solid_enthalpy)

    return filterable_set
Example #7
0
def test_same_component_batching():
    """Check how the server batches a submission by component.

    A density and an enthalpy of vaporization are queued for each of two
    substances, and the server is expected to return two batches holding
    two properties each.
    """

    shared_state = ThermodynamicState(
        temperature=1.0 * unit.kelvin, pressure=1.0 * unit.atmosphere
    )

    # One density followed by one enthalpy of vaporization per substance.
    queued = []

    for second_component in ("C", "CO"):
        queued.append(
            Density(
                thermodynamic_state=shared_state,
                substance=Substance.from_components("O", second_component),
                value=0.0 * unit.kilogram / unit.meter**3,
            )
        )
        queued.append(
            EnthalpyOfVaporization(
                thermodynamic_state=shared_state,
                substance=Substance.from_components("O", second_component),
                value=0.0 * unit.kilojoule / unit.mole,
            )
        )

    data_set = PhysicalPropertyDataSet()
    data_set.add_properties(*queued)

    request_options = RequestOptions()

    submission = EvaluatorClient._Submission()
    submission.dataset = data_set
    submission.options = request_options

    with DaskLocalCluster() as backend:
        batches = EvaluatorServer(backend)._batch_by_same_component(submission, "")

    assert len(batches) == 2
    assert [len(batch.queued_properties) for batch in batches] == [2, 2]
Example #8
0
def test_from_pandas():
    """Check that a data set survives a round trip through a pandas frame."""

    state = ThermodynamicState(
        temperature=298.15 * unit.kelvin, pressure=1.0 * unit.atmosphere
    )

    # One entry has a DOI source and an explicit uncertainty; the other two
    # have reference sources and are given no uncertainty.
    binary_density = Density(
        thermodynamic_state=state,
        phase=PropertyPhase.Liquid,
        substance=Substance.from_components("CO", "O"),
        value=1.0 * unit.kilogram / unit.meter**3,
        uncertainty=1.0 * unit.kilogram / unit.meter**3,
        source=MeasurementSource(doi="10.5281/zenodo.596537"),
    )
    vaporization_enthalpy = EnthalpyOfVaporization(
        thermodynamic_state=state,
        phase=PropertyPhase.from_string("Liquid + Gas"),
        substance=Substance.from_components("C"),
        value=2.0 * unit.kilojoule / unit.mole,
        source=MeasurementSource(reference="2"),
    )
    dielectric_constant = DielectricConstant(
        thermodynamic_state=state,
        phase=PropertyPhase.Liquid,
        substance=Substance.from_components("C"),
        value=3.0 * unit.dimensionless,
        source=MeasurementSource(reference="3"),
    )

    original_data_set = PhysicalPropertyDataSet()
    original_data_set.add_properties(
        binary_density, vaporization_enthalpy, dielectric_constant
    )

    recreated_data_set = PhysicalPropertyDataSet.from_pandas(
        original_data_set.to_pandas()
    )
    assert len(original_data_set) == len(recreated_data_set)

    for expected in original_data_set:

        # Pair each original property with the recreated one of the same id.
        actual = next(
            candidate
            for candidate in recreated_data_set
            if candidate.id == expected.id
        )

        for attribute in ("thermodynamic_state", "phase", "substance"):
            assert getattr(expected, attribute) == getattr(actual, attribute)

        assert numpy.isclose(expected.value, actual.value)

        if expected.uncertainty == UNDEFINED:
            assert expected.uncertainty == actual.uncertainty
        else:
            assert numpy.isclose(expected.uncertainty, actual.uncertainty)

        for attribute in ("doi", "reference"):
            assert getattr(expected.source, attribute) == getattr(
                actual.source, attribute
            )
def test_filter_ionic_liquid():
    """Check that ``FilterByIonicLiquid`` removes one of two density entries.

    The data set holds a density for ``[Na+].[Cl-]`` and one for ``C``, and
    only a single row is expected to remain after the filter is applied.
    """
    ambient_state = ThermodynamicState(
        temperature=298.15 * unit.kelvin,
        pressure=101.325 * unit.kilopascal,
    )

    # The entries are identical except for the substance they describe.
    densities = [
        Density(
            thermodynamic_state=ambient_state,
            phase=PropertyPhase.Liquid,
            value=1.0 * Density.default_unit(),
            uncertainty=1.0 * Density.default_unit(),
            source=MeasurementSource(doi=" "),
            substance=Substance.from_components(pattern),
        )
        for pattern in ("[Na+].[Cl-]", "C")
    ]

    data_set = PhysicalPropertyDataSet()
    data_set.add_properties(*densities)

    remaining = FilterByIonicLiquid.apply(
        data_set.to_pandas(), FilterByIonicLiquidSchema()
    )

    assert len(remaining) == 1
Example #10
0
def simple_evaluator_data_set():
    """Build an evaluator data set holding one binary density measurement.

    The single entry describes an ``O`` / ``CC=O`` system in the liquid
    phase and is given the id ``"1"``.

    Returns
    -------
    PhysicalPropertyDataSet
    """

    state = ThermodynamicState(
        298.15 * unit.kelvin, pressure=1.0 * unit.atmosphere
    )
    mixture = Substance.from_components("O", "CC=O")

    binary_density = Density(
        thermodynamic_state=state,
        phase=PropertyPhase.Liquid,
        substance=mixture,
        value=1.0 * unit.kilogram / unit.meter**3,
        uncertainty=0.1 * unit.kilogram / unit.meter**3,
        source=MeasurementSource(doi="10.1000/xyz123"),
    )
    binary_density.id = "1"

    data_set = PhysicalPropertyDataSet()
    data_set.add_properties(binary_density)

    return data_set
Example #11
0
def data_frame() -> pandas.DataFrame:
    """Build a frame of three density entries measured at different states.

    All entries describe the substance ``C``; relative to the first one, the
    second differs in temperature and the third differs in pressure.

    Returns
    -------
        The pandas representation of the data set.
    """

    # (temperature in kelvin, pressure in kilopascal) for each entry.
    conditions = [
        (298.15, 101.325),
        (305.15, 101.325),
        (298.15, 105.325),
    ]

    densities = [
        Density(
            thermodynamic_state=ThermodynamicState(
                temperature=temperature * unit.kelvin,
                pressure=pressure * unit.kilopascal,
            ),
            phase=PropertyPhase.Liquid,
            value=1.0 * Density.default_unit(),
            uncertainty=1.0 * Density.default_unit(),
            source=MeasurementSource(doi=" "),
            substance=Substance.from_components("C"),
        )
        for temperature, pressure in conditions
    ]

    data_set = PhysicalPropertyDataSet()
    data_set.add_properties(*densities)

    return data_set.to_pandas()
Example #12
0
def define_data_set(reweighting: bool) -> PhysicalPropertyDataSet:
    """Assemble the data set of properties to estimate.

    Excess and pure properties are added at each of three temperatures.
    Solvation free energies, including the entries produced by the FreeSolv
    curation workflow, are only added when ``reweighting`` is false.

    Parameters
    ----------
    reweighting
        Whether the data set is being built for a reweighting calculation.

    Returns
    -------
        The assembled data set.
    """

    # The states to compute estimates at: three temperatures, one pressure.
    states = [
        ThermodynamicState(
            temperature=temperature * unit.kelvin,
            pressure=1.0 * unit.atmosphere,
        )
        for temperature in (296.15, 298.15, 300.15)
    ]

    data_set = PhysicalPropertyDataSet()

    # Solvation free energies.
    if not reweighting:

        def solvated(solvent, solute):
            # A solvent with a single solute molecule added.
            substance = Substance.from_components(solvent)
            substance.add_component(
                Component(solute, Component.Role.Solute), ExactAmount(1)
            )
            return substance

        ethanol_substance = solvated("CCO", "CC=O")
        ethanal_substance = solvated("CC=O", "CCO")

        solvation_properties = [
            SolvationFreeEnergy(
                thermodynamic_state=states[1],
                phase=PropertyPhase.Liquid,
                substance=substance,
                value=0.0 * SolvationFreeEnergy.default_unit(),
            )
            for substance in (ethanol_substance, ethanal_substance)
        ]

        free_solv_workflow = CurationWorkflowSchema(
            component_schemas=[
                ImportFreeSolvSchema(),
                FilterBySubstancesSchema(substances_to_include=[("O", "CO")]),
            ]
        )
        curated_properties = CurationWorkflow.apply(
            PhysicalPropertyDataSet(), free_solv_workflow
        )

        data_set.add_properties(*solvation_properties, *curated_properties)

    for state in states:

        # Excess properties.
        excess_volume = ExcessMolarVolume(
            thermodynamic_state=state,
            phase=PropertyPhase.Liquid,
            substance=Substance.from_components("CC=O", "CCO"),
            value=0.0 * ExcessMolarVolume.default_unit(),
        )
        mixing_enthalpy = EnthalpyOfMixing(
            thermodynamic_state=state,
            phase=PropertyPhase.Liquid,
            substance=Substance.from_components("CC=O", "CCO"),
            value=0.0 * EnthalpyOfMixing.default_unit(),
        )
        data_set.add_properties(excess_volume, mixing_enthalpy)

        # Pure properties
        pure_density = Density(
            thermodynamic_state=state,
            phase=PropertyPhase.Liquid,
            substance=Substance.from_components("CCO"),
            value=0.0 * Density.default_unit(),
        )
        vaporization_enthalpy = EnthalpyOfVaporization(
            thermodynamic_state=state,
            phase=PropertyPhase(PropertyPhase.Liquid | PropertyPhase.Gas),
            substance=Substance.from_components("CCO"),
            value=0.0 * EnthalpyOfVaporization.default_unit(),
        )
        dielectric_constant = DielectricConstant(
            thermodynamic_state=state,
            phase=PropertyPhase.Liquid,
            substance=Substance.from_components("CCO"),
            value=0.0 * DielectricConstant.default_unit(),
        )
        data_set.add_properties(
            pure_density, vaporization_enthalpy, dielectric_constant
        )

    return data_set
Example #13
0
def test_analysed_result_from_evaluator():
    """Tests the `DataSetResult.from_evaluator` function.

    The estimated densities are drawn from a normal distribution centred on
    the reference value, so the RMSE over all entries is expected to be
    close to the standard deviation of that distribution.
    """
    expected_mean = 0.0
    expected_std = numpy.random.rand() + 1.0

    sampled_values = numpy.random.normal(expected_mean, expected_std, 1000)

    estimated_properties = []
    reference_entries = []

    # Ids start at one and tie each estimate to its reference entry.
    for property_id, sampled_value in enumerate(sampled_values, start=1):

        estimate = Density(
            thermodynamic_state=ThermodynamicState(
                298.15 * unit.kelvin, pressure=1.0 * unit.atmosphere
            ),
            phase=PropertyPhase.Liquid,
            substance=Substance.from_components("O"),
            value=sampled_value * Density.default_unit(),
            uncertainty=0.0 * Density.default_unit(),
        )
        estimate.id = str(property_id)
        estimated_properties.append(estimate)

        reference_entries.append(
            DataSetEntry(
                id=property_id,
                property_type="Density",
                temperature=298.15,
                pressure=101.325,
                value=expected_mean,
                std_error=None,
                doi=" ",
                components=[Component(smiles="O", mole_fraction=1.0)],
            )
        )

    estimated_data_set = PhysicalPropertyDataSet()
    estimated_data_set.add_properties(*estimated_properties)

    reference_data_set = DataSet(
        id="ref",
        description=" ",
        authors=[Author(name=" ", email="*****@*****.**", institute=" ")],
        entries=reference_entries,
    )

    analysed_results = DataSetResult.from_evaluator(
        reference_data_set=reference_data_set,
        estimated_data_set=estimated_data_set,
        analysis_environments=[ChemicalEnvironment.Aqueous],
        statistic_types=[StatisticType.RMSE],
        bootstrap_iterations=1000,
    )

    assert len(analysed_results.result_entries) == len(estimated_properties)

    # Select the first statistic which has no category set.
    overall_statistic = next(
        statistic
        for statistic in analysed_results.statistic_entries
        if statistic.category is None
    )

    assert overall_statistic.property_type == "Density"
    assert overall_statistic.n_components == 1
    assert overall_statistic.statistic_type == StatisticType.RMSE
    assert numpy.isclose(overall_statistic.value, expected_std, rtol=0.10)
def test_storage_retrieval():
    """Check that the reweighting layer finds the expected stored data.

    Dummy simulation data is stored for several substance / phase
    combinations. For every property, the storage keys found in the workflow
    metadata built by ``ReweightingLayer._get_workflow_metadata`` are then
    compared with the keys of the data that property is expected to use.
    """
    # Create some dummy properties
    methane = Substance.from_components("C")
    methanol = Substance.from_components("CO")
    mixture = Substance.from_components("C", "CO")
    # Add extra unused data to make sure the wrong data isn't
    # being retrieved.
    unused_pure = Substance.from_components("CCO")
    unused_mixture = Substance.from_components("CCO", "CO")

    # Each tuple is (substance, phase, number of molecules) and doubles as
    # the key under which the resulting storage key is recorded.
    data_to_store = [
        (methane, PropertyPhase.Liquid, 1000),
        (methanol, PropertyPhase.Liquid, 1000),
        (methanol, PropertyPhase.Gas, 1),
        (mixture, PropertyPhase.Liquid, 1000),
        (unused_pure, PropertyPhase.Liquid, 1000),
        (unused_mixture, PropertyPhase.Liquid, 1000),
    ]
    storage_keys = {}

    state = ThermodynamicState(temperature=1.0 * unit.kelvin)

    properties = [
        # Properties with a full system query.
        Density(
            value=1.0 * unit.gram / unit.litre,
            substance=methanol,
            thermodynamic_state=state,
        ),
        DielectricConstant(
            value=1.0 * unit.dimensionless, substance=methane, thermodynamic_state=state
        ),
        # Property with a multi-phase query (liquid and gas data).
        EnthalpyOfVaporization(
            value=1.0 * unit.joule / unit.mole,
            substance=methanol,
            thermodynamic_state=state,
        ),
        # Properties with a multi-component query (full system and
        # per-component data).
        EnthalpyOfMixing(
            value=1.0 * unit.joule / unit.mole,
            substance=mixture,
            thermodynamic_state=state,
        ),
        ExcessMolarVolume(
            value=1.0 * unit.meter ** 3, substance=mixture, thermodynamic_state=state
        ),
    ]
    # Maps each property type to the metadata keys it should expose and,
    # for each key, the stored data expected to be found under it.
    expected_data_per_property = {
        Density: {"full_system_data": [(methanol, PropertyPhase.Liquid, 1000)]},
        DielectricConstant: {
            "full_system_data": [(methane, PropertyPhase.Liquid, 1000)]
        },
        EnthalpyOfVaporization: {
            "liquid_data": [(methanol, PropertyPhase.Liquid, 1000)],
            "gas_data": [(methanol, PropertyPhase.Gas, 1)],
        },
        EnthalpyOfMixing: {
            "full_system_data": [(mixture, PropertyPhase.Liquid, 1000)],
            "component_data": [
                [(methane, PropertyPhase.Liquid, 1000)],
                [(methanol, PropertyPhase.Liquid, 1000)],
            ],
        },
        ExcessMolarVolume: {
            "full_system_data": [(mixture, PropertyPhase.Liquid, 1000)],
            "component_data": [
                [(methane, PropertyPhase.Liquid, 1000)],
                [(methanol, PropertyPhase.Liquid, 1000)],
            ],
        },
    }

    force_field = SmirnoffForceFieldSource.from_path("smirnoff99Frosst-1.1.0.offxml")

    with tempfile.TemporaryDirectory() as base_directory:

        # Create a storage backend with some dummy data.
        backend_directory = os.path.join(base_directory, "storage_dir")
        storage_backend = LocalFileStorage(backend_directory)

        force_field_id = storage_backend.store_force_field(force_field)

        for substance, phase, n_mol in data_to_store:

            data_directory = os.path.join(base_directory, substance.identifier)
            data = create_dummy_simulation_data(
                data_directory,
                substance=substance,
                force_field_id=force_field_id,
                phase=phase,
                number_of_molecules=n_mol,
            )
            storage_key = storage_backend.store_object(data, data_directory)
            storage_keys[(substance, phase, n_mol)] = storage_key

        for physical_property in properties:

            # Registered schemas are looked up by the property's class name.
            schema = registered_calculation_schemas["ReweightingLayer"][
                physical_property.__class__.__name__
            ]

            # The registry entry may be a factory rather than a schema.
            if callable(schema):
                schema = schema()

            # noinspection PyProtectedMember
            metadata = ReweightingLayer._get_workflow_metadata(
                base_directory,
                physical_property,
                "",
                [],
                storage_backend,
                schema,
            )

            assert metadata is not None

            expected_data_list = expected_data_per_property[physical_property.__class__]

            for data_key in expected_data_list:

                assert data_key in metadata

                stored_metadata = metadata[data_key]
                expected_metadata = expected_data_list[data_key]

                assert len(stored_metadata) == len(expected_metadata)

                if isinstance(stored_metadata[0], list):
                    # Flatten any lists of lists.
                    stored_metadata = [
                        item for sublist in stored_metadata for item in sublist
                    ]
                    expected_metadata = [
                        item for sublist in expected_metadata for item in sublist
                    ]

                # Each stored item is unpacked as a 3-tuple whose first
                # element is a path; its base name is compared against the
                # storage keys recorded above.
                metadata_storage_keys = [
                    os.path.basename(x) for x, _, _ in stored_metadata
                ]
                expected_storage_keys = [storage_keys[x] for x in expected_metadata]

                # Order is not significant, only the set of keys found.
                assert sorted(metadata_storage_keys) == sorted(expected_storage_keys)
Example #15
0
def complete_evaluator_data_set():
    """Build an evaluator data set holding one measurement of each kind:

        * pure density
        * binary density
        * pure enthalpy of vaporization
        * binary enthalpy of mixing
        * binary excess molar volume
        * solvation free energy of ``CCCO`` in ``O``

    The entries are given the ids ``"1"`` to ``"6"`` in the order above.

    Returns
    -------
    PhysicalPropertyDataSet
    """
    state = ThermodynamicState(
        298.15 * unit.kelvin, pressure=1.0 * unit.atmosphere
    )
    source = MeasurementSource(doi="10.1000/xyz123")

    # The solvent is given as a mole fraction and the solute as an exact
    # amount.
    solvation_substance = Substance()
    solvation_substance.add_component(Component("O"), MoleFraction(1.0))
    solvation_substance.add_component(Component("CCCO"), ExactAmount(1))

    properties = []

    properties.append(
        Density(
            thermodynamic_state=state,
            phase=PropertyPhase.Liquid,
            substance=Substance.from_components("O"),
            value=1.0 * unit.kilogram / unit.meter**3,
            uncertainty=0.1 * unit.kilogram / unit.meter**3,
            source=source,
        )
    )
    properties.append(
        Density(
            thermodynamic_state=state,
            phase=PropertyPhase.Liquid,
            substance=Substance.from_components("O", "CC=O"),
            value=1.0 * unit.kilogram / unit.meter**3,
            uncertainty=0.1 * unit.kilogram / unit.meter**3,
            source=source,
        )
    )
    properties.append(
        EnthalpyOfVaporization(
            thermodynamic_state=state,
            phase=PropertyPhase(PropertyPhase.Liquid | PropertyPhase.Gas),
            substance=Substance.from_components("CCO"),
            value=1.0 * EnthalpyOfVaporization.default_unit(),
            uncertainty=0.1 * EnthalpyOfVaporization.default_unit(),
            source=source,
        )
    )
    properties.append(
        EnthalpyOfMixing(
            thermodynamic_state=state,
            phase=PropertyPhase.Liquid,
            substance=Substance.from_components("CCCCO", "CC(C=O)C"),
            value=1.0 * EnthalpyOfMixing.default_unit(),
            uncertainty=0.1 * EnthalpyOfMixing.default_unit(),
            source=source,
        )
    )
    properties.append(
        ExcessMolarVolume(
            thermodynamic_state=state,
            phase=PropertyPhase.Liquid,
            substance=Substance.from_components("C(=O)CCCO", "CCCCCC"),
            value=1.0 * ExcessMolarVolume.default_unit(),
            uncertainty=0.1 * ExcessMolarVolume.default_unit(),
            source=source,
        )
    )
    properties.append(
        SolvationFreeEnergy(
            thermodynamic_state=state,
            phase=PropertyPhase.Liquid,
            substance=solvation_substance,
            value=1.0 * SolvationFreeEnergy.default_unit(),
            uncertainty=0.1 * SolvationFreeEnergy.default_unit(),
            source=source,
        )
    )

    # Ids are the one-based positions of the properties, as strings.
    for position, physical_property in enumerate(properties, start=1):
        physical_property.id = str(position)

    data_set = PhysicalPropertyDataSet()
    data_set.add_properties(*properties)

    return data_set
def test_reindex_data_set():
    """Tests that the ``reindex_data_set`` function behaves as expected.

    Three densities are re-indexed, first against a single ``DataSet`` and
    then against a ``DataSetCollection``. The first two are expected to pick
    up the ids of the matching reference entries, while the third, which is
    the only one at 300 K, is expected to keep its original id.
    """

    setup_timestamp_logging(logging.INFO)

    def density_of(temperature, *smiles):
        # An evaluator density of the given components at 1 atm.
        return Density(
            thermodynamic_state=ThermodynamicState(
                temperature=temperature * unit.kelvin,
                pressure=1.0 * unit.atmosphere,
            ),
            phase=PropertyPhase.Liquid,
            substance=substances.Substance.from_components(*smiles),
            value=1.0 * Density.default_unit(),
            uncertainty=1.0 * Density.default_unit(),
        )

    def reference_entry(entry_id, mole_fractions):
        # A reference density entry with the given id and composition.
        return DataSetEntry(
            id=entry_id,
            property_type="Density",
            temperature=298.15,
            pressure=101.325,
            value=1.0,
            std_error=1.0,
            doi=" ",
            components=[
                Component(smiles=pattern, mole_fraction=fraction)
                for pattern, fraction in mole_fractions.items()
            ],
        )

    def reference_set(set_id, *entries):
        return DataSet(
            id=set_id,
            description=" ",
            authors=[Author(name=" ", email="*****@*****.**", institute=" ")],
            entries=list(entries),
        )

    binary_composition = {"O": 0.5, "C": 0.5}
    pure_composition = {"O": 1.0}

    evaluator_data_set = PhysicalPropertyDataSet()
    evaluator_data_set.add_properties(
        density_of(298.15, "O"),
        density_of(298.15, "C", "O"),
        density_of(300.0, "C", "O"),
    )

    data_set = reference_set(
        "data-set",
        reference_entry(1, binary_composition),
        reference_entry(2, pure_composition),
    )

    # Remember the id of the property which should not be re-indexed.
    un_indexed_id = evaluator_data_set.properties[2].id

    reindex_data_set(evaluator_data_set, data_set)

    reindexed_ids = [x.id for x in evaluator_data_set.properties]
    assert reindexed_ids[0] == "2"
    assert reindexed_ids[1] == "1"
    assert reindexed_ids[2] == un_indexed_id

    # Repeat with the reference entries spread over a collection.
    data_set_collection = DataSetCollection(
        data_sets=[
            reference_set("0", reference_entry(3, binary_composition)),
            reference_set("1", reference_entry(4, pure_composition)),
        ]
    )

    reindex_data_set(evaluator_data_set, data_set_collection)

    reindexed_ids = [x.id for x in evaluator_data_set.properties]
    assert reindexed_ids[0] == "4"
    assert reindexed_ids[1] == "3"
    assert reindexed_ids[2] == un_indexed_id
Example #17
0
def test_to_pandas():
    """Check that a data set can be converted to a pandas data frame.

    The data set is populated with two pure-component properties (density
    and dielectric constant) and two binary-mixture properties (enthalpy of
    mixing and excess molar volume) at each of three temperatures, giving
    twelve properties in total. The frame returned by ``to_pandas`` must
    contain the expected state / component columns and have the expected
    shape, both before and after dropping columns which are entirely empty.
    """

    source = CalculationSource("Dummy", {})

    pure_substance = Substance.from_components("C")
    binary_substance = Substance.from_components("C", "O")

    # Both groups of properties are measured at the same three temperatures.
    temperatures = [298 * unit.kelvin, 300 * unit.kelvin, 302 * unit.kelvin]

    data_set = PhysicalPropertyDataSet()

    # All pure-component properties are added before any mixture property,
    # so two separate passes over the temperatures are kept on purpose.
    for temperature in temperatures:

        thermodynamic_state = ThermodynamicState(
            temperature=temperature,
            pressure=1.0 * unit.atmosphere,
        )

        density_property = Density(
            thermodynamic_state=thermodynamic_state,
            phase=PropertyPhase.Liquid,
            substance=pure_substance,
            value=1 * unit.gram / unit.milliliter,
            uncertainty=0.11 * unit.gram / unit.milliliter,
            source=source,
        )

        dielectric_property = DielectricConstant(
            thermodynamic_state=thermodynamic_state,
            phase=PropertyPhase.Liquid,
            substance=pure_substance,
            value=1 * unit.dimensionless,
            uncertainty=0.11 * unit.dimensionless,
            source=source,
        )

        data_set.add_properties(density_property)
        data_set.add_properties(dielectric_property)

    for temperature in temperatures:

        thermodynamic_state = ThermodynamicState(
            temperature=temperature,
            pressure=1.0 * unit.atmosphere,
        )

        enthalpy_property = EnthalpyOfMixing(
            thermodynamic_state=thermodynamic_state,
            phase=PropertyPhase.Liquid,
            substance=binary_substance,
            value=1 * unit.kilojoules / unit.mole,
            uncertainty=0.11 * unit.kilojoules / unit.mole,
            source=source,
        )

        excess_property = ExcessMolarVolume(
            thermodynamic_state=thermodynamic_state,
            phase=PropertyPhase.Liquid,
            substance=binary_substance,
            value=1 * unit.meter**3 / unit.mole,
            uncertainty=0.11 * unit.meter**3 / unit.mole,
            source=source,
        )

        data_set.add_properties(enthalpy_property)
        data_set.add_properties(excess_property)

    data_set_pandas = data_set.to_pandas()

    # Checked before the frame is first used; a ``None`` result would
    # otherwise surface as a ``TypeError`` in the membership test below.
    assert data_set_pandas is not None

    required_columns = [
        "Id",
        "Temperature (K)",
        "Pressure (kPa)",
        "Phase",
        "N Components",
        "Source",
        "Component 1",
        "Role 1",
        "Mole Fraction 1",
        "Exact Amount 1",
        "Component 2",
        "Role 2",
        "Mole Fraction 2",
        "Exact Amount 2",
    ]

    # Comparing against an empty list makes a failure name the absent
    # columns rather than only reporting ``False``.
    missing_columns = [
        column for column in required_columns
        if column not in data_set_pandas
    ]
    assert missing_columns == []

    # One row per property: 3 temperatures x 4 property types.
    assert data_set_pandas.shape == (12, 22)

    # Dropping the columns which contain no values at all must leave the
    # row count untouched.
    data_set_without_na = data_set_pandas.dropna(axis=1, how="all")
    assert data_set_without_na.shape == (12, 20)