def test_multiple_amounts():
    """Check that one component can hold a mole fraction and an exact amount."""

    sodium = Component("[Na+]")
    chloride = Component("[Cl-]")

    substance = Substance()

    # Register every ion twice: first as a mole fraction, then as an
    # exact amount.
    for ion, fraction in ((sodium, 0.75), (chloride, 0.25)):
        substance.add_component(ion, MoleFraction(fraction))
        substance.add_component(ion, ExactAmount(1))

    # Adding a second amount must not register a second component.
    assert substance.number_of_components == 2

    amounts_per_ion = [substance.get_amounts(ion) for ion in (sodium, chloride)]

    # Both kinds of amount are expected to be retained for each ion.
    for ion_amounts in amounts_per_ion:
        assert len(ion_amounts) == 2

    counts = substance.get_molecules_per_component(6)

    assert len(counts) == 2

    assert counts[sodium.identifier] == 4
    assert counts[chloride.identifier] == 2
def test_truncate_n_molecules():
    """Exercise the ``truncate_n_molecules`` flag of
    ``Substance.get_molecules_per_component``."""

    ion_fraction = 0.00267

    composition = (
        ("[Na+]", ion_fraction),
        ("[Cl-]", ion_fraction),
        ("O", 1.0 - 2.0 * ion_fraction),
    )

    substance = Substance()

    for smiles, fraction in composition:
        substance.add_component(
            component=Component(smiles=smiles),
            amount=MoleFraction(fraction),
        )

    # With truncation disabled the request is expected to be rejected.
    with pytest.raises(ValueError):
        substance.get_molecules_per_component(1000, truncate_n_molecules=False)

    # With truncation enabled a molecule count per component is returned.
    truncated_counts = substance.get_molecules_per_component(
        1000, truncate_n_molecules=True
    )

    expected_counts = {"[Na+]{solv}": 3, "[Cl-]{solv}": 3, "O{solv}": 994}

    assert truncated_counts == expected_counts
Ejemplo n.º 3
0
def create_dummy_substance(number_of_components, elements=None):
    """Build a substance made of equally weighted dummy components.

    The n-th component (counting from one) is created from the SMILES
    pattern obtained by joining ``elements`` repeated n times, and every
    component is given a mole fraction of ``1 / number_of_components``.

    Parameters
    ----------
    number_of_components : int
        The number of components to add to the substance.
    elements : list of str, optional
        The elements that each component should contain. Defaults to
        ``["C"]`` when not provided.

    Returns
    -------
    Substance
        The created substance.
    """
    repeat_unit = ["C"] if elements is None else elements

    substance = Substance()
    fraction = 1.0 / number_of_components

    for n_repeats in range(1, number_of_components + 1):
        pattern = "".join(repeat_unit * n_repeats)
        substance.add_component(Component(pattern), MoleFraction(fraction))

    return substance
Ejemplo n.º 4
0
def test_solvate_existing_structure_protocol():
    """Tests solvating a single methanol molecule in water."""

    import mdtraj

    methanol = Component("CO")

    solute = Substance()
    solute.add_component(methanol, ExactAmount(1))

    solvent = Substance()
    solvent.add_component(Component("O"), MoleFraction(1.0))

    with tempfile.TemporaryDirectory() as working_directory:

        # Step one: build coordinates for the lone methanol molecule.
        build_solute = BuildCoordinatesPackmol("build_methanol")
        build_solute.max_molecules = 1
        build_solute.substance = solute
        build_solute.execute(working_directory, ComputeResources())

        expected_residue_name = build_solute.assigned_residue_names[
            methanol.identifier
        ]

        # Step two: solvate the structure produced above, with the solvent
        # protocol's ``max_molecules`` set to nine.
        solvate = SolvateExistingStructure("solvate_methanol")
        solvate.max_molecules = 9
        solvate.substance = solvent
        solvate.solute_coordinate_file = build_solute.coordinate_file_path
        solvate.execute(working_directory, ComputeResources())

        solvated = mdtraj.load_pdb(solvate.coordinate_file_path)

        # Ten residues are expected in total, with the methanol residue first.
        assert solvated.n_residues == 10
        assert solvated.top.residue(0).name == expected_residue_name
Ejemplo n.º 5
0
    def from_components(cls, *components):
        """Build a new `Substance` whose components all share the same
        mole fraction.

        Each component is assigned a mole fraction of
        ``1 / len(components)``.

        Parameters
        ----------
        components: Component or str
            The components to add to the substance. Each may be a full
            `Component` object or just the smiles representation of the
            component, in which case a `Component` is created from it.

        Returns
        -------
        Substance
            The substance containing the requested components in equal amounts.

        Raises
        ------
        ValueError
            If no components are provided.
        """

        if not components:
            raise ValueError("At least one component must be specified")

        substance = cls()
        equal_fraction = 1.0 / len(components)

        for entry in components:

            # Plain strings are interpreted as smiles patterns.
            resolved = Component(smiles=entry) if isinstance(entry, str) else entry

            substance.add_component(resolved, MoleFraction(equal_fraction))

        return substance
def test_add_mole_fractions():
    """Check that adding the same component twice with mole fractions of
    0.5 leaves one component holding a single `MoleFraction` of 1.0."""

    substance = Substance()

    for _ in range(2):
        substance.add_component(Component("C"), MoleFraction(0.5))

    assert substance.number_of_components == 1

    only_component = substance.components[0]
    stored_amounts = substance.get_amounts(only_component)

    assert len(stored_amounts) == 1

    combined = next(iter(stored_amounts))

    assert isinstance(combined, MoleFraction)
    assert np.isclose(combined.value, 1.0)
Ejemplo n.º 7
0
def data_frame() -> pandas.DataFrame:
    """Build a data frame of dummy `Density` and `EnthalpyOfMixing` entries.

    One entry is created for every combination of temperature, pressure,
    property type, composition, and each SMILES tuple that has the same
    number of components as the composition. All entries use the liquid
    phase and a value and uncertainty of 1.0 in the property's default unit.

    Returns
    -------
    pandas.DataFrame
        The result of ``PhysicalPropertyDataSet.to_pandas`` for the entries.
    """

    temperatures = [298.15, 318.15]
    pressures = [101.325, 101.0]

    property_types = [Density, EnthalpyOfMixing]

    compositions = [(1.0,), (1.0,), (0.25, 0.75), (0.75, 0.25)]
    # SMILES tuples keyed by the number of components they describe.
    smiles_by_size = {
        1: [("C(F)(Cl)(Br)",), ("C",)],
        2: [("CO", "C"), ("C", "CO")],
    }

    def build_substance(smiles_tuple, composition):
        # Pair each SMILES pattern with the mole fraction at the same position.
        substance = Substance()

        for smiles_pattern, fraction in zip(smiles_tuple, composition):
            substance.add_component(
                Component(smiles_pattern), MoleFraction(fraction)
            )

        return substance

    entries = []

    for temperature in temperatures:
        for pressure in pressures:
            for property_type in property_types:
                for composition in compositions:
                    for smiles_tuple in smiles_by_size[len(composition)]:

                        substance = build_substance(smiles_tuple, composition)

                        state = ThermodynamicState(
                            temperature=temperature * unit.kelvin,
                            pressure=pressure * unit.kilopascal,
                        )

                        entry = property_type(
                            thermodynamic_state=state,
                            phase=PropertyPhase.Liquid,
                            value=1.0 * property_type.default_unit(),
                            uncertainty=1.0 * property_type.default_unit(),
                            source=MeasurementSource(doi=" "),
                            substance=substance,
                        )
                        entries.append(entry)

    data_set = PhysicalPropertyDataSet()
    data_set.add_properties(*entries)

    return data_set.to_pandas()
Ejemplo n.º 8
0
    def _execute(self, directory, available_resources):
        """Populate ``filtered_substance`` with the components of
        ``input_substance`` whose role is listed in ``component_roles``.

        The mole fractions of the retained components are multiplied by the
        inverse of their sum, unless that sum is close to zero, in which case
        they are copied unchanged. Amounts which are not `MoleFraction`
        objects are always copied as they are.

        Parameters
        ----------
        directory
            Not used by this method.
        available_resources
            Not used by this method.

        Raises
        ------
        ValueError
            If ``expected_components`` is defined and differs from the number
            of retained components.
        """

        retained = [
            component
            for component in self.input_substance.components
            if component.role in self.component_roles
        ]

        # Total up the mole fractions of the components which survive the
        # filter; all other kinds of amount are ignored here.
        mole_fraction_sum = 0.0

        for component in retained:

            for amount in self.input_substance.get_amounts(component):

                if isinstance(amount, MoleFraction):
                    mole_fraction_sum += amount.value

        expected = self.expected_components

        if expected != UNDEFINED and expected != len(retained):

            raise ValueError(
                f"The filtered substance does not contain the expected number of "
                f"components ({expected}) - {retained}",
            )

        # Guard against dividing by a vanishing total.
        if np.isclose(mole_fraction_sum, 0.0):
            scale = 1.0
        else:
            scale = 1.0 / mole_fraction_sum

        self.filtered_substance = Substance()

        for component in retained:

            for amount in self.input_substance.get_amounts(component):

                rescaled = (
                    MoleFraction(amount.value * scale)
                    if isinstance(amount, MoleFraction)
                    else amount
                )

                self.filtered_substance.add_component(component, rescaled)
Ejemplo n.º 9
0
def _build_input_output_substances():
    """Builds pairs of input and expected substances for the
    `test_build_coordinate_composition` test.

    Returns
    -------
    list of tuple of Substance and Substance
        A list of input and expected substances.
    """

    def binary_mixture(fraction_o, fraction_c):
        # A two component substance of "O" and "C" with the given fractions.
        mixture = Substance()
        mixture.add_component(Component("O"), MoleFraction(fraction_o))
        mixture.add_component(Component("C"), MoleFraction(fraction_c))
        return mixture

    # Start with some easy cases where the expected substance matches the
    # input exactly.
    easy_cases = [("O",), ("O", "C"), ("O", "C", "CO")]

    substances = [
        (Substance.from_components(*smiles), Substance.from_components(*smiles))
        for smiles in easy_cases
    ]

    # Handle some cases where rounding will need to occur.
    rounding_cases = [
        ((0.41, 0.59), (0.4, 0.6)),
        ((0.59, 0.41), (0.6, 0.4)),
    ]

    for input_fractions, expected_fractions in rounding_cases:

        input_substance = binary_mixture(*input_fractions)
        expected_substance = binary_mixture(*expected_fractions)

        substances.append((input_substance, expected_substance))

    return substances
Ejemplo n.º 10
0
def test_build_coordinates_packmol_exact(count_exact_amount):
    """Tests that the build coordinate protocol behaves correctly for substances
    with exact amounts."""

    import mdtraj

    substance = Substance()
    substance.add_component(Component("O"), MoleFraction(1.0))
    substance.add_component(Component("C"), ExactAmount(1))

    # The molecule limit is 11 when ``count_exact_amount`` is set and 10
    # otherwise; the assertion below expects 11 residues in both cases.
    if count_exact_amount:
        molecule_limit = 11
    else:
        molecule_limit = 10

    protocol = BuildCoordinatesPackmol("build_coordinates")
    protocol.max_molecules = molecule_limit
    protocol.count_exact_amount = count_exact_amount
    protocol.substance = substance

    with tempfile.TemporaryDirectory() as directory:
        protocol.execute(directory)
        built_system = mdtraj.load_pdb(protocol.coordinate_file_path)

    assert built_system.n_residues == 11
    def create_substance():
        """Return a substance holding one solute, one ligand and one receptor
        molecule (as exact amounts) plus a solvent with a mole fraction of 1."""

        # (smiles, role, amount) for each component, in insertion order.
        entries = (
            ("C", Component.Role.Solute, ExactAmount(1)),
            ("CC", Component.Role.Ligand, ExactAmount(1)),
            ("CCC", Component.Role.Receptor, ExactAmount(1)),
            ("O", Component.Role.Solvent, MoleFraction(1.0)),
        )

        test_substance = Substance()

        for smiles, role, amount in entries:
            test_substance.add_component(Component(smiles, role=role), amount)

        return test_substance
Ejemplo n.º 12
0
    def _build_substance(
        guest_smiles: Optional[str],
        host_smiles: str,
        ionic_strength: Optional[unit.Quantity],
        negative_buffer_ion: str = "[Cl-]",
        positive_buffer_ion: str = "[Na+]",
    ):
        """Builds a substance containing a ligand and receptor solvated in an aqueous
        solution with a given ionic strength.

        If the combined charge of the host and guest is at least one elementary
        charge in magnitude, an exact number of oppositely charged buffer ions
        is also added as counter ions.

        Parameters
        ----------
        guest_smiles
            The SMILES descriptor of the guest. If ``None`` no guest is added.
        host_smiles
            The SMILES descriptor of the host.
        ionic_strength
            The ionic strength of the aqueous solvent. If ``None`` no buffer
            salt is added and the water mole fraction is 1.0.
        negative_buffer_ion
            The SMILES descriptor of the negative buffer ion, also used as the
            counter ion when the net charge is positive.
        positive_buffer_ion
            The SMILES descriptor of the positive buffer ion, also used as the
            counter ion when the net charge is negative.

        Returns
        -------
            The built substance.
        """
        from openff.toolkit.topology import Molecule
        from simtk import unit as simtk_unit

        substance = Substance()

        if guest_smiles is not None:

            guest = Component(smiles=guest_smiles, role=Component.Role.Ligand)
            substance.add_component(component=guest, amount=ExactAmount(1))

        host = Component(smiles=host_smiles, role=Component.Role.Receptor)
        substance.add_component(component=host, amount=ExactAmount(1))

        water = Component(smiles="O", role=Component.Role.Solvent)
        positive_ion = Component(
            smiles=positive_buffer_ion, role=Component.Role.Solvent
        )
        negative_ion = Component(
            smiles=negative_buffer_ion, role=Component.Role.Solvent
        )

        water_mole_fraction = 1.0

        if ionic_strength is not None:

            salt_mole_fraction = Substance.calculate_aqueous_ionic_mole_fraction(
                ionic_strength
            )

            if isinstance(salt_mole_fraction, unit.Quantity):
                # noinspection PyUnresolvedReferences
                salt_mole_fraction = salt_mole_fraction.magnitude

            # Both ions are added with the salt mole fraction, so water takes
            # whatever remains.
            water_mole_fraction = 1.0 - salt_mole_fraction * 2

            substance.add_component(
                component=positive_ion,
                amount=MoleFraction(salt_mole_fraction),
            )
            substance.add_component(
                component=negative_ion,
                amount=MoleFraction(salt_mole_fraction),
            )

        substance.add_component(
            component=water, amount=MoleFraction(water_mole_fraction)
        )

        host_molecule_charge = Molecule.from_smiles(host_smiles).total_charge
        guest_molecule_charge = (
            0.0 * simtk_unit.elementary_charge
            if guest_smiles is None
            else Molecule.from_smiles(guest_smiles).total_charge
        )

        net_charge = (host_molecule_charge + guest_molecule_charge).value_in_unit(
            simtk_unit.elementary_charge
        )

        # Round to the nearest integer rather than truncating toward zero.
        # The thresholds below accept charges slightly under an integer
        # (e.g. 0.99995), and truncation would then yield one counter ion
        # too few (or even zero).
        n_counter_ions = abs(int(round(net_charge)))

        if net_charge <= -0.9999:
            substance.add_component(positive_ion, ExactAmount(n_counter_ions))
        elif net_charge >= 0.9999:
            substance.add_component(negative_ion, ExactAmount(n_counter_ions))

        return substance
Ejemplo n.º 13
0
def complete_evaluator_data_set():
    """Create a more comprehensive `PhysicalPropertyDataSet` which contains one
    measurement for each of:

        * pure density
        * binary density
        * pure enthalpy of vaporization
        * binary enthalpy of mixing
        * binary excess molar volume
        * hydration free energy

    Every property shares the same thermodynamic state and source, has a
    value of 1.0 and an uncertainty of 0.1, and is given an id equal to its
    one-based position in the list, as a string.

    Returns
    -------
    PhysicalPropertyDataSet
    """
    thermodynamic_state = ThermodynamicState(
        298.15 * unit.kelvin, pressure=1.0 * unit.atmosphere
    )
    source = MeasurementSource(doi="10.1000/xyz123")

    solvation_substance = Substance()
    solvation_substance.add_component(Component("O"), MoleFraction(1.0))
    solvation_substance.add_component(Component("CCCO"), ExactAmount(1))

    def make_property(property_type, substance, value_unit, phase=PropertyPhase.Liquid):
        # All entries differ only in type, substance, unit and phase.
        return property_type(
            thermodynamic_state=thermodynamic_state,
            phase=phase,
            substance=substance,
            value=1.0 * value_unit,
            uncertainty=0.1 * value_unit,
            source=source,
        )

    evaluator_properties = [
        make_property(
            Density,
            Substance.from_components("O"),
            unit.kilogram / unit.meter**3,
        ),
        make_property(
            Density,
            Substance.from_components("O", "CC=O"),
            unit.kilogram / unit.meter**3,
        ),
        make_property(
            EnthalpyOfVaporization,
            Substance.from_components("CCO"),
            EnthalpyOfVaporization.default_unit(),
            phase=PropertyPhase(PropertyPhase.Liquid | PropertyPhase.Gas),
        ),
        make_property(
            EnthalpyOfMixing,
            Substance.from_components("CCCCO", "CC(C=O)C"),
            EnthalpyOfMixing.default_unit(),
        ),
        make_property(
            ExcessMolarVolume,
            Substance.from_components("C(=O)CCCO", "CCCCCC"),
            ExcessMolarVolume.default_unit(),
        ),
        make_property(
            SolvationFreeEnergy,
            solvation_substance,
            SolvationFreeEnergy.default_unit(),
        ),
    ]

    for property_id, evaluator_property in enumerate(evaluator_properties, start=1):
        evaluator_property.id = str(property_id)

    evaluator_data_set = PhysicalPropertyDataSet()
    evaluator_data_set.add_properties(*evaluator_properties)

    return evaluator_data_set
Ejemplo n.º 14
0
    def _apply(
        cls,
        data_frame: pandas.DataFrame,
        schema: ImportFreeSolvSchema,
        n_processes,
    ) -> pandas.DataFrame:
        """Append the FreeSolv solvation free energies to ``data_frame``.

        Each row returned by ``cls._download_free_solv`` is converted into a
        `SolvationFreeEnergy` of a single solute molecule in water at
        298.15 K and 101.325 kPa. The entries are then converted to a data
        frame and concatenated onto the input.

        Parameters
        ----------
        data_frame
            The data frame to append the FreeSolv entries to.
        schema
            Not used by this method.
        n_processes
            Not used by this method.

        Returns
        -------
            The concatenation of ``data_frame`` and the FreeSolv entries.
        """

        from openff.evaluator import properties, substances, unit

        # Convert the data frame into data rows.
        raw_free_solv_frame = cls._download_free_solv()

        target_unit = properties.SolvationFreeEnergy.default_unit()
        kcal_per_mole = unit.kilocalorie / unit.mole

        entries = []

        for _, row in raw_free_solv_frame.iterrows():

            # Extract and standardize the SMILES pattern of the solute.
            solute_smiles = substances.Component(row["SMILES"].strip()).smiles

            # Build the substance: water plus exactly one solute molecule.
            substance = Substance()
            substance.add_component(Component(smiles="O"), MoleFraction(1.0))
            substance.add_component(
                Component(smiles=solute_smiles, role=Component.Role.Solute),
                ExactAmount(1),
            )

            # Extract the value and uncertainty.
            value = float(row["experimental value (kcal/mol)"]) * kcal_per_mole
            std_error = (
                float(row["experimental uncertainty (kcal/mol)"]) * kcal_per_mole
            )

            # Attempt to extract a DOI.
            reference = row[
                "experimental reference (original or paper this value was taken from)"
            ]
            doi = cls._validate_doi(reference)

            state = ThermodynamicState(
                temperature=298.15 * unit.kelvin,
                pressure=101.325 * unit.kilopascal,
            )

            entries.append(
                SolvationFreeEnergy(
                    thermodynamic_state=state,
                    phase=PropertyPhase.Liquid,
                    substance=substance,
                    value=value.to(target_unit),
                    uncertainty=std_error.to(target_unit),
                    source=MeasurementSource(doi=doi),
                )
            )

        data_set = PhysicalPropertyDataSet()
        data_set.add_properties(*entries)

        return pandas.concat(
            [data_frame, data_set.to_pandas()],
            ignore_index=True,
            sort=False,
        )
def main():
    """Convert the legacy JSON data sets in ``raw_data`` into new data sets.

    For each named data set the legacy ``propertyestimator`` JSON is read
    and every entry is rebuilt as a property object of the matching class in
    ``properties``. The resulting `PhysicalPropertyDataSet` is then written
    to ``raw_data_v2`` as both JSON and CSV.

    Raises
    ------
    NotImplementedError
        If an amount or source has an unrecognised ``@type``.
    """

    output_directory = "raw_data_v2"
    os.makedirs(output_directory, exist_ok=True)

    data_set_names = [
        "curated_data_set",
        "gaff 1.81",
        "gaff 2.11",
        "parsley 1.0.0",
        "smirnoff99frosst 1.1.0",
    ]

    # The legacy type tags recognised by this script.
    data_set_type = "propertyestimator.datasets.datasets.PhysicalPropertyDataSet"
    mole_fraction_type = "propertyestimator.substances.Substance->MoleFraction"
    exact_amount_type = "propertyestimator.substances.Substance->ExactAmount"
    calculation_type = "propertyestimator.properties.properties.CalculationSource"
    measurement_type = "propertyestimator.properties.properties.MeasurementSource"

    def to_quantity(raw_quantity):
        # Legacy quantities are stored as a {"value": ..., "unit": ...} pair.
        return raw_quantity["value"] * unit.Unit(raw_quantity["unit"])

    for data_set_name in data_set_names:

        with open(os.path.join("raw_data", f"{data_set_name}.json")) as file:
            raw_data_set = json.load(file)

        assert raw_data_set["@type"] == data_set_type

        physical_properties = []

        for raw_entries in raw_data_set["properties"].values():

            for raw_entry in raw_entries:

                # Extract the substance this entry was measured for.
                raw_substance = raw_entry["substance"]
                substance = Substance()

                for raw_component in raw_substance["components"]:

                    component = Component(
                        smiles=raw_component["smiles"],
                        role=Component.Role[raw_component["role"]["value"]],
                    )

                    # Amounts are keyed by the component's smiles pattern.
                    raw_amounts = raw_substance["amounts"][raw_component["smiles"]]

                    for raw_amount in raw_amounts["value"]:

                        amount_type = raw_amount["@type"]

                        if amount_type == mole_fraction_type:
                            amount = MoleFraction(raw_amount["value"])
                        elif amount_type == exact_amount_type:
                            amount = ExactAmount(raw_amount["value"])
                        else:
                            raise NotImplementedError()

                        substance.add_component(component, amount)

                # Extract the source of the property
                raw_source = raw_entry["source"]
                source_type = raw_source["@type"]

                if source_type == calculation_type:
                    source = CalculationSource(fidelity=raw_source["fidelity"])
                elif source_type == measurement_type:
                    source = MeasurementSource(
                        doi=correct_doi(raw_source["reference"])
                    )
                else:
                    raise NotImplementedError()

                # Generate the new property object. The class is looked up by
                # the last segment of the legacy type name.
                property_class = getattr(
                    properties, raw_entry["@type"].split(".")[-1]
                )

                raw_state = raw_entry["thermodynamic_state"]

                state = ThermodynamicState(
                    temperature=to_quantity(raw_state["temperature"]),
                    pressure=to_quantity(raw_state["pressure"]),
                )
                phase = PropertyPhase(raw_entry["phase"])
                value = to_quantity(raw_entry["value"])

                # No uncertainty is carried over for measured properties.
                if isinstance(source, MeasurementSource):
                    uncertainty = None
                else:
                    uncertainty = to_quantity(raw_entry["uncertainty"])

                physical_property = property_class(
                    thermodynamic_state=state,
                    phase=phase,
                    substance=substance,
                    value=value,
                    uncertainty=uncertainty,
                    source=source,
                )
                physical_property.id = raw_entry["id"]

                physical_properties.append(physical_property)

        data_set = PhysicalPropertyDataSet()
        data_set.add_properties(*physical_properties)

        data_set.json(
            os.path.join(output_directory, f"{data_set_name}.json"), format=True
        )
        data_set.to_pandas().to_csv(
            os.path.join(output_directory, f"{data_set_name}.csv")
        )
Ejemplo n.º 16
0
def test_ligand_receptor_yank_protocol():
    """Run `LigandReceptorYankProtocol` in setup-only mode on dummy systems
    built for a receptor / ligand / solvent substance."""

    def build_substance(*entries):
        # Each entry is a (smiles, role, amount) triple.
        substance = Substance()

        for smiles, role, amount in entries:
            substance.add_component(Component(smiles=smiles, role=role), amount)

        return substance

    full_substance = build_substance(
        ("c1ccccc1", Component.Role.Receptor, ExactAmount(1)),
        ("C", Component.Role.Ligand, ExactAmount(1)),
        ("O", Component.Role.Solvent, MoleFraction(1.0)),
    )

    # The same system without the receptor.
    solute_substance = build_substance(
        ("C", Component.Role.Ligand, ExactAmount(1)),
        ("O", Component.Role.Solvent, MoleFraction(1.0)),
    )

    thermodynamic_state = ThermodynamicState(
        temperature=298.15 * unit.kelvin, pressure=1.0 * unit.atmosphere
    )

    with tempfile.TemporaryDirectory() as directory, temporarily_change_directory(
        directory
    ):

        force_field_path = "ff.json"

        with open(force_field_path, "w") as file:
            file.write(build_tip3p_smirnoff_force_field().json())

        complex_coordinate_path, complex_system = _setup_dummy_system(
            "full", full_substance, 3, force_field_path
        )

        ligand_coordinate_path, ligand_system = _setup_dummy_system(
            "ligand", solute_substance, 2, force_field_path
        )

        protocol = LigandReceptorYankProtocol("yank")
        protocol.substance = full_substance
        protocol.thermodynamic_state = thermodynamic_state
        protocol.number_of_iterations = 1
        protocol.steps_per_iteration = 1
        protocol.checkpoint_interval = 1
        protocol.verbose = True
        protocol.setup_only = True

        protocol.ligand_residue_name = "TMP"
        protocol.receptor_residue_name = "TMP"
        protocol.solvated_ligand_coordinates = ligand_coordinate_path
        protocol.solvated_ligand_system = ligand_system
        protocol.solvated_complex_coordinates = complex_coordinate_path
        protocol.solvated_complex_system = complex_system

        protocol.force_field_path = force_field_path
        protocol.execute("", ComputeResources())
Ejemplo n.º 17
0
def test_solvation_yank_protocol(solvent_smiles):
    """Execute a ``SolvationYankProtocol`` in set-up-only mode.

    Solution 1 is built from a substance holding exactly one ``"CO"`` solute
    molecule plus the solvent given by ``solvent_smiles``; solution 2 is built
    from the solute alone, with an empty ``Substance`` as its solvent. Every
    electrostatic / steric lambda schedule holds the single value ``1.0``.

    Parameters
    ----------
    solvent_smiles : str
        The SMILES pattern used for the solvent component.
    """

    solute_smiles = "CO"

    # Solute + solvent mixture used to build the first solution.
    solvated_substance = Substance()
    solvated_substance.add_component(
        Component(smiles=solute_smiles, role=Component.Role.Solute),
        ExactAmount(1),
    )
    solvated_substance.add_component(
        Component(smiles=solvent_smiles, role=Component.Role.Solvent),
        MoleFraction(1.0),
    )

    # The pure solvent.
    solvent_only = Substance()
    solvent_only.add_component(
        Component(smiles=solvent_smiles, role=Component.Role.Solvent),
        MoleFraction(1.0),
    )

    # The lone solute molecule.
    solute_only = Substance()
    solute_only.add_component(
        Component(smiles=solute_smiles, role=Component.Role.Solute),
        ExactAmount(1),
    )

    state = ThermodynamicState(
        temperature=298.15 * unit.kelvin,
        pressure=1.0 * unit.atmosphere,
    )

    # All files are written relative to a throw-away working directory.
    with tempfile.TemporaryDirectory() as working_directory, \
            temporarily_change_directory(working_directory):

        force_field_path = "ff.json"

        with open(force_field_path, "w") as force_field_file:
            force_field_file.write(build_tip3p_smirnoff_force_field().json())

        solution_1_coordinates, solution_1_system = _setup_dummy_system(
            "full", solvated_substance, 2, force_field_path
        )
        solution_2_coordinates, solution_2_system = _setup_dummy_system(
            "vacuum", solute_only, 1, force_field_path
        )

        protocol = SolvationYankProtocol("yank")

        # What is being solvated, and in what.
        protocol.solute = solute_only
        protocol.solvent_1 = solvent_only
        protocol.solvent_2 = Substance()
        protocol.thermodynamic_state = state

        # Keep the run as short as possible and stop after set-up.
        protocol.number_of_iterations = 1
        protocol.steps_per_iteration = 1
        protocol.checkpoint_interval = 1
        protocol.verbose = True
        protocol.setup_only = True

        # The pre-built coordinates and systems for both solutions.
        protocol.solution_1_coordinates = solution_1_coordinates
        protocol.solution_1_system = solution_1_system
        protocol.solution_2_coordinates = solution_2_coordinates
        protocol.solution_2_system = solution_2_system

        protocol.electrostatic_lambdas_1 = [1.00]
        protocol.steric_lambdas_1 = [1.00]
        protocol.electrostatic_lambdas_2 = [1.00]
        protocol.steric_lambdas_2 = [1.00]

        protocol.execute("", ComputeResources())
Ejemplo n.º 18
0
    def from_pandas(cls,
                    data_frame: pandas.DataFrame) -> "PhysicalPropertyDataSet":
        """Constructs a data set object from a pandas ``DataFrame`` object.

        Notes
        -----
        * All physical properties are assumed to be sourced from experimental
          measurements.
        * Currently this method only supports data frames containing properties
          which are built-in to the framework (e.g. Density).
        * This method assumes the data frame has a structure identical to that
          produced by the ``PhysicalPropertyDataSet.to_pandas`` function.
        * Properties are created row by row and, within a row, in the order
          in which the ``"<Type> Value (<unit>)"`` columns appear in the
          data frame.

        Parameters
        ----------
        data_frame
            The data frame to construct the data set from.

        Returns
        -------
            The constructed data set.

        Raises
        ------
        AssertionError
            If a column header containing ``" Value "`` does not have the
            expected ``"<Type> Value (<unit>)"`` format, names a type which
            is not an attribute of ``openff.evaluator.properties``, or
            specifies a unit whose dimensionality differs from that of the
            property type's default unit.
        """

        from openff.evaluator import properties

        # The matches are kept in a list (not a set) so that they are
        # processed in column order; iterating a set of match objects would
        # make the order of the created properties vary between runs.
        #
        # NOTE(review): inside the unit character class ``+-/`` is parsed as
        # a range from '+' to '/', so ',' and '.' are accepted as well. The
        # pattern is intentionally left unchanged here.
        property_header_matches = [
            re.match(r"^([a-zA-Z]+) Value \(([a-zA-Z0-9+-/\s]*)\)$", header)
            for header in data_frame if header.find(" Value ") >= 0
        ]
        property_headers = {}

        # Validate that the headers have the correct format, specify a
        # built-in property type, and specify correctly the properties
        # units.
        for match in property_header_matches:

            assert match

            property_type_string, property_unit_string = match.groups()

            assert hasattr(properties, property_type_string)
            property_type = getattr(properties, property_type_string)

            property_unit = unit.Unit(property_unit_string)
            assert property_unit is not None

            assert (property_unit.dimensionality ==
                    property_type.default_unit().dimensionality)

            property_headers[match.group(0)] = (property_type, property_unit)

        # Compile the DOI patterns once up front rather than handing the raw
        # pattern strings to ``re.match`` for every property of every row.
        doi_patterns = [
            re.compile(pattern, re.I) for pattern in (
                r"^10.\d{4,9}/[-._;()/:A-Z0-9]+$",
                r"^10.1002/[^\s]+$",
                r"^10.\d{4}/\d+-\d+X?(\d+)\d+<[\d\w]+:[\d\w]*>\d+.\d+.\w+;\d$",
                r"^10.1021/\w\w\d+$",
                r"^10.1207/[\w\d]+\&\d+_\d+$",
            )
        ]

        # Convert the data rows to property objects.
        physical_properties = []

        for _, data_row in data_frame.iterrows():

            # Columns which are empty for this row are dropped so that the
            # ``in`` / ``get`` checks below treat them as missing.
            data_row = data_row.dropna()

            # Extract the state at which the measurement was made.
            thermodynamic_state = ThermodynamicState(
                temperature=data_row["Temperature (K)"] * unit.kelvin,
                pressure=data_row["Pressure (kPa)"] * unit.kilopascal,
            )
            property_phase = PropertyPhase.from_string(data_row["Phase"])

            # Extract the substance the measurement was made for.
            substance = Substance()

            for i in range(data_row["N Components"]):

                # Component columns are numbered starting from one. A missing
                # role column falls back to the "Solvent" role.
                component = Component(
                    smiles=data_row[f"Component {i + 1}"],
                    role=Component.Role[data_row.get(f"Role {i + 1}",
                                                     "Solvent")],
                )

                mole_fraction = data_row.get(f"Mole Fraction {i + 1}", 0.0)
                exact_amount = data_row.get(f"Exact Amount {i + 1}", 0)

                # A component may carry a mole fraction, an exact amount,
                # or both; amounts which are (close to) zero are skipped.
                if not numpy.isclose(mole_fraction, 0.0):
                    substance.add_component(component,
                                            MoleFraction(mole_fraction))
                if not numpy.isclose(exact_amount, 0.0):
                    substance.add_component(component,
                                            ExactAmount(exact_amount))

            for property_header, (property_type, property_unit) in (
                    property_headers.items()):

                # Check to see whether the row contains a value for this
                # type of property.
                if property_header not in data_row:
                    continue

                uncertainty_header = property_header.replace(
                    "Value", "Uncertainty")

                source_string = data_row["Source"]

                # The source is treated as a DOI only if every " + "
                # separated part of it matches one of the DOI patterns;
                # otherwise the whole string is stored as a free-form
                # reference.
                is_doi = all(
                    any(
                        pattern.match(split_string)
                        for pattern in doi_patterns)
                    for split_string in source_string.split(" + "))

                physical_property = property_type(
                    thermodynamic_state=thermodynamic_state,
                    phase=property_phase,
                    value=data_row[property_header] * property_unit,
                    uncertainty=None if uncertainty_header not in data_row else
                    data_row[uncertainty_header] * property_unit,
                    substance=substance,
                    source=MeasurementSource(
                        doi="" if not is_doi else source_string,
                        reference=source_string if not is_doi else "",
                    ),
                )

                # Only overwrite the id of the new property when the row
                # provides a non-empty one.
                identifier = data_row.get("Id", None)

                if identifier:
                    physical_property.id = identifier

                physical_properties.append(physical_property)

        data_set = PhysicalPropertyDataSet()
        data_set.add_properties(*physical_properties)

        return data_set
Ejemplo n.º 19
0
    def _rebuild_substance(self, number_of_molecules):
        """Creates the `Substance` which corresponds to a concrete set of
        molecule counts.

        Because only whole molecules can be added to a system, the mole
        fractions which are actually realised may differ from those of the
        input substance. Exact amounts are carried over unchanged, while
        every mole fraction is recomputed from the molecule counts.

        Parameters
        ----------
        number_of_molecules: list of int
            The number of each component which should be added to the system,
            indexed in the same order as ``self.substance.components``.

        Returns
        -------
        Substance
            The substance which contains the corrected component amounts.

        Raises
        ------
        ValueError
            If at least one mole fraction was recomputed and the recomputed
            mole fractions do not sum to one.
        """

        rebuilt_amounts = defaultdict(list)

        # Once the exact amounts have been subtracted, this is the number of
        # molecules which the mole fractions are expressed relative to.
        remaining_molecules = sum(number_of_molecules)

        # First pass: carry over the first exact amount of each component.
        for component in self.substance.components:

            exact_amount = next(
                (
                    amount
                    for amount in self.substance.get_amounts(component)
                    if isinstance(amount, ExactAmount)
                ),
                None,
            )

            if exact_amount is None:
                continue

            remaining_molecules -= exact_amount.value
            rebuilt_amounts[component].append(exact_amount)

        # Second pass: recompute a mole fraction for every component which
        # originally had one.
        fraction_sum = 0.0
        fraction_count = 0

        for index, component in enumerate(self.substance.components):

            has_mole_fraction = any(
                isinstance(amount, MoleFraction)
                for amount in self.substance.get_amounts(component)
            )

            if not has_mole_fraction:
                continue

            count = number_of_molecules[index]

            # Molecules already covered by an exact amount must not also
            # contribute to the mole fraction.
            if component in rebuilt_amounts:
                count -= rebuilt_amounts[component][0].value

            fraction = count / remaining_molecules
            rebuilt_amounts[component].append(MoleFraction(fraction))

            fraction_sum += fraction
            fraction_count += 1

        if fraction_count > 0 and not np.isclose(fraction_sum, 1.0):
            raise ValueError("The new mole fraction does not equal 1.0")

        rebuilt_substance = Substance()

        for component, amounts in rebuilt_amounts.items():
            for amount in amounts:
                rebuilt_substance.add_component(component, amount)

        return rebuilt_substance