예제 #1
0
def test_build_docked_coordinates_protocol():
    """Dock a single methanol ligand into the alpha-cyclodextrin receptor file.

    The test is skipped when ``has_openeye()`` returns a falsy value, and
    passes when the docked complex written by the protocol has two residues.
    """

    if not has_openeye():
        pytest.skip("The `BuildDockedCoordinates` protocol requires OpenEye.")

    methanol_ligand = Component("CO", role=Component.Role.Ligand)

    ligand_substance = Substance()
    ligand_substance.add_component(methanol_ligand, ExactAmount(1))

    # TODO: This test could likely be made substantially faster
    #       by storing the binary prepared receptor. Would this
    #       be in breach of any oe license terms?
    with tempfile.TemporaryDirectory() as working_directory:

        docking_protocol = BuildDockedCoordinates("build_methanol")
        docking_protocol.ligand_substance = ligand_substance
        docking_protocol.number_of_ligand_conformers = 5
        docking_protocol.receptor_coordinate_file = get_data_filename(
            "test/molecules/acd.mol2"
        )
        docking_protocol.execute(working_directory, ComputeResources())

        docked_complex = PDBFile(docking_protocol.docked_complex_coordinate_path)
        residue_count = docked_complex.topology.getNumResidues()

        assert residue_count == 2
예제 #2
0
def test_validate_data_set():
    """Check that a data set validates properties on insertion and on demand.

    A density at 298 K must be accepted, while one at -1 K must be rejected
    with an ``AssertionError`` both when added with validation enabled and
    when ``validate`` is called after adding it with ``validate=False``.
    """

    def build_water_density(temperature):
        # Only the temperature differs between the valid and invalid cases.
        return Density(
            ThermodynamicState(temperature, 1 * unit.atmosphere),
            PropertyPhase.Liquid,
            Substance.from_components("O"),
            0.0 * unit.gram / unit.milliliter,
            0.0 * unit.gram / unit.milliliter,
        )

    accepted_property = build_water_density(298 * unit.kelvin)

    data_set = PhysicalPropertyDataSet()
    data_set.add_properties(accepted_property)

    data_set.validate()

    rejected_property = build_water_density(-1 * unit.kelvin)

    # Validation at insertion time should refuse the property...
    with pytest.raises(AssertionError):
        data_set.add_properties(rejected_property)

    # ...and an explicit validation should catch it when it was added unchecked.
    data_set.add_properties(rejected_property, validate=False)

    with pytest.raises(AssertionError):
        data_set.validate()
예제 #3
0
def create_dummy_substance(number_of_components, elements=None):
    """Build a substance made of equally weighted dummy components.

    The n-th component (counting from one) is created from the SMILES
    pattern obtained by repeating ``elements`` n times and joining the
    result, so every component is distinct.

    Parameters
    ----------
    number_of_components : int
        The number of components to add to the substance.
    elements : list of str, optional
        The elements that each component should contain. Defaults to
        ``["C"]`` when ``None``.

    Returns
    -------
    Substance
        The created substance, with each component assigned a mole
        fraction of ``1.0 / number_of_components``.
    """
    element_symbols = ["C"] if elements is None else elements

    dummy_substance = Substance()

    fraction_per_component = 1.0 / number_of_components

    for repeat_count in range(1, number_of_components + 1):

        smiles = "".join(element_symbols * repeat_count)
        amount = MoleFraction(fraction_per_component)

        dummy_substance.add_component(Component(smiles), amount)

    return dummy_substance
예제 #4
0
def test_solvate_existing_structure_protocol():
    """Solvate a single methanol molecule with a water substance.

    The solvated structure is expected to hold ten residues, the first of
    which carries the residue name assigned to methanol by the initial
    coordinate building step.
    """

    import mdtraj

    methanol = Component("CO")

    solute_substance = Substance()
    solute_substance.add_component(methanol, ExactAmount(1))

    solvent_substance = Substance()
    solvent_substance.add_component(Component("O"), MoleFraction(1.0))

    with tempfile.TemporaryDirectory() as working_directory:

        # Build the lone solute first.
        build_solute = BuildCoordinatesPackmol("build_methanol")
        build_solute.max_molecules = 1
        build_solute.substance = solute_substance
        build_solute.execute(working_directory, ComputeResources())

        residue_names = build_solute.assigned_residue_names
        expected_residue_name = residue_names[methanol.identifier]

        # Then add the solvent around the existing solute structure.
        solvate = SolvateExistingStructure("solvate_methanol")
        solvate.max_molecules = 9
        solvate.substance = solvent_substance
        solvate.solute_coordinate_file = build_solute.coordinate_file_path
        solvate.execute(working_directory, ComputeResources())

        solvated = mdtraj.load_pdb(solvate.coordinate_file_path)

        assert solvated.n_residues == 10
        assert solvated.top.residue(0).name == expected_residue_name
예제 #5
0
def test_gradient_reduced_potentials(use_subset):
    """Run ``OpenMMGradientPotentials`` on the stored water test data.

    Parameters
    ----------
    use_subset
        Forwarded to the protocol's ``use_subset_of_force_field`` input.

    The test passes when both the forward and reverse potential files
    reported by the protocol exist on disk.
    """

    water = Substance.from_components("O")
    state = ThermodynamicState(298 * unit.kelvin, 1.0 * unit.atmosphere)

    with tempfile.TemporaryDirectory() as directory:

        # Serialise the force field so the protocol can load it from disk.
        force_field_path = path.join(directory, "ff.json")

        with open(force_field_path, "w") as file:
            file.write(build_tip3p_smirnoff_force_field().json())

        protocol = OpenMMGradientPotentials("reduced_potentials")
        protocol.substance = water
        protocol.thermodynamic_state = state
        protocol.statistics_path = get_data_filename(
            "test/statistics/stats_pandas.csv"
        )
        protocol.force_field_path = force_field_path
        protocol.trajectory_file_path = get_data_filename(
            "test/trajectories/water.dcd"
        )
        protocol.coordinate_file_path = get_data_filename(
            "test/trajectories/water.pdb"
        )
        protocol.use_subset_of_force_field = use_subset
        protocol.enable_pbc = True
        protocol.parameter_key = ParameterGradientKey(
            "vdW", "[#1]-[#8X2H2+0:1]-[#1]", "epsilon"
        )

        protocol.execute(directory, ComputeResources())

        for output_path in (
            protocol.forward_potentials_path,
            protocol.reverse_potentials_path,
        ):
            assert path.isfile(output_path)
예제 #6
0
def test_simulation_data_storage():
    """Store a dummy simulation data object locally and read it back.

    Storing the same object a second time must return the original key, and
    the retrieved object must serialise to the same JSON as the stored one.
    """

    methane = Substance.from_components("C")

    with tempfile.TemporaryDirectory() as root_directory:

        data_directory = os.path.join(root_directory, "data_directory")
        backend_directory = os.path.join(root_directory, "storage_dir")

        data_object = create_dummy_simulation_data(data_directory, methane)

        storage = LocalFileStorage(backend_directory)
        first_key = storage.store_object(data_object, data_directory)

        # Regenerate the data directory before storing a second time
        # (presumably the first store call consumed it - TODO confirm).
        os.makedirs(data_directory, exist_ok=True)

        assert storage.has_object(data_object)
        assert first_key == storage.store_object(data_object, data_directory)

        retrieved_object, retrieved_directory = storage.retrieve_object(
            first_key, StoredSimulationData
        )

        assert backend_directory in retrieved_directory
        assert data_object.json() == retrieved_object.json()
def test_weight_by_mole_fraction_protocol(component_smiles, value):
    """Check that ``WeightByMoleFraction`` scales a value by a mole fraction.

    Parameters
    ----------
    component_smiles
        The SMILES of the component whose mole fraction should be applied.
    value
        The value to weight.
    """

    mixture = Substance.from_components("C", "CC", "CCC")
    single_component = Substance.from_components(component_smiles)

    # Look up the amount of the chosen component within the full mixture.
    component_identifier = single_component.components[0].identifier
    mixture_amounts = mixture.get_amounts(component_identifier)
    expected_fraction = next(iter(mixture_amounts)).value

    with tempfile.TemporaryDirectory() as working_directory:

        protocol = WeightByMoleFraction("weight")
        protocol.value = value
        protocol.full_substance = mixture
        protocol.component = single_component
        protocol.execute(working_directory, ComputeResources())

        assert protocol.weighted_value == value * expected_fraction
def test_add_mole_fractions():
    """Check that adding the same component twice merges its mole fractions.

    Two additions of "C" at a mole fraction of 0.5 should leave a single
    component holding a single ``MoleFraction`` amount close to 1.0.
    """

    substance = Substance()

    for _ in range(2):
        substance.add_component(Component("C"), MoleFraction(0.5))

    assert substance.number_of_components == 1

    merged_amounts = substance.get_amounts(substance.components[0])
    assert len(merged_amounts) == 1

    merged_amount = next(iter(merged_amounts))

    assert isinstance(merged_amount, MoleFraction)
    assert np.isclose(merged_amount.value, 1.0)
예제 #9
0
def test_same_component_batching():
    """Check that the server batches properties by their substance.

    A density and an enthalpy of vaporization are registered for each of two
    binary substances; ``_batch_by_same_component`` is then expected to
    return two batches holding two queued properties each.
    """

    state = ThermodynamicState(
        temperature=1.0 * unit.kelvin, pressure=1.0 * unit.atmosphere
    )

    data_set = PhysicalPropertyDataSet()

    # One density / enthalpy pair per substance, in the order O+C then O+CO.
    properties = []

    for second_component in ("C", "CO"):

        properties.append(
            Density(
                thermodynamic_state=state,
                substance=Substance.from_components("O", second_component),
                value=0.0 * unit.kilogram / unit.meter**3,
            )
        )
        properties.append(
            EnthalpyOfVaporization(
                thermodynamic_state=state,
                substance=Substance.from_components("O", second_component),
                value=0.0 * unit.kilojoule / unit.mole,
            )
        )

    data_set.add_properties(*properties)

    submission = EvaluatorClient._Submission()
    submission.dataset = data_set
    submission.options = RequestOptions()

    with DaskLocalCluster() as calculation_backend:

        server = EvaluatorServer(calculation_backend)
        batches = server._batch_by_same_component(submission, "")

    assert len(batches) == 2

    for batch in (batches[0], batches[1]):
        assert len(batch.queued_properties) == 2
예제 #10
0
    def _execute(self, directory, available_resources):
        """Populate ``filtered_substance`` with the components of the input
        substance whose role equals ``component_role``.

        Mole fraction amounts of the retained components are rescaled by the
        inverse of their sum (left untouched when that sum is close to zero);
        all other amount types are copied across unchanged.

        Parameters
        ----------
        directory
            Not used by this method.
        available_resources
            Not used by this method.

        Raises
        ------
        ValueError
            If ``expected_components`` is defined and differs from the number
            of components retained.
        """

        retained_components = [
            component
            for component in self.input_substance.components
            if component.role == self.component_role
        ]

        # Sum the mole fractions of everything which survived the filter.
        mole_fraction_sum = 0.0

        for component in retained_components:
            for amount in self.input_substance.get_amounts(component):
                if isinstance(amount, MoleFraction):
                    mole_fraction_sum += amount.value

        n_expected = self.expected_components

        if n_expected != UNDEFINED and n_expected != len(retained_components):

            raise ValueError(
                "The filtered substance does not contain the expected number of "
                f"components ({n_expected}) - {retained_components}"
            )

        # Guard against dividing by zero when no mole fractions were found.
        if np.isclose(mole_fraction_sum, 0.0):
            scale_factor = 1.0
        else:
            scale_factor = 1.0 / mole_fraction_sum

        self.filtered_substance = Substance()

        for component in retained_components:
            for amount in self.input_substance.get_amounts(component):

                rescaled_amount = (
                    MoleFraction(amount.value * scale_factor)
                    if isinstance(amount, MoleFraction)
                    else amount
                )

                self.filtered_substance.add_component(component, rescaled_amount)
    def create_substance():
        """Build a substance holding one component for each of the solute,
        ligand, receptor and solvent roles.

        The first three are added with an exact amount of one and the solvent
        with a mole fraction of 1.0.
        """
        substance = Substance()

        components_and_amounts = [
            (Component("C", role=Component.Role.Solute), ExactAmount(1)),
            (Component("CC", role=Component.Role.Ligand), ExactAmount(1)),
            (Component("CCC", role=Component.Role.Receptor), ExactAmount(1)),
            (Component("O", role=Component.Role.Solvent), MoleFraction(1.0)),
        ]

        for component, amount in components_and_amounts:
            substance.add_component(component, amount)

        return substance
예제 #12
0
def test_simulation_data_query():
    """Check that stored simulation data can be queried by substance.

    Data is stored for two pure substances and their mixture. Querying with
    each substance must yield exactly one result whose first entry has three
    elements, and a components-only query for the mixture must yield two
    results.
    """

    methane = Substance.from_components("C")
    methanol = Substance.from_components("CO")
    mixture = Substance.from_components("C", "CO")

    all_substances = [methane, methanol, mixture]

    with tempfile.TemporaryDirectory() as root_directory:

        storage = LocalFileStorage(os.path.join(root_directory, "storage_dir"))

        # Store one dummy data object per substance.
        for substance in all_substances:

            data_directory = os.path.join(
                root_directory, f"{substance.identifier}"
            )
            data_object = create_dummy_simulation_data(data_directory, substance)

            storage.store_object(data_object, data_directory)

        # Each substance should match only its own piece of data.
        for substance in all_substances:

            query = SimulationDataQuery()
            query.substance = substance

            matches = storage.query(query)

            assert matches is not None
            assert len(matches) == 1
            assert len(next(iter(matches.values()))[0]) == 3

        # Matching on components only should split the mixture in two.
        component_query = SimulationDataQuery()
        component_query.substance = mixture
        component_query.substance_query = SubstanceQuery()
        component_query.substance_query.components_only = True

        matches = storage.query(component_query)

        assert matches is not None
        assert len(matches) == 2
예제 #13
0
def test_calculate_reduced_potential_openmm():
    """Run ``OpenMMReducedPotentials`` on the stored water trajectory.

    A ten molecule water system is built and parameterised first; the test
    passes when the protocol writes a statistics file which contains the
    reduced potential observable.
    """

    water = Substance.from_components("O")
    state = ThermodynamicState(298 * unit.kelvin, 1.0 * unit.atmosphere)

    with tempfile.TemporaryDirectory() as directory:

        force_field_path = path.join(directory, "ff.json")

        with open(force_field_path, "w") as file:
            file.write(build_tip3p_smirnoff_force_field().json())

        # Build coordinates for the system to parameterise.
        packmol = BuildCoordinatesPackmol("build_coordinates")
        packmol.max_molecules = 10
        packmol.mass_density = 0.05 * unit.grams / unit.milliliters
        packmol.substance = water
        packmol.execute(directory, None)

        # Apply the force field to produce the system object.
        build_system = BuildSmirnoffSystem("assign_parameters")
        build_system.force_field_path = force_field_path
        build_system.coordinate_file_path = packmol.coordinate_file_path
        build_system.substance = water
        build_system.execute(directory, None)

        protocol = OpenMMReducedPotentials("reduced_potentials")
        protocol.substance = water
        protocol.thermodynamic_state = state
        protocol.reference_force_field_paths = [force_field_path]
        protocol.system_path = build_system.system_path
        protocol.trajectory_file_path = get_data_filename(
            "test/trajectories/water.dcd"
        )
        protocol.coordinate_file_path = get_data_filename(
            "test/trajectories/water.pdb"
        )
        protocol.kinetic_energies_path = get_data_filename(
            "test/statistics/stats_pandas.csv"
        )
        protocol.high_precision = False
        protocol.execute(directory, ComputeResources())

        statistics_path = protocol.statistics_file_path
        assert path.isfile(statistics_path)

        statistics = StatisticsArray.from_pandas_csv(statistics_path)
        assert ObservableType.ReducedPotential in statistics
예제 #14
0
def test_build_tleap_system():
    """Check that ``BuildTLeapSystem`` writes a system file for a three
    component substance built with packmol."""

    with tempfile.TemporaryDirectory() as directory:

        # Serialise a default tleap force field source for the protocol.
        force_field_path = path.join(directory, "ff.json")

        with open(force_field_path, "w") as file:
            file.write(TLeapForceFieldSource().json())

        mixture = Substance.from_components("C", "O", "C(=O)N")

        packmol = BuildCoordinatesPackmol("build_coordinates")
        packmol.max_molecules = 9
        packmol.substance = mixture
        packmol.execute(directory, None)

        build_system = BuildTLeapSystem("assign_parameters")
        build_system.force_field_path = force_field_path
        build_system.coordinate_file_path = packmol.coordinate_file_path
        build_system.substance = mixture
        build_system.execute(directory, None)

        assert path.isfile(build_system.system_path)
예제 #15
0
def _setup_dummy_system(directory):
    """Build and parameterise a single molecule system inside ``directory``.

    Parameters
    ----------
    directory : str
        The directory in which the force field file is written and in which
        both protocols are executed.

    Returns
    -------
    tuple
        The coordinate file path reported by the coordinate building
        protocol, followed by the system path reported by the
        parameterisation protocol.
    """

    force_field_path = path.join(directory, "ff.json")

    with open(force_field_path, "w") as file:
        file.write(build_tip3p_smirnoff_force_field().json())

    methane = Substance.from_components("C")

    packmol = BuildCoordinatesPackmol("build_coordinates")
    packmol.max_molecules = 1
    packmol.mass_density = 0.001 * unit.grams / unit.milliliters
    packmol.substance = methane
    packmol.execute(directory, None)

    build_system = BuildSmirnoffSystem("assign_parameters")
    build_system.force_field_path = force_field_path
    build_system.coordinate_file_path = packmol.coordinate_file_path
    build_system.substance = methane
    build_system.execute(directory, None)

    coordinate_path = packmol.coordinate_file_path
    system_path = build_system.system_path

    return coordinate_path, system_path
예제 #16
0
def test_filter_by_smiles():
    """Check that a data set can be filtered down to the properties measured
    for a given SMILES pattern.

    After filtering a methanol / ethanol data set by "CO" only the methanol
    property should remain.
    """

    methanol = Substance()
    methanol.add_component(Component("CO"), MoleFraction(1.0))

    ethanol = Substance()
    ethanol.add_component(Component("CCO"), MoleFraction(1.0))

    methanol_density = create_dummy_property(Density)
    methanol_density.substance = methanol

    ethanol_density = create_dummy_property(Density)
    ethanol_density.substance = ethanol

    data_set = PhysicalPropertyDataSet()
    data_set.add_properties(methanol_density, ethanol_density)

    data_set.filter_by_smiles("CO")

    assert len(data_set) == 1

    remaining_substances = data_set.substances

    assert methanol in remaining_substances
    assert ethanol not in remaining_substances
예제 #17
0
class FilterSubstanceByRole(Protocol):
    """A protocol which reduces a substance to only those components whose
    role equals a requested role, rescaling any mole fractions which remain.
    """

    input_substance = InputAttribute(
        docstring="The substance to filter.",
        type_hint=Substance,
        default_value=UNDEFINED,
    )

    component_role = InputAttribute(
        docstring="The role to filter substance components against.",
        type_hint=Component.Role,
        default_value=UNDEFINED,
    )

    expected_components = InputAttribute(
        docstring="The number of components expected to remain after filtering. "
        "An exception is raised if this number is not matched.",
        type_hint=int,
        default_value=UNDEFINED,
        optional=True,
    )

    filtered_substance = OutputAttribute(
        docstring="The filtered substance.",
        type_hint=Substance,
    )

    def _execute(self, directory, available_resources):
        """Populate ``filtered_substance`` from ``input_substance``.

        Mole fraction amounts of the retained components are rescaled by the
        inverse of their sum (left untouched when that sum is close to zero);
        all other amount types are copied across unchanged.

        Parameters
        ----------
        directory
            Not used by this method.
        available_resources
            Not used by this method.

        Raises
        ------
        ValueError
            If ``expected_components`` is defined and differs from the number
            of components retained.
        """

        retained_components = [
            component
            for component in self.input_substance.components
            if component.role == self.component_role
        ]

        # Sum the mole fractions of everything which survived the filter.
        mole_fraction_sum = 0.0

        for component in retained_components:
            for amount in self.input_substance.get_amounts(component):
                if isinstance(amount, MoleFraction):
                    mole_fraction_sum += amount.value

        n_expected = self.expected_components

        if n_expected != UNDEFINED and n_expected != len(retained_components):

            raise ValueError(
                "The filtered substance does not contain the expected number of "
                f"components ({n_expected}) - {retained_components}"
            )

        # Guard against dividing by zero when no mole fractions were found.
        if np.isclose(mole_fraction_sum, 0.0):
            scale_factor = 1.0
        else:
            scale_factor = 1.0 / mole_fraction_sum

        self.filtered_substance = Substance()

        for component in retained_components:
            for amount in self.input_substance.get_amounts(component):

                rescaled_amount = (
                    MoleFraction(amount.value * scale_factor)
                    if isinstance(amount, MoleFraction)
                    else amount
                )

                self.filtered_substance.add_component(component, rescaled_amount)
예제 #18
0
    def default_simulation_schema(absolute_tolerance=UNDEFINED,
                                  relative_tolerance=UNDEFINED,
                                  n_molecules=2000):
        """Returns the default calculation schema to use when estimating
        this class of property from direct simulations.

        The workflow builds and equilibrates a fully solvated system, builds
        a solute-only system, and then runs a YANK solvation protocol inside
        a conditional group between the two.

        Parameters
        ----------
        absolute_tolerance: pint.Quantity, optional
            The absolute tolerance to estimate the property to within.
        relative_tolerance: float, optional
            The tolerance (as a fraction of the property's reported
            uncertainty) to estimate the property to within.
        n_molecules: int
            The number of molecules to use in the simulation. This is set
            as ``max_molecules`` on the solvated coordinate builder.

        Returns
        -------
        SimulationSchema
            The schema to follow when estimating this property.

        Notes
        -----
        At most one of ``absolute_tolerance`` and ``relative_tolerance``
        may be defined; this is enforced with an ``assert``.
        """
        # The two tolerance types are mutually exclusive.
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        calculation_schema = SimulationSchema()
        calculation_schema.absolute_tolerance = absolute_tolerance
        calculation_schema.relative_tolerance = relative_tolerance

        # A convergence condition is only attached to the conditional group
        # below when one of the tolerances was supplied.
        use_target_uncertainty = (absolute_tolerance != UNDEFINED
                                  or relative_tolerance != UNDEFINED)

        # Setup the fully solvated systems.
        build_full_coordinates = coordinates.BuildCoordinatesPackmol(
            "build_solvated_coordinates")
        build_full_coordinates.substance = ProtocolPath("substance", "global")
        build_full_coordinates.max_molecules = n_molecules

        assign_full_parameters = forcefield.BaseBuildSystem(
            f"assign_solvated_parameters")
        assign_full_parameters.force_field_path = ProtocolPath(
            "force_field_path", "global")
        assign_full_parameters.substance = ProtocolPath("substance", "global")
        assign_full_parameters.coordinate_file_path = ProtocolPath(
            "coordinate_file_path", build_full_coordinates.id)

        # Perform a quick minimisation of the full system to give
        # YANK a better starting point for its minimisation.
        energy_minimisation = openmm.OpenMMEnergyMinimisation(
            "energy_minimisation")
        energy_minimisation.system_path = ProtocolPath(
            "system_path", assign_full_parameters.id)
        energy_minimisation.input_coordinate_file = ProtocolPath(
            "coordinate_file_path", build_full_coordinates.id)

        # Equilibrate the minimised system in the NPT ensemble.
        equilibration_simulation = openmm.OpenMMSimulation(
            "equilibration_simulation")
        equilibration_simulation.ensemble = Ensemble.NPT
        equilibration_simulation.steps_per_iteration = 100000
        equilibration_simulation.output_frequency = 10000
        equilibration_simulation.timestep = 2.0 * unit.femtosecond
        equilibration_simulation.thermodynamic_state = ProtocolPath(
            "thermodynamic_state", "global")
        equilibration_simulation.system_path = ProtocolPath(
            "system_path", assign_full_parameters.id)
        equilibration_simulation.input_coordinate_file = ProtocolPath(
            "output_coordinate_file", energy_minimisation.id)

        # Split the global substance into a solvent-only and a solute-only
        # substance. The solute-only substance is used to build the vacuum
        # phase system, and both are passed to YANK below.
        filter_solvent = miscellaneous.FilterSubstanceByRole("filter_solvent")
        filter_solvent.input_substance = ProtocolPath("substance", "global")
        filter_solvent.component_role = Component.Role.Solvent

        filter_solute = miscellaneous.FilterSubstanceByRole("filter_solute")
        filter_solute.input_substance = ProtocolPath("substance", "global")
        filter_solute.component_role = Component.Role.Solute

        # Setup the solute in vacuum system.
        build_vacuum_coordinates = coordinates.BuildCoordinatesPackmol(
            "build_vacuum_coordinates")
        build_vacuum_coordinates.substance = ProtocolPath(
            "filtered_substance", filter_solute.id)
        build_vacuum_coordinates.max_molecules = 1

        assign_vacuum_parameters = forcefield.BaseBuildSystem(
            f"assign_parameters")
        assign_vacuum_parameters.force_field_path = ProtocolPath(
            "force_field_path", "global")
        assign_vacuum_parameters.substance = ProtocolPath(
            "filtered_substance", filter_solute.id)
        assign_vacuum_parameters.coordinate_file_path = ProtocolPath(
            "coordinate_file_path", build_vacuum_coordinates.id)

        # Set up the protocol to run yank. The first 'solvent' is the
        # equilibrated solvated system, while the second is the solute-only
        # system with a substance to which no components have been added.
        run_yank = yank.SolvationYankProtocol("run_solvation_yank")
        run_yank.solute = ProtocolPath("filtered_substance", filter_solute.id)
        run_yank.solvent_1 = ProtocolPath("filtered_substance",
                                          filter_solvent.id)
        run_yank.solvent_2 = Substance()
        run_yank.thermodynamic_state = ProtocolPath("thermodynamic_state",
                                                    "global")
        run_yank.steps_per_iteration = 500
        run_yank.checkpoint_interval = 50
        run_yank.solvent_1_coordinates = ProtocolPath(
            "output_coordinate_file", equilibration_simulation.id)
        run_yank.solvent_1_system = ProtocolPath("system_path",
                                                 assign_full_parameters.id)
        run_yank.solvent_2_coordinates = ProtocolPath(
            "coordinate_file_path", build_vacuum_coordinates.id)
        run_yank.solvent_2_system = ProtocolPath("system_path",
                                                 assign_vacuum_parameters.id)

        # Set up the group which will run yank until the free energy has been determined to within
        # a given uncertainty
        conditional_group = groups.ConditionalGroup(f"conditional_group")
        conditional_group.max_iterations = 20

        if use_target_uncertainty:

            # Require the error of the estimated free energy to be less than
            # the globally defined target uncertainty.
            condition = groups.ConditionalGroup.Condition()
            condition.type = groups.ConditionalGroup.Condition.Type.LessThan
            condition.right_hand_value = ProtocolPath("target_uncertainty",
                                                      "global")
            condition.left_hand_value = ProtocolPath(
                "estimated_free_energy.error", conditional_group.id,
                run_yank.id)

            conditional_group.add_condition(condition)

        # Define the total number of iterations that yank should run for
        # (presumably 2000 multiplied by the group's current iteration -
        # TODO confirm against `MultiplyValue`).
        total_iterations = miscellaneous.MultiplyValue("total_iterations")
        total_iterations.value = 2000
        total_iterations.multiplier = ProtocolPath("current_iteration",
                                                   conditional_group.id)

        # Make sure the simulations gets extended after each iteration.
        run_yank.number_of_iterations = ProtocolPath("result",
                                                     total_iterations.id)

        conditional_group.add_protocols(total_iterations, run_yank)

        # Define the full workflow schema. `total_iterations` and `run_yank`
        # are not listed directly as they were added to the conditional group.
        schema = WorkflowSchema()

        schema.protocol_schemas = [
            build_full_coordinates.schema,
            assign_full_parameters.schema,
            energy_minimisation.schema,
            equilibration_simulation.schema,
            filter_solvent.schema,
            filter_solute.schema,
            build_vacuum_coordinates.schema,
            assign_vacuum_parameters.schema,
            conditional_group.schema,
        ]

        # The final value is read from the yank protocol nested in the group.
        schema.final_value_source = ProtocolPath("estimated_free_energy",
                                                 conditional_group.id,
                                                 run_yank.id)

        calculation_schema.workflow_schema = schema
        return calculation_schema
예제 #19
0
def test_duplicate_simulation_data_storage(reverse_order):
    """Check how the local storage handles duplicate simulation data.

    Three data objects which differ only in their coordinate file name and
    statistical inefficiency (0.0, 1.0 and 2.0) are stored one after another.

    Parameters
    ----------
    reverse_order
        When truthy the objects are stored from the highest statistical
        inefficiency to the lowest, otherwise from lowest to highest.
    """

    methanol = Substance.from_components("CO")

    with tempfile.TemporaryDirectory() as root_directory:

        local_storage = LocalFileStorage(os.path.join(root_directory, "storage"))

        # Construct some data to store with increasing
        # statistical inefficiencies.
        data_to_store = []

        for index in range(3):

            data_directory = os.path.join(root_directory, f"data_{index}")

            data_object = create_dummy_simulation_data(
                directory_path=data_directory,
                substance=methanol,
                force_field_id="ff_id_1",
                coordinate_file_name=f"data_{index}.pdb",
                statistical_inefficiency=float(index),
                calculation_id="id",
            )
            data_to_store.append((data_object, data_directory))

        storage_order = list(enumerate(data_to_store))

        if reverse_order:
            storage_order.reverse()

        # Keep track of every key handed back by the storage backend.
        all_storage_keys = set()

        for index, (data_object, data_directory) in storage_order:

            storage_key = local_storage.store_object(data_object, data_directory)
            all_storage_keys.add(storage_key)

            retrieved_object, stored_directory = local_storage.retrieve_object(
                storage_key
            )

            # In the original order the first object has the lowest
            # statistical inefficiency, so it should be stored and never
            # replaced. In the reversed order each new object has a lower
            # statistical inefficiency than the last, so it should replace
            # whatever was stored before it.
            expected_index = index if reverse_order else 0

            expected_json = data_to_store[expected_index][0].json()
            assert retrieved_object.json() == expected_json

            # Make sure the directory has been correctly overwritten / retained
            # depending on the data order.
            coordinate_path = os.path.join(
                stored_directory, f"data_{expected_index}.pdb"
            )
            assert os.path.isfile(coordinate_path)

        # Every piece of data should have been assigned the same key,
        # whichever order it was stored in.
        assert len(all_storage_keys) == 1
예제 #20
0
def test_to_pandas():
    """Check that a data set holding pure and binary properties can be
    converted to a pandas object with the expected columns and shape."""

    calculation_source = CalculationSource("Dummy", {})

    pure = Substance.from_components("C")
    binary = Substance.from_components("C", "O")

    data_set = PhysicalPropertyDataSet()

    temperatures = [298 * unit.kelvin, 300 * unit.kelvin, 302 * unit.kelvin]

    def build_state(temperature):
        # Every property in this test is defined at the same pressure.
        return ThermodynamicState(
            temperature=temperature, pressure=1.0 * unit.atmosphere
        )

    # First add the single component properties at each temperature...
    for temperature in temperatures:

        shared_arguments = {
            "thermodynamic_state": build_state(temperature),
            "phase": PropertyPhase.Liquid,
            "substance": pure,
            "source": calculation_source,
        }

        density = Density(
            value=1 * unit.gram / unit.milliliter,
            uncertainty=0.11 * unit.gram / unit.milliliter,
            **shared_arguments,
        )
        dielectric = DielectricConstant(
            value=1 * unit.dimensionless,
            uncertainty=0.11 * unit.dimensionless,
            **shared_arguments,
        )

        data_set.add_properties(density)
        data_set.add_properties(dielectric)

    # ...followed by the two component properties.
    for temperature in temperatures:

        shared_arguments = {
            "thermodynamic_state": build_state(temperature),
            "phase": PropertyPhase.Liquid,
            "substance": binary,
            "source": calculation_source,
        }

        enthalpy = EnthalpyOfMixing(
            value=1 * unit.kilojoules / unit.mole,
            uncertainty=0.11 * unit.kilojoules / unit.mole,
            **shared_arguments,
        )
        excess_volume = ExcessMolarVolume(
            value=1 * unit.meter**3 / unit.mole,
            uncertainty=0.11 * unit.meter**3 / unit.mole,
            **shared_arguments,
        )

        data_set.add_properties(enthalpy)
        data_set.add_properties(excess_volume)

    data_frame = data_set.to_pandas()

    # The state, source and per-component columns must exist for both the
    # first and the second component.
    expected_columns = [
        "Temperature (K)",
        "Pressure (kPa)",
        "Phase",
        "N Components",
        "Source",
    ]

    for component_number in (1, 2):
        expected_columns.extend(
            [
                f"Component {component_number}",
                f"Role {component_number}",
                f"Mole Fraction {component_number}",
                f"Exact Amount {component_number}",
            ]
        )

    missing_columns = [x for x in expected_columns if x not in data_frame]
    assert not missing_columns

    assert data_frame is not None
    # Four properties at each of the three temperatures gives twelve rows.
    assert data_frame.shape == (12, 21)

    # Dropping the columns which are entirely empty should remove two of them.
    populated_frame = data_frame.dropna(axis=1, how="all")
    assert populated_frame.shape == (12, 19)
예제 #21
0
def test_storage_retrieval():
    """Check that the metadata built by
    `ReweightingLayer._get_workflow_metadata` references exactly the stored
    data which is expected for each type of property."""

    # Create some dummy substances.
    methane = Substance.from_components("C")
    methanol = Substance.from_components("CO")
    mixture = Substance.from_components("C", "CO")
    # Extra substances whose data should never be retrieved.
    unused_pure = Substance.from_components("CCO")
    unused_mixture = Substance.from_components("CCO", "CO")

    # Each entry is a (substance, phase, number of molecules) tuple. The
    # same tuples double as the keys used to look up the storage keys.
    liquid_methane = (methane, PropertyPhase.Liquid, 1000)
    liquid_methanol = (methanol, PropertyPhase.Liquid, 1000)
    gas_methanol = (methanol, PropertyPhase.Gas, 1)
    liquid_mixture = (mixture, PropertyPhase.Liquid, 1000)

    data_to_store = [
        liquid_methane,
        liquid_methanol,
        gas_methanol,
        liquid_mixture,
        (unused_pure, PropertyPhase.Liquid, 1000),
        (unused_mixture, PropertyPhase.Liquid, 1000),
    ]
    storage_keys = {}

    state = ThermodynamicState(temperature=1.0 * unit.kelvin)

    properties = [
        # Properties with a full system query.
        Density(
            value=1.0 * unit.gram / unit.litre,
            substance=methanol,
            thermodynamic_state=state,
        ),
        DielectricConstant(
            value=1.0 * unit.dimensionless, substance=methane, thermodynamic_state=state
        ),
        # Property with a multi-phase (liquid + gas) query.
        EnthalpyOfVaporization(
            value=1.0 * unit.joule / unit.mole,
            substance=methanol,
            thermodynamic_state=state,
        ),
        # Properties with a multi-component (mixture + components) query.
        EnthalpyOfMixing(
            value=1.0 * unit.joule / unit.mole,
            substance=mixture,
            thermodynamic_state=state,
        ),
        ExcessMolarVolume(
            value=1.0 * unit.meter ** 3, substance=mixture, thermodynamic_state=state
        ),
    ]

    def expected_mixture_data():
        # The mixture properties expect the full system data as well as
        # the data of each of the individual components.
        return {
            "full_system_data": [liquid_mixture],
            "component_data": [[liquid_methane], [liquid_methanol]],
        }

    expected_data_per_property = {
        Density: {"full_system_data": [liquid_methanol]},
        DielectricConstant: {"full_system_data": [liquid_methane]},
        EnthalpyOfVaporization: {
            "liquid_data": [liquid_methanol],
            "gas_data": [gas_methanol],
        },
        EnthalpyOfMixing: expected_mixture_data(),
        ExcessMolarVolume: expected_mixture_data(),
    }

    force_field = SmirnoffForceFieldSource.from_path("smirnoff99Frosst-1.1.0.offxml")

    with tempfile.TemporaryDirectory() as base_directory:

        # Populate a local storage backend with the dummy data.
        storage_backend = LocalFileStorage(
            os.path.join(base_directory, "storage_dir")
        )
        force_field_id = storage_backend.store_force_field(force_field)

        for data_tuple in data_to_store:

            substance, phase, n_mol = data_tuple

            data_directory = os.path.join(base_directory, substance.identifier)
            dummy_data = create_dummy_simulation_data(
                data_directory,
                substance=substance,
                force_field_id=force_field_id,
                phase=phase,
                number_of_molecules=n_mol,
            )

            storage_keys[(substance, phase, n_mol)] = storage_backend.store_object(
                dummy_data, data_directory
            )

        for physical_property in properties:

            property_class = physical_property.__class__

            schema = registered_calculation_schemas["ReweightingLayer"][
                property_class.__name__
            ]

            # The registered schema may be a factory rather than an instance.
            if callable(schema):
                schema = schema()

            # noinspection PyProtectedMember
            metadata = ReweightingLayer._get_workflow_metadata(
                base_directory, physical_property, "", [], storage_backend, schema,
            )

            assert metadata is not None

            expected_data = expected_data_per_property[property_class]

            for data_key, expected_entries in expected_data.items():

                assert data_key in metadata

                stored_entries = metadata[data_key]
                assert len(stored_entries) == len(expected_entries)

                if isinstance(stored_entries[0], list):
                    # Collapse any lists of lists into flat lists.
                    stored_entries = [
                        entry for group in stored_entries for entry in group
                    ]
                    expected_entries = [
                        entry for group in expected_entries for entry in group
                    ]

                # The final part of each stored path is compared against the
                # key which was returned when the data was stored.
                found_keys = sorted(
                    os.path.basename(stored_path)
                    for stored_path, _, _ in stored_entries
                )
                wanted_keys = sorted(storage_keys[x] for x in expected_entries)

                assert found_keys == wanted_keys
예제 #22
0
def _build_input_output_substances():
    """Builds pairs of input and expected substances for the
    `test_build_coordinate_composition` test.

    Returns
    -------
    list of tuple of Substance and Substance
        A list of input and expected substances.
    """

    def two_component_substance(fraction_o, fraction_c):
        # Build an "O" / "C" substance with the given mole fractions.
        substance = Substance()
        substance.add_component(Component("O"), MoleFraction(fraction_o))
        substance.add_component(Component("C"), MoleFraction(fraction_c))
        return substance

    # The easy cases, where the expected substance matches the input.
    substance_pairs = [
        (Substance.from_components("O"), Substance.from_components("O")),
        (Substance.from_components("O", "C"), Substance.from_components("O", "C")),
        (
            Substance.from_components("O", "C", "CO"),
            Substance.from_components("O", "C", "CO"),
        ),
    ]

    # The cases where the mole fractions are expected to be rounded.
    rounding_cases = (
        ((0.41, 0.59), (0.4, 0.6)),
        ((0.59, 0.41), (0.6, 0.4)),
    )

    for input_fractions, expected_fractions in rounding_cases:

        input_substance = two_component_substance(*input_fractions)
        expected_substance = two_component_substance(*expected_fractions)

        substance_pairs.append((input_substance, expected_substance))

    return substance_pairs
예제 #23
0
def test_density_dielectric_merging(workflow_merge_function):
    """Check which protocols of the default density and dielectric constant
    simulation workflows end up identical after the workflows are merged.

    Parameters
    ----------
    workflow_merge_function: callable
        A function which accepts two workflows and merges them. It is called
        here with the density workflow followed by the dielectric workflow.
    """

    substance = Substance.from_components("C")

    density = evaluator.properties.Density(
        thermodynamic_state=ThermodynamicState(
            temperature=298 * unit.kelvin, pressure=1 * unit.atmosphere
        ),
        phase=PropertyPhase.Liquid,
        substance=substance,
        value=10 * unit.gram / unit.mole,
        uncertainty=1 * unit.gram / unit.mole,
    )

    dielectric = evaluator.properties.DielectricConstant(
        thermodynamic_state=ThermodynamicState(
            temperature=298 * unit.kelvin, pressure=1 * unit.atmosphere
        ),
        phase=PropertyPhase.Liquid,
        substance=substance,
        value=10 * unit.gram / unit.mole,
        uncertainty=1 * unit.gram / unit.mole,
    )

    density_schema = density.default_simulation_schema().workflow_schema
    dielectric_schema = dielectric.default_simulation_schema().workflow_schema

    density_metadata = Workflow.generate_default_metadata(
        density, "smirnoff99Frosst-1.1.0.offxml", []
    )

    # The dielectric workflow must be built from the metadata of the
    # dielectric property itself, not from that of the density.
    dielectric_metadata = Workflow.generate_default_metadata(
        dielectric, "smirnoff99Frosst-1.1.0.offxml", []
    )

    density_workflow = Workflow(density_metadata)
    density_workflow.schema = density_schema

    dielectric_workflow = Workflow(dielectric_metadata)
    dielectric_workflow.schema = dielectric_schema

    workflow_merge_function(density_workflow, dielectric_workflow)

    density_workflow_graph = density_workflow.to_graph()
    dielectric_workflow_graph = dielectric_workflow.to_graph()

    dependants_graph_a = density_workflow_graph._protocol_graph._build_dependants_graph(
        density_workflow_graph.protocols, False, apply_reduction=True
    )
    dependants_graph_b = dielectric_workflow_graph._protocol_graph._build_dependants_graph(
        dielectric_workflow_graph.protocols, False, apply_reduction=True
    )

    merge_order_a = graph.topological_sort(dependants_graph_a)
    merge_order_b = graph.topological_sort(dependants_graph_b)

    # Walk both workflows in topological order and compare them protocol
    # by protocol.
    for protocol_id_A, protocol_id_B in zip(merge_order_a, merge_order_b):

        density_schema_json = density_workflow.protocols[protocol_id_A].schema.json()
        dielectric_schema_json = dielectric_workflow.protocols[
            protocol_id_B
        ].schema.json()

        # Only the protocols whose id contains "extract_traj" or
        # "extract_stats" are expected to differ between the two workflows.
        is_extraction_protocol = (
            "extract_traj" in protocol_id_A or "extract_stats" in protocol_id_A
        )

        if is_extraction_protocol:
            assert density_schema_json != dielectric_schema_json
        else:
            assert density_schema_json == dielectric_schema_json
예제 #24
0
    def generate_default_metadata(
        physical_property,
        force_field_path,
        parameter_gradient_keys=None,
        target_uncertainty=None,
    ):
        """Builds the default dictionary of 'global' metadata for a property.

        Parameters
        ----------
        physical_property: PhysicalProperty
            The physical property whose attributes are exposed through the
            metadata.
        force_field_path: str
            The path to the force field parameters to use in the workflow.
        parameter_gradient_keys: list of ParameterGradientKey
            References to the parameters which all observables should be
            differentiated with respect to.
        target_uncertainty: pint.Quantity, optional
            The uncertainty which the property should be estimated to
            within. When `None`, an infinite uncertainty in the units of
            the property value is used.

        Returns
        -------
        dict of str, Any

            The metadata dictionary, which contains the following keys:

            - thermodynamic_state: the thermodynamic state of the property.
            - substance: the substance of the property.
            - components: a list with one `Substance` per component of the
              property's substance.
            - target_uncertainty: the target uncertainty, converted to the
              units of the property value.
            - per_component_uncertainty: the target uncertainty divided by
              the square root of (the number of components + 1).
            - force_field_path: the provided force field path.
            - parameter_gradient_keys: the gradient keys returned by
              `Workflow._find_relevant_gradient_keys` for the substance.

            Any entries in the property's own metadata (when defined) are
            merged in last, and so take precedence over the keys above.
        """
        # Wrap each component of the substance in a substance of its own.
        components = [
            Substance.from_components(component)
            for component in physical_property.substance.components
        ]

        value_units = physical_property.value.units

        # No target means that any uncertainty is acceptable.
        if target_uncertainty is None:
            target_uncertainty = math.inf * value_units

        target_uncertainty = target_uncertainty.to(value_units)

        # The +1 accounts for the full mixture as a possible component.
        n_contributions = physical_property.substance.number_of_components + 1
        per_component_uncertainty = target_uncertainty / sqrt(n_contributions)

        # Retain only the gradient keys which are relevant to the substance.
        relevant_gradient_keys = Workflow._find_relevant_gradient_keys(
            physical_property.substance, force_field_path, parameter_gradient_keys
        )

        global_metadata = dict(
            thermodynamic_state=physical_property.thermodynamic_state,
            substance=physical_property.substance,
            components=components,
            target_uncertainty=target_uncertainty,
            per_component_uncertainty=per_component_uncertainty,
            force_field_path=force_field_path,
            parameter_gradient_keys=relevant_gradient_keys,
        )

        # Merge in any metadata attached to the property itself.
        if physical_property.metadata != UNDEFINED:
            global_metadata.update(physical_property.metadata)

        return global_metadata
def test_substance_len():
    """A component which is passed in twice should only be counted once."""

    smiles_patterns = ("C", "CC", "CCC", "CCC")
    substance = Substance.from_components(*smiles_patterns)

    assert len(substance) == 3
예제 #26
0
def test_build_ligpargen_system(requests_mock):
    """Run `BuildLigParGenSystem` against mocked request / download
    end points and check that a system file gets written.

    Parameters
    ----------
    requests_mock
        The fixture used to register the fake POST end points.
    """

    force_field_source = LigParGenForceFieldSource(
        request_url="http://testligpargen.com/request",
        download_url="http://testligpargen.com/download",
    )

    substance = Substance.from_components("C", "O")

    def request_callback(request, context):
        # Pull the submitted SMILES pattern out of the form data and make
        # sure that it is the one expected after standardising it.
        context.status_code = 200

        smiles_match = re.search(r'"smiData"\r\n\r\n(.*?)\r\n', request.text)
        submitted_smiles = smiles_match.group(1)

        molecule = load_molecule(submitted_smiles, toolkit="rdkit")
        standard_smiles = mol_to_smiles(
            molecule, isomeric=False, explicit_hydrogen=False, mapped=False
        )

        assert standard_smiles == "C"
        return 'value="/tmp/0000.xml"'

    def download_callback(_, context):
        # Respond with a canned force field file.
        context.status_code = 200
        return """
<ForceField>
<AtomTypes>
<Type name="opls_802" class="H802" element="H" mass="1.008000" />
<Type name="opls_804" class="H804" element="H" mass="1.008000" />
<Type name="opls_803" class="H803" element="H" mass="1.008000" />
<Type name="opls_800" class="C800" element="C" mass="12.011000" />
<Type name="opls_801" class="H801" element="H" mass="1.008000" />
</AtomTypes>
<Residues>
<Residue name="UNK">
<Atom name="C00" type="opls_800" />
<Atom name="H01" type="opls_801" />
<Atom name="H02" type="opls_802" />
<Atom name="H03" type="opls_803" />
<Atom name="H04" type="opls_804" />
<Bond from="0" to="1"/>
<Bond from="0" to="2"/>
<Bond from="0" to="3"/>
<Bond from="0" to="4"/>
</Residue>
</Residues>
<HarmonicBondForce>
<Bond class1="H801" class2="C800" length="0.109000" k="284512.000000"/>
<Bond class1="H802" class2="C800" length="0.109000" k="284512.000000"/>
<Bond class1="H803" class2="C800" length="0.109000" k="284512.000000"/>
<Bond class1="H804" class2="C800" length="0.109000" k="284512.000000"/>
</HarmonicBondForce>
<HarmonicAngleForce>
<Angle class1="H801" class2="C800" class3="H802" angle="1.881465" k="276.144000"/>
<Angle class1="H801" class2="C800" class3="H803" angle="1.881465" k="276.144000"/>
<Angle class1="H801" class2="C800" class3="H804" angle="1.881465" k="276.144000"/>
<Angle class1="H802" class2="C800" class3="H803" angle="1.881465" k="276.144000"/>
<Angle class1="H803" class2="C800" class3="H804" angle="1.881465" k="276.144000"/>
<Angle class1="H802" class2="C800" class3="H804" angle="1.881465" k="276.144000"/>
</HarmonicAngleForce>
<PeriodicTorsionForce>
<Improper class1="C800" class2="H801" class3="H802" class4="H803" k1="0.000000" k2="0.000000" k3="0.000000"
k4="0.000000" periodicity1="1" periodicity2="2" periodicity3="3" periodicity4="4" phase1="0.00"
phase2="3.141592653589793" phase3="0.00" phase4="3.141592653589793"/>
<Improper class1="C800" class2="H801" class3="H802" class4="H804" k1="0.000000" k2="0.000000" k3="0.000000"
k4="0.000000" periodicity1="1" periodicity2="2" periodicity3="3" periodicity4="4" phase1="0.00"
phase2="3.141592653589793" phase3="0.00" phase4="3.141592653589793"/>
</PeriodicTorsionForce>
<NonbondedForce coulomb14scale="0.5" lj14scale="0.5">
<Atom type="opls_803" charge="0.074800" sigma="0.250000" epsilon="0.125520" />
<Atom type="opls_802" charge="0.074800" sigma="0.250000" epsilon="0.125520" />
<Atom type="opls_800" charge="-0.299400" sigma="0.350000" epsilon="0.276144" />
<Atom type="opls_804" charge="0.074800" sigma="0.250000" epsilon="0.125520" />
<Atom type="opls_801" charge="0.074800" sigma="0.250000" epsilon="0.125520" />
</NonbondedForce>
</ForceField>
"""

    # Both of the end points are accessed through POST requests.
    requests_mock.post(force_field_source.request_url, text=request_callback)
    requests_mock.post(force_field_source.download_url, text=download_callback)

    with tempfile.TemporaryDirectory() as directory:

        # Serialize the force field source so that the protocol can load it.
        force_field_path = path.join(directory, "ff.json")

        with open(force_field_path, "w") as force_field_file:
            force_field_file.write(force_field_source.json())

        # Build a small set of coordinates to parameterize.
        build_coordinates = BuildCoordinatesPackmol("build_coordinates")
        build_coordinates.max_molecules = 8
        build_coordinates.substance = substance
        build_coordinates.execute(directory, None)

        assign_parameters = BuildLigParGenSystem("assign_parameters")
        assign_parameters.force_field_path = force_field_path
        assign_parameters.coordinate_file_path = build_coordinates.coordinate_file_path
        assign_parameters.substance = substance
        assign_parameters.execute(directory, None)

        assert path.isfile(assign_parameters.system_path)
예제 #27
0
def data_set_from_data_frame(data_frame):
    """Converts a `pandas.DataFrame` to a `PhysicalPropertyDataSet` object.
    See the `PhysicalPropertyDataSet.to_pandas()` function for information
    on the required columns.

    Every column whose name contains " Value" is treated as a property
    column, with the text before the first space naming the property class
    to look up on `evaluator.properties`. Each non-NaN value becomes one
    property, whose uncertainty is always set to zero.

    Parameters
    ----------
    data_frame: pandas.DataFrame
        The data frame to convert.

    Returns
    -------
    PhysicalPropertyDataSet
        The converted data set. This will be empty if the data frame
        contains no rows.

    Raises
    ------
    AssertionError
        If any of the required columns are missing, if no property value
        columns are present, if a property type appears in more than one
        value column, or if a component defines neither a mole fraction
        nor an exact amount.
    """

    return_value = PhysicalPropertyDataSet()

    if len(data_frame) == 0:
        return return_value

    # Make sure the base columns are present.
    required_base_columns = [
        "Temperature (K)",
        "Pressure (kPa)",
        "Phase",
        "N Components",
        "Source",
    ]

    assert all(x in data_frame for x in required_base_columns)

    # Make sure the substance columns are present.
    max_components = max(int(x) for x in data_frame["N Components"])
    assert max_components > 0

    required_components_columns = [
        x for i in range(max_components) for x in [
            f"Component {i + 1}",
            f"Role {i + 1}",
            f"Mole Fraction {i + 1}",
            f"Exact Amount {i + 1}",
        ]
    ]

    assert all(x in data_frame for x in required_components_columns)

    # Find the types of property which the frame contains values for.
    property_types = []

    for column_name in data_frame:

        if " Value" not in column_name:
            continue

        column_name_split = column_name.split(" ")

        assert len(column_name_split) >= 2

        property_type = getattr(evaluator.properties, column_name_split[0])
        property_types.append(property_type)

    assert len(property_types) > 0

    # Make sure we don't have duplicate property columns.
    assert len(set(property_types)) == len(property_types)

    # The unit and column header of each property type do not depend on
    # the row, so work them out once rather than once per row.
    property_columns = []

    for property_type in property_types:

        default_unit = property_type.default_unit()
        value_header = f"{property_type.__name__} Value ({default_unit:~})"

        property_columns.append((property_type, default_unit, value_header))

    properties = []

    for _, row in data_frame.iterrows():

        # Create the substance from the component columns. The count is
        # cast to an int (as is done when validating the columns above) as
        # a row extracted from the frame may store it as a float, which
        # `range` would reject.
        number_of_components = int(row["N Components"])

        substance = Substance()

        for component_index in range(number_of_components):

            smiles = row[f"Component {component_index + 1}"]
            role = Component.Role[row[f"Role {component_index + 1}"]]
            mole_fraction = row[f"Mole Fraction {component_index + 1}"]
            exact_amount = row[f"Exact Amount {component_index + 1}"]

            # At least one type of amount must be defined per component.
            assert not numpy.isnan(mole_fraction) or not numpy.isnan(
                exact_amount)

            component = Component(smiles, role)

            # A component may define both a mole fraction and an exact
            # amount, in which case both are added.
            if not numpy.isnan(mole_fraction):
                substance.add_component(component, MoleFraction(mole_fraction))
            if not numpy.isnan(exact_amount):
                substance.add_component(component, ExactAmount(exact_amount))

        # Extract the state
        pressure = row["Pressure (kPa)"] * unit.kilopascal
        temperature = row["Temperature (K)"] * unit.kelvin

        thermodynamic_state = ThermodynamicState(temperature, pressure)

        phase = PropertyPhase.from_string(row["Phase"])

        source = MeasurementSource(reference=row["Source"])

        for property_type, default_unit, value_header in property_columns:

            # Skip any properties which this row does not define.
            if numpy.isnan(row[value_header]):
                continue

            value = row[value_header] * default_unit
            uncertainty = 0.0 * default_unit

            physical_property = property_type(
                thermodynamic_state=thermodynamic_state,
                phase=phase,
                substance=substance,
                value=value,
                uncertainty=uncertainty,
                source=source,
            )

            properties.append(physical_property)

    return_value.add_properties(*properties)
    return return_value
예제 #28
0
    def _rebuild_substance(self, number_of_molecules):
        """Reconstructs the `Substance` which this protocol is building
        coordinates for from the number of molecules of each component.

        The result may differ from the input substance, as only a finite
        number of molecules can be added and so the mole fractions may
        end up being rounded.

        Parameters
        ----------
        number_of_molecules: list of int
            The number of each component which should be added to the system.

        Returns
        -------
        Substance
            The substance which contains the corrected component amounts.

        Raises
        ------
        ValueError
            If mole fractions were recomputed but do not sum to 1.0.
        """

        substance = self.substance

        corrected_amounts = defaultdict(list)
        remaining_molecules = sum(number_of_molecules)

        # Carry over the exact amounts as they are, removing them from the
        # pool of molecules which the mole fractions are computed from.
        for component in substance.components:

            exact_amounts = [
                x for x in substance.get_amounts(component)
                if isinstance(x, ExactAmount)
            ]

            if exact_amounts:
                first_exact_amount = exact_amounts[0]

                remaining_molecules -= first_exact_amount.value
                corrected_amounts[component].append(first_exact_amount)

        # Work out the mole fractions which were actually obtained.
        mole_fraction_sum = 0.0
        n_mole_fractions = 0

        for index, component in enumerate(substance.components):

            has_mole_fraction = any(
                isinstance(x, MoleFraction)
                for x in substance.get_amounts(component)
            )

            if not has_mole_fraction:
                continue

            n_fraction_molecules = number_of_molecules[index]

            # Don't count the molecules which came from an exact amount.
            if component in corrected_amounts:
                n_fraction_molecules -= corrected_amounts[component][0].value

            mole_fraction = n_fraction_molecules / remaining_molecules
            corrected_amounts[component].append(MoleFraction(mole_fraction))

            mole_fraction_sum += mole_fraction
            n_mole_fractions += 1

        if n_mole_fractions > 0 and not np.isclose(mole_fraction_sum, 1.0):
            raise ValueError("The new mole fraction does not equal 1.0")

        rebuilt_substance = Substance()

        for component, amounts in corrected_amounts.items():
            for amount in amounts:
                rebuilt_substance.add_component(component, amount)

        return rebuilt_substance
def test_multiple_amounts():
    """Check a substance whose components each have both a mole fraction
    and an exact amount."""

    substance = Substance()

    sodium = Component("[Na+]")
    chloride = Component("[Cl-]")

    # Give each ion a mole fraction as well as a single exact molecule.
    for ion, fraction in ((sodium, 0.75), (chloride, 0.25)):
        substance.add_component(ion, MoleFraction(fraction))
        substance.add_component(ion, ExactAmount(1))

    assert substance.number_of_components == 2

    # Both types of amount should have been retained for each ion.
    amounts_per_ion = [substance.get_amounts(ion) for ion in (sodium, chloride)]
    assert all(len(amounts) == 2 for amounts in amounts_per_ion)

    counts = substance.get_molecules_per_component(6)
    assert len(counts) == 2

    expected_counts = {sodium.identifier: 4, chloride.identifier: 2}

    for identifier, expected_count in expected_counts.items():
        assert counts[identifier] == expected_count