Example #1
0
class CalculationLayerSchema(AttributeClass):
    """The options that a `CalculationLayer` should apply when estimating
    one class of physical properties.

    A target uncertainty may be given either as an absolute value or
    relative to the measured uncertainty, but never as both at once.
    """

    absolute_tolerance = Attribute(
        docstring=(
            "The absolute uncertainty that the property should "
            "be estimated to within. This attribute is mutually exclusive "
            "with the `relative_tolerance` attribute."
        ),
        type_hint=pint.Quantity,
        default_value=UNDEFINED,
        optional=True,
    )
    relative_tolerance = Attribute(
        docstring=(
            "The relative uncertainty that the property should "
            "be estimated to within, i.e `relative_tolerance * "
            "measured_property.uncertainty`. This attribute is mutually "
            "exclusive with the `absolute_tolerance` attribute."
        ),
        type_hint=float,
        default_value=UNDEFINED,
        optional=True,
    )

    def validate(self, attribute_type=None):
        """Check that the tolerances are consistent, then defer to the
        parent class validation.

        Parameters
        ----------
        attribute_type
            Passed through unchanged to the parent `validate`.

        Raises
        ------
        ValueError
            If both `absolute_tolerance` and `relative_tolerance` are set.
        """
        has_absolute = self.absolute_tolerance != UNDEFINED

        # The two tolerances are mutually exclusive, so the schema is
        # rejected before the parent class checks are run.
        if has_absolute and self.relative_tolerance != UNDEFINED:
            raise ValueError(
                "Only one of `absolute_tolerance` and `relative_tolerance` "
                "can be set.")

        super().validate(attribute_type)
Example #2
0
    class _Submission(AttributeClass):
        """The payload describing a single estimation request, in the form
        in which it is sent to the server.
        """

        dataset = Attribute(
            docstring="The set of properties to estimate.",
            type_hint=PhysicalPropertyDataSet,
        )
        options = Attribute(
            docstring="The options to use when estimating the dataset.",
            type_hint=RequestOptions,
        )
        force_field_source = Attribute(
            docstring=(
                "The force field parameters to estimate the dataset using."
            ),
            type_hint=ForceFieldSource,
        )
        parameter_gradient_keys = Attribute(
            docstring=(
                "A list of the parameters that the physical properties "
                "should be differentiated with respect to."
            ),
            type_hint=list,
        )

        def validate(self, attribute_type=None):
            """Run the parent class validation, then check the type of
            every requested gradient key.

            Parameters
            ----------
            attribute_type
                Passed through unchanged to the parent `validate`.

            Raises
            ------
            AssertionError
                If any entry of `parameter_gradient_keys` is not a
                `ParameterGradientKey`.
            """
            super().validate(attribute_type)

            assert all(
                isinstance(gradient_key, ParameterGradientKey)
                for gradient_key in self.parameter_gradient_keys
            )
Example #3
0
class ConnectionOptions(AttributeClass):
    """The address and port to use when connecting to an `EvaluatorServer`."""

    server_address = Attribute(
        docstring="The address of the server to connect to.",
        type_hint=str,
        default_value="localhost",
    )
    server_port = Attribute(
        docstring="The port of the server to connect to.",
        type_hint=int,
        default_value=8000,
    )

    def __init__(self, server_address=None, server_port=None):
        """Create a new set of connection options.

        Arguments left as ``None`` are not assigned, so the corresponding
        attribute keeps whatever value it would otherwise have.

        Parameters
        ----------
        server_address: str
            The address of the server to connect to.
        server_port: int
            The port of the server to connect to.
        """
        overrides = (
            ("server_address", server_address),
            ("server_port", server_port),
        )

        for attribute_name, override in overrides:
            if override is None:
                continue

            setattr(self, attribute_name, override)
Example #4
0
class CalculationLayerResult(AttributeClass):
    """The outcome of a `CalculationLayer` attempting to estimate a
    single property.
    """

    physical_property = Attribute(
        docstring="The estimated property (if the layer was successful).",
        type_hint=PhysicalProperty,
        optional=True,
    )
    data_to_store = Attribute(
        docstring="Paths to the data objects to store.",
        type_hint=list,
        default_value=[],
    )

    exceptions = Attribute(
        docstring=(
            "Any exceptions raised by the layer while estimating the "
            "property."
        ),
        type_hint=list,
        default_value=[],
    )

    def validate(self, attribute_type=None):
        """Run the parent class validation, then check the contents of the
        list attributes.

        Parameters
        ----------
        attribute_type
            Passed through unchanged to the parent `validate`.

        Raises
        ------
        AssertionError
            If an entry of `data_to_store` is not a two-element tuple or
            list of strings, or if an entry of `exceptions` is not an
            `EvaluatorException`.
        """
        super().validate(attribute_type)

        # Every stored entry must be a pair (tuple or list) of strings.
        assert all(
            isinstance(entry, (tuple, list)) for entry in self.data_to_store
        )
        assert all(len(entry) == 2 for entry in self.data_to_store)
        assert all(
            isinstance(item, str)
            for entry in self.data_to_store
            for item in entry
        )

        assert all(
            isinstance(error, EvaluatorException) for error in self.exceptions
        )
Example #5
0
class ProtocolGroupSchema(ProtocolSchema):
    """A json serializable description of a group of workflow
    protocols.
    """

    protocol_schemas = Attribute(
        docstring="The schemas of the protocols within this group.",
        type_hint=dict,
        read_only=True,
    )

    def __init__(
        self,
        unique_id=None,
        protocol_type=None,
        inputs=None,
        protocol_schemas=None,
    ):
        """Create a new group schema.

        Parameters
        ----------
        unique_id
            Forwarded to the `ProtocolSchema` constructor.
        protocol_type
            Forwarded to the `ProtocolSchema` constructor.
        inputs
            Forwarded to the `ProtocolSchema` constructor.
        protocol_schemas: dict, optional
            The schemas of the protocols within this group. Nothing is
            stored when this is ``None``.
        """
        super().__init__(unique_id, protocol_type, inputs)

        if protocol_schemas is None:
            return

        # The attribute is declared read-only and is stored via `_set_value`.
        self._set_value("protocol_schemas", protocol_schemas)

    def validate(self, attribute_type=None):
        """Run the parent class validation, then check the type of each
        key and value of `protocol_schemas`.

        Parameters
        ----------
        attribute_type
            Passed through unchanged to the parent `validate`.

        Raises
        ------
        AssertionError
            If a key is not a `str` or a value is not a `ProtocolSchema`.
        """
        super().validate(attribute_type)

        for schema_id, schema in self.protocol_schemas.items():
            assert isinstance(schema_id, str)
            assert isinstance(schema, ProtocolSchema)
Example #6
0
class WorkflowResult(AttributeClass):
    """The outcome of executing one `Workflow` within a
    `WorkflowGraph`.
    """

    workflow_id = Attribute(
        docstring="The id of the workflow associated with this result.",
        type_hint=str,
    )

    value = Attribute(
        docstring=(
            "The estimated value of the property and the uncertainty "
            "in that value."
        ),
        type_hint=pint.Measurement,
        optional=True,
    )
    gradients = Attribute(
        docstring=(
            "The gradients of the estimated value with respect to the "
            "specified force field parameters."
        ),
        type_hint=list,
        default_value=[],
    )

    exceptions = Attribute(
        docstring=(
            "Any exceptions raised by the layer while estimating the "
            "property."
        ),
        type_hint=list,
        default_value=[],
    )

    data_to_store = Attribute(
        docstring="Paths to the data objects to store.",
        type_hint=list,
        default_value=[],
    )

    def validate(self, attribute_type=None):
        """Run the parent class validation, then check the contents of the
        list attributes.

        Parameters
        ----------
        attribute_type
            Passed through unchanged to the parent `validate`.

        Raises
        ------
        AssertionError
            If an entry of `gradients` is not a `ParameterGradient`, an
            entry of `data_to_store` is not a two-element tuple of strings,
            or an entry of `exceptions` is not an `EvaluatorException`.
        """
        super().validate(attribute_type)

        assert all(
            isinstance(gradient, ParameterGradient)
            for gradient in self.gradients
        )

        # Every stored entry must be a 2-tuple of strings.
        assert all(isinstance(entry, tuple) for entry in self.data_to_store)
        assert all(len(entry) == 2 for entry in self.data_to_store)
        assert all(
            isinstance(item, str)
            for entry in self.data_to_store
            for item in entry
        )

        assert all(
            isinstance(error, EvaluatorException) for error in self.exceptions
        )
Example #7
0
class MoleFraction(Amount):
    """An amount of a `Component` in a `Substance`, expressed as a mole
    fraction.
    """

    value = Attribute(docstring="The value of this amount.", type_hint=float)

    @property
    def identifier(self):
        """str: The value formatted to six decimal places, prefixed
        with ``x=``."""
        return f"x={self.value:.6f}"

    def to_number_of_molecules(self,
                               total_substance_molecules,
                               tolerance=None):
        """Convert this mole fraction into a whole number of molecules.

        Parameters
        ----------
        total_substance_molecules: int
            The total number of molecules in the whole substance.
        tolerance: float, optional
            When given, the largest allowed absolute difference between
            the mole fraction reproduced by the returned count and `value`.

        Returns
        -------
        int
            The number of molecules which this amount represents.

        Raises
        ------
        ValueError
            If the amount rounds to zero molecules, or if the reproduced
            mole fraction differs from `value` by more than `tolerance`.
        """
        exact_count = self.value * total_substance_molecules
        remainder = exact_count % 1

        # A fractional part of (approximately) one half is truncated, any
        # other value is rounded to the nearest integer.
        if np.isclose(remainder, 0.5):
            molecule_count = int(exact_count)
        else:
            molecule_count = int(round(exact_count))

        if molecule_count == 0:
            raise ValueError(
                "The total number of substance molecules was not large enough, "
                "such that this non-zero amount translates into zero molecules "
                "of this component in the substance.")

        if tolerance is None:
            return molecule_count

        mole_fraction = molecule_count / total_substance_molecules

        if abs(mole_fraction - self.value) > tolerance:
            raise ValueError(
                f"The mole fraction ({mole_fraction}) given a total number of molecules "
                f"({total_substance_molecules}) is outside of the tolerance {tolerance} "
                f"of the target mole fraction {self.value}")

        return molecule_count

    def validate(self, attribute_type=None):
        """Run the parent class validation, then check that the value is a
        representable mole fraction.

        Parameters
        ----------
        attribute_type
            Passed through unchanged to the parent `validate`.

        Raises
        ------
        ValueError
            If the value is not in the range (0, 1], or is smaller than
            the sixth decimal place.
        """
        super().validate(attribute_type)

        fraction = self.value

        if fraction <= 0.0 or fraction > 1.0:
            raise ValueError(
                "A mole fraction must be greater than zero, and less than or "
                "equal to one.")

        # `identifier` only keeps six decimal places, so anything smaller
        # than 1e-6 is rejected.
        if math.floor(fraction * 1e6) < 1:
            raise ValueError("Mole fractions are only precise to the sixth "
                             "decimal place within this class representation.")
Example #8
0
    class Condition(AttributeClass):
        """Defines a specific condition which must be met of the form
        `left_hand_value` [TYPE] `right_hand_value`, where `[TYPE]` may
        be less than or greater than.
        """
        @unique
        class Type(Enum):
            """The available condition types."""

            LessThan = "lessthan"
            GreaterThan = "greaterthan"

        left_hand_value = Attribute(
            docstring="The left-hand value to compare.",
            type_hint=typing.Union[int, float, pint.Quantity],
        )
        right_hand_value = Attribute(
            docstring="The right-hand value to compare.",
            type_hint=typing.Union[int, float, pint.Quantity],
        )

        # This docstring previously duplicated the `right_hand_value` text.
        type = Attribute(
            docstring="The type of comparison to perform between the "
            "left-hand and right-hand values.",
            type_hint=Type,
            default_value=Type.LessThan,
        )

        def __eq__(self, other):
            """Two conditions are equal when they are of exactly the same
            class and their values and comparison types all compare equal.
            """
            # Within a method `type` resolves to the builtin, not to the
            # class level `type` attribute declared above.
            return (type(self) == type(other)
                    and self.left_hand_value == other.left_hand_value
                    and self.right_hand_value == other.right_hand_value
                    and self.type == other.type)

        def __ne__(self, other):
            """The negation of `__eq__`."""
            return not self.__eq__(other)

        def __str__(self):
            """Formats the condition as `left_hand_value type
            right_hand_value`, separated by single spaces."""
            return f"{self.left_hand_value} {self.type} {self.right_hand_value}"

        def __repr__(self):
            """Wraps the `__str__` representation in `<Condition ...>`."""
            return f"<Condition {str(self)}>"
Example #9
0
class ReweightingSchema(WorkflowCalculationSchema):
    """A workflow calculation schema which additionally encodes how cached
    data should be queried and filtered for reweighting calculations.
    """

    storage_queries = Attribute(
        docstring=(
            "The queries to perform when retrieving data for each "
            "of the components in the system from the storage backend. The "
            "keys of this dictionary will correspond to the metadata keys made "
            "available to the workflow system."
        ),
        type_hint=dict,
        default_value=default_storage_query(),
    )

    maximum_data_points = Attribute(
        docstring=(
            "The maximum number of data points to include "
            "as part of the multi-state reweighting calculations. If "
            "zero, no cap will be applied."
        ),
        type_hint=int,
        default_value=4,
    )
    temperature_cutoff = Attribute(
        docstring=(
            "The maximum difference between the target temperature "
            "and the temperature at which cached data was collected to. Data "
            "collected for temperatures outside of this cutoff will be ignored."
        ),
        type_hint=pint.Quantity,
        default_value=5.0 * unit.kelvin,
    )

    def validate(self, attribute_type=None):
        """Run the parent class validation, then check the reweighting
        specific options.

        Parameters
        ----------
        attribute_type
            Passed through unchanged to the parent `validate`.

        Raises
        ------
        AssertionError
            If `storage_queries` is empty, `maximum_data_points` is not
            positive, or a value of `storage_queries` is not a
            `SimulationDataQuery`.
        """
        super().validate(attribute_type)

        assert len(self.storage_queries) > 0

        # NOTE(review): the `maximum_data_points` docstring describes zero
        # as meaning "no cap", yet zero fails this check - confirm which of
        # the two is intended.
        assert self.maximum_data_points > 0

        assert all(
            isinstance(query, SimulationDataQuery)
            for query in self.storage_queries.values()
        )
Example #10
0
class TimeSeriesStatistics(AttributeClass):
    """Summary statistics of a time series: its length, how many of its
    points are uncorrelated, its statistical inefficiency, and the index
    after which it has become stationary (i.e. is equilibrated).
    """

    n_total_points: int = Attribute(
        docstring="The total number of data point in the time series.",
        type_hint=int,
    )
    n_uncorrelated_points: int = Attribute(
        docstring=(
            "The number of data point in the time series which are "
            "uncorrelated."
        ),
        type_hint=int,
    )

    statistical_inefficiency: float = Attribute(
        docstring="The statistical inefficiency of the time series.",
        type_hint=float,
    )
    equilibration_index: int = Attribute(
        docstring=(
            "The index after which the time series has become stationary."
        ),
        type_hint=int,
    )

    def __init__(
        self,
        n_total_points: int = None,
        n_uncorrelated_points: int = None,
        statistical_inefficiency: float = None,
        equilibration_index: int = None,
    ):
        """Create a new set of statistics.

        Each argument maps onto the attribute of the same name. Arguments
        left as ``None`` are not assigned.
        """
        provided = (
            ("n_total_points", n_total_points),
            ("n_uncorrelated_points", n_uncorrelated_points),
            ("statistical_inefficiency", statistical_inefficiency),
            ("equilibration_index", equilibration_index),
        )

        for attribute_name, attribute_value in provided:
            if attribute_value is None:
                continue

            setattr(self, attribute_name, attribute_value)
Example #11
0
class RequestResult(AttributeClass):
    """The current results of an estimation request - these
    results may be partial if the server hasn't yet completed
    the request.
    """

    queued_properties = Attribute(
        docstring="The set of properties which have yet to be, or "
        "are currently being estimated.",
        type_hint=PhysicalPropertyDataSet,
        default_value=PhysicalPropertyDataSet(),
    )

    estimated_properties = Attribute(
        docstring=
        "The set of properties which have been successfully estimated.",
        type_hint=PhysicalPropertyDataSet,
        default_value=PhysicalPropertyDataSet(),
    )
    unsuccessful_properties = Attribute(
        docstring=
        "The set of properties which could not be successfully estimated.",
        type_hint=PhysicalPropertyDataSet,
        default_value=PhysicalPropertyDataSet(),
    )

    # This docstring previously duplicated the `queued_properties` text,
    # which describes a data set of properties rather than this list.
    exceptions = Attribute(
        docstring="Any exceptions which were raised while attempting to "
        "estimate the properties.",
        type_hint=list,
        default_value=[],
    )

    def validate(self, attribute_type=None):
        """Run the parent class validation, then check the type of every
        stored exception.

        Parameters
        ----------
        attribute_type
            Passed through unchanged to the parent `validate`.

        Raises
        ------
        AssertionError
            If any entry of `exceptions` is not an `EvaluatorException`.
        """
        super(RequestResult, self).validate(attribute_type)
        assert all(
            (isinstance(x, EvaluatorException) for x in self.exceptions))
Example #12
0
class WorkflowCalculationSchema(CalculationLayerSchema):
    """A calculation layer schema which also carries the workflow schema
    to use when estimating a given class of physical properties with the
    built-in workflow framework.
    """

    workflow_schema = Attribute(
        docstring="The workflow schema to use when estimating properties.",
        type_hint=WorkflowSchema,
        default_value=UNDEFINED,
    )

    def validate(self, attribute_type=None):
        """Run the parent class validation, then validate the nested
        workflow schema.

        Parameters
        ----------
        attribute_type
            Passed through unchanged to the parent `validate`. It is not
            forwarded to the workflow schema, whose `validate` is called
            without arguments.
        """
        super().validate(attribute_type)

        nested_schema = self.workflow_schema
        nested_schema.validate()
Example #13
0
class ProtocolSchema(AttributeClass):
    """A json serializable description of a single workflow protocol."""

    id = Attribute(
        docstring="The unique id associated with the protocol.",
        type_hint=str,
    )
    type = Attribute(
        docstring="The type of protocol associated with this schema.",
        type_hint=str,
        read_only=True,
    )

    inputs = Attribute(
        docstring="The inputs to the protocol.",
        type_hint=dict,
        read_only=True,
    )

    def __init__(self, unique_id=None, protocol_type=None, inputs=None):
        """Create a new protocol schema.

        Parameters
        ----------
        unique_id: str, optional
            Stored as the `id` attribute.
        protocol_type: str, optional
            Stored as the `type` attribute.
        inputs: dict, optional
            Stored as the `inputs` attribute.

        Arguments left as ``None`` are not stored.
        """
        initial_values = (
            ("id", unique_id),
            ("type", protocol_type),
            ("inputs", inputs),
        )

        for attribute_name, attribute_value in initial_values:
            if attribute_value is None:
                continue

            self._set_value(attribute_name, attribute_value)

    def to_protocol(self):
        """Build a protocol object from this schema.

        Returns
        -------
        Protocol
            The result of `Protocol.from_schema` applied to this schema.
        """
        # Imported at call time rather than at module import time.
        from openff.evaluator.workflow import Protocol

        protocol = Protocol.from_schema(self)
        return protocol
Example #14
0
class ExactAmount(Amount):
    """An amount of a `Component` in a `Substance`, expressed as an exact
    number of instances.

    An assumption is made that this amount is for a component which is
    infinitely dilute (such as ligands in binding calculations), and hence
    do not contribute to the total mole fraction of a `Substance`.
    """

    value = Attribute(docstring="The value of this amount.", type_hint=int)

    @property
    def identifier(self):
        """str: The value rounded to an integer, prefixed with ``n=``."""
        instance_count = int(round(self.value))
        return f"n={instance_count:d}"

    def to_number_of_molecules(self,
                               total_substance_molecules,
                               tolerance=None):
        """Return the stored value unchanged.

        Neither `total_substance_molecules` nor `tolerance` is used by
        this implementation.
        """
        return self.value
Example #15
0
    class _ObjectKeyData(BaseStoredData):
        """A stored data object which records the keys of the items held
        in the storage system.
        """

        object_keys = Attribute(
            docstring=(
                "The unique keys of the objects stored in a `StorageBackend`."
            ),
            type_hint=dict,
            default_value={},
        )

        @classmethod
        def has_ancillary_data(cls):
            """bool: Always ``False`` for this class."""
            return False

        def to_storage_query(self):
            """Not implemented - this method is never expected to be called
            on this class.

            Raises
            ------
            NotImplementedError
                Always.
            """
            raise NotImplementedError()
Example #16
0
class SubstanceQuery(AttributeClass, abc.ABC):
    """A query for data which was collected for substances with
    particular traits, e.g. substances which contain both a solute and a
    solvent, or only a solvent.
    """

    components_only = Attribute(
        docstring=(
            "Only match pure data which was collected for "
            "one of the components in the query substance."
        ),
        type_hint=bool,
        default_value=False,
    )

    # component_roles = QueryAttribute(
    #     docstring="Returns data for only the subset of a substance "
    #     "which has the requested roles.",
    #     type_hint=list,
    #     optional=True,
    # )

    def validate(self, attribute_type=None):
        """Run the parent class validation; no additional checks are
        made here.

        Parameters
        ----------
        attribute_type
            Passed through unchanged to the parent `validate`.
        """
        super().validate(attribute_type)
Example #17
0
class ThermodynamicState(AttributeClass):
    """Data specifying a physical thermodynamic state obeying
    Boltzmann statistics.

    Notes
    -----
    Equality of two thermodynamic states is determined by comparing
    the temperature in kelvin to within 3 decimal places, and comparing
    the pressure (if defined) in pascals to within 3 decimal places.

    Examples
    --------
    Specify an NPT state at 298 K and 1 atm pressure.

    >>> state = ThermodynamicState(temperature=298.0*unit.kelvin, pressure=1.0*unit.atmospheres)

    Note that the pressure is only relevant for periodic systems.
    """

    temperature = Attribute(docstring="The external temperature.",
                            type_hint=pint.Quantity)
    pressure = Attribute(docstring="The external pressure.",
                         type_hint=pint.Quantity,
                         optional=True)

    @property
    def inverse_beta(self):
        """Returns the temperature multiplied by the molar gas constant"""
        return (self.temperature * unit.molar_gas_constant).to(unit.kilojoule /
                                                               unit.mole)

    @property
    def beta(self):
        """Returns one divided by the temperature multiplied by the molar gas constant"""
        return 1.0 / self.inverse_beta

    def __init__(self, temperature=None, pressure=None):
        """Constructs a new ThermodynamicState object.

        Arguments left as ``None`` are not assigned.

        Parameters
        ----------
        temperature : pint.Quantity
            The external temperature
        pressure : pint.Quantity
            The external pressure
        """
        if temperature is not None:
            self.temperature = temperature
        if pressure is not None:
            self.pressure = pressure

    def validate(self, attribute_type=None):
        """Run the parent class validation, then check that the
        temperature (and pressure, if defined) are positive.

        Parameters
        ----------
        attribute_type
            Passed through unchanged to the parent `validate`.

        Raises
        ------
        AssertionError
            If the temperature or the pressure is not greater than zero.
        """
        super(ThermodynamicState, self).validate(attribute_type)

        if self.pressure != UNDEFINED:
            # The converted value is discarded - presumably the conversion
            # is only made so that incompatible units raise an error here.
            self.pressure.to(unit.pascals)
            assert self.pressure > 0.0 * unit.pascals

        # As above, the result of the conversion is not used.
        self.temperature.to(unit.kelvin)
        assert self.temperature > 0.0 * unit.kelvin

    def __repr__(self):
        return f"<ThermodynamicState {str(self)}>"

    def __str__(self):
        return_value = f"T={self.temperature:~}"

        if self.pressure != UNDEFINED:
            return_value += f" P={self.pressure:~}"

        return return_value

    def _comparison_key(self):
        """Returns the tuple which both `__hash__` and `__eq__` are based
        on: the temperature in kelvin and the pressure in pascals (or
        ``None`` if undefined), each formatted to 3 decimal places.
        """
        temperature = self.temperature.to(unit.kelvin).magnitude
        pressure = (None if self.pressure == UNDEFINED else self.pressure.to(
            unit.pascal).magnitude)

        return (f"{temperature:.3f}",
                None if pressure is None else f"{pressure:.3f}")

    def __hash__(self):
        return hash(self._comparison_key())

    def __eq__(self, other):

        if not isinstance(other, ThermodynamicState):
            return False

        # Compare the keys themselves rather than their hashes, so that a
        # hash collision between two different states cannot make them
        # compare as equal.
        return self._comparison_key() == other._comparison_key()

    def __ne__(self, other):
        return not (self == other)
Example #18
0
class PhysicalProperty(AttributeClass, abc.ABC):
    """Represents the value of any physical property and its uncertainty
    if provided.

    It additionally stores the thermodynamic state at which the property
    was collected, the phase it was collected in, information about
    the composition of the observed system, and metadata about how the
    property was collected.
    """
    @classmethod
    @abc.abstractmethod
    def default_unit(cls):
        """pint.Unit: The default unit (e.g. g / mol) associated with this
        class of property."""
        raise NotImplementedError()

    id = Attribute(
        docstring="A unique identifier string assigned to this property",
        type_hint=str,
        default_value=lambda: str(uuid.uuid4()).replace("-", ""),
    )

    # The docstring previously read "measured estimated" (missing " / ").
    substance = Attribute(
        docstring=
        "The substance that this property was measured / estimated for.",
        type_hint=Substance,
    )
    phase = Attribute(
        docstring="The phase / phases that this property was measured in.",
        type_hint=PropertyPhase,
    )
    # The two string fragments previously joined without a space, yielding
    # "propertywas".
    thermodynamic_state = Attribute(
        docstring="The thermodynamic state that this property "
        "was measured / estimated at.",
        type_hint=ThermodynamicState,
    )

    value = Attribute(
        docstring="The measured / estimated value of this property.",
        type_hint=pint.Quantity,
    )
    uncertainty = Attribute(
        docstring=
        "The uncertainty in measured / estimated value of this property.",
        type_hint=pint.Quantity,
        optional=True,
    )

    source = Attribute(
        docstring="The original source of this physical property.",
        type_hint=Source,
        optional=True,
    )
    metadata = Attribute(
        docstring=
        "Additional metadata associated with this property. All property "
        "metadata will be made accessible to estimation workflows.",
        type_hint=dict,
        optional=True,
    )

    gradients = Attribute(
        docstring="The gradients of this property with respect to "
        "different force field parameters.",
        type_hint=list,
        optional=True,
    )

    def __init__(
        self,
        thermodynamic_state=None,
        phase=PropertyPhase.Undefined,
        substance=None,
        value=None,
        uncertainty=None,
        source=None,
    ):
        """Constructs a new PhysicalProperty object.

        Arguments passed as ``None`` are not assigned. `gradients` is
        always initialised to an empty list.

        Parameters
        ----------
        thermodynamic_state : ThermodynamicState
            The thermodynamic state that the property was measured in.
        phase : PropertyPhase
            The phase that the property was measured in.
        substance : Substance
            The composition of the substance that was measured.
        value: pint.Quantity
            The value of the measured physical property.
        uncertainty: pint.Quantity
            The uncertainty in the measured value.
        source: Source
            The source of this property.
        """
        if thermodynamic_state is not None:
            self.thermodynamic_state = thermodynamic_state
        if phase is not None:
            self.phase = phase

        if substance is not None:
            self.substance = substance

        if value is not None:
            self.value = value
        if uncertainty is not None:
            self.uncertainty = uncertainty

        self.gradients = []

        if source is not None:
            self.source = source

    def __setstate__(self, state):
        """Restores this object from `state`, generating a fresh id when
        the state does not contain one.

        Note that a missing ``"id"`` entry is added to the `state`
        dictionary which was passed in.
        """

        if "id" not in state:
            state["id"] = str(uuid.uuid4()).replace("-", "")

        super(PhysicalProperty, self).__setstate__(state)

    def validate(self, attribute_type=None):
        """Run the parent class validation, then check that the value (and
        uncertainty, if defined) have the same dimensionality as
        `default_unit`.

        Parameters
        ----------
        attribute_type
            Passed through unchanged to the parent `validate`.

        Raises
        ------
        AssertionError
            If the dimensionality of the value or of the uncertainty does
            not match that of `default_unit()`.
        """
        super(PhysicalProperty, self).validate(attribute_type)

        assert self.value.units.dimensionality == self.default_unit(
        ).dimensionality

        if self.uncertainty != UNDEFINED:
            assert (self.uncertainty.units.dimensionality ==
                    self.default_unit().dimensionality)
# An attribute class with one attribute typed as `AttributeObject` plus a
# list-typed and a dict-typed attribute which are flagged `optional=True`.
# NOTE(review): this looks like a test fixture for nested attribute
# handling - confirm against where it is used.
class NestedAttributeObject(AttributeClass):

    # Declared with an empty docstring and no further arguments.
    some_value = Attribute("", AttributeObject)

    # Both containers pass UNDEFINED as the third positional argument
    # (presumably the default value - verify against `Attribute`).
    some_list = Attribute("", list, UNDEFINED, optional=True)
    some_dict = Attribute("", dict, UNDEFINED, optional=True)
Example #20
0
class Amount(AttributeClass, abc.ABC):
    """The abstract base for describing how much of a given component is
    present in a `Substance`.
    """

    value = Attribute(
        docstring="The value of this amount.",
        type_hint=typing.Union[float, int],
        read_only=True,
    )

    def __init__(self, value=UNDEFINED):
        """
        Parameters
        ----------
        value: float or int
            The value of this amount.
        """
        # `value` is declared read-only above and is stored via
        # `_set_value` rather than by plain assignment.
        self._set_value("value", value)

    @property
    def identifier(self):
        """A string identifier for this amount."""
        raise NotImplementedError()

    @abc.abstractmethod
    def to_number_of_molecules(self,
                               total_substance_molecules,
                               tolerance=None):
        """Converts this amount to an exact number of molecules

        Parameters
        ----------
        total_substance_molecules: int
            The total number of molecules in the whole substance. This amount
            will contribute to a portion of this total number.
        tolerance: float, optional
            The tolerance with which this amount should be in. As an example,
            when converting a mole fraction into a number of molecules, the
            total number of molecules may not be sufficiently large enough to
            reproduce this amount.

        Returns
        -------
        int
            The number of molecules which this amount represents,
            given the `total_substance_molecules`.
        """
        raise NotImplementedError()

    def __str__(self):
        """The string form of an amount is its `identifier`."""
        return self.identifier

    def __repr__(self):
        """Formats as `<ClassName identifier>`."""
        class_name = self.__class__.__name__
        return f"<{class_name} {str(self)}>"

    def __eq__(self, other):
        """Amounts are equal when they are of exactly the same class and
        their values are close according to `np.isclose`.
        """
        # NOTE(review): equality uses `np.isclose` while `__hash__` uses
        # `identifier`, so two amounts may compare equal yet hash
        # differently - confirm whether this is acceptable.
        same_type = type(self) == type(other)

        if not same_type:
            return same_type

        return np.isclose(self.value, other.value)

    def __ne__(self, other):
        """The negation of `__eq__`."""
        return not (self == other)

    def __hash__(self):
        """Hashes the `identifier` string."""
        return hash(self.identifier)
Example #21
0
class WorkflowSchema(AttributeClass):
    """The schematic for a property estimation workflow.

    A schema lists the protocol schemas which make up the workflow and,
    optionally, the replicators to apply to them, the protocol outputs which
    provide the estimated value and its gradients, and the data classes which
    should be populated for storage.
    """

    # The only attribute here which is not flagged as optional. `validate`
    # asserts that this list is non-empty and only holds `ProtocolSchema`
    # objects; the schemas are looked up by their `id` throughout this class.
    protocol_schemas = Attribute(
        docstring=
        "The schemas for the protocols which will make up the workflow.",
        type_hint=list,
        default_value=[],
    )
    # Compared against `UNDEFINED` before use by the methods of this class.
    protocol_replicators = Attribute(
        docstring=
        "A set of replicators which will replicate parts of the workflow.",
        type_hint=list,
        optional=True,
    )

    # Checked by `_validate_final_value` when set.
    final_value_source = Attribute(
        docstring=
        "A reference to which protocol output corresponds to the estimated "
        "value of the property.",
        type_hint=ProtocolPath,
        optional=True,
    )
    # Checked by `_validate_gradients` when set; each entry is asserted to
    # be a `ProtocolPath`.
    gradients_sources = Attribute(
        docstring=
        "A list of references the protcol outputs which correspond to the gradients "
        "of the estimated property with respect to specified force field parameters.",
        type_hint=list,
        optional=True,
    )
    # Checked by `_validate_outputs_to_store` when set; the values are
    # asserted to be `BaseStoredData` objects.
    outputs_to_store = Attribute(
        docstring=
        "A collection of data classes to populate ready to be stored by a "
        "`StorageBackend`.",
        type_hint=dict,
        optional=True,
    )

    def replace_protocol_types(self,
                               protocol_replacements,
                               protocol_group_schema=None):
        """Swaps every protocol whose type appears as a key of
        `protocol_replacements` for a newly created protocol of the type
        named by the matching value. Inputs which both the old and the new
        protocol require are copied across.

        Warnings
        --------
        This method is NOT fully implemented and is likely to fail in
        all but a few specific cases. This method should be used with
        extreme caution.

        Parameters
        ----------
        protocol_replacements: dict of str and str, None
            Maps the type of each protocol to replace onto the type of the
            protocol which should take its place. Nothing is done if `None`.
        protocol_group_schema: ProtocolGroupSchema
            The protocol group whose children should be considered instead
            of the top level protocols of this schema. Mainly used when
            this method calls itself recursively.
        """

        if protocol_replacements is None:
            return

        schemas_by_key = (
            {schema.id: schema for schema in self.protocol_schemas}
            if protocol_group_schema is None
            else protocol_group_schema.protocol_schemas
        )

        for schema_key in schemas_by_key:

            old_schema = schemas_by_key[schema_key]

            if old_schema.type not in protocol_replacements:
                continue

            old_protocol = old_schema.to_protocol()

            replacement_type = protocol_replacements[old_schema.type]
            new_protocol = registered_workflow_protocols[replacement_type](
                old_schema.id)

            # Carry over the value of each input shared by both protocols.
            for input_path in new_protocol.required_inputs:

                if input_path in old_protocol.required_inputs:
                    new_protocol.set_value(
                        input_path, old_protocol.get_value(input_path))

            schemas_by_key[schema_key] = new_protocol.schema

            # NOTE(review): the top level list is edited even when a
            # `protocol_group_schema` was provided - confirm this is the
            # intended behaviour for schemas nested inside of a group.
            self.protocol_schemas.remove(old_schema)
            self.protocol_schemas.append(new_protocol.schema)

            replaced_schema = schemas_by_key[schema_key]

            if isinstance(replaced_schema, ProtocolGroupSchema):
                self.replace_protocol_types(protocol_replacements,
                                            replaced_schema)

    def _find_protocols_to_be_replicated(self, replicator, protocols=None):
        """Collects the ids of every protocol whose id contains the
        placeholder id of the given replicator, descending into any
        protocol groups which are encountered.

        Parameters
        ----------
        replicator: ProtocolReplicator
            The replicator of interest.
        protocols: dict of str and ProtocolSchema or list of ProtocolSchema, optional
            The protocols to inspect. When `None`, the protocols of this
            schema are inspected.

        Returns
        -------
        list of str
            The ids of the protocols which the replicator should replicate.
        """

        if protocols is None:
            protocols = {schema.id: schema for schema in self.protocol_schemas}

        # Lists are re-keyed by id so that both input forms share one loop.
        if isinstance(protocols, list):
            protocols = {schema.id: schema for schema in protocols}

        matching_ids = []

        for schema_id, schema in protocols.items():

            if replicator.placeholder_id in schema_id:
                matching_ids.append(schema_id)

            # Only protocol groups have children which need to be searched.
            if isinstance(schema, ProtocolGroupSchema):

                matching_ids.extend(
                    self._find_protocols_to_be_replicated(
                        replicator, schema.protocol_schemas))

        return matching_ids

    def _get_unreplicated_path(self, protocol_path):
        """Maps a path which may reference a protocol that only exists once
        a replicator has been applied back onto the path of the original,
        unreplicated protocol.

        Parameters
        ----------
        protocol_path: ProtocolPath
            The path to convert to an unreplicated path.

        Returns
        -------
        ProtocolPath
            A path which only points at unreplicated protocols. A copy of
            `protocol_path` is returned when this schema has no replicators.
        """

        if self.protocol_replicators == UNDEFINED:
            return protocol_path.copy()

        unreplicated_path = str(protocol_path.full_path)

        for replicator in self.protocol_replicators:

            placeholder_id = replicator.placeholder_id

            # Nothing to undo if the path still contains the placeholder.
            if placeholder_id in unreplicated_path:
                continue

            for protocol_id in self._find_protocols_to_be_replicated(
                    replicator):

                # Escape the id so that it is matched literally, then turn
                # the (now escaped) digit marker back into a live pattern
                # which matches the index substituted for the placeholder.
                escaped_id = re.escape(
                    protocol_id.replace(placeholder_id, r"\d+"))
                match_pattern = escaped_id.replace(re.escape(r"\d+"), r"\d+")

                unreplicated_path = re.sub(match_pattern, protocol_id,
                                           unreplicated_path)

        return ProtocolPath.from_string(unreplicated_path)

    @staticmethod
    def _get_unnested_protocol_path(protocol_path):
        """Builds a protocol path whose property name has been cut down to
        just its top level name, dropping any nested attribute names and
        any array indices, e.g:

        `some_protocol_id.value.error` becomes `some_protocol_id.value`

        and

        `some_protocol_id.value[1]` becomes `some_protocol_id.value`

        Parameters
        ----------
        protocol_path: ProtocolPath
            The path to truncate.

        Returns
        -------
        ProtocolPath
            The truncated path.
        """
        # Keep only the text before the first "." (nested property) and
        # before the first "[" (array index).
        top_level_name = (
            protocol_path.property_name.partition(".")[0].partition("[")[0])

        return ProtocolPath(top_level_name, *protocol_path.protocol_ids)

    def _validate_replicators(self, schemas_by_id):
        """Validates the replicators of this schema, if any are defined.

        Parameters
        ----------
        schemas_by_id: dict of str and ProtocolSchema
            The top level protocol schemas of this workflow, keyed by id.

        Raises
        ------
        AssertionError
            If an entry is not a `ProtocolReplicator`, or if a replicator
            has no id.
        ValueError
            If the template values of a replicator are neither a list nor a
            `ProtocolPath`, if a listed path starts at a protocol which is
            not in `schemas_by_id`, if a path of template values is not
            global, or if the final value source contains the placeholder
            id of a replicator.
        """

        if self.protocol_replicators == UNDEFINED:
            return

        assert all(
            isinstance(x, ProtocolReplicator)
            for x in self.protocol_replicators)

        for replicator in self.protocol_replicators:

            assert replicator.id is not None and len(replicator.id) > 0

            template_values = replicator.template_values

            if not isinstance(template_values, (list, ProtocolPath)):

                raise ValueError(
                    "The template values of a replicator must either be "
                    "a list of values, or a reference to a list of values.")

            if isinstance(template_values, list):

                # Only entries which reference a protocol need checking;
                # any other entry is skipped.
                for template_value in template_values:

                    if not isinstance(template_value, ProtocolPath):
                        continue

                    if template_value.start_protocol not in schemas_by_id:

                        raise ValueError(
                            f"The value source {template_value} does not exist."
                        )

            elif not template_values.is_global:

                raise ValueError(
                    "Template values must either be a constant, or come from the "
                    "global scope.")

            if (self.final_value_source != UNDEFINED
                    and self.final_value_source.protocol_path.find(
                        replicator.placeholder_id) >= 0):

                # The two literals are concatenated, so the first one must
                # end with a space for the message to read correctly.
                raise ValueError("The final value source cannot come from "
                                 "a protocol which is being replicated.")

    def _validate_final_value(self, schemas_by_id):
        """Validates the reference to the protocol output which provides
        the final value of the workflow, if one has been set.

        Parameters
        ----------
        schemas_by_id: dict of str and ProtocolSchema
            The top level protocol schemas of this workflow, keyed by id.

        Raises
        ------
        ValueError
            If the referenced protocol is not in `schemas_by_id`.
        AssertionError
            If the source is not a `ProtocolPath`, or the type hint of the
            referenced attribute is not of the expected type.
        """

        value_source = self.final_value_source

        if value_source == UNDEFINED:
            return

        assert isinstance(value_source, ProtocolPath)

        if value_source.start_protocol not in schemas_by_id:

            raise ValueError(
                f"The value source {self.final_value_source} does not exist.")

        source_protocol = schemas_by_id[
            value_source.start_protocol].to_protocol()

        # The returned value is discarded - presumably this call is made
        # only so that it fails if the path cannot be resolved.
        source_protocol.get_value(value_source)

        value_type = source_protocol.get_class_attribute(
            value_source).type_hint

        # TODO: In Python < 3.7 the Union type will collapse pint.Quantity
        #       and pint.Measurement into pint.Quantity such that this check
        #       will fail. For now we allow Measurements or Quantities, but
        #       this should be reverted to just pint.Measurement when dropping
        #       3.6 support.
        expected_type = (pint.Quantity
                         if is_union_type(value_type) else pint.Measurement)

        assert is_type_subclass_of_type(value_type, expected_type)

    def _validate_gradients(self, schemas_by_id):
        """Validates the references to the protocol outputs which provide
        the gradients of the estimated property, if any have been set.

        Parameters
        ----------
        schemas_by_id: dict of str and ProtocolSchema
            The top level protocol schemas of this workflow, keyed by id.

        Raises
        ------
        ValueError
            If a referenced protocol is not in `schemas_by_id`.
        AssertionError
            If a source is not a `ProtocolPath`, or the type hint of a
            referenced attribute is not a `ParameterGradient` type.
        """

        sources = self.gradients_sources

        if sources == UNDEFINED:
            return

        assert all(isinstance(x, ProtocolPath) for x in sources)

        for gradient_source in sources:

            if gradient_source.start_protocol not in schemas_by_id:

                raise ValueError(
                    f"The gradient source {gradient_source} does not exist.")

            source_protocol = schemas_by_id[
                gradient_source.start_protocol].to_protocol()

            # The returned value is discarded - presumably this call is
            # made only so that it fails if the path cannot be resolved.
            source_protocol.get_value(gradient_source)

            gradient_type = source_protocol.get_class_attribute(
                gradient_source).type_hint

            assert is_type_subclass_of_type(gradient_type, ParameterGradient)

    def _validate_outputs_to_store(self, schemas_by_id):
        """Validates that the references to the outputs to store
        are valid.

        Parameters
        ----------
        schemas_by_id: dict of str and ProtocolSchema
            The top level protocol schemas of this workflow, keyed by id.

        Raises
        ------
        AssertionError
            If an output to store is not a `BaseStoredData` object.
        ValueError
            If a storage attribute takes its value from a replicator which
            this schema does not define, or from a protocol which is not
            in `schemas_by_id`.
        """
        if self.outputs_to_store == UNDEFINED:
            return

        assert all(
            isinstance(x, BaseStoredData)
            for x in self.outputs_to_store.values())

        # The replicators are optional. Treat a schema without any as
        # having an empty list, so that a reference to a missing replicator
        # is reported by the `ValueError` below rather than by a failure
        # while iterating over the undefined attribute.
        protocol_replicators = self.protocol_replicators

        if protocol_replicators == UNDEFINED:
            protocol_replicators = []

        for output_to_store in self.outputs_to_store.values():

            output_to_store.validate()

            for attribute_name in output_to_store.get_attributes(
                    StorageAttribute):

                attribute_value = getattr(output_to_store, attribute_name)

                if isinstance(attribute_value, ReplicatorValue):

                    replicator_exists = any(
                        attribute_value.replicator_id == x.id
                        for x in protocol_replicators)

                    if not replicator_exists:

                        raise ValueError(
                            f"An output to store is trying to take its value from a "
                            f"replicator {attribute_value.replicator_id} which does "
                            f"not exist.")

                # Only references to protocol outputs are checked below.
                if (not isinstance(attribute_value, ProtocolPath)
                        or attribute_value.is_global):
                    continue

                if attribute_value.start_protocol not in schemas_by_id:
                    raise ValueError(
                        f"The {attribute_value} source does not exist.")

                protocol_schema = schemas_by_id[attribute_value.start_protocol]

                # The returned value is discarded - presumably this call is
                # made only so that it fails if the path cannot be resolved.
                protocol_object = protocol_schema.to_protocol()
                protocol_object.get_value(attribute_value)

    def _validate_interfaces(self, schemas_by_id):
        """Validates the flow of the data between protocols, ensuring
        that inputs and outputs correctly match up.

        Parameters
        ----------
        schemas_by_id: dict of str and ProtocolSchema
            The top level protocol schemas of this workflow, keyed by id.

        Raises
        ------
        ValueError
            If a non-optional required input has not been set, if an input
            references a protocol which is neither in `schemas_by_id` nor
            the protocol itself, or if the type hint of a referenced output
            is not a subclass of the type hint of the input it feeds.
        """

        for protocol_schema in schemas_by_id.values():

            protocol_object = protocol_schema.to_protocol()

            # First pass: every required, non-optional input must be set.
            for input_path in protocol_object.required_inputs:

                input_value = protocol_object.get_value(input_path)
                input_attribute = protocol_object.get_class_attribute(
                    input_path)

                if not isinstance(input_attribute, InputAttribute):
                    continue

                is_optional = input_attribute.optional

                if input_value == UNDEFINED and is_optional is False:

                    raise ValueError(
                        f"The {input_path} required input of protocol "
                        f"{protocol_schema.id} was not set.")

            # Second pass: check each reference which an input makes to
            # the output of a protocol.
            for input_path in protocol_object.required_inputs:

                value_references = protocol_object.get_value_references(
                    input_path)

                for source_path, value_reference in value_references.items():

                    if value_reference.is_global:
                        # We handle global input validation separately
                        continue

                    # Work with the path of the unreplicated protocol, as
                    # only those are present in `schemas_by_id`.
                    value_reference = self._get_unreplicated_path(
                        value_reference)

                    # Make sure the other protocol whose output we are interested
                    # in actually exists.
                    if (value_reference.start_protocol not in schemas_by_id
                            and value_reference.start_protocol !=
                            protocol_object.id):

                        raise ValueError(
                            f"The {protocol_object.id} protocol tries to take input "
                            f"from a non-existent protocol: {value_reference.full_path}"
                        )

                    if value_reference.start_protocol != protocol_object.id:

                        other_protocol_schema = schemas_by_id[
                            value_reference.start_protocol]
                        other_protocol_object = other_protocol_schema.to_protocol(
                        )

                    else:
                        # The reference starts at this protocol itself.
                        other_protocol_object = protocol_object

                    unnested_value_reference = self._get_unnested_protocol_path(
                        value_reference)
                    unnested_source_path = self._get_unnested_protocol_path(
                        source_path)

                    # Make sure the other protocol has the output referenced
                    # by this input.
                    other_protocol_object.get_value(unnested_value_reference)

                    # Do a very rudimentary type check between the input and
                    # output types. This is not currently possible for nested
                    # or indexed properties, or outputs of replicated protocols.
                    if (value_reference.full_path !=
                            unnested_value_reference.full_path
                            or source_path.full_path !=
                            unnested_source_path.full_path):

                        continue

                    is_replicated_reference = False
                    protocol_replicators = self.protocol_replicators

                    # The replicators are optional; treat none as empty.
                    if protocol_replicators == UNDEFINED:
                        protocol_replicators = []

                    for replicator in protocol_replicators:

                        # Skip this replicator when its placeholder appears
                        # in both the id of this protocol and the referenced
                        # path, or in neither of them. The reference is
                        # flagged as replicated only when exactly one of
                        # the two contains the placeholder.
                        if (replicator.placeholder_id in protocol_schema.id
                                and replicator.placeholder_id
                                in value_reference.protocol_path) or (
                                    replicator.placeholder_id
                                    not in protocol_schema.id
                                    and replicator.placeholder_id
                                    not in value_reference.protocol_path):

                            continue

                        is_replicated_reference = True
                        break

                    if is_replicated_reference:
                        continue

                    expected_input_type = protocol_object.get_class_attribute(
                        unnested_source_path).type_hint
                    expected_output_type = other_protocol_object.get_class_attribute(
                        unnested_value_reference).type_hint

                    # Without a type hint on both sides there is nothing
                    # to compare.
                    if expected_input_type is None or expected_output_type is None:
                        continue

                    if not is_type_subclass_of_type(expected_output_type,
                                                    expected_input_type):

                        raise ValueError(
                            f"The output type ({expected_output_type}) of "
                            f"{value_reference} does not match the requested "
                            f"input type ({expected_input_type}) of {source_path}."
                        )

    def validate(self, attribute_type=None):
        """Validates the attributes of this schema, and then the
        consistency of the workflow which it describes.

        Parameters
        ----------
        attribute_type
            Passed unchanged to the `validate` method of the parent class.

        Raises
        ------
        AssertionError
            If there are no protocol schemas, or if any entry is not a
            `ProtocolSchema`.
        """

        super(WorkflowSchema, self).validate(attribute_type)

        schemas = self.protocol_schemas

        # Basic checks on the protocol schemas themselves.
        assert len(schemas) > 0
        assert all(isinstance(x, ProtocolSchema) for x in schemas)

        schemas_by_id = {schema.id: schema for schema in schemas}

        # The data sources / targets are checked first, followed by the
        # interfaces between the protocols.
        checks = (
            self._validate_final_value,
            self._validate_gradients,
            self._validate_replicators,
            self._validate_outputs_to_store,
            self._validate_interfaces,
        )

        for check in checks:
            check(schemas_by_id)
Example #22
0
class Request(AttributeClass):
    """An estimation request which has been sent to a `EvaluatorServer`
    instance.

    This object can be used to query and retrieve the results of the
    request when finished, or be stored to retrieve the request at some
    point in the future."""

    id = Attribute(
        docstring="The unique id assigned to this request by the server.",
        type_hint=str)
    connection_options = Attribute(
        docstring=
        "The options used to connect to the server handling the request.",
        type_hint=ConnectionOptions,
    )

    def __init__(self, client=None):
        """
        Parameters
        ----------
        client: EvaluatorClient, optional
            The client which submitted this request.
        """

        if client is not None:

            # Record where the client is connected to, so that `results`
            # can build a new client from these options later on.
            self.connection_options = ConnectionOptions()
            self.connection_options.server_address = client.server_address
            self.connection_options.server_port = client.server_port

        self._client = client

    def results(self, synchronous=False, polling_interval=5):
        """Attempt to retrieve the results of the request from the
        server.

        If the method is run synchronously it will block the main
        thread until either all of the requested properties have been
        estimated, or an exception is returned.

        A new client is created from `connection_options` when this request
        has no client, or when the address or port of the current client
        differs from the one stored in `connection_options`.

        Parameters
        ----------
        synchronous: bool
            If `True`, this method will block the main thread until
            the server either returns a result or an error.
        polling_interval: float
            If running synchronously, this is the time interval (seconds)
            between checking if the calculation has finished. This will
            be ignored if running asynchronously.

        Returns
        -------
        RequestResult, optional
            Returns the current results of the request. This may
            be `None` if any unexpected exceptions occurred while
            retrieving the estimate.
        EvaluatorException, optional
            The exception raised while trying to retrieve the result
            if any.
        """
        client = self._client
        options = self.connection_options

        # The client must be compared against the stored connection options
        # (comparing it against itself can never detect a change). Options
        # which do not expose an address / port, e.g. because they were never
        # set, are treated as matching so that an existing client is kept.
        client_is_stale = client is None or (
            getattr(options, "server_address", client.server_address) !=
            client.server_address
            or getattr(options, "server_port", client.server_port) !=
            client.server_port)

        if client_is_stale:

            self.validate()
            self._client = EvaluatorClient(self.connection_options)

        return self._client.retrieve_results(self.id, synchronous,
                                             polling_interval)

    def __str__(self):
        return f"Request id={self.id}"

    def __repr__(self):
        return f"<{str(self)}>"
class Substance(AttributeClass):
    """Defines the components, their amounts, and their roles in a system.

    Examples
    --------
    A neat liquid containing only a single component:

    >>> from openff.evaluator.substances import Component, ExactAmount, MoleFraction
    >>> liquid = Substance()
    >>> liquid.add_component(Component(smiles='O'), MoleFraction(1.0))

    A binary mixture containing two components, where the mole fractions are explicitly stated:

    >>> binary_mixture = Substance()
    >>> binary_mixture.add_component(Component(smiles='O'), MoleFraction(0.2))
    >>> binary_mixture.add_component(Component(smiles='CO'), MoleFraction(0.8))

    The infinite dilution of one molecule within a bulk solvent or mixture may also be specified
    by defining the exact number of copies of that molecule, rather than a mole fraction:

    >>> benzene = Component(smiles='C1=CC=CC=C1', role=Component.Role.Solute)
    >>> water = Component(smiles='O', role=Component.Role.Solvent)
    >>>
    >>> infinite_dilution = Substance()
    >>> infinite_dilution.add_component(component=benzene, amount=ExactAmount(1)) # Infinite dilution.
    >>> infinite_dilution.add_component(component=water, amount=MoleFraction(1.0))

    In this example we explicitly flag benzene as being the solute and the water component the solvent.
    This enables workflows to easily identify key molecules of interest, such as the molecule which should
    be 'grown' into solution during solvation free energy calculations.
    """

    # A tuple of `Component` objects. Read-only: `add_component` replaces
    # the whole tuple through `_set_value` rather than mutating it.
    components = Attribute(
        docstring="A list of all of the components in this substance.",
        type_hint=tuple,
        default_value=tuple(),
        read_only=True,
    )
    # Maps the identifier of each component onto a tuple of its `Amount`
    # objects. Read-only: `add_component` stores an updated copy of the
    # dictionary through `_set_value`.
    amounts = Attribute(
        docstring="the amounts of the component in this substance",
        type_hint=dict,
        default_value=dict(),
        read_only=True,
    )

    @property
    def identifier(self):
        """str: A unique str representation of this substance, which encodes all
        components and their amounts in the substance. The value is rebuilt
        by `_get_identifier` on every access."""
        return self._get_identifier()

    @property
    def number_of_components(self):
        """int: The number of different components in this substance, i.e.
        the length of `components`."""
        return len(self.components)

    def _get_identifier(self):
        """Builds the string identifier of this substance from the
        identifiers of its components and of their amounts.

        Components are listed in sorted order and separated by ``|``. Each
        one is followed by the comma separated identifiers of its amounts,
        ordered by the name of their type, inside of curly braces.

        Returns
        -------
        str
            The string identifier.
        """
        identifier_split = []

        for component_identifier in sorted(
                component.identifier for component in self.components):

            # Order by type name so that the result does not depend on the
            # order in which the amounts were added.
            ordered_amounts = sorted(
                self.amounts[component_identifier],
                key=lambda amount: type(amount).__name__)

            amount_identifier = ",".join(
                amount.identifier for amount in ordered_amounts)

            identifier_split.append(
                f"{component_identifier}{{{amount_identifier}}}")

        return "|".join(identifier_split)

    @classmethod
    def from_components(cls, *components):
        """Builds a new `Substance` in which each of the given components
        is present with the same mole fraction.

        Parameters
        ----------
        components: Component or str
            The components to add to the substance, given either as
            `Component` objects or as the smiles representation of
            the component.

        Returns
        -------
        Substance
            The substance containing the requested components in equal amounts.

        Raises
        ------
        ValueError
            If no components are given.
        """

        if not components:
            raise ValueError("At least one component must be specified")

        mole_fraction = 1.0 / len(components)
        return_substance = cls()

        for entry in components:

            # Plain strings are interpreted as smiles patterns.
            component = (Component(smiles=entry)
                         if isinstance(entry, str) else entry)

            return_substance.add_component(component,
                                           MoleFraction(mole_fraction))

        return return_substance

    def add_component(self, component, amount):
        """Add a component to the Substance. If the component is already
        present with an amount of the same type as `amount`, the two values
        are summed into a single amount of that type. Amounts of any other
        type which the component already has are left untouched.

        Parameters
        ----------
        component : Component
            The component to add to the system.
        amount : Amount
            The amount of this component in the substance.

        Raises
        ------
        AssertionError
            If `component` is not a `Component` or `amount` is not
            an `Amount`.
        ValueError
            If `amount` is a `MoleFraction` and adding it would bring the
            total mole fraction of the substance above 1.0.
        """

        assert isinstance(component, Component)
        assert isinstance(amount, Amount)

        component.validate()
        amount.validate()

        if isinstance(amount, MoleFraction):

            total_mole_fraction = amount.value

            for component_identifier in self.amounts:

                total_mole_fraction += sum([
                    existing.value
                    for existing in self.amounts[component_identifier]
                    if isinstance(existing, MoleFraction)
                ])

            # Absorb floating point noise so that fractions which sum to
            # (almost exactly) one are not rejected.
            if np.isclose(total_mole_fraction, 1.0):
                total_mole_fraction = 1.0

            if total_mole_fraction > 1.0:

                raise ValueError(
                    f"The total mole fraction of this substance {total_mole_fraction} exceeds 1.0"
                )

        if component.identifier not in self.amounts:

            components = (*self.components, component)
            self._set_value("components", components)

        existing_amounts = self.amounts.get(component.identifier, ())
        remaining_amounts = []

        # Merge the new amount into any existing amount of the same type and
        # keep every amount of a different type. The whole collection must be
        # walked: stopping at the first match would silently drop any
        # amounts of other types which are stored after it.
        for existing_amount in existing_amounts:

            if type(existing_amount) is type(amount):
                amount = type(amount)(existing_amount.value + amount.value)
            else:
                remaining_amounts.append(existing_amount)

        remaining_amounts.append(amount)

        # The attribute is read-only, so store an updated copy rather than
        # mutating the existing dictionary in place.
        amounts = dict(self.amounts)
        amounts[component.identifier] = tuple(remaining_amounts)

        self._set_value("amounts", amounts)

    def get_amounts(self, component):
        """Looks up the amounts of a component of this substance.

        Parameters
        ----------
        component: str or Component
            The component, or the identifier of the component, whose
            amounts should be returned.

        Returns
        -------
        tuple of Amount
            The amounts of the component in this substance.
        """
        assert isinstance(component, (str, Component))

        # Strings are used directly as the identifier to look up.
        if isinstance(component, str):
            identifier = component
        else:
            identifier = component.identifier

        return self.amounts[identifier]

    def get_molecules_per_component(
        self,
        maximum_molecules,
        tolerance=None,
        count_exact_amount=True,
        truncate_n_molecules=True,
    ):
        """Converts the amounts of each component of this substance into a
        number of molecules, subject to a maximum total number of molecules.

        Parameters
        ----------
        maximum_molecules: int
            The maximum number of molecules.
        tolerance: float, optional
            The tolerance passed to `to_number_of_molecules` when converting
            each amount. As an example, a mole fraction may not be
            reproducible to within the tolerance if the total number of
            molecules is not sufficiently large.
        count_exact_amount: bool
            Whether components present in an exact amount (i.e. defined with
            an ``ExactAmount``) count towards the maximum number of molecules.
            This may be set false, for example, when building a separate
            solvated protein (n = 1) and solvated protein + ligand complex
            (n = 2) system which should both contain the same number of
            solvent molecules.
        truncate_n_molecules: bool
            Whether to try to reduce the number of molecules when the total
            is over the specified maximum. If False, an exception is raised
            in this case.

            The truncation removes one molecule at a time from the component
            which currently has the most mole fraction derived molecules. At
            most as many molecules as there are components are removed (e.g.
            at most two for a binary substance). An exception is raised if
            the total is still over the maximum afterwards.

        Returns
        -------
        dict of str and int
            The number of molecules of each component, keyed by the
            identifier of the component.

        Raises
        ------
        ValueError
            If the counted exact amounts alone exceed `maximum_molecules`,
            or if the total number of molecules exceeds it after any
            truncation.
        """

        remaining_molecule_slots = maximum_molecules

        # Exact amounts take up their slots first (when they are counted);
        # the remaining slots are what the conversions below are given.
        for component in self.components:

            for amount in self.amounts[component.identifier]:

                if isinstance(amount, ExactAmount) and count_exact_amount:
                    remaining_molecule_slots -= amount.value

        if remaining_molecule_slots < 0:

            raise ValueError(
                f"The required number of molecules {maximum_molecules - remaining_molecule_slots} "
                f"exceeds the provided maximum number ({maximum_molecules}).")

        # The total number of molecules of each component.
        n_molecules = defaultdict(int)
        # The number of molecules which came from mole fraction amounts.
        n_mole_fractions = defaultdict(int)

        for component in self.components:

            component_id = component.identifier

            for amount in self.amounts[component_id]:

                n_amount_molecules = amount.to_number_of_molecules(
                    remaining_molecule_slots, tolerance)

                n_molecules[component_id] += n_amount_molecules

                if isinstance(amount, MoleFraction):
                    n_mole_fractions[component_id] += n_amount_molecules

        def count_constrained_molecules():
            # Exact amounts only count towards the limit when requested.
            counts = n_molecules if count_exact_amount else n_mole_fractions
            return sum(counts.values())

        total_molecules = count_constrained_molecules()

        # Attempt to fix rounding issues which lead to more molecules being
        # added than the maximum, making at most one attempt per component.
        for _ in range(len(self.components)):

            if not (truncate_n_molecules
                    and total_molecules > maximum_molecules
                    and sum(n_mole_fractions.values()) > 0):
                break

            largest_component = max(n_mole_fractions,
                                    key=n_mole_fractions.get)

            n_molecules[largest_component] -= 1
            n_mole_fractions[largest_component] -= 1

            total_molecules = count_constrained_molecules()

        if total_molecules > maximum_molecules:

            raise ValueError(
                f"The total number of molecules ({total_molecules}) exceeds the maximum "
                f"number ({maximum_molecules}). This should not be able to happen."
            )

        return n_molecules

    @staticmethod
    def calculate_aqueous_ionic_mole_fraction(ionic_strength):
        """Computes the mole fraction of ions which gives an aqueous
        system the requested ionic strength.

        Parameters
        ----------
        ionic_strength: pint.Quantity
            The ionic strength in units of molar.

        Returns
        -------
        The mole fraction of ions, computed as ``ionic_strength /
        (ionic_strength + water_molarity)``.

        NOTE(review): the result of the division is returned as-is and is
        not explicitly converted to a plain ``float`` here - confirm what
        callers expect.
        """

        # Taken from YANK:
        # https://github.com/choderalab/yank/blob/4dfcc8e127c51c20180fe6caeb49fcb1f21730c6/Yank/pipeline.py#L1869
        water_mass_per_volume = 998.23 * unit.gram / unit.litre
        water_mass_per_mole = 18.01528 * unit.gram / unit.mole

        water_molarity = water_mass_per_volume / water_mass_per_mole

        return ionic_strength / (ionic_strength + water_molarity)

    def __str__(self):
        """Returns the identifier of this substance as its string form."""
        identifier = self.identifier
        return identifier

    def __repr__(self):
        """Returns ``<Substance ...>`` wrapped around the string form of
        this object.
        """
        return f"<Substance {self!s}>"

    def __hash__(self):
        """Hashes this substance by hashing its identifier."""
        identifier = self.identifier
        return hash(identifier)

    def __eq__(self, other):
        """Two substances are equal when they are of exactly the same type
        and have the same identifier.

        The identifiers are compared directly rather than via their hashes:
        equal hashes do not imply equal values, so a hash comparison could
        report two different substances as equal on a hash collision.
        """
        if not type(self) == type(other):
            return False

        return self.identifier == other.identifier

    def __ne__(self, other):
        """Returns the negation of the ``==`` comparison with `other`."""
        is_equal = self == other
        return not is_equal

    def __setstate__(self, state):
        """Restores this object from `state`, first converting each entry
        of ``state["amounts"]`` to a tuple.

        Parameters
        ----------
        state: dict
            The state to restore from. It must contain an ``"amounts"``
            entry whose values are lists or tuples. The entry is updated
            in place before being passed to the parent class.
        """
        # Handle the list -> tuple conversion manually.
        assert "amounts" in state

        amounts_by_key = state["amounts"]

        for key, amounts in amounts_by_key.items():

            assert isinstance(amounts, (list, tuple))
            # Only the value of an existing key is replaced, so it is safe
            # to do this while iterating.
            amounts_by_key[key] = tuple(amounts)

        super(Substance, self).__setstate__(state)

    def __len__(self):
        """Returns the number of components in this substance."""
        components = self.components
        return len(components)

    def __iter__(self):
        """Returns an iterator over the components of this substance."""
        components = self.components
        return iter(components)

    def validate(self, attribute_type=None):
        """Validates this substance along with its components and amounts.

        After the parent class checks, this makes sure that every component
        is a `Component` with a non-empty tuple of `Amount` objects stored
        under its identifier, validates each component and amount in turn
        and, if any `MoleFraction` amounts are present, checks that they
        sum to one.

        Parameters
        ----------
        attribute_type
            Passed unchanged to the parent class and to the ``validate``
            method of each component and amount.

        Raises
        ------
        AssertionError
            If a component, or the amounts stored for it, are missing,
            empty or of the wrong type.
        ValueError
            If the mole fractions of the substance do not sum to 1.0.
        """
        super(Substance, self).validate(attribute_type)

        # Validate all of the components.
        assert all(
            isinstance(component, Component) for component in self.components)
        assert all(component.identifier in self.amounts
                   for component in self.components)

        # Validate the amounts.
        assert all(
            isinstance(amounts, tuple) for amounts in self.amounts.values())
        assert all(len(amounts) > 0 for amounts in self.amounts.values())

        for component in self.components:

            component.validate(attribute_type)

            component_amounts = self.amounts[component.identifier]
            assert all(
                isinstance(amount, Amount) for amount in component_amounts)

            for amount in component_amounts:
                amount.validate(attribute_type)

        # Gather the mole fraction values, grouped by component.
        mole_fractions = [[
            amount.value for amount in amounts
            if isinstance(amount, MoleFraction)
        ] for amounts in self.amounts.values()]

        # Nothing more to check when no mole fraction amounts are present.
        if not any(mole_fractions):
            return

        total_mole_fraction = 0.0

        for component_fractions in mole_fractions:
            total_mole_fraction += sum(component_fractions)

        if np.isclose(total_mole_fraction, 1.0):
            return

        raise ValueError(f"The total mole fraction of this substance "
                         f"({total_mole_fraction}) must equal 1.0")
Example #24
0
class Component(AttributeClass):
    """Defines a single component in a chemical system, as well
    as its role within the system (if any).
    """

    class Role(Enum):
        """An enum which describes the role of a component in the system,
        such as whether the component is a solvent, a solute, a receptor etc.

        These roles are mainly used by workflow to identify the correct
        species in a system, such as when doing docking or performing
        solvation free energy calculations.
        """

        Solvent = "solv"
        Solute = "sol"

        Ligand = "lig"
        Receptor = "rec"

    smiles = Attribute(
        docstring="The SMILES pattern which describes this component.",
        type_hint=str,
        read_only=True,
    )
    role = Attribute(
        docstring="The role of this component in the system.",
        type_hint=Role,
        default_value=Role.Solvent,
        read_only=True,
    )

    @property
    def identifier(self):
        """str: A unique identifier for this component, built from its
        SMILES pattern followed by the value of its role in curly braces."""
        smiles, role = self.smiles, self.role
        return f"{smiles}{{{role.value}}}"

    def __init__(self, smiles=UNDEFINED, role=Role.Solvent):
        """Constructs a new Component object from a SMILES pattern
        and a role.

        Parameters
        ----------
        smiles: str
            A SMILES descriptor of the component. Unless left as
            `UNDEFINED`, the pattern is standardized before being stored.
        role: Component.Role
            The role of this component in the system.
        """
        # An undefined pattern is stored untouched, anything else is
        # standardized first.
        smiles = self._standardize_smiles(smiles) if smiles != UNDEFINED else smiles

        self._set_value("smiles", smiles)
        self._set_value("role", role)

    @staticmethod
    def _standardize_smiles(smiles):
        """Standardizes a SMILES pattern using the `cmiles` library. The
        isomeric form is preferred, with the non-isomeric form used as a
        fall-back.

        Parameters
        ----------
        smiles: str
            The SMILES pattern to standardize.

        Returns
        -------
        The standardized SMILES pattern.
        """
        from cmiles.utils import load_molecule, mol_to_smiles

        molecule = load_molecule(smiles, toolkit="rdkit")

        # Options shared by the isomeric and non-isomeric attempts.
        shared_options = {"explicit_hydrogen": False, "mapped": False}

        try:
            # Try to make the smiles isomeric.
            return mol_to_smiles(molecule, isomeric=True, **shared_options)
        except ValueError:
            # Fall-back to non-isomeric.
            return mol_to_smiles(molecule, isomeric=False, **shared_options)

    def __str__(self):
        """Returns the identifier of this component."""
        return self.identifier

    def __repr__(self):
        """Returns the class name and string form of this component,
        wrapped in angle brackets."""
        class_name = self.__class__.__name__
        return f"<{class_name} {self!s}>"

    def __hash__(self):
        """Hashes this component by hashing its identifier."""
        return hash(self.identifier)

    def __eq__(self, other):
        """Two components are equal when they are of exactly the same
        type and have the same identifier."""
        if not type(self) == type(other):
            return False

        return self.identifier == other.identifier

    def __ne__(self, other):
        """Returns the negation of the ``==`` comparison with `other`."""
        is_equal = self == other
        return not is_equal

    def __setstate__(self, state):
        """Restores this object from `state`, replacing ``state["smiles"]``
        with its standardized form before handing over to the parent class.
        """
        # Make sure the smiles pattern is standardized.
        standardized_smiles = Component._standardize_smiles(state["smiles"])
        state["smiles"] = standardized_smiles

        super(Component, self).__setstate__(state)
Example #25
0
class Batch(AttributeClass):
    """Represents a batch of physical properties which are being estimated by
    the server for a given set of force field parameters.

    The expectation is that this object will be passed between calculation layers,
    whereby each layer will attempt to estimate each of the `queued_properties`.
    Those properties which can be estimated will be moved to the `estimated_properties`
    set, while those that couldn't will remain in the `queued_properties` set ready
    for the next layer.
    """

    id = Attribute(
        docstring="The unique id of this batch.",
        type_hint=str,
        # A fresh id is generated from a random UUID with the dashes removed.
        default_value=lambda: str(uuid.uuid4()).replace("-", ""),
    )

    force_field_id = Attribute(
        docstring="The id of the force field being used to estimate "
        "this batch of properties.",
        type_hint=str,
    )
    options = Attribute(
        docstring="The options being used to estimate this batch.",
        type_hint=RequestOptions,
    )
    parameter_gradient_keys = Attribute(
        docstring="The parameters that this batch of physical properties "
        "should be differentiated with respect to.",
        type_hint=list,
    )

    queued_properties = Attribute(
        docstring="The set of properties which have yet to be estimated.",
        type_hint=list,
        default_value=[],
    )
    estimated_properties = Attribute(
        docstring=
        "The set of properties which have been successfully estimated.",
        type_hint=list,
        default_value=[],
    )
    unsuccessful_properties = Attribute(
        docstring="The set of properties which could not be estimated.",
        type_hint=list,
        default_value=[],
    )
    # NOTE(review): the previous description of this attribute was a copy of
    # a property list description. The wording below is inferred from the
    # attribute name and the `EvaluatorException` check in `validate` -
    # confirm against the code which populates this list.
    exceptions = Attribute(
        docstring="The set of exceptions which were raised while "
        "attempting to estimate this batch of properties.",
        type_hint=list,
        default_value=[],
    )

    def validate(self, attribute_type=None):
        """Validates this batch, checking the type of every entry stored in
        its list attributes after running the parent class checks.

        Parameters
        ----------
        attribute_type
            Passed unchanged to the parent class.

        Raises
        ------
        AssertionError
            If any queued, estimated or unsuccessful property is not a
            `PhysicalProperty`, any exception is not an `EvaluatorException`
            or any gradient key is not a `ParameterGradientKey`.
        """
        super(Batch, self).validate(attribute_type)

        # The three property lists share the same element type.
        for properties in (
                self.queued_properties,
                self.estimated_properties,
                self.unsuccessful_properties,
        ):
            assert all(isinstance(x, PhysicalProperty) for x in properties)

        assert all(isinstance(x, EvaluatorException) for x in self.exceptions)
        assert all(
            isinstance(x, ParameterGradientKey)
            for x in self.parameter_gradient_keys)
Example #26
0
class RequestOptions(AttributeClass):
    """The options to use when requesting a set of physical
    properties be estimated by the server.
    """

    calculation_layers = Attribute(
        docstring="The calculation layers which may be used to "
        "estimate the set of physical properties. The order in which "
        "the layers appears in this list determines the order in which "
        "the layers will attempt to estimate the data set.",
        type_hint=list,
        default_value=["ReweightingLayer", "SimulationLayer"],
    )
    calculation_schemas = Attribute(
        docstring="The schemas that each calculation layer should "
        "use when estimating the set of physical properties. The "
        "dictionary should be of the form [property_type][layer_type].",
        type_hint=dict,
        optional=True,
    )

    batch_mode = Attribute(
        docstring="The way in which the server should batch together "
        "properties to estimate. Properties will only be marked as finished "
        "when all properties in a single batch are completed.",
        type_hint=BatchMode,
        default_value=BatchMode.SharedComponents,
        optional=True,
    )

    def add_schema(self, layer_type, property_type, schema):
        """A convenience function for adding a calculation schema
        to the schema dictionary.

        Any schema previously stored for the same property and layer
        type is replaced.

        Parameters
        ----------
        layer_type: str or type of CalculationLayer
            The layer to associate the schema with. When a type is
            passed, its ``__name__`` is used.
        property_type: str or type of PhysicalProperty
            The class of property to associate the schema
            with. When a type is passed, its ``__name__`` is used.
        schema: CalculationLayerSchema
            The schema to add.

        Raises
        ------
        AssertionError
            If the layer type has not been registered, or if the schema
            is not an instance of the schema type required by the layer.
        """

        # Validate the schema.
        schema.validate()

        # Both the layer and property type may be given either by name or
        # as the type itself - reduce both to their names.
        if isinstance(layer_type, type):
            layer_type = layer_type.__name__
        if isinstance(property_type, type):
            property_type = property_type.__name__

        # Make sure the schema is compatible with the layer. The same
        # `isinstance` check is used here as in `validate`, so that any
        # schema which is accepted here is also accepted there.
        assert layer_type in registered_calculation_layers
        calculation_layer = registered_calculation_layers[layer_type]
        assert isinstance(schema, calculation_layer.required_schema_type())

        if self.calculation_schemas == UNDEFINED:
            self.calculation_schemas = {}

        layer_schemas = self.calculation_schemas.setdefault(property_type, {})
        layer_schemas[layer_type] = schema

    def validate(self, attribute_type=None):
        """Validates these options, checking that every calculation layer
        is the name of a registered layer, and that every stored schema
        belongs to one of the listed layers and is of the type which
        that layer requires.

        Parameters
        ----------
        attribute_type
            Passed unchanged to the parent class.

        Raises
        ------
        AssertionError
            If any of the checks described above fail.
        """

        super(RequestOptions, self).validate(attribute_type)

        assert all(isinstance(x, str) for x in self.calculation_layers)
        assert all(x in registered_calculation_layers
                   for x in self.calculation_layers)

        if self.calculation_schemas == UNDEFINED:
            return

        for property_type in self.calculation_schemas:

            layer_schemas = self.calculation_schemas[property_type]
            assert isinstance(layer_schemas, dict)

            for layer_type in layer_schemas:

                # Schemas may only be provided for layers which have
                # actually been requested.
                assert layer_type in self.calculation_layers
                calculation_layer = registered_calculation_layers[layer_type]

                schema = layer_schemas[layer_type]
                required_type = calculation_layer.required_schema_type()
                assert isinstance(schema, required_type)