예제 #1
0
        def __init__(self):
            """Initialise every option with its default value."""

            # Both option objects are constructed without any arguments.
            self.connection_options = ConnectionOptions()
            self.estimation_options = RequestOptions()

            # No data set, weights or denominators are defined initially.
            self.data_set_path = ""
            self.weights, self.denominators = {}, {}

            # Polling interval, stored as a plain number.
            self.polling_interval = 600
예제 #2
0
def main():
    """Estimate the pure and binary data sets with each calculation layer.

    A server is set up on port 8001 and the merged data set is submitted
    twice - once restricted to the `SimulationLayer` and once to the
    `ReweightingLayer` - requesting gradients with respect to two vdW
    parameters. Each set of results is written to its own JSON file.
    """

    setup_timestamp_logging()

    server_port = 8001

    # Load in the force field
    force_field_source = SmirnoffForceFieldSource.from_path(
        "smirnoff99Frosst-1.1.0.offxml")

    # Combine the pure and the binary properties into a single data set.
    data_set = PhysicalPropertyDataSet.from_json("pure_data_set.json")
    binary_data_set = PhysicalPropertyDataSet.from_json("binary_data_set.json")
    data_set.merge(binary_data_set)

    # Set up a server object to run the calculations using, and keep it
    # open for as long as estimates are being requested.
    with setup_server(backend_type=BackendType.LocalGPU,
                      max_number_of_workers=1,
                      port=server_port):

        client = EvaluatorClient(ConnectionOptions(server_port=server_port))

        for layer in ("SimulationLayer", "ReweightingLayer"):

            # Only allow the current layer to be used for this request.
            request_options = RequestOptions()
            request_options.calculation_layers = [layer]

            gradient_keys = [
                ParameterGradientKey(tag="vdW",
                                     smirks="[#6X4:1]",
                                     attribute=attribute)
                for attribute in ("epsilon", "rmin_half")
            ]

            request, _ = client.request_estimate(
                property_set=data_set,
                force_field_source=force_field_source,
                options=request_options,
                parameter_gradient_keys=gradient_keys,
            )

            # Wait for the results.
            results, _ = request.results(True, 5)

            # Convert the CamelCase layer name into snake_case so that it
            # can be used as part of the output file name.
            snake_case_layer = re.sub(r"(?<!^)(?=[A-Z])", "_", layer).lower()
            results.json(f"pure_binary_{snake_case_layer}.json", True)
예제 #3
0
def test_run_command(runner, monkeypatch):
    """Test that the benchmark `run` command writes the expected results file.

    The helpers which the command would use to prepare a restart, load the
    force field and run the calculations are monkeypatched, so only the
    command's own orchestration is exercised.

    Parameters
    ----------
    runner
        The fixture used to invoke the command.
    monkeypatch
        The pytest fixture used to replace the helpers of the `run` module.
    """

    from nonbonded.cli.projects.benchmark import run

    # The restart helper reports an existing (successful) result, while the
    # patched calculation itself yields an empty `RequestResult`.
    monkeypatch.setattr(run, "_prepare_restart", lambda *args:
                        (None, successful_result()))
    monkeypatch.setattr(run, "_load_force_field", lambda *args: None)
    monkeypatch.setattr(run, "_run_calculations",
                        lambda *args: RequestResult())

    # Save a copy of the result model.
    with temporary_cd():

        # Create mock inputs
        with open("server-config.json", "w") as file:

            file.write(
                EvaluatorServerConfig(backend_config=DaskLocalClusterConfig(
                    resources_per_worker=ComputeResources())).json())

        RequestOptions().json("estimation-options.json")

        result = runner.invoke(run_command())

        # Surface the command's own failure first. Otherwise a failed run
        # would be masked by a `FileNotFoundError` raised when trying to open
        # the missing "results.json" file below.
        if result.exit_code != 0:
            raise result.exception

        with open("results.json") as file:
            assert successful_result().json() == file.read()
예제 #4
0
def main():
    """Estimate the hydration data set using the `SimulationLayer` only.

    A server is set up on port 8002, the data set is submitted with a custom
    `SolvationFreeEnergy` schema, and the results are stored in
    "results.json".
    """

    setup_timestamp_logging()

    server_port = 8002

    # Load in the force field
    force_field_source = SmirnoffForceFieldSource.from_path(
        "smirnoff99Frosst-1.1.0.offxml")

    # Load the hydration data set and write it straight back to the same
    # file with formatting enabled.
    data_set = PhysicalPropertyDataSet.from_json("hydration_data_set.json")
    data_set.json("hydration_data_set.json", format=True)

    # Set up a server object to run the calculations using, and keep it
    # open while the estimate is being requested.
    with setup_server(backend_type=BackendType.LocalGPU,
                      max_number_of_workers=1,
                      port=server_port):

        client = EvaluatorClient(ConnectionOptions(server_port=server_port))

        # Restrict the request to direct simulation, using the schema
        # returned by `_get_fixed_lambda_schema`.
        request_options = RequestOptions()
        request_options.calculation_layers = ["SimulationLayer"]
        request_options.add_schema(
            "SimulationLayer",
            "SolvationFreeEnergy",
            _get_fixed_lambda_schema(),
        )

        request, _ = client.request_estimate(
            property_set=data_set,
            force_field_source=force_field_source,
            options=request_options,
        )

        # Wait for the results, then save them to file.
        results, _ = request.results(True, 60)
        results.json("results.json", True)
예제 #5
0
    def _batch_by_same_component(self, submission, force_field_id):
        """Splits a submission into one batch per substance in its data set.

        Every substance reported by `submission.dataset.substances` receives
        its own batch, which contains all of the properties the data set
        returns for that substance.

        Parameters
        ----------
        submission: EvaluatorClient._Submission
            The full request submission.
        force_field_id: str
            The unique id of the force field to use.

        Returns
        -------
        list of Batch
            The property batches.
        """

        # Ids which are already in use by either queued or finished batches.
        used_ids = set(self._queued_batches.keys())
        used_ids.update(self._finished_batches.keys())

        dataset = submission.dataset
        batches = []

        for substance in dataset.substances:

            new_batch = Batch()
            new_batch.force_field_id = force_field_id
            new_batch.enable_data_caching = self._enable_data_caching

            # Regenerate the id for as long as it clashes with one which is
            # already in use (although this is very unlikely to ever happen).
            while new_batch.id in used_ids:
                new_batch.id = str(uuid.uuid4()).replace("-", "")

            new_batch.queued_properties = list(
                dataset.properties_by_substance(substance))

            # Round-trip the options through JSON so that each batch gets
            # its own copy.
            options_json = submission.options.json()
            new_batch.options = RequestOptions.parse_json(options_json)

            new_batch.parameter_gradient_keys = copy.deepcopy(
                submission.parameter_gradient_keys)

            used_ids.add(new_batch.id)
            batches.append(new_batch)

        return batches
예제 #6
0
        def from_json(cls, json_source):
            """Creates this class from its JSON representation.

            Parameters
            ----------
            json_source: str or file-like object
                Either the path to a file which contains the JSON
                representation of this class, or an open file-like object to
                read the representation from. Note that a `str` is always
                treated as a file path, never as raw JSON.

            Returns
            -------
                An instance of `cls` populated from the JSON contents.

            Raises
            ------
            AssertionError
                If any of the required keys is missing from the JSON contents.
            """

            if isinstance(json_source, str):
                with open(json_source, "r") as file:
                    dictionary = json.load(file, cls=TypedJSONDecoder)
            else:
                dictionary = json.load(json_source, cls=TypedJSONDecoder)

            # The polling interval is optional and falls back to 600.
            dictionary.setdefault("polling_interval", 600)

            required_keys = (
                "connection_options",
                "estimation_options",
                "data_set_path",
                "weights",
                "denominators",
            )
            missing_keys = [key for key in required_keys if key not in dictionary]

            if missing_keys:
                # Raised explicitly, rather than through an `assert` statement,
                # so that the check is not stripped when running with `-O`.
                # The exception type is kept for backwards compatibility.
                raise AssertionError(
                    "The options are missing the following required keys: "
                    + ", ".join(missing_keys)
                )

            value = cls()

            value.connection_options = ConnectionOptions()
            value.connection_options.__setstate__(dictionary["connection_options"])

            value.estimation_options = RequestOptions()
            value.estimation_options.__setstate__(dictionary["estimation_options"])

            value.data_set_path = dictionary["data_set_path"]

            # Store shallow copies so the parsed dictionary is not shared.
            value.weights = dict(dictionary["weights"])
            value.denominators = dict(dictionary["denominators"])

            value.polling_interval = dictionary["polling_interval"]

            return value
예제 #7
0
def test_base_layer():
    """Test scheduling a `DummyCalculationLayer` for two dummy densities.

    The callback passed to the layer checks that exactly one property was
    estimated and that two exceptions were reported.
    """

    properties_to_estimate = [
        create_dummy_property(Density),
        create_dummy_property(Density),
    ]

    dummy_options = RequestOptions()

    batch = server.Batch()
    batch.queued_properties = properties_to_estimate
    batch.options = dummy_options
    batch.force_field_id = ""
    batch.options.calculation_schemas = {
        "Density": {
            "DummyCalculationLayer": CalculationLayerSchema()
        }
    }

    with tempfile.TemporaryDirectory() as temporary_directory:

        with temporarily_change_directory(temporary_directory):

            # Create a simple calculation backend to test with. It is used as
            # a context manager (as in the other tests) so that it is always
            # shut down again, even when the test fails.
            with DaskLocalCluster() as test_backend:

                # Create a simple storage backend to test with.
                test_storage = LocalFileStorage()

                layer_directory = "dummy_layer"
                makedirs(layer_directory)

                def dummy_callback(returned_request):

                    assert len(returned_request.estimated_properties) == 1
                    assert len(returned_request.exceptions) == 2

                dummy_layer = DummyCalculationLayer()

                dummy_layer.schedule_calculation(
                    test_backend,
                    test_storage,
                    layer_directory,
                    batch,
                    dummy_callback,
                    True,
                )
예제 #8
0
    def base_function(**kwargs):
        """Run the calculations and store the outcome in "results.json".

        The `restart`, `server_config`, `request_options` and
        `polling_interval` entries are removed from `kwargs` as they are
        consumed.
        """

        from openff.evaluator.client import RequestOptions

        # Load in the force field.
        force_field = _load_force_field()

        # Load any previous results and the data set to estimate.
        restart = kwargs.pop("restart")
        data_set, existing_results = _prepare_restart(restart)

        # Load the server configuration, disabling data caching when the
        # configuration leaves it unset.
        server_config_path = kwargs.pop("server_config")
        server_config = EvaluatorServerConfig.parse_file(server_config_path)

        if server_config.enable_data_caching is None:
            server_config.enable_data_caching = False

        # Load in the request options
        request_options_path = kwargs.pop("request_options")
        request_options = RequestOptions.from_json(request_options_path)

        polling_interval = kwargs.pop("polling_interval")

        results = _run_calculations(
            data_set,
            force_field,
            polling_interval,
            request_options,
            server_config,
        )

        # Keep a backup of the fresh results in case merging in the already
        # estimated properties fails for some reason.
        backup_path = "results.tmp.json"
        results.json(backup_path)

        if existing_results is not None:

            previous_properties = existing_results.estimated_properties.properties

            results.estimated_properties.add_properties(
                *previous_properties,
                validate=False,
            )

        # Save the results to disk.
        results.json("results.json")

        # The backup is no longer needed once the final results are stored.
        if os.path.isfile(backup_path):
            os.unlink(backup_path)
예제 #9
0
def test_same_component_batching():
    """Test that properties are batched by the substance they were measured for.

    A density and an enthalpy of vaporization are defined for each of two
    substances, which should yield two batches of two properties each.
    """

    state = ThermodynamicState(temperature=1.0 * unit.kelvin,
                               pressure=1.0 * unit.atmosphere)

    def density(*components):
        # A placeholder density for a substance built from `components`.
        return Density(
            thermodynamic_state=state,
            substance=Substance.from_components(*components),
            value=0.0 * unit.kilogram / unit.meter**3,
        )

    def enthalpy_of_vaporization(*components):
        # A placeholder enthalpy for a substance built from `components`.
        return EnthalpyOfVaporization(
            thermodynamic_state=state,
            substance=Substance.from_components(*components),
            value=0.0 * unit.kilojoule / unit.mole,
        )

    data_set = PhysicalPropertyDataSet()
    data_set.add_properties(
        density("O", "C"),
        enthalpy_of_vaporization("O", "C"),
        density("O", "CO"),
        enthalpy_of_vaporization("O", "CO"),
    )

    submission = EvaluatorClient._Submission()
    submission.dataset = data_set
    submission.options = RequestOptions()

    with DaskLocalCluster() as calculation_backend:

        evaluator_server = EvaluatorServer(calculation_backend)
        batches = evaluator_server._batch_by_same_component(submission, "")

    assert len(batches) == 2

    for batch in batches[:2]:
        assert len(batch.queued_properties) == 2
예제 #10
0
def test_launch_batch():
    """Test launching a batch of two dummy densities on a server.

    The batch is restricted to the `QuickCalculationLayer`; once no
    properties remain queued, one property should have been estimated and
    one should be marked as unsuccessful.
    """

    # Set up a dummy data set
    dummy_properties = [
        create_dummy_property(Density),
        create_dummy_property(Density),
    ]

    data_set = PhysicalPropertyDataSet()
    data_set.add_properties(*dummy_properties)

    # Build the batch which will be launched.
    request_options = RequestOptions()
    request_options.calculation_layers = ["QuickCalculationLayer"]
    request_options.calculation_schemas = {
        "Density": {"QuickCalculationLayer": CalculationLayerSchema()}
    }

    batch = Batch()
    batch.force_field_id = ""
    batch.options = request_options
    batch.parameter_gradient_keys = []
    batch.queued_properties = list(data_set)
    batch.validate()

    with tempfile.TemporaryDirectory() as directory:

        with temporarily_change_directory(directory):

            with DaskLocalCluster() as calculation_backend:

                evaluator_server = EvaluatorServer(
                    calculation_backend=calculation_backend,
                    working_directory=directory,
                )

                evaluator_server._queued_batches[batch.id] = batch
                evaluator_server._launch_batch(batch)

                # Poll until every queued property has been processed.
                while batch.queued_properties:
                    sleep(0.01)

                assert len(batch.estimated_properties) == 1
                assert len(batch.unsuccessful_properties) == 1
예제 #11
0
    class OptionsFile:
        """Represents the set of options that a `Evaluator_SMIRNOFF`
        target will be run with.

        Attributes
        ----------
        connection_options: openff.evaluator.client.ConnectionOptions
            The options for how the `evaluator` client should
            connect to a running server instance.
        estimation_options: openff.evaluator.client.RequestOptions
            The options for how properties should be estimated by the
            `evaluator` (e.g. the uncertainties to which properties
            should be estimated).
        data_set_path: str
            The path to a JSON serialized PhysicalPropertyDataSet which
            contains those physical properties which will be optimised
            against.
        weights: dict of float
            The weighting of each property which will be optimised against.
        denominators: dict of str and unit.Quantity
            The denominators will be used to remove units from the properties
            and scale their values.
        polling_interval: float
            The time interval with which to check whether the evaluator has
            finished fulfilling the request (in seconds).
        """

        def __init__(self):
            """Initialise every option with its default value."""

            self.connection_options = ConnectionOptions()
            self.estimation_options = RequestOptions()

            self.data_set_path = ""
            self.weights = {}
            self.denominators = {}

            self.polling_interval = 600

        def to_json(self):
            """Converts this class into a JSON string.

            Returns
            -------
            str
                The JSON representation of this class.
            """

            value = {
                "connection_options": self.connection_options.__getstate__(),
                "estimation_options": self.estimation_options.__getstate__(),
                "data_set_path": self.data_set_path,
                # Shallow copies, so that the serialized dictionary does not
                # share its containers with this object.
                "weights": dict(self.weights),
                "denominators": dict(self.denominators),
                "polling_interval": self.polling_interval,
            }

            return json.dumps(
                value,
                sort_keys=True,
                indent=4,
                separators=(",", ": "),
                cls=TypedJSONEncoder,
            )

        @classmethod
        def from_json(cls, json_source):
            """Creates this class from its JSON representation.

            Parameters
            ----------
            json_source: str or file-like object
                Either the path to a file which contains the JSON
                representation of this class, or an open file-like object to
                read the representation from. Note that a `str` is always
                treated as a file path, never as raw JSON.

            Returns
            -------
            OptionsFile
                An instance of `cls` populated from the JSON contents.

            Raises
            ------
            AssertionError
                If any of the required keys is missing from the JSON contents.
            """

            if isinstance(json_source, str):
                with open(json_source, "r") as file:
                    dictionary = json.load(file, cls=TypedJSONDecoder)
            else:
                dictionary = json.load(json_source, cls=TypedJSONDecoder)

            # The polling interval is optional and falls back to 600.
            dictionary.setdefault("polling_interval", 600)

            required_keys = (
                "connection_options",
                "estimation_options",
                "data_set_path",
                "weights",
                "denominators",
            )
            missing_keys = [key for key in required_keys if key not in dictionary]

            if missing_keys:
                # Raised explicitly, rather than through an `assert` statement,
                # so that the check is not stripped when running with `-O`.
                # The exception type is kept for backwards compatibility.
                raise AssertionError(
                    "The options are missing the following required keys: "
                    + ", ".join(missing_keys)
                )

            value = cls()

            value.connection_options = ConnectionOptions()
            value.connection_options.__setstate__(dictionary["connection_options"])

            value.estimation_options = RequestOptions()
            value.estimation_options.__setstate__(dictionary["estimation_options"])

            value.data_set_path = dictionary["data_set_path"]

            # Store shallow copies so the parsed dictionary is not shared.
            value.weights = dict(dictionary["weights"])
            value.denominators = dict(dictionary["denominators"])

            value.polling_interval = dictionary["polling_interval"]

            return value
def test_workflow_layer():
    """Test the `WorkflowLayer` calculation layer through the `SimulationLayer`,
    which is the simplest implementation of the abstract layer.

    Two dummy densities are estimated using a workflow made of a single
    dummy protocol; the callback checks that both properties were estimated
    and that no exceptions were reported."""

    dummy_properties = [
        create_dummy_property(Density),
        create_dummy_property(Density),
    ]

    # Build a very simple workflow which just returns a placeholder value.
    placeholder_value = Observable(
        (1 * unit.kelvin).plus_minus(0.1 * unit.kelvin))

    dummy_protocol = DummyProtocol("protocol_a")
    dummy_protocol.input_value = placeholder_value

    workflow_schema = WorkflowSchema()
    workflow_schema.protocol_schemas = [dummy_protocol.schema]
    workflow_schema.final_value_source = ProtocolPath(
        "output_value", dummy_protocol.id)

    simulation_schema = SimulationSchema()
    simulation_schema.workflow_schema = workflow_schema

    request_options = RequestOptions()
    request_options.add_schema("SimulationLayer", "Density", simulation_schema)

    batch = server.Batch()
    batch.queued_properties = dummy_properties
    batch.options = request_options

    def check_returned_request(returned_request):
        # Both properties should be estimated without any failures.
        assert len(returned_request.estimated_properties) == 2
        assert len(returned_request.exceptions) == 0

    with tempfile.TemporaryDirectory() as directory:

        with temporarily_change_directory(directory):

            # Create a directory for the layer.
            layer_directory = "simulation_layer"
            os.makedirs(layer_directory)

            # Set-up a simple storage backend and add a force field to it.
            force_field_source = SmirnoffForceFieldSource.from_path(
                "smirnoff99Frosst-1.1.0.offxml")

            storage_backend = LocalFileStorage()
            batch.force_field_id = storage_backend.store_force_field(
                force_field_source)

            # Create a simple calculation backend to test with.
            with DaskLocalCluster() as calculation_backend:

                SimulationLayer().schedule_calculation(
                    calculation_backend,
                    storage_backend,
                    layer_directory,
                    batch,
                    check_returned_request,
                    True,
                )
예제 #13
0
    def _generate(
        cls,
        model: Benchmark,
        conda_environment,
        max_time,
        evaluator_preset,
        evaluator_port,
        n_evaluator_workers,
        include_results,
        reference_data_sets: Optional[List[Union[DataSet, QCDataSet]]],
        optimization_result: Optional[OptimizationResult],
    ):
        """Generate the input files for a benchmark in the current directory.

        After delegating to the parent implementation, this writes the
        benchmark definition ("benchmark.json"), retrieves the force field
        and data sets, writes the Evaluator server configuration
        ("server-config.json"), a submission script and the request options
        ("estimation-options.json"), and optionally retrieves any existing
        results.
        """

        from openff.evaluator.client import RequestOptions

        super(BenchmarkInputFactory, cls)._generate(
            model=model,
            conda_environment=conda_environment,
            max_time=max_time,
            evaluator_preset=evaluator_preset,
            evaluator_port=evaluator_port,
            n_evaluator_workers=n_evaluator_workers,
            include_results=include_results,
            reference_data_sets=reference_data_sets,
            optimization_result=optimization_result,
        )

        # Save the benchmark definition in the directory
        with open("benchmark.json", "w") as benchmark_file:
            benchmark_file.write(model.json())

        # Retrieve the force field followed by the data sets.
        cls._retrieve_force_field(model, optimization_result)
        cls._retrieve_data_sets(model, reference_data_sets)

        # Create and store an Evaluator server configuration
        server_config = cls._generate_evaluator_config(
            preset_name=evaluator_preset,
            conda_environment=conda_environment,
            n_workers=n_evaluator_workers,
            port=evaluator_port,
        )

        with open("server-config.json", "w") as config_file:
            config_file.write(server_config.json())

        # Create a job submission file which runs and then analyzes the
        # benchmark.
        submission_commands = [
            "nonbonded benchmark run --restart true",
            "nonbonded benchmark analyze",
        ]

        cls._generate_submission_script(
            "bench",
            conda_environment,
            evaluator_preset,
            max_time,
            submission_commands,
        )

        # Generate a set of request options which only allow direct simulation
        options = RequestOptions()
        options.calculation_layers = ["SimulationLayer"]
        options.json("estimation-options.json", format=True)

        # Optionally retrieve any previously generated results.
        if include_results:
            cls._retrieve_results(model)
예제 #14
0
    def _batch_by_shared_component(self, submission, force_field_id):
        """Splits a submission into batches of properties whose substances are
        linked through shared components.

        Properties which were measured for substances sharing at least one
        common component (defined only by its smiles pattern and not
        necessarily in the same amount) will be placed into the same batch.

        Parameters
        ----------
        submission: EvaluatorClient._Submission
            The full request submission.
        force_field_id: str
            The unique id of the force field to use.

        Returns
        -------
        list of Batch
            The property batches.
        """

        # Ids which are already in use by either queued or finished batches.
        used_ids = set(self._queued_batches.keys())
        used_ids.update(self._finished_batches.keys())

        # Build a graph containing all of the different component
        # smiles patterns as nodes.
        unique_smiles = {
            component.smiles
            for substance in submission.dataset.substances
            for component in substance
        }

        substance_graph = networkx.Graph()
        substance_graph.add_nodes_from(unique_smiles)

        # Link neighbouring components of every multi-component substance,
        # which is enough to connect all components of that substance.
        for substance in submission.dataset.substances:

            if len(substance) >= 2:

                component_smiles = [component.smiles for component in substance]

                for first, second in zip(component_smiles, component_smiles[1:]):
                    substance_graph.add_edge(first, second)

        # Find clustered islands of those smiles which exist in
        # overlapping substances.
        islands = []

        for connected_smiles in networkx.connected_components(substance_graph):
            islands.append(substance_graph.subgraph(connected_smiles))

        # Create one batch per island
        batches = []

        for _ in islands:

            new_batch = Batch()
            new_batch.force_field_id = force_field_id

            # Regenerate the id for as long as it clashes with one which is
            # already in use (although this is very unlikely to ever happen).
            while new_batch.id in used_ids:
                new_batch.id = str(uuid.uuid4()).replace("-", "")

            options_json = submission.options.json()
            new_batch.options = RequestOptions.parse_json(options_json)

            new_batch.parameter_gradient_keys = copy.deepcopy(
                submission.parameter_gradient_keys)

            used_ids.add(new_batch.id)
            batches.append(new_batch)

        # A property whose components match no island falls through to the
        # last island (or index zero when there are no islands at all).
        fallback_index = max(len(islands) - 1, 0)

        for physical_property in submission.dataset:

            property_smiles = [
                component.smiles for component in physical_property.substance
            ]

            # Assign the property to the first island containing any of its
            # components.
            island_index = next(
                (
                    index
                    for index, island in enumerate(islands)
                    if any(smiles in island for smiles in property_smiles)
                ),
                fallback_index,
            )

            batches[island_index].queued_properties.append(physical_property)

        return batches
예제 #15
0
def main():
    """Estimate the benchmark data sets for the installed evaluator version.

    The results are generated inside of a directory named after the current
    version, once per calculation layer, and stored as
    "results/<layer>.json" within that directory.
    """

    setup_timestamp_logging()

    # Retrieve the current version, falling back to "latest" whenever the
    # version string contains a "+".
    version = evaluator.__version__.replace(".", "-").replace("v", "")

    if "+" in version:
        version = "latest"

    # Create a new directory to run the current versions results in.
    os.makedirs(os.path.join(version, "results"))

    # The vdW parameters to compute the gradients with respect to.
    gradient_smirks = (
        "[#1:1]-[#6X4]",
        "[#1:1]-[#6X4]-[#7,#8,#9,#16,#17,#35]",
        "[#1:1]-[#8]",
        "[#6X4:1]",
        "[#8X2H1+0:1]",
        "[#1]-[#8X2H2+0:1]-[#1]",
    )
    gradient_attributes = ("epsilon", "rmin_half")

    with temporarily_change_directory(version):

        worker_resources = QueueWorkerResources(
            number_of_gpus=1,
            preferred_gpu_toolkit=QueueWorkerResources.GPUToolkit.CUDA,
            per_thread_memory_limit=5 * unit.gigabyte,
            wallclock_time_limit="05:59",
        )

        with DaskLSFBackend(
                minimum_number_of_workers=1,
                maximum_number_of_workers=12,
                resources_per_worker=worker_resources,
                setup_script_commands=[
                    f"conda activate openff-evaluator-{version}",
                    "module load cuda/10.0",
                ],
                queue_name="gpuqueue",
        ) as calculation_backend:

            with EvaluatorServer(
                    calculation_backend,
                    working_directory="outputs",
                    storage_backend=LocalFileStorage("cached-data"),
            ):

                client = EvaluatorClient()

                for layer in ("SimulationLayer", "ReweightingLayer"):

                    is_reweighting = layer == "ReweightingLayer"
                    data_set = define_data_set(is_reweighting)

                    # Start from an empty set of schemas for each type of
                    # property in the data set.
                    request_options = RequestOptions()
                    request_options.calculation_layers = [layer]
                    request_options.calculation_schemas = {
                        property_type: {}
                        for property_type in data_set.property_types
                    }

                    if layer == "SimulationLayer":

                        request_options.add_schema(
                            "SimulationLayer",
                            "SolvationFreeEnergy",
                            solvation_free_energy_schema(),
                        )

                    gradient_keys = [
                        ParameterGradientKey("vdW", smirks, attribute)
                        for smirks in gradient_smirks
                        for attribute in gradient_attributes
                    ]

                    request, _ = client.request_estimate(
                        data_set,
                        ForceField("openff-1.2.0.offxml"),
                        request_options,
                        parameter_gradient_keys=gradient_keys,
                    )

                    results, _ = request.results(synchronous=True,
                                                 polling_interval=60)

                    results_path = os.path.join("results", f"{layer}.json")
                    results.json(results_path)
예제 #16
0
    def _generate_request_options(
            cls, target: EvaluatorTarget,
            training_set: "PhysicalPropertyDataSet") -> "RequestOptions":
        """Builds the request options to use when estimating the training set
        of an evaluator optimization target.

        Parameters
        ----------
        target
            The evaluator target which will spawn the request.
        training_set
            The training set which will be estimated.

        Returns
        -------
            The request options.
        """

        import inspect

        from openff.evaluator.client import RequestOptions
        from openff.evaluator.layers import registered_calculation_schemas

        request_options = RequestOptions()

        # Each entry describes one layer: its name, whether the target allows
        # it, and the schema argument through which the target may override
        # the default (together with the requested value).
        layer_settings = (
            (
                "ReweightingLayer",
                target.allow_reweighting,
                "n_effective_samples",
                target.n_effective_samples,
            ),
            (
                "SimulationLayer",
                target.allow_direct_simulation,
                "n_molecules",
                target.n_molecules,
            ),
        )

        # Specify the calculation layers to use.
        request_options.calculation_layers = []

        for layer_name, is_allowed, _, _ in layer_settings:

            if is_allowed:
                request_options.calculation_layers.append(layer_name)

        # Nothing more to do when only default options were requested.
        if target.n_molecules is None and target.n_effective_samples is None:
            return request_options

        # Generate estimation schemas for each of the properties if a non-default
        # option has been specified in the optimization options.
        request_options.calculation_schemas = defaultdict(dict)

        for property_type in training_set.property_types:

            for layer_name, is_allowed, argument_name, argument_value in layer_settings:

                if not is_allowed or argument_value is None:
                    continue

                layer_schemas = registered_calculation_schemas.get(layer_name, {})

                if property_type not in layer_schemas:
                    continue

                schema_factory = layer_schemas[property_type]

                # Only registered callables which accept the argument being
                # overridden can be used to build a custom schema.
                if not callable(schema_factory):
                    continue

                if argument_name not in inspect.getfullargspec(schema_factory).args:
                    continue

                request_options.calculation_schemas[property_type][
                    layer_name] = schema_factory(**{argument_name: argument_value})

        return request_options