Example #1
def test_simple_store_and_retrieve(data_class):
    """Tests storing and retrieving a simple object."""
    with tempfile.TemporaryDirectory() as temporary_directory:

        local_storage = LocalFileStorage(temporary_directory)

        storage_object = data_class()

        # Make sure that validation fails for the incomplete object.
        with pytest.raises(ValueError):
            local_storage.store_object(storage_object)

        # Setting the missing attribute should allow validation to pass.
        storage_object.some_attribute = 10

        storage_key = local_storage.store_object(storage_object)
        assert local_storage.has_object(storage_object)

        retrieved_object, _ = local_storage.retrieve_object(storage_key)

        assert retrieved_object is not None
        assert storage_object.json() == retrieved_object.json()

        # Ensure that the same key is returned when storing duplicate
        # data
        new_storage_key = local_storage.store_object(storage_object)
        assert storage_key == new_storage_key
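
The snippets on this page omit their imports. As a minimal sketch, Example #1 would need roughly the following header; the openff.evaluator import path is an assumption (older releases shipped the same class under the propertyestimator namespace), and data_class is supplied by a pytest fixture or parametrization that is not shown here.

import tempfile

import pytest

# Assumed import path; adjust to match the installed package version.
from openff.evaluator.storage import LocalFileStorage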
Example #2
    def __init__(
        self,
        calculation_backend,
        storage_backend=None,
        port=8000,
        working_directory="working-data",
        enable_data_caching=True,
        delete_working_files=True,
    ):
        """Constructs a new EvaluatorServer object.

        Parameters
        ----------
        calculation_backend: CalculationBackend
            The backend to use for executing calculations.
        storage_backend: StorageBackend, optional
            The backend to use for storing information from any calculations.
            If `None`, a default `LocalFileStorage` backend will be used.
        port: int
            The port on which to listen for incoming client requests.
        working_directory: str
            The local directory in which to store all temporary calculation data.
        enable_data_caching: bool
            Whether the server should attempt to cache any data produced by
            estimation requests (mainly the output of simulations) for future
            re-processing (e.g. for reweighting).
        delete_working_files: bool
            Whether to delete the working files produced while estimating a batch
            of properties using a specific calculation layer.
        """

        # Initialize the main 'server' attributes.
        self._port = port

        self._server_thread = None
        self._socket = None

        self._started = False
        self._stopped = True

        # Initialize the internal components.
        assert calculation_backend is not None and calculation_backend.started
        self._calculation_backend = calculation_backend

        if storage_backend is None:
            storage_backend = LocalFileStorage()

        self._storage_backend = storage_backend
        self._enable_data_caching = enable_data_caching

        self._working_directory = working_directory
        os.makedirs(self._working_directory, exist_ok=True)

        self._delete_working_files = delete_working_files

        self._queued_batches = {}
        self._finished_batches = {}

        self._batch_ids_per_client_id = {}
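
As a rough usage sketch, the constructor above can be paired with an already started calculation backend; note that the assertion in the body requires calculation_backend.started to be true. The import paths and the start()/stop() calls below are assumptions based on the openff-evaluator API, while the context-manager pattern mirrors Examples #9 and #11.

# Minimal usage sketch (assumed import paths; these may differ by version).
from openff.evaluator.backends.dask import DaskLocalCluster
from openff.evaluator.server import EvaluatorServer
from openff.evaluator.storage import LocalFileStorage

with DaskLocalCluster() as calculation_backend:

    server = EvaluatorServer(
        calculation_backend,
        storage_backend=LocalFileStorage("stored-data"),
        port=8000,
        working_directory="working-data",
    )

    # Serve client requests on a background thread until stopped.
    server.start(asynchronous=True)
    server.stop()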
Example #3
def test_base_simulation_data_storage():

    substance = Substance.from_components("C")

    with tempfile.TemporaryDirectory() as base_directory:

        data_directory = os.path.join(base_directory, "data_directory")
        data_object = create_dummy_simulation_data(data_directory, substance)

        backend_directory = os.path.join(base_directory, "storage_dir")

        storage = LocalFileStorage(backend_directory)
        storage_key = storage.store_object(data_object, data_directory)

        # Regenerate the data directory.
        os.makedirs(data_directory, exist_ok=True)

        assert storage.has_object(data_object)
        assert storage_key == storage.store_object(data_object, data_directory)

        retrieved_object, retrieved_directory = storage.retrieve_object(
            storage_key, StoredSimulationData)

        assert backend_directory in retrieved_directory
        assert data_object.json() == retrieved_object.json()
Example #4
def test_base_layer():

    properties_to_estimate = [
        create_dummy_property(Density),
        create_dummy_property(Density),
    ]

    dummy_options = RequestOptions()

    batch = server.Batch()
    batch.queued_properties = properties_to_estimate
    batch.options = dummy_options
    batch.force_field_id = ""
    batch.options.calculation_schemas = {
        "Density": {
            "DummyCalculationLayer": CalculationLayerSchema()
        }
    }

    with tempfile.TemporaryDirectory() as temporary_directory:

        with temporarily_change_directory(temporary_directory):

            # Create a simple calculation backend to test with.
            test_backend = DaskLocalCluster()
            test_backend.start()

            # Create a simple storage backend to test with.
            test_storage = LocalFileStorage()

            layer_directory = "dummy_layer"
            makedirs(layer_directory)

            def dummy_callback(returned_request):

                assert len(returned_request.estimated_properties) == 1
                assert len(returned_request.exceptions) == 2

            dummy_layer = DummyCalculationLayer()

            dummy_layer.schedule_calculation(
                test_backend,
                test_storage,
                layer_directory,
                batch,
                dummy_callback,
                True,
            )
Example #5
    def __init__(
        self,
        calculation_backend,
        storage_backend=None,
        port=8000,
        working_directory="working-data",
    ):
        """Constructs a new EvaluatorServer object.

        Parameters
        ----------
        calculation_backend: CalculationBackend
            The backend to use for executing calculations.
        storage_backend: StorageBackend, optional
            The backend to use for storing information from any calculations.
            If `None`, a default `LocalFileStorage` backend will be used.
        port: int
            The port on which to listen for incoming client requests.
        working_directory: str
            The local directory in which to store all temporary calculation data.
        """

        # Initialize the main 'server' attributes.
        self._port = port

        self._server_thread = None
        self._socket = None

        self._started = False
        self._stopped = True

        # Initialize the internal components.
        assert calculation_backend is not None and calculation_backend.started
        self._calculation_backend = calculation_backend

        if storage_backend is None:
            storage_backend = LocalFileStorage()

        self._storage_backend = storage_backend

        self._working_directory = working_directory
        os.makedirs(self._working_directory, exist_ok=True)

        self._queued_batches = {}
        self._finished_batches = {}

        self._batch_ids_per_client_id = {}
Example #6
def test_force_field_storage():
    """A simple test to that force fields can be stored and
    retrieved using the local storage backend."""

    force_field_source = SmirnoffForceFieldSource.from_path(
        "smirnoff99Frosst-1.1.0.offxml")

    with tempfile.TemporaryDirectory() as temporary_directory:

        local_storage = LocalFileStorage(temporary_directory)

        force_field_id = local_storage.store_force_field(force_field_source)
        retrieved_force_field = local_storage.retrieve_force_field(
            force_field_id)

        assert force_field_source.json() == retrieved_force_field.json()

        local_storage_new = LocalFileStorage(temporary_directory)
        assert local_storage_new.has_force_field(force_field_source)

        new_force_field_id = local_storage_new.store_force_field(
            force_field_source)
        assert new_force_field_id == force_field_id
Example #7
def test_base_simulation_data_query():

    substance_a = Substance.from_components("C")
    substance_b = Substance.from_components("CO")

    substance_full = Substance.from_components("C", "CO")

    substances = [substance_a, substance_b, substance_full]

    with tempfile.TemporaryDirectory() as base_directory:

        backend_directory = os.path.join(base_directory, "storage_dir")
        storage = LocalFileStorage(backend_directory)

        for substance in substances:

            data_directory = os.path.join(base_directory,
                                          f"{substance.identifier}")
            data_object = create_dummy_simulation_data(data_directory,
                                                       substance)

            storage.store_object(data_object, data_directory)

        for substance in substances:

            substance_query = SimulationDataQuery()
            substance_query.substance = substance

            results = storage.query(substance_query)
            assert results is not None and len(results) == 1
            assert len(next(iter(results.values()))[0]) == 3

        component_query = SimulationDataQuery()
        component_query.substance = substance_full
        component_query.substance_query = SubstanceQuery()
        component_query.substance_query.components_only = True

        results = storage.query(component_query)
        assert results is not None and len(results) == 2
Example #8
def test_storage_retrieval():
    # Create some dummy properties
    methane = Substance.from_components("C")
    methanol = Substance.from_components("CO")
    mixture = Substance.from_components("C", "CO")
    # Add some extra, unused data to make sure the wrong data isn't
    # being retrieved.
    unused_pure = Substance.from_components("CCO")
    unused_mixture = Substance.from_components("CCO", "CO")

    data_to_store = [
        (methane, PropertyPhase.Liquid, 1000),
        (methanol, PropertyPhase.Liquid, 1000),
        (methanol, PropertyPhase.Gas, 1),
        (mixture, PropertyPhase.Liquid, 1000),
        (unused_pure, PropertyPhase.Liquid, 1000),
        (unused_mixture, PropertyPhase.Liquid, 1000),
    ]
    storage_keys = {}

    state = ThermodynamicState(temperature=1.0 * unit.kelvin)

    properties = [
        # Properties with a full system query.
        Density(
            value=1.0 * unit.gram / unit.litre,
            substance=methanol,
            thermodynamic_state=state,
        ),
        DielectricConstant(
            value=1.0 * unit.dimensionless, substance=methane, thermodynamic_state=state
        ),
        # Properties with a multi-component query.
        EnthalpyOfVaporization(
            value=1.0 * unit.joule / unit.mole,
            substance=methanol,
            thermodynamic_state=state,
        ),
        # Property with a multi-phase query.
        EnthalpyOfMixing(
            value=1.0 * unit.joule / unit.mole,
            substance=mixture,
            thermodynamic_state=state,
        ),
        ExcessMolarVolume(
            value=1.0 * unit.meter ** 3, substance=mixture, thermodynamic_state=state
        ),
    ]
    expected_data_per_property = {
        Density: {"full_system_data": [(methanol, PropertyPhase.Liquid, 1000)]},
        DielectricConstant: {
            "full_system_data": [(methane, PropertyPhase.Liquid, 1000)]
        },
        EnthalpyOfVaporization: {
            "liquid_data": [(methanol, PropertyPhase.Liquid, 1000)],
            "gas_data": [(methanol, PropertyPhase.Gas, 1)],
        },
        EnthalpyOfMixing: {
            "full_system_data": [(mixture, PropertyPhase.Liquid, 1000)],
            "component_data": [
                [(methane, PropertyPhase.Liquid, 1000)],
                [(methanol, PropertyPhase.Liquid, 1000)],
            ],
        },
        ExcessMolarVolume: {
            "full_system_data": [(mixture, PropertyPhase.Liquid, 1000)],
            "component_data": [
                [(methane, PropertyPhase.Liquid, 1000)],
                [(methanol, PropertyPhase.Liquid, 1000)],
            ],
        },
    }

    force_field = SmirnoffForceFieldSource.from_path("smirnoff99Frosst-1.1.0.offxml")

    with tempfile.TemporaryDirectory() as base_directory:

        # Create a storage backend with some dummy data.
        backend_directory = os.path.join(base_directory, "storage_dir")
        storage_backend = LocalFileStorage(backend_directory)

        force_field_id = storage_backend.store_force_field(force_field)

        for substance, phase, n_mol in data_to_store:

            data_directory = os.path.join(base_directory, substance.identifier)
            data = create_dummy_simulation_data(
                data_directory,
                substance=substance,
                force_field_id=force_field_id,
                phase=phase,
                number_of_molecules=n_mol,
            )
            storage_key = storage_backend.store_object(data, data_directory)
            storage_keys[(substance, phase, n_mol)] = storage_key

        for physical_property in properties:

            schema = registered_calculation_schemas["ReweightingLayer"][
                physical_property.__class__.__name__
            ]

            if callable(schema):
                schema = schema()

            # noinspection PyProtectedMember
            metadata = ReweightingLayer._get_workflow_metadata(
                base_directory,
                physical_property,
                "",
                [],
                storage_backend,
                schema,
            )

            assert metadata is not None

            expected_data_list = expected_data_per_property[physical_property.__class__]

            for data_key in expected_data_list:

                assert data_key in metadata

                stored_metadata = metadata[data_key]
                expected_metadata = expected_data_list[data_key]

                assert len(stored_metadata) == len(expected_metadata)

                if isinstance(stored_metadata[0], list):
                    # Flatten any lists of lists.
                    stored_metadata = [
                        item for sublist in stored_metadata for item in sublist
                    ]
                    expected_metadata = [
                        item for sublist in expected_metadata for item in sublist
                    ]

                metadata_storage_keys = [
                    os.path.basename(x) for x, _, _ in stored_metadata
                ]
                expected_storage_keys = [storage_keys[x] for x in expected_metadata]

                assert sorted(metadata_storage_keys) == sorted(expected_storage_keys)
Example #9
def test_workflow_layer():
    """Test the `WorkflowLayer` calculation layer. As the `SimulationLayer`
    is the simplest implementation of the abstract layer, we settle for
    testing this."""

    properties_to_estimate = [
        create_dummy_property(Density),
        create_dummy_property(Density),
    ]

    # Create a very simple workflow which just returns some placeholder
    # value.
    estimated_value = Observable(
        (1 * unit.kelvin).plus_minus(0.1 * unit.kelvin))
    protocol_a = DummyProtocol("protocol_a")
    protocol_a.input_value = estimated_value

    schema = WorkflowSchema()
    schema.protocol_schemas = [protocol_a.schema]
    schema.final_value_source = ProtocolPath("output_value", protocol_a.id)

    layer_schema = SimulationSchema()
    layer_schema.workflow_schema = schema

    options = RequestOptions()
    options.add_schema("SimulationLayer", "Density", layer_schema)

    batch = server.Batch()
    batch.queued_properties = properties_to_estimate
    batch.options = options

    with tempfile.TemporaryDirectory() as directory:

        with temporarily_change_directory(directory):

            # Create a directory for the layer.
            layer_directory = "simulation_layer"
            os.makedirs(layer_directory)

            # Set-up a simple storage backend and add a force field to it.
            force_field = SmirnoffForceFieldSource.from_path(
                "smirnoff99Frosst-1.1.0.offxml")

            storage_backend = LocalFileStorage()
            batch.force_field_id = storage_backend.store_force_field(
                force_field)

            # Create a simple calculation backend to test with.
            with DaskLocalCluster() as calculation_backend:

                def dummy_callback(returned_request):

                    assert len(returned_request.estimated_properties) == 2
                    assert len(returned_request.exceptions) == 0

                simulation_layer = SimulationLayer()

                simulation_layer.schedule_calculation(
                    calculation_backend,
                    storage_backend,
                    layer_directory,
                    batch,
                    dummy_callback,
                    True,
                )
Example #10
def test_duplicate_simulation_data_storage(reverse_order):

    substance = Substance.from_components("CO")

    with tempfile.TemporaryDirectory() as base_directory_path:

        storage_directory = os.path.join(base_directory_path, "storage")
        local_storage = LocalFileStorage(storage_directory)

        # Construct some data to store with increasing
        # statistical inefficiencies.
        data_to_store = []

        for index in range(3):

            data_directory = os.path.join(base_directory_path, f"data_{index}")
            coordinate_name = f"data_{index}.pdb"

            data_object = create_dummy_simulation_data(
                directory_path=data_directory,
                substance=substance,
                force_field_id="ff_id_1",
                coordinate_file_name=coordinate_name,
                statistical_inefficiency=float(index),
                calculation_id="id",
            )
            data_to_store.append((data_object, data_directory))

        # Keep a track of the storage keys.
        all_storage_keys = set()

        iterator = enumerate(data_to_store)

        if reverse_order:
            iterator = reversed(list(iterator))

        # Store the data
        for index, data in iterator:

            data_object, data_directory = data

            storage_key = local_storage.store_object(data_object,
                                                     data_directory)
            all_storage_keys.add(storage_key)

            retrieved_object, stored_directory = local_storage.retrieve_object(
                storage_key)

            # Handle the case where we haven't reversed the order of
            # the data to store. Here only the first object in the list
            # should be stored and never replaced, as it has the lowest
            # statistical inefficiency.
            if not reverse_order:
                expected_index = 0
            # Handle the case where we have reversed the order of
            # the data to store. Here each new piece of data should
            # replace the last, as it will have a lower statistical
            # inefficiency.
            else:
                expected_index = index

            assert (
                retrieved_object.json() == data_to_store[expected_index][0].json()
            )

            # Make sure the directory has been correctly overwritten / retained
            # depending on the data order.
            coordinate_path = os.path.join(stored_directory,
                                           f"data_{expected_index}.pdb")
            assert os.path.isfile(coordinate_path)

        # Make sure every piece of data was assigned the same storage key,
        # regardless of the order in which it was stored.
        assert len(all_storage_keys) == 1
Example #11
def main():

    setup_timestamp_logging()

    # Retrieve the current version.
    version = evaluator.__version__.replace(".", "-").replace("v", "")

    if "+" in version:
        version = "latest"

    # Create a new directory to run the current versions results in.
    os.makedirs(os.path.join(version, "results"))

    with temporarily_change_directory(version):

        with DaskLSFBackend(
                minimum_number_of_workers=1,
                maximum_number_of_workers=12,
                resources_per_worker=QueueWorkerResources(
                    number_of_gpus=1,
                    preferred_gpu_toolkit=QueueWorkerResources.GPUToolkit.CUDA,
                    per_thread_memory_limit=5 * unit.gigabyte,
                    wallclock_time_limit="05:59",
                ),
                setup_script_commands=[
                    f"conda activate openff-evaluator-{version}",
                    "module load cuda/10.0",
                ],
                queue_name="gpuqueue",
        ) as calculation_backend:

            with EvaluatorServer(
                    calculation_backend,
                    working_directory="outputs",
                    storage_backend=LocalFileStorage("cached-data"),
            ):

                client = EvaluatorClient()

                for allowed_layer in ["SimulationLayer", "ReweightingLayer"]:

                    data_set = define_data_set(
                        allowed_layer == "ReweightingLayer")

                    options = RequestOptions()
                    options.calculation_layers = [allowed_layer]
                    options.calculation_schemas = {
                        property_type: {}
                        for property_type in data_set.property_types
                    }

                    if allowed_layer == "SimulationLayer":

                        options.add_schema(
                            "SimulationLayer",
                            "SolvationFreeEnergy",
                            solvation_free_energy_schema(),
                        )

                    request, _ = client.request_estimate(
                        data_set,
                        ForceField("openff-1.2.0.offxml"),
                        options,
                        parameter_gradient_keys=[
                            ParameterGradientKey("vdW", smirks, attribute)
                            for smirks in [
                                "[#1:1]-[#6X4]",
                                "[#1:1]-[#6X4]-[#7,#8,#9,#16,#17,#35]",
                                "[#1:1]-[#8]",
                                "[#6X4:1]",
                                "[#8X2H1+0:1]",
                                "[#1]-[#8X2H2+0:1]-[#1]",
                            ] for attribute in ["epsilon", "rmin_half"]
                        ],
                    )

                    results, _ = request.results(synchronous=True,
                                                 polling_interval=60)
                    results.json(
                        os.path.join("results", f"{allowed_layer}.json"))
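
Example #11 defines main() but the snippet stops before any entry point; presumably the full script ends with the standard guard:

if __name__ == "__main__":
    main()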