Example #1
def test_simulation_data_storage():

    substance = Substance.from_components("C")

    with tempfile.TemporaryDirectory() as base_directory:

        data_directory = os.path.join(base_directory, "data_directory")
        data_object = create_dummy_simulation_data(data_directory, substance)

        backend_directory = os.path.join(base_directory, "storage_dir")

        storage = LocalFileStorage(backend_directory)
        storage_key = storage.store_object(data_object, data_directory)

        # Regenerate the data directory.
        os.makedirs(data_directory, exist_ok=True)

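        # The backend should recognise the object and return the original
        # key when the same object is stored again.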
        assert storage.has_object(data_object)
        assert storage_key == storage.store_object(data_object, data_directory)

        retrieved_object, retrieved_directory = storage.retrieve_object(
            storage_key, StoredSimulationData)

        assert backend_directory in retrieved_directory
        assert data_object.json() == retrieved_object.json()
Example #2
def test_simulation_data_query():

    substance_a = Substance.from_components("C")
    substance_b = Substance.from_components("CO")

    substance_full = Substance.from_components("C", "CO")

    substances = [substance_a, substance_b, substance_full]

    with tempfile.TemporaryDirectory() as base_directory:

        backend_directory = os.path.join(base_directory, "storage_dir")
        storage = LocalFileStorage(backend_directory)

        for substance in substances:

            data_directory = os.path.join(base_directory,
                                          f"{substance.identifier}")
            data_object = create_dummy_simulation_data(data_directory,
                                                       substance)

            storage.store_object(data_object, data_directory)

        for substance in substances:

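            # Each substance should match exactly one stored data object.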
            substance_query = SimulationDataQuery()
            substance_query.substance = substance

            results = storage.query(substance_query)
            assert results is not None and len(results) == 1
            assert len(next(iter(results.values()))[0]) == 3

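        # Querying the mixture for its components only should instead match
        # the data stored for each of its pure components.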
        component_query = SimulationDataQuery()
        component_query.substance = substance_full
        component_query.substance_query = SubstanceQuery()
        component_query.substance_query.components_only = True

        results = storage.query(component_query)
        assert results is not None and len(results) == 2
Example #3
def test_unpack_stored_simulation_data():
    """A test that compatible simulation data gets merged
    together within the`LocalStorage` system."""

    with tempfile.TemporaryDirectory() as directory:

        force_field_path = os.path.join(directory, "ff.json")

        with open(force_field_path, "w") as file:
            file.write(build_tip3p_smirnoff_force_field().json())

        dummy_substance = create_dummy_substance(1)

        dummy_directory_path = os.path.join(directory, "data")
        dummy_data_path = os.path.join(directory, "data.json")

        data_coordinate_name = "data_1.pdb"

        data_object = create_dummy_simulation_data(
            directory_path=dummy_directory_path,
            substance=dummy_substance,
            force_field_id="ff_id_1",
            coordinate_file_name=data_coordinate_name,
            statistical_inefficiency=1.0,
        )

        with open(dummy_data_path, "w") as file:
            json.dump(data_object, file, cls=TypedJSONEncoder)

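        # Unpack the stored data object, its ancillary data directory and the
        # force field into the protocol's working directory.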
        unpack_stored_data = UnpackStoredSimulationData("unpack_data")
        unpack_stored_data.simulation_data_path = (
            dummy_data_path,
            dummy_directory_path,
            force_field_path,
        )
        unpack_stored_data.execute(directory, None)
Example #4
# Run the test in both storage orders (assumed pytest parametrization).
@pytest.mark.parametrize("reverse_order", [True, False])
def test_duplicate_simulation_data_storage(reverse_order):

    substance = Substance.from_components("CO")

    with tempfile.TemporaryDirectory() as base_directory_path:

        storage_directory = os.path.join(base_directory_path, "storage")
        local_storage = LocalFileStorage(storage_directory)

        # Construct some data to store with increasing
        # statistical inefficiencies.
        data_to_store = []

        for index in range(3):

            data_directory = os.path.join(base_directory_path, f"data_{index}")
            coordinate_name = f"data_{index}.pdb"

            data_object = create_dummy_simulation_data(
                directory_path=data_directory,
                substance=substance,
                force_field_id="ff_id_1",
                coordinate_file_name=coordinate_name,
                statistical_inefficiency=float(index),
                calculation_id="id",
            )
            data_to_store.append((data_object, data_directory))

        # Keep a track of the storage keys.
        all_storage_keys = set()

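        # Store the data either in its original order (increasing statistical
        # inefficiency) or in reverse.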
        iterator = enumerate(data_to_store)

        if reverse_order:
            iterator = reversed(list(iterator))

        # Store the data
        for index, data in iterator:

            data_object, data_directory = data

            storage_key = local_storage.store_object(data_object,
                                                     data_directory)
            all_storage_keys.add(storage_key)

            retrieved_object, stored_directory = local_storage.retrieve_object(
                storage_key)

            # Handle the case where we haven't reversed the order of
            # the data to store. Here only the first object in the list
            # should be stored and never replaced, as it has the lowest
            # statistical inefficiency.
            if not reverse_order:
                expected_index = 0
            # Handle the case where we have reversed the order of
            # the data to store. Here each new piece of data should
            # replace the last, as it will have a lower statistical
            # inefficiency.
            else:
                expected_index = index

            assert retrieved_object.json() == data_to_store[expected_index][0].json()

            # Make sure the directory has been correctly overwritten / retained
            # depending on the data order.
            coordinate_path = os.path.join(stored_directory,
                                           f"data_{expected_index}.pdb")
            assert os.path.isfile(coordinate_path)

        # Make sure that all of the pieces of data were assigned the same
        # storage key, regardless of the order in which they were stored.
        assert len(all_storage_keys) == 1
Example #5
def test_storage_retrieval():
    # Create some dummy properties
    methane = Substance.from_components("C")
    methanol = Substance.from_components("CO")
    mixture = Substance.from_components("C", "CO")
    # Add extra unused data to make sure the wrong data isn't
    # being retrieved.
    unused_pure = Substance.from_components("CCO")
    unused_mixture = Substance.from_components("CCO", "CO")

    data_to_store = [
        (methane, PropertyPhase.Liquid, 1000),
        (methanol, PropertyPhase.Liquid, 1000),
        (methanol, PropertyPhase.Gas, 1),
        (mixture, PropertyPhase.Liquid, 1000),
        (unused_pure, PropertyPhase.Liquid, 1000),
        (unused_mixture, PropertyPhase.Liquid, 1000),
    ]
    storage_keys = {}

    state = ThermodynamicState(temperature=1.0 * unit.kelvin)

    properties = [
        # Properties with a full system query.
        Density(
            value=1.0 * unit.gram / unit.litre,
            substance=methanol,
            thermodynamic_state=state,
        ),
        DielectricConstant(
            value=1.0 * unit.dimensionless, substance=methane, thermodynamic_state=state
        ),
        # Properties with a multi-component query.
        EnthalpyOfVaporization(
            value=1.0 * unit.joule / unit.mole,
            substance=methanol,
            thermodynamic_state=state,
        ),
        # Property with a multi-phase query.
        EnthalpyOfMixing(
            value=1.0 * unit.joule / unit.mole,
            substance=mixture,
            thermodynamic_state=state,
        ),
        ExcessMolarVolume(
            value=1.0 * unit.meter ** 3, substance=mixture, thermodynamic_state=state
        ),
    ]
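    # The (substance, phase, n_molecules) data which each type of property is
    # expected to request from storage.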
    expected_data_per_property = {
        Density: {"full_system_data": [(methanol, PropertyPhase.Liquid, 1000)]},
        DielectricConstant: {
            "full_system_data": [(methane, PropertyPhase.Liquid, 1000)]
        },
        EnthalpyOfVaporization: {
            "liquid_data": [(methanol, PropertyPhase.Liquid, 1000)],
            "gas_data": [(methanol, PropertyPhase.Gas, 1)],
        },
        EnthalpyOfMixing: {
            "full_system_data": [(mixture, PropertyPhase.Liquid, 1000)],
            "component_data": [
                [(methane, PropertyPhase.Liquid, 1000)],
                [(methanol, PropertyPhase.Liquid, 1000)],
            ],
        },
        ExcessMolarVolume: {
            "full_system_data": [(mixture, PropertyPhase.Liquid, 1000)],
            "component_data": [
                [(methane, PropertyPhase.Liquid, 1000)],
                [(methanol, PropertyPhase.Liquid, 1000)],
            ],
        },
    }

    force_field = SmirnoffForceFieldSource.from_path("smirnoff99Frosst-1.1.0.offxml")

    with tempfile.TemporaryDirectory() as base_directory:

        # Create a storage backend with some dummy data.
        backend_directory = os.path.join(base_directory, "storage_dir")
        storage_backend = LocalFileStorage(backend_directory)

        force_field_id = storage_backend.store_force_field(force_field)

        for substance, phase, n_mol in data_to_store:

            data_directory = os.path.join(base_directory, substance.identifier)
            data = create_dummy_simulation_data(
                data_directory,
                substance=substance,
                force_field_id=force_field_id,
                phase=phase,
                number_of_molecules=n_mol,
            )
            storage_key = storage_backend.store_object(data, data_directory)
            storage_keys[(substance, phase, n_mol)] = storage_key

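        # Make sure the reweighting layer's workflow metadata references the
        # expected stored data for each property.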
        for physical_property in properties:

            schema = registered_calculation_schemas["ReweightingLayer"][
                physical_property.__class__.__name__
            ]

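            # Registered schemas may be provided as factory functions which
            # must be called to build the schema object.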
            if callable(schema):
                schema = schema()

            # noinspection PyProtectedMember
            metadata = ReweightingLayer._get_workflow_metadata(
                base_directory, physical_property, "", [], storage_backend, schema,
            )

            assert metadata is not None

            expected_data_list = expected_data_per_property[physical_property.__class__]

            for data_key in expected_data_list:

                assert data_key in metadata

                stored_metadata = metadata[data_key]
                expected_metadata = expected_data_list[data_key]

                assert len(stored_metadata) == len(expected_metadata)

                if isinstance(stored_metadata[0], list):
                    # Flatten any lists of lists.
                    stored_metadata = [
                        item for sublist in stored_metadata for item in sublist
                    ]
                    expected_metadata = [
                        item for sublist in expected_metadata for item in sublist
                    ]

                metadata_storage_keys = [
                    os.path.basename(x) for x, _, _ in stored_metadata
                ]
                expected_storage_keys = [storage_keys[x] for x in expected_metadata]

                assert sorted(metadata_storage_keys) == sorted(expected_storage_keys)