def test_base_simulation_data_storage():
    """Round-trip a dummy simulation data object through `LocalFileStorage`.

    The test stores the object, checks that the backend reports it as present,
    checks that storing the same object again returns the same key, and
    finally checks that the retrieved object serialises to the same JSON and
    that the retrieved directory path contains the backend directory path.
    """
    methane = Substance.from_components("C")

    with tempfile.TemporaryDirectory() as root_path:

        source_path = os.path.join(root_path, "data_directory")
        dummy_data = create_dummy_simulation_data(source_path, methane)

        storage_path = os.path.join(root_path, "storage_dir")
        backend = LocalFileStorage(storage_path)

        first_key = backend.store_object(dummy_data, source_path)

        # Make sure the source directory exists again before the object is
        # stored a second time (a no-op if it is still present).
        os.makedirs(source_path, exist_ok=True)

        assert backend.has_object(dummy_data)

        # Storing identical data twice must not produce a new key.
        second_key = backend.store_object(dummy_data, source_path)
        assert first_key == second_key

        fetched_data, fetched_path = backend.retrieve_object(
            first_key, StoredSimulationData
        )

        assert storage_path in fetched_path
        assert dummy_data.json() == fetched_data.json()
def test_base_simulation_data_query():
    """Query stored simulation data by exact substance and by components.

    One dummy data object is stored for each of two single-component
    substances and for the substance built from both components. Querying
    with each substance in turn must yield exactly one result group, and a
    components-only query on the combined substance must yield two.
    """
    substance_a = Substance.from_components("C")
    substance_b = Substance.from_components("CO")
    substance_full = Substance.from_components("C", "CO")

    all_substances = [substance_a, substance_b, substance_full]

    with tempfile.TemporaryDirectory() as root_path:

        backend = LocalFileStorage(os.path.join(root_path, "storage_dir"))

        # Populate the backend with one entry per substance.
        for current in all_substances:

            source_path = os.path.join(root_path, f"{current.identifier}")
            dummy_data = create_dummy_simulation_data(source_path, current)

            backend.store_object(dummy_data, source_path)

        # A query on the exact substance should match a single group.
        for current in all_substances:

            exact_query = SimulationDataQuery()
            exact_query.substance = current

            matches = backend.query(exact_query)

            assert matches is not None
            assert len(matches) == 1

            # The first entry of the first group is expected to hold 3 items.
            first_group = next(iter(matches.values()))
            assert len(first_group[0]) == 3

        # A components-only query on the combined substance should return
        # one group per component.
        by_component = SimulationDataQuery()
        by_component.substance = substance_full
        by_component.substance_query = SubstanceQuery()
        by_component.substance_query.components_only = True

        matches = backend.query(by_component)

        assert matches is not None
        assert len(matches) == 2
def test_unpack_stored_simulation_data():
    """A test that the `UnpackStoredSimulationData` protocol can be executed
    on a dummy piece of stored simulation data.

    A force field, a dummy data directory and the JSON-serialised data object
    are written into a temporary directory and handed to the protocol through
    its `simulation_data_path` input. The test only checks that `execute`
    completes; the protocol's outputs are not inspected here.
    """

    with tempfile.TemporaryDirectory() as directory:

        # Serialise a force field for the protocol to load.
        force_field_path = os.path.join(directory, "ff.json")

        with open(force_field_path, "w") as file:
            file.write(build_tip3p_smirnoff_force_field().json())

        dummy_substance = create_dummy_substance(1)

        dummy_directory_path = os.path.join(directory, "data")
        dummy_data_path = os.path.join(directory, "data.json")

        data_coordinate_name = "data_1.pdb"

        data_object = create_dummy_simulation_data(
            directory_path=dummy_directory_path,
            substance=dummy_substance,
            force_field_id="ff_id_1",
            coordinate_file_name=data_coordinate_name,
            statistical_inefficiency=1.0,
        )

        # Write the data object next to its directory so that it can be
        # referenced by path.
        with open(dummy_data_path, "w") as file:
            json.dump(data_object, file, cls=TypedJSONEncoder)

        unpack_stored_data = UnpackStoredSimulationData("unpack_data")

        # The input is a (data object path, data directory, force field path)
        # tuple.
        unpack_stored_data.simulation_data_path = (
            dummy_data_path,
            dummy_directory_path,
            force_field_path,
        )

        unpack_stored_data.execute(directory, None)
def test_storage_retrieval():
    """Check that the reweighting layer picks the right stored data for each
    type of property.

    Dummy simulation data for several (substance, phase, molecule count)
    combinations is stored in a `LocalFileStorage` backend. For each dummy
    property the metadata built by `ReweightingLayer._get_workflow_metadata`
    is then compared, data key by data key, against the storage keys of the
    data that property is expected to use.
    """

    # Create some dummy properties
    methane = Substance.from_components("C")
    methanol = Substance.from_components("CO")

    mixture = Substance.from_components("C", "CO")

    # Add extra unused data to make sure the wrong data isn't
    # being retrieved.
    unused_pure = Substance.from_components("CCO")
    unused_mixture = Substance.from_components("CCO", "CO")

    data_to_store = [
        (methane, PropertyPhase.Liquid, 1000),
        (methanol, PropertyPhase.Liquid, 1000),
        (methanol, PropertyPhase.Gas, 1),
        (mixture, PropertyPhase.Liquid, 1000),
        (unused_pure, PropertyPhase.Liquid, 1000),
        (unused_mixture, PropertyPhase.Liquid, 1000),
    ]
    storage_keys = {}

    state = ThermodynamicState(temperature=1.0 * unit.kelvin)

    properties = [
        # Properties with a full system query.
        Density(
            value=1.0 * unit.gram / unit.litre,
            substance=methanol,
            thermodynamic_state=state,
        ),
        DielectricConstant(
            value=1.0 * unit.dimensionless, substance=methane, thermodynamic_state=state
        ),
        # Property with a multi-phase query (liquid and gas data).
        EnthalpyOfVaporization(
            value=1.0 * unit.joule / unit.mole,
            substance=methanol,
            thermodynamic_state=state,
        ),
        # Properties with a multi-component query (full system and
        # per-component data).
        EnthalpyOfMixing(
            value=1.0 * unit.joule / unit.mole,
            substance=mixture,
            thermodynamic_state=state,
        ),
        ExcessMolarVolume(
            value=1.0 * unit.meter ** 3, substance=mixture, thermodynamic_state=state
        ),
    ]

    # Maps each property type to the data expected under each metadata key,
    # expressed as entries of `data_to_store`.
    expected_data_per_property = {
        Density: {"full_system_data": [(methanol, PropertyPhase.Liquid, 1000)]},
        DielectricConstant: {
            "full_system_data": [(methane, PropertyPhase.Liquid, 1000)]
        },
        EnthalpyOfVaporization: {
            "liquid_data": [(methanol, PropertyPhase.Liquid, 1000)],
            "gas_data": [(methanol, PropertyPhase.Gas, 1)],
        },
        EnthalpyOfMixing: {
            "full_system_data": [(mixture, PropertyPhase.Liquid, 1000)],
            "component_data": [
                [(methane, PropertyPhase.Liquid, 1000)],
                [(methanol, PropertyPhase.Liquid, 1000)],
            ],
        },
        ExcessMolarVolume: {
            "full_system_data": [(mixture, PropertyPhase.Liquid, 1000)],
            "component_data": [
                [(methane, PropertyPhase.Liquid, 1000)],
                [(methanol, PropertyPhase.Liquid, 1000)],
            ],
        },
    }

    force_field = SmirnoffForceFieldSource.from_path("smirnoff99Frosst-1.1.0.offxml")

    with tempfile.TemporaryDirectory() as base_directory:

        # Create a storage backend with some dummy data.
        backend_directory = os.path.join(base_directory, "storage_dir")

        storage_backend = LocalFileStorage(backend_directory)
        force_field_id = storage_backend.store_force_field(force_field)

        for substance, phase, n_mol in data_to_store:

            data_directory = os.path.join(base_directory, substance.identifier)

            data = create_dummy_simulation_data(
                data_directory,
                substance=substance,
                force_field_id=force_field_id,
                phase=phase,
                number_of_molecules=n_mol,
            )

            # Remember which key each piece of data was stored under so that
            # it can be matched against the retrieved metadata below.
            storage_key = storage_backend.store_object(data, data_directory)
            storage_keys[(substance, phase, n_mol)] = storage_key

        for physical_property in properties:

            schema = registered_calculation_schemas["ReweightingLayer"][
                physical_property.__class__.__name__
            ]

            # A registered schema may be stored as a factory rather than as
            # an instance.
            if callable(schema):
                schema = schema()

            # noinspection PyProtectedMember
            metadata = ReweightingLayer._get_workflow_metadata(
                base_directory, physical_property, "", [], storage_backend, schema,
            )

            assert metadata is not None

            expected_data_list = expected_data_per_property[physical_property.__class__]

            for data_key, expected_metadata in expected_data_list.items():

                assert data_key in metadata

                stored_metadata = metadata[data_key]

                assert len(stored_metadata) == len(expected_metadata)

                if isinstance(stored_metadata[0], list):
                    # Flatten any lists of lists.
                    stored_metadata = [
                        item for sublist in stored_metadata for item in sublist
                    ]
                    expected_metadata = [
                        item for sublist in expected_metadata for item in sublist
                    ]

                # The first element of each stored entry is a path whose final
                # component is compared against the storage key.
                metadata_storage_keys = [
                    os.path.basename(x) for x, _, _ in stored_metadata
                ]
                expected_storage_keys = [storage_keys[x] for x in expected_metadata]

                assert sorted(metadata_storage_keys) == sorted(expected_storage_keys)
def test_duplicate_simulation_data_storage(reverse_order):
    """Store three equivalent data objects that differ in statistical
    inefficiency and check which one the backend keeps.

    `reverse_order` selects whether the objects are stored from lowest to
    highest inefficiency (falsy) or from highest to lowest (truthy).
    NOTE(review): the value is presumably supplied by a parametrization or
    fixture that is not visible in this block - confirm.
    """
    methanol = Substance.from_components("CO")

    with tempfile.TemporaryDirectory() as root_path:

        backend = LocalFileStorage(os.path.join(root_path, "storage"))

        # Build the candidate data; the statistical inefficiency grows with
        # the position in the list.
        candidates = []

        for position in range(3):

            source_path = os.path.join(root_path, f"data_{position}")
            coordinate_file = f"data_{position}.pdb"

            dummy_data = create_dummy_simulation_data(
                directory_path=source_path,
                substance=methanol,
                force_field_id="ff_id_1",
                coordinate_file_name=coordinate_file,
                statistical_inefficiency=float(position),
                calculation_id="id",
            )
            candidates.append((dummy_data, source_path))

        # Decide the order in which the candidates are stored, keeping each
        # candidate paired with its original position.
        store_order = list(enumerate(candidates))

        if reverse_order:
            store_order.reverse()

        # Every key handed back by the backend is collected here.
        seen_keys = set()

        for position, (dummy_data, source_path) in store_order:

            key = backend.store_object(dummy_data, source_path)
            seen_keys.add(key)

            kept_data, kept_path = backend.retrieve_object(key)

            # In forward order the first candidate has the lowest statistical
            # inefficiency, so it should be stored and never replaced. In
            # reverse order each new candidate has a lower inefficiency than
            # the last, so it should replace what was stored before.
            expected_position = position if reverse_order else 0

            expected_json = candidates[expected_position][0].json()
            assert kept_data.json() == expected_json

            # The stored directory must hold the coordinate file of the
            # candidate that is expected to have been kept.
            coordinate_path = os.path.join(
                kept_path, f"data_{expected_position}.pdb"
            )
            assert os.path.isfile(coordinate_path)

        # All of the candidates should have been assigned the same key,
        # whichever order they were stored in.
        assert len(seen_keys) == 1