def retrieve_simulation_data(self, substance, include_pure_data=True):
    """Look up the directories of the simulation data stored for a substance.

    Parameters
    ----------
    substance
        The substance to look up. Its `identifier` is always queried.
    include_pure_data: bool
        When exactly `True` and `substance` is a `Mixture`, each of its
        components is additionally queried as a single-component mixture
        (added with a value of 1.0).

    Returns
    -------
    dict of str and list of str
        The data directory paths, keyed by substance identifier. Identifiers
        with no entry in `self._simulation_data_by_substance` are omitted.
    """
    identifiers = [substance.identifier]

    if isinstance(substance, Mixture) and include_pure_data is True:

        for component in substance.components:

            pure_substance = Mixture()
            pure_substance.add_component(component.smiles, 1.0, False)

            pure_identifier = pure_substance.identifier

            # Avoid querying the same identifier twice.
            if pure_identifier in identifiers:
                continue

            identifiers.append(pure_identifier)

    data_paths = {}

    for identifier in identifiers:

        if identifier not in self._simulation_data_by_substance:
            continue

        data_keys = self._simulation_data_by_substance[identifier]

        # Each stored object owns a '<unique_id>_data' entry under the
        # root directory.
        data_paths[identifier] = [
            path.join(self._root_directory, f'{stored_object.unique_id}_data')
            for stored_object in map(self.retrieve_object, data_keys)
        ]

    return data_paths
def test_local_simulation_storage():
    """Test that simulation data can be stored and retrieved using the
    local file storage backend.

    A dummy `StoredSimulationData` object is serialised to `data.json` in a
    scratch directory, stored through `LocalFileStorage`, and then read back
    from the directory the backend reports for the substance. A second
    backend instance pointed at the same directory must also report that it
    has the stored object.
    """
    substance = Mixture()
    substance.add_component('C', 1.0, False)

    dummy_simulation_data = StoredSimulationData()

    dummy_simulation_data.thermodynamic_state = ThermodynamicState(
        298.0 * unit.kelvin, 1.0 * unit.atmosphere)
    dummy_simulation_data.statistical_inefficiency = 1.0
    dummy_simulation_data.force_field_id = 'tmp_ff_id'
    dummy_simulation_data.substance = substance

    temporary_data_directory = 'temp_data'
    temporary_backend_directory = 'storage_dir'

    temporary_directories = (temporary_data_directory,
                             temporary_backend_directory)

    # Start from a clean slate in case a previous run left files behind.
    for directory in temporary_directories:

        if path.isdir(directory):
            rmtree(directory)

    try:

        for directory in temporary_directories:
            makedirs(directory)

        with open(path.join(temporary_data_directory, 'data.json'), 'w') as file:
            json.dump(dummy_simulation_data, file, cls=TypedJSONEncoder)

        local_storage = LocalFileStorage(temporary_backend_directory)

        dummy_simulation_data.unique_id = local_storage.store_simulation_data(
            substance.identifier, temporary_data_directory)

        retrieved_data_directories = local_storage.retrieve_simulation_data(
            substance)

        assert len(retrieved_data_directories) == 1

        retrieved_data_directory = retrieved_data_directories[
            substance.identifier][0]

        with open(path.join(retrieved_data_directory, 'data.json'), 'r') as file:
            retrieved_data = json.load(file, cls=TypedJSONDecoder)

        assert dummy_simulation_data.thermodynamic_state == retrieved_data.thermodynamic_state
        assert dummy_simulation_data.statistical_inefficiency == retrieved_data.statistical_inefficiency
        assert dummy_simulation_data.force_field_id == retrieved_data.force_field_id
        assert dummy_simulation_data.substance == retrieved_data.substance

        # A fresh backend over the same directory must see the stored object.
        local_storage_new = LocalFileStorage(temporary_backend_directory)
        assert local_storage_new.has_object(dummy_simulation_data.unique_id)

    finally:

        # Always remove the scratch directories, even when an assertion or
        # a storage call above fails.
        for directory in temporary_directories:

            if path.isdir(directory):
                rmtree(directory)
def test_density_dielectric_merging():
    """Compare the protocols of a density and a dielectric workflow after
    both have been added to the same `WorkflowGraph`.

    The two workflows' protocols are walked pairwise in topological order.
    The serialised schemas of each pair must be identical, except for
    protocols whose id contains 'extract_traj' or 'extract_stats', whose
    schemas must differ.
    """
    substance = Mixture()
    substance.add_component('C', 1.0)

    density = Density(thermodynamic_state=ThermodynamicState(temperature=298 * unit.kelvin,
                                                             pressure=1 * unit.atmosphere),
                      phase=PropertyPhase.Liquid,
                      substance=substance,
                      value=10 * unit.gram / unit.mole,
                      uncertainty=1 * unit.gram / unit.mole)

    # NOTE(review): the gram / mole units here look copied from the density
    # property above - confirm whether they matter for this test.
    dielectric = DielectricConstant(thermodynamic_state=ThermodynamicState(temperature=298 * unit.kelvin,
                                                                           pressure=1 * unit.atmosphere),
                                    phase=PropertyPhase.Liquid,
                                    substance=substance,
                                    value=10 * unit.gram / unit.mole,
                                    uncertainty=1 * unit.gram / unit.mole)

    density_schema = density.get_default_workflow_schema(
        'SimulationLayer', PropertyWorkflowOptions())
    dielectric_schema = dielectric.get_default_workflow_schema(
        'SimulationLayer', PropertyWorkflowOptions())

    density_metadata = Workflow.generate_default_metadata(
        density,
        get_data_filename('forcefield/smirnoff99Frosst.offxml'),
        PropertyEstimatorOptions())

    # Build the dielectric metadata from the dielectric property itself
    # (it was previously generated from `density`).
    dielectric_metadata = Workflow.generate_default_metadata(
        dielectric,
        get_data_filename('forcefield/smirnoff99Frosst.offxml'),
        PropertyEstimatorOptions())

    density_workflow = Workflow(density, density_metadata)
    density_workflow.schema = density_schema

    dielectric_workflow = Workflow(dielectric, dielectric_metadata)
    dielectric_workflow.schema = dielectric_schema

    workflow_graph = WorkflowGraph('')

    workflow_graph.add_workflow(density_workflow)
    workflow_graph.add_workflow(dielectric_workflow)

    merge_order_a = graph.topological_sort(density_workflow.dependants_graph)
    merge_order_b = graph.topological_sort(
        dielectric_workflow.dependants_graph)

    for protocol_id_A, protocol_id_B in zip(merge_order_a, merge_order_b):

        density_schema_json = density_workflow.protocols[protocol_id_A].schema.json()
        dielectric_schema_json = dielectric_workflow.protocols[protocol_id_B].schema.json()

        if ('extract_traj' not in protocol_id_A and
                'extract_stats' not in protocol_id_A):

            assert density_schema_json == dielectric_schema_json

        else:

            # The extraction protocols are expected to differ between the
            # two properties.
            assert density_schema_json != dielectric_schema_json
def test_cloned_schema_merging_simulation(registered_property_name, available_layer):
    """Tests that two identical calculations get merged into one by the
    `WorkflowGraph`.

    Parameters
    ----------
    registered_property_name: str
        The key of the property class to test in `registered_properties`.
    available_layer: str
        The name of the layer to request the default workflow schema for.
        The test returns early when the property has no schema for it.
    """
    registered_property = registered_properties[registered_property_name]

    dummy_property = create_dummy_property(registered_property)

    workflow_schema = dummy_property.get_default_workflow_schema(
        available_layer, PropertyWorkflowOptions())

    if workflow_schema is None:
        # Nothing to merge for this property / layer combination.
        return

    global_metadata = create_dummy_metadata(dummy_property, available_layer)

    workflow_a = Workflow(dummy_property, global_metadata)
    workflow_a.schema = workflow_schema

    workflow_b = Workflow(dummy_property, global_metadata)
    workflow_b.schema = workflow_schema

    workflow_graph = WorkflowGraph()

    workflow_graph.add_workflow(workflow_a)
    workflow_graph.add_workflow(workflow_b)

    # Sort the dependants graphs by key so that both workflows are
    # traversed in the same order.
    ordered_dict_a = OrderedDict(sorted(workflow_a.dependants_graph.items()))
    ordered_dict_b = OrderedDict(sorted(workflow_b.dependants_graph.items()))

    merge_order_a = graph.topological_sort(ordered_dict_a)
    merge_order_b = graph.topological_sort(ordered_dict_b)

    # The graph should hold exactly one copy of each protocol.
    assert len(workflow_graph._protocols_by_id) == len(workflow_a.protocols)

    for protocol_id in workflow_a.protocols:
        assert protocol_id in workflow_graph._protocols_by_id

    for protocol_id_A, protocol_id_B in zip(merge_order_a, merge_order_b):

        assert protocol_id_A == protocol_id_B

        assert workflow_a.protocols[protocol_id_A].schema.json() == \
               workflow_b.protocols[protocol_id_B].schema.json()
def build_solvated_system():
    """Demonstrate how to build, parameterise and energy minimise a solvated
    system with the built-in utilities and protocol classes.

    Three protocols are run one after another: coordinates are built with
    packmol, smirnoff force field parameters are assigned, and the resulting
    system is energy minimised.
    """
    working_directory = ''
    coordinate_file = 'output.pdb'

    # Describe the composition of the system: a 1:1 mix of water and
    # octanol...
    solvated_mixture = Mixture()

    solvated_mixture.add_component(smiles='O', mole_fraction=0.5)
    solvated_mixture.add_component(smiles='CCCCCCCCO', mole_fraction=0.5)

    # ...plus a single solute molecule (paracetamol), flagged as an
    # 'impurity'.
    solvated_mixture.add_component(smiles='CC(=O)NC1=CC=C(C=C1)O',
                                   mole_fraction=0.0,
                                   impurity=True)

    # Step 1 - generate coordinates. This protocol wraps the packmol
    # utility in a friendlier interface.
    print('Building the coordinates (this may take a while...)')

    packmol_protocol = BuildCoordinatesPackmol('')

    # Cap the number of molecules in the system,
    packmol_protocol.max_molecules = 1500
    # choose the target density (the default 1.0 g/ml is normally fine),
    packmol_protocol.mass_density = 1.0 * unit.grams / unit.milliliters
    # and state which system to generate coordinates for.
    packmol_protocol.substance = solvated_mixture

    # The later steps read the built coordinates from output.pdb.
    packmol_protocol.execute(working_directory, None)

    # Step 2 - assign smirnoff force field parameters to the coordinates.
    print('Assigning some parameters.')

    parameterise_protocol = BuildSmirnoffSystem('')

    parameterise_protocol.force_field_path = get_data_filename(
        'forcefield/smirnoff99Frosst.offxml')
    parameterise_protocol.coordinate_file_path = coordinate_file
    parameterise_protocol.substance = solvated_mixture

    parameterise_protocol.execute(working_directory, None)

    # Step 3 - run a simple energy minimisation.
    print('Performing energy minimisation.')

    minimise_protocol = RunEnergyMinimisation('')

    minimise_protocol.input_coordinate_file = coordinate_file
    minimise_protocol.system_path = parameterise_protocol.system_path

    minimise_protocol.execute(working_directory, ComputeResources())
def create_dummy_property(property_class):
    """Create a placeholder instance of a property class.

    Parameters
    ----------
    property_class
        The class to instantiate. It is called with `thermodynamic_state`,
        `phase`, `substance`, `value` and `uncertainty` keyword arguments.

    Returns
    -------
    An instance of `property_class` for a two-component ('C' and 'CO',
    0.5 each) liquid mixture at 298 K and 1 atm, whose `source` is a
    `CalculationSource` with a 'dummy' fidelity.
    """
    binary_mixture = Mixture()

    for smiles in ('C', 'CO'):
        binary_mixture.add_component(smiles, 0.5)

    ambient_state = ThermodynamicState(temperature=298 * unit.kelvin,
                                       pressure=1 * unit.atmosphere)

    placeholder = property_class(thermodynamic_state=ambient_state,
                                 phase=PropertyPhase.Liquid,
                                 substance=binary_mixture,
                                 value=10 * unit.gram,
                                 uncertainty=1 * unit.gram)

    placeholder.source = CalculationSource(fidelity='dummy', provenance={})

    return placeholder
def test_base_simulation_protocols():
    """Tests that the commonly chained protocols - build coordinates, assign
    parameters, energy minimise, simulate and extract data - are able to
    work together without returning a `PropertyEstimatorException`."""

    water = Mixture()
    water.add_component(smiles='O', mole_fraction=1.0)

    ambient_state = ThermodynamicState(298 * unit.kelvin, 1 * unit.atmosphere)

    with tempfile.TemporaryDirectory() as temporary_directory:

        def in_directory(file_name):
            # Resolve a file name inside the temporary working directory.
            return path.join(temporary_directory, file_name)

        def run_protocol(protocol, resources):
            # Execute a protocol in the temporary directory and make sure
            # that it did not return an exception object.
            outcome = protocol.execute(temporary_directory, resources)
            assert not isinstance(outcome, PropertyEstimatorException)

        # Build the coordinates of a small, low density system. The later
        # steps read them from output.pdb.
        packmol_protocol = BuildCoordinatesPackmol('')

        packmol_protocol.max_molecules = 10
        packmol_protocol.mass_density = 0.05 * unit.grams / unit.milliliters
        packmol_protocol.substance = water

        run_protocol(packmol_protocol, None)

        # Assign smirnoff force field parameters to the coordinates.
        print('Assigning some parameters.')

        parameterise_protocol = BuildSmirnoffSystem('')

        parameterise_protocol.force_field_path = get_data_filename(
            'forcefield/smirnoff99Frosst.offxml')
        parameterise_protocol.coordinate_file_path = in_directory('output.pdb')
        parameterise_protocol.substance = water

        run_protocol(parameterise_protocol, None)

        # Energy minimise the parameterised system.
        print('Performing energy minimisation.')

        minimise_protocol = RunEnergyMinimisation('')

        minimise_protocol.input_coordinate_file = in_directory('output.pdb')
        minimise_protocol.system_path = parameterise_protocol.system_path

        run_protocol(minimise_protocol, ComputeResources())

        # Run a very short NPT simulation.
        simulation_protocol = RunOpenMMSimulation('npt_equilibration')

        simulation_protocol.ensemble = Ensemble.NPT

        simulation_protocol.steps = 20  # Debug settings.
        simulation_protocol.output_frequency = 2  # Debug settings.

        simulation_protocol.thermodynamic_state = ambient_state

        simulation_protocol.input_coordinate_file = in_directory('minimised.pdb')
        simulation_protocol.system_path = parameterise_protocol.system_path

        run_protocol(simulation_protocol, ComputeResources())

        # Extract the average density from the statistics file.
        density_protocol = ExtractAverageStatistic('extract_density')

        density_protocol.statistics_type = ObservableType.Density
        density_protocol.statistics_path = in_directory('statistics.csv')

        run_protocol(density_protocol, ComputeResources())

        # Extract the average dielectric constant from the trajectory.
        dielectric_protocol = ExtractAverageDielectric('extract_dielectric')

        dielectric_protocol.thermodynamic_state = ambient_state

        dielectric_protocol.input_coordinate_file = in_directory('input.pdb')
        dielectric_protocol.trajectory_path = in_directory('trajectory.dcd')
        dielectric_protocol.system_path = parameterise_protocol.system_path

        run_protocol(dielectric_protocol, ComputeResources())

        # Subsample the trajectory using the density protocol's outputs.
        uncorrelated_trajectory_protocol = ExtractUncorrelatedTrajectoryData(
            'extract_traj')

        uncorrelated_trajectory_protocol.statistical_inefficiency = \
            density_protocol.statistical_inefficiency
        uncorrelated_trajectory_protocol.equilibration_index = \
            density_protocol.equilibration_index

        uncorrelated_trajectory_protocol.input_coordinate_file = in_directory(
            'input.pdb')
        uncorrelated_trajectory_protocol.input_trajectory_path = in_directory(
            'trajectory.dcd')

        run_protocol(uncorrelated_trajectory_protocol, ComputeResources())

        # Subsample the statistics file in the same way.
        uncorrelated_statistics_protocol = ExtractUncorrelatedStatisticsData(
            'extract_stats')

        uncorrelated_statistics_protocol.statistical_inefficiency = \
            density_protocol.statistical_inefficiency
        uncorrelated_statistics_protocol.equilibration_index = \
            density_protocol.equilibration_index

        uncorrelated_statistics_protocol.input_statistics_path = in_directory(
            'statistics.csv')

        run_protocol(uncorrelated_statistics_protocol, ComputeResources())
def _build_workflow_graph(working_directory, properties, target_force_field_path,
                          stored_data_paths, options):
    """Assemble the graph of workflows used to estimate a set of properties
    by reweighting.

    A property only contributes a workflow when it implements both the
    `IReweightable` and `IWorkflowProperty` interfaces, when `options`
    contains a reweighting layer schema for its type, and when stored data
    is available for its substance (and, for multi-component properties,
    for each of its components).

    Parameters
    ----------
    working_directory: str
        The local directory in which to store all local, temporary
        calculation data from this graph.
    properties : list of PhysicalProperty
        The properties to attempt to compute.
    target_force_field_path : str
        The path to the target force field parameters to use in the workflow.
    stored_data_paths: dict of str and tuple(str, str)
        A dictionary partitioned by substance identifiers, whose values
        are a tuple of a path to a stored simulation data object, and
        its corresponding force field path.
    options: PropertyEstimatorOptions
        The options to run the workflows with.

    Returns
    -------
    WorkflowGraph
        The graph containing a workflow for each supported property.
    """
    layer_name = ReweightingLayer.__name__

    graph_to_build = WorkflowGraph(working_directory)

    for physical_property in properties:

        # Only properties which implement both the IReweightable and
        # IWorkflowProperty interfaces can be reweighted.
        if not (isinstance(physical_property, IReweightable) and
                isinstance(physical_property, IWorkflowProperty)):
            continue

        property_type = type(physical_property).__name__

        if property_type not in options.workflow_schemas:

            logging.warning('The reweighting layer does not support {} '
                            'workflows.'.format(property_type))

            continue

        layer_schemas = options.workflow_schemas[property_type]

        if layer_name not in layer_schemas:
            continue

        schema = layer_schemas[layer_name]

        global_metadata = Workflow.generate_default_metadata(
            physical_property, target_force_field_path, options)

        substance_identifier = physical_property.substance.identifier

        if substance_identifier not in stored_data_paths:
            continue

        global_metadata['full_system_data'] = stored_data_paths[substance_identifier]
        global_metadata['component_data'] = []

        if physical_property.multi_component_property:

            missing_component_data = False

            for component in physical_property.substance.components:

                # NOTE(review): the lookup key is the identifier of a
                # `MixtureComponent` - confirm it matches the keys used
                # to populate `stored_data_paths`.
                pure_component = Mixture.MixtureComponent(
                    component.smiles, mole_fraction=1.0, impurity=False)

                component_identifier = pure_component.identifier

                if component_identifier in stored_data_paths:

                    global_metadata['component_data'].append(
                        stored_data_paths[component_identifier])

                    continue

                # Data is required for every component, so give up on this
                # property as soon as one is found to be missing.
                missing_component_data = True
                break

            if missing_component_data:
                continue

        workflow = Workflow(physical_property, global_metadata)
        workflow.schema = schema

        from propertyestimator.properties import CalculationSource

        workflow.physical_property.source = CalculationSource(
            fidelity=layer_name, provenance={})

        graph_to_build.add_workflow(workflow)

    return graph_to_build