def process_successful_property(physical_property, layer_directory, **_):
    """Return a result as if the property had been successfully estimated.

    A placeholder data directory (``good_dummy_data``) and a JSON file
    (``good_dummy_data.json``) holding a `StoredSimulationData` object are
    created inside `layer_directory`, and both are registered on the
    returned result as data to store.

    Parameters
    ----------
    physical_property
        The property to report as estimated. Its `substance`,
        `thermodynamic_state` and `phase` attributes are copied onto the
        stored data object.
    layer_directory: str
        The directory in which the placeholder directory and JSON file
        are created.
    **_
        Any extra keyword arguments are accepted and ignored.

    Returns
    -------
    CalculationLayerResult
        A result whose `physical_property` is the passed in property and
        whose `data_to_store` holds a single
        ``(json file path, data directory path)`` tuple.
    """
    data_directory = path.join(layer_directory, "good_dummy_data")
    makedirs(data_directory, exist_ok=True)

    stored_data = StoredSimulationData()

    # Details taken directly from the property being "estimated".
    stored_data.substance = physical_property.substance
    stored_data.thermodynamic_state = physical_property.thermodynamic_state
    stored_data.property_phase = physical_property.phase

    # No real simulation was run, so the identifying fields are left blank.
    stored_data.force_field_id = ""

    for file_attribute in (
        "coordinate_file_name",
        "trajectory_file_name",
        "statistics_file_name",
    ):
        setattr(stored_data, file_attribute, "")

    stored_data.statistical_inefficiency = 1.0
    stored_data.number_of_molecules = 10
    stored_data.source_calculation_id = ""

    stored_data_path = path.join(layer_directory, "good_dummy_data.json")

    with open(stored_data_path, "w") as json_file:
        json.dump(stored_data, json_file, cls=TypedJSONEncoder)

    result = CalculationLayerResult()
    result.physical_property = physical_property
    result.data_to_store = [(stored_data_path, data_directory)]

    return result
def create_dummy_simulation_data(
    directory_path,
    substance,
    force_field_id="dummy_ff_id",
    coordinate_file_name="output.pdb",
    trajectory_file_name="trajectory.dcd",
    statistics_file_name="statistics.csv",
    statistical_inefficiency=1.0,
    phase=PropertyPhase.Liquid,
    number_of_molecules=1,
    calculation_id=None,
):
    """Creates a dummy `StoredSimulationData` object and the corresponding
    data directory, populated with empty placeholder files.

    Parameters
    ----------
    directory_path: str
        The path to the dummy data directory to create.
    substance: Substance
        The substance to assign to the stored data object.
    force_field_id
        The value to store as the data's `force_field_id`.
    coordinate_file_name
        The name of the empty coordinate file to create in
        `directory_path`.
    trajectory_file_name
        The name of the empty trajectory file to create in
        `directory_path`.
    statistics_file_name
        The name of the empty statistics file to create in
        `directory_path`.
    statistical_inefficiency
        The value to store as the data's `statistical_inefficiency`.
    phase
        The value to store as the data's `property_phase`.
    number_of_molecules
        The value to store as the data's `number_of_molecules`.
    calculation_id
        The value to store as the data's `source_calculation_id`. If
        `None`, a new random UUID string is generated.

    Returns
    -------
    StoredSimulationData
        The dummy stored data object. Its thermodynamic state is always
        a `ThermodynamicState` built from ``1.0 * unit.kelvin``.
    """
    os.makedirs(directory_path, exist_ok=True)

    stored_data = StoredSimulationData()

    stored_data.substance = substance
    stored_data.force_field_id = force_field_id
    stored_data.thermodynamic_state = ThermodynamicState(1.0 * unit.kelvin)
    stored_data.property_phase = phase

    stored_data.coordinate_file_name = coordinate_file_name
    stored_data.trajectory_file_name = trajectory_file_name
    stored_data.statistics_file_name = statistics_file_name

    # Create (or truncate) an empty placeholder for each named file.
    for placeholder_name in (
        coordinate_file_name,
        trajectory_file_name,
        statistics_file_name,
    ):
        placeholder_path = os.path.join(directory_path, placeholder_name)

        with open(placeholder_path, "w") as placeholder_file:
            placeholder_file.write("")

    stored_data.statistical_inefficiency = statistical_inefficiency
    stored_data.number_of_molecules = number_of_molecules

    # Fall back to a freshly generated id when none was supplied.
    stored_data.source_calculation_id = (
        str(uuid.uuid4()) if calculation_id is None else calculation_id
    )

    return stored_data
def generate_base_simulation_protocols(
    analysis_protocol,
    use_target_uncertainty,
    id_suffix="",
    conditional_group=None,
    n_molecules=1000,
):
    """Constructs a set of protocols which, when combined in a workflow schema,
    may be executed to run a single simulation to estimate a particular
    property.

    The observable of interest to extract from the simulation is determined
    by the passed in `analysis_protocol`.

    The protocols returned will:

        1) Build a set of liquid coordinates for the
           property substance using packmol.

        2) Assign a set of smirnoff force field parameters
           to the system.

        3) Perform an energy minimisation on the system.

        4) Run a short NPT equilibration simulation for 100000 steps
           using a timestep of 2fs.

        5) Within a conditional group (up to a maximum of 100 times):

            5a) Run a longer NPT production simulation for 1000000 steps
                using a timestep of 2fs

            5b) Extract the average value of an observable and its
                uncertainty.

            5c) If a convergence mode is set by the options, check if the
                target uncertainty has been met. If not, repeat steps
                5a), 5b) and 5c).

        6) Extract uncorrelated configurations from a generated production
           simulation.

        7) Extract uncorrelated statistics from a generated production
           simulation.

    Parameters
    ----------
    analysis_protocol: AveragePropertyProtocol
        The protocol which will extract the observable of
        interest from the generated simulation data. Its data source
        inputs are set by this function.
    use_target_uncertainty: bool
        Whether to run the simulation until the observable is
        estimated to within the target uncertainty. Only consulted when
        the default conditional group is constructed here.
    id_suffix: str
        A string suffix to append to each of the protocol ids.
    conditional_group: ProtocolGroup, optional
        A custom group to wrap the main simulation / extraction
        protocols within. It is up to the caller of this method to
        manually add the convergence conditions to this group.
        If `None`, a default group with uncertainty convergence
        conditions is automatically constructed.
    n_molecules: int
        The number of molecules to use in the workflow.

    Returns
    -------
    BaseSimulationProtocols
        A named tuple of the generated protocols.
    ProtocolPath
        A reference to the final value of the estimated observable
        and its uncertainty (a `pint.Measurement`).
    StoredSimulationData
        An object which describes the default data from a simulation to
        store, such as the uncorrelated statistics and configurations.

    Raises
    ------
    AssertionError
        If `analysis_protocol` is not an instance of
        `analysis.AveragePropertyProtocol` (only while assertions are
        enabled).
    ValueError
        If `analysis_protocol` is neither an
        `analysis.AverageTrajectoryProperty` nor an
        `analysis.ExtractAverageStatistic`.
    """

    assert isinstance(analysis_protocol, analysis.AveragePropertyProtocol)

    # Build the coordinates of the system from the globally provided substance.
    build_coordinates = coordinates.BuildCoordinatesPackmol(
        f"build_coordinates{id_suffix}")

    build_coordinates.substance = ProtocolPath("substance", "global")
    # The requested molecule count is passed on as the builder's upper limit.
    build_coordinates.max_molecules = n_molecules

    # Parameterise the built coordinates with the globally provided force field.
    assign_parameters = forcefield.BaseBuildSystem(
        f"assign_parameters{id_suffix}")

    assign_parameters.force_field_path = ProtocolPath("force_field_path", "global")
    assign_parameters.coordinate_file_path = ProtocolPath(
        "coordinate_file_path", build_coordinates.id)
    # Use the substance reported by the coordinate builder rather than the
    # global one.
    assign_parameters.substance = ProtocolPath("output_substance", build_coordinates.id)

    # Equilibration
    energy_minimisation = openmm.OpenMMEnergyMinimisation(
        f"energy_minimisation{id_suffix}")

    energy_minimisation.input_coordinate_file = ProtocolPath(
        "coordinate_file_path", build_coordinates.id)
    energy_minimisation.system_path = ProtocolPath("system_path", assign_parameters.id)

    equilibration_simulation = openmm.OpenMMSimulation(
        f"equilibration_simulation{id_suffix}")

    equilibration_simulation.ensemble = Ensemble.NPT
    equilibration_simulation.steps_per_iteration = 100000
    equilibration_simulation.output_frequency = 5000
    equilibration_simulation.timestep = 2.0 * unit.femtosecond
    equilibration_simulation.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global")
    # Start the equilibration from the minimised coordinates.
    equilibration_simulation.input_coordinate_file = ProtocolPath(
        "output_coordinate_file", energy_minimisation.id)
    equilibration_simulation.system_path = ProtocolPath(
        "system_path", assign_parameters.id)

    # Production
    production_simulation = openmm.OpenMMSimulation(
        f"production_simulation{id_suffix}")

    production_simulation.ensemble = Ensemble.NPT
    production_simulation.steps_per_iteration = 1000000
    production_simulation.output_frequency = 2000
    production_simulation.timestep = 2.0 * unit.femtosecond
    production_simulation.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global")
    # Start the production run from the equilibrated coordinates.
    production_simulation.input_coordinate_file = ProtocolPath(
        "output_coordinate_file", equilibration_simulation.id)
    production_simulation.system_path = ProtocolPath("system_path", assign_parameters.id)

    # Set up a conditional group to ensure convergence of uncertainty
    if conditional_group is None:

        conditional_group = groups.ConditionalGroup(
            f"conditional_group{id_suffix}")
        conditional_group.max_iterations = 100

        if use_target_uncertainty:

            # Condition: the analysis protocol's `value.error` must be less
            # than the global `target_uncertainty`.
            condition = groups.ConditionalGroup.Condition()

            condition.right_hand_value = ProtocolPath("target_uncertainty", "global")
            condition.type = groups.ConditionalGroup.Condition.Type.LessThan

            condition.left_hand_value = ProtocolPath("value.error",
                                                     conditional_group.id,
                                                     analysis_protocol.id)

            conditional_group.add_condition(condition)

            # Make sure the simulation gets extended after each iteration.
            production_simulation.total_number_of_iterations = ProtocolPath(
                "current_iteration", conditional_group.id)

    # Both the default and any caller supplied group receive the production
    # and analysis protocols.
    conditional_group.add_protocols(production_simulation, analysis_protocol)

    # Point the analyse protocol to the correct data source
    # NOTE: this type check happens after the protocols have already been
    # added to the group above.
    if isinstance(analysis_protocol, analysis.AverageTrajectoryProperty):

        analysis_protocol.input_coordinate_file = ProtocolPath(
            "coordinate_file_path", build_coordinates.id)
        analysis_protocol.trajectory_path = ProtocolPath(
            "trajectory_file_path", production_simulation.id)

    elif isinstance(analysis_protocol, analysis.ExtractAverageStatistic):

        analysis_protocol.statistics_path = ProtocolPath(
            "statistics_file_path", production_simulation.id)

    else:

        raise ValueError(
            "The analysis protocol must inherit from either the "
            "AverageTrajectoryProperty or ExtractAverageStatistic "
            "protocols.")

    # Finally, extract uncorrelated data
    # These paths are built from the conditional group's id followed by the
    # id of the protocol which was added to that group.
    statistical_inefficiency = ProtocolPath("statistical_inefficiency",
                                            conditional_group.id,
                                            analysis_protocol.id)
    equilibration_index = ProtocolPath("equilibration_index",
                                       conditional_group.id,
                                       analysis_protocol.id)
    coordinate_file = ProtocolPath("output_coordinate_file",
                                   conditional_group.id,
                                   production_simulation.id)
    trajectory_path = ProtocolPath("trajectory_file_path",
                                   conditional_group.id,
                                   production_simulation.id)
    statistics_path = ProtocolPath("statistics_file_path",
                                   conditional_group.id,
                                   production_simulation.id)

    extract_uncorrelated_trajectory = analysis.ExtractUncorrelatedTrajectoryData(
        f"extract_traj{id_suffix}")

    extract_uncorrelated_trajectory.statistical_inefficiency = statistical_inefficiency
    extract_uncorrelated_trajectory.equilibration_index = equilibration_index
    extract_uncorrelated_trajectory.input_coordinate_file = coordinate_file
    extract_uncorrelated_trajectory.input_trajectory_path = trajectory_path

    extract_uncorrelated_statistics = analysis.ExtractUncorrelatedStatisticsData(
        f"extract_stats{id_suffix}")

    extract_uncorrelated_statistics.statistical_inefficiency = statistical_inefficiency
    extract_uncorrelated_statistics.equilibration_index = equilibration_index
    extract_uncorrelated_statistics.input_statistics_path = statistics_path

    # Build the object which defines which pieces of simulation data to store.
    output_to_store = StoredSimulationData()

    output_to_store.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
    output_to_store.property_phase = PropertyPhase.Liquid

    # Left as a placeholder here; presumably filled in later by whatever
    # consumes this object - TODO confirm.
    output_to_store.force_field_id = PlaceholderValue()

    output_to_store.number_of_molecules = ProtocolPath(
        "output_number_of_molecules", build_coordinates.id)
    output_to_store.substance = ProtocolPath("output_substance", build_coordinates.id)
    output_to_store.statistical_inefficiency = statistical_inefficiency
    # The statistics and trajectory to store are the uncorrelated outputs of
    # the two extraction protocols; the coordinate file comes from the
    # production simulation.
    output_to_store.statistics_file_name = ProtocolPath(
        "output_statistics_path", extract_uncorrelated_statistics.id)
    output_to_store.trajectory_file_name = ProtocolPath(
        "output_trajectory_path", extract_uncorrelated_trajectory.id)
    output_to_store.coordinate_file_name = coordinate_file

    # Also a placeholder; see the note on `force_field_id` above.
    output_to_store.source_calculation_id = PlaceholderValue()

    # Define where the final values come from.
    final_value_source = ProtocolPath("value",
                                      conditional_group.id,
                                      analysis_protocol.id)

    base_protocols = BaseSimulationProtocols(
        build_coordinates,
        assign_parameters,
        energy_minimisation,
        equilibration_simulation,
        production_simulation,
        analysis_protocol,
        conditional_group,
        extract_uncorrelated_trajectory,
        extract_uncorrelated_statistics,
    )

    return base_protocols, final_value_source, output_to_store