def test_nested_input():
    """Execute a workflow whose second protocol reads a nested path.

    The first protocol is given a dictionary holding a thermodynamic state,
    and the second protocol's input is the path
    ``output_value[a].temperature`` into the first protocol. The workflow is
    executed on a local Dask cluster and must yield one ``WorkflowResult``.
    """
    source_protocol = DummyProtocol("dict_protocol")
    source_protocol.input_value = {"a": ThermodynamicState(1.0 * unit.kelvin)}

    consumer_protocol = DummyProtocol("quantity_protocol")
    consumer_protocol.input_value = ProtocolPath(
        "output_value[a].temperature", source_protocol.id
    )

    workflow_schema = WorkflowSchema()
    workflow_schema.protocol_schemas = [
        source_protocol.schema,
        consumer_protocol.schema,
    ]
    workflow_schema.validate()

    workflow = Workflow({})
    workflow.schema = workflow_schema

    graph = workflow.to_graph()

    # Both the scratch directory and the cluster are released on exit.
    with tempfile.TemporaryDirectory() as working_directory, \
            DaskLocalCluster() as backend:

        futures = graph.execute(working_directory, backend)
        assert len(futures) == 1

        outcome = futures[0].result()
        assert isinstance(outcome, WorkflowResult)
def test_simple_workflow_graph(calculation_backend, compute_resources, exception):
    """Execute a two-protocol workflow graph with and without a backend.

    Parameters
    ----------
    calculation_backend
        ``None`` to execute the graph without a calculation backend; any
        other value causes a fresh ``DaskLocalCluster`` to be started and
        used for the execution.
    compute_resources
        Forwarded unchanged as the third argument of
        ``WorkflowGraph.execute``.
    exception: bool
        Whether executing the graph is expected to raise an
        ``AssertionError``.
    """
    expected_value = (1 * unit.kelvin).plus_minus(0.1 * unit.kelvin)

    protocol_a = DummyInputOutputProtocol("protocol_a")
    protocol_a.input_value = expected_value
    protocol_b = DummyInputOutputProtocol("protocol_b")
    protocol_b.input_value = ProtocolPath("output_value", protocol_a.id)

    schema = WorkflowSchema()
    schema.protocol_schemas = [protocol_a.schema, protocol_b.schema]
    schema.final_value_source = ProtocolPath("output_value", protocol_b.id)
    schema.validate()

    workflow = Workflow({})
    workflow.schema = schema

    workflow_graph = workflow.to_graph()

    with tempfile.TemporaryDirectory() as directory:

        if calculation_backend is not None:

            # Use a dedicated local name rather than rebinding the test
            # parameter, so the argument stays readable throughout the test.
            with DaskLocalCluster() as local_backend:

                if exception:

                    with pytest.raises(AssertionError):
                        workflow_graph.execute(
                            directory, local_backend, compute_resources
                        )

                    return

                results_futures = workflow_graph.execute(
                    directory, local_backend, compute_resources
                )

                assert len(results_futures) == 1
                result = results_futures[0].result()

        else:

            # The expected-failure check must come *before* the first
            # execution; previously the graph was executed first, so an
            # expected AssertionError would have escaped un-caught.
            if exception:

                with pytest.raises(AssertionError):
                    workflow_graph.execute(
                        directory, calculation_backend, compute_resources
                    )

                return

            result = workflow_graph.execute(
                directory, calculation_backend, compute_resources
            )[0]

        assert isinstance(result, WorkflowResult)
        assert result.value.value == expected_value.value
def test_from_schema():
    """Build a workflow via ``Workflow.from_schema`` and compare schemas.

    After resetting ``outputs_to_store`` on the schema exposed by the
    workflow, its JSON form must match the JSON form of the input schema.
    """
    only_protocol = DummyProtocol("protocol_a")
    only_protocol.input_value = 1 * unit.kelvin

    original_schema = WorkflowSchema()
    original_schema.protocol_schemas = [only_protocol.schema]

    workflow = Workflow.from_schema(original_schema, {}, unique_id="")
    assert workflow is not None

    round_tripped = workflow.schema
    round_tripped.outputs_to_store = UNDEFINED

    round_tripped_json = round_tripped.json(format=True)
    original_json = original_schema.json(format=True)

    assert round_tripped_json == original_json
def test_workflow_with_groups():
    """Execute a workflow made of a single conditional group.

    Two chained input/output protocols are placed in a ``ConditionalGroup``
    with a ``LessThan`` condition on the second protocol's output. The final
    value of the executed workflow must equal the value fed into the first
    protocol.
    """
    expected_value = (1 * unit.kelvin).plus_minus(0.1 * unit.kelvin)

    first = DummyInputOutputProtocol("protocol_a")
    first.input_value = expected_value

    second = DummyInputOutputProtocol("protocol_b")
    second.input_value = ProtocolPath("output_value", first.id)

    group = ConditionalGroup("conditional_group")
    group.add_protocols(first, second)

    # The group's condition compares the second protocol's output to 2 K.
    less_than_two = ConditionalGroup.Condition()
    less_than_two.right_hand_value = 2 * unit.kelvin
    less_than_two.type = ConditionalGroup.Condition.Type.LessThan
    less_than_two.left_hand_value = ProtocolPath(
        "output_value.value", group.id, second.id
    )
    group.add_condition(less_than_two)

    workflow_schema = WorkflowSchema()
    workflow_schema.protocol_schemas = [group.schema]
    workflow_schema.final_value_source = ProtocolPath(
        "output_value", group.id, second.id
    )
    workflow_schema.validate()

    workflow = Workflow({})
    workflow.schema = workflow_schema

    graph = workflow.to_graph()

    with tempfile.TemporaryDirectory() as working_directory, \
            DaskLocalCluster() as backend:

        futures = graph.execute(working_directory, backend)
        assert len(futures) == 1

        outcome = futures[0].result()

        assert isinstance(outcome, WorkflowResult)
        assert outcome.value.value == expected_value.value
def test_nested_replicators():
    """Apply two independent replicators to a single protocol.

    The protocol id contains the placeholders of both replicators, which
    have two template values each, so four protocols are expected after the
    schema is assigned to a workflow.
    """
    schema = WorkflowSchema()

    template_protocol = DummyReplicableProtocol("dummy_$(rep_a)_$(rep_b)")
    template_protocol.replicated_value_a = ReplicatorValue("rep_a")
    template_protocol.replicated_value_b = ReplicatorValue("rep_b")

    schema.protocol_schemas = [template_protocol.schema]

    letters_replicator = ProtocolReplicator(replicator_id="rep_a")
    letters_replicator.template_values = ["a", "b"]

    numbers_replicator = ProtocolReplicator(replicator_id="rep_b")
    numbers_replicator.template_values = [1, 2]

    schema.protocol_replicators = [letters_replicator, numbers_replicator]
    schema.validate()

    physical_property = create_dummy_property(Density)

    metadata = Workflow.generate_default_metadata(
        physical_property, "smirnoff99Frosst-1.1.0.offxml", []
    )

    workflow = Workflow(metadata, "")
    workflow.schema = schema

    assert len(workflow.protocols) == 4

    # Expected replicated values, keyed by the id of the expanded protocol.
    expected_a = [
        ("dummy_0_0", "a"),
        ("dummy_0_1", "a"),
        ("dummy_1_0", "b"),
        ("dummy_1_1", "b"),
    ]
    expected_b = [
        ("dummy_0_0", 1),
        ("dummy_0_1", 2),
        ("dummy_1_0", 1),
        ("dummy_1_1", 2),
    ]

    for protocol_id, value in expected_a:
        assert workflow.protocols[protocol_id].replicated_value_a == value

    for protocol_id, value in expected_b:
        assert workflow.protocols[protocol_id].replicated_value_b == value

    print(workflow.schema)
def test_replicated_ids():
    """Validate a schema whose replicated group has an un-replicated child.

    The group id contains the replicator placeholder but the id of its
    child protocol does not; validation must raise a ``ValueError`` whose
    message names the group.
    """
    replicator = ProtocolReplicator("replicator-a")

    child_protocol = DummyProtocol("protocol-a")
    child_protocol.input_value = 1

    replicated_group = ProtocolGroup(f"group-a-{replicator.placeholder_id}")
    replicated_group.add_protocols(child_protocol)

    schema = WorkflowSchema()
    schema.protocol_schemas = [replicated_group.schema]
    schema.protocol_replicators = [replicator]

    with pytest.raises(ValueError) as error_info:
        schema.validate()

    expected_message = (
        f"The children of replicated protocol {replicated_group.id} must also contain the "
        "replicators placeholder"
    )

    assert expected_message in str(error_info.value)
def test_unique_ids():
    """Validate a schema in which two groups contain the same protocol.

    Validation must raise a ``ValueError`` which reports the duplicated
    protocol id.
    """
    shared_protocol = DummyProtocol("protocol-a")
    shared_protocol.input_value = 1

    # Add the very same protocol to two different groups.
    first_group = ProtocolGroup("group-a")
    first_group.add_protocols(shared_protocol)

    second_group = ProtocolGroup("group-b")
    second_group.add_protocols(shared_protocol)

    schema = WorkflowSchema()
    schema.protocol_schemas = [first_group.schema, second_group.schema]

    with pytest.raises(ValueError) as error_info:
        schema.validate()

    assert "Several protocols in the schema have the same id" in str(
        error_info.value
    )
    assert "protocol-a" in str(error_info.value)
def test_advanced_nested_replicators():
    """Nest a replicator whose template values depend on an outer one.

    The inner replicator's id and template values both contain the outer
    replicator's placeholder; its values are read from the ``dummy_list``
    entry of the workflow metadata.
    """
    schema = WorkflowSchema()

    outer_replicator = ProtocolReplicator(replicator_id="replicator_a")
    outer_replicator.template_values = ["a", "b"]

    inner_replicator = ProtocolReplicator(
        replicator_id=f"replicator_b_{outer_replicator.placeholder_id}"
    )
    inner_replicator.template_values = ProtocolPath(
        f"dummy_list[{outer_replicator.placeholder_id}]", "global"
    )

    template_protocol = DummyReplicableProtocol(
        f"dummy_{outer_replicator.placeholder_id}_{inner_replicator.placeholder_id}"
    )
    template_protocol.replicated_value_a = ReplicatorValue(outer_replicator.id)
    template_protocol.replicated_value_b = ReplicatorValue(inner_replicator.id)

    schema.protocol_schemas = [template_protocol.schema]
    schema.protocol_replicators = [outer_replicator, inner_replicator]
    schema.validate()

    physical_property = create_dummy_property(Density)

    metadata = Workflow.generate_default_metadata(
        physical_property, "smirnoff99Frosst-1.1.0.offxml", []
    )
    # One single-element list of inner values per outer template value.
    metadata["dummy_list"] = [[1], [2]]

    workflow = Workflow(metadata, "")
    workflow.schema = schema

    assert len(workflow.protocols) == 2

    first_protocol = workflow.protocols["dummy_0_0"]
    assert first_protocol.replicated_value_a == "a"
    assert first_protocol.replicated_value_b == 1

    second_protocol = workflow.protocols["dummy_1_0"]
    assert second_protocol.replicated_value_a == "b"
    assert second_protocol.replicated_value_b == 2
def test_index_replicated_protocol():
    """Reference individual replicas of a protocol by their expanded ids.

    One un-replicated protocol is added per template value, each taking its
    input from ``protocol_<index>``. The schema must validate and be
    assignable to a workflow.
    """
    replicator = ProtocolReplicator("replicator")
    replicator.template_values = ["a", "b", "c", "d"]

    template_protocol = DummyProtocol(f"protocol_{replicator.placeholder_id}")
    template_protocol.input_value = ReplicatorValue(replicator.id)

    schema = WorkflowSchema()
    schema.protocol_replicators = [replicator]
    schema.protocol_schemas = [template_protocol.schema]

    for index, _ in enumerate(replicator.template_values):

        consumer = DummyProtocol(f"indexing_protocol_{index}")
        consumer.input_value = ProtocolPath("output_value", f"protocol_{index}")

        schema.protocol_schemas.append(consumer.schema)

    schema.validate()

    workflow = Workflow({})
    workflow.schema = schema
def default_simulation_schema(absolute_tolerance=UNDEFINED,
                              relative_tolerance=UNDEFINED,
                              n_molecules=2000):
    """Build the default schema for estimating this property by simulation.

    The workflow builds and equilibrates a fully solvated system, builds a
    single-molecule solute system, and then runs the YANK solvation protocol
    inside a conditional group.

    Parameters
    ----------
    absolute_tolerance: pint.Quantity, optional
        The absolute tolerance to estimate the property to within.
    relative_tolerance: float
        The tolerance (as a fraction of the properties reported
        uncertainty) to estimate the property to within.
    n_molecules: int
        The number of molecules to use in the simulation.

    Returns
    -------
    SimulationSchema
        The schema to follow when estimating this property.
    """
    # At most one of the two tolerances may be provided.
    assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

    layer_schema = SimulationSchema()
    layer_schema.absolute_tolerance = absolute_tolerance
    layer_schema.relative_tolerance = relative_tolerance

    use_target_uncertainty = (
        absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED
    )

    # --- Fully solvated system -------------------------------------------
    solvated_coordinates = coordinates.BuildCoordinatesPackmol(
        "build_solvated_coordinates"
    )
    solvated_coordinates.substance = ProtocolPath("substance", "global")
    solvated_coordinates.max_molecules = n_molecules

    solvated_system = forcefield.BaseBuildSystem("assign_solvated_parameters")
    solvated_system.force_field_path = ProtocolPath("force_field_path", "global")
    solvated_system.substance = ProtocolPath("substance", "global")
    solvated_system.coordinate_file_path = ProtocolPath(
        "coordinate_file_path", solvated_coordinates.id
    )

    # A short minimisation of the solvated system gives YANK a better
    # starting point for its own minimisation.
    minimisation = openmm.OpenMMEnergyMinimisation("energy_minimisation")
    minimisation.system_path = ProtocolPath("system_path", solvated_system.id)
    minimisation.input_coordinate_file = ProtocolPath(
        "coordinate_file_path", solvated_coordinates.id
    )

    equilibration = openmm.OpenMMSimulation("equilibration_simulation")
    equilibration.ensemble = Ensemble.NPT
    equilibration.steps_per_iteration = 100000
    equilibration.output_frequency = 10000
    equilibration.timestep = 2.0 * unit.femtosecond
    equilibration.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    equilibration.system_path = ProtocolPath("system_path", solvated_system.id)
    equilibration.input_coordinate_file = ProtocolPath(
        "output_coordinate_file", minimisation.id
    )

    # --- Solvent-only / solute-only substances ---------------------------
    # The solute-only substance is what the vacuum phase is built from.
    solvent_filter = miscellaneous.FilterSubstanceByRole("filter_solvent")
    solvent_filter.input_substance = ProtocolPath("substance", "global")
    solvent_filter.component_roles = [Component.Role.Solvent]

    solute_filter = miscellaneous.FilterSubstanceByRole("filter_solute")
    solute_filter.input_substance = ProtocolPath("substance", "global")
    solute_filter.component_roles = [Component.Role.Solute]

    # --- Solute in vacuum ------------------------------------------------
    vacuum_coordinates = coordinates.BuildCoordinatesPackmol(
        "build_vacuum_coordinates"
    )
    vacuum_coordinates.substance = ProtocolPath(
        "filtered_substance", solute_filter.id
    )
    vacuum_coordinates.max_molecules = 1

    vacuum_system = forcefield.BaseBuildSystem("assign_parameters")
    vacuum_system.force_field_path = ProtocolPath("force_field_path", "global")
    vacuum_system.substance = ProtocolPath("filtered_substance", solute_filter.id)
    vacuum_system.coordinate_file_path = ProtocolPath(
        "coordinate_file_path", vacuum_coordinates.id
    )

    # --- YANK ------------------------------------------------------------
    yank_protocol = yank.SolvationYankProtocol("run_solvation_yank")
    yank_protocol.solute = ProtocolPath("filtered_substance", solute_filter.id)
    yank_protocol.solvent_1 = ProtocolPath("filtered_substance", solvent_filter.id)
    yank_protocol.solvent_2 = Substance()
    yank_protocol.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    yank_protocol.steps_per_iteration = 500
    yank_protocol.checkpoint_interval = 50
    yank_protocol.solvent_1_coordinates = ProtocolPath(
        "output_coordinate_file", equilibration.id
    )
    yank_protocol.solvent_1_system = ProtocolPath("system_path", solvated_system.id)
    yank_protocol.solvent_2_coordinates = ProtocolPath(
        "coordinate_file_path", vacuum_coordinates.id
    )
    yank_protocol.solvent_2_system = ProtocolPath("system_path", vacuum_system.id)

    # The group re-runs YANK until the free energy has been determined to
    # within the requested uncertainty (when one was requested).
    yank_group = groups.ConditionalGroup("conditional_group")
    yank_group.max_iterations = 20

    if use_target_uncertainty:

        converged = groups.ConditionalGroup.Condition()
        converged.type = groups.ConditionalGroup.Condition.Type.LessThan
        converged.right_hand_value = ProtocolPath("target_uncertainty", "global")
        converged.left_hand_value = ProtocolPath(
            "estimated_free_energy.error", yank_group.id, yank_protocol.id
        )

        yank_group.add_condition(converged)

    # The total number of YANK iterations is 2000 multiplied by the group's
    # current iteration.
    iteration_count = miscellaneous.MultiplyValue("total_iterations")
    iteration_count.value = 2000
    iteration_count.multiplier = ProtocolPath("current_iteration", yank_group.id)

    # Feeding the product back in extends the simulation on each iteration.
    yank_protocol.number_of_iterations = ProtocolPath("result", iteration_count.id)

    yank_group.add_protocols(iteration_count, yank_protocol)

    # --- Full workflow schema --------------------------------------------
    top_level_protocols = (
        solvated_coordinates,
        solvated_system,
        minimisation,
        equilibration,
        solvent_filter,
        solute_filter,
        vacuum_coordinates,
        vacuum_system,
        yank_group,
    )

    workflow_schema = WorkflowSchema()
    workflow_schema.protocol_schemas = [
        protocol.schema for protocol in top_level_protocols
    ]
    workflow_schema.final_value_source = ProtocolPath(
        "estimated_free_energy", yank_group.id, yank_protocol.id
    )

    layer_schema.workflow_schema = workflow_schema
    return layer_schema
def test_group_replicators():
    """Replicate a protocol which lives inside a protocol group.

    The output of the replicated, grouped protocol is consumed both by a
    protocol which is itself replicated and by an un-replicated protocol;
    the latter is expected to receive a list with one path per replica.
    """
    schema = WorkflowSchema()

    replicator_id = "replicator"

    grouped_protocol = DummyInputOutputProtocol(f"dummy_$({replicator_id})")
    grouped_protocol.input_value = ReplicatorValue(replicator_id)

    group = ProtocolGroup("dummy_group")
    group.add_protocols(grouped_protocol)

    single_value_protocol = DummyInputOutputProtocol(
        f"dummy_single_$({replicator_id})"
    )
    single_value_protocol.input_value = ProtocolPath(
        "output_value", group.id, grouped_protocol.id
    )

    list_value_protocol = AddValues("dummy_list")
    list_value_protocol.values = ProtocolPath(
        "output_value", group.id, grouped_protocol.id
    )

    schema.protocol_schemas = [
        group.schema,
        single_value_protocol.schema,
        list_value_protocol.schema,
    ]

    replicator = ProtocolReplicator(replicator_id)
    replicator.template_values = [
        (1.0 * unit.kelvin).plus_minus(1.0 * unit.kelvin),
        (2.0 * unit.kelvin).plus_minus(2.0 * unit.kelvin),
    ]

    schema.protocol_replicators = [replicator]
    schema.validate()

    physical_property = create_dummy_property(Density)

    metadata = Workflow.generate_default_metadata(
        physical_property, "smirnoff99Frosst-1.1.0.offxml", []
    )

    workflow = Workflow(metadata, "")
    workflow.schema = schema

    assert len(workflow.protocols) == 4

    replica_indices = (0, 1)

    # Each replica inside the group receives its own template value.
    for index in replica_indices:
        replica = workflow.protocols[group.id].protocols[f"dummy_{index}"]
        assert (
            replica.input_value.value == replicator.template_values[index].value
        )

    # The replicated consumer points at the matching replica in the group.
    for index in replica_indices:
        consumer = workflow.protocols[f"dummy_single_{index}"]
        assert consumer.input_value == ProtocolPath(
            "output_value", group.id, f"dummy_{index}"
        )

    # The un-replicated consumer holds one path per replica.
    assert len(workflow.protocols["dummy_list"].values) == 2

    for index in replica_indices:
        assert workflow.protocols["dummy_list"].values[index] == ProtocolPath(
            "output_value", group.id, f"dummy_{index}"
        )
def test_workflow_layer():
    """Test the `WorkflowLayer` calculation layer.

    The `SimulationLayer` is used here as the simplest implementation of
    the abstract layer. Two dummy density properties are scheduled with a
    one-protocol workflow, and the callback checks that both were estimated
    without any exceptions.
    """

    def check_request(returned_request):
        # Both queued properties must come back estimated and error free.
        assert len(returned_request.estimated_properties) == 2
        assert len(returned_request.exceptions) == 0

    queued_properties = [create_dummy_property(Density) for _ in range(2)]

    # A minimal workflow which just returns a placeholder value.
    placeholder_value = Observable((1 * unit.kelvin).plus_minus(0.1 * unit.kelvin))

    placeholder_protocol = DummyProtocol("protocol_a")
    placeholder_protocol.input_value = placeholder_value

    workflow_schema = WorkflowSchema()
    workflow_schema.protocol_schemas = [placeholder_protocol.schema]
    workflow_schema.final_value_source = ProtocolPath(
        "output_value", placeholder_protocol.id
    )

    layer_schema = SimulationSchema()
    layer_schema.workflow_schema = workflow_schema

    request_options = RequestOptions()
    request_options.add_schema("SimulationLayer", "Density", layer_schema)

    batch = server.Batch()
    batch.queued_properties = queued_properties
    batch.options = request_options

    with tempfile.TemporaryDirectory() as directory:

        with temporarily_change_directory(directory):

            # The layer works out of its own sub-directory.
            layer_directory = "simulation_layer"
            os.makedirs(layer_directory)

            # A simple storage backend holding the force field to use.
            force_field = SmirnoffForceFieldSource.from_path(
                "smirnoff99Frosst-1.1.0.offxml"
            )

            storage_backend = LocalFileStorage()
            batch.force_field_id = storage_backend.store_force_field(force_field)

            # A simple calculation backend to test with.
            with DaskLocalCluster() as calculation_backend:

                layer = SimulationLayer()

                layer.schedule_calculation(
                    calculation_backend,
                    storage_backend,
                    layer_directory,
                    batch,
                    check_request,
                    True,
                )
def default_simulation_schema(
    absolute_tolerance=UNDEFINED, relative_tolerance=UNDEFINED, n_molecules=1000
):
    """Build the default schema for estimating this property by simulation.

    The standard simulation protocols are generated around an
    ``AverageDielectricConstant`` analysis protocol, and an extra protocol
    which computes dipole moments is added to the convergence group.

    Parameters
    ----------
    absolute_tolerance: openff.evaluator.unit.Quantity, optional
        The absolute tolerance to estimate the property to within.
    relative_tolerance: float
        The tolerance (as a fraction of the properties reported
        uncertainty) to estimate the property to within.
    n_molecules: int
        The number of molecules to use in the simulation.

    Returns
    -------
    SimulationSchema
        The schema to follow when estimating this property.
    """
    # At most one of the two tolerances may be provided.
    assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

    layer_schema = SimulationSchema()
    layer_schema.absolute_tolerance = absolute_tolerance
    layer_schema.relative_tolerance = relative_tolerance

    use_target_uncertainty = (
        absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED
    )

    # The protocols which run the simulation itself.
    analysis_protocol = AverageDielectricConstant("average_dielectric")

    protocols, value_source, output_to_store = generate_simulation_protocols(
        analysis_protocol,
        use_target_uncertainty,
        n_molecules=n_molecules,
    )

    # Compute the dipole moments of the production trajectory inside the
    # convergence group, and pass them on to the analysis protocol.
    production_id = protocols.production_simulation.id

    dipole_protocol = ComputeDipoleMoments("compute_dipoles")
    dipole_protocol.parameterized_system = ProtocolPath(
        "parameterized_system", protocols.assign_parameters.id
    )
    dipole_protocol.trajectory_path = ProtocolPath(
        "trajectory_file_path", production_id
    )
    dipole_protocol.gradient_parameters = ProtocolPath(
        "parameter_gradient_keys", "global"
    )
    protocols.converge_uncertainty.add_protocols(dipole_protocol)

    protocols.analysis_protocol.volumes = ProtocolPath(
        f"observables[{ObservableType.Volume.value}]", production_id
    )
    protocols.analysis_protocol.dipole_moments = ProtocolPath(
        "dipole_moments", dipole_protocol.id
    )

    # Assemble the workflow schema.
    top_level_protocols = (
        protocols.build_coordinates,
        protocols.assign_parameters,
        protocols.energy_minimisation,
        protocols.equilibration_simulation,
        protocols.converge_uncertainty,
        protocols.decorrelate_trajectory,
        protocols.decorrelate_observables,
    )

    workflow_schema = WorkflowSchema()
    workflow_schema.protocol_schemas = [
        protocol.schema for protocol in top_level_protocols
    ]
    workflow_schema.outputs_to_store = {"full_system": output_to_store}
    workflow_schema.final_value_source = value_source

    layer_schema.workflow_schema = workflow_schema
    return layer_schema
def default_reweighting_schema(
    absolute_tolerance=UNDEFINED,
    relative_tolerance=UNDEFINED,
    n_effective_samples=50,
):
    """Build the default schema for estimating this property by reweighting.

    Parameters
    ----------
    absolute_tolerance: openff.evaluator.unit.Quantity, optional
        The absolute tolerance to estimate the property to within.
    relative_tolerance: float
        The tolerance (as a fraction of the properties reported
        uncertainty) to estimate the property to within.
    n_effective_samples: int
        The minimum number of effective samples to require when
        reweighting the cached simulation data.

    Returns
    -------
    ReweightingSchema
        The schema to follow when estimating this property.
    """
    # At most one of the two tolerances may be provided.
    assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

    layer_schema = ReweightingSchema()
    layer_schema.absolute_tolerance = absolute_tolerance
    layer_schema.relative_tolerance = relative_tolerance

    inefficiency_protocol = AverageDielectricConstant(
        "average_dielectric_$(data_replicator)"
    )
    reweighting_protocol = ReweightDielectricConstant("reweight_dielectric")

    protocols, data_replicator = generate_base_reweighting_protocols(
        statistical_inefficiency=inefficiency_protocol,
        reweight_observable=reweighting_protocol,
    )

    protocols.zero_gradients.input_observables = ProtocolPath(
        "output_observables[Volume]", protocols.join_observables.id
    )
    protocols.statistical_inefficiency.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    protocols.reweight_observable.required_effective_samples = n_effective_samples

    # This protocol is only used to obtain the statistical inefficiency and
    # equilibration time, so a single bootstrap iteration suffices; the
    # bootstrapped uncertainties come from the re-weighting protocol.
    protocols.statistical_inefficiency.bootstrap_iterations = 1

    # Re-evaluate the dipole moments at the target state and concatenate
    # them into a single array.
    dipole_protocol = ComputeDipoleMoments("compute_dipoles_$(data_replicator)")
    dipole_protocol.parameterized_system = ProtocolPath(
        "parameterized_system", protocols.build_target_system.id
    )
    dipole_protocol.trajectory_path = ProtocolPath(
        "trajectory_file_path", protocols.unpack_stored_data.id
    )
    dipole_protocol.gradient_parameters = ProtocolPath(
        "parameter_gradient_keys", "global"
    )

    joined_dipoles = ConcatenateObservables("join_dipoles")
    joined_dipoles.input_observables = ProtocolPath(
        "dipole_moments", dipole_protocol.id
    )

    # Wire the volumes and dipole moments into the dielectric protocols.
    protocols.statistical_inefficiency.volumes = ProtocolPath(
        "observables[Volume]", protocols.unpack_stored_data.id
    )
    protocols.statistical_inefficiency.dipole_moments = ProtocolPath(
        "dipole_moments", dipole_protocol.id
    )

    # The dipole moments need decorrelating as well.
    decorrelated_dipoles = DecorrelateObservables("decorrelate_dipoles")
    decorrelated_dipoles.time_series_statistics = ProtocolPath(
        "time_series_statistics", protocols.statistical_inefficiency.id
    )
    decorrelated_dipoles.input_observables = ProtocolPath(
        "output_observables", joined_dipoles.id
    )

    protocols.reweight_observable.dipole_moments = ProtocolPath(
        "output_observables", decorrelated_dipoles.id
    )
    protocols.reweight_observable.volumes = ProtocolPath(
        "output_observables", protocols.decorrelate_observable.id
    )
    protocols.reweight_observable.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )

    # Base protocols first, followed by the dipole specific additions.
    protocol_schemas = [protocol.schema for protocol in protocols]
    protocol_schemas.append(dipole_protocol.schema)
    protocol_schemas.append(joined_dipoles.schema)
    protocol_schemas.append(decorrelated_dipoles.schema)

    workflow_schema = WorkflowSchema()
    workflow_schema.protocol_schemas = protocol_schemas
    workflow_schema.protocol_replicators = [data_replicator]
    workflow_schema.final_value_source = ProtocolPath(
        "value", protocols.reweight_observable.id
    )

    layer_schema.workflow_schema = workflow_schema
    return layer_schema
def default_reweighting_schema(
    absolute_tolerance=UNDEFINED,
    relative_tolerance=UNDEFINED,
    n_effective_samples=50,
):
    """Build the default schema for estimating this property by reweighting.

    Parameters
    ----------
    absolute_tolerance: pint.Quantity, optional
        The absolute tolerance to estimate the property to within.
    relative_tolerance: float
        The tolerance (as a fraction of the properties reported
        uncertainty) to estimate the property to within.
    n_effective_samples: int
        The minimum number of effective samples to require when
        reweighting the cached simulation data.

    Returns
    -------
    ReweightingSchema
        The schema to follow when estimating this property.
    """
    # At most one of the two tolerances may be provided.
    assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

    layer_schema = ReweightingSchema()
    layer_schema.absolute_tolerance = absolute_tolerance
    layer_schema.relative_tolerance = relative_tolerance

    data_replicator_id = "data_replicator"

    # Extracts the dielectric constant from each piece of stored data.
    extraction_protocol = ExtractAverageDielectric(
        f"calc_dielectric_$({data_replicator_id})"
    )

    # The dielectric constant uses a dedicated reweighting protocol set up
    # for calculating fluctuation properties.
    reweighting_protocol = ReweightDielectricConstant("reweight_dielectric")
    reweighting_protocol.reference_dipole_moments = ProtocolPath(
        "uncorrelated_values", extraction_protocol.id
    )
    reweighting_protocol.reference_volumes = ProtocolPath(
        "uncorrelated_volumes", extraction_protocol.id
    )
    reweighting_protocol.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    reweighting_protocol.bootstrap_uncertainties = True
    reweighting_protocol.bootstrap_iterations = 200
    reweighting_protocol.required_effective_samples = n_effective_samples

    protocols, data_replicator = generate_base_reweighting_protocols(
        extraction_protocol, reweighting_protocol, data_replicator_id
    )

    # Take the extraction protocol's inputs from the matching base protocols.
    extraction_protocol.system_path = ProtocolPath(
        "system_path", protocols.build_reference_system.id
    )
    extraction_protocol.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", protocols.unpack_stored_data.id
    )

    # Inputs of the gradient calculations.
    concatenated_id = protocols.concatenate_trajectories.id

    coordinate_path = ProtocolPath("output_coordinate_path", concatenated_id)
    trajectory_path = ProtocolPath("output_trajectory_path", concatenated_id)
    statistics_path = ProtocolPath(
        "statistics_file_path", protocols.reduced_target_potential.id
    )

    # The gradient group is built from a deep copy of the reweighting
    # protocol rather than from the protocol itself.
    gradient_template = copy.deepcopy(reweighting_protocol)

    gradient_outputs = generate_gradient_protocol_group(
        gradient_template,
        ProtocolPath("force_field_path", "global"),
        coordinate_path,
        trajectory_path,
        statistics_path,
        replicator_id="grad",
        effective_sample_indices=ProtocolPath(
            "effective_sample_indices", reweighting_protocol.id
        ),
    )
    gradient_group, gradient_replicator, gradient_source = gradient_outputs

    # Base protocols first, followed by the gradient group.
    protocol_schemas = [protocol.schema for protocol in protocols]
    protocol_schemas.append(gradient_group.schema)

    workflow_schema = WorkflowSchema()
    workflow_schema.protocol_schemas = protocol_schemas
    workflow_schema.protocol_replicators = [data_replicator, gradient_replicator]
    workflow_schema.gradients_sources = [gradient_source]
    workflow_schema.final_value_source = ProtocolPath(
        "value", protocols.mbar_protocol.id
    )

    layer_schema.workflow_schema = workflow_schema
    return layer_schema
def default_simulation_schema(absolute_tolerance=UNDEFINED,
                              relative_tolerance=UNDEFINED,
                              n_molecules=1000):
    """Build the default schema for estimating this property by simulation.

    The base simulation protocols are generated around an
    ``ExtractAverageDielectric`` analysis protocol, and a gradient protocol
    group built on a ``ReweightDielectricConstant`` protocol is appended.

    Parameters
    ----------
    absolute_tolerance: pint.Quantity, optional
        The absolute tolerance to estimate the property to within.
    relative_tolerance: float
        The tolerance (as a fraction of the properties reported
        uncertainty) to estimate the property to within.
    n_molecules: int
        The number of molecules to use in the simulation.

    Returns
    -------
    SimulationSchema
        The schema to follow when estimating this property.
    """
    # At most one of the two tolerances may be provided.
    assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

    layer_schema = SimulationSchema()
    layer_schema.absolute_tolerance = absolute_tolerance
    layer_schema.relative_tolerance = relative_tolerance

    # Extracts the average dielectric constant from a simulation's results.
    extraction_protocol = ExtractAverageDielectric("extract_dielectric")
    extraction_protocol.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )

    # The protocols which run the simulation itself.
    use_target_uncertainty = (
        absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED
    )

    protocols, value_source, output_to_store = generate_base_simulation_protocols(
        extraction_protocol,
        use_target_uncertainty,
        n_molecules=n_molecules,
    )

    # Hook the analysis protocol up to the parameterized system.
    extraction_protocol.system_path = ProtocolPath(
        "system_path", protocols.assign_parameters.id
    )

    # Dielectric constants typically take longer to converge, hence the
    # raised maximum number of convergence iterations.
    protocols.converge_uncertainty.max_iterations = 400

    # Gradient calculations: dielectric constants need a slightly
    # specialised reweighting protocol, which is configured below.
    convergence_id = protocols.converge_uncertainty.id
    production_id = protocols.production_simulation.id

    coordinate_source = ProtocolPath(
        "output_coordinate_file", protocols.equilibration_simulation.id
    )
    trajectory_source = ProtocolPath(
        "trajectory_file_path", convergence_id, production_id
    )
    statistics_source = ProtocolPath(
        "statistics_file_path", convergence_id, production_id
    )

    gradient_reweighting = ReweightDielectricConstant("gradient_mbar")
    gradient_reweighting.reference_dipole_moments = [
        ProtocolPath("dipole_moments", convergence_id, extraction_protocol.id)
    ]
    gradient_reweighting.reference_volumes = [
        ProtocolPath("volumes", convergence_id, extraction_protocol.id)
    ]
    gradient_reweighting.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    gradient_reweighting.reference_reduced_potentials = statistics_source

    gradient_outputs = generate_gradient_protocol_group(
        gradient_reweighting,
        ProtocolPath("force_field_path", "global"),
        coordinate_source,
        trajectory_source,
        statistics_source,
    )
    gradient_group, gradient_replicator, gradient_source = gradient_outputs

    # Assemble the workflow schema.
    top_level_protocols = (
        protocols.build_coordinates,
        protocols.assign_parameters,
        protocols.energy_minimisation,
        protocols.equilibration_simulation,
        protocols.converge_uncertainty,
        protocols.extract_uncorrelated_trajectory,
        protocols.extract_uncorrelated_statistics,
        gradient_group,
    )

    workflow_schema = WorkflowSchema()
    workflow_schema.protocol_schemas = [
        protocol.schema for protocol in top_level_protocols
    ]
    workflow_schema.protocol_replicators = [gradient_replicator]
    workflow_schema.outputs_to_store = {"full_system": output_to_store}
    workflow_schema.gradients_sources = [gradient_source]
    workflow_schema.final_value_source = value_source

    layer_schema.workflow_schema = workflow_schema
    return layer_schema