def default_simulation_schema(existing_schema=None): """Returns the default calculation schema to use when estimating this class of property from direct simulations. Parameters ---------- existing_schema: SimulationSchema, optional An existing schema whose settings to use. If set, the schema's `workflow_schema` will be overwritten by this method. Returns ------- SimulationSchema The schema to follow when estimating this property. """ calculation_schema = SimulationSchema() if existing_schema is not None: assert isinstance(existing_schema, SimulationSchema) calculation_schema = copy.deepcopy(existing_schema) schema = WorkflowSchema( property_type=HostGuestBindingAffinity.__name__) schema.id = "{}{}".format(HostGuestBindingAffinity.__name__, "Schema") # Initial coordinate and topology setup. filter_ligand = miscellaneous.FilterSubstanceByRole("filter_ligand") filter_ligand.input_substance = ProtocolPath("substance", "global") filter_ligand.component_roles = [Component.Role.Ligand] # We only support substances with a single guest ligand. filter_ligand.expected_components = 1 schema.protocols[filter_ligand.id] = filter_ligand.schema # Construct the protocols which will (for now) take as input a set of host coordinates, # and generate a set of charges for them. filter_receptor = miscellaneous.FilterSubstanceByRole( "filter_receptor") filter_receptor.input_substance = ProtocolPath("substance", "global") filter_receptor.component_roles = [Component.Role.Receptor] # We only support substances with a single host receptor. filter_receptor.expected_components = 1 schema.protocols[filter_receptor.id] = filter_receptor.schema # Perform docking to position the guest within the host. perform_docking = coordinates.BuildDockedCoordinates("perform_docking") perform_docking.ligand_substance = ProtocolPath( "filtered_substance", filter_ligand.id) perform_docking.receptor_coordinate_file = ProtocolPath( "receptor_mol2", "global") schema.protocols[perform_docking.id] = perform_docking.schema # Solvate the docked structure using packmol filter_solvent = miscellaneous.FilterSubstanceByRole("filter_solvent") filter_solvent.input_substance = ProtocolPath("substance", "global") filter_solvent.component_roles = [Component.Role.Solvent] schema.protocols[filter_solvent.id] = filter_solvent.schema solvate_complex = coordinates.SolvateExistingStructure( "solvate_complex") solvate_complex.max_molecules = 1000 solvate_complex.substance = ProtocolPath("filtered_substance", filter_solvent.id) solvate_complex.solute_coordinate_file = ProtocolPath( "docked_complex_coordinate_path", perform_docking.id) schema.protocols[solvate_complex.id] = solvate_complex.schema # Assign force field parameters to the solvated complex system. build_solvated_complex_system = forcefield.BaseBuildSystem( "build_solvated_complex_system") build_solvated_complex_system.force_field_path = ProtocolPath( "force_field_path", "global") build_solvated_complex_system.coordinate_file_path = ProtocolPath( "coordinate_file_path", solvate_complex.id) build_solvated_complex_system.substance = ProtocolPath( "substance", "global") build_solvated_complex_system.charged_molecule_paths = [ ProtocolPath("receptor_mol2", "global") ] schema.protocols[build_solvated_complex_system. id] = build_solvated_complex_system.schema # Solvate the ligand using packmol solvate_ligand = coordinates.SolvateExistingStructure("solvate_ligand") solvate_ligand.max_molecules = 1000 solvate_ligand.substance = ProtocolPath("filtered_substance", filter_solvent.id) solvate_ligand.solute_coordinate_file = ProtocolPath( "docked_ligand_coordinate_path", perform_docking.id) schema.protocols[solvate_ligand.id] = solvate_ligand.schema # Assign force field parameters to the solvated ligand system. build_solvated_ligand_system = forcefield.BaseBuildSystem( "build_solvated_ligand_system") build_solvated_ligand_system.force_field_path = ProtocolPath( "force_field_path", "global") build_solvated_ligand_system.coordinate_file_path = ProtocolPath( "coordinate_file_path", solvate_ligand.id) build_solvated_ligand_system.substance = ProtocolPath( "substance", "global") schema.protocols[build_solvated_ligand_system. id] = build_solvated_ligand_system.schema # Employ YANK to estimate the binding free energy. yank_protocol = yank.LigandReceptorYankProtocol("yank_protocol") yank_protocol.thermodynamic_state = ProtocolPath( "thermodynamic_state", "global") yank_protocol.number_of_iterations = 2000 yank_protocol.steps_per_iteration = 500 yank_protocol.checkpoint_interval = 10 yank_protocol.verbose = True yank_protocol.force_field_path = ProtocolPath("force_field_path", "global") yank_protocol.ligand_residue_name = ProtocolPath( "ligand_residue_name", perform_docking.id) yank_protocol.receptor_residue_name = ProtocolPath( "receptor_residue_name", perform_docking.id) yank_protocol.solvated_ligand_coordinates = ProtocolPath( "coordinate_file_path", solvate_ligand.id) yank_protocol.solvated_ligand_system = ProtocolPath( "system_path", build_solvated_ligand_system.id) yank_protocol.solvated_complex_coordinates = ProtocolPath( "coordinate_file_path", solvate_complex.id) yank_protocol.solvated_complex_system = ProtocolPath( "system_path", build_solvated_complex_system.id) schema.protocols[yank_protocol.id] = yank_protocol.schema # Define where the final values come from. schema.final_value_source = ProtocolPath("estimated_free_energy", yank_protocol.id) calculation_schema.workflow_schema = schema return calculation_schema
def default_simulation_schema(absolute_tolerance=UNDEFINED, relative_tolerance=UNDEFINED, n_molecules=2000): """Returns the default calculation schema to use when estimating this class of property from direct simulations. Parameters ---------- absolute_tolerance: pint.Quantity, optional The absolute tolerance to estimate the property to within. relative_tolerance: float The tolerance (as a fraction of the properties reported uncertainty) to estimate the property to within. n_molecules: int The number of molecules to use in the simulation. Returns ------- SimulationSchema The schema to follow when estimating this property. """ assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED calculation_schema = SimulationSchema() calculation_schema.absolute_tolerance = absolute_tolerance calculation_schema.relative_tolerance = relative_tolerance use_target_uncertainty = (absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED) # Setup the fully solvated systems. build_full_coordinates = coordinates.BuildCoordinatesPackmol( "build_solvated_coordinates") build_full_coordinates.substance = ProtocolPath("substance", "global") build_full_coordinates.max_molecules = n_molecules assign_full_parameters = forcefield.BaseBuildSystem( f"assign_solvated_parameters") assign_full_parameters.force_field_path = ProtocolPath( "force_field_path", "global") assign_full_parameters.substance = ProtocolPath("substance", "global") assign_full_parameters.coordinate_file_path = ProtocolPath( "coordinate_file_path", build_full_coordinates.id) # Perform a quick minimisation of the full system to give # YANK a better starting point for its minimisation. energy_minimisation = openmm.OpenMMEnergyMinimisation( "energy_minimisation") energy_minimisation.system_path = ProtocolPath( "system_path", assign_full_parameters.id) energy_minimisation.input_coordinate_file = ProtocolPath( "coordinate_file_path", build_full_coordinates.id) equilibration_simulation = openmm.OpenMMSimulation( "equilibration_simulation") equilibration_simulation.ensemble = Ensemble.NPT equilibration_simulation.steps_per_iteration = 100000 equilibration_simulation.output_frequency = 10000 equilibration_simulation.timestep = 2.0 * unit.femtosecond equilibration_simulation.thermodynamic_state = ProtocolPath( "thermodynamic_state", "global") equilibration_simulation.system_path = ProtocolPath( "system_path", assign_full_parameters.id) equilibration_simulation.input_coordinate_file = ProtocolPath( "output_coordinate_file", energy_minimisation.id) # Create a substance which only contains the solute (e.g. for the # vacuum phase simulations). filter_solvent = miscellaneous.FilterSubstanceByRole("filter_solvent") filter_solvent.input_substance = ProtocolPath("substance", "global") filter_solvent.component_role = Component.Role.Solvent filter_solute = miscellaneous.FilterSubstanceByRole("filter_solute") filter_solute.input_substance = ProtocolPath("substance", "global") filter_solute.component_role = Component.Role.Solute # Setup the solute in vacuum system. build_vacuum_coordinates = coordinates.BuildCoordinatesPackmol( "build_vacuum_coordinates") build_vacuum_coordinates.substance = ProtocolPath( "filtered_substance", filter_solute.id) build_vacuum_coordinates.max_molecules = 1 assign_vacuum_parameters = forcefield.BaseBuildSystem( f"assign_parameters") assign_vacuum_parameters.force_field_path = ProtocolPath( "force_field_path", "global") assign_vacuum_parameters.substance = ProtocolPath( "filtered_substance", filter_solute.id) assign_vacuum_parameters.coordinate_file_path = ProtocolPath( "coordinate_file_path", build_vacuum_coordinates.id) # Set up the protocol to run yank. run_yank = yank.SolvationYankProtocol("run_solvation_yank") run_yank.solute = ProtocolPath("filtered_substance", filter_solute.id) run_yank.solvent_1 = ProtocolPath("filtered_substance", filter_solvent.id) run_yank.solvent_2 = Substance() run_yank.thermodynamic_state = ProtocolPath("thermodynamic_state", "global") run_yank.steps_per_iteration = 500 run_yank.checkpoint_interval = 50 run_yank.solvent_1_coordinates = ProtocolPath( "output_coordinate_file", equilibration_simulation.id) run_yank.solvent_1_system = ProtocolPath("system_path", assign_full_parameters.id) run_yank.solvent_2_coordinates = ProtocolPath( "coordinate_file_path", build_vacuum_coordinates.id) run_yank.solvent_2_system = ProtocolPath("system_path", assign_vacuum_parameters.id) # Set up the group which will run yank until the free energy has been determined to within # a given uncertainty conditional_group = groups.ConditionalGroup(f"conditional_group") conditional_group.max_iterations = 20 if use_target_uncertainty: condition = groups.ConditionalGroup.Condition() condition.type = groups.ConditionalGroup.Condition.Type.LessThan condition.right_hand_value = ProtocolPath("target_uncertainty", "global") condition.left_hand_value = ProtocolPath( "estimated_free_energy.error", conditional_group.id, run_yank.id) conditional_group.add_condition(condition) # Define the total number of iterations that yank should run for. total_iterations = miscellaneous.MultiplyValue("total_iterations") total_iterations.value = 2000 total_iterations.multiplier = ProtocolPath("current_iteration", conditional_group.id) # Make sure the simulations gets extended after each iteration. run_yank.number_of_iterations = ProtocolPath("result", total_iterations.id) conditional_group.add_protocols(total_iterations, run_yank) # Define the full workflow schema. schema = WorkflowSchema() schema.protocol_schemas = [ build_full_coordinates.schema, assign_full_parameters.schema, energy_minimisation.schema, equilibration_simulation.schema, filter_solvent.schema, filter_solute.schema, build_vacuum_coordinates.schema, assign_vacuum_parameters.schema, conditional_group.schema, ] schema.final_value_source = ProtocolPath("estimated_free_energy", conditional_group.id, run_yank.id) calculation_schema.workflow_schema = schema return calculation_schema
def default_simulation_schema( absolute_tolerance=UNDEFINED, relative_tolerance=UNDEFINED, n_molecules=1000 ): """Returns the default calculation schema to use when estimating this class of property from direct simulations. Parameters ---------- absolute_tolerance: pint.Quantity, optional The absolute tolerance to estimate the property to within. relative_tolerance: float The tolerance (as a fraction of the properties reported uncertainty) to estimate the property to within. n_molecules: int The number of molecules to use in the simulation. Returns ------- SimulationSchema The schema to follow when estimating this property. """ assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED calculation_schema = SimulationSchema() calculation_schema.absolute_tolerance = absolute_tolerance calculation_schema.relative_tolerance = relative_tolerance # Define the protocol which will extract the average dielectric constant # from the results of a simulation. extract_dielectric = ExtractAverageDielectric("extract_dielectric") extract_dielectric.thermodynamic_state = ProtocolPath( "thermodynamic_state", "global" ) # Define the protocols which will run the simulation itself. use_target_uncertainty = ( absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED ) protocols, value_source, output_to_store = generate_base_simulation_protocols( extract_dielectric, use_target_uncertainty, n_molecules=n_molecules, ) # Make sure the input of the analysis protcol is properly hooked up. extract_dielectric.system_path = ProtocolPath( "system_path", protocols.assign_parameters.id ) # Dielectric constants typically take longer to converge, so we need to # reflect this in the maximum number of convergence iterations. protocols.converge_uncertainty.max_iterations = 400 # Set up the gradient calculations. For dielectric constants, we need to use # a slightly specialised reweighting protocol which we set up here. coordinate_source = ProtocolPath( "output_coordinate_file", protocols.equilibration_simulation.id ) trajectory_source = ProtocolPath( "trajectory_file_path", protocols.converge_uncertainty.id, protocols.production_simulation.id, ) statistics_source = ProtocolPath( "statistics_file_path", protocols.converge_uncertainty.id, protocols.production_simulation.id, ) gradient_mbar_protocol = ReweightDielectricConstant("gradient_mbar") gradient_mbar_protocol.reference_dipole_moments = [ ProtocolPath( "dipole_moments", protocols.converge_uncertainty.id, extract_dielectric.id, ) ] gradient_mbar_protocol.reference_volumes = [ ProtocolPath( "volumes", protocols.converge_uncertainty.id, extract_dielectric.id ) ] gradient_mbar_protocol.thermodynamic_state = ProtocolPath( "thermodynamic_state", "global" ) gradient_mbar_protocol.reference_reduced_potentials = statistics_source ( gradient_group, gradient_replicator, gradient_source, ) = generate_gradient_protocol_group( gradient_mbar_protocol, ProtocolPath("force_field_path", "global"), coordinate_source, trajectory_source, statistics_source, ) # Build the workflow schema. schema = WorkflowSchema() schema.protocol_schemas = [ protocols.build_coordinates.schema, protocols.assign_parameters.schema, protocols.energy_minimisation.schema, protocols.equilibration_simulation.schema, protocols.converge_uncertainty.schema, protocols.extract_uncorrelated_trajectory.schema, protocols.extract_uncorrelated_statistics.schema, gradient_group.schema, ] schema.protocol_replicators = [gradient_replicator] schema.outputs_to_store = {"full_system": output_to_store} schema.gradients_sources = [gradient_source] schema.final_value_source = value_source calculation_schema.workflow_schema = schema return calculation_schema
def default_simulation_schema( absolute_tolerance=UNDEFINED, relative_tolerance=UNDEFINED, n_molecules=1000 ): """Returns the default calculation schema to use when estimating this class of property from direct simulations. Parameters ---------- absolute_tolerance: pint.Quantity, optional The absolute tolerance to estimate the property to within. relative_tolerance: float The tolerance (as a fraction of the properties reported uncertainty) to estimate the property to within. n_molecules: int The number of molecules to use in the simulation. Returns ------- SimulationSchema The schema to follow when estimating this property. """ assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED calculation_schema = SimulationSchema() calculation_schema.absolute_tolerance = absolute_tolerance calculation_schema.relative_tolerance = relative_tolerance use_target_uncertainty = ( absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED ) # Define the id of the replicator which will clone the gradient protocols # for each gradient key to be estimated. gradient_replicator_id = "gradient_replicator" # Set up a workflow to calculate the molar volume of the full, mixed system. ( full_system_protocols, full_system_molar_molecules, full_system_volume, full_output, full_system_gradient_group, full_system_gradient_replicator, full_system_gradient, ) = ExcessMolarVolume._get_simulation_protocols( "_full", gradient_replicator_id, use_target_uncertainty=use_target_uncertainty, n_molecules=n_molecules, ) # Set up a general workflow for calculating the molar volume of one of the system components. component_replicator_id = "component_replicator" component_substance = ReplicatorValue(component_replicator_id) # Make sure to weight by the mole fractions of the actual full system as these may be slightly # different to the mole fractions of the measure property due to rounding. full_substance = ProtocolPath( "output_substance", full_system_protocols.build_coordinates.id ) ( component_protocols, component_molar_molecules, component_volumes, component_output, component_gradient_group, component_gradient_replicator, component_gradient, ) = ExcessMolarVolume._get_simulation_protocols( "_component", gradient_replicator_id, replicator_id=component_replicator_id, weight_by_mole_fraction=True, component_substance_reference=component_substance, full_substance_reference=full_substance, use_target_uncertainty=use_target_uncertainty, n_molecules=n_molecules, ) # Finally, set up the protocols which will be responsible for adding together # the component molar volumes, and subtracting these from the mixed system molar volume. add_component_molar_volumes = miscellaneous.AddValues( "add_component_molar_volumes" ) add_component_molar_volumes.values = component_volumes calculate_excess_volume = miscellaneous.SubtractValues( "calculate_excess_volume" ) calculate_excess_volume.value_b = full_system_volume calculate_excess_volume.value_a = ProtocolPath( "result", add_component_molar_volumes.id ) # Create the replicator object which defines how the pure component # molar volume estimation protocols will be replicated for each component. component_replicator = ProtocolReplicator(replicator_id=component_replicator_id) component_replicator.template_values = ProtocolPath("components", "global") # Combine the gradients. add_component_gradients = miscellaneous.AddValues( f"add_component_gradients" f"_$({gradient_replicator_id})" ) add_component_gradients.values = component_gradient combine_gradients = miscellaneous.SubtractValues( f"combine_gradients_$({gradient_replicator_id})" ) combine_gradients.value_b = full_system_gradient combine_gradients.value_a = ProtocolPath("result", add_component_gradients.id) # Combine the gradient replicators. gradient_replicator = ProtocolReplicator(replicator_id=gradient_replicator_id) gradient_replicator.template_values = ProtocolPath( "parameter_gradient_keys", "global" ) # Build the final workflow schema schema = WorkflowSchema() schema.protocol_schemas = [ component_protocols.build_coordinates.schema, component_protocols.assign_parameters.schema, component_protocols.energy_minimisation.schema, component_protocols.equilibration_simulation.schema, component_protocols.converge_uncertainty.schema, component_molar_molecules.schema, full_system_protocols.build_coordinates.schema, full_system_protocols.assign_parameters.schema, full_system_protocols.energy_minimisation.schema, full_system_protocols.equilibration_simulation.schema, full_system_protocols.converge_uncertainty.schema, full_system_molar_molecules.schema, component_protocols.extract_uncorrelated_trajectory.schema, component_protocols.extract_uncorrelated_statistics.schema, full_system_protocols.extract_uncorrelated_trajectory.schema, full_system_protocols.extract_uncorrelated_statistics.schema, add_component_molar_volumes.schema, calculate_excess_volume.schema, component_gradient_group.schema, full_system_gradient_group.schema, add_component_gradients.schema, combine_gradients.schema, ] schema.protocol_replicators = [gradient_replicator, component_replicator] # Finally, tell the schemas where to look for its final values. schema.gradients_sources = [ProtocolPath("result", combine_gradients.id)] schema.final_value_source = ProtocolPath("result", calculate_excess_volume.id) schema.outputs_to_store = { "full_system": full_output, f"component_$({component_replicator_id})": component_output, } calculation_schema.workflow_schema = schema return calculation_schema
def default_simulation_schema( absolute_tolerance=UNDEFINED, relative_tolerance=UNDEFINED, n_molecules=1000 ): """Returns the default calculation schema to use when estimating this class of property from direct simulations. Parameters ---------- absolute_tolerance: pint.Quantity, optional The absolute tolerance to estimate the property to within. relative_tolerance: float The tolerance (as a fraction of the properties reported uncertainty) to estimate the property to within. n_molecules: int The number of molecules to use in the simulation. Returns ------- SimulationSchema The schema to follow when estimating this property. """ assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED calculation_schema = SimulationSchema() calculation_schema.absolute_tolerance = absolute_tolerance calculation_schema.relative_tolerance = relative_tolerance use_target_uncertainty = ( absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED ) # Define the protocol which will extract the average density from # the results of a simulation. extract_density = analysis.ExtractAverageStatistic("extract_density") extract_density.statistics_type = ObservableType.Density # Define the protocols which will run the simulation itself. protocols, value_source, output_to_store = generate_base_simulation_protocols( extract_density, use_target_uncertainty, n_molecules=n_molecules, ) # Set up the gradient calculations coordinate_source = ProtocolPath( "output_coordinate_file", protocols.equilibration_simulation.id ) trajectory_source = ProtocolPath( "trajectory_file_path", protocols.converge_uncertainty.id, protocols.production_simulation.id, ) statistics_source = ProtocolPath( "statistics_file_path", protocols.converge_uncertainty.id, protocols.production_simulation.id, ) reweight_density_template = reweighting.ReweightStatistics("") reweight_density_template.statistics_type = ObservableType.Density reweight_density_template.statistics_paths = statistics_source reweight_density_template.reference_reduced_potentials = statistics_source ( gradient_group, gradient_replicator, gradient_source, ) = generate_gradient_protocol_group( reweight_density_template, ProtocolPath("force_field_path", "global"), coordinate_source, trajectory_source, statistics_source, ) # Build the workflow schema. schema = WorkflowSchema() schema.protocol_schemas = [ protocols.build_coordinates.schema, protocols.assign_parameters.schema, protocols.energy_minimisation.schema, protocols.equilibration_simulation.schema, protocols.converge_uncertainty.schema, protocols.extract_uncorrelated_trajectory.schema, protocols.extract_uncorrelated_statistics.schema, gradient_group.schema, ] schema.protocol_replicators = [gradient_replicator] schema.outputs_to_store = {"full_system": output_to_store} schema.gradients_sources = [gradient_source] schema.final_value_source = value_source calculation_schema.workflow_schema = schema return calculation_schema
def test_workflow_layer(): """Test the `WorkflowLayer` calculation layer. As the `SimulationLayer` is the simplest implementation of the abstract layer, we settle for testing this.""" properties_to_estimate = [ create_dummy_property(Density), create_dummy_property(Density), ] # Create a very simple workflow which just returns some placeholder # value. estimated_value = (1 * unit.kelvin).plus_minus(0.1 * unit.kelvin) protocol_a = DummyInputOutputProtocol("protocol_a") protocol_a.input_value = estimated_value schema = WorkflowSchema() schema.protocol_schemas = [protocol_a.schema] schema.final_value_source = ProtocolPath("output_value", protocol_a.id) layer_schema = SimulationSchema() layer_schema.workflow_schema = schema options = RequestOptions() options.add_schema("SimulationLayer", "Density", layer_schema) batch = server.Batch() batch.queued_properties = properties_to_estimate batch.options = options with tempfile.TemporaryDirectory() as directory: with temporarily_change_directory(directory): # Create a directory for the layer. layer_directory = "simulation_layer" os.makedirs(layer_directory) # Set-up a simple storage backend and add a force field to it. force_field = SmirnoffForceFieldSource.from_path( "smirnoff99Frosst-1.1.0.offxml") storage_backend = LocalFileStorage() batch.force_field_id = storage_backend.store_force_field( force_field) # Create a simple calculation backend to test with. with DaskLocalCluster() as calculation_backend: def dummy_callback(returned_request): assert len(returned_request.estimated_properties) == 2 assert len(returned_request.exceptions) == 0 simulation_layer = SimulationLayer() simulation_layer.schedule_calculation( calculation_backend, storage_backend, layer_directory, batch, dummy_callback, True, )