예제 #1
0
    def process_successful_property(physical_property, layer_directory, **_):
        """Fabricate the result of a successful estimation of a property.

        A directory named ``good_dummy_data`` is created inside
        ``layer_directory``, and a placeholder `StoredSimulationData`
        object is serialised next to it as ``good_dummy_data.json``.

        Parameters
        ----------
        physical_property
            The property to report as estimated. Its ``substance``,
            ``thermodynamic_state`` and ``phase`` attributes are copied
            onto the placeholder stored data object.
        layer_directory: str
            The directory in which the dummy directory and JSON file
            are created.
        **_
            Any additional keyword arguments are accepted and ignored.

        Returns
        -------
        CalculationLayerResult
            A result whose ``physical_property`` is the passed in property and
            whose ``data_to_store`` holds a single
            ``(json_file_path, data_directory)`` tuple.
        """

        data_directory = path.join(layer_directory, "good_dummy_data")
        makedirs(data_directory, exist_ok=True)

        stored_data = StoredSimulationData()

        # Values copied across from the property being "estimated".
        stored_data.substance = physical_property.substance
        stored_data.thermodynamic_state = physical_property.thermodynamic_state
        stored_data.property_phase = physical_property.phase

        # The remaining fields only carry fixed placeholder values; they are
        # assigned in the order listed here.
        placeholder_values = (
            ("force_field_id", ""),
            ("coordinate_file_name", ""),
            ("trajectory_file_name", ""),
            ("statistics_file_name", ""),
            ("statistical_inefficiency", 1.0),
            ("number_of_molecules", 10),
            ("source_calculation_id", ""),
        )

        for attribute_name, attribute_value in placeholder_values:
            setattr(stored_data, attribute_name, attribute_value)

        stored_data_path = path.join(layer_directory, "good_dummy_data.json")

        with open(stored_data_path, "w") as json_file:
            json.dump(stored_data, json_file, cls=TypedJSONEncoder)

        result = CalculationLayerResult()
        result.physical_property = physical_property
        result.data_to_store = [(stored_data_path, data_directory)]

        return result
예제 #2
0
def create_dummy_simulation_data(
    directory_path,
    substance,
    force_field_id="dummy_ff_id",
    coordinate_file_name="output.pdb",
    trajectory_file_name="trajectory.dcd",
    statistics_file_name="statistics.csv",
    statistical_inefficiency=1.0,
    phase=PropertyPhase.Liquid,
    number_of_molecules=1,
    calculation_id=None,
):
    """Build a placeholder `StoredSimulationData` object together with
    a data directory containing empty stand-in files.

    Parameters
    ----------
    directory_path: str
        The path to the dummy data directory to create. It is created
        if it does not already exist.
    substance: Substance
        The substance to assign to the stored data object.
    force_field_id
        The value to store as the force field id.
    coordinate_file_name
        The name of the (empty) coordinate file to create in the directory.
    trajectory_file_name
        The name of the (empty) trajectory file to create in the directory.
    statistics_file_name
        The name of the (empty) statistics file to create in the directory.
    statistical_inefficiency
        The value to store as the statistical inefficiency.
    phase
        The value to store as the property phase.
    number_of_molecules
        The value to store as the number of molecules.
    calculation_id
        The value to store as the source calculation id. If `None`,
        a random ``uuid4`` string is generated.

    Returns
    -------
    StoredSimulationData
        The dummy stored data object. Its thermodynamic state is always
        built from ``1.0 * unit.kelvin``.
    """

    os.makedirs(directory_path, exist_ok=True)

    stored_data = StoredSimulationData()

    stored_data.substance = substance
    stored_data.force_field_id = force_field_id
    stored_data.thermodynamic_state = ThermodynamicState(1.0 * unit.kelvin)
    stored_data.property_phase = phase

    stored_data.coordinate_file_name = coordinate_file_name
    stored_data.trajectory_file_name = trajectory_file_name
    stored_data.statistics_file_name = statistics_file_name

    # Touch each of the referenced files so that they exist (empty) on disk.
    placeholder_file_names = (
        coordinate_file_name,
        trajectory_file_name,
        statistics_file_name,
    )

    for placeholder_name in placeholder_file_names:

        placeholder_path = os.path.join(directory_path, placeholder_name)

        with open(placeholder_path, "w") as placeholder_file:
            placeholder_file.write("")

    stored_data.statistical_inefficiency = statistical_inefficiency
    stored_data.number_of_molecules = number_of_molecules

    # Fall back to a freshly generated unique id when none was provided.
    stored_data.source_calculation_id = (
        str(uuid.uuid4()) if calculation_id is None else calculation_id
    )

    return stored_data
예제 #3
0
def generate_base_simulation_protocols(
    analysis_protocol,
    use_target_uncertainty,
    id_suffix="",
    conditional_group=None,
    n_molecules=1000,
):
    """Assemble the protocols which, when combined in a workflow schema,
    run a single simulation and extract one observable from it. Which
    observable is extracted is decided by the passed in `analysis_protocol`.

    The generated protocols are wired together to:

        1) Build a set of coordinates for the property substance
           (`BuildCoordinatesPackmol`), using at most `n_molecules` molecules.

        2) Assign force field parameters to the system.

        3) Perform an energy minimisation on the system.

        4) Run an NPT equilibration simulation of 100000 steps per
           iteration with a timestep of 2fs.

        5) Within a conditional group (default maximum of 100 iterations):

            5a) Run an NPT production simulation of 1000000 steps per
                iteration with a timestep of 2fs.

            5b) Run the analysis protocol on the production output.

            5c) If `use_target_uncertainty` is set (and the default group is
                being used), require that the error of the analysed value is
                less than the global target uncertainty.

        6) Extract uncorrelated configurations from the production
           trajectory.

        7) Extract uncorrelated statistics from the production
           statistics.

    Parameters
    ----------
    analysis_protocol: AveragePropertyProtocol
        The protocol which will extract the observable of
        interest from the generated simulation data. It must be either an
        `AverageTrajectoryProperty` or an `ExtractAverageStatistic`.
    use_target_uncertainty: bool
        Whether to add a convergence condition on the target uncertainty.
        This is only acted upon when `conditional_group` is `None`.
    id_suffix: str
        A string suffix to append to each of the protocol ids.
    conditional_group: ProtocolGroup, optional
        A custom group to wrap the production simulation and analysis
        protocols within. No conditions are added to a custom group by
        this function; that is left to the caller. If `None`, a default
        `ConditionalGroup` is constructed.
    n_molecules: int
        The maximum number of molecules passed to the coordinate builder.

    Returns
    -------
    BaseSimulationProtocols
        A named tuple of the generated protocols.
    ProtocolPath
        A reference to the ``value`` output of the analysis protocol
        inside of the conditional group.
    StoredSimulationData
        An object describing which simulation data to store. Its
        ``force_field_id`` and ``source_calculation_id`` are left as
        `PlaceholderValue` objects.

    Raises
    ------
    ValueError
        If `analysis_protocol` is neither an `AverageTrajectoryProperty`
        nor an `ExtractAverageStatistic`.
    """

    assert isinstance(analysis_protocol, analysis.AveragePropertyProtocol)

    def group_path(property_name, protocol):
        # Creates a *new* reference to a property of a protocol which lives
        # inside of the conditional group on every call.
        return ProtocolPath(property_name, conditional_group.id, protocol.id)

    # System set-up: coordinates followed by parameterisation.
    coordinate_builder = coordinates.BuildCoordinatesPackmol(
        f"build_coordinates{id_suffix}"
    )
    coordinate_builder.substance = ProtocolPath("substance", "global")
    coordinate_builder.max_molecules = n_molecules

    parameter_assigner = forcefield.BaseBuildSystem(
        f"assign_parameters{id_suffix}"
    )
    parameter_assigner.force_field_path = ProtocolPath(
        "force_field_path", "global"
    )
    parameter_assigner.coordinate_file_path = ProtocolPath(
        "coordinate_file_path", coordinate_builder.id
    )
    parameter_assigner.substance = ProtocolPath(
        "output_substance", coordinate_builder.id
    )

    # Equilibration: minimise, then a short NPT run.
    minimiser = openmm.OpenMMEnergyMinimisation(
        f"energy_minimisation{id_suffix}"
    )
    minimiser.input_coordinate_file = ProtocolPath(
        "coordinate_file_path", coordinate_builder.id
    )
    minimiser.system_path = ProtocolPath("system_path", parameter_assigner.id)

    equilibration = openmm.OpenMMSimulation(
        f"equilibration_simulation{id_suffix}"
    )
    equilibration.ensemble = Ensemble.NPT
    equilibration.steps_per_iteration = 100000
    equilibration.output_frequency = 5000
    equilibration.timestep = 2.0 * unit.femtosecond
    equilibration.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    equilibration.input_coordinate_file = ProtocolPath(
        "output_coordinate_file", minimiser.id
    )
    equilibration.system_path = ProtocolPath(
        "system_path", parameter_assigner.id
    )

    # Production: a longer NPT run started from the equilibrated coordinates.
    production = openmm.OpenMMSimulation(f"production_simulation{id_suffix}")
    production.ensemble = Ensemble.NPT
    production.steps_per_iteration = 1000000
    production.output_frequency = 2000
    production.timestep = 2.0 * unit.femtosecond
    production.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    production.input_coordinate_file = ProtocolPath(
        "output_coordinate_file", equilibration.id
    )
    production.system_path = ProtocolPath("system_path", parameter_assigner.id)

    # Only build (and configure) a group when the caller did not supply one.
    if conditional_group is None:

        conditional_group = groups.ConditionalGroup(
            f"conditional_group{id_suffix}"
        )
        conditional_group.max_iterations = 100

        if use_target_uncertainty:

            convergence_condition = groups.ConditionalGroup.Condition()
            convergence_condition.right_hand_value = ProtocolPath(
                "target_uncertainty", "global"
            )
            convergence_condition.type = (
                groups.ConditionalGroup.Condition.Type.LessThan
            )
            convergence_condition.left_hand_value = group_path(
                "value.error", analysis_protocol
            )

            conditional_group.add_condition(convergence_condition)

            # Tie the number of simulation iterations to the iteration of
            # the group so that the simulation is extended on each pass.
            production.total_number_of_iterations = ProtocolPath(
                "current_iteration", conditional_group.id
            )

    conditional_group.add_protocols(production, analysis_protocol)

    # Connect the analysis protocol to the matching production output.
    if isinstance(analysis_protocol, analysis.AverageTrajectoryProperty):

        analysis_protocol.input_coordinate_file = ProtocolPath(
            "coordinate_file_path", coordinate_builder.id
        )
        analysis_protocol.trajectory_path = ProtocolPath(
            "trajectory_file_path", production.id
        )

    elif isinstance(analysis_protocol, analysis.ExtractAverageStatistic):

        analysis_protocol.statistics_path = ProtocolPath(
            "statistics_file_path", production.id
        )

    else:

        raise ValueError(
            "The analysis protocol must inherit from either the "
            "AverageTrajectoryProperty or ExtractAverageStatistic "
            "protocols."
        )

    # References into the group which feed the decorrelation protocols.
    # Each of these objects is intentionally reused wherever it is needed.
    inefficiency_path = group_path("statistical_inefficiency", analysis_protocol)
    equilibration_index_path = group_path("equilibration_index", analysis_protocol)
    final_coordinates_path = group_path("output_coordinate_file", production)
    production_trajectory_path = group_path("trajectory_file_path", production)
    production_statistics_path = group_path("statistics_file_path", production)

    trajectory_extractor = analysis.ExtractUncorrelatedTrajectoryData(
        f"extract_traj{id_suffix}"
    )
    trajectory_extractor.statistical_inefficiency = inefficiency_path
    trajectory_extractor.equilibration_index = equilibration_index_path
    trajectory_extractor.input_coordinate_file = final_coordinates_path
    trajectory_extractor.input_trajectory_path = production_trajectory_path

    statistics_extractor = analysis.ExtractUncorrelatedStatisticsData(
        f"extract_stats{id_suffix}"
    )
    statistics_extractor.statistical_inefficiency = inefficiency_path
    statistics_extractor.equilibration_index = equilibration_index_path
    statistics_extractor.input_statistics_path = production_statistics_path

    # Describe which pieces of the generated data should be stored.
    stored_data = StoredSimulationData()

    stored_data.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    stored_data.property_phase = PropertyPhase.Liquid

    stored_data.force_field_id = PlaceholderValue()

    stored_data.number_of_molecules = ProtocolPath(
        "output_number_of_molecules", coordinate_builder.id
    )
    stored_data.substance = ProtocolPath(
        "output_substance", coordinate_builder.id
    )
    stored_data.statistical_inefficiency = inefficiency_path
    stored_data.statistics_file_name = ProtocolPath(
        "output_statistics_path", statistics_extractor.id
    )
    stored_data.trajectory_file_name = ProtocolPath(
        "output_trajectory_path", trajectory_extractor.id
    )
    stored_data.coordinate_file_name = final_coordinates_path

    stored_data.source_calculation_id = PlaceholderValue()

    # The estimated value is read from the analysis protocol in the group.
    value_path = group_path("value", analysis_protocol)

    generated_protocols = BaseSimulationProtocols(
        coordinate_builder,
        parameter_assigner,
        minimiser,
        equilibration,
        production,
        analysis_protocol,
        conditional_group,
        trajectory_extractor,
        statistics_extractor,
    )

    return generated_protocols, value_path, stored_data