Beispiel #1
0
def test_conditional_protocol_group_fail():
    """Check that executing a conditional group returns a
    `PropertyEstimatorException` when its condition compares the sum of
    a value with itself against that same value using `LessThan`."""

    starting_value = 2 * unit.kelvin

    source_protocol = DummyInputOutputProtocol('protocol_a')
    source_protocol.input_value = starting_value

    # Add the output of the source protocol to itself.
    summation_protocol = AddValues('add_values')
    summation_protocol.values = [
        ProtocolPath('output_value', source_protocol.id) for _ in range(2)
    ]

    # Require that the sum is less than the value which was summed.
    less_than_condition = ConditionalGroup.Condition()
    less_than_condition.type = ConditionalGroup.ConditionType.LessThan
    less_than_condition.left_hand_value = ProtocolPath('result', summation_protocol.id)
    less_than_condition.right_hand_value = ProtocolPath('output_value', source_protocol.id)

    group = ConditionalGroup('protocol_group')
    group.conditions.append(less_than_condition)
    group.max_iterations = 10
    group.add_protocols(source_protocol, summation_protocol)

    with tempfile.TemporaryDirectory() as working_directory:
        execution_result = group.execute(working_directory, None)

    assert isinstance(execution_result, PropertyEstimatorException)
Beispiel #2
0
    def get_default_reweighting_workflow_schema(options):
        """Builds the default schema used to estimate a density by
        reweighting existing data.

        Parameters
        ----------
        options: PropertyWorkflowOptions
            The default options to use when setting up the estimation
            workflow. This implementation does not read them.

        Returns
        -------
        WorkflowSchema
            The schema to follow when estimating this property.
        """

        # The analysis protocol which extracts the densities from the
        # existing data; it is handed to the shared reweighting builder.
        density_protocol = protocols.ExtractAverageStatistic('calc_density_$(data_repl)')
        reweighting_protocols, replicator = generate_base_reweighting_protocols(density_protocol)

        density_protocol.statistics_type = ObservableType.Density

        unpack_data_id = reweighting_protocols.unpack_stored_data.id
        density_protocol.statistics_path = ProtocolPath('statistics_file_path', unpack_data_id)

        property_name = Density.__name__

        schema = WorkflowSchema(property_type=property_name)
        schema.id = f'{property_name}Schema'

        # Register the schema of every protocol returned by the builder.
        protocol_schemas = {}

        for reweighting_protocol in reweighting_protocols:
            protocol_schemas[reweighting_protocol.id] = reweighting_protocol.schema

        schema.protocols = protocol_schemas
        schema.replicators = [replicator]

        # The final value is taken from the MBAR protocol.
        mbar_id = reweighting_protocols.mbar_protocol.id
        schema.final_value_source = ProtocolPath('value', mbar_id)

        return schema
def test_simple_workflow_graph_with_groups():
    """Submit a workflow whose only top-level protocol is a conditional
    group to a workflow graph, and check the value that is returned.

    The calculation backend is stopped in a ``finally`` clause so that
    it is released even when one of the assertions fails.
    """
    dummy_schema = WorkflowSchema()

    dummy_protocol_a = DummyEstimatedQuantityProtocol('protocol_a')
    dummy_protocol_a.input_value = EstimatedQuantity(1 * unit.kelvin,
                                                     0.1 * unit.kelvin,
                                                     'dummy_source')

    # The second protocol takes the output of the first as its input.
    dummy_protocol_b = DummyEstimatedQuantityProtocol('protocol_b')
    dummy_protocol_b.input_value = ProtocolPath('output_value',
                                                dummy_protocol_a.id)

    conditional_group = ConditionalGroup('conditional_group')
    conditional_group.add_protocols(dummy_protocol_a, dummy_protocol_b)

    # The condition reads the `value` attribute of the output of
    # protocol b, addressed through the group which contains it.
    condition = ConditionalGroup.Condition()
    condition.right_hand_value = 2 * unit.kelvin
    condition.type = ConditionalGroup.ConditionType.LessThan

    condition.left_hand_value = ProtocolPath('output_value.value',
                                             conditional_group.id,
                                             dummy_protocol_b.id)

    conditional_group.add_condition(condition)

    dummy_schema.protocols[conditional_group.id] = conditional_group.schema

    dummy_schema.final_value_source = ProtocolPath('output_value',
                                                   conditional_group.id,
                                                   dummy_protocol_b.id)

    dummy_schema.validate_interfaces()

    dummy_property = create_dummy_property(Density)

    dummy_workflow = Workflow(dummy_property, {})
    dummy_workflow.schema = dummy_schema

    with tempfile.TemporaryDirectory() as temporary_directory:

        workflow_graph = WorkflowGraph(temporary_directory)
        workflow_graph.add_workflow(dummy_workflow)

        dask_local_backend = DaskLocalClusterBackend(1, ComputeResources(1))
        dask_local_backend.start()

        try:

            results_futures = workflow_graph.submit(dask_local_backend)

            assert len(results_futures) == 1

            result = results_futures[0].result()
            assert isinstance(result, CalculationLayerResult)
            assert result.calculated_property.value == 1 * unit.kelvin

        finally:
            # Always release the backend, even if an assertion failed.
            dask_local_backend.stop()
def test_nested_input():
    """Submit a workflow in which one protocol takes as input a nested
    attribute of a dictionary entry output by another protocol
    (``output_value[a].temperature``).

    The calculation backend is stopped in a ``finally`` clause so that
    it is released even when one of the assertions fails.
    """

    dummy_schema = WorkflowSchema()

    dict_protocol = DummyInputOutputProtocol('dict_protocol')
    dict_protocol.input_value = {'a': ThermodynamicState(temperature=1*unit.kelvin)}
    dummy_schema.protocols[dict_protocol.id] = dict_protocol.schema

    # Index into the dictionary, then access an attribute of the entry.
    quantity_protocol = DummyInputOutputProtocol('quantity_protocol')
    quantity_protocol.input_value = ProtocolPath('output_value[a].temperature', dict_protocol.id)
    dummy_schema.protocols[quantity_protocol.id] = quantity_protocol.schema

    dummy_schema.validate_interfaces()

    dummy_property = create_dummy_property(Density)

    dummy_workflow = Workflow(dummy_property, {})
    dummy_workflow.schema = dummy_schema

    with tempfile.TemporaryDirectory() as temporary_directory:

        workflow_graph = WorkflowGraph(temporary_directory)
        workflow_graph.add_workflow(dummy_workflow)

        dask_local_backend = DaskLocalCluster(1, ComputeResources(1))
        dask_local_backend.start()

        try:

            results_futures = workflow_graph.submit(dask_local_backend)

            assert len(results_futures) == 1

            result = results_futures[0].result()
            assert isinstance(result, CalculationLayerResult)

        finally:
            # Always release the backend, even if an assertion failed.
            dask_local_backend.stop()
Beispiel #5
0
def test_protocol_group():
    """Execute a group of two chained protocols and check that the
    value exposed for the second protocol equals the value which was
    fed into the first."""

    starting_value = random.random() * unit.kelvin

    group = ProtocolGroup('protocol_group')

    first_protocol = DummyInputOutputProtocol('protocol_a')
    first_protocol.input_value = starting_value

    # The second protocol reads its input from the output of the first.
    second_protocol = DummyInputOutputProtocol('value_protocol_b')
    second_protocol.input_value = ProtocolPath('output_value', first_protocol.id)

    group.add_protocols(first_protocol, second_protocol)

    with tempfile.TemporaryDirectory() as working_directory:

        execution_result = group.execute(working_directory, None)
        assert not isinstance(execution_result, PropertyEstimatorException)

        final_output_path = ProtocolPath('output_value', second_protocol.id)
        assert group.get_value(final_output_path) == starting_value
def test_group_replicators():
    """Check how a replicator is applied to a protocol which sits inside
    of a group, and to protocols outside of the group which reference
    the replicated protocol."""

    replicator_id = 'replicator'
    placeholder = '$({})'.format(replicator_id)

    schema = WorkflowSchema()

    # A replicated protocol which lives inside of a group.
    replicated_protocol = DummyInputOutputProtocol('dummy_' + placeholder)
    replicated_protocol.input_value = ReplicatorValue(replicator_id)

    group = ProtocolGroup('dummy_group')
    group.add_protocols(replicated_protocol)
    schema.protocols[group.id] = group.schema

    # A replicated protocol outside of the group which takes a single
    # value from the replicated protocol.
    single_value_protocol = DummyInputOutputProtocol('dummy_single_' + placeholder)
    single_value_protocol.input_value = ProtocolPath('output_value',
                                                     group.id,
                                                     replicated_protocol.id)
    schema.protocols[single_value_protocol.id] = single_value_protocol.schema

    # A protocol which is not replicated, and which takes its values
    # from the replicated protocol.
    list_value_protocol = AddValues('dummy_list')
    list_value_protocol.values = ProtocolPath('output_value',
                                              group.id,
                                              replicated_protocol.id)
    schema.protocols[list_value_protocol.id] = list_value_protocol.schema

    replicator = ProtocolReplicator(replicator_id)
    replicator.template_values = [
        EstimatedQuantity(number * unit.kelvin, number * unit.kelvin, 'dummy_source')
        for number in (1.0, 2.0)
    ]

    schema.replicators.append(replicator)
    schema.validate_interfaces()

    dummy_property = create_dummy_property(Density)
    metadata = Workflow.generate_default_metadata(dummy_property,
                                                  'smirnoff99Frosst-1.1.0.offxml',
                                                  [])

    workflow = Workflow(dummy_property, metadata, '')
    workflow.schema = schema

    assert len(workflow.protocols) == 4
    assert len(workflow.protocols['dummy_list'].values) == 2

    for index in range(2):

        replicated_id = f'dummy_{index}'
        expected_path = ProtocolPath('output_value', group.id, replicated_id)

        replicated_inputs = workflow.protocols[group.id].protocols[replicated_id]
        assert replicated_inputs.input_value == replicator.template_values[index]

        assert workflow.protocols[f'dummy_single_{index}'].input_value == expected_path
        assert workflow.protocols['dummy_list'].values[index] == expected_path
def test_simple_workflow_graph():
    """Submit a workflow of two chained protocols to a workflow graph,
    and check the value that is returned.

    The calculation backend is stopped in a ``finally`` clause so that
    it is released even when one of the assertions fails.
    """
    dummy_schema = WorkflowSchema()

    dummy_protocol_a = DummyInputOutputProtocol('protocol_a')
    dummy_protocol_a.input_value = EstimatedQuantity(1 * unit.kelvin, 0.1 * unit.kelvin, 'dummy_source')

    dummy_schema.protocols[dummy_protocol_a.id] = dummy_protocol_a.schema

    # The second protocol takes the output of the first as its input.
    dummy_protocol_b = DummyInputOutputProtocol('protocol_b')
    dummy_protocol_b.input_value = ProtocolPath('output_value', dummy_protocol_a.id)

    dummy_schema.protocols[dummy_protocol_b.id] = dummy_protocol_b.schema

    dummy_schema.final_value_source = ProtocolPath('output_value', dummy_protocol_b.id)

    dummy_schema.validate_interfaces()

    dummy_property = create_dummy_property(Density)

    dummy_workflow = Workflow(dummy_property, {})
    dummy_workflow.schema = dummy_schema

    with tempfile.TemporaryDirectory() as temporary_directory:

        workflow_graph = WorkflowGraph(temporary_directory)
        workflow_graph.add_workflow(dummy_workflow)

        dask_local_backend = DaskLocalCluster(1, ComputeResources(1))
        dask_local_backend.start()

        try:

            results_futures = workflow_graph.submit(dask_local_backend)

            assert len(results_futures) == 1

            result = results_futures[0].result()
            assert isinstance(result, CalculationLayerResult)
            assert result.calculated_property.value == 1 * unit.kelvin

        finally:
            # Always release the backend, even if an assertion failed.
            dask_local_backend.stop()
Beispiel #8
0
def test_conditional_group_self_reference():
    """Tests that a protocol inside of a conditional group can take
    an output of the group itself (here its current iteration) as
    an input."""

    max_iterations = 10
    criteria = random.randint(1, max_iterations - 1)

    def greater_than_criteria(left_hand_value):
        # Builds a condition requiring `left_hand_value > criteria`.
        condition = ConditionalGroup.Condition()
        condition.left_hand_value = left_hand_value
        condition.right_hand_value = criteria
        condition.type = ConditionalGroup.ConditionType.GreaterThan
        return condition

    group = ConditionalGroup('conditional_group')
    group.max_iterations = max_iterations

    # The protocol reads the current iteration of its parent group.
    iteration_protocol = DummyInputOutputProtocol('protocol_a')
    iteration_protocol.input_value = ProtocolPath('current_iteration', group.id)

    # One condition on the output of the protocol, and one directly
    # on the current iteration of the group.
    protocol_output_condition = greater_than_criteria(
        ProtocolPath('output_value', group.id, iteration_protocol.id))
    group_iteration_condition = greater_than_criteria(
        ProtocolPath('current_iteration', group.id))

    group.add_protocols(iteration_protocol)

    for condition in (protocol_output_condition, group_iteration_condition):
        group.add_condition(condition)

    with tempfile.TemporaryDirectory() as working_directory:

        execution_result = group.execute(working_directory, None)

        assert not isinstance(execution_result, PropertyEstimatorException)
        assert iteration_protocol.output_value == criteria + 1
def test_advanced_nested_replicators():
    """Check the protocols produced by two nested replicators, where the
    id and the template values of the inner replicator both contain
    the placeholder of the outer replicator."""

    schema = WorkflowSchema()

    outer_replicator = ProtocolReplicator(replicator_id='replicator_a')
    outer_replicator.template_values = ['a', 'b']

    outer_placeholder = outer_replicator.placeholder_id

    # The inner replicator takes its values from the entry of the
    # global `dummy_list` selected by the outer placeholder.
    inner_replicator = ProtocolReplicator(replicator_id=f'replicator_b_{outer_placeholder}')
    inner_replicator.template_values = ProtocolPath(f'dummy_list[{outer_placeholder}]', 'global')

    replicated_protocol = DummyReplicableProtocol(
        f'dummy_{outer_placeholder}_{inner_replicator.placeholder_id}')

    replicated_protocol.replicated_value_a = ReplicatorValue(outer_replicator.id)
    replicated_protocol.replicated_value_b = ReplicatorValue(inner_replicator.id)

    schema.protocols[replicated_protocol.id] = replicated_protocol.schema
    schema.replicators = [outer_replicator, inner_replicator]

    schema.validate_interfaces()

    dummy_property = create_dummy_property(Density)

    metadata = Workflow.generate_default_metadata(dummy_property, 'smirnoff99Frosst-1.1.0.offxml', [])
    metadata['dummy_list'] = [[1], [2]]

    workflow = Workflow(dummy_property, metadata, '')
    workflow.schema = schema

    assert len(workflow.protocols) == 2

    # The expected (value_a, value_b) pair of each replicated protocol.
    expected_values = {
        'dummy_0_0': ('a', 1),
        'dummy_1_0': ('b', 2),
    }

    for protocol_id, (expected_a, expected_b) in expected_values.items():

        assert workflow.protocols[protocol_id].replicated_value_a == expected_a
        assert workflow.protocols[protocol_id].replicated_value_b == expected_b

    print(workflow.schema)
def test_index_replicated_protocol():
    """Build a workflow in which non-replicated protocols reference
    individual copies of a replicated protocol by explicit index.

    The test contains no assertions; it only exercises validating the
    schema and setting it on a workflow.
    """

    schema = WorkflowSchema()

    replicator = ProtocolReplicator('dummy_replicator')
    replicator.template_values = ['a', 'b', 'c', 'd']
    schema.replicators = [replicator]

    replicated_protocol = DummyInputOutputProtocol(f'protocol_{replicator.placeholder_id}')
    replicated_protocol.input_value = ReplicatorValue(replicator.id)
    schema.protocols[replicated_protocol.id] = replicated_protocol.schema

    # Add one protocol per template value, each pointing at the
    # replicated protocol with the matching index in its id.
    for index, _ in enumerate(replicator.template_values):

        indexing_protocol = DummyInputOutputProtocol(f'indexing_protocol_{index}')
        indexing_protocol.input_value = ProtocolPath('output_value', f'protocol_{index}')

        schema.protocols[indexing_protocol.id] = indexing_protocol.schema

    schema.validate_interfaces()

    workflow = Workflow(create_dummy_property(Density), {})
    workflow.schema = schema
Beispiel #11
0
def generate_base_reweighting_protocols(analysis_protocol,
                                        replicator_id='data_repl',
                                        id_suffix=''):
    """Builds the protocols which, once placed in a workflow schema,
    reweight a set of existing data to estimate a property. The
    observable to reweight is produced by the `analysis_protocol`
    which is passed in.

    Parameters
    ----------
    analysis_protocol: AveragePropertyProtocol
        The protocol which will take input from the stored data,
        and generate a set of observables to reweight.
    replicator_id: str
        The id to use for the data replicator.
    id_suffix: str
        A string suffix to append to each of the protocol ids.

    Returns
    -------
    BaseReweightingProtocols:
        A named tuple of the protocol which should form the bulk of
        a property estimation workflow.
    ProtocolReplicator:
        A replicator which will clone the workflow for each piece of
        stored data.
    """

    assert isinstance(analysis_protocol, protocols.AveragePropertyProtocol)

    # The placeholder which marks a protocol id as one to replicate, and
    # the suffix given to the ids of all such protocols.
    replicator_placeholder = f'$({replicator_id})'
    replicated_suffix = f'_{replicator_placeholder}{id_suffix}'

    # Unpack all of the stored data.
    unpack_stored_data = protocols.UnpackStoredSimulationData(f'unpack_data{replicated_suffix}')
    unpack_stored_data.simulation_data_path = ReplicatorValue(replicator_id)

    def stored_data_path(property_name):
        # A new path to an output of the unpacking protocol.
        return ProtocolPath(property_name, unpack_stored_data.id)

    def analysis_path(property_name):
        # A new path to an output of the analysis protocol.
        return ProtocolPath(property_name, analysis_protocol.id)

    def global_path(property_name):
        # A new path to a value in the global scope.
        return ProtocolPath(property_name, 'global')

    # Decorrelate the frames of each stored trajectory, using the
    # statistical inefficiency and equilibration index which are
    # output by the analysis protocol.
    decorrelate_trajectory = protocols.ExtractUncorrelatedTrajectoryData(
        f'decorrelate_traj{replicated_suffix}')

    decorrelate_trajectory.statistical_inefficiency = analysis_path('statistical_inefficiency')
    decorrelate_trajectory.equilibration_index = analysis_path('equilibration_index')
    decorrelate_trajectory.input_coordinate_file = stored_data_path('coordinate_file_path')
    decorrelate_trajectory.input_trajectory_path = stored_data_path('trajectory_file_path')

    # Stitch together all of the trajectories.
    concatenate_trajectories = protocols.ConcatenateTrajectories('concat_traj' + id_suffix)

    def concatenated_path(property_name):
        # A new path to an output of the concatenation protocol.
        return ProtocolPath(property_name, concatenate_trajectories.id)

    concatenate_trajectories.input_coordinate_paths = [
        stored_data_path('coordinate_file_path')
    ]
    concatenate_trajectories.input_trajectory_paths = [
        ProtocolPath('output_trajectory_path', decorrelate_trajectory.id)
    ]

    # Calculate the reduced potentials for each of the reference states.
    build_reference_system = protocols.BuildSmirnoffSystem(f'build_system{replicated_suffix}')

    build_reference_system.force_field_path = stored_data_path('force_field_path')
    build_reference_system.substance = stored_data_path('substance')
    build_reference_system.coordinate_file_path = stored_data_path('coordinate_file_path')

    reduced_reference_potential = protocols.CalculateReducedPotentialOpenMM(
        f'reduced_potential{replicated_suffix}')

    reduced_reference_potential.system_path = ProtocolPath('system_path',
                                                           build_reference_system.id)
    reduced_reference_potential.thermodynamic_state = stored_data_path('thermodynamic_state')
    reduced_reference_potential.coordinate_file_path = stored_data_path('coordinate_file_path')
    reduced_reference_potential.trajectory_file_path = concatenated_path('output_trajectory_path')

    # Calculate the reduced potential of the target state.
    build_target_system = protocols.BuildSmirnoffSystem('build_system_target' + id_suffix)

    build_target_system.force_field_path = global_path('force_field_path')
    build_target_system.substance = global_path('substance')
    build_target_system.coordinate_file_path = concatenated_path('output_coordinate_path')

    reduced_target_potential = protocols.CalculateReducedPotentialOpenMM(
        'reduced_potential_target' + id_suffix)

    reduced_target_potential.thermodynamic_state = global_path('thermodynamic_state')
    reduced_target_potential.system_path = ProtocolPath('system_path',
                                                        build_target_system.id)
    reduced_target_potential.coordinate_file_path = concatenated_path('output_coordinate_path')
    reduced_target_potential.trajectory_file_path = concatenated_path('output_trajectory_path')

    # Finally, apply MBAR to get the reweighted value.
    mbar_protocol = protocols.ReweightWithMBARProtocol('mbar' + id_suffix)

    mbar_protocol.reference_reduced_potentials = [
        ProtocolPath('reduced_potentials', reduced_reference_potential.id)
    ]
    mbar_protocol.reference_observables = [
        analysis_path('uncorrelated_values')
    ]
    mbar_protocol.target_reduced_potentials = [
        ProtocolPath('reduced_potentials', reduced_target_potential.id)
    ]

    base_protocols = BaseReweightingProtocols(unpack_stored_data,
                                              analysis_protocol,
                                              decorrelate_trajectory,
                                              concatenate_trajectories,
                                              build_reference_system,
                                              reduced_reference_potential,
                                              build_target_system,
                                              reduced_target_potential,
                                              mbar_protocol)

    # Create the replicator object.
    component_replicator = ProtocolReplicator(replicator_id=replicator_id)
    component_replicator.protocols_to_replicate = []

    # Register only those protocols whose id contains the placeholder.
    for protocol in base_protocols:

        if replicator_placeholder in protocol.id:

            component_replicator.protocols_to_replicate.append(
                ProtocolPath('', protocol.id))

    component_replicator.template_values = global_path('full_system_data')

    return base_protocols, component_replicator
Beispiel #12
0
    def get_default_reweighting_workflow_schema(options=None):
        """Returns the default workflow to use when estimating this property
        by reweighting existing data.

        The schema combines one set of reweighting protocols for the full
        system with a second set which is replicated over each component,
        sums the component values, and combines that sum with the full
        system value using a `SubtractValues` protocol. Gradients are
        combined in the same way.

        Parameters
        ----------
        options: WorkflowOptions
            The default options to use when setting up the estimation workflow.
            They are forwarded to `_get_reweighting_protocols`.

        Returns
        -------
        WorkflowSchema
            The schema to follow when estimating this property.
        """

        # Set up a replicator that will re-run the component reweighting workflow for each
        # component in the system.
        component_replicator = ProtocolReplicator(
            replicator_id='component_replicator')
        component_replicator.template_values = ProtocolPath(
            'components', 'global')

        gradient_replicator = ProtocolReplicator('gradient')
        gradient_replicator.template_values = ProtocolPath(
            'parameter_gradient_keys', 'global')

        # Set up the protocols which will reweight data for the full system.
        full_data_replicator_id = 'full_data_replicator'

        (full_protocols, full_volume, full_data_replicator,
         full_gradient_group,
         full_gradient_source) = ExcessMolarVolume._get_reweighting_protocols(
             '_full',
             gradient_replicator.id,
             full_data_replicator_id,
             options=options)

        # Set up the protocols which will reweight data for each component.
        component_data_replicator_id = f'component_{component_replicator.placeholder_id}_data_replicator'

        (component_protocols, component_volumes, component_data_replicator,
         component_gradient_group, component_gradient_source
         ) = ExcessMolarVolume._get_reweighting_protocols(
             '_component',
             gradient_replicator.id,
             component_data_replicator_id,
             replicator_id=component_replicator.id,
             weight_by_mole_fraction=True,
             substance_reference=ReplicatorValue(component_replicator.id),
             options=options)

        # Make sure the replicator is only replicating over component data.
        component_data_replicator.template_values = ProtocolPath(
            f'component_data[$({component_replicator.id})]', 'global')

        add_component_molar_volumes = miscellaneous.AddValues(
            'add_component_molar_volumes')
        add_component_molar_volumes.values = component_volumes

        # The id names the quantity this workflow estimates (a volume),
        # matching the id used by the default simulation schema.
        calculate_excess_volume = miscellaneous.SubtractValues(
            'calculate_excess_volume')
        calculate_excess_volume.value_b = full_volume
        calculate_excess_volume.value_a = ProtocolPath(
            'result', add_component_molar_volumes.id)

        # Combine the gradients.
        add_component_gradients = miscellaneous.AddValues(
            f'add_component_gradients'
            f'_{gradient_replicator.placeholder_id}')
        add_component_gradients.values = component_gradient_source

        combine_gradients = miscellaneous.SubtractValues(
            f'combine_gradients_{gradient_replicator.placeholder_id}')
        combine_gradients.value_b = full_gradient_source
        combine_gradients.value_a = ProtocolPath('result',
                                                 add_component_gradients.id)

        # Build the final workflow schema.
        schema = WorkflowSchema(property_type=ExcessMolarVolume.__name__)
        schema.id = '{}{}'.format(ExcessMolarVolume.__name__, 'Schema')

        schema.protocols = dict()

        schema.protocols.update(
            {protocol.id: protocol.schema
             for protocol in full_protocols})
        schema.protocols.update(
            {protocol.id: protocol.schema
             for protocol in component_protocols})

        # Register the protocols which combine the values and gradients.
        for protocol in (add_component_molar_volumes,
                         calculate_excess_volume,
                         full_gradient_group,
                         component_gradient_group,
                         add_component_gradients,
                         combine_gradients):

            schema.protocols[protocol.id] = protocol.schema

        schema.replicators = [
            full_data_replicator, component_replicator,
            component_data_replicator, gradient_replicator
        ]

        # Tell the schema where to look for its final values.
        schema.gradients_sources = [
            ProtocolPath('result', combine_gradients.id)
        ]
        schema.final_value_source = ProtocolPath('result',
                                                 calculate_excess_volume.id)

        return schema
Beispiel #13
0
    def get_default_simulation_workflow_schema(options=None):
        """Builds the default schema used to estimate this property
        from direct simulations.

        Parameters
        ----------
        options: WorkflowOptions
            The default options to use when setting up the estimation workflow.

        Returns
        -------
        WorkflowSchema
            The schema to follow when estimating this property.
        """

        # The id of the replicator which clones the gradient protocols
        # for each gradient key to be estimated.
        gradient_replicator_id = 'gradient_replicator'
        gradient_placeholder = f'$({gradient_replicator_id})'

        # The protocols which act on the full, mixed system.
        (full_protocols,
         full_molar_molecules,
         full_volume,
         full_output,
         full_gradient_group,
         _full_gradient_replicator,
         full_gradient) = ExcessMolarVolume._get_simulation_protocols(
             '_full', gradient_replicator_id, options=options)

        # The protocols which act on a single component of the system.
        component_replicator_id = 'component_replicator'
        component_substance = ReplicatorValue(component_replicator_id)

        # Weight by the mole fractions of the full system which was
        # actually built, as these may be slightly different to the mole
        # fractions of the measured property due to rounding.
        full_substance = ProtocolPath('output_substance',
                                      full_protocols.build_coordinates.id)

        (component_protocols,
         component_molar_molecules,
         component_volumes,
         component_output,
         component_gradient_group,
         _component_gradient_replicator,
         component_gradient) = ExcessMolarVolume._get_simulation_protocols(
             '_component',
             gradient_replicator_id,
             replicator_id=component_replicator_id,
             weight_by_mole_fraction=True,
             component_substance_reference=component_substance,
             full_substance_reference=full_substance,
             options=options)

        # Add together the component molar volumes, and combine the sum
        # with the molar volume of the mixed system.
        add_component_molar_volumes = miscellaneous.AddValues('add_component_molar_volumes')
        add_component_molar_volumes.values = component_volumes

        calculate_excess_volume = miscellaneous.SubtractValues('calculate_excess_volume')
        calculate_excess_volume.value_b = full_volume
        calculate_excess_volume.value_a = ProtocolPath('result',
                                                       add_component_molar_volumes.id)

        # The replicator which defines how the component protocols
        # are replicated for each component.
        component_replicator = ProtocolReplicator(replicator_id=component_replicator_id)
        component_replicator.template_values = ProtocolPath('components', 'global')

        # Combine the gradients.
        add_component_gradients = miscellaneous.AddValues(
            f'add_component_gradients_{gradient_placeholder}')
        add_component_gradients.values = component_gradient

        combine_gradients = miscellaneous.SubtractValues(
            f'combine_gradients_{gradient_placeholder}')
        combine_gradients.value_b = full_gradient
        combine_gradients.value_a = ProtocolPath('result', add_component_gradients.id)

        # The replicator over the gradient keys.
        gradient_replicator = ProtocolReplicator(replicator_id=gradient_replicator_id)
        gradient_replicator.template_values = ProtocolPath('parameter_gradient_keys', 'global')

        # Build the final workflow schema.
        property_name = ExcessMolarVolume.__name__

        schema = WorkflowSchema(property_type=property_name)
        schema.id = f'{property_name}Schema'

        # Every protocol to include in the schema, in registration order.
        schema_protocols = [
            component_protocols.build_coordinates,
            component_protocols.assign_parameters,
            component_protocols.energy_minimisation,
            component_protocols.equilibration_simulation,
            component_protocols.converge_uncertainty,
            component_molar_molecules,
            full_protocols.build_coordinates,
            full_protocols.assign_parameters,
            full_protocols.energy_minimisation,
            full_protocols.equilibration_simulation,
            full_protocols.converge_uncertainty,
            full_molar_molecules,
            component_protocols.extract_uncorrelated_trajectory,
            component_protocols.extract_uncorrelated_statistics,
            full_protocols.extract_uncorrelated_trajectory,
            full_protocols.extract_uncorrelated_statistics,
            add_component_molar_volumes,
            calculate_excess_volume,
            component_gradient_group,
            full_gradient_group,
            add_component_gradients,
            combine_gradients,
        ]

        schema.protocols = {protocol.id: protocol.schema for protocol in schema_protocols}

        schema.replicators = [gradient_replicator, component_replicator]

        # Finally, tell the schema where to look for its final values.
        schema.gradients_sources = [ProtocolPath('result', combine_gradients.id)]
        schema.final_value_source = ProtocolPath('result', calculate_excess_volume.id)

        schema.outputs_to_store = {
            'full_system': full_output,
            f'component_$({component_replicator_id})': component_output,
        }

        return schema
Beispiel #14
0
    def get_default_simulation_workflow_schema(options=None):
        """Build the default schema used to estimate this property from
        direct simulations.

        Parameters
        ----------
        options: WorkflowOptions
            The default options to use when setting up the estimation workflow.
            They are passed on to `generate_base_simulation_protocols`.

        Returns
        -------
        WorkflowSchema
            The schema to follow when estimating this property.
        """

        # The analysis step which averages the density observable.
        density_average = analysis.ExtractAverageStatistic('extract_density')
        density_average.statistics_type = ObservableType.Density

        # Generate the simulation protocols around the analysis step.
        base_protocols, value_source, output_to_store = generate_base_simulation_protocols(
            density_average, options)

        def production_output(output_name):
            # Returns a new path to the named output of the production
            # simulation, addressed through the convergence group.
            return ProtocolPath(output_name,
                                base_protocols.converge_uncertainty.id,
                                base_protocols.production_simulation.id)

        # The template protocol handed to the gradient group generator.
        reweighting_template = reweighting.ReweightStatistics('')
        reweighting_template.statistics_type = ObservableType.Density
        reweighting_template.statistics_paths = [
            production_output('statistics_file_path')
        ]

        gradient_group, gradient_replicator, gradient_source = generate_gradient_protocol_group(
            reweighting_template,
            [ProtocolPath('force_field_path', 'global')],
            ProtocolPath('force_field_path', 'global'),
            ProtocolPath('output_coordinate_file',
                         base_protocols.equilibration_simulation.id),
            production_output('trajectory_file_path'),
            production_output('statistics_file_path'))

        # Assemble the schema from the generated pieces.
        schema = WorkflowSchema(property_type=Density.__name__)
        schema.id = f'{Density.__name__}Schema'

        protocols_to_register = (
            base_protocols.build_coordinates,
            base_protocols.assign_parameters,
            base_protocols.energy_minimisation,
            base_protocols.equilibration_simulation,
            base_protocols.converge_uncertainty,
            base_protocols.extract_uncorrelated_trajectory,
            base_protocols.extract_uncorrelated_statistics,
            gradient_group,
        )

        schema.protocols = {
            protocol.id: protocol.schema
            for protocol in protocols_to_register
        }

        schema.replicators = [gradient_replicator]

        schema.outputs_to_store = {'full_system': output_to_store}

        schema.gradients_sources = [gradient_source]
        schema.final_value_source = value_source

        return schema
Beispiel #15
0
    def get_default_simulation_workflow_schema(options=None):
        """Build the default schema used to estimate this property from
        direct simulations.

        The schema contains one enthalpy workflow which is replicated for each
        component, one enthalpy workflow for the mixed system, and two
        protocols which sum the component values and subtract them from the
        mixed system value.

        Parameters
        ----------
        options: PropertyWorkflowOptions
            The default options to use when setting up the estimation workflow.
            They are passed on to `EnthalpyOfMixing.get_enthalpy_workflow`.

        Returns
        -------
        WorkflowSchema
            The schema to follow when estimating this property.
        """

        schema = WorkflowSchema(property_type=EnthalpyOfMixing.__name__)
        schema.id = f'{EnthalpyOfMixing.__name__}Schema'

        # The template workflow for a single component. Its protocol ids carry
        # the `$(repl)` tag, which ties them to the 'repl' replicator
        # created further below.
        pure_workflow = EnthalpyOfMixing.get_enthalpy_workflow(
            'component_$(repl)_', True, options)

        # The substances are placeholders which the replicator fills in.
        pure_workflow.build_coordinates.substance = ReplicatorValue('repl')
        pure_workflow.assign_topology.substance = ReplicatorValue('repl')

        # The workflow for the full, mixed system.
        mixture_workflow = EnthalpyOfMixing.get_enthalpy_workflow(
            'mixed_', False, options)

        # Sum the weighted component enthalpies. Only the template path is
        # listed here; it contains the `$(repl)` tag of the replicated
        # component workflow.
        sum_enthalpies = protocols.AddQuantities('add_component_enthalpies')
        sum_enthalpies.values = [
            ProtocolPath('weighted_value',
                         pure_workflow.converge_uncertainty.id,
                         'component_$(repl)_weight_by_mole_fraction')
        ]

        schema.protocols[sum_enthalpies.id] = sum_enthalpies.schema

        # Combine the summed component enthalpies with the mixed system value.
        mixing_enthalpy = protocols.SubtractQuantities(
            'calculate_enthalpy_of_mixing')

        mixing_enthalpy.value_b = ProtocolPath(
            'value',
            mixture_workflow.converge_uncertainty.id,
            'mixed_extract_enthalpy')
        mixing_enthalpy.value_a = ProtocolPath('result', sum_enthalpies.id)

        schema.protocols[mixing_enthalpy.id] = mixing_enthalpy.schema

        # Register the component protocols first, then the mixed ones.
        for workflow in (pure_workflow, mixture_workflow):

            for protocol in workflow:
                schema.protocols[protocol.id] = protocol.schema

        # The replicator lists every component protocol, followed by the
        # protocols nested inside of the component convergence group.
        component_replicator = ProtocolReplicator(replicator_id='repl')

        group_id = pure_workflow.converge_uncertainty.id

        top_level_paths = [
            ProtocolPath('', protocol.id) for protocol in pure_workflow
        ]
        nested_paths = [
            ProtocolPath('', group_id, nested_id)
            for nested_id in pure_workflow.converge_uncertainty.protocols
        ]

        component_replicator.protocols_to_replicate = top_level_paths + nested_paths

        # The values to replicate over are read from the global `components`.
        component_replicator.template_values = ProtocolPath(
            'components', 'global')

        schema.replicators = [component_replicator]

        # The final value is the output of the subtraction protocol.
        schema.final_value_source = ProtocolPath('result', mixing_enthalpy.id)

        # Describe which outputs of the mixed system should be stored.
        mixed_output = WorkflowOutputToStore()

        mixed_output.trajectory_file_path = ProtocolPath(
            'output_trajectory_path',
            mixture_workflow.subsample_trajectory.id)

        mixed_output.coordinate_file_path = ProtocolPath(
            'output_coordinate_file',
            mixture_workflow.converge_uncertainty.id,
            'mixed_npt_production')

        mixed_output.statistics_file_path = ProtocolPath(
            'output_statistics_path',
            mixture_workflow.subsample_statistics.id)

        mixed_output.statistical_inefficiency = ProtocolPath(
            'statistical_inefficiency',
            mixture_workflow.converge_uncertainty.id,
            'mixed_extract_enthalpy')

        # ... and which outputs of each replicated component workflow.
        component_output = WorkflowOutputToStore()

        component_output.substance = ReplicatorValue('repl')

        component_output.trajectory_file_path = ProtocolPath(
            'output_trajectory_path',
            pure_workflow.subsample_trajectory.id)

        component_output.coordinate_file_path = ProtocolPath(
            'output_coordinate_file',
            pure_workflow.converge_uncertainty.id,
            'component_$(repl)_npt_production')

        component_output.statistics_file_path = ProtocolPath(
            'output_statistics_path',
            pure_workflow.subsample_statistics.id)

        component_output.statistical_inefficiency = ProtocolPath(
            'statistical_inefficiency',
            pure_workflow.converge_uncertainty.id,
            'component_$(repl)_extract_enthalpy')

        schema.outputs_to_store = {
            'mixed_system': mixed_output,
            'component_$(repl)': component_output
        }

        return schema
def test_nested_replicators():
    """Check that a protocol tagged with two replicator ids is expanded into
    one protocol per combination of the two sets of template values."""

    # A single template protocol whose id carries both replicator tags.
    template_protocol = DummyReplicableProtocol('dummy_$(rep_a)_$(rep_b)')
    template_protocol.replicated_value_a = ReplicatorValue('rep_a')
    template_protocol.replicated_value_b = ReplicatorValue('rep_b')

    schema = WorkflowSchema()
    schema.protocols[template_protocol.id] = template_protocol.schema
    schema.final_value_source = ProtocolPath('final_value',
                                             template_protocol.id)

    # Both replicators target the same template protocol.
    replicators = []

    for replicator_id, template_values in (('rep_a', ['a', 'b']),
                                           ('rep_b', [1, 2])):

        replicator = ProtocolReplicator(replicator_id=replicator_id)
        replicator.template_values = template_values
        replicator.protocols_to_replicate = [
            ProtocolPath('', template_protocol.id)
        ]

        replicators.append(replicator)

    schema.replicators = replicators
    schema.validate_interfaces()

    # Build a workflow from the schema.
    density_property = create_dummy_property(Density)

    metadata = Workflow.generate_default_metadata(
        density_property,
        get_data_filename('forcefield/smirnoff99Frosst.offxml'),
        PropertyEstimatorOptions())

    workflow = Workflow(density_property, metadata)
    workflow.schema = schema

    assert len(workflow.protocols) == 4

    # Expected (replicated_value_a, replicated_value_b) per protocol id suffix.
    expected_values = {
        '|dummy_0_0': ('a', 1),
        '|dummy_0_1': ('a', 2),
        '|dummy_1_0': ('b', 1),
        '|dummy_1_1': ('b', 2),
    }

    for id_suffix, (expected_a, expected_b) in expected_values.items():

        replicated_protocol = workflow.protocols[workflow.uuid + id_suffix]

        assert replicated_protocol.replicated_value_a == expected_a
        assert replicated_protocol.replicated_value_b == expected_b

    print(workflow.schema)
Beispiel #17
0
    def get_default_simulation_workflow_schema(options=None):
        """Build the default schema used to estimate this property from
        direct simulations.

        Parameters
        ----------
        options: WorkflowOptions
            The default options to use when setting up the estimation workflow.
            This argument is not referenced by this method.

        Returns
        -------
        WorkflowSchema
            The schema to follow when estimating this property.
        """

        def from_global(property_name):
            # Returns a new path to a value stored in the 'global' scope.
            return ProtocolPath(property_name, 'global')

        # Pick the guest out of the substance. Only substances with
        # a single guest ligand are supported.
        filter_ligand = miscellaneous.FilterSubstanceByRole('filter_ligand')
        filter_ligand.input_substance = from_global('substance')
        filter_ligand.component_role = Substance.ComponentRole.Ligand
        filter_ligand.expected_components = 1

        # Pick the host out of the substance. Only substances with
        # a single host receptor are supported.
        filter_receptor = miscellaneous.FilterSubstanceByRole(
            'filter_receptor')
        filter_receptor.input_substance = from_global('substance')
        filter_receptor.component_role = Substance.ComponentRole.Receptor
        filter_receptor.expected_components = 1

        # Dock the guest into the host.
        perform_docking = coordinates.BuildDockedCoordinates('perform_docking')
        perform_docking.ligand_substance = ProtocolPath(
            'filtered_substance', filter_ligand.id)
        perform_docking.receptor_coordinate_file = from_global('receptor_mol2')

        # Pick the solvent out of the substance.
        filter_solvent = miscellaneous.FilterSubstanceByRole('filter_solvent')
        filter_solvent.input_substance = from_global('substance')
        filter_solvent.component_role = Substance.ComponentRole.Solvent

        # Solvate the docked complex.
        solvate_complex = coordinates.SolvateExistingStructure(
            'solvate_complex')
        solvate_complex.max_molecules = 1000
        solvate_complex.substance = ProtocolPath(
            'filtered_substance', filter_solvent.id)
        solvate_complex.solute_coordinate_file = ProtocolPath(
            'docked_complex_coordinate_path', perform_docking.id)

        # Assign force field parameters to the solvated complex.
        build_complex_system = forcefield.BuildSmirnoffSystem(
            'build_solvated_complex_system')
        build_complex_system.force_field_path = from_global('force_field_path')
        build_complex_system.coordinate_file_path = ProtocolPath(
            'coordinate_file_path', solvate_complex.id)
        build_complex_system.substance = from_global('substance')
        build_complex_system.charged_molecule_paths = [
            from_global('receptor_mol2')
        ]

        # Solvate the ligand on its own.
        solvate_ligand = coordinates.SolvateExistingStructure('solvate_ligand')
        solvate_ligand.max_molecules = 1000
        solvate_ligand.substance = ProtocolPath(
            'filtered_substance', filter_solvent.id)
        solvate_ligand.solute_coordinate_file = ProtocolPath(
            'docked_ligand_coordinate_path', perform_docking.id)

        # Assign force field parameters to the solvated ligand.
        build_ligand_system = forcefield.BuildSmirnoffSystem(
            'build_solvated_ligand_system')
        build_ligand_system.force_field_path = from_global('force_field_path')
        build_ligand_system.coordinate_file_path = ProtocolPath(
            'coordinate_file_path', solvate_ligand.id)
        build_ligand_system.substance = from_global('substance')

        # Employ YANK to estimate the binding free energy.
        yank_protocol = yank.LigandReceptorYankProtocol('yank_protocol')

        yank_protocol.thermodynamic_state = from_global('thermodynamic_state')

        yank_protocol.number_of_iterations = 2000
        yank_protocol.steps_per_iteration = 500
        yank_protocol.checkpoint_interval = 10

        yank_protocol.verbose = True

        yank_protocol.force_field_path = from_global('force_field_path')

        yank_protocol.ligand_residue_name = ProtocolPath(
            'ligand_residue_name', perform_docking.id)
        yank_protocol.receptor_residue_name = ProtocolPath(
            'receptor_residue_name', perform_docking.id)

        yank_protocol.solvated_ligand_coordinates = ProtocolPath(
            'coordinate_file_path', solvate_ligand.id)
        yank_protocol.solvated_ligand_system = ProtocolPath(
            'system_path', build_ligand_system.id)

        yank_protocol.solvated_complex_coordinates = ProtocolPath(
            'coordinate_file_path', solvate_complex.id)
        yank_protocol.solvated_complex_system = ProtocolPath(
            'system_path', build_complex_system.id)

        # Assemble the schema, registering the protocols in the order in
        # which they were defined above.
        schema = WorkflowSchema(
            property_type=HostGuestBindingAffinity.__name__)
        schema.id = f'{HostGuestBindingAffinity.__name__}Schema'

        for protocol in (filter_ligand,
                         filter_receptor,
                         perform_docking,
                         filter_solvent,
                         solvate_complex,
                         build_complex_system,
                         solvate_ligand,
                         build_ligand_system,
                         yank_protocol):

            schema.protocols[protocol.id] = protocol.schema

        # The final value is the free energy estimated by YANK.
        schema.final_value_source = ProtocolPath('estimated_free_energy',
                                                 yank_protocol.id)

        # NOTE: `schema.outputs_to_store` is not set by this method.

        return schema
    def get_default_reweighting_workflow_schema(options=None):
        """Build the default schema used to estimate this property by
        reweighting existing data.

        Parameters
        ----------
        options: WorkflowOptions
            The default options to use when setting up the estimation workflow.
            They are passed on to `generate_base_reweighting_protocols`.

        Returns
        -------
        WorkflowSchema
            The schema to follow when estimating this property.
        """

        data_replicator_id = 'data_replicator'

        # The protocol which extracts the dielectric constant from the stored
        # data. Its id carries the tag of the data replicator.
        extract_dielectric = ExtractAverageDielectric(
            f'calc_dielectric_$({data_replicator_id})')

        # The dielectric constant uses its own reweighting protocol, which
        # takes the dipole moments and volumes of the extraction protocol.
        mbar_reweighting = ReweightDielectricConstant('reweight_dielectric')

        mbar_reweighting.reference_dipole_moments = ProtocolPath(
            'uncorrelated_values', extract_dielectric.id)
        mbar_reweighting.reference_volumes = ProtocolPath(
            'uncorrelated_volumes', extract_dielectric.id)
        mbar_reweighting.thermodynamic_state = ProtocolPath(
            'thermodynamic_state', 'global')

        mbar_reweighting.bootstrap_uncertainties = True
        mbar_reweighting.bootstrap_iterations = 200

        # Copy the reweighting protocol *before* it is handed to the base
        # protocol generator; the copy is the template for the gradients.
        gradient_template = copy.deepcopy(mbar_reweighting)

        reweighting_protocols, data_replicator = generate_base_reweighting_protocols(
            extract_dielectric, mbar_reweighting, options, data_replicator_id)

        # Point the extraction protocol at the outputs of the base protocols.
        extract_dielectric.system_path = ProtocolPath(
            'system_path', reweighting_protocols.build_reference_system.id)
        extract_dielectric.thermodynamic_state = ProtocolPath(
            'thermodynamic_state', reweighting_protocols.unpack_stored_data.id)

        # Set up the gradient calculations from the concatenated data.
        concatenation_id = reweighting_protocols.concatenate_trajectories.id

        coordinate_path = ProtocolPath('output_coordinate_path',
                                       concatenation_id)
        trajectory_path = ProtocolPath('output_trajectory_path',
                                       concatenation_id)

        reference_force_field = ProtocolPath(
            'force_field_path', reweighting_protocols.unpack_stored_data.id)
        target_force_field = ProtocolPath('force_field_path', 'global')

        sample_indices = ProtocolPath('effective_sample_indices',
                                      mbar_reweighting.id)

        gradient_group, gradient_replicator, gradient_source = generate_gradient_protocol_group(
            gradient_template,
            reference_force_field,
            target_force_field,
            coordinate_path,
            trajectory_path,
            replicator_id='grad',
            use_subset_of_force_field=False,
            effective_sample_indices=sample_indices)

        # Assemble the schema: the base protocols first, then the gradients.
        schema = WorkflowSchema(property_type=DielectricConstant.__name__)
        schema.id = f'{DielectricConstant.__name__}Schema'

        protocol_schemas = {}

        for protocol in reweighting_protocols:
            protocol_schemas[protocol.id] = protocol.schema

        protocol_schemas[gradient_group.id] = gradient_group.schema

        schema.protocols = protocol_schemas

        schema.replicators = [data_replicator, gradient_replicator]

        schema.gradients_sources = [gradient_source]
        schema.final_value_source = ProtocolPath(
            'value', reweighting_protocols.mbar_protocol.id)

        return schema
Beispiel #19
0
    def get_enthalpy_workflow(id_prefix='',
                              weight_by_mole_fraction=False,
                              options=None):
        """Build the set of protocols which, when combined in a workflow,
        will yield the enthalpy of a substance.

        Parameters
        ----------
        id_prefix: str
            A prefix to prepend to the id of each of the returned protocols.
        weight_by_mole_fraction: bool
            If true, an extra protocol is added to the convergence group which
            weights the calculated enthalpy by the mole fraction of the
            component.
        options: PropertyWorkflowOptions
            The options to use when setting up the workflows. This argument
            is not referenced by this method.

        Returns
        -------
        EnthalpyOfMixing.EnthalpyWorkflow
            The protocols used to estimate the enthalpy of a substance.
        """

        def prefixed(protocol_name):
            # Builds a protocol id from the shared prefix.
            return id_prefix + protocol_name

        # System set up.
        build_coordinates = protocols.BuildCoordinatesPackmol(
            prefixed('build_coordinates'))
        build_coordinates.substance = ProtocolPath('substance', 'global')

        assign_topology = protocols.BuildSmirnoffSystem(
            prefixed('build_topology'))
        assign_topology.force_field_path = ProtocolPath(
            'force_field_path', 'global')
        assign_topology.coordinate_file_path = ProtocolPath(
            'coordinate_file_path', build_coordinates.id)
        assign_topology.substance = ProtocolPath('substance', 'global')

        # Equilibration: a minimisation followed by an NPT simulation.
        energy_minimisation = protocols.RunEnergyMinimisation(
            prefixed('energy_minimisation'))
        energy_minimisation.input_coordinate_file = ProtocolPath(
            'coordinate_file_path', build_coordinates.id)
        energy_minimisation.system_path = ProtocolPath(
            'system_path', assign_topology.id)

        npt_equilibration = protocols.RunOpenMMSimulation(
            prefixed('npt_equilibration'))
        npt_equilibration.ensemble = Ensemble.NPT
        npt_equilibration.steps = 100000  # Debug settings.
        npt_equilibration.output_frequency = 5000  # Debug settings.
        npt_equilibration.thermodynamic_state = ProtocolPath(
            'thermodynamic_state', 'global')
        npt_equilibration.input_coordinate_file = ProtocolPath(
            'output_coordinate_file', energy_minimisation.id)
        npt_equilibration.system_path = ProtocolPath(
            'system_path', assign_topology.id)

        # Production simulation.
        npt_production = protocols.RunOpenMMSimulation(
            prefixed('npt_production'))
        npt_production.ensemble = Ensemble.NPT
        npt_production.steps = 500000  # Debug settings.
        npt_production.output_frequency = 5000  # Debug settings.
        npt_production.thermodynamic_state = ProtocolPath(
            'thermodynamic_state', 'global')
        npt_production.input_coordinate_file = ProtocolPath(
            'output_coordinate_file', npt_equilibration.id)
        npt_production.system_path = ProtocolPath(
            'system_path', assign_topology.id)

        # Analysis of the production simulation.
        extract_enthalpy = protocols.ExtractAverageStatistic(
            prefixed('extract_enthalpy'))
        extract_enthalpy.statistics_type = ObservableType.Enthalpy
        extract_enthalpy.statistics_path = ProtocolPath(
            'statistics_file_path', npt_production.id)

        # The production and analysis protocols are placed in a conditional
        # group, whose condition compares the uncertainty of the extracted
        # value with the global `per_component_uncertainty`.
        converge_uncertainty = groups.ConditionalGroup(
            prefixed('converge_uncertainty'))
        converge_uncertainty.add_protocols(npt_production, extract_enthalpy)

        converge_uncertainty.max_iterations = 1

        uncertainty_condition = groups.ConditionalGroup.Condition()

        uncertainty_condition.left_hand_value = ProtocolPath(
            'value.uncertainty', converge_uncertainty.id, extract_enthalpy.id)
        uncertainty_condition.right_hand_value = ProtocolPath(
            'per_component_uncertainty', 'global')
        uncertainty_condition.condition_type = groups.ConditionalGroup.ConditionType.LessThan

        converge_uncertainty.add_condition(uncertainty_condition)

        # These two paths are shared by both of the subsampling protocols.
        statistical_inefficiency = ProtocolPath(
            'statistical_inefficiency',
            converge_uncertainty.id,
            extract_enthalpy.id)

        equilibration_index = ProtocolPath(
            'equilibration_index',
            converge_uncertainty.id,
            extract_enthalpy.id)

        if weight_by_mole_fraction:

            # The component workflows need an extra step, added to the
            # convergence group, which weights the enthalpy by mole fraction.
            weighting_protocol = WeightValueByMoleFraction(
                prefixed('weight_by_mole_fraction'))

            weighting_protocol.value = ProtocolPath(
                'value', extract_enthalpy.id)
            weighting_protocol.full_substance = ProtocolPath(
                'substance', 'global')

            # The component is a placeholder tied to the 'repl' replicator.
            weighting_protocol.component = ReplicatorValue('repl')

            converge_uncertainty.add_protocols(weighting_protocol)

        # Subsample the trajectory of the production simulation.
        subsample_trajectory = protocols.ExtractUncorrelatedTrajectoryData(
            prefixed('extract_traj'))

        subsample_trajectory.statistical_inefficiency = statistical_inefficiency
        subsample_trajectory.equilibration_index = equilibration_index

        subsample_trajectory.input_coordinate_file = ProtocolPath(
            'output_coordinate_file',
            converge_uncertainty.id,
            npt_production.id)
        subsample_trajectory.input_trajectory_path = ProtocolPath(
            'trajectory_file_path',
            converge_uncertainty.id,
            npt_production.id)

        # Subsample the statistics of the production simulation.
        subsample_statistics = protocols.ExtractUncorrelatedStatisticsData(
            prefixed('extract_stats'))

        subsample_statistics.statistical_inefficiency = statistical_inefficiency
        subsample_statistics.equilibration_index = equilibration_index

        subsample_statistics.input_statistics_path = ProtocolPath(
            'statistics_file_path',
            converge_uncertainty.id,
            npt_production.id)

        # noinspection PyCallByClass
        return EnthalpyOfMixing.EnthalpyWorkflow(
            build_coordinates,
            assign_topology,
            energy_minimisation,
            npt_equilibration,
            converge_uncertainty,
            subsample_trajectory,
            subsample_statistics)
Beispiel #20
0
    def get_default_reweighting_workflow_schema(options=None):
        """Assemble the default schema used to estimate this property by
        reweighting previously stored data.

        The enthalpy of the full mixture and of each component are reweighted
        by two separate sets of base reweighting protocols. The component
        values are weighted by mole fraction and summed, and the sum is then
        combined with the mixture value by a `SubtractQuantities` protocol.

        Parameters
        ----------
        options: PropertyWorkflowOptions
            The default options to use when setting up the estimation workflow.
            This argument is not read by this method.

        Returns
        -------
        WorkflowSchema
            The schema to follow when estimating this property.
        """

        # --- Full mixture ---------------------------------------------------
        mixture_enthalpy = protocols.ExtractAverageStatistic(
            'extract_enthalpy_$(mix_data_repl)_mixture')
        mixture_enthalpy.statistics_type = ObservableType.Enthalpy

        mixture_set, mixture_data_replicator = generate_base_reweighting_protocols(
            mixture_enthalpy, 'mix_data_repl', '_mixture')

        # The statistics are read from the unpacked stored data of the mixture.
        mixture_enthalpy.statistics_path = ProtocolPath(
            'statistics_file_path', mixture_set.unpack_stored_data.id)

        # --- Individual components ------------------------------------------
        component_enthalpy = protocols.ExtractAverageStatistic(
            'extract_enthalpy_$(pure_data_repl)_comp_$(comp_repl)')
        component_enthalpy.statistics_type = ObservableType.Enthalpy

        component_set, pure_data_replicator = generate_base_reweighting_protocols(
            component_enthalpy, 'pure_data_repl', '_pure_$(comp_repl)')

        component_enthalpy.statistics_path = ProtocolPath(
            'statistics_file_path', component_set.unpack_stored_data.id)

        # Restrict the data replicator to the data of the component which is
        # currently being replicated.
        pure_data_replicator.template_values = ProtocolPath(
            'component_data[$(comp_repl)]', 'global')

        # --- Combine the component and mixture enthalpies -------------------
        weight_component = WeightValueByMoleFraction('weight_comp_$(comp_repl)')
        weight_component.value = ProtocolPath(
            'value', component_set.mbar_protocol.id)
        weight_component.full_substance = ProtocolPath('substance', 'global')
        weight_component.component = ReplicatorValue('comp_repl')

        sum_components = protocols.AddQuantities('add_component_enthalpies')
        sum_components.values = [
            ProtocolPath('weighted_value', weight_component.id)
        ]

        mixing_enthalpy = protocols.SubtractQuantities(
            'calculate_enthalpy_of_mixing')
        mixing_enthalpy.value_b = ProtocolPath(
            'value', mixture_set.mbar_protocol.id)
        mixing_enthalpy.value_a = ProtocolPath('result', sum_components.id)

        # --- Replicate the component protocols for each component -----------
        component_replicator = ProtocolReplicator(replicator_id='comp_repl')
        component_replicator.protocols_to_replicate = [
            ProtocolPath('', weight_component.id)
        ]
        component_replicator.protocols_to_replicate.extend(
            ProtocolPath('', replicated.id) for replicated in component_set)
        component_replicator.template_values = ProtocolPath(
            'components', 'global')

        # --- Build the final workflow schema --------------------------------
        property_name = EnthalpyOfMixing.__name__

        schema = WorkflowSchema(property_type=property_name)
        schema.id = '{}{}'.format(property_name, 'Schema')

        # Register the protocols in the order: mixture set, component set,
        # then the three combining protocols.
        schema.protocols = {}

        combining_protocols = (weight_component, sum_components, mixing_enthalpy)

        for member in (*mixture_set, *component_set, *combining_protocols):
            schema.protocols[member.id] = member.schema

        schema.replicators = [
            mixture_data_replicator,
            component_replicator,
            pure_data_replicator,
        ]

        schema.final_value_source = ProtocolPath('result', mixing_enthalpy.id)

        return schema
Beispiel #21
0
    def get_default_reweighting_workflow_schema(options=None):
        """Assemble the default schema used to estimate the dielectric
        constant by reweighting previously stored data.

        The standard base reweighting protocols are generated around an
        `ExtractAverageDielectric` analysis protocol, and the final entry of
        the protocol tuple is swapped for a `ReweightDielectricConstant`
        protocol.

        Parameters
        ----------
        options: PropertyWorkflowOptions
            The default options to use when setting up the estimation workflow.
            This argument is not read by this method.

        Returns
        -------
        WorkflowSchema
            The schema to follow when estimating this property.
        """

        extract_dielectric = ExtractAverageDielectric(
            'calc_dielectric_$(data_repl)')

        generated_protocols, data_replicator = generate_base_reweighting_protocols(
            extract_dielectric)

        # Wire the inputs of the analysis protocol to the unpacked stored data
        # and to the reference system.
        unpack_id = generated_protocols.unpack_stored_data.id
        reference_system_id = generated_protocols.build_reference_system.id

        extract_dielectric.thermodynamic_state = ProtocolPath(
            'thermodynamic_state', unpack_id)
        extract_dielectric.input_coordinate_file = ProtocolPath(
            'coordinate_file_path', unpack_id)
        extract_dielectric.trajectory_path = ProtocolPath(
            'trajectory_file_path', unpack_id)
        extract_dielectric.system_path = ProtocolPath(
            'system_path', reference_system_id)

        # The dielectric constant is a fluctuation property, so a specialised
        # reweighting protocol is used in place of the generated one.
        reweight_dielectric = ReweightDielectricConstant('mbar')

        reference_potential_id = generated_protocols.reduced_reference_potential.id
        target_potential_id = generated_protocols.reduced_target_potential.id

        reweight_dielectric.reference_reduced_potentials = [
            ProtocolPath('reduced_potentials', reference_potential_id)
        ]
        reweight_dielectric.reference_observables = [
            ProtocolPath('uncorrelated_values', extract_dielectric.id)
        ]
        reweight_dielectric.reference_volumes = [
            ProtocolPath('uncorrelated_volumes', extract_dielectric.id)
        ]
        reweight_dielectric.target_reduced_potentials = [
            ProtocolPath('reduced_potentials', target_potential_id)
        ]

        reweight_dielectric.thermodynamic_state = ProtocolPath(
            'thermodynamic_state', 'global')

        reweight_dielectric.bootstrap_uncertainties = True
        reweight_dielectric.bootstrap_iterations = 200

        # The protocol tuple is immutable, so build a new one which carries
        # the specialised reweighting protocol as its last entry.
        final_protocols = BaseReweightingProtocols(
            generated_protocols.unpack_stored_data,
            generated_protocols.analysis_protocol,
            generated_protocols.decorrelate_trajectory,
            generated_protocols.concatenate_trajectories,
            generated_protocols.build_reference_system,
            generated_protocols.reduced_reference_potential,
            generated_protocols.build_target_system,
            generated_protocols.reduced_target_potential,
            reweight_dielectric)

        property_name = DielectricConstant.__name__

        schema = WorkflowSchema(property_type=property_name)
        schema.id = '{}{}'.format(property_name, 'Schema')

        protocol_schemas = {}

        for member in final_protocols:
            protocol_schemas[member.id] = member.schema

        schema.protocols = protocol_schemas
        schema.replicators = [data_replicator]

        schema.final_value_source = ProtocolPath(
            'value', final_protocols.mbar_protocol.id)

        return schema
Beispiel #22
0
    def _get_reweighting_protocols(id_suffix,
                                   gradient_replicator_id,
                                   data_replicator_id,
                                   replicator_id=None,
                                   weight_by_mole_fraction=False,
                                   substance_reference=None,
                                   options=None):
        """Build the protocols which, combined in a workflow, yield the molar
        volume of a substance by reweighting cached data, together with the
        protocols which estimate its gradient.

        Parameters
        ----------
        id_suffix: str
            A suffix to append to the id of each of the returned protocols.
        gradient_replicator_id: str
            The id of the replicator which will clone those protocols which will
            estimate the gradient of the molar volume with respect to a given parameter.
        data_replicator_id: str
            The id of the replicator which will be used to clone these protocols
            for each cached simulation data.
        replicator_id: str, optional
            The optional id of the replicator which will be used to clone these
            protocols, e.g. for each component in the system.
        weight_by_mole_fraction: bool
            If exactly `True`, extra protocols are added which weight the
            calculated volume and its gradient by the mole fraction of the
            component.
        substance_reference: ProtocolPath or PlaceholderInput, optional
            An optional protocol path (or replicator reference) to the substance
            whose molar volume is being estimated. Defaults to a path to the
            global `substance`.
        options: WorkflowOptions
            The options to use when setting up the workflows.

        Returns
        -------
        tuple
            The base reweighting protocols followed by the extra protocols
            created here (the molecule count scaling, the division, and the
            mole fraction weighting when requested).
        ProtocolPath
            A reference to the estimated molar volume.
        ProtocolReplicator
            The replicator which will replicate each protocol for each
            cached simulation datum.
        ProtocolGroup
            The group of protocols which will calculate the gradient of the reduced potential
            with respect to a given property.
        ProtocolPath
            A reference to the value of the gradient.
        """

        if replicator_id is not None:
            id_suffix = f'{id_suffix}_$({replicator_id})'

        # The id of the extraction protocol additionally carries the data
        # replicator placeholder, when one was given.
        full_id_suffix = (id_suffix if data_replicator_id is None
                          else f'{id_suffix}_$({data_replicator_id})')

        if substance_reference is None:
            substance_reference = ProtocolPath('substance', 'global')

        extract_volume = analysis.ExtractAverageStatistic(
            f'extract_volume{full_id_suffix}')
        extract_volume.statistics_type = ObservableType.Volume

        reweight_volume = reweighting.ReweightStatistics(
            f'reweight_volume{id_suffix}')
        reweight_volume.statistics_type = ObservableType.Volume

        base_protocols, data_replicator = generate_base_reweighting_protocols(
            analysis_protocol=extract_volume,
            mbar_protocol=reweight_volume,
            workflow_options=options,
            replicator_id=data_replicator_id,
            id_suffix=id_suffix)

        # Make sure to use the correct substance.
        base_protocols.build_target_system.substance = substance_reference

        # Optionally weight the reweighted volume by the mole fraction of the
        # component. The value which gets divided below is then the weighted one.
        weight_volume = None

        if weight_by_mole_fraction is True:
            weight_volume = miscellaneous.WeightByMoleFraction(
                f'weight_volume{id_suffix}')
            weight_volume.value = ProtocolPath(
                'value', base_protocols.mbar_protocol.id)
            weight_volume.full_substance = ProtocolPath('substance', 'global')
            weight_volume.component = substance_reference

            value_source = ProtocolPath('weighted_value', weight_volume.id)
        else:
            value_source = ProtocolPath(
                'value', base_protocols.mbar_protocol.id)

        # The number of molecules is read from the unpacked data whose
        # replicator placeholder has been replaced by index 0.
        first_unpack_id = base_protocols.unpack_stored_data.id.replace(
            f'$({data_replicator_id})', '0')
        number_of_molecules = ProtocolPath(
            'total_number_of_molecules', first_unpack_id)

        # Convert the number of molecules into an amount in moles.
        moles_per_molecule = (1.0 / unit.avogadro_number).to(unit.mole)
        moles_uncertainty = (0.0 / unit.avogadro_number).to(unit.mole)

        number_of_molar_molecules = miscellaneous.MultiplyValue(
            f'number_of_molar_molecules{id_suffix}')
        number_of_molar_molecules.value = EstimatedQuantity(
            moles_per_molecule, moles_uncertainty, '')
        number_of_molar_molecules.multiplier = number_of_molecules

        # Divide the (optionally weighted) volume by the molar amount.
        divide_by_molecules = miscellaneous.DivideValue(
            f'divide_by_molecules{id_suffix}')
        divide_by_molecules.value = value_source
        divide_by_molecules.divisor = ProtocolPath(
            'result.value', number_of_molar_molecules.id)

        value_source = ProtocolPath('result', divide_by_molecules.id)

        # Set up the gradient calculations around a template reweighting protocol.
        gradient_template = reweighting.ReweightStatistics('')
        gradient_template.statistics_type = ObservableType.Volume
        gradient_template.statistics_paths = ProtocolPath(
            'output_statistics_path', base_protocols.decorrelate_statistics.id)

        concatenate_id = base_protocols.concatenate_trajectories.id

        coordinate_path = ProtocolPath('output_coordinate_path', concatenate_id)
        trajectory_path = ProtocolPath('output_trajectory_path', concatenate_id)

        reference_force_field = ProtocolPath(
            'force_field_path', base_protocols.unpack_stored_data.id)
        target_force_field = ProtocolPath('force_field_path', 'global')

        sample_indices = ProtocolPath(
            'effective_sample_indices', base_protocols.mbar_protocol.id)

        gradient_group, _, gradient_source = generate_gradient_protocol_group(
            gradient_template,
            reference_force_field,
            target_force_field,
            coordinate_path,
            trajectory_path,
            replicator_id=gradient_replicator_id,
            id_suffix=id_suffix,
            substance_source=substance_reference,
            use_subset_of_force_field=False,
            effective_sample_indices=sample_indices)

        # Remove the group id from the path, as the protocols which consume it
        # below are added to the gradient group themselves.
        gradient_source.pop_next_in_path()

        if weight_by_mole_fraction is True:
            # The component workflows need an extra step to multiply their gradients by their
            # relative mole fraction.
            weight_gradient = miscellaneous.WeightByMoleFraction(
                f'weight_gradient_$({gradient_replicator_id})_'
                f'by_mole_fraction{id_suffix}')
            weight_gradient.value = gradient_source
            weight_gradient.full_substance = ProtocolPath('substance', 'global')
            weight_gradient.component = substance_reference

            gradient_group.add_protocols(weight_gradient)
            gradient_source = ProtocolPath('weighted_value', weight_gradient.id)

        # Scale the gradient by the same molar amount as the value itself.
        scale_gradient = miscellaneous.DivideValue(
            f'scale_gradient_$({gradient_replicator_id}){id_suffix}')
        scale_gradient.value = gradient_source
        scale_gradient.divisor = ProtocolPath(
            'result.value', number_of_molar_molecules.id)

        gradient_group.add_protocols(scale_gradient)
        gradient_source = ProtocolPath(
            'result', gradient_group.id, scale_gradient.id)

        # Collect every protocol which lives outside of the gradient group.
        collected = [*base_protocols, number_of_molar_molecules,
                     divide_by_molecules]

        if weight_volume is not None:
            collected.append(weight_volume)

        return (tuple(collected), value_source, data_replicator,
                gradient_group, gradient_source)
    def get_default_simulation_workflow_schema(options=None):
        """Assemble the default schema used to estimate the dielectric
        constant from direct simulations.

        Parameters
        ----------
        options: WorkflowOptions
            The default options to use when setting up the estimation workflow.
            They are forwarded to `generate_base_simulation_protocols`.

        Returns
        -------
        WorkflowSchema
            The schema to follow when estimating this property.
        """

        # The analysis protocol which extracts the average dielectric constant
        # from the results of a simulation.
        analysis_protocol = ExtractAverageDielectric('extract_dielectric')
        analysis_protocol.thermodynamic_state = ProtocolPath(
            'thermodynamic_state', 'global')

        # The protocols which run the simulation itself.
        simulation_protocols, value_source, output_to_store = \
            generate_base_simulation_protocols(analysis_protocol, options)

        # Hook the analysis protocol up to the parameterised system.
        analysis_protocol.system_path = ProtocolPath(
            'system_path', simulation_protocols.assign_parameters.id)

        # Dielectric constants typically take longer to converge, so allow
        # more convergence iterations.
        converge_group = simulation_protocols.converge_uncertainty
        converge_group.max_iterations = 400

        # The gradient calculation uses a specialised reweighting protocol,
        # fed with the dipole moments and volumes of the analysis protocol
        # inside of the convergence group.
        reweight_dielectric = ReweightDielectricConstant('gradient_mbar')
        reweight_dielectric.reference_dipole_moments = [
            ProtocolPath('dipole_moments', converge_group.id,
                         analysis_protocol.id)
        ]
        reweight_dielectric.reference_volumes = [
            ProtocolPath('volumes', converge_group.id, analysis_protocol.id)
        ]
        reweight_dielectric.thermodynamic_state = ProtocolPath(
            'thermodynamic_state', 'global')

        production_id = simulation_protocols.production_simulation.id

        coordinate_source = ProtocolPath(
            'output_coordinate_file',
            simulation_protocols.equilibration_simulation.id)
        trajectory_source = ProtocolPath(
            'trajectory_file_path', converge_group.id, production_id)
        statistics_source = ProtocolPath(
            'statistics_file_path', converge_group.id, production_id)

        gradient_group, gradient_replicator, gradient_source = \
            generate_gradient_protocol_group(
                reweight_dielectric,
                [ProtocolPath('force_field_path', 'global')],
                ProtocolPath('force_field_path', 'global'),
                coordinate_source,
                trajectory_source,
                statistics_source)

        # Build the workflow schema.
        property_name = DielectricConstant.__name__

        schema = WorkflowSchema(property_type=property_name)
        schema.id = '{}{}'.format(property_name, 'Schema')

        # The top level protocols of the workflow, in registration order.
        schema_members = (
            simulation_protocols.build_coordinates,
            simulation_protocols.assign_parameters,
            simulation_protocols.energy_minimisation,
            simulation_protocols.equilibration_simulation,
            simulation_protocols.converge_uncertainty,
            simulation_protocols.extract_uncorrelated_trajectory,
            simulation_protocols.extract_uncorrelated_statistics,
            gradient_group,
        )

        schema.protocols = {
            member.id: member.schema for member in schema_members
        }

        schema.replicators = [gradient_replicator]

        schema.outputs_to_store = {'full_system': output_to_store}

        schema.gradients_sources = [gradient_source]
        schema.final_value_source = value_source

        return schema
def test_nested_protocol_paths():
    """Exercise reading and writing protocol values through nested
    `ProtocolPath` references: attributes of a value, entries of a list,
    entries of a dictionary and entries of a nested list.
    """

    value_protocol_a = DummyInputOutputProtocol('protocol_a')
    value_protocol_a.input_value = EstimatedQuantity(1 * unit.kelvin,
                                                     0.1 * unit.kelvin,
                                                     'constant')

    # A dotted path reads an attribute of the stored value...
    assert value_protocol_a.get_value(ProtocolPath(
        'input_value.value')) == value_protocol_a.input_value.value

    # ...and can also be used to overwrite one.
    value_protocol_a.set_value(ProtocolPath('input_value._value'),
                               0.5 * unit.kelvin)
    assert value_protocol_a.input_value.value == 0.5 * unit.kelvin

    value_protocol_b = DummyInputOutputProtocol('protocol_b')
    value_protocol_b.input_value = EstimatedQuantity(2 * unit.kelvin,
                                                     0.05 * unit.kelvin,
                                                     'constant')

    # A list input which mixes protocol paths with a plain value.
    add_values_protocol = AddValues('add_values')

    add_values_protocol.values = [
        ProtocolPath('output_value', value_protocol_a.id),
        ProtocolPath('output_value', value_protocol_b.id),
        ProtocolPath('output_value', value_protocol_b.id), 5
    ]

    # Both a misspelled attribute name and a non-integer list index
    # are expected to be rejected.
    with pytest.raises(ValueError):
        add_values_protocol.get_value(ProtocolPath('valus[string]'))

    with pytest.raises(ValueError):
        add_values_protocol.get_value(ProtocolPath('values[string]'))

    # Only the three protocol paths are expected to be reported as
    # references; the plain value 5 is not.
    input_values = add_values_protocol.get_value_references(
        ProtocolPath('values'))
    assert isinstance(input_values, dict) and len(input_values) == 3

    for index, value_reference in enumerate(input_values):

        input_value = add_values_protocol.get_value(value_reference)
        assert input_value.full_path == add_values_protocol.values[
            index].full_path

        # Replace each referenced entry by its index.
        add_values_protocol.set_value(value_reference, index)

    assert set(add_values_protocol.values) == {0, 1, 2, 5}

    # A dictionary input whose values are protocol paths.
    dummy_dict_protocol = DummyInputOutputProtocol('dict_protocol')

    dummy_dict_protocol.input_value = {
        'value_a': ProtocolPath('output_value', value_protocol_a.id),
        'value_b': ProtocolPath('output_value', value_protocol_b.id),
    }

    input_values = dummy_dict_protocol.get_value_references(
        ProtocolPath('input_value'))
    assert isinstance(input_values, dict) and len(input_values) == 2

    for index, value_reference in enumerate(input_values):

        input_value = dummy_dict_protocol.get_value(value_reference)

        dummy_dict_keys = list(dummy_dict_protocol.input_value.keys())
        assert input_value.full_path == dummy_dict_protocol.input_value[
            dummy_dict_keys[index]].full_path

        dummy_dict_protocol.set_value(value_reference, index)

    # A nested list input.
    add_values_protocol_2 = AddValues('add_values')

    add_values_protocol_2.values = [
        [ProtocolPath('output_value', value_protocol_a.id)],
        [
            ProtocolPath('output_value', value_protocol_b.id),
            ProtocolPath('output_value', value_protocol_b.id)
        ]
    ]

    with pytest.raises(ValueError):
        add_values_protocol_2.get_value(ProtocolPath('valus[string]'))

    # Check the nested list protocol itself; this assertion previously
    # re-tested `add_values_protocol` by mistake.
    with pytest.raises(ValueError):
        add_values_protocol_2.get_value(ProtocolPath('values[string]'))
    def get_default_simulation_workflow_schema(options=None):
        """Returns the default workflow to use when estimating this property
        from direct simulations.

        The workflow builds and equilibrates the fully solvated system, builds
        the solute in vacuum, and then runs a YANK solvation protocol inside a
        conditional group.

        Parameters
        ----------
        options: WorkflowOptions, optional
            The default options to use when setting up the estimation workflow.
            Only `convergence_mode` is read here: unless it is
            `WorkflowOptions.ConvergenceMode.NoChecks`, a condition on the
            uncertainty of the estimated free energy is added to the
            conditional group. If `None`, the condition is added as well.

        Returns
        -------
        WorkflowSchema
            The schema to follow when estimating this property.
        """

        # Setup the fully solvated systems.
        build_full_coordinates = coordinates.BuildCoordinatesPackmol(
            'build_solvated_coordinates')
        build_full_coordinates.substance = ProtocolPath('substance', 'global')
        build_full_coordinates.max_molecules = 2000

        assign_full_parameters = forcefield.BuildSmirnoffSystem(
            'assign_solvated_parameters')
        assign_full_parameters.force_field_path = ProtocolPath(
            'force_field_path', 'global')
        assign_full_parameters.substance = ProtocolPath('substance', 'global')
        assign_full_parameters.coordinate_file_path = ProtocolPath(
            'coordinate_file_path', build_full_coordinates.id)

        # Perform a quick minimisation of the full system to give
        # YANK a better starting point for its minimisation.
        energy_minimisation = simulation.RunEnergyMinimisation(
            'energy_minimisation')
        energy_minimisation.system_path = ProtocolPath(
            'system_path', assign_full_parameters.id)
        energy_minimisation.input_coordinate_file = ProtocolPath(
            'coordinate_file_path', build_full_coordinates.id)

        # Equilibrate the minimised, fully solvated system in the NPT ensemble.
        equilibration_simulation = simulation.RunOpenMMSimulation(
            'equilibration_simulation')
        equilibration_simulation.ensemble = Ensemble.NPT
        equilibration_simulation.steps_per_iteration = 100000
        equilibration_simulation.output_frequency = 10000
        equilibration_simulation.timestep = 2.0 * unit.femtosecond
        equilibration_simulation.thermodynamic_state = ProtocolPath(
            'thermodynamic_state', 'global')
        equilibration_simulation.system_path = ProtocolPath(
            'system_path', assign_full_parameters.id)
        equilibration_simulation.input_coordinate_file = ProtocolPath(
            'output_coordinate_file', energy_minimisation.id)

        # Create substances which contain only the solvent, and only the
        # solute (e.g. for the vacuum phase simulations).
        filter_solvent = miscellaneous.FilterSubstanceByRole('filter_solvent')
        filter_solvent.input_substance = ProtocolPath('substance', 'global')
        filter_solvent.component_role = Substance.ComponentRole.Solvent

        filter_solute = miscellaneous.FilterSubstanceByRole('filter_solute')
        filter_solute.input_substance = ProtocolPath('substance', 'global')
        filter_solute.component_role = Substance.ComponentRole.Solute

        # Setup the solute in vacuum system.
        build_vacuum_coordinates = coordinates.BuildCoordinatesPackmol(
            'build_vacuum_coordinates')
        build_vacuum_coordinates.substance = ProtocolPath(
            'filtered_substance', filter_solute.id)
        build_vacuum_coordinates.max_molecules = 1

        assign_vacuum_parameters = forcefield.BuildSmirnoffSystem(
            'assign_parameters')
        assign_vacuum_parameters.force_field_path = ProtocolPath(
            'force_field_path', 'global')
        assign_vacuum_parameters.substance = ProtocolPath(
            'filtered_substance', filter_solute.id)
        assign_vacuum_parameters.coordinate_file_path = ProtocolPath(
            'coordinate_file_path', build_vacuum_coordinates.id)

        # Set up the protocol to run yank. The first "solvent" is the filtered
        # solvent, the second is an empty substance paired with the vacuum
        # coordinates and system.
        run_yank = yank.SolvationYankProtocol('run_solvation_yank')
        run_yank.solute = ProtocolPath('filtered_substance', filter_solute.id)
        run_yank.solvent_1 = ProtocolPath('filtered_substance',
                                          filter_solvent.id)
        run_yank.solvent_2 = Substance()
        run_yank.thermodynamic_state = ProtocolPath('thermodynamic_state',
                                                    'global')
        run_yank.steps_per_iteration = 500
        run_yank.checkpoint_interval = 50
        run_yank.solvent_1_coordinates = ProtocolPath(
            'output_coordinate_file', equilibration_simulation.id)
        run_yank.solvent_1_system = ProtocolPath('system_path',
                                                 assign_full_parameters.id)
        run_yank.solvent_2_coordinates = ProtocolPath(
            'coordinate_file_path', build_vacuum_coordinates.id)
        run_yank.solvent_2_system = ProtocolPath('system_path',
                                                 assign_vacuum_parameters.id)

        # Set up the group which will run yank until the free energy has been determined to within
        # a given uncertainty
        conditional_group = groups.ConditionalGroup('conditional_group')
        conditional_group.max_iterations = 20

        # `options` defaults to None, so it must not be dereferenced blindly.
        # Without options the convergence condition is still added.
        if (options is None or options.convergence_mode !=
                WorkflowOptions.ConvergenceMode.NoChecks):

            condition = groups.ConditionalGroup.Condition()
            condition.condition_type = groups.ConditionalGroup.ConditionType.LessThan
            condition.right_hand_value = ProtocolPath('target_uncertainty',
                                                      'global')
            condition.left_hand_value = ProtocolPath(
                'estimated_free_energy.uncertainty', conditional_group.id,
                run_yank.id)

            conditional_group.add_condition(condition)

        # Define the total number of iterations that yank should run for,
        # as 2000 multiplied by the current iteration of the group.
        total_iterations = miscellaneous.MultiplyValue('total_iterations')
        total_iterations.value = 2000
        total_iterations.multiplier = ProtocolPath('current_iteration',
                                                   conditional_group.id)

        # Make sure the simulations gets extended after each iteration.
        run_yank.number_of_iterations = ProtocolPath('result',
                                                     total_iterations.id)

        conditional_group.add_protocols(total_iterations, run_yank)

        # Define the full workflow schema.
        schema = WorkflowSchema(property_type=SolvationFreeEnergy.__name__)
        schema.id = '{}{}'.format(SolvationFreeEnergy.__name__, 'Schema')

        schema.protocols = {
            build_full_coordinates.id: build_full_coordinates.schema,
            assign_full_parameters.id: assign_full_parameters.schema,
            energy_minimisation.id: energy_minimisation.schema,
            equilibration_simulation.id: equilibration_simulation.schema,
            filter_solvent.id: filter_solvent.schema,
            filter_solute.id: filter_solute.schema,
            build_vacuum_coordinates.id: build_vacuum_coordinates.schema,
            assign_vacuum_parameters.id: assign_vacuum_parameters.schema,
            conditional_group.id: conditional_group.schema
        }

        # The final value is the free energy estimated by yank inside the group.
        schema.final_value_source = ProtocolPath('estimated_free_energy',
                                                 conditional_group.id,
                                                 run_yank.id)
        return schema
Beispiel #26
0
    def get_default_reweighting_workflow_schema(options):
        """Builds the default workflow schema used to estimate a density by
        reweighting previously stored simulation data.

        The schema contains the base reweighting protocols returned by
        `generate_base_reweighting_protocols`, plus a group of protocols
        (returned by `generate_gradient_protocol_group`) which provides the
        source of the gradients.

        Parameters
        ----------
        options: WorkflowOptions
            The default options to use when setting up the estimation workflow.
            These are forwarded to `generate_base_reweighting_protocols`.

        Returns
        -------
        WorkflowSchema
            The schema to follow when estimating this property.
        """

        replicator_id = 'data_replicator'

        # The protocol which extracts the densities from each set of
        # existing data. Its id carries the replicator placeholder.
        extract_density = analysis.ExtractAverageStatistic(
            'calc_density_$({})'.format(replicator_id))
        extract_density.statistics_type = ObservableType.Density

        # The protocol which reweights the extracted densities.
        reweight_protocol = reweighting.ReweightStatistics('reweight_density')
        reweight_protocol.statistics_type = ObservableType.Density

        base_protocols, data_replicator = generate_base_reweighting_protocols(
            extract_density, reweight_protocol, options, replicator_id)

        # Inputs shared by the gradient calculations.
        concatenated_id = base_protocols.concatenate_trajectories.id

        coordinates = ProtocolPath('output_coordinate_path', concatenated_id)
        trajectory = ProtocolPath('output_trajectory_path', concatenated_id)

        # An un-named reweighting protocol handed to the gradient group
        # generator as a template.
        gradient_template = reweighting.ReweightStatistics('')
        gradient_template.statistics_type = ObservableType.Density
        gradient_template.statistics_paths = ProtocolPath(
            'output_statistics_path', base_protocols.decorrelate_statistics.id)

        stored_force_field = ProtocolPath(
            'force_field_path', base_protocols.unpack_stored_data.id)
        target_force_field = ProtocolPath('force_field_path', 'global')

        sample_indices = ProtocolPath(
            'effective_sample_indices', base_protocols.mbar_protocol.id)

        gradient_outputs = generate_gradient_protocol_group(
            gradient_template,
            stored_force_field,
            target_force_field,
            coordinates,
            trajectory,
            replicator_id='grad',
            use_subset_of_force_field=False,
            effective_sample_indices=sample_indices)

        gradient_group, gradient_replicator, gradient_source = gradient_outputs

        # Assemble the schema itself.
        property_name = Density.__name__

        schema = WorkflowSchema(property_type=property_name)
        schema.id = f'{property_name}Schema'

        schema.protocols = {
            base_protocol.id: base_protocol.schema
            for base_protocol in base_protocols
        }
        schema.protocols.update({gradient_group.id: gradient_group.schema})

        schema.replicators = [data_replicator, gradient_replicator]
        schema.gradients_sources = [gradient_source]

        schema.final_value_source = ProtocolPath(
            'value', base_protocols.mbar_protocol.id)

        return schema
Beispiel #27
0
    def get_default_simulation_workflow_schema(options=None):
        """Builds the default workflow schema used to estimate a density
        from direct simulations.

        The workflow builds coordinates (`BuildCoordinatesPackmol`), builds
        the system (`BuildSmirnoffSystem`), runs an energy minimisation and
        an NPT equilibration, and then places an NPT production simulation
        and the density extraction inside a conditional group. The group's
        condition compares the uncertainty of the extracted density against
        the global target uncertainty, with at most 100 iterations.
        Uncorrelated trajectory and statistics data are extracted afterwards
        and marked as outputs to store.

        Parameters
        ----------
        options: PropertyWorkflowOptions, optional
            The default options to use when setting up the estimation workflow.
            This argument is not read by the current implementation.

        Returns
        -------
        WorkflowSchema
            The schema to follow when estimating this property.
        """

        property_name = Density.__name__

        schema = WorkflowSchema(property_type=property_name)
        schema.id = f'{property_name}Schema'

        def register(protocol):
            # Stores the protocol's schema in the workflow under its id.
            schema.protocols[protocol.id] = protocol.schema

        def from_global(attribute):
            # A new path object is created for every input which uses it.
            return ProtocolPath(attribute, 'global')

        # --- Initial coordinate and topology setup ---
        build_coordinates = protocols.BuildCoordinatesPackmol('build_coordinates')
        build_coordinates.substance = from_global('substance')
        register(build_coordinates)

        assign_topology = protocols.BuildSmirnoffSystem('build_topology')
        assign_topology.force_field_path = from_global('force_field_path')
        assign_topology.coordinate_file_path = ProtocolPath(
            'coordinate_file_path', build_coordinates.id)
        assign_topology.substance = from_global('substance')
        register(assign_topology)

        # --- Equilibration ---
        energy_minimisation = protocols.RunEnergyMinimisation('energy_minimisation')
        energy_minimisation.input_coordinate_file = ProtocolPath(
            'coordinate_file_path', build_coordinates.id)
        energy_minimisation.system_path = ProtocolPath(
            'system_path', assign_topology.id)
        register(energy_minimisation)

        npt_equilibration = protocols.RunOpenMMSimulation('npt_equilibration')
        npt_equilibration.ensemble = Ensemble.NPT
        # Debug settings.
        npt_equilibration.steps = 100000
        npt_equilibration.output_frequency = 5000
        npt_equilibration.thermodynamic_state = from_global('thermodynamic_state')
        npt_equilibration.input_coordinate_file = ProtocolPath(
            'output_coordinate_file', energy_minimisation.id)
        npt_equilibration.system_path = ProtocolPath(
            'system_path', assign_topology.id)
        register(npt_equilibration)

        # --- Production (registered through the conditional group below) ---
        npt_production = protocols.RunOpenMMSimulation('npt_production')
        npt_production.ensemble = Ensemble.NPT
        # Debug settings.
        npt_production.steps = 500000
        npt_production.output_frequency = 5000
        npt_production.thermodynamic_state = from_global('thermodynamic_state')
        npt_production.input_coordinate_file = ProtocolPath(
            'output_coordinate_file', npt_equilibration.id)
        npt_production.system_path = ProtocolPath(
            'system_path', assign_topology.id)

        # --- Analysis ---
        extract_density = protocols.ExtractAverageStatistic('extract_density')
        extract_density.statistics_type = ObservableType.Density
        extract_density.statistics_path = ProtocolPath(
            'statistics_file_path', npt_production.id)

        # --- Conditional group which checks the uncertainty of the density ---
        converge_uncertainty = groups.ConditionalGroup('converge_uncertainty')
        converge_uncertainty.add_protocols(npt_production, extract_density)

        def from_group(attribute, member):
            # Paths to protocols inside the group include the group's id.
            return ProtocolPath(attribute, converge_uncertainty.id, member.id)

        condition = groups.ConditionalGroup.Condition()
        condition.left_hand_value = from_group('value.uncertainty', extract_density)
        condition.right_hand_value = from_global('target_uncertainty')
        condition.condition_type = groups.ConditionalGroup.ConditionType.LessThan

        converge_uncertainty.add_condition(condition)
        converge_uncertainty.max_iterations = 100

        register(converge_uncertainty)

        # --- Finally, extract uncorrelated data ---
        extract_uncorrelated_trajectory = protocols.ExtractUncorrelatedTrajectoryData(
            'extract_traj')
        extract_uncorrelated_trajectory.statistical_inefficiency = from_group(
            'statistical_inefficiency', extract_density)
        extract_uncorrelated_trajectory.equilibration_index = from_group(
            'equilibration_index', extract_density)
        extract_uncorrelated_trajectory.input_coordinate_file = from_group(
            'output_coordinate_file', npt_production)
        extract_uncorrelated_trajectory.input_trajectory_path = from_group(
            'trajectory_file_path', npt_production)
        register(extract_uncorrelated_trajectory)

        extract_uncorrelated_statistics = protocols.ExtractUncorrelatedStatisticsData(
            'extract_stats')
        extract_uncorrelated_statistics.statistical_inefficiency = from_group(
            'statistical_inefficiency', extract_density)
        extract_uncorrelated_statistics.equilibration_index = from_group(
            'equilibration_index', extract_density)
        extract_uncorrelated_statistics.input_statistics_path = from_group(
            'statistics_file_path', npt_production)
        register(extract_uncorrelated_statistics)

        # --- Define where the final values come from ---
        schema.final_value_source = from_group('value', extract_density)

        # --- Describe the data which should be stored ---
        output_to_store = WorkflowOutputToStore()
        output_to_store.trajectory_file_path = ProtocolPath(
            'output_trajectory_path', extract_uncorrelated_trajectory.id)
        output_to_store.coordinate_file_path = from_group(
            'output_coordinate_file', npt_production)
        output_to_store.statistics_file_path = ProtocolPath(
            'output_statistics_path', extract_uncorrelated_statistics.id)
        output_to_store.statistical_inefficiency = from_group(
            'statistical_inefficiency', extract_density)

        schema.outputs_to_store = {'full_system': output_to_store}

        return schema
Beispiel #28
0
    def _get_simulation_protocols(id_suffix,
                                  gradient_replicator_id,
                                  replicator_id=None,
                                  weight_by_mole_fraction=False,
                                  component_substance_reference=None,
                                  full_substance_reference=None,
                                  options=None):
        """Returns the set of protocols which when combined in a workflow
        will yield the molar volume of a substance.

        Parameters
        ----------
        id_suffix: str
            A suffix to append to the id of each of the returned protocols.
        gradient_replicator_id: str
            The id of the replicator which will clone those protocols which will
            estimate the gradient of the molar volume with respect to a given parameter.
        replicator_id: str, optional
            The id of the replicator which will be used to clone these protocols.
            This will be appended to the id of each of the returned protocols if
            set.
        weight_by_mole_fraction: bool
            If true, an extra protocol will be added to weight the calculated
            molar volume by the mole fraction of the component.
        component_substance_reference: ProtocolPath or PlaceholderInput, optional
            An optional protocol path (or replicator reference) to the component substance
            whose enthalpy is being estimated.
        full_substance_reference: ProtocolPath or PlaceholderInput, optional
            An optional protocol path (or replicator reference) to the full substance
            whose enthalpy of mixing is being estimated. This cannot be `None` if
            `weight_by_mole_fraction` is `True`.
        options: WorkflowOptions
            The options to use when setting up the workflows.

        Returns
        -------
        BaseSimulationProtocols
            The protocols used to estimate the molar volume of a substance.
        DivideValue
            The protocol used to calculate the number of molar molecules in
            the system.
        ProtocolPath
            A reference to the estimated molar volume.
        WorkflowSimulationDataToStore
            An object which describes the default data from a simulation to store,
            such as the uncorrelated statistics and configurations.
        ProtocolGroup
            The group of protocols which will calculate the gradient of the reduced potential
            with respect to a given property.
        ProtocolReplicator
            The protocol which will replicate the gradient group for every gradient to
            estimate.
        ProtocolPath
            A reference to the value of the gradient.
        """

        if replicator_id is not None:
            id_suffix = f'{id_suffix}_$({replicator_id})'

        if component_substance_reference is None:
            component_substance_reference = ProtocolPath('substance', 'global')

        if weight_by_mole_fraction is True and full_substance_reference is None:

            raise ValueError(
                'The full substance reference must be set when weighting by'
                'the mole fraction')

        # Define the protocol which will extract the average molar volume from
        # the results of a simulation.
        extract_volume = analysis.ExtractAverageStatistic(
            f'extract_volume{id_suffix}')
        extract_volume.statistics_type = ObservableType.Volume

        # Define the protocols which will run the simulation itself.
        simulation_protocols, value_source, output_to_store = generate_base_simulation_protocols(
            extract_volume, options, id_suffix)

        # Divide the volume by the number of molecules in the system
        number_of_molecules = ProtocolPath(
            'output_number_of_molecules',
            simulation_protocols.build_coordinates.id)
        built_substance = ProtocolPath(
            'output_substance', simulation_protocols.build_coordinates.id)

        number_of_molar_molecules = miscellaneous.DivideValue(
            f'number_of_molar_molecules{id_suffix}')
        number_of_molar_molecules.value = number_of_molecules
        number_of_molar_molecules.divisor = (
            1.0 * unit.avogadro_number).to('mole**-1')

        extract_volume.divisor = ProtocolPath('result',
                                              number_of_molar_molecules.id)

        # Use the correct substance.
        simulation_protocols.build_coordinates.substance = component_substance_reference
        simulation_protocols.assign_parameters.substance = built_substance
        output_to_store.substance = built_substance

        conditional_group = simulation_protocols.converge_uncertainty

        if weight_by_mole_fraction:
            # The component workflows need an extra step to multiply their molar volumes by their
            # relative mole fraction.
            weight_by_mole_fraction = miscellaneous.WeightByMoleFraction(
                f'weight_by_mole_fraction{id_suffix}')
            weight_by_mole_fraction.value = ProtocolPath(
                'value', extract_volume.id)
            weight_by_mole_fraction.full_substance = full_substance_reference
            weight_by_mole_fraction.component = component_substance_reference

            conditional_group.add_protocols(weight_by_mole_fraction)

            value_source = ProtocolPath('weighted_value', conditional_group.id,
                                        weight_by_mole_fraction.id)

        if options.convergence_mode != WorkflowOptions.ConvergenceMode.NoChecks:

            # Make sure the convergence criteria is set to use the per component
            # uncertainty target.
            conditional_group.conditions[0].right_hand_value = ProtocolPath(
                'per_component_uncertainty', 'global')

            if weight_by_mole_fraction:
                # Make sure the weighted uncertainty is being used in the conditional comparison.
                conditional_group.conditions[0].left_hand_value = ProtocolPath(
                    'weighted_value.uncertainty', conditional_group.id,
                    weight_by_mole_fraction.id)

        # Set up the gradient calculations
        reweight_molar_volume_template = reweighting.ReweightStatistics('')
        reweight_molar_volume_template.statistics_type = ObservableType.Volume
        reweight_molar_volume_template.statistics_paths = [
            ProtocolPath('statistics_file_path', conditional_group.id,
                         simulation_protocols.production_simulation.id)
        ]

        coordinate_source = ProtocolPath(
            'output_coordinate_file',
            simulation_protocols.equilibration_simulation.id)
        trajectory_source = ProtocolPath(
            'trajectory_file_path',
            simulation_protocols.converge_uncertainty.id,
            simulation_protocols.production_simulation.id)
        statistics_source = ProtocolPath(
            'statistics_file_path',
            simulation_protocols.converge_uncertainty.id,
            simulation_protocols.production_simulation.id)

        gradient_group, gradient_replicator, gradient_source = \
            generate_gradient_protocol_group(reweight_molar_volume_template,
                                             [ProtocolPath('force_field_path', 'global')],
                                             ProtocolPath('force_field_path', 'global'),
                                             coordinate_source,
                                             trajectory_source,
                                             statistics_source,
                                             replicator_id=gradient_replicator_id,
                                             substance_source=built_substance,
                                             id_suffix=id_suffix)

        # Remove the group id from the path.
        gradient_source.pop_next_in_path()

        if weight_by_mole_fraction:
            # The component workflows need an extra step to multiply their gradients by their
            # relative mole fraction.
            weight_gradient = miscellaneous.WeightByMoleFraction(
                f'weight_gradient_by_mole_fraction{id_suffix}')
            weight_gradient.value = gradient_source
            weight_gradient.full_substance = full_substance_reference
            weight_gradient.component = component_substance_reference

            gradient_group.add_protocols(weight_gradient)
            gradient_source = ProtocolPath('weighted_value',
                                           weight_gradient.id)

        scale_gradient = miscellaneous.DivideValue(
            f'scale_gradient{id_suffix}')
        scale_gradient.value = gradient_source
        scale_gradient.divisor = ProtocolPath('result',
                                              number_of_molar_molecules.id)

        gradient_group.add_protocols(scale_gradient)
        gradient_source = ProtocolPath('result', gradient_group.id,
                                       scale_gradient.id)

        return (simulation_protocols, number_of_molar_molecules, value_source,
                output_to_store, gradient_group, gradient_replicator,
                gradient_source)