Ejemplo n.º 1
0
    def retrieve_simulation_data(self, substance, include_pure_data=True):
        """Look up the directories of any simulation data stored for a
        substance.

        Parameters
        ----------
        substance
            The substance of interest. Its `identifier` is always looked up;
            if it is a `Mixture`, its `components` may be looked up too.
        include_pure_data: bool
            When this is exactly `True` and `substance` is a `Mixture`, data
            stored for each component on its own (i.e. a single component
            mixture with a mole fraction of 1.0) is also looked up.

        Returns
        -------
        dict of str and list of str
            The data directory paths, keyed by substance identifier.
            Identifiers which have no stored data do not appear as keys.
        """

        identifiers = [substance.identifier]

        if isinstance(substance, Mixture) and include_pure_data is True:

            for component in substance.components:

                # Represent the component as a pure substance so that the
                # identifier matches the one pure data would be stored under.
                pure_substance = Mixture()
                pure_substance.add_component(component.smiles, 1.0, False)

                pure_identifier = pure_substance.identifier

                if pure_identifier in identifiers:
                    continue

                identifiers.append(pure_identifier)

        stored_keys_by_substance = self._simulation_data_by_substance
        data_directories = {}

        for identifier in identifiers:

            # Substances without any stored data are skipped entirely.
            if identifier not in stored_keys_by_substance:
                continue

            data_directories[identifier] = [
                path.join(self._root_directory,
                          f'{self.retrieve_object(data_key).unique_id}_data')
                for data_key in stored_keys_by_substance[identifier]
            ]

        return data_directories
Ejemplo n.º 2
0
def test_local_simulation_storage():
    """A simple test that simulation data can be stored and retrieved
    using the local storage backend.

    The test writes a dummy `StoredSimulationData` object to a scratch
    directory, stores that directory through `LocalFileStorage`, retrieves
    it again and checks the round-tripped fields, and finally checks that a
    fresh `LocalFileStorage` pointed at the same directory still knows about
    the stored object.
    """

    temporary_data_directory = 'temp_data'
    temporary_backend_directory = 'storage_dir'

    def remove_temporary_directories():
        """Delete the scratch directories used by this test, if present."""
        for directory in (temporary_data_directory,
                          temporary_backend_directory):

            if path.isdir(directory):
                rmtree(directory)

    substance = Mixture()
    substance.add_component('C', 1.0, False)

    dummy_simulation_data = StoredSimulationData()

    dummy_simulation_data.thermodynamic_state = ThermodynamicState(
        298.0 * unit.kelvin, 1.0 * unit.atmosphere)

    dummy_simulation_data.statistical_inefficiency = 1.0
    dummy_simulation_data.force_field_id = 'tmp_ff_id'

    dummy_simulation_data.substance = substance

    # Start from a clean slate in case an earlier run left files behind.
    remove_temporary_directories()

    try:

        makedirs(temporary_data_directory)
        makedirs(temporary_backend_directory)

        with open(path.join(temporary_data_directory, 'data.json'),
                  'w') as file:
            json.dump(dummy_simulation_data, file, cls=TypedJSONEncoder)

        local_storage = LocalFileStorage(temporary_backend_directory)
        dummy_simulation_data.unique_id = local_storage.store_simulation_data(
            substance.identifier, temporary_data_directory)

        retrieved_data_directories = local_storage.retrieve_simulation_data(
            substance)
        assert len(retrieved_data_directories) == 1

        retrieved_data_directory = retrieved_data_directories[
            substance.identifier][0]

        with open(path.join(retrieved_data_directory, 'data.json'),
                  'r') as file:
            retrieved_data = json.load(file, cls=TypedJSONDecoder)

        assert (dummy_simulation_data.thermodynamic_state ==
                retrieved_data.thermodynamic_state)
        assert (dummy_simulation_data.statistical_inefficiency ==
                retrieved_data.statistical_inefficiency)
        assert (dummy_simulation_data.force_field_id ==
                retrieved_data.force_field_id)
        assert dummy_simulation_data.substance == retrieved_data.substance

        # A new backend instance pointed at the same directory should still
        # be aware of the previously stored object.
        local_storage_new = LocalFileStorage(temporary_backend_directory)
        assert local_storage_new.has_object(dummy_simulation_data.unique_id)

    finally:

        # Clean up even when an assertion above fails, so that a failing
        # run does not leave scratch directories in the working directory.
        remove_temporary_directories()
Ejemplo n.º 3
0
def test_density_dielectric_merging():
    """Test how the default simulation workflows of a density and a
    dielectric constant compare after both are added to a `WorkflowGraph`.

    The protocols of the two workflows are walked pairwise in topological
    order. Every pair must have identical schemas, except for pairs whose
    density protocol id contains 'extract_traj' or 'extract_stats', which
    must differ.
    """

    substance = Mixture()
    substance.add_component('C', 1.0)

    density = Density(
        thermodynamic_state=ThermodynamicState(
            temperature=298 * unit.kelvin, pressure=1 * unit.atmosphere),
        phase=PropertyPhase.Liquid,
        substance=substance,
        value=10 * unit.gram / unit.mole,
        uncertainty=1 * unit.gram / unit.mole)

    # NOTE(review): the gram / mole units below look copied from the density
    # above - confirm whether they are intentional before changing them.
    dielectric = DielectricConstant(
        thermodynamic_state=ThermodynamicState(
            temperature=298 * unit.kelvin, pressure=1 * unit.atmosphere),
        phase=PropertyPhase.Liquid,
        substance=substance,
        value=10 * unit.gram / unit.mole,
        uncertainty=1 * unit.gram / unit.mole)

    density_schema = density.get_default_workflow_schema(
        'SimulationLayer', PropertyWorkflowOptions())
    dielectric_schema = dielectric.get_default_workflow_schema(
        'SimulationLayer', PropertyWorkflowOptions())

    force_field_path = get_data_filename('forcefield/smirnoff99Frosst.offxml')

    density_metadata = Workflow.generate_default_metadata(
        density, force_field_path, PropertyEstimatorOptions())

    # The metadata of each workflow is generated from its own property
    # (this was previously generated from the density by mistake).
    dielectric_metadata = Workflow.generate_default_metadata(
        dielectric, force_field_path, PropertyEstimatorOptions())

    density_workflow = Workflow(density, density_metadata)
    density_workflow.schema = density_schema

    dielectric_workflow = Workflow(dielectric, dielectric_metadata)
    dielectric_workflow.schema = dielectric_schema

    workflow_graph = WorkflowGraph('')

    workflow_graph.add_workflow(density_workflow)
    workflow_graph.add_workflow(dielectric_workflow)

    merge_order_a = graph.topological_sort(density_workflow.dependants_graph)
    merge_order_b = graph.topological_sort(
        dielectric_workflow.dependants_graph)

    for protocol_id_A, protocol_id_B in zip(merge_order_a, merge_order_b):

        density_json = density_workflow.protocols[
            protocol_id_A].schema.json()
        dielectric_json = dielectric_workflow.protocols[
            protocol_id_B].schema.json()

        if ('extract_traj' not in protocol_id_A and
                'extract_stats' not in protocol_id_A):

            assert density_json == dielectric_json

        else:

            # The data extraction protocols are the only ones expected to
            # differ between the two workflows.
            assert density_json != dielectric_json
Ejemplo n.º 4
0
def test_cloned_schema_merging_simulation(registered_property_name,
                                          available_layer):
    """Tests that two identical calculations get merged into one by the
    `WorkflowGraph`.

    Two workflows are built from the same property, metadata and schema and
    added to a single graph. The graph must then hold exactly as many
    protocols as one workflow, under the same ids, and the two workflows
    must agree protocol-by-protocol in topological order.

    Parameters
    ----------
    registered_property_name: str
        The key of the property class to test in `registered_properties`.
    available_layer: str
        The name of the layer to request the default workflow schema for.
    """

    registered_property = registered_properties[registered_property_name]
    dummy_property = create_dummy_property(registered_property)

    workflow_schema = dummy_property.get_default_workflow_schema(
        available_layer, PropertyWorkflowOptions())

    # There is nothing to merge for properties without a default schema
    # for this layer.
    if workflow_schema is None:
        return

    global_metadata = create_dummy_metadata(dummy_property, available_layer)

    workflow_a = Workflow(dummy_property, global_metadata)
    workflow_a.schema = workflow_schema

    workflow_b = Workflow(dummy_property, global_metadata)
    workflow_b.schema = workflow_schema

    workflow_graph = WorkflowGraph()

    workflow_graph.add_workflow(workflow_a)
    workflow_graph.add_workflow(workflow_b)

    # Sort the dependants graphs by key before the topological sort so that
    # both workflows are traversed starting from the same item ordering.
    ordered_dict_a = OrderedDict(sorted(workflow_a.dependants_graph.items()))
    ordered_dict_b = OrderedDict(sorted(workflow_b.dependants_graph.items()))

    merge_order_a = graph.topological_sort(ordered_dict_a)
    merge_order_b = graph.topological_sort(ordered_dict_b)

    assert len(workflow_graph._protocols_by_id) == len(workflow_a.protocols)

    for protocol_id in workflow_a.protocols:
        assert protocol_id in workflow_graph._protocols_by_id

    for protocol_id_A, protocol_id_B in zip(merge_order_a, merge_order_b):

        assert protocol_id_A == protocol_id_B

        assert workflow_a.protocols[protocol_id_A].schema.json() == \
               workflow_b.protocols[protocol_id_B].schema.json()
Ejemplo n.º 5
0
def build_solvated_system():
    """Demonstrate building a solvated system with the built in utilities
    and protocol classes.

    Three protocols are run one after another: coordinates are built with
    packmol, SMIRNOFF force field parameters are assigned to them, and the
    resulting system is energy minimised.
    """

    # Describe the composition of the system to build - here an
    # equimolar mixture of water and octanol...
    solvated_substance = Mixture()

    solvated_substance.add_component(smiles='O', mole_fraction=0.5)
    solvated_substance.add_component(smiles='CCCCCCCCO', mole_fraction=0.5)

    # ...plus a single paracetamol molecule, which is flagged as
    # an 'impurity' rather than being given a mole fraction.
    solvated_substance.add_component(smiles='CC(=O)NC1=CC=C(C=C1)O',
                                     mole_fraction=0.0,
                                     impurity=True)

    # Step 1: generate the coordinates via the packmol wrapper protocol.
    print('Building the coordinates (this may take a while...)')

    coordinate_builder = BuildCoordinatesPackmol('')

    # Cap the system size, choose the target density (the default of
    # 1.0 g/ml is normally fine) and set what should be built.
    coordinate_builder.max_molecules = 1500
    coordinate_builder.mass_density = 1.0 * unit.grams / unit.milliliters
    coordinate_builder.substance = solvated_substance

    # This creates a file called output.pdb
    coordinate_builder.execute('', None)

    # Step 2: apply smirnoff force field parameters to the coordinates.
    print('Assigning some parameters.')
    parameter_assigner = BuildSmirnoffSystem('')

    parameter_assigner.force_field_path = get_data_filename(
        'forcefield/smirnoff99Frosst.offxml')
    parameter_assigner.coordinate_file_path = 'output.pdb'
    parameter_assigner.substance = solvated_substance

    parameter_assigner.execute('', None)

    # Step 3: relax the system with a simple energy minimisation.
    print('Performing energy minimisation.')
    minimiser = RunEnergyMinimisation('')

    minimiser.input_coordinate_file = 'output.pdb'
    minimiser.system_path = parameter_assigner.system_path

    minimiser.execute('', ComputeResources())
Ejemplo n.º 6
0
def create_dummy_property(property_class):
    """Build a placeholder instance of a property class.

    Parameters
    ----------
    property_class
        The class to instantiate. It is called with `thermodynamic_state`,
        `phase`, `substance`, `value` and `uncertainty` keyword arguments.

    Returns
    -------
    The created property: a liquid phase, equimolar mixture of 'C' and 'CO'
    at 298 K and 1 atm, whose `source` is a 'dummy' fidelity
    `CalculationSource`.
    """

    binary_mixture = Mixture()

    for smiles in ('C', 'CO'):
        binary_mixture.add_component(smiles, 0.5)

    state = ThermodynamicState(temperature=298 * unit.kelvin,
                               pressure=1 * unit.atmosphere)

    created_property = property_class(thermodynamic_state=state,
                                      phase=PropertyPhase.Liquid,
                                      substance=binary_mixture,
                                      value=10 * unit.gram,
                                      uncertainty=1 * unit.gram)

    created_property.source = CalculationSource(fidelity='dummy',
                                                provenance={})

    return created_property
Ejemplo n.º 7
0
def test_base_simulation_protocols():
    """Tests that the commonly chained protocols - build coordinates,
    assign parameters, energy minimise, simulate and then analyse the
    outputs - work together, i.e. that none of them returns a
    `PropertyEstimatorException` when executed in sequence."""

    water = Mixture()
    water.add_component(smiles='O', mole_fraction=1.0)

    state = ThermodynamicState(298 * unit.kelvin, 1 * unit.atmosphere)

    with tempfile.TemporaryDirectory() as working_directory:

        def in_working_directory(file_name):
            """Return the path of a file inside the temporary directory."""
            return path.join(working_directory, file_name)

        def execute_and_check(protocol, compute_resources):
            """Execute a protocol in the temporary directory and check
            that it did not return an exception object."""
            output = protocol.execute(working_directory, compute_resources)
            assert not isinstance(output, PropertyEstimatorException)

        # Build the coordinates (creating a file called output.pdb) for a
        # deliberately tiny, low density system.
        coordinate_builder = BuildCoordinatesPackmol('')

        coordinate_builder.max_molecules = 10
        coordinate_builder.mass_density = 0.05 * unit.grams / unit.milliliters
        coordinate_builder.substance = water

        execute_and_check(coordinate_builder, None)

        # Apply smirnoff force field parameters to the coordinates.
        print('Assigning some parameters.')
        parameter_assigner = BuildSmirnoffSystem('')

        parameter_assigner.force_field_path = get_data_filename(
            'forcefield/smirnoff99Frosst.offxml')
        parameter_assigner.coordinate_file_path = in_working_directory(
            'output.pdb')
        parameter_assigner.substance = water

        execute_and_check(parameter_assigner, None)

        # Relax the system with a simple energy minimisation.
        print('Performing energy minimisation.')
        minimiser = RunEnergyMinimisation('')

        minimiser.input_coordinate_file = in_working_directory('output.pdb')
        minimiser.system_path = parameter_assigner.system_path

        execute_and_check(minimiser, ComputeResources())

        # Run a very short NPT simulation of the minimised system.
        simulation = RunOpenMMSimulation('npt_equilibration')

        simulation.ensemble = Ensemble.NPT

        # Debug settings, kept small so that the test stays quick.
        simulation.steps = 20
        simulation.output_frequency = 2

        simulation.thermodynamic_state = state

        simulation.input_coordinate_file = in_working_directory(
            'minimised.pdb')
        simulation.system_path = parameter_assigner.system_path

        execute_and_check(simulation, ComputeResources())

        # Average the density from the statistics file.
        density_extractor = ExtractAverageStatistic('extract_density')

        density_extractor.statistics_type = ObservableType.Density
        density_extractor.statistics_path = in_working_directory(
            'statistics.csv')

        execute_and_check(density_extractor, ComputeResources())

        # Average the dielectric constant from the trajectory.
        dielectric_extractor = ExtractAverageDielectric('extract_dielectric')

        dielectric_extractor.thermodynamic_state = state

        dielectric_extractor.input_coordinate_file = in_working_directory(
            'input.pdb')
        dielectric_extractor.trajectory_path = in_working_directory(
            'trajectory.dcd')
        dielectric_extractor.system_path = parameter_assigner.system_path

        execute_and_check(dielectric_extractor, ComputeResources())

        # Subsample the trajectory using the statistics computed by the
        # density extraction.
        trajectory_subsampler = ExtractUncorrelatedTrajectoryData(
            'extract_traj')

        trajectory_subsampler.statistical_inefficiency = \
            density_extractor.statistical_inefficiency
        trajectory_subsampler.equilibration_index = \
            density_extractor.equilibration_index
        trajectory_subsampler.input_coordinate_file = in_working_directory(
            'input.pdb')
        trajectory_subsampler.input_trajectory_path = in_working_directory(
            'trajectory.dcd')

        execute_and_check(trajectory_subsampler, ComputeResources())

        # Subsample the statistics file in the same way.
        statistics_subsampler = ExtractUncorrelatedStatisticsData(
            'extract_stats')

        statistics_subsampler.statistical_inefficiency = \
            density_extractor.statistical_inefficiency
        statistics_subsampler.equilibration_index = \
            density_extractor.equilibration_index
        statistics_subsampler.input_statistics_path = in_working_directory(
            'statistics.csv')

        execute_and_check(statistics_subsampler, ComputeResources())
Ejemplo n.º 8
0
    def _build_workflow_graph(working_directory, properties,
                              target_force_field_path, stored_data_paths,
                              options):
        """Build the graph of workflows needed to estimate a set of
        properties by reweighting.

        A property is silently skipped (with a warning logged only when its
        type has no registered workflow schemas at all) if it cannot be
        reweighted, has no schema for this layer, or if stored data is
        missing for its substance or - for multi-component properties - for
        any one of its components.

        Parameters
        ----------
        working_directory: str
            The local directory in which to store all local,
            temporary calculation data from this graph.
        properties : list of PhysicalProperty
            The properties to attempt to compute.
        target_force_field_path : str
            The path to the target force field parameters to use in the
            workflow.
        stored_data_paths: dict of str and tuple(str, str)
            A dictionary partitioned by substance identifiers, whose values
            are a tuple of a path to a stored simulation data object, and
            its corresponding force field path.
        options: PropertyEstimatorOptions
            The options to run the workflows with.

        Returns
        -------
        WorkflowGraph
            The graph containing one workflow per property which could be
            set up.
        """
        graph_of_workflows = WorkflowGraph(working_directory)

        for physical_property in properties:

            # Only properties which implement both the IReweightable and
            # the IWorkflowProperty interfaces can be reweighted.
            can_be_reweighted = (
                isinstance(physical_property, IReweightable) and
                isinstance(physical_property, IWorkflowProperty))

            if not can_be_reweighted:
                continue

            type_name = type(physical_property).__name__

            if type_name not in options.workflow_schemas:

                logging.warning('The reweighting layer does not support {} '
                                'workflows.'.format(type_name))

                continue

            schemas_by_layer = options.workflow_schemas[type_name]
            layer_name = ReweightingLayer.__name__

            if layer_name not in schemas_by_layer:
                continue

            layer_schema = schemas_by_layer[layer_name]

            metadata = Workflow.generate_default_metadata(
                physical_property, target_force_field_path, options)

            substance_id = physical_property.substance.identifier

            # Without data for the full system there is nothing to reweight.
            if substance_id not in stored_data_paths:
                continue

            component_data = []

            metadata['full_system_data'] = stored_data_paths[substance_id]
            metadata['component_data'] = component_data

            missing_component_data = False

            if physical_property.multi_component_property:

                # Multi-component properties additionally need data for
                # every one of their components as a pure substance.
                for component in physical_property.substance.components:

                    pure_component = Mixture.MixtureComponent(
                        component.smiles, mole_fraction=1.0, impurity=False)

                    component_id = pure_component.identifier

                    if component_id not in stored_data_paths:

                        missing_component_data = True
                        break

                    component_data.append(stored_data_paths[component_id])

            if missing_component_data:
                continue

            workflow = Workflow(physical_property, metadata)
            workflow.schema = layer_schema

            # NOTE(review): imported locally rather than at module level,
            # presumably to avoid a circular import - confirm.
            from propertyestimator.properties import CalculationSource
            workflow.physical_property.source = CalculationSource(
                fidelity=layer_name, provenance={})

            graph_of_workflows.add_workflow(workflow)

        return graph_of_workflows