def test_build_docked_coordinates_protocol():
    """Dock a single methanol ligand into the alpha-cyclodextrin receptor.

    The test is skipped when ``has_openeye`` reports that OpenEye is not
    available. The docked complex which is written out is expected to
    contain exactly two residues.
    """
    if not has_openeye():
        pytest.skip("The `BuildDockedCoordinates` protocol requires OpenEye.")

    # The ligand substance holds exactly one methanol molecule.
    methanol = Component("CO", role=Component.Role.Ligand)

    ligand = Substance()
    ligand.add_component(methanol, ExactAmount(1))

    # TODO: This test could likely be made substantially faster
    #       by storing the binary prepared receptor. Would this
    #       be in breach of any oe license terms?
    with tempfile.TemporaryDirectory() as work_dir:
        docking = BuildDockedCoordinates("build_methanol")
        docking.ligand_substance = ligand
        docking.number_of_ligand_conformers = 5
        docking.receptor_coordinate_file = get_data_filename(
            "test/molecules/acd.mol2"
        )
        docking.execute(work_dir, ComputeResources())

        complex_path = docking.docked_complex_coordinate_path
        docked_complex = PDBFile(complex_path)

        assert docked_complex.topology.getNumResidues() == 2
def test_validate_data_set():
    """Check that data set validation accepts valid and rejects invalid data.

    A density measured at a negative temperature must be refused when it is
    added with the default arguments, and must cause ``validate`` to fail
    when it was added with ``validate=False``.
    """

    def build_water_density(temperature):
        # A liquid water density with zero value and zero uncertainty,
        # measured at the requested temperature and one atmosphere.
        return Density(
            ThermodynamicState(temperature, 1 * unit.atmosphere),
            PropertyPhase.Liquid,
            Substance.from_components("O"),
            0.0 * unit.gram / unit.milliliter,
            0.0 * unit.gram / unit.milliliter,
        )

    # A physically sensible property passes validation.
    data_set = PhysicalPropertyDataSet()
    data_set.add_properties(build_water_density(298 * unit.kelvin))
    data_set.validate()

    negative_temperature_density = build_water_density(-1 * unit.kelvin)

    # By default properties are checked as they are added...
    with pytest.raises(AssertionError):
        data_set.add_properties(negative_temperature_density)

    # ...but the check can be deferred to an explicit `validate` call.
    data_set.add_properties(negative_temperature_density, validate=False)

    with pytest.raises(AssertionError):
        data_set.validate()
def create_dummy_substance(number_of_components, elements=None):
    """Build a substance made of equally weighted, dummy components.

    The SMILES pattern of the i-th component (counting from one) is the
    concatenation of ``elements`` repeated i times, and every component is
    assigned a mole fraction of ``1 / number_of_components``.

    Parameters
    ----------
    number_of_components : int
        The number of components to add to the substance.
    elements : list of str, optional
        The elements that each component should contain. Defaults to
        ``["C"]`` when not provided.

    Returns
    -------
    Substance
        The created substance.
    """
    repeat_unit = ["C"] if elements is None else elements

    dummy_substance = Substance()
    fraction_per_component = 1.0 / number_of_components

    for n_repeats in range(1, number_of_components + 1):
        smiles = "".join(repeat_unit * n_repeats)

        dummy_substance.add_component(
            Component(smiles), MoleFraction(fraction_per_component)
        )

    return dummy_substance
def test_solvate_existing_structure_protocol():
    """Solvate one methanol molecule with water and inspect the result.

    Coordinates for a lone methanol are built first and then handed to
    `SolvateExistingStructure` together with a pure water substance. The
    solvated system must contain ten residues, the first of which carries
    the residue name that was assigned to methanol.
    """
    import mdtraj

    solute_component = Component("CO")

    solute = Substance()
    solute.add_component(solute_component, ExactAmount(1))

    solvent = Substance()
    solvent.add_component(Component("O"), MoleFraction(1.0))

    with tempfile.TemporaryDirectory() as work_dir:
        # Step 1: build coordinates for the single solute molecule.
        build_solute = BuildCoordinatesPackmol("build_methanol")
        build_solute.max_molecules = 1
        build_solute.substance = solute
        build_solute.execute(work_dir, ComputeResources())

        residue_names = build_solute.assigned_residue_names
        solute_residue_name = residue_names[solute_component.identifier]

        # Step 2: surround the solute with up to nine solvent molecules.
        solvate = SolvateExistingStructure("solvate_methanol")
        solvate.max_molecules = 9
        solvate.substance = solvent
        solvate.solute_coordinate_file = build_solute.coordinate_file_path
        solvate.execute(work_dir, ComputeResources())

        solvated = mdtraj.load_pdb(solvate.coordinate_file_path)

        assert solvated.n_residues == 10
        assert solvated.top.residue(0).name == solute_residue_name
def test_gradient_reduced_potentials(use_subset):
    """Run `OpenMMGradientPotentials` against stored water simulation data.

    A force field built by ``build_tip3p_smirnoff_force_field`` is written
    to a temporary directory and used to evaluate the potentials for the
    vdW ``epsilon`` parameter of the water oxygen. The test passes when
    both the forward and the reverse potential files have been written.

    Parameters
    ----------
    use_subset
        The value forwarded to the protocol's ``use_subset_of_force_field``
        input.
    """
    substance = Substance.from_components("O")
    thermodynamic_state = ThermodynamicState(
        298 * unit.kelvin, 1.0 * unit.atmosphere
    )

    with tempfile.TemporaryDirectory() as directory:
        # Serialise the force field so the protocol can load it from disk.
        force_field_path = path.join(directory, "ff.json")

        with open(force_field_path, "w") as file:
            file.write(build_tip3p_smirnoff_force_field().json())

        # A plain string literal: the id contains nothing to interpolate.
        reduced_potentials = OpenMMGradientPotentials("reduced_potentials")
        reduced_potentials.substance = substance
        reduced_potentials.thermodynamic_state = thermodynamic_state
        reduced_potentials.statistics_path = get_data_filename(
            "test/statistics/stats_pandas.csv"
        )
        reduced_potentials.force_field_path = force_field_path
        reduced_potentials.trajectory_file_path = get_data_filename(
            "test/trajectories/water.dcd"
        )
        reduced_potentials.coordinate_file_path = get_data_filename(
            "test/trajectories/water.pdb"
        )
        reduced_potentials.use_subset_of_force_field = use_subset
        reduced_potentials.enable_pbc = True
        reduced_potentials.parameter_key = ParameterGradientKey(
            "vdW", "[#1]-[#8X2H2+0:1]-[#1]", "epsilon"
        )

        reduced_potentials.execute(directory, ComputeResources())

        assert path.isfile(reduced_potentials.forward_potentials_path)
        assert path.isfile(reduced_potentials.reverse_potentials_path)
def test_simulation_data_storage():
    """Round-trip a dummy simulation data object through `LocalFileStorage`.

    Storing the same object twice must yield the same key, and the object
    retrieved with that key must serialise to the same JSON as the
    original.
    """
    methane = Substance.from_components("C")

    with tempfile.TemporaryDirectory() as root_directory:
        data_directory = os.path.join(root_directory, "data_directory")
        original_data = create_dummy_simulation_data(data_directory, methane)

        backend_directory = os.path.join(root_directory, "storage_dir")
        storage = LocalFileStorage(backend_directory)

        first_key = storage.store_object(original_data, data_directory)

        # Regenerate the data directory before storing a second time
        # (presumably the first store call moved it into the backend).
        os.makedirs(data_directory, exist_ok=True)

        assert storage.has_object(original_data)

        # Storing identical data again must map onto the existing key.
        second_key = storage.store_object(original_data, data_directory)
        assert first_key == second_key

        retrieved_data, retrieved_directory = storage.retrieve_object(
            first_key, StoredSimulationData
        )

        assert backend_directory in retrieved_directory
        assert original_data.json() == retrieved_data.json()
def test_weight_by_mole_fraction_protocol(component_smiles, value):
    """Check that `WeightByMoleFraction` scales a value by a mole fraction.

    Parameters
    ----------
    component_smiles
        The SMILES pattern of the component whose mole fraction within a
        three component ("C", "CC", "CCC") substance is used as the weight.
    value
        The value to be weighted.
    """
    full_substance = Substance.from_components("C", "CC", "CCC")
    component = Substance.from_components(component_smiles)

    # Look up the amount of the component of interest in the full substance.
    component_identifier = component.components[0].identifier
    component_amounts = full_substance.get_amounts(component_identifier)

    first_amount = next(iter(component_amounts))
    mole_fraction = first_amount.value

    with tempfile.TemporaryDirectory() as work_dir:
        protocol = WeightByMoleFraction("weight")
        protocol.value = value
        protocol.full_substance = full_substance
        protocol.component = component
        protocol.execute(work_dir, ComputeResources())

        assert protocol.weighted_value == value * mole_fraction
def test_add_mole_fractions():
    """Adding the same component twice must merge its mole fractions.

    Two additions of "C" with a mole fraction of 0.5 each should leave a
    single component with a single `MoleFraction` amount close to 1.0.
    """
    substance = Substance()

    for _ in range(2):
        substance.add_component(Component("C"), MoleFraction(0.5))

    assert substance.number_of_components == 1

    merged_amounts = substance.get_amounts(substance.components[0])
    assert len(merged_amounts) == 1

    merged_amount = next(iter(merged_amounts))

    assert isinstance(merged_amount, MoleFraction)
    assert np.isclose(merged_amount.value, 1.0)
def test_same_component_batching():
    """Check that properties are batched together by their substance.

    A density and an enthalpy of vaporization are created for each of two
    binary substances. The server is expected to produce two batches which
    each contain two queued properties.
    """
    state = ThermodynamicState(
        temperature=1.0 * unit.kelvin, pressure=1.0 * unit.atmosphere
    )

    # Build the properties substance by substance so that the order in
    # which they are added is: density, enthalpy, density, enthalpy.
    properties = []

    for smiles in (("O", "C"), ("O", "CO")):
        properties.append(
            Density(
                thermodynamic_state=state,
                substance=Substance.from_components(*smiles),
                value=0.0 * unit.kilogram / unit.meter**3,
            )
        )
        properties.append(
            EnthalpyOfVaporization(
                thermodynamic_state=state,
                substance=Substance.from_components(*smiles),
                value=0.0 * unit.kilojoule / unit.mole,
            )
        )

    data_set = PhysicalPropertyDataSet()
    data_set.add_properties(*properties)

    submission = EvaluatorClient._Submission()
    submission.dataset = data_set
    submission.options = RequestOptions()

    with DaskLocalCluster() as calculation_backend:
        server = EvaluatorServer(calculation_backend)
        batches = server._batch_by_same_component(submission, "")

    assert len(batches) == 2

    first_batch, second_batch = batches[0], batches[1]

    assert len(first_batch.queued_properties) == 2
    assert len(second_batch.queued_properties) == 2
def _execute(self, directory, available_resources):
    """Filter the input substance down to the components with the given role.

    The components of ``self.input_substance`` whose role equals
    ``self.component_role`` are copied into a new substance which is
    stored in ``self.filtered_substance``. Mole fraction amounts are
    rescaled by the inverse of their total over the retained components
    (left untouched when that total is close to zero), while all other
    kinds of amount are copied unchanged.

    Parameters
    ----------
    directory
        Not used by this method.
    available_resources
        Not used by this method.

    Raises
    ------
    ValueError
        If ``self.expected_components`` is defined and differs from the
        number of components which remain after filtering.
    """
    source_substance = self.input_substance

    retained_components = [
        component
        for component in source_substance.components
        if component.role == self.component_role
    ]

    # Only mole fractions contribute to the normalisation constant.
    total_mole_fraction = sum(
        (
            amount.value
            for component in retained_components
            for amount in source_substance.get_amounts(component)
            if isinstance(amount, MoleFraction)
        ),
        0.0,
    )

    expected_count = self.expected_components

    if expected_count != UNDEFINED and expected_count != len(retained_components):
        raise ValueError(
            f"The filtered substance does not contain the expected number of "
            f"components ({self.expected_components}) - {retained_components}",
        )

    # Guard against dividing by zero when no mole fractions were found.
    if np.isclose(total_mole_fraction, 0.0):
        inverse_mole_fraction = 1.0
    else:
        inverse_mole_fraction = 1.0 / total_mole_fraction

    self.filtered_substance = Substance()

    for component in retained_components:
        for amount in source_substance.get_amounts(component):
            rescaled_amount = (
                MoleFraction(amount.value * inverse_mole_fraction)
                if isinstance(amount, MoleFraction)
                else amount
            )
            self.filtered_substance.add_component(component, rescaled_amount)
def create_substance():
    """Build a substance which has one component of each role.

    Returns
    -------
    Substance
        A substance containing a solute ("C"), a ligand ("CC") and a
        receptor ("CCC"), each with an exact amount of one, together with
        a solvent ("O") with a mole fraction of 1.0.
    """
    roles = Component.Role

    # (SMILES, role, amount) for every component, in the order of addition.
    recipe = [
        ("C", roles.Solute, ExactAmount(1)),
        ("CC", roles.Ligand, ExactAmount(1)),
        ("CCC", roles.Receptor, ExactAmount(1)),
        ("O", roles.Solvent, MoleFraction(1.0)),
    ]

    substance = Substance()

    for smiles, role, amount in recipe:
        substance.add_component(Component(smiles, role=role), amount)

    return substance
def test_simulation_data_query():
    """Check that stored simulation data can be queried by substance.

    Dummy data is stored for two pure substances and for their binary
    mixture. Querying for any one of these substances must return exactly
    one match, while a components-only query for the mixture must return
    one match per component.
    """
    substance_a = Substance.from_components("C")
    substance_b = Substance.from_components("CO")

    substance_full = Substance.from_components("C", "CO")

    substances = [substance_a, substance_b, substance_full]

    with tempfile.TemporaryDirectory() as base_directory:
        backend_directory = os.path.join(base_directory, "storage_dir")
        storage = LocalFileStorage(backend_directory)

        for substance in substances:
            # The identifier is used directly as the directory name;
            # wrapping it in an f-string added nothing.
            data_directory = os.path.join(base_directory, substance.identifier)
            data_object = create_dummy_simulation_data(data_directory, substance)

            storage.store_object(data_object, data_directory)

        # Each substance should be matched by exactly one stored object.
        for substance in substances:
            substance_query = SimulationDataQuery()
            substance_query.substance = substance

            results = storage.query(substance_query)

            assert results is not None and len(results) == 1
            # Every match is expected to be made up of three entries.
            assert len(next(iter(results.values()))[0]) == 3

        # Querying on the components of the mixture should find the data
        # stored for each of the two pure substances.
        component_query = SimulationDataQuery()
        component_query.substance = substance_full
        component_query.substance_query = SubstanceQuery()
        component_query.substance_query.components_only = True

        results = storage.query(component_query)

        assert results is not None and len(results) == 2
def test_calculate_reduced_potential_openmm():
    """Run `OpenMMReducedPotentials` on a stored water trajectory.

    Coordinates and a parameterised system are built for water, after
    which the reduced potentials of a stored trajectory are evaluated. The
    statistics file that is produced must exist and must contain the
    reduced potential observable.
    """
    substance = Substance.from_components("O")
    thermodynamic_state = ThermodynamicState(
        298 * unit.kelvin, 1.0 * unit.atmosphere
    )

    with tempfile.TemporaryDirectory() as directory:
        # Serialise the force field so the protocols can load it from disk.
        force_field_path = path.join(directory, "ff.json")

        with open(force_field_path, "w") as file:
            file.write(build_tip3p_smirnoff_force_field().json())

        build_coordinates = BuildCoordinatesPackmol("build_coordinates")
        build_coordinates.max_molecules = 10
        build_coordinates.mass_density = 0.05 * unit.grams / unit.milliliters
        build_coordinates.substance = substance
        build_coordinates.execute(directory, None)

        # Plain string literals: the ids contain nothing to interpolate.
        assign_parameters = BuildSmirnoffSystem("assign_parameters")
        assign_parameters.force_field_path = force_field_path
        assign_parameters.coordinate_file_path = (
            build_coordinates.coordinate_file_path
        )
        assign_parameters.substance = substance
        assign_parameters.execute(directory, None)

        reduced_potentials = OpenMMReducedPotentials("reduced_potentials")
        reduced_potentials.substance = substance
        reduced_potentials.thermodynamic_state = thermodynamic_state
        reduced_potentials.reference_force_field_paths = [force_field_path]
        reduced_potentials.system_path = assign_parameters.system_path
        reduced_potentials.trajectory_file_path = get_data_filename(
            "test/trajectories/water.dcd"
        )
        reduced_potentials.coordinate_file_path = get_data_filename(
            "test/trajectories/water.pdb"
        )
        reduced_potentials.kinetic_energies_path = get_data_filename(
            "test/statistics/stats_pandas.csv"
        )
        reduced_potentials.high_precision = False

        reduced_potentials.execute(directory, ComputeResources())

        assert path.isfile(reduced_potentials.statistics_file_path)

        final_array = StatisticsArray.from_pandas_csv(
            reduced_potentials.statistics_file_path
        )
        assert ObservableType.ReducedPotential in final_array
def test_build_tleap_system():
    """Check that `BuildTLeapSystem` writes a system file.

    Coordinates are built for a three component substance and then
    parameterised using a default `TLeapForceFieldSource` which has been
    serialised to disk.
    """
    with tempfile.TemporaryDirectory() as directory:
        # Serialise the force field source so the protocol can load it.
        force_field_path = path.join(directory, "ff.json")

        with open(force_field_path, "w") as file:
            file.write(TLeapForceFieldSource().json())

        substance = Substance.from_components("C", "O", "C(=O)N")

        build_coordinates = BuildCoordinatesPackmol("build_coordinates")
        build_coordinates.max_molecules = 9
        build_coordinates.substance = substance
        build_coordinates.execute(directory, None)

        # A plain string literal: the id contains nothing to interpolate.
        assign_parameters = BuildTLeapSystem("assign_parameters")
        assign_parameters.force_field_path = force_field_path
        assign_parameters.coordinate_file_path = (
            build_coordinates.coordinate_file_path
        )
        assign_parameters.substance = substance
        assign_parameters.execute(directory, None)

        assert path.isfile(assign_parameters.system_path)
def _setup_dummy_system(directory):
    """Build coordinates and a parameterised system for a single "C" molecule.

    Parameters
    ----------
    directory : str
        The directory in which the force field file is written and in
        which both protocols are executed.

    Returns
    -------
    tuple
        The coordinate file path reported by the coordinate building
        protocol, followed by the system path reported by the parameter
        assignment protocol.
    """
    # Serialise the force field so the protocol can load it from disk.
    force_field_path = path.join(directory, "ff.json")

    with open(force_field_path, "w") as file:
        file.write(build_tip3p_smirnoff_force_field().json())

    substance = Substance.from_components("C")

    build_coordinates = BuildCoordinatesPackmol("build_coordinates")
    build_coordinates.max_molecules = 1
    build_coordinates.mass_density = 0.001 * unit.grams / unit.milliliters
    build_coordinates.substance = substance
    build_coordinates.execute(directory, None)

    # A plain string literal: the id contains nothing to interpolate.
    assign_parameters = BuildSmirnoffSystem("assign_parameters")
    assign_parameters.force_field_path = force_field_path
    assign_parameters.coordinate_file_path = build_coordinates.coordinate_file_path
    assign_parameters.substance = substance
    assign_parameters.execute(directory, None)

    return build_coordinates.coordinate_file_path, assign_parameters.system_path
def test_filter_by_smiles():
    """Ensure a data set can be filtered by the SMILES of its substances.

    After filtering a set of methanol and ethanol densities by "CO" only
    the methanol property should remain.
    """

    def pure_density(smiles):
        # Create a dummy density and point it at a pure substance.
        substance = Substance()
        substance.add_component(Component(smiles), MoleFraction(1.0))

        dummy_density = create_dummy_property(Density)
        dummy_density.substance = substance

        return dummy_density, substance

    methanol_density, methanol = pure_density("CO")
    ethanol_density, ethanol = pure_density("CCO")

    data_set = PhysicalPropertyDataSet()
    data_set.add_properties(methanol_density, ethanol_density)

    data_set.filter_by_smiles("CO")

    assert len(data_set) == 1

    remaining_substances = data_set.substances

    assert methanol in remaining_substances
    assert ethanol not in remaining_substances
class FilterSubstanceByRole(Protocol):
    """A protocol which reduces a substance to the components that have a
    particular role, renormalising any mole fractions which remain.
    """

    input_substance = InputAttribute(
        docstring="The substance to filter.",
        type_hint=Substance,
        default_value=UNDEFINED,
    )

    component_role = InputAttribute(
        docstring="The role to filter substance components against.",
        type_hint=Component.Role,
        default_value=UNDEFINED,
    )

    expected_components = InputAttribute(
        docstring="The number of components expected to remain after filtering. "
        "An exception is raised if this number is not matched.",
        type_hint=int,
        default_value=UNDEFINED,
        optional=True,
    )

    filtered_substance = OutputAttribute(
        docstring="The filtered substance.", type_hint=Substance
    )

    def _execute(self, directory, available_resources):
        """Populate ``filtered_substance`` from ``input_substance``.

        Components whose role equals ``component_role`` are retained. Mole
        fraction amounts are rescaled by the inverse of their total over
        the retained components (left untouched when that total is close
        to zero); all other kinds of amount are copied unchanged.

        Raises
        ------
        ValueError
            If ``expected_components`` is defined and differs from the
            number of components which remain after filtering.
        """
        source_substance = self.input_substance

        retained_components = [
            component
            for component in source_substance.components
            if component.role == self.component_role
        ]

        # Only mole fractions contribute to the normalisation constant.
        total_mole_fraction = sum(
            (
                amount.value
                for component in retained_components
                for amount in source_substance.get_amounts(component)
                if isinstance(amount, MoleFraction)
            ),
            0.0,
        )

        expected_count = self.expected_components

        if expected_count != UNDEFINED and expected_count != len(
            retained_components
        ):
            raise ValueError(
                f"The filtered substance does not contain the expected number of "
                f"components ({self.expected_components}) - {retained_components}",
            )

        # Guard against dividing by zero when no mole fractions were found.
        if np.isclose(total_mole_fraction, 0.0):
            inverse_mole_fraction = 1.0
        else:
            inverse_mole_fraction = 1.0 / total_mole_fraction

        self.filtered_substance = Substance()

        for component in retained_components:
            for amount in source_substance.get_amounts(component):
                rescaled_amount = (
                    MoleFraction(amount.value * inverse_mole_fraction)
                    if isinstance(amount, MoleFraction)
                    else amount
                )
                self.filtered_substance.add_component(component, rescaled_amount)
def default_simulation_schema(
    absolute_tolerance=UNDEFINED, relative_tolerance=UNDEFINED, n_molecules=2000
):
    """Returns the default calculation schema to use when estimating
    this class of property from direct simulations.

    The workflow builds and equilibrates the fully solvated system, builds
    the solute in vacuum, and then runs YANK inside of a conditional group
    to estimate the free energy of moving the solute between the two.

    Parameters
    ----------
    absolute_tolerance: pint.Quantity, optional
        The absolute tolerance to estimate the property to within.
        Only one of `absolute_tolerance` and `relative_tolerance`
        may be specified.
    relative_tolerance: float, optional
        The tolerance (as a fraction of the properties reported
        uncertainty) to estimate the property to within.
        Only one of `absolute_tolerance` and `relative_tolerance`
        may be specified.
    n_molecules: int
        The number of molecules to use in the simulation.

    Returns
    -------
    SimulationSchema
        The schema to follow when estimating this property.
    """
    assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

    calculation_schema = SimulationSchema()
    calculation_schema.absolute_tolerance = absolute_tolerance
    calculation_schema.relative_tolerance = relative_tolerance

    # A convergence condition is only added when a tolerance was requested.
    use_target_uncertainty = (
        absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED
    )

    # Setup the fully solvated systems.
    build_full_coordinates = coordinates.BuildCoordinatesPackmol(
        "build_solvated_coordinates"
    )
    build_full_coordinates.substance = ProtocolPath("substance", "global")
    build_full_coordinates.max_molecules = n_molecules

    assign_full_parameters = forcefield.BaseBuildSystem("assign_solvated_parameters")
    assign_full_parameters.force_field_path = ProtocolPath(
        "force_field_path", "global"
    )
    assign_full_parameters.substance = ProtocolPath("substance", "global")
    assign_full_parameters.coordinate_file_path = ProtocolPath(
        "coordinate_file_path", build_full_coordinates.id
    )

    # Perform a quick minimisation of the full system to give
    # YANK a better starting point for its minimisation.
    energy_minimisation = openmm.OpenMMEnergyMinimisation("energy_minimisation")
    energy_minimisation.system_path = ProtocolPath(
        "system_path", assign_full_parameters.id
    )
    energy_minimisation.input_coordinate_file = ProtocolPath(
        "coordinate_file_path", build_full_coordinates.id
    )

    equilibration_simulation = openmm.OpenMMSimulation("equilibration_simulation")
    equilibration_simulation.ensemble = Ensemble.NPT
    equilibration_simulation.steps_per_iteration = 100000
    equilibration_simulation.output_frequency = 10000
    equilibration_simulation.timestep = 2.0 * unit.femtosecond
    equilibration_simulation.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    equilibration_simulation.system_path = ProtocolPath(
        "system_path", assign_full_parameters.id
    )
    equilibration_simulation.input_coordinate_file = ProtocolPath(
        "output_coordinate_file", energy_minimisation.id
    )

    # Create substances which contain only the solvent, and only the
    # solute (e.g. for the vacuum phase simulations).
    filter_solvent = miscellaneous.FilterSubstanceByRole("filter_solvent")
    filter_solvent.input_substance = ProtocolPath("substance", "global")
    filter_solvent.component_role = Component.Role.Solvent

    filter_solute = miscellaneous.FilterSubstanceByRole("filter_solute")
    filter_solute.input_substance = ProtocolPath("substance", "global")
    filter_solute.component_role = Component.Role.Solute

    # Setup the solute in vacuum system.
    build_vacuum_coordinates = coordinates.BuildCoordinatesPackmol(
        "build_vacuum_coordinates"
    )
    build_vacuum_coordinates.substance = ProtocolPath(
        "filtered_substance", filter_solute.id
    )
    build_vacuum_coordinates.max_molecules = 1

    assign_vacuum_parameters = forcefield.BaseBuildSystem("assign_parameters")
    assign_vacuum_parameters.force_field_path = ProtocolPath(
        "force_field_path", "global"
    )
    assign_vacuum_parameters.substance = ProtocolPath(
        "filtered_substance", filter_solute.id
    )
    assign_vacuum_parameters.coordinate_file_path = ProtocolPath(
        "coordinate_file_path", build_vacuum_coordinates.id
    )

    # Set up the protocol to run yank. The second "solvent" is an empty
    # substance paired with the vacuum coordinates and system.
    run_yank = yank.SolvationYankProtocol("run_solvation_yank")
    run_yank.solute = ProtocolPath("filtered_substance", filter_solute.id)
    run_yank.solvent_1 = ProtocolPath("filtered_substance", filter_solvent.id)
    run_yank.solvent_2 = Substance()
    run_yank.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
    run_yank.steps_per_iteration = 500
    run_yank.checkpoint_interval = 50
    run_yank.solvent_1_coordinates = ProtocolPath(
        "output_coordinate_file", equilibration_simulation.id
    )
    run_yank.solvent_1_system = ProtocolPath(
        "system_path", assign_full_parameters.id
    )
    run_yank.solvent_2_coordinates = ProtocolPath(
        "coordinate_file_path", build_vacuum_coordinates.id
    )
    run_yank.solvent_2_system = ProtocolPath(
        "system_path", assign_vacuum_parameters.id
    )

    # Set up the group which will run yank until the free energy has been
    # determined to within a given uncertainty
    conditional_group = groups.ConditionalGroup("conditional_group")
    conditional_group.max_iterations = 20

    if use_target_uncertainty:
        # Stop once the estimated error drops below the target uncertainty.
        condition = groups.ConditionalGroup.Condition()
        condition.type = groups.ConditionalGroup.Condition.Type.LessThan
        condition.right_hand_value = ProtocolPath("target_uncertainty", "global")
        condition.left_hand_value = ProtocolPath(
            "estimated_free_energy.error", conditional_group.id, run_yank.id
        )

        conditional_group.add_condition(condition)

    # Define the total number of iterations that yank should run for.
    total_iterations = miscellaneous.MultiplyValue("total_iterations")
    total_iterations.value = 2000
    total_iterations.multiplier = ProtocolPath(
        "current_iteration", conditional_group.id
    )

    # Make sure the simulations gets extended after each iteration.
    run_yank.number_of_iterations = ProtocolPath("result", total_iterations.id)

    conditional_group.add_protocols(total_iterations, run_yank)

    # Define the full workflow schema. The protocols which were added to
    # the conditional group are included through the group's own schema.
    schema = WorkflowSchema()

    schema.protocol_schemas = [
        build_full_coordinates.schema,
        assign_full_parameters.schema,
        energy_minimisation.schema,
        equilibration_simulation.schema,
        filter_solvent.schema,
        filter_solute.schema,
        build_vacuum_coordinates.schema,
        assign_vacuum_parameters.schema,
        conditional_group.schema,
    ]

    schema.final_value_source = ProtocolPath(
        "estimated_free_energy", conditional_group.id, run_yank.id
    )

    calculation_schema.workflow_schema = schema
    return calculation_schema
def test_duplicate_simulation_data_storage(reverse_order):
    """Check how duplicate simulation data is handled by the local storage.

    Three pieces of data which differ only in their coordinate file name
    and their statistical inefficiency are stored one after the other. All
    of them must be assigned the same storage key, and after each store
    call the backend must hold the piece with the lowest statistical
    inefficiency seen so far.

    Parameters
    ----------
    reverse_order : bool
        Whether to store the data in order of decreasing, rather than
        increasing, statistical inefficiency.
    """
    substance = Substance.from_components("CO")

    with tempfile.TemporaryDirectory() as root_directory:
        local_storage = LocalFileStorage(os.path.join(root_directory, "storage"))

        # Construct some data to store with increasing
        # statistical inefficiencies.
        data_to_store = []

        for index in range(3):
            data_directory = os.path.join(root_directory, f"data_{index}")

            data_object = create_dummy_simulation_data(
                directory_path=data_directory,
                substance=substance,
                force_field_id="ff_id_1",
                coordinate_file_name=f"data_{index}.pdb",
                statistical_inefficiency=float(index),
                calculation_id="id",
            )
            data_to_store.append((data_object, data_directory))

        indexed_data = list(enumerate(data_to_store))

        if reverse_order:
            indexed_data.reverse()

        # Keep track of every storage key that gets handed out.
        all_storage_keys = set()

        for index, (data_object, data_directory) in indexed_data:
            storage_key = local_storage.store_object(data_object, data_directory)
            all_storage_keys.add(storage_key)

            retrieved_object, stored_directory = local_storage.retrieve_object(
                storage_key
            )

            # In the original order only the first object should be stored
            # and never replaced, as it has the lowest statistical
            # inefficiency. In the reversed order each new piece of data
            # should replace the last, as it will have a lower statistical
            # inefficiency.
            expected_index = index if reverse_order else 0
            expected_object = data_to_store[expected_index][0]

            assert retrieved_object.json() == expected_object.json()

            # Make sure the directory has been correctly overwritten /
            # retained depending on the data order.
            coordinate_path = os.path.join(
                stored_directory, f"data_{expected_index}.pdb"
            )
            assert os.path.isfile(coordinate_path)

        # Make sure all pieces of data got assigned the same key,
        # whichever order they were stored in.
        assert len(all_storage_keys) == 1
def test_to_pandas():
    """A test to ensure that data sets are convertable to pandas objects.

    Two pure component and two binary mixture properties are added at each
    of three temperatures, giving twelve rows in total. The resulting
    frame must contain the required columns, and have the expected shape
    both before and after any completely empty columns are dropped.
    """
    source = CalculationSource("Dummy", {})

    pure_substance = Substance.from_components("C")
    binary_substance = Substance.from_components("C", "O")

    data_set = PhysicalPropertyDataSet()

    # The temperatures (in kelvin) at which each property is "measured".
    temperatures = (298, 300, 302)

    # Add the pure component properties.
    for temperature in temperatures:
        thermodynamic_state = ThermodynamicState(
            temperature=temperature * unit.kelvin, pressure=1.0 * unit.atmosphere
        )

        density_property = Density(
            thermodynamic_state=thermodynamic_state,
            phase=PropertyPhase.Liquid,
            substance=pure_substance,
            value=1 * unit.gram / unit.milliliter,
            uncertainty=0.11 * unit.gram / unit.milliliter,
            source=source,
        )
        dielectric_property = DielectricConstant(
            thermodynamic_state=thermodynamic_state,
            phase=PropertyPhase.Liquid,
            substance=pure_substance,
            value=1 * unit.dimensionless,
            uncertainty=0.11 * unit.dimensionless,
            source=source,
        )

        data_set.add_properties(density_property)
        data_set.add_properties(dielectric_property)

    # Add the binary mixture properties.
    for temperature in temperatures:
        thermodynamic_state = ThermodynamicState(
            temperature=temperature * unit.kelvin, pressure=1.0 * unit.atmosphere
        )

        enthalpy_property = EnthalpyOfMixing(
            thermodynamic_state=thermodynamic_state,
            phase=PropertyPhase.Liquid,
            substance=binary_substance,
            value=1 * unit.kilojoules / unit.mole,
            uncertainty=0.11 * unit.kilojoules / unit.mole,
            source=source,
        )
        excess_property = ExcessMolarVolume(
            thermodynamic_state=thermodynamic_state,
            phase=PropertyPhase.Liquid,
            substance=binary_substance,
            value=1 * unit.meter**3 / unit.mole,
            uncertainty=0.11 * unit.meter**3 / unit.mole,
            source=source,
        )

        data_set.add_properties(enthalpy_property)
        data_set.add_properties(excess_property)

    data_set_pandas = data_set.to_pandas()

    # Check for `None` *before* the frame is used, so that a missing frame
    # is reported as a failed assertion and not as a `TypeError`.
    assert data_set_pandas is not None

    required_columns = [
        "Temperature (K)",
        "Pressure (kPa)",
        "Phase",
        "N Components",
        "Source",
        "Component 1",
        "Role 1",
        "Mole Fraction 1",
        "Exact Amount 1",
        "Component 2",
        "Role 2",
        "Mole Fraction 2",
        "Exact Amount 2",
    ]

    missing_columns = [x for x in required_columns if x not in data_set_pandas]
    assert not missing_columns, f"missing columns: {missing_columns}"

    assert data_set_pandas.shape == (12, 21)

    # Two of the columns are expected to be completely empty.
    data_set_without_na = data_set_pandas.dropna(axis=1, how="all")
    assert data_set_without_na.shape == (12, 19)
def test_storage_retrieval():
    """Check that the reweighting layer retrieves the correct stored data.

    Dummy simulation data is stored for a number of substances and phases,
    some of which should never be matched. For each property the workflow
    metadata produced by the reweighting layer must then reference exactly
    the storage keys of the data which that property is expected to use.
    """
    # The substances which the properties of interest are measured for.
    methane = Substance.from_components("C")
    methanol = Substance.from_components("CO")
    mixture = Substance.from_components("C", "CO")

    # Extra, unused substances which make sure that the wrong data
    # isn't being retrieved.
    unused_pure = Substance.from_components("CCO")
    unused_mixture = Substance.from_components("CCO", "CO")

    # Each piece of data is described by (substance, phase, n molecules).
    methane_liquid = (methane, PropertyPhase.Liquid, 1000)
    methanol_liquid = (methanol, PropertyPhase.Liquid, 1000)
    methanol_gas = (methanol, PropertyPhase.Gas, 1)
    mixture_liquid = (mixture, PropertyPhase.Liquid, 1000)

    data_to_store = [
        methane_liquid,
        methanol_liquid,
        methanol_gas,
        mixture_liquid,
        (unused_pure, PropertyPhase.Liquid, 1000),
        (unused_mixture, PropertyPhase.Liquid, 1000),
    ]

    state = ThermodynamicState(temperature=1.0 * unit.kelvin)

    properties = [
        # Properties with a full system query.
        Density(
            value=1.0 * unit.gram / unit.litre,
            substance=methanol,
            thermodynamic_state=state,
        ),
        DielectricConstant(
            value=1.0 * unit.dimensionless,
            substance=methane,
            thermodynamic_state=state,
        ),
        # Property with a multi-phase query.
        EnthalpyOfVaporization(
            value=1.0 * unit.joule / unit.mole,
            substance=methanol,
            thermodynamic_state=state,
        ),
        # Properties with a multi-component query.
        EnthalpyOfMixing(
            value=1.0 * unit.joule / unit.mole,
            substance=mixture,
            thermodynamic_state=state,
        ),
        ExcessMolarVolume(
            value=1.0 * unit.meter ** 3,
            substance=mixture,
            thermodynamic_state=state,
        ),
    ]

    expected_data_per_property = {
        Density: {"full_system_data": [methanol_liquid]},
        DielectricConstant: {"full_system_data": [methane_liquid]},
        EnthalpyOfVaporization: {
            "liquid_data": [methanol_liquid],
            "gas_data": [methanol_gas],
        },
        EnthalpyOfMixing: {
            "full_system_data": [mixture_liquid],
            "component_data": [[methane_liquid], [methanol_liquid]],
        },
        ExcessMolarVolume: {
            "full_system_data": [mixture_liquid],
            "component_data": [[methane_liquid], [methanol_liquid]],
        },
    }

    force_field = SmirnoffForceFieldSource.from_path("smirnoff99Frosst-1.1.0.offxml")

    with tempfile.TemporaryDirectory() as base_directory:
        # Create a storage backend with some dummy data.
        backend_directory = os.path.join(base_directory, "storage_dir")
        storage_backend = LocalFileStorage(backend_directory)

        force_field_id = storage_backend.store_force_field(force_field)

        # Maps each (substance, phase, n molecules) onto its storage key.
        storage_keys = {}

        for data_description in data_to_store:
            substance, phase, n_mol = data_description

            data_directory = os.path.join(base_directory, substance.identifier)
            data = create_dummy_simulation_data(
                data_directory,
                substance=substance,
                force_field_id=force_field_id,
                phase=phase,
                number_of_molecules=n_mol,
            )

            storage_keys[data_description] = storage_backend.store_object(
                data, data_directory
            )

        for physical_property in properties:
            property_class = physical_property.__class__

            schema = registered_calculation_schemas["ReweightingLayer"][
                property_class.__name__
            ]

            # A registered schema may be a factory rather than an instance.
            if callable(schema):
                schema = schema()

            # noinspection PyProtectedMember
            metadata = ReweightingLayer._get_workflow_metadata(
                base_directory, physical_property, "", [], storage_backend, schema,
            )

            assert metadata is not None

            expected_data = expected_data_per_property[property_class]

            for data_key, expected_entries in expected_data.items():
                assert data_key in metadata

                stored_entries = metadata[data_key]
                assert len(stored_entries) == len(expected_entries)

                if isinstance(stored_entries[0], list):
                    # Flatten any lists of lists.
                    stored_entries = [
                        entry for group in stored_entries for entry in group
                    ]
                    expected_entries = [
                        entry for group in expected_entries for entry in group
                    ]

                # The storage key is the final part of the first item of
                # each stored entry.
                found_keys = [os.path.basename(x) for x, _, _ in stored_entries]
                expected_keys = [storage_keys[x] for x in expected_entries]

                assert sorted(found_keys) == sorted(expected_keys)
def _build_input_output_substances():
    """Builds pairs of input and expected substances for the
    `test_build_coordinate_composition` test.

    Returns
    -------
    list of tuple of Substance and Substance
        A list of input and expected substances.
    """

    def binary_substance(fraction_o, fraction_c):
        # A mixture of "O" and "C" with the requested mole fractions.
        substance = Substance()
        substance.add_component(Component("O"), MoleFraction(fraction_o))
        substance.add_component(Component("C"), MoleFraction(fraction_c))
        return substance

    # Start with some easy cases, where the expected substance matches
    # the input substance.
    substances = [
        (Substance.from_components(*smiles), Substance.from_components(*smiles))
        for smiles in (("O",), ("O", "C"), ("O", "C", "CO"))
    ]

    # Handle some cases where rounding will need to occur.
    substances.append((binary_substance(0.41, 0.59), binary_substance(0.4, 0.6)))
    substances.append((binary_substance(0.59, 0.41), binary_substance(0.6, 0.4)))

    return substances
def test_density_dielectric_merging(workflow_merge_function):
    """Check how the default density and dielectric constant simulation
    workflows merge with one another.

    After `workflow_merge_function` has been applied to the two workflows,
    the protocols found at matching positions of their topologically sorted
    dependants graphs must share an identical schema, except for those whose
    id contains "extract_traj" or "extract_stats", which must differ.

    Parameters
    ----------
    workflow_merge_function
        Called with the density workflow and the dielectric workflow.
        Presumably supplied by a fixture / parametrization defined
        elsewhere -- confirm against the test module.
    """
    substance = Substance.from_components("C")

    density = evaluator.properties.Density(
        thermodynamic_state=ThermodynamicState(
            temperature=298 * unit.kelvin, pressure=1 * unit.atmosphere
        ),
        phase=PropertyPhase.Liquid,
        substance=substance,
        value=10 * unit.gram / unit.mole,
        uncertainty=1 * unit.gram / unit.mole,
    )
    dielectric = evaluator.properties.DielectricConstant(
        thermodynamic_state=ThermodynamicState(
            temperature=298 * unit.kelvin, pressure=1 * unit.atmosphere
        ),
        phase=PropertyPhase.Liquid,
        substance=substance,
        value=10 * unit.gram / unit.mole,
        uncertainty=1 * unit.gram / unit.mole,
    )

    density_schema = density.default_simulation_schema().workflow_schema
    dielectric_schema = dielectric.default_simulation_schema().workflow_schema

    density_metadata = Workflow.generate_default_metadata(
        density, "smirnoff99Frosst-1.1.0.offxml", []
    )
    # Generate the metadata from the dielectric property itself, rather than
    # re-using the density property.
    dielectric_metadata = Workflow.generate_default_metadata(
        dielectric, "smirnoff99Frosst-1.1.0.offxml", []
    )

    density_workflow = Workflow(density_metadata)
    density_workflow.schema = density_schema

    dielectric_workflow = Workflow(dielectric_metadata)
    dielectric_workflow.schema = dielectric_schema

    workflow_merge_function(density_workflow, dielectric_workflow)

    density_workflow_graph = density_workflow.to_graph()
    dielectric_workflow_graph = dielectric_workflow.to_graph()

    # noinspection PyProtectedMember
    dependants_graph_a = density_workflow_graph._protocol_graph._build_dependants_graph(
        density_workflow_graph.protocols, False, apply_reduction=True
    )
    # noinspection PyProtectedMember
    dependants_graph_b = dielectric_workflow_graph._protocol_graph._build_dependants_graph(
        dielectric_workflow_graph.protocols, False, apply_reduction=True
    )

    merge_order_a = graph.topological_sort(dependants_graph_a)
    merge_order_b = graph.topological_sort(dependants_graph_b)

    for protocol_id_a, protocol_id_b in zip(merge_order_a, merge_order_b):

        schema_a = density_workflow.protocols[protocol_id_a].schema.json()
        schema_b = dielectric_workflow.protocols[protocol_id_b].schema.json()

        # Only the extraction protocols are expected to differ between the
        # two workflows.
        expect_identical = (
            "extract_traj" not in protocol_id_a
            and "extract_stats" not in protocol_id_a
        )

        if expect_identical:
            assert schema_a == schema_b
        else:
            assert schema_a != schema_b
def generate_default_metadata(
    physical_property,
    force_field_path,
    parameter_gradient_keys=None,
    target_uncertainty=None,
):
    """Builds the default dictionary of 'global' metadata for a property.

    Parameters
    ----------
    physical_property: PhysicalProperty
        The physical property whose attributes are exposed in the
        global scope.
    force_field_path: str
        The path to the force field parameters to use in the workflow.
    parameter_gradient_keys: list of ParameterGradientKey
        References to the parameters which observables should be
        differentiated with respect to. Only those found to be relevant
        to the property's substance are retained.
    target_uncertainty: pint.Quantity, optional
        The uncertainty to estimate the property to within. When omitted
        an infinite uncertainty in the units of the property value is used.

    Returns
    -------
    dict of str, Any

        The metadata dictionary, containing the following keys / types:

        - thermodynamic_state: `ThermodynamicState` - The thermodynamic
          state of the property.
        - substance: `Substance` - The substance of the property.
        - components: list of `Substance` - One single component substance
          per component of the property's substance.
        - target_uncertainty: pint.Quantity - The target uncertainty,
          converted to the units of the property value.
        - per_component_uncertainty: pint.Quantity - The target uncertainty
          divided by the sqrt of the number of components + 1.
        - force_field_path: str - The provided force field path.
        - parameter_gradient_keys: list of ParameterGradientKey - The
          relevant subset of the provided gradient keys.

        Any entries of the property's own metadata (when defined) are
        merged on top of these.
    """
    substance = physical_property.substance

    # Wrap each individual component in its own substance.
    components = [
        Substance.from_components(component) for component in substance.components
    ]

    value_units = physical_property.value.units

    if target_uncertainty is None:
        target_uncertainty = math.inf * value_units

    target_uncertainty = target_uncertainty.to(value_units)

    # The full mixture is counted as an extra component, hence the +1.
    number_of_systems = physical_property.substance.number_of_components + 1
    per_component_uncertainty = target_uncertainty / sqrt(number_of_systems)

    # Retain only the gradient keys which are relevant to this property.
    relevant_gradient_keys = Workflow._find_relevant_gradient_keys(
        physical_property.substance, force_field_path, parameter_gradient_keys
    )

    global_metadata = dict(
        thermodynamic_state=physical_property.thermodynamic_state,
        substance=physical_property.substance,
        components=components,
        target_uncertainty=target_uncertainty,
        per_component_uncertainty=per_component_uncertainty,
        force_field_path=force_field_path,
        parameter_gradient_keys=relevant_gradient_keys,
    )

    # Layer the property specific metadata over the defaults.
    if physical_property.metadata != UNDEFINED:
        global_metadata.update(physical_property.metadata)

    return global_metadata
def test_substance_len():
    """The length of a substance built from four SMILES patterns, one of
    which is repeated, should be three."""
    smiles_patterns = ("C", "CC", "CCC", "CCC")
    substance = Substance.from_components(*smiles_patterns)

    assert len(substance) == 3
def test_build_ligpargen_system(requests_mock):
    """Tests that `BuildLigParGenSystem` produces a system file when the
    LigParGen request and download URLs are served by mocked responses.

    Parameters
    ----------
    requests_mock
        The mocker used to register the fake POST endpoints.
    """
    force_field_source = LigParGenForceFieldSource(
        request_url="http://testligpargen.com/request",
        download_url="http://testligpargen.com/download",
    )

    substance = Substance.from_components("C", "O")

    def request_callback(request, context):
        """Checks the submitted SMILES and returns a fake job response."""
        context.status_code = 200

        # Pull the SMILES pattern out of the body of the posted form.
        smiles = re.search(r'"smiData"\r\n\r\n(.*?)\r\n', request.text).group(1)

        cmiles_molecule = load_molecule(smiles, toolkit="rdkit")
        smiles = mol_to_smiles(
            cmiles_molecule, isomeric=False, explicit_hydrogen=False, mapped=False
        )

        assert smiles == "C"
        return 'value="/tmp/0000.xml"'

    def download_callback(_, context):
        """Returns a fixed force field XML document."""
        context.status_code = 200
        return """
<ForceField>
<AtomTypes>
<Type name="opls_802" class="H802" element="H" mass="1.008000" />
<Type name="opls_804" class="H804" element="H" mass="1.008000" />
<Type name="opls_803" class="H803" element="H" mass="1.008000" />
<Type name="opls_800" class="C800" element="C" mass="12.011000" />
<Type name="opls_801" class="H801" element="H" mass="1.008000" />
</AtomTypes>
<Residues>
<Residue name="UNK">
<Atom name="C00" type="opls_800" />
<Atom name="H01" type="opls_801" />
<Atom name="H02" type="opls_802" />
<Atom name="H03" type="opls_803" />
<Atom name="H04" type="opls_804" />
<Bond from="0" to="1"/>
<Bond from="0" to="2"/>
<Bond from="0" to="3"/>
<Bond from="0" to="4"/>
</Residue>
</Residues>
<HarmonicBondForce>
<Bond class1="H801" class2="C800" length="0.109000" k="284512.000000"/>
<Bond class1="H802" class2="C800" length="0.109000" k="284512.000000"/>
<Bond class1="H803" class2="C800" length="0.109000" k="284512.000000"/>
<Bond class1="H804" class2="C800" length="0.109000" k="284512.000000"/>
</HarmonicBondForce>
<HarmonicAngleForce>
<Angle class1="H801" class2="C800" class3="H802" angle="1.881465" k="276.144000"/>
<Angle class1="H801" class2="C800" class3="H803" angle="1.881465" k="276.144000"/>
<Angle class1="H801" class2="C800" class3="H804" angle="1.881465" k="276.144000"/>
<Angle class1="H802" class2="C800" class3="H803" angle="1.881465" k="276.144000"/>
<Angle class1="H803" class2="C800" class3="H804" angle="1.881465" k="276.144000"/>
<Angle class1="H802" class2="C800" class3="H804" angle="1.881465" k="276.144000"/>
</HarmonicAngleForce>
<PeriodicTorsionForce>
<Improper class1="C800" class2="H801" class3="H802" class4="H803" k1="0.000000" k2="0.000000" k3="0.000000" k4="0.000000" periodicity1="1" periodicity2="2" periodicity3="3" periodicity4="4" phase1="0.00" phase2="3.141592653589793" phase3="0.00" phase4="3.141592653589793"/>
<Improper class1="C800" class2="H801" class3="H802" class4="H804" k1="0.000000" k2="0.000000" k3="0.000000" k4="0.000000" periodicity1="1" periodicity2="2" periodicity3="3" periodicity4="4" phase1="0.00" phase2="3.141592653589793" phase3="0.00" phase4="3.141592653589793"/>
</PeriodicTorsionForce>
<NonbondedForce coulomb14scale="0.5" lj14scale="0.5">
<Atom type="opls_803" charge="0.074800" sigma="0.250000" epsilon="0.125520" />
<Atom type="opls_802" charge="0.074800" sigma="0.250000" epsilon="0.125520" />
<Atom type="opls_800" charge="-0.299400" sigma="0.350000" epsilon="0.276144" />
<Atom type="opls_804" charge="0.074800" sigma="0.250000" epsilon="0.125520" />
<Atom type="opls_801" charge="0.074800" sigma="0.250000" epsilon="0.125520" />
</NonbondedForce>
</ForceField>
"""

    requests_mock.post(force_field_source.request_url, text=request_callback)
    requests_mock.post(force_field_source.download_url, text=download_callback)

    with tempfile.TemporaryDirectory() as directory:

        # Serialize the force field source so the protocol can load it by path.
        force_field_path = path.join(directory, "ff.json")

        with open(force_field_path, "w") as file:
            file.write(force_field_source.json())

        build_coordinates = BuildCoordinatesPackmol("build_coordinates")
        build_coordinates.max_molecules = 8
        build_coordinates.substance = substance
        build_coordinates.execute(directory, None)

        # A plain string id - no f-string is needed as nothing is interpolated.
        assign_parameters = BuildLigParGenSystem("assign_parameters")
        assign_parameters.force_field_path = force_field_path
        assign_parameters.coordinate_file_path = build_coordinates.coordinate_file_path
        assign_parameters.substance = substance
        assign_parameters.execute(directory, None)

        assert path.isfile(assign_parameters.system_path)
def data_set_from_data_frame(data_frame):
    """Converts a `pandas.DataFrame` to a `PhysicalPropertyDataSet` object.
    See the `PhysicalPropertyDataSet.to_pandas()` function for information
    on the required columns.

    Each row yields one property per property value column which does not
    contain a NaN for that row. The uncertainty of every created property
    is set to zero in the default unit of its type.

    Parameters
    ----------
    data_frame: pandas.DataFrame
        The data frame to convert.

    Returns
    -------
    PhysicalPropertyDataSet
        The converted data set. An empty data set is returned when the
        data frame contains no rows.

    Raises
    ------
    AssertionError
        If a required base or component column is missing, if no (or a
        duplicated) property value column is found, or if a component has
        neither a mole fraction nor an exact amount.
    """
    return_value = PhysicalPropertyDataSet()

    if len(data_frame) == 0:
        return return_value

    # Make sure the base columns are present.
    required_base_columns = [
        "Temperature (K)",
        "Pressure (kPa)",
        "Phase",
        "N Components",
        "Source",
    ]

    assert all(x in data_frame for x in required_base_columns)

    # Make sure the substance columns are present.
    max_components = max(int(x) for x in data_frame["N Components"])
    assert max_components > 0

    required_components_columns = [
        x
        for i in range(max_components)
        for x in [
            f"Component {i + 1}",
            f"Role {i + 1}",
            f"Mole Fraction {i + 1}",
            f"Exact Amount {i + 1}",
        ]
    ]

    assert all(x in data_frame for x in required_components_columns)

    # Determine which property types have a value column. The type name is
    # taken to be the first word of the column header.
    property_types = []

    for column_name in data_frame:

        if " Value" not in column_name:
            continue

        column_name_split = column_name.split(" ")
        assert len(column_name_split) >= 2

        property_type = getattr(evaluator.properties, column_name_split[0])
        property_types.append(property_type)

    assert len(property_types) > 0

    # Make sure we don't have duplicate property columns.
    assert len(set(property_types)) == len(property_types)

    # The default unit and value header of each type do not depend on the
    # row, so build them once up front rather than once per row.
    property_columns = []

    for property_type in property_types:

        default_unit = property_type.default_unit()
        value_header = f"{property_type.__name__} Value ({default_unit:~})"

        property_columns.append((property_type, default_unit, value_header))

    properties = []

    for _, row in data_frame.iterrows():

        # Create the substance from the component columns. `iterrows` does
        # not preserve dtypes across a row, so the count may come back as a
        # float - cast it in the same way as is done for `max_components`.
        number_of_components = int(row["N Components"])

        substance = Substance()

        for component_index in range(number_of_components):

            smiles = row[f"Component {component_index + 1}"]
            role = Component.Role[row[f"Role {component_index + 1}"]]
            mole_fraction = row[f"Mole Fraction {component_index + 1}"]
            exact_amount = row[f"Exact Amount {component_index + 1}"]

            # At least one type of amount must be provided per component.
            assert not numpy.isnan(mole_fraction) or not numpy.isnan(exact_amount)

            component = Component(smiles, role)

            if not numpy.isnan(mole_fraction):
                substance.add_component(component, MoleFraction(mole_fraction))
            if not numpy.isnan(exact_amount):
                substance.add_component(component, ExactAmount(exact_amount))

        # Extract the state
        pressure = row["Pressure (kPa)"] * unit.kilopascal
        temperature = row["Temperature (K)"] * unit.kelvin

        thermodynamic_state = ThermodynamicState(temperature, pressure)

        phase = PropertyPhase.from_string(row["Phase"])
        source = MeasurementSource(reference=row["Source"])

        for property_type, default_unit, value_header in property_columns:

            # A NaN marks that this row has no value for this property type.
            if numpy.isnan(row[value_header]):
                continue

            value = row[value_header] * default_unit
            uncertainty = 0.0 * default_unit

            physical_property = property_type(
                thermodynamic_state=thermodynamic_state,
                phase=phase,
                substance=substance,
                value=value,
                uncertainty=uncertainty,
                source=source,
            )

            properties.append(physical_property)

    return_value.add_properties(*properties)
    return return_value
def _rebuild_substance(self, number_of_molecules):
    """Constructs the `Substance` which this protocol actually built
    coordinates for. Because only a finite number of molecules can be
    added, the mole fractions may have been rounded and so may differ from
    those of the input substance.

    Parameters
    ----------
    number_of_molecules: list of int
        The number of molecules of each component to be added to the
        system, indexed in the same order as `self.substance.components`.

    Returns
    -------
    Substance
        The substance containing the corrected component amounts.

    Raises
    ------
    ValueError
        If mole fractions were recomputed but do not sum to 1.0.
    """
    substance = self.substance

    amounts_per_component = defaultdict(list)
    remaining_molecules = sum(number_of_molecules)

    # Exact amounts are carried across unchanged, and their molecules are
    # excluded from the pool which the mole fractions are computed against.
    for component in substance.components:

        exact_amount = next(
            (
                amount
                for amount in substance.get_amounts(component)
                if isinstance(amount, ExactAmount)
            ),
            None,
        )

        if exact_amount is None:
            continue

        remaining_molecules -= exact_amount.value
        amounts_per_component[component].append(exact_amount)

    # Recompute the mole fraction of each component which had one.
    mole_fraction_sum = 0.0
    mole_fraction_count = 0

    for index, component in enumerate(substance.components):

        has_mole_fraction = any(
            isinstance(amount, MoleFraction)
            for amount in substance.get_amounts(component)
        )

        if not has_mole_fraction:
            continue

        molecule_count = number_of_molecules[index]

        # Discount any molecules already accounted for by an exact amount.
        if component in amounts_per_component:
            molecule_count -= amounts_per_component[component][0].value

        mole_fraction = molecule_count / remaining_molecules
        amounts_per_component[component].append(MoleFraction(mole_fraction))

        mole_fraction_sum += mole_fraction
        mole_fraction_count += 1

    if mole_fraction_count > 0 and not np.isclose(mole_fraction_sum, 1.0):
        raise ValueError("The new mole fraction does not equal 1.0")

    rebuilt_substance = Substance()

    for component, amounts in amounts_per_component.items():
        for amount in amounts:
            rebuilt_substance.add_component(component, amount)

    return rebuilt_substance
def test_multiple_amounts():
    """Tests a substance whose components each carry both a mole fraction
    and an exact amount."""
    sodium = Component("[Na+]")
    chloride = Component("[Cl-]")

    substance = Substance()

    # Give each ion a mole fraction followed by a single exact molecule.
    for component, mole_fraction in ((sodium, 0.75), (chloride, 0.25)):
        substance.add_component(component, MoleFraction(mole_fraction))
        substance.add_component(component, ExactAmount(1))

    assert substance.number_of_components == 2

    # Both types of amount should have been retained for each component.
    assert len(substance.get_amounts(sodium)) == 2
    assert len(substance.get_amounts(chloride)) == 2

    # NOTE(review): presumably the two exact molecules are set aside and the
    # remaining four are split 3:1 by mole fraction - confirm against
    # `Substance.get_molecules_per_component`.
    counts_per_component = substance.get_molecules_per_component(6)

    assert len(counts_per_component) == 2

    assert counts_per_component[sodium.identifier] == 4
    assert counts_per_component[chloride.identifier] == 2