def test_weight_by_mole_fraction_protocol(component_smiles, value):
    """Check that ``WeightByMoleFraction`` scales a value by a mole fraction.

    Parameters
    ----------
    component_smiles: str
        The SMILES pattern of the component whose amount in the full
        substance is used as the weighting factor.
    value
        The value to be weighted by the protocol.
    """
    mixture = Substance.from_components("C", "CC", "CCC")
    single_component = Substance.from_components(component_smiles)

    # The expected weighting factor is the value of the first amount which
    # the full mixture reports for the component of interest.
    component_identifier = single_component.components[0].identifier
    component_amounts = mixture.get_amounts(component_identifier)
    expected_fraction = next(iter(component_amounts)).value

    with tempfile.TemporaryDirectory() as working_directory:

        protocol = WeightByMoleFraction("weight")
        protocol.value = value
        protocol.full_substance = mixture
        protocol.component = single_component

        protocol.execute(working_directory, ComputeResources())

        assert protocol.weighted_value == value * expected_fraction
def test_same_component_batching():
    """Check that properties measured for the same substance are batched together.

    Two substances are each given a density and an enthalpy of vaporization,
    and the server is expected to produce two batches of two properties.
    """
    state = ThermodynamicState(
        temperature=1.0 * unit.kelvin, pressure=1.0 * unit.atmosphere
    )

    # Build a density followed by an enthalpy of vaporization for each of
    # the two mixtures, giving every property its own substance object.
    properties = []

    for second_component in ("C", "CO"):

        properties.append(
            Density(
                thermodynamic_state=state,
                substance=Substance.from_components("O", second_component),
                value=0.0 * unit.kilogram / unit.meter**3,
            )
        )
        properties.append(
            EnthalpyOfVaporization(
                thermodynamic_state=state,
                substance=Substance.from_components("O", second_component),
                value=0.0 * unit.kilojoule / unit.mole,
            )
        )

    data_set = PhysicalPropertyDataSet()
    data_set.add_properties(*properties)

    submission = EvaluatorClient._Submission()
    submission.dataset = data_set
    submission.options = RequestOptions()

    with DaskLocalCluster() as calculation_backend:

        server = EvaluatorServer(calculation_backend)
        batches = server._batch_by_same_component(submission, "")

        assert len(batches) == 2

        for batch in batches:
            assert len(batch.queued_properties) == 2
def test_simulation_data_query():
    """Check that stored simulation data can be queried by substance.

    Data is stored for two pure substances and their mixture. Each substance
    is then queried on its own, and finally the mixture is queried with
    `components_only` enabled.
    """
    methane = Substance.from_components("C")
    methanol = Substance.from_components("CO")
    mixture = Substance.from_components("C", "CO")

    all_substances = [methane, methanol, mixture]

    with tempfile.TemporaryDirectory() as root_directory:

        storage = LocalFileStorage(os.path.join(root_directory, "storage_dir"))

        # Store one piece of dummy data per substance.
        for substance in all_substances:

            data_directory = os.path.join(root_directory, f"{substance.identifier}")
            data_object = create_dummy_simulation_data(data_directory, substance)

            storage.store_object(data_object, data_directory)

        # Querying for an exact substance should yield a single result whose
        # first entry contains three items.
        for substance in all_substances:

            query = SimulationDataQuery()
            query.substance = substance

            results = storage.query(query)

            assert results is not None
            assert len(results) == 1

            first_entry = next(iter(results.values()))[0]
            assert len(first_entry) == 3

        # Querying for the components of the mixture should yield two results.
        component_query = SimulationDataQuery()
        component_query.substance = mixture
        component_query.substance_query = SubstanceQuery()
        component_query.substance_query.components_only = True

        results = storage.query(component_query)

        assert results is not None
        assert len(results) == 2
def test_calculate_reduced_potential_openmm():
    """Run ``OpenMMReducedPotentials`` end-to-end and check its statistics output.

    A small system is built with packmol and parameterized with a TIP3P
    SMIRNOFF force field. Reduced potentials are then computed for the
    packaged ``water.dcd`` test trajectory, and the resulting statistics
    file is checked for a reduced potential column.
    """
    substance = Substance.from_components("O")
    thermodynamic_state = ThermodynamicState(298 * unit.kelvin, 1.0 * unit.atmosphere)

    with tempfile.TemporaryDirectory() as directory:

        # Serialize the force field so the protocols can load it from disk.
        force_field_path = path.join(directory, "ff.json")

        with open(force_field_path, "w") as file:
            file.write(build_tip3p_smirnoff_force_field().json())

        build_coordinates = BuildCoordinatesPackmol("build_coordinates")
        build_coordinates.max_molecules = 10
        build_coordinates.mass_density = 0.05 * unit.grams / unit.milliliters
        build_coordinates.substance = substance
        build_coordinates.execute(directory, None)

        assign_parameters = BuildSmirnoffSystem("assign_parameters")
        assign_parameters.force_field_path = force_field_path
        assign_parameters.coordinate_file_path = build_coordinates.coordinate_file_path
        assign_parameters.substance = substance
        assign_parameters.execute(directory, None)

        reduced_potentials = OpenMMReducedPotentials("reduced_potentials")
        reduced_potentials.substance = substance
        reduced_potentials.thermodynamic_state = thermodynamic_state
        reduced_potentials.reference_force_field_paths = [force_field_path]
        reduced_potentials.system_path = assign_parameters.system_path
        reduced_potentials.trajectory_file_path = get_data_filename(
            "test/trajectories/water.dcd"
        )
        reduced_potentials.coordinate_file_path = get_data_filename(
            "test/trajectories/water.pdb"
        )
        reduced_potentials.kinetic_energies_path = get_data_filename(
            "test/statistics/stats_pandas.csv"
        )
        reduced_potentials.high_precision = False

        reduced_potentials.execute(directory, ComputeResources())

        # The protocol must have written a statistics file which includes
        # the reduced potential observable.
        assert path.isfile(reduced_potentials.statistics_file_path)

        final_array = StatisticsArray.from_pandas_csv(
            reduced_potentials.statistics_file_path
        )
        assert ObservableType.ReducedPotential in final_array
def _setup_dummy_system(directory):
    """Build and parameterize a single molecule system for use in tests.

    A TIP3P SMIRNOFF force field is written to `directory`, coordinates for
    a single "C" molecule are generated with packmol, and the system is
    then parameterized.

    Parameters
    ----------
    directory: str
        The directory in which to write the force field file and to execute
        the coordinate building and parameterization protocols.

    Returns
    -------
    tuple
        The `coordinate_file_path` output of the coordinate building
        protocol followed by the `system_path` output of the
        parameterization protocol.
    """
    force_field_path = path.join(directory, "ff.json")

    with open(force_field_path, "w") as file:
        file.write(build_tip3p_smirnoff_force_field().json())

    substance = Substance.from_components("C")

    build_coordinates = BuildCoordinatesPackmol("build_coordinates")
    build_coordinates.max_molecules = 1
    build_coordinates.mass_density = 0.001 * unit.grams / unit.milliliters
    build_coordinates.substance = substance
    build_coordinates.execute(directory, None)

    assign_parameters = BuildSmirnoffSystem("assign_parameters")
    assign_parameters.force_field_path = force_field_path
    assign_parameters.coordinate_file_path = build_coordinates.coordinate_file_path
    assign_parameters.substance = substance
    assign_parameters.execute(directory, None)

    return build_coordinates.coordinate_file_path, assign_parameters.system_path
def test_build_tleap_system():
    """Check that ``BuildTLeapSystem`` produces a system file for a mixture.

    Coordinates for a three component substance are built with packmol and
    then parameterized using a default ``TLeapForceFieldSource``.
    """
    with tempfile.TemporaryDirectory() as directory:

        # Serialize the force field source so the protocol can load it.
        force_field_path = path.join(directory, "ff.json")

        with open(force_field_path, "w") as file:
            file.write(TLeapForceFieldSource().json())

        substance = Substance.from_components("C", "O", "C(=O)N")

        build_coordinates = BuildCoordinatesPackmol("build_coordinates")
        build_coordinates.max_molecules = 9
        build_coordinates.substance = substance
        build_coordinates.execute(directory, None)

        assign_parameters = BuildTLeapSystem("assign_parameters")
        assign_parameters.force_field_path = force_field_path
        assign_parameters.coordinate_file_path = build_coordinates.coordinate_file_path
        assign_parameters.substance = substance
        assign_parameters.execute(directory, None)

        assert path.isfile(assign_parameters.system_path)
def _build_input_output_substances():
    """Builds pairs of input and expected substances for the
    `test_build_coordinate_composition` test.

    Returns
    -------
    list of tuple of Substance and Substance
        A list of input and expected substances.
    """

    def _binary_substance(fraction_o, fraction_c):
        """Creates an "O" / "C" substance with the given mole fractions."""
        substance = Substance()
        substance.add_component(Component("O"), MoleFraction(fraction_o))
        substance.add_component(Component("C"), MoleFraction(fraction_c))
        return substance

    # Start with some easy cases where the expected substance matches
    # the input substance.
    easy_cases = (("O",), ("O", "C"), ("O", "C", "CO"))

    substances = [
        (Substance.from_components(*smiles), Substance.from_components(*smiles))
        for smiles in easy_cases
    ]

    # Handle some cases where rounding will need to occur.
    rounding_cases = (
        ((0.41, 0.59), (0.4, 0.6)),
        ((0.59, 0.41), (0.6, 0.4)),
    )

    for input_fractions, expected_fractions in rounding_cases:

        input_substance = _binary_substance(*input_fractions)
        expected_substance = _binary_substance(*expected_fractions)

        substances.append((input_substance, expected_substance))

    return substances
def test_duplicate_simulation_data_storage(reverse_order):
    """Check how duplicate simulation data is handled by the storage backend.

    Three pieces of data which differ only in their coordinate file name
    and statistical inefficiency are stored in turn. After each store, the
    retrieved object is expected to be the one with the lowest statistical
    inefficiency stored so far.

    Parameters
    ----------
    reverse_order: bool
        Whether to store the data in order of decreasing (`True`) rather
        than increasing (`False`) statistical inefficiency.
    """
    substance = Substance.from_components("CO")

    with tempfile.TemporaryDirectory() as base_directory_path:

        local_storage = LocalFileStorage(
            os.path.join(base_directory_path, "storage")
        )

        # Construct some data to store with increasing
        # statistical inefficiencies.
        data_to_store = []

        for index in range(3):

            data_directory = os.path.join(base_directory_path, f"data_{index}")

            data_object = create_dummy_simulation_data(
                directory_path=data_directory,
                substance=substance,
                force_field_id="ff_id_1",
                coordinate_file_name=f"data_{index}.pdb",
                statistical_inefficiency=float(index),
                calculation_id="id",
            )
            data_to_store.append((data_object, data_directory))

        # Keep track of the storage keys.
        all_storage_keys = set()

        indexed_data = list(enumerate(data_to_store))

        if reverse_order:
            indexed_data.reverse()

        # Store the data.
        for index, (data_object, data_directory) in indexed_data:

            storage_key = local_storage.store_object(data_object, data_directory)
            all_storage_keys.add(storage_key)

            retrieved_object, stored_directory = local_storage.retrieve_object(
                storage_key
            )

            # When the order has been reversed, each new piece of data should
            # replace the last as it will have a lower statistical
            # inefficiency. Otherwise only the first object in the list
            # should be stored and never replaced, as it has the lowest
            # statistical inefficiency.
            expected_index = index if reverse_order else 0

            expected_object = data_to_store[expected_index][0]
            assert retrieved_object.json() == expected_object.json()

            # Make sure the directory has been correctly overwritten /
            # retained depending on the data order.
            coordinate_path = os.path.join(
                stored_directory, f"data_{expected_index}.pdb"
            )
            assert os.path.isfile(coordinate_path)

        # Make sure all pieces of data were assigned the same key.
        assert len(all_storage_keys) == 1
def test_density_dielectric_merging(workflow_merge_function):
    """Check that density and dielectric constant workflows merge as expected.

    After merging, the protocols of both workflows are compared pairwise in
    topological order. All pairs are expected to have identical schemas,
    except those whose id contains "extract_traj" or "extract_stats", which
    are expected to differ.

    Parameters
    ----------
    workflow_merge_function: callable
        A function which takes the two workflows and merges them.
    """
    substance = Substance.from_components("C")

    # NOTE(review): the values below do not carry physically meaningful
    # units for these properties. They are left unchanged here because only
    # the protocol schemas are compared - confirm before changing them.
    density = evaluator.properties.Density(
        thermodynamic_state=ThermodynamicState(
            temperature=298 * unit.kelvin, pressure=1 * unit.atmosphere
        ),
        phase=PropertyPhase.Liquid,
        substance=substance,
        value=10 * unit.gram / unit.mole,
        uncertainty=1 * unit.gram / unit.mole,
    )

    dielectric = evaluator.properties.DielectricConstant(
        thermodynamic_state=ThermodynamicState(
            temperature=298 * unit.kelvin, pressure=1 * unit.atmosphere
        ),
        phase=PropertyPhase.Liquid,
        substance=substance,
        value=10 * unit.gram / unit.mole,
        uncertainty=1 * unit.gram / unit.mole,
    )

    density_schema = density.default_simulation_schema().workflow_schema
    dielectric_schema = dielectric.default_simulation_schema().workflow_schema

    density_metadata = Workflow.generate_default_metadata(
        density, "smirnoff99Frosst-1.1.0.offxml", []
    )
    # The dielectric workflow's metadata must be generated from the
    # dielectric property itself, not from the density.
    dielectric_metadata = Workflow.generate_default_metadata(
        dielectric, "smirnoff99Frosst-1.1.0.offxml", []
    )

    density_workflow = Workflow(density_metadata)
    density_workflow.schema = density_schema

    dielectric_workflow = Workflow(dielectric_metadata)
    dielectric_workflow.schema = dielectric_schema

    workflow_merge_function(density_workflow, dielectric_workflow)

    density_workflow_graph = density_workflow.to_graph()
    dielectric_workflow_graph = dielectric_workflow.to_graph()

    # noinspection PyProtectedMember
    dependants_graph_a = density_workflow_graph._protocol_graph._build_dependants_graph(
        density_workflow_graph.protocols, False, apply_reduction=True
    )
    # noinspection PyProtectedMember
    dependants_graph_b = dielectric_workflow_graph._protocol_graph._build_dependants_graph(
        dielectric_workflow_graph.protocols, False, apply_reduction=True
    )

    merge_order_a = graph.topological_sort(dependants_graph_a)
    merge_order_b = graph.topological_sort(dependants_graph_b)

    for protocol_id_a, protocol_id_b in zip(merge_order_a, merge_order_b):

        density_json = density_workflow.protocols[protocol_id_a].schema.json()
        dielectric_json = dielectric_workflow.protocols[protocol_id_b].schema.json()

        if "extract_traj" not in protocol_id_a and "extract_stats" not in protocol_id_a:
            assert density_json == dielectric_json
        else:
            # The extraction protocols are expected to differ between the
            # two workflows.
            assert density_json != dielectric_json
def test_substance_len():
    """Check the length reported by a substance built with a repeated component."""
    smiles_patterns = ("C", "CC", "CCC", "CCC")
    substance = Substance.from_components(*smiles_patterns)

    # "CCC" is passed twice but is only expected to be counted once.
    number_of_components = len(substance)
    assert number_of_components == 3
def generate_default_metadata(
    physical_property,
    force_field_path,
    parameter_gradient_keys=None,
    target_uncertainty=None,
):
    """Builds the default dictionary of global workflow metadata.

    Parameters
    ----------
    physical_property: PhysicalProperty
        The physical property whose arguments are made available in the
        global scope.
    force_field_path: str
        The path to the force field parameters to use in the workflow.
    parameter_gradient_keys: list of ParameterGradientKey, optional
        A list of references to all of the parameters which all observables
        should be differentiated with respect to.
    target_uncertainty: pint.Quantity, optional
        The uncertainty which the property should be estimated to within.
        If `None`, an infinite uncertainty in the units of the property
        value is used.

    Returns
    -------
    dict of str, Any

        The metadata dictionary, with the following keys / types:

        - thermodynamic_state: `ThermodynamicState` - The state (T,p) at
          which the property is being computed
        - substance: `Substance` - The composition of the system of interest.
        - components: list of `Substance` - The components present in the
          system for which the property is being estimated.
        - target_uncertainty: pint.Quantity - The target uncertainty with
          which properties should be estimated, in the units of the
          property value.
        - per_component_uncertainty: pint.Quantity - The target uncertainty
          divided by the sqrt of the number of components in the system + 1
        - force_field_path: str - A path to the force field parameters with
          which the property should be evaluated.
        - parameter_gradient_keys: list of ParameterGradientKey - The
          gradient keys found to be relevant by
          `Workflow._find_relevant_gradient_keys`.

        Any entries of the property's own metadata (when defined) are
        merged on top of these keys.
    """
    substance = physical_property.substance

    # Represent each component of the substance as its own substance.
    components = [
        Substance.from_components(component) for component in substance.components
    ]

    value_units = physical_property.value.units

    if target_uncertainty is None:
        target_uncertainty = math.inf * value_units

    target_uncertainty = target_uncertainty.to(value_units)

    # The +1 accounts for the full mixture being a possible component.
    number_of_systems = substance.number_of_components + 1
    per_component_uncertainty = target_uncertainty / sqrt(number_of_systems)

    # Retain only those gradient keys which will actually be relevant
    # to the property of interest.
    relevant_gradient_keys = Workflow._find_relevant_gradient_keys(
        substance, force_field_path, parameter_gradient_keys
    )

    # Collect the accessible 'global' properties.
    global_metadata = {
        "thermodynamic_state": physical_property.thermodynamic_state,
        "substance": substance,
        "components": components,
        "target_uncertainty": target_uncertainty,
        "per_component_uncertainty": per_component_uncertainty,
        "force_field_path": force_field_path,
        "parameter_gradient_keys": relevant_gradient_keys,
    }

    # Include the property's own metadata where it has been set.
    property_metadata = physical_property.metadata

    if property_metadata != UNDEFINED:
        global_metadata.update(property_metadata)

    return global_metadata
def test_storage_retrieval():
    """Check that the reweighting layer retrieves the expected stored data.

    Dummy simulation data is stored for several substances and phases. For
    each property, the metadata generated by the reweighting layer is then
    checked to reference exactly the expected pieces of stored data.
    """
    # Create some dummy substances.
    methane = Substance.from_components("C")
    methanol = Substance.from_components("CO")
    mixture = Substance.from_components("C", "CO")

    # Extra substances whose data should never be retrieved.
    unused_pure = Substance.from_components("CCO")
    unused_mixture = Substance.from_components("CCO", "CO")

    # Each piece of data is described by (substance, phase, n molecules).
    liquid_methane = (methane, PropertyPhase.Liquid, 1000)
    liquid_methanol = (methanol, PropertyPhase.Liquid, 1000)
    gas_methanol = (methanol, PropertyPhase.Gas, 1)
    liquid_mixture = (mixture, PropertyPhase.Liquid, 1000)

    data_to_store = [
        liquid_methane,
        liquid_methanol,
        gas_methanol,
        liquid_mixture,
        (unused_pure, PropertyPhase.Liquid, 1000),
        (unused_mixture, PropertyPhase.Liquid, 1000),
    ]

    state = ThermodynamicState(temperature=1.0 * unit.kelvin)

    properties = [
        # Properties with a full system query.
        Density(
            value=1.0 * unit.gram / unit.litre,
            substance=methanol,
            thermodynamic_state=state,
        ),
        DielectricConstant(
            value=1.0 * unit.dimensionless,
            substance=methane,
            thermodynamic_state=state,
        ),
        # Property with a multi-phase (liquid + gas) query.
        EnthalpyOfVaporization(
            value=1.0 * unit.joule / unit.mole,
            substance=methanol,
            thermodynamic_state=state,
        ),
        # Properties with a multi-component (full system + components) query.
        EnthalpyOfMixing(
            value=1.0 * unit.joule / unit.mole,
            substance=mixture,
            thermodynamic_state=state,
        ),
        ExcessMolarVolume(
            value=1.0 * unit.meter ** 3,
            substance=mixture,
            thermodynamic_state=state,
        ),
    ]

    expected_data_per_property = {
        Density: {"full_system_data": [liquid_methanol]},
        DielectricConstant: {"full_system_data": [liquid_methane]},
        EnthalpyOfVaporization: {
            "liquid_data": [liquid_methanol],
            "gas_data": [gas_methanol],
        },
        EnthalpyOfMixing: {
            "full_system_data": [liquid_mixture],
            "component_data": [[liquid_methane], [liquid_methanol]],
        },
        ExcessMolarVolume: {
            "full_system_data": [liquid_mixture],
            "component_data": [[liquid_methane], [liquid_methanol]],
        },
    }

    force_field = SmirnoffForceFieldSource.from_path("smirnoff99Frosst-1.1.0.offxml")

    # Maps each piece of stored data onto the key it was stored under.
    storage_keys = {}

    with tempfile.TemporaryDirectory() as base_directory:

        # Create a storage backend with some dummy data.
        storage_backend = LocalFileStorage(
            os.path.join(base_directory, "storage_dir")
        )
        force_field_id = storage_backend.store_force_field(force_field)

        for data_description in data_to_store:

            substance, phase, n_mol = data_description

            data_directory = os.path.join(base_directory, substance.identifier)
            data = create_dummy_simulation_data(
                data_directory,
                substance=substance,
                force_field_id=force_field_id,
                phase=phase,
                number_of_molecules=n_mol,
            )

            storage_keys[(substance, phase, n_mol)] = storage_backend.store_object(
                data, data_directory
            )

        for physical_property in properties:

            property_class = physical_property.__class__

            schema = registered_calculation_schemas["ReweightingLayer"][
                property_class.__name__
            ]

            # Schemas may be registered as factories rather than instances.
            if callable(schema):
                schema = schema()

            # noinspection PyProtectedMember
            metadata = ReweightingLayer._get_workflow_metadata(
                base_directory,
                physical_property,
                "",
                [],
                storage_backend,
                schema,
            )

            assert metadata is not None

            expected_data = expected_data_per_property[property_class]

            for data_key, expected_entries in expected_data.items():

                assert data_key in metadata
                stored_entries = metadata[data_key]

                assert len(stored_entries) == len(expected_entries)

                if isinstance(stored_entries[0], list):

                    # Flatten any lists of lists.
                    stored_entries = [
                        entry for group in stored_entries for entry in group
                    ]
                    expected_entries = [
                        entry for group in expected_entries for entry in group
                    ]

                # The base name of the first item of each stored entry is
                # compared against the key the data was stored under.
                found_keys = sorted(
                    os.path.basename(entry_path) for entry_path, _, _ in stored_entries
                )
                expected_keys = sorted(
                    storage_keys[entry] for entry in expected_entries
                )

                assert found_keys == expected_keys
def test_to_pandas():
    """A test to ensure that data sets are convertible to pandas objects.

    Pure (density, dielectric constant) and binary (enthalpy of mixing,
    excess molar volume) properties are added at three temperatures, and the
    columns and shape of the resulting data frame are checked.
    """
    source = CalculationSource("Dummy", {})

    pure_substance = Substance.from_components("C")
    binary_substance = Substance.from_components("C", "O")

    temperatures = [298 * unit.kelvin, 300 * unit.kelvin, 302 * unit.kelvin]

    data_set = PhysicalPropertyDataSet()

    # Add the pure properties first...
    for temperature in temperatures:

        thermodynamic_state = ThermodynamicState(
            temperature=temperature, pressure=1.0 * unit.atmosphere
        )

        density_property = Density(
            thermodynamic_state=thermodynamic_state,
            phase=PropertyPhase.Liquid,
            substance=pure_substance,
            value=1 * unit.gram / unit.milliliter,
            uncertainty=0.11 * unit.gram / unit.milliliter,
            source=source,
        )
        dielectric_property = DielectricConstant(
            thermodynamic_state=thermodynamic_state,
            phase=PropertyPhase.Liquid,
            substance=pure_substance,
            value=1 * unit.dimensionless,
            uncertainty=0.11 * unit.dimensionless,
            source=source,
        )

        data_set.add_properties(density_property)
        data_set.add_properties(dielectric_property)

    # ...followed by the binary properties, preserving the insertion order.
    for temperature in temperatures:

        thermodynamic_state = ThermodynamicState(
            temperature=temperature, pressure=1.0 * unit.atmosphere
        )

        enthalpy_property = EnthalpyOfMixing(
            thermodynamic_state=thermodynamic_state,
            phase=PropertyPhase.Liquid,
            substance=binary_substance,
            value=1 * unit.kilojoules / unit.mole,
            uncertainty=0.11 * unit.kilojoules / unit.mole,
            source=source,
        )
        excess_property = ExcessMolarVolume(
            thermodynamic_state=thermodynamic_state,
            phase=PropertyPhase.Liquid,
            substance=binary_substance,
            value=1 * unit.meter**3 / unit.mole,
            uncertainty=0.11 * unit.meter**3 / unit.mole,
            source=source,
        )

        data_set.add_properties(enthalpy_property)
        data_set.add_properties(excess_property)

    data_set_pandas = data_set.to_pandas()

    # Check that a frame was returned before inspecting its contents, so a
    # missing frame fails with a clear assertion rather than a `TypeError`.
    assert data_set_pandas is not None

    required_columns = [
        "Temperature (K)",
        "Pressure (kPa)",
        "Phase",
        "N Components",
        "Source",
        "Component 1",
        "Role 1",
        "Mole Fraction 1",
        "Exact Amount 1",
        "Component 2",
        "Role 2",
        "Mole Fraction 2",
        "Exact Amount 2",
    ]

    assert all(x in data_set_pandas for x in required_columns)
    assert data_set_pandas.shape == (12, 21)

    # Two of the columns are expected to be entirely empty.
    data_set_without_na = data_set_pandas.dropna(axis=1, how="all")
    assert data_set_without_na.shape == (12, 19)
def test_build_ligpargen_system(requests_mock):
    """Check that ``BuildLigParGenSystem`` produces a system file.

    The LigParGen request and download endpoints are replaced by mocked
    responses so that no network access is required.

    Parameters
    ----------
    requests_mock
        The fixture used to register the mocked POST endpoints.
    """
    force_field_source = LigParGenForceFieldSource(
        request_url="http://testligpargen.com/request",
        download_url="http://testligpargen.com/download",
    )

    substance = Substance.from_components("C", "O")

    def request_callback(request, context):
        """Mimic the request endpoint, checking the submitted SMILES."""
        context.status_code = 200

        # Pull the submitted SMILES pattern out of the form data and
        # convert it into a non-isomeric, implicit hydrogen form.
        smiles = re.search(r'"smiData"\r\n\r\n(.*?)\r\n', request.text).group(1)

        cmiles_molecule = load_molecule(smiles, toolkit="rdkit")
        smiles = mol_to_smiles(
            cmiles_molecule, isomeric=False, explicit_hydrogen=False, mapped=False
        )

        assert smiles == "C"
        return 'value="/tmp/0000.xml"'

    def download_callback(_, context):
        """Mimic the download endpoint by returning a fixed force field."""
        context.status_code = 200

        return """
<ForceField>
<AtomTypes>
<Type name="opls_802" class="H802" element="H" mass="1.008000" />
<Type name="opls_804" class="H804" element="H" mass="1.008000" />
<Type name="opls_803" class="H803" element="H" mass="1.008000" />
<Type name="opls_800" class="C800" element="C" mass="12.011000" />
<Type name="opls_801" class="H801" element="H" mass="1.008000" />
</AtomTypes>
<Residues>
<Residue name="UNK">
<Atom name="C00" type="opls_800" />
<Atom name="H01" type="opls_801" />
<Atom name="H02" type="opls_802" />
<Atom name="H03" type="opls_803" />
<Atom name="H04" type="opls_804" />
<Bond from="0" to="1"/>
<Bond from="0" to="2"/>
<Bond from="0" to="3"/>
<Bond from="0" to="4"/>
</Residue>
</Residues>
<HarmonicBondForce>
<Bond class1="H801" class2="C800" length="0.109000" k="284512.000000"/>
<Bond class1="H802" class2="C800" length="0.109000" k="284512.000000"/>
<Bond class1="H803" class2="C800" length="0.109000" k="284512.000000"/>
<Bond class1="H804" class2="C800" length="0.109000" k="284512.000000"/>
</HarmonicBondForce>
<HarmonicAngleForce>
<Angle class1="H801" class2="C800" class3="H802" angle="1.881465" k="276.144000"/>
<Angle class1="H801" class2="C800" class3="H803" angle="1.881465" k="276.144000"/>
<Angle class1="H801" class2="C800" class3="H804" angle="1.881465" k="276.144000"/>
<Angle class1="H802" class2="C800" class3="H803" angle="1.881465" k="276.144000"/>
<Angle class1="H803" class2="C800" class3="H804" angle="1.881465" k="276.144000"/>
<Angle class1="H802" class2="C800" class3="H804" angle="1.881465" k="276.144000"/>
</HarmonicAngleForce>
<PeriodicTorsionForce>
<Improper class1="C800" class2="H801" class3="H802" class4="H803" k1="0.000000" k2="0.000000" k3="0.000000" k4="0.000000" periodicity1="1" periodicity2="2" periodicity3="3" periodicity4="4" phase1="0.00" phase2="3.141592653589793" phase3="0.00" phase4="3.141592653589793"/>
<Improper class1="C800" class2="H801" class3="H802" class4="H804" k1="0.000000" k2="0.000000" k3="0.000000" k4="0.000000" periodicity1="1" periodicity2="2" periodicity3="3" periodicity4="4" phase1="0.00" phase2="3.141592653589793" phase3="0.00" phase4="3.141592653589793"/>
</PeriodicTorsionForce>
<NonbondedForce coulomb14scale="0.5" lj14scale="0.5">
<Atom type="opls_803" charge="0.074800" sigma="0.250000" epsilon="0.125520" />
<Atom type="opls_802" charge="0.074800" sigma="0.250000" epsilon="0.125520" />
<Atom type="opls_800" charge="-0.299400" sigma="0.350000" epsilon="0.276144" />
<Atom type="opls_804" charge="0.074800" sigma="0.250000" epsilon="0.125520" />
<Atom type="opls_801" charge="0.074800" sigma="0.250000" epsilon="0.125520" />
</NonbondedForce>
</ForceField>
"""

    requests_mock.post(force_field_source.request_url, text=request_callback)
    requests_mock.post(force_field_source.download_url, text=download_callback)

    with tempfile.TemporaryDirectory() as directory:

        # Serialize the force field source so the protocol can load it.
        force_field_path = path.join(directory, "ff.json")

        with open(force_field_path, "w") as file:
            file.write(force_field_source.json())

        build_coordinates = BuildCoordinatesPackmol("build_coordinates")
        build_coordinates.max_molecules = 8
        build_coordinates.substance = substance
        build_coordinates.execute(directory, None)

        assign_parameters = BuildLigParGenSystem("assign_parameters")
        assign_parameters.force_field_path = force_field_path
        assign_parameters.coordinate_file_path = build_coordinates.coordinate_file_path
        assign_parameters.substance = substance
        assign_parameters.execute(directory, None)

        assert path.isfile(assign_parameters.system_path)