def test_multiple_amounts():
    """Check that a component may carry both a mole fraction and an exact amount.

    Each of the two ions is registered twice (once per amount type); the
    substance must still report two components, two amounts per component,
    and the expected molecule counts when asked for six molecules.
    """
    substance = Substance()

    sodium = Component("[Na+]")
    chloride = Component("[Cl-]")

    # (component, mole fraction) pairs; every component also gets one
    # exact molecule directly after its mole fraction is registered.
    for ion, fraction in ((sodium, 0.75), (chloride, 0.25)):
        substance.add_component(ion, MoleFraction(fraction))
        substance.add_component(ion, ExactAmount(1))

    assert substance.number_of_components == 2

    sodium_amounts = substance.get_amounts(sodium)
    chlorine_amounts = substance.get_amounts(chloride)

    assert len(sodium_amounts) == 2
    assert len(chlorine_amounts) == 2

    counts = substance.get_molecules_per_component(6)

    assert len(counts) == 2

    assert counts[sodium.identifier] == 4
    assert counts[chloride.identifier] == 2
def test_truncate_n_molecules():
    """Check the ``truncate_n_molecules`` switch of
    ``Substance.get_molecules_per_component``.

    Without truncation the request for 1000 molecules is expected to raise a
    ``ValueError``; with truncation the per-component counts are compared
    against fixed expected values.
    """
    ion_fraction = 0.00267

    substance = Substance()

    for ion_smiles in ("[Na+]", "[Cl-]"):
        substance.add_component(
            component=Component(smiles=ion_smiles),
            amount=MoleFraction(ion_fraction),
        )

    substance.add_component(
        component=Component(smiles="O"),
        amount=MoleFraction(1.0 - 2.0 * ion_fraction),
    )

    # Attempt to get the number of molecules without truncating.
    with pytest.raises(ValueError):
        substance.get_molecules_per_component(1000, truncate_n_molecules=False)

    # Attempt to get the number of molecules with truncating.
    truncated_counts = substance.get_molecules_per_component(
        1000, truncate_n_molecules=True
    )

    expected_counts = {
        "[Na+]{solv}": 3,
        "[Cl-]{solv}": 3,
        "O{solv}": 994,
    }
    assert truncated_counts == expected_counts
def create_dummy_substance(number_of_components, elements=None):
    """Build a substance of equally weighted placeholder components.

    The SMILES pattern of the n-th component (counting from one) is the
    concatenation of ``elements`` repeated n times, so every component of
    the returned substance has a distinct pattern.

    Parameters
    ----------
    number_of_components : int
        The number of components to add to the substance.
    elements : list of str, optional
        The elements that each component should contain. Defaults to
        ``["C"]`` when omitted.

    Returns
    -------
    Substance
        The created substance, with every component assigned a mole
        fraction of ``1.0 / number_of_components``.
    """
    elements = ["C"] if elements is None else elements

    substance = Substance()
    equal_fraction = 1.0 / number_of_components

    for repeat_count in range(1, number_of_components + 1):
        pattern = "".join(elements * repeat_count)
        substance.add_component(Component(pattern), MoleFraction(equal_fraction))

    return substance
def test_solvate_existing_structure_protocol():
    """Tests solvating a single methanol molecule in water."""
    import mdtraj

    solute = Component("CO")

    solute_substance = Substance()
    solute_substance.add_component(solute, ExactAmount(1))

    solvent_substance = Substance()
    solvent_substance.add_component(Component("O"), MoleFraction(1.0))

    with tempfile.TemporaryDirectory() as working_directory:
        # Step one: build coordinates for the lone solute molecule.
        build_solute = BuildCoordinatesPackmol("build_methanol")
        build_solute.max_molecules = 1
        build_solute.substance = solute_substance
        build_solute.execute(working_directory, ComputeResources())

        residue_names = build_solute.assigned_residue_names
        solute_residue_name = residue_names[solute.identifier]

        # Step two: add nine solvent molecules around the existing structure.
        solvate = SolvateExistingStructure("solvate_methanol")
        solvate.max_molecules = 9
        solvate.substance = solvent_substance
        solvate.solute_coordinate_file = build_solute.coordinate_file_path
        solvate.execute(working_directory, ComputeResources())

        solvated = mdtraj.load_pdb(solvate.coordinate_file_path)

        # One solute plus nine solvent residues, solute first.
        assert solvated.n_residues == 10
        assert solvated.top.residue(0).name == solute_residue_name
def from_components(cls, *components):
    """Build a substance in which every given component is equally abundant.

    Each component is added with a mole fraction of ``1 / len(components)``.

    Parameters
    ----------
    components: Component or str
        The components to add to the substance. Entries may be full
        `Component` objects or plain SMILES strings, which are wrapped in a
        `Component` before being added.

    Returns
    -------
    Substance
        The substance containing the requested components in equal amounts.

    Raises
    ------
    ValueError
        If no components are provided.
    """
    if not components:
        raise ValueError("At least one component must be specified")

    equal_fraction = 1.0 / len(components)
    new_substance = cls()

    for entry in components:
        # Promote bare SMILES strings to component objects.
        resolved = Component(smiles=entry) if isinstance(entry, str) else entry
        new_substance.add_component(resolved, MoleFraction(equal_fraction))

    return new_substance
def test_add_mole_fractions():
    """Check that adding the same component twice merges its mole fractions.

    Two additions of 0.5 must leave a single component holding a single
    ``MoleFraction`` whose value is (numerically) 1.0.
    """
    substance = Substance()

    for _ in range(2):
        substance.add_component(Component("C"), MoleFraction(0.5))

    assert substance.number_of_components == 1

    only_component = substance.components[0]
    amounts = substance.get_amounts(only_component)

    assert len(amounts) == 1

    merged_amount = next(iter(amounts))

    assert isinstance(merged_amount, MoleFraction)
    assert np.isclose(merged_amount.value, 1.0)
def data_frame() -> pandas.DataFrame:
    """Build a pandas frame of dummy density / enthalpy of mixing entries.

    One property is created for every combination of temperature, pressure,
    property type, composition and matching set of SMILES patterns. All
    values and uncertainties are set to one in the default unit of the
    property type.

    Returns
    -------
    pandas.DataFrame
        The result of ``PhysicalPropertyDataSet.to_pandas`` for the
        generated properties.
    """
    temperatures = [298.15, 318.15]
    pressures = [101.325, 101.0]

    property_types = [Density, EnthalpyOfMixing]

    compositions = [(1.0,), (1.0,), (0.25, 0.75), (0.75, 0.25)]
    # SMILES tuples keyed by the number of components they describe.
    smiles_by_size = {
        1: [("C(F)(Cl)(Br)",), ("C",)],
        2: [("CO", "C"), ("C", "CO")],
    }

    entries = []

    for temperature in temperatures:
        for pressure in pressures:
            for property_type in property_types:
                for composition in compositions:

                    for smiles_tuple in smiles_by_size[len(composition)]:

                        substance = Substance()

                        for pattern, fraction in zip(smiles_tuple, composition):
                            substance.add_component(
                                Component(pattern), MoleFraction(fraction)
                            )

                        state = ThermodynamicState(
                            temperature=temperature * unit.kelvin,
                            pressure=pressure * unit.kilopascal,
                        )

                        entries.append(
                            property_type(
                                thermodynamic_state=state,
                                phase=PropertyPhase.Liquid,
                                value=1.0 * property_type.default_unit(),
                                uncertainty=1.0 * property_type.default_unit(),
                                source=MeasurementSource(doi=" "),
                                substance=substance,
                            )
                        )

    data_set = PhysicalPropertyDataSet()
    data_set.add_properties(*entries)

    return data_set.to_pandas()
def _execute(self, directory, available_resources):
    """Filter the input substance by component role and renormalise it.

    Components whose role is not listed in ``self.component_roles`` are
    dropped. The mole fractions of the retained components are rescaled by
    the inverse of their sum (left untouched when that sum is numerically
    zero); any other kind of amount is carried over unchanged. The result is
    stored in ``self.filtered_substance``.

    Parameters
    ----------
    directory
        Not used by this implementation.
    available_resources
        Not used by this implementation.

    Raises
    ------
    ValueError
        If ``self.expected_components`` is defined and differs from the
        number of components which survive the filter.
    """
    source_substance = self.input_substance

    retained = [
        component
        for component in source_substance.components
        if component.role in self.component_roles
    ]

    # Accumulate with an explicit loop so the floating point summation
    # order matches the order in which the amounts are stored.
    fraction_sum = 0.0

    for component in retained:
        for amount in source_substance.get_amounts(component):
            if isinstance(amount, MoleFraction):
                fraction_sum += amount.value

    expected = self.expected_components

    if expected != UNDEFINED and expected != len(retained):
        raise ValueError(
            f"The filtered substance does not contain the expected number of "
            f"components ({self.expected_components}) - {retained}",
        )

    # Avoid dividing by zero when no mole fractions were retained.
    if np.isclose(fraction_sum, 0.0):
        scale = 1.0
    else:
        scale = 1.0 / fraction_sum

    self.filtered_substance = Substance()

    for component in retained:
        for amount in source_substance.get_amounts(component):

            if isinstance(amount, MoleFraction):
                amount = MoleFraction(amount.value * scale)

            self.filtered_substance.add_component(component, amount)
def _build_input_output_substances():
    """Builds pairs of input and expected substances for the
    `test_build_coordinate_composition` test.

    Returns
    -------
    list of tuple of Substance and Substance
        A list of input and expected substances.
    """

    def _binary_mixture(o_fraction, c_fraction):
        """Create an "O" / "C" mixture with the given mole fractions."""
        mixture = Substance()
        mixture.add_component(Component("O"), MoleFraction(o_fraction))
        mixture.add_component(Component("C"), MoleFraction(c_fraction))
        return mixture

    # Start with some easy cases where the output should match the input.
    substance_pairs = [
        (Substance.from_components(*smiles), Substance.from_components(*smiles))
        for smiles in (("O",), ("O", "C"), ("O", "C", "CO"))
    ]

    # Handle some cases where rounding will need to occur. Each entry holds
    # the (input fractions, expected fractions) of an "O" / "C" mixture.
    rounding_cases = (
        ((0.41, 0.59), (0.4, 0.6)),
        ((0.59, 0.41), (0.6, 0.4)),
    )

    for input_fractions, expected_fractions in rounding_cases:
        input_substance = _binary_mixture(*input_fractions)
        expected_substance = _binary_mixture(*expected_fractions)

        substance_pairs.append((input_substance, expected_substance))

    return substance_pairs
def test_build_coordinates_packmol_exact(count_exact_amount):
    """Tests that the build coordinate protocol behaves correctly for
    substances with exact amounts.

    The molecule limit is 11 when exact amounts count towards it and 10
    otherwise; in both cases 11 residues are expected in the built system.
    """
    import mdtraj

    substance = Substance()
    substance.add_component(Component("O"), MoleFraction(1.0))
    substance.add_component(Component("C"), ExactAmount(1))

    if count_exact_amount:
        molecule_limit = 11
    else:
        molecule_limit = 10

    protocol = BuildCoordinatesPackmol("build_coordinates")
    protocol.max_molecules = molecule_limit
    protocol.count_exact_amount = count_exact_amount
    protocol.substance = substance

    with tempfile.TemporaryDirectory() as directory:
        protocol.execute(directory)
        # The coordinate file must be read while the directory still exists.
        built_system = mdtraj.load_pdb(protocol.coordinate_file_path)

    assert built_system.n_residues == 11
def create_substance():
    """Create a substance holding one component for each of four roles.

    Returns
    -------
    Substance
        A substance with a single solute ("C"), ligand ("CC") and receptor
        ("CCC") molecule, plus a solvent ("O") with a mole fraction of one.
    """
    contents = [
        (Component("C", role=Component.Role.Solute), ExactAmount(1)),
        (Component("CC", role=Component.Role.Ligand), ExactAmount(1)),
        (Component("CCC", role=Component.Role.Receptor), ExactAmount(1)),
        (Component("O", role=Component.Role.Solvent), MoleFraction(1.0)),
    ]

    substance = Substance()

    for component, amount in contents:
        substance.add_component(component, amount)

    return substance
def _build_substance(
    guest_smiles: Optional[str],
    host_smiles: str,
    ionic_strength: Optional[unit.Quantity],
    negative_buffer_ion: str = "[Cl-]",
    positive_buffer_ion: str = "[Na+]",
):
    """Builds a substance containing a ligand and receptor solvated in an
    aqueous solution with a given ionic strength

    Parameters
    ----------
    guest_smiles
        The SMILES descriptor of the guest. When ``None`` no ligand is added
        and the guest contributes no charge.
    host_smiles
        The SMILES descriptor of the host.
    ionic_strength
        The ionic strength of the aqueous solvent. When ``None`` no buffer
        salt is added and the water mole fraction is one.
    negative_buffer_ion
        The SMILES descriptor of the negative ion, used both for the buffer
        salt and to neutralise a positive net charge.
    positive_buffer_ion
        The SMILES descriptor of the positive ion, used both for the buffer
        salt and to neutralise a negative net charge.

    Returns
    -------
        The built substance.
    """
    from openff.toolkit.topology import Molecule
    from simtk import unit as simtk_unit

    substance = Substance()

    if guest_smiles is not None:
        guest = Component(smiles=guest_smiles, role=Component.Role.Ligand)
        substance.add_component(component=guest, amount=ExactAmount(1))

    host = Component(smiles=host_smiles, role=Component.Role.Receptor)
    substance.add_component(component=host, amount=ExactAmount(1))

    water = Component(smiles="O", role=Component.Role.Solvent)
    sodium = Component(smiles=positive_buffer_ion, role=Component.Role.Solvent)
    chlorine = Component(smiles=negative_buffer_ion, role=Component.Role.Solvent)

    water_mole_fraction = 1.0

    if ionic_strength is not None:

        salt_mole_fraction = Substance.calculate_aqueous_ionic_mole_fraction(
            ionic_strength
        )

        if isinstance(salt_mole_fraction, unit.Quantity):
            # noinspection PyUnresolvedReferences
            salt_mole_fraction = salt_mole_fraction.magnitude

        # Each salt pair replaces two "units" of water mole fraction.
        water_mole_fraction = 1.0 - salt_mole_fraction * 2

        substance.add_component(
            component=sodium,
            amount=MoleFraction(salt_mole_fraction),
        )
        substance.add_component(
            component=chlorine,
            amount=MoleFraction(salt_mole_fraction),
        )

    substance.add_component(component=water, amount=MoleFraction(water_mole_fraction))

    host_molecule_charge = Molecule.from_smiles(host_smiles).total_charge
    guest_molecule_charge = (
        0.0 * simtk_unit.elementary_charge
        if guest_smiles is None
        else Molecule.from_smiles(guest_smiles).total_charge
    )

    net_charge = (host_molecule_charge + guest_molecule_charge).value_in_unit(
        simtk_unit.elementary_charge
    )

    # Round to the nearest integer rather than truncating towards zero. The
    # thresholds below tolerate small floating point error (e.g. -0.99995),
    # so the counter ion count has to tolerate it too; `int` alone would turn
    # -0.99995 into zero ions and -1.99995 into a single ion.
    n_counter_ions = abs(int(round(net_charge)))

    if net_charge <= -0.9999:
        substance.add_component(sodium, ExactAmount(n_counter_ions))
    elif net_charge >= 0.9999:
        substance.add_component(chlorine, ExactAmount(n_counter_ions))

    return substance
def complete_evaluator_data_set():
    """Create a more comprehensive `PhysicalPropertyDataSet` which contains one
    measurement for each of:

        * pure density
        * binary density
        * pure enthalpy of vaporization
        * binary enthalpy of mixing
        * binary excess molar volume
        * hydration free energy

    Every property shares the same thermodynamic state and measurement
    source, and is assigned a string id equal to its one-based position in
    the list above.

    Returns
    -------
    PhysicalPropertyDataSet
    """
    state = ThermodynamicState(298.15 * unit.kelvin, pressure=1.0 * unit.atmosphere)
    measurement_source = MeasurementSource(doi="10.1000/xyz123")

    # A single solute molecule in water for the hydration free energy.
    hydration_substance = Substance()
    hydration_substance.add_component(Component("O"), MoleFraction(1.0))
    hydration_substance.add_component(Component("CCCO"), ExactAmount(1))

    pure_density = Density(
        thermodynamic_state=state,
        phase=PropertyPhase.Liquid,
        substance=Substance.from_components("O"),
        value=1.0 * unit.kilogram / unit.meter**3,
        uncertainty=0.1 * unit.kilogram / unit.meter**3,
        source=measurement_source,
    )
    binary_density = Density(
        thermodynamic_state=state,
        phase=PropertyPhase.Liquid,
        substance=Substance.from_components("O", "CC=O"),
        value=1.0 * unit.kilogram / unit.meter**3,
        uncertainty=0.1 * unit.kilogram / unit.meter**3,
        source=measurement_source,
    )
    enthalpy_of_vaporization = EnthalpyOfVaporization(
        thermodynamic_state=state,
        phase=PropertyPhase(PropertyPhase.Liquid | PropertyPhase.Gas),
        substance=Substance.from_components("CCO"),
        value=1.0 * EnthalpyOfVaporization.default_unit(),
        uncertainty=0.1 * EnthalpyOfVaporization.default_unit(),
        source=measurement_source,
    )
    enthalpy_of_mixing = EnthalpyOfMixing(
        thermodynamic_state=state,
        phase=PropertyPhase.Liquid,
        substance=Substance.from_components("CCCCO", "CC(C=O)C"),
        value=1.0 * EnthalpyOfMixing.default_unit(),
        uncertainty=0.1 * EnthalpyOfMixing.default_unit(),
        source=measurement_source,
    )
    excess_molar_volume = ExcessMolarVolume(
        thermodynamic_state=state,
        phase=PropertyPhase.Liquid,
        substance=Substance.from_components("C(=O)CCCO", "CCCCCC"),
        value=1.0 * ExcessMolarVolume.default_unit(),
        uncertainty=0.1 * ExcessMolarVolume.default_unit(),
        source=measurement_source,
    )
    hydration_free_energy = SolvationFreeEnergy(
        thermodynamic_state=state,
        phase=PropertyPhase.Liquid,
        substance=hydration_substance,
        value=1.0 * SolvationFreeEnergy.default_unit(),
        uncertainty=0.1 * SolvationFreeEnergy.default_unit(),
        source=measurement_source,
    )

    all_properties = [
        pure_density,
        binary_density,
        enthalpy_of_vaporization,
        enthalpy_of_mixing,
        excess_molar_volume,
        hydration_free_energy,
    ]

    # Ids are the one-based positions, stored as strings.
    for position, physical_property in enumerate(all_properties, start=1):
        physical_property.id = str(position)

    data_set = PhysicalPropertyDataSet()
    data_set.add_properties(*all_properties)

    return data_set
def _apply(
    cls,
    data_frame: pandas.DataFrame,
    schema: ImportFreeSolvSchema,
    n_processes,
) -> pandas.DataFrame:
    """Append the FreeSolv measurements to an existing data frame.

    The table returned by ``cls._download_free_solv`` is converted row by row
    into ``SolvationFreeEnergy`` properties of a single solute molecule in
    water at 298.15 K and 101.325 kPa. The properties are converted to a
    pandas frame which is concatenated onto ``data_frame``.

    Parameters
    ----------
    data_frame
        The data frame to append the FreeSolv entries to.
    schema
        Not used by this implementation.
    n_processes
        Not used by this implementation.

    Returns
    -------
        The input frame with the FreeSolv rows concatenated onto its end.
    """
    from openff.evaluator import properties, substances, unit

    # Convert the data frame into data rows.
    raw_free_solv = cls._download_free_solv()

    free_energy_unit_column = "experimental value (kcal/mol)"
    uncertainty_column = "experimental uncertainty (kcal/mol)"
    reference_column = (
        "experimental reference (original or paper this value was taken from)"
    )

    entries = []

    for _, row in raw_free_solv.iterrows():

        # Extract and standardize the SMILES pattern of the solute.
        solute_smiles = row["SMILES"].strip()
        solute_smiles = substances.Component(solute_smiles).smiles

        # Build the substance.
        substance = Substance()
        substance.add_component(Component(smiles="O"), MoleFraction(1.0))
        substance.add_component(
            Component(smiles=solute_smiles, role=Component.Role.Solute),
            ExactAmount(1),
        )

        # Extract the value and uncertainty
        value = float(row[free_energy_unit_column]) * unit.kilocalorie / unit.mole
        std_error = float(row[uncertainty_column]) * unit.kilocalorie / unit.mole

        # Attempt to extract a DOI
        doi = cls._validate_doi(row[reference_column])

        state = ThermodynamicState(
            temperature=298.15 * unit.kelvin,
            pressure=101.325 * unit.kilopascal,
        )

        entries.append(
            SolvationFreeEnergy(
                thermodynamic_state=state,
                phase=PropertyPhase.Liquid,
                substance=substance,
                value=value.to(properties.SolvationFreeEnergy.default_unit()),
                uncertainty=std_error.to(
                    properties.SolvationFreeEnergy.default_unit()
                ),
                source=MeasurementSource(doi=doi),
            )
        )

    data_set = PhysicalPropertyDataSet()
    data_set.add_properties(*entries)

    converted_free_solv = data_set.to_pandas()

    return pandas.concat(
        [data_frame, converted_free_solv], ignore_index=True, sort=False
    )
def main():
    """Re-export a fixed list of JSON data sets from ``raw_data`` into
    ``raw_data_v2``.

    Each input file is expected to be tagged with a ``propertyestimator``
    ``PhysicalPropertyDataSet`` type. Every entry is rebuilt as a property
    object (substance, source, thermodynamic state, value, uncertainty and
    id), collected into a new ``PhysicalPropertyDataSet`` and written back
    out as both a JSON and a CSV file named after the data set.

    Raises
    ------
    AssertionError
        If an input file does not carry the expected ``@type`` tag.
    NotImplementedError
        If an amount or source with an unrecognised ``@type`` is found.
    """
    os.makedirs("raw_data_v2", exist_ok=True)

    for data_set_name in [
            "curated_data_set",
            "gaff 1.81",
            "gaff 2.11",
            "parsley 1.0.0",
            "smirnoff99frosst 1.1.0",
    ]:

        with open(os.path.join("raw_data", f"{data_set_name}.json")) as file:
            raw_data_set = json.load(file)

        # Only files tagged as this data set type are handled.
        assert (raw_data_set["@type"] ==
                "propertyestimator.datasets.datasets.PhysicalPropertyDataSet")

        physical_properties = []

        # The "properties" mapping holds lists of entries; the keys are not
        # used here.
        for raw_data_set_entries in raw_data_set["properties"].values():

            for raw_data_set_entry in raw_data_set_entries:

                # Extract the substance this entry was measured for.
                substance = Substance()

                for raw_component in raw_data_set_entry["substance"][
                        "components"]:

                    component = Component(
                        smiles=raw_component["smiles"],
                        role=Component.Role[raw_component["role"]["value"]],
                    )

                    # Amounts are looked up by the SMILES of the component.
                    raw_amounts = raw_data_set_entry["substance"]["amounts"][
                        raw_component["smiles"]]

                    for raw_amount in raw_amounts["value"]:

                        if (raw_amount["@type"] ==
                                "propertyestimator.substances.Substance->MoleFraction"
                            ):

                            substance.add_component(
                                component, MoleFraction(raw_amount["value"]))

                        elif (raw_amount["@type"] ==
                              "propertyestimator.substances.Substance->ExactAmount"
                              ):

                            substance.add_component(
                                component, ExactAmount(raw_amount["value"]))

                        else:
                            raise NotImplementedError()

                # Extract the source of the property
                if (raw_data_set_entry["source"]["@type"] ==
                        "propertyestimator.properties.properties.CalculationSource"
                    ):

                    source = CalculationSource(
                        fidelity=raw_data_set_entry["source"]["fidelity"])

                elif (raw_data_set_entry["source"]["@type"] ==
                      "propertyestimator.properties.properties.MeasurementSource"
                      ):

                    # The stored reference is passed through `correct_doi`
                    # and used as the DOI of the new source.
                    source = MeasurementSource(doi=correct_doi(
                        raw_data_set_entry["source"]["reference"]))

                else:
                    raise NotImplementedError()

                # Generate the new property object.
                # The class is looked up on `properties` using the last
                # dotted segment of the entry's "@type" tag.
                property_class = getattr(
                    properties, raw_data_set_entry["@type"].split(".")[-1])

                physical_property = property_class(
                    thermodynamic_state=ThermodynamicState(
                        temperature=(
                            raw_data_set_entry["thermodynamic_state"]
                            ["temperature"]["value"] *
                            unit.Unit(raw_data_set_entry["thermodynamic_state"]
                                      ["temperature"]["unit"])),
                        pressure=(
                            raw_data_set_entry["thermodynamic_state"]
                            ["pressure"]["value"] *
                            unit.Unit(raw_data_set_entry["thermodynamic_state"]
                                      ["pressure"]["unit"])),
                    ),
                    phase=PropertyPhase(raw_data_set_entry["phase"]),
                    substance=substance,
                    value=(raw_data_set_entry["value"]["value"] *
                           unit.Unit(raw_data_set_entry["value"]["unit"])),
                    # Uncertainties are only carried over for properties
                    # which do not have a measurement source.
                    uncertainty=(
                        None if isinstance(source, MeasurementSource) else
                        (raw_data_set_entry["uncertainty"]["value"] *
                         unit.Unit(raw_data_set_entry["uncertainty"]["unit"])
                         )),
                    source=source,
                )
                physical_property.id = raw_data_set_entry["id"]

                physical_properties.append(physical_property)

        # Write the rebuilt data set out in both JSON and CSV form.
        data_set = PhysicalPropertyDataSet()
        data_set.add_properties(*physical_properties)

        data_set.json(os.path.join("raw_data_v2", f"{data_set_name}.json"),
                      format=True)

        data_set.to_pandas().to_csv(
            os.path.join("raw_data_v2", f"{data_set_name}.csv"))
def test_ligand_receptor_yank_protocol():
    """Set up (without running) a ligand-receptor YANK calculation.

    Dummy systems are built for the solvated complex and the solvated
    ligand inside a temporary working directory, and the protocol is
    executed with ``setup_only`` enabled.
    """
    receptor = Component(smiles="c1ccccc1", role=Component.Role.Receptor)

    complex_substance = Substance()
    complex_substance.add_component(receptor, ExactAmount(1))
    complex_substance.add_component(
        Component(smiles="C", role=Component.Role.Ligand),
        ExactAmount(1),
    )
    complex_substance.add_component(
        Component(smiles="O", role=Component.Role.Solvent),
        MoleFraction(1.0),
    )

    ligand_substance = Substance()
    ligand_substance.add_component(
        Component(smiles="C", role=Component.Role.Ligand),
        ExactAmount(1),
    )
    ligand_substance.add_component(
        Component(smiles="O", role=Component.Role.Solvent),
        MoleFraction(1.0),
    )

    state = ThermodynamicState(
        temperature=298.15 * unit.kelvin, pressure=1.0 * unit.atmosphere
    )

    with tempfile.TemporaryDirectory() as directory, temporarily_change_directory(
        directory
    ):

        force_field_path = "ff.json"

        with open(force_field_path, "w") as file:
            file.write(build_tip3p_smirnoff_force_field().json())

        complex_coordinates, complex_system = _setup_dummy_system(
            "full", complex_substance, 3, force_field_path
        )
        ligand_coordinates, ligand_system = _setup_dummy_system(
            "ligand", ligand_substance, 2, force_field_path
        )

        protocol = LigandReceptorYankProtocol("yank")
        protocol.substance = complex_substance
        protocol.thermodynamic_state = state

        # Keep the calculation as small as possible and only set it up.
        protocol.number_of_iterations = 1
        protocol.steps_per_iteration = 1
        protocol.checkpoint_interval = 1
        protocol.verbose = True
        protocol.setup_only = True

        protocol.ligand_residue_name = "TMP"
        protocol.receptor_residue_name = "TMP"

        protocol.solvated_ligand_coordinates = ligand_coordinates
        protocol.solvated_ligand_system = ligand_system
        protocol.solvated_complex_coordinates = complex_coordinates
        protocol.solvated_complex_system = complex_system

        protocol.force_field_path = force_field_path

        protocol.execute("", ComputeResources())
def test_solvation_yank_protocol(solvent_smiles):
    """Set up (without running) a solvation free energy YANK calculation.

    Dummy systems are built for the solvated solute and the solute alone
    inside a temporary working directory, and the protocol is executed with
    ``setup_only`` enabled.

    Parameters
    ----------
    solvent_smiles
        The SMILES pattern of the solvent component.
    """
    solvated_substance = Substance()
    solvated_substance.add_component(
        Component(smiles="CO", role=Component.Role.Solute),
        ExactAmount(1),
    )
    solvated_substance.add_component(
        Component(smiles=solvent_smiles, role=Component.Role.Solvent),
        MoleFraction(1.0),
    )

    solvent_only = Substance()
    solvent_only.add_component(
        Component(smiles=solvent_smiles, role=Component.Role.Solvent),
        MoleFraction(1.0),
    )

    solute_only = Substance()
    solute_only.add_component(
        Component(smiles="CO", role=Component.Role.Solute),
        ExactAmount(1),
    )

    state = ThermodynamicState(
        temperature=298.15 * unit.kelvin, pressure=1.0 * unit.atmosphere
    )

    with tempfile.TemporaryDirectory() as directory, temporarily_change_directory(
        directory
    ):

        force_field_path = "ff.json"

        with open(force_field_path, "w") as file:
            file.write(build_tip3p_smirnoff_force_field().json())

        solvated_coordinates, solvated_system = _setup_dummy_system(
            "full", solvated_substance, 2, force_field_path
        )
        vacuum_coordinates, vacuum_system = _setup_dummy_system(
            "vacuum", solute_only, 1, force_field_path
        )

        protocol = SolvationYankProtocol("yank")
        protocol.solute = solute_only
        protocol.solvent_1 = solvent_only
        # The second "solvent" is an empty substance.
        protocol.solvent_2 = Substance()
        protocol.thermodynamic_state = state

        # Keep the calculation as small as possible and only set it up.
        protocol.number_of_iterations = 1
        protocol.steps_per_iteration = 1
        protocol.checkpoint_interval = 1
        protocol.verbose = True
        protocol.setup_only = True

        protocol.solution_1_coordinates = solvated_coordinates
        protocol.solution_1_system = solvated_system
        protocol.solution_2_coordinates = vacuum_coordinates
        protocol.solution_2_system = vacuum_system

        protocol.electrostatic_lambdas_1 = [1.00]
        protocol.steric_lambdas_1 = [1.00]
        protocol.electrostatic_lambdas_2 = [1.00]
        protocol.steric_lambdas_2 = [1.00]

        protocol.execute("", ComputeResources())
def from_pandas(cls, data_frame: pandas.DataFrame) -> "PhysicalPropertyDataSet":
    """Constructs a data set object from a pandas ``DataFrame`` object.

    Notes
    -----
    * All physical properties are assumed to be sourced from experimental
      measurements.
    * Currently this method only supports data frames containing properties
      which are built-in to the framework (e.g. Density).
    * This method assumes the data frame has a structure identical to that
      produced by the ``PhysicalPropertyDataSet.to_pandas`` function.
    * Properties are created in row order and, within a row, in the order
      in which the ``"<Property> Value (<unit>)"`` columns appear.

    Parameters
    ----------
    data_frame
        The data frame to construct the data set from.

    Returns
    -------
        The constructed data set.

    Raises
    ------
    AssertionError
        If a value header is malformed, names a property type which is not
        built-in, or specifies a unit whose dimensionality does not match
        the default unit of the property type.
    """
    from openff.evaluator import properties

    # The unit part accepts letters, digits, whitespace and the symbols
    # ``+ , - . / * ^`` so that exponents (e.g. "cm ** 3 / mol") are
    # recognised alongside simple ratios (e.g. "g / ml").
    header_pattern = re.compile(
        r"^([a-zA-Z]+) Value \(([a-zA-Z0-9+,\-./\s*^]*)\)$"
    )

    # Compiled once rather than for every row / property combination.
    doi_patterns = [
        re.compile(pattern, re.I)
        for pattern in (
            r"^10.\d{4,9}/[-._;()/:A-Z0-9]+$",
            r"^10.1002/[^\s]+$",
            r"^10.\d{4}/\d+-\d+X?(\d+)\d+<[\d\w]+:[\d\w]*>\d+.\d+.\w+;\d$",
            r"^10.1021/\w\w\d+$",
            r"^10.1207/[\w\d]+\&\d+_\d+$",
        )
    ]

    property_headers = {}

    # Validate that the headers have the correct format, specify a
    # built-in property type, and specify correctly the properties
    # units.
    for header in data_frame:

        if header.find(" Value ") < 0:
            continue

        match = header_pattern.match(header)
        assert match, f"The '{header}' header is not correctly formatted"

        property_type_string, property_unit_string = match.groups()

        assert hasattr(
            properties, property_type_string
        ), f"{property_type_string} is not a built-in property type"
        property_type = getattr(properties, property_type_string)

        property_unit = unit.Unit(property_unit_string)
        assert property_unit is not None

        assert (
            property_unit.dimensionality
            == property_type.default_unit().dimensionality
        ), f"The unit of the '{header}' header has the wrong dimensionality"

        property_headers[match.group(0)] = (property_type, property_unit)

    # Convert the data rows to property objects.
    physical_properties = []

    for _, data_row in data_frame.iterrows():

        # Missing cells mark values which are not defined for this row.
        data_row = data_row.dropna()

        # Extract the state at which the measurement was made.
        thermodynamic_state = ThermodynamicState(
            temperature=data_row["Temperature (K)"] * unit.kelvin,
            pressure=data_row["Pressure (kPa)"] * unit.kilopascal,
        )
        property_phase = PropertyPhase.from_string(data_row["Phase"])

        # Extract the substance the measurement was made for.
        substance = Substance()

        # Coerce to a built-in int so that counts which were stored as
        # floats (e.g. 2.0) are still accepted by `range`.
        for i in range(int(data_row["N Components"])):

            component = Component(
                smiles=data_row[f"Component {i + 1}"],
                role=Component.Role[data_row.get(f"Role {i + 1}", "Solvent")],
            )

            mole_fraction = data_row.get(f"Mole Fraction {i + 1}", 0.0)
            exact_amount = data_row.get(f"Exact Amount {i + 1}", 0)

            # Amounts of zero are treated as not being present.
            if not numpy.isclose(mole_fraction, 0.0):
                substance.add_component(component, MoleFraction(mole_fraction))
            if not numpy.isclose(exact_amount, 0.0):
                substance.add_component(component, ExactAmount(exact_amount))

        for (
            property_header,
            (property_type, property_unit),
        ) in property_headers.items():

            # Check to see whether the row contains a value for this
            # type of property.
            if property_header not in data_row:
                continue

            uncertainty_header = property_header.replace("Value", "Uncertainty")

            source_string = data_row["Source"]

            # A source is only treated as a DOI if every " + " separated
            # part of it looks like one.
            is_doi = all(
                any(pattern.match(split_string) for pattern in doi_patterns)
                for split_string in source_string.split(" + ")
            )

            physical_property = property_type(
                thermodynamic_state=thermodynamic_state,
                phase=property_phase,
                value=data_row[property_header] * property_unit,
                uncertainty=None
                if uncertainty_header not in data_row
                else data_row[uncertainty_header] * property_unit,
                substance=substance,
                source=MeasurementSource(
                    doi="" if not is_doi else source_string,
                    reference=source_string if not is_doi else "",
                ),
            )

            identifier = data_row.get("Id", None)

            if identifier:
                physical_property.id = identifier

            physical_properties.append(physical_property)

    data_set = PhysicalPropertyDataSet()
    data_set.add_properties(*physical_properties)

    return data_set
def _rebuild_substance(self, number_of_molecules):
    """Rebuilds the `Substance` object which this protocol is building
    coordinates for.

    This may not be the same as the input substance due to the finite
    number of molecules to be added causing rounding of mole fractions.
    Exact amounts are carried over unchanged (only the first exact amount
    of a component is considered), while mole fractions are recomputed from
    the molecule counts which remain once the exact amounts are removed.

    Parameters
    ----------
    number_of_molecules: list of int
        The number of each component which should be added to the system,
        in the same order as ``self.substance.components``.

    Returns
    -------
    Substance
        The substance which contains the corrected component amounts.

    Raises
    ------
    ValueError
        If mole fractions were recomputed but do not sum to one.
    """
    substance = self.substance

    rebuilt_amounts = defaultdict(list)
    remaining_molecules = sum(number_of_molecules)

    # Handle any exact amounts.
    for component in substance.components:

        first_exact = next(
            (
                amount
                for amount in substance.get_amounts(component)
                if isinstance(amount, ExactAmount)
            ),
            None,
        )

        if first_exact is None:
            continue

        # Exactly specified molecules do not contribute to mole fractions.
        remaining_molecules -= first_exact.value
        rebuilt_amounts[component].append(first_exact)

    # Recompute the mole fractions.
    fraction_sum = 0.0
    n_fractions = 0

    for index, component in enumerate(substance.components):

        has_mole_fraction = any(
            isinstance(amount, MoleFraction)
            for amount in substance.get_amounts(component)
        )

        if not has_mole_fraction:
            continue

        count = number_of_molecules[index]

        if component in rebuilt_amounts:
            # Discount the molecules accounted for by the exact amount.
            count -= rebuilt_amounts[component][0].value

        rounded_fraction = count / remaining_molecules
        rebuilt_amounts[component].append(MoleFraction(rounded_fraction))

        fraction_sum += rounded_fraction
        n_fractions += 1

    if not np.isclose(fraction_sum, 1.0) and n_fractions > 0:
        raise ValueError("The new mole fraction does not equal 1.0")

    rebuilt_substance = Substance()

    for component, amounts in rebuilt_amounts.items():
        for amount in amounts:
            rebuilt_substance.add_component(component, amount)

    return rebuilt_substance