def test_multiple_amounts():
    """Checks that a component may hold a mole fraction and an exact amount
    at the same time, and that both show up in the per-component counts."""
    sodium = Component("[Na+]")
    chloride = Component("[Cl-]")

    substance = Substance()

    # Register every ion twice: first as a mole fraction, then as an
    # exact amount of one molecule.
    for ion, fraction in ((sodium, 0.75), (chloride, 0.25)):
        substance.add_component(ion, MoleFraction(fraction))
        substance.add_component(ion, ExactAmount(1))

    # Re-adding an existing component must not create a new component...
    assert substance.number_of_components == 2

    # ...but each component should now carry two distinct amounts.
    for ion in (sodium, chloride):
        assert len(substance.get_amounts(ion)) == 2

    # Expected split when a total of six molecules is requested.
    counts = substance.get_molecules_per_component(6)

    assert len(counts) == 2

    assert counts[sodium.identifier] == 4
    assert counts[chloride.identifier] == 2
def from_components(cls, *components):
    """Builds a new `Substance` in which every supplied component is
    present with the same mole fraction.

    Parameters
    ----------
    components: Component or str
        The components to include. Each entry is either a `Component`
        object or a string, which is passed to `Component` as its smiles
        representation.

    Returns
    -------
    Substance
        A substance holding each requested component with a mole fraction
        of ``1 / len(components)``.

    Raises
    ------
    ValueError
        If no components are provided.
    """
    if not components:
        raise ValueError("At least one component must be specified")

    equal_share = 1.0 / len(components)
    new_substance = cls()

    for entry in components:
        # Plain strings are promoted to full `Component` objects.
        resolved = Component(smiles=entry) if isinstance(entry, str) else entry
        new_substance.add_component(resolved, MoleFraction(equal_share))

    return new_substance
def test_solvate_existing_structure_protocol():
    """Tests solvating a single methanol molecule in water."""
    import mdtraj

    methanol = Component("CO")

    solute = Substance()
    solute.add_component(methanol, ExactAmount(1))

    solvent = Substance()
    solvent.add_component(Component("O"), MoleFraction(1.0))

    with tempfile.TemporaryDirectory() as working_directory:

        # Step 1: build coordinates for the lone methanol molecule.
        build_solute = BuildCoordinatesPackmol("build_methanol")
        build_solute.max_molecules = 1
        build_solute.substance = solute
        build_solute.execute(working_directory, ComputeResources())

        # Remember which residue name the methanol was given so that it can
        # be located again in the solvated system.
        expected_residue_name = build_solute.assigned_residue_names[
            methanol.identifier
        ]

        # Step 2: solvate the previously built structure using the water
        # substance and a molecule limit of nine.
        add_solvent = SolvateExistingStructure("solvate_methanol")
        add_solvent.max_molecules = 9
        add_solvent.substance = solvent
        add_solvent.solute_coordinate_file = build_solute.coordinate_file_path
        add_solvent.execute(working_directory, ComputeResources())

        solvated_system = mdtraj.load_pdb(add_solvent.coordinate_file_path)

        # Ten residues are expected in total, with the methanol first.
        assert solvated_system.n_residues == 10
        assert solvated_system.top.residue(0).name == expected_residue_name
def create_dummy_substance(number_of_components, elements=None):
    """Creates a substance with a given number of components, each
    containing the specified elements.

    The component at position ``index`` is built from the smiles pattern
    formed by repeating ``elements`` ``index + 1`` times, and every
    component is assigned a mole fraction of ``1 / number_of_components``.

    Parameters
    ----------
    number_of_components : int
        The number of components to add to the substance. Must be at
        least one.
    elements : list of str
        The elements that each component should contain. Defaults to
        ``["C"]`` when not provided.

    Returns
    -------
    Substance
        The created substance.

    Raises
    ------
    ValueError
        If `number_of_components` is less than one.
    """
    # Validate up front: a count of zero would otherwise surface as an
    # opaque `ZeroDivisionError`, and a negative count would silently
    # produce an empty substance.
    if number_of_components < 1:
        raise ValueError("At least one component must be specified")

    if elements is None:
        elements = ["C"]

    substance = Substance()

    mole_fraction = 1.0 / number_of_components

    for index in range(number_of_components):

        # Repeat the element list one more time per component so that each
        # component gets a different smiles pattern.
        smiles_pattern = "".join(elements * (index + 1))

        substance.add_component(
            Component(smiles_pattern), MoleFraction(mole_fraction)
        )

    return substance
def test_add_mole_fractions():
    """Checks that adding the same component twice with mole fractions
    results in a single component whose mole fraction is the combined
    total."""
    substance = Substance()

    # Two additions of the same smiles, each contributing half.
    for _ in range(2):
        substance.add_component(Component("C"), MoleFraction(0.5))

    assert substance.number_of_components == 1

    merged_amounts = substance.get_amounts(substance.components[0])

    assert len(merged_amounts) == 1

    merged_amount = next(iter(merged_amounts))

    assert isinstance(merged_amount, MoleFraction)
    assert np.isclose(merged_amount.value, 1.0)
def test_filter_by_smiles():
    """A test to ensure that data sets may be filtered by which smiles
    their measured properties contain."""

    methanol = Substance()
    methanol.add_component(Component("CO"), MoleFraction(1.0))

    ethanol = Substance()
    ethanol.add_component(Component("CCO"), MoleFraction(1.0))

    # One density measurement per substance.
    methanol_density = create_dummy_property(Density)
    methanol_density.substance = methanol

    ethanol_density = create_dummy_property(Density)
    ethanol_density.substance = ethanol

    data_set = PhysicalPropertyDataSet()
    data_set.add_properties(methanol_density, ethanol_density)

    # Filtering on the methanol smiles should leave only the methanol
    # property behind.
    data_set.filter_by_smiles("CO")

    assert len(data_set) == 1

    assert methanol in data_set.substances
    assert ethanol not in data_set.substances
def _build_input_output_substances():
    """Builds sets of input and expected substances for the
    `test_build_coordinate_composition` test.

    Returns
    -------
    list of tuple of Substance and Substance
        A list of input and expected substances.
    """

    def _two_component(o_fraction, c_fraction):
        # Builds an "O" / "C" substance with the given mole fractions.
        mixture = Substance()
        mixture.add_component(Component("O"), MoleFraction(o_fraction))
        mixture.add_component(Component("C"), MoleFraction(c_fraction))
        return mixture

    # Start with some easy cases, where the expected substance is simply an
    # equal-fraction substance identical to the input.
    pairs = [
        (Substance.from_components(*smiles), Substance.from_components(*smiles))
        for smiles in (("O",), ("O", "C"), ("O", "C", "CO"))
    ]

    # Handle some cases where rounding will need to occur.
    rounding_cases = (
        ((0.41, 0.59), (0.4, 0.6)),
        ((0.59, 0.41), (0.6, 0.4)),
    )

    pairs.extend(
        (_two_component(*given), _two_component(*rounded))
        for given, rounded in rounding_cases
    )

    return pairs
def _execute(self, directory, available_resources):
    """Filters `self.input_substance` down to the components whose role
    matches `self.component_role` and stores the result on
    `self.filtered_substance`.

    Mole fractions of the retained components are rescaled by the inverse
    of their combined total; all other amount types are copied unchanged.

    Parameters
    ----------
    directory
        Not used by this implementation.
    available_resources
        Not used by this implementation.

    Raises
    ------
    ValueError
        If `self.expected_components` is defined and does not match the
        number of components which survived the filter.
    """
    filtered_components = []
    mole_fraction_sum = 0.0

    # Collect the matching components and tally up their mole fractions.
    for component in self.input_substance.components:

        if component.role != self.component_role:
            continue

        filtered_components.append(component)

        for amount in self.input_substance.get_amounts(component):
            if isinstance(amount, MoleFraction):
                mole_fraction_sum += amount.value

    count_is_checked = self.expected_components != UNDEFINED

    if count_is_checked and self.expected_components != len(filtered_components):
        raise ValueError(
            f"The filtered substance does not contain the expected number of "
            f"components ({self.expected_components}) - {filtered_components}",
        )

    # Avoid dividing by zero when no mole fractions were retained.
    if np.isclose(mole_fraction_sum, 0.0):
        rescale_factor = 1.0
    else:
        rescale_factor = 1.0 / mole_fraction_sum

    self.filtered_substance = Substance()

    for component in filtered_components:
        for amount in self.input_substance.get_amounts(component):

            rescaled_amount = (
                MoleFraction(amount.value * rescale_factor)
                if isinstance(amount, MoleFraction)
                else amount
            )

            self.filtered_substance.add_component(component, rescaled_amount)
def create_substance():
    """Creates a substance containing one component for each of the solute,
    ligand, receptor and solvent roles.

    Returns
    -------
    Substance
        A substance with a solute, a ligand and a receptor, each present as
        an exact amount of one, plus a solvent with a mole fraction of one.
    """
    substance = Substance()

    solute = Component("C", role=Component.Role.Solute)
    substance.add_component(solute, ExactAmount(1))

    ligand = Component("CC", role=Component.Role.Ligand)
    substance.add_component(ligand, ExactAmount(1))

    receptor = Component("CCC", role=Component.Role.Receptor)
    substance.add_component(receptor, ExactAmount(1))

    solvent = Component("O", role=Component.Role.Solvent)
    substance.add_component(solvent, MoleFraction(1.0))

    return substance
def _rebuild_substance(self, number_of_molecules):
    """Reconstructs the `Substance` which this protocol actually built
    coordinates for.

    Because only a finite number of molecules can be added, the mole
    fractions of the built system may be rounded relative to those of the
    input substance, so the result may differ from `self.substance`.

    Parameters
    ----------
    number_of_molecules: list of int
        The number of each component which should be added to the system,
        indexed in the same order as `self.substance.components`.

    Returns
    -------
    Substance
        The substance which contains the corrected component amounts.

    Raises
    ------
    ValueError
        If mole fractions were recomputed but do not sum to one.
    """
    rebuilt_amounts = defaultdict(list)

    # Molecules left over once all exact amounts have been accounted for.
    remaining_molecules = sum(number_of_molecules)

    # Pass 1: carry over exact amounts unchanged. Only the first exact
    # amount of a component is considered.
    for component in self.substance.components:

        exact = [
            amount
            for amount in self.substance.get_amounts(component)
            if isinstance(amount, ExactAmount)
        ]

        if exact:
            remaining_molecules -= exact[0].value
            rebuilt_amounts[component].append(exact[0])

    # Pass 2: recompute the mole fractions from the molecule counts.
    fraction_sum = 0.0
    fraction_count = 0

    for index, component in enumerate(self.substance.components):

        has_mole_fraction = any(
            isinstance(amount, MoleFraction)
            for amount in self.substance.get_amounts(component)
        )

        if not has_mole_fraction:
            continue

        count = number_of_molecules[index]

        # Molecules already attributed to this component's exact amount do
        # not contribute to its mole fraction.
        if component in rebuilt_amounts:
            count -= rebuilt_amounts[component][0].value

        fraction = count / remaining_molecules
        rebuilt_amounts[component].append(MoleFraction(fraction))

        fraction_sum += fraction
        fraction_count += 1

    if fraction_count > 0 and not np.isclose(fraction_sum, 1.0):
        raise ValueError("The new mole fraction does not equal 1.0")

    rebuilt_substance = Substance()

    for component, amounts in rebuilt_amounts.items():
        for amount in amounts:
            rebuilt_substance.add_component(component, amount)

    return rebuilt_substance
def data_set_from_data_frame(data_frame):
    """Converts a `pandas.DataFrame` to a `PhysicalPropertyDataSet` object.

    See the `PhysicalPropertyDataSet.to_pandas()` function for information
    on the required columns.

    Any column whose name contains ``" Value"`` is treated as a property
    column; the text before the first space is looked up as an attribute of
    `evaluator.properties` to find the property type. Every created
    property is given an uncertainty of zero in the property's default
    unit.

    Parameters
    ----------
    data_frame: pandas.DataFrame
        The data frame to convert.

    Returns
    -------
    PhysicalPropertyDataSet
        The converted data set. An empty data set is returned for an empty
        data frame.

    Raises
    ------
    AssertionError
        If required columns are missing, no property columns are found, a
        property type appears in more than one column, or a component has
        neither a mole fraction nor an exact amount.
    """

    return_value = PhysicalPropertyDataSet()

    if len(data_frame) == 0:
        return return_value

    # Make sure the base columns are present.
    required_base_columns = [
        "Temperature (K)",
        "Pressure (kPa)",
        "Phase",
        "N Components",
        "Source",
    ]

    assert all(x in data_frame for x in required_base_columns), (
        f"The data frame is missing the required columns: "
        f"{[x for x in required_base_columns if x not in data_frame]}"
    )

    # Make sure the substance columns are present.
    max_components = max(int(x) for x in data_frame["N Components"])
    assert max_components > 0, "Each row must contain at least one component"

    required_components_columns = [
        x
        for i in range(max_components)
        for x in [
            f"Component {i + 1}",
            f"Role {i + 1}",
            f"Mole Fraction {i + 1}",
            f"Exact Amount {i + 1}",
        ]
    ]

    assert all(x in data_frame for x in required_components_columns), (
        f"The data frame is missing the required columns: "
        f"{[x for x in required_components_columns if x not in data_frame]}"
    )

    # Work out which property types are present from the value columns.
    property_types = []

    for column_name in data_frame:

        if " Value" not in column_name:
            continue

        column_name_split = column_name.split(" ")

        assert (
            len(column_name_split) >= 2
        ), f"Unable to parse the property column name: {column_name!r}"

        property_type = getattr(evaluator.properties, column_name_split[0])
        property_types.append(property_type)

    assert len(property_types) > 0, "No property value columns were found"

    # Make sure we don't have duplicate property columns.
    assert len(set(property_types)) == len(
        property_types
    ), "Each property type may only appear in a single value column"

    properties = []

    for _, row in data_frame.iterrows():

        # Create the substance from the component columns. The count is
        # cast to `int` (as is done when computing `max_components`) since
        # `iterrows` does not preserve column dtypes and `range` rejects
        # non-integer values.
        number_of_components = int(row["N Components"])

        substance = Substance()

        for component_index in range(number_of_components):

            smiles = row[f"Component {component_index + 1}"]
            role = Component.Role[row[f"Role {component_index + 1}"]]
            mole_fraction = row[f"Mole Fraction {component_index + 1}"]
            exact_amount = row[f"Exact Amount {component_index + 1}"]

            assert not numpy.isnan(mole_fraction) or not numpy.isnan(
                exact_amount
            ), (
                f"Component {component_index + 1} must define a mole "
                f"fraction and / or an exact amount"
            )

            component = Component(smiles, role)

            # A component may carry a mole fraction, an exact amount, or
            # both.
            if not numpy.isnan(mole_fraction):
                substance.add_component(component, MoleFraction(mole_fraction))
            if not numpy.isnan(exact_amount):
                substance.add_component(component, ExactAmount(exact_amount))

        # Extract the state
        pressure = row["Pressure (kPa)"] * unit.kilopascal
        temperature = row["Temperature (K)"] * unit.kelvin

        thermodynamic_state = ThermodynamicState(temperature, pressure)

        phase = PropertyPhase.from_string(row["Phase"])
        source = MeasurementSource(reference=row["Source"])

        for property_type in property_types:

            default_unit = property_type.default_unit()
            value_header = f"{property_type.__name__} Value ({default_unit:~})"

            # Rows with no value recorded for this property type are
            # skipped.
            if numpy.isnan(row[value_header]):
                continue

            value = row[value_header] * default_unit
            uncertainty = 0.0 * default_unit

            physical_property = property_type(
                thermodynamic_state=thermodynamic_state,
                phase=phase,
                substance=substance,
                value=value,
                uncertainty=uncertainty,
                source=source,
            )

            properties.append(physical_property)

    return_value.add_properties(*properties)
    return return_value