def test_ligand_set():
    """
    Exercise the accessors of ``ligands.LigandSet`` on the "mcl1_sample" set.

    Covers ligand lookup (including the ``ValueError`` raised for an unknown
    ligand name), the dataframe export with and without a column selection,
    the molecule export and, as a smoke test only, the HTML export.
    """
    ligand_set = ligands.LigandSet("mcl1_sample")
    lig_list = ligand_set.get_list()
    for key in lig_list:
        assert key in ligand_set.keys()
        assert isinstance(ligand_set.get_ligand(key), ligands.Ligand)

    # asking for a ligand that is not in the set must fail loudly
    with pytest.raises(ValueError, match="Ligand xxx is not part of set."):
        ligand_set.get_ligand("xxx")

    # full dataframe: every row must reproduce the data stored on the ligand
    df = ligand_set.get_dataframe()
    for _, row in df.iterrows():
        ligand = ligand_set[row.loc["name"][0]]
        test_data = row.loc[ligand._data.index]
        pd.testing.assert_series_equal(ligand._data, test_data, check_names=False)

    # dataframe restricted to a subset of the columns
    df = ligand_set.get_dataframe(columns=["name", "smiles"])
    for _, row in df.iterrows():
        ligand = ligand_set[row.loc["name"][0]]
        assert row["name"][0] == ligand.get_name()
        assert row["smiles"][0] == ligand._data["smiles"][0]

    molecules = ligand_set.get_molecules()
    for name, lig in ligand_set.items():
        # are_isomorphic returns an (is_isomorphic, atom_map) tuple; asserting
        # on the tuple itself always passes, so check the boolean explicitly.
        isomorphic, _ = Molecule.are_isomorphic(lig.get_molecule(), molecules[name])
        assert isomorphic

    # ToDo: proper test for get_html()
    ligand_set.get_html()
    ligand_set.get_html(columns=["name", "smiles"])
def test_ligand_data(target, ligand_name, lig):
    """
    Check that a ligand's SMILES, its SDF file and its molecule object agree.

    Parameters
    ----------
    target
        Target identifier, passed to ``targets.get_target_dir`` to locate the
        target's directory below ``targets.data_path``.
    ligand_name
        Name of the ligand; used as the sub-directory and the file stem of the
        SDF file under ``02_ligands``.
    lig
        Ligand object providing ``_data["smiles"]`` and ``get_molecule()``.
    """
    # molecule built from the stored SMILES, with explicit hydrogens
    m1 = Chem.MolFromSmiles(lig._data["smiles"][0])
    m1 = Chem.AddHs(m1)

    # first molecule of the coordinate file shipped with the ligand
    sdf_path = os.path.join(
        targets.data_path,
        targets.get_target_dir(target),
        "02_ligands",
        ligand_name,
        "crd",
        f"{ligand_name}.sdf",
    )
    m2 = Chem.SDMolSupplier(sdf_path, removeHs=False)[0]

    assert m1.GetNumAtoms() == m2.GetNumAtoms()

    # drop the conformers on both molecules before comparing them
    m1.RemoveAllConformers()
    m2.RemoveAllConformers()
    assert pytest.approx(1.0, 1e-9) == DataStructs.FingerprintSimilarity(
        Chem.RDKFingerprint(m1), Chem.RDKFingerprint(m2)
    )
    # assert Chem.MolToMolBlock(m1) == Chem.MolToMolBlock(m2)

    # the maximum common substructure has to span the complete molecule
    res = rdFMCS.FindMCS([m1, m2])
    assert res.numAtoms == m1.GetNumAtoms()
    assert res.numBonds == m1.GetNumBonds()

    m3 = lig.get_molecule()
    m2 = Molecule.from_rdkit(m2)
    # are_isomorphic returns an (is_isomorphic, atom_map) tuple; asserting on
    # the tuple itself always passes, so check the boolean explicitly.
    isomorphic, _ = Molecule.are_isomorphic(m2, m3)
    assert isomorphic
def test_abinitio_fitting_prep_no_gradient():
    """
    Prepare an abinitio target for fitting with gradient fitting switched off
    and check the files that are written for the single torsion drive.
    """
    torsion_target = AbInitio_SMIRNOFF()
    torsion_target.fit_gradient = False
    target_schema = biphenyl_target(target=torsion_target)
    biphenyl = Molecule.from_file(
        file_path=get_data("biphenyl.sdf"), file_format="sdf"
    )

    # feed a previously saved scan result into the schema
    saved_scan = TorsionDriveCollectionResult.parse_file(get_data("biphenyl.json.xz"))
    target_schema.update_with_results(results=saved_scan)
    assert target_schema.ready_for_fitting is True

    # everything below works on relative paths inside the temporary directory
    with temp_directory():
        torsion_target.prep_for_fitting(fitting_target=target_schema)

        # exactly one torsion drive folder is expected
        folders = os.listdir(".")
        assert len(folders) == 1
        target_files = os.listdir(folders[0])
        for expected_file in ("molecule.pdb", "scan.xyz", "molecule.mol2", "qdata.txt"):
            assert expected_file in target_files

        # the atom ordering in the pdb must be identical to the input molecule
        pdb_path = os.path.join(folders[0], "molecule.pdb")
        mol = Molecule.from_file(pdb_path, file_format="pdb")
        isomorphic, atom_map = Molecule.are_isomorphic(
            biphenyl, mol, return_atom_map=True
        )
        assert isomorphic is True
        assert atom_map == {index: index for index in range(biphenyl.n_atoms)}

        # the mol2 file has to carry partial charges
        mol2_path = os.path.join(folders[0], "molecule.mol2")
        mol = Molecule.from_file(mol2_path, "mol2")
        assert mol.partial_charges is not None

        # scan coordinates and energies must match the reference data
        coords, energies, gradients = read_qdata(
            qdata_file=os.path.join(folders[0], "qdata.txt")
        )
        # gradient fitting is off, so no gradients may have been written
        assert not gradients

        reference_data = target_schema.tasks[0].reference_data()
        for frame, (coord, energy) in enumerate(zip(coords, energies)):
            expected = reference_data[frame]
            assert expected.energy == energy
            assert coord == expected.molecule.geometry.flatten().tolist()
def _get_fragment_parent_mapping(
    fragment: Molecule, parent: Molecule
) -> Dict[int, int]:
    """
    Build an atom mapping from a fragment onto the parent it was made from.

    The two molecules may differ in size.

    Parameters
    ----------
    fragment: off.Molecule
        The fragment molecule that should be mapped on to the parent.
    parent: off.Molecule
        The parent molecule the fragment was made from.

    Notes
    -----
    When the MCS fallback is used the mapping will not be complete, that is
    some fragment atoms have no relation to the parent.

    Returns
    -------
    Dict[int, int]
        A mapping between the fragment and the parent molecule.
    """
    # first try a relaxed isomorphism match in the toolkit; only the atom map
    # of the result is of interest here
    _, mapping = Molecule.are_isomorphic(
        fragment,
        parent,
        return_atom_map=True,
        aromatic_matching=False,
        bond_order_matching=False,
        atom_stereochemistry_matching=False,
        bond_stereochemistry_matching=False,
    )
    if mapping is None:
        # no atom map was produced (the molecules differ), so fall back to the
        # rdkit MCS based mapping
        return FragmentEngine._get_rdkit_mcs_mapping(fragment, parent)
    return mapping
def loadDataset_low(datasetName, specification, benchmark_smiles, qca_overlapped_entries):
    """
    Low level call to load one torsion drive dataset and collect the torsion
    barrier data of its COMPLETE entries that do not overlap the benchmark set.

    Entries that are isomorphic (with relaxed matching) to a benchmark molecule
    are left out of the result; their record ids are appended to
    ``qca_overlapped_entries`` instead.

    Parameters
    ----------
    datasetName : str
        torsion drive dataset name.
    specification : str
        specification in the dataset. Example: "B3LYP-D3", "default", "UFF"
    benchmark_smiles : str
        path of a text file with one benchmark SMILES per line; blank lines
        are skipped.
    qca_overlapped_entries : str
        path of the file the record ids of overlapping entries are appended
        to, one id per line.

    Returns
    -------
    tb : list
        one item per COMPLETE entry that does not overlap the benchmark set,
        as returned by ``torsion_barrier_for_molecule``.

    Raises
    ------
    AssertionError
        if ``datasetName`` or ``specification`` is empty.
    """
    # Explicit checks instead of `assert` statements so the validation is not
    # stripped under `python -O`; AssertionError is kept so callers keep seeing
    # the same exception type.
    if not datasetName:
        message = "datasetName is empty. Check input list of dataset tuples"
        print(message)
        raise AssertionError(message)
    if not specification:
        message = "specification is empty. Check input list of dataset tuples"
        print(message)
        raise AssertionError(message)

    # initiate qc portal instance
    client = ptl.FractalClient()
    # from the TorsionDriveDataset collection picking up given datasetName
    ds = client.get_collection("TorsionDriveDataset", datasetName)
    ds.status([specification], status="COMPLETE")

    # Serial implementation
    # Benchmark molecules from the lim_mobley_parsely_benchmark
    # https://openforcefield.org/force-fields/force-fields/
    # https://github.com/MobleyLab/benchmarkff/blob/91476147f35579bc52bf984839fd20c72a61d76d/molecules/set_v03_non_redundant/trim3_full_qcarchive.smi
    with open(benchmark_smiles) as f:
        # strip the trailing newline and ignore blank lines before parsing
        bm_mols = [Molecule.from_smiles(line.strip()) for line in f if line.strip()]

    tb = []
    qca_entries = []
    # Walk the rows of the dataframe: `ds.df.size` counts rows * columns and
    # would run past the last row as soon as there is more than one column.
    for i, smiles in enumerate(ds.df.index):
        record = ds.df.iloc[i, 0]
        if record.status != "COMPLETE":
            continue

        entry = ds.get_entry(smiles)
        mapped_smiles = entry.attributes[
            "canonical_isomeric_explicit_hydrogen_mapped_smiles"
        ]
        mol1 = Molecule.from_mapped_smiles(mapped_smiles)

        # are_isomorphic returns (is_isomorphic, atom_map); only the flag is
        # needed and `any` stops at the first benchmark molecule that matches
        overlaps_benchmark = any(
            Molecule.are_isomorphic(
                mol1,
                mol,
                return_atom_map=False,
                aromatic_matching=False,
                formal_charge_matching=False,
                bond_order_matching=False,
                atom_stereochemistry_matching=False,
                bond_stereochemistry_matching=False,
            )[0]
            for mol in bm_mols
        )

        if overlaps_benchmark:
            # NOTE(review): the record id is always read from the 'default'
            # specification rather than from `specification` - confirm that
            # this is intended.
            qca_entries.append(entry.object_map['default'])
        else:
            tb.append(torsion_barrier_for_molecule(record, mapped_smiles))

    # every overlapping entry contributed exactly one id
    overlaps = len(qca_entries)

    # append (never overwrite) the ids of the overlapping entries
    with open(qca_overlapped_entries, "a") as f:
        f.writelines(f"{item}\n" for item in qca_entries)

    print("No. of overlaps with benchmark set, qca entries added to overlaps_qca_ids.txt: ", overlaps)
    print("No. of COMPLETE and not overlapping with benchmark in this dataset:", len(tb), "out of ", len(ds.df))
    return tb