def test_ligand_set():
    ligand_set = ligands.LigandSet("mcl1_sample")

    lig_list = ligand_set.get_list()
    for key in lig_list:
        assert key in ligand_set.keys()
        assert isinstance(ligand_set.get_ligand(key), ligands.Ligand)

    with pytest.raises(ValueError, match="Ligand xxx is not part of set."):
        ligand_set.get_ligand("xxx")

    df = ligand_set.get_dataframe()
    for i, row in df.iterrows():
        test_data = row.loc[ligand_set[row.loc["name"][0]]._data.index]
        pd.testing.assert_series_equal(ligand_set[row.loc["name"][0]]._data,
                                       test_data,
                                       check_names=False)

    df = ligand_set.get_dataframe(columns=["name", "smiles"])
    for i, row in df.iterrows():
        assert row["name"][0] == ligand_set[row.loc["name"][0]].get_name()
        assert row["smiles"][0] == ligand_set[row.loc["name"]
                                              [0]]._data["smiles"][0]

    molecules = ligand_set.get_molecules()
    for name, lig in ligand_set.items():
        assert Molecule.are_isomorphic(lig.get_molecule(), molecules[name])

    # ToDo: proper test for get_html()
    ligand_set.get_html()
    ligand_set.get_html(columns=["name", "smiles"])
def test_ligand_data(target, ligand_name, lig):
    m1 = Chem.MolFromSmiles(lig._data["smiles"][0])
    m1 = Chem.AddHs(m1)
    m2 = Chem.SDMolSupplier(
        os.path.join(
            targets.data_path,
            targets.get_target_dir(target),
            "02_ligands",
            ligand_name,
            "crd",
            f"{ligand_name}.sdf",
        ),
        removeHs=False,
    )[0]
    assert m1.GetNumAtoms() == m2.GetNumAtoms()
    m1.RemoveAllConformers()
    m2.RemoveAllConformers()
    assert pytest.approx(1.0, 1e-9) == DataStructs.FingerprintSimilarity(
        Chem.RDKFingerprint(m1), Chem.RDKFingerprint(m2))
    #            assert Chem.MolToMolBlock(m1) == Chem.MolToMolBlock(m2)
    res = rdFMCS.FindMCS([m1, m2])
    assert res.numAtoms == m1.GetNumAtoms()
    assert res.numBonds == m1.GetNumBonds()

    m3 = lig.get_molecule()
    m2 = Molecule.from_rdkit(m2)
    assert Molecule.are_isomorphic(m2, m3)
Beispiel #3
0
def test_abinitio_fitting_prep_no_gradient():
    """
    Test preparing for fitting using the abinitio target.
    """

    torsion_target = AbInitio_SMIRNOFF()
    torsion_target.fit_gradient = False
    target_schema = biphenyl_target(target=torsion_target)
    biphenyl = Molecule.from_file(file_path=get_data("biphenyl.sdf"),
                                  file_format="sdf")
    # now load in a scan result we have saved
    result_data = TorsionDriveCollectionResult.parse_file(
        get_data("biphenyl.json.xz"))
    # now try and update the results
    target_schema.update_with_results(results=result_data)
    assert target_schema.ready_for_fitting is True
    # now try and prep for fitting
    with temp_directory():
        torsion_target.prep_for_fitting(fitting_target=target_schema)
        # we should only have one torsion drive to do here
        folders = os.listdir(".")
        assert len(folders) == 1
        target_files = os.listdir(folders[0])
        assert "molecule.pdb" in target_files
        assert "scan.xyz" in target_files
        assert "molecule.mol2" in target_files
        assert "qdata.txt" in target_files
        # now we need to make sure the pdb order was not changed
        mol = Molecule.from_file(os.path.join(folders[0], "molecule.pdb"),
                                 file_format="pdb")
        isomorphic, atom_map = Molecule.are_isomorphic(biphenyl,
                                                       mol,
                                                       return_atom_map=True)
        assert isomorphic is True
        assert atom_map == dict((i, i) for i in range(biphenyl.n_atoms))

        # also make sure charges are in the mol2 file
        mol = Molecule.from_file(os.path.join(folders[0], "molecule.mol2"),
                                 "mol2")
        assert mol.partial_charges is not None

        # make sure the scan coords and energies match
        qdata_file = os.path.join(folders[0], "qdata.txt")
        coords, energies, gradients = read_qdata(qdata_file=qdata_file)
        # make sure no gradients were written
        assert not gradients
        reference_data = target_schema.tasks[0].reference_data()
        for i, (coord, energy) in enumerate(zip(coords, energies)):
            # find the reference data
            data = reference_data[i]
            assert data.energy == energy
            assert coord == data.molecule.geometry.flatten().tolist()
Beispiel #4
0
    def _get_fragment_parent_mapping(
        fragment: Molecule, parent: Molecule
    ) -> Dict[int, int]:
        """
        Get a mapping between two molecules of different size ie a fragment to a parent.

        Parameters
        ----------
        fragment: off.Molecule
            The fragment molecule that we want to map on to the parent.
        parent: off.Molecule
            The parent molecule the fragment was made from.

        Notes
        -----
            As the MCS is used to create the mapping it will not be complete, that is some fragment atoms have no relation to the parent.

        Returns
        -------
        Dict[int, int]
            A mapping between the fragment and the parent molecule.
        """

        # check to see if we can do a normal mapping in the toolkit
        isomorphic, atom_map = Molecule.are_isomorphic(
            fragment,
            parent,
            return_atom_map=True,
            aromatic_matching=False,
            bond_order_matching=False,
            bond_stereochemistry_matching=False,
            atom_stereochemistry_matching=False,
        )
        if atom_map is not None:
            return atom_map

        else:
            # this molecule are different sizes so now we can use rdkit trick
            return FragmentEngine._get_rdkit_mcs_mapping(fragment, parent)
Beispiel #5
0
def loadDataset_low(datasetName, specification, benchmark_smiles, qca_overlapped_entries):
    """
    Low level call to load each torsion drive dataset and return a list of molecules

        Parameters
        ----------
        datasetName : str
            torsion drive dataset name.
        specification : str
            specification in the dataset. Example: "B3LYP-D3", "default", "UFF"

        Returns
        -------
        molList : list of objects
            each row contains the tdr_object.id, dihedral_indices, torsion_barrier, oemol_object
    """
    while True:
        try:
            assert datasetName
            break
        except AssertionError:
            print("datasetName is empty. Check input list of dataset tuples")
            raise
    while True:
        try:
            assert specification
            break
        except AssertionError:
            print("specification is empty. Check input list of dataset tuples")
            raise

    # initiate qc portal instance
    client = ptl.FractalClient()
    # from the TorsionDriveDataset collection picking up given datasetName
    ds = client.get_collection("TorsionDriveDataset", datasetName)
    ds.status([specification], status="COMPLETE")

    # Serial implementation

    # Hardcoding benchmark molecules from the lim_mobley_parsely_benchmark
    # https://openforcefield.org/force-fields/force-fields/
    # https://github.com/MobleyLab/benchmarkff/blob/91476147f35579bc52bf984839fd20c72a61d76d/molecules/set_v03_non_redundant/trim3_full_qcarchive.smi
    
    with open(benchmark_smiles) as f:
        bm_smiles = f.readlines()
    bm_mols = [Molecule.from_smiles(smiles) for smiles in bm_smiles]
    
    tb = []
    overlaps = 0
    qca_entries = []
    
    for i in range(ds.df.size):
        if ds.df.iloc[i, 0].status == "COMPLETE":
            smiles = ds.df.index[i]
            mapped_smiles = ds.get_entry(smiles).attributes[
                "canonical_isomeric_explicit_hydrogen_mapped_smiles"
            ]
            mol1 = Molecule.from_mapped_smiles(mapped_smiles)
            not_identical = True
            for mol in bm_mols:
                isomorphic,atom_map = Molecule.are_isomorphic(mol1, 
                                                  mol,
                                                  return_atom_map=False,
                                                  aromatic_matching=False,
                                                  formal_charge_matching=False,
                                                  bond_order_matching=False,
                                                  atom_stereochemistry_matching=False,
                                                  bond_stereochemistry_matching=False,
                                                          )
                if(isomorphic):
                    not_identical = False
                    overlaps += 1
                    entry = ds.get_entry(smiles)
                    tdr_id = entry.object_map['default']
#                     print(tdr_id)
                    qca_entries.append(tdr_id)
                    break
            if(not_identical): 
                tb.append(torsion_barrier_for_molecule(ds.df.iloc[i, 0], mapped_smiles))
    
    # overlaps_qca_ids.txt is also a hardcoded file
    with open(qca_overlapped_entries, "a") as f:
        for item in qca_entries:
            f.write("%s\n" % item)
        
    print("No. of overlaps with benchmark set, qca entries added to overlaps_qca_ids.txt: ", overlaps)
    print("No. of COMPLETE and not overlapping with benchmark in this dataset:", len(tb), "out of ", len(ds.df))
    return tb