Ejemplo n.º 1
0
    def test_featurize_splif(self):
        """Check that featurize_splif agrees with the per-bin computation.

        ``featurize_splif`` is called with two contact bins and precomputed
        pairwise distances. Its result must be a list equal to the list built
        by calling ``compute_splif_features_in_range`` once per bin with the
        same molecules, distances and ``ecfp_degree``.
        """
        protein_coords, protein_mol = rgf.load_molecule(self.protein_file)
        ligand_coords, ligand_mol = rgf.load_molecule(self.ligand_file)
        pairwise = rgf.compute_pairwise_distances(protein_xyz=protein_coords,
                                                  ligand_xyz=ligand_coords)

        contact_bins = [(1, 2), (2, 3)]

        actual = rgf.featurize_splif(
            protein_coords,
            protein_mol,
            ligand_coords,
            ligand_mol,
            contact_bins=contact_bins,
            pairwise_distances=pairwise,
            ecfp_degree=2,
        )

        # Build the reference result one contact bin at a time.
        expected = []
        for contact_bin in contact_bins:
            per_bin = rgf.compute_splif_features_in_range(
                protein_mol,
                ligand_mol,
                pairwise,
                contact_bin,
                ecfp_degree=2,
            )
            expected.append(per_bin)

        self.assertIsInstance(actual, list)
        self.assertEqual(actual, expected)
Ejemplo n.º 2
0
  def test_compute_splif_features_in_range(self):
    """Validate the structure of compute_splif_features_in_range output.

    For each of two contact bins the result must be a dict keyed by
    (protein atom index, ligand atom index) pairs. Both indices must be
    integers that lie within the atom count of the respective molecule.
    Every value is iterated as strings of the form ``"<index>,<rest>"``
    whose index part must parse as a non-negative int.
    """
    protein_coords, protein_mol = rgf.load_molecule(self.protein_file)
    ligand_coords, ligand_mol = rgf.load_molecule(self.ligand_file)
    n_protein_atoms = protein_mol.GetNumAtoms()
    n_ligand_atoms = ligand_mol.GetNumAtoms()
    pairwise = rgf.compute_pairwise_distances(
        protein_xyz=protein_coords, ligand_xyz=ligand_coords)

    for contact_bin in ((0, 2), (2, 3)):
      features = rgf.compute_splif_features_in_range(
          protein_mol, ligand_mol, pairwise, contact_bin)
      self.assertIsInstance(features, dict)

      for atom_pair, fragments in features.items():
        protein_idx, ligand_idx = atom_pair

        # Type checks on both indices come first, then the range checks.
        self.assertIsInstance(protein_idx, (int, np.int64))
        self.assertIsInstance(ligand_idx, (int, np.int64))

        bounds = ((protein_idx, n_protein_atoms), (ligand_idx, n_ligand_atoms))
        for atom_idx, atom_count in bounds:
          self.assertGreaterEqual(atom_idx, 0)
          self.assertLess(atom_idx, atom_count)

        for fragment in fragments:
          # Unpacking into exactly two names also enforces a single comma.
          raw_index, _remainder = fragment.split(',')
          self.assertGreaterEqual(int(raw_index), 0)
Ejemplo n.º 3
0
    def test_compute_splif_features_in_range(self):
        """Validate the structure of compute_splif_features_in_range output.

        For each of two contact bins the result must be a dict keyed by
        (protein atom index, ligand atom index) pairs. Both indices must be
        integers that lie within the atom count of the respective molecule.
        Every value is iterated as strings of the form ``"<index>,<rest>"``
        whose index part must parse as a non-negative int.
        """
        protein_coords, protein_mol = rgf.load_molecule(self.protein_file)
        ligand_coords, ligand_mol = rgf.load_molecule(self.ligand_file)
        n_protein_atoms = protein_mol.GetNumAtoms()
        n_ligand_atoms = ligand_mol.GetNumAtoms()
        pairwise = rgf.compute_pairwise_distances(protein_xyz=protein_coords,
                                                  ligand_xyz=ligand_coords)

        for contact_bin in ((0, 2), (2, 3)):
            features = rgf.compute_splif_features_in_range(
                protein_mol, ligand_mol, pairwise, contact_bin)
            self.assertIsInstance(features, dict)

            for atom_pair, fragments in features.items():
                protein_idx, ligand_idx = atom_pair

                # Type checks on both indices come first, then range checks.
                self.assertIsInstance(protein_idx, (int, np.int64))
                self.assertIsInstance(ligand_idx, (int, np.int64))

                bounds = (
                    (protein_idx, n_protein_atoms),
                    (ligand_idx, n_ligand_atoms),
                )
                for atom_idx, atom_count in bounds:
                    self.assertGreaterEqual(atom_idx, 0)
                    self.assertLess(atom_idx, atom_count)

                for fragment in fragments:
                    # Two-name unpacking also enforces a single comma.
                    raw_index, _remainder = fragment.split(',')
                    self.assertGreaterEqual(int(raw_index), 0)
Ejemplo n.º 4
0
  def test_compute_pairwise_distances(self):
    """Check shape, value range and metric of compute_pairwise_distances.

    First part: for random coordinate sets of 10 and 50 points the result
    must have shape (10, 50) and every entry must lie in [0, sqrt(3)].
    Second part: for fixed points on the x axis the result must match the
    hand-computed distances.
    """
    n1 = 10
    n2 = 50
    coords1 = np.random.rand(n1, 3)
    coords2 = np.random.rand(n2, 3)

    distance = rgf.compute_pairwise_distances(coords1, coords2)
    self.assertEqual(distance.shape, (n1, n2))
    self.assertTrue((distance >= 0).all())
    # Random coords lie in the 3-D unit cube, so the largest possible
    # (Euclidean) distance is the cube diagonal sqrt(3). A bound of sqrt(2)
    # only holds in 2-D and makes this check fail at random.
    self.assertTrue((distance <= 3.0**0.5).all())

    # check if correct distance metric was used
    coords1 = np.array([[0, 0, 0], [1, 0, 0]])
    coords2 = np.array([[1, 0, 0], [2, 0, 0], [3, 0, 0]])
    expected = np.array([[1, 2, 3], [0, 1, 2]])
    distance = rgf.compute_pairwise_distances(coords1, coords2)
    self.assertEqual(distance.shape, expected.shape)
    # Compare with a tolerance: the distances are floating-point values.
    self.assertTrue(np.allclose(distance, expected))
Ejemplo n.º 5
0
    def test_compute_pairwise_distances(self):
        """Check shape, value range and metric of compute_pairwise_distances.

        First part: for random coordinate sets of 10 and 50 points the result
        must have shape (10, 50) and every entry must lie in [0, sqrt(3)].
        Second part: for fixed points on the x axis the result must match the
        hand-computed distances.
        """
        n1 = 10
        n2 = 50
        coords1 = np.random.rand(n1, 3)
        coords2 = np.random.rand(n2, 3)

        distance = rgf.compute_pairwise_distances(coords1, coords2)
        self.assertEqual(distance.shape, (n1, n2))
        self.assertTrue((distance >= 0).all())
        # Random coords lie in the 3-D unit cube, so the largest possible
        # (Euclidean) distance is the cube diagonal sqrt(3). A bound of
        # sqrt(2) only holds in 2-D and makes this check fail at random.
        self.assertTrue((distance <= 3.0**0.5).all())

        # check if correct distance metric was used
        coords1 = np.array([[0, 0, 0], [1, 0, 0]])
        coords2 = np.array([[1, 0, 0], [2, 0, 0], [3, 0, 0]])
        expected = np.array([[1, 2, 3], [0, 1, 2]])
        distance = rgf.compute_pairwise_distances(coords1, coords2)
        self.assertEqual(distance.shape, expected.shape)
        # Compare with a tolerance: the distances are floating-point values.
        self.assertTrue(np.allclose(distance, expected))
Ejemplo n.º 6
0
    def test_compute_pairwise_distances(self):
        """Check the shape and value range of compute_pairwise_distances.

        For random coordinate sets of 10 and 50 points the result must have
        shape (10, 50) and every entry must lie in [0, sqrt(3)].
        """
        n1 = 10
        n2 = 50
        coords1 = np.random.rand(n1, 3)
        coords2 = np.random.rand(n2, 3)

        distance = rgf.compute_pairwise_distances(coords1, coords2)
        self.assertEqual(distance.shape, (n1, n2))
        self.assertTrue((distance >= 0).all())
        # Random coords lie in the 3-D unit cube, so the largest possible
        # (Euclidean) distance is the cube diagonal sqrt(3). A bound of
        # sqrt(2) only holds in 2-D and makes this check fail at random.
        self.assertTrue((distance <= 3.0**0.5).all())
Ejemplo n.º 7
0
  def test_featurize_binding_pocket_ecfp(self):
    """Exercise featurize_binding_pocket_ecfp with several keyword options.

    Checks that the call returns two dicts, that supplying precomputed
    pairwise distances gives the same dicts, that ``cutoff=2.0`` and
    ``cutoff=6.0`` shrink and grow the protein dict relative to the default
    call, and that ``ecfp_degree=3`` changes both dicts.
    """
    protein_coords, protein_mol = rgf.load_molecule(self.protein_file)
    ligand_coords, ligand_mol = rgf.load_molecule(self.ligand_file)
    pairwise = rgf.compute_pairwise_distances(
        protein_xyz=protein_coords, ligand_xyz=ligand_coords)

    def pocket_ecfp(**options):
      # Run the featurizer on the loaded complex, forwarding keyword options.
      return rgf.featurize_binding_pocket_ecfp(
          protein_coords, protein_mol, ligand_coords, ligand_mol, **options)

    # check if results are the same if we provide precomputed distances
    base_protein, base_ligand = pocket_ecfp()
    dist_protein, dist_ligand = pocket_ecfp(pairwise_distances=pairwise)
    # ...but first check if we actually got two dicts
    for features in (base_protein, base_ligand):
      self.assertIsInstance(features, dict)

    self.assertEqual(base_protein, dist_protein)
    self.assertEqual(base_ligand, dist_ligand)

    # check if we get fewer features with a smaller distance cutoff
    near_protein, near_ligand = pocket_ecfp(cutoff=2.0)
    far_protein, far_ligand = pocket_ecfp(cutoff=6.0)
    n_base_protein = len(base_protein)
    n_base_ligand = len(base_ligand)
    self.assertLess(len(near_protein), n_base_protein)
    # ligands are typically small so all atoms might be present
    self.assertLessEqual(len(near_ligand), n_base_ligand)
    self.assertGreater(len(far_protein), n_base_protein)
    self.assertGreaterEqual(len(far_ligand), n_base_ligand)

    # check if using a different ecfp_degree changes anything
    deg3_protein, deg3_ligand = pocket_ecfp(ecfp_degree=3)
    self.assertNotEqual(deg3_protein, base_protein)
    self.assertNotEqual(deg3_ligand, base_ligand)
Ejemplo n.º 8
0
    def test_featurize_binding_pocket_ecfp(self):
        """Exercise featurize_binding_pocket_ecfp with several options.

        Checks that the call returns two dicts, that supplying precomputed
        pairwise distances gives the same dicts, that ``cutoff=2.0`` and
        ``cutoff=6.0`` shrink and grow the protein dict relative to the
        default call, and that ``ecfp_degree=3`` changes both dicts.
        """
        protein_coords, protein_mol = rgf.load_molecule(self.protein_file)
        ligand_coords, ligand_mol = rgf.load_molecule(self.ligand_file)
        pairwise = rgf.compute_pairwise_distances(protein_xyz=protein_coords,
                                                  ligand_xyz=ligand_coords)

        def pocket_ecfp(**options):
            # Run the featurizer on the loaded complex, forwarding options.
            return rgf.featurize_binding_pocket_ecfp(protein_coords,
                                                     protein_mol,
                                                     ligand_coords,
                                                     ligand_mol,
                                                     **options)

        # check if results are the same if we provide precomputed distances
        base_protein, base_ligand = pocket_ecfp()
        dist_protein, dist_ligand = pocket_ecfp(pairwise_distances=pairwise)
        # ...but first check if we actually got two dicts
        for features in (base_protein, base_ligand):
            self.assertIsInstance(features, dict)

        self.assertEqual(base_protein, dist_protein)
        self.assertEqual(base_ligand, dist_ligand)

        # check if we get fewer features with a smaller distance cutoff
        near_protein, near_ligand = pocket_ecfp(cutoff=2.0)
        far_protein, far_ligand = pocket_ecfp(cutoff=6.0)
        n_base_protein = len(base_protein)
        n_base_ligand = len(base_ligand)
        self.assertLess(len(near_protein), n_base_protein)
        # ligands are typically small so all atoms might be present
        self.assertLessEqual(len(near_ligand), n_base_ligand)
        self.assertGreater(len(far_protein), n_base_protein)
        self.assertGreaterEqual(len(far_ligand), n_base_ligand)

        # check if using a different ecfp_degree changes anything
        deg3_protein, deg3_ligand = pocket_ecfp(ecfp_degree=3)
        self.assertNotEqual(deg3_protein, base_protein)
        self.assertNotEqual(deg3_ligand, base_ligand)
Ejemplo n.º 9
0
  def test_featurize_splif(self):
    """Check that featurize_splif agrees with the per-bin computation.

    ``featurize_splif`` is called with two contact bins and precomputed
    pairwise distances. Its result must be a list equal to the list built by
    calling ``compute_splif_features_in_range`` once per bin with the same
    molecules, distances and ``ecfp_degree``.
    """
    protein_coords, protein_mol = rgf.load_molecule(self.protein_file)
    ligand_coords, ligand_mol = rgf.load_molecule(self.ligand_file)
    pairwise = rgf.compute_pairwise_distances(
        protein_xyz=protein_coords, ligand_xyz=ligand_coords)

    contact_bins = [(1, 2), (2, 3)]

    actual = rgf.featurize_splif(
        protein_coords,
        protein_mol,
        ligand_coords,
        ligand_mol,
        contact_bins=contact_bins,
        pairwise_distances=pairwise,
        ecfp_degree=2)

    # Build the reference result one contact bin at a time.
    expected = []
    for contact_bin in contact_bins:
      per_bin = rgf.compute_splif_features_in_range(
          protein_mol, ligand_mol, pairwise, contact_bin, ecfp_degree=2)
      expected.append(per_bin)

    self.assertIsInstance(actual, list)
    self.assertEqual(actual, expected)