Esempio n. 1
0
    def _featurize(self, complex: Tuple[str, str]) -> np.ndarray:
        """Voxelize the salt bridges of a single mol/protein complex.

        Parameters
        ----------
        complex: Tuple[str, str]
            Filenames for molecule and protein.

        Returns
        -------
        np.ndarray or None
            The voxel grids computed for every pair of fragments, joined
            along the last axis. ``None`` is returned when the complex
            cannot be loaded.
        """
        try:
            fragments = rdkit_utils.load_complex(complex, add_hydrogens=False)
        except MoleculeLoadException:
            logger.warning(
                "This molecule cannot be loaded by Rdkit. Returning None")
            return None

        # The centroid is taken from the full complex, before the optional
        # reduction of the fragments to their contact atoms.
        centroid = compute_contact_centroid(fragments, cutoff=self.cutoff)
        if self.reduce_to_contacts:
            fragments = reduce_molecular_complex_to_contacts(
                fragments, self.cutoff)

        pairwise_features = []
        for frag1, frag2 in itertools.combinations(fragments, 2):
            distances = compute_pairwise_distances(frag1[0], frag2[0])
            xyzs = [
                subtract_centroid(frag1[0], centroid),
                subtract_centroid(frag2[0], centroid),
            ]
            # Salt bridges are passed as a feature list to the atom-pair
            # voxelizer, so it is given both coordinate sets at once, the
            # same way every other convert_atom_pair_to_voxel call site
            # does. Previously each fragment's coordinates were passed on
            # their own and the two results summed.
            # NOTE(review): assumes compute_salt_bridges yields
            # (frag1 atom index, frag2 atom index) pairs - confirm.
            salt_bridges = compute_salt_bridges(frag1[1],
                                                frag2[1],
                                                distances,
                                                cutoff=self.cutoff)
            pairwise_features.append(
                voxelize(convert_atom_pair_to_voxel,
                         hash_function=None,
                         coordinates=xyzs,
                         box_width=self.box_width,
                         voxel_width=self.voxel_width,
                         feature_list=salt_bridges,
                         nb_channel=1))
        # Every pair contributes one grid with a single channel
        # (nb_channel=1), so the grids are joined on the last axis.
        return np.concatenate(pairwise_features, axis=-1)
Esempio n. 2
0
    def _featurize(self, datapoint, **kwargs):
        """
    Compute featurization for a molecular complex

    Parameters
    ----------
    datapoint: Tuple[str, str]
      Filenames for molecule and protein.
    """
        if 'complex' in kwargs:
            datapoint = kwargs.get("complex")
            raise DeprecationWarning(
                'Complex is being phased out as a parameter, please pass "datapoint" instead.'
            )

        try:
            fragments = load_complex(datapoint, add_hydrogens=False)

        except MoleculeLoadException:
            logger.warning(
                "This molecule cannot be loaded by Rdkit. Returning None")
            return None
        pairwise_features = []
        # We compute pairwise contact fingerprints
        centroid = compute_contact_centroid(fragments, cutoff=self.cutoff)
        for (frag1, frag2) in itertools.combinations(fragments, 2):
            distances = compute_pairwise_distances(frag1[0], frag2[0])
            frag1_xyz = subtract_centroid(frag1[0], centroid)
            frag2_xyz = subtract_centroid(frag2[0], centroid)
            xyzs = [frag1_xyz, frag2_xyz]
            pairwise_features.append(
                np.concatenate([
                    voxelize(convert_atom_pair_to_voxel,
                             hash_function=hash_ecfp_pair,
                             coordinates=xyzs,
                             box_width=self.box_width,
                             voxel_width=self.voxel_width,
                             feature_dict=splif_dict,
                             nb_channel=self.size) for splif_dict in
                    featurize_splif(frag1, frag2, self.contact_bins, distances,
                                    self.radius)
                ],
                               axis=-1))
        # Features are of shape (voxels_per_edge, voxels_per_edge, voxels_per_edge, 1) so we should concatenate on the last axis.
        return np.concatenate(pairwise_features, axis=-1)
    def _featurize(self, mol_pdb: str, complex_pdb: str):
        """Build voxelized SPLIF features for a ligand/protein complex.

        Parameters
        ----------
        mol_pdb: str
            Filename for ligand molecule
        complex_pdb: str
            Filename for protein molecule

        Returns
        -------
        np.ndarray or None
            The voxel grids of every fragment pair and contact bin, joined
            along the last axis. ``None`` is returned when the complex
            cannot be loaded.
        """
        try:
            fragments = load_complex((mol_pdb, complex_pdb),
                                     add_hydrogens=False)
        except MoleculeLoadException:
            logger.warning(
                "This molecule cannot be loaded by Rdkit. Returning None")
            return None

        # All fragment coordinates are re-expressed relative to this point.
        centroid = compute_contact_centroid(fragments, cutoff=self.cutoff)

        grids_per_pair = []
        for first, second in itertools.combinations(fragments, 2):
            distances = compute_pairwise_distances(first[0], second[0])
            first_centered = subtract_centroid(first[0], centroid)
            second_centered = subtract_centroid(second[0], centroid)
            centered_pair = [first_centered, second_centered]

            # One grid per dictionary produced by featurize_splif.
            grids_per_bin = []
            for splif_dict in featurize_splif(first, second,
                                              self.contact_bins, distances,
                                              self.radius):
                grid = voxelize(convert_atom_pair_to_voxel,
                                hash_function=hash_ecfp_pair,
                                coordinates=centered_pair,
                                box_width=self.box_width,
                                voxel_width=self.voxel_width,
                                feature_dict=splif_dict,
                                nb_channel=self.size)
                grids_per_bin.append(grid)
            grids_per_pair.append(np.concatenate(grids_per_bin, axis=-1))

        # Channels of all pairs are stacked on the last axis.
        return np.concatenate(grids_per_pair, axis=-1)
Esempio n. 4
0
 def test_voxelize_convert_atom(self):
     """Voxelizing per-atom features yields a cubic grid with nb_channel channels."""
     N = 5
     coordinates = np.random.rand(N, 3)
     box_width = 16
     voxel_width = 1
     voxels_per_edge = int(box_width / voxel_width)
     get_voxels = voxel_utils.convert_atom_to_voxel
     hash_function = hash_utils.hash_ecfp
     # Keys are atom indices into `coordinates`; values are the features
     # handed to the hash function.
     feature_dict = {1: "C", 2: "CC"}
     nb_channel = 16
     features = voxel_utils.voxelize(get_voxels,
                                     box_width,
                                     voxel_width,
                                     hash_function,
                                     coordinates,
                                     feature_dict,
                                     nb_channel=nb_channel)
     # Only the output shape is checked, so the random coordinates need no
     # fixed seed.
     assert features.shape == (voxels_per_edge, voxels_per_edge,
                               voxels_per_edge, nb_channel)
Esempio n. 5
0
 def test_voxelize_convert_atom_pair(self):
     """Voxelizing atom-pair features yields a cubic grid with nb_channel channels."""
     N = 5
     M = 6
     coordinates1 = np.random.rand(N, 3)
     coordinates2 = np.random.rand(M, 3)
     # The pair voxelizer receives both coordinate sets together.
     coordinates = [coordinates1, coordinates2]
     box_width = 16
     voxel_width = 1
     voxels_per_edge = int(box_width / voxel_width)
     get_voxels = voxel_utils.convert_atom_pair_to_voxel
     hash_function = hash_utils.hash_ecfp_pair
     # Keys are (index into coordinates1, index into coordinates2) pairs;
     # values are the paired features handed to the hash function.
     feature_dict = {(1, 2): ("C", "O"), (2, 3): ("CC", "OH")}
     nb_channel = 16
     features = voxel_utils.voxelize(get_voxels,
                                     box_width,
                                     voxel_width,
                                     hash_function,
                                     coordinates,
                                     feature_dict,
                                     nb_channel=nb_channel)
     # Only the output shape is checked, so the random coordinates need no
     # fixed seed.
     assert features.shape == (voxels_per_edge, voxels_per_edge,
                               voxels_per_edge, nb_channel)
Esempio n. 6
0
    def _featurize(self, datapoint, **kwargs):  # -> Optional[np.ndarray]:
        """
    Compute featurization for a single mol/protein complex

    Parameters
    ----------
    datapoint: Tuple[str, str]
      Filenames for molecule and protein.
    """
        if 'complex' in kwargs:
            datapoint = kwargs.get("complex")
            raise DeprecationWarning(
                'Complex is being phased out as a parameter, please pass "datapoint" instead.'
            )

        try:
            fragments = rdkit_utils.load_complex(datapoint,
                                                 add_hydrogens=False)

        except MoleculeLoadException:
            logger.warning(
                "This molecule cannot be loaded by Rdkit. Returning None")
            return None
        pairwise_features = []
        # We compute pairwise contact fingerprints
        centroid = compute_contact_centroid(fragments, cutoff=self.cutoff)
        for (frag1_ind,
             frag2_ind) in itertools.combinations(range(len(fragments)), 2):
            frag1, frag2 = fragments[frag1_ind], fragments[frag2_ind]
            distances = compute_pairwise_distances(frag1[0], frag2[0])
            frag1_xyz = subtract_centroid(frag1[0], centroid)
            frag2_xyz = subtract_centroid(frag2[0], centroid)
            xyzs = [frag1_xyz, frag2_xyz]
            # rdks = [frag1[1], frag2[1]]
            protein_pi_t, protein_pi_parallel, ligand_pi_t, ligand_pi_parallel = (
                compute_pi_stack(frag1[1],
                                 frag2[1],
                                 distances,
                                 dist_cutoff=self.cutoff,
                                 angle_cutoff=self.angle_cutoff))
            pi_parallel_tensor = sum([
                voxelize(convert_atom_to_voxel,
                         hash_function=None,
                         box_width=self.box_width,
                         voxel_width=self.voxel_width,
                         coordinates=xyz,
                         feature_dict=feature_dict,
                         nb_channel=1)
                for (xyz, feature_dict
                     ) in zip(xyzs, [ligand_pi_parallel, protein_pi_parallel])
            ])

            pi_t_tensor = sum([
                voxelize(convert_atom_to_voxel,
                         hash_function=None,
                         box_width=self.box_width,
                         voxel_width=self.voxel_width,
                         coordinates=frag1_xyz,
                         feature_dict=protein_pi_t,
                         nb_channel=1)
                for (xyz,
                     feature_dict) in zip(xyzs, [ligand_pi_t, protein_pi_t])
            ])

            pairwise_features.append(
                np.concatenate([pi_parallel_tensor, pi_t_tensor], axis=-1))
        # Features are of shape (voxels_per_edge, voxels_per_edge, voxels_per_edge, 2) so we should concatenate on the last axis.
        return np.concatenate(pairwise_features, axis=-1)
Esempio n. 7
0
    def _compute_feature(self, feature_name, prot_xyz, prot_rdk, lig_xyz,
                         lig_rdk, distances):
        if feature_name == 'ecfp_ligand':
            return [
                compute_ecfp_features(lig_rdk, self.ecfp_degree,
                                      self.ecfp_power)
            ]
        if feature_name == 'ecfp_hashed':
            return [
                vectorize(hash_ecfp,
                          feature_dict=ecfp_dict,
                          size=2**self.ecfp_power)
                for ecfp_dict in featurize_contacts_ecfp(
                    (prot_xyz, prot_rdk), (lig_xyz, lig_rdk),
                    distances,
                    cutoff=self.cutoffs['ecfp_cutoff'],
                    ecfp_degree=self.ecfp_degree)
            ]
        if feature_name == 'splif_hashed':
            return [
                vectorize(hash_ecfp_pair,
                          feature_dict=splif_dict,
                          size=2**self.splif_power)
                for splif_dict in featurize_splif((prot_xyz, prot_rdk), (
                    lig_xyz, lig_rdk), self.cutoffs['splif_contact_bins'],
                                                  distances, self.ecfp_degree)
            ]
        if feature_name == 'hbond_count':
            return [
                vectorize(hash_ecfp_pair, feature_list=hbond_list, size=2**0)
                for hbond_list in
                compute_hydrogen_bonds((prot_xyz, prot_rdk), (
                    lig_xyz,
                    lig_rdk), distances, self.cutoffs['hbond_dist_bins'],
                                       self.cutoffs['hbond_angle_cutoffs'])
            ]
        if feature_name == 'ecfp':
            return [
                sum([
                    voxelize(
                        convert_atom_to_voxel,
                        xyz,
                        box_width=self.box_width,
                        voxel_width=self.voxel_width,
                        hash_function=hash_ecfp,
                        feature_dict=ecfp_dict,
                        nb_channel=2**self.ecfp_power,
                    ) for xyz, ecfp_dict in zip(
                        (prot_xyz, lig_xyz),
                        featurize_contacts_ecfp(
                            (prot_xyz, prot_rdk), (lig_xyz, lig_rdk),
                            distances,
                            cutoff=self.cutoffs['ecfp_cutoff'],
                            ecfp_degree=self.ecfp_degree))
                ])
            ]
        if feature_name == 'splif':
            return [
                voxelize(
                    convert_atom_pair_to_voxel,
                    (prot_xyz, lig_xyz),
                    box_width=self.box_width,
                    voxel_width=self.voxel_width,
                    hash_function=hash_ecfp_pair,
                    feature_dict=splif_dict,
                    nb_channel=2**self.splif_power,
                )
                for splif_dict in featurize_splif((prot_xyz, prot_rdk), (
                    lig_xyz, lig_rdk), self.cutoffs['splif_contact_bins'],
                                                  distances, self.ecfp_degree)
            ]
        if feature_name == 'sybyl':

            def hash_sybyl_func(x):
                hash_sybyl(x, sybyl_types=self.sybyl_types)

            return [
                voxelize(
                    convert_atom_to_voxel,
                    xyz,
                    box_width=self.box_width,
                    voxel_width=self.voxel_width,
                    hash_function=hash_sybyl_func,
                    feature_dict=sybyl_dict,
                    nb_channel=len(self.sybyl_types),
                ) for xyz, sybyl_dict in zip(
                    (prot_xyz, lig_xyz),
                    featurize_binding_pocket_sybyl(
                        prot_xyz,
                        prot_rdk,
                        lig_xyz,
                        lig_rdk,
                        distances,
                        cutoff=self.cutoffs['sybyl_cutoff']))
            ]
        if feature_name == 'salt_bridge':
            return [
                voxelize(
                    convert_atom_pair_to_voxel,
                    (prot_xyz, lig_xyz),
                    box_width=self.box_width,
                    voxel_width=self.voxel_width,
                    feature_list=compute_salt_bridges(
                        prot_rdk,
                        lig_rdk,
                        distances,
                        cutoff=self.cutoffs['salt_bridges_cutoff']),
                    nb_channel=1,
                )
            ]
        if feature_name == 'charge':
            return [
                sum([
                    voxelize(convert_atom_to_voxel,
                             xyz,
                             box_width=self.box_width,
                             voxel_width=self.voxel_width,
                             feature_dict=compute_charge_dictionary(mol),
                             nb_channel=1,
                             dtype="np.float16")
                    for xyz, mol in ((prot_xyz, prot_rdk), (lig_xyz, lig_rdk))
                ])
            ]
        if feature_name == 'hbond':
            return [
                voxelize(
                    convert_atom_pair_to_voxel,
                    (prot_xyz, lig_xyz),
                    box_width=self.box_width,
                    voxel_width=self.voxel_width,
                    feature_list=hbond_list,
                    nb_channel=2**0,
                ) for hbond_list in
                compute_hydrogen_bonds((prot_xyz, prot_rdk), (
                    lig_xyz,
                    lig_rdk), distances, self.cutoffs['hbond_dist_bins'],
                                       self.cutoffs['hbond_angle_cutoffs'])
            ]
        if feature_name == 'pi_stack':
            return voxelize_pi_stack(prot_xyz, prot_rdk, lig_xyz, lig_rdk,
                                     distances,
                                     self.cutoffs['pi_stack_dist_cutoff'],
                                     self.cutoffs['pi_stack_angle_cutoff'],
                                     self.box_width, self.voxel_width)
        if feature_name == 'cation_pi':
            return [
                sum([
                    voxelize(
                        convert_atom_to_voxel,
                        xyz,
                        box_width=self.box_width,
                        voxel_width=self.voxel_width,
                        feature_dict=cation_pi_dict,
                        nb_channel=1,
                    ) for xyz, cation_pi_dict in zip(
                        (prot_xyz, lig_xyz),
                        compute_binding_pocket_cation_pi(
                            prot_rdk,
                            lig_rdk,
                            dist_cutoff=self.cutoffs['cation_pi_dist_cutoff'],
                            angle_cutoff=self.
                            cutoffs['cation_pi_angle_cutoff'],
                        ))
                ])
            ]
        raise ValueError('Unknown feature type "%s"' % feature_name)
  def _featurize(self, mol_pdb: str, protein_pdb: str) -> np.ndarray:
    """Compute voxelized pi-stacking features for a mol/protein complex.

    Parameters
    ----------
    mol_pdb: str
      Filename for ligand molecule
    protein_pdb: str
      Filename for protein molecule

    Returns
    -------
    np.ndarray or None
      For every pair of fragments a parallel-stacking grid followed by a
      T-shaped-stacking grid, all joined along the last axis. ``None`` is
      returned when the complex cannot be loaded.
    """
    molecular_complex = (mol_pdb, protein_pdb)
    try:
      fragments = rdkit_utils.load_complex(
          molecular_complex, add_hydrogens=False)
    except MoleculeLoadException:
      logger.warning("This molecule cannot be loaded by Rdkit. Returning None")
      return None

    def voxelize_counts(xyzs, count_dicts):
      # Sum the single-channel grids of both fragments, each built from
      # that fragment's own coordinates and count dictionary.
      return sum([
          voxelize(
              convert_atom_to_voxel,
              hash_function=None,
              box_width=self.box_width,
              voxel_width=self.voxel_width,
              coordinates=xyz,
              feature_dict=counts,
              nb_channel=1) for (xyz, counts) in zip(xyzs, count_dicts)
      ])

    pairwise_features = []
    # We compute pairwise contact fingerprints
    centroid = compute_contact_centroid(fragments, cutoff=self.cutoff)
    for frag1, frag2 in itertools.combinations(fragments, 2):
      distances = compute_pairwise_distances(frag1[0], frag2[0])
      xyzs = [
          subtract_centroid(frag1[0], centroid),
          subtract_centroid(frag2[0], centroid),
      ]
      # NOTE(review): assumes compute_pi_stack returns the dictionaries of
      # its first molecule argument first, i.e.
      # (mol1_pi_t, mol1_pi_parallel, mol2_pi_t, mol2_pi_parallel)
      # - confirm against its documentation.
      frag1_pi_t, frag1_pi_parallel, frag2_pi_t, frag2_pi_parallel = (
          compute_pi_stack(
              frag1[1],
              frag2[1],
              distances,
              dist_cutoff=self.cutoff,
              angle_cutoff=self.angle_cutoff))

      # Each dictionary is voxelized with the coordinates of the fragment
      # it belongs to. Previously the T-shaped tensor voxelized the first
      # fragment twice and dropped the other one.
      pi_parallel_tensor = voxelize_counts(
          xyzs, [frag1_pi_parallel, frag2_pi_parallel])
      pi_t_tensor = voxelize_counts(xyzs, [frag1_pi_t, frag2_pi_t])

      pairwise_features.append(
          np.concatenate([pi_parallel_tensor, pi_t_tensor], axis=-1))
    # Each pair contributes two single-channel grids, so everything is
    # concatenated on the last axis.
    return np.concatenate(pairwise_features, axis=-1)