コード例 #1
0
    def _featurize(self, complex: Tuple[str, str]):
        """Build contact fingerprints for every fragment pair of a complex.

        Parameters
        ----------
        complex: Tuple[str, str]
            Filenames for molecule and protein.

        Returns
        -------
        The ``np.concatenate`` of one vector per dictionary produced by
        ``featurize_contacts_ecfp`` over all fragment pairs, or ``None``
        when loading the complex raises ``MoleculeLoadException``.
        """
        try:
            loaded = load_complex(complex, add_hydrogens=False)
        except MoleculeLoadException:
            logger.warning(
                "This molecule cannot be loaded by Rdkit. Returning None")
            return None

        fingerprints = []
        for first, second in itertools.combinations(loaded, 2):
            # Element 0 of each fragment is what the distance helper consumes
            # (presumably the coordinates -- confirm against load_complex).
            pair_distances = compute_pairwise_distances(first[0], second[0])
            contact_dicts = featurize_contacts_ecfp(first,
                                                    second,
                                                    pair_distances,
                                                    cutoff=self.cutoff,
                                                    ecfp_degree=self.radius)
            for contact_dict in contact_dicts:
                fingerprints.append(
                    vectorize(hash_ecfp,
                              feature_dict=contact_dict,
                              size=self.size))

        return np.concatenate(fingerprints)
コード例 #2
0
    def _featurize(self, mol_pdb: str, complex_pdb: str):
        """Build SPLIF fingerprints for every fragment pair of a complex.

        Parameters
        ----------
        mol_pdb: str
            Filename for ligand molecule
        complex_pdb: str
            Filename for protein molecule

        Returns
        -------
        The ``np.concatenate`` of one vector per dictionary produced by
        ``featurize_splif`` over all fragment pairs, or ``None`` when
        loading the complex raises ``MoleculeLoadException``.
        """
        try:
            loaded = load_complex((mol_pdb, complex_pdb), add_hydrogens=False)
        except MoleculeLoadException:
            logger.warning(
                "This molecule cannot be loaded by Rdkit. Returning None")
            return None

        fingerprints = []
        for first, second in itertools.combinations(loaded, 2):
            # Element 0 of each fragment is what the distance helper consumes
            # (presumably the coordinates -- confirm against load_complex).
            pair_distances = compute_pairwise_distances(first[0], second[0])
            splif_dicts = featurize_splif(first, second, self.contact_bins,
                                          pair_distances, self.radius)
            for splif_dict in splif_dicts:
                fingerprints.append(
                    vectorize(hash_ecfp_pair,
                              feature_dict=splif_dict,
                              size=self.size))

        return np.concatenate(fingerprints)
コード例 #3
0
    def _featurize(self, datapoint, **kwargs):  # -> Optional[np.ndarray]:
        """Voxelize hydrogen-bond features for a single mol/protein complex.

        Parameters
        ----------
        datapoint: Tuple[str, str]
            Filenames for molecule and protein.
        **kwargs
            Only the deprecated ``complex`` keyword is inspected. When it is
            present its value replaces ``datapoint`` and a
            ``DeprecationWarning`` is emitted.

        Returns
        -------
        The per-pair voxel arrays concatenated along the last axis, or
        ``None`` when loading the complex raises ``MoleculeLoadException``.
        """
        import warnings

        if 'complex' in kwargs:
            datapoint = kwargs.get("complex")
            # Warn rather than raise: raising here made the assignment above
            # dead code and turned the legacy keyword into a hard failure.
            warnings.warn(
                'Complex is being phased out as a parameter, please pass "datapoint" instead.',
                DeprecationWarning,
                stacklevel=2)
        try:
            fragments = rdkit_utils.load_complex(datapoint,
                                                 add_hydrogens=False)

        except MoleculeLoadException:
            logger.warning(
                "This molecule cannot be loaded by Rdkit. Returning None")
            return None
        pairwise_features = []
        # The centroid is taken from the full complex, before any optional
        # reduction to contact atoms below.
        centroid = compute_contact_centroid(fragments, cutoff=self.cutoff)
        if self.reduce_to_contacts:
            fragments = reduce_molecular_complex_to_contacts(
                fragments, self.cutoff)
        for (frag1_ind,
             frag2_ind) in itertools.combinations(range(len(fragments)), 2):
            frag1, frag2 = fragments[frag1_ind], fragments[frag2_ind]
            distances = compute_pairwise_distances(frag1[0], frag2[0])
            frag1_xyz = subtract_centroid(frag1[0], centroid)
            frag2_xyz = subtract_centroid(frag2[0], centroid)
            xyzs = [frag1_xyz, frag2_xyz]
            # For each list returned by compute_hydrogen_bonds, voxelize it
            # against both fragments' centred coordinates and add the grids.
            pairwise_features.append(
                np.concatenate([
                    sum([
                        voxelize(convert_atom_pair_to_voxel,
                                 hash_function=None,
                                 box_width=self.box_width,
                                 voxel_width=self.voxel_width,
                                 coordinates=xyz,
                                 feature_list=hbond_list,
                                 nb_channel=1) for xyz in xyzs
                    ]) for hbond_list in compute_hydrogen_bonds(
                        frag1, frag2, distances, self.distance_bins,
                        self.angle_cutoffs)
                ],
                               axis=-1))
        # Features are of shape (voxels_per_edge, voxels_per_edge, voxels_per_edge, 1) so we should concatenate on the last axis.
        return np.concatenate(pairwise_features, axis=-1)
コード例 #4
0
    def _featurize(self, mol_pdb: str, protein_pdb: str):
        """Voxelize ECFP contact features for a molecular complex.

        Parameters
        ----------
        mol_pdb: str
            Filename for ligand molecule
        protein_pdb: str
            Filename for protein molecule

        Returns
        -------
        ``None`` when loading the complex raises ``MoleculeLoadException``.
        Otherwise the per-pair voxel grids, either flattened and joined
        end to end (``self.flatten`` truthy) or concatenated along the
        last axis.
        """
        try:
            loaded = load_complex((mol_pdb, protein_pdb), add_hydrogens=False)
        except MoleculeLoadException:
            logger.warning(
                "This molecule cannot be loaded by Rdkit. Returning None")
            return None

        grids: List[np.ndarray] = []
        center = compute_contact_centroid(loaded, cutoff=self.cutoff)
        for first, second in itertools.combinations(loaded, 2):
            pair_distances = compute_pairwise_distances(first[0], second[0])
            centred = [
                subtract_centroid(first[0], center),
                subtract_centroid(second[0], center),
            ]
            contact_dicts = featurize_contacts_ecfp(first,
                                                    second,
                                                    pair_distances,
                                                    cutoff=self.cutoff,
                                                    ecfp_degree=self.radius)
            # Pair each fragment's centred coordinates with its contact
            # dictionary, voxelize, and add the resulting grids together.
            per_fragment = []
            for coords, contact_dict in zip(centred, contact_dicts):
                per_fragment.append(
                    voxelize(convert_atom_to_voxel,
                             coords,
                             self.box_width,
                             self.voxel_width,
                             hash_function=hash_ecfp,
                             feature_dict=contact_dict,
                             nb_channel=self.size))
            grids.append(sum(per_fragment))

        if not self.flatten:
            # The original comment gives the grid shape as (voxels_per_edge,
            # voxels_per_edge, voxels_per_edge, num_feat); join on the last
            # axis.
            return np.concatenate(grids, axis=-1)
        return np.concatenate([grid.flatten() for grid in grids])
コード例 #5
0
  def _featurize(self, mol_pdb: str, protein_pdb: str) -> np.ndarray:
    """Voxelize hydrogen-bond features for a single mol/protein complex.

    Parameters
    ----------
    mol_pdb: str
      Filename for ligand molecule
    protein_pdb: str
      Filename for protein molecule

    Returns
    -------
    The per-pair voxel arrays concatenated along the last axis. Note that
    ``None`` is returned instead when loading the complex raises
    ``MoleculeLoadException``.
    """
    try:
      loaded = rdkit_utils.load_complex(
          (mol_pdb, protein_pdb), add_hydrogens=False)
    except MoleculeLoadException:
      logger.warning("This molecule cannot be loaded by Rdkit. Returning None")
      return None

    # The centroid is taken from the full complex, before any optional
    # reduction to contact atoms.
    center = compute_contact_centroid(loaded, cutoff=self.cutoff)
    if self.reduce_to_contacts:
      loaded = reduce_molecular_complex_to_contacts(loaded, self.cutoff)

    per_pair = []
    for i, j in itertools.combinations(range(len(loaded)), 2):
      first = loaded[i]
      second = loaded[j]
      pair_distances = compute_pairwise_distances(first[0], second[0])
      centred = [
          subtract_centroid(first[0], center),
          subtract_centroid(second[0], center),
      ]
      hbond_lists = compute_hydrogen_bonds(first, second, pair_distances,
                                           self.distance_bins,
                                           self.angle_cutoffs)
      channels = []
      for hbond_list in hbond_lists:
        # Voxelize the same bond list against both fragments' centred
        # coordinates and add the two grids.
        grids = [
            voxelize(
                convert_atom_pair_to_voxel,
                hash_function=None,
                box_width=self.box_width,
                voxel_width=self.voxel_width,
                coordinates=coords,
                feature_list=hbond_list,
                nb_channel=1) for coords in centred
        ]
        channels.append(sum(grids))
      per_pair.append(np.concatenate(channels, axis=-1))

    # Per-pair arrays are joined along the last axis.
    return np.concatenate(per_pair, axis=-1)
コード例 #6
0
    def _featurize(self, complex: Tuple[str, str]) -> Optional[np.ndarray]:
        """Voxelize cation-pi features for a single mol/protein complex.

        Parameters
        ----------
        complex: Tuple[str, str]
            Filenames for molecule and protein.

        Returns
        -------
        The per-pair voxel arrays concatenated along the last axis, or
        ``None`` when loading the complex raises ``MoleculeLoadException``.
        """
        try:
            loaded = rdkit_utils.load_complex(complex, add_hydrogens=False)
        except MoleculeLoadException:
            logger.warning(
                "This molecule cannot be loaded by Rdkit. Returning None")
            return None

        center = compute_contact_centroid(loaded, cutoff=self.cutoff)
        per_pair = []
        for first, second in itertools.combinations(loaded, 2):
            centred = [
                subtract_centroid(first[0], center),
                subtract_centroid(second[0], center),
            ]
            # Element 1 of each fragment is what the cation-pi helper
            # consumes (presumably the RDKit molecule -- confirm).
            cation_pi_dicts = compute_binding_pocket_cation_pi(
                first[1],
                second[1],
                dist_cutoff=self.cutoff,
                angle_cutoff=self.angle_cutoff,
            )
            grids = []
            for coords, cation_pi_dict in zip(centred, cation_pi_dicts):
                grids.append(
                    voxelize(convert_atom_to_voxel,
                             hash_function=None,
                             box_width=self.box_width,
                             voxel_width=self.voxel_width,
                             coordinates=coords,
                             feature_dict=cation_pi_dict,
                             nb_channel=1))
            per_pair.append(sum(grids))

        # Per-pair arrays are joined along the last axis.
        return np.concatenate(per_pair, axis=-1)
コード例 #7
0
    def _featurize(self, datapoint, **kwargs):
        """Voxelize SPLIF features for a molecular complex.

        Parameters
        ----------
        datapoint: Tuple[str, str]
            Filenames for molecule and protein.
        **kwargs
            Only the deprecated ``complex`` keyword is inspected. When it is
            present its value replaces ``datapoint`` and a
            ``DeprecationWarning`` is emitted.

        Returns
        -------
        The per-pair voxel arrays concatenated along the last axis, or
        ``None`` when loading the complex raises ``MoleculeLoadException``.
        """
        import warnings

        if 'complex' in kwargs:
            datapoint = kwargs.get("complex")
            # Warn rather than raise: raising here made the assignment above
            # dead code and turned the legacy keyword into a hard failure.
            warnings.warn(
                'Complex is being phased out as a parameter, please pass "datapoint" instead.',
                DeprecationWarning,
                stacklevel=2)

        try:
            fragments = load_complex(datapoint, add_hydrogens=False)

        except MoleculeLoadException:
            logger.warning(
                "This molecule cannot be loaded by Rdkit. Returning None")
            return None
        pairwise_features = []
        # We compute pairwise contact fingerprints
        centroid = compute_contact_centroid(fragments, cutoff=self.cutoff)
        for (frag1, frag2) in itertools.combinations(fragments, 2):
            distances = compute_pairwise_distances(frag1[0], frag2[0])
            frag1_xyz = subtract_centroid(frag1[0], centroid)
            frag2_xyz = subtract_centroid(frag2[0], centroid)
            # Both centred coordinate sets are handed to voxelize together.
            xyzs = [frag1_xyz, frag2_xyz]
            pairwise_features.append(
                np.concatenate([
                    voxelize(convert_atom_pair_to_voxel,
                             hash_function=hash_ecfp_pair,
                             coordinates=xyzs,
                             box_width=self.box_width,
                             voxel_width=self.voxel_width,
                             feature_dict=splif_dict,
                             nb_channel=self.size) for splif_dict in
                    featurize_splif(frag1, frag2, self.contact_bins, distances,
                                    self.radius)
                ],
                               axis=-1))
        # Per-pair arrays are joined along the last axis.
        return np.concatenate(pairwise_features, axis=-1)
コード例 #8
0
    def _featurize(self, mol_pdb: str, complex_pdb: str):
        """Voxelize SPLIF features for a molecular complex.

        Parameters
        ----------
        mol_pdb: str
            Filename for ligand molecule
        complex_pdb: str
            Filename for protein molecule

        Returns
        -------
        The per-pair voxel arrays concatenated along the last axis, or
        ``None`` when loading the complex raises ``MoleculeLoadException``.
        """
        try:
            loaded = load_complex((mol_pdb, complex_pdb), add_hydrogens=False)
        except MoleculeLoadException:
            logger.warning(
                "This molecule cannot be loaded by Rdkit. Returning None")
            return None

        center = compute_contact_centroid(loaded, cutoff=self.cutoff)
        per_pair = []
        for first, second in itertools.combinations(loaded, 2):
            pair_distances = compute_pairwise_distances(first[0], second[0])
            # Both centred coordinate sets are handed to voxelize together.
            centred = [
                subtract_centroid(first[0], center),
                subtract_centroid(second[0], center),
            ]
            splif_dicts = featurize_splif(first, second, self.contact_bins,
                                          pair_distances, self.radius)
            grids = []
            for splif_dict in splif_dicts:
                grids.append(
                    voxelize(convert_atom_pair_to_voxel,
                             hash_function=hash_ecfp_pair,
                             coordinates=centred,
                             box_width=self.box_width,
                             voxel_width=self.voxel_width,
                             feature_dict=splif_dict,
                             nb_channel=self.size))
            per_pair.append(np.concatenate(grids, axis=-1))

        # Per-pair arrays are joined along the last axis.
        return np.concatenate(per_pair, axis=-1)
コード例 #9
0
    def _featurize(self, complex: Tuple[str, str]) -> Optional[np.ndarray]:
        """Count hydrogen bonds for a single mol/protein complex.

        Parameters
        ----------
        complex: Tuple[str, str]
            Filenames for molecule and protein.

        Returns
        -------
        An array holding, for every fragment pair, the length of each list
        returned by ``compute_hydrogen_bonds``; or ``None`` when loading
        the complex raises ``MoleculeLoadException``.
        """
        try:
            loaded = rdkit_utils.load_complex(complex, add_hydrogens=False)
        except MoleculeLoadException:
            logger.warning(
                "This molecule cannot be loaded by Rdkit. Returning None")
            return None

        if self.reduce_to_contacts:
            loaded = reduce_molecular_complex_to_contacts(loaded, self.cutoff)

        per_pair = []
        for i, j in itertools.combinations(range(len(loaded)), 2):
            first = loaded[i]
            second = loaded[j]
            pair_distances = compute_pairwise_distances(first[0], second[0])
            hbond_lists = compute_hydrogen_bonds(first, second, pair_distances,
                                                 self.distance_bins,
                                                 self.angle_cutoffs)
            # One single-element count array per returned bond list.
            counts = [
                np.array([len(hbond_list)]) for hbond_list in hbond_lists
            ]
            per_pair.append(np.concatenate(counts, axis=-1))

        # Each per-pair entry is a 1-D array of counts; join them end to end.
        return np.concatenate(per_pair, axis=-1)
コード例 #10
0
    def _featurize(self, datapoint, **kwargs):
        """Build contact fingerprints for every fragment pair of a complex.

        Parameters
        ----------
        datapoint: Tuple[str, str]
            Filenames for molecule and protein.
        **kwargs
            Only the deprecated ``complex`` keyword is inspected. When it is
            present its value replaces ``datapoint`` and a
            ``DeprecationWarning`` is emitted.

        Returns
        -------
        The ``np.concatenate`` of one vector per dictionary produced by
        ``featurize_contacts_ecfp`` over all fragment pairs, or ``None``
        when loading the complex raises ``MoleculeLoadException``.
        """
        import warnings

        if 'complex' in kwargs:
            datapoint = kwargs.get("complex")
            # Warn rather than raise: raising here made the assignment above
            # dead code and turned the legacy keyword into a hard failure.
            warnings.warn(
                'Complex is being phased out as a parameter, please pass "datapoint" instead.',
                DeprecationWarning,
                stacklevel=2)

        try:
            fragments = load_complex(datapoint, add_hydrogens=False)

        except MoleculeLoadException:
            logger.warning(
                "This molecule cannot be loaded by Rdkit. Returning None")
            return None
        pairwise_features = []
        # We compute pairwise contact fingerprints
        for (frag1, frag2) in itertools.combinations(fragments, 2):
            # Element 0 of each fragment is what the distance helper consumes
            # (presumably the coordinates -- confirm against load_complex).
            distances = compute_pairwise_distances(frag1[0], frag2[0])
            pairwise_features.extend(
                vectorize(hash_ecfp, feature_dict=ecfp_dict, size=self.size)
                for ecfp_dict in featurize_contacts_ecfp(
                    frag1,
                    frag2,
                    distances,
                    cutoff=self.cutoff,
                    ecfp_degree=self.radius))

        return np.concatenate(pairwise_features)
コード例 #11
0
 def test_get_contact_atom_indices(self):
   """Contact-index lookup on the loaded complex yields two entries."""
   file_pair = [self.protein_file, self.ligand_file]
   fragments = rdkit_utils.load_complex(file_pair)
   found = get_contact_atom_indices(fragments)
   assert len(found) == 2
コード例 #12
0
 def test_load_complex(self):
   """Loading a (protein, ligand) file pair yields two entries."""
   file_pair = (self.protein_file, self.ligand_file)
   load_options = {"add_hydrogens": False, "calc_charges": False}
   loaded = rdkit_utils.load_complex(file_pair, **load_options)
   assert len(loaded) == 2