예제 #1
0
    def _featurize(self, datapoint, **kwargs):  # -> Optional[np.ndarray]:
        """Compute voxelized hydrogen-bond features for one mol/protein complex.

        Parameters
        ----------
        datapoint: Tuple[str, str]
            Filenames for molecule and protein.
        **kwargs
            Only the deprecated ``complex`` keyword is inspected. When it is
            present its value replaces ``datapoint`` and a
            ``DeprecationWarning`` is issued.

        Returns
        -------
        The per-fragment-pair feature arrays concatenated along the last
        axis, or ``None`` when ``rdkit_utils.load_complex`` raises
        ``MoleculeLoadException``.
        """
        import warnings

        if 'complex' in kwargs:
            datapoint = kwargs.get("complex")
            # Warn rather than raise: raising here made the assignment above
            # dead code and turned the deprecated keyword into a hard error.
            warnings.warn(
                'Complex is being phased out as a parameter, please pass "datapoint" instead.',
                DeprecationWarning,
                stacklevel=2)
        try:
            fragments = rdkit_utils.load_complex(datapoint,
                                                 add_hydrogens=False)

        except MoleculeLoadException:
            logger.warning(
                "This molecule cannot be loaded by Rdkit. Returning None")
            return None
        pairwise_features = []
        # The centroid is computed on the full complex, before the optional
        # reduction to contact atoms below.
        centroid = compute_contact_centroid(fragments, cutoff=self.cutoff)
        if self.reduce_to_contacts:
            fragments = reduce_molecular_complex_to_contacts(
                fragments, self.cutoff)
        # We compute pairwise contact fingerprints
        for (frag1_ind,
             frag2_ind) in itertools.combinations(range(len(fragments)), 2):
            frag1, frag2 = fragments[frag1_ind], fragments[frag2_ind]
            distances = compute_pairwise_distances(frag1[0], frag2[0])
            frag1_xyz = subtract_centroid(frag1[0], centroid)
            frag2_xyz = subtract_centroid(frag2[0], centroid)
            xyzs = [frag1_xyz, frag2_xyz]
            # For every list returned by compute_hydrogen_bonds, voxelize it
            # against each fragment's coordinates and add the two grids.
            pairwise_features.append(
                np.concatenate([
                    sum([
                        voxelize(convert_atom_pair_to_voxel,
                                 hash_function=None,
                                 box_width=self.box_width,
                                 voxel_width=self.voxel_width,
                                 coordinates=xyz,
                                 feature_list=hbond_list,
                                 nb_channel=1) for xyz in xyzs
                    ]) for hbond_list in compute_hydrogen_bonds(
                        frag1, frag2, distances, self.distance_bins,
                        self.angle_cutoffs)
                ],
                               axis=-1))
        # Features are of shape (voxels_per_edge, voxels_per_edge, voxels_per_edge, 1) so we should concatenate on the last axis.
        return np.concatenate(pairwise_features, axis=-1)
예제 #2
0
  def _featurize(self, mol_pdb: str, protein_pdb: str) -> np.ndarray:
    """
    Build voxelized hydrogen-bond features for one ligand/protein pair.

    Parameters
    ----------
    mol_pdb: str
      Filename for ligand molecule
    protein_pdb: str
      Filename for protein molecule

    Returns
    -------
    The per-fragment-pair feature arrays joined along the last axis, or None
    when loading the complex raises MoleculeLoadException.
    """
    try:
      fragments = rdkit_utils.load_complex(
          (mol_pdb, protein_pdb), add_hydrogens=False)
    except MoleculeLoadException:
      logger.warning("This molecule cannot be loaded by Rdkit. Returning None")
      return None

    # The centroid comes from the full complex, before any contact reduction.
    centroid = compute_contact_centroid(fragments, cutoff=self.cutoff)
    if self.reduce_to_contacts:
      fragments = reduce_molecular_complex_to_contacts(fragments, self.cutoff)

    per_pair_grids = []
    for first, second in itertools.combinations(fragments, 2):
      distances = compute_pairwise_distances(first[0], second[0])
      centered_coords = [
          subtract_centroid(first[0], centroid),
          subtract_centroid(second[0], centroid),
      ]
      channels = []
      for hbond_list in compute_hydrogen_bonds(first, second, distances,
                                               self.distance_bins,
                                               self.angle_cutoffs):
        # One grid per fragment for this hydrogen-bond list, then summed.
        grids = [
            voxelize(
                convert_atom_pair_to_voxel,
                hash_function=None,
                box_width=self.box_width,
                voxel_width=self.voxel_width,
                coordinates=xyz,
                feature_list=hbond_list,
                nb_channel=1) for xyz in centered_coords
        ]
        channels.append(sum(grids))
      per_pair_grids.append(np.concatenate(channels, axis=-1))

    # Each pair contributes along the last axis of the final array.
    return np.concatenate(per_pair_grids, axis=-1)
예제 #3
0
    def _featurize(self, complex: Tuple[str, str]) -> Optional[np.ndarray]:
        """
        Count hydrogen bonds between every pair of fragments in a complex.

        Parameters
        ----------
        complex: Tuple[str, str]
            Filenames for molecule and protein.

        Returns
        -------
        For each fragment pair, the length of every list returned by
        ``compute_hydrogen_bonds``, all concatenated into one array; None
        when loading the complex raises ``MoleculeLoadException``.
        """
        try:
            fragments = rdkit_utils.load_complex(complex, add_hydrogens=False)
        except MoleculeLoadException:
            logger.warning(
                "This molecule cannot be loaded by Rdkit. Returning None")
            return None

        if self.reduce_to_contacts:
            fragments = reduce_molecular_complex_to_contacts(
                fragments, self.cutoff)

        counts_per_pair = []
        for first, second in itertools.combinations(fragments, 2):
            distances = compute_pairwise_distances(first[0], second[0])
            hbond_lists = compute_hydrogen_bonds(first, second, distances,
                                                 self.distance_bins,
                                                 self.angle_cutoffs)
            # One single-element array per list, holding its length.
            bin_counts = [
                np.array([len(hbond_list)]) for hbond_list in hbond_lists
            ]
            counts_per_pair.append(np.concatenate(bin_counts, axis=-1))

        return np.concatenate(counts_per_pair, axis=-1)
예제 #4
0
    def _compute_feature(self, feature_name, prot_xyz, prot_rdk, lig_xyz,
                         lig_rdk, distances):
        """Compute a single named feature for a protein/ligand pair.

        Parameters
        ----------
        feature_name: str
            One of 'ecfp_ligand', 'ecfp_hashed', 'splif_hashed',
            'hbond_count', 'ecfp', 'splif', 'sybyl', 'salt_bridge', 'charge',
            'hbond', 'pi_stack' or 'cation_pi'.
        prot_xyz, lig_xyz
            Protein and ligand coordinates, forwarded to the helper functions
            (presumably per-atom xyz arrays -- confirm against callers).
        prot_rdk, lig_rdk
            Protein and ligand molecule objects, forwarded to the helper
            functions (presumably RDKit molecules -- confirm against callers).
        distances
            Pairwise distances forwarded to the contact/bond helpers.

        Returns
        -------
        A list of feature arrays for every feature except 'pi_stack', which
        returns whatever ``voxelize_pi_stack`` returns.

        Raises
        ------
        ValueError
            If ``feature_name`` is not one of the supported names.
        """
        if feature_name == 'ecfp_ligand':
            return [
                compute_ecfp_features(lig_rdk, self.ecfp_degree,
                                      self.ecfp_power)
            ]
        if feature_name == 'ecfp_hashed':
            return [
                vectorize(hash_ecfp,
                          feature_dict=ecfp_dict,
                          size=2**self.ecfp_power)
                for ecfp_dict in featurize_contacts_ecfp(
                    (prot_xyz, prot_rdk), (lig_xyz, lig_rdk),
                    distances,
                    cutoff=self.cutoffs['ecfp_cutoff'],
                    ecfp_degree=self.ecfp_degree)
            ]
        if feature_name == 'splif_hashed':
            return [
                vectorize(hash_ecfp_pair,
                          feature_dict=splif_dict,
                          size=2**self.splif_power)
                for splif_dict in featurize_splif((prot_xyz, prot_rdk), (
                    lig_xyz, lig_rdk), self.cutoffs['splif_contact_bins'],
                                                  distances, self.ecfp_degree)
            ]
        if feature_name == 'hbond_count':
            return [
                vectorize(hash_ecfp_pair, feature_list=hbond_list, size=2**0)
                for hbond_list in
                compute_hydrogen_bonds((prot_xyz, prot_rdk), (
                    lig_xyz,
                    lig_rdk), distances, self.cutoffs['hbond_dist_bins'],
                                       self.cutoffs['hbond_angle_cutoffs'])
            ]
        if feature_name == 'ecfp':
            # Protein and ligand grids are summed into a single tensor.
            return [
                sum([
                    voxelize(
                        convert_atom_to_voxel,
                        xyz,
                        box_width=self.box_width,
                        voxel_width=self.voxel_width,
                        hash_function=hash_ecfp,
                        feature_dict=ecfp_dict,
                        nb_channel=2**self.ecfp_power,
                    ) for xyz, ecfp_dict in zip(
                        (prot_xyz, lig_xyz),
                        featurize_contacts_ecfp(
                            (prot_xyz, prot_rdk), (lig_xyz, lig_rdk),
                            distances,
                            cutoff=self.cutoffs['ecfp_cutoff'],
                            ecfp_degree=self.ecfp_degree))
                ])
            ]
        if feature_name == 'splif':
            return [
                voxelize(
                    convert_atom_pair_to_voxel,
                    (prot_xyz, lig_xyz),
                    box_width=self.box_width,
                    voxel_width=self.voxel_width,
                    hash_function=hash_ecfp_pair,
                    feature_dict=splif_dict,
                    nb_channel=2**self.splif_power,
                )
                for splif_dict in featurize_splif((prot_xyz, prot_rdk), (
                    lig_xyz, lig_rdk), self.cutoffs['splif_contact_bins'],
                                                  distances, self.ecfp_degree)
            ]
        if feature_name == 'sybyl':

            def hash_sybyl_func(x):
                # The hash must be returned; without `return` this wrapper
                # always produced None.
                return hash_sybyl(x, sybyl_types=self.sybyl_types)

            return [
                voxelize(
                    convert_atom_to_voxel,
                    xyz,
                    box_width=self.box_width,
                    voxel_width=self.voxel_width,
                    hash_function=hash_sybyl_func,
                    feature_dict=sybyl_dict,
                    nb_channel=len(self.sybyl_types),
                ) for xyz, sybyl_dict in zip(
                    (prot_xyz, lig_xyz),
                    featurize_binding_pocket_sybyl(
                        prot_xyz,
                        prot_rdk,
                        lig_xyz,
                        lig_rdk,
                        distances,
                        cutoff=self.cutoffs['sybyl_cutoff']))
            ]
        if feature_name == 'salt_bridge':
            return [
                voxelize(
                    convert_atom_pair_to_voxel,
                    (prot_xyz, lig_xyz),
                    box_width=self.box_width,
                    voxel_width=self.voxel_width,
                    feature_list=compute_salt_bridges(
                        prot_rdk,
                        lig_rdk,
                        distances,
                        cutoff=self.cutoffs['salt_bridges_cutoff']),
                    nb_channel=1,
                )
            ]
        if feature_name == 'charge':
            # "np.float16" is not a dtype string NumPy recognises; "float16"
            # is. NOTE(review): assumes voxelize forwards `dtype` to NumPy
            # unchanged -- confirm against its implementation.
            return [
                sum([
                    voxelize(convert_atom_to_voxel,
                             xyz,
                             box_width=self.box_width,
                             voxel_width=self.voxel_width,
                             feature_dict=compute_charge_dictionary(mol),
                             nb_channel=1,
                             dtype="float16")
                    for xyz, mol in ((prot_xyz, prot_rdk), (lig_xyz, lig_rdk))
                ])
            ]
        if feature_name == 'hbond':
            return [
                voxelize(
                    convert_atom_pair_to_voxel,
                    (prot_xyz, lig_xyz),
                    box_width=self.box_width,
                    voxel_width=self.voxel_width,
                    feature_list=hbond_list,
                    nb_channel=2**0,
                ) for hbond_list in
                compute_hydrogen_bonds((prot_xyz, prot_rdk), (
                    lig_xyz,
                    lig_rdk), distances, self.cutoffs['hbond_dist_bins'],
                                       self.cutoffs['hbond_angle_cutoffs'])
            ]
        if feature_name == 'pi_stack':
            # Unlike the other branches, the helper's result is returned
            # as-is rather than wrapped in a list here.
            return voxelize_pi_stack(prot_xyz, prot_rdk, lig_xyz, lig_rdk,
                                     distances,
                                     self.cutoffs['pi_stack_dist_cutoff'],
                                     self.cutoffs['pi_stack_angle_cutoff'],
                                     self.box_width, self.voxel_width)
        if feature_name == 'cation_pi':
            return [
                sum([
                    voxelize(
                        convert_atom_to_voxel,
                        xyz,
                        box_width=self.box_width,
                        voxel_width=self.voxel_width,
                        feature_dict=cation_pi_dict,
                        nb_channel=1,
                    ) for xyz, cation_pi_dict in zip(
                        (prot_xyz, lig_xyz),
                        compute_binding_pocket_cation_pi(
                            prot_rdk,
                            lig_rdk,
                            dist_cutoff=self.cutoffs['cation_pi_dist_cutoff'],
                            angle_cutoff=self.
                            cutoffs['cation_pi_angle_cutoff'],
                        ))
                ])
            ]
        raise ValueError('Unknown feature type "%s"' % feature_name)