def _featurize(self, datapoint, **kwargs):  # -> Optional[np.ndarray]:
    """
    Compute featurization for a single mol/protein complex

    Parameters
    ----------
    datapoint: Tuple[str, str]
        Filenames for molecule and protein.
    **kwargs
        Only the deprecated ``complex`` key is inspected. When present, its
        value replaces ``datapoint`` and a ``DeprecationWarning`` is issued.

    Returns
    -------
    Optional[np.ndarray]
        The hydrogen-bond voxel features of every fragment pair, concatenated
        along the last axis, or ``None`` when ``rdkit_utils.load_complex``
        raises ``MoleculeLoadException``.
    """
    # Function-local import keeps this block self-contained.
    import warnings

    if 'complex' in kwargs:
        datapoint = kwargs.get("complex")
        # Warn instead of raising: raising here made the assignment above
        # dead code and turned a deprecation notice into a hard failure.
        warnings.warn(
            'Complex is being phased out as a parameter, please pass "datapoint" instead.',
            DeprecationWarning,
            stacklevel=2)

    try:
        fragments = rdkit_utils.load_complex(datapoint, add_hydrogens=False)
    except MoleculeLoadException:
        logger.warning(
            "This molecule cannot be loaded by Rdkit. Returning None")
        return None

    # The centroid is taken from the full complex, before any reduction to
    # contact atoms below.
    centroid = compute_contact_centroid(fragments, cutoff=self.cutoff)
    if self.reduce_to_contacts:
        fragments = reduce_molecular_complex_to_contacts(
            fragments, self.cutoff)

    # We compute pairwise contact fingerprints
    pairwise_features = []
    for frag1, frag2 in itertools.combinations(fragments, 2):
        distances = compute_pairwise_distances(frag1[0], frag2[0])
        xyzs = [
            subtract_centroid(frag1[0], centroid),
            subtract_centroid(frag2[0], centroid),
        ]
        # NOTE(review): each fragment's coordinates are handed to the
        # atom-pair voxelizer on their own, whereas _compute_feature passes a
        # (prot_xyz, lig_xyz) tuple for its 'hbond' feature - confirm intended.
        hbond_grids = [
            sum([
                voxelize(convert_atom_pair_to_voxel,
                         hash_function=None,
                         box_width=self.box_width,
                         voxel_width=self.voxel_width,
                         coordinates=xyz,
                         feature_list=hbond_list,
                         nb_channel=1) for xyz in xyzs
            ]) for hbond_list in compute_hydrogen_bonds(
                frag1, frag2, distances, self.distance_bins,
                self.angle_cutoffs)
        ]
        pairwise_features.append(np.concatenate(hbond_grids, axis=-1))

    # Features are of shape (voxels_per_edge, voxels_per_edge, voxels_per_edge, 1)
    # so we should concatenate on the last axis.
    return np.concatenate(pairwise_features, axis=-1)
def _featurize(self, mol_pdb: str, protein_pdb: str) -> Optional[np.ndarray]:
    """
    Compute featurization for a single mol/protein complex

    Parameters
    ----------
    mol_pdb: str
        Filename for ligand molecule
    protein_pdb: str
        Filename for protein molecule

    Returns
    -------
    Optional[np.ndarray]
        The hydrogen-bond voxel features of every fragment pair, concatenated
        along the last axis, or ``None`` when ``rdkit_utils.load_complex``
        raises ``MoleculeLoadException``.
    """
    molecular_complex = (mol_pdb, protein_pdb)
    try:
        fragments = rdkit_utils.load_complex(
            molecular_complex, add_hydrogens=False)
    except MoleculeLoadException:
        logger.warning("This molecule cannot be loaded by Rdkit. Returning None")
        return None

    # The centroid is taken from the full complex, before any reduction to
    # contact atoms below.
    centroid = compute_contact_centroid(fragments, cutoff=self.cutoff)
    if self.reduce_to_contacts:
        fragments = reduce_molecular_complex_to_contacts(fragments, self.cutoff)

    # We compute pairwise contact fingerprints
    pairwise_features = []
    for frag1, frag2 in itertools.combinations(fragments, 2):
        distances = compute_pairwise_distances(frag1[0], frag2[0])
        xyzs = [
            subtract_centroid(frag1[0], centroid),
            subtract_centroid(frag2[0], centroid),
        ]
        # NOTE(review): each fragment's coordinates are handed to the
        # atom-pair voxelizer on their own, whereas _compute_feature passes a
        # (prot_xyz, lig_xyz) tuple for its 'hbond' feature - confirm intended.
        hbond_grids = [
            sum([
                voxelize(
                    convert_atom_pair_to_voxel,
                    hash_function=None,
                    box_width=self.box_width,
                    voxel_width=self.voxel_width,
                    coordinates=xyz,
                    feature_list=hbond_list,
                    nb_channel=1) for xyz in xyzs
            ]) for hbond_list in compute_hydrogen_bonds(
                frag1, frag2, distances, self.distance_bins,
                self.angle_cutoffs)
        ]
        pairwise_features.append(np.concatenate(hbond_grids, axis=-1))

    # Features are of shape (voxels_per_edge, voxels_per_edge, voxels_per_edge, 1)
    # so we should concatenate on the last axis.
    return np.concatenate(pairwise_features, axis=-1)
def _featurize(self, complex: Tuple[str, str]) -> Optional[np.ndarray]:
    """
    Count hydrogen bonds between every pair of fragments in a complex.

    Parameters
    ----------
    complex: Tuple[str, str]
        Filenames for molecule and protein.

    Returns
    -------
    Optional[np.ndarray]
        The per-pair count vectors concatenated along the last axis, or
        ``None`` when ``rdkit_utils.load_complex`` raises
        ``MoleculeLoadException``.
    """
    try:
        loaded = rdkit_utils.load_complex(complex, add_hydrogens=False)
    except MoleculeLoadException:
        logger.warning(
            "This molecule cannot be loaded by Rdkit. Returning None")
        return None

    if self.reduce_to_contacts:
        loaded = reduce_molecular_complex_to_contacts(loaded, self.cutoff)

    per_pair_counts = []
    for first, second in itertools.combinations(loaded, 2):
        pair_distances = compute_pairwise_distances(first[0], second[0])
        hbond_lists = compute_hydrogen_bonds(first, second, pair_distances,
                                             self.distance_bins,
                                             self.angle_cutoffs)
        # One single-element array per hbond list, holding that list's length.
        count_arrays = [np.array([len(bonds)]) for bonds in hbond_lists]
        per_pair_counts.append(np.concatenate(count_arrays, axis=-1))

    # Every pair contributes a 1-D vector of counts, so joining on the last
    # axis yields one flat vector covering all pairs.
    return np.concatenate(per_pair_counts, axis=-1)
def _compute_feature(self, feature_name, prot_xyz, prot_rdk, lig_xyz, lig_rdk, distances): if feature_name == 'ecfp_ligand': return [ compute_ecfp_features(lig_rdk, self.ecfp_degree, self.ecfp_power) ] if feature_name == 'ecfp_hashed': return [ vectorize(hash_ecfp, feature_dict=ecfp_dict, size=2**self.ecfp_power) for ecfp_dict in featurize_contacts_ecfp( (prot_xyz, prot_rdk), (lig_xyz, lig_rdk), distances, cutoff=self.cutoffs['ecfp_cutoff'], ecfp_degree=self.ecfp_degree) ] if feature_name == 'splif_hashed': return [ vectorize(hash_ecfp_pair, feature_dict=splif_dict, size=2**self.splif_power) for splif_dict in featurize_splif((prot_xyz, prot_rdk), ( lig_xyz, lig_rdk), self.cutoffs['splif_contact_bins'], distances, self.ecfp_degree) ] if feature_name == 'hbond_count': return [ vectorize(hash_ecfp_pair, feature_list=hbond_list, size=2**0) for hbond_list in compute_hydrogen_bonds((prot_xyz, prot_rdk), ( lig_xyz, lig_rdk), distances, self.cutoffs['hbond_dist_bins'], self.cutoffs['hbond_angle_cutoffs']) ] if feature_name == 'ecfp': return [ sum([ voxelize( convert_atom_to_voxel, xyz, box_width=self.box_width, voxel_width=self.voxel_width, hash_function=hash_ecfp, feature_dict=ecfp_dict, nb_channel=2**self.ecfp_power, ) for xyz, ecfp_dict in zip( (prot_xyz, lig_xyz), featurize_contacts_ecfp( (prot_xyz, prot_rdk), (lig_xyz, lig_rdk), distances, cutoff=self.cutoffs['ecfp_cutoff'], ecfp_degree=self.ecfp_degree)) ]) ] if feature_name == 'splif': return [ voxelize( convert_atom_pair_to_voxel, (prot_xyz, lig_xyz), box_width=self.box_width, voxel_width=self.voxel_width, hash_function=hash_ecfp_pair, feature_dict=splif_dict, nb_channel=2**self.splif_power, ) for splif_dict in featurize_splif((prot_xyz, prot_rdk), ( lig_xyz, lig_rdk), self.cutoffs['splif_contact_bins'], distances, self.ecfp_degree) ] if feature_name == 'sybyl': def hash_sybyl_func(x): hash_sybyl(x, sybyl_types=self.sybyl_types) return [ voxelize( convert_atom_to_voxel, xyz, box_width=self.box_width, 
voxel_width=self.voxel_width, hash_function=hash_sybyl_func, feature_dict=sybyl_dict, nb_channel=len(self.sybyl_types), ) for xyz, sybyl_dict in zip( (prot_xyz, lig_xyz), featurize_binding_pocket_sybyl( prot_xyz, prot_rdk, lig_xyz, lig_rdk, distances, cutoff=self.cutoffs['sybyl_cutoff'])) ] if feature_name == 'salt_bridge': return [ voxelize( convert_atom_pair_to_voxel, (prot_xyz, lig_xyz), box_width=self.box_width, voxel_width=self.voxel_width, feature_list=compute_salt_bridges( prot_rdk, lig_rdk, distances, cutoff=self.cutoffs['salt_bridges_cutoff']), nb_channel=1, ) ] if feature_name == 'charge': return [ sum([ voxelize(convert_atom_to_voxel, xyz, box_width=self.box_width, voxel_width=self.voxel_width, feature_dict=compute_charge_dictionary(mol), nb_channel=1, dtype="np.float16") for xyz, mol in ((prot_xyz, prot_rdk), (lig_xyz, lig_rdk)) ]) ] if feature_name == 'hbond': return [ voxelize( convert_atom_pair_to_voxel, (prot_xyz, lig_xyz), box_width=self.box_width, voxel_width=self.voxel_width, feature_list=hbond_list, nb_channel=2**0, ) for hbond_list in compute_hydrogen_bonds((prot_xyz, prot_rdk), ( lig_xyz, lig_rdk), distances, self.cutoffs['hbond_dist_bins'], self.cutoffs['hbond_angle_cutoffs']) ] if feature_name == 'pi_stack': return voxelize_pi_stack(prot_xyz, prot_rdk, lig_xyz, lig_rdk, distances, self.cutoffs['pi_stack_dist_cutoff'], self.cutoffs['pi_stack_angle_cutoff'], self.box_width, self.voxel_width) if feature_name == 'cation_pi': return [ sum([ voxelize( convert_atom_to_voxel, xyz, box_width=self.box_width, voxel_width=self.voxel_width, feature_dict=cation_pi_dict, nb_channel=1, ) for xyz, cation_pi_dict in zip( (prot_xyz, lig_xyz), compute_binding_pocket_cation_pi( prot_rdk, lig_rdk, dist_cutoff=self.cutoffs['cation_pi_dist_cutoff'], angle_cutoff=self. cutoffs['cation_pi_angle_cutoff'], )) ]) ] raise ValueError('Unknown feature type "%s"' % feature_name)