def _featurize(self, datapoint, **kwargs): # -> Optional[np.ndarray]: """ Compute featurization for a single mol/protein complex Parameters ---------- datapoint: Tuple[str, str] Filenames for molecule and protein. """ if 'complex' in kwargs: datapoint = kwargs.get("complex") raise DeprecationWarning( 'Complex is being phased out as a parameter, please pass "datapoint" instead.' ) try: fragments = rdkit_utils.load_complex(datapoint, add_hydrogens=False) except MoleculeLoadException: logger.warning( "This molecule cannot be loaded by Rdkit. Returning None") return None pairwise_features = [] # We compute pairwise contact fingerprints centroid = compute_contact_centroid(fragments, cutoff=self.cutoff) if self.reduce_to_contacts: fragments = reduce_molecular_complex_to_contacts( fragments, self.cutoff) for (frag1_ind, frag2_ind) in itertools.combinations(range(len(fragments)), 2): frag1, frag2 = fragments[frag1_ind], fragments[frag2_ind] distances = compute_pairwise_distances(frag1[0], frag2[0]) frag1_xyz = subtract_centroid(frag1[0], centroid) frag2_xyz = subtract_centroid(frag2[0], centroid) xyzs = [frag1_xyz, frag2_xyz] # rdks = [frag1[1], frag2[1]] pairwise_features.append( np.concatenate([ sum([ voxelize(convert_atom_pair_to_voxel, hash_function=None, box_width=self.box_width, voxel_width=self.voxel_width, coordinates=xyz, feature_list=hbond_list, nb_channel=1) for xyz in xyzs ]) for hbond_list in compute_hydrogen_bonds( frag1, frag2, distances, self.distance_bins, self.angle_cutoffs) ], axis=-1)) # Features are of shape (voxels_per_edge, voxels_per_edge, voxels_per_edge, 1) so we should concatenate on the last axis. return np.concatenate(pairwise_features, axis=-1)
def _featurize(self, mol_pdb: str, protein_pdb: str): """ Compute featurization for a molecular complex Parameters ---------- mol_pdb: str Filename for ligand molecule protein_pdb: str Filename for protein molecule """ molecular_complex = (mol_pdb, protein_pdb) try: fragments = load_complex(molecular_complex, add_hydrogens=False) except MoleculeLoadException: logger.warning( "This molecule cannot be loaded by Rdkit. Returning None") return None pairwise_features: List[np.ndarray] = [] # We compute pairwise contact fingerprints centroid = compute_contact_centroid(fragments, cutoff=self.cutoff) for (frag1, frag2) in itertools.combinations(fragments, 2): distances = compute_pairwise_distances(frag1[0], frag2[0]) frag1_xyz = subtract_centroid(frag1[0], centroid) frag2_xyz = subtract_centroid(frag2[0], centroid) xyzs = [frag1_xyz, frag2_xyz] pairwise_features.append( sum([ voxelize(convert_atom_to_voxel, xyz, self.box_width, self.voxel_width, hash_function=hash_ecfp, feature_dict=ecfp_dict, nb_channel=self.size) for xyz, ecfp_dict in zip( xyzs, featurize_contacts_ecfp(frag1, frag2, distances, cutoff=self.cutoff, ecfp_degree=self.radius)) ])) if self.flatten: return np.concatenate( [features.flatten() for features in pairwise_features]) else: # Features are of shape (voxels_per_edge, voxels_per_edge, # voxels_per_edge, num_feat) so we should concatenate on the last # axis. return np.concatenate(pairwise_features, axis=-1)
def _featurize(self, mol_pdb: str, protein_pdb: str) -> np.ndarray: """ Compute featurization for a single mol/protein complex Parameters ---------- mol_pdb: str Filename for ligand molecule protein_pdb: str Filename for protein molecule """ molecular_complex = (mol_pdb, protein_pdb) try: fragments = rdkit_utils.load_complex( molecular_complex, add_hydrogens=False) except MoleculeLoadException: logger.warning("This molecule cannot be loaded by Rdkit. Returning None") return None pairwise_features = [] # We compute pairwise contact fingerprints centroid = compute_contact_centroid(fragments, cutoff=self.cutoff) if self.reduce_to_contacts: fragments = reduce_molecular_complex_to_contacts(fragments, self.cutoff) for (frag1_ind, frag2_ind) in itertools.combinations( range(len(fragments)), 2): frag1, frag2 = fragments[frag1_ind], fragments[frag2_ind] distances = compute_pairwise_distances(frag1[0], frag2[0]) frag1_xyz = subtract_centroid(frag1[0], centroid) frag2_xyz = subtract_centroid(frag2[0], centroid) xyzs = [frag1_xyz, frag2_xyz] # rdks = [frag1[1], frag2[1]] pairwise_features.append( np.concatenate( [ sum([ voxelize( convert_atom_pair_to_voxel, hash_function=None, box_width=self.box_width, voxel_width=self.voxel_width, coordinates=xyz, feature_list=hbond_list, nb_channel=1) for xyz in xyzs ]) for hbond_list in compute_hydrogen_bonds( frag1, frag2, distances, self.distance_bins, self.angle_cutoffs) ], axis=-1)) # Features are of shape (voxels_per_edge, voxels_per_edge, voxels_per_edge, 1) so we should concatenate on the last axis. return np.concatenate(pairwise_features, axis=-1)
def _featurize(self, complex: Tuple[str, str]) -> Optional[np.ndarray]: """ Compute featurization for a single mol/protein complex Parameters ---------- complex: Tuple[str, str] Filenames for molecule and protein. """ try: fragments = rdkit_utils.load_complex(complex, add_hydrogens=False) except MoleculeLoadException: logger.warning( "This molecule cannot be loaded by Rdkit. Returning None") return None pairwise_features = [] # We compute pairwise contact fingerprints centroid = compute_contact_centroid(fragments, cutoff=self.cutoff) for (frag1_ind, frag2_ind) in itertools.combinations(range(len(fragments)), 2): frag1, frag2 = fragments[frag1_ind], fragments[frag2_ind] # distances = compute_pairwise_distances(frag1[0], frag2[0]) frag1_xyz = subtract_centroid(frag1[0], centroid) frag2_xyz = subtract_centroid(frag2[0], centroid) xyzs = [frag1_xyz, frag2_xyz] # rdks = [frag1[1], frag2[1]] pairwise_features.append( sum([ voxelize(convert_atom_to_voxel, hash_function=None, box_width=self.box_width, voxel_width=self.voxel_width, coordinates=xyz, feature_dict=cation_pi_dict, nb_channel=1) for xyz, cation_pi_dict in zip( xyzs, compute_binding_pocket_cation_pi( frag1[1], frag2[1], dist_cutoff=self.cutoff, angle_cutoff=self.angle_cutoff, )) ])) # Features are of shape (voxels_per_edge, voxels_per_edge, voxels_per_edge, 1) so we should concatenate on the last axis. return np.concatenate(pairwise_features, axis=-1)
def _featurize(self, datapoint, **kwargs): """ Compute featurization for a molecular complex Parameters ---------- datapoint: Tuple[str, str] Filenames for molecule and protein. """ if 'complex' in kwargs: datapoint = kwargs.get("complex") raise DeprecationWarning( 'Complex is being phased out as a parameter, please pass "datapoint" instead.' ) try: fragments = load_complex(datapoint, add_hydrogens=False) except MoleculeLoadException: logger.warning( "This molecule cannot be loaded by Rdkit. Returning None") return None pairwise_features = [] # We compute pairwise contact fingerprints centroid = compute_contact_centroid(fragments, cutoff=self.cutoff) for (frag1, frag2) in itertools.combinations(fragments, 2): distances = compute_pairwise_distances(frag1[0], frag2[0]) frag1_xyz = subtract_centroid(frag1[0], centroid) frag2_xyz = subtract_centroid(frag2[0], centroid) xyzs = [frag1_xyz, frag2_xyz] pairwise_features.append( np.concatenate([ voxelize(convert_atom_pair_to_voxel, hash_function=hash_ecfp_pair, coordinates=xyzs, box_width=self.box_width, voxel_width=self.voxel_width, feature_dict=splif_dict, nb_channel=self.size) for splif_dict in featurize_splif(frag1, frag2, self.contact_bins, distances, self.radius) ], axis=-1)) # Features are of shape (voxels_per_edge, voxels_per_edge, voxels_per_edge, 1) so we should concatenate on the last axis. return np.concatenate(pairwise_features, axis=-1)
def _featurize(self, mol_pdb: str, complex_pdb: str): """ Compute featurization for a molecular complex Parameters ---------- mol_pdb: str Filename for ligand molecule complex_pdb: str Filename for protein molecule """ molecular_complex = (mol_pdb, complex_pdb) try: fragments = load_complex(molecular_complex, add_hydrogens=False) except MoleculeLoadException: logger.warning( "This molecule cannot be loaded by Rdkit. Returning None") return None pairwise_features = [] # We compute pairwise contact fingerprints centroid = compute_contact_centroid(fragments, cutoff=self.cutoff) for (frag1, frag2) in itertools.combinations(fragments, 2): distances = compute_pairwise_distances(frag1[0], frag2[0]) frag1_xyz = subtract_centroid(frag1[0], centroid) frag2_xyz = subtract_centroid(frag2[0], centroid) xyzs = [frag1_xyz, frag2_xyz] pairwise_features.append( np.concatenate([ voxelize(convert_atom_pair_to_voxel, hash_function=hash_ecfp_pair, coordinates=xyzs, box_width=self.box_width, voxel_width=self.voxel_width, feature_dict=splif_dict, nb_channel=self.size) for splif_dict in featurize_splif(frag1, frag2, self.contact_bins, distances, self.radius) ], axis=-1)) # Features are of shape (voxels_per_edge, voxels_per_edge, voxels_per_edge, 1) so we should concatenate on the last axis. return np.concatenate(pairwise_features, axis=-1)