def _featurize(self, datapoint, **kwargs):  # -> Optional[np.ndarray]:
    """
    Compute hydrogen-bond voxel features for a single mol/protein complex.

    Parameters
    ----------
    datapoint: Tuple[str, str]
        Filenames for molecule and protein.
    **kwargs
        Only the deprecated ``complex`` keyword is inspected. When present,
        its value replaces ``datapoint`` and a ``DeprecationWarning`` is
        issued.

    Returns
    -------
    Optional[np.ndarray]
        The features of every fragment pair concatenated along the last
        axis, or ``None`` when the complex cannot be loaded.
    """
    if 'complex' in kwargs:
        # Function-scope import keeps this block self-contained.
        import warnings

        datapoint = kwargs.get("complex")
        # Warn rather than raise: raising made the assignment above dead code
        # and turned the deprecated keyword into a hard failure.
        warnings.warn(
            'Complex is being phased out as a parameter, please pass "datapoint" instead.',
            DeprecationWarning,
            stacklevel=2)

    try:
        fragments = rdkit_utils.load_complex(datapoint, add_hydrogens=False)
    except MoleculeLoadException:
        logger.warning(
            "This molecule cannot be loaded by Rdkit. Returning None")
        return None

    # The centroid is taken from the complex as loaded, i.e. before the
    # optional reduction to contact atoms below.
    centroid = compute_contact_centroid(fragments, cutoff=self.cutoff)
    if self.reduce_to_contacts:
        fragments = reduce_molecular_complex_to_contacts(
            fragments, self.cutoff)

    # We compute pairwise contact fingerprints
    pairwise_features = []
    for frag1, frag2 in itertools.combinations(fragments, 2):
        distances = compute_pairwise_distances(frag1[0], frag2[0])
        xyzs = [
            subtract_centroid(frag1[0], centroid),
            subtract_centroid(frag2[0], centroid),
        ]
        hbond_lists = compute_hydrogen_bonds(frag1, frag2, distances,
                                             self.distance_bins,
                                             self.angle_cutoffs)
        # One channel per hydrogen-bond list; within a channel the voxel
        # grids of both fragments are added together.
        channels = [
            sum([
                voxelize(convert_atom_pair_to_voxel,
                         hash_function=None,
                         box_width=self.box_width,
                         voxel_width=self.voxel_width,
                         coordinates=xyz,
                         feature_list=hbond_list,
                         nb_channel=1) for xyz in xyzs
            ]) for hbond_list in hbond_lists
        ]
        pairwise_features.append(np.concatenate(channels, axis=-1))

    # Per the original author's note, each feature is of shape
    # (voxels_per_edge, voxels_per_edge, voxels_per_edge, 1), so we
    # concatenate on the last axis.
    return np.concatenate(pairwise_features, axis=-1)
def _featurize(self, mol_pdb: str,
               protein_pdb: str) -> "Optional[np.ndarray]":
    """
    Compute hydrogen-bond voxel features for a single mol/protein complex.

    Parameters
    ----------
    mol_pdb: str
        Filename for ligand molecule
    protein_pdb: str
        Filename for protein molecule

    Returns
    -------
    Optional[np.ndarray]
        The features of every fragment pair concatenated along the last
        axis, or ``None`` when the complex cannot be loaded.
    """
    molecular_complex = (mol_pdb, protein_pdb)
    try:
        fragments = rdkit_utils.load_complex(
            molecular_complex, add_hydrogens=False)
    except MoleculeLoadException:
        logger.warning("This molecule cannot be loaded by Rdkit. Returning None")
        return None

    # The centroid is taken from the complex as loaded, i.e. before the
    # optional reduction to contact atoms below.
    centroid = compute_contact_centroid(fragments, cutoff=self.cutoff)
    if self.reduce_to_contacts:
        fragments = reduce_molecular_complex_to_contacts(fragments, self.cutoff)

    # We compute pairwise contact fingerprints
    pairwise_features = []
    for frag1, frag2 in itertools.combinations(fragments, 2):
        distances = compute_pairwise_distances(frag1[0], frag2[0])
        xyzs = [
            subtract_centroid(frag1[0], centroid),
            subtract_centroid(frag2[0], centroid),
        ]
        hbond_lists = compute_hydrogen_bonds(
            frag1, frag2, distances, self.distance_bins, self.angle_cutoffs)
        # One channel per hydrogen-bond list; within a channel the voxel
        # grids of both fragments are added together.
        channels = [
            sum([
                voxelize(
                    convert_atom_pair_to_voxel,
                    hash_function=None,
                    box_width=self.box_width,
                    voxel_width=self.voxel_width,
                    coordinates=xyz,
                    feature_list=hbond_list,
                    nb_channel=1) for xyz in xyzs
            ]) for hbond_list in hbond_lists
        ]
        pairwise_features.append(np.concatenate(channels, axis=-1))

    # Per the original author's note, each feature is of shape
    # (voxels_per_edge, voxels_per_edge, voxels_per_edge, 1), so we
    # concatenate on the last axis.
    return np.concatenate(pairwise_features, axis=-1)
def _featurize(self, complex: Tuple[str, str]) -> Optional[np.ndarray]:
    """
    Compute salt-bridge voxel features for a single mol/protein complex.

    Parameters
    ----------
    complex: Tuple[str, str]
        Filenames for molecule and protein.

    Returns
    -------
    Optional[np.ndarray]
        The features of every fragment pair concatenated along the last
        axis, or ``None`` when the complex cannot be loaded.
    """
    try:
        fragments = rdkit_utils.load_complex(complex, add_hydrogens=False)
    except MoleculeLoadException:
        logger.warning(
            "This molecule cannot be loaded by Rdkit. Returning None")
        return None

    # The centroid is taken from the complex as loaded, i.e. before the
    # optional reduction to contact atoms below.
    centroid = compute_contact_centroid(fragments, cutoff=self.cutoff)
    if self.reduce_to_contacts:
        fragments = reduce_molecular_complex_to_contacts(
            fragments, self.cutoff)

    # We compute pairwise contact fingerprints
    pairwise_features = []
    for frag1, frag2 in itertools.combinations(fragments, 2):
        distances = compute_pairwise_distances(frag1[0], frag2[0])
        # The salt bridges depend only on the fragment pair, so compute them
        # once here instead of once per coordinate set.
        # NOTE(review): assumes voxelize does not mutate feature_list.
        salt_bridges = compute_salt_bridges(frag1[1],
                                            frag2[1],
                                            distances,
                                            cutoff=self.cutoff)
        xyzs = [
            subtract_centroid(frag1[0], centroid),
            subtract_centroid(frag2[0], centroid),
        ]
        # The voxel grids of both fragments are added into a single channel.
        pairwise_features.append(
            sum([
                voxelize(convert_atom_pair_to_voxel,
                         hash_function=None,
                         coordinates=xyz,
                         box_width=self.box_width,
                         voxel_width=self.voxel_width,
                         feature_list=salt_bridges,
                         nb_channel=1) for xyz in xyzs
            ]))

    # Per the original author's note, each feature is of shape
    # (voxels_per_edge, voxels_per_edge, voxels_per_edge, 1), so we
    # concatenate on the last axis.
    return np.concatenate(pairwise_features, axis=-1)
def _featurize(self, complex: Tuple[str, str]) -> Optional[np.ndarray]:
    """
    Count hydrogen bonds between every pair of fragments in a complex.

    Parameters
    ----------
    complex: Tuple[str, str]
        Filenames for molecule and protein.

    Returns
    -------
    Optional[np.ndarray]
        For each fragment pair, the length of every list returned by
        ``compute_hydrogen_bonds``, all joined into one array along the
        last axis. ``None`` when the complex cannot be loaded.
    """
    try:
        fragments = rdkit_utils.load_complex(complex, add_hydrogens=False)
    except MoleculeLoadException:
        logger.warning(
            "This molecule cannot be loaded by Rdkit. Returning None")
        return None

    if self.reduce_to_contacts:
        fragments = reduce_molecular_complex_to_contacts(
            fragments, self.cutoff)

    # One count vector per unordered pair of fragments.
    pair_counts = []
    for first, second in itertools.combinations(fragments, 2):
        pair_distances = compute_pairwise_distances(first[0], second[0])
        bond_groups = compute_hydrogen_bonds(first, second, pair_distances,
                                             self.distance_bins,
                                             self.angle_cutoffs)
        # Each group contributes a one-element array holding its size.
        group_sizes = [np.array([len(group)]) for group in bond_groups]
        pair_counts.append(np.concatenate(group_sizes, axis=-1))

    # Join the per-pair count vectors on the last axis.
    return np.concatenate(pair_counts, axis=-1)