def _featurize(self, complex: Tuple[str, str]):
    """
    Build the contact-ECFP fingerprint vector for one molecular complex.

    Parameters
    ----------
    complex: Tuple[str, str]
        Filenames for molecule and protein.

    Returns
    -------
    The hashed ECFP vectors of every fragment pair joined with
    ``np.concatenate``, or ``None`` when the complex cannot be loaded.
    """
    try:
        fragments = load_complex(complex, add_hydrogens=False)
    except MoleculeLoadException:
        logger.warning(
            "This molecule cannot be loaded by Rdkit. Returning None")
        return None

    fingerprints = []
    # Walk over every unordered pair of fragments in the complex.
    for first, second in itertools.combinations(fragments, 2):
        # Element 0 of a fragment is handed over as its coordinates.
        pair_distances = compute_pairwise_distances(first[0], second[0])
        contact_dicts = featurize_contacts_ecfp(first,
                                                second,
                                                pair_distances,
                                                cutoff=self.cutoff,
                                                ecfp_degree=self.radius)
        # One hashed vector per dictionary returned for this pair.
        for contact_dict in contact_dicts:
            fingerprints.append(
                vectorize(hash_ecfp,
                          feature_dict=contact_dict,
                          size=self.size))
    return np.concatenate(fingerprints)
def _featurize(self, mol_pdb: str, complex_pdb: str):
    """
    Build the SPLIF fingerprint vector for one molecular complex.

    Parameters
    ----------
    mol_pdb: str
        Filename for ligand molecule
    complex_pdb: str
        Filename for protein molecule

    Returns
    -------
    The hashed SPLIF vectors of every fragment pair joined with
    ``np.concatenate``, or ``None`` when the complex cannot be loaded.
    """
    try:
        fragments = load_complex((mol_pdb, complex_pdb),
                                 add_hydrogens=False)
    except MoleculeLoadException:
        logger.warning(
            "This molecule cannot be loaded by Rdkit. Returning None")
        return None

    fingerprints = []
    # Walk over every unordered pair of fragments in the complex.
    for first, second in itertools.combinations(fragments, 2):
        # Element 0 of a fragment is handed over as its coordinates.
        pair_distances = compute_pairwise_distances(first[0], second[0])
        splif_dicts = featurize_splif(first, second, self.contact_bins,
                                      pair_distances, self.radius)
        # One hashed vector per dictionary returned for this pair.
        for splif_dict in splif_dicts:
            fingerprints.append(
                vectorize(hash_ecfp_pair,
                          feature_dict=splif_dict,
                          size=self.size))
    return np.concatenate(fingerprints)
def _featurize(self, datapoint, **kwargs):  # -> Optional[np.ndarray]:
    """
    Compute hydrogen-bond voxel features for a single mol/protein complex.

    Parameters
    ----------
    datapoint: Tuple[str, str]
        Filenames for molecule and protein.
    **kwargs
        Only the deprecated ``complex`` keyword is inspected; when given it
        overrides ``datapoint`` and a ``DeprecationWarning`` is emitted.

    Returns
    -------
    The per-pair voxel grids concatenated on the last axis, or ``None``
    when the complex cannot be loaded.
    """
    if 'complex' in kwargs:
        import warnings

        datapoint = kwargs.get("complex")
        # Warn instead of raising: raising made the assignment above dead
        # code and broke callers that still use the old keyword.
        warnings.warn(
            'Complex is being phased out as a parameter, please pass "datapoint" instead.',
            DeprecationWarning,
            stacklevel=2)

    try:
        fragments = rdkit_utils.load_complex(datapoint, add_hydrogens=False)
    except MoleculeLoadException:
        logger.warning(
            "This molecule cannot be loaded by Rdkit. Returning None")
        return None

    # The centroid is taken from the full complex, before any reduction to
    # the contact region.
    centroid = compute_contact_centroid(fragments, cutoff=self.cutoff)
    if self.reduce_to_contacts:
        fragments = reduce_molecular_complex_to_contacts(
            fragments, self.cutoff)

    pairwise_features = []
    for frag1, frag2 in itertools.combinations(fragments, 2):
        distances = compute_pairwise_distances(frag1[0], frag2[0])
        # Re-center both coordinate sets on the contact centroid.
        xyzs = [
            subtract_centroid(frag1[0], centroid),
            subtract_centroid(frag2[0], centroid),
        ]
        per_bin_grids = []
        for hbond_list in compute_hydrogen_bonds(frag1, frag2, distances,
                                                 self.distance_bins,
                                                 self.angle_cutoffs):
            # Sum the grids obtained from each fragment's coordinates so
            # every bond list contributes a single grid.
            per_bin_grids.append(
                sum(
                    voxelize(convert_atom_pair_to_voxel,
                             hash_function=None,
                             box_width=self.box_width,
                             voxel_width=self.voxel_width,
                             coordinates=xyz,
                             feature_list=hbond_list,
                             nb_channel=1) for xyz in xyzs))
        pairwise_features.append(np.concatenate(per_bin_grids, axis=-1))

    # Grids are expected to carry their channel on the last axis, so the
    # per-pair results are stacked along it.
    return np.concatenate(pairwise_features, axis=-1)
def _featurize(self, mol_pdb: str, protein_pdb: str):
    """
    Voxelize the contact ECFP features of one molecular complex.

    Parameters
    ----------
    mol_pdb: str
        Filename for ligand molecule
    protein_pdb: str
        Filename for protein molecule

    Returns
    -------
    ``None`` when the complex cannot be loaded. Otherwise the per-pair
    voxel grids, either flattened and joined (``self.flatten`` truthy) or
    concatenated on their last axis.
    """
    try:
        fragments = load_complex((mol_pdb, protein_pdb),
                                 add_hydrogens=False)
    except MoleculeLoadException:
        logger.warning(
            "This molecule cannot be loaded by Rdkit. Returning None")
        return None

    pair_grids: List[np.ndarray] = []
    centroid = compute_contact_centroid(fragments, cutoff=self.cutoff)
    for first, second in itertools.combinations(fragments, 2):
        pair_distances = compute_pairwise_distances(first[0], second[0])
        # Express both coordinate sets relative to the contact centroid.
        centered = [
            subtract_centroid(first[0], centroid),
            subtract_centroid(second[0], centroid),
        ]
        contact_dicts = featurize_contacts_ecfp(first,
                                                second,
                                                pair_distances,
                                                cutoff=self.cutoff,
                                                ecfp_degree=self.radius)
        # Each coordinate set is paired with the matching dictionary.
        grids = [
            voxelize(convert_atom_to_voxel,
                     xyz,
                     self.box_width,
                     self.voxel_width,
                     hash_function=hash_ecfp,
                     feature_dict=contact_dict,
                     nb_channel=self.size)
            for xyz, contact_dict in zip(centered, contact_dicts)
        ]
        pair_grids.append(sum(grids))

    if not self.flatten:
        # Grids are expected to be (voxels_per_edge, voxels_per_edge,
        # voxels_per_edge, num_feat), so join them on the last axis.
        return np.concatenate(pair_grids, axis=-1)
    return np.concatenate([grid.flatten() for grid in pair_grids])
def _featurize(self, mol_pdb: str,
               protein_pdb: str) -> "Optional[np.ndarray]":
    """
    Compute hydrogen-bond voxel features for a single mol/protein complex.

    Parameters
    ----------
    mol_pdb: str
        Filename for ligand molecule
    protein_pdb: str
        Filename for protein molecule

    Returns
    -------
    The per-pair voxel grids concatenated on the last axis, or ``None``
    when the complex cannot be loaded (hence the optional return type).
    """
    molecular_complex = (mol_pdb, protein_pdb)
    try:
        fragments = rdkit_utils.load_complex(molecular_complex,
                                             add_hydrogens=False)
    except MoleculeLoadException:
        logger.warning(
            "This molecule cannot be loaded by Rdkit. Returning None")
        return None

    # The centroid is taken from the full complex, before any reduction to
    # the contact region.
    centroid = compute_contact_centroid(fragments, cutoff=self.cutoff)
    if self.reduce_to_contacts:
        fragments = reduce_molecular_complex_to_contacts(
            fragments, self.cutoff)

    pairwise_features = []
    for frag1, frag2 in itertools.combinations(fragments, 2):
        distances = compute_pairwise_distances(frag1[0], frag2[0])
        # Re-center both coordinate sets on the contact centroid.
        xyzs = [
            subtract_centroid(frag1[0], centroid),
            subtract_centroid(frag2[0], centroid),
        ]
        per_bin_grids = []
        for hbond_list in compute_hydrogen_bonds(frag1, frag2, distances,
                                                 self.distance_bins,
                                                 self.angle_cutoffs):
            # Sum the grids obtained from each fragment's coordinates so
            # every bond list contributes a single grid.
            per_bin_grids.append(
                sum(
                    voxelize(convert_atom_pair_to_voxel,
                             hash_function=None,
                             box_width=self.box_width,
                             voxel_width=self.voxel_width,
                             coordinates=xyz,
                             feature_list=hbond_list,
                             nb_channel=1) for xyz in xyzs))
        pairwise_features.append(np.concatenate(per_bin_grids, axis=-1))

    # Grids are expected to carry their channel on the last axis, so the
    # per-pair results are stacked along it.
    return np.concatenate(pairwise_features, axis=-1)
def _featurize(self, complex: Tuple[str, str]) -> Optional[np.ndarray]:
    """
    Voxelize the cation-pi features of a single mol/protein complex.

    Parameters
    ----------
    complex: Tuple[str, str]
        Filenames for molecule and protein.

    Returns
    -------
    The per-pair voxel grids concatenated on the last axis, or ``None``
    when the complex cannot be loaded.
    """
    try:
        fragments = rdkit_utils.load_complex(complex, add_hydrogens=False)
    except MoleculeLoadException:
        logger.warning(
            "This molecule cannot be loaded by Rdkit. Returning None")
        return None

    pair_grids = []
    centroid = compute_contact_centroid(fragments, cutoff=self.cutoff)
    for first, second in itertools.combinations(fragments, 2):
        # Express both coordinate sets relative to the contact centroid.
        centered = [
            subtract_centroid(first[0], centroid),
            subtract_centroid(second[0], centroid),
        ]
        # Element 1 of each fragment is what the cation-pi helper consumes.
        cation_pi_dicts = compute_binding_pocket_cation_pi(
            first[1],
            second[1],
            dist_cutoff=self.cutoff,
            angle_cutoff=self.angle_cutoff,
        )
        # Each coordinate set is paired with the matching dictionary.
        grids = [
            voxelize(convert_atom_to_voxel,
                     hash_function=None,
                     box_width=self.box_width,
                     voxel_width=self.voxel_width,
                     coordinates=xyz,
                     feature_dict=cation_pi_dict,
                     nb_channel=1)
            for xyz, cation_pi_dict in zip(centered, cation_pi_dicts)
        ]
        pair_grids.append(sum(grids))

    # Grids are expected to be (voxels_per_edge, voxels_per_edge,
    # voxels_per_edge, 1), so join them on the last axis.
    return np.concatenate(pair_grids, axis=-1)
def _featurize(self, datapoint, **kwargs):
    """
    Compute SPLIF voxel features for a molecular complex.

    Parameters
    ----------
    datapoint: Tuple[str, str]
        Filenames for molecule and protein.
    **kwargs
        Only the deprecated ``complex`` keyword is inspected; when given it
        overrides ``datapoint`` and a ``DeprecationWarning`` is emitted.

    Returns
    -------
    The per-pair voxel grids concatenated on the last axis, or ``None``
    when the complex cannot be loaded.
    """
    if 'complex' in kwargs:
        import warnings

        datapoint = kwargs.get("complex")
        # Warn instead of raising: raising made the assignment above dead
        # code and broke callers that still use the old keyword.
        warnings.warn(
            'Complex is being phased out as a parameter, please pass "datapoint" instead.',
            DeprecationWarning,
            stacklevel=2)

    try:
        fragments = load_complex(datapoint, add_hydrogens=False)
    except MoleculeLoadException:
        logger.warning(
            "This molecule cannot be loaded by Rdkit. Returning None")
        return None

    pairwise_features = []
    centroid = compute_contact_centroid(fragments, cutoff=self.cutoff)
    for frag1, frag2 in itertools.combinations(fragments, 2):
        distances = compute_pairwise_distances(frag1[0], frag2[0])
        # Both re-centered coordinate sets go to the voxelizer together,
        # because the SPLIF features are keyed on atom pairs.
        xyzs = [
            subtract_centroid(frag1[0], centroid),
            subtract_centroid(frag2[0], centroid),
        ]
        per_bin_grids = [
            voxelize(convert_atom_pair_to_voxel,
                     hash_function=hash_ecfp_pair,
                     coordinates=xyzs,
                     box_width=self.box_width,
                     voxel_width=self.voxel_width,
                     feature_dict=splif_dict,
                     nb_channel=self.size)
            for splif_dict in featurize_splif(frag1, frag2,
                                              self.contact_bins, distances,
                                              self.radius)
        ]
        pairwise_features.append(np.concatenate(per_bin_grids, axis=-1))

    # Grids are expected to carry their channels on the last axis, so the
    # per-pair results are stacked along it.
    return np.concatenate(pairwise_features, axis=-1)
def _featurize(self, mol_pdb: str, complex_pdb: str):
    """
    Voxelize the SPLIF features of one molecular complex.

    Parameters
    ----------
    mol_pdb: str
        Filename for ligand molecule
    complex_pdb: str
        Filename for protein molecule

    Returns
    -------
    The per-pair voxel grids concatenated on the last axis, or ``None``
    when the complex cannot be loaded.
    """
    try:
        fragments = load_complex((mol_pdb, complex_pdb),
                                 add_hydrogens=False)
    except MoleculeLoadException:
        logger.warning(
            "This molecule cannot be loaded by Rdkit. Returning None")
        return None

    pair_grids = []
    centroid = compute_contact_centroid(fragments, cutoff=self.cutoff)
    for first, second in itertools.combinations(fragments, 2):
        pair_distances = compute_pairwise_distances(first[0], second[0])
        # Both re-centered coordinate sets are passed to the voxelizer
        # together as a two-element list.
        centered = [
            subtract_centroid(first[0], centroid),
            subtract_centroid(second[0], centroid),
        ]
        bin_grids = []
        for splif_dict in featurize_splif(first, second, self.contact_bins,
                                          pair_distances, self.radius):
            bin_grids.append(
                voxelize(convert_atom_pair_to_voxel,
                         hash_function=hash_ecfp_pair,
                         coordinates=centered,
                         box_width=self.box_width,
                         voxel_width=self.voxel_width,
                         feature_dict=splif_dict,
                         nb_channel=self.size))
        pair_grids.append(np.concatenate(bin_grids, axis=-1))

    # Grids are expected to carry their channels on the last axis, so the
    # per-pair results are joined along it.
    return np.concatenate(pair_grids, axis=-1)
def _featurize(self, complex: Tuple[str, str]) -> Optional[np.ndarray]:
    """
    Count hydrogen bonds for a single mol/protein complex.

    Parameters
    ----------
    complex: Tuple[str, str]
        Filenames for molecule and protein.

    Returns
    -------
    A concatenation of one count per bond list returned by
    ``compute_hydrogen_bonds`` for every fragment pair, or ``None`` when
    the complex cannot be loaded.
    """
    try:
        fragments = rdkit_utils.load_complex(complex, add_hydrogens=False)
    except MoleculeLoadException:
        logger.warning(
            "This molecule cannot be loaded by Rdkit. Returning None")
        return None

    if self.reduce_to_contacts:
        fragments = reduce_molecular_complex_to_contacts(
            fragments, self.cutoff)

    hbond_counts = []
    # Walk over every unordered pair of fragments in the complex.
    for first, second in itertools.combinations(fragments, 2):
        pair_distances = compute_pairwise_distances(first[0], second[0])
        bond_lists = compute_hydrogen_bonds(first, second, pair_distances,
                                            self.distance_bins,
                                            self.angle_cutoffs)
        # Each bond list collapses to a one-element array holding its length.
        counts = [np.array([len(bond_list)]) for bond_list in bond_lists]
        hbond_counts.append(np.concatenate(counts, axis=-1))

    # Every entry is a 1-D array of counts, so this yields one flat vector.
    return np.concatenate(hbond_counts, axis=-1)
def _featurize(self, datapoint, **kwargs):
    """
    Compute the contact-ECFP fingerprint vector for a molecular complex.

    Parameters
    ----------
    datapoint: Tuple[str, str]
        Filenames for molecule and protein.
    **kwargs
        Only the deprecated ``complex`` keyword is inspected; when given it
        overrides ``datapoint`` and a ``DeprecationWarning`` is emitted.

    Returns
    -------
    The hashed ECFP vectors of every fragment pair joined with
    ``np.concatenate``, or ``None`` when the complex cannot be loaded.
    """
    if 'complex' in kwargs:
        import warnings

        datapoint = kwargs.get("complex")
        # Warn instead of raising: raising made the assignment above dead
        # code and broke callers that still use the old keyword.
        warnings.warn(
            'Complex is being phased out as a parameter, please pass "datapoint" instead.',
            DeprecationWarning,
            stacklevel=2)

    try:
        fragments = load_complex(datapoint, add_hydrogens=False)
    except MoleculeLoadException:
        logger.warning(
            "This molecule cannot be loaded by Rdkit. Returning None")
        return None

    pairwise_features = []
    # Walk over every unordered pair of fragments in the complex.
    for frag1, frag2 in itertools.combinations(fragments, 2):
        # Element 0 of a fragment is handed over as its coordinates.
        distances = compute_pairwise_distances(frag1[0], frag2[0])
        pairwise_features.extend(
            vectorize(hash_ecfp, feature_dict=ecfp_dict, size=self.size)
            for ecfp_dict in featurize_contacts_ecfp(frag1,
                                                     frag2,
                                                     distances,
                                                     cutoff=self.cutoff,
                                                     ecfp_degree=self.radius))
    return np.concatenate(pairwise_features)
def test_get_contact_atom_indices(self):
    """Contact indices are reported for both members of the complex."""
    input_files = [self.protein_file, self.ligand_file]
    loaded = rdkit_utils.load_complex(input_files)
    # One entry is expected per loaded file.
    indices = get_contact_atom_indices(loaded)
    assert len(indices) == 2
def test_load_complex(self):
    """Loading a (protein, ligand) pair yields two entries."""
    input_files = (self.protein_file, self.ligand_file)
    # Hydrogen addition and charge calculation are switched off for this
    # load.
    loaded = rdkit_utils.load_complex(input_files,
                                      add_hydrogens=False,
                                      calc_charges=False)
    assert len(loaded) == 2