def write_superposed_pdbs(self, output_pdb_folder, alignments: dict = None):
    """
    Superposes PDBs according to alignment and writes transformed PDBs to files (View with Pymol)

    The first entry of ``self.structures`` acts as the reference. It is translated so that
    the centroid of its core coordinates lies at the origin; every other structure is then
    superposed onto those centered core coordinates and written out.

    Parameters
    ----------
    output_pdb_folder
        folder the superposed PDB files are written to (one ``<name>.pdb`` per structure);
        created, including missing parents, if it does not exist
    alignments
        dict keyed by structure name; each value is indexed by alignment column and
        holds the index used to pick that structure's coordinate for the column.
        Columns in which any structure has -1 are excluded (presumably -1 marks a gap).
        Defaults to ``self.alignment``.
    """
    if alignments is None:
        alignments = self.alignment
    output_pdb_folder = Path(output_pdb_folder)
    # parents/exist_ok: nested output paths work and there is no exists()/mkdir() race.
    output_pdb_folder.mkdir(parents=True, exist_ok=True)

    reference_name = self.structures[0].name
    reference_pdb = pd.parsePDB(
        str(self.output_folder / f"cleaned_pdb/{reference_name}.pdb")
    )

    # "Core" columns are those where no structure has -1.
    core_indices = np.array(
        [
            i
            for i in range(len(alignments[reference_name]))
            if all(alignments[n][i] != -1 for n in alignments)
        ]
    )

    aln_ref = alignments[reference_name]
    ref_coords_core = (
        reference_pdb[helper.get_alpha_indices(reference_pdb)]
        .getCoords()
        .astype(np.float64)[np.array([aln_ref[c] for c in core_indices])]
    )
    # Center the reference on its core centroid; the fancy-indexed array above is a
    # copy, so the in-place subtraction does not touch reference_pdb itself.
    ref_centroid = helper.nb_mean_axis_0(ref_coords_core)
    ref_coords_core -= ref_centroid
    transformation = pd.Transformation(np.eye(3), -ref_centroid)
    reference_pdb = pd.applyTransformation(transformation, reference_pdb)
    pd.writePDB(str(output_pdb_folder / f"{reference_name}.pdb"), reference_pdb)

    # Superpose every remaining structure onto the centered reference core.
    for structure in self.structures[1:]:
        name = structure.name
        pdb = pd.parsePDB(str(self.output_folder / f"cleaned_pdb/{name}.pdb"))
        aln_name = alignments[name]
        common_coords_2 = (
            pdb[helper.get_alpha_indices(pdb)]
            .getCoords()
            .astype(np.float64)[np.array([aln_name[c] for c in core_indices])]
        )
        (
            rotation_matrix,
            translation_matrix,
        ) = superposition_functions.svd_superimpose(ref_coords_core, common_coords_2)
        # The rotation returned by svd_superimpose is passed transposed to ProDy.
        transformation = pd.Transformation(rotation_matrix.T, translation_matrix)
        pdb = pd.applyTransformation(transformation, pdb)
        pd.writePDB(str(output_pdb_folder / f"{name}.pdb"), pdb)
def get_dssp_features(protein_dssp):
    """
    Collects per-residue DSSP features from a protein that already carries DSSP data.

    Parameters
    ----------
    protein_dssp
        protein on which execDSSP has been called

    Returns
    -------
    dict mapping each feature label to a numpy array with one entry per index returned
    by helper.get_alpha_indices; a value is stored at position (dssp_resnum - 1) and
    positions without a value are filled with 0. Labels are "secondary" plus every data
    label starting with "dssp" that is not ignored:

    dssp_NH_O_1_index, NH_O_1_energy
        hydrogen bonds; e.g. -3,-1.4 means: if this residue is residue i then N-H of I
        is h-bonded to C=O of I-3 with an electrostatic H-bond energy of -1.4 kcal/mol.
        There are two columns for each type of H-bond, to allow for bifurcated H-bonds.
    NH_O_2_index, NH_O_2_energy
    O_NH_1_index, O_NH_1_energy
    O_NH_2_index, O_NH_2_energy
    acc
        number of water molecules in contact with this residue *10,
        or residue water exposed surface in Angstrom^2.
    alpha
        virtual torsion angle (dihedral angle) defined by the four Cα atoms of
        residues I-1,I,I+1,I+2. Used to define chirality.
    kappa
        virtual bond angle (bend angle) defined by the three Cα atoms of
        residues I-2,I,I+2. Used to define bend (structure code 'S').
    phi
        IUPAC peptide backbone torsion angle.
    psi
        IUPAC peptide backbone torsion angle.
    tco
        cosine of angle between C=O of residue I and C=O of residue I-1.
        For α-helices, TCO is near +1, for β-sheets TCO is near -1.

    Ignores dssp_bp1, dssp_bp2 and dssp_sheet_label (residue number of first and second
    bridge partner followed by one letter sheet label) as well as dssp_resnum, which is
    only used for positioning.
    """
    skipped_labels = ("dssp_bp1", "dssp_bp2", "dssp_sheet_label", "dssp_resnum")
    feature_labels = []
    for name in protein_dssp.getDataLabels():
        if name.startswith("dssp") and name not in skipped_labels:
            feature_labels.append(name)
    feature_labels.append("secondary")

    ca_indices = helper.get_alpha_indices(protein_dssp)
    residue_numbers = [protein_dssp[ca].getData("dssp_resnum") for ca in ca_indices]
    assert len(ca_indices) == len(residue_numbers)
    n_positions = len(ca_indices)

    features = {}
    for name in feature_labels:
        # dssp_resnum appears to be 1-based; shift it to a 0-based array position.
        value_at_position = {}
        for residue_number, ca in zip(residue_numbers, ca_indices):
            position = residue_number - 1
            value_at_position[position] = protein_dssp[ca].getData(name)
        features[name] = np.array(
            [value_at_position.get(position, 0) for position in range(n_positions)]
        )
    return features
def write_superposed_pdbs_reference(self, output_pdb_folder, alignments):
    """
    Superposes PDBs according to reference structure and writes transformed PDBs to files (View with Pymol)

    The reference is ``self.structures[self.reference_structure_index]``; it is written
    unchanged. Every other structure is superposed onto it using the coordinates
    returned by ``get_common_coordinates`` for the pair, and written out.

    Parameters
    ----------
    output_pdb_folder
        folder the PDB files are written to (one ``<name>.pdb`` per structure);
        created, including missing parents, if it does not exist
    alignments
        dict keyed by structure name holding that structure's alignment
    """
    # Accept str as well as Path, and make sure the target folder exists.
    output_pdb_folder = Path(output_pdb_folder)
    output_pdb_folder.mkdir(parents=True, exist_ok=True)

    reference_index = self.reference_structure_index
    reference_name = self.structures[reference_index].name
    reference_pdb = pd.parsePDB(
        str(self.output_folder / f"cleaned_pdb/{reference_name}.pdb")
    )
    aln_ref = alignments[reference_name]
    reference_coords = (
        reference_pdb[helper.get_alpha_indices(reference_pdb)]
        .getCoords()
        .astype(np.float64)
    )
    pd.writePDB(str(output_pdb_folder / f"{reference_name}.pdb"), reference_pdb)

    for i, structure in enumerate(self.structures):
        # Skip the reference itself rather than assuming it is structure 0; otherwise
        # a non-zero reference index would drop structure 0 and rewrite the reference.
        if i == reference_index:
            continue
        name = structure.name
        pdb = pd.parsePDB(str(self.output_folder / f"cleaned_pdb/{name}.pdb"))
        aln_name = alignments[name]
        common_coords_1, common_coords_2 = get_common_coordinates(
            reference_coords,
            pdb[helper.get_alpha_indices(pdb)].getCoords().astype(np.float64),
            aln_ref,
            aln_name,
        )
        (
            rotation_matrix,
            translation_matrix,
        ) = superposition_functions.svd_superimpose(common_coords_1, common_coords_2)
        # The rotation returned by svd_superimpose is passed transposed to ProDy.
        transformation = pd.Transformation(rotation_matrix.T, translation_matrix)
        pdb = pd.applyTransformation(transformation, pdb)
        pd.writePDB(str(output_pdb_folder / f"{name}.pdb"), pdb)
def get_fluctuations(protein: pd.AtomGroup, n_modes: int = 50):
    """
    Compute atom fluctuations from anisotropic (ANM) and Gaussian (GNM) network models.

    Both models are evaluated with n_modes modes, once on the atoms selected by
    helper.get_beta_indices (the ``*_cb`` entries) and once on the atoms selected by
    helper.get_alpha_indices (the ``*_ca`` entries).

    Parameters
    ----------
    protein
        atom group to compute fluctuations for
    n_modes
        number of modes passed to each network model

    Returns
    -------
    dict with keys anm_cb, gnm_cb, anm_ca, gnm_ca
    """
    cb_indices = helper.get_beta_indices(protein)
    ca_indices = helper.get_alpha_indices(protein)
    # (result key, atom indices, model function), in evaluation order.
    jobs = (
        ("anm_cb", cb_indices, get_anm_fluctuations),
        ("gnm_cb", cb_indices, get_gnm_fluctuations),
        ("anm_ca", ca_indices, get_anm_fluctuations),
        ("gnm_ca", ca_indices, get_gnm_fluctuations),
    )
    return {key: model(protein[indices], n_modes) for key, indices, model in jobs}