def get_chiral_bck_list(self) -> Dict[str, List[Residue]]:
    """ Collects the residues expected to have a chiral CA and flags wrong ones

        Only chains tagged as mu.PROTEIN in self.chain_ids are scanned. GLY
        residues and hetero residues (mu.is_hetatm) are left out.

        Returns:
            * {} when no protein chain is found
            * {'chiral_bck_list': []} when protein chains exist but no
              candidate residue is found (note the different key)
            * otherwise {'list': all candidates,
              'res_to_fix': candidates failing mu.check_chiral_ca}
        A message is printed in the two "skipping" cases.
    """
    candidates = []
    n_protein_chains = 0
    for chain in self.st.get_chains():
        if self.chain_ids[chain.id] != mu.PROTEIN:
            continue
        n_protein_chains += 1
        candidates.extend(
            res for res in chain.get_residues()
            if not (res.get_resname() == 'GLY' or mu.is_hetatm(res))
        )

    if n_protein_chains == 0:
        print("No protein chains detected, skipping")
        return {}

    if len(candidates) == 0:
        print("No residues with chiral CA found, skipping")
        return {'chiral_bck_list': []}

    # Keep the candidates whose CA chirality check does not pass
    to_fix = []
    for res in candidates:
        if not mu.check_chiral_ca(res):
            to_fix.append(res)
    return {'list': candidates, 'res_to_fix': to_fix}
def calc_stats(self):
    """ Gathers global counts for the structure and guesses CA-only models

        Sets on self:
            num_res: residues found in self.st
            num_ats: total of len(res.get_list()) over all residues
            num_wat: residues for which mu.is_wat is true
            res_hetats: residues for which mu.is_hetatm is true
            res_insc: residues for which mu.has_ins_code is true
            res_ligands: res_hetats - num_wat
            ca_only: True when the structure looks like a CA-only model
    """
    n_residues = n_atoms = n_hetero = n_waters = n_ins_codes = 0
    for res in self.st.get_residues():
        n_residues += 1
        if mu.is_wat(res):
            n_waters += 1
        if mu.is_hetatm(res):
            n_hetero += 1
        if mu.has_ins_code(res):
            n_ins_codes += 1
        n_atoms += len(res.get_list())

    self.num_res = n_residues
    self.num_ats = n_atoms
    self.res_hetats = n_hetero
    self.num_wat = n_waters
    self.res_insc = n_ins_codes
    self.res_ligands = n_hetero - n_waters

    # CA-only guess: with waters discounted, a full-atom structure should
    # have many more atoms than residues. polyGly (4 atoms per residue) is
    # taken as the lower limit for a full-atom structure.
    atoms_no_wat = n_atoms - n_waters
    residues_no_wat = n_residues - n_waters
    self.ca_only = atoms_no_wat < residues_no_wat * 4
def get_backbone_breaks(self) -> Dict[str, List[List[Residue]]]:
    """ Determines several backbone anomalies
        1. Modified residues
        2. Backbone breaks
        3. Backbone links not corresponding to sequence
        4. Too long residue links

        Walks every pair of neighbouring entries (res1, res2) of
        self.all_residues that belong to the same chain, and compares the
        pair with the links recorded in self.next_residue.

        Side effects:
            self.modified_residue_list is reset and filled with hetero
            residues whose recorded next residue is res2.

        Returns:
            Dict with three lists
            * 'bck_breaks_list': [res1, res2] where res1 has no entry in
              self.next_residue
            * 'not_link_seq_list': [res1, res2, dist] for the breaks where
              mu.seq_consecutive(res1, res2) holds; dist stays 0. when the
              atoms used for the measure are missing
            * 'wrong_link_list': [res1, self.next_residue[res1], res2] where
              res1 is linked to a residue other than res2
    """
    bck_breaks_list = []
    wrong_link_list = []
    not_link_seq_list = []
    self.modified_residue_list = []
    for i in range(0, len(self.all_residues) - 1):
        res1 = self.all_residues[i]
        res2 = self.all_residues[i + 1]
        # Pairs spanning two chains are never compared
        if not mu.same_chain(res1, res2):
            continue
        if mu.is_hetatm(res1) or mu.is_hetatm(res2):
            if res1 in self.next_residue:
                if self.next_residue[res1] == res2:
                    # Hetero residue linked to its neighbour: recorded as
                    # a modified residue
                    if mu.is_hetatm(res1):
                        self.modified_residue_list.append(res1)
            else:
                # Hetero pair with no recorded link from res1: ignored
                continue
        # Skip NA
        # TODO include NA Backbone
        if self.chain_ids[res1.get_parent().id] != mu.PROTEIN:
            continue
        if res1 not in self.next_residue:
            bck_breaks_list.append([res1, res2])
            if mu.seq_consecutive(res1, res2):
                dist = 0.
                # NOTE(review): the distance is taken from res1 'N' to
                # res2 'C'; a peptide link would be expected between
                # res1 'C' and res2 'N' - confirm this is intended
                if 'N' in res1 and 'C' in res2:
                    dist = res1['N'] - res2['C']
                not_link_seq_list.append([res1, res2, dist])
        else:
            if res2 != self.next_residue[res1]:
                wrong_link_list.append(
                    [res1, self.next_residue[res1], res2])
    return {
        'bck_breaks_list': bck_breaks_list,
        'wrong_link_list': wrong_link_list,
        'not_link_seq_list': not_link_seq_list
    }
def get_term_res(self) -> List[Tuple[str, Residue]]:
    """ Lists the terminal residues found in self.all_residues

        Hetero residues are ignored. A residue that is both N and C terminal
        produces two entries, 'N' first.

        Returns:
            List of tuples (label, residue), label being 'N' or 'C'
    """
    terminals = []
    for res in self.all_residues:
        if not mu.is_hetatm(res):
            if self.is_N_term(res):
                terminals.append(('N', res))
            if self.is_C_term(res):
                terminals.append(('C', res))
    return terminals
def add_hydrogens(self, ion_res_list, remove_h: bool = True):
    """ Adds hydrogen atoms to all non hetero residues

        Backbone hydrogens are added to every residue; side chain hydrogens
        follow the rules from self.data_library.get_add_h_rules(). GLY gets
        no side chain hydrogens, and residue names without rules are
        reported and skipped. Error messages returned by the helpers are
        printed together with the residue id.
        When finished, residues and atoms are renumbered and the structure
        is marked as modified.

        Args:
            **ion_res_list**: mapping indexed by residue giving the residue
                name (ionic/tautomeric option) to apply; the residue is
                renamed accordingly
            **remove_h**: Remove hydrogen atoms before adding the new ones
    """
    rules = self.data_library.get_add_h_rules()

    for res in self.all_residues:
        if mu.is_hetatm(res):
            continue

        if remove_h:
            mu.remove_H_from_r(res, verbose=False)

        # Residues with no recorded predecessor are passed None
        previous = self.prev_residue[res] if res in self.prev_residue else None
        backbone_error = mu.add_hydrogens_backbone(res, previous)
        if backbone_error:
            print(backbone_error, mu.residue_id(res))

        rcode = res.get_resname()
        if rcode == 'GLY':
            continue
        if rcode not in rules:
            print(NotAValidResidueError(rcode).message)
            continue

        if res not in ion_res_list:
            # No option selected: use the rules for the current name
            side_error = mu.add_hydrogens_side(
                res, self.res_library, rcode, rules[rcode])
        else:
            new_code = ion_res_list[res]
            if rcode != new_code:
                print('Replacing {} by {}'.format(mu.residue_id(res), new_code))
            side_error = mu.add_hydrogens_side(
                res, self.res_library, new_code, rules[rcode][new_code])
            res.resname = new_code

        if side_error:
            print(side_error, mu.residue_id(res))

    self.residue_renumbering()
    self.atom_renumbering()
    self.modified = True
def check_extra_atoms(self) -> List[Tuple[Residue, Atom]]:
    """ Builds the **list of extra atoms** found in the structure

        Only non hetero residues whose name is a valid 'protein' code are
        checked, against the atom lists from self.data_library.

        Returns:
            List of tuples (residue, extra atoms) as returned by
            mu.check_unk_at_in_r; residues with nothing extra are omitted
    """
    # TODO Nucleic acids
    protein_codes = self.data_library.get_valid_codes('protein')
    atom_lists = self.data_library.get_all_atom_lists()

    found = []
    for res in self.st.get_residues():
        res_name = res.get_resname()
        if res_name not in protein_codes or mu.is_hetatm(res):
            continue
        # Atom lists are indexed by residue name without blanks
        unknown_ats = mu.check_unk_at_in_r(
            res, atom_lists[res_name.replace(' ', '')])
        if unknown_ats:
            found.append((res, unknown_ats))
    return found
def guess_hetatm(self):
    """ Sorts hetero residues into self.hetatm by guessed type

        self.hetatm is rebuilt as a dict with an empty list for each of
        mu.UNKNOWN, mu.MODRES, mu.METAL, mu.ORGANIC, mu.COVORGANIC and
        mu.WAT. Every hetero residue is then added to the first one matching:
            * mu.WAT: mu.is_wat is true
            * mu.METAL: residue of length 1
            * mu.MODRES: residue containing 'N' or 'C'
            * mu.ORGANIC: anything else
        mu.UNKNOWN and mu.COVORGANIC are left empty here.
    """
    groups = (
        mu.UNKNOWN, mu.MODRES, mu.METAL, mu.ORGANIC, mu.COVORGANIC, mu.WAT
    )
    self.hetatm = {group: [] for group in groups}

    for res in self.st.get_residues():
        if not mu.is_hetatm(res):
            continue
        if mu.is_wat(res):
            kind = mu.WAT
        elif len(res) == 1:
            kind = mu.METAL
        elif 'N' in res or 'C' in res:
            # modified aminoacid candidate
            # TODO check connectivity with n-1 or n+1
            # TODO check modified nucleotides
            kind = mu.MODRES
        else:
            kind = mu.ORGANIC
        self.hetatm[kind].append(res)
def check_missing_atoms(
        self) -> List[Tuple[Residue, Dict[str, List[str]]]]:
    """ Builds the **list of missing atoms** in the structure

        Only non hetero residues whose name is a valid 'protein' code are
        checked, against the atom lists from self.data_library. A C-terminal
        residue lacking 'OXT' gets 'OXT' added to its 'backbone' entry.

        Returns:
            List of tuples (residue, missing atoms), missing atoms being the
            dict returned by mu.check_all_at_in_r, e.g.
            {"backbone": [atoms_list], "side": [atoms_list]}; residues with
            nothing missing are omitted
    """
    # TODO Nucleic acids
    protein_codes = self.data_library.get_valid_codes('protein')
    atom_lists = self.data_library.get_all_atom_lists()

    found = []
    for res in self.st.get_residues():
        res_name = res.get_resname()
        if res_name not in protein_codes or mu.is_hetatm(res):
            continue
        # Atom lists are indexed by residue name without blanks
        missing = mu.check_all_at_in_r(
            res, atom_lists[res_name.replace(' ', '')])
        if self.is_C_term(res) and 'OXT' not in res:
            missing.setdefault('backbone', []).append('OXT')
        if missing:
            found.append((res, missing))
    return found