def gn_comparer(self, gn1, gn2, protein_conformation):
    ''' Returns the signed difference between the sequence numbers of two residues of the same
        protein conformation: sequence number of the gn1 residue minus that of the gn2 residue.

        @param gn1: generic number of the first residue; passed through dgn() before the lookup \n
        @param gn2: generic number of the second residue; passed through dgn() before the lookup \n
        @param protein_conformation: protein conformation both residues are looked up in
    '''
    def residue_for(generic_number):
        # Residue.objects.get() raises if there is not exactly one matching residue.
        label = dgn(generic_number, protein_conformation)
        return Residue.objects.get(protein_conformation=protein_conformation,
                                   display_generic_number__label=label)

    first = residue_for(gn1)
    second = residue_for(gn2)
    return first.sequence_number - second.sequence_number
def fetch_residues_from_pdb(self, structure, generic_numbers, modify_bulges=False, just_nums=False):
    ''' Fetches the atoms of specific residues from the rotamer pdb records of a structure, by
        generic number (if generic number is not available then by residue number). Returns an
        OrderedDict() whose keys are the generic numbers written with '.' instead of 'x' (or the
        stringified residue number when there is no generic number) and whose values are lists
        of the atoms parsed for that residue.

        @param structure: Structure, Structure object where residues should be fetched from \n
        @param generic_numbers: list, list of generic numbers (or residue numbers) to be fetched \n
        @param modify_bulges: boolean, set it to true when used for bulge switching. E.g. you want a 5x461
        residue to be considered a 5x46 residue. \n
        @param just_nums: boolean, when true, residue numbers are not translated to generic numbers

        Raises IndexError when no rotamer is found for an entry, and AttributeError when several
        rotamers are found but none of them belongs to the preferred chain.
    '''
    output = OrderedDict()
    for gn in generic_numbers:
        if 'x' in str(gn):
            candidates = list(Rotamer.objects.filter(
                structure__protein_conformation=structure.protein_conformation,
                residue__display_generic_number__label=dgn(gn, structure.protein_conformation),
                structure__preferred_chain=structure.preferred_chain))
        else:
            # The query must run with the residue number, before gn is possibly replaced below.
            candidates = list(Rotamer.objects.filter(
                structure__protein_conformation=structure.protein_conformation,
                residue__sequence_number=gn,
                structure__preferred_chain=structure.preferred_chain))
            if not just_nums:
                try:
                    gn = ggn(Residue.objects.get(protein_conformation=structure.protein_conformation,
                                                 sequence_number=gn).display_generic_number.label)
                except Exception:
                    # Best effort: residues without a generic number stay keyed by residue number.
                    pass

        if len(candidates) > 1:
            # Several records for one residue: take the first non-compound record whose chain
            # identifier (column 22 of the pdb line) is one of the preferred chains.
            for candidate in candidates:
                pdb_text = candidate.pdbdata.pdb
                if not pdb_text.startswith('COMPND') and pdb_text[21] in structure.preferred_chain:
                    rotamer = candidate
                    break
            else:
                # Previously this case fell through and failed with an obscure
                # "'list' object has no attribute 'pdbdata'"; the exception type is kept.
                raise AttributeError('No rotamer for {} on preferred chain {}'.format(
                    gn, structure.preferred_chain))
        else:
            # Raises IndexError when the query returned nothing, as before.
            rotamer = candidates[0]

        io = StringIO(rotamer.pdbdata.pdb)
        rota_struct = PDB.PDBParser(QUIET=True).get_structure('structure', io)[0]
        # gn may be an integer residue number, which has neither len() nor replace().
        is_label = isinstance(gn, str)
        for chain in rota_struct:
            for residue in chain:
                atoms_list = list(residue)
                if modify_bulges and is_label and len(gn) == 5:
                    # Bulge label such as 5x461 is stored under its parent position 5.46.
                    key = gn.replace('x', '.')[:-1]
                elif is_label:
                    key = gn.replace('x', '.')
                else:
                    key = str(gn)
                output[key] = atoms_list
    return output
def get_residue_distance(self, residue1, residue2):
    ''' Returns the value of self.calculate_CA_distance() for two residues of self.structure that
        are given by generic number, and stores a csv line
        "<structure>,<state name>,<rounded distance>,<residue number 1>,<residue number 2>" in self.line.

        The residues are first taken from the rotamer records of the structure. If anything in
        that lookup fails, they are read from self.structure.pdb_data instead (structure types
        'refined' and 'hommod' only).

        @param residue1: generic number of the first residue \n
        @param residue2: generic number of the second residue \n
        Returns the distance, False (after printing an error message) when both strategies fail,
        or None when the rotamer records parse but contain no residue pair.
    '''
    try:
        parent_protein = self.structure.protein_conformation.protein.parent
        res1 = Residue.objects.get(protein_conformation__protein=parent_protein,
                                   display_generic_number__label=dgn(residue1, self.parent_prot_conf))
        res2 = Residue.objects.get(protein_conformation__protein=parent_protein,
                                   display_generic_number__label=dgn(residue2, self.parent_prot_conf))
        print(res1, res1.id, res2, res2.id)

        rota_structs = []
        for generic_number, res in ((residue1, res1), (residue2, res2)):
            rotamers = Rotamer.objects.filter(structure=self.structure,
                                              residue__sequence_number=res.sequence_number)
            if len(rotamers) == 0:
                # Nothing under the parent's sequence number: look the residue up by the
                # generic number of the structure's own protein conformation instead.
                rotamers = Rotamer.objects.filter(
                    structure=self.structure,
                    residue__display_generic_number__label=dgn(generic_number,
                                                               self.structure.protein_conformation))
            rotamer = right_rotamer_select(rotamers, self.structure.preferred_chain[0])
            rota_structs.append(
                PDB.PDBParser(QUIET=True).get_structure('structure', StringIO(rotamer.pdbdata.pdb))[0])

        for chain1, chain2 in zip(rota_structs[0], rota_structs[1]):
            for r1, r2 in zip(chain1, chain2):
                # Only the first residue pair is used; the distance is computed once.
                distance = self.calculate_CA_distance(r1, r2)
                self.line = '{},{},{},{},{}\n'.format(self.structure, self.structure.state.name,
                                                      round(distance, 2), r1.get_id()[1], r2.get_id()[1])
                return distance
    except Exception:
        # Any failure of the rotamer based lookup triggers the fallback on the full pdb data.
        try:
            res1 = Residue.objects.get(protein_conformation=self.parent_prot_conf,
                                       display_generic_number__label=dgn(residue1, self.parent_prot_conf))
            res2 = Residue.objects.get(protein_conformation=self.parent_prot_conf,
                                       display_generic_number__label=dgn(residue2, self.parent_prot_conf))
            if self.structure_type not in ('refined', 'hommod'):
                # Other structure types used to fail here through an unbound variable;
                # raising keeps them on the same "no matching rotamers" path.
                raise ValueError('Unsupported structure type: {}'.format(self.structure_type))
            io = StringIO(self.structure.pdb_data.pdb)
            struct = PDB.PDBParser(QUIET=True).get_structure('structure', io)[0]
            for chain in struct:
                # Only the first chain is examined; a missing residue raises and is reported below.
                r1 = chain[res1.sequence_number]
                r2 = chain[res2.sequence_number]
                distance = self.calculate_CA_distance(r1, r2)
                print(self.structure, r1.get_id()[1], r2.get_id()[1], distance, self.structure.state.name)
                self.line = '{},{},{},{},{}\n'.format(self.structure, self.structure.state.name,
                                                      round(distance, 2), r1.get_id()[1], r2.get_id()[1])
                return distance
        except Exception:
            print('Error: {} no matching rotamers ({}, {})'.format(self.structure.pdb_code.index, residue1, residue2))
            return False
def run_recog(self):
    ''' Detects helix constrictions and bulges in the preferred chain of self.pdb_struct, compares
        them with the anomalies annotated for self.structure in the database and prints a report.

        For every residue i inside one of the self.range windows, an angle in degrees is computed
        from the CA atoms of residues i, i+2, i+3 and i+5. A start residue followed by exactly two
        more residues with |angle| > 150 is reported as a constriction at i+2; a start residue
        followed by two more residues with |angle| < 100 is reported as a bulge at i+2.

        Reads self.pdb_struct, self.structure, self.range, self.verbose and self.parent_prot_conf.
        Returns nothing; all results are printed.
    '''
    chain = self.pdb_struct[self.structure.preferred_chain[0]]
    constrictions, bulges, values = OrderedDict(), OrderedDict(), OrderedDict()

    for r in chain:
        resnum = r.get_id()[1]
        if not any(ran[0] - 1 < resnum < ran[1] for ran in self.range):
            continue
        try:
            ca = r['CA'].get_coord()
            ca_plus2 = chain[resnum + 2]['CA'].get_coord()
            ca_plus3 = chain[resnum + 3]['CA'].get_coord()
            ca_plus5 = chain[resnum + 5]['CA'].get_coord()
        except KeyError:
            # A neighbouring residue or its CA atom is missing: no angle for this position.
            continue
        b0 = -1.0 * (ca_plus2 - ca)
        b1 = ca_plus3 - ca_plus2
        b2 = ca_plus5 - ca_plus3
        b0xb1 = np.cross(b0, b1)
        b1xb2 = np.cross(b2, b1)
        b0xb1_x_b1xb2 = np.cross(b0xb1, b1xb2)
        y = np.dot(b0xb1_x_b1xb2, b1) * (1.0 / np.linalg.norm(b1))
        x = np.dot(b0xb1, b1xb2)
        angle = np.degrees(np.arctan2(y, x))
        if self.verbose:
            print(chain[resnum + 2].get_id()[1], '-', chain[resnum + 3].get_id()[1], angle)
        values[resnum] = angle

    def count_run(start, max_extra, in_band):
        # Counts the start residue plus the directly following residues (at most max_extra)
        # that have an angle satisfying in_band; stops at the first gap or failing angle.
        count = 1
        for offset in range(1, max_extra + 1):
            following = values.get(start + offset)
            if following is None or not in_band(following):
                break
            count += 1
        return count

    for num, val in values.items():
        # Up to three followers are inspected for constrictions, so a run of four or more
        # gives a count of 4 and is not reported for this start residue.
        if abs(val) > 150 and count_run(num, 3, lambda a: abs(a) > 150) == 3:
            constrictions[num + 2] = [val]
        if abs(val) < 100 and count_run(num, 2, lambda a: abs(a) < 100) == 3:
            bulges[num + 2] = [val]

    found_c, missed_c, found_b, missed_b = OrderedDict(), OrderedDict(), OrderedDict(), OrderedDict()

    remove_c = []
    db_constrictions = self.structure.protein_anomalies.filter(anomaly_type__slug='constriction')
    db_constrictions_dict = OrderedDict()
    for c in db_constrictions:
        gn = c.generic_number.label
        # Decrement the whole position after the 'x' (5x50 -> 5x49); decrementing only the
        # last digit produced labels such as 5x5-1 for positions ending in 0.
        prefix, separator, position = gn.rpartition('x')
        prev_gn = prefix + separator + str(int(position) - 1).zfill(len(position))
        prev_resi = Residue.objects.get(protein_conformation=self.parent_prot_conf,
                                        display_generic_number__label=dgn(prev_gn, self.parent_prot_conf))
        db_constrictions_dict[gn] = prev_resi.sequence_number
        for detected in constrictions:
            if detected - 2 <= prev_resi.sequence_number <= detected + 3:
                if gn not in found_c:
                    found_c[gn] = prev_resi.sequence_number
                remove_c.append(detected)
        if gn not in found_c:
            missed_c[gn] = ''
    for detected in remove_c:
        # The same detection can match several database entries; pop() tolerates the repeats
        # that made "del" raise KeyError.
        constrictions.pop(detected, None)
    for detected in constrictions:
        if not any(detected - 2 <= value <= detected + 3 for value in found_c.values()):
            missed_c[detected] = detected

    remove_b = []
    db_bulges = self.structure.protein_anomalies.filter(anomaly_type__slug='bulge')
    db_bulges_dict = OrderedDict()
    for b in db_bulges:
        gn = b.generic_number.label
        resi = Residue.objects.get(protein_conformation=self.parent_prot_conf,
                                   display_generic_number__label=dgn(gn, self.parent_prot_conf))
        db_bulges_dict[gn] = resi.sequence_number
        for detected in bulges:
            # NOTE(review): this window ends at +2 while the check below uses +3 - confirm intent.
            if detected - 2 <= resi.sequence_number <= detected + 2:
                if gn not in found_b:
                    found_b[gn] = resi.sequence_number
                remove_b.append(detected)
        if gn not in found_b:
            missed_b[gn] = ''
    for detected in remove_b:
        bulges.pop(detected, None)
    for detected in bulges:
        if not any(detected - 2 <= value <= detected + 3 for value in found_b.values()):
            missed_b[detected] = detected

    print('#################')
    print(self.structure)
    print('DB constrictions: {}'.format(db_constrictions_dict))
    print('DB bulges: {}'.format(db_bulges_dict))
    print('Found constrictions: {}'.format(found_c))
    print('Missed constrictions: {}'.format(missed_c))
    print('Found bulges: {}'.format(found_b))
    print('Missed bulges: {}'.format(missed_b))
    # Integer keys are detections without a database counterpart: show the surrounding angles.
    for missed in (missed_c, missed_b):
        for key in missed:
            if isinstance(key, int):
                for i in range(key - 3, key + 3):
                    if i in values:
                        print(i, values[i])
def run_recog(self):
    ''' Detects helix constrictions and bulges in the preferred chain of self.pdb_struct, compares
        them with the anomalies annotated for self.structure in the database and prints a report.

        For every residue i inside one of the self.range windows, an angle in degrees is computed
        from the CA atoms of residues i, i+2, i+3 and i+5. A start residue followed by exactly two
        more residues with |angle| > 150 is reported as a constriction at i+2; a start residue
        followed by two more residues with |angle| < 100 is reported as a bulge at i+2.

        Database bulges whose generic number cannot be resolved
        (ResidueGenericNumberEquivalent.DoesNotExist) are skipped with a printed warning.

        Reads self.pdb_struct, self.structure, self.range, self.verbose and self.parent_prot_conf.
        Returns nothing; all results are printed.
    '''
    chain = self.pdb_struct[self.structure.preferred_chain[0]]
    constrictions, bulges, values = OrderedDict(), OrderedDict(), OrderedDict()

    for r in chain:
        resnum = r.get_id()[1]
        if not any(ran[0] - 1 < resnum < ran[1] for ran in self.range):
            continue
        try:
            ca = r['CA'].get_coord()
            ca_plus2 = chain[resnum + 2]['CA'].get_coord()
            ca_plus3 = chain[resnum + 3]['CA'].get_coord()
            ca_plus5 = chain[resnum + 5]['CA'].get_coord()
        except KeyError:
            # A neighbouring residue or its CA atom is missing: no angle for this position.
            continue
        b0 = -1.0 * (ca_plus2 - ca)
        b1 = ca_plus3 - ca_plus2
        b2 = ca_plus5 - ca_plus3
        b0xb1 = np.cross(b0, b1)
        b1xb2 = np.cross(b2, b1)
        b0xb1_x_b1xb2 = np.cross(b0xb1, b1xb2)
        y = np.dot(b0xb1_x_b1xb2, b1) * (1.0 / np.linalg.norm(b1))
        x = np.dot(b0xb1, b1xb2)
        angle = np.degrees(np.arctan2(y, x))
        if self.verbose:
            print(chain[resnum + 2].get_id()[1], '-', chain[resnum + 3].get_id()[1], angle)
        values[resnum] = angle

    def count_run(start, max_extra, in_band):
        # Counts the start residue plus the directly following residues (at most max_extra)
        # that have an angle satisfying in_band; stops at the first gap or failing angle.
        count = 1
        for offset in range(1, max_extra + 1):
            following = values.get(start + offset)
            if following is None or not in_band(following):
                break
            count += 1
        return count

    for num, val in values.items():
        # Up to three followers are inspected for constrictions, so a run of four or more
        # gives a count of 4 and is not reported for this start residue.
        if abs(val) > 150 and count_run(num, 3, lambda a: abs(a) > 150) == 3:
            constrictions[num + 2] = [val]
        if abs(val) < 100 and count_run(num, 2, lambda a: abs(a) < 100) == 3:
            bulges[num + 2] = [val]

    found_c, missed_c, found_b, missed_b = OrderedDict(), OrderedDict(), OrderedDict(), OrderedDict()

    remove_c = []
    db_constrictions = self.structure.protein_anomalies.filter(
        anomaly_type__slug='constriction')
    db_constrictions_dict = OrderedDict()
    for c in db_constrictions:
        gn = c.generic_number.label
        # Decrement the whole position after the 'x' (5x50 -> 5x49); decrementing only the
        # last digit produced labels such as 5x5-1 for positions ending in 0.
        prefix, separator, position = gn.rpartition('x')
        prev_gn = prefix + separator + str(int(position) - 1).zfill(len(position))
        prev_resi = Residue.objects.get(
            protein_conformation=self.parent_prot_conf,
            display_generic_number__label=dgn(prev_gn, self.parent_prot_conf))
        db_constrictions_dict[gn] = prev_resi.sequence_number
        for detected in constrictions:
            if detected - 2 <= prev_resi.sequence_number <= detected + 3:
                if gn not in found_c:
                    found_c[gn] = prev_resi.sequence_number
                remove_c.append(detected)
        if gn not in found_c:
            missed_c[gn] = ''
    for detected in remove_c:
        # The same detection can match several database entries; pop() tolerates the repeats
        # that made "del" raise KeyError.
        constrictions.pop(detected, None)
    for detected in constrictions:
        if not any(detected - 2 <= value <= detected + 3 for value in found_c.values()):
            missed_c[detected] = detected

    remove_b = []
    db_bulges = self.structure.protein_anomalies.filter(
        anomaly_type__slug='bulge')
    db_bulges_dict = OrderedDict()
    for b in db_bulges:
        gn = b.generic_number.label
        try:
            resi = Residue.objects.get(
                protein_conformation=self.parent_prot_conf,
                display_generic_number__label=dgn(gn, self.parent_prot_conf))
        except ResidueGenericNumberEquivalent.DoesNotExist:
            print(
                'Warning: {} ResidueGenericNumberEquivalent object missing from db'
                .format(gn))
            continue
        db_bulges_dict[gn] = resi.sequence_number
        for detected in bulges:
            # NOTE(review): this window ends at +2 while the check below uses +3 - confirm intent.
            if detected - 2 <= resi.sequence_number <= detected + 2:
                if gn not in found_b:
                    found_b[gn] = resi.sequence_number
                remove_b.append(detected)
        if gn not in found_b:
            missed_b[gn] = ''
    for detected in remove_b:
        bulges.pop(detected, None)
    for detected in bulges:
        if not any(detected - 2 <= value <= detected + 3 for value in found_b.values()):
            missed_b[detected] = detected

    print('#################')
    print(self.structure)
    print('DB constrictions: {}'.format(db_constrictions_dict))
    print('DB bulges: {}'.format(db_bulges_dict))
    print('Found constrictions: {}'.format(found_c))
    print('Missed constrictions: {}'.format(missed_c))
    print('Found bulges: {}'.format(found_b))
    print('Missed bulges: {}'.format(missed_b))
    # Integer keys are detections without a database counterpart: show the surrounding angles.
    for missed in (missed_c, missed_b):
        for key in missed:
            if isinstance(key, int):
                for i in range(key - 3, key + 3):
                    if i in values:
                        print(i, values[i])
def gn_comparer(self, gn1, gn2, protein_conformation):
    ''' Compares the positions of two generic numbers within one protein conformation. Returns
        the sequence number of the residue found for gn1 minus the sequence number of the
        residue found for gn2.

        @param gn1: generic number of the first residue; passed through dgn() before the lookup \n
        @param gn2: generic number of the second residue; passed through dgn() before the lookup \n
        @param protein_conformation: protein conformation both residues are looked up in
    '''
    # Same lookup for both generic numbers, gn1 first; get() raises unless exactly one
    # residue matches.
    sequence_numbers = [
        Residue.objects.get(
            protein_conformation=protein_conformation,
            display_generic_number__label=dgn(generic_number, protein_conformation),
        ).sequence_number
        for generic_number in (gn1, gn2)
    ]
    return sequence_numbers[0] - sequence_numbers[1]