예제 #1
0
 def gn_comparer(self, gn1, gn2, protein_conformation):
     ''' Returns the signed difference in sequence number between the residues
         identified by two generic numbers.

         Each generic number is passed through dgn() together with the protein
         conformation, and the result is matched against the
         display_generic_number label of a Residue of that conformation.

         @param gn1: generic number of the first residue \n
         @param gn2: generic number of the second residue \n
         @param protein_conformation: conformation both residues are looked up in \n
         @return: sequence_number of the gn1 residue minus sequence_number of the
         gn2 residue (negative when the gn1 residue has the smaller number).
     '''
     def sequence_number_of(generic_number):
         # Residue.objects.get() is expected to find exactly one residue;
         # presumably it raises otherwise (Django-style manager) - verify.
         label = dgn(generic_number, protein_conformation)
         hit = Residue.objects.get(protein_conformation=protein_conformation,
                                   display_generic_number__label=label)
         return hit.sequence_number

     return sequence_number_of(gn1) - sequence_number_of(gn2)
 def fetch_residues_from_pdb(self, structure, generic_numbers, modify_bulges=False, just_nums=False):
     ''' Fetches the atoms of specific residues from the rotamer PDB records of a structure
         by generic number (if generic number is not available then by residue number).
         Returns an OrderedDict() with one entry per fetched residue: the key is the generic
         number with 'x' replaced by '.' (or the residue number as a string), the value is
         the list of atoms parsed from the rotamer record of that residue.

         @param structure: Structure, Structure object where residues should be fetched from \n
         @param generic_numbers: list, list of generic numbers (entries containing 'x') and/or
         residue sequence numbers to be fetched \n
         @param modify_bulges: boolean, set it to true when used for bulge switching. E.g. you want a 5x461
         residue to be considered a 5x46 residue. \n
         @param just_nums: boolean, when true residue numbers are kept as keys instead of being
         translated to generic numbers \n
         @raise IndexError: when no rotamer is stored for a requested residue.
     '''
     output = OrderedDict()
     for gn in generic_numbers:
         if 'x' in str(gn):
             rotamers = list(Rotamer.objects.filter(
                 structure__protein_conformation=structure.protein_conformation,
                 residue__display_generic_number__label=dgn(gn, structure.protein_conformation),
                 structure__preferred_chain=structure.preferred_chain))
         else:
             rotamers = list(Rotamer.objects.filter(
                 structure__protein_conformation=structure.protein_conformation,
                 residue__sequence_number=gn,
                 structure__preferred_chain=structure.preferred_chain))
             if not just_nums:
                 try:
                     gn = ggn(Residue.objects.get(protein_conformation=structure.protein_conformation,
                                                  sequence_number=gn).display_generic_number.label)
                 except Exception:
                     # Best effort: when the residue has no usable generic number the
                     # residue number itself stays the key.
                     pass
         if len(rotamers) > 1:
             # Several records for one residue: take the first one that does not start
             # with a COMPND record and whose column-22 character is a preferred chain.
             # NOTE(review): when none qualifies the list itself is kept and the
             # attribute access below fails, exactly as before - confirm whether a
             # fallback is wanted.
             rotamer = rotamers
             for candidate in rotamers:
                 pdb_text = candidate.pdbdata.pdb
                 if not pdb_text.startswith('COMPND') and pdb_text[21] in structure.preferred_chain:
                     rotamer = candidate
                     break
         else:
             rotamer = rotamers[0]
         io = StringIO(rotamer.pdbdata.pdb)
         rota_struct = PDB.PDBParser(QUIET=True).get_structure('structure', io)[0]
         for chain in rota_struct:
             for residue in chain:
                 atoms = list(residue)
                 # Only string generic numbers can be bulge labels; a plain residue
                 # number used to raise TypeError on len() here.
                 if modify_bulges and isinstance(gn, str) and len(gn) == 5:
                     key = gn.replace('x', '.')[:-1]
                 else:
                     key = str(gn).replace('x', '.')
                 output[key] = atoms
     return output
 def fetch_residues_from_pdb(self, structure, generic_numbers, modify_bulges=False, just_nums=False):
     ''' Fetches the atoms of specific residues from the rotamer PDB records of a structure
         by generic number (if generic number is not available then by residue number).
         Returns an OrderedDict() with one entry per fetched residue: the key is the generic
         number with 'x' replaced by '.' (or the residue number as a string), the value is
         the list of atoms parsed from the rotamer record of that residue.

         @param structure: Structure, Structure object where residues should be fetched from \n
         @param generic_numbers: list, list of generic numbers (entries containing 'x') and/or
         residue sequence numbers to be fetched \n
         @param modify_bulges: boolean, set it to true when used for bulge switching. E.g. you want a 5x461
         residue to be considered a 5x46 residue. \n
         @param just_nums: boolean, when true residue numbers are kept as keys instead of being
         translated to generic numbers \n
         @raise IndexError: when no rotamer is stored for a requested residue.
     '''
     output = OrderedDict()
     for gn in generic_numbers:
         if 'x' in str(gn):
             rotamers = list(Rotamer.objects.filter(
                 structure__protein_conformation=structure.protein_conformation,
                 residue__display_generic_number__label=dgn(gn, structure.protein_conformation),
                 structure__preferred_chain=structure.preferred_chain))
         else:
             rotamers = list(Rotamer.objects.filter(
                 structure__protein_conformation=structure.protein_conformation,
                 residue__sequence_number=gn,
                 structure__preferred_chain=structure.preferred_chain))
             if not just_nums:
                 try:
                     gn = ggn(Residue.objects.get(protein_conformation=structure.protein_conformation,
                                                  sequence_number=gn).display_generic_number.label)
                 except Exception:
                     # Best effort: when the residue has no usable generic number the
                     # residue number itself stays the key.
                     pass
         if len(rotamers) > 1:
             # Several records for one residue: take the first one that does not start
             # with a COMPND record and whose column-22 character is a preferred chain.
             # NOTE(review): when none qualifies the list itself is kept and the
             # attribute access below fails, exactly as before - confirm whether a
             # fallback is wanted.
             rotamer = rotamers
             for candidate in rotamers:
                 pdb_text = candidate.pdbdata.pdb
                 if not pdb_text.startswith('COMPND') and pdb_text[21] in structure.preferred_chain:
                     rotamer = candidate
                     break
         else:
             rotamer = rotamers[0]
         io = StringIO(rotamer.pdbdata.pdb)
         rota_struct = PDB.PDBParser(QUIET=True).get_structure('structure', io)[0]
         for chain in rota_struct:
             for residue in chain:
                 atoms = list(residue)
                 # Only string generic numbers can be bulge labels; a plain residue
                 # number used to raise TypeError on len() here.
                 if modify_bulges and isinstance(gn, str) and len(gn) == 5:
                     key = gn.replace('x', '.')[:-1]
                 else:
                     key = str(gn).replace('x', '.')
                 output[key] = atoms
     return output
예제 #4
0
    def get_residue_distance(self, residue1, residue2):
        ''' Returns the value of self.calculate_CA_distance() for the two residues of
            self.structure identified by generic numbers.

            The residues are first taken from the Rotamer records of the structure. If any
            step of that path fails, they are read from self.structure.pdb_data.pdb instead
            (only for structure_type 'refined' or 'hommod'). On success self.line is set to
            "<structure>,<state name>,<distance rounded to 2 decimals>,<residue id 1>,<residue id 2>\\n".

            @param residue1: generic number of the first residue (passed to dgn()) \n
            @param residue2: generic number of the second residue (passed to dgn()) \n
            @return: the distance; False when both lookups fail (an error line is printed);
            None when the parsed records contain no residue pair to measure.
        '''
        try:
            parent_protein = self.structure.protein_conformation.protein.parent
            res1 = Residue.objects.get(protein_conformation__protein=parent_protein, display_generic_number__label=dgn(residue1, self.parent_prot_conf))
            res2 = Residue.objects.get(protein_conformation__protein=parent_protein, display_generic_number__label=dgn(residue2, self.parent_prot_conf))
            print(res1, res1.id, res2, res2.id)
            rotamers = []
            for generic_number, res in ((residue1, res1), (residue2, res2)):
                # Prefer the match by sequence number; an empty or failing query falls
                # back to the match by display generic number of the structure itself.
                try:
                    candidates = Rotamer.objects.filter(structure=self.structure, residue__sequence_number=res.sequence_number)
                    found_by_number = len(candidates) > 0
                except Exception:
                    found_by_number = False
                if not found_by_number:
                    candidates = Rotamer.objects.filter(structure=self.structure, residue__display_generic_number__label=dgn(generic_number, self.structure.protein_conformation))
                rotamers.append(right_rotamer_select(candidates, self.structure.preferred_chain[0]))
            rota_struct1, rota_struct2 = [
                PDB.PDBParser(QUIET=True).get_structure('structure', StringIO(rota.pdbdata.pdb))[0]
                for rota in rotamers
            ]

            for chain1, chain2 in zip(rota_struct1, rota_struct2):
                for r1, r2 in zip(chain1, chain2):
                    # Only the first residue pair is measured.
                    # Assumes calculate_CA_distance() has no side effects, so one call
                    # replaces the repeated calls - TODO confirm.
                    distance = self.calculate_CA_distance(r1, r2)
                    self.line = '{},{},{},{},{}\n'.format(self.structure, self.structure.state.name, round(distance, 2), r1.get_id()[1], r2.get_id()[1])
                    return distance
        except Exception:
            try:
                res1 = Residue.objects.get(protein_conformation=self.parent_prot_conf, display_generic_number__label=dgn(residue1, self.parent_prot_conf))
                res2 = Residue.objects.get(protein_conformation=self.parent_prot_conf, display_generic_number__label=dgn(residue2, self.parent_prot_conf))
                if self.structure_type not in ('refined', 'hommod'):
                    # Reported through the handler below, like any other failure.
                    raise ValueError('unsupported structure_type: {}'.format(self.structure_type))
                io = StringIO(self.structure.pdb_data.pdb)
                struct = PDB.PDBParser(QUIET=True).get_structure('structure', io)[0]
                for chain in struct:
                    # Only the first chain is consulted; a residue missing from it
                    # ends up in the error handler.
                    r1 = chain[res1.sequence_number]
                    r2 = chain[res2.sequence_number]
                    distance = self.calculate_CA_distance(r1, r2)
                    print(self.structure, r1.get_id()[1], r2.get_id()[1], distance, self.structure.state.name)
                    self.line = '{},{},{},{},{}\n'.format(self.structure, self.structure.state.name, round(distance, 2), r1.get_id()[1], r2.get_id()[1])
                    return distance

            except Exception:
                print('Error: {} no matching rotamers ({}, {})'.format(self.structure.pdb_code.index, residue1, residue2))
                return False
예제 #5
0
    def get_residue_distance(self, residue1, residue2):
        ''' Returns the value of self.calculate_CA_distance() for the two residues of
            self.structure identified by generic numbers.

            The residues are first taken from the Rotamer records of the structure. If any
            step of that path fails, they are read from self.structure.pdb_data.pdb instead
            (only for structure_type 'refined' or 'hommod'). On success self.line is set to
            "<structure>,<state name>,<distance rounded to 2 decimals>,<residue id 1>,<residue id 2>\\n".

            @param residue1: generic number of the first residue (passed to dgn()) \n
            @param residue2: generic number of the second residue (passed to dgn()) \n
            @return: the distance; False when both lookups fail (an error line is printed);
            None when the parsed records contain no residue pair to measure.
        '''
        try:
            parent_protein = self.structure.protein_conformation.protein.parent
            res1 = Residue.objects.get(protein_conformation__protein=parent_protein, display_generic_number__label=dgn(residue1, self.parent_prot_conf))
            res2 = Residue.objects.get(protein_conformation__protein=parent_protein, display_generic_number__label=dgn(residue2, self.parent_prot_conf))
            print(res1, res1.id, res2, res2.id)
            rotamers = []
            for generic_number, res in ((residue1, res1), (residue2, res2)):
                # Prefer the match by sequence number; an empty or failing query falls
                # back to the match by display generic number of the structure itself.
                try:
                    candidates = Rotamer.objects.filter(structure=self.structure, residue__sequence_number=res.sequence_number)
                    found_by_number = len(candidates) > 0
                except Exception:
                    found_by_number = False
                if not found_by_number:
                    candidates = Rotamer.objects.filter(structure=self.structure, residue__display_generic_number__label=dgn(generic_number, self.structure.protein_conformation))
                rotamers.append(right_rotamer_select(candidates, self.structure.preferred_chain[0]))
            rota_struct1, rota_struct2 = [
                PDB.PDBParser(QUIET=True).get_structure('structure', StringIO(rota.pdbdata.pdb))[0]
                for rota in rotamers
            ]

            for chain1, chain2 in zip(rota_struct1, rota_struct2):
                for r1, r2 in zip(chain1, chain2):
                    # Only the first residue pair is measured.
                    # Assumes calculate_CA_distance() has no side effects, so one call
                    # replaces the repeated calls - TODO confirm.
                    distance = self.calculate_CA_distance(r1, r2)
                    self.line = '{},{},{},{},{}\n'.format(self.structure, self.structure.state.name, round(distance, 2), r1.get_id()[1], r2.get_id()[1])
                    return distance
        except Exception:
            try:
                res1 = Residue.objects.get(protein_conformation=self.parent_prot_conf, display_generic_number__label=dgn(residue1, self.parent_prot_conf))
                res2 = Residue.objects.get(protein_conformation=self.parent_prot_conf, display_generic_number__label=dgn(residue2, self.parent_prot_conf))
                if self.structure_type not in ('refined', 'hommod'):
                    # Reported through the handler below, like any other failure.
                    raise ValueError('unsupported structure_type: {}'.format(self.structure_type))
                io = StringIO(self.structure.pdb_data.pdb)
                struct = PDB.PDBParser(QUIET=True).get_structure('structure', io)[0]
                for chain in struct:
                    # Only the first chain is consulted; a residue missing from it
                    # ends up in the error handler.
                    r1 = chain[res1.sequence_number]
                    r2 = chain[res2.sequence_number]
                    distance = self.calculate_CA_distance(r1, r2)
                    print(self.structure, r1.get_id()[1], r2.get_id()[1], distance, self.structure.state.name)
                    self.line = '{},{},{},{},{}\n'.format(self.structure, self.structure.state.name, round(distance, 2), r1.get_id()[1], r2.get_id()[1])
                    return distance

            except Exception:
                print('Error: {} no matching rotamers ({}, {})'.format(self.structure.pdb_code.index, residue1, residue2))
                return False
	def run_recog(self):
		''' Detects candidate helix constrictions and bulges in the first preferred chain
			from CA geometry and prints a comparison with the anomalies stored for
			self.structure. Nothing is returned.

			For every residue i inside one of self.range, an angle in degrees is computed
			from the CA atoms of residues i, i+2, i+3 and i+5. A constriction candidate is
			recorded at i+2 when |angle| > 150 holds for i, i+1 and i+2 but not for i+3;
			a bulge candidate is recorded at i+2 when |angle| < 100 holds for i, i+1 and i+2.
			Candidates are then matched by sequence number against the 'constriction' and
			'bulge' protein anomalies of the structure.
		'''
		chain = self.pdb_struct[self.structure.preferred_chain[0]]
		constrictions, bulges, values = OrderedDict(), OrderedDict(), OrderedDict()
		for r in chain:
			resnum = r.get_id()[1]
			# Residues outside every configured range are not measured.
			if not any(ran[0]-1 < resnum < ran[1] for ran in self.range):
				continue
			try:
				ca = r['CA'].get_coord()
				ca2 = chain[resnum+2]['CA'].get_coord()
				ca3 = chain[resnum+3]['CA'].get_coord()
				ca5 = chain[resnum+5]['CA'].get_coord()
			except KeyError:
				# A neighbouring residue or its CA atom is missing: no angle here.
				continue
			b0 = -1.0*(ca2-ca)
			b1 = ca3-ca2
			b2 = ca5-ca3
			b0xb1 = np.cross(b0, b1)
			b1xb2 = np.cross(b2, b1)
			b0xb1_x_b1xb2 = np.cross(b0xb1, b1xb2)
			y = np.dot(b0xb1_x_b1xb2, b1)*(1.0/np.linalg.norm(b1))
			x = np.dot(b0xb1, b1xb2)
			angle = np.degrees(np.arctan2(y, x))
			if self.verbose:
				print(chain[resnum+2].get_id()[1], '-', chain[resnum+3].get_id()[1], angle)
			values[resnum] = angle

		for num, val in values.items():
			if abs(val) > 150:
				count = 1
				for i in range(1, 4):
					following = values.get(num+i)
					if following is None or not abs(following) > 150:
						break
					count += 1
				# NOTE(review): up to three followers are checked but exactly three hits
				# in total are required - confirm this is intended.
				if count == 3:
					constrictions[num+2] = [val]
			if abs(val) < 100:
				count = 1
				for i in range(1, 3):
					following = values.get(num+i)
					if following is None or not abs(following) < 100:
						break
					count += 1
				if count == 3:
					bulges[num+2] = [val]

		found_c, missed_c, found_b, missed_b = OrderedDict(), OrderedDict(), OrderedDict(), OrderedDict()
		remove_c = []
		db_constrictions = self.structure.protein_anomalies.filter(anomaly_type__slug='constriction')
		db_constrictions_dict = OrderedDict()
		for c in db_constrictions:
			gn = c.generic_number.label
			# The residue before the constriction is looked up. Decrement the whole
			# number after 'x' so that e.g. 5x50 gives 5x49 rather than 5x5-1.
			head, sep, position = gn.rpartition('x')
			if sep and position.isdigit():
				prev_gn = head+sep+str(int(position)-1).zfill(len(position))
			else:
				prev_gn = gn[:-1]+str(int(gn[-1])-1)
			prev_resi = Residue.objects.get(protein_conformation=self.parent_prot_conf,
											display_generic_number__label=dgn(prev_gn, self.parent_prot_conf))
			db_constrictions_dict[gn] = prev_resi.sequence_number
			for ca2 in constrictions:
				if ca2-2 <= prev_resi.sequence_number <= ca2+3:
					if gn not in found_c:
						found_c[gn] = prev_resi.sequence_number
					remove_c.append(ca2)
			if gn not in found_c:
				missed_c[gn] = ''
		for r in remove_c:
			# A candidate can be queued by more than one stored anomaly.
			constrictions.pop(r, None)
		for ca2 in constrictions:
			if not any(ca2-2 <= value <= ca2+3 for value in found_c.values()):
				missed_c[ca2] = ca2

		remove_b = []
		db_bulges = self.structure.protein_anomalies.filter(anomaly_type__slug='bulge')
		db_bulges_dict = OrderedDict()
		for b in db_bulges:
			gn = b.generic_number.label
			resi = Residue.objects.get(protein_conformation=self.parent_prot_conf,
									   display_generic_number__label=dgn(b.generic_number.label, self.parent_prot_conf))
			db_bulges_dict[gn] = resi.sequence_number
			for ca2 in bulges:
				# NOTE(review): this window ends at ca2+2 while the leftover check
				# below uses ca2+3 - confirm which one is intended.
				if ca2-2 <= resi.sequence_number <= ca2+2:
					if gn not in found_b:
						found_b[gn] = resi.sequence_number
					remove_b.append(ca2)
			if gn not in found_b:
				missed_b[gn] = ''
		for r in remove_b:
			# A candidate can be queued by more than one stored anomaly.
			bulges.pop(r, None)
		for ca2 in bulges:
			if not any(ca2-2 <= value <= ca2+3 for value in found_b.values()):
				missed_b[ca2] = ca2

		print('#################')
		print(self.structure)
		print('DB constrictions: {}'.format(db_constrictions_dict))
		print('DB bulges: {}'.format(db_bulges_dict))
		print('Found constrictions: {}'.format(found_c))
		print('Missed constrictions: {}'.format(missed_c))
		print('Found bulges: {}'.format(found_b))
		print('Missed bulges: {}'.format(missed_b))
		# Integer keys are candidates without a stored anomaly; print the angles
		# around them. String keys are stored anomalies that were not detected.
		for missed in (missed_c, missed_b):
			for key in missed:
				if isinstance(key, int):
					for i in range(key-3, key+3):
						if i in values:
							print(i, values[i])
    def run_recog(self):
        ''' Detects candidate helix constrictions and bulges in the first preferred chain
            from CA geometry and prints a comparison with the anomalies stored for
            self.structure. Nothing is returned.

            For every residue i inside one of self.range, an angle in degrees is computed
            from the CA atoms of residues i, i+2, i+3 and i+5. A constriction candidate is
            recorded at i+2 when |angle| > 150 holds for i, i+1 and i+2 but not for i+3;
            a bulge candidate is recorded at i+2 when |angle| < 100 holds for i, i+1 and i+2.
            Candidates are then matched by sequence number against the 'constriction' and
            'bulge' protein anomalies of the structure.
        '''
        chain = self.pdb_struct[self.structure.preferred_chain[0]]
        constrictions, bulges, values = OrderedDict(), OrderedDict(), OrderedDict()
        for r in chain:
            resnum = r.get_id()[1]
            # Residues outside every configured range are not measured.
            if not any(ran[0] - 1 < resnum < ran[1] for ran in self.range):
                continue
            try:
                ca = r['CA'].get_coord()
                ca2 = chain[resnum + 2]['CA'].get_coord()
                ca3 = chain[resnum + 3]['CA'].get_coord()
                ca5 = chain[resnum + 5]['CA'].get_coord()
            except KeyError:
                # A neighbouring residue or its CA atom is missing: no angle here.
                continue
            b0 = -1.0 * (ca2 - ca)
            b1 = ca3 - ca2
            b2 = ca5 - ca3
            b0xb1 = np.cross(b0, b1)
            b1xb2 = np.cross(b2, b1)
            b0xb1_x_b1xb2 = np.cross(b0xb1, b1xb2)
            y = np.dot(b0xb1_x_b1xb2, b1) * (1.0 / np.linalg.norm(b1))
            x = np.dot(b0xb1, b1xb2)
            angle = np.degrees(np.arctan2(y, x))
            if self.verbose:
                print(chain[resnum + 2].get_id()[1], '-',
                      chain[resnum + 3].get_id()[1], angle)
            values[resnum] = angle

        for num, val in values.items():
            if abs(val) > 150:
                count = 1
                for i in range(1, 4):
                    following = values.get(num + i)
                    if following is None or not abs(following) > 150:
                        break
                    count += 1
                # NOTE(review): up to three followers are checked but exactly three hits
                # in total are required - confirm this is intended.
                if count == 3:
                    constrictions[num + 2] = [val]
            if abs(val) < 100:
                count = 1
                for i in range(1, 3):
                    following = values.get(num + i)
                    if following is None or not abs(following) < 100:
                        break
                    count += 1
                if count == 3:
                    bulges[num + 2] = [val]

        found_c, missed_c, found_b, missed_b = OrderedDict(), OrderedDict(), OrderedDict(), OrderedDict()
        remove_c = []
        db_constrictions = self.structure.protein_anomalies.filter(
            anomaly_type__slug='constriction')
        db_constrictions_dict = OrderedDict()
        for c in db_constrictions:
            gn = c.generic_number.label
            # The residue before the constriction is looked up. Decrement the whole
            # number after 'x' so that e.g. 5x50 gives 5x49 rather than 5x5-1.
            head, sep, position = gn.rpartition('x')
            if sep and position.isdigit():
                prev_gn = head + sep + str(int(position) - 1).zfill(len(position))
            else:
                prev_gn = gn[:-1] + str(int(gn[-1]) - 1)
            prev_resi = Residue.objects.get(
                protein_conformation=self.parent_prot_conf,
                display_generic_number__label=dgn(prev_gn,
                                                  self.parent_prot_conf))
            db_constrictions_dict[gn] = prev_resi.sequence_number
            for ca2 in constrictions:
                if ca2 - 2 <= prev_resi.sequence_number <= ca2 + 3:
                    if gn not in found_c:
                        found_c[gn] = prev_resi.sequence_number
                    remove_c.append(ca2)
            if gn not in found_c:
                missed_c[gn] = ''
        for r in remove_c:
            # A candidate can be queued by more than one stored anomaly.
            constrictions.pop(r, None)
        for ca2 in constrictions:
            if not any(ca2 - 2 <= value <= ca2 + 3 for value in found_c.values()):
                missed_c[ca2] = ca2

        remove_b = []
        db_bulges = self.structure.protein_anomalies.filter(
            anomaly_type__slug='bulge')
        db_bulges_dict = OrderedDict()
        for b in db_bulges:
            gn = b.generic_number.label
            try:
                resi = Residue.objects.get(
                    protein_conformation=self.parent_prot_conf,
                    display_generic_number__label=dgn(b.generic_number.label,
                                                      self.parent_prot_conf))
            except ResidueGenericNumberEquivalent.DoesNotExist:
                # The bulge is skipped entirely: it is listed neither as found nor
                # as missed.
                print(
                    'Warning: {} ResidueGenericNumberEquivalent object missing from db'
                    .format(gn))
                continue

            db_bulges_dict[gn] = resi.sequence_number
            for ca2 in bulges:
                # NOTE(review): this window ends at ca2+2 while the leftover check
                # below uses ca2+3 - confirm which one is intended.
                if ca2 - 2 <= resi.sequence_number <= ca2 + 2:
                    if gn not in found_b:
                        found_b[gn] = resi.sequence_number
                    remove_b.append(ca2)
            if gn not in found_b:
                missed_b[gn] = ''
        for r in remove_b:
            # A candidate can be queued by more than one stored anomaly.
            bulges.pop(r, None)
        for ca2 in bulges:
            if not any(ca2 - 2 <= value <= ca2 + 3 for value in found_b.values()):
                missed_b[ca2] = ca2

        print('#################')
        print(self.structure)
        print('DB constrictions: {}'.format(db_constrictions_dict))
        print('DB bulges: {}'.format(db_bulges_dict))
        print('Found constrictions: {}'.format(found_c))
        print('Missed constrictions: {}'.format(missed_c))
        print('Found bulges: {}'.format(found_b))
        print('Missed bulges: {}'.format(missed_b))
        # Integer keys are candidates without a stored anomaly; print the angles
        # around them. String keys are stored anomalies that were not detected.
        for missed in (missed_c, missed_b):
            for key in missed:
                if isinstance(key, int):
                    for i in range(key - 3, key + 3):
                        if i in values:
                            print(i, values[i])
 def gn_comparer(self, gn1, gn2, protein_conformation):
     ''' Compares the positions of two generic numbers within one protein conformation.

         For each generic number the Residue of the conformation whose
         display_generic_number label equals dgn(generic number, conformation)
         is fetched with Residue.objects.get().

         @param gn1: generic number of the first residue \n
         @param gn2: generic number of the second residue \n
         @param protein_conformation: conformation both residues belong to \n
         @return: sequence number of the gn1 residue minus sequence number of
         the gn2 residue.
     '''
     first_number, second_number = [
         Residue.objects.get(
             protein_conformation=protein_conformation,
             display_generic_number__label=dgn(generic_number, protein_conformation),
         ).sequence_number
         for generic_number in (gn1, gn2)
     ]
     return first_number - second_number