def splitOnePDB(fname, outPath):
    """Split one PDB file into ligand/receptor file pairs.

    Model 0 of the structure is filtered so that only chains whose number of
    standard amino-acid residues carrying a CA atom lies within
    [MIN_SEQ_LEN, MAX_SEQ_LEN] remain. Each remaining chain is then used in
    turn as the ligand, with all the other chains forming the receptor. When
    only two chains remain, the mirrored pair is skipped.

    For pair number ``i`` two files are written into ``outPath``:
    ``<prefix>-<i>_l_u.pdb`` (ligand) and ``<prefix>-<i>_r_u.pdb`` (receptor),
    where ``<prefix>`` is the file's base name up to its first ".".

    :param fname: str. Path to the input PDB file.
    :param outPath: str. Directory where the split files are saved.
    :return: 0 if the file cannot be loaded, has no model 0, or has fewer
             than two acceptable chains; None otherwise.
    """
    try:
        s = parser.get_structure(fname, fname)
    except Exception:
        print("Error loading pdb")
        return 0

    banLenChains = []
    try:
        for chain in s[0]:
            # Count only standard amino acids that have a CA atom. The former
            # "CA count minus non-standard residues" formula subtracted waters
            # and ligands (which have no CA), under-estimating the length.
            chainLen = sum(1 for res in chain
                           if is_aa(res, standard=True) and "CA" in res)
            if chainLen < MIN_SEQ_LEN or chainLen > MAX_SEQ_LEN:
                print(chainLen)
                banLenChains.append(chain.get_id())
    except KeyError:
        print("Not good model")
        return 0

    # Detach only after the scan so the model is not modified while iterating
    for badChainId in banLenChains:
        s[0].detach_child(badChainId)

    if len(s[0].get_list()) < 2:
        print(s)
        print(s[0].get_list())
        print("Not enough good chains")
        return 0

    receptorChainList = []
    ligandChainList = []
    for chain1 in s[0]:
        tmpReceptorList = [chain2 for chain2 in s[0] if chain1 != chain2]
        # With a single partner, skip the pair if that partner was already
        # used as a ligand (it would be the same pair with roles swapped)
        if len(tmpReceptorList) > 1 or tmpReceptorList[0] not in ligandChainList:
            ligandChainList.append(chain1)
            receptorChainList.append(tmpReceptorList)

    prefix = os.path.basename(fname).split(".")[0]

    def writeChains(structName, chains, outName):
        # Re-parent the chains into a fresh one-model structure and save it
        struct = Structure(structName)
        struct.add(Model(0))
        for chain in chains:
            chain.set_parent(struct[0])
            struct[0].add(chain)
        io = PDBIO()
        io.set_structure(struct)
        io.save(os.path.join(outPath, outName))

    for i, (ligandChain, receptorChains) in enumerate(
            zip(ligandChainList, receptorChainList)):
        writeChains(prefix + "ligand", [ligandChain],
                    prefix + "-" + str(i) + "_l_u.pdb")
        writeChains(prefix + "receptor", receptorChains,
                    prefix + "-" + str(i) + "_r_u.pdb")
        print("ligand:", ligandChain, "receptor:", receptorChains)
def splitOnePDB(fname, chainIdL, chainIdR, outPath):
    """Write the ligand and receptor chains of one PDB file to separate files.

    The structure is loaded, every chain of model 0 is measured (residues
    with a CA atom minus residues that are neither standard amino acids nor
    water "HOH"), and chains outside [MIN_SEQ_LEN, MAX_SEQ_LEN] are counted
    as banned. If fewer than two chains are left unbanned the function gives
    up. Otherwise findNeigChains() selects the ligand and receptor chains,
    which are saved as ``<prefix>-<chainIdL><chainIdR>_l_u.pdb`` and
    ``<prefix>-<chainIdL><chainIdR>_r_u.pdb`` inside ``outPath``.

    :param fname: str. Path to the input PDB file.
    :param chainIdL: str. Ligand chain id, passed to findNeigChains().
    :param chainIdR: str. Receptor chain id, passed to findNeigChains().
    :param outPath: str. Directory where the two files are saved.
    :return: 0 on failure (load error, missing model 0, too few chains),
             None otherwise.
    """
    baseName = os.path.basename(fname)
    print(baseName)
    try:
        s = parser.get_structure(baseName, fname)
    except Exception:
        print("Error loading pdb")
        return 0

    banLenChains = []
    try:
        for chain in s[0]:
            nonStandard = sum(
                1 for res in chain.get_list()
                if not is_aa(res, standard=True) and res.resname != "HOH")
            withCA = sum(1 for res in chain if "CA" in res)
            chainLen = withCA - nonStandard
            if not (MIN_SEQ_LEN <= chainLen <= MAX_SEQ_LEN):
                print(chain, chainLen)
                banLenChains.append(chain.get_id())
    except KeyError:
        print("Not good model")
        return 0

    nGoodChains = len(s[0].get_list()) - len(banLenChains)
    if nGoodChains < 2:
        print(s)
        print(s[0].get_list())
        print("Not enough good chains")
        return 0

    ligandChains, receptorChains = findNeigChains(s, chainIdL, chainIdR)
    print("ligand:", ligandChains, "receptor:", receptorChains)

    prefix = baseName.split(".")[0]
    # Ligand first, then receptor: same order in which the files are written
    partners = (("ligand", ligandChains, "_l_u.pdb"),
                ("receptor", receptorChains, "_r_u.pdb"))
    for role, chains, suffix in partners:
        io = PDBIO()
        partnerStruct = Structure(prefix + role)
        partnerStruct.add(Model(0))
        for partnerChain in chains:
            partnerChain.set_parent(partnerStruct[0])
            partnerStruct[0].add(partnerChain)
        io.set_structure(partnerStruct)
        io.save(
            os.path.join(outPath,
                         prefix + "-" + chainIdL + chainIdR + suffix))
def retrieve_ca_model(structure):
    """
    Build a reduced copy of model 0 of `structure` that keeps one atom per
    residue: the CA atom, or the C4' / C4* atom.

    Every kept atom is stored under the name 'CA' in a new residue that
    reuses the original residue number and residue name (the hetero flag
    and insertion code are replaced by blanks). The original coordinates and
    serial number are kept; B-factor and occupancy are set to 0.

    Returns a new Structure called 'clustering_model' with a single model 0
    holding one chain per input chain.
    """
    reduced_struct = Structure('clustering_model')
    reduced_struct.add(Model(0))
    main_chain_names = ("CA", "C4'", "C4*")

    for ch in structure[0]:
        my_chain = Chain(ch.id)
        reduced_struct[0].add(my_chain)
        for resi in ch:
            for atom in resi:
                if atom.get_name() not in main_chain_names:
                    continue
                my_residue = Residue((' ', resi.id[1], ' '),
                                     resi.get_resname(), ' ')
                # Occupancy must be numeric: the former ' ' value breaks
                # PDBIO when the reduced structure is written to a file.
                reduced_atom = Atom('CA', atom.coord, 0, 0, ' ', 'CA',
                                    atom.get_serial_number())
                my_chain.add(my_residue)
                my_residue.add(reduced_atom)
    return reduced_struct
def getStructFromFasta(self, fname, chainType):
    '''
    Builds a Bio.PDB.Structure object from the sequence stored in a fasta
    file. Only the Structure -> Model -> Chain -> Residue levels are created:
    residues contain no atoms, so no coordinates are available.

    :param fname: str. Path to the fasta file.
    :param chainType: str. "l" or "r". Its upper-case form is used as the
                      chain id.
    :return: Structure with one model (id 0) and one chain. Residues are
             numbered from 0; letters that one_to_three() does not know are
             named "UNK".
    '''
    # inputNumber lets parseFasta report which partner failed on error
    partnerNumber = "1" if chainType == "l" else "2"
    seq = self.parseFasta(fname, inputNumber=partnerNumber)
    prefix = self.splitExtendedPrefix(self.getExtendedPrefix(fname))[0]

    struct = Structure(prefix)
    model = Model(0)
    struct.add(model)
    chain = Chain(chainType.upper())
    model.add(chain)

    for position, letter in enumerate(seq):
        try:
            threeLetterName = one_to_three(letter)
        except KeyError:
            threeLetterName = "UNK"
        chain.add(Residue((' ', position, ' '), threeLetterName, prefix))
    return struct
def slice(cls, obj, selection, name='slice'):
    """Build a new object of type `cls` holding a slice of `obj`.

    Each item of <selection> is indexed as item[1] = model id,
    item[2] = chain id and item[3] = residue id. The residues addressed that
    way are copied from `obj` into a fresh Biopython Structure, which is then
    wrapped as ``cls(structure, name=name)``."""
    from Bio.PDB.Structure import Structure
    from Bio.PDB.Model import Model
    from Bio.PDB.Chain import Chain

    ent = Structure(name)  # Biopython structure that receives the slice

    # First pass: which chain ids are needed under each model id
    chain_ids_by_model = {}
    for item in selection:
        chain_ids_by_model.setdefault(item[1], set()).add(item[2])

    # Create the empty model/chain containers
    for mid, chain_ids in chain_ids_by_model.items():
        ent.add(Model(mid))
        for cid in chain_ids:
            ent[mid].add(Chain(cid))

    # Second pass: copy every selected residue into its container
    for item in selection:
        mid, cid, rid = item[1], item[2], item[3]
        ent[mid][cid].add(obj[mid][cid][rid].copy())

    return cls(ent, name=name)
def add(self, residue):
    """Add a copy of a PdbResidue or Het object to the site.

    The argument is copied (including its structure) before it is stored.
    A PdbResidue goes into the residues list and dict; a Het goes into the
    ligands list. In both cases ``parent_site`` of the copy is set to this
    site. If the copy carries a structure, that structure is also inserted
    into the site's Biopython structure (model 0), which is created on
    first use.

    Returns True.
    """
    def ensure_site_structure():
        # Create the site's Structure with an empty model 0 if needed
        if self.structure is None:
            self.structure = Structure(self.id)
            self.structure.add(Model(0))

    residue = residue.copy(include_structure=True)

    # Exact type tests (not isinstance) are kept from the original code so
    # that objects of any subclass keep being handled exactly as before.
    if type(residue) is PdbResidue:
        self.residues.append(residue)
        self.residues_dict[residue.full_id] = residue
        residue.parent_site = self

    if type(residue) is Het:
        self.ligands.append(residue)
        residue.parent_site = self
        if residue.is_polymer:
            # The site structure used to be indexed here before it was
            # initialised, failing when the first thing added was a polymer.
            ensure_site_structure()
            site_model = self.structure[0]
            if residue.chain in site_model:
                # Chain already present: add the polymer's children to it
                for r in residue.structure:
                    site_model[residue.chain].add(r)
            else:
                site_model.add(residue.structure)
            return True

    if residue.structure:
        ensure_site_structure()
        chain_id = residue.structure.get_parent().get_id()
        if chain_id not in self.structure[0]:
            self.structure[0].add(Chain(chain_id))
        # Add residue structure to site structure, unless its id is taken
        if residue.structure.get_id() not in self.structure[0][chain_id]:
            self.structure[0][chain_id].add(residue.structure)
    return True
def create_sphere_representation(self):
    """
    Build a coarse-grained structure in which the chain of self.fa_struct
    is represented by a single "ALA" residue with one CA atom placed at the
    coordinates returned by calculate_centre_of_complex().

    Side effects: sets self.molmass and self.radius (also returned by
    calculate_centre_of_complex), stores the new structure in
    self.cg_struct and saves it through self.save_pdb() under the name
    "dddd" + chain id.
    """
    chain_id = self.fa_struct.chain

    new_struct = Structure('sphrere')
    new_struct.add(Model(0))
    my_chain = Chain(chain_id)
    new_struct[0].add(my_chain)

    coord, self.molmass, self.radius = self.calculate_centre_of_complex(
        self.fa_struct.struct)

    my_residue = Residue((' ', 1, ' '), "ALA", ' ')
    centre_atom = Atom('CA', array(coord, 'f'), 0, 0, ' ', ' CA', 1)
    my_chain.add(my_residue)
    my_residue.add(centre_atom)

    self.cg_struct = new_struct
    self.save_pdb(new_struct, "dddd" + self.fa_struct.chain)
def create_new_chain(self, old_struct):
    """
    Return a new Structure named after old_struct.chain that contains one
    model (id 0) with one empty chain carrying the same id.
    """
    chain_id = old_struct.chain
    s = Structure(chain_id)
    s.add(Model(0))
    # TODO: what if there are more chains in one component?
    s[0].add(Chain(chain_id))
    return s
def retrieve_sphere_model(structure):
    """
    Build a reduced structure in which every chain of `structure` is
    represented by a single CA atom located at the point returned by
    calculate_centre_of_complex(chain).

    Returns a new Structure called 'clustering_model' with one model (id 0).
    Each chain keeps its id and holds one residue, named after the chain id.
    """
    sphere_struct = Structure('clustering_model')
    sphere_struct.add(Model(0))

    # Residues are renumbered: 0, 1, 2, ... in chain order
    for index, chain in enumerate(structure.get_chains()):
        my_chain = Chain(chain.id)
        sphere_struct[0].add(my_chain)

        centre = array(calculate_centre_of_complex(chain), 'f')
        my_residue = Residue((' ', index, ' '), chain.id, ' ')
        my_chain.add(my_residue)
        my_residue.add(Atom('CA', centre, 0, 0, ' ', 'CA', 1))
    return sphere_struct
def renumber_windowed_model(self, structure: Structure, alphafold_mmCIF_dict: Dict) -> Structure:
    """Return a copy of `structure` with residues renumbered consecutively.

    The first residue of every chain receives the number stored in
    '_ma_target_ref_db_details.seq_db_align_begin' of the mmCIF dictionary
    and the following residues count up from there. Hetero flags and
    insertion codes are blanked, and all disorder markers are cleared.

    :param structure: structure whose residues are to be renumbered
    :param alphafold_mmCIF_dict: mmCIF dictionary providing the
        seq_db_align_begin / seq_db_align_end entries
    :return: a new Structure; the input structure is not modified
    :raises AssertionError: if a chain's last new number differs from
        seq_db_align_end
    """
    # Residue range covered by the structure, taken from the mmCIF dictionary
    first_number = int(alphafold_mmCIF_dict['_ma_target_ref_db_details.seq_db_align_begin'][0])
    last_number = int(alphafold_mmCIF_dict['_ma_target_ref_db_details.seq_db_align_end'][0])

    renumbered_structure = Structure(structure.id)
    for model in structure:
        renumbered_model = Model(model.id)
        for chain in model:
            renumbered_chain = Chain(chain.id)
            for offset, residue in enumerate(chain):
                residue_copy = residue.copy()
                residue_copy.id = (' ', first_number + offset, ' ')
                # Original author's note: copy() does not carry disorder over
                # properly, so every notion of disorder is wiped instead
                for atom in residue_copy:
                    atom.disordered_flag = 0
                residue_copy.disordered = 0
                renumbered_chain.add(residue_copy)

            # The number following the last residue must be one past the end
            assert first_number + len(renumbered_chain) == last_number + 1
            renumbered_model.add(renumbered_chain)
        renumbered_structure.add(renumbered_model)
    return renumbered_structure
def create_new_chain(self, id):
    """
    Replace self.fragment_lattice with a new Structure named `id` that
    contains one model (id 0) holding one empty chain, also named `id`.
    """
    self.fragment_lattice = lattice = Structure(id)
    lattice.add(Model(0))
    # TODO: what if there are more chains in one component?
    lattice[0].add(Chain(id))
def initialize_res(residue: Union[Geo, str]) -> Structure:
    """Return a new structure that holds one amino acid.

    ``residue`` is either a geometry object, used as is, or a string that is
    turned into a geometry object through geometry(). The residue is built
    from that geometry and placed in chain "A" of model 0 of a structure
    named "X".

    Raises ValueError when ``residue`` is neither a Geo nor a str."""
    if isinstance(residue, Geo):
        geo = residue
    elif isinstance(residue, str):
        geo = geometry(residue)
    else:
        raise ValueError("Invalid residue argument:", residue)

    segID = 1

    # Backbone placement: CA at the origin, C on the x axis, N in the xy plane
    n_ca_c_radians = geo.N_CA_C_angle * (math.pi / 180.0)
    CA_coord = np.array([0.0, 0.0, 0.0])
    C_coord = np.array([geo.CA_C_length, 0, 0])
    N_coord = np.array([
        geo.CA_N_length * math.cos(n_ca_c_radians),
        geo.CA_N_length * math.sin(n_ca_c_radians),
        0,
    ])

    # For an "ACE" residue the central carbon is named CH3 instead of CA
    if geo.residue_name == "ACE":
        central_name, central_fullname = "CH3", " CH3"
    else:
        central_name, central_fullname = "CA", " CA"

    N = Atom("N", N_coord, 0.0, 1.0, " ", " N", 0, "N")
    CA = Atom(central_name, CA_coord, 0.0, 1.0, " ", central_fullname, 0, "C")
    C = Atom("C", C_coord, 0.0, 1.0, " ", " C", 0, "C")

    # Carbonyl oxygen (to be moved later)
    carbonyl = calculateCoordinates(N, CA, C, geo.C_O_length,
                                    geo.CA_C_O_angle, geo.N_CA_C_O_diangle)
    O = Atom("O", carbonyl, 0.0, 1.0, " ", " O", 0, "O")

    cha = Chain("A")
    cha.add(make_res_of_type(segID, N, CA, C, O, geo))
    mod = Model(0)
    mod.add(cha)
    struc = Structure("X")
    struc.add(mod)
    return struc
def init_model(self, model_id, serial_num=None):
    """Create a new Model, remember it as self.model and add it to
    self.structure.

    Arguments:
    o model_id - id given to the new Model
    o serial_num - serial number given to the new Model (default None)
    """
    new_model = Model(model_id, serial_num)
    self.model = new_model
    self.structure.add(new_model)
def get_structure(self, name='RNA chain'):
    """Wraps this chain in a PDB.Structure object called `name`.

    The structure gets one model (id 0) with one chain named
    self.chain_name, to which every residue obtained by iterating over
    `self` is added."""
    struc = Structure(name)
    struc.add(Model(0))
    struc[0].add(Chain(self.chain_name))

    target_chain = struc[0][self.chain_name]
    for resi in self:
        target_chain.add(resi)
    return struc
def single_chain_structure(chain, name='superposition'):
    """Return a new Structure called `name` whose only model (id 0)
    contains the given chain.

    The chain object itself is added, not a copy of it."""
    from Bio.PDB.Structure import Structure
    from Bio.PDB.Model import Model

    structure = Structure(name)
    structure.add(Model(0))
    structure[0].add(chain)
    return structure
def complex_save(given_complex, i, path):
    """Save all components of `given_complex` into one PDB file.

    A structure with id `i` and a single model (id 0) is created; for every
    component, the chain component.pyrystruct.chain of model 0 of
    component.pyrystruct.struct is added to that model. The structure is
    then written to `path`.

    Returns `path`.
    """
    s = Structure(i)
    s.add(Model(0))
    merged_model = s[0]

    for component in given_complex.components:
        pyry = component.pyrystruct
        merged_model.add(pyry.struct[0][pyry.chain])

    writer = PDBIO()
    writer.set_structure(s)
    writer.save(path)
    return path
def create_new_structure(self, name, chain_id):
    """
    Creates a new, empty Bio.PDB structure and stores it in self.struct

    Parameters:
    -----------
        name        :   structure name
        chain_id    :   chain name (e.g. A, B, C)

    Result:
    ---------
        self.struct :   Bio.PDB Structure with one model (id 0) that holds
                        one empty chain; nothing is returned
    """
    self.struct = Structure(name)
    self.struct.add(Model(0))
    self.struct[0].add(Chain(chain_id))
def __create_superimposed_pdb(self):
    """Merge the residues of self.proteins[0] and self.proteins[1] into a
    single Model (id 0), as chains 'A' and 'B' respectively.

    The residue objects themselves are moved, not copied, and their sequence
    number is overwritten in place with their position (counted from 0) in
    the source protein.

    Returns the merged Model.
    """
    merged_chains = []
    for protein_id, chain_name in zip((0, 1), ('A', 'B')):
        chain = Chain(chain_name)
        residues = self.proteins[protein_id].get_residues()
        for index, residue in enumerate(residues):
            hetero_flag, _, insertion_code = residue.id
            residue.id = (hetero_flag, index, insertion_code)
            chain.add(residue)
        merged_chains.append(chain)

    merged_model = Model(0)
    for chain in merged_chains:
        merged_model.add(chain)
    return merged_model
def initialize_res(residue):
    '''Returns a new structure that holds one amino acid.

    The argument is either a Geo object, used as is, or any other value,
    which is passed to Geo() to obtain the geometry. The residue is built
    with makeRes() and placed in chain A of model 0 of a structure named
    'X'.'''
    geo = residue if isinstance(residue, Geo) else Geo(residue)
    segID = 1

    # Backbone placement: CA at the origin, C on the x axis, N in the xy plane
    n_ca_c_radians = geo.N_CA_C_angle * (math.pi / 180.0)
    CA_coord = np.array([0., 0., 0.])
    C_coord = np.array([geo.CA_C_length, 0, 0])
    N_coord = np.array([geo.CA_N_length * math.cos(n_ca_c_radians),
                        geo.CA_N_length * math.sin(n_ca_c_radians),
                        0])

    N = Atom("N", N_coord, 0.0, 1.0, " ", " N", 0, "N")
    CA = Atom("CA", CA_coord, 0.0, 1.0, " ", " CA", 0, "C")
    C = Atom("C", C_coord, 0.0, 1.0, " ", " C", 0, "C")

    ## Carbonyl oxygen (to be moved later)
    carbonyl = calculateCoordinates(N, CA, C, geo.C_O_length,
                                    geo.CA_C_O_angle, geo.N_CA_C_O_diangle)
    O = Atom("O", carbonyl, 0.0, 1.0, " ", " O", 0, "O")

    cha = Chain('A')
    cha.add(makeRes(segID, N, CA, C, O, geo))
    mod = Model(0)
    mod.add(cha)
    struc = Structure('X')
    struc.add(mod)
    return struc
def save_pdb(self, complex_id, temp="", name=""):
    """
    Writes the chains of all complex components into one pdb file.

    The chain of every component is temporarily added to a single new model
    (id 0), the file is saved, and the chains are detached from their parent
    again afterwards, also when saving fails.

    The file is written to outfolder.outdirname and is called
    <label>_<score>_<complex_id>_<temp>.pdb, where <label> is `name` if
    given, otherwise the last "/"-separated part of outfolder.outdirname,
    and <score> is self.simulation_score rounded to 4 decimals.

    Parameters:
    ------------
        complex_id  : number of complex from simulation
        temp        : value put into the file name; rounded to 1 decimal
                      when it can be read as a number, used as text otherwise
        name        : optional label replacing the output folder name
    Returns:
    --------
        path of the saved pdb file
    """
    score = round(self.simulation_score, 4)
    s = Structure(complex_id)
    my_model = Model(0)
    s.add(my_model)

    outname = outfolder.outdirname.split("/")[-1]
    temp = str(temp)
    try:
        temp = round(float(temp), 1)
    except ValueError:
        # Not a number: keep the text form in the file name
        pass

    label = name if name else str(outname)
    fi_name = (str(outfolder.outdirname) + '/' + label + '_' + str(score)
               + '_' + str(complex_id) + "_" + str(temp) + '.pdb')

    attached = []
    try:
        # Components are added chain by chain, not residue by residue.
        # TODO: what if there are more chains in one component?
        for component in self.components:
            chain = component.pyrystruct.struct[0][component.pyrystruct.chain]
            my_model.add(chain)
            attached.append(chain)
        out = PDBIO()
        out.set_structure(s)
        out.save(fi_name)
    finally:
        # Release the chains even if saving failed, so they do not stay
        # attached to the temporary model
        for chain in attached:
            chain.detach_parent()
    return fi_name
def __make_structure_from_residues__(self, residues):
    """
    Makes a Structure object ('s') with one model ('m') from a list of
    residues.

    Residues are put into chain 'c1' in the given order. Whenever a residue
    id is already present in the current chain, a new chain ('c2', 'c3',
    ...) is started and takes that residue and the ones after it.
    """
    # KR: this probably can be outsourced to another module.
    chains = [Chain('c%i' % 1)]
    for residue in residues:
        if chains[-1].has_id(residue.id):
            chains.append(Chain('c%i' % (len(chains) + 1)))
        chains[-1].add(residue)

    model = Model('m')
    for chain in chains:
        model.add(chain)
    struct = Structure('s')
    struct.add(model)
    return struct
def create_structure(coords, pdb_type, remove_masked):
    """Build a structure for the module-level `protein` and save it as PDB.

    Residue i of protein.primary reads its N, CA and CB coordinates from
    rows 3*i, 3*i + 1 and 3*i + 2 of `coords`. A residue is written when its
    mask entry equals 1, or when `remove_masked` is False. The file is saved
    as save_dir + <protein id> + '_' + pdb_type + '.pdb'.

    Args:
        coords: 2-D indexable (``coords[row, :]``) holding atom coordinates
        pdb_type: label put into the output file name
        remove_masked: when False, residues are written regardless of
            their mask entry

    Returns:
        the created structure
    """
    name = protein.id_
    chain = Chain('A')

    for i, letter_code in enumerate(protein.primary):
        residue_name = AA_LETTERS[letter_code]
        # `== False` kept on purpose: it matches the original test exactly
        wanted = int(protein.mask[i]) == 1 or remove_masked == False  # noqa: E712
        if not wanted:
            continue
        new_residue = Residue((' ', i + 1, ' '), residue_name, ' ')
        for k, atom_name in enumerate(['N', 'CA', 'CB']):
            new_residue.add(Atom(name=atom_name,
                                 coord=coords[3 * i + k, :],
                                 bfactor=0,
                                 occupancy=1,
                                 altloc=' ',
                                 fullname=" {} ".format(atom_name),
                                 serial_number=0))
        chain.add(new_residue)

    model = Model(0)
    model.add(chain)
    structure = Structure(name)
    structure.add(model)

    io = PDBIO()
    io.set_structure(structure)
    io.save(save_dir + name + '_' + pdb_type + '.pdb')
    return structure
def select_structure(selector, structure):
    """Return a new Structure holding only what `selector` accepts.

    `selector` is asked at every level through accept_model(),
    accept_chain(), accept_residue() and accept_atom(). Accepted models,
    chains and residues are re-created as new, empty objects with the same
    ids; accepted atoms are added as the original atom objects (not copies).
    """
    new_structure = Structure(structure.id)
    # filter() is lazy, so every accept_* call happens in the same order as
    # in a plain nested loop with `continue` guards
    for model in filter(selector.accept_model, structure):
        new_model = Model(model.id, model.serial_num)
        new_structure.add(new_model)
        for chain in filter(selector.accept_chain, model):
            new_chain = Chain(chain.id)
            new_model.add(new_chain)
            for residue in filter(selector.accept_residue, chain):
                new_residue = Residue(residue.id, residue.resname,
                                      residue.segid)
                new_chain.add(new_residue)
                for atom in filter(selector.accept_atom, residue):
                    new_residue.add(atom)
    return new_structure
def createPDBFile(self):
    "Create a test CIF file with one CA atom per icosahedron vertex"
    from Bio.PDB.Structure import Structure
    from Bio.PDB.Model import Model
    from Bio.PDB.Chain import Chain
    from Bio.PDB.Residue import Residue
    from Bio.PDB.Atom import Atom
    from Bio.PDB.mmcifio import MMCIFIO
    import os

    CIFFILENAME = "/tmp/out.cif"

    # vertices of an icosahedron in the '222r' orientation
    icosahedron = Icosahedron(circumscribed_radius=100, orientation='222r')
    vertices = icosahedron.getVertices()

    # biopython hierarchy: structure 'result' -> model 1 -> chain 'A'
    structure = Structure('result')
    model = Model(1, 1)  # model id, serial number
    structure.add(model)
    chain = Chain('A')
    model.add(chain)

    # one "ALA" residue holding one CA atom per vertex, numbered from 1
    for number, vertex in enumerate(vertices, 1):
        residue = Residue((' ', number, ' '), "ALA", ' ')
        chain.add(residue)
        # Atom(name, coord, bfactor, occupancy, altloc, fullname,
        #      serial_number, element)
        residue.add(Atom('CA', vertex, 0., 1., " ", " CA ", number, "C"))

    io = MMCIFIO()
    io.set_structure(structure)
    # remove a previous output file before saving
    if os.path.exists(CIFFILENAME):
        os.remove(CIFFILENAME)
    io.save(CIFFILENAME)
    return CIFFILENAME
def initialize_res(residue):
    '''Returns a new structure that holds one amino acid.

    The argument is either a Geo object, used as is, or any other value,
    which is passed to geometry() to obtain the geometry. The builder
    function is chosen from the one-letter code in geo.residue_name; an
    unlisted code falls back to makeGly. The residue is placed in chain A of
    model 0 of a structure named 'X'.'''
    geo = residue if isinstance(residue, Geo) else geometry(residue)
    segID = 1
    AA = geo.residue_name

    # Backbone placement: CA at the origin, C on the x axis, N in the xy plane
    n_ca_c_radians = geo.N_CA_C_angle * (math.pi / 180.0)
    CA_coord = numpy.array([0., 0., 0.])
    C_coord = numpy.array([geo.CA_C_length, 0, 0])
    N_coord = numpy.array([geo.CA_N_length * math.cos(n_ca_c_radians),
                           geo.CA_N_length * math.sin(n_ca_c_radians),
                           0])

    N = Atom("N", N_coord, 0.0, 1.0, " ", " N", 0, "N")
    CA = Atom("CA", CA_coord, 0.0, 1.0, " ", " CA", 0, "C")
    C = Atom("C", C_coord, 0.0, 1.0, " ", " C", 0, "C")

    ## Carbonyl oxygen (to be moved later)
    carbonyl = calculateCoordinates(N, CA, C, geo.C_O_length,
                                    geo.CA_C_O_angle, geo.N_CA_C_O_diangle)
    O = Atom("O", carbonyl, 0.0, 1.0, " ", " O", 0, "O")

    # One-letter code -> residue builder
    builders = {
        'G': makeGly, 'A': makeAla, 'S': makeSer, 'C': makeCys,
        'V': makeVal, 'I': makeIle, 'L': makeLeu, 'T': makeThr,
        'R': makeArg, 'K': makeLys, 'D': makeAsp, 'E': makeGlu,
        'N': makeAsn, 'Q': makeGln, 'M': makeMet, 'H': makeHis,
        'P': makePro, 'F': makePhe, 'Y': makeTyr, 'W': makeTrp,
    }
    build = builders.get(AA, makeGly)
    res = build(segID, N, CA, C, O, geo)

    cha = Chain('A')
    cha.add(res)
    mod = Model(0)
    mod.add(cha)
    struc = Structure('X')
    struc.add(mod)
    return struc
# nie jestem pewien dlaczego to nie działa. Pojawia się błąd: # AttributeError: 'module' object has no attribute 'array' from Bio import PDB from Bio.PDB import PDBParser, NeighborSearch, Superimposer, PDBIO from Bio.PDB.Atom import Atom from Bio.PDB.Residue import Residue from Bio.PDB.Chain import Chain from Bio.PDB.Model import Model from Bio.PDB.Structure import Structure my_structure = Structure('Cytosine') my_model = Model(0) my_chain = Chain('A') my_residue = Residue((' ', 1, ' '), 'C', ' ') atoms = [{ 'name': 'N1', 'coord': PDB.Atom.array([64.612, 45.818, 10.877], 'f'), 'bfactor': 42.59, 'occupancy': 1.0, 'altloc': ' ', 'fullname': 'N1', 'serial_number': 1 }, { 'name': 'C2', 'coord': PDB.Atom.array([65.472, 46.868, 10.634], 'f'), 'bfactor': 44.48, 'occupancy': 1.0, 'altloc': ' ', 'fullname': 'C2', 'serial_number': 2
all_models_stats = [] files = [f for f in os.listdir(path) if os.path.isfile(join(path,f)) and '.pdb' in f] files.sort() for pdb_file in files:#os.listdir(path): print pdb_file + '\n' parser = PDBParser() struct = parser.get_structure('structure', path + pdb_file) chains = list(struct[0].get_chains()) #print str(len(chains)) compares = []#storing compares that are already done #analyzed_count += 1 model = Model() model.name = pdb_file for ch1 in range(0,len(chains)): for ch2 in range(ch1 + 1,len(chains)): checklist = [ chains[ch1].get_full_id()[2] , chains[ch2].get_full_id()[2] ] checklist2 = [ chains[ch2].get_full_id()[2], chains[ch1].get_full_id()[2] ] if chains[ch1].get_full_id()[2] != chains[ch2].get_full_id()[2] and not checklist in compares and not checklist2 in compares: comparsion = [ chains[ch1].get_full_id()[2] , chains[ch2].get_full_id()[2] ] compares.append(comparsion)#appending comprasion to already done comprasions chain1_atms = list(chains[ch1].get_atoms()) chain2_atms = list(chains[ch2].get_atoms()) #print str(len(chain1_atms)) + ' ' + str(len(chain2_atms))
def normalize_structure(input_path: str, pdb_id: str, model_id: int, chain_id: str, primary: str, mask: str, save=True, verbose=True):
    """Normalize one chain of a PDB file and check it against a sequence.

    The file is parsed, the requested model and chain are looked up, the
    chain is passed through normalize_chain(), and the one-letter sequence
    of the result is compared with the positions of `primary` whose `mask`
    character is '+'. On success a new structure holding only the normalized
    chain is built.

    If `model_id` is absent from the parsed structure, the closest existing
    model with a smaller id is used instead.

    :param input_path: path of the PDB file to read
    :param pdb_id: id given to the parsed structure
    :param model_id: id of the model to use
    :param chain_id: id of the chain to normalize
    :param primary: expected sequence, one letter per residue (non-empty)
    :param mask: string of '+' / '-' aligned with `primary` (non-empty)
    :param save: when True, write the result to ``input_path + '.norm'``
    :param verbose: when True, report the use of a substitute model
    :return: the output path when `save` is True, else the new Structure
    :raises ValueError: model or chain not found, or a residue mismatch
    :raises ChainLengthError: normalized and masked sequence lengths differ
    :raises AssertionError: empty `primary` / `mask`, or a mask character
        other than '+' and '-'
    """
    assert primary
    assert mask

    def sequence_of(some_chain):
        # One-letter sequence of a chain; unknown residue names are skipped
        letters = []
        for residue in some_chain:
            try:
                letters.append(resname_to_abbrev(residue.resname))
            except UnknownResnameError:
                continue
        return ''.join(letters)

    with warnings.catch_warnings(record=True):
        warnings.simplefilter("ignore", PDBConstructionWarning)
        parser = PDBParser()
        structure = parser.get_structure(pdb_id, input_path)

    if model_id in structure.child_dict:
        model = structure.child_dict[model_id]
    else:
        # Fall back to the nearest lower model id and stop at the first hit
        model = None
        for try_model_id in range(model_id - 1, -1, -1):
            if try_model_id in structure.child_dict:
                model = structure.child_dict[try_model_id]
                if verbose:
                    print('Supposing model {} is {}...'.format(
                        try_model_id, model_id))
                break
        # `is None`, not truthiness: a model without chains is falsy
        if model is None:
            raise ValueError(
                'model "{}" not found in "{}", options are {}'.format(
                    model_id, pdb_id, list(structure.child_dict.keys())))

    if chain_id not in model.child_dict:
        raise ValueError(
            'chain "{}" not found in "{}" model "{}", options are {}'.
            format(chain_id, pdb_id, model_id,
                   list(model.child_dict.keys())))
    chain = model.child_dict[chain_id]
    new_chain = normalize_chain(chain)

    # verify that the sequence is what we expect
    normalized = sequence_of(new_chain)

    # extract the known primary sequence using the mask
    masked_primary = []
    for r, m in zip(primary, mask):
        if m == '-':
            continue
        assert m == '+'
        masked_primary.append(r)
    masked_primary = ''.join(masked_primary)

    # ensure the sequence lengths match
    if len(normalized) != len(masked_primary):
        raise ChainLengthError(len(normalized), len(masked_primary))

    # ensure residue identities match
    for i, (got, expected) in enumerate(zip(normalized, masked_primary)):
        if got != expected:
            raise ValueError(
                'mismatch residue at position {} (got {}, expected {})'.
                format(i, got, expected))

    new_model = Model(model.id)
    new_model.add(new_chain)
    new_structure = Structure(structure.id)
    new_structure.add(new_model)

    if save:
        out_path = input_path + '.norm'
        io = PDBIO()
        io.set_structure(new_structure)
        io.save(out_path)
        return out_path
    return new_structure