def finish(self):
    """
    Overrides Executor method.

    Collects the model files found in ``self.dir_models`` (at most
    ``self.n`` of them), loads each one as a PDBModel and stores the
    post-processed models in ``self.result``.

    If ``self.symtemplate`` is set, every model is passed through
    ``extract_symmetric`` (together with ``self.symseq`` and
    ``self.embedded``); otherwise through ``extract_embedded`` (together
    with ``self.embedded``). ``self.result`` is the list of whatever these
    helpers return, one entry per model.
    """
    # os.listdir() returns entries in arbitrary order. Sort them so that the
    # subset kept below is reproducible between runs and platforms.
    file_names = sorted(os.listdir(self.dir_models))

    # Retrieve models created as PDBModels
    # only as many models as the user requested
    m_paths = [os.path.join(self.dir_models, f) for f in file_names]
    m_paths = m_paths[:self.n]

    # self.result = [(full1, modeled_doms1), (full2, modeled_doms2), ...]
    # where 'full#' is the clean model generated, and 'modeled_doms#' is
    # a dictionary with key:value pairs of
    # *index of domain in chain*:*original domain with new coordinates*
    # symmetric models have also a out_symseq output for the symmetric unit
    # sequence
    if self.symtemplate:
        self.result = [
            extract_symmetric(B.PDBModel(m), self.symseq, self.embedded)
            for m in m_paths
        ]
    else:
        self.result = [
            extract_embedded(B.PDBModel(m), self.embedded) for m in m_paths
        ]
def setUp(self):
    """
    Prepare the test fixtures.

    Every attribute that already holds a truthy value is kept; otherwise it
    is filled with a default: ``testpath`` points to the ``testdata`` folder
    next to this file, ``dom1``, ``dom2`` and ``domAB1`` are loaded from PDB
    files in that folder and ``domAB2`` is a clone of ``domAB1``.
    """
    here = os.path.abspath(os.path.dirname(__file__))
    self.testpath = self.testpath or os.path.join(here, 'testdata')

    def in_testdata(file_name):
        # resolve a fixture file inside the test data folder
        return os.path.join(self.testpath, file_name)

    self.dom1 = self.dom1 or B.PDBModel(in_testdata('2z6o.pdb'))
    self.dom2 = self.dom2 or B.PDBModel(in_testdata('histone.pdb'))
    self.domAB1 = self.domAB1 or B.PDBModel(in_testdata('domAB1.pdb'))
    self.domAB2 = self.domAB2 or self.domAB1.clone()
def fetchPDB(self, id):
    """
    Fetch a PDB entry and return it as a PDBModel.

    The entry is first requested through ``getLocalPDBHandle``; if that
    fails, ``getRemotePDBHandle`` is used instead. Only the first model of
    the entry is parsed. The parsed lines are written to a temporary file,
    which is loaded into a PDBModel and removed again.

    :param id: identifier handed to the local / remote handle getters; it
               is also stored as ``pdbCode`` of the returned model
    :return: model disconnected from the temporary file, with ``pdbCode``
             set and ``info`` updated from the parsed header information
    :rtype: PDBModel
    """
    try:
        h = self.getLocalPDBHandle(id)
    except Exception:
        # any failure of the local lookup falls back to the remote source
        # (KeyboardInterrupt / SystemExit are no longer swallowed)
        h = self.getRemotePDBHandle(id)

    try:
        lines, infos = self.parsePdbFromHandle(h, first_model_only=1)
    finally:
        ## close if it is a handle; deliberately best-effort because h
        ## may be an object without a close() method
        try:
            h.close()
        except Exception:
            pass

    # NamedTemporaryFile creates the file atomically; tempfile.mktemp only
    # returned a name and left a window for another process to claim it.
    f = tempfile.NamedTemporaryFile(
        'w', suffix='.pdb', prefix='ncbiparser_', delete=False)
    fname = f.name
    try:
        with f:
            f.writelines(lines)

        m = B.PDBModel(fname)
        m.disconnect()
    finally:
        # remove the temporary file even if writing or loading failed
        T.tryRemove(fname)

    m.pdbCode = id
    m.info.update(infos)
    return m
def embed_symmetric(self, j_doms, full_chains):
    """
    Embeds every symmetric unit (full_chain) from full_chains into the
    j_dom found at the same position in j_doms.

    Each chain is first passed through ``self.extract_fixed`` and the
    result is embedded into its j_dom with ``R.embed``. All embedded
    models are concatenated into a single model.

    :param j_doms: All the j_doms in which the symmetric chains will be
                   embedded; must hold at least as many items as
                   full_chains
    :type j_doms: list of PDBModels
    :param full_chains: symmetric units to embed, one per j_dom; must not
                        be empty
    :type full_chains: list of PDBModels
    :return: (concatenated model of all embedded units,
              sequence of the last embedded unit within its j_dom,
              last unit as returned by extract_fixed,
              sequence of the last j_dom)
    :rtype: tuple
    """
    combined = B.PDBModel()
    embedded = None
    unit = None
    container = None

    for idx, chain in enumerate(full_chains):
        container = j_doms[idx]
        unit = self.extract_fixed(container, chain)

        # Symmetric unit embedded into j_dom: it must sit right after the
        # first two residues of the container
        embedded = R.embed(container, unit)
        assert embedded.sequence() == (container.sequence()[:2] +
                                       unit.sequence() +
                                       container.sequence()[2:])

        combined = combined.concat(embedded)

    # values of the last processed pair are reported back to the caller
    container_jdom = container.sequence()
    emb_mod = unit
    container_seq = embedded.sequence()

    return combined, container_seq, emb_mod, container_jdom
def parse2new(self, source, ref, traj=None):
    """
    Fill a new or an existing Trajectory with the content of source.

    Args:
        source (str): file name or other input object
        ref (str or PDBModel): reference structure instance or file
        traj (Biskit.md.Trajectory): existing instance to be updated

    Returns:
        Biskit.Trajectory: new Trajectory instance (or the updated traj
        if one was given)
    """
    if traj is None:
        import biskit.md
        result = biskit.md.Trajectory()
    else:
        result = traj

    ref_model = B.PDBModel(ref)

    # the reader is told the atom count of the reference structure
    crd = AmberMdcrd(source, natom=ref_model.lenAtoms(),
                     hasbox=self.hasbox)

    result.frames = crd.coordinates
    result.setRef(ref_model)
    result.resIndex = result.ref.resMap()

    assert N.shape(result.frames) == (crd.frame, crd.natom, 3)
    assert result.lenAtoms() == crd.natom

    return result
def test_embed_symmetric(self):
    ''' Tests builder.embed_symmetric() '''
    mod1 = B.PDBModel(os.path.join(self.testpath, 'chain01_2ch.pdb'))
    emb_mod = mod1.takeChains([1, 2, 3])
    j_dom = mod1.takeChains([0])

    full_emb = self.builder1.embed_symmetric([j_dom], [emb_mod])

    # The first item of the returned tuple is the combined model.
    # assertEqual reports the actual length on failure; the previous message
    # printed len(full_emb), i.e. the size of the returned tuple.
    self.assertEqual(len(full_emb[0]), 9267)
def test_PDBParseModel(self):
    """PDBParseModel test"""
    if self.local:
        print('Loading pdb file ..')

    ## parse a new model from an already loaded PDBModel
    self.p = PDBParseModel()
    source_model = B.PDBModel(T.testRoot() + '/rec/1A2P.pdb')
    self.m = self.p.parse2new(source_model)

    ## the summed center of mass must match to 2 decimal places
    center_sum = N0.sum(self.m.centerOfMass())
    self.assertAlmostEqual(center_sum, 113.682601929, 2)
def concat_full(self):
    """
    Concats self.full_chains into a single model.

    After concatenation, chain IDs are added and the 'serial_number'
    profile is rewritten as 1 .. len(model).

    :return: the combined model
    :rtype: PDBModel
    """
    merged = B.PDBModel()
    for chain in self.full_chains:
        merged = merged.concat(chain)

    merged.addChainId()

    # consecutive serial numbers, starting at 1
    last_serial = len(merged)
    merged['serial_number'] = N.arange(1, last_serial + 1)

    print('Done.')
    return merged
def parse2new(self, source, disconnect=False, skipRes=None):
    """
    Create a new PDBModel from the source.

    An empty PDBModel is created and filled through ``self.update`` with
    ``updateMissing=True``.

    :param source: source PDB file or pickled PDBModel or PDBModel object
    :type  source: str || file || PDBModel
    :param disconnect: do *not* associate new model with the source [False]
    :type  disconnect: bool
    :param skipRes: list residues that should not be parsed
    :type  skipRes: [ str ]

    :return: new model (always of type PDBModel, regardless of source type)
    :rtype: PDBModel
    """
    model = B.PDBModel()
    self.update(model, source, updateMissing=True, skipRes=skipRes)

    if not disconnect:
        return model

    model.disconnect()
    return model
def parse2new(self, source, ref, traj=None):
    """
    Fill a new or an existing Trajectory with the content of source.

    Besides the coordinate frames, the 'time' profile is always copied;
    'velocities' and 'forces' profiles are copied if the source reports
    having them. The 'application' and 'program' fields of the source are
    stored in the info record of the reference model.

    Args:
        source (str): file name or other input object
        ref (str or PDBModel): reference structure instance or file
        traj (Biskit.md.Trajectory): existing instance to be updated

    Returns:
        Biskit.Trajectory: new Trajectory instance (or the updated traj
        if one was given)
    """
    if traj is None:
        import biskit.md
        result = biskit.md.Trajectory()
    else:
        result = traj

    nc = NetCDFTraj.open_old(str(source))

    result.frames = nc.coordinates
    result.setRef(B.PDBModel(ref))
    result.resIndex = result.ref.resMap()

    assert N.shape(result.frames) == (nc.frame, nc.atom, 3)
    assert result.lenAtoms() == nc.atom

    profiles = result.profiles
    profiles['time'] = nc.time
    if nc.hasvels:
        profiles['velocities'] = nc.velocities
    if nc.hasfrcs:
        profiles['forces'] = nc.forces

    info = result.ref.info
    info['application'] = nc.application
    info['program'] = nc.program

    return result
def restore_emb(self, emb_mod, emb_ch):
    '''
    Divides the emb_ch model into the original chains, as in emb_mod

    For every chain of emb_mod, as many consecutive residues as that chain
    has in its sequence are taken from emb_ch, renumbered and appended to
    the result.

    :param emb_mod: Model of the chains as they were initially embedded
    :type emb_mod: PDBModel
    :param emb_ch: Model of the chains to be separated in to the original
                   ones
    :type emb_ch: PDBModel
    :return: model with the residues of emb_ch, concatenated piece by piece
    :rtype: PDBModel
    '''
    offset = 0  # residues of emb_ch consumed so far
    restored = B.PDBModel()

    for chain_index in range(emb_mod.lenChains()):
        n_residues = len(emb_mod.takeChains([chain_index]).sequence())
        stop = offset + n_residues

        piece = emb_ch.takeResidues(list(range(offset, stop)))
        piece.renumberResidues()
        restored = restored.concat(piece)

        offset = stop

    return restored
def test_extract_embedded(self):
    """ Tests the builder.extract_embedded method """
    mod1 = B.PDBModel(os.path.join(self.testpath, 'chain01_2ch.pdb'))
    emb_mod = mod1.takeChains([1, 2, 3])
    j_dom = mod1.takeChains([0])

    full_emb = self.builder1.embed_symmetric([j_dom], [emb_mod])

    # expected sequence: emb_mod inserted after the first two residues of
    # j_dom
    container_seq = j_dom.sequence()[:2] + emb_mod.sequence() + \
        j_dom.sequence()[2:]

    # collapse the embedded model into a single chain before extraction
    full = full_emb[0]
    while full.lenChains() > 1:
        full.mergeChains(0)

    chain01_2ch_reb = self.builder1.extract_embedded(
        full, emb_mod, container_seq)

    # assertEqual (instead of assertTrue(a == b)) reports both values when
    # the check fails
    self.assertEqual(chain01_2ch_reb.lenChains(), 4)
    self.assertEqual(chain01_2ch_reb.sequence(), mod1.sequence())
def parse2new(self, source, ref=None, traj=None):
    """
    Create / Replace Trajectory from the source list of PDBModels or PDBs.

    Every item of source is loaded as PDBModel and compared to the
    reference. Atoms of a frame may be removed or re-ordered to fit the
    reference but the reference itself must be fully contained in the
    frame. If ``self.rmwat`` is set, solvent is removed from the reference
    first. The atom comparison is done for the first frame only unless
    ``self.analyzeEach`` is set.

    Args:
        source (list): file names or PDBModel instances, one per frame
        ref (str or PDBModel): reference structure instance or file;
            defaults to the first item of source
        traj (Biskit.md.Trajectory): existing instance to be updated

    Returns:
        Biskit.Trajectory: new Trajectory instance (or the updated traj
        if one was given)

    Raises:
        TrajParserError: if the reference does not match a frame or the
            atom names of a checked frame differ from the reference
    """
    r = traj
    if traj is None:
        import biskit.md
        r = biskit.md.Trajectory()

    r.setRef(B.PDBModel(ref or source[0]))
    n_frames = len(source)

    if self.rmwat:
        r.ref = r.ref.compress(N.logical_not(r.ref.maskSolvent()))

    r.resIndex = r.ref.resMap()
    refNames = r.ref.atomNames()  ## cache for atom checking
    refIndices = list(range(r.ref.lenAtoms()))  ## identity cast of the ref

    if self.verbose:
        T.errWrite('reading %i pdbs...' % n_frames)

    r.frames = N.zeros((n_frames, r.ref.lenAtoms(), 3))  ## target coordinate array

    ## default names are zero-padded frame numbers; the previous format
    ## '#%i07' appended a literal '07' instead of padding to 7 digits
    r.frameNames = ['#%07i' % i for i in range(n_frames)]

    atomCast = None

    reportIntervall = 1 if n_frames < 100 else round(n_frames / 100)

    for i, f in enumerate(source):

        m = B.PDBModel(f)

        ## compare atom order & content of first frame to reference pdb
        if self.analyzeEach or i == 0:
            atomCast, castRef = m.compareAtoms(r.ref)

            if castRef != refIndices:
                ## we can remove/reorder atoms from each frame but not from ref
                raise P.TrajParserError("Reference PDB doesn't match %s."
                                        % m.fileName)

            if N.all(atomCast == list(range(len(m)))):
                atomCast = None  ## no casting necessary
            else:
                if self.verbose:
                    T.errWrite(' casting ')

        ## assert that frame fits reference
        if atomCast:
            m = m.take(atomCast)

        ## additional check on each 100st frame
        if i % reportIntervall == 0 and m.atomNames() != refNames:
            raise P.TrajParserError("%s doesn't match reference pdb."
                                    % m.fileName)

        r.frames[i] = m.xyz

        if isinstance(f, str):  ## save original file name
            r.frameNames[i] = T.stripFilename(f)

        if i % reportIntervall == 0 and self.verbose:
            T.errWrite('#')

    if self.verbose:
        T.errWrite('done\n')

    return r
def extract_embedded(full, embedded):
    """
    Extracts one or more PDBModels from another

    Finds the sequence and location of each domain in embedded dictionary.
    Extracts the atoms and concatenates at the end of 'full'. Renumbers amino
    acids, id number and renames chains in the process.

    It also returns the original multichain domain (before adding to the chain)
    but with the new coordinates after modeling

    Note that the 'full' model passed in is modified in place (embedded
    residues are removed, chains are merged, residues are renumbered) before
    the combined model is created.

    :param full: chain as modeled by ranch
    :type full: PDBModel
    :param embedded: dictionary with embedded domains and its position (index)
                     in the full sequence; each value is indexed as
                     (start index of the domain in full,
                      container domain the embedded domain sits in,
                      embedded domain)
    :type embedded: dictionary

    :return: tuple of three items:

             * 'full' with embedded domains concatenated at the end as
               independent chains (PDBModel)
             * list with the 'modeled_doms' dictionary, which has key:value
               pairs where key is the key of the domain in 'embedded' and
               value is the 'original' domain with new coordinates to be
               used in subsequent chains (list of dictionaries)
             * sequence of the returned model
    :rtype: tuple
    :raise MatchError: if the sequence found in 'full' at the given position
                       does not match the embedded domain or its container
    """
    r = B.PDBModel()
    emb_ind = []  # List for start and end indexes for each embedded domain

    # Create dictionary for domains that were modeled
    modeled_doms = {}

    # 'full' is not modified inside the following loop, so its sequence can
    # be looked up once
    full_seq = full.sequence()

    for key, value in embedded.items():
        dom = value[2]
        m = value[1]        # domain that has dom embedded in
        i_start = value[0]  # start position of dom in full
        i_end = i_start + len(dom.sequence())  # end position of dom in full

        # Remember that dom is embedded inside of m in the following way:
        # ....FFFFFFFFFMMDDDDDDDDDDDDDDDDDDMMMMMMMMMMMMMMMMMMMMMMMMFFFFFFFFF....
        # F = residues of full; D = residues of dom; M = residues of m
        # dom is placed after the first two residues of m

        # (start, end) of the first two residues of m
        m_first = (i_start - 2, i_start)
        # (start, end) of the remaining residues of m
        m_last = (i_end, i_end + len(m.sequence()) - 2)
        # sequence of m
        m_seq = full_seq[m_first[0]:m_first[1]] + \
            full_seq[m_last[0]:m_last[1]]

        if full_seq[i_start:i_end] != dom.sequence() or \
                m_seq != m.sequence():
            # adjacent literals instead of a backslash inside the string,
            # which leaked source indentation into the message
            raise MatchError(
                'The sequence from the domain to extract does not '
                'match the sequence in the full domain with the specified '
                'indices')

        emb = full.takeResidues(list(range(i_start, i_end)))

        # Restore chain ids and residue numbers
        assert emb.sequence() == dom.sequence()
        assert len(emb) == len(dom), str(len(emb)) + ', ' + str(len(dom))
        emb.atoms['chain_id'] = dom.atoms['chain_id']
        emb.atoms['residue_number'] = dom.atoms['residue_number']

        # Rebuild the container domain from its two parts around dom
        m_new = full.takeResidues(list(range(*m_first))).concat(
            full.takeResidues(list(range(*m_last))), newChain=False)
        m_new.atoms['chain_id'] = m.atoms['chain_id']
        m_new.atoms['residue_number'] = m.atoms['residue_number']
        m_new = m_new.concat(emb)

        modeled_doms[key] = m_new

        r = r.concat(emb)
        emb_ind.append((i_start, i_end))

    # Sort the list to remove the atoms from highest to lowest index,
    # so the indexes won't be affected
    emb_ind = sorted(emb_ind, key=itemgetter(0), reverse=True)

    # Remove the embedded domains from full
    for i_start, i_end in emb_ind:
        # the residue index changes with every removal; look it up once per
        # round
        res_index = full.resIndex()
        atomi_start = res_index[i_start]
        atomi_end = res_index[i_end]
        full.remove(list(range(atomi_start, atomi_end)))

    # Concat the original chain that previously contained the embedded domains
    for _ in range(full.lenChains() - 1):
        full.mergeChains(0)

    full.renumberResidues()  # Renumber amino acids
    full = full.concat(r)    # Combine full and r
    full.addChainId()        # Add chain IDs with consecutive letters
    # NOTE: add feature for personalized chain names

    # Renumber atoms
    full['serial_number'] = N.arange(1, len(full) + 1)

    out_symseq = full.sequence()

    return full, [modeled_doms], out_symseq
#!/usr/bin/env python
## re-generate binary test data in this folder
import biskit as B
import biskit.tools as T

## load the structure, keep only the atoms selected by maskProtein() and
## save the result as binary model
structure = B.PDBModel('1A19.pdb')
protein_only = structure.compress(structure.maskProtein())
protein_only.saveAs('1A19_dry.model')
## TEST RANCHV3 import biskit as b import ranchv3 as r dom1 = b.PDBModel("ranch_example/2z6o_mod.pdb") dom2 = b.PDBModel("ranch_example/Histone_H3.pdb") with open("ranch_example/full.fasta") as f: seq=f.readlines() del(seq[0]) seq = ''.join(seq).replace('\n','') call = r.Ranch(sequence=seq, s='p1', x=(dom1,dom2), f=('yes','no'), o=('no','no'), filesuff='test', w='ranch_example/models') ## TEST RANCHV4 ## cwd = Documents/Stefan/multiprot/multiprot import biskit as b import ranchv4 as r dom1 = b.PDBModel("ranch_example/2z6o_mod.pdb") dom2 = b.PDBModel("ranch_example/Histone_H3.pdb") with open("ranch_example/full.fasta") as f: seq=f.readlines() del(seq[0])
#!/usr/bin/env python
## re-generate binary test data in this folder
import biskit as B
import biskit.tools as T

## load the structure, keep its first chain, sort it and save the result
## as binary model
structure = B.PDBModel('1A2P.pdb')
first_chain = structure.takeChains([0])
sorted_chain = first_chain.sort()
sorted_chain.saveAs('1A2P_dry.model')
#!/usr/bin/env python
## re-generate binary test data in this folder
import biskit as B
import biskit.tools as T

## (source PDB file, target binary model file), converted in this order
CONVERSIONS = (
    ('1huy_citrine.pdb', '1huy_citrine.model'),
    ('1zgp_dsred_dimer.pdb', '1zgp_dsred_dimer.model'),
)

for pdb_file, model_file in CONVERSIONS:
    B.PDBModel(pdb_file).saveAs(model_file)
def restore_pulchra(self, ch, ch_reb, domains, modeled_domains,
                    symtemplate, container_jdom):
    '''
    Retrieve the domain sidechain's coordinates from ranch, and combine with
    the rebuilt CA side chains from pulchra

    Domains (PDBModel entries) are copied from ch, leaving out two residues
    at every end that touches a neighbouring element. Linkers (str entries)
    are copied from ch_reb, extended by those two residues on every such
    end. The first element is never shortened / extended at its N-terminus
    and the last one never at its C-terminus.

    :param ch: chain as originally modeled by ranch
    :type ch: PDBModel
    :param ch_reb: modeled chain rebuilt by pulchra
    :type ch_reb: PDBModel
    :param domains: list of domains from which the chain was built.
    :type domains: list with PDBModel and str elements
    :param modeled_domains: dictionary with multiple-chain domains that were
                            modeled, with their new coordinates; keys are
                            positions in 'domains'
    :type modeled_domains: dict
    :param symtemplate: symmetry template used for symmetry (if any)
    :type symtemplate: PDBModel
    :param container_jdom: domain that is connecting the modeled chain to
                           the symmetric core (see self.embed_symmetric());
                           only its len() is used, as the number of residues
                           -- NOTE(review): the fallback is a sequence
                           string, so a sequence is presumably expected here
    :return: single-chain model with renumbered residues and the same
             sequence as ch_reb
    :rtype: PDBModel
    '''
    restored = B.PDBModel()
    position = 0                  # residues of the chain consumed so far
    last_index = len(domains) - 1

    for k, d in enumerate(domains):
        # residues shifted at the N- and C-terminus of this element
        n_delta = 0 if k == 0 else 2
        c_delta = 0 if (k != 0 and k == last_index) else 2

        if not isinstance(d, B.PDBModel):
            # linker given as string: take it from the rebuilt chain,
            # extended into the neighbouring domains
            n_linker = len(d)
            restored = restored.concat(
                ch_reb.takeResidues(
                    list(range(position - n_delta,
                               position + n_linker + c_delta))))
            position += n_linker
            continue

        if d.lenChains() == 1:
            # Normal single-chain domain
            n_domain = len(d.sequence())
        elif d is symtemplate:
            # Symtemplate: length of container_jdom OR of the first chain
            # of symtemplate
            jdom = container_jdom or symtemplate.takeChains([0]).sequence()
            n_domain = len(jdom)
        else:
            # Multiple chain domain: first chain of the modeled domain
            n_domain = len(modeled_domains[k].takeChains([0]).sequence())

        start = position + n_delta
        stop = position + n_domain - c_delta

        original = ch.takeResidues(list(range(start, stop)))
        assert ch_reb.sequence()[start:stop] == original.sequence()

        restored = restored.concat(original)
        position += n_domain

    # collapse everything into one chain
    while restored.lenChains() > 1:
        restored.mergeChains(0)

    restored.renumberResidues()
    assert ch_reb.sequence() == restored.sequence()

    return restored