Ejemplo n.º 1
0
    def finish(self):
        """
        Overrides Executor method.

        Collects the files found in ``self.dir_models``, keeps at most
        ``self.n`` of them, loads each one as a PDBModel and stores the
        extraction results in ``self.result``.
        """

        # Retrieve models created as PDBModels
        # only as many models as the user requested.
        # os.listdir() returns entries in arbitrary order, so the listing is
        # sorted to make the retained subset reproducible between runs.
        m_paths = [
            os.path.join(self.dir_models, f)
            for f in sorted(os.listdir(self.dir_models))
        ]
        m_paths = m_paths[:self.n]

        # self.result = [(full1, modeled_doms1), (full2, modeled_doms2), ...]
        # where 'full#' is the clean model generated, and 'modeled_doms#' is
        # a dictionary with key:value pairs of
        # *index of domain in chain*:*original domain with new coordinates*
        # symmetric models also have an out_symseq output for the symmetric
        # unit sequence
        if self.symtemplate:
            self.result = [
                extract_symmetric(B.PDBModel(m), self.symseq, self.embedded)
                for m in m_paths
            ]
        else:
            self.result = [
                extract_embedded(B.PDBModel(m), self.embedded)
                for m in m_paths
            ]
Ejemplo n.º 2
0
    def setUp(self):
        """
        Prepare the test fixtures.

        Every attribute that already holds a truthy value is left untouched;
        only missing (falsy) ones are filled in: the test data folder next to
        this file and the PDB models loaded from it.
        """
        if not self.testpath:
            here = os.path.abspath(os.path.dirname(__file__))
            self.testpath = os.path.join(here, 'testdata')

        if not self.dom1:
            self.dom1 = B.PDBModel(os.path.join(self.testpath, '2z6o.pdb'))

        if not self.dom2:
            self.dom2 = B.PDBModel(os.path.join(self.testpath, 'histone.pdb'))

        if not self.domAB1:
            self.domAB1 = B.PDBModel(os.path.join(self.testpath, 'domAB1.pdb'))

        # the second AB domain defaults to a copy of the first one
        if not self.domAB2:
            self.domAB2 = self.domAB1.clone()
Ejemplo n.º 3
0
    def fetchPDB(self, id):
        """
        Fetch a PDB entry and return it as a PDBModel.

        The entry is looked up locally first (getLocalPDBHandle); if that
        fails for any reason the remote source (getRemotePDBHandle) is used.
        The parsed lines are written to a temporary file from which the
        PDBModel is created; the temporary file is removed afterwards.

        :param id: identifier passed to the local / remote lookup; it is also
                   stored as ``pdbCode`` of the returned model
        :return: model with ``pdbCode`` set and ``info`` updated with the
                 information returned by parsePdbFromHandle
        :rtype: PDBModel
        """

        try:
            h = self.getLocalPDBHandle(id)
        except Exception:
            # any failure of the local lookup falls back to the remote source
            h = self.getRemotePDBHandle(id)

        try:
            lines, infos = self.parsePdbFromHandle(h, first_model_only=1)
        finally:
            ## close if it is a handle (best effort, also after parse errors)
            try:
                h.close()
            except Exception:
                pass

        # NamedTemporaryFile creates the file atomically, unlike the
        # race-prone tempfile.mktemp; delete=False lets PDBModel re-open it
        # by name after it has been closed
        tmp = tempfile.NamedTemporaryFile(
            mode='w', suffix='.pdb', prefix='ncbiparser_', delete=False)
        fname = tmp.name

        try:
            with tmp:
                tmp.writelines(lines)

            m = B.PDBModel(fname)
            # NOTE(review): presumably detaches the model from its source
            # file so that the file can be deleted -- confirm
            m.disconnect()
            m.pdbCode = id
            m.info.update(infos)
        finally:
            # never leave the temporary file behind, even on failure
            T.tryRemove(fname)

        return m
Ejemplo n.º 4
0
    def embed_symmetric(self, j_doms, full_chains):
        """
        Embeds every symmetric unit (full_chain) from full_chains into the
        j_dom found at the same position in j_doms.

        Each chain is first passed through self.extract_fixed() together with
        its j_dom, then embedded with R.embed(). The embedded models are
        concatenated into a single model.

        :param j_doms:  All the j_doms in which the symmetric chains will be
                        embedded; must have at least as many entries as
                        full_chains
        :type j_doms:   list of PDBModels
        :param full_chains: symmetric units to embed, paired by position with
                        j_doms
        :type full_chains:  list of PDBModels

        :return: tuple ``(full_symmetric, container_seq, emb_mod,
                 container_jdom)`` where full_symmetric is the concatenation
                 of all embedded models and the remaining three values refer
                 to the LAST pair processed: the sequence of the embedded
                 model, the chain as returned by extract_fixed(), and the
                 sequence of the j_dom
        :raises ValueError: if full_chains is empty
        """

        # Without at least one chain there is no "last pair" to report
        if len(full_chains) == 0:
            raise ValueError('full_chains must contain at least one chain')

        full_symmetric = B.PDBModel()
        emb_jsym = None
        ch = None
        j_dom = None
        j_seq = None

        for i, ch in enumerate(full_chains):
            j_dom = j_doms[i]
            ch = self.extract_fixed(j_dom, ch)
            emb_jsym = R.embed(j_dom, ch)  # Symmetric unit embedded into j_dom

            # sanity check: the unit sits right after the first two residues
            # of j_dom
            j_seq = j_dom.sequence()
            assert emb_jsym.sequence() == j_seq[:2] + ch.sequence() + \
                j_seq[2:]

            full_symmetric = full_symmetric.concat(emb_jsym)

        container_jdom = j_seq
        emb_mod = ch
        container_seq = emb_jsym.sequence()
        return full_symmetric, container_seq, emb_mod, container_jdom
Ejemplo n.º 5
0
    def parse2new( self, source, ref, traj=None ):
        """
        Fill a Trajectory (new or existing) with the content of the source.

        Args:
            source (str): file name or other input object
            ref (str or PDBModel): reference structure instance or file
            traj (Biskit.md.Trajectory): existing instance to be updated;
                a new Trajectory is created if None

        Returns:
           Biskit.Trajectory: the new (or updated) Trajectory instance
        """
        if traj is None:
            import biskit.md
            result = biskit.md.Trajectory()
        else:
            result = traj

        ref = B.PDBModel(ref)

        ## the reference defines how many atoms each frame contains
        crd = AmberMdcrd(source, natom=ref.lenAtoms(), hasbox=self.hasbox)

        result.frames = crd.coordinates

        result.setRef(ref)
        result.resIndex = result.ref.resMap()

        ## sanity checks: frames x atoms x 3 coordinates
        expected_shape = (crd.frame, crd.natom, 3)
        assert N.shape(result.frames) == expected_shape
        assert result.lenAtoms() == crd.natom

        return result
Ejemplo n.º 6
0
    def test_embed_symmetric(self):
        '''
        Tests builder.embed_symmetric()
        '''
        mod1 = B.PDBModel(os.path.join(self.testpath, 'chain01_2ch.pdb'))
        emb_mod = mod1.takeChains([1, 2, 3])
        j_dom = mod1.takeChains([0])
        full_emb = self.builder1.embed_symmetric([j_dom], [emb_mod])

        # assertEqual reports both the actual and the expected length of the
        # first returned element when the check fails
        self.assertEqual(len(full_emb[0]), 9267)
Ejemplo n.º 7
0
    def test_PDBParseModel(self):
        """PDBParseModel test"""

        if self.local:
            print('Loading pdb file ..')

        ## parse a new model from an already loaded PDBModel
        self.p = PDBParseModel()
        source = B.PDBModel(T.testRoot() + '/rec/1A2P.pdb')
        self.m = self.p.parse2new(source)

        ## compare the summed center-of-mass coordinates to a reference value
        com_sum = N0.sum(self.m.centerOfMass())
        self.assertAlmostEqual(com_sum, 113.682601929, 2)
Ejemplo n.º 8
0
    def concat_full(self):
        """
        Concatenate all models in self.full_chains into a single model.

        Chain ids are added to the combined model and its 'serial_number'
        profile is reset to 1..N.

        :return: the combined model
        :rtype: PDBModel
        """
        merged = B.PDBModel()
        for chain in self.full_chains:
            merged = merged.concat(chain)

        merged.addChainId()

        # consecutive serial numbers starting at 1
        n_entries = len(merged)
        merged['serial_number'] = N.arange(1, n_entries + 1)

        print('Done.')
        return merged
Ejemplo n.º 9
0
    def parse2new(self, source, disconnect=False, skipRes=None):
        """
        Build a fresh PDBModel and fill it from the given source.

        :param source: source PDB file or pickled PDBModel or PDBModel object
        :type  source: str || file || PDBModel
        :param disconnect: do *not* associate new model with the source [False]
        :type  disconnect: bool
        :param skipRes: list residues that should not be parsed
        :type  skipRes: [ str ]

        :return: new model (always of type PDBModel, regardless of source type)
        :rtype: PDBModel
        """
        model = B.PDBModel()
        self.update(model, source, updateMissing=True, skipRes=skipRes)

        if not disconnect:
            return model

        model.disconnect()
        return model
Ejemplo n.º 10
0
    def parse2new(self, source, ref, traj=None):
        """
        Fill a Trajectory (new or existing) with the content of the source.

        Args:
            source (str): file name or other input object
            ref (str or PDBModel): reference structure instance or file
            traj (Biskit.md.Trajectory): existing instance to be updated;
                a new Trajectory is created if None

        Returns:
           Biskit.Trajectory: the new (or updated) Trajectory instance
        """
        if traj is None:
            import biskit.md
            result = biskit.md.Trajectory()
        else:
            result = traj

        nc = NetCDFTraj.open_old(str(source))

        result.frames = nc.coordinates

        result.setRef(B.PDBModel(ref))
        result.resIndex = result.ref.resMap()

        ## sanity checks: frames x atoms x 3 coordinates
        expected_shape = (nc.frame, nc.atom, 3)
        assert N.shape(result.frames) == expected_shape
        assert result.lenAtoms() == nc.atom

        result.profiles['time'] = nc.time

        ## velocities and forces are optional content of the file
        if nc.hasvels:
            result.profiles['velocities'] = nc.velocities
        if nc.hasfrcs:
            result.profiles['forces'] = nc.forces

        ## remember which software wrote the file
        for key in ('application', 'program'):
            result.ref.info[key] = getattr(nc, key)

        return result
Ejemplo n.º 11
0
    def restore_emb(self, emb_mod, emb_ch):
        '''
        Split emb_ch back into chains with the same residue counts as the
        chains of emb_mod.

        :param emb_mod: Model of the chains as they were initially embedded
        :type emb_mod:  PDBModel
        :param emb_ch:  Model of the chains to be separated into the original
                        ones
        :type emb_ch:   PDBModel

        :return: concatenation of the separated (and renumbered) chains
        :rtype: PDBModel
        '''

        restored = B.PDBModel()
        offset = 0  # number of residues of emb_ch already assigned

        # Each chain of emb_mod claims the next block of residues in emb_ch
        # with the same length; the block is renumbered and appended
        for chain_idx in range(emb_mod.lenChains()):
            n_res = len(emb_mod.takeChains([chain_idx]).sequence())
            stop = offset + n_res

            piece = emb_ch.takeResidues(list(range(offset, stop)))
            piece.renumberResidues()

            restored = restored.concat(piece)
            offset = stop

        return restored
Ejemplo n.º 12
0
    def test_extract_embedded(self):
        """
        Tests the builder.extract_embedded method
        """

        mod1 = B.PDBModel(os.path.join(self.testpath, 'chain01_2ch.pdb'))
        emb_mod = mod1.takeChains([1, 2, 3])
        j_dom = mod1.takeChains([0])
        full_emb = self.builder1.embed_symmetric([j_dom], [emb_mod])

        # expected layout: embedded chains inserted after the first two
        # residues of the container domain
        j_seq = j_dom.sequence()
        container_seq = j_seq[:2] + emb_mod.sequence() + j_seq[2:]

        # collapse the embedded model into a single chain before extraction
        full = full_emb[0]
        while full.lenChains() > 1:
            full.mergeChains(0)

        chain01_2ch_reb = self.builder1.extract_embedded(
            full, emb_mod, container_seq)

        # assertEqual reports the differing values on failure, which
        # assertTrue(a == b) does not
        self.assertEqual(chain01_2ch_reb.lenChains(), 4)
        self.assertEqual(chain01_2ch_reb.sequence(), mod1.sequence())
Ejemplo n.º 13
0
    def parse2new(self, source, ref=None, traj=None):
        """
        Create / Replace Trajectory from the source list of PDBModels or PDBs.

        Every entry of source becomes one frame. The atom content of the
        first frame (or of every frame if self.analyzeEach is set) is
        compared to the reference; frames may be re-ordered / reduced to
        match the reference but the reference itself must match completely.

        Args:
            source (list): file names or PDBModel instances, one per frame
            ref (str or PDBModel): reference structure instance or file;
                the first entry of source is used if not given
            traj (Biskit.md.Trajectory): existing instance to be updated

        Returns:
           Biskit.Trajectory: new (or updated) Trajectory instance

        Raises:
            TrajParserError: if the reference contains atoms missing from a
                frame or if the atom names of a checked frame differ from
                the reference
        """
        r = traj
        if traj is None:
            import biskit.md
            r = biskit.md.Trajectory()

        r.setRef(B.PDBModel(ref or source[0]))
        n_frames = len(source)

        if self.rmwat:
            r.ref = r.ref.compress(N.logical_not(r.ref.maskSolvent()))

        r.resIndex = r.ref.resMap()
        refNames = r.ref.atomNames()  ## cache for atom checking

        if self.verbose: T.errWrite('reading %i pdbs...' % n_frames)

        r.frames = N.zeros(
            (n_frames, r.ref.lenAtoms(), 3))  ## target coordinate array

        ## default frame names: zero-padded frame index ('#0000000', ...);
        ## replaced by the file name below whenever the frame comes from a file
        r.frameNames = ['#%07i' % i for i in range(n_frames)]

        atomCast = None
        reportIntervall = 1 if n_frames < 100 else round(n_frames / 100)

        for i, f in enumerate(source):

            m = B.PDBModel(f)

            ## compare atom order & content of first frame to reference pdb
            if self.analyzeEach or i == 0:
                atomCast, castRef = m.compareAtoms(r.ref)

                if castRef != list(range(r.ref.lenAtoms())):
                    ## we can remove/reorder atoms from each frame but not from ref
                    raise P.TrajParserError("Reference PDB doesn't match %s." %
                                            m.fileName)

                if N.all(atomCast == list(range(len(m)))):
                    atomCast = None  ## no casting necessary
                else:
                    if self.verbose: T.errWrite(' casting ')

            ## make the frame fit the reference; test against None explicitly
            ## so the decision does not depend on container truthiness
            if atomCast is not None:
                m = m.take(atomCast)

            ## additional check on every reportIntervall-th frame
            if i % reportIntervall == 0 and m.atomNames() != refNames:
                raise P.TrajParserError("%s doesn't match reference pdb." %
                                        m.fileName)

            r.frames[i] = m.xyz

            if isinstance(f, str):  ## save original file name
                r.frameNames[i] = T.stripFilename(f)

            if i % reportIntervall == 0 and self.verbose:
                T.errWrite('#')

        if self.verbose: T.errWrite('done\n')
        return r
Ejemplo n.º 14
0
def extract_embedded(full, embedded):
    """
    Extracts one or more PDBModels from another.
    Finds the sequence and location of each domain in the embedded dictionary,
    extracts the atoms and concatenates them at the end of 'full'.
    Renumbers amino acids, id number and renames chains in the process.

    It also returns the original multichain domain (before adding to the chain)
    but with the new coordinates after modeling.

    :param full:    chain as modeled by ranch. NOTE: this model is modified in
                    place (embedded residues removed, chains merged, residues
                    renumbered) before the returned model is built from it
    :type full:     PDBModel
    :param embedded: dictionary with embedded domains; each value is indexed
                    as [0] start position of the embedded domain in the full
                    sequence, [1] container domain, [2] embedded domain
    :type embedded: dictionary

    :return: tuple ``(full, [modeled_doms], out_symseq)``:

             * full -- 'full' with embedded domains concatenated at the end
               as independent chains (PDBModel)
             * [modeled_doms] -- list with the 'modeled_doms' dictionary,
               which has key:value pairs where key is the key used in
               'embedded' and value is the 'original' domain with new
               coordinates to be used in subsequent chains
             * out_symseq -- sequence of the returned 'full' model
    :raises MatchError: if the sequence found in 'full' at the given position
                        does not match the embedded domain and its container
    """

    r = B.PDBModel()
    emb_ind = []  # List for start and end indexes for each embedded domain

    # Create dictionary for domains that were modeled
    modeled_doms = {}

    # 'full' is not modified inside the following loop, so its sequence only
    # needs to be computed once
    full_seq = full.sequence()

    for key, value in embedded.items():
        i_start = value[0]  # start position of dom in full
        m = value[1]  # domain that has dom embedded in
        dom = value[2]

        dom_seq = dom.sequence()
        container_seq = m.sequence()
        i_end = i_start + len(dom_seq)  # end position of dom in full

        # Remember that dom is embedded inside of m in the following way:
        # ....FFFFFFFFFMMDDDDDDDDDDDDDDDDDDMMMMMMMMMMMMMMMMMMMMMMMMFFFFFFFFF....
        # F = residues of full; D = residues of dom; M = residues of m
        # dom is placed after the first two residues of m

        # (start, end) of the first two residues of m
        m_first = (i_start - 2, i_start)
        # (start, end) of the remaining residues of m
        m_last = (i_end, i_end + len(container_seq) - 2)
        # sequence of m as found in full
        m_seq = full_seq[m_first[0]:m_first[1]] + \
            full_seq[m_last[0]:m_last[1]]

        if full_seq[i_start:i_end] != dom_seq or m_seq != container_seq:
            raise MatchError(
                'The sequence from the domain to extract does not '
                'match the sequence in the full domain with the specified '
                'indices')

        emb = full.takeResidues(list(range(i_start, i_end)))
        # Restore chain ids and residue numbers
        assert emb.sequence() == dom_seq
        assert len(emb) == len(dom), str(len(emb)) + ', ' + str(len(dom))
        emb.atoms['chain_id'] = dom.atoms['chain_id']
        emb.atoms['residue_number'] = dom.atoms['residue_number']

        # Rebuild the container domain from its two flanking pieces and
        # append the embedded domain
        m_new = full.takeResidues(list(range(*m_first))).concat(
            full.takeResidues(list(range(*m_last))), newChain=False)
        m_new.atoms['chain_id'] = m.atoms['chain_id']
        m_new.atoms['residue_number'] = m.atoms['residue_number']
        m_new = m_new.concat(emb)
        modeled_doms[key] = m_new

        r = r.concat(emb)

        emb_ind.append((i_start, i_end))

    # Sort the list to remove the atoms from highest to lowest index,
    # so the indexes won't be affected
    emb_ind = sorted(emb_ind, key=itemgetter(0), reverse=True)

    # Remove the embedded domains from full
    for i_start, i_end in emb_ind:
        res_index = full.resIndex()
        atomi_start = res_index[i_start]
        atomi_end = res_index[i_end]
        full.remove(list(range(atomi_start, atomi_end)))

    # Concat the original chain that previously contained the embedded domains
    for _ in range(full.lenChains() - 1):
        full.mergeChains(0)

    full.renumberResidues()  # Renumber amino acids
    full = full.concat(r)  # Combine full and r
    full.addChainId()  # Add chain IDs with consecutive letters
    # NOTE: add feature for personalized chain names

    # Renumber atoms
    full['serial_number'] = N.arange(1, len(full) + 1)

    out_symseq = full.sequence()

    return full, [modeled_doms], out_symseq
Ejemplo n.º 15
0
#!/usr/bin/env python
## re-generate binary test data in this folder

import biskit as B
import biskit.tools as T

## load the structure, keep only what maskProtein() selects and store the
## result as binary model
m = B.PDBModel('1A19.pdb')
protein_mask = m.maskProtein()
m = m.compress(protein_mask)
m.saveAs('1A19_dry.model')
Ejemplo n.º 16
0
## TEST RANCHV3

import biskit as b
import ranchv3 as r

dom1 = b.PDBModel("ranch_example/2z6o_mod.pdb")
dom2 = b.PDBModel("ranch_example/Histone_H3.pdb")

## read the fasta file, drop its first (header) line and join the remaining
## lines into one sequence string without line breaks
with open("ranch_example/full.fasta") as f:
    fasta_lines = f.readlines()

del fasta_lines[0]

seq = ''.join(fasta_lines).replace('\n', '')

call = r.Ranch(
    sequence=seq,
    s='p1',
    x=(dom1, dom2),
    f=('yes', 'no'),
    o=('no', 'no'),
    filesuff='test',
    w='ranch_example/models',
)

## TEST RANCHV4
## cwd = Documents/Stefan/multiprot/multiprot

import biskit as b
import ranchv4 as r

dom1 = b.PDBModel("ranch_example/2z6o_mod.pdb")
dom2 = b.PDBModel("ranch_example/Histone_H3.pdb")

## read all lines of the fasta file ...
with open("ranch_example/full.fasta") as f:
    seq = f.readlines()

## ... and drop the first (header) line
del seq[0]
Ejemplo n.º 17
0
#!/usr/bin/env python
## re-generate binary test data in this folder

import biskit as B
import biskit.tools as T

## load the structure, keep only its first chain, sort it and store the
## result as binary model
m = B.PDBModel('1A2P.pdb').takeChains([0]).sort()
m.saveAs('1A2P_dry.model')
Ejemplo n.º 18
0
#!/usr/bin/env python
## re-generate binary test data in this folder

import biskit as B
import biskit.tools as T

## convert each PDB file into a binary model file, one after the other
for pdb_file, model_file in (
        ('1huy_citrine.pdb', '1huy_citrine.model'),
        ('1zgp_dsred_dimer.pdb', '1zgp_dsred_dimer.model'),
):
    B.PDBModel(pdb_file).saveAs(model_file)
Ejemplo n.º 19
0
    def restore_pulchra(self, ch, ch_reb, domains, modeled_domains,
                        symtemplate, container_jdom):
        '''
        Retrieve the domain coordinates from the ranch model, and combine
        them with the linker residues rebuilt by pulchra.

        Walks through 'domains' in order. PDBModel entries are taken from
        'ch' (trimmed by two residues at every end that borders another
        entry); string entries (linkers) are taken from 'ch_reb' (extended by
        two residues at every end that borders another entry).

        :param ch:      chain as originally modeled by ranch
        :type ch:       PDBModel
        :param ch_reb:  modeled chain rebuilt by pulchra
        :type ch_reb:   PDBModel
        :param domains: list of domains from which the chain was built.
        :type domains:  list with PDBModel and str elements
        :param modeled_domains: dictionary with multiple-chain domains that were
                                modeled, with their new coordinates; looked up
                                by the position of the domain in 'domains'
        :type modeled_domains:  dict
        :param symtemplate: symmetry template used for symmetry (if any)
        :type symtemplate:  PDBModel
        :param container_jdom:  domain that is connecting the modeled chain to
                                the symmetric core (see self.embed_symmetric()).
                                NOTE(review): only its len() is used and the
                                fallback is a sequence, so this is presumably
                                a sequence rather than a PDBModel -- confirm
        :type container_jdom:   PDBModel

        :return: single-chain model with renumbered residues whose sequence
                 equals the sequence of ch_reb
        :rtype: PDBModel
        '''

        ch_res = B.PDBModel()
        aa_count = 0
        last = len(domains) - 1

        for k, d in enumerate(domains):
            # Ndelta = how many residues will be added/substracted from the N
            # terminus of linkers/PDBModels; nothing at the very first entry
            Nd = 0 if k == 0 else 2
            # Cdelta = how many residues will be added/substracted from the C
            # terminus of linkers/PDBModels; nothing at the very last entry.
            # Evaluated independently of Nd so that a single-entry list gets
            # no shift at either end.
            Cd = 0 if k == last else 2

            if isinstance(d, B.PDBModel):
                if d.lenChains() == 1:
                    # Normal single-chain domain... take the corresponding
                    # residues from ch
                    len_dom = len(d.sequence())

                elif d is symtemplate:
                    # Symtemplate... take the residues corresponding to the
                    # container_jdom from ch, OR the first chain of symtemplate
                    jdom = container_jdom or symtemplate.takeChains(
                        [0]).sequence()
                    len_dom = len(jdom)

                else:
                    # Multiple chain domain... take the corresponding chain from
                    # modeled_domains
                    len_dom = len(
                        modeled_domains[k].takeChains([0]).sequence())

                start = aa_count + Nd
                stop = aa_count + len_dom - Cd
                rdom = ch.takeResidues(list(range(start, stop)))

                assert ch_reb.sequence()[start:stop] == rdom.sequence()

                ch_res = ch_res.concat(rdom)
                aa_count += len_dom

            else:  # is a string
                len_d = len(d)

                # linkers also cover the residues trimmed from the
                # neighbouring domains
                ch_res = ch_res.concat(
                    ch_reb.takeResidues(
                        list(range(aa_count - Nd, aa_count + len_d + Cd))))
                aa_count += len_d

        while ch_res.lenChains() > 1:
            ch_res.mergeChains(0)

        ch_res.renumberResidues()

        assert ch_reb.sequence() == ch_res.sequence()

        return ch_res