Ejemplo n.º 1
0
 def __init__(self, pdbFilename=None, **kwargs):
     """
     Initialise the analysis object and, when a pdb file is given, build the
     structures the analysis works from.

     **kwargs:**
         | ``nativerad``     [8.] # stored on ``self.nativeRad``

     All keyword arguments are also forwarded unchanged to the parent
     class initialiser.
     """
     super(NatQ, self).__init__(pdbFilename, **kwargs)
     self.nativeRad = kwargs.get('nativerad', 8.)
     # Without a structure file there is nothing further to build.
     if pdbFilename is None:
         return
     # Model 0 of the pdb file, parsed, then wrapped in a KTGo object from
     # which the native contact list is taken.
     self.aa = PDBFile(self.pdbFilename)[0]
     self.aa.parse()
     self.cg = KTGo(self.aa)
     self.nativeContacts = self.cg.get_nativeSCSC()
Ejemplo n.º 2
0
def parse(pdbFilename, **kwargs):
    """
    Parse a *.pdb* plain text file into its constituent chains and segments, and
    print one CHARMM formatted *.pdb* file per chain/segment combination.

    Output files are named ``new_<code>-<chainid>-<segtype>.pdb`` and are
    written into the output path.  A short machine readable summary
    (``natom=``, ``nwarn=``, optionally ``warnings=``, and ``seg=``) is always
    printed to STDOUT once the files have been written.

    *kwarg defaults are listed first*

    **kwargs:**
        | ``informat``      ['auto', 'pdborg', 'charmm'] # 'auto' -> detects input formatting
        | ``outformat``     ['charmm', 'pdborg', 'auto'] # 'auto' -> same as input formatting
        | ``outpath``       ['auto', user_specified_path] # 'auto' -> same as pdbFilename path
        | ``fix_chainid``   [True, False]
        | ``autofix``       [True, False]
        | ``modelnum``      ['auto', 0, 1, 2, ...] # 'auto' -> uses the first model found
        | ``pickleme``      [False, True] # pickles the PDBFile object for later use
        | ``verbose``       [False, True]
        | ``old_resid``     [False, True]

    >>> parse('~/pychm/1yjp/1yjp.pdb',outpath='~',pickleme=True)
    """
    # kwargs
    kwargs = lowerKeys(kwargs)
    inFormat = kwargs.get('informat', 'auto')
    outFormat = kwargs.get('outformat', 'charmm')
    outPath = kwargs.get('outpath', 'auto')
    fix_chainid = kwargs.get('fix_chainid', True)
    autoFix = kwargs.get('autofix', True)
    modelNum = kwargs.get('modelnum', 'auto')
    pickleMe = kwargs.get('pickleme', False)
    verbose = kwargs.get('verbose', False)
    old_resid = kwargs.get('old_resid', False)
    # Repackage the PDBFile kwargs, make pdbFile object
    pdbFileArgs = {'informat':inFormat, 'fix_chainid':fix_chainid,
                'autofix':autoFix, 'verbose':verbose}
    pdb = PDBFile(pdbFilename, **pdbFileArgs)
    if verbose:
        print('%s: Output formatting set to `%s`' % (pdb.code, outFormat))
    # Get model number...
    if modelNum == 'auto':
        # The builtin next() works with any iterator, on Python 2.6+ and 3.
        thisMol = next(pdb.iter_models())
    else:
        thisMol = pdb[modelNum]
    if verbose:
        print('%s: Loading `%s`' % (pdb.code, thisMol.name))
    # Determine explicit output Path
    if outPath == 'auto':
        outPath = pdb.path
    else:
        outPath = expandPath(outPath)
    mkdir(outPath)
    if verbose:
        print('%s: Output path set to `%s`' % (pdb.code, outPath))
    # Do Work
    thisMol.parse()
    if verbose:
        print('%s: Parsing `%s`' % (pdb.code, thisMol.name))
    # Write CHARMMing style output.
    # Maps a segment's segType onto the tag used in output names.
    segDict = {'nuc':'nuc', 'pro':'pro', 'good':'goodhet', 'bad':'het',
            'dna':'dna', 'rna':'rna'}
    stdoutList = []
    # Write pdb files
    writeArgs = {'outformat':outFormat, 'ter':True, 'end':False,
                'old_resid':old_resid}
    for seg in thisMol.iter_seg():
        # Build the '<chainid>-<segtype>' label once; it is used both in the
        # STDOUT summary and in the output filename.
        segLabel = '%s-%s' % (seg.chainid, segDict[seg.segType])
        stdoutList.append(segLabel)
        name = '%s/new_%s-%s.pdb' % (outPath, thisMol.code, segLabel)
        if verbose:
            print('%s: Writing output to file `%s`' % (pdb.code, name))
        seg.write(filename=name, **writeArgs)
    # Write pickle (chk) file
    if pickleMe:
        pickleFilename = '%s/%s.chk' % (outPath, pdb.code)
        # The context manager closes the file even if dump() raises.
        # NOTE(review): text mode is kept from the original; binary mode
        # ('wb') is required for pickle protocols >= 1 and on Python 3.
        with open(pickleFilename, 'w') as pickleFile:
            dump(pdb, pickleFile)
        if verbose:
            print('%s: Writing pickle to file `%s`' % (pdb.code, pickleFilename))
    if verbose:
        print('\n\nEnd of verbosity\n\n')
    # To STDOUT
    print('natom=%d' % len(thisMol))
    print('nwarn=%d' % len(thisMol.warnings))
    if thisMol.warnings:
        print('warnings=%r' % thisMol.warnings)
    print('seg=%r' % stdoutList)
Ejemplo n.º 3
0
class NatQ(BaseAnalysis):
    """
    Native contact analysis built on CHARMM ``correl`` time series.

    When constructed with a pdb file, model 0 of that file is parsed and
    wrapped in a :class:`KTGo` object, and the list returned by
    :meth:`KTGo.get_nativeSCSC` is stored as ``nativeContacts``.  The class
    then writes and runs one CHARMM input file per batch of 100 contacts,
    loads the resulting ``natqNNNN.anl`` files into a matrix, and reduces
    that matrix to the fraction of contacts within ``nativeRad`` at each time
    point.

    The methods read several attributes that are not set in this class
    (``anlPathname``, ``outPathname``, ``charmmBin``, ``correlStart``,
    ``correlStop``, ``correlSkip``, ``correlArrayLength``) and call
    ``get_correlInputHeader`` -- presumably all provided by the parent class.
    """
    def __init__(self, pdbFilename=None, **kwargs):
        """
        **kwargs:**
            | ``nativerad``     [8.] # stored on ``self.nativeRad``

        All keyword arguments are also forwarded to the parent initialiser.
        """
        super(NatQ, self).__init__(pdbFilename, **kwargs)
        # kwargs
        self.nativeRad = kwargs.get('nativerad', 8.)
        # Main
        if pdbFilename is not None:
            self.aa = PDBFile(self.pdbFilename)[0]
            self.aa.parse()
            self.cg = KTGo(self.aa)
            self.nativeContacts = self.cg.get_nativeSCSC()

    # NOTE: the functions decorated with ``Property`` return ``locals()``, so
    # they must define nothing but ``doc``, ``fget``, ``fset`` and ``fdel``.
    @Property
    def data():
        doc =\
        """
        The native contact matrix.  Reading it loads ``natq.pickle`` from
        ``anlPathname``; if that file cannot be opened the matrix is rebuilt
        from the correl output files and the pickle is written.  Deleting it
        also removes the pickle file.
        """
        def fget(self):
            filename = '%s/natq.pickle' % self.anlPathname
            try:
                # Context manager so the handle is closed deterministically.
                with open(filename) as pickleFile:
                    self._data = pickle.load(pickleFile)
            except IOError:
                print('processing data in: %s' % self.anlPathname)
                self._data = self.build_nativeContactMatrix()
                with open(filename, 'w') as pickleFile:
                    pickle.dump(self._data, pickleFile)
            else:
                print('found pickled data in: %s' % filename)
            return self._data
        def fset(self, value):
            assert isinstance(value, np.ndarray)
            self._data = value
        def fdel(self):
            filename = '%s/natq.pickle' % self.anlPathname
            del self._data
            try:
                os.remove(filename)
            except OSError:
                # os.remove raises OSError (not IOError) when the cache file
                # is absent; a missing cache is not an error here.
                pass
        return locals()

    @Property
    def inpPathname():
        doc =\
        """
        Directory the CHARMM input files are written to.  The value is passed
        through ``self.expandPath`` when it is set.
        """
        def fget(self):
            return self._inpPathname
        def fset(self, value):
            self._inpPathname = self.expandPath(value)
        return locals()

    @Property
    def nativeRad():
        doc =\
        """
        The cutoff distance between two CG sidechain atom objects which is used
        to determine if their parent residues are in 'contact'.
        """
        def fget(self):
            return self._nativeRad
        def fset(self, value):
            if value <= 1:
                # Message now matches the check: a value of exactly 1 is
                # rejected as well.
                raise ValueError("The native contact radius must be greater than 1.")
            self._nativeRad = float(value)
        return locals()

    def build_nativeContactMatrix(self):
        """
        Builds a 2D numpy array from available correl output files. The first
        index references the contact number, the second index references the
        correl time array value (which in turn corresponds to nstep of the
        original dynamics in quanta of ``correlSkip``). This method can take
        awhile because of disk IO.

        A contact whose output file cannot be opened contributes a row of
        zeros and a message is printed.
        """
        print("Building native contact matrix.")
        # Floor division keeps the row length integral regardless of the
        # division semantics in effect.
        rowLength = (self.correlStop - self.correlStart) // self.correlSkip + 1
        tmp = []
        for i, _contact in enumerate(self.nativeContacts):
            anlFilename = '%s/natq%04d.anl' % (self.anlPathname, i)
            try:
                tmp.append(load_correlOutput(anlFilename))
            except IOError:
                tmp.append(np.zeros(rowLength))
                print("Can't find correl output for natq number: %04d." % i)
        return np.array(tmp, dtype=np.float64)

    def get_natQofT(self):
        """
        Returns, for every time point, the fraction of rows of ``data`` whose
        value is less than or equal to ``nativeRad``.

        NOTE: rows that were zero filled by :meth:`build_nativeContactMatrix`
        satisfy the cutoff and therefore count as formed contacts.
        """
        data = self.data
        # 1.0 where the contact distance is within the cutoff, 0.0 elsewhere.
        formed = (data <= self.nativeRad).astype(np.float64)
        return formed.mean(axis=0)

    # Charmm input creation
    def do_correl(self, **kwargs):
        """
        Writes the correl input files, then runs them.  Keyword arguments are
        forwarded to :meth:`write_correlInput`.
        """
        self.write_correlInput(**kwargs)
        self.run_correlInput()

    def write_correlInput(self, **kwargs):
        """
        Writes one CHARMM input file, ``natqNN.inp``, into ``inpPathname`` for
        every batch of 100 native contacts.  Each input opens one output unit
        per contact (``natqNNMM.anl`` in ``anlPathname``), enters one bond
        time series per contact, reads the trajectory and writes each series.

        **kwargs:**
            | ``header``        [[]] # passed to ``get_correlInputHeader``

        Raises :class:`IOError` if ``anlPathname`` is ``None``.
        """
        kwargs = lowerKeys(kwargs)
        header = kwargs.get('header', [])
        #
        print(self.anlPathname)
        if self.anlPathname is None:
            raise IOError("No directory specified.")
        mkdir(self.anlPathname)
        #
        for i, group in enumerate(grouper(self.nativeContacts, 100)):
            lines = self.get_correlInputHeader(header)
            lines.append('! anl :: write')
            # Output units start at 100, one per contact in this batch.
            for j, contact in enumerate(group):
                lines.append('open unit %03d write card name %s/natq%02d%02d.anl' % (j+100, self.anlPathname, i, j))
            lines.append('')
            lines.append('correl maxtimesteps %d maxatom %d maxseries %d' % (self.correlArrayLength, 1000, len(group)))
            for j, contact in enumerate(group):
                lines.append('enter n%03d bond bynum %d bynum %d geometry' % (j, contact.i.atomNum, contact.j.atomNum))
            lines.append('traj firstu 10 nunit 1 begin %d stop %d skip %d select all end' % (self.correlStart, self.correlStop, self.correlSkip))
            lines.append('')
            for j, contact in enumerate(group):
                lines.append('write n%03d card unit %03d' % (j, j+100))
                lines.append('* Contact %02d%02d: between cgAtoms %s and %s' % (i, j, contact.i.addr, contact.j.addr))
                lines.append('* Native Contact - Interaction between %s and %s' % (contact.i.prmString, contact.j.prmString))
                lines.append('*')
                lines.append('')
            lines.extend(['end', '', 'rewind unit 10', '', 'stop'])
            # Write file
            self.inpFilename = '%s/natq%02d.inp' % (self.inpPathname, i)
            print('%s' % self.inpFilename)
            with open(self.inpFilename, 'w') as write_to:
                write_to.write('\n'.join(lines))

    def run_correlInput(self):
        """
        Runs ``charmmBin`` once per batch of 100 native contacts, feeding it
        ``natqNN.inp`` on stdin and redirecting stdout to ``natqNN.out`` in
        ``outPathname``.  Any previous output file is removed first.

        Raises :class:`IOError` if an expected input file does not exist.
        """
        for i, _group in enumerate(grouper(self.nativeContacts, 100)):
            self.inpFilename = '%s/natq%02d.inp' % (self.inpPathname, i)
            self.outFilename = '%s/natq%02d.out' % (self.outPathname, i)
            # Discard stale output; it is fine if there is none.
            try:
                os.remove(self.outFilename)
            except OSError:
                pass
            if not os.path.exists(self.inpFilename):
                raise IOError("No such file or directory: '%s'" % self.inpFilename)
            # NOTE(review): the command is interpreted by the shell, so paths
            # containing spaces or shell metacharacters will break it.
            os.system('%s < %s > %s' % (self.charmmBin, self.inpFilename, self.outFilename))
Ejemplo n.º 4
0
class Contacts(BaseAnalysis):
    """
    Sidechain-sidechain contact analysis built on CHARMM ``correl`` time
    series.

    When constructed with a pdb file, model 0 of that file is parsed and
    wrapped in a :class:`KTGo` object, and every unique pair of sidechain
    atoms is stored in ``contacts`` as a :class:`Bond` flagged ``native`` or
    not (see :meth:`get_contacts`).  The class then writes and runs one
    CHARMM input file per batch of 100 contacts, loads the resulting
    ``contactNNNN.anl`` files into a matrix, and can reduce the native rows
    of that matrix to the fraction of contacts within ``nativeRad`` at each
    time point.

    The methods read several attributes that are not set in this class
    (``anlPathname``, ``outPathname``, ``charmmBin``, ``correlStart``,
    ``correlStop``, ``correlSkip``, ``correlArrayLength``) and call
    ``get_correlInputHeader`` -- presumably all provided by the parent class.
    """
    def __init__(self, pdbFilename=None, **kwargs):
        """
        **kwargs:**
            | ``nativerad``     [8.] # stored on ``self.nativeRad``

        All keyword arguments are also forwarded to the parent initialiser.
        """
        super(Contacts, self).__init__(pdbFilename, **kwargs)
        # kwargs
        self.nativeRad = kwargs.get('nativerad', 8.)
        # Main
        if pdbFilename is not None:
            self.aa = PDBFile(self.pdbFilename)[0]
            self.aa.parse()
            self.cg = KTGo(self.aa)
            self.contacts = self.get_contacts()

    # NOTE: the functions decorated with ``Property`` return ``locals()``, so
    # they must define nothing but ``doc``, ``fget``, ``fset`` and ``fdel``.
    @Property
    def data():
        doc =\
        """
        The full contact matrix.  Reading it loads ``contacts.pickle`` from
        ``anlPathname``; if that file cannot be opened the matrix is rebuilt
        from the correl output files and the pickle is written.  Deleting it
        also removes the pickle file.
        """
        def fget(self):
            filename = '%s/contacts.pickle' % self.anlPathname
            try:
                # Context manager so the handle is closed deterministically.
                with open(filename) as pickleFile:
                    self._data = pickle.load(pickleFile)
            except IOError:
                print('processing data in: %s' % self.anlPathname)
                self._data = self.build_contactMatrix()
                with open(filename, 'w') as pickleFile:
                    pickle.dump(self._data, pickleFile)
            else:
                print('found pickled data in: %s' % filename)
            return self._data
        def fset(self, value):
            assert isinstance(value, np.ndarray)
            self._data = value
        def fdel(self):
            filename = '%s/contacts.pickle' % self.anlPathname
            del self._data
            try:
                os.remove(filename)
            except OSError:
                # os.remove raises OSError (not IOError) when the cache file
                # is absent; a missing cache is not an error here.
                pass
        return locals()

    @Property
    def inpPathname():
        doc =\
        """
        Directory the CHARMM input files are written to.  The value is passed
        through ``self.expandPath`` when it is set.
        """
        def fget(self):
            return self._inpPathname
        def fset(self, value):
            self._inpPathname = self.expandPath(value)
        return locals()

    @Property
    def nativeRad():
        doc =\
        """
        The cutoff distance between two CG sidechain atom objects which is used
        to determine if their parent residues are in 'contact'.
        """
        def fget(self):
            return self._nativeRad
        def fset(self, value):
            if value <= 1:
                # Message now matches the check: a value of exactly 1 is
                # rejected as well.
                raise ValueError("The native contact radius must be greater than 1.")
            self._nativeRad = float(value)
        return locals()

    def get_contacts(self):
        """
        Returns a complete :class:`list` of :class:`Bond` objects, each bond
        object represents a unique sidechain-sidechain pair. Additionally,
        and most importantly, each :class:`Bond` object has been assigned the
        additional ``native`` attribute, to ``True`` or ``False``.  This value
        is true, if the corresponding all-atom representation of the residue
        sidechains have any heavy atoms within 4.5 Angstrom, as determined by
        :meth:`KTGo.get_nativeSCSC`.
        """
        sidechains = [ atom for atom in self.cg if atom.atomType == 's' ]
        # Pair each sidechain atom with every later one; this yields the same
        # (i < j) pairs, in the same order, as filtering the full product.
        tmp = [ Bond(atom_i, atom_j) for i, atom_i in enumerate(sidechains)
                    for atom_j in sidechains[i+1:] ]
        nativeContacts = set(self.cg.get_nativeSCSC())
        for contact in tmp:
            contact.native = contact in nativeContacts
        return tmp

    def build_contactMatrix(self):
        """
        Builds a 2D numpy array from available correl output files. The first
        index references the contact number, the second index references the
        correl time array value (which in turn corresponds to nstep of the
        original dynamics in quanta of ``correlSkip``). This method can take
        awhile because of disk IO.

        A contact whose output file cannot be opened contributes a row of
        zeros and a message is printed.
        """
        print("Building full contact matrix.")
        # Floor division keeps the row length integral regardless of the
        # division semantics in effect.
        rowLength = (self.correlStop - self.correlStart) // self.correlSkip + 1
        tmp = []
        for i, _contact in enumerate(self.contacts):
            anlFilename = '%s/contact%04d.anl' % (self.anlPathname, i)
            try:
                tmp.append(load_correlOutput(anlFilename))
            except IOError:
                tmp.append(np.zeros(rowLength))
                print("Can't find correl output for contact number: %04d." % i)
        return np.array(tmp, dtype=np.float64)

    def get_nativeContactMatrix(self):
        """
        Projects out elements of the contactMatrix which are not derived from
        'native' contacts.
        """
        nativeIndex = [ i for i, taco in enumerate(self.contacts) if taco.native ]
        return self.data[nativeIndex]

    def get_natQofT(self):
        """
        Returns, for every time point, the fraction of native contact rows
        whose value is less than or equal to ``nativeRad``.

        NOTE: rows that were zero filled by :meth:`build_contactMatrix`
        satisfy the cutoff and therefore count as formed contacts.
        """
        nativeContacts = self.get_nativeContactMatrix()
        # 1.0 where the contact distance is within the cutoff, 0.0 elsewhere.
        formed = (nativeContacts <= self.nativeRad).astype(np.float64)
        return formed.mean(axis=0)

    # Charmm input creation
    def do_correl(self, **kwargs):
        """
        Writes the correl input files, then runs them.  Keyword arguments are
        forwarded to :meth:`write_correlInput`.
        """
        self.write_correlInput(**kwargs)
        self.run_correlInput()

    def write_correlInput(self, **kwargs):
        """
        Writes one CHARMM input file, ``contactNN.inp``, into ``inpPathname``
        for every batch of 100 contacts.  Each input opens one output unit per
        contact (``contactNNMM.anl`` in ``anlPathname``), enters one bond time
        series per contact, reads the trajectory and writes each series.
        Native contacts get an extra title line in their output.

        **kwargs:**
            | ``header``        [[]] # passed to ``get_correlInputHeader``

        Raises :class:`IOError` if ``anlPathname`` is ``None``.
        """
        kwargs = lowerKeys(kwargs)
        header = kwargs.get('header', [])
        #
        if self.anlPathname is None:
            raise IOError("No directory specified.")
        mkdir(self.anlPathname)
        #
        for i, group in enumerate(grouper(self.contacts, 100)):
            lines = self.get_correlInputHeader(header)
            lines.append('! anl :: write')
            # Output units start at 100, one per contact in this batch.
            for j, contact in enumerate(group):
                lines.append('open unit %03d write card name %s/contact%02d%02d.anl' % (j+100, self.anlPathname, i, j))
            lines.append('')
            lines.append('correl maxtimesteps %d maxatom %d maxseries %d' % (self.correlArrayLength, 1000, len(group)))
            for j, contact in enumerate(group):
                lines.append('enter n%03d bond bynum %d bynum %d geometry' % (j, contact.i.atomNum, contact.j.atomNum))
            lines.append('traj firstu 10 nunit 1 begin %d stop %d skip %d select all end' % (self.correlStart, self.correlStop, self.correlSkip))
            lines.append('')
            for j, contact in enumerate(group):
                lines.append('write n%03d card unit %03d' % (j, j+100))
                lines.append('* Contact %02d%02d: between cgAtoms %s and %s' % (i, j, contact.i.addr, contact.j.addr))
                if contact.native:
                    lines.append('* Native Contact - Interaction between %s and %s' % (contact.i.prmString, contact.j.prmString))
                lines.append('*')
                lines.append('')
            lines.extend(['end', '', 'rewind unit 10', '', 'stop'])
            # Write file
            self.inpFilename = '%s/contact%02d.inp' % (self.inpPathname, i)
            with open(self.inpFilename, 'w') as write_to:
                write_to.write('\n'.join(lines))

    def run_correlInput(self):
        """
        Runs ``charmmBin`` once per batch of 100 contacts, feeding it
        ``contactNN.inp`` on stdin and redirecting stdout to ``contactNN.out``
        in ``outPathname``.  Any previous output file is removed first.

        Raises :class:`IOError` if an expected input file does not exist.
        """
        for i, _group in enumerate(grouper(self.contacts, 100)):
            self.inpFilename = '%s/contact%02d.inp' % (self.inpPathname, i)
            self.outFilename = '%s/contact%02d.out' % (self.outPathname, i)
            # Discard stale output; it is fine if there is none.
            try:
                os.remove(self.outFilename)
            except OSError:
                pass
            if not os.path.exists(self.inpFilename):
                raise IOError("No such file or directory: '%s'" % self.inpFilename)
            # NOTE(review): the command is interpreted by the shell, so paths
            # containing spaces or shell metacharacters will break it.
            os.system('%s < %s > %s' % (self.charmmBin, self.inpFilename, self.outFilename))