def __init__(self, pdbFilename=None, **kwargs):
    """
    Initialise the analysis and, when a structure file is supplied, derive
    its native contacts.

    ``pdbFilename`` and every keyword argument are forwarded unchanged to the
    parent initialiser.

    **kwargs:**
        | ``nativerad``     [8.]    # assigned to ``self.nativeRad``

    When ``pdbFilename`` is not ``None`` the following attributes are set:

        | ``aa``             item ``[0]`` of ``PDBFile(self.pdbFilename)``, after ``parse()``
        | ``cg``             ``KTGo(self.aa)``
        | ``nativeContacts`` ``self.cg.get_nativeSCSC()``
    """
    super(NatQ, self).__init__(pdbFilename, **kwargs)
    # kwargs
    # Look the key up case-insensitively, the same way the other kwargs
    # consumers in this file do; otherwise `nativeRad=...` is silently ignored.
    # The parent class still receives the caller's original keys.
    self.nativeRad = lowerKeys(kwargs).get('nativerad', 8.)
    # Main
    if pdbFilename is not None:
        self.aa = PDBFile(self.pdbFilename)[0]
        self.aa.parse()
        self.cg = KTGo(self.aa)
        self.nativeContacts = self.cg.get_nativeSCSC()
def parse(pdbFilename, **kwargs):
    """
    Parse a *.pdb* plain text file into its constituent chains and segments, and
    print one CHARMM formatted *.pdb* file per chain/segment combination.

    *kwarg defaults are listed first*

    **kwargs:**
        | ``informat``      ['auto', 'pdborg', 'charmm']    # 'auto' -> detects input formatting
        | ``outformat``     ['charmm', 'pdborg', 'auto']    # 'auto' -> same as input formatting
        | ``outpath``       ['auto', user_specified_path]   # 'auto' -> same as pdbFilename path
        | ``fix_chainid``   [True, False]
        | ``autofix``       [True, False]
        | ``modelnum``      ['auto', 0, 1, 2, ...]          # 'auto' -> uses the first model found
        | ``pickleme``      [False, True]                   # pickles the PDBFile object for later use
        | ``verbose``       [False, True]
        | ``old_resid``     [False, True]

    >>> parse('~/pychm/1yjp/1yjp.pdb',outpath='~',pickleme=True)

    Each segment is written to ``<outpath>/new_<code>-<chainid>-<type>.pdb``.
    With ``pickleme`` the :class:`PDBFile` object is additionally pickled to
    ``<outpath>/<code>.chk``.  A machine readable summary (``natom=``,
    ``nwarn=``, optionally ``warnings=``, and ``seg=``) is always printed to
    STDOUT.  Nothing is returned.
    """
    # kwargs
    kwargs = lowerKeys(kwargs)
    inFormat = kwargs.get('informat', 'auto')
    outFormat = kwargs.get('outformat', 'charmm')
    outPath = kwargs.get('outpath', 'auto')
    fix_chainid = kwargs.get('fix_chainid', True)
    autoFix = kwargs.get('autofix', True)
    modelNum = kwargs.get('modelnum', 'auto')
    pickleMe = kwargs.get('pickleme', False)
    verbose = kwargs.get('verbose', False)
    old_resid = kwargs.get('old_resid', False)
    # Repackage the PDBFile kwargs, make pdbFile object
    pdbFileArgs = {
        'informat': inFormat,
        'fix_chainid': fix_chainid,
        'autofix': autoFix,
        'verbose': verbose,
    }
    pdb = PDBFile(pdbFilename, **pdbFileArgs)
    if verbose:
        print('%s: Output formatting set to `%s`' % (pdb.code, outFormat))
    # Get model number...
    if modelNum == 'auto':
        # builtin next() instead of the Python-2-only .next() method
        thisMol = next(pdb.iter_models())
    else:
        thisMol = pdb[modelNum]
    if verbose:
        print('%s: Loading `%s`' % (pdb.code, thisMol.name))
    # Determine explicit output Path
    if outPath == 'auto':
        outPath = pdb.path
    else:
        outPath = expandPath(outPath)
    mkdir(outPath)
    if verbose:
        print('%s: Output path set to `%s`' % (pdb.code, outPath))
    # Do Work
    thisMol.parse()
    if verbose:
        print('%s: Parsing `%s`' % (pdb.code, thisMol.name))
    # Write CHARMMing style output.
    segDict = {'nuc': 'nuc', 'pro': 'pro', 'good': 'goodhet', 'bad': 'het',
               'dna': 'dna', 'rna': 'rna'}
    stdoutList = []
    # Write pdb files
    writeArgs = {'outformat': outFormat, 'ter': True, 'end': False,
                 'old_resid': old_resid}
    for seg in thisMol.iter_seg():
        # One lookup per segment; an unknown segType still raises KeyError.
        segTag = segDict[seg.segType]
        stdoutList.append('%s-%s' % (seg.chainid, segTag))
        name = '%s/new_%s-%s-%s.pdb' % (outPath, thisMol.code, seg.chainid,
                                       segTag)
        if verbose:
            print('%s: Writing output to file `%s`' % (pdb.code, name))
        seg.write(filename=name, **writeArgs)
    # Write pickle (chk) file
    if pickleMe:
        pickleFilename = '%s/%s.chk' % (outPath, pdb.code)
        # `with` closes the handle even if dump() raises; binary mode is what
        # the pickle module expects for its output file.
        with open(pickleFilename, 'wb') as pickleFile:
            dump(pdb, pickleFile)
        if verbose:
            print('%s: Writing pickle to file `%s`' % (pdb.code, pickleFilename))
    if verbose:
        print('\n\nEnd of verbosity\n\n')
    # To STDOUT
    print('natom=%d' % len(thisMol))
    print('nwarn=%d' % len(thisMol.warnings))
    if thisMol.warnings:
        # The 1-tuple keeps %-formatting correct whatever container type
        # `warnings` turns out to be.
        print('warnings=%r' % (thisMol.warnings,))
    print('seg=%r' % (stdoutList,))
class NatQ(BaseAnalysis):
    """
    Native-contact analysis built on CHARMM ``correl`` bond time series.

    For every contact returned by :meth:`KTGo.get_nativeSCSC` a CHARMM input
    script is generated that writes the pair distance versus time to
    ``<anlPathname>/natqNNNN.anl``.  Those files are collected into a 2D
    array (:attr:`data`) from which :meth:`get_natQofT` derives the fraction
    of contacts within :attr:`nativeRad` at each time point.

    Attributes such as ``anlPathname``, ``outPathname``, ``charmmBin``,
    ``correlStart``, ``correlStop``, ``correlSkip`` and ``correlArrayLength``
    are read here but not defined in this class (presumably provided by
    :class:`BaseAnalysis` -- confirm).
    """
    def __init__(self, pdbFilename=None, **kwargs):
        """
        **kwargs:**
            | ``nativerad``     [8.]    # assigned to ``self.nativeRad``

        ``pdbFilename`` and all keyword arguments are forwarded unchanged to
        the parent initialiser.  When ``pdbFilename`` is not ``None`` the
        attributes ``aa``, ``cg`` and ``nativeContacts`` are populated.
        """
        super(NatQ, self).__init__(pdbFilename, **kwargs)
        # kwargs
        # Case-insensitive lookup, matching write_correlInput; otherwise
        # `nativeRad=...` would be silently ignored.
        self.nativeRad = lowerKeys(kwargs).get('nativerad', 8.)
        # Main
        if pdbFilename is not None:
            self.aa = PDBFile(self.pdbFilename)[0]
            self.aa.parse()
            self.cg = KTGo(self.aa)
            self.nativeContacts = self.cg.get_nativeSCSC()

    @Property
    def data():
        doc =\
        """
        The native contact matrix.  Reading loads ``<anlPathname>/natq.pickle``
        when it can be opened, otherwise the matrix is rebuilt with
        :meth:`build_nativeContactMatrix` and pickled to that file.  Deleting
        drops the in-memory array and removes the pickle file.
        """
        def fget(self):
            filename = '%s/natq.pickle' % self.anlPathname
            try:
                # NOTE: unpickling executes arbitrary code; only point
                # anlPathname at directories you trust.
                with open(filename, 'rb') as pickleFile:
                    self._data = pickle.load(pickleFile)
                print('found pickled data in: %s' % filename)
                return self._data
            except IOError:
                print('processing data in: %s' % self.anlPathname)
                self._data = self.build_nativeContactMatrix()
                with open(filename, 'wb') as pickleFile:
                    pickle.dump(self._data, pickleFile)
                return self._data
        def fset(self, value):
            assert isinstance(value, np.ndarray)
            self._data = value
        def fdel(self):
            filename = '%s/natq.pickle' % self.anlPathname
            del self._data
            try:
                os.remove(filename)
            except OSError:
                # os.remove raises OSError (not IOError) when the file is
                # absent; a missing cache file is not an error here.
                pass
        return locals()

    @Property
    def inpPathname():
        doc =\
        """
        Directory the generated CHARMM input files are written to.  Assigned
        values are passed through ``self.expandPath``.
        """
        def fget(self):
            return self._inpPathname
        def fset(self, value):
            self._inpPathname = self.expandPath(value)
        return locals()

    @Property
    def nativeRad():
        doc =\
        """
        The cutoff distance between two CG sidechain atom objects which is
        used to determine if their parent residues are in 'contact'.  Must be
        at least 1; stored as a :class:`float`.
        """
        def fget(self):
            return self._nativeRad
        def fset(self, value):
            # Strictly-less-than, so the boundary matches the error message.
            if value < 1:
                raise ValueError("The native contact radius must be at least 1.")
            self._nativeRad = float(value)
        return locals()

    def build_nativeContactMatrix(self):
        """
        Builds a 2D numpy array from available correl output files.  The first
        index references the contact number, the second index references the
        correl time array value (which in turn corresponds to nstep of the
        original dynamics in quanta of ``correlSkip``).

        This method can take awhile because of disk IO.

        NOTE(review): a contact whose ``.anl`` file cannot be read contributes
        a row of zeros, which :meth:`get_natQofT` counts as "within the
        cutoff" -- confirm that this is intended.
        """
        print("Building native contact matrix.")
        # Floor division: the row length is a count of time points.
        rowLength = (self.correlStop - self.correlStart) // self.correlSkip + 1
        rows = []
        for i in range(len(self.nativeContacts)):
            try:
                rows.append(load_correlOutput('%s/natq%04d.anl' % (self.anlPathname, i)))
            except IOError:
                rows.append(np.zeros(rowLength))
                print("Can't find correl output for natq number: %04d." % i)
        return np.array(rows, dtype=np.float64)

    def get_natQofT(self):
        """
        Returns a 1D array holding, for every time point, the fraction of
        contacts whose distance is less than or equal to :attr:`nativeRad`.
        """
        data = self.data
        formed = np.zeros(data.shape)
        # Entries beyond the cutoff keep their initial value of zero.
        formed[data <= self.nativeRad] = 1
        return formed.mean(axis=0)

    # Charmm input creation
    def do_correl(self, **kwargs):
        """
        Writes the correl input files, then runs them.  ``kwargs`` are passed
        to :meth:`write_correlInput`.
        """
        self.write_correlInput(**kwargs)
        self.run_correlInput()

    def write_correlInput(self, **kwargs):
        """
        Writes one CHARMM input file, ``<inpPathname>/natqNN.inp``, per group
        of up to 100 native contacts.  Each script opens one output unit per
        contact (units 100 and up), enters one ``bond`` time series per
        contact, reads the trajectory on unit 10, and writes every series to
        ``<anlPathname>/natqNNMM.anl``.

        **kwargs:**
            | ``header``        []      # handed to ``get_correlInputHeader``

        Raises :class:`IOError` if ``anlPathname`` is ``None``.  On return
        ``self.inpFilename`` names the last file written.
        """
        kwargs = lowerKeys(kwargs)
        header = kwargs.get('header', [])
        #
        if self.anlPathname is None:
            raise IOError("No directory specified.")
        mkdir(self.anlPathname)
        #
        for i, group in enumerate(grouper(self.nativeContacts, 100)):
            lines = self.get_correlInputHeader(header)
            lines.append('! anl :: write')
            for j, contact in enumerate(group):
                lines.append('open unit %03d write card name %s/natq%02d%02d.anl' %
                             (j+100, self.anlPathname, i, j))
            lines.append('')
            lines.append('correl maxtimesteps %d maxatom %d maxseries %d' %
                         (self.correlArrayLength, 1000, len(group)))
            for j, contact in enumerate(group):
                lines.append('enter n%03d bond bynum %d bynum %d geometry' %
                             (j, contact.i.atomNum, contact.j.atomNum))
            lines.append('traj firstu 10 nunit 1 begin %d stop %d skip %d select all end' %
                         (self.correlStart, self.correlStop, self.correlSkip))
            lines.append('')
            for j, contact in enumerate(group):
                lines.append('write n%03d card unit %03d' % (j, j+100))
                # Title lines for the written series, closed by a bare '*'.
                lines.append('* Contact %02d%02d: between cgAtoms %s and %s' %
                             (i, j, contact.i.addr, contact.j.addr))
                lines.append('* Native Contact - Interaction between %s and %s' %
                             (contact.i.prmString, contact.j.prmString))
                lines.append('*')
                lines.append('')
            lines.extend(['end', '', 'rewind unit 10', '', 'stop'])
            text = '\n'.join(lines)
            # Write file
            self.inpFilename = '%s/natq%02d.inp' % (self.inpPathname, i)
            print('%s' % self.inpFilename)
            with open(self.inpFilename, 'w') as write_to:
                write_to.write(text)

    def run_correlInput(self):
        """
        Runs ``charmmBin`` once per input file produced by
        :meth:`write_correlInput`, redirecting output to
        ``<outPathname>/natqNN.out``.  A stale output file is removed first.

        Raises :class:`IOError` if an expected input file does not exist.
        """
        #
        for i, _ in enumerate(grouper(self.nativeContacts, 100)):
            self.inpFilename = '%s/natq%02d.inp' % (self.inpPathname, i)
            self.outFilename = '%s/natq%02d.out' % (self.outPathname, i)
            #
            try:
                os.remove(self.outFilename)
            except OSError:
                pass
            if not os.path.exists(self.inpFilename):
                raise IOError("No such file or directory: '%s'" % self.inpFilename)
            # NOTE(review): the command line is interpreted by the shell, so
            # paths containing spaces or shell metacharacters will break it.
            os.system('%s < %s > %s' % (self.charmmBin, self.inpFilename, self.outFilename))
class Contacts(BaseAnalysis):
    """
    All-pairs sidechain contact analysis built on CHARMM ``correl`` bond
    time series.

    Every unique pair of coarse-grained atoms with ``atomType == 's'`` is
    tracked (see :meth:`get_contacts`); each pair is flagged ``native`` or
    not.  Pair distances versus time are written by CHARMM to
    ``<anlPathname>/contactNNNN.anl``, collected into a 2D array
    (:attr:`data`), and reduced by :meth:`get_natQofT` to the fraction of
    native contacts within :attr:`nativeRad` at each time point.

    Attributes such as ``anlPathname``, ``outPathname``, ``charmmBin``,
    ``correlStart``, ``correlStop``, ``correlSkip`` and ``correlArrayLength``
    are read here but not defined in this class (presumably provided by
    :class:`BaseAnalysis` -- confirm).
    """
    def __init__(self, pdbFilename=None, **kwargs):
        """
        **kwargs:**
            | ``nativerad``     [8.]    # assigned to ``self.nativeRad``

        ``pdbFilename`` and all keyword arguments are forwarded unchanged to
        the parent initialiser.  When ``pdbFilename`` is not ``None`` the
        attributes ``aa``, ``cg`` and ``contacts`` are populated.
        """
        super(Contacts, self).__init__(pdbFilename, **kwargs)
        # kwargs
        # Case-insensitive lookup, matching write_correlInput; otherwise
        # `nativeRad=...` would be silently ignored.
        self.nativeRad = lowerKeys(kwargs).get('nativerad', 8.)
        # Main
        if pdbFilename is not None:
            self.aa = PDBFile(self.pdbFilename)[0]
            self.aa.parse()
            self.cg = KTGo(self.aa)
            self.contacts = self.get_contacts()

    @Property
    def data():
        doc =\
        """
        The full contact matrix.  Reading loads
        ``<anlPathname>/contacts.pickle`` when it can be opened, otherwise the
        matrix is rebuilt with :meth:`build_contactMatrix` and pickled to that
        file.  Deleting drops the in-memory array and removes the pickle file.
        """
        def fget(self):
            filename = '%s/contacts.pickle' % self.anlPathname
            try:
                # NOTE: unpickling executes arbitrary code; only point
                # anlPathname at directories you trust.
                with open(filename, 'rb') as pickleFile:
                    self._data = pickle.load(pickleFile)
                print('found pickled data in: %s' % filename)
                return self._data
            except IOError:
                print('processing data in: %s' % self.anlPathname)
                self._data = self.build_contactMatrix()
                with open(filename, 'wb') as pickleFile:
                    pickle.dump(self._data, pickleFile)
                return self._data
        def fset(self, value):
            assert isinstance(value, np.ndarray)
            self._data = value
        def fdel(self):
            filename = '%s/contacts.pickle' % self.anlPathname
            del self._data
            try:
                os.remove(filename)
            except OSError:
                # os.remove raises OSError (not IOError) when the file is
                # absent; a missing cache file is not an error here.
                pass
        return locals()

    @Property
    def inpPathname():
        doc =\
        """
        Directory the generated CHARMM input files are written to.  Assigned
        values are passed through ``self.expandPath``.
        """
        def fget(self):
            return self._inpPathname
        def fset(self, value):
            self._inpPathname = self.expandPath(value)
        return locals()

    @Property
    def nativeRad():
        doc =\
        """
        The cutoff distance between two CG sidechain atom objects which is
        used to determine if their parent residues are in 'contact'.  Must be
        at least 1; stored as a :class:`float`.
        """
        def fget(self):
            return self._nativeRad
        def fset(self, value):
            # Strictly-less-than, so the boundary matches the error message.
            if value < 1:
                raise ValueError("The native contact radius must be at least 1.")
            self._nativeRad = float(value)
        return locals()

    def get_contacts(self):
        """
        Returns a complete :class:`list` of :class:`Bond` objects, each bond
        object represents a unique sidechain-sidechain pair.  Additionally,
        and most importantly, each :class:`Bond` object has been assigned the
        additional ``native`` attribute, to ``True`` or ``False``.  This value
        is true, if the corresponding all-atom representation of the residue
        sidechains have any heavy atoms within 4.5 Angstrom, as determined by
        :meth:`KTGo.get_nativeSCSC`.
        """
        from itertools import combinations

        sideChains = [ atom for atom in self.cg if atom.atomType == 's' ]
        # combinations() yields exactly the i < j pairs, in the same order as
        # a nested loop over indices, without visiting the rejected half.
        contacts = [ Bond(atom_i, atom_j)
                     for atom_i, atom_j in combinations(sideChains, 2) ]
        nativeContacts = set(self.cg.get_nativeSCSC())
        for contact in contacts:
            contact.native = contact in nativeContacts
        return contacts

    def build_contactMatrix(self):
        """
        Builds a 2D numpy array from available correl output files.  The first
        index references the contact number, the second index references the
        correl time array value (which in turn corresponds to nstep of the
        original dynamics in quanta of ``correlSkip``).

        This method can take awhile because of disk IO.

        NOTE(review): a contact whose ``.anl`` file cannot be read contributes
        a row of zeros, which :meth:`get_natQofT` counts as "within the
        cutoff" -- confirm that this is intended.
        """
        print("Building full contact matrix.")
        # Floor division: the row length is a count of time points.
        rowLength = (self.correlStop - self.correlStart) // self.correlSkip + 1
        rows = []
        for i in range(len(self.contacts)):
            try:
                rows.append(load_correlOutput('%s/contact%04d.anl' % (self.anlPathname, i)))
            except IOError:
                rows.append(np.zeros(rowLength))
                print("Can't find correl output for contact number: %04d." % i)
        return np.array(rows, dtype=np.float64)

    def get_nativeContactMatrix(self):
        """
        Projects out elements of the contactMatrix which are not derived from
        'native' contacts.
        """
        nativeIndex = [ i for i, contact in enumerate(self.contacts)
                        if contact.native ]
        return self.data[nativeIndex]

    def get_natQofT(self):
        """
        Returns a 1D array holding, for every time point, the fraction of
        native contacts whose distance is less than or equal to
        :attr:`nativeRad`.
        """
        nativeContacts = self.get_nativeContactMatrix()
        formed = np.zeros(nativeContacts.shape)
        # Entries beyond the cutoff keep their initial value of zero.
        formed[nativeContacts <= self.nativeRad] = 1
        return formed.mean(axis=0)

    # Charmm input creation
    def do_correl(self, **kwargs):
        """
        Writes the correl input files, then runs them.  ``kwargs`` are passed
        to :meth:`write_correlInput`.
        """
        self.write_correlInput(**kwargs)
        self.run_correlInput()

    def write_correlInput(self, **kwargs):
        """
        Writes one CHARMM input file, ``<inpPathname>/contactNN.inp``, per
        group of up to 100 contacts.  Each script opens one output unit per
        contact (units 100 and up), enters one ``bond`` time series per
        contact, reads the trajectory on unit 10, and writes every series to
        ``<anlPathname>/contactNNMM.anl``.  Native contacts get an extra
        title line in the written series.

        **kwargs:**
            | ``header``        []      # handed to ``get_correlInputHeader``

        Raises :class:`IOError` if ``anlPathname`` is ``None``.  On return
        ``self.inpFilename`` names the last file written.
        """
        kwargs = lowerKeys(kwargs)
        header = kwargs.get('header', [])
        #
        if self.anlPathname is None:
            raise IOError("No directory specified.")
        mkdir(self.anlPathname)
        #
        for i, group in enumerate(grouper(self.contacts, 100)):
            lines = self.get_correlInputHeader(header)
            lines.append('! anl :: write')
            for j, contact in enumerate(group):
                lines.append('open unit %03d write card name %s/contact%02d%02d.anl' %
                             (j+100, self.anlPathname, i, j))
            lines.append('')
            lines.append('correl maxtimesteps %d maxatom %d maxseries %d' %
                         (self.correlArrayLength, 1000, len(group)))
            for j, contact in enumerate(group):
                lines.append('enter n%03d bond bynum %d bynum %d geometry' %
                             (j, contact.i.atomNum, contact.j.atomNum))
            lines.append('traj firstu 10 nunit 1 begin %d stop %d skip %d select all end' %
                         (self.correlStart, self.correlStop, self.correlSkip))
            lines.append('')
            for j, contact in enumerate(group):
                lines.append('write n%03d card unit %03d' % (j, j+100))
                # Title lines for the written series, closed by a bare '*'.
                lines.append('* Contact %02d%02d: between cgAtoms %s and %s' %
                             (i, j, contact.i.addr, contact.j.addr))
                if contact.native:
                    lines.append('* Native Contact - Interaction between %s and %s' %
                                 (contact.i.prmString, contact.j.prmString))
                lines.append('*')
                lines.append('')
            lines.extend(['end', '', 'rewind unit 10', '', 'stop'])
            text = '\n'.join(lines)
            # Write file
            self.inpFilename = '%s/contact%02d.inp' % (self.inpPathname, i)
            with open(self.inpFilename, 'w') as write_to:
                write_to.write(text)

    def run_correlInput(self):
        """
        Runs ``charmmBin`` once per input file produced by
        :meth:`write_correlInput`, redirecting output to
        ``<outPathname>/contactNN.out``.  A stale output file is removed
        first.

        Raises :class:`IOError` if an expected input file does not exist.
        """
        #
        for i, _ in enumerate(grouper(self.contacts, 100)):
            self.inpFilename = '%s/contact%02d.inp' % (self.inpPathname, i)
            self.outFilename = '%s/contact%02d.out' % (self.outPathname, i)
            #
            try:
                os.remove(self.outFilename)
            except OSError:
                pass
            if not os.path.exists(self.inpFilename):
                raise IOError("No such file or directory: '%s'" % self.inpFilename)
            # NOTE(review): the command line is interpreted by the shell, so
            # paths containing spaces or shell metacharacters will break it.
            os.system('%s < %s > %s' % (self.charmmBin, self.inpFilename, self.outFilename))