def pairwise_cast(models):
    """
    Atom-cast all models in the list with each other.

    Every pair of models is compared; if the two differ in residue or
    atom content, both are reduced in place (via keep()) to the atoms
    reported by compareAtoms(), and the number of atoms removed from
    each model is printed.

    @param models: models to cast; each must provide equals(),
                   compareAtoms(), keep(), sourceFile() and len()
    @type  models: list

    @return: the same list object, holding the (modified) models
    @rtype: list
    """
    for pos, first in enumerate(models):
        ## only compare against models further down the list
        for second in models[pos + 1:]:
            T.flushPrint('.')

            same_res, same_atm = first.equals(second)
            if same_res and same_atm:
                continue  ## nothing to remove for this pair

            keep_1, keep_2 = first.compareAtoms(second)

            ## count the atoms about to be dropped before shrinking
            removed_1 = len(first) - len(keep_1)
            removed_2 = len(second) - len(keep_2)

            first.keep(keep_1)
            second.keep(keep_2)

            name_1 = T.stripFilename(first.sourceFile())
            name_2 = T.stripFilename(second.sourceFile())

            print("Removed %i atoms from %s" % (removed_1, name_1))
            print("Removed %i atoms from %s" % (removed_2, name_2))

    return models
def pairwise_cast(models):
    """
    Atom-cast all models in the list with each other.

    For each pair of models that is not equal in both residue and atom
    content, the two models are reduced in place to the atom indices
    returned by compareAtoms() and a line reporting the number of removed
    atoms is printed for each of them.

    @param models: models to cast; each must provide equals(),
                   compareAtoms(), keep(), sourceFile() and len()
    @type  models: list

    @return: the same list object, holding the (modified) models
    @rtype: list
    """
    total = len(models)

    for a in range(total):
        for b in range(a + 1, total):
            T.flushPrint('.')

            model_a, model_b = models[a], models[b]

            res_equal, atm_equal = model_a.equals(model_b)
            if res_equal and atm_equal:
                continue  ## identical content, nothing to cast

            idx_a, idx_b = model_a.compareAtoms(model_b)

            ## sizes must be taken before keep() shrinks the models
            lost_a = len(model_a) - len(idx_a)
            lost_b = len(model_b) - len(idx_b)

            model_a.keep(idx_a)
            model_b.keep(idx_b)

            file_a = T.stripFilename(model_a.sourceFile())
            file_b = T.stripFilename(model_b.sourceFile())

            print("Removed %i atoms from %s" % (lost_a, file_a))
            print("Removed %i atoms from %s" % (lost_b, file_b))

    return models
def byName(self, rescode, topo=None):
    """
    Identify matching reference residue by residue name.

    Note: residue names are not guaranteed to be unique if several
    topology files have been read in (the default set of Amber topologies
    uses unique names though). The optional topo parameter can be used to
    specify in which topology the residue is looked up. Without it, the
    first topology containing the name wins.

    Note: residue 3 letter names are all UPPERCASE.

    @param rescode: three-letter name of residue to look up
    @type  rescode: str
    @param topo: optional (file) name of topology (@see L{topokeys()} )
    @type  topo: str

    @return: matching reference residue
    @rtype: AmberResidueType

    @raise KeyError: if the topology or residue name are not found
    """
    if topo:
        ## topologies are indexed by their stripped file name
        return self.topoindex[T.stripFilename(topo)][rescode]

    for residues in self.topoindex.values():
        if rescode in residues:
            return residues[rescode]

    raise KeyError('No residue type found for name ' + str(rescode))
def byName(self, rescode, topo=None):
    """
    Identify matching reference residue by residue name.

    Note: residue names are not guaranteed to be unique if several
    topology files have been read in (the default set of Amber topologies
    uses unique names though). The optional topo parameter restricts the
    lookup to a single topology; otherwise all registered topologies are
    searched and the first match is returned.

    Note: residue 3 letter names are all UPPERCASE.

    @param rescode: three-letter name of residue to look up
    @type  rescode: str
    @param topo: optional (file) name of topology (@see L{topokeys()} )
    @type  topo: str

    @return: matching reference residue
    @rtype: AmberResidueType

    @raise KeyError: if the topology or residue name are not found
    """
    if not topo:
        for known in self.topoindex.values():
            if rescode in known:
                return known[rescode]

        raise KeyError('No residue type found for name ' + str(rescode))

    ## explicit topology: index key is the stripped file name
    topo_key = T.stripFilename(topo)
    return self.topoindex[topo_key][rescode]
def __parseBiomt(self, pdbFile, firstLine):
    """
    Extract BIOMT (biological unit) information from REMARK 350 lines.
    Creates a 'BIOMT' dictionary.

    Starting with firstLine, consecutive 'REMARK 350' records are read
    from pdbFile. For every 'BIOMOLECULE:' record a new entry is started;
    'APPLY THE FOLLOWING TO CHAINS:' / 'AND CHAINS:' records add chain ids
    and every three 'BIOMT' records are combined into one 3 x 4 array
    (rotation with the translation appended as last column).

    @param pdbFile: reader positioned after firstLine; must provide
                    readLine() returning (record type, content) tuples
    @type  pdbFile: presumably IO.PDBFile -- TODO confirm against caller
    @param firstLine: first REMARK 350 record
    @type  firstLine: (str, str)
    """
    ## NOTE(review): no return statement is visible in this view and the
    ## last molecule is never written to biomtDict inside the loop -- the
    ## definition may continue beyond what is shown here; confirm.
    line = firstLine
    biomtDict = {}
    moleculeNum = -1

    while line[0] == 'REMARK' and line[1].startswith(' 350'):
        # 5 = len(' 350 ')
        biomtLine = line[1][5:].lstrip()
        if biomtLine.startswith('BIOMOLECULE:'):  # start a new molecule
            if moleculeNum != -1:
                # lets update the dictionary with what we've got
                biomtDict[moleculeNum] = (targetChains, rtList)
            #12 = len('BIOMOLECULE:')
            moleculeNum = int(biomtLine[12:].strip())
            ## reset the per-molecule accumulators
            targetChains = []
            rotation = []
            translation = []
            rtList = []
            matrixLine = 0
        if biomtLine.startswith('APPLY THE FOLLOWING TO CHAINS:'):
            # parse targeted chains, we assume this comes after BIOMOLECULE line
            # 30 = len('APPLY THE FOLLOWING TO CHAINS:')
            targetChains.extend(c.strip() for c in biomtLine[30:].split(','))
        if biomtLine.startswith('AND CHAINS:'):
            # 11 = len('AND CHAINS:')
            targetChains.extend(c.strip() for c in biomtLine[11:].split(','))
        if biomtLine.startswith('BIOMT'):
            # parse rotate-translate matri{x/ces}, we assume this comes after BIOMOLECULE line
            matrixLine += 1
            # 6 = len('BIOMT#')
            rawCoords = biomtLine[6:].split()
            ## rawCoords[0] is skipped; fields 1-3 are the rotation row,
            ## field 4 the translation component
            rotation.append([float(x) for x in rawCoords[1:4]])
            translation.append(float(rawCoords[4]))
            if matrixLine % 3 == 0:
                ## three rows collected -> assemble one 3 x 4 matrix
                rotation = N0.array(rotation)
                translation = N0.transpose([translation])
                rotation = N0.concatenate((rotation, translation), axis=1)
                rtList.append(N0.array(rotation))
                ## rtList.append((rotation,translation))
                rotation = []
                translation = []
        try:
            line = pdbFile.readLine()
        except ValueError, what:
            ## NOTE(review): 'i' and 'fname' are not assigned anywhere in
            ## this function, so this handler would itself fail with a
            ## NameError; after 'continue' the previous record would also
            ## be processed a second time -- confirm intended behaviour.
            self.log.add('Warning: Error parsing line %i of %s' %
                         (i, T.stripFilename(fname)))
            self.log.add('\tError: ' + str(what))
            continue
def pdbname(self):
    """
    Extract pdb code from file name.

    The code is taken to be the stripped file name of the model
    referenced by self.pdb.

    @return: (assumed) pdb code
    @rtype: str
    """
    source_file = self.pdb.filename
    return T.stripFilename(source_file)
def __parseBiomt( self, pdbFile, firstLine):
    """
    Extract BIOMT (biological unit) information from REMARK 350 lines.

    Starting with firstLine, consecutive 'REMARK 350' records are read
    from pdbFile. For every 'BIOMOLECULE:' record a new entry is started;
    'APPLY THE FOLLOWING TO CHAINS:' / 'AND CHAINS:' records add chain ids
    and every three 'BIOMT' records are combined into one 3 x 4 array
    (rotation with the translation appended as last column).

    @param pdbFile: reader positioned after firstLine; must provide
                    readLine() returning (record type, content) tuples
    @type  pdbFile: presumably IO.PDBFile -- TODO confirm against caller
    @param firstLine: first REMARK 350 record
    @type  firstLine: (str, str)
    """
    ## NOTE(review): no return statement is visible in this view and the
    ## last molecule is never written to biomtDict inside the loop -- the
    ## definition may continue beyond what is shown here; confirm.
    line = firstLine
    biomtDict = {}
    moleculeNum = -1

    while line[0] == 'REMARK' and line[1].startswith(' 350'):
        # 5 = len(' 350 ')
        biomtLine = line[1][5:].lstrip()
        if biomtLine.startswith('BIOMOLECULE:'): # start a new molecule
            if moleculeNum != -1:
                # lets update the dictionary with what we've got
                biomtDict[moleculeNum] = (targetChains,rtList)
            #12 = len('BIOMOLECULE:')
            moleculeNum = int(biomtLine[12:].strip())
            ## reset the per-molecule accumulators
            targetChains = []
            rotation = []
            translation = []
            rtList = []
            matrixLine = 0
        if biomtLine.startswith('APPLY THE FOLLOWING TO CHAINS:'):
            # parse targeted chains, we assume this comes after BIOMOLECULE line
            # 30 = len('APPLY THE FOLLOWING TO CHAINS:')
            targetChains.extend(c.strip() for c in biomtLine[30:].split(','))
        if biomtLine.startswith('AND CHAINS:'):
            # 11 = len('AND CHAINS:')
            targetChains.extend(c.strip() for c in biomtLine[11:].split(','))
        if biomtLine.startswith('BIOMT'):
            # parse rotate-translate matri{x/ces}, we assume this comes after BIOMOLECULE line
            matrixLine += 1
            # 6 = len('BIOMT#')
            rawCoords = biomtLine[6:].split()
            ## rawCoords[0] is skipped; fields 1-3 are the rotation row,
            ## field 4 the translation component
            rotation.append([float(x) for x in rawCoords[1:4]])
            translation.append(float(rawCoords[4]))
            if matrixLine % 3 == 0:
                ## three rows collected -> assemble one 3 x 4 matrix
                rotation = N.array( rotation )
                translation = N.transpose( [ translation ] )
                rotation = N.concatenate( (rotation, translation), axis=1 )
                rtList.append(N.array(rotation))
                ## rtList.append((rotation,translation))
                rotation = []
                translation = []
        try:
            line = pdbFile.readLine()
        except ValueError, what:
            ## NOTE(review): 'i' and 'fname' are not assigned anywhere in
            ## this function, so this handler would itself fail with a
            ## NameError; after 'continue' the previous record would also
            ## be processed a second time -- confirm intended behaviour.
            self.log.add('Warning: Error parsing line %i of %s' %
                         (i, T.stripFilename( fname )) )
            self.log.add('\tError: '+str(what) )
            continue
def failed( self ):
    """
    Report a failed HEX job: print the host, the input file, the command,
    the log and output file names and the last error, then notify the
    owner via failedHex().
    """
    print "FAILED: ", self.host, ' ', t.stripFilename(self.finp)
    print "\tJob details:"
    print "\tCommand: ", self.cmd
    print "\tinput: ", self.finp
    print "\tHex log: ", self.log
    print "\tHex out: ", self.fout
    print
    print "\t", t.lastError()

    ## hand the failed job back to the object that started it
    self.owner.failedHex( self )
def idFromName(self, fname):
    """
    Extract PDB code from file name.

    @param fname: file name
    @type  fname: str

    @return: first 4 letters of the stripped file name, or '' if that
             name has fewer than 4 letters
    @rtype: str
    """
    base = T.stripFilename(fname)
    return base[:4] if len(base) > 3 else ''
def idFromName(self, fname):
    """
    Extract PDB code from file name.

    @param fname: file name
    @type  fname: str

    @return: first 4 letters of the stripped file name, or '' if that
             name has fewer than 4 letters
    @rtype: str
    """
    stripped = T.stripFilename(fname)

    ## too short to contain a 4-letter PDB code
    if len(stripped) <= 3:
        return ''

    return stripped[:4]
def changeModel(inFile, prefix, sourceModel): print '\nget ' + os.path.basename(inFile) + '..', model = PDBModel(inFile) model.update() model = model.sort() eq = model.equals(sourceModel) if not eq[0] and eq[1]: raise ConvertError('source and other models are not equal: ' + str(eq)) # model.validSource() model.setSource(sourceModel.validSource()) #model.atomsChanged = 0 for k in model.atoms: model.atoms[k, 'changed'] = N0.all(model[k] == sourceModel[k]) model.xyzChanged = (0 != N0.sum(N0.ravel(model.xyz - sourceModel.xyz))) model.update(updateMissing=1) if model.xyzChanged: doper = PDBDope(model) if 'MS' in sourceModel.atoms.keys(): doper.addSurfaceRacer(probe=1.4) if 'density' in sourceModel.atoms.keys(): doper.addDensity() if 'foldX' in sourceModel.info.keys(): doper.addFoldX() if 'delphi' in sourceModel.info.keys(): doper.addDelphi() outFile = os.path.dirname( inFile ) + '/' + prefix +\ T.stripFilename( inFile ) + '.model' T.dump(model, outFile) print '-> ' + os.path.basename(outFile)
def addTopology(self, topofile, override=False):
    """
    Include an additional topology (off) library in the collection.

    @param topofile: file name of topology, either full path or
                     simple file name which will then be looked for in
                     Biskit/data/amber/residues.
    @type  topofile: str
    @param override: override topologies or residue entries with same
                     name (default False)
    @type  override: bool

    @return: dictionary of all residue types parsed from topofile indexed
             by three-letter residue name
    @rtype: {str : AmberResidueType}

    @raise AmberResidueLibraryError: if override==False and a topology or
           a residue with identical atom content have already been
           registered.
    """
    topo_key = T.stripFilename(topofile)

    if not override and topo_key in self.topoindex:
        raise AmberResidueLibraryError('duplicate topology ' + topo_key)

    if self.verbose:
        self.log.add('parsing %s...' % topofile)

    parsed = AmberPrepParser(topofile).residueDict()

    if self.verbose:
        self.log.add('Read %i residue definitions.\n' % len(parsed))

    self.topoindex[topo_key] = parsed

    ## register every residue type under its atom content as well
    for code, residue in parsed.items():
        atom_key = residue.atomkey(compress=False)

        if not override and atom_key in self.aindex:
            raise AmberResidueLibraryError(
                'duplicate residue entry: %s -> %s' %
                (code, self.aindex[atom_key].code))

        self.aindex[atom_key] = residue

    return self.topoindex[topo_key]
def addTopology(self, topofile, override=False):
    """
    Include an additional topology (off) library in the collection.

    @param topofile: file name of topology, either full path or
                     simple file name which will then be looked for in
                     Biskit/data/amber/residues.
    @type  topofile: str
    @param override: override topologies or residue entries with same
                     name (default False)
    @type  override: bool

    @return: dictionary of all residue types parsed from topofile indexed
             by three-letter residue name
    @rtype: {str : AmberResidueType}

    @raise AmberResidueLibraryError: if override==False and a topology or
           a residue with identical atom content have already been
           registered.
    """
    name = T.stripFilename(topofile)
    strict = not override

    if strict and name in self.topoindex:
        raise AmberResidueLibraryError('duplicate topology ' + name)

    if self.verbose:
        self.log.add('parsing %s...' % topofile)

    residues = AmberPrepParser(topofile).residueDict()

    if self.verbose:
        self.log.add('Read %i residue definitions.\n' % len(residues))

    self.topoindex[name] = residues

    ## second index: atom content -> residue type
    for resname, restype in residues.items():
        content = restype.atomkey(compress=False)

        if strict and content in self.aindex:
            previous = self.aindex[content].code
            raise AmberResidueLibraryError(
                'duplicate residue entry: %s -> %s' % (resname, previous))

        self.aindex[content] = restype

    return self.topoindex[name]
def changeModel( inFile, prefix, sourceModel ): print '\nget ' + os.path.basename( inFile ) + '..', model = PDBModel( inFile ) model.update() model = model.sort() eq = model.equals( sourceModel ) if not eq[0] and eq[1]: raise ConvertError('source and other models are not equal: ' + str(eq)) # model.validSource() model.setSource( sourceModel.validSource() ) #model.atomsChanged = 0 for k in model.atoms: model.atoms[k,'changed'] = N0.all( model[k] == sourceModel[k] ) model.xyzChanged = ( 0 != N0.sum( N0.ravel( model.xyz - sourceModel.xyz)) ) model.update( updateMissing=1 ) if model.xyzChanged: doper = PDBDope( model ) if 'MS' in sourceModel.atoms.keys(): doper.addSurfaceRacer( probe=1.4 ) if 'density' in sourceModel.atoms.keys(): doper.addDensity() ## if 'foldX' in sourceModel.info.keys(): ## doper.addFoldX() if 'delphi' in sourceModel.info.keys(): doper.addDelphi() outFile = os.path.dirname( inFile ) + '/' + prefix +\ T.stripFilename( inFile ) + '.model' T.dump( model, outFile ) print '-> ' + os.path.basename( outFile )
def report( tc ):
    """
    Print a summary of a clustering result: for each cluster its index,
    the two values returned by tc.avgRmsd() and the shortened names of
    its member frames, followed by the average of all pairwise rmsd
    values over the concatenated cluster trajectories.

    @param tc: clusterer providing memberTrajs(), n_clusters, avgRmsd()
               and aMask
    @type  tc: presumably TrajCluster -- TODO confirm
    """
    clTrajs = tc.memberTrajs()

    for i in range(0, tc.n_clusters ):

        t = clTrajs[i]
        rms = tc.avgRmsd( i, tc.aMask )

        ## keep only the last two '_'-separated fields of each frame name
        names = [ '_'.join(T.stripFilename(s).split('_')[-2:])
                  for s in t.frameNames]

        print "%i <%4.2f +-%4.2f>: " % (i, rms[0],rms[1] ), names
        ## NOTE(review): blank line assumed to follow every cluster; the
        ## original nesting of this statement should be confirmed.
        print

    ## join all cluster trajectories into one to get the overall average
    tr = clTrajs[0].concat( *tuple( clTrajs[1:] ) )
    avgall = N0.average( MaU.aboveDiagonal( tr.pairwiseRmsd( tc.aMask ) ) )
    print "avg rms all: %4.2f" % avgall
def run( self ):
    """
    Run HEX job.

    If the HEX output file does not exist yet, the command is executed
    and the output file is awaited for up to 25 seconds. The output is
    then post-processed, parsed into self.result and done() is called.
    Any exception (including the DockerError raised for a non-zero exit
    status) is caught here and answered with a call to failed().

    @raise DockerError: if HEX exists with error (raised inside the
                        try block, i.e. it ends up in failed())
    """
    try:
        ## NOTE(review): nesting of the statements below under this
        ## condition is reconstructed -- confirm against the original.
        if not os.path.exists( self.fout ):

            if self.verbose:
                print "Executing on ", self.host, ' with ', \
                      t.stripFilename(self.finp)
                print "Command: ", self.cmd

            cmd_lst = self.cmd.split()

            ## blocks until the HEX process has finished
            self.status = os.spawnvp(os.P_WAIT, cmd_lst[0], cmd_lst )

            if self.status != 0:
                raise DockerError,\
                      'Hex returned exit status %i' % self.status

            ## poll for the output file in 5 s steps, at most 25 s
            waited = 0
            while waited < 25 and not os.path.exists( self.fout ):
                sleep( 5 )
                waited += 5

        ## replace model numbers in HEX output file
        self.__hackHexOut( self.nRec, self.nLig, self.fout )

        parser = HexParser(self.fout, self.owner.recDic,
                           self.owner.ligDic)

        ## generate ComplexList from hex output
        self.result = parser.parseHex()

        self.done()

    except:
        ## catches everything, no exception leaves this method
        self.failed()
for fname in srcfiles: fname = t.absfile( fname ) shutil.copy( fname, fname + '_' ) methods = re_lst( module, exclude ) fold = open( fname + '_' ) fnew = open( fname, 'w' ) i = 0 for l in fold: i += 1 l = replace_import_statement( l, module, importas ) l, occurrences = replace_line( l, methods, importas + '.' ) if occurrences > 0: t.errWriteln( '%s %5i %2i matches:\n\t%s' % (t.stripFilename(fname), i, occurrences, l) ) fnew.write( l ) fnew.close() fold.close() except: syntax()
def __collectAll(self, fname, skipRes=None, headPatterns=[]):
    """
    Parse ATOM/HETATM lines from PDB. Collect coordinates plus
    dictionaries with the other pdb records of each atom.
    HEADER and REMARK lines preceding the first atom record are parsed
    into an info dictionary; parsing stops at the first END or ENDMDL
    record.

    Some changes are made to the dictionary from PDBFile.readline()::
        - the 'position' entry (with the coordinates) is removed
        - leading and trailing spaces are removed from 'name' ..
        - .. but a 'name_original' entry keeps the old name with spaces
        - a 'type' entry is added. Its value is 'ATOM' or 'HETATM'
        - a 'after_ter' entry is added. Its value is 1, if atom is
          preceeded by a 'TER' line, otherwise 0
        - empty 'element' entries are filled with the first non-number
          letter from the atom 'name'

    @param fname: name of pdb file
    @type  fname: str
    @param skipRes: list with residue names that should be skipped
    @type  skipRes: list of str
    @param headPatterns: (key, regular expression) pairs used to parse
                         REMARK lines; self.RE_REMARKS is used if empty
    @type  headPatterns: [ (str, str) ]

    @return: tuple of (1) dictionary of profiles, (2) xyz array N x 3
             and (3) dictionary with header / remark information
    @rtype: ( dict, array, dict )

    @raise PDBParserError: if anything goes wrong while parsing or if
                           the file does not contain any atoms
    """
    xyz = []

    aProfs = {}

    info = {}

    in_header = True

    ## headPatterns is only read, never modified (mutable default is safe)
    headPatterns = headPatterns or self.RE_REMARKS
    patterns = [(key, re.compile(ex)) for key, ex in headPatterns]

    for k in B.PDBModel.PDB_KEYS:
        aProfs[k] = list()

    f = IO.PDBFile(fname)

    ## set when __parseBiomt() has already fetched the next record
    skipLine = False

    try:
        line, i = ('', ''), 0

        while line[0] <> 'END' and line[0] <> 'ENDMDL':

            i += 1
            if not skipLine:
                try:
                    line = f.readLine()
                except ValueError, what:
                    ## unparsable record: log it and move on to the next
                    self.log.add('Warning: Error parsing line %i of %s' %
                                 (i, T.stripFilename(fname)))
                    self.log.add('\tError: ' + str(what))
                    continue
            else:
                skipLine = False

            ## header handling
            if in_header and line[0] == 'HEADER':
                info.update(self.__parseHeader(line))

            if in_header and line[0] == 'REMARK':
                if line[1].startswith(' 350'):
                    biomtDict, line = self.__parseBiomt(f, line)
                    info.update(biomtDict)
                    # we've hogged a line beyond REMARK 350 records in
                    # __parseBiomt(), now we need to process it here
                    skipLine = True
                    continue
                else:
                    info.update(self.__parseRemark(line, patterns))

            ## preserve position of TER records
            newChain = line[0] == 'TER'
            if newChain:
                line = f.readLine()

            if (line[0] in ['ATOM', 'HETATM']):

                if in_header: in_header = False  ## switch off HEADER parsing

                a = line[1]

                if skipRes and a['residue_name'] in skipRes:
                    continue

                a['name_original'] = a['name']
                a['name'] = a['name'].strip()

                a['type'] = line[0]

                if newChain:
                    a['after_ter'] = 1
                else:
                    a['after_ter'] = 0

                if a['element'] == '':
                    a['element'] = self.__firstLetter(a['name'])

                ## coordinates go into xyz, everything else into profiles
                xyz.append(a['position'])

                del a['position']

                for k, v in a.items():
                    aProfs[k].append(v)

    except:
        ## NOTE(review): the file handle is not closed on this path
        raise PDBParserError("Error parsing file "+fname+": " \
                             + T.lastError())
    try:
        f.close()
    except:
        pass

    if len(xyz) == 0:
        raise PDBParserError("Error parsing file " + fname + ": " +
                             "Couldn't find any atoms.")

    return aProfs, N0.array(xyz, N0.Float32), info
def cluster( tc, options ): n_cluster = int( options['n'] ) allowedAtoms = T.toList( options.get('a',[]) ) if allowedAtoms: mask = tc.traj.ref.mask( lambda a: a['name'] in allowedAtoms ) else: mask = selectedAtoms( tc.traj.ref ) saveIn = T.absfile( options['o'] ) + '/' conv = float( options['conv'] ) tc.cluster( n_cluster, aMask=mask, converged=conv ) ## collect center frame index for each cluster frames = [ members[0] for members in tc.memberFrames() ] result = tc.traj.takeFrames( frames ) ## trajectory of cluster centers model_dic = {} dic_index = 1 if options.has_key('ref'): ## use user-provided reference structure if os.path.isfile( T.absfile(options['ref']) ): print '\nUsing user specified reference pdb' m = PDBModel( options['ref'] ) m.remove( m.maskH2O() ) ## use reference in trajectory else: print '\nUsing reference in trajectory' m = tc.traj.ref m = dumpModel( m, options, saveIn+m.getPdbCode()+'_ref.model') ## add ref as first model in dictionary model_dic[dic_index] = m dic_index += 1 ## save the individual models and add them to the model dictionary for i in range(0, result.lenFrames() ): m = result.getPDBModel(i) m = dumpModel(m, options, saveIn + T.stripFilename(result.frameNames[i]) +'.model' ) model_dic[dic_index] = m dic_index += 1 ## save model dictionary fdic = options['dic'] or m.getPdbCode() + '_models.dic' T.dump( model_dic, T.absfile( fdic ) ) ## REDUNDANT CODE AS MULTIDOCK NOW WRITES THE HEX PDB FILES ## ## ## save all models in the dictionary as HEX pdb files ## for k in model_dic.keys(): ## m = model_dic[k] ## ## remove hydrogens and sort atoms in standard order ## m.remove( m.maskH() ) ## m = molUtils.sortAtomsOfModel(m) ## setChainID( m ) ## ## save single hex pdbs ## if options['hex']: ## fhex = options['hex'] + '_%03d' %(k) ## else: ## fhex = m.getPdbCode() + '_%03d_hex.pdb'%(k) ## hexTools.createHexPdb_single( m, T.absfile( fhex ) ) # fhex = options['hex'] or m.getPdbCode() + '_hex.pdb' # hexTools.createHexPdb( model_dic, T.absfile( 
fhex ) ) return result
def cluster(tc, options): n_cluster = int(options['n']) allowedAtoms = T.toList(options.get('a', [])) if allowedAtoms: mask = tc.traj.ref.mask(lambda a: a['name'] in allowedAtoms) else: mask = selectedAtoms(tc.traj.ref) saveIn = T.absfile(options['o']) + '/' conv = float(options['conv']) tc.cluster(n_cluster, aMask=mask, converged=conv) ## collect center frame index for each cluster frames = [members[0] for members in tc.memberFrames()] result = tc.traj.takeFrames(frames) ## trajectory of cluster centers model_dic = {} dic_index = 1 if options.has_key('ref'): ## use user-provided reference structure if os.path.isfile(T.absfile(options['ref'])): print '\nUsing user specified reference pdb' m = PDBModel(options['ref']) m.remove(m.maskH2O()) ## use reference in trajectory else: print '\nUsing reference in trajectory' m = tc.traj.ref m = dumpModel(m, options, saveIn + m.getPdbCode() + '_ref.model') ## add ref as first model in dictionary model_dic[dic_index] = m dic_index += 1 ## save the individual models and add them to the model dictionary for i in range(0, result.lenFrames()): m = result.getPDBModel(i) m = dumpModel( m, options, saveIn + T.stripFilename(result.frameNames[i]) + '.model') model_dic[dic_index] = m dic_index += 1 ## save model dictionary fdic = options['dic'] or m.getPdbCode() + '_models.dic' T.dump(model_dic, T.absfile(fdic)) ## REDUNDANT CODE AS MULTIDOCK NOW WRITES THE HEX PDB FILES ## ## ## save all models in the dictionary as HEX pdb files ## for k in model_dic.keys(): ## m = model_dic[k] ## ## remove hydrogens and sort atoms in standard order ## m.remove( m.maskH() ) ## m = molUtils.sortAtomsOfModel(m) ## setChainID( m ) ## ## save single hex pdbs ## if options['hex']: ## fhex = options['hex'] + '_%03d' %(k) ## else: ## fhex = m.getPdbCode() + '_%03d_hex.pdb'%(k) ## hexTools.createHexPdb_single( m, T.absfile( fhex ) ) # fhex = options['hex'] or m.getPdbCode() + '_hex.pdb' # hexTools.createHexPdb( model_dic, T.absfile( fhex ) ) return result
def createHexInp(recPdb, recModel, ligPdb, ligModel, comPdb=None, outFile=None, macDock=None, silent=0, sol=512): """ Prepare a Hex macro file for the docking of the receptor(s) against ligand(s). @param recPdb: hex-formatted PDB @type recPdb: str @param recModel: hex-formatted PDB @type recModel: str @param ligPdb: PDBModel, get distances from this one @type ligPdb: PDBModel @param ligModel: PDBModel, getdistances from this one @type ligModel: PDBModel @param comPdb: reference PDB @type comPdb: str @param outFile: base of file name for mac and out @type outFile: str @param macDock: None -> hex decides (from the size of the molecule), 1 -> force macroDock, 0-> force off (default: None) @type macDock: None|1|0 @param silent: don't print distances and macro warnings (default: 0) @type silent: 0|1 @param sol: number of solutions that HEx should save (default: 512) @type sol: int @return: HEX macro file name, HEX out generated bu the macro, macro docking status @rtype: str, str, boolean """ ## files and names recCode = t.stripFilename(recPdb)[0:4] ligCode = t.stripFilename(ligPdb)[0:4] outFile = outFile or recCode + '-' + ligCode ## hex macro name macName = t.absfile(outFile + '_hex.mac') ## hex rotation matrix output name outName_all = t.absfile(outFile + '_hex.out') outName_clust = t.absfile(outFile + '_hex_cluster.out') ## add surface profiles if not there if not recModel.atoms.has_key('relAS'): #t.flushPrint('\nCalculating receptor surface profile') rec_asa = PDBDope(recModel) rec_asa.addSurfaceRacer() if not ligModel.atoms.has_key('relAS'): #t.flushPrint('\nCalculating ligand surface profile') lig_asa = PDBDope(ligModel) lig_asa.addSurfaceRacer() ## surface masks, > 95% exposed rec_surf_mask = N.greater(recModel.profile('relAS'), 95) lig_surf_mask = N.greater(ligModel.profile('relAS'), 95) ## maximun and medisn distance from centre of mass to any surface atom recMax, recMin = centerSurfDist(recModel, rec_surf_mask) ligMax, ligMin = centerSurfDist(ligModel, 
lig_surf_mask) ## approxinate max and min center to centre distance maxDist = recMax + ligMax minDist = recMin + ligMin ## molecular separation and search range to be used in the docking molSep = (maxDist + minDist) / 2 molRange = 2 * (maxDist - molSep) if not silent: print 'Docking setup: %s\nRecMax: %.1f RecMin: %.1f\nLigMax: %.1f LigMin: %.1f\nMaxDist: %.1f MinDist: %.1f\nmolecular_separation: %.1f r12_range: %.1f\n' % ( outFile, recMax, recMin, ligMax, ligMin, maxDist, minDist, molSep, molRange) if recMax > 30 and ligMax > 30 and not silent: print '\nWARNING! Both the receptor and ligand radius is ', print 'greater than 30A.\n' ## determine docking mode to use macroDocking = 0 if macDock == None: if recMax > 35 and not silent: print '\nReceptor has a radius that exceeds 35A ', print '-> Macro docking will be used' macroDocking = 1 else: macroDocking = macDock ##################### ## write macro file macOpen = open(macName, 'w') macOpen.write('# -- ' + macName + ' --\n') macOpen.write(' \n') macOpen.write('open_receptor ' + t.absfile(recPdb) + '\n') macOpen.write('open_ligand ' + t.absfile(ligPdb) + '\n') if comPdb and comPdb[-4:] == '.pdb': macOpen.write('open_complex ' + comPdb + '\n') macOpen.write('\n') head = """ # -------------- general settings ---------------- disc_cache 1 # disc cache on (0 off) docking_sort_mode 1 # Sort solutions by cluster (0 by energy) docking_cluster_mode 1 # Display all clusters (0 display best) docking_cluster_threshold 2.00 # docking_cluster_bumps number # ------------ molecule orientation -------------- molecule_separation %(separation)i commit_view """ % ({ 'separation': round(molSep) }) macro = """ # -------------- macro docking ------------------- macro_min_coverage 25 macro_sphere_radius 15 macro_docking_separation 25 activate_macro_model""" tail = """ # -------------- docking setup ------------------- docking_search_mode 0 # full rotational search receptor_range_angle 180 # 0, 15, 30, 45, 60, 75, 90, 180 
docking_receptor_samples 720 # 362, 492, 642, 720, 980, 1280 ligand_range_angle 180 docking_ligand_samples 720 twist_range_angle 360 # 0, 15, 30, 60, 90, 180, 360 docking_alpha_samples 128 # 64, 128, 256 r12_step 0.500000 # 0.1, 0.2, 0.25, 0.5, 0.75, 1, 1.5, 2 r12_range %(range)i docking_radial_filter 0 # Radial Envelope Filter - None grid_size 0.600 # 0.4, 0.5, 0.6, 0.75, 1.0 # docking_electrostatics 0 # use only surface complimentarity docking_electrostatics 1 # use electrostatic term for scoring clusters docking_main_scan 16 # docking_main_search 26 max_docking_solutions %(nr_sol)i # number of solutions to save # -------------- post-processing ---------------- docking_refine 0 # None # docking_refine 1 # Backbone Bumps # docking_refine 2 # MM energies # docking_refine 3 # MM minimization # ---------------- run docking ------------------ activate_docking # save_docking %(output_clust)s # save_range 1 512 ./ dock .pdb # ------------ also save all solutions ---------- docking_sort_mode 0 # Sort solutions by energy (1 by cluster) save_docking %(output_all)s""" \ %({'range':round(molRange), 'output_all':outName_all, 'nr_sol':int(sol), 'output_clust':outName_clust} ) macOpen.writelines(head) ## macro docking will not work with multiple models, if both are added to ## the hex macro file - macrodocking will be skipped during the docking run if macroDocking: macOpen.writelines(macro) macOpen.writelines(tail) macOpen.close() return macName, outName_all, macroDocking
def createHexInp( recPdb, recModel, ligPdb, ligModel, comPdb=None, outFile=None, macDock=None, silent=0, sol=512 ): """ Prepare a Hex macro file for the docking of the receptor(s) against ligand(s). @param recPdb: hex-formatted PDB @type recPdb: str @param recModel: hex-formatted PDB @type recModel: str @param ligPdb: PDBModel, get distances from this one @type ligPdb: PDBModel @param ligModel: PDBModel, getdistances from this one @type ligModel: PDBModel @param comPdb: reference PDB @type comPdb: str @param outFile: base of file name for mac and out @type outFile: str @param macDock: None -> hex decides (from the size of the molecule), 1 -> force macroDock, 0-> force off (default: None) @type macDock: None|1|0 @param silent: don't print distances and macro warnings (default: 0) @type silent: 0|1 @param sol: number of solutions that HEx should save (default: 512) @type sol: int @return: HEX macro file name, HEX out generated bu the macro, macro docking status @rtype: str, str, boolean """ ## files and names recCode = t.stripFilename( recPdb )[0:4] ligCode = t.stripFilename( ligPdb )[0:4] outFile = outFile or recCode + '-' + ligCode ## hex macro name macName = t.absfile( outFile + '_hex.mac' ) ## hex rotation matrix output name outName_all = t.absfile( outFile + '_hex.out' ) outName_clust = t.absfile( outFile + '_hex_cluster.out') ## add surface profiles if not there if not recModel.atoms.has_key('relAS'): #t.flushPrint('\nCalculating receptor surface profile') rec_asa = PDBDope( recModel ) rec_asa.addSurfaceRacer() if not ligModel.atoms.has_key('relAS'): #t.flushPrint('\nCalculating ligand surface profile') lig_asa = PDBDope( ligModel ) lig_asa.addSurfaceRacer() ## surface masks, > 95% exposed rec_surf_mask = N.greater( recModel.profile('relAS'), 95 ) lig_surf_mask = N.greater( ligModel.profile('relAS'), 95 ) ## maximun and medisn distance from centre of mass to any surface atom recMax, recMin = centerSurfDist( recModel, rec_surf_mask ) ligMax, ligMin = 
centerSurfDist( ligModel, lig_surf_mask ) ## approxinate max and min center to centre distance maxDist = recMax + ligMax minDist = recMin + ligMin ## molecular separation and search range to be used in the docking molSep = ( maxDist + minDist ) / 2 molRange = 2 * ( maxDist - molSep ) if not silent: print 'Docking setup: %s\nRecMax: %.1f RecMin: %.1f\nLigMax: %.1f LigMin: %.1f\nMaxDist: %.1f MinDist: %.1f\nmolecular_separation: %.1f r12_range: %.1f\n'%(outFile, recMax, recMin, ligMax, ligMin, maxDist, minDist, molSep, molRange) if recMax > 30 and ligMax > 30 and not silent: print '\nWARNING! Both the receptor and ligand radius is ', print 'greater than 30A.\n' ## determine docking mode to use macroDocking = 0 if macDock==None: if recMax > 35 and not silent: print '\nReceptor has a radius that exceeds 35A ', print '-> Macro docking will be used' macroDocking = 1 else: macroDocking = macDock ##################### ## write macro file macOpen= open( macName, 'w') macOpen.write('# -- ' + macName + ' --\n') macOpen.write(' \n') macOpen.write('open_receptor '+ t.absfile(recPdb) +'\n') macOpen.write('open_ligand '+ t.absfile(ligPdb) +'\n') if comPdb and comPdb[-4:] == '.pdb': macOpen.write('open_complex '+comPdb+'\n') macOpen.write('\n') head = """ # -------------- general settings ---------------- disc_cache 1 # disc cache on (0 off) docking_sort_mode 1 # Sort solutions by cluster (0 by energy) docking_cluster_mode 1 # Display all clusters (0 display best) docking_cluster_threshold 2.00 # docking_cluster_bumps number # ------------ molecule orientation -------------- molecule_separation %(separation)i commit_view """%({'separation': round(molSep)} ) macro =""" # -------------- macro docking ------------------- macro_min_coverage 25 macro_sphere_radius 15 macro_docking_separation 25 activate_macro_model""" tail = """ # -------------- docking setup ------------------- docking_search_mode 0 # full rotational search receptor_range_angle 180 # 0, 15, 30, 45, 60, 75, 90, 180 
docking_receptor_samples 720 # 362, 492, 642, 720, 980, 1280 ligand_range_angle 180 docking_ligand_samples 720 twist_range_angle 360 # 0, 15, 30, 60, 90, 180, 360 docking_alpha_samples 128 # 64, 128, 256 r12_step 0.500000 # 0.1, 0.2, 0.25, 0.5, 0.75, 1, 1.5, 2 r12_range %(range)i docking_radial_filter 0 # Radial Envelope Filter - None grid_size 0.600 # 0.4, 0.5, 0.6, 0.75, 1.0 # docking_electrostatics 0 # use only surface complimentarity docking_electrostatics 1 # use electrostatic term for scoring clusters docking_main_scan 16 # docking_main_search 26 max_docking_solutions %(nr_sol)i # number of solutions to save # -------------- post-processing ---------------- docking_refine 0 # None # docking_refine 1 # Backbone Bumps # docking_refine 2 # MM energies # docking_refine 3 # MM minimization # ---------------- run docking ------------------ activate_docking # save_docking %(output_clust)s # save_range 1 512 ./ dock .pdb # ------------ also save all solutions ---------- docking_sort_mode 0 # Sort solutions by energy (1 by cluster) save_docking %(output_all)s""" \ %({'range':round(molRange), 'output_all':outName_all, 'nr_sol':int(sol), 'output_clust':outName_clust} ) macOpen.writelines( head ) ## macro docking will not work with multiple models, if both are added to ## the hex macro file - macrodocking will be skipped during the docking run if macroDocking: macOpen.writelines( macro ) macOpen.writelines( tail ) macOpen.close() return macName, outName_all, macroDocking
def __collectAll( self, fname, skipRes=None, headPatterns=None ):
    """
    Parse ATOM/HETATM lines from PDB. Collect coordinates plus
    dictionaries with the other pdb records of each atom.
    HEADER and REMARK lines preceding the first atom are parsed into
    an info dictionary; all other record types are ignored.

    Some changes are made to the dictionary from PDBFile.readline()::
        - the 'position' entry (with the coordinates) is removed
        - leading and trailing spaces are removed from 'name' ..
        - .. but a 'name_original' entry keeps the old name with spaces
        - a 'type' entry is added. Its value is 'ATOM' or 'HETATM'
        - a 'after_ter' entry is added. Its value is 1, if atom is
          preceeded by a 'TER' line, otherwise 0
        - empty 'element' entries are filled with the first non-number
          letter from the atom 'name'

    @param fname: name of pdb file
    @type  fname: str
    @param skipRes: list with residue names that should be skipped
    @type  skipRes: list of str
    @param headPatterns: (key, regular expression) pairs applied to
                         REMARK lines; an empty or missing value falls
                         back to self.RE_REMARKS
    @type  headPatterns: [ (str, str) ]

    @return: tuple of (1) dictionary of atom profiles, (2) xyz array
             N x 3 and (3) dictionary with info parsed from the header
    @rtype: ( dict, array, dict )

    @raise PDBParserError: if the file cannot be parsed or does not
                           contain any atoms
    """
    xyz = []
    info = {}
    in_header = True

    ## None and [] both select the default REMARK patterns
    headPatterns = headPatterns or self.RE_REMARKS
    patterns = [ (key, re.compile(ex)) for key, ex in headPatterns ]

    aProfs = {}
    for k in B.PDBModel.PDB_KEYS:
        aProfs[k] = []

    f = IO.PDBFile( fname )

    skipLine = False

    try:
        ## i is only used for warning messages; the extra reads after
        ## TER records (below) are not counted by it
        line, i = ('',''), 0

        while line[0] not in ('END', 'ENDMDL'):

            i += 1
            if not skipLine:
                try:
                    line = f.readLine()
                except ValueError as what:
                    self.log.add('Warning: Error parsing line %i of %s' %
                                 (i, T.stripFilename( fname )) )
                    self.log.add('\tError: '+str(what) )
                    continue
            else:
                ## re-process the line handed back by __parseBiomt()
                skipLine = False

            ## header handling
            if in_header and line[0] == 'HEADER':
                info.update( self.__parseHeader( line ) )

            if in_header and line[0] == 'REMARK':
                if line[1].startswith(' 350'):
                    biomtDict, line = self.__parseBiomt( f, line )
                    info.update( biomtDict )
                    # we've hogged a line beyond REMARK 350 records in
                    # __parseBiomt(), now we need to process it here
                    skipLine = True
                    continue
                else:
                    info.update( self.__parseRemark( line, patterns ) )

            ## preserve position of TER records
            newChain = line[0] == 'TER'
            if newChain:
                line = f.readLine()

            if line[0] in ('ATOM', 'HETATM'):

                in_header = False  ## switch off HEADER parsing

                a = line[1]

                if skipRes and a['residue_name'] in skipRes:
                    continue

                a['name_original'] = a['name']
                a['name'] = a['name'].strip()

                a['type'] = line[0]

                if newChain:
                    a['after_ter'] = 1
                else:
                    a['after_ter'] = 0

                if a['element'] == '':
                    a['element'] = self.__firstLetter( a['name'] )

                pos = a['position']
                if pos.is_vector:
                    xyz.append( [ pos[0], pos[1], pos[2] ] )
                else:
                    xyz.append( pos )

                del a['position']

                for k, v in a.items():
                    aProfs[k].append( v )

    except Exception:
        ## KeyboardInterrupt / SystemExit are deliberately not converted
        raise PDBParserError("Error parsing file "+fname+": " \
                             + T.lastError())
    finally:
        ## release the file handle on success *and* on parsing errors;
        ## closing stays best-effort as before
        try:
            f.close()
        except Exception:
            pass

    if len( xyz ) == 0:
        raise PDBParserError("Error parsing file "+fname+": "+
                             "Couldn't find any atoms.")

    return aProfs, N.array( xyz, N.Float32 ), info
""" Print the part right of the diagonal in a matrix """ nr = len( matrix ) for i in range(nr): print '%5i'%(i+1), for i in range(nr): print '\n%2i'%(i+1), for k in range(i): print ' '*5, for j in range(i, nr): print '%5.2f'%matrix[i,j], ## get filenames of all models models = glob.glob( '%s/modeller/%s*.pdb'%(outFolder, tools.stripFilename(f_target)) ) ## create a Trajectory object with the models traj = Trajectory( pdbs=models ) ## fit the models against the average structure iteratively traj.blockFit2ref() ## calculate and print rmsd matrix rmsHeavy = traj.pairwiseRmsd() print '\nHEAVY ATOM RMSD BETWEEN MODELS::' __printMatrix( rmsHeavy ) ## same thing for backbone atoms BBMask = traj[0].maskBB() traj.blockFit2ref( mask = BBMask )
Print the part right of the diagonal in a matrix """ nr = len(matrix) for i in range(nr): print '%5i' % (i + 1), for i in range(nr): print '\n%2i' % (i + 1), for k in range(i): print ' ' * 5, for j in range(i, nr): print '%5.2f' % matrix[i, j], ## get filenames of all models models = glob.glob('%s/modeller/%s*.pdb' % (outFolder, tools.stripFilename(f_target))) ## create a Trajectory object with the models traj = Trajectory(pdbs=models) ## fit the models against the average structure iteratively traj.blockFit2ref() ## calculate and print rmsd matrix rmsHeavy = traj.pairwiseRmsd() print '\nHEAVY ATOM RMSD BETWEEN MODELS::' __printMatrix(rmsHeavy) ## same thing for backbone atoms BBMask = traj[0].maskBB() traj.blockFit2ref(mask=BBMask)
for fname in srcfiles: fname = t.absfile(fname) shutil.copy(fname, fname + '_') methods = re_lst(module, exclude) fold = open(fname + '_') fnew = open(fname, 'w') i = 0 for l in fold: i += 1 l = replace_import_statement(l, module, importas) l, occurrences = replace_line(l, methods, importas + '.') if occurrences > 0: t.errWriteln('%s %5i %2i matches:\n\t%s' % (t.stripFilename(fname), i, occurrences, l)) fnew.write(l) fnew.close() fold.close() except: syntax()