def GetSequenceFromPdb(self, tmpPath='.', localPdb='', verbose=False):
    """Fill in sequence and organism columns for every row of ``self.analogs``.

    For each row the file ``<tmpPath>/<pdb>.pdb`` is read with
    ``Pdb.ReadFile(..., skipNonAminoacid=True)``. When that file does not
    exist yet, it is first obtained by calling ``Pdb.Download`` (if
    ``localPdb`` is empty) or ``Pdb.TakeLocalFiles`` (otherwise).

    The columns ``sequence``, ``organism_id`` and ``organism_scientific``
    of ``self.analogs`` are updated in place, row by row.

    Args:
        tmpPath: directory in which the ``.pdb`` files are looked up and
            into which missing ones are placed.
        localPdb: source directory handed to ``Pdb.TakeLocalFiles``; an
            empty string selects ``Pdb.Download`` instead.
        verbose: print progress messages; also forwarded to
            ``Pdb.Download``.

    Raises:
        SystemExit: if the sequence obtained for a row is empty.
    """
    for i, prot in self.analogs.iterrows():
        pdb2name = prot['pdb']
        myChain = prot['chain']
        # Built once; every branch below works on this same file.
        pdbFile = tmpPath + '/' + pdb2name + '.pdb'
        if verbose:
            print("     reading file " + pdbFile)

        pdb = Pdb()
        if not path.isfile(pdbFile):
            if localPdb == '':
                if verbose:
                    print("     downloading file")
                # Honour the caller's verbosity (was hard-coded to True),
                # matching what __init__ does for the reference structure.
                pdb.Download(pdbCode=pdb2name, verbose=verbose, path=tmpPath)
            else:
                if verbose:
                    print("     copying file from local directory " + localPdb)
                pdb.TakeLocalFiles(pdbCode=pdb2name, sourceDir=localPdb,
                                   targetDir=tmpPath)
        # The file is read the same way regardless of how it got there.
        pdb.ReadFile(pdbFile, skipNonAminoacid=True)

        seq = pdb.GetSequence(chain=myChain)
        self.analogs.at[i, 'sequence'] = seq
        self.analogs.at[i, 'organism_id'] = pdb.GetKey('ORGANISM_TAXID')
        self.analogs.at[i, 'organism_scientific'] = pdb.GetKey(
            'ORGANISM_SCIENTIFIC')

        if len(seq) == 0:
            sys.exit("ERROR: Empty sequence for pdb " + pdb2name + myChain)
        if verbose:
            print('     has read pdb ' + pdb2name + myChain + ' (length=' +
                  str(len(seq)) + ')')
            print(seq)
        # Drop the parsed structure before the next one is created.
        del pdb
def __init__(self, refPdb, refChain, tmpPath='.', localPdb='', verbose=False):
    """Set up empty result tables and read the reference sequence.

    Creates the empty ``analogs`` and ``equivalences`` data frames, makes
    sure ``tmpPath`` exists, obtains ``<tmpPath>/<refPdb>.pdb`` if it is
    not already there (``Pdb.Download`` when ``localPdb`` is empty,
    ``Pdb.TakeLocalFiles`` otherwise), and stores the sequence of chain
    ``refChain`` in ``self.refSequence`` with its length in
    ``self.refLength``.

    Args:
        refPdb: code of the reference structure; used as the file stem.
        refChain: chain of the reference structure whose sequence is read.
        tmpPath: working directory for ``.pdb`` files; created if missing.
        localPdb: source directory handed to ``Pdb.TakeLocalFiles``; an
            empty string selects ``Pdb.Download`` instead.
        verbose: print progress messages; also forwarded to
            ``Pdb.Download``.

    Raises:
        SystemExit: if ``tmpPath`` does not exist and cannot be created.
    """
    self.analogs = pd.DataFrame(columns=[
        'pdb', 'chain', 'z', 'rmsd', 'lali', 'nres', 'id', 'title',
        'sequence', 'aligned', 'organism_id', 'organism_scientific'
    ])
    self.equivalences = pd.DataFrame(
        columns=['pdb2', 'chain2', 'from1', 'to1', 'from2', 'to2'])
    self.refPdb = refPdb
    self.refChain = refChain
    self.refSequence = ''
    self.refLength = 0
    # Re-assigns the organism_id column of the (still empty) frame.
    self.analogs['organism_id'] = ''

    if verbose:
        print('Creating Dali object for pdb ' + refPdb + refChain)

    pdb = Pdb()
    if not path.isdir(tmpPath):
        try:
            mkdir(tmpPath)
        except OSError:
            # Only filesystem failures are turned into this message; a bare
            # except would also have swallowed KeyboardInterrupt and bugs.
            sys.exit('Cannot make dir ' + tmpPath)

    refFile = tmpPath + '/' + refPdb + '.pdb'
    if not path.isfile(refFile):
        if localPdb == '':
            pdb.Download(pdbCode=refPdb, verbose=verbose, path=tmpPath)
        else:
            pdb.TakeLocalFiles(pdbCode=refPdb, sourceDir=localPdb,
                               targetDir=tmpPath)
    pdb.ReadFile(refFile, skipNonAminoacid=True)
    self.refSequence = pdb.GetSequence(chain=refChain)
    del pdb

    self.refLength = len(self.refSequence)
    if verbose:
        print('Length of reference sequence is ' + str(self.refLength))
# Read every structure named in the list ``fp`` (one file name per line) and
# write the selected chain of each one to tmp.fasta.
# NOTE(review): fileOpt='a' is passed to PrintFasta, presumably append mode;
# if so, a tmp.fasta left over from an earlier run would end up in the
# alignment input as well -- TODO confirm.
for i, line in enumerate(fp):
    pdbFile = line.strip()
    if not pdbFile:
        # Blank line in the list: nothing to read.
        continue

    # Derive pdb code and chain from the base name only; ``pdbFile`` keeps
    # the path exactly as listed so the file can be opened from anywhere.
    path, fileName = os.path.split(pdbFile)
    if fileName[0:3] == 'pdb':
        # 'pdbXXXX...' style name: drop the prefix.
        pdbCodeC = fileName[3:]
    else:
        pdbCodeC = fileName
    pdbCodeC = pdbCodeC.replace('.pdb', '')
    pdbCodeC = pdbCodeC.replace('.ent', '')

    if len(pdbCodeC) == 5:
        # Four-character code followed by a one-character chain label.
        pdbCode = pdbCodeC[0:4]
        chain = pdbCodeC[4]
    else:
        pdbCode = pdbCodeC
        chain = ''
    if verbose:
        print('Reading ' + pdbCode + chain)

    # read pdb
    pdb = Pdb()
    pdb.ReadFile(filename=pdbFile, keepChainLabel=True)
    pdb.PrintFasta(filename='tmp.fasta', fileOpt='a', chain=chain)
    del pdb

# sequence alignment
# NOTE(review): the MUSCLE executable location is hard-coded to one user's
# home directory and has to be adapted on any other machine.
muscle_cline = MuscleCommandline('/Users/guido/prog/muscle-3.8.31',
                                 input='tmp.fasta', out='align.fasta')
print(muscle_cline)
muscle_cline()
# Split the PDB file given as first command-line argument by chain: every
# chain that Pdb.IdentifyChainType reports as 'protein' is copied and printed
# to a file named <pdbCode><chain>.pdb.
import os
import sys

# sys.path entries are used literally, so '~' has to be expanded explicitly.
# The directory is put first because the project module is called ``pdb``:
# appended to the end it would be shadowed by the standard-library debugger
# module of the same name.
sys.path.insert(0, os.path.expanduser('~/prog/pyTools'))
from pdb import Pdb

if len(sys.argv) < 2:
    sys.exit('Usage: ' + sys.argv[0] + ' <file.pdb>')

pdbName = sys.argv[1]
path, fname = os.path.split(pdbName)
# Output names are built from the base name, without directory or extension.
pdbCode = fname.replace('.pdb', '')

pdb = Pdb()
pdb.verbose = True
pdb.ReadFile(pdbName)

for c in pdb.chains:
    chainType = pdb.IdentifyChainType(c)
    if chainType == 'protein':
        newPdb = pdb.Copy(chain=c)
        nFileName = pdbCode + c + '.pdb'
        newPdb.Print(filename=nFileName)