def GetSequenceFromPdb(self, tmpPath='.', localPdb='', verbose=False):
    """Fill in sequence and organism data for every row of ``self.analogs``.

    For each analog the file ``<tmpPath>/<pdb>.pdb`` is read. If that file
    does not exist yet, it is first obtained with ``Pdb.Download`` (when
    ``localPdb`` is empty) or with ``Pdb.TakeLocalFiles`` from ``localPdb``.
    The columns ``sequence``, ``organism_id`` and ``organism_scientific`` of
    the row are then updated in place.

    Args:
        tmpPath: Directory that holds (or receives) the ``.pdb`` files.
        localPdb: Directory with local PDB files; ``''`` means download.
        verbose: Print progress messages; also forwarded to ``Download``.

    Raises:
        SystemExit: If the sequence obtained for an analog is empty.
    """
    for i, prot in self.analogs.iterrows():
        pdb2name = prot['pdb']
        myChain = prot['chain']
        # Build the path once; it is reused for the existence check, the
        # progress message and the read.
        pdbFile = tmpPath + '/' + pdb2name + '.pdb'
        if verbose:
            print(" reading file " + pdbFile)

        pdb = Pdb()
        if not path.isfile(pdbFile):
            if localPdb == '':
                if verbose:
                    print(" downloading file")
                # Forward the caller's verbosity instead of forcing verbose
                # output, the same way the constructor calls Download.
                pdb.Download(pdbCode=pdb2name, verbose=verbose, path=tmpPath)
            else:
                if verbose:
                    print(" copying file from local directory " + localPdb)
                pdb.TakeLocalFiles(pdbCode=pdb2name,
                                   sourceDir=localPdb,
                                   targetDir=tmpPath)

        # Whichever way the file became available, it is read the same way.
        pdb.ReadFile(pdbFile, skipNonAminoacid=True)

        seq = pdb.GetSequence(chain=myChain)
        self.analogs.at[i, 'sequence'] = seq
        self.analogs.at[i, 'organism_id'] = pdb.GetKey('ORGANISM_TAXID')
        self.analogs.at[i, 'organism_scientific'] = pdb.GetKey(
            'ORGANISM_SCIENTIFIC')

        if len(seq) == 0:
            sys.exit("ERROR: Empty sequence for pdb " + pdb2name + myChain)

        if verbose:
            print(' has read pdb ' + pdb2name + myChain + ' (length=' +
                  str(len(seq)) + ')')
            print(seq)
def __init__(self, refPdb, refChain, tmpPath='.', localPdb='', verbose=False):
    """Create empty result tables and load the reference chain's sequence.

    The reference file ``<tmpPath>/<refPdb>.pdb`` is read; if it does not
    exist yet it is obtained with ``Pdb.Download`` (when ``localPdb`` is
    empty) or with ``Pdb.TakeLocalFiles`` from ``localPdb``. ``tmpPath`` is
    created first when it is not an existing directory.

    Args:
        refPdb: PDB code of the reference structure.
        refChain: Chain of the reference structure whose sequence is read.
        tmpPath: Directory that holds (or receives) the ``.pdb`` file.
        localPdb: Directory with local PDB files; ``''`` means download.
        verbose: Print progress messages; also forwarded to ``Download``.

    Raises:
        SystemExit: If ``tmpPath`` cannot be created.
    """
    # One row per structural analog; filled in by other methods.
    self.analogs = pd.DataFrame(columns=[
        'pdb', 'chain', 'z', 'rmsd', 'lali', 'nres', 'id', 'title',
        'sequence', 'aligned', 'organism_id', 'organism_scientific'
    ])
    # Residue-range correspondences between the reference and an analog.
    self.equivalences = pd.DataFrame(
        columns=['pdb2', 'chain2', 'from1', 'to1', 'from2', 'to2'])

    self.refPdb = refPdb
    self.refChain = refChain
    self.refSequence = ''
    self.refLength = 0
    # NOTE(review): 'organism_id' is already declared in the column list
    # above, so this assignment looks redundant; kept unchanged.
    self.analogs['organism_id'] = ''

    if verbose:
        print('Creating Dali object for pdb ' + refPdb + refChain)

    pdb = Pdb()

    if not path.isdir(tmpPath):
        try:
            mkdir(tmpPath)
        except OSError:
            # Only filesystem failures are turned into an exit; a bare
            # except would also swallow KeyboardInterrupt and SystemExit.
            sys.exit('Cannot make dir ' + tmpPath)

    refFile = tmpPath + '/' + refPdb + '.pdb'
    if not path.isfile(refFile):
        if localPdb == '':
            pdb.Download(pdbCode=refPdb, verbose=verbose, path=tmpPath)
        else:
            pdb.TakeLocalFiles(pdbCode=refPdb,
                               sourceDir=localPdb,
                               targetDir=tmpPath)

    pdb.ReadFile(refFile, skipNonAminoacid=True)
    self.refSequence = pdb.GetSequence(chain=refChain)
    self.refLength = len(self.refSequence)

    if verbose:
        print('Length of reference sequence is ' + str(self.refLength))