def evol_refine(msa, **kwargs):
    """Refine the MSA stored in file *msa* and write the result to a file.

    The alignment is read with ``parseMSA``, passed through ``refineMSA``
    and saved with ``writeMSA``.  All keyword arguments are forwarded
    unchanged to both ``refineMSA`` and ``writeMSA``.

    :arg msa: filename of the alignment to refine
    :arg outname: output filename; when missing or **None**, the input
        filename is used with ``_refined`` inserted before its extension
        (``name.fasta`` -> ``name_refined.fasta``,
        ``name.fasta.gz`` -> ``name_refined.fasta.gz``)
    """

    from prody import parseMSA, refineMSA, writeMSA, LOGGER
    from os.path import splitext

    outname = kwargs.get('outname')
    if outname is None:
        outname, ext = splitext(msa)
        if ext.lower() == '.gz':
            # Compressed input: also peel off the inner (format) extension
            # so the suffix is placed before it.  The original re-split
            # *msa* here, which was a no-op and yielded names such as
            # ``name.fasta_refined.gz``.
            outname, inner = splitext(outname)
            ext = inner + ext
        outname += '_refined' + ext

    writeMSA(outname, refineMSA(parseMSA(msa), **kwargs), **kwargs)
    LOGGER.info('Refined MSA is written in file: ' + outname)
def evol_refine(msa, **kwargs):
    """Refine the alignment in file *msa* and save the refined alignment.

    Reads *msa* with ``parseMSA``, refines it with ``refineMSA`` and writes
    it with ``writeMSA``.  Keyword arguments are passed on, as given, to
    both ``refineMSA`` and ``writeMSA``.

    :arg msa: path of the input alignment file
    :arg outname: path of the output file; if absent or **None** it is
        derived from *msa* by inserting ``_refined`` in front of the
        extension, keeping a trailing ``.gz`` together with the format
        extension (``a.slx.gz`` -> ``a_refined.slx.gz``)
    """

    from prody import parseMSA, refineMSA, writeMSA, LOGGER
    from os.path import splitext

    outname = kwargs.get('outname')
    if outname is None:
        outname, ext = splitext(msa)
        if ext.lower() == '.gz':
            # Split the already-stripped name (not *msa* again, which would
            # change nothing) so that the format extension stays attached
            # to ``.gz`` instead of ending up before ``_refined``.
            outname, format_ext = splitext(outname)
            ext = format_ext + ext
        outname += '_refined' + ext

    refined = refineMSA(parseMSA(msa), **kwargs)
    writeMSA(outname, refined, **kwargs)
    LOGGER.info('Refined MSA is written in file: ' + outname)
def evol_refine(msa, **kwargs):
    """Refine the MSA in file *msa* and write it to a new file.

    The file is parsed with ``parseMSA``, refined with ``refineMSA`` and
    written with ``writeMSA``; every keyword argument is handed to both
    ``refineMSA`` and ``writeMSA``.

    :arg msa: input alignment filename
    :arg outname: output filename.  When not given (or **None**), the
        default is the input filename with ``_refined`` placed before the
        extension; for gzipped input both extensions are kept after the
        suffix (``x.fasta.gz`` -> ``x_refined.fasta.gz``).
    """

    from prody import parseMSA, refineMSA, writeMSA, LOGGER
    from os.path import splitext

    outname = kwargs.get("outname")
    if outname is None:
        outname, ext = splitext(msa)
        if ext.lower() == ".gz":
            # Previously ``splitext(msa)`` was called a second time, which
            # left the name untouched and produced ``x.fasta_refined.gz``.
            # Strip the inner extension from the stem and re-append it.
            outname, inner_ext = splitext(outname)
            ext = inner_ext + ext
        outname += "_refined" + ext

    writeMSA(outname, refineMSA(parseMSA(msa), **kwargs), **kwargs)
    LOGGER.info("Refined MSA is written in file: " + outname)
def evol_merge(*msa, **kwargs):
    """Merge two or more MSA files and write the merged alignment.

    Each filename in *msa* is parsed with ``parseMSA``; the parsed
    alignments are combined with ``mergeMSA`` and written with
    ``writeMSA``, to which all keyword arguments are forwarded.

    :arg msa: two or more alignment filenames
    :arg outname: output filename; when missing or empty, it is built from
        the title of the first file, the suffix ``_merged`` and the
        extension that ``MSAEXTMAP`` maps the first file's format to

    :raises ValueError: if fewer than two filenames are given
    :raises IOError: if any of the files cannot be parsed
    """

    from prody import parseMSA, mergeMSA, LOGGER, writeMSA, MSAFile
    from prody.sequence.msafile import MSAEXTMAP

    if len(msa) < 2:
        raise ValueError('multiple msa filenames must be specified')

    # Parse in an explicit loop so the failing filename is available for
    # the error message.  The former list comprehension did not expose its
    # loop variable to the ``except`` clause on Python 3, turning the
    # intended IOError into a NameError.
    msaobj = []
    for fn in msa:
        try:
            msaobj.append(parseMSA(fn))
        except Exception:
            raise IOError('failed to parse {0}'.format(fn))

    msafile = MSAFile(msa[0])

    outname = kwargs.get('outname') or (
        msafile.getTitle() + '_merged' + MSAEXTMAP[msafile.format])
    writeMSA(outname, mergeMSA(*msaobj), **kwargs)
    LOGGER.info('Merged MSA is saved as: {0}'.format(outname))
def evol_filter(msa, *word, **kwargs):
    """Filter sequences of the MSA in file *msa* by label and write them.

    The file is opened with ``MSAFile`` using a label filter built from
    *word* and the selected matching mode, and the result is written with
    ``writeMSA``, which receives all keyword arguments.

    :arg msa: input alignment filename
    :arg word: one or more words to match against sequence labels; with
        several words a label is selected when it matches any of them
    :arg startswith: select labels that start with a word
    :arg endswith: select labels that end with a word
    :arg contains: select labels that contain a word
    :arg equals: select labels that are equal to a word
    :arg filter_full: forwarded to ``MSAFile`` (default **False**)
    :arg outname: output filename; when missing or **None**, the input
        filename with ``_filtered`` inserted before its extension
        (``x.fasta.gz`` -> ``x_filtered.fasta.gz``)

    The first true option among *startswith*, *endswith*, *contains* and
    *equals* (in that order) decides the matching mode.

    :raises TypeError: if none of the four matching modes is requested
    """

    from prody import MSAFile, writeMSA, LOGGER
    from os.path import splitext

    outname = kwargs.get('outname')
    if outname is None:
        outname, ext = splitext(msa)
        if ext.lower() == '.gz':
            # Split the stem, not *msa* again (a no-op), so the format
            # extension follows the suffix together with ``.gz``.
            outname, inner = splitext(outname)
            ext = inner + ext
        outname += '_filtered' + ext

    # Treat one and many words uniformly.  Previously only ``equals``
    # handled several words; the other modes left the filter unbound and
    # failed with UnboundLocalError.
    words = tuple(word)

    if kwargs.get('startswith', False):
        # str.startswith accepts a tuple of alternatives
        def label_filter(label, seq, words=words):
            return label.startswith(words)

    elif kwargs.get('endswith', False):
        def label_filter(label, seq, words=words):
            return label.endswith(words)

    elif kwargs.get('contains', False):
        def label_filter(label, seq, words=words):
            return any(w in label for w in words)

    elif kwargs.get('equals', False):
        def label_filter(label, seq, words=set(words)):
            return label in words

    else:
        raise TypeError('one of startswith, endswith, contains, or equals '
                        'must be specified')

    msa = MSAFile(msa, filter=label_filter,
                  filter_full=kwargs.get('filter_full', False))

    # writeMSA's return value is used as the written filename in the log
    LOGGER.info('Filtered MSA is written in file: ' +
                writeMSA(outname, msa, **kwargs))
def evol_filter(msa, *word, **kwargs):
    """Select sequences from the MSA file *msa* by label and save them.

    A label filter is built from *word* and the requested matching mode,
    the file is opened with ``MSAFile`` using that filter, and the result
    is written with ``writeMSA`` (all keyword arguments are forwarded).

    :arg msa: filename of the alignment to filter
    :arg word: word(s) compared with sequence labels; when more than one
        is given, a label matching any of them is kept
    :arg startswith: keep labels starting with a word
    :arg endswith: keep labels ending with a word
    :arg contains: keep labels containing a word
    :arg equals: keep labels equal to a word
    :arg filter_full: passed to ``MSAFile`` (default **False**)
    :arg outname: output filename; defaults to the input filename with
        ``_filtered`` inserted before the extension, where a trailing
        ``.gz`` is kept together with the format extension

    Matching modes are checked in the order *startswith*, *endswith*,
    *contains*, *equals*; the first one that is true is used.

    :raises TypeError: if no matching mode is specified
    """

    from prody import MSAFile, writeMSA, LOGGER
    from os.path import splitext

    outname = kwargs.get("outname")
    if outname is None:
        outname, ext = splitext(msa)
        if ext.lower() == ".gz":
            # The original called ``splitext(msa)`` again, which changed
            # nothing; split the stem so ``a.fasta.gz`` becomes
            # ``a_filtered.fasta.gz`` rather than ``a.fasta_filtered.gz``.
            outname, format_ext = splitext(outname)
            ext = format_ext + ext
        outname += "_filtered" + ext

    # One code path for single and multiple words: before, several words
    # combined with startswith/endswith/contains never defined the filter.
    words = tuple(word)

    if kwargs.get("startswith", False):
        # a tuple argument makes startswith test every alternative
        def label_filter(label, seq, words=words):
            return label.startswith(words)

    elif kwargs.get("endswith", False):
        def label_filter(label, seq, words=words):
            return label.endswith(words)

    elif kwargs.get("contains", False):
        def label_filter(label, seq, words=words):
            return any(w in label for w in words)

    elif kwargs.get("equals", False):
        def label_filter(label, seq, words=set(words)):
            return label in words

    else:
        raise TypeError("one of startswith, endswith, contains, or equals "
                        "must be specified")

    msa = MSAFile(msa, filter=label_filter,
                  filter_full=kwargs.get("filter_full", False))

    # the value returned by writeMSA is logged as the output filename
    LOGGER.info("Filtered MSA is written in file: " +
                writeMSA(outname, msa, **kwargs))
def testFasta(self):
    """Write FASTA to ``test.fasta.gz`` and check it reads back unchanged.

    The alignment is written with ``writeMSA`` into ``TEMPDIR``, read back
    by iterating an ``MSAFile``, and compared item by item with ``FASTA``.
    """

    filename = writeMSA(join(TEMPDIR, 'test.fasta.gz'), FASTA)
    try:
        fasta = list(MSAFile(pathDatafile(filename)))
        self.assertListEqual(list(FASTA), fasta)
    finally:
        # remove the temporary file even when the assertion fails
        if os.path.isfile(filename):
            os.remove(filename)
def testSelex(self):
    """Write SELEX to ``test.slx`` and check it parses back unchanged.

    The alignment is written with ``writeMSA`` into ``TEMPDIR``, read back
    with ``parseMSA``, and compared item by item with ``SELEX``.
    """

    filename = writeMSA(join(TEMPDIR, 'test.slx'), SELEX)
    try:
        selex = parseMSA(pathDatafile(filename))
        self.assertListEqual(list(SELEX), list(selex))
    finally:
        # remove the temporary file even when the assertion fails
        if os.path.isfile(filename):
            os.remove(filename)
# Script: slice selected columns out of an alignment file, write them out,
# then build and plot a tree from a second alignment file.
import prody.sequence as sequence
import prody
import matplotlib.pyplot as plt

alignment = prody.MSAFile("pkinase.fasta")

# Column positions of interest -> picked by hand for now.
positions = [72, 83, 117, 119, 194, 251, 354, 355, 357, 429, 432]
# TODO: use alignSequenceToMSA instead to derive positions automatically.
# TODO: set up a web service to get the correspondence between an MSA
# position and a particular PDB structure.

# Presumably restricts the file to the listed columns -- confirm against
# the MSAFile.setSlice documentation.
alignment.setSlice(positions)
prody.writeMSA("test.fasta", alignment)

# NOTE(review): this parses "pocket_type1.fasta", not the "test.fasta"
# written just above -- confirm this is the intended input.
pa = prody.parseMSA("pocket_type1.fasta")
labs = pa.getLabels()

seqidmatrix = prody.buildSeqidMatrix(pa)
# scamatrix is computed but not used anywhere in the visible code.
scamatrix = prody.buildSCAMatrix(pa)

# NOTE(review): the matrix from buildSeqidMatrix is passed as
# distance_matrix -- confirm calcTree expects this kind of matrix.
tree = prody.calcTree(names=labs, distance_matrix=seqidmatrix)

plt.figure()
show = prody.showTree(tree, format='plt')