def __call__(self, fasta):
    """Run MEME on ``fasta`` and return the sites it predicts.

    The values returned by ``run_meme`` are stored on the instance as
    ``meme_cmd_args``, ``stdoutdata``, ``starts``, ``Zs``, ``thetas`` and
    ``lambdas``.  The file ``meme.txt`` in ``self.options.output_dir`` is
    then parsed with Biopython's MEME parser.

    :param fasta: The FASTA input handed to ``run_meme``.
    :returns: A list of ``(sequence_name, Interval, on_minus_strand)``
        tuples, one per motif instance, where ``on_minus_strand`` is True
        when the instance's strand is ``'-'``.
    """
    start_time = time.time()
    ensure_dir_exists(self.options.output_dir)
    predictions = []

    # run MEME
    (
        self.meme_cmd_args,
        self.stdoutdata,
        self.starts,
        self.Zs,
        self.thetas,
        self.lambdas,
    ) = run_meme(fasta, self.options)

    # parse output
    from Bio import Motif
    meme_txt = os.path.join(self.options.output_dir, 'meme.txt')
    # Parse inside the ``with`` block so the handle is still open while the
    # parser reads from it, and is reliably closed afterwards.
    with open(meme_txt) as meme_file:
        for motif in Motif.parse(meme_file, "MEME"):
            for instance in motif.instances:
                # MEME parser seems to count from 1, not 0
                start = instance.start - 1
                prediction = (
                    instance.sequence_name,
                    Interval(start, start + motif.length),
                    instance.strand == '-',
                )
                predictions.append(prediction)

    logger.info('MEME took %.1f seconds', time.time() - start_time)
    return predictions
def parse_meme_output_for_sites(meme_output):
    """Parse MEME-like output into predicted site intervals per sequence.

    :param meme_output: Path of the MEME-format file to parse.
    :returns: A ``defaultdict`` mapping each sequence name to a
        ``P.IntIntervalSet`` holding one ``P.IntInterval`` per motif
        instance found in that sequence.
    """
    logging.info('Parsing predictions from %s', meme_output)
    predicted_sites = defaultdict(P.IntIntervalSet)

    # Materialise the motifs while the file is open so the handle can be
    # closed deterministically before the results are processed.
    with open(meme_output) as meme_file:
        motifs = list(Motif.parse(meme_file, "MEME"))

    for motif in motifs:
        for instance in motif.instances:
            logging.info(
                'Prediction: sequence = %s; site = %s; pos = %3d',
                instance.sequence_name, instance, instance.start)
            # NOTE(review): instance.start is used unadjusted; if the parser
            # reports 1-based starts these intervals are shifted by one
            # relative to 0-based coordinates - confirm with callers.
            predicted_sites[instance.sequence_name].add(
                P.IntInterval(instance.start, instance.start + len(instance)))

    return predicted_sites
# Run the stempy algorithm on a tiny FASTA file with MEME-like output
# enabled, dump that output, and check that third-party parsers accept it.
options = stempy.get_default_options()
options.output_dir = os.path.join('output', 'test-meme-like-output')
options.min_w = options.max_w = 8  # fix the motif width at 8
options.meme_like_output = 'meme.out'
algorithm = stempy.Algorithm(options)
fasta = os.path.join(os.path.dirname(__file__), 'fasta', 'T00759-tiny.fa')
algorithm(fasta)

logging.info('Showing MEME output from %s', algorithm.meme_like_output_file)
os.system('cat %s' % algorithm.meme_like_output_file)

#
# Test BioPython parser
#
from Bio import Motif
# Read all motifs while the file is open so the handle is closed afterwards.
with open(algorithm.meme_like_output_file) as meme_like_output:
    motifs = list(Motif.parse(meme_like_output, "MEME"))

#
# Doesn't quite work with pycogent yet. Pycogent expects a summary section
# that contains sites in all the sequences
#
#from cogent import LoadSeqs
#from cogent.parse.meme import MemeParser
#results = MemeParser(open(algorithm.meme_like_output_file, 'U'))
#seqs = LoadSeqs(fasta, aligned=False)
#results.Alignment = seqs
#for motif in results.Motifs:
#    module = motif.Modules[0]
#    print module.ID, module.Evalue, len(module.NamedSeqs)
#