def run_meme(seqs, outdir, meme_settings):
    """Run MEME motif discovery on the given sequences and parse the result.

    The sequences are converted with ``seqs_to_fasta`` and written to a
    temporary file, the MEME executable is invoked on that file, and the
    ``meme.txt`` it leaves in ``outdir`` is parsed.

    Args:
        seqs: Sequences to analyse; handed unchanged to ``seqs_to_fasta``.
        outdir: Directory passed to MEME through ``-oc``; ``meme.txt`` is
            read back from here.
        meme_settings: Mapping with the MEME executable under ``'path'``.
            Any of the keys ``'cons'``, ``'nmotifs'``, ``'mod'``, ``'minw'``
            and ``'maxw'`` that are present are forwarded to MEME as
            ``-<key> <value>``.

    Returns:
        The record returned by ``MEME.read`` for ``<outdir>/meme.txt``.
    """
    forwarded_options = ('cons', 'nmotifs', 'mod', 'minw', 'maxw')

    # Write through the temporary file's own handle and keep it open while
    # MEME runs: the file is removed as soon as the handle is closed.
    with tempfile.NamedTemporaryFile(mode='w') as infile:
        infile.write(seqs_to_fasta(seqs))
        # Make sure the data is on disk before the child process reads it.
        infile.flush()

        cmd = [
            meme_settings['path'],
            infile.name,
            '-oc', outdir,
            '-dna',                # sequences use DNA alphabet
            '-revcomp',            # allow sites on both strands
            '-maxsize', 10000000,  # max dataset size in chars
        ]
        # Extend the MEME command with the optional settings.
        for opt in forwarded_options:
            if opt in meme_settings:
                cmd.extend(['-' + opt, meme_settings[opt]])

        # Build a real list: a lazy map() object would be exhausted by the
        # join below and leave nothing for call().
        cmd = [str(arg) for arg in cmd]
        print(' '.join(cmd))
        # NOTE(review): the exit status of MEME is not checked here; a failed
        # run surfaces only when meme.txt cannot be opened or parsed below.
        call(cmd)

    # Parse the text output of MEME.
    with open(os.path.join(outdir, 'meme.txt')) as f:
        rec = MEME.read(f)
    return rec
def import_from_MEME(self, filename, n=1, mode='biotools'):
    """Import one motif from a MEME output file (meme.txt).

    If the output holds several motifs, motif number ``n`` is used; the
    first motif is ``n=1``, which is also the default.

    Args:
        filename: Path of the MEME text output to read.
        n: 1-based number of the motif; it is looked up by the name
            ``'Motif <n>'``.
        mode: ``'biopython'`` returns the motif object obtained from the
            Biopython parser. ``'biotools'`` (default) stores the motif on
            this instance instead: ``self._L`` becomes a list with one
            ``CharDict`` per motif position and ``self._n`` the number of
            positions.

    Returns:
        The Biopython motif when ``mode == 'biopython'``; otherwise ``None``.

    Raises:
        UserWarning: If ``mode`` is neither ``'biopython'`` nor ``'biotools'``.
            The file has already been read and parsed at that point.
    """
    import Bio.Motif.Parsers.MEME as MEME

    # Close the file deterministically once parsing is done.
    with open(filename) as f:
        MEME_object = MEME.read(f)

    motif_name = 'Motif ' + str(n)
    biopython_motif = MEME_object.get_motif_by_name(motif_name)

    if mode == 'biopython':
        return biopython_motif
    if mode != 'biotools':
        raise UserWarning('Not a valid mode.')

    internal_n = len(biopython_motif)
    # Filling _L directly here is done instead of calling initializePositions.
    self._L = [CharDict(biopython_motif[i]) for i in range(internal_n)]
    self._n = internal_n
def parse(handle, format):
    """Parse an output file of a motif finding program.

    Currently supported formats:
     - AlignAce:      AlignAce output file format
     - MEME:          MEME output file motif
     - TRANSFAC:      TRANSFAC database file format
     - pfm:           JASPAR-style position-frequency matrix
     - sites:         JASPAR-style sites file
     - jaspar-pfm:    JASPAR-style position-frequency matrix [DEPRECATED]
     - jaspar-sites:  JASPAR-style sites file [DEPRECATED]

    The lower-case names "alignace" and "meme" are accepted as well and
    are served by the parsers in Bio.Motif rather than Bio.Motif.Parsers.

    The kind of object returned depends on the format: "AlignAce", "MEME",
    "jaspar-pfm" and "jaspar-sites" give an iterator over motifs;
    "alignace", "meme" and "TRANSFAC" give the record returned by the
    corresponding parser; "pfm" and "sites" give a one-element list.
    Any other format raises ValueError.

    As files in the pfm and sites formats contain only a single motif,
    it is easier to use Bio.Motif.read() instead of Bio.Motif.parse()
    for those.

    For example:

    >>> from Bio import Motif
    >>> for motif in Motif.parse(open("Motif/alignace.out"),"AlignAce"):
    ...     print motif.consensus()
    TCTACGATTGAG
    CTGCACCTAGCTACGAGTGAG
    GTGCCCTAAGCATACTAGGCG
    GCCACTAGCAGAGCAGGGGGC
    CGACTCAGAGGTT
    CCACGCTAAGAGAAGTGCCGGAG
    GCACGTCCCTGAGCA
    GTCCATCGCAAAGCGTGGGGC
    GAGATCAGAGGGCCG
    TGGACGCGGGG
    GACCAGAGCCTCGCATGGGGG
    AGCGCGCGTG
    GCCGGTTGCTGTTCATTAGG
    ACCGACGGCAGCTAAAAGGG
    GACGCCGGGGAT
    CGACTCGCGCTTACAAGG

    >>> for motif in Motif.parse(open("Motif/alignace.out"),"alignace"):
    ...     print motif.consensus
    TCTACGATTGAG
    CTGCAGCTAGCTACGAGTGAG
    GTGCTCTAAGCATAGTAGGCG
    GCCACTAGCAGAGCAGGGGGC
    CGACTCAGAGGTT
    CCACGCTAAGAGAGGTGCCGGAG
    GCGCGTCGCTGAGCA
    GTCCATCGCAAAGCGTGGGGC
    GGGATCAGAGGGCCG
    TGGAGGCGGGG
    GACCAGAGCTTCGCATGGGGG
    GGCGTGCGTG
    GCTGGTTGCTGTTCATTAGG
    GCCGGCGGCAGCTAAAAGGG
    GAGGCCGGGGAT
    CGACTCGTGCTTAGAAGG
    """
    # Each branch imports its parser lazily, so only the module for the
    # requested format is ever loaded.
    if format == "AlignAce":
        # Old Motif code
        from Bio.Motif.Parsers import AlignAce
        return iter(AlignAce.read(handle).motifs)

    if format == "alignace":
        # Old Motif code
        from Bio.Motif import AlignAce
        return AlignAce.read(handle)

    if format == "MEME":
        from Bio.Motif.Parsers import MEME
        return iter(MEME.read(handle).motifs)

    if format == "meme":
        from Bio.Motif import MEME
        return MEME.read(handle)

    if format == "TRANSFAC":
        from Bio.Motif import TRANSFAC
        return TRANSFAC.read(handle)

    if format in ('pfm', 'sites'):
        # Single-motif formats: hand the lone motif back wrapped in a list.
        from Bio.Motif import Jaspar
        return [Jaspar.read(handle, format)]

    if format == "jaspar-pfm":
        return iter([OldMotif()._from_jaspar_pfm(handle)])

    if format == "jaspar-sites":
        return iter([OldMotif()._from_jaspar_sites(handle)])

    raise ValueError("Unknown format %s" % format)
# Copy the FASTA input to a new file, substituting the ">WEIGHTS" placeholder
# with the weights string. The context managers close both files even if the
# copy fails part-way through.
with open(fasta_sequences, "r") as fin:
    with open(fasta_with_weights, "w") as fout:
        for line in fin:
            fout.write(line.replace(">WEIGHTS", weights_string))

# Call command line meme.
# NOTE(review): MEME is run on the literal "seqweights.fasta" rather than on
# fasta_with_weights — confirm both name the same file.
subprocess.call(["meme", "-dna", "-revcomp", "seqweights.fasta",
                 "-minw", "8", "-maxw", "14"])

os.chdir(output_directory)
try:
    with open("meme.txt") as f:
        record = MEME.read(f)

    for motif in record.motifs:
        for instance in motif.instances:
            if instance.sequence_name == myseq:
                print("%s %s %s" % (instance.sequence_name,
                                    instance.start,
                                    instance.length))
                found += 1
                # Subtract one to adjust for meme indexing from 1 rather
                # than 0, then count every position covered by the site.
                first_position = instance.start - 1
                for position in range(first_position,
                                      first_position + instance.length):
                    promoter_array[position] += 1
            else:
                print("not found")
                not_found += 1
finally:
    # Always return to the original directory, even when reading or parsing
    # meme.txt fails.
    os.chdir(current_directory)

# Force true division (integer counters would otherwise truncate to 0 or 1
# on Python 2) and avoid dividing by zero when no instance was examined.
instances_examined = found + not_found
if instances_examined:
    percent_found = float(found) / instances_examined
else:
    percent_found = 0.0