def parse(handle, format):
    """Parse the output file of a motif finding program.

    The parser is selected by ``format``, which is compared without
    regard to case. Accepted values:

     - AlignAce: AlignAce output file format
     - MEME:     MEME output file motif
     - MAST:     MAST output file motif
     - TRANSFAC: TRANSFAC database file format
     - pfm:      JASPAR-style position-frequency matrix
     - jaspar:   JASPAR-style multiple PFM format
     - sites:    JASPAR-style sites file

    The return value is whatever the ``read`` function of the matching
    Bio.motifs submodule produces for ``handle``. Any other value of
    ``format`` raises a ValueError.

    Files in the pfm and sites formats hold only a single motif, so
    Bio.motifs.read() is the easier entry point for those two.

    For example:

    >>> from Bio import motifs
    >>> for m in motifs.parse(open("Motif/alignace.out"), "AlignAce"):
    ...     print(m.consensus)
    TCTACGATTGAG
    CTGCAGCTAGCTACGAGTGAG
    GTGCTCTAAGCATAGTAGGCG
    GCCACTAGCAGAGCAGGGGGC
    CGACTCAGAGGTT
    CCACGCTAAGAGAGGTGCCGGAG
    GCGCGTCGCTGAGCA
    GTCCATCGCAAAGCGTGGGGC
    GGGATCAGAGGGCCG
    TGGAGGCGGGG
    GACCAGAGCTTCGCATGGGGG
    GGCGTGCGTG
    GCTGGTTGCTGTTCATTAGG
    GCCGGCGGCAGCTAAAAGGG
    GAGGCCGGGGAT
    CGACTCGTGCTTAGAAGG

    """
    fmt = format.lower()

    # Each parser module is imported only once its format is requested.
    if fmt == "alignace":
        from Bio.motifs import alignace
        return alignace.read(handle)
    if fmt == "meme":
        from Bio.motifs import meme
        return meme.read(handle)
    if fmt == "mast":
        from Bio.motifs import mast
        return mast.read(handle)
    if fmt == "transfac":
        from Bio.motifs import transfac
        return transfac.read(handle)
    if fmt in ('pfm', 'sites', 'jaspar'):
        # The JASPAR reader needs to know which of its flavours to expect.
        from Bio.motifs import jaspar
        return jaspar.read(handle, fmt)

    raise ValueError("Unknown format %s" % fmt)
def parse(handle, format):
    """Parse an output file of a motif finding program.

    Currently supported formats (case is ignored):

     - AlignAce: AlignAce output file format
     - MEME:     MEME output file motif
     - MAST:     MAST output file motif
     - TRANSFAC: TRANSFAC database file format
     - pfm:      JASPAR-style position-frequency matrix
     - jaspar:   JASPAR-style multiple PFM format
     - sites:    JASPAR-style sites file

    Arguments:
     - handle: open file handle positioned at the start of the data; it
       is passed unchanged to the format-specific ``read`` function.
     - format: name of the file format, one of the values listed above.

    Returns the object produced by the ``read`` function of the matching
    Bio.motifs submodule. Raises ValueError if ``format`` is not one of
    the supported names.

    As files in the pfm and sites formats contain only a single motif,
    it is easier to use Bio.motifs.read() instead of Bio.motifs.parse()
    for those.

    For example:

    >>> from Bio import motifs
    >>> with open("Motif/alignace.out") as handle:
    ...     for m in motifs.parse(handle, "AlignAce"):
    ...         print(m.consensus)
    ...
    TCTACGATTGAG
    CTGCAGCTAGCTACGAGTGAG
    GTGCTCTAAGCATAGTAGGCG
    GCCACTAGCAGAGCAGGGGGC
    CGACTCAGAGGTT
    CCACGCTAAGAGAGGTGCCGGAG
    GCGCGTCGCTGAGCA
    GTCCATCGCAAAGCGTGGGGC
    GGGATCAGAGGGCCG
    TGGAGGCGGGG
    GACCAGAGCTTCGCATGGGGG
    GGCGTGCGTG
    GCTGGTTGCTGTTCATTAGG
    GCCGGCGGCAGCTAAAAGGG
    GAGGCCGGGGAT
    CGACTCGTGCTTAGAAGG

    """
    format = format.lower()
    # Imports are deferred so that only the requested parser is loaded.
    if format == "alignace":
        from Bio.motifs import alignace
        return alignace.read(handle)
    elif format == "meme":
        from Bio.motifs import meme
        return meme.read(handle)
    elif format == "mast":
        from Bio.motifs import mast
        return mast.read(handle)
    elif format == "transfac":
        from Bio.motifs import transfac
        return transfac.read(handle)
    elif format in ("pfm", "sites", "jaspar"):
        # One module serves all three JASPAR flavours; tell it which one.
        from Bio.motifs import jaspar
        return jaspar.read(handle, format)
    else:
        raise ValueError("Unknown format %s" % format)
def get_motifs(meme_data_dir, e_val_threshold):
    '''
    Parses the MEME output for the motifs.

    Parameters
    ----------
    meme_data_dir: str
        The path to the directory where the MEME results are stored
        (namely, the meme.xml file). A trailing path separator is optional.
    e_val_threshold: float
        The maximum e-value for any motif that is returned.

    Returns
    -------
    motifs: [Motifs]
        A list of the parsed motifs whose ``evalue`` is less than or equal
        to ``e_val_threshold``. The list is empty when the file could not
        be parsed or when no motif met the threshold.

    Raises
    ------
    OSError
        If meme.xml cannot be opened; only parsing failures are reported
        and turned into an empty result.
    '''
    # Local import so this function does not depend on the module header.
    import os

    # os.path.join accepts the directory with or without a trailing
    # separator, unlike plain string concatenation.
    meme_xml_path = os.path.join(meme_data_dir, 'meme.xml')

    #Pull all of the records from the MEME output file
    with open(meme_xml_path) as f:
        try:
            records = meme.read(f)
        except Exception as err:
            # Best effort: report the failure and return no motifs.
            # ``Exception`` (rather than a bare except) lets
            # KeyboardInterrupt and SystemExit propagate.
            print('Error with parsing MEME output: %s' % err)
            return []

    #Pull out motifs that meet the e value threshold
    return [motif for motif in records if motif.evalue <= e_val_threshold]
def parse(handle, format, strict=True):
    """Parse the output file of a motif finding program.

    ``format`` names the file format and is compared without regard to
    case. Accepted values:

     - AlignAce:         AlignAce output file format
     - ClusterBuster:    Cluster Buster position frequency matrix format
     - XMS:              XMS matrix format
     - MEME:             MEME output file motif
     - MINIMAL:          MINIMAL MEME output file motif
     - MAST:             MAST output file motif
     - TRANSFAC:         TRANSFAC database file format
     - pfm-four-columns: Generic position-frequency matrix format with
       four columns. (cisbp, homer, hocomoco, neph, tiffin)
     - pfm-four-rows:    Generic position-frequency matrix format with
       four row. (scertf, yetfasco, hdpi, idmmpmm, flyfactor survey)
     - pfm:              JASPAR-style position-frequency matrix
     - jaspar:           JASPAR-style multiple PFM format
     - sites:            JASPAR-style sites file

    The return value is whatever the ``read`` function of the matching
    Bio.motifs submodule produces. An unrecognised ``format`` raises a
    ValueError.

    If strict is True (default), the parser will raise a ValueError if
    the file contents does not strictly comply with the specified file
    format. The flag is handed to the TRANSFAC reader only.

    Files in the pfm and sites formats hold only a single motif, so
    Bio.motifs.read() is the easier entry point for those two.

    For example:

    >>> from Bio import motifs
    >>> with open("motifs/alignace.out") as handle:
    ...     for m in motifs.parse(handle, "AlignAce"):
    ...         print(m.consensus)
    ...
    TCTACGATTGAG
    CTGCACCTAGCTACGAGTGAG
    GTGCCCTAAGCATACTAGGCG
    GCCACTAGCAGAGCAGGGGGC
    CGACTCAGAGGTT
    CCACGCTAAGAGAAGTGCCGGAG
    GCACGTCCCTGAGCA
    GTCCATCGCAAAGCGTGGGGC
    GAGATCAGAGGGCCG
    TGGACGCGGGG
    GACCAGAGCCTCGCATGGGGG
    AGCGCGCGTG
    GCCGGTTGCTGTTCATTAGG
    ACCGACGGCAGCTAAAAGGG
    GACGCCGGGGAT
    CGACTCGCGCTTACAAGG

    """
    fmt = format.lower()

    # Positional arguments passed to read() after the handle; most
    # readers take none.
    extra = ()

    # Pick the parser module (imported lazily) and any extra arguments,
    # then make the single read() call at the bottom.
    if fmt == "alignace":
        from Bio.motifs import alignace as reader
    elif fmt == "meme":
        from Bio.motifs import meme as reader
    elif fmt == "minimal":
        from Bio.motifs import minimal as reader
    elif fmt == "clusterbuster":
        from Bio.motifs import clusterbuster as reader
    elif fmt in ("pfm-four-columns", "pfm-four-rows"):
        from Bio.motifs import pfm as reader
        extra = (fmt,)
    elif fmt == "xms":
        from Bio.motifs import xms as reader
    elif fmt == "mast":
        from Bio.motifs import mast as reader
    elif fmt == "transfac":
        from Bio.motifs import transfac as reader
        extra = (strict,)
    elif fmt in ("pfm", "sites", "jaspar"):
        from Bio.motifs import jaspar as reader
        extra = (fmt,)
    else:
        raise ValueError("Unknown format %s" % fmt)

    return reader.read(handle, *extra)
def parse(handle, format, strict=True):
    """Parse the output file of a motif finding program.

    The parser is selected by ``format`` (case is ignored):

     - AlignAce: AlignAce output file format
     - MEME:     MEME output file motif
     - MINIMAL:  MINIMAL MEME output file motif
     - MAST:     MAST output file motif
     - TRANSFAC: TRANSFAC database file format
     - pfm:      JASPAR-style position-frequency matrix
     - jaspar:   JASPAR-style multiple PFM format
     - sites:    JASPAR-style sites file

    Whatever the ``read`` function of the matching Bio.motifs submodule
    returns is handed back to the caller; an unrecognised ``format``
    raises a ValueError.

    If strict is True (default), the parser will raise a ValueError if
    the file contents does not strictly comply with the specified file
    format. Only the TRANSFAC reader receives this flag.

    Since pfm and sites files contain just one motif, Bio.motifs.read()
    is easier to use than Bio.motifs.parse() for them.

    For example:

    >>> from Bio import motifs
    >>> with open("Motif/alignace.out") as handle:
    ...     for m in motifs.parse(handle, "AlignAce"):
    ...         print(m.consensus)
    ...
    TCTACGATTGAG
    CTGCAGCTAGCTACGAGTGAG
    GTGCTCTAAGCATAGTAGGCG
    GCCACTAGCAGAGCAGGGGGC
    CGACTCAGAGGTT
    CCACGCTAAGAGAGGTGCCGGAG
    GCGCGTCGCTGAGCA
    GTCCATCGCAAAGCGTGGGGC
    GGGATCAGAGGGCCG
    TGGAGGCGGGG
    GACCAGAGCTTCGCATGGGGG
    GGCGTGCGTG
    GCTGGTTGCTGTTCATTAGG
    GCCGGCGGCAGCTAAAAGGG
    GAGGCCGGGGAT
    CGACTCGTGCTTAGAAGG

    """
    name = format.lower()

    # The three JASPAR flavours share one reader, which is told which
    # flavour to expect.
    if name in ('pfm', 'sites', 'jaspar'):
        from Bio.motifs import jaspar
        return jaspar.read(handle, name)

    # TRANSFAC is the only reader that honours ``strict``.
    if name == "transfac":
        from Bio.motifs import transfac
        return transfac.read(handle, strict)

    # The remaining readers take nothing but the handle.
    if name == "alignace":
        from Bio.motifs import alignace
        return alignace.read(handle)
    if name == "meme":
        from Bio.motifs import meme
        return meme.read(handle)
    if name == "minimal":
        from Bio.motifs import minimal
        return minimal.read(handle)
    if name == "mast":
        from Bio.motifs import mast
        return mast.read(handle)

    raise ValueError("Unknown format %s" % name)
def test_meme_parser_5_0_4(self):
    """Parse motifs/meme_v_5_0_4.txt file."""
    motif_name = "Motif CTCAATCGTA"
    site = "CTCAATCGTA"

    # ``with`` closes the file even when parsing raises or an assertion
    # fails; a trailing handle.close() would be skipped in those cases.
    with open("motifs/meme_v_5_0_4.txt") as handle:
        record = meme.read(handle)

        # File-level metadata.
        self.assertEqual(record.version, '5.0.4')
        self.assertEqual(record.datafile, 'example.fasta')
        self.assertEqual(record.alphabet, IUPAC.unambiguous_dna)
        self.assertEqual(record.command, 'meme -dna example.fasta')

        # Input sequences are expected as 'SEQ1;' .. 'SEQ10;', in order.
        self.assertEqual(len(record.sequences), 10)
        for index in range(10):
            self.assertEqual(record.sequences[index],
                             'SEQ%d;' % (index + 1),
                             "sequence %d" % index)

        # Exactly one motif, reachable both by position and by name.
        self.assertEqual(len(record), 1)
        motif = record[0]
        self.assertEqual(motif.name, motif_name)
        self.assertEqual(record[motif_name], motif)
        self.assertEqual(motif.num_occurrences, 10)
        # NOTE(review): with assertAlmostEqual's default of 7 decimal
        # places this cannot tell 1.1e-22 from 0; consider a relative
        # comparison if the exact e-value matters.
        self.assertAlmostEqual(motif.evalue, 1.1e-22)
        self.assertEqual(motif.alphabet, IUPAC.unambiguous_dna)

        # All ten instances are identical apart from the sequence they
        # belong to, so check them in one loop; the message identifies
        # the failing instance.
        self.assertEqual(len(motif.instances), 10)
        for index in range(10):
            instance = motif.instances[index]
            label = "instance %d" % index
            self.assertAlmostEqual(instance.pvalue, 1.96e-06, msg=label)
            # Instances are expected in reverse order: 'SEQ10;' first,
            # 'SEQ1;' last.
            self.assertEqual(instance.sequence_name,
                             'SEQ%d;' % (10 - index), label)
            self.assertEqual(instance.start, 1, label)
            self.assertEqual(instance.strand, '+', label)
            self.assertEqual(instance.length, 10, label)
            self.assertEqual(instance.motif_name, motif_name, label)
            self.assertEqual(instance.alphabet, IUPAC.unambiguous_dna,
                             label)
            self.assertEqual(str(instance), site, label)
#Data Formats
#Josh Rudolph
#9/27/17
from Bio.motifs import meme

# Parse the MEME output stored in test5.txt and show the parsed record.
with open("test5.txt", 'r') as f:
    record = meme.read(f)
    # print() with one parenthesised argument behaves the same on
    # Python 2 and Python 3; the bare ``print record`` statement is a
    # SyntaxError on Python 3.
    print(record)
def parse(handle, format, strict=True):
    """Parse the output file of a motif finding program.

    ``format`` names the file format and is compared without regard to
    case. Accepted values:

     - AlignAce: AlignAce output file format
     - MEME:     MEME output file motif
     - MINIMAL:  MINIMAL MEME output file motif
     - MAST:     MAST output file motif
     - TRANSFAC: TRANSFAC database file format
     - pfm:      JASPAR-style position-frequency matrix
     - jaspar:   JASPAR-style multiple PFM format
     - sites:    JASPAR-style sites file

    The result of the matching Bio.motifs submodule's ``read`` function
    is returned unchanged. A ValueError is raised when ``format`` is
    none of the names above.

    If strict is True (default), the parser will raise a ValueError if
    the file contents does not strictly comply with the specified file
    format. The flag is passed on to the TRANSFAC reader only.

    Files in the pfm and sites formats contain only a single motif, so
    Bio.motifs.read() is easier to use than Bio.motifs.parse() for them.

    For example:

    >>> from Bio import motifs
    >>> with open("Motif/alignace.out") as handle:
    ...     for m in motifs.parse(handle, "AlignAce"):
    ...         print(m.consensus)
    ...
    TCTACGATTGAG
    CTGCAGCTAGCTACGAGTGAG
    GTGCTCTAAGCATAGTAGGCG
    GCCACTAGCAGAGCAGGGGGC
    CGACTCAGAGGTT
    CCACGCTAAGAGAGGTGCCGGAG
    GCGCGTCGCTGAGCA
    GTCCATCGCAAAGCGTGGGGC
    GGGATCAGAGGGCCG
    TGGAGGCGGGG
    GACCAGAGCTTCGCATGGGGG
    GGCGTGCGTG
    GCTGGTTGCTGTTCATTAGG
    GCCGGCGGCAGCTAAAAGGG
    GAGGCCGGGGAT
    CGACTCGTGCTTAGAAGG

    """
    key = format.lower()

    # Arguments that follow the handle in the read() call; empty for
    # readers that take the handle alone.
    options = ()

    # Select the (lazily imported) parser module first, call it once below.
    if key == "alignace":
        from Bio.motifs import alignace as parser
    elif key == "meme":
        from Bio.motifs import meme as parser
    elif key == "minimal":
        from Bio.motifs import minimal as parser
    elif key == "mast":
        from Bio.motifs import mast as parser
    elif key == "transfac":
        from Bio.motifs import transfac as parser
        options = (strict,)
    elif key in ('pfm', 'sites', 'jaspar'):
        from Bio.motifs import jaspar as parser
        options = (key,)
    else:
        raise ValueError("Unknown format %s" % key)

    return parser.read(handle, *options)
records = meme.read(f) File "/home/issac/Desktop/soa/soa-venv/lib/python3.6/site-packages/Bio/motifs/meme.py", line 53, in read __read_motifs(record, xml_tree, sequence_id_name_map) File "/home/issac/Desktop/soa/soa-venv/lib/python3.6/site-packages/Bio/motifs/meme.py", line 176, in __read_motifs motif = Motif(record.alphabet, instances) File "/home/issac/Desktop/soa/soa-venv/lib/python3.6/site-packages/Bio/motifs/meme.py", line 67, in __init__ motifs.Motif.__init__(self, alphabet, instances) File "/home/issac/Desktop/soa/soa-venv/lib/python3.6/site-packages/Bio/motifs/__init__.py", line 263, in __init__ counts = self.instances.count() File "/home/issac/Desktop/soa/soa-venv/lib/python3.6/site-packages/Bio/motifs/__init__.py", line 219, in count counts[letter][position] += 1 KeyError: 'K' This error was thrown when attempting to parse the results for CLSTR4. ''' from Bio.motifs import meme, Motif, Instances from Bio import motifs #Path to CLSTR4 meme output meme_results = '/Users/ichaudr/Documents/UMBC/Lab-Erill/Isaac/issac-workspace-IE/soa/meme_bin/CLSTR4_meme_out/meme.xml' with open(meme_results, 'r') as f: try: records = meme.read(f) except: print('error') print('made it')
def parse(handle, format, strict=True):
    """Parse the output file of a motif finding program.

    The parser is selected by ``format`` (case is ignored):

     - AlignAce:         AlignAce output file format
     - ClusterBuster:    Cluster Buster position frequency matrix format
     - XMS:              XMS matrix format
     - MEME:             MEME output file motif
     - MINIMAL:          MINIMAL MEME output file motif
     - MAST:             MAST output file motif
     - TRANSFAC:         TRANSFAC database file format
     - pfm-four-columns: Generic position-frequency matrix format with
       four columns. (cisbp, homer, hocomoco, neph, tiffin)
     - pfm-four-rows:    Generic position-frequency matrix format with
       four row. (scertf, yetfasco, hdpi, idmmpmm, flyfactor survey)
     - pfm:              JASPAR-style position-frequency matrix
     - jaspar:           JASPAR-style multiple PFM format
     - sites:            JASPAR-style sites file

    Whatever the ``read`` function of the matching Bio.motifs submodule
    returns is handed back to the caller. A ValueError is raised when
    ``format`` is none of the names above.

    If strict is True (default), the parser will raise a ValueError if
    the file contents does not strictly comply with the specified file
    format. Only the TRANSFAC reader receives this flag.

    As pfm and sites files contain just one motif, Bio.motifs.read() is
    easier to use than Bio.motifs.parse() for those formats.

    For example:

    >>> from Bio import motifs
    >>> with open("motifs/alignace.out") as handle:
    ...     for m in motifs.parse(handle, "AlignAce"):
    ...         print(m.consensus)
    ...
    TCTACGATTGAG
    CTGCACCTAGCTACGAGTGAG
    GTGCCCTAAGCATACTAGGCG
    GCCACTAGCAGAGCAGGGGGC
    CGACTCAGAGGTT
    CCACGCTAAGAGAAGTGCCGGAG
    GCACGTCCCTGAGCA
    GTCCATCGCAAAGCGTGGGGC
    GAGATCAGAGGGCCG
    TGGACGCGGGG
    GACCAGAGCCTCGCATGGGGG
    AGCGCGCGTG
    GCCGGTTGCTGTTCATTAGG
    ACCGACGGCAGCTAAAAGGG
    GACGCCGGGGAT
    CGACTCGCGCTTACAAGG

    """
    key = format.lower()

    # One small reader per parser module. Each performs its import only
    # when called, so an unused parser is never loaded.
    def read_alignace():
        from Bio.motifs import alignace
        return alignace.read(handle)

    def read_meme():
        from Bio.motifs import meme
        return meme.read(handle)

    def read_minimal():
        from Bio.motifs import minimal
        return minimal.read(handle)

    def read_clusterbuster():
        from Bio.motifs import clusterbuster
        return clusterbuster.read(handle)

    def read_generic_pfm():
        from Bio.motifs import pfm
        return pfm.read(handle, key)

    def read_xms():
        from Bio.motifs import xms
        return xms.read(handle)

    def read_mast():
        from Bio.motifs import mast
        return mast.read(handle)

    def read_transfac():
        from Bio.motifs import transfac
        return transfac.read(handle, strict)

    def read_jaspar():
        from Bio.motifs import jaspar
        return jaspar.read(handle, key)

    readers = {
        "alignace": read_alignace,
        "meme": read_meme,
        "minimal": read_minimal,
        "clusterbuster": read_clusterbuster,
        "pfm-four-columns": read_generic_pfm,
        "pfm-four-rows": read_generic_pfm,
        "xms": read_xms,
        "mast": read_mast,
        "transfac": read_transfac,
        "pfm": read_jaspar,
        "sites": read_jaspar,
        "jaspar": read_jaspar,
    }

    reader = readers.get(key)
    if reader is None:
        raise ValueError("Unknown format %s" % key)
    return reader()