def parse(handle, format, strict=True):
    """Parse an output file from a motif finding program.

    The ``format`` name is matched case-insensitively. Recognised values:

     - AlignAce:         AlignAce output file format
     - ClusterBuster:    Cluster Buster position frequency matrix format
     - XMS:              XMS matrix format
     - MEME:             MEME output file motif
     - MINIMAL:          MINIMAL MEME output file motif
     - MAST:             MAST output file motif
     - TRANSFAC:         TRANSFAC database file format
     - pfm-four-columns: Generic position-frequency matrix format with four
       columns. (cisbp, homer, hocomoco, neph, tiffin)
     - pfm-four-rows:    Generic position-frequency matrix format with four
       rows. (scertf, yetfasco, hdpi, idmmpmm, flyfactor survey)
     - pfm:              JASPAR-style position-frequency matrix
     - jaspar:           JASPAR-style multiple PFM format
     - sites:            JASPAR-style sites file

    Files in the pfm and sites formats contain only a single motif, so
    Bio.motifs.read() is usually more convenient than Bio.motifs.parse()
    for those.

    For example:

    >>> from Bio import motifs
    >>> with open("motifs/alignace.out") as handle:
    ...     for m in motifs.parse(handle, "AlignAce"):
    ...         print(m.consensus)
    ...
    TCTACGATTGAG
    CTGCACCTAGCTACGAGTGAG
    GTGCCCTAAGCATACTAGGCG
    GCCACTAGCAGAGCAGGGGGC
    CGACTCAGAGGTT
    CCACGCTAAGAGAAGTGCCGGAG
    GCACGTCCCTGAGCA
    GTCCATCGCAAAGCGTGGGGC
    GAGATCAGAGGGCCG
    TGGACGCGGGG
    GACCAGAGCCTCGCATGGGGG
    AGCGCGCGTG
    GCCGGTTGCTGTTCATTAGG
    ACCGACGGCAGCTAAAAGGG
    GACGCCGGGGAT
    CGACTCGCGCTTACAAGG

    If strict is True (default), the parser will raise a ValueError if the
    file contents does not strictly comply with the specified file format.
    In this function ``strict`` is forwarded to the TRANSFAC reader only.

    Returns whatever the selected format module's ``read`` function returns.
    Raises ValueError ("Unknown format ...") for an unrecognised format name.
    """
    fmt = format.lower()

    # Each reader module is imported only when its format is requested, so
    # an unrelated parser is never loaded just by calling this function.
    if fmt == "alignace":
        from Bio.motifs import alignace

        return alignace.read(handle)

    if fmt == "meme":
        from Bio.motifs import meme

        return meme.read(handle)

    if fmt == "minimal":
        from Bio.motifs import minimal

        return minimal.read(handle)

    if fmt == "clusterbuster":
        from Bio.motifs import clusterbuster

        return clusterbuster.read(handle)

    if fmt in ("pfm-four-columns", "pfm-four-rows"):
        from Bio.motifs import pfm

        # The pfm reader handles both layouts and needs to know which one.
        return pfm.read(handle, fmt)

    if fmt == "xms":
        from Bio.motifs import xms

        return xms.read(handle)

    if fmt == "mast":
        from Bio.motifs import mast

        return mast.read(handle)

    if fmt == "transfac":
        from Bio.motifs import transfac

        return transfac.read(handle, strict)

    if fmt in ("pfm", "sites", "jaspar"):
        from Bio.motifs import jaspar

        # The jaspar reader covers three sub-formats, selected by name.
        return jaspar.read(handle, fmt)

    # The message reports the lower-cased name, as the comparison used it.
    raise ValueError("Unknown format %s" % fmt)
def parse(handle, format, strict=True):
    """Parse an output file from a motif finding program.

    Currently supported formats (case is ignored):

     - AlignAce:         AlignAce output file format
     - ClusterBuster:    Cluster Buster position frequency matrix format
     - XMS:              XMS matrix format
     - MEME:             MEME output file motif
     - MINIMAL:          MINIMAL MEME output file motif
     - MAST:             MAST output file motif
     - TRANSFAC:         TRANSFAC database file format
     - pfm-four-columns: Generic position-frequency matrix format with four
       columns. (cisbp, homer, hocomoco, neph, tiffin)
     - pfm-four-rows:    Generic position-frequency matrix format with four
       rows. (scertf, yetfasco, hdpi, idmmpmm, flyfactor survey)
     - pfm:              JASPAR-style position-frequency matrix
     - jaspar:           JASPAR-style multiple PFM format
     - sites:            JASPAR-style sites file

    As files in the pfm and sites formats contain only a single motif, it
    is easier to use Bio.motifs.read() instead of Bio.motifs.parse() for
    those.

    For example:

    >>> from Bio import motifs
    >>> with open("motifs/alignace.out") as handle:
    ...     for m in motifs.parse(handle, "AlignAce"):
    ...         print(m.consensus)
    ...
    TCTACGATTGAG
    CTGCACCTAGCTACGAGTGAG
    GTGCCCTAAGCATACTAGGCG
    GCCACTAGCAGAGCAGGGGGC
    CGACTCAGAGGTT
    CCACGCTAAGAGAAGTGCCGGAG
    GCACGTCCCTGAGCA
    GTCCATCGCAAAGCGTGGGGC
    GAGATCAGAGGGCCG
    TGGACGCGGGG
    GACCAGAGCCTCGCATGGGGG
    AGCGCGCGTG
    GCCGGTTGCTGTTCATTAGG
    ACCGACGGCAGCTAAAAGGG
    GACGCCGGGGAT
    CGACTCGCGCTTACAAGG

    If strict is True (default), the parser will raise a ValueError if the
    file contents does not strictly comply with the specified file format.
    Here ``strict`` is passed on to the TRANSFAC reader only; the other
    readers are called without it.

    The return value is the result of the chosen module's ``read`` call.
    An unrecognised format name raises ValueError.
    """
    kind = format.lower()

    # Step 1: pick the reader module (imported lazily) and any positional
    # arguments it takes in addition to the handle.
    if kind == "alignace":
        from Bio.motifs import alignace as reader

        extra = ()
    elif kind == "meme":
        from Bio.motifs import meme as reader

        extra = ()
    elif kind == "minimal":
        from Bio.motifs import minimal as reader

        extra = ()
    elif kind == "clusterbuster":
        from Bio.motifs import clusterbuster as reader

        extra = ()
    elif kind in ("pfm-four-columns", "pfm-four-rows"):
        from Bio.motifs import pfm as reader

        # This reader is told which of the two layouts to expect.
        extra = (kind,)
    elif kind == "xms":
        from Bio.motifs import xms as reader

        extra = ()
    elif kind == "mast":
        from Bio.motifs import mast as reader

        extra = ()
    elif kind == "transfac":
        from Bio.motifs import transfac as reader

        extra = (strict,)
    elif kind in ("pfm", "sites", "jaspar"):
        from Bio.motifs import jaspar as reader

        # One module serves all three JASPAR-style formats.
        extra = (kind,)
    else:
        # The lower-cased name is what ends up in the error message.
        raise ValueError("Unknown format %s" % kind)

    # Step 2: a single call site for every supported format.
    return reader.read(handle, *extra)