コード例 #1
0
ファイル: __init__.py プロジェクト: mmokrejs/biopython
def parse(handle, format):
    """Parse an output file of a motif finding program.

    Currently supported formats (case is ignored):
     - AlignAce:      AlignAce output file format
     - MEME:          MEME output file motif
     - MAST:          MAST output file motif
     - TRANSFAC:      TRANSFAC database file format
     - pfm:           JASPAR-style position-frequency matrix
     - jaspar:        JASPAR-style multiple PFM format
     - sites:         JASPAR-style sites file

    As files in the pfm and sites formats contain only a single motif,
    it is easier to use Bio.motifs.read() instead of Bio.motifs.parse()
    for those.

    Arguments:
     - handle - passed unchanged to the read() function of the
       Bio.motifs submodule selected by format
     - format - name of the file format; it is lower-cased before
       being matched, so any capitalisation is accepted

    Returns whatever the format-specific read() function returns; the
    example below iterates over that object to get the motifs.

    Raises ValueError if format is not one of the names listed above.

    For example:

    >>> from Bio import motifs
    >>> with open("Motif/alignace.out") as handle:
    ...     for m in motifs.parse(handle, "AlignAce"):
    ...         print(m.consensus)
    ...
    TCTACGATTGAG
    CTGCAGCTAGCTACGAGTGAG
    GTGCTCTAAGCATAGTAGGCG
    GCCACTAGCAGAGCAGGGGGC
    CGACTCAGAGGTT
    CCACGCTAAGAGAGGTGCCGGAG
    GCGCGTCGCTGAGCA
    GTCCATCGCAAAGCGTGGGGC
    GGGATCAGAGGGCCG
    TGGAGGCGGGG
    GACCAGAGCTTCGCATGGGGG
    GGCGTGCGTG
    GCTGGTTGCTGTTCATTAGG
    GCCGGCGGCAGCTAAAAGGG
    GAGGCCGGGGAT
    CGACTCGTGCTTAGAAGG
    """
    format = format.lower()
    # The submodules are imported lazily so that only the parser that is
    # actually requested gets loaded.
    if format == "alignace":
        from Bio.motifs import alignace
        return alignace.read(handle)
    elif format == "meme":
        from Bio.motifs import meme
        return meme.read(handle)
    elif format == "mast":
        from Bio.motifs import mast
        return mast.read(handle)
    elif format == "transfac":
        from Bio.motifs import transfac
        return transfac.read(handle)
    elif format in ("pfm", "sites", "jaspar"):
        # The JASPAR reader handles three layouts and needs to know which.
        from Bio.motifs import jaspar
        return jaspar.read(handle, format)
    else:
        # The message reports the lower-cased name, not the caller's spelling.
        raise ValueError("Unknown format %s" % format)
コード例 #2
0
ファイル: __init__.py プロジェクト: Honglongwu/biopython
def parse(handle, format):
    """Read the motifs stored in the output file of a motif finding program.

    The format name is matched case-insensitively. Recognised names:
     - AlignAce:      AlignAce output file format
     - MEME:          MEME output file motif
     - MAST:          MAST output file motif
     - TRANSFAC:      TRANSFAC database file format
     - pfm:           JASPAR-style position-frequency matrix
     - jaspar:        JASPAR-style multiple PFM format
     - sites:         JASPAR-style sites file

    Files in the pfm and sites formats hold a single motif only, so
    Bio.motifs.read() is more convenient than Bio.motifs.parse() for them.

    Arguments:
     - handle - handed over as-is to the selected parser's read()
     - format - one of the names above, in any capitalisation

    Returns the object produced by the selected parser's read().
    Raises ValueError when the format name is not recognised.

    Example:

    >>> from Bio import motifs
    >>> with open("Motif/alignace.out") as handle:
    ...     for m in motifs.parse(handle, "AlignAce"):
    ...         print(m.consensus)
    ...
    TCTACGATTGAG
    CTGCAGCTAGCTACGAGTGAG
    GTGCTCTAAGCATAGTAGGCG
    GCCACTAGCAGAGCAGGGGGC
    CGACTCAGAGGTT
    CCACGCTAAGAGAGGTGCCGGAG
    GCGCGTCGCTGAGCA
    GTCCATCGCAAAGCGTGGGGC
    GGGATCAGAGGGCCG
    TGGAGGCGGGG
    GACCAGAGCTTCGCATGGGGG
    GGCGTGCGTG
    GCTGGTTGCTGTTCATTAGG
    GCCGGCGGCAGCTAAAAGGG
    GAGGCCGGGGAT
    CGACTCGTGCTTAGAAGG
    """
    fmt = format.lower()

    # Each parser module is imported only when its format is requested.
    if fmt == "alignace":
        from Bio.motifs import alignace
        return alignace.read(handle)

    if fmt == "meme":
        from Bio.motifs import meme
        return meme.read(handle)

    if fmt == "mast":
        from Bio.motifs import mast
        return mast.read(handle)

    if fmt == "transfac":
        from Bio.motifs import transfac
        return transfac.read(handle)

    if fmt in ("pfm", "sites", "jaspar"):
        # One module serves all three JASPAR layouts; it is told which one.
        from Bio.motifs import jaspar
        return jaspar.read(handle, fmt)

    raise ValueError("Unknown format %s" % fmt)
コード例 #3
0
ファイル: io.py プロジェクト: npdeloss/meirlop
def read_motif_matrices(motifs_file, alphabet=("A", "C", "G", "T")):
    """Load motifs from a JASPAR file and index their matrices and consensus.

    Args:
        motifs_file: Passed straight to ``jaspar.read`` together with
            ``format='jaspar'``.
        alphabet: Keys used to index each ``motif.pwm``; one matrix row is
            built per key, in the given order. The default is an immutable
            tuple so it cannot be shared and mutated across calls.

    Returns:
        A ``(motif_matrix_dict, motif_consensus_dict)`` pair. Both dicts are
        keyed by ``"<matrix_id> <name>"``. The first maps to a NumPy array
        built from the ``motif.pwm`` rows selected by ``alphabet``; the
        second maps to ``str(motif.consensus)``.
    """
    motifs_bs = jaspar.read(motifs_file, format='jaspar')

    motif_matrix_dict = {}
    motif_consensus_dict = {}
    # Single pass: the key is computed once per motif and reused for both
    # dictionaries, so the two always share the same key set.
    for motif in motifs_bs:
        key = f'{motif.matrix_id} {motif.name}'
        motif_matrix_dict[key] = np.array(
            [list(motif.pwm[nuc]) for nuc in alphabet]
        )
        motif_consensus_dict[key] = str(motif.consensus)

    return motif_matrix_dict, motif_consensus_dict
コード例 #4
0
def parse(handle, format, strict=True):
    """Read the motifs stored in the output file of a motif finding program.

    The format name is matched case-insensitively. Recognised names:
     - AlignAce:         AlignAce output file format
     - ClusterBuster:    Cluster Buster position frequency matrix format
     - XMS:              XMS matrix format
     - MEME:             MEME output file motif
     - MINIMAL:          MINIMAL MEME output file motif
     - MAST:             MAST output file motif
     - TRANSFAC:         TRANSFAC database file format
     - pfm-four-columns: Generic position-frequency matrix format with four columns. (cisbp, homer, hocomoco, neph, tiffin)
     - pfm-four-rows:    Generic position-frequency matrix format with four rows. (scertf, yetfasco, hdpi, idmmpmm, flyfactor survey)
     - pfm:              JASPAR-style position-frequency matrix
     - jaspar:           JASPAR-style multiple PFM format
     - sites:            JASPAR-style sites file

    Files in the pfm and sites formats hold a single motif only, so
    Bio.motifs.read() is more convenient than Bio.motifs.parse() for them.

    Arguments:
     - handle - handed over as-is to the selected parser's read()
     - format - one of the names above, in any capitalisation
     - strict - forwarded to the TRANSFAC parser only; no other format
       uses it. If True (default), the parser will raise a ValueError if
       the file contents do not strictly comply with the file format.

    Returns the object produced by the selected parser's read().
    Raises ValueError when the format name is not recognised.

    Example:

    >>> from Bio import motifs
    >>> with open("motifs/alignace.out") as handle:
    ...     for m in motifs.parse(handle, "AlignAce"):
    ...         print(m.consensus)
    ...
    TCTACGATTGAG
    CTGCACCTAGCTACGAGTGAG
    GTGCCCTAAGCATACTAGGCG
    GCCACTAGCAGAGCAGGGGGC
    CGACTCAGAGGTT
    CCACGCTAAGAGAAGTGCCGGAG
    GCACGTCCCTGAGCA
    GTCCATCGCAAAGCGTGGGGC
    GAGATCAGAGGGCCG
    TGGACGCGGGG
    GACCAGAGCCTCGCATGGGGG
    AGCGCGCGTG
    GCCGGTTGCTGTTCATTAGG
    ACCGACGGCAGCTAAAAGGG
    GACGCCGGGGAT
    CGACTCGCGCTTACAAGG
    """
    fmt = format.lower()
    # Positional arguments passed to read() after the handle; most parsers
    # take none.
    extra = ()

    # Pick the parser module lazily so only the requested one is imported.
    if fmt == "alignace":
        from Bio.motifs import alignace as reader
    elif fmt == "meme":
        from Bio.motifs import meme as reader
    elif fmt == "minimal":
        from Bio.motifs import minimal as reader
    elif fmt == "clusterbuster":
        from Bio.motifs import clusterbuster as reader
    elif fmt in ("pfm-four-columns", "pfm-four-rows"):
        from Bio.motifs import pfm as reader
        extra = (fmt,)
    elif fmt == "xms":
        from Bio.motifs import xms as reader
    elif fmt == "mast":
        from Bio.motifs import mast as reader
    elif fmt == "transfac":
        from Bio.motifs import transfac as reader
        extra = (strict,)
    elif fmt in ("pfm", "sites", "jaspar"):
        from Bio.motifs import jaspar as reader
        extra = (fmt,)
    else:
        raise ValueError("Unknown format %s" % fmt)

    return reader.read(handle, *extra)
コード例 #5
0
def parse(handle, format, strict=True):
    """Read the motifs stored in the output file of a motif finding program.

    The format name is matched case-insensitively. Recognised names:
     - AlignAce:      AlignAce output file format
     - MEME:          MEME output file motif
     - MINIMAL:       MINIMAL MEME output file motif
     - MAST:          MAST output file motif
     - TRANSFAC:      TRANSFAC database file format
     - pfm:           JASPAR-style position-frequency matrix
     - jaspar:        JASPAR-style multiple PFM format
     - sites:         JASPAR-style sites file

    Files in the pfm and sites formats hold a single motif only, so
    Bio.motifs.read() is more convenient than Bio.motifs.parse() for them.

    Arguments:
     - handle - handed over as-is to the selected parser's read()
     - format - one of the names above, in any capitalisation
     - strict - forwarded to the TRANSFAC parser only; no other format
       uses it. If True (default), the parser will raise a ValueError if
       the file contents do not strictly comply with the file format.

    Returns the object produced by the selected parser's read().
    Raises ValueError when the format name is not recognised.

    Example:

    >>> from Bio import motifs
    >>> with open("Motif/alignace.out") as handle:
    ...     for m in motifs.parse(handle, "AlignAce"):
    ...         print(m.consensus)
    ...
    TCTACGATTGAG
    CTGCAGCTAGCTACGAGTGAG
    GTGCTCTAAGCATAGTAGGCG
    GCCACTAGCAGAGCAGGGGGC
    CGACTCAGAGGTT
    CCACGCTAAGAGAGGTGCCGGAG
    GCGCGTCGCTGAGCA
    GTCCATCGCAAAGCGTGGGGC
    GGGATCAGAGGGCCG
    TGGAGGCGGGG
    GACCAGAGCTTCGCATGGGGG
    GGCGTGCGTG
    GCTGGTTGCTGTTCATTAGG
    GCCGGCGGCAGCTAAAAGGG
    GAGGCCGGGGAT
    CGACTCGTGCTTAGAAGG
    """
    fmt = format.lower()

    # Each parser module is imported only when its format is requested.
    if fmt == "alignace":
        from Bio.motifs import alignace
        return alignace.read(handle)

    if fmt == "meme":
        from Bio.motifs import meme
        return meme.read(handle)

    if fmt == "minimal":
        from Bio.motifs import minimal
        return minimal.read(handle)

    if fmt == "mast":
        from Bio.motifs import mast
        return mast.read(handle)

    if fmt == "transfac":
        # The only parser that receives the strict flag.
        from Bio.motifs import transfac
        return transfac.read(handle, strict)

    if fmt in ("pfm", "sites", "jaspar"):
        # One module serves all three JASPAR layouts; it is told which one.
        from Bio.motifs import jaspar
        return jaspar.read(handle, fmt)

    raise ValueError("Unknown format %s" % fmt)
コード例 #6
0
ファイル: __init__.py プロジェクト: HuttonICS/biopython
def parse(handle, format, strict=True):
    """Return the motifs found in the output file of a motif finding program.

    Supported format names (compared after lower-casing):
     - AlignAce:      AlignAce output file format
     - MEME:          MEME output file motif
     - MINIMAL:       MINIMAL MEME output file motif
     - MAST:          MAST output file motif
     - TRANSFAC:      TRANSFAC database file format
     - pfm:           JASPAR-style position-frequency matrix
     - jaspar:        JASPAR-style multiple PFM format
     - sites:         JASPAR-style sites file

    Since pfm and sites files contain just one motif, Bio.motifs.read()
    is usually the easier choice over Bio.motifs.parse() for them.

    Arguments:
     - handle - given unchanged to the read() of the chosen parser module
     - format - format name from the list above, any capitalisation
     - strict - only the TRANSFAC parser receives this flag. If True
       (default), the parser will raise a ValueError if the file contents
       do not strictly comply with the specified file format.

    Returns the result of the chosen parser module's read().
    Raises ValueError if the format name is not in the list above.

    Example:

    >>> from Bio import motifs
    >>> with open("Motif/alignace.out") as handle:
    ...     for m in motifs.parse(handle, "AlignAce"):
    ...         print(m.consensus)
    ...
    TCTACGATTGAG
    CTGCAGCTAGCTACGAGTGAG
    GTGCTCTAAGCATAGTAGGCG
    GCCACTAGCAGAGCAGGGGGC
    CGACTCAGAGGTT
    CCACGCTAAGAGAGGTGCCGGAG
    GCGCGTCGCTGAGCA
    GTCCATCGCAAAGCGTGGGGC
    GGGATCAGAGGGCCG
    TGGAGGCGGGG
    GACCAGAGCTTCGCATGGGGG
    GGCGTGCGTG
    GCTGGTTGCTGTTCATTAGG
    GCCGGCGGCAGCTAAAAGGG
    GAGGCCGGGGAT
    CGACTCGTGCTTAGAAGG
    """
    fmt = format.lower()
    # Positional arguments passed to read() after the handle; most parsers
    # take none.
    extra = ()

    # Pick the parser module lazily so only the requested one is imported.
    if fmt == "alignace":
        from Bio.motifs import alignace as reader
    elif fmt == "meme":
        from Bio.motifs import meme as reader
    elif fmt == "minimal":
        from Bio.motifs import minimal as reader
    elif fmt == "mast":
        from Bio.motifs import mast as reader
    elif fmt == "transfac":
        from Bio.motifs import transfac as reader
        extra = (strict,)
    elif fmt in ("pfm", "sites", "jaspar"):
        from Bio.motifs import jaspar as reader
        extra = (fmt,)
    else:
        raise ValueError("Unknown format %s" % fmt)

    return reader.read(handle, *extra)
コード例 #7
0
def parse(handle, format, strict=True):
    """Return the motifs found in the output file of a motif finding program.

    Supported format names (compared after lower-casing):
     - AlignAce:         AlignAce output file format
     - ClusterBuster:    Cluster Buster position frequency matrix format
     - XMS:              XMS matrix format
     - MEME:             MEME output file motif
     - MINIMAL:          MINIMAL MEME output file motif
     - MAST:             MAST output file motif
     - TRANSFAC:         TRANSFAC database file format
     - pfm-four-columns: Generic position-frequency matrix format with four columns. (cisbp, homer, hocomoco, neph, tiffin)
     - pfm-four-rows:    Generic position-frequency matrix format with four rows. (scertf, yetfasco, hdpi, idmmpmm, flyfactor survey)
     - pfm:              JASPAR-style position-frequency matrix
     - jaspar:           JASPAR-style multiple PFM format
     - sites:            JASPAR-style sites file

    Since pfm and sites files contain just one motif, Bio.motifs.read()
    is usually the easier choice over Bio.motifs.parse() for them.

    Arguments:
     - handle - given unchanged to the read() of the chosen parser module
     - format - format name from the list above, any capitalisation
     - strict - only the TRANSFAC parser receives this flag. If True
       (default), the parser will raise a ValueError if the file contents
       do not strictly comply with the specified file format.

    Returns the result of the chosen parser module's read().
    Raises ValueError if the format name is not in the list above.

    Example:

    >>> from Bio import motifs
    >>> with open("motifs/alignace.out") as handle:
    ...     for m in motifs.parse(handle, "AlignAce"):
    ...         print(m.consensus)
    ...
    TCTACGATTGAG
    CTGCACCTAGCTACGAGTGAG
    GTGCCCTAAGCATACTAGGCG
    GCCACTAGCAGAGCAGGGGGC
    CGACTCAGAGGTT
    CCACGCTAAGAGAAGTGCCGGAG
    GCACGTCCCTGAGCA
    GTCCATCGCAAAGCGTGGGGC
    GAGATCAGAGGGCCG
    TGGACGCGGGG
    GACCAGAGCCTCGCATGGGGG
    AGCGCGCGTG
    GCCGGTTGCTGTTCATTAGG
    ACCGACGGCAGCTAAAAGGG
    GACGCCGGGGAT
    CGACTCGCGCTTACAAGG
    """
    fmt = format.lower()

    # Parsers whose read() needs more than the handle come first. The
    # format names are mutually exclusive, so the test order is irrelevant.
    if fmt in ("pfm", "sites", "jaspar"):
        from Bio.motifs import jaspar
        return jaspar.read(handle, fmt)

    if fmt in ("pfm-four-columns", "pfm-four-rows"):
        from Bio.motifs import pfm
        return pfm.read(handle, fmt)

    if fmt == "transfac":
        from Bio.motifs import transfac
        return transfac.read(handle, strict)

    # The remaining parsers take the handle alone.
    if fmt == "alignace":
        from Bio.motifs import alignace
        return alignace.read(handle)

    if fmt == "meme":
        from Bio.motifs import meme
        return meme.read(handle)

    if fmt == "minimal":
        from Bio.motifs import minimal
        return minimal.read(handle)

    if fmt == "clusterbuster":
        from Bio.motifs import clusterbuster
        return clusterbuster.read(handle)

    if fmt == "xms":
        from Bio.motifs import xms
        return xms.read(handle)

    if fmt == "mast":
        from Bio.motifs import mast
        return mast.read(handle)

    raise ValueError("Unknown format %s" % fmt)