Exemple #1
0
def read_MEME_motif(motif_file: str, bg_file: str, pseudocount: float,
                    no_reverse: bool, verbose: bool,
                    debug: bool) -> List[Motif]:
    """Read every motif PWM stored in a MEME-format file.

    The file is scanned for ``MOTIF`` records. For each record the
    statistics and the probability rows are read, and once the whole file
    has been parsed a Motif object is built for every record (normalized
    matrix, pseudocount applied, background attached).

    Errors are reported through ``exception_handler``, which is defined
    elsewhere (presumably it raises the given exception type or aborts the
    program depending on ``debug`` -- confirm against its definition).

    Parameters
    ----------
    motif_file : str
        path to the motif file in MEME format
    bg_file : str
        path to the background file in Markov Background Format
        (http://meme-suite.org/doc/bfile-format.html), or the ``UNIF``
        constant to request a uniform background
    pseudocount : float
        value forwarded to ``apply_pseudocount_meme``; must be > 0
    no_reverse : bool
        forwarded to ``pseudo_bg``; presumably True restricts the
        computation to the forward strand -- confirm against ``pseudo_bg``
    verbose : bool
        print the time spent reading each motif
    debug : bool
        forwarded to ``exception_handler`` and to the helper functions

    Returns
    -------
    List[Motif]
        one Motif object per ``MOTIF`` record, in file order
    """

    if not isinstance(motif_file, str):
        errmsg = "Expected str, got {}.\n"
        exception_handler(TypeError, errmsg.format(type(motif_file).__name__),
                          debug)
    if not os.path.isfile(motif_file):
        errmsg = "Unable to locate {}.\n"
        exception_handler(FileNotFoundError, errmsg.format(motif_file), debug)
    if not isinstance(bg_file, str):
        errmsg = "Expected str, got {}.\n"
        exception_handler(TypeError, errmsg.format(type(bg_file).__name__),
                          debug)
    if bg_file != UNIF and not os.path.isfile(bg_file):
        errmsg = "Unable to locate {}.\n"
        exception_handler(FileNotFoundError, errmsg.format(bg_file), debug)
    if not isinstance(pseudocount, float):
        errmsg = "Expected float, got {}.\n"
        exception_handler(TypeError, errmsg.format(type(pseudocount).__name__),
                          debug)
    if pseudocount <= 0:
        errmsg = "The pseudocount must be > 0.\n"
        exception_handler(ValueError, errmsg, debug)
    if not isinstance(no_reverse, bool):
        errmsg = "Expected bool, got {}.\n"
        exception_handler(TypeError, errmsg.format(type(no_reverse).__name__),
                          debug)

    motifs_raw = list()
    motifs: List[Motif] = list()
    # begin motif parsing
    try:
        # the context manager closes the file on every path; the previous
        # ``finally: ifstream.close()`` failed with UnboundLocalError when
        # open() itself raised, hiding the original error
        with open(motif_file, mode="r") as ifstream:
            alphabet = __read_alphabet_meme(motif_file, ifstream,
                                            debug)  # shared by all motifs
            # symbol -> row index, used with np objects
            nucsmap = {nuc: idx for idx, nuc in enumerate(alphabet)}
            while True:
                # advance to the next MOTIF record
                for line in ifstream:
                    if line.startswith("MOTIF"):
                        break  # new motif instance
                else:
                    break  # EOF reached: all motifs have been read
                if verbose:
                    start_rm = time.time()
                motifids = line.split()
                if len(motifids) == 2:  # only name
                    motif_id = motifids[1]
                    motif_name = motif_id
                else:  # assume first two fields: id, name
                    motif_id, motif_name = motifids[1:3]
                statistics = __read_statistics_meme(motif_file, ifstream,
                                                    debug)
                probs = __read_counts_meme(motif_file, ifstream,
                                           statistics["width"], debug)
                motifs_raw.append({
                    "motifId": motif_id,
                    "motifName": motif_name,
                    "statistics": statistics,
                    "counts": probs
                })
                if verbose:
                    end_rm = time.time()
                    print("Read motif %s in %.2fs." % (motif_name,
                                                       (end_rm - start_rm)))
    except Exception:
        # any parsing failure is reported as a motif-file read error;
        # KeyboardInterrupt and SystemExit are deliberately not intercepted
        errmsg = "An error occurred while reading {}.\n"
        exception_handler(MotifFileReadError, errmsg.format(motif_file), debug)
    else:
        if bg_file == UNIF:
            bgs = get_uniformBG(alphabet, debug)
        elif os.path.isfile(bg_file):
            bgs = readBGfile(bg_file, debug)
        else:
            errmsg = "Unable to parse {}.\n"
            exception_handler(BGFileError, errmsg.format(bg_file), debug)
        bgs = pseudo_bg(bgs, no_reverse, debug)  # add pseudocount to bg
        for raw in motifs_raw:
            stats = raw["statistics"]
            width = stats["width"]
            mp = pd.DataFrame(np.matrix(raw["counts"]))
            mp.index = alphabet
            mp = norm_motif(mp, width, alphabet, debug)
            mp = apply_pseudocount_meme(mp.to_numpy(), pseudocount,
                                        stats["nsites"], width, bgs,
                                        alphabet, nucsmap, debug)
            motif: Motif = Motif(mp, width, alphabet, raw["motifId"],
                                 raw["motifName"], nucsmap)
            motif.setBg(bgs)
            motifs.append(motif)

    return motifs
Exemple #2
0
def read_JASPAR_motif(motif_file: str, bg_file: str, pseudocount: float,
                      no_reverse: bool, verbose: bool, debug: bool) -> Motif:
    """Read a motif PWM in JASPAR format and build the Motif object.

    The first line is the header (leading character dropped, then the
    tab-separated motif ID and motif name). Every following non-empty line
    holds one nucleotide symbol followed by its bracketed counts; reading
    stops at the first empty line or at EOF.

    Errors are reported through ``exception_handler``, which is defined
    elsewhere (presumably it raises the given exception type or aborts the
    program depending on ``debug`` -- confirm against its definition).

    Parameters
    ----------
    motif_file : str
        path to the motif PWM in JASPAR format
    bg_file : str
        path to the background file in Markov Background Format
        (http://meme-suite.org/doc/bfile-format.html), or the ``UNIF``
        constant to request a uniform background
    pseudocount : float
        value forwarded to ``apply_pseudocount_jaspar``
    no_reverse : bool
        forwarded to ``pseudo_bg``; presumably True restricts the
        computation to the forward strand -- confirm against ``pseudo_bg``
    verbose : bool
        print the time spent reading the motif
    debug : bool
        forwarded to ``exception_handler`` and to the helper functions

    Returns
    -------
    Motif
        Motif object built from the data in ``motif_file``
    """

    nucs: List[str] = list()
    counts: List[List[float]] = list()
    if verbose:
        start_rm: float = time.time()
    try:
        # the context manager closes the file on every path; the previous
        # ``finally: ifstream.close()`` failed with UnboundLocalError when
        # open() itself raised, hiding the original error
        with open(motif_file, mode="r") as ifstream:
            # begin parsing
            header: str = ifstream.readline().strip()[1:]
            if not header:  # empty file?
                errmsg = "{} seems to be empty.\n"
                exception_handler(IOError, errmsg.format(motif_file), debug)
            motifID, motifName = header.split('\t')[0:2]
            for raw_line in ifstream:
                line = raw_line.strip()
                if not line:
                    break  # an empty line ends the counts section
                nucs.append(line[:1].upper())
                # drop the symbol, then the opening and closing brackets
                counts.append(list(map(float, line[1:].split()[1:][:-1])))
            if not nucs:  # only header read ?
                errmsg = "{} seems to be empty.\n"
                exception_handler(IOError, errmsg.format(motif_file), debug)
    except Exception:
        # any parsing failure is reported as a motif-file read error;
        # KeyboardInterrupt and SystemExit are deliberately not intercepted
        errmsg = "An error occurred while reading {}.\n"
        exception_handler(MotifFileReadError, errmsg.format(motif_file), debug)
    else:
        motif_width: int = len(counts[0])
        if any(len(row) != motif_width for row in counts):
            errmsg = "Motif counts width mismatch.\n"
            exception_handler(ValueError, errmsg, debug)
        # symbol -> row index, used with np objects
        nucsmap = {nuc: idx for idx, nuc in enumerate(nucs)}
        motif_counts: pd.DataFrame = pd.DataFrame(
            data=counts, index=nucs)  # motif count matrix
        alphabet: list = sorted(nucs)

        # compute background
        if bg_file == UNIF:
            bgs = get_uniformBG(alphabet, debug)
        elif os.path.isfile(bg_file):
            bgs = readBGfile(bg_file, debug)
        else:
            errmsg = "Unable to parse {}.\n"
            exception_handler(BGFileError, errmsg.format(bg_file), debug)
        bgs = pseudo_bg(bgs, no_reverse, debug)  # add pseudocount to bg

        # motif probability matrix
        motif_probs = (motif_counts / motif_counts.sum(0))
        motif_probs = norm_motif(motif_probs, motif_width, alphabet, debug)
        motif_probs = apply_pseudocount_jaspar(motif_counts.to_numpy(),
                                               motif_probs.to_numpy(),
                                               pseudocount, bgs, motif_width,
                                               alphabet, nucsmap, debug)
        motif: Motif = Motif(motif_probs, motif_width, alphabet, motifID,
                             motifName, nucsmap)
        motif.setBg(bgs)

        if verbose:
            end_rm: float = time.time()
            msg: str = "Read motif %s in %.2fs" % (motifID,
                                                   (end_rm - start_rm))
            print(msg)

    return motif
Exemple #3
0
def read_JASPAR_motif(motif_file: str, bg_file: str, pseudocount: float,
                      no_reverse: bool, verbose: bool) -> Motif:
    """Read a motif PWM in JASPAR format and build the Motif object.

    The first line is the header (leading character dropped, then the
    tab-separated motif ID and motif name). Every following non-blank line
    holds one nucleotide symbol followed by its bracketed raw counts.

    Parameters
    ----------
    motif_file : str
        path to the motif PWM in JASPAR format
    bg_file : str
        path to the background file in Markov Background Format
        (http://meme-suite.org/doc/bfile-format.html), or 'UNIF' to
        request a uniform background
    pseudocount : float
        value forwarded to ``apply_pseudocount_jaspar``
    no_reverse : bool
        forwarded to ``pseudo_bg``; presumably True restricts the
        computation to the forward strand -- confirm against ``pseudo_bg``
    verbose : bool
        print the time spent reading the motif

    Returns
    -------
    Motif
        Motif object storing the data contained in motif_file

    Raises
    ------
    FileReadingException
        if the motif file cannot be opened or parsed
    NotValidBGException
        if bg_file is neither 'UNIF' nor an existing path
    """

    # lists where nucleotides and raw counts are stored
    nucs: List[str] = list()
    counts: List[List[float]] = list()

    if verbose:
        start_rm: float = time.time()

    try:
        # the context manager is the only owner of the file handle: the
        # previous ``finally: in_mtf.close()`` raised UnboundLocalError
        # when open() failed, replacing the FileReadingException
        with open(motif_file) as in_mtf:
            # read the header, dropping the line terminator up front so
            # that no character of the motif name is lost when the header
            # has extra fields or no trailing newline
            header: str = in_mtf.readline().rstrip("\r\n")[1:]
            # get the jaspar ID and the common TF name
            motifID, motifName = header.split('\t')[0:2]

            for raw_line in in_mtf:
                line = raw_line.strip()
                if not line:
                    continue  # blank lines carry no counts
                nucs.append(line[:1])
                # drop the symbol, then the opening and closing brackets
                counts.append(list(map(float, line[1:].split()[1:][:-1])))

        if not counts:
            # reported as a read failure by the handler below
            raise ValueError("no count rows found")

    except Exception as err:
        errmsg: str = ' '.join(["\n\nERROR: unable to read file", motif_file])
        raise FileReadingException(errmsg) from err

    else:
        motif_counts = pd.DataFrame(data=counts, index=nucs)
        # NOTE(review): building the DataFrame may not reject rows of
        # unequal length -- confirm whether an explicit check is needed
        motif_width: int = len(counts[0])
        alphabet: list = sorted(nucs)  # alphabet as list

        if bg_file == 'UNIF':
            bgs = get_uniformBG(alphabet)
        elif os.path.exists(bg_file):
            bgs = readBGfile(bg_file)
        else:
            errmsg = "\n\nERROR: unable to find the given background file"
            raise NotValidBGException(errmsg)

        bgs = pseudo_bg(bgs, no_reverse)

        motif_probs: pd.DataFrame = (motif_counts / motif_counts.sum(0))
        motif_probs = norm_motif(motif_probs, motif_width, alphabet)
        motif_probs = apply_pseudocount_jaspar(motif_counts, motif_probs,
                                               pseudocount, bgs, motif_width,
                                               alphabet)

        motif: Motif = Motif(motif_probs, motif_width, alphabet, motifID,
                             motifName)
        motif.setBg(bgs)

        if verbose:
            end_rm: float = time.time()
            msg: str = ''.join(
                ["Read motif ", motifID, " in ",
                 str(end_rm - start_rm), "s"])
            print(msg)

        return motif
Exemple #4
0
def read_MEME_motif(motif_file: str, bg_file: str, pseudocount: float,
                    no_reverse: bool, verbose: bool) -> List[Motif]:
    """Read every motif PWM stored in a MEME-format file.

    The file is scanned line by line with a small state machine: the
    ALPHABET line, then for each motif its ``MOTIF`` header, the first
    non-blank line after it (alphabet length, width and site count are read
    from fixed token positions) and ``width`` rows of four frequencies.
    One Motif object is built for each completely read motif.

    Parameters
    ----------
    motif_file : str
        path to the motif PWM
    bg_file : str
        path to the background probability distribution, or 'UNIF' to
        request a uniform background
    pseudocount : float
        value forwarded to ``apply_pseudocount_meme``
    no_reverse : bool
        forwarded to ``pseudo_bg``; presumably True restricts the
        computation to the forward strand -- confirm against ``pseudo_bg``
    verbose : bool
        print the time spent reading each motif

    Returns
    -------
    List[Motif]
        List of Motif objects storing the data contained in motif_file

    Raises
    ------
    FileReadingException
        if the motif file cannot be opened or parsed
    NotValidBGException
        if bg_file is neither 'UNIF' nor an existing path
    """

    # per-motif data, kept in parallel lists in file order
    motifID_lst: List[str] = list()
    motifName_lst: List[str] = list()
    motif_width_lst: List[int] = list()
    site_counts_lst: List[int] = list()
    a_lst: List[List[np.double]] = list()
    c_lst: List[List[np.double]] = list()
    g_lst: List[List[np.double]] = list()
    t_lst: List[List[np.double]] = list()

    alphabet = None

    try:
        # the context manager is the only owner of the file handle: the
        # previous ``finally: in_mtf.close()`` raised UnboundLocalError
        # when open() failed, replacing the FileReadingException
        with open(motif_file, 'r') as in_mtf:

            infostart = False  # waiting for the motif information line
            datastart = False  # reading the motif frequency rows
            motif_width = None
            pos_read = 0

            for line in in_mtf:
                if line[0:8] == 'ALPHABET':
                    alphabet = sorted(set(line[10:].strip()))
                    # explicit check: asserts are stripped under -O
                    if not isListEqual(alphabet, DNA_ALPHABET):
                        raise ValueError("unexpected motif alphabet")

                if line[0:5] == 'MOTIF':

                    if verbose:
                        start_rm: float = time.time()

                    # there are two ways to define the motif name line
                    # in MEME file
                    # (refer to http://meme-suite.org/doc/meme-format.html?man_type=web):
                    #   1 - MOTIF motif_alternate_name
                    #   2 - MOTIF motif_identifier motif_alternate_name
                    motif_header: List[str] = line.split()

                    if len(motif_header) == 2:  # support case (1)
                        motifID = motifName = motif_header[1]
                    else:  # support case (2)
                        motifID, motifName = motif_header[1:3]

                    motifID_lst.append(motifID)
                    motifName_lst.append(motifName)

                    # the information about the motif starts here
                    infostart = True
                    continue

                if infostart and line.strip():
                    # skip the 26-character line prefix, then read the
                    # values found at token positions 1, 3 and 5
                    infosplit: List[str] = line[26:].split()

                    alphalen: int = int(infosplit[1])
                    if alphalen != len(alphabet):
                        raise ValueError("alphabet length mismatch")

                    motif_width = int(infosplit[3])
                    motif_width_lst.append(motif_width)
                    site_counts_lst.append(int(infosplit[5]))

                    infostart = False  # information ends here
                    datastart = True  # data begin at the next line

                    # initialize nucleotide data
                    a, c, g, t = list(), list(), list(), list()
                    continue

                if datastart and pos_read < motif_width:
                    freqs = line.split()
                    a.append(np.double(freqs[0]))
                    c.append(np.double(freqs[1]))
                    g.append(np.double(freqs[2]))
                    t.append(np.double(freqs[3]))
                    pos_read += 1

                # all rows of the current motif have been read
                if pos_read == motif_width:
                    a_lst.append(a)
                    c_lst.append(c)
                    g_lst.append(g)
                    t_lst.append(t)

                    # reset the per-motif state
                    pos_read = 0
                    motif_width = None
                    datastart = False

                    if verbose:
                        end_rm: float = time.time()
                        msg: str = ''.join([
                            "Read motif ", motifID, " in ",
                            str(end_rm - start_rm), "s"
                        ])
                        print(msg)

        if alphabet is None:
            # reported as a read failure by the handler below instead of
            # leaking a NameError from the background computation
            raise ValueError("missing ALPHABET line")

    except Exception as err:  # something went wrong
        errmsg: str = ' '.join(["Unable to read file", motif_file])
        raise FileReadingException(errmsg) from err

    else:
        # read the background
        if bg_file == 'UNIF':
            bgs = get_uniformBG(alphabet)
        elif os.path.exists(bg_file):
            bgs = readBGfile(bg_file)
        else:
            errmsg = "\n\nERROR: unable to find the given background file"
            raise NotValidBGException(errmsg)

        bgs = pseudo_bg(bgs, no_reverse)

        motif_lst: List[Motif] = list()

        # a_lst holds only completely read motifs, so zip stops there
        records = zip(a_lst, c_lst, g_lst, t_lst, motif_width_lst,
                      site_counts_lst, motifID_lst, motifName_lst)
        for a_row, c_row, g_row, t_row, mw, sc, mid, mname in records:
            # allocate the probability matrix, then fill it row by row
            mp: pd.DataFrame = pd.DataFrame(index=alphabet,
                                            columns=range(mw),
                                            data=np.double(0))
            mp.loc['A'] = a_row
            mp.loc['C'] = c_row
            mp.loc['G'] = g_row
            mp.loc['T'] = t_row

            mp = norm_motif(mp, mw, alphabet)
            mp = apply_pseudocount_meme(mp, pseudocount, sc, mw, bgs, alphabet)

            motif: Motif = Motif(mp, mw, alphabet, mid, mname)
            motif.setBg(bgs)

            motif_lst.append(motif)

        return motif_lst
Exemple #5
0
def read_MEME_motif(motif_file, bg_file, pseudocount, no_reverse, verbose):
    """
        Read the motif file in MEME format and build a Motif object
        for each motif found.
        Note that a MEME file can contain a variable number of
        motifs
        ----
        Params:
            motif_file (str) : path to the motif file
            bg_file (str) : path to the background file, or 'UNIF' to
                            request a uniform background
            pseudocount (np.double) : pseudocount to add to motif frequencies
            no_reverse (bool) : if set to True, only data related to
                                forward strand will be used
            verbose (bool) : print the time spent reading each motif
        ----
        Returns:
            motif_lst (list) : list of Motif objects, one for each
                               completely read motif
        ----
        Raises:
            FileReadingException : the motif file cannot be opened or parsed
            NotValidBGException : bg_file is neither 'UNIF' nor an
                                  existing path
    """

    # per-motif data, kept in parallel lists in file order
    motifID_lst = []  # list of the found motif IDs
    motifName_lst = []  # list of the found motif names
    motif_width_lst = []  # list of the found motif widths
    site_counts_lst = []  # list of the found motif site counts
    a_lst = []  # list of the found As probabilities for each motif
    c_lst = []  # list of the found Cs probabilities for each motif
    g_lst = []  # list of the found Gs probabilities for each motif
    t_lst = []  # list of the found Ts probabilities for each motif

    alphabet = None

    try:
        # the context manager is the only owner of the file handle: the
        # previous ``finally: in_mtf.close()`` raised UnboundLocalError
        # when open() failed, replacing the FileReadingException
        with open(motif_file, 'r') as in_mtf:  # open the motif file

            infostart = False  # waiting for the motif information line
            datastart = False  # reading the motif frequency rows
            motif_width = None
            pos_read = 0

            for line in in_mtf:
                if line[0:8] == 'ALPHABET':
                    alphabet = sorted(set(line[10:].strip()))
                    # explicit check: asserts are stripped under -O
                    if not isListEqual(alphabet, DNA_ALPHABET):
                        raise ValueError("unexpected motif alphabet")

                if line[0:5] == 'MOTIF':

                    if verbose:
                        start_rm = time.time()

                    # the motif name line comes in two forms
                    # (refer to http://meme-suite.org/doc/meme-format.html):
                    #   1 - MOTIF motif_alternate_name
                    #   2 - MOTIF motif_identifier motif_alternate_name
                    motif_header = line.split()

                    if len(motif_header) == 2:  # support case (1)
                        motifID = motifName = motif_header[1]
                    else:  # support case (2)
                        motifID, motifName = motif_header[1:3]

                    motifID_lst.append(motifID)
                    motifName_lst.append(motifName)

                    # the information about the motif starts here
                    infostart = True
                    continue

                if infostart and line.strip():
                    # skip the 26-character line prefix, then read the
                    # values found at token positions 1, 3 and 5
                    infosplit = line[26:].split()

                    alphalen = int(infosplit[1])
                    if alphalen != len(alphabet):
                        raise ValueError("alphabet length mismatch")

                    motif_width = int(infosplit[3])
                    motif_width_lst.append(motif_width)
                    site_counts_lst.append(int(infosplit[5]))

                    infostart = False  # information ends here
                    datastart = True  # data begin at the next line

                    # initialize nucleotide data
                    a, c, g, t = [], [], [], []
                    continue

                if datastart and pos_read < motif_width:
                    freqs = line.split()
                    a.append(np.double(freqs[0]))
                    c.append(np.double(freqs[1]))
                    g.append(np.double(freqs[2]))
                    t.append(np.double(freqs[3]))
                    pos_read += 1

                # all rows of the current motif have been read
                if pos_read == motif_width:
                    a_lst.append(a)
                    c_lst.append(c)
                    g_lst.append(g)
                    t_lst.append(t)

                    # reset the per-motif state
                    pos_read = 0
                    motif_width = None
                    datastart = False

                    if verbose:
                        end_rm = time.time()
                        msg = ''.join([
                            "Read motif ", motifID, " in ",
                            str(end_rm - start_rm), "s"
                        ])
                        print(msg)

        if alphabet is None:
            # reported as a read failure by the handler below instead of
            # leaking a NameError from the background computation
            raise ValueError("missing ALPHABET line")

    except Exception as err:  # something went wrong
        errmsg = ' '.join(["Unable to read file", motif_file])
        raise FileReadingException(errmsg) from err

    else:

        # read the background
        if bg_file == 'UNIF':
            bgs = get_uniformBG(alphabet)
        elif os.path.exists(bg_file):
            bgs = readBGfile(bg_file)
        else:
            raise NotValidBGException(
                "\n\nERROR: unable to find the given background file")

        bgs = pseudo_bg(bgs, no_reverse)

        motif_lst = []  # list of found motifs

        # a_lst holds only completely read motifs, so zip stops there
        records = zip(a_lst, c_lst, g_lst, t_lst, motif_width_lst,
                      site_counts_lst, motifID_lst, motifName_lst)
        for a_row, c_row, g_row, t_row, mw, sc, mid, mname in records:
            # allocate the probability matrix, then fill it row by row
            mp = pd.DataFrame(index=alphabet,
                              columns=range(mw),
                              data=np.double(0))
            mp.loc['A'] = a_row
            mp.loc['C'] = c_row
            mp.loc['G'] = g_row
            mp.loc['T'] = t_row

            mp = norm_motif(mp, mw, alphabet)
            mp = apply_pseudocount_meme(mp, pseudocount, sc, mw, bgs, alphabet)

            motif = Motif(mp, mw, alphabet, mid, mname)
            motif.setBg(bgs)

            motif_lst.append(motif)

        return motif_lst
Exemple #6
0
def read_JASPAR_motif(motif_file, bg_file, pseudocount, no_reverse, verbose):
    """
        Read data contained in a JASPAR motif file and build a Motif
        object from them.
        The first line is the header (leading character dropped, then
        the tab-separated motif ID and motif name); every following
        non-blank line holds a nucleotide symbol and its bracketed
        raw counts
        ----
        Params:
            motif_file (str) : path to the motif file (in JASPAR format)
            bg_file (str) : path to the background file, or 'UNIF' to
                            request a uniform background
            pseudocount : value forwarded to apply_pseudocount_jaspar
            no_reverse (bool) : flag parameter to consider or not the reverse
                                complement building the Motif object
            verbose (bool) : print the time spent reading the motif
        ----
        Returns:
            motif (Motif) : Motif object summarizing data contained in
                                motif_file
        ----
        Raises:
            FileReadingException : the motif file cannot be opened or parsed
            NotValidBGException : bg_file is neither 'UNIF' nor an
                                  existing path
    """

    # lists where nucleotides and raw counts are stored
    nucs = []
    counts = []

    if verbose:
        start_rm = time.time()

    try:
        # the context manager is the only owner of the file handle: the
        # previous ``finally: in_mtf.close()`` raised UnboundLocalError
        # when open() failed, replacing the FileReadingException
        with open(motif_file) as in_mtf:

            # read the header, dropping the line terminator up front so
            # that no character of the motif name is lost when the header
            # has extra fields or no trailing newline
            header = in_mtf.readline().rstrip("\r\n")[1:]
            # get the jaspar ID and the common TF name
            motifID, motifName = header.split('\t')[0:2]

            for raw_line in in_mtf:
                line = raw_line.strip()
                if not line:
                    continue  # blank lines carry no counts
                nucs.append(line[:1])  # read nucleotide
                # drop the symbol, then the opening and closing brackets
                counts.append(list(map(float, line[1:].split()[1:][:-1])))

        if not counts:
            # reported as a read failure by the handler below
            raise ValueError("no count rows found")

    except Exception as err:  # something went wrong
        errmsg = ' '.join(["\n\nERROR: unable to read file", motif_file])
        raise FileReadingException(errmsg) from err

    else:

        motif_counts = pd.DataFrame(data=counts, index=nucs)  # raw counts
        # NOTE(review): building the DataFrame may not reject rows of
        # unequal length -- confirm whether an explicit check is needed
        motif_width = len(counts[0])
        alphabet = sorted(nucs)  # alphabet as list

        # read the background file
        if bg_file == 'UNIF':
            bgs = get_uniformBG(alphabet)
        elif os.path.exists(bg_file):
            bgs = readBGfile(bg_file)
        else:
            raise NotValidBGException(
                "\n\nERROR: unable to find the given background file")

        bgs = pseudo_bg(bgs, no_reverse)

        motif_probs = (motif_counts / motif_counts.sum(0))  # get probabilities
        motif_probs = norm_motif(motif_probs, motif_width, alphabet)
        motif_probs = apply_pseudocount_jaspar(motif_counts, motif_probs,
                                               pseudocount, bgs, motif_width,
                                               alphabet)

        motif = Motif(motif_probs, motif_width, alphabet, motifID, motifName)
        motif.setBg(bgs)

        if verbose:
            end_rm = time.time()
            msg = ''.join(
                ["Read motif ", motifID, " in ",
                 str(end_rm - start_rm), "s"])
            print(msg)

        return motif