Beispiel #1
0
def __read_alphabet_meme(motif_file: str, ifstream, debug: bool) -> List[str]:
    """Find the ALPHABET line of a MEME file and return the DNA alphabet.

    The stream is consumed line by line until a line starting with
    "ALPHABET" is met. Every failure is reported through
    ``exception_handler``.

    Parameters
    ----------
    motif_file : str
        path to motif PWM (only used to build the error messages)
    ifstream : _io.TextIOWrapper
        input stream, read starting from its current position
    debug : bool
        trace the full error stack

    Returns
    -------
    list
        sorted alphabet characters
    """

    for line in ifstream:
        if line.startswith("ALPHABET"):
            break
    else:  # the stream ended before an ALPHABET line was met
        exception_handler(
            EOFError,
            "Unexpected EOF reached, unable to parse {}.\n".format(motif_file),
            debug)
    if not line.startswith("ALPHABET"):
        exception_handler(
            ValueError, "No line stores alphabet in {}.\n".format(motif_file),
            debug)
    # drop the surrounding whitespace and the "ALPHABET= " prefix
    symbols = line.strip().replace("ALPHABET= ", "")
    if symbols != "ACGT":
        exception_handler(ValueError,
                          "The motif is not built on DNA alphabet.\n", debug)
    else:
        alphabet = sorted(symbols)
    assert isListEqual(alphabet, DNA_ALPHABET)
    return alphabet
Beispiel #2
0
    def __init__(self, count_matrix, width, alphabet, motif_id, motif_name):
        """Validate the arguments and store them on the new motif object.

        The checks run in the order listed below; the first failing one
        raises and nothing is stored.

        Parameters
        ----------
        count_matrix : pandas.DataFrame
            motif count matrix, must not be empty
        width : int
            motif width, must not be negative
        alphabet : list
            motif alphabet, must match DNA_ALPHABET according to isListEqual
        motif_id : str
            motif ID, must not be empty
        motif_name : str
            motif name, must not be empty

        Raises
        ------
        NoDataFrameException
            if count_matrix is not a pandas.DataFrame
        NotValidMotifMatrixException
            if count_matrix is empty
        WrongMotifWidthException
            if width is not an int or is negative
        WrongMotifIDException
            if motif_id is not a non-empty str
        WrongMotifNameException
            if motif_name is not a non-empty str
        NotValidAlphabetException
            if alphabet is not a list or is not the DNA alphabet
        """

        # The type check must come first: `.empty` is only available on
        # pandas objects, so testing it on anything else would fail with an
        # unrelated AttributeError instead of the intended exception.
        if not isinstance(count_matrix, pd.DataFrame):
            raise NoDataFrameException(
                "\n\nERROR: the given value is not a pandas.DataFrame instance")

        if count_matrix.empty:
            errmsg = "\n\nERROR: attempt to initialize the motif object with an empty count matrix"
            raise NotValidMotifMatrixException(errmsg)

        # NOTE(review): a width of 0 passes this check -- confirm that a
        # zero-width motif is really meant to be accepted.
        if not isinstance(width, int) or width < 0:
            errmsg = "\n\nERROR: attempt to initialize motif without a valid width"
            raise WrongMotifWidthException(errmsg)

        if not isinstance(motif_id, str) or not motif_id:
            raise WrongMotifIDException(
                "\n\nERROR: cannot initialize the motif with the given ID")

        if not isinstance(motif_name, str) or not motif_name:
            raise WrongMotifNameException(
                "\n\nERROR: cannot initialize the motif with the given name")

        if not isinstance(alphabet, list) or not isListEqual(
                alphabet, DNA_ALPHABET):
            errmsg = "\n\nERROR: cannot initialize a motif object with a wrong alphabet"
            raise NotValidAlphabetException(errmsg)

        self._count_matrix = count_matrix
        self._width = width
        self._motif_id = motif_id
        self._motif_name = motif_name
        self._alphabet = alphabet
Beispiel #3
0
 def setAlphabet(self, alphabet: List[str]) -> None:
     """Validate the given alphabet and store it in ``self.alphabet``.

     Raises TypeError when the alphabet is not a list, and ValueError when
     it is empty or does not match DNA_ALPHABET according to isListEqual.
     """
     if not isinstance(alphabet, list):
         raise TypeError("\n\nERROR: Expected list, got {}.\n".format(
             type(alphabet).__name__))
     if not alphabet:
         raise ValueError("\n\nERROR: Empty motif alphabet.\n")
     if not isListEqual(alphabet, DNA_ALPHABET):
         raise ValueError(
             "\n\nERROR: The motif is not built on DNA alphabet.\n")
     self.alphabet = alphabet
Beispiel #4
0
    def setAlphabet(self, alphabet: List[str]) -> None:
        """Validate the given alphabet and store it in ``self.alphabet``.

        Raises NotValidAlphabetException when the alphabet is not a list
        or does not match DNA_ALPHABET according to isListEqual.
        """

        problem = None
        if not isinstance(alphabet, list):
            problem = "\n\nERROR: the given alphabet is not in a list"
        elif not isListEqual(alphabet, DNA_ALPHABET):
            problem = "\n\nERROR: the given alphabet is not a valid DNA alphabet"

        if problem is not None:
            raise NotValidAlphabetException(problem)

        self.alphabet = alphabet
Beispiel #5
0
    def setAlphabet(self, alphabet):
        """Validate the given alphabet and store it in ``self.alphabet``.

        Raises NotValidAlphabetException when the alphabet is not a list
        or does not match DNA_ALPHABET according to isListEqual.
        """

        if not isinstance(alphabet, list):
            errmsg = "\n\nERROR: the given alphabet is not in a list"
            raise NotValidAlphabetException(errmsg)

        is_dna = isListEqual(alphabet, DNA_ALPHABET)
        if not is_dna:
            errmsg = "\n\nERROR: the given alphabet is not a valid DNA alphabet"
            raise NotValidAlphabetException(errmsg)

        self.alphabet = alphabet
Beispiel #6
0
    def __init__(self, count_matrix: np.ndarray, width: int,
                 alphabet: List[str], motif_id: str, motif_name: str,
                 nucsmap: dict):
        """Check every argument, then store them on the new motif object.

        The checks run in a fixed order (count matrix, width, motif ID,
        motif name, alphabet, nucleotide map) and the first failing one
        raises; nothing is stored in that case.

        Raises
        ------
        TypeError
            if an argument is not an instance of the expected type
        NotValidMotifMatrixException
            if count_matrix has no elements or its entries sum to zero
        ValueError
            if width is not positive, motif_id or motif_name is empty, or
            alphabet does not match DNA_ALPHABET according to isListEqual
        """

        def _typename(obj):
            # name of the received type, as reported in the TypeError text
            return type(obj).__name__

        if not isinstance(count_matrix, np.ndarray):
            raise TypeError(
                "\n\nERROR: Expected numpy.ndarray, got {}.\n".format(
                    _typename(count_matrix)))
        if count_matrix.size == 0 or sum(sum(count_matrix)) == 0:
            raise NotValidMotifMatrixException(
                "\n\nERROR: Empty motif count matrix.\n")

        if not isinstance(width, int):
            raise TypeError("\n\nERROR: Expected int, got {}.\n".format(
                _typename(width)))
        if width <= 0:
            raise ValueError(
                "\n\nERROR: Forbidden motif width {}.\n".format(width))

        if not isinstance(motif_id, str):
            raise TypeError("\n\nERROR: Expected str, got {}.\n".format(
                _typename(motif_id)))
        if not motif_id:
            raise ValueError("\n\nERROR: Not valid motif ID.\n")

        if not isinstance(motif_name, str):
            raise TypeError("\n\nERROR: Expected str, got {}.\n".format(
                _typename(motif_name)))
        if not motif_name:
            raise ValueError("\n\nERROR: Not valid motif name.\n")

        if not isinstance(alphabet, list):
            raise TypeError("\n\nERROR: Expected list, got {}.\n".format(
                _typename(alphabet)))
        if not isListEqual(alphabet, DNA_ALPHABET):
            raise ValueError(
                "\n\nERROR: The motif is not built on DNA alphabet.\n")

        if not isinstance(nucsmap, dict):
            raise TypeError("\n\nERROR: Expected dict, got {}.\n".format(
                _typename(nucsmap)))

        self._count_matrix = count_matrix
        self._width = width
        self._alphabet = alphabet
        self._motif_id = motif_id
        self._motif_name = motif_name
        self._nucsmap = nucsmap
Beispiel #7
0
def scale_pwm(motif_matrix: np.ndarray, alphabet: List[str], motif_width: int,
              nucsmap: dict,
              debug: bool) -> Tuple[np.ndarray, int, int, int, np.double]:
    """Scale the motif log-odds matrix scores to integer values.

    Every score is shifted by an offset (the floor of the lowest score),
    multiplied by a scaling factor derived from RANGE and rounded, so that
    the scaled scores are integers. The scaling improves computational
    speed while scoring potential motif occurrences, and allows constant
    time p-value estimation.

    Only the cells addressed by ``nucsmap[nuc]`` for each ``nuc`` in
    ``alphabet`` and by the first ``motif_width`` columns are scaled; any
    other cell of the returned matrix is left at 0.

    Invalid arguments are reported through ``exception_handler``.
    NOTE: a matrix whose entries sum to exactly zero is reported as empty.

    Parameters
    ----------
    motif_matrix : numpy.ndarray
        motif log-odds matrix
    alphabet: list
        DNA motif alphabet
    motif_width: int
        motif width
    nucsmap: dict
        nucleotide index map (nucleotide -> row index of motif_matrix)
    debug : bool
        trace the full error stack

    Returns
    -------
    numpy.ndarray
        scaled motif score matrix
    int
        minimum value of the scaled score matrix
    int
        maximum value of the scaled score matrix
    int
        scaling factor
    numpy.double
        scaling offset
    """

    if not isinstance(motif_matrix, np.ndarray):
        errmsg = "Expected numpy.ndarray, got {}.\n"
        exception_handler(TypeError,
                          errmsg.format(type(motif_matrix).__name__), debug)
    if motif_matrix.size == 0 or sum(sum(motif_matrix)) == 0:
        errmsg = "The motif log-odds matrix is empty.\n"
        exception_handler(ValueError, errmsg, debug)
    if not isinstance(alphabet, list):
        errmsg = "Expected list, got {}.\n"
        exception_handler(TypeError, errmsg.format(type(alphabet).__name__),
                          debug)
    if not isListEqual(alphabet, DNA_ALPHABET):
        errmsg = "The motif is not built on DNA alphabet.\n"
        exception_handler(ValueError, errmsg, debug)
    if not isinstance(motif_width, int):
        errmsg = "Expected int, got {}.\n"
        exception_handler(TypeError, errmsg.format(type(motif_width).__name__),
                          debug)
    if motif_width <= 0:
        errmsg = "Forbidden motif width.\n"
        exception_handler(ValueError, errmsg, debug)
    if not isinstance(nucsmap, dict):
        errmsg = "Expected dict, got {}.\n"
        exception_handler(TypeError, errmsg.format(type(nucsmap).__name__),
                          debug)

    lower = motif_matrix.min()
    upper = motif_matrix.max()
    if lower == upper:  # all values are equal
        # widen the interval so that the division below is well defined
        lower = np.double(upper - 1)
    # flooring once is enough: the result is already integer-valued
    offset = np.floor(lower)
    scale_factor = np.floor(RANGE / (upper - offset))

    motif_matrixsc = np.zeros(motif_matrix.shape, dtype=np.double)
    for nuc in alphabet:
        row = nucsmap[nuc]  # matrix row storing the scores of `nuc`
        for j in range(motif_width):
            motif_matrixsc[row, j] = np.round(
                (motif_matrix[row, j] - offset) * scale_factor)
    # make sure the values are integers
    motif_matrixsc = motif_matrixsc.astype(int)
    min_val = int(motif_matrixsc.min())  # scaled min
    max_val = int(motif_matrixsc.max())  # scaled max

    return motif_matrixsc, min_val, max_val, int(scale_factor), offset
Beispiel #8
0
def scale_pwm(motif_matrix: pd.DataFrame, alphabet: List[str],
              motif_width: int) -> Tuple[pd.DataFrame, int, int, int, np.double]:
    """Scale the log-odds values of the motif scoring matrix.

    Every score is shifted by an offset (the floor of the lowest score),
    multiplied by a scaling factor derived from RANGE and rounded. The
    scaling improves computational speed while computing the score for
    each motif occurrence candidate, and allows a constant time
    computation of the corresponding P-value.

    Only the cells addressed by the labels in ``alphabet`` (rows) and by
    ``0 .. motif_width - 1`` (columns) are scaled; any other cell of the
    returned frame is left at 0.

    Parameters
    ----------
    motif_matrix : pd.DataFrame
        motif log-odds matrix, indexed by nucleotide (rows) and by
        position (columns)
    alphabet: list
        DNA motif alphabet
    motif_width: int
        motif width

    Returns
    -------
    pd.DataFrame
        scaled motif scoring matrix, with the same index and columns as
        motif_matrix
    int
        minimum value of the scaled scoring matrix
    int
        maximum value of the scaled scoring matrix
    int
        scaling factor
    numpy.double
        scaling offset

    Raises
    ------
    NoDataFrameException
        if motif_matrix is not a pandas.DataFrame
    NotValidMotifMatrixException
        if motif_matrix is empty
    NotValidAlphabetException
        if alphabet is not a list or is not the DNA alphabet
    """

    if not isinstance(motif_matrix, pd.DataFrame):
        errmsg = "\n\nERROR: The given motif matrix must be an instance of pandas.DataFrame"
        raise NoDataFrameException(errmsg)

    if motif_matrix.empty:
        errmsg = "\n\nERROR: The given motif matrix is empty"
        raise NotValidMotifMatrixException(errmsg)

    if not isinstance(alphabet, list):
        errmsg = "\n\nERROR: The alphabet given is not in a list"
        raise NotValidAlphabetException(errmsg)

    if not isListEqual(alphabet, DNA_ALPHABET):
        errmsg = "\n\nERROR: The alphabet given is not a valid DNA alphabet"
        raise NotValidAlphabetException(errmsg)

    assert motif_width > 0

    # DataFrame.min()/max() work column-wise, the builtins reduce them to
    # the overall extremes
    lower = min(motif_matrix.min())
    upper = max(motif_matrix.max())
    motif_matrix_sc = pd.DataFrame(index=list(motif_matrix.index),
                                   columns=list(motif_matrix.columns),
                                   data=0)

    if lower == upper:  # all values are equal
        # widen the interval so that the division below is well defined
        lower = np.double(upper - 1)

    # flooring once is enough: the result is already integer-valued
    offset = np.floor(lower)
    scale_factor = np.floor(RANGE / (upper - offset))

    for nuc in alphabet:
        for j in range(motif_width):
            scaled_score = np.round(
                (motif_matrix.loc[nuc, j] - offset) * scale_factor)
            motif_matrix_sc.loc[nuc, j] = scaled_score

    # make sure the values are integers
    motif_matrix_sc[:] = motif_matrix_sc[:].astype(int)

    # extremes of the scaled matrix
    min_val = min(motif_matrix_sc.min())
    max_val = max(motif_matrix_sc.max())

    return motif_matrix_sc, min_val, max_val, int(scale_factor), offset
Beispiel #9
0
def read_MEME_motif(motif_file: str, bg_file: str, pseudocount: float,
                    no_reverse: bool, verbose: bool) -> List[Motif]:
    """Read the motif PWMs stored in a MEME file.

    Since a MEME file can contain one or more motifs, a Motif object is
    built for each PWM that is read completely; the resulting objects are
    returned in a list, in the order they appear in the file.

    Each probability matrix is processed with ``norm_motif`` and
    ``apply_pseudocount_meme`` before the Motif object is created, and the
    background distribution is attached to every motif with ``setBg``.

    Parameters
    ----------
    motif_file : str
        path to the motif PWM
    bg_file : str
        path to the background probability distribution, or 'UNIF' to use
        the distribution returned by ``get_uniformBG``
    pseudocount : float
        pseudocount forwarded to ``apply_pseudocount_meme``
    no_reverse : bool
        forwarded to ``pseudo_bg`` together with the background
        NOTE(review): presumably True restricts the processing to the
        forward strand -- confirm against ``pseudo_bg``
    verbose : bool
        print the time spent reading each motif

    Returns
    -------
    List[Motif]
        List of Motif objects storing the data contained in motif_file

    Raises
    ------
    FileReadingException
        if the motif file cannot be opened or parsed
    NotValidBGException
        if bg_file is neither 'UNIF' nor an existing path
    """

    try:
        # the context manager closes the file on every exit path, so no
        # explicit close() is needed (and none must be attempted when
        # open() itself fails)
        with open(motif_file, 'r') as in_mtf:

            # True between a MOTIF line and its matrix description line
            infostart = False
            # True while the rows of a probability matrix are being read
            datastart = False
            # number of motifs completely read from the MEME file
            motifs_found = 0

            motifID_lst: List[str] = []
            motifName_lst: List[str] = []
            motif_width_lst: List[int] = []
            site_counts_lst: List[int] = []
            motif_probs_lst: List[pd.DataFrame] = []
            # per-motif probabilities of each nucleotide, one value per
            # motif position
            a_lst: List[List[np.double]] = []
            c_lst: List[List[np.double]] = []
            g_lst: List[List[np.double]] = []
            t_lst: List[List[np.double]] = []

            motif_width = None
            pos_read = 0

            for line in in_mtf:
                if line[0:8] == 'ALPHABET':
                    # the symbols follow "ALPHABET= "; the last character
                    # of the line (the newline) is dropped
                    alphabet = sorted(set(line[10:-1]))
                    assert isListEqual(alphabet, DNA_ALPHABET)

                if line[0:5] == 'MOTIF':

                    if verbose:
                        start_rm = time.time()

                    motif_header = line.split()

                    assert len(motif_header) > 0

                    # there are two ways to define the motif name line
                    # in MEME file
                    # (refer to http://meme-suite.org/doc/meme-format.html?man_type=web):
                    #   1 - MOTIF motif_alternate_name
                    #   2 - MOTIF motif_identifier motif_alternate_name
                    if len(motif_header) == 2:  # support case (1)
                        motifID = motifName = motif_header[1]
                    else:  # support case (2)
                        motifID, motifName = motif_header[1:3]

                    motifID_lst.append(motifID)
                    motifName_lst.append(motifName)

                    # the informations about motif start here
                    infostart = True
                    continue

                if infostart and len(line.strip()) != 0:
                    # skip the first 26 characters of the line, then read
                    # the values as "key= value" pairs
                    infosplit = line[26:].split()
                    alphalen = int(infosplit[1])

                    assert alphalen == len(alphabet)

                    motif_width = int(infosplit[3])
                    site_counts = int(infosplit[5])
                    infostart = False  # informations end here

                    # allocate space for the motif probability matrix
                    motif_probs = pd.DataFrame(index=alphabet,
                                               columns=range(motif_width),
                                               data=np.double(0))

                    motif_width_lst.append(motif_width)
                    site_counts_lst.append(site_counts)
                    motif_probs_lst.append(motif_probs)

                    datastart = True  # at next step begin data

                    # initialize nucleotide data
                    a = []
                    c = []
                    g = []
                    t = []
                    continue

                if datastart and pos_read < motif_width:
                    # one matrix row: probabilities of A, C, G and T
                    freqs = line.split()
                    a.append(np.double(freqs[0]))
                    c.append(np.double(freqs[1]))
                    g.append(np.double(freqs[2]))
                    t.append(np.double(freqs[3]))
                    pos_read += 1

                # we read all current motif data
                if pos_read == motif_width:
                    a_lst.append(a)
                    c_lst.append(c)
                    g_lst.append(g)
                    t_lst.append(t)

                    # update stats about found motifs
                    motifs_found += 1

                    # get ready for the next motif
                    pos_read = 0
                    motif_width = None
                    datastart = False

                    if verbose:
                        end_rm = time.time()
                        msg = ''.join([
                            "Read motif ", motifID, " in ",
                            str(end_rm - start_rm), "s"
                        ])
                        print(msg)

    except Exception as exc:  # opening or parsing the file failed
        errmsg = ' '.join(["Unable to read file", motif_file])
        # keep the original error as the cause to ease debugging
        raise FileReadingException(errmsg) from exc

    else:

        # read the background
        if bg_file == 'UNIF':
            bgs = get_uniformBG(alphabet)
        elif os.path.exists(bg_file):
            bgs = readBGfile(bg_file)
        else:
            errmsg = "\n\nERROR: unable to find the given background file"
            raise NotValidBGException(errmsg)

        bgs = pseudo_bg(bgs, no_reverse)

        motif_lst = []

        for i in range(motifs_found):
            mp = motif_probs_lst[i]

            mp.loc['A'] = a_lst[i]
            mp.loc['C'] = c_lst[i]
            mp.loc['G'] = g_lst[i]
            mp.loc['T'] = t_lst[i]

            mw = motif_width_lst[i]
            sc = site_counts_lst[i]

            mp = norm_motif(mp, mw, alphabet)
            mp = apply_pseudocount_meme(mp, pseudocount, sc, mw, bgs, alphabet)

            motif = Motif(mp, mw, alphabet, motifID_lst[i], motifName_lst[i])
            motif.setBg(bgs)

            motif_lst.append(motif)

        return motif_lst
Beispiel #10
0
def scale_pwm(motif_matrix, alphabet, motif_width):
    """
        Scale the motif matrix values to integers.
        Every value is shifted by an offset (the floor of the lowest
        value), multiplied by a scaling factor derived from RANGE and
        rounded. Only the cells addressed by the labels in alphabet (rows)
        and by 0 .. motif_width - 1 (columns) are scaled, any other cell
        of the result is left at 0
        ----
        Parameters:
            motif_matrix (pd.DataFrame) : motif matrix, indexed by
                                          nucleotide (rows) and by
                                          position (columns)
            alphabet (list) : motif alphabet
            motif_width (int) : motif width
        ----
        Returns:
            motif_matrix_sc (pd.DataFrame) : scaled motif matrix
            min_val (int) : lowest value in the scaled motif matrix
            max_val (int) : highest value in the scaled motif matrix
            scale_factor (int) : scaling factor
            offset (float) : scaling offset
        ----
        Raises:
            NoDataFrameException : motif_matrix is not a pandas.DataFrame
            NotValidMotifMatrixException : motif_matrix is empty
            NotValidAlphabetException : alphabet is not a list or is not
                                        the DNA alphabet
    """

    if not isinstance(motif_matrix, pd.DataFrame):
        raise NoDataFrameException(
            "The given motif matrix must be an instance of pandas.DataFrame")

    if motif_matrix.empty:
        raise NotValidMotifMatrixException("The given motif matrix is empty")

    if not isinstance(alphabet, list):
        raise NotValidAlphabetException("The alphabet given is not in a list")

    if not isListEqual(alphabet, DNA_ALPHABET):
        raise NotValidAlphabetException(
            "The alphabet given is not a valid DNA alphabet")

    assert motif_width > 0

    # DataFrame.min()/max() work column-wise, the builtins reduce them to
    # the overall extremes
    lower = min(motif_matrix.min())
    upper = max(motif_matrix.max())
    motif_matrix_sc = pd.DataFrame(index=list(motif_matrix.index),
                                   columns=list(motif_matrix.columns),
                                   data=0)

    if lower == upper:  # all values are equal
        # widen the interval so that the division below is well defined
        lower = np.double(upper - 1)

    # flooring once is enough: the result is already integer-valued
    offset = np.floor(lower)
    scale_factor = np.floor(RANGE / (upper - offset))

    for nuc in alphabet:
        for j in range(motif_width):
            scaled_score = np.round(
                (motif_matrix.loc[nuc, j] - offset) * scale_factor)
            motif_matrix_sc.loc[nuc, j] = scaled_score

    # make sure the values are integers
    motif_matrix_sc[:] = motif_matrix_sc[:].astype(int)

    # extremes of the scaled matrix
    min_val = min(motif_matrix_sc.min())
    max_val = max(motif_matrix_sc.max())

    return motif_matrix_sc, min_val, max_val, int(scale_factor), offset
Beispiel #11
0
def read_MEME_motif(motif_file, bg_file, pseudocount, no_reverse, verbose):
    """
        Read the motif file in MEME format and build a Motif object for
        each motif that is read completely.
        Note that a MEME file can contain a variable number of
        motifs
        ----
        Params:
            motif_file (str) : path to the motif file
            bg_file (str) : path to the background file, or 'UNIF' to use
                            the distribution returned by get_uniformBG
            pseudocount (np.double) : pseudocount to add to motif frequencies
            no_reverse (bool) : if set to True, only data related to
                                forward strand will be used
                                (forwarded to pseudo_bg)
            verbose (bool) : print the time spent reading each motif
        ----
        Returns:
            motif_lst (list) : Motif objects, in the order they appear in
                               the file
        ----
        Raises:
            FileReadingException : the motif file cannot be opened or
                                   parsed
            NotValidBGException : bg_file is neither 'UNIF' nor an
                                  existing path
    """

    try:
        # the context manager closes the file on every exit path, so no
        # explicit close() is needed (and none must be attempted when
        # open() itself fails)
        with open(motif_file, 'r') as in_mtf:

            infostart = False  # True between a MOTIF line and its matrix description
            datastart = False  # True while the matrix rows are being read
            motifs_found = 0  # number of motifs completely read

            motifID_lst = []  # list of the found motif IDs
            motifName_lst = []  # list of the found motif names
            motif_width_lst = []  # list of the found motif widths
            site_counts_lst = []  # list of the found motif site counts
            motif_probs_lst = []  # list of the found motif probability matrices
            a_lst = []  # list of the found As probabilities for each motif
            c_lst = []  # list of the found Cs probabilities for each motif
            g_lst = []  # list of the found Gs probabilities for each motif
            t_lst = []  # list of the found Ts probabilities for each motif

            motif_width = None
            pos_read = 0

            for line in in_mtf:
                if line[0:8] == 'ALPHABET':
                    # the symbols follow "ALPHABET= "; the last character
                    # of the line (the newline) is dropped
                    alphabet = sorted(set(line[10:-1]))
                    assert isListEqual(alphabet, DNA_ALPHABET)

                if line[0:5] == 'MOTIF':

                    if verbose:
                        start_rm = time.time()

                    # the MEME format allows two forms of the MOTIF line:
                    #   1 - MOTIF motif_alternate_name
                    #   2 - MOTIF motif_identifier motif_alternate_name
                    motif_header = line.split()
                    if len(motif_header) == 2:  # form (1): reuse the name as ID
                        motifID = motifName = motif_header[1]
                    else:  # form (2)
                        motifID, motifName = motif_header[1:3]

                    motifID_lst.append(motifID)
                    motifName_lst.append(motifName)

                    # the informations about motif start here
                    infostart = True
                    continue

                if infostart and len(line.strip()) != 0:
                    # skip the first 26 characters of the line, then read
                    # the values as "key= value" pairs
                    infosplit = line[26:].split()
                    alphalen = int(infosplit[1])

                    assert alphalen == len(alphabet)

                    motif_width = int(infosplit[3])
                    site_counts = int(infosplit[5])
                    infostart = False  # informations end here

                    # allocate space for the motif probability matrix
                    motif_probs = pd.DataFrame(index=alphabet,
                                               columns=range(motif_width),
                                               data=np.double(0))

                    motif_width_lst.append(motif_width)
                    site_counts_lst.append(site_counts)
                    motif_probs_lst.append(motif_probs)

                    datastart = True  # at next step begin data

                    # initialize nucleotide data
                    a = []
                    c = []
                    g = []
                    t = []
                    continue

                if datastart and pos_read < motif_width:
                    # one matrix row: probabilities of A, C, G and T
                    freqs = line.split()
                    a.append(np.double(freqs[0]))
                    c.append(np.double(freqs[1]))
                    g.append(np.double(freqs[2]))
                    t.append(np.double(freqs[3]))
                    pos_read += 1

                # we read all current motif data
                if pos_read == motif_width:
                    a_lst.append(a)
                    c_lst.append(c)
                    g_lst.append(g)
                    t_lst.append(t)

                    # update stats about found motifs
                    motifs_found += 1

                    # get ready for the next motif
                    pos_read = 0
                    motif_width = None
                    datastart = False

                    if verbose:
                        end_rm = time.time()
                        msg = ''.join([
                            "Read motif ", motifID, " in ",
                            str(end_rm - start_rm), "s"
                        ])
                        print(msg)

    except Exception as exc:  # opening or parsing the file failed
        errmsg = ' '.join(["Unable to read file", motif_file])
        # keep the original error as the cause to ease debugging
        raise FileReadingException(errmsg) from exc

    else:

        # read the background
        if bg_file == 'UNIF':
            bgs = get_uniformBG(alphabet)
        elif os.path.exists(bg_file):
            bgs = readBGfile(bg_file)
        else:
            raise NotValidBGException(
                "\n\nERROR: unable to find the given background file")

        bgs = pseudo_bg(bgs, no_reverse)

        motif_lst = []  # list of found motifs

        for i in range(motifs_found):
            mp = motif_probs_lst[i]

            mp.loc['A'] = a_lst[i]
            mp.loc['C'] = c_lst[i]
            mp.loc['G'] = g_lst[i]
            mp.loc['T'] = t_lst[i]

            mw = motif_width_lst[i]
            sc = site_counts_lst[i]

            mp = norm_motif(mp, mw, alphabet)
            mp = apply_pseudocount_meme(mp, pseudocount, sc, mw, bgs, alphabet)

            motif = Motif(mp, mw, alphabet, motifID_lst[i], motifName_lst[i])
            motif.setBg(bgs)

            motif_lst.append(motif)

        return motif_lst