Example #1
0
    def __init__(self, count_matrix, width, alphabet, motif_id, motif_name):
        """Initialize the motif after validating every constructor argument.

        Parameters
        ----------
        count_matrix : pandas.DataFrame
            motif count matrix; must not be empty
        width : int
            motif width; must be an integer that is not negative
        alphabet : list
            motif alphabet; must be accepted by isListEqual when compared
            with DNA_ALPHABET
        motif_id : str
            motif identifier; must be a non-empty string
        motif_name : str
            motif name; must be a non-empty string

        Raises
        ------
        NoDataFrameException
            if count_matrix is not a pandas.DataFrame
        NotValidMotifMatrixException
            if count_matrix is empty
        WrongMotifWidthException
            if width is not an int or is negative
        WrongMotifIDException
            if motif_id is not a non-empty string
        WrongMotifNameException
            if motif_name is not a non-empty string
        NotValidAlphabetException
            if alphabet is not a list or does not match DNA_ALPHABET
        """

        # The type check has to come first: reading `.empty` on something
        # that is not a DataFrame would fail with an unrelated AttributeError
        # instead of the dedicated exception.
        if not isinstance(count_matrix, pd.DataFrame):
            raise NoDataFrameException(
                "\n\nERROR: the given value is not a pandas.DataFrame instance")

        if count_matrix.empty:
            errmsg = "\n\nERROR: attempt to initialize the motif object with an empty count matrix"
            raise NotValidMotifMatrixException(errmsg)

        if not isinstance(width, int) or width < 0:
            errmsg = "\n\nERROR: attempt to initialize motif without a valid width"
            raise WrongMotifWidthException(errmsg)

        if not isinstance(motif_id, str) or not motif_id:
            raise WrongMotifIDException(
                "\n\nERROR: cannot initialize the motif with the given ID")

        if not isinstance(motif_name, str) or not motif_name:
            raise WrongMotifNameException(
                "\n\nERROR: cannot initialize the motif with the given name")

        # isListEqual is only reached when alphabet is a list (short-circuit)
        if not isinstance(alphabet, list) or not isListEqual(
                alphabet, DNA_ALPHABET):
            errmsg = "\n\nERROR: cannot initialize a motif object with a wrong alphabet"
            raise NotValidAlphabetException(errmsg)

        self._count_matrix = count_matrix
        self._width = width
        self._motif_id = motif_id
        self._motif_name = motif_name
        self._alphabet = alphabet
Example #2
0
    def setMotif_pval_matrix(self, pval_mat: np.array) -> None:
        """Store the motif P-value matrix after a basic sanity check.

        Parameters
        ----------
        pval_mat : np.array
            P-value matrix to attach to the motif

        Raises
        ------
        NotValidMotifMatrixException
            if pval_mat has length zero or its values sum to a
            non-positive number
        """

        # the emptiness test is evaluated first, so the values of an empty
        # matrix are never summed
        is_empty = len(pval_mat) == 0
        if is_empty or sum(pval_mat[:]) <= 0:
            raise NotValidMotifMatrixException(
                "\n\nERROR: invalid p-value matrix")

        self._pval_matrix = pval_mat
Example #3
0
    def setMotif_pval_matrix(self, pval_mat):
        """
            Set the P-value matrix of the motif
            ----
            Parameters:
                pval_mat : P-value matrix; it must have a non-zero length
                           and its values must sum to a positive number
            ----
            Returns:
                None
        """

        # `or` short-circuits: the sum is computed only for non-empty input
        invalid = len(pval_mat) == 0 or sum(pval_mat[:]) <= 0

        if invalid:
            errmsg = "\n\nERROR: invalid p-value matrix"
            raise NotValidMotifMatrixException(errmsg)

        self._pval_matrix = pval_mat
Example #4
0
    def setMotif_scoreMatrix(self, score_matrix):
        """
            Set the score matrix of the motif
            ----
            Parameters:
                score_matrix (numpy.ndarray or pandas.DataFrame) : motif
                    score matrix; it cannot be empty
            ----
            Raises:
                ValueError : score_matrix is neither a numpy.ndarray nor a
                             pandas.DataFrame
                NotValidMotifMatrixException : score_matrix is empty
            ----
            Returns:
                None
        """

        is_array = isinstance(score_matrix, np.ndarray)
        is_frame = isinstance(score_matrix, pd.DataFrame)

        if not (is_array or is_frame):
            raise ValueError(
                "\n\nERROR: the given data-structure is not an instance of numpy.ndarray or pandas.DataFrame"
            )

        # each container type exposes emptiness through a different attribute
        has_no_data = ((is_frame and score_matrix.empty)
                       or (is_array and score_matrix.size == 0))
        if has_no_data:
            raise NotValidMotifMatrixException(
                "\n\nERROR: attempt to use an empty score matrix")

        self._score_matrix = score_matrix
Example #5
0
    def setMotif_matrix(self, motif_matrix):
        """
            Set the count matrix of the motif
            ----
            Parameters:
                motif_matrix (pandas.DataFrame) : motif matrix; it cannot
                                                  be empty
            ----
            Raises:
                NoDataFrameException : motif_matrix is not a
                                       pandas.DataFrame
                NotValidMotifMatrixException : motif_matrix is empty
            ----
            Returns:
                None
        """

        # check the type before touching `.empty`: objects that are not
        # DataFrames may not have that attribute at all
        if not isinstance(motif_matrix, pd.DataFrame):
            raise NoDataFrameException(
                "\n\nERROR: the given value is not a pandas.DataFrame instance"
            )

        if motif_matrix.empty:
            errmsg = "\n\nERROR: attempt to use an empty motif matrix"
            raise NotValidMotifMatrixException(errmsg)

        self._count_matrix = motif_matrix
Example #6
0
    def __init__(self, count_matrix: np.ndarray, width: int,
                 alphabet: List[str], motif_id: str, motif_name: str,
                 nucsmap: dict):
        """Initialize the motif after validating every constructor argument.

        Parameters
        ----------
        count_matrix : numpy.ndarray
            motif count matrix; must contain at least one element and its
            entries must not sum to zero
        width : int
            motif width; must be strictly positive
        alphabet : list
            motif alphabet; must be accepted by isListEqual when compared
            with DNA_ALPHABET
        motif_id : str
            motif identifier; must be a non-empty string
        motif_name : str
            motif name; must be a non-empty string
        nucsmap : dict
            stored as given after the type check

        Raises
        ------
        TypeError
            if any argument is not an instance of the expected type
        NotValidMotifMatrixException
            if count_matrix is empty or its entries sum to zero
        ValueError
            if width is not positive, motif_id or motif_name is an empty
            string, or alphabet does not match DNA_ALPHABET
        """

        if not isinstance(count_matrix, np.ndarray):
            errmsg = "\n\nERROR: Expected numpy.ndarray, got {}.\n"
            raise TypeError(errmsg.format(type(count_matrix).__name__))
        # ndarray.sum() reduces over every axis in one vectorised call, so
        # the check works for any number of dimensions
        if count_matrix.size == 0 or count_matrix.sum() == 0:
            errmsg = "\n\nERROR: Empty motif count matrix.\n"
            raise NotValidMotifMatrixException(errmsg)
        if not isinstance(width, int):
            errmsg = "\n\nERROR: Expected int, got {}.\n"
            raise TypeError(errmsg.format(type(width).__name__))
        if width <= 0:
            errmsg = "\n\nERROR: Forbidden motif width {}.\n"
            raise ValueError(errmsg.format(width))
        if not isinstance(motif_id, str):
            errmsg = "\n\nERROR: Expected str, got {}.\n"
            raise TypeError(errmsg.format(type(motif_id).__name__))
        if not motif_id:
            errmsg = "\n\nERROR: Not valid motif ID.\n"
            raise ValueError(errmsg)
        if not isinstance(motif_name, str):
            errmsg = "\n\nERROR: Expected str, got {}.\n"
            raise TypeError(errmsg.format(type(motif_name).__name__))
        if not motif_name:
            errmsg = "\n\nERROR: Not valid motif name.\n"
            raise ValueError(errmsg)
        if not isinstance(alphabet, list):
            errmsg = "\n\nERROR: Expected list, got {}.\n"
            raise TypeError(errmsg.format(type(alphabet).__name__))
        if not isListEqual(alphabet, DNA_ALPHABET):
            errmsg = "\n\nERROR: The motif is not built on DNA alphabet.\n"
            raise ValueError(errmsg)
        if not isinstance(nucsmap, dict):
            errmsg = "\n\nERROR: Expected dict, got {}.\n"
            raise TypeError(errmsg.format(type(nucsmap).__name__))

        self._count_matrix = count_matrix
        self._width = width
        self._motif_id = motif_id
        self._motif_name = motif_name
        self._alphabet = alphabet
        self._nucsmap = nucsmap
Example #7
0
def scale_pwm(motif_matrix: pd.DataFrame, alphabet: List[str],
              motif_width: int) -> Tuple[pd.DataFrame, int, int, int, np.double]:
    """Scale the log-odds values of the motif scoring matrix.

    The values are scaled in the range [0, 1000]. The scaling improves
    computational speed while computing the score for each motif
    occurrence candidate, and allows a constant time computation of
    the corresponding P-value.

    Only the cells addressed by the alphabet rows and by the column
    labels 0 .. motif_width - 1 are scaled; any other cell of the
    returned matrix is left at 0.

    Parameters
    ----------
    motif_matrix : pd.DataFrame
        motif log-odds matrix
    alphabet: list
        DNA motif alphabet
    motif_width: int
        motif width

    Returns
    -------
    pandas.DataFrame
        scaled motif scoring matrix, with the same index and columns as
        motif_matrix
    int
        minimum value of the scaled scoring matrix
    int
        maximum value of the scaled scoring matrix
    int
        scaling factor
    numpy.double
        scaling offset

    Raises
    ------
    NoDataFrameException
        if motif_matrix is not a pandas.DataFrame
    NotValidMotifMatrixException
        if motif_matrix is empty
    NotValidAlphabetException
        if alphabet is not a list or is not a valid DNA alphabet
    """

    if not isinstance(motif_matrix, pd.DataFrame):
        errmsg = "\n\nERROR: The given motif matrix must be an instance of pandas.DataFrame"
        raise NoDataFrameException(errmsg)

    if motif_matrix.empty:
        errmsg = "\n\nERROR: The given motif matrix is empty"
        raise NotValidMotifMatrixException(errmsg)

    if not isinstance(alphabet, list):
        errmsg = "\n\nERROR: The alphabet given is not in a list"
        raise NotValidAlphabetException(errmsg)

    if not isListEqual(alphabet, DNA_ALPHABET):
        errmsg = "\n\nERROR: The alphabet given is not a valid DNA alphabet"
        raise NotValidAlphabetException(errmsg)

    assert motif_width > 0

    lower = min(motif_matrix.min())
    upper = max(motif_matrix.max())

    if lower == upper:  # all values are equal
        # widen the interval so that the division below is well defined
        lower = np.double(upper - 1)

    # the offset is the floored lower bound; it also serves as the lower
    # end of the interval used to derive the scaling factor
    offset = np.floor(lower)
    scale_factor = np.floor(RANGE / (upper - offset))

    # scale the whole alphabet x width block with a single vectorised
    # expression; values will be in [0, 1000]
    positions = list(range(motif_width))
    raw_scores = motif_matrix.loc[alphabet, positions].to_numpy(dtype=np.double)
    scaled_scores = np.round((raw_scores - offset) * scale_factor)

    motif_matrix_sc = pd.DataFrame(index=list(motif_matrix.index),
                                   columns=list(motif_matrix.columns),
                                   data=0)
    # make sure the values are integers
    motif_matrix_sc.loc[alphabet, positions] = scaled_scores.astype(int)

    # now they are scaled
    min_val = motif_matrix_sc.min().min()
    max_val = motif_matrix_sc.max().max()

    return motif_matrix_sc, min_val, max_val, int(scale_factor), offset
Example #8
0
def scale_pwm(motif_matrix, alphabet, motif_width):
    """
        Scale the motif matrix values
        ----
        Parameters:
            motif_matrix (pd.DataFrame) : motif matrix to scale
            alphabet (list) : motif alphabet
            motif_width (int) : motif width
        ----
        Raises:
            NoDataFrameException : motif_matrix is not a pandas.DataFrame
            NotValidMotifMatrixException : motif_matrix is empty
            NotValidAlphabetException : alphabet is not a list or is not a
                                        valid DNA alphabet
        ----
        Returns:
            motif_matrix_sc (pd.DataFrame) : scaled motif matrix
            min_val : lowest value in the scaled motif matrix
            max_val : highest value in the scaled motif matrix
            scale_factor (int)
            offset : floored lower bound subtracted before scaling
    """

    # each failed check raises immediately; nothing after a `raise` would
    # ever run, so no explicit exit call is needed
    if not isinstance(motif_matrix, pd.DataFrame):
        raise NoDataFrameException(
            "The given motif matrix must be an instance of pandas.DataFrame")

    if motif_matrix.empty:
        raise NotValidMotifMatrixException("The given motif matrix is empty")

    if not isinstance(alphabet, list):
        raise NotValidAlphabetException("The alphabet given is not in a list")

    if not isListEqual(alphabet, DNA_ALPHABET):
        raise NotValidAlphabetException(
            "The alphabet given is not a valid DNA alphabet")

    assert motif_width > 0

    min_val = min(motif_matrix.min())
    max_val = max(motif_matrix.max())
    motif_matrix_sc = pd.DataFrame(index=list(motif_matrix.index),
                                   columns=list(motif_matrix.columns),
                                   data=0)

    lower = min_val
    upper = max_val

    if lower == upper:  # all values are equal
        # widen the interval so that the division below is well defined
        lower = np.double(upper - 1)

    lower = np.floor(lower)
    offset = np.round(np.floor(lower))
    scale_factor = np.floor(RANGE / (upper - lower))

    # values will be in [0, 1000]
    for nuc in alphabet:
        for j in range(motif_width):
            scaled_score = np.round(
                (motif_matrix.loc[nuc, j] - (offset)) * scale_factor)
            motif_matrix_sc.loc[nuc, j] = scaled_score
        # end for
    # end for

    # make sure the values are integers
    motif_matrix_sc[:] = motif_matrix_sc[:].astype(int)

    # now they are scaled
    min_val = min(motif_matrix_sc.min())
    max_val = max(motif_matrix_sc.max())

    return motif_matrix_sc, min_val, max_val, int(scale_factor), offset