Example #1
def prob(sigma, sigma0, theta=None, phi=None):
    """ Probability mass function of a MM with central ranking sigma0 and
    dispersion parameter theta/phi.
    Parameters
    ----------
    sigma: ndarray
        A permutation
    sigma0: ndarray
        central permutation
    theta: float
        Dispersion parameter (optional, if phi is given)
    phi: float
        Dispersion parameter (optional, if theta is given)
    Returns
    -------
    float
        The probability mass of sigma under the model.
    """
    theta, phi = mm.check_theta_phi(theta, phi)
    d = distance(sigma, sigma0)
    n = len(sigma)
    facts_ = np.array([1, 1] + [0] * (n - 1), dtype=float)

    for i in range(2, n + 1):
        facts_[i] = facts_[i - 1] * i
    total = 0
    for i in range(n + 1):
        total += ((np.exp(theta) - 1)**i) / facts_[i]
    psi = total * np.exp(-n * theta) * facts_[n]
    return np.exp(-d * theta) / psi
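
A quick standalone cross-check of the closed form above (plain NumPy only, no dependence on mm or the distance helper; sigma0 is taken as the identity, and n must stay small because all permutations are enumerated): the brute-force sum of exp(-theta * d_H) over all permutations should match psi = e^(-n*theta) * n! * sum_k (e^theta - 1)^k / k!.

import numpy as np
from itertools import permutations
from math import factorial

n, theta = 5, 0.7
sigma0 = np.arange(n)

# Brute-force normalization constant: sum of exp(-theta * Hamming distance).
psi_brute = sum(np.exp(-theta * np.sum(np.asarray(s) != sigma0))
                for s in permutations(range(n)))

# Closed form computed by the example above.
psi_closed = (np.exp(-n * theta) * factorial(n)
              * sum((np.exp(theta) - 1)**k / factorial(k) for k in range(n + 1)))

print(psi_brute, psi_closed)  # the two values should agree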
Example #2
def variance_v(n, theta=None, phi=None, k=None):
    """This function computes the variance of the decomposition vector under GMM.
        Parameters
        ----------
        n: int
            Length of the permutation in the considered model
        theta: float, optional (if phi is given)
            Real dispersion parameter
        phi  : float, optional (if theta is given)
            Real dispersion parameter
        k: int, optional
            Length of partial permutations (only top items)
        Returns
        -------
        ndarray
            The variance of the decomposition vector.
    """
    theta, phi = mm.check_theta_phi(theta, phi)
    if k is None:
        k = n - 1
    if not isinstance(phi, list):
        phi = np.full(k, phi)
    rnge = np.array(range(k))
    var_v = phi[rnge] / (1 - phi[rnge])**2 - (n - rnge)**2 * phi[rnge]**(
        n - rnge) / (1 - phi[rnge]**(n - rnge))**2
    return var_v
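
A standalone sketch of where this formula comes from, under the assumption (standard for the Kendall decomposition) that each V_j is a truncated geometric variable on {0, ..., n-j-1} with P(V_j = r) proportional to phi_j^r; the direct computation should match the closed form above.

import numpy as np

n, j, phi = 8, 2, 0.6          # j is the 0-based position in the decomposition vector
m = n - j                      # support size of V_j

r = np.arange(m)
p = phi**r / np.sum(phi**r)    # truncated geometric pmf
var_direct = np.sum(r**2 * p) - np.sum(r * p)**2

var_closed = phi / (1 - phi)**2 - m**2 * phi**m / (1 - phi**m)**2

print(var_direct, var_closed)  # should agree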
Example #3
def expected_dist_mm(n, theta=None, phi=None):
    """The function computes the expected value of Hamming distance under Mallows Models (MMs).
        Parameters
        ----------
        n: int
            Length of the permutation in the considered model
        theta: float
            Real dispersion parameter, optional (if phi is given)
        phi: float
            Real dispersion parameter, optional (if theta is given)
        Returns
        -------
        float
            The expected distance under MMs.
    """
    theta, phi = mm.check_theta_phi(theta, phi)

    facts_ = np.array([1, 1] + [0] * (n - 1), dtype=float)
    for i in range(2, n + 1):
        facts_[i] = facts_[i - 1] * i
    x_n_1, x_n = 0, 0

    for k in range(n + 1):
        aux = (np.exp(theta) - 1)**k / facts_[k]
        x_n += aux
        if k < n:
            x_n_1 += aux
    return (n * x_n - x_n_1 * np.exp(theta)) / x_n
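
A brute-force check of the closed form above (plain NumPy, small n only, sigma0 taken as the identity): weight each permutation by exp(-theta * d_H) and compare the resulting mean distance with (n*x_n - e^theta * x_{n-1}) / x_n.

import numpy as np
from itertools import permutations
from math import factorial

n, theta = 5, 0.5
sigma0 = np.arange(n)

dists = np.array([np.sum(np.asarray(s) != sigma0) for s in permutations(range(n))])
w = np.exp(-theta * dists)
e_brute = np.sum(dists * w) / np.sum(w)

x = [sum((np.exp(theta) - 1)**k / factorial(k) for k in range(m + 1)) for m in (n - 1, n)]
e_closed = (n * x[1] - np.exp(theta) * x[0]) / x[1]

print(e_brute, e_closed)  # should agree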
Example #4
def prob(sigma, sigma0, theta=None, phi=None):
    """Probability mass function of a MM with central ranking sigma0 and
    dispersion parameter theta/phi.
    Parameters
    ----------
    sigma: ndarray
        A permutation
    sigma0: ndarray
        central permutation
    theta: float
        Dispersion parameter (optional, if phi is given)
    phi: float
        Dispersion parameter (optional, if theta is given)
    Returns
    -------
    float
        The probability mass of sigma under the model.
    """
    n = len(sigma)
    theta, phi = mm.check_theta_phi(theta, phi)
    sigma0_inv = pu.inverse(sigma0)
    rnge = np.array(range(n - 1))

    psi = (1 - np.exp((-n + rnge) * (theta))) / (1 - np.exp(-theta))
    psi = np.prod(psi)

    dist = distance(pu.compose(sigma, sigma0_inv))

    return np.exp(-theta * dist) / psi
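
A standalone sanity check (small n, sigma0 taken as the identity): the pmf exp(-theta * d_K(sigma, sigma0)) / psi should sum to 1 over all permutations when psi is the Kendall normalization constant used above.

import numpy as np
from itertools import permutations

def kendall_to_identity(s):
    # number of discordant pairs (inversions) between s and the identity
    return sum(1 for i in range(len(s)) for j in range(i + 1, len(s)) if s[i] > s[j])

n, theta = 5, 0.8
j = np.arange(n - 1)
psi = np.prod((1 - np.exp(-(n - j) * theta)) / (1 - np.exp(-theta)))

total = sum(np.exp(-theta * kendall_to_identity(s)) / psi for s in permutations(range(n)))
print(total)  # should be very close to 1.0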
Example #5
def expected_v(n, theta=None, phi=None, k=None):  # TODO: hack, to be integrated
    """This function computes the expected decomposition vector.
        Parameters
        ----------
        n: int
            Length of the permutation in the considered model
        theta: float, optional (if phi is given)
            Real dispersion parameter
        phi  : float, optional (if theta is given)
            Real dispersion parameter
        k: int, optional
            Length of partial permutations (only top items)
        Returns
        -------
        ndarray
            The expected decomposition vector.
    """
    theta, phi = mm.check_theta_phi(theta, phi)
    if k is None:
        k = n - 1
    if not isinstance(theta, list):
        theta = np.full(k, theta)
    rnge = np.array(range(k))
    expected_v = np.exp(-theta[rnge]) / (1 - np.exp(-theta[rnge])) - (
        n - rnge) * np.exp(-(n - rnge) * theta[rnge]) / (
            1 - np.exp(-(n - rnge) * theta[rnge]))
    return expected_v
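
A standalone sketch, assuming the usual Kendall decomposition where V_j lives on {0, ..., n-j-1} with P(V_j = r) proportional to exp(-theta_j * r): the closed form above is the mean of that truncated geometric law, and with a single theta the E[V_j] sum to the expected Kendall distance.

import numpy as np

n, theta = 10, 0.3
j = np.arange(n - 1)
e_v = (np.exp(-theta) / (1 - np.exp(-theta))
       - (n - j) * np.exp(-(n - j) * theta) / (1 - np.exp(-(n - j) * theta)))

# Direct computation from the truncated geometric pmf, one position at a time.
e_direct = []
for m in n - j:
    r = np.arange(m)
    p = np.exp(-theta * r) / np.sum(np.exp(-theta * r))
    e_direct.append(np.sum(r * p))

print(np.allclose(e_v, e_direct))   # True
print(e_v.sum())                    # the expected Kendall distance under the MM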
Example #6
def sample(m, n, *, k=None, theta=None, phi=None, s0=None):
    """This function generates m (rankings) according to Mallows Models (if the given parameters
    are m, n, k/None, theta/phi: float, s0/None) or Generalized Mallows Models (if the given
    parameters are m, n, theta/phi: ndarray, s0/None). Moreover, the parameter k allows the
    function to generate top-k rankings only.
        Parameters
        ----------
        m: int
            Number of rankings to generate
        n: int
            Length of rankings
        theta: float or ndarray, optional (if phi given)
            The dispersion parameter theta
        phi: float or ndarray, optional (if theta given)
            Dispersion parameter phi
        k: int
            Length of partial permutations (only top items)
        s0: ndarray
            Consensus ranking
        Returns
        -------
        ndarray
            The rankings generated
    """

    theta, phi = mm.check_theta_phi(theta, phi)

    theta = np.full(n - 1, theta)

    if s0 is None:
        s0 = np.array(range(n))

    rnge = np.array(range(n - 1))

    psi = (1 - np.exp(
        (-n + rnge) * (theta[rnge]))) / (1 - np.exp(-theta[rnge]))
    vprobs = np.zeros((n, n))
    for j in range(n - 1):
        vprobs[j][0] = 1.0 / psi[j]
        for r in range(1, n - j):
            vprobs[j][r] = np.exp(-theta[j] * r) / psi[j]
    sample = []
    for samp in range(m):
        v = [np.random.choice(n, p=vprobs[i, :]) for i in range(n - 1)]
        v += [0]
        ranking = v_to_ranking(v, n)
        sample.append(ranking)

    sample = np.array([s[s0] for s in sample])

    if k is not None:
        sample_rankings = np.array(
            [pu.inverse(ordering) for ordering in sample])
        sample_rankings = np.array([ran[s0] for ran in sample_rankings])
        sample = np.array([[i if i < k else np.nan for i in ranking]
                           for ranking in sample_rankings])
    return sample
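
v_to_ranking and pu.inverse are not shown here; the sketch below is a self-contained version of the same sampling idea under one common convention (an assumption, not necessarily this library's): draw each V_j from its truncated geometric law and decode the vector as a Lehmer code, which yields Mallows samples centered at the identity.

import numpy as np

def sample_mm_kendall(m, n, theta, rng=None):
    # Illustrative standalone sampler; scalar theta, identity consensus.
    rng = np.random.default_rng(rng)
    out = np.empty((m, n), dtype=int)
    for s in range(m):
        remaining = list(range(n))
        for j in range(n):
            r = np.arange(n - j)
            p = np.exp(-theta * r)
            v_j = rng.choice(n - j, p=p / p.sum())   # truncated geometric draw
            out[s, j] = remaining.pop(v_j)           # Lehmer-code decoding
    return out

print(sample_mm_kendall(3, 6, theta=0.8, rng=0))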
Example #7
def sample(m, n, *, theta=None, phi=None, s0=None):
    """This function generates m permutations (rankings) according to Mallows Models.
        Parameters
        ----------
        m: int
            Number of rankings to generate
        n: int
            Length of rankings
        theta: float, optional (if phi given)
            Dispersion parameter theta
        phi: float, optional (if theta given)
            Dispersion parameter phi
        s0: ndarray
            Consensus ranking
        Returns
        -------
        ndarray
            The rankings generated.
    """
    sample = np.zeros((m, n))
    theta, phi = mm.check_theta_phi(theta, phi)

    facts_ = np.array([1, 1] + [0] * (n - 1), dtype=float)
    deran_num_ = np.array([1, 0] + [0] * (n - 1), dtype=float)
    for i in range(2, n + 1):
        facts_[i] = facts_[i - 1] * i
        deran_num_[i] = (deran_num_[i - 1] + deran_num_[i - 2]) * (i - 1)
    # Number of permutations at each Hamming distance d: C(n, d) * derangements(d).
    hamm_count_ = np.array([
        deran_num_[d] * facts_[n] / (facts_[d] * facts_[n - d])
        for d in range(n + 1)
    ], dtype=float)
    probsd = np.array(
        [hamm_count_[d] * np.exp(-theta * d) for d in range(n + 1)],
        dtype=float)

    for m_ in range(m):
        target_distance = np.random.choice(n + 1, p=probsd / probsd.sum())
        sample[m_, :] = sample_at_dist(n, target_distance, s0)

    return sample
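
sample_at_dist is likewise not shown; below is a minimal sketch of one way it could work (an assumption, with sample_at_dist_sketch a made-up name): choose d positions of s0 uniformly and rejection-sample a derangement of them, giving a permutation at Hamming distance exactly d. Note that d = 1 never occurs here, since the distance distribution above assigns it probability 0.

import numpy as np

def sample_at_dist_sketch(n, d, s0=None, rng=None):
    rng = np.random.default_rng(rng)
    if s0 is None:
        s0 = np.arange(n)
    sigma = np.array(s0, copy=True)
    if d == 0:
        return sigma
    pos = rng.choice(n, size=d, replace=False)   # positions to perturb (d != 1)
    while True:
        perm = rng.permutation(pos)
        if np.all(perm != pos):                  # derangement: no fixed point
            sigma[pos] = s0[perm]
            return sigma

print(sample_at_dist_sketch(8, 3, rng=1))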
Example #8
def psi_mm(n, theta=None, phi=None):
    """This function computes the normalization constant psi.
        Parameters
        ----------
        n: int
            Length of the permutation in the considered model
        theta: float
            Real dispersion parameter (optional if phi is given)
        phi: float
            Real dispersion parameter (optional if theta is given)
        Returns
        -------
        float
            The normalization constant psi.
    """
    theta, phi = mm.check_theta_phi(theta, phi)
    rnge = np.array(range(2, n + 1))
    if theta is not None:
        return np.prod((1 - np.exp(-theta * rnge)) / (1 - np.exp(-theta)))
    return np.prod((1 - np.power(phi, rnge)) / (1 - phi))
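
A quick standalone check that the two parameterizations agree when phi = exp(-theta).

import numpy as np

n, theta = 12, 0.4
phi = np.exp(-theta)
i = np.arange(2, n + 1)
psi_theta = np.prod((1 - np.exp(-theta * i)) / (1 - np.exp(-theta)))
psi_phi = np.prod((1 - phi**i) / (1 - phi))
print(np.isclose(psi_theta, psi_phi))  # True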
Example #9
def expected_dist_mm(n, theta=None, phi=None):
    """The function computes the expected distance of Kendall's-tau distance under Mallows models (MMs).
        Parameters
        ----------
        n: int
            Length of the permutation in the considered model
        theta: float
            Real dispersion parameter, optional (if phi is given)
        phi: float
            Real dispersion parameter, optional (if theta is given)
        Returns
        -------
        float
            The expected distance under MMs.
    """
    theta, phi = mm.check_theta_phi(theta, phi)
    rnge = np.array(range(1, n + 1))
    expected_dist = n * np.exp(-theta) / (1 - np.exp(-theta)) - np.sum(
        rnge * np.exp(-rnge * theta) / (1 - np.exp(-rnge * theta)))

    return expected_dist
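
A standalone numeric check of this expression: since the MM is an exponential family in the distance, the expected distance equals -d/dtheta log(psi) for the Kendall normalization constant, which a central finite difference reproduces (log_psi here is an illustrative helper).

import numpy as np

def log_psi(n, theta):
    i = np.arange(2, n + 1)
    return np.sum(np.log((1 - np.exp(-theta * i)) / (1 - np.exp(-theta))))

n, theta, h = 10, 0.6, 1e-6
i = np.arange(1, n + 1)
e_closed = n * np.exp(-theta) / (1 - np.exp(-theta)) - np.sum(
    i * np.exp(-i * theta) / (1 - np.exp(-i * theta)))
e_numeric = -(log_psi(n, theta + h) - log_psi(n, theta - h)) / (2 * h)
print(e_closed, e_numeric)  # should agree to roughly 6 decimal places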
Example #10
def variance_dist_mm(n, theta=None, phi=None):
    """ This function returns the variance of Kendall's-tau distance under the MMs.
        Parameters
        ----------
        n: int
            Length of the permutations
        theta: float
            Dispersion parameter, optional (if phi is given)
        phi  : float
            Dispersion parameter, optional (if theta is given)
        Returns
        -------
        float
            The variance of Kendall's-tau distance under the MMs.

    """
    theta, phi = mm.check_theta_phi(theta, phi)
    rnge = np.array(range(1, n + 1))
    variance = (phi * n) / (1 - phi)**2 - np.sum(
        (pow(phi, rnge) * rnge**2) / (1 - pow(phi, rnge))**2)

    return variance
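
A standalone check, assuming the standard decomposition of the Kendall distance into independent truncated geometric terms V_j: the closed form above should equal the sum of the per-position variances.

import numpy as np

n, phi = 10, 0.7
i = np.arange(1, n + 1)
var_closed = phi * n / (1 - phi)**2 - np.sum(phi**i * i**2 / (1 - phi**i)**2)

var_sum = 0.0
for m in range(2, n + 1):              # support sizes of V_0, ..., V_{n-2}
    r = np.arange(m)
    p = phi**r / np.sum(phi**r)
    var_sum += np.sum(r**2 * p) - np.sum(r * p)**2

print(var_closed, var_sum)  # should agree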
Example #11
def variance_dist_top_k(n, k, theta=None, phi=None):
    """Compute the variance of the distance for top-k rankings, following
        a MM under the Kendall's-tau distance.
        Parameters
        ----------
        n: int
            Length of the permutation in the considered model
        k: int
            Length of partial permutations (only top items)
        theta: float, optional (if phi is given)
            Real dispersion parameter
        phi  : float, optional (if theta is given)
            Real dispersion parameter
        Returns
        -------
        float
            The variance under the MMs.
    """
    theta, phi = mm.check_theta_phi(theta, phi)
    rnge = np.array(range(n - k + 1, n + 1))
    variance = (phi * k) / (1 - phi)**2 - np.sum(
        (pow(phi, rnge) * rnge**2) / (1 - pow(phi, rnge))**2)
    return variance
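
A small consistency check (var_top_k here is just an illustrative re-implementation of the formula above): with k = n the top-k expression reduces to the full Kendall variance of variance_dist_mm.

import numpy as np

def var_top_k(n, k, phi):
    i = np.arange(n - k + 1, n + 1)
    return phi * k / (1 - phi)**2 - np.sum(phi**i * i**2 / (1 - phi**i)**2)

n, phi = 9, 0.55
i = np.arange(1, n + 1)
var_full = phi * n / (1 - phi)**2 - np.sum(phi**i * i**2 / (1 - phi**i)**2)
print(np.isclose(var_top_k(n, n, phi), var_full))  # True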