def prob(sigma, sigma0, theta=None, phi=None):
    """Probability mass function of a MM with central ranking sigma0.

    The probability is ``exp(-theta * d) / psi`` where ``d`` is
    ``distance(sigma, sigma0)`` and the normalisation constant is
    ``psi = n! * exp(-n * theta) * sum_{i=0..n} (exp(theta) - 1)**i / i!``.

    Parameters
    ----------
    sigma: ndarray
        A permutation
    sigma0: ndarray
        Central permutation
    theta: float
        Dispersion parameter (optional, if phi is given)
    phi: float
        Dispersion parameter (optional, if theta is given)

    Returns
    -------
    float
        Probability mass of ``sigma``.
    """
    # NOTE(review): mm.check_theta_phi is defined outside this block;
    # presumably it fills in whichever of theta/phi was omitted - confirm.
    theta, phi = mm.check_theta_phi(theta, phi)
    d = distance(sigma, sigma0)
    n = len(sigma)

    # facts[i] == i!, kept as floats. The builtin ``float`` replaces the
    # ``np.float`` alias, which was removed from NumPy.
    facts = np.array([1, 1] + [0] * (n - 1), dtype=float)
    for i in range(2, n + 1):
        facts[i] = facts[i - 1] * i

    growth = np.exp(theta) - 1
    # Accumulate in increasing i; the name ``series`` avoids shadowing the
    # builtin ``sum``.
    series = 0
    for i in range(n + 1):
        series += (growth**i) / facts[i]

    psi = series * np.exp(-n * theta) * facts[n]
    return np.exp(-d * theta) / psi
def variance_v(n, theta=None, phi=None, k=None):
    """Compute the variance of the decomposition vector under GMM.

    For position ``j`` in ``0..k-1`` the returned entry is
    ``phi_j / (1 - phi_j)**2
    - (n - j)**2 * phi_j**(n - j) / (1 - phi_j**(n - j))**2``.

    Parameters
    ----------
    n: int
        Length of the permutation in the considered model
    theta: float, optional (if phi is given)
        Real dispersion parameter
    phi : float, optional (if theta is given)
        Real dispersion parameter. A scalar is replicated ``k`` times; a
        sequence must be broadcastable to length ``k``.
    k: int, optional
        Length of partial permutations (only top items). Defaults to
        ``n - 1``.

    Returns
    -------
    ndarray
        The variance of the decomposition vector (length ``k``).
    """
    # NOTE(review): mm.check_theta_phi is defined outside this block;
    # presumably it fills in whichever of theta/phi was omitted - confirm.
    theta, phi = mm.check_theta_phi(theta, phi)
    if k is None:
        k = n - 1

    # Normalise scalars, lists and arrays alike to a float vector of length
    # k. The previous ``type(phi) != list`` check left lists untouched, and
    # a plain list cannot be indexed with an ndarray.
    phi = np.full(k, np.asarray(phi, dtype=float))

    remaining = n - np.arange(k)  # n - j for j = 0..k-1
    phi_pow = phi**remaining
    return phi / (1 - phi)**2 - remaining**2 * phi_pow / (1 - phi_pow)**2
def expected_dist_mm(n, theta=None, phi=None):
    """Compute the expected value of Hamming distance under Mallows Models.

    With ``x_m = sum_{k=0..m} (exp(theta) - 1)**k / k!`` the result is
    ``(n * x_n - exp(theta) * x_{n-1}) / x_n``.

    Parameters
    ----------
    n: int
        Length of the permutation in the considered model
    theta: float
        Real dispersion parameter, optional (if phi is given)
    phi: float
        Real dispersion parameter, optional (if theta is given)

    Returns
    -------
    float
        The expected distance under MMs.
    """
    # NOTE(review): mm.check_theta_phi is defined outside this block;
    # presumably it fills in whichever of theta/phi was omitted - confirm.
    theta, phi = mm.check_theta_phi(theta, phi)

    # facts[i] == i!, kept as floats. The builtin ``float`` replaces the
    # ``np.float`` alias, which was removed from NumPy.
    facts = np.array([1, 1] + [0] * (n - 1), dtype=float)
    for i in range(2, n + 1):
        facts[i] = facts[i - 1] * i

    growth = np.exp(theta) - 1
    x_n_1, x_n = 0, 0
    for k in range(n + 1):
        term = growth**k / facts[k]
        x_n += term
        # x_n_1 is the same series truncated one term earlier.
        if k < n:
            x_n_1 += term

    return (n * x_n - x_n_1 * np.exp(theta)) / x_n
def prob(sigma, sigma0, theta=None, phi=None):
    """Probability mass function of a MM centred at sigma0.

    The permutation is first composed with the inverse of the central
    permutation; the probability is ``exp(-theta * dist) / psi`` with
    ``psi = prod_{j=0..n-2} (1 - exp(-(n - j) * theta)) / (1 - exp(-theta))``.

    Parameters
    ----------
    sigma: ndarray
        A permutation
    sigma0: ndarray
        Central permutation
    theta: float
        Dispersion parameter (optional, if phi is given)
    phi: float
        Dispersion parameter (optional, if theta is given)

    Returns
    -------
    float
        Probability mass of ``sigma``.
    """
    n = len(sigma)
    theta, phi = mm.check_theta_phi(theta, phi)
    central_inverse = pu.inverse(sigma0)

    # Normalisation constant: one factor per position j = 0..n-2.
    positions = np.arange(n - 1)
    factors = (1 - np.exp((-n + positions) * theta)) / (1 - np.exp(-theta))
    normaliser = np.prod(factors)

    # Distance of sigma relative to the central permutation.
    relative = pu.compose(sigma, central_inverse)
    return np.exp(-theta * distance(relative)) / normaliser
def expected_v(n, theta=None, phi=None, k=None):  # HACK: to be integrated (translated from the original inline note)
    """Compute the expected decomposition vector.

    For position ``j`` in ``0..k-1`` the returned entry is
    ``exp(-theta_j) / (1 - exp(-theta_j))
    - (n - j) * exp(-(n - j) * theta_j) / (1 - exp(-(n - j) * theta_j))``.

    Parameters
    ----------
    n: int
        Length of the permutation in the considered model
    theta: float, optional (if phi is given)
        Real dispersion parameter. A scalar is replicated ``k`` times; a
        sequence must be broadcastable to length ``k``.
    phi : float, optional (if theta is given)
        Real dispersion parameter
    k: int, optional
        Length of partial permutations (only top items). Defaults to
        ``n - 1``.

    Returns
    -------
    ndarray
        The expected decomposition vector (length ``k``).
    """
    # NOTE(review): mm.check_theta_phi is defined outside this block;
    # presumably it fills in whichever of theta/phi was omitted - confirm.
    theta, phi = mm.check_theta_phi(theta, phi)
    if k is None:
        k = n - 1

    # Normalise scalars, lists and arrays alike to a float vector of length
    # k. The previous ``type(theta) != list`` check left lists untouched,
    # and a plain list cannot be indexed with an ndarray.
    theta = np.full(k, np.asarray(theta, dtype=float))

    remaining = n - np.arange(k)  # n - j for j = 0..k-1
    head = np.exp(-theta) / (1 - np.exp(-theta))
    tail = remaining * np.exp(-remaining * theta) / (
        1 - np.exp(-remaining * theta))
    return head - tail
def sample(m, n, *, k=None, theta=None, phi=None, s0=None):
    """Generate m rankings from a Mallows or Generalized Mallows Model.

    A scalar theta/phi gives a Mallows Model; an ndarray theta/phi gives a
    Generalized Mallows Model. For each ranking, a vector ``v`` is drawn
    position by position and converted with ``v_to_ranking``; the result is
    then indexed by ``s0``. When ``k`` is given, each sampled row is
    inverted, re-indexed by ``s0``, and every entry that is not in
    ``range(k)`` is replaced by NaN.

    Parameters
    ----------
    m: int
        Number of rankings to generate
    n: int
        Length of rankings
    k: int
        Length of partial permutations (only top items)
    theta: float or ndarray, optional (if phi given)
        The dispersion parameter theta
    phi: float or ndarray, optional (if theta given)
        Dispersion parameter phi
    s0: ndarray
        Consensus ranking. Defaults to ``0..n-1``.

    Returns
    -------
    ndarray
        The rankings generated
    """
    theta, phi = mm.check_theta_phi(theta, phi)
    theta = np.full(n - 1, theta)
    if s0 is None:
        s0 = np.array(range(n))

    # Per-position normalisation constants.
    positions = np.array(range(n - 1))
    psi = (1 - np.exp((-n + positions) * theta[positions])) / (
        1 - np.exp(-theta[positions]))

    # v_probs[j, r] is the probability that position j of v takes value r;
    # entries with r >= n - j (and the whole last row) stay at zero.
    v_probs = np.zeros((n, n))
    for j in range(n - 1):
        psi_j = psi[j]
        v_probs[j][0] = 1.0 / psi_j
        for r in range(1, n - j):
            v_probs[j][r] = np.exp(-theta[j] * r) / psi_j

    draws = []
    for _ in range(m):
        v = [np.random.choice(n, p=v_probs[i, :]) for i in range(n - 1)]
        v.append(0)  # the last component of v is always 0
        draws.append(v_to_ranking(v, n))
    result = np.array([drawn[s0] for drawn in draws])

    if k is None:
        return result

    inverted = np.array([pu.inverse(ordering) for ordering in result])
    inverted = np.array([row[s0] for row in inverted])
    return np.array([[item if item in range(k) else np.nan for item in row]
                     for row in inverted])
def sample(m, n, *, theta=None, phi=None, s0=None):
    """Generate m permutations (rankings) according to Mallows Models.

    A target distance ``d`` in ``0..n`` is drawn with probability
    proportional to ``hamm_count[d] * exp(-theta * d)``, where
    ``hamm_count[d] = deran_num[d] * n! / (d! * (n - d)!)``. The ranking
    itself is then produced by ``sample_at_dist(n, d, s0)``.

    Parameters
    ----------
    m: int
        Number of rankings to generate
    n: int
        Length of rankings
    theta: float, optional (if phi given)
        Dispersion parameter theta
    phi: float, optional (if theta given)
        Dispersion parameter phi
    s0: ndarray
        Consensus ranking

    Returns
    -------
    ndarray
        The rankings generated, one per row (shape ``(m, n)``).
    """
    rankings = np.zeros((m, n))
    # NOTE(review): mm.check_theta_phi is defined outside this block;
    # presumably it fills in whichever of theta/phi was omitted - confirm.
    theta, phi = mm.check_theta_phi(theta, phi)

    # facts[i] == i! and deran_num[i] follows the recurrence
    # D_i = (i - 1) * (D_{i-1} + D_{i-2}) with D_0 = 1, D_1 = 0.
    # The builtin ``float`` replaces the ``np.float`` alias, which was
    # removed from NumPy.
    facts = np.array([1, 1] + [0] * (n - 1), dtype=float)
    deran_num = np.array([1, 0] + [0] * (n - 1), dtype=float)
    for i in range(2, n + 1):
        facts[i] = facts[i - 1] * i
        deran_num[i] = (deran_num[i - 1] * (i - 1)
                        + deran_num[i - 2] * (i - 1))

    hamm_count = np.array(
        [deran_num[d] * facts[n] / (facts[d] * facts[n - d])
         for d in range(n + 1)],
        dtype=float)
    weights = np.array(
        [hamm_count[d] * np.exp(-theta * d) for d in range(n + 1)],
        dtype=float)

    # The distance distribution does not change between draws, so it is
    # normalised once instead of on every iteration.
    dist_probs = weights / weights.sum()

    for row in range(m):
        target_distance = np.random.choice(n + 1, p=dist_probs)
        rankings[row, :] = sample_at_dist(n, target_distance, s0)
    return rankings
def psi_mm(n, theta=None, phi=None):
    """Compute the normalization constant psi.

    With theta: ``prod_{j=2..n} (1 - exp(-theta * j)) / (1 - exp(-theta))``.
    With phi:   ``prod_{j=2..n} (1 - phi**j) / (1 - phi)``.
    If both are supplied, theta takes precedence.

    Parameters
    ----------
    n: int
        Length of the permutation in the considered model
    theta: float
        Real dispersion parameter (optional if phi is given)
    phi: float
        Real dispersion parameter (optional if theta is given)

    Returns
    -------
    float
        The normalization constant psi. When neither theta nor phi is
        given, ``mm.check_theta_phi`` is called and, unless it raises,
        nothing is returned.
    """
    exponents = np.array(range(2, n + 1))

    if theta is not None:
        numerators = 1 - np.exp(-theta * exponents)
        return np.prod(numerators / (1 - np.exp(-theta)))

    if phi is not None:
        numerators = 1 - np.power(phi, exponents)
        return np.prod(numerators / (1 - phi))

    # Neither parameter was supplied: hand over to the shared checker.
    theta, phi = mm.check_theta_phi(theta, phi)
def expected_dist_mm(n, theta=None, phi=None):
    """Compute the expected Kendall's-tau distance under Mallows models.

    The value is ``n * exp(-theta) / (1 - exp(-theta))
    - sum_{j=1..n} j * exp(-j * theta) / (1 - exp(-j * theta))``.

    Parameters
    ----------
    n: int
        Length of the permutation in the considered model
    theta: float
        Real dispersion parameter, optional (if phi is given)
    phi: float
        Real dispersion parameter, optional (if theta is given)

    Returns
    -------
    float
        The expected distance under MMs.
    """
    theta, phi = mm.check_theta_phi(theta, phi)

    decay = np.exp(-theta)
    leading = n * decay / (1 - decay)

    j = np.arange(1, n + 1)
    decay_j = np.exp(-j * theta)
    correction = np.sum(j * decay_j / (1 - decay_j))

    return leading - correction
def variance_dist_mm(n, theta=None, phi=None):
    """Return the variance of Kendall's-tau distance under the MMs.

    The value is ``n * phi / (1 - phi)**2
    - sum_{j=1..n} j**2 * phi**j / (1 - phi**j)**2``.

    Parameters
    ----------
    n: int
        Length of the permutations
    theta: float
        Dispersion parameter, optional (if phi is given)
    phi : float
        Dispersion parameter, optional (if theta is given)

    Returns
    -------
    float
        The variance of Kendall's-tau distance under the MMs.
    """
    theta, phi = mm.check_theta_phi(theta, phi)

    j = np.arange(1, n + 1)
    phi_j = pow(phi, j)

    leading = (phi * n) / (1 - phi)**2
    correction = np.sum((phi_j * j**2) / (1 - phi_j)**2)
    return leading - correction
def variance_dist_top_k(n, k, theta=None, phi=None):
    """Compute the variance of the distance for top-k rankings.

    The rankings follow a MM under the Kendall's-tau distance. The value is
    ``k * phi / (1 - phi)**2
    - sum_{j=n-k+1..n} j**2 * phi**j / (1 - phi**j)**2``.

    Parameters
    ----------
    n: int
        Length of the permutation in the considered model
    k: int
        Length of partial permutations (only top items)
    theta: float, optional (if phi is given)
        Real dispersion parameter
    phi : float, optional (if theta is given)
        Real dispersion parameter

    Returns
    -------
    float
        The variance under the MMs.
    """
    theta, phi = mm.check_theta_phi(theta, phi)

    # Only the k largest exponents, n-k+1 .. n, enter the correction term.
    j = np.arange(n - k + 1, n + 1)
    phi_j = pow(phi, j)

    leading = (phi * k) / (1 - phi)**2
    correction = np.sum((phi_j * j**2) / (1 - phi_j)**2)
    return leading - correction