def evaluate(self, train, val, test, dim, area):
        """Mix the memory model with NMF and with hb-NMF and report both on test.

        The per-individual mixing weights are learned on ``val`` from models
        fitted on ``train`` only; the models are then refitted on
        ``train + val`` and scored on ``test`` with those weights.

        Returns a dict with keys ``'mem_nmf'`` and ``'mem_hb_nmf'`` holding
        whatever ``self._compute_erank`` returns for each combination.
        """
        model_names = ('nmf', 'hbnmf', 'memory')
        flat_prior = 0.001      # keeps the factorization multinomials free of zeros
        dirichlet_alpha = 1.1   # prior handed to the mixing-weight learner

        log.info('Learning Memory, NMF and hb NMF mfs on train only for mixing weights optimization')
        fitted = self._train_mfs(list(model_names), train, dim, area)
        nmf_fit, hb_fit, mem_fit = fitted

        log.info('Learning mix for MEM and NMF')
        mem_mult = normalize_mat_row(mem_fit)
        nmf_mult = normalize_mat_row(nmf_fit + flat_prior)
        pis_mem_nmf = learn_mix_mult_on_individual(dirichlet_alpha, mem_mult, nmf_mult, val)

        log.info('Learning mix for MEM and hb NMF')
        hb_nmf_mult = normalize_mat_row(hb_fit + flat_prior)
        pis_mem_hb_nmf = learn_mix_mult_on_individual(dirichlet_alpha, mem_mult, hb_nmf_mult, val)

        log.info('Learning Memory NMF and hier NMF mfs on train+val for evaluation')
        refit = self._train_mfs(list(model_names), train + val, dim, area)
        nmf_fit, hb_fit, mem_fit = refit

        # The refitted raw scores are passed on without the flat prior; the
        # original author notes the prior would not change the ranking.
        results = {}

        log.info('Evaluating memory with NMF')
        results['mem_nmf'] = self._compute_erank(test, mem_fit, nmf_fit, pis_mem_nmf)

        log.info('Evaluating memory with hb_NMF')
        results['mem_hb_nmf'] = self._compute_erank(test, mem_fit, hb_fit, pis_mem_hb_nmf)

        self.pretty_print(results)
        return results
Exemple #2
0
    def evaluate(self, train, val, test, dim, area):
        """Sweep the memory/popularity interpolation weight and print log-likelihoods.

        For every ``alpha`` in a fixed grid the score matrix
        ``alpha * mem_mult + (1 - alpha) * popularity_mult`` is scored with
        ``self._compute_logp`` on both ``val`` and ``test``.  The sweep is run
        twice: first with models fitted on ``train``, then with models fitted
        on ``train + val``.  Results are only logged / pretty-printed; nothing
        is returned.
        """
        ALPHA = [
            0, 0.001, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99,
            0.999, 1
        ]

        def sweep(fit_data, banner):
            # Fit both component models on `fit_data` and sweep the grid once.
            mem_scores = self._train_mfs(['memory'], fit_data, dim, area)[0]
            popularity_scores = self._train_mfs(['popularity'], fit_data, dim,
                                                area)[0]

            mem_mult = normalize_mat_row(mem_scores)
            popularity_mult = normalize_mat_row(popularity_scores)

            log.info(banner)
            results_val = dict()
            results_test = dict()
            for alpha in ALPHA:
                # Three decimals are required: with '%.2f' the grid points 0 and
                # 0.001 (and 0.999 and 1) format to the same key, so one result
                # silently overwrote the other.
                key = '%.3f' % alpha
                log.info('Ranking when alpha is %s' % key)
                scores = alpha * mem_mult + (1 - alpha) * popularity_mult
                results_val[key] = self._compute_logp(val, scores)
                results_test[key] = self._compute_logp(test, scores)

            log.info('Log likelihood on validation data')
            self.pretty_print(results_val)
            log.info('Log likelihood on test data')
            self.pretty_print(results_test)

        sweep(train,
              'Mem and popularity learnt from training data; searching alpha')

        # Second pass: the validation numbers here are computed on data the
        # models were fitted on, so only the test numbers are held out.
        sweep(train + val,
              'Mem and popularity learnt from training and val data; searching alpha')
Exemple #3
0
    def evaluate(self, train, val, test, dim, area):
        """Mix memory and popularity with per-individual weights and score on test.

        Both models are fitted on ``train``; the mixing weights are learned on
        ``val`` by ``learn_mix_mult_on_individual``.  Returns a one-entry dict
        keyed ``'MEMORY+POPULARITY'`` with the value ``self._compute_erank``
        produces on ``test``.
        """
        memory_raw, popularity_raw = [
            self._train_mfs([model], train, dim, area)[0]
            for model in ('memory', 'popularity')
        ]

        # Row-normalize both models; popularity gets a small additive constant
        # before normalization.
        mem_mult = normalize_mat_row(memory_raw)
        popularity_mult = normalize_mat_row(popularity_raw + 0.001)

        mixing_weights = learn_mix_mult_on_individual(
            1.1, mem_mult, popularity_mult, val)

        log.info('Evaluating memory with popularity')
        results = {
            'MEMORY+POPULARITY': self._compute_erank(
                test, mem_mult, popularity_mult, mixing_weights),
        }
        self.pretty_print(results)

        return results
    def evaluate(self, train, val, test, dim, area):
        """Sweep the memory/popularity interpolation weight and print eranks.

        For every ``alpha`` in a fixed grid the score matrix
        ``alpha * mem_mult + (1 - alpha) * popularity_mult`` is scored with
        ``self._compute_erank`` on both ``val`` and ``test``.  The sweep is run
        twice: first with models fitted on ``train``, then with models fitted
        on ``train + val``.  Results are only logged / pretty-printed; nothing
        is returned.
        """
        ALPHA = [0, 0.001, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
                 0.99, 0.999, 1]

        def sweep(fit_data, banner):
            # Fit both component models on `fit_data` and sweep the grid once.
            mem_scores = self._train_mfs(['memory'], fit_data, dim, area)[0]
            popularity_scores = self._train_mfs(['popularity'], fit_data, dim, area)[0]

            mem_mult = normalize_mat_row(mem_scores)
            popularity_mult = normalize_mat_row(popularity_scores)

            log.info(banner)
            results_val = dict()
            results_test = dict()
            for alpha in ALPHA:
                # Three decimals are required: with '%.2f' the grid points 0 and
                # 0.001 (and 0.999 and 1) format to the same key, so one result
                # silently overwrote the other.
                key = '%.3f' % alpha
                log.info('Ranking when alpha is %s' % key)
                scores = alpha * mem_mult + (1 - alpha) * popularity_mult
                results_val[key] = self._compute_erank(val, scores)
                results_test[key] = self._compute_erank(test, scores)

            log.info('Erank on validation data')
            self.pretty_print(results_val)
            log.info('Erank on test data')
            self.pretty_print(results_test)

        sweep(train, 'Mem and popularity learnt from training data; searching alpha')

        # Second pass: the validation numbers here are computed on data the
        # models were fitted on, so only the test numbers are held out.
        sweep(train + val,
              'Mem and popularity learnt from training and val data; searching alpha')
Exemple #5
0
    def evaluate(self, train, val, test, dim, area):
        """Mix memory and popularity with one global weight pair and score on test.

        Both models are fitted on ``train``; the mixing weights are learned on
        ``val`` by ``learn_mix_mult_global``.  Returns a one-entry dict keyed
        ``'MEMORY+POPULARITY'`` with the value ``self._compute_erank`` produces
        on ``test``.
        """
        mem_scores = self._train_mfs(['memory'], train, dim, area)[0]
        popularity_scores = self._train_mfs(['popularity'], train, dim, area)[0]

        mem_mult = normalize_mat_row(mem_scores)
        # Small flat prior so the popularity multinomial has no zero entries.
        popularity_mult = normalize_mat_row(popularity_scores + 0.001)

        pi_mem_pop = learn_mix_mult_global(1.1, mem_mult, popularity_mult, val)
        log.info('Global mixing weight is %f and %f' % (pi_mem_pop[0], pi_mem_pop[1]))
        # Sanity output: the sum of the learned weights.  Written as a
        # single-argument call so it prints the same on Python 2 and 3 (the
        # former `print sum(...)` statement is a SyntaxError on Python 3).
        print(sum(pi_mem_pop.astype(float)))

        # The same row-normalized matrices used for learning the weights
        # (popularity including its flat prior) are the ones evaluated.
        log.info('Evaluating memory with popularity')
        mem_pop_erank = self._compute_erank(test, mem_mult, popularity_mult, pi_mem_pop)

        results = {'MEMORY+POPULARITY': mem_pop_erank}
        self.pretty_print(results)

        return results
Exemple #6
0
        def logP(score_mat, test):
            """Return per-observation and per-individual log-probabilities.

            ``score_mat`` is scored against the non-zero entries of ``test``
            (read through ``coo_matrix(test)``).

            Returns ``(logp_p, logp_indiv)``:
              * ``logp_p`` has one slot per observation (``int(test.sum())``
                slots); an entry with count ``v`` fills ``v`` consecutive slots
                with the log of its score under the globally normalized
                ``score_mat``.
              * ``logp_indiv`` has one slot per row of ``test``: the
                count-weighted sum of log row-normalized scores divided by that
                row's total count.  Rows with no observations divide 0 by 0
                and come out as NaN.
            """
            logp_p = np.zeros(int(test.sum()))
            logp_indiv = np.zeros(test.shape[0])
            test_data = coo_matrix(test)

            # Point-wise log-probability under the globally normalized scores.
            temp = score_mat / np.sum(score_mat)
            idx = 0
            for i, j, v in zip(test_data.row, test_data.col, test_data.data):
                logp_p[int(idx):int(idx + v)] = np.log(temp[i, j])
                idx += v

            # Per-individual log-probability under the row-normalized scores.
            temp = normalize_mat_row(score_mat)
            for i, j, v in zip(test_data.row, test_data.col, test_data.data):
                logp_indiv[i] += v * np.log(temp[i, j])

            # Row totals computed once (previously the full row-sum was
            # recomputed for every individual) and sized from `test` itself
            # rather than from an outer-scope variable.
            n_obs = np.asarray(test.sum(axis=1)).reshape(-1).astype(int)
            logp_indiv /= n_obs

            return logp_p, logp_indiv
def _learn_mix_mult(alpha, mem_mult, mf_mult, val_data, num_em_iter=100, tol=0.00001):
    r"""
    Learning the mixing weights for mixture of two multinomials. Each observation is considered as a data point
    and the mixing weights (\pi) are learned with EM using all the points.

    NOTE: In order for the algorithm to work, there can be no observed location that gets 0 probability from both
    the mem_mult and the mf_mult. In the original author's runs the mf_mult is estimated with MPE and the mem_mult
    with MLE, so the mf_mult has no 0 values.


     INPUT:
    -------
        1. alpha:       <float / (2, ) ndarray>   Dirichlet prior for the pi learning. If a scalar is given it is
                                                  treated as a flat prior. Has to be bigger than 1.
        2. mem_mult:    <(I, L) ndarray>    each row is the multinomial parameter according to the "self" data
        3. mf_mult:     <(I, L) ndarray>    each row is the multinomial parameter according to the matrix factorization
        4. val_data:    <(N, 3) ndarray>    each row is [ind_id, loc_id, counts]
        5. num_em_iter: <int>               maximal number of em iterations
        6. tol:         <float>             convergence threshold on the tracked log posterior

     OUTPUT:
    --------
        1. pi:  <(2, ) ndarray>     mixing weights, [memory weight, mf weight].

     RAISE:
    -------
        1. ValueError:
                a. alphas are not bigger than 1
                b. the multinomial's rows don't sum to 1
                c. There is an observed location with both mults 0 (see NOTE)

    """
    alpha = np.asarray(alpha, dtype=float)
    if np.any(alpha <= 1):
        raise ValueError('alpha values have to be bigger than 1')

    if np.any(np.abs(np.sum(mem_mult, axis=1) - 1) > 0.001):
        raise ValueError('mem_mult param is not a multinomial -- all rows must sum to 1')

    if np.any(np.abs(np.sum(mf_mult, axis=1) - 1) > 0.001):
        raise ValueError('mf_mult param is not a multinomial -- all rows must sum to 1')

    # A scalar prior (Python or NumPy scalar) becomes a symmetric two-entry prior.
    alpha = np.broadcast_to(alpha, (2,))

    # The probabilities of the observed (individual, location) pairs never change
    # during EM, so look them up once instead of on every iteration.
    rows = val_data[:, 0].astype(int)
    cols = val_data[:, 1].astype(int)
    mem_prob = mem_mult[rows, cols]
    mf_prob = mf_mult[rows, cols]

    # pi stays strictly positive (it starts at 0.5 and alpha > 1), so an observation
    # has zero mixture probability exactly when both components give it 0.
    # The former in-loop check compared a Python list with 0 and could never fire.
    if np.any((np.asarray(mem_prob) == 0) & (np.asarray(mf_prob) == 0)):
        raise ValueError('0 mix probability')

    counts = val_data[:, 2]
    count_weights = col_vector(counts)

    log_like_tracker = [-np.inf]
    pi = np.array([0.5, 0.5])
    em_iter = 0  # keeps the timing report valid when num_em_iter is 0
    start = time.time()
    for em_iter in range(1, num_em_iter + 1):
        # Every 5 iterations we compute the posterior log probability to see if we converged.
        if em_iter % 5 == 0:
            mixture_prob = pi[0] * mem_prob + pi[1] * mf_prob

            # The mixture probability was computed per location, but it should be raised to the
            # number of observations there, i.e. multiplied by the count in log space.
            data_log_like = np.log(mixture_prob) * counts

            log_prior = dirch.logpdf(pi, alpha=alpha)
            log_posterior = np.mean(data_log_like + log_prior)

            if np.abs(log_posterior - log_like_tracker[-1]) < tol:
                break

            log_like_tracker.append(log_posterior)

        # E-Step: responsibility of each component for each observation.
        resp = np.array([pi[0] * mem_prob, pi[1] * mf_prob]).T
        resp = normalize_mat_row(resp)

        # M-Step. Only on the \pi with Dirichlet prior alpha > 1
        pi = np.sum(resp * count_weights, axis=0)
        pi += alpha - 1
        pi /= np.sum(pi)

    total_time = time.time() - start
    log.debug('Finished EM. Total time = %d secs -- %.3f per iteration' % (total_time, total_time / max(em_iter, 1)))

    return pi
Exemple #8
0
    def evaluate(self, train, val, test, dim, area):
        """Compare six ways of combining the memory and popularity models.

        Models are fitted on ``train``, hyper-parameters / mixing weights are
        chosen on ``val`` and every method is scored on ``test``.  The methods
        are: an MI-based translation model with JM smoothing, the same with a
        Dirichlet prior, EM with global weights, EM with per-individual
        weights, smoothed memory ("S_mem") and a Dirichlet prior on memory.

        ``train`` must support ``.shape``, ``.sum(axis=...)``, ``> 0`` and
        ``.toarray()`` (presumably a SciPy sparse matrix -- confirm with the
        caller).

        Returns ``(logP_p, logP_indiv, mix_alpha)``, three DataFrames with one
        column per method: per-observation log-probabilities, per-individual
        log-probabilities and the per-individual effective memory weight.
        The aggregated scores are pretty-printed, not returned.
        """

        def logP(score_mat, test):
            # Per-observation and per-individual log-probability of `test`
            # under `score_mat`.
            logp_p = np.zeros(int(test.sum()))
            logp_indiv = np.zeros(test.shape[0])
            test_data = coo_matrix(test)

            # Point-wise: globally normalized scores, one slot per observation.
            temp = score_mat / np.sum(score_mat)
            idx = 0
            for i, j, v in zip(test_data.row, test_data.col, test_data.data):
                logp_p[int(idx):int(idx + v)] = np.log(temp[i, j])
                idx += v

            # Per individual: row-normalized scores, averaged over the row's
            # observations (rows without observations come out as NaN).
            temp = normalize_mat_row(score_mat)
            for i, j, v in zip(test_data.row, test_data.col, test_data.data):
                logp_indiv[i] += v * np.log(temp[i, j])

            # Row totals computed once instead of once per individual.
            n_obs = np.asarray(test.sum(axis=1)).reshape(-1).astype(int)
            logp_indiv /= n_obs

            return logp_p, logp_indiv

        ALPHA = np.arange(0.1, 1.1, 0.1)

        mem_scores = self._train_mfs(['memory'], train, dim, area)[0]
        popularity_scores = self._train_mfs(['popularity'], train, dim,
                                            area)[0] + 0.0001

        mem_mult = normalize_mat_row(mem_scores)
        popularity_mult = normalize_mat_row(popularity_scores)

        N = int(np.sum(mem_scores))
        I, L = train.shape
        # Number of training observations per individual (computed once and
        # reused by every method below).
        n_train = np.asarray(train.sum(axis=1)).reshape(-1).astype(int)

        results = dict()
        headers = [
            'EM global', 'EM indiv', 'S_mem', 'Dirichlet', 'Translation_JM',
            'Translation_Dirichlet'
        ]
        logP_p = DataFrame(np.zeros((int(test.sum()), 6)), columns=headers)
        logP_indiv = DataFrame(np.zeros((I, 6)), columns=headers)
        mix_alpha = DataFrame(np.zeros((I, 6)), columns=headers)

        log.info('#####learning statistical translation model#######')
        log.info('computing sparse mutual information')

        binary = (train > 0) * 1  #I*L
        count_1d = binary.sum(axis=0)  #1*L
        # Sparse matrix product; np.dot is not reliable on SciPy sparse inputs.
        count_2d = binary.T.dot(binary)  #L*L
        P_1d = count_1d / float(I)  # exists zeros
        P_2d = count_2d / float(I)
        temp = P_2d / np.outer(P_1d, P_1d)
        temp[~np.isfinite(temp)] = 1  # 0 / 0 -> ratio 1, i.e. PMI 0
        temp[temp == 0] = 1  # avoid log_zero
        PPMI = np.log2(temp)
        PPMI[PPMI < 0] = 0

        # For every location keep its (at most) k strongest positive-PPMI
        # neighbours, plus the location itself.  This must stay a plain list of
        # lists: the rows have different lengths and are appended to below, and
        # np.array() on ragged lists fails on current NumPy.
        k = 50
        idx = [[
            j for j in np.asarray(PPMI[i].argsort().T).reshape(-1)[-k:][::-1]
            if PPMI[i, j] > 0
        ] for i in range(L)]
        for u in range(L):
            if u not in idx[u]:
                idx[u].append(u)

        binary = (np.array(train.toarray()) > 0) * 1  #I*L
        MI = np.zeros((L, L))
        from sklearn import metrics
        for u in range(L):
            for w in idx[u]:
                # MI is symmetric; skip pairs already filled from the other side.
                if MI[u, w] == 0:
                    MI[u, w] = metrics.mutual_info_score(
                        None,
                        None,
                        contingency=np.histogram2d(binary[:, u], binary[:,
                                                                        w])[0])
                    MI[w, u] = MI[u, w]
        MI = normalize_mat_row(MI)
        # Rows that could not be normalized fall back to a uniform distribution.
        MI[~np.isfinite(MI)] = 1.0 / L
        ##########and self transition probability########
        log.info(
            'gridsearching on validation set (can be optimized) with JM smoothing'
        )
        val_result = dict()
        for alpha in [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]:
            for mu in [0, 0.1, 0.2, 0.3, 0.4, 0.5]:
                trans = MI * (1 - alpha) + np.identity(L) * alpha
                pref = np.dot(
                    mem_mult,
                    trans)  # consider each trans[i] as a  base vector
                temp = pref * mu + popularity_mult * (1 - mu)
                val_result[(alpha, mu)] = self._compute_logp_point(val, temp)
        #####choose alpha and mu that achieves best avg. point logP
        alpha, mu = max(val_result, key=val_result.get)
        trans = MI * (1 - alpha) + np.identity(L) * alpha
        pref = np.dot(mem_mult, trans)
        stm_scores = pref * mu + popularity_mult * (1 - mu)
        log.info('Evaluating MI based translation model with JM smoothing')
        stm_result = self._compute_erank_logp(test, stm_scores)
        results['Translation_JM'] = stm_result
        log.info("self transition weight and popularity weight: %f, %f" %
                 (alpha, 1 - mu))
        #####record results and mixture parameters########
        logP_p['Translation_JM'], logP_indiv['Translation_JM'] = logP(
            stm_scores, test)
        mix_alpha['Translation_JM'] = np.zeros(I) + mu * alpha

        ##########and self transition probability########
        log.info(
            'gridsearching on validation set (can be optimized) with Dirichlet prior'
        )
        val_result = dict()
        for alpha in [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]:
            for mu in [0, 0.1, 0.2, 0.3, 0.4, 0.5]:
                trans = MI * (1 - alpha) + np.identity(L) * alpha
                pref = np.dot(
                    mem_scores,
                    trans)  # consider each trans[i] as a  base vector
                temp = pref + popularity_mult * mu * N / I
                val_result[(alpha, mu)] = self._compute_logp_point(val, temp)
        #####choose alpha and mu that achieves best avg. point logP
        alpha, mu = max(val_result, key=val_result.get)
        trans = MI * (1 - alpha) + np.identity(L) * alpha
        pref = np.dot(mem_scores, trans)
        stm_scores = pref + popularity_mult * mu * N / I
        log.info('Evaluating MI based translation model with Dirichlet prior')
        stm_result = self._compute_erank_logp(test, stm_scores)
        results['Translation_Dirichlet'] = stm_result
        log.info("self transition weight and prior strength: %f, %f" %
                 (alpha, mu * N / I))
        #####record results and mixture parameters########
        logP_p['Translation_Dirichlet'], logP_indiv[
            'Translation_Dirichlet'] = logP(stm_scores, test)
        mix_alpha['Translation_Dirichlet'] = n_train * alpha / (n_train +
                                                                mu * N / I)

        log.info('#############learning EM global#################')
        pi_mem_pop = learn_mix_mult_global(1.1, mem_mult, popularity_mult, val)
        log.info('Global mixing weight is %f and %f' %
                 (pi_mem_pop[0], pi_mem_pop[1]))
        log.info('Evaluating EM global')

        em_global_scores = pi_mem_pop[0] * mem_mult + pi_mem_pop[
            1] * popularity_mult
        EM_global_result = self._compute_erank_logp(test, em_global_scores)
        results['EM global'] = EM_global_result
        logP_p['EM global'], logP_indiv['EM global'] = logP(
            em_global_scores, test)
        mix_alpha['EM global'] = pi_mem_pop[0] + np.zeros(I)

        log.info('#############learning EM individual##############')
        pi_mem_pop = learn_mix_mult_on_individual(1.1, mem_mult,
                                                  popularity_mult, val)
        log.info('Evaluating EM indiv')

        em_indiv_scores = col_vector(pi_mem_pop[:, 0]) * mem_mult + col_vector(
            pi_mem_pop[:, 1]) * popularity_mult
        # NOTE(review): this is the only call that passes the two components
        # and the weights instead of a combined score matrix -- confirm that
        # _compute_erank_logp supports both call forms.
        EM_indiv_result = self._compute_erank_logp(test, mem_mult,
                                                   popularity_mult, pi_mem_pop)
        results['EM indiv'] = EM_indiv_result
        logP_p['EM indiv'], logP_indiv['EM indiv'] = logP(
            em_indiv_scores, test)
        mix_alpha['EM indiv'] = pi_mem_pop[:, 0]

        log.info('#############learning S_memory###################')
        log.info('gridsearching on validation set')
        val_result = dict()
        for alpha in ALPHA:
            temp = mem_scores * alpha + popularity_scores * (1 - alpha)
            val_result[alpha] = self._compute_logp_point(val, temp)
        #####choose alpha that achieves best avg. point logP
        alpha = max(val_result, key=val_result.get)
        print('alpha:', alpha)
        s_mem_scores = mem_scores * alpha + popularity_scores * (1 - alpha)
        log.info('Evaluating smoothed memory')
        s_mem_result = self._compute_erank_logp(test, s_mem_scores)
        # NOTE(review): key is 'S_Mem' here but 'S_mem' in the DataFrame
        # headers; left unchanged because the printed key may be relied upon.
        results['S_Mem'] = s_mem_result

        temp = n_train.mean()
        logP_p['S_mem'], logP_indiv['S_mem'] = logP(s_mem_scores, test)
        mix_alpha['S_mem'] = alpha * n_train / (alpha * n_train +
                                                (1 - alpha) * temp)

        log.info('############learning with Dirichlet prior#############')
        log.info('gridsearching on validation set')
        val_result = dict()
        for alpha in ALPHA:
            temp = mem_scores + popularity_mult * alpha * N / I
            val_result[alpha] = self._compute_logp_point(val, temp)
        #####choose alpha that achieves best avg. point logP
        alpha = max(val_result, key=val_result.get)
        print('alpha:', alpha)
        dirichlet_scores = mem_scores + popularity_mult * alpha * N / I
        log.info('Evaluating with Dirichlet prior')
        dirichlet_result = self._compute_erank_logp(test, dirichlet_scores)
        results['Dirichlet'] = dirichlet_result

        logP_p['Dirichlet'], logP_indiv['Dirichlet'] = logP(
            dirichlet_scores, test)
        mix_alpha['Dirichlet'] = n_train / (n_train + alpha * N / I)

        self.pretty_print(results)
        return logP_p, logP_indiv, mix_alpha