Esempio n. 1
0
def em_optimization(n_dw_matrix,
                    phi_matrix,
                    theta_matrix,
                    regularization_list,
                    iters_count=100,
                    loss_function=None,
                    iteration_callback=None,
                    const_phi=False,
                    params=None):
    """Iteratively re-estimate phi and theta from regularized counters.

    Args:
        n_dw_matrix: sparse matrix exposing ``data``, ``indices``,
            ``indptr`` and ``shape`` (used here as a CSR matrix).
        phi_matrix: initial phi; copied, the caller's array is not modified.
            The products below imply shape (topics, words) -- TODO confirm.
        theta_matrix: initial theta; copied as well.  The products below
            imply shape (documents, topics) -- TODO confirm.
        regularization_list: indexable of callables; entry ``it`` is called
            as ``reg(phi, theta, n_tw, n_dt)`` and must return
            ``(r_tw, r_dt)``.  It needs at least ``iters_count`` entries.
        iters_count: number of iterations to run.
        loss_function: object with a ``calc_der`` method; defaults to
            ``LogFunction()``.
        iteration_callback: optional callable invoked after every iteration
            as ``callback(it, phi_matrix, theta_matrix)``.
        const_phi: when true, phi is never updated (only theta is).
        params: optional dict; only the ``'return_counters'`` key is read.

    Returns:
        ``(phi_matrix, theta_matrix)``, or
        ``(phi_matrix, theta_matrix, n_tw, n_dt)`` when
        ``params['return_counters']`` is truthy.  The counters are ``None``
        if no iteration was executed.
    """
    if loss_function is None:
        loss_function = LogFunction()

    if params is None:
        params = {}
    return_counters = params.get('return_counters', False)

    phi_matrix = np.copy(phi_matrix)
    theta_matrix = np.copy(theta_matrix)

    docptr = get_docptr(n_dw_matrix)
    wordptr = n_dw_matrix.indices

    # Pre-bind the counters so that returning them is well defined even when
    # the loop body never runs (iters_count == 0).
    n_tw = None
    n_dt = None

    start_time = time.time()
    for it in range(iters_count):
        phi_matrix_tr = np.transpose(phi_matrix)
        # One value per stored entry of n_dw_matrix: the loss derivative
        # evaluated on what memory_efficient_inner1d returns for that entry.
        s_data = loss_function.calc_der(
            memory_efficient_inner1d(theta_matrix, docptr, phi_matrix_tr,
                                     wordptr))

        # Same sparsity pattern as n_dw_matrix, values scaled by s_data.
        A = scipy.sparse.csr_matrix((n_dw_matrix.data * s_data,
                                     n_dw_matrix.indices, n_dw_matrix.indptr),
                                    shape=n_dw_matrix.shape)
        n_dt = A.dot(phi_matrix_tr) * theta_matrix
        n_tw = np.transpose(
            A.tocsc().transpose().dot(theta_matrix)) * phi_matrix

        # The regularizer's additive corrections are applied in place.
        r_tw, r_dt = regularization_list[it](phi_matrix, theta_matrix, n_tw,
                                             n_dt)
        n_tw += r_tw
        n_dt += r_dt

        if not const_phi:
            phi_matrix = get_prob_matrix_by_counters(n_tw)
        theta_matrix = get_prob_matrix_by_counters(n_dt)

        if iteration_callback is not None:
            iteration_callback(it, phi_matrix, theta_matrix)

    # Single-argument call form prints the same text on Python 2 and 3.
    print('Iters time %s' % (time.time() - start_time))

    if return_counters:
        return phi_matrix, theta_matrix, n_tw, n_dt
    else:
        return phi_matrix, theta_matrix
Esempio n. 2
0
def gradient_optimization(
    n_dw_matrix,
    phi_matrix,
    theta_matrix,
    regularization_gradient_list,
    iters_count=100,
    loss_function=None,
    iteration_callback=None,
    learning_rate=1.,
):
    """Update phi and theta by repeated gradient steps followed by renormalization.

    Args:
        n_dw_matrix: sparse matrix exposing ``data``, ``indices``,
            ``indptr`` and ``shape`` (used here as a CSR matrix).
        phi_matrix: initial phi; copied, the caller's array is not modified.
        theta_matrix: initial theta; copied as well.
        regularization_gradient_list: indexable of callables; entry ``it``
            is called with four positional arguments and must return
            ``(r_tw, r_dt)``.  It needs at least ``iters_count`` entries.
        iters_count: number of iterations to run.
        loss_function: object with a ``calc_der`` method; defaults to
            ``LogFunction()``.
        iteration_callback: optional callable invoked after every iteration
            as ``callback(it, phi_matrix, theta_matrix)``.
        learning_rate: scalar multiplier applied to both gradient steps.

    Returns:
        ``(phi_matrix, theta_matrix)`` after the last iteration.
    """
    if loss_function is None:
        loss_function = LogFunction()

    phi_matrix = np.copy(phi_matrix)
    theta_matrix = np.copy(theta_matrix)

    docptr = get_docptr(n_dw_matrix)
    wordptr = n_dw_matrix.indices

    start_time = time.time()
    for it in range(iters_count):
        phi_matrix_tr = np.transpose(phi_matrix)
        s_data = loss_function.calc_der(
            memory_efficient_inner1d(theta_matrix, docptr, phi_matrix_tr,
                                     wordptr))
        # Same sparsity pattern as n_dw_matrix, values scaled by s_data.
        A = scipy.sparse.csr_matrix((n_dw_matrix.data * s_data,
                                     n_dw_matrix.indices, n_dw_matrix.indptr),
                                    shape=n_dw_matrix.shape).tocsc()
        # dense * sparse dispatches to the sparse operand, i.e. this is a
        # matrix product, not an element-wise one.
        g_tw = theta_matrix.T * A
        g_dt = A.dot(phi_matrix_tr)

        # NOTE(review): phi/theta are passed twice here, whereas the EM
        # variants pass counters as the 3rd/4th arguments -- confirm that
        # this is what the gradient regularizers expect.
        r_tw, r_dt = regularization_gradient_list[it](phi_matrix, theta_matrix,
                                                      phi_matrix, theta_matrix)
        g_tw += r_tw
        g_dt += r_dt

        # Subtract, per row, the row's gradient weighted by the current
        # parameter values.
        g_tw -= np.sum(g_tw * phi_matrix, axis=1)[:, np.newaxis]
        g_dt -= np.sum(g_dt * theta_matrix, axis=1)[:, np.newaxis]

        phi_matrix += g_tw * learning_rate
        theta_matrix += g_dt * learning_rate

        phi_matrix = get_prob_matrix_by_counters(phi_matrix)
        theta_matrix = get_prob_matrix_by_counters(theta_matrix)

        if iteration_callback is not None:
            iteration_callback(it, phi_matrix, theta_matrix)

    # Single-argument call form prints the same text on Python 2 and 3.
    print('Iters time %s' % (time.time() - start_time))

    return phi_matrix, theta_matrix
Esempio n. 3
0
def naive_thetaless_em_optimization(n_dw_matrix,
                                    phi_matrix,
                                    regularization_list,
                                    iters_count=100,
                                    iteration_callback=None,
                                    theta_matrix=None,
                                    params=None):
    """EM-style phi update in which theta is recomputed from phi every iteration.

    Args:
        n_dw_matrix: sparse matrix exposing ``data``, ``indices``,
            ``indptr``, ``shape`` and ``dot`` (used here as a CSR matrix).
        phi_matrix: initial phi; copied, the caller's array is not modified.
        regularization_list: indexable of callables; entry ``it`` is called
            as ``reg(phi, theta, n_tw, theta)`` and must return a pair whose
            first element is added to ``n_tw`` (the second is ignored).
            It needs at least ``iters_count`` entries.
        iters_count: number of iterations to run.
        iteration_callback: optional callable invoked after every iteration
            as ``callback(it, phi_matrix, theta_matrix)``.
        theta_matrix: overwritten at the start of every iteration; the
            passed value is only returned as-is when no iteration runs.
        params: optional dict; only the ``'return_counters'`` key is read.

    Returns:
        ``(phi_matrix, theta_matrix)``, or
        ``(phi_matrix, theta_matrix, n_tw, None)`` when
        ``params['return_counters']`` is truthy.  ``n_tw`` is ``None`` if no
        iteration was executed.
    """
    if params is None:
        params = {}
    loss_function = LogFunction()
    return_counters = params.get('return_counters', False)

    phi_matrix = np.copy(phi_matrix)

    docptr = get_docptr(n_dw_matrix)
    wordptr = n_dw_matrix.indices

    # Pre-bind the counter so that returning it is well defined even when
    # the loop body never runs (iters_count == 0).
    n_tw = None

    start_time = time.time()
    for it in range(iters_count):
        # Each column of phi divided by its column sum, then transposed.
        # NOTE(review): a column summing to zero yields nan/inf here.
        phi_rev_matrix = np.transpose(phi_matrix / np.sum(phi_matrix, axis=0))
        phi_matrix_tr = np.transpose(phi_matrix)
        theta_matrix = get_prob_matrix_by_counters(
            n_dw_matrix.dot(phi_rev_matrix))

        s_data = loss_function.calc_der(
            memory_efficient_inner1d(theta_matrix, docptr, phi_matrix_tr,
                                     wordptr))
        # Same sparsity pattern as n_dw_matrix, values scaled by s_data.
        A = scipy.sparse.csr_matrix((n_dw_matrix.data * s_data,
                                     n_dw_matrix.indices, n_dw_matrix.indptr),
                                    shape=n_dw_matrix.shape).tocsc()

        n_tw = (A.T.dot(theta_matrix)).T * phi_matrix
        r_tw, _ = regularization_list[it](phi_matrix, theta_matrix, n_tw,
                                          theta_matrix)
        n_tw += r_tw
        phi_matrix = get_prob_matrix_by_counters(n_tw)

        if iteration_callback is not None:
            iteration_callback(it, phi_matrix, theta_matrix)

    # Single-argument call form prints the same text on Python 2 and 3.
    print('Iters time %s' % (time.time() - start_time))

    if return_counters:
        return phi_matrix, theta_matrix, n_tw, None
    else:
        return phi_matrix, theta_matrix
Esempio n. 4
0
def create_calculate_likelihood_like_function(n_dw_matrix, loss_function=None):
    """Return a closure that scores a (phi, theta) pair against ``n_dw_matrix``.

    The returned callable applies ``loss_function.calc`` to the result of
    ``memory_efficient_inner1d`` for the stored entries of ``n_dw_matrix``,
    multiplies by ``n_dw_matrix.data`` and sums everything up.  When
    ``loss_function`` is omitted a fresh ``LogFunction()`` is used.
    """
    loss = LogFunction() if loss_function is None else loss_function

    # Index arrays depend only on the sparse matrix, so compute them once
    # and let the closure reuse them on every call.
    doc_index = get_docptr(n_dw_matrix)
    word_index = n_dw_matrix.indices

    def fun(phi_matrix, theta_matrix):
        phi_transposed = np.transpose(phi_matrix)
        per_entry = memory_efficient_inner1d(theta_matrix, doc_index,
                                             phi_transposed, word_index)
        weighted = n_dw_matrix.data * loss.calc(per_entry)
        return np.sum(weighted)

    return fun
Esempio n. 5
0
def artm_thetaless_em_optimization(n_dw_matrix,
                                   phi_matrix,
                                   regularization_list,
                                   iters_count=100,
                                   iteration_callback=None,
                                   theta_matrix=None,
                                   params=None):
    """Theta-less EM-style phi update with an extra correction added to ``r_tw``.

    Theta is recomputed from phi at the start of every iteration; in
    addition to the regularizer output, a term built from the matrix ``B``
    (document-normalized counts) is added to ``r_tw`` before phi is updated.

    Args:
        n_dw_matrix: sparse matrix exposing ``data``, ``indices``,
            ``indptr``, ``shape``, ``dot`` and ``sum`` (used here as a CSR
            matrix).
        phi_matrix: initial phi; copied, the caller's array is not modified.
        regularization_list: indexable of callables; entry ``it`` is called
            as ``reg(phi, theta, n_tw, theta)`` and must return
            ``(r_tw, r_dt)``.  ``r_dt`` is modified in place.  It needs at
            least ``iters_count`` entries.
        iters_count: number of iterations to run.
        iteration_callback: optional callable invoked after every iteration
            as ``callback(it, phi_matrix, theta_matrix)``.
        theta_matrix: overwritten at the start of every iteration; the
            passed value is only returned as-is when no iteration runs.
        params: optional dict.  ``'use_B_cheat'`` selects the number of
            stored entries per document (instead of the sum of its stored
            values) as the normalizer for ``B``; ``'return_counters'``
            switches the return shape.

    Returns:
        ``(phi_matrix, theta_matrix)``, or
        ``(phi_matrix, theta_matrix, n_tw, None)`` when
        ``params['return_counters']`` is truthy.  ``n_tw`` is ``None`` if no
        iteration was executed.
    """
    if params is None:
        params = {}
    loss_function = LogFunction()
    use_B_cheat = params.get('use_B_cheat', False)
    return_counters = params.get('return_counters', False)

    phi_matrix = np.copy(phi_matrix)

    docptr = get_docptr(n_dw_matrix)
    wordptr = n_dw_matrix.indices

    # Per-document normalizer, repeated once per stored entry of that
    # document so it lines up element-wise with n_dw_matrix.data.
    entries_per_doc = np.diff(n_dw_matrix.indptr)
    if use_B_cheat:
        doc_weights = entries_per_doc
    else:
        doc_weights = np.asarray(n_dw_matrix.sum(axis=1)).ravel()
    docsizes = np.repeat(doc_weights, entries_per_doc)

    B = scipy.sparse.csr_matrix((1. * n_dw_matrix.data / docsizes,
                                 n_dw_matrix.indices, n_dw_matrix.indptr),
                                shape=n_dw_matrix.shape).tocsc()

    # Pre-bind the counter so that returning it is well defined even when
    # the loop body never runs (iters_count == 0).
    n_tw = None

    start_time = time.time()
    for it in range(iters_count):
        phi_matrix_tr = np.transpose(phi_matrix)
        phi_rev_matrix = get_prob_matrix_by_counters(phi_matrix_tr)
        theta_matrix = get_prob_matrix_by_counters(
            n_dw_matrix.dot(phi_rev_matrix))

        s_data = loss_function.calc_der(
            memory_efficient_inner1d(theta_matrix, docptr, phi_matrix_tr,
                                     wordptr))
        # Same sparsity pattern as n_dw_matrix, values scaled by s_data.
        A = scipy.sparse.csr_matrix((n_dw_matrix.data * s_data,
                                     n_dw_matrix.indices, n_dw_matrix.indptr),
                                    shape=n_dw_matrix.shape).tocsc()

        n_tw = A.T.dot(theta_matrix).T * phi_matrix
        r_tw, r_dt = regularization_list[it](phi_matrix, theta_matrix, n_tw,
                                             theta_matrix)

        # Divide r_dt by theta only where theta is safely above OPT_EPS;
        # everywhere else the entry is zeroed instead of dividing.
        theta_indices = theta_matrix > OPT_EPS
        r_dt[theta_indices] /= theta_matrix[theta_indices]
        r_dt[~theta_indices] = 0.

        g_dt = A.dot(phi_matrix_tr) + r_dt
        # dense * sparse is a matrix product; OPT_EPS guards the division.
        tmp = g_dt.T * B / (phi_matrix_tr.sum(axis=1) + OPT_EPS)
        # einsum('ij,ji->i') is the row-by-column dot of phi_rev_matrix
        # with tmp, one scalar per row of phi_rev_matrix.
        r_tw += (tmp - np.einsum('ij,ji->i', phi_rev_matrix, tmp)) * phi_matrix

        n_tw += r_tw
        phi_matrix = get_prob_matrix_by_counters(n_tw)

        if iteration_callback is not None:
            iteration_callback(it, phi_matrix, theta_matrix)

    # Single-argument call form prints the same text on Python 2 and 3.
    print('Iters time %s' % (time.time() - start_time))

    if return_counters:
        return phi_matrix, theta_matrix, n_tw, None
    else:
        return phi_matrix, theta_matrix
Esempio n. 6
0
def artm_calc_perplexity_factory(n_dw_matrix):
    """Return a ``(phi, theta)`` callable computing ``exp(-L / N)``.

    ``L`` is the value produced by the function that
    ``create_calculate_likelihood_like_function`` builds with a
    ``LogFunction()`` loss, and ``N`` is ``n_dw_matrix.sum()``.
    """
    total_words_number = n_dw_matrix.sum()
    likelihood = create_calculate_likelihood_like_function(
        n_dw_matrix=n_dw_matrix, loss_function=LogFunction())

    def perplexity(phi, theta):
        negated = -likelihood(phi, theta)
        return np.exp(negated / total_words_number)

    return perplexity