Example #1
def construct_from_svd(U, s, V, cfg):
    T = cfg['T']
    Phi = np.zeros((U.shape[0], T))
    Theta = np.zeros((T, V.shape[1]))
    for i in range(T):
        # Split the i-th singular vector pair into its positive
        # and (sign-flipped) negative parts.
        x = U[:, i]
        y = V[i, :]
        xp = np.copy(x)
        xp[xp < 0] = 0
        xn = -np.copy(x)
        xn[xn < 0] = 0
        yp = np.copy(y)
        yp[yp < 0] = 0
        yn = -np.copy(y)
        yn[yn < 0] = 0
        xp_norm = np.linalg.norm(xp, ord=1)
        yp_norm = np.linalg.norm(yp, ord=1)
        xn_norm = np.linalg.norm(xn, ord=1)
        yn_norm = np.linalg.norm(yn, ord=1)
        if xp_norm*yp_norm > xn_norm*yn_norm:
            Phi[:, i] = np.sqrt(s[i]*xp_norm*yp_norm)*xp/xp_norm
            Theta[i, :] = np.sqrt(s[i]*xp_norm*yp_norm)*yp/yp_norm
        else:
            Phi[:, i] = np.sqrt(s[i]*xn_norm*yn_norm)*xn/xn_norm
            Theta[i, :] = np.sqrt(s[i]*xn_norm*yn_norm)*yn/yn_norm
    return normalize_cols(Phi), normalize_cols(Theta)
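This is an NNDSVD-style initialization: each of the first T singular triplets is split into its positive and negative parts, and whichever signed pair carries more L1 mass becomes the topic column. A minimal usage sketch; normalize_cols here is a hypothetical stand-in for the package's helper (assumed to rescale columns to unit L1 norm):

import numpy as np

def normalize_cols(M, eps=1e-12):
    # Hypothetical stand-in: rescale every column to sum to 1.
    return M / np.maximum(M.sum(axis=0, keepdims=True), eps)

F = np.abs(np.random.randn(50, 30))               # toy nonnegative matrix
U, s, Vt = np.linalg.svd(F, full_matrices=False)  # rows of Vt are right singular vectors
Phi, Theta = construct_from_svd(U, s, Vt, {'T': 5})
print(Phi.shape, Theta.shape)                     # (50, 5) (5, 30)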
Example #2
def plsa3D(V, W, H, post='', cfg=config.default_config()):
    #print('Probabilistic Latent Semantic Analysis.')
    eps = cfg['eps']
    (N, M) = V.shape
    T = H.shape[0]
    V3 = V.reshape(N, M, 1).repeat(T, 2).swapaxes(1, 2)
    W3 = W.reshape(N, T, 1).repeat(M, 2)
    H3 = H.T.reshape(M, T, 1).repeat(N, 2).swapaxes(0, 2)
    Q3 = dot(W, H).reshape(N, M, 1).repeat(T, 2).swapaxes(1, 2)
    Z = V3 * W3 * H3 / (Q3 + eps)
    W = normalize_cols(sum(Z, 2).reshape(N, T))
    H = normalize_cols(sum(Z, 0).reshape(T, M))
    return W, H
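plsa3D materializes the full N x T x M responsibility tensor Z with Z[n,t,m] = V[n,m] * W[n,t] * H[t,m] / (WH)[n,m], then sums it out over words and documents. The same simultaneous EM step fits in O(N*M) memory with plain matrix products; a sketch, reusing the normalize_cols stand-in from Example #1:

def plsa_em_step(V, W, H, eps=1e-12):
    # Fold the sums over the axes of Z into matrix products:
    #   sum over m of Z[n,t,m] = W[n,t] * (R @ H.T)[n,t]
    #   sum over n of Z[n,t,m] = H[t,m] * (W.T @ R)[t,m]
    # where R = V / (W @ H + eps), so Z is never built explicitly.
    R = V / (np.dot(W, H) + eps)
    W_new = normalize_cols(W * np.dot(R, H.T))
    H_new = normalize_cols(H * np.dot(W.T, R))
    return W_new, H_new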
Example #3
def gen_matrix_topic(params):
    N, T = params['rows'], params['cols']
    phi = np.zeros((N, T))
    sparse = params['sparsity'] # sparseness (the main parameter)
    if sparse < params['eps']:
        sparse = params['eps']
    elif sparse > 1:
        sparse = 1
    nkernel = params['nkernel'] # number of average kernel words in topic
    nnoise = params['nnoise'] # number of noise (smooth) topics
    ntopic = T - nnoise
    kernel = np.maximum(1, np.random.binomial(N, min(1, nkernel / (N*sparse)), ntopic))
    s = 0
    for i in range(ntopic):
        phi[s:s+kernel[i], i] = -np.sort(-np.random.exponential(0.5, kernel[i]))
        s = s + int(kernel[i] * sparse)
        if i < ntopic-1 and s + kernel[i+1] > N:
            kernel[i+1] = max(1, N - s)
            s = N - kernel[i+1]
    if N-s-kernel[-1]+1 > 0:
        if nnoise == 0:
            phi[s+kernel[-1]-1:, :] = np.random.random_sample((N-s-kernel[-1]+1, T))
        else:
            #phi[s+kernel[-1]-1:, ntopic:] = np.random.random_sample((N-s-kernel[-1]+1, nnoise))
            phi[:, ntopic:] = np.random.random_sample((N, nnoise))
    return normalize_cols(phi)
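A usage sketch for the generator above; the parameter names follow the code, and the concrete values are only illustrative assumptions:

params = {'rows': 1000, 'cols': 20,   # N words, T topics
          'sparsity': 0.8,
          'eps': 1e-6,
          'nkernel': 40,              # average kernel words per topic
          'nnoise': 2}                # trailing dense noise topics
phi = gen_matrix_topic(params)        # (1000, 20); columns sum to 1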
Example #4
def gen_matrix_sparse(params):
    rows = params['rows']
    cols = params['cols']
    sparsity = params['sparsity']
    M = np.zeros((rows, cols), dtype='float32')
    for i in range(cols):
        M[:, i] = np.random.dirichlet([sparsity]*rows)
    return normalize_cols(M)
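Here 'sparsity' doubles as the symmetric Dirichlet concentration, so smaller values concentrate each column's mass on fewer rows. A quick check with assumed toy sizes:

M = gen_matrix_sparse({'rows': 5000, 'cols': 4, 'sparsity': 0.01})
print((M > 1e-3).sum(axis=0))   # only a handful of "active" rows per column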
Example #5
def grad_desc(V, W, H, post='', cfg=config.default_config()):
    alpha = cfg[post + '_alpha']
    step = cfg[post + '_alpha_step']
    eps = cfg['eps']
    #print('Gradient Descent with alpha={alpha}.'.format(alpha=alpha))
    grad_W = dot((V - dot(W, H)), H.T)
    grad_H = dot(W.T, (V - dot(W, H)))
    #grad_W[grad_W < eps] = 0
    #grad_H[grad_H < eps] = 0
    W = W + alpha * grad_W
    W[(grad_W < eps) & (W < eps)] = 0
    W = normalize_cols(W)

    H = H + alpha * grad_H
    H[(grad_H < eps) & (H < eps)] = 0
    H = normalize_cols(H)

    alpha = alpha * step
    cfg[post + '_alpha'] = alpha
    return (W, H)
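Since grad_W = (V - WH)H^T and grad_H = W^T(V - WH) are the negative gradients of 0.5 * ||V - WH||_F^2, the W + alpha * grad_W update is a descent step on the Frobenius loss; entries whose value and gradient both stay below eps are zeroed before column renormalization. A driving-loop sketch, with a plain dict standing in for the package's config (post='' means the step size is read from the '_alpha' keys) and the normalize_cols stand-in from Example #1:

import numpy as np

N, M, T = 200, 100, 10
V = normalize_cols(np.random.rand(N, M))
W = normalize_cols(np.random.rand(N, T))
H = normalize_cols(np.random.rand(T, M))
cfg = {'_alpha': 0.1, '_alpha_step': 0.99, 'eps': 1e-8}
for _ in range(100):
    W, H = grad_desc(V, W, H, post='', cfg=cfg)   # alpha shrinks by '_alpha_step' each call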
Example #6
def gen_matrix_sparse(params):
    rows = params['rows']
    cols = params['cols']
    sparsity = params['sparsity']
    M = np.zeros((rows, cols), dtype='float32')
    sz = int(rows * cols * (1 - sparsity))
    # Row and column indices are shuffled independently, so some
    # (row, col) pairs may coincide and slightly fewer than sz cells
    # may actually be filled.
    idx0_t = [i for i in range(rows) for j in range(cols)]
    np.random.shuffle(idx0_t)
    idx0 = idx0_t[:sz]
    idx1_t = [j for i in range(rows) for j in range(cols)]
    np.random.shuffle(idx1_t)
    idx1 = idx1_t[:sz]
    M[idx0, idx1] = np.random.sample(sz)
    # Seed a diagonal with eps so that every column is nonzero
    # before normalization.
    if rows < cols:
        M[:, :rows] = M[:, :rows] + np.eye(rows) * params['eps']
        M[0, rows:] = params['eps']
    else:
        M[:cols, :] = M[:cols, :] + np.eye(cols) * params['eps']
    return normalize_cols(M)
Example #7
def gen_matrix_normal(params):
    return normalize_cols(abs(np.random.randn(params['rows'], params['cols'])))
Example #8
def gen_matrix_uniform(params):
    return normalize_cols(
        np.random.uniform(size=(params['rows'], params['cols'])))
Example #9
def run(V, W, H, W_r=None, H_r=None, cfg=config.default_config()):
    T = H.shape[0]
    eps = cfg['eps']
    schedule = cfg['schedule'].split(',')
    meas = cfg['measure'].split(',')
    val = np.zeros((cfg['max_iter'] + 2, len(meas)))
    hdist = np.zeros((cfg['max_iter'] + 2, 1))

    for i, fun_name in enumerate(meas):
        fun = getattr(measure, fun_name)
        val[0, i] = fun(V, np.dot(W, H))

    if cfg['compare_real']:
        #m = Munkres()
        idx = get_permute(W_r, H_r, W, H, cfg['munkres'])
        hdist[0] = hellinger(W[:, idx[:, 1]], W_r[:, idx[:, 0]]) / T
    if cfg['print_lvl'] > 1:
        print('Initial loss:', val[0])
    status = 0
    methods_num = len(schedule)
    it = -1
    for it in range(cfg['max_iter']):
        if cfg['print_lvl'] > 1:
            print('Iteration', it + 1)
        W_old = deepcopy(W)
        H_old = deepcopy(H)
        method_name = schedule[it % methods_num]
        if cfg['print_lvl'] > 1:
            print('Method:', method_name)
        method = getattr(methods, method_name)
        (W, H) = method(V, W, H, method_name, cfg)
        if (it + 1) % cfg['normalize_iter'] == 0:
            W = normalize_cols(W)
            H = normalize_cols(H)
        for j, fun_name in enumerate(meas):
            fun = getattr(measure, fun_name)
            val[it + 1, j] = fun(V, np.dot(W, H))

        if cfg['compare_real']:
            idx = get_permute(W_r, H_r, W, H, cfg['munkres'])
            hdist[it + 1] = hellinger(W[:, idx[:, 1]], W_r[:, idx[:, 0]]) / T

        if cfg['print_lvl'] > 1:
            print(val[it + 1])
        if all(val[it, :] < eps):
            if cfg['print_lvl'] > 1:
                print('By cost.')
            status = 1
            break
        if abs(W_old - W).max() < eps and abs(H_old - H).max() < eps:
            if cfg['print_lvl'] > 1:
                print('By argument.')
            status = 2
            break
        #del W_old
        #del H_old
    if cfg['print_lvl'] > 1:
        print('Final:')
    W = normalize_cols(W)
    H = normalize_cols(H)
    for j, fun_name in enumerate(meas):
        fun = getattr(measure, fun_name)
        val[it + 2:, j] = fun(V, np.dot(W, H))

    if cfg['compare_real']:
        idx = get_permute(W_r, H_r, W, H, cfg['munkres'])
        hdist[it + 2:] = hellinger(W[:, idx[:, 1]], W_r[:, idx[:, 0]]) / T
    return (val, hdist, it, W, H, status)
Example #10
def main(config_file='config.txt', results_file='results.txt', cfg=None):
    if cfg is None:
        cfg = config.load(config_file)
    if cfg['seed'] >= 0:
        np.random.seed(cfg['seed'])
    else:
        np.random.seed(None)
    eps = cfg['eps']
    N = cfg['N']
    T = cfg['T']
    M = cfg['M']
    vocab = None
    W_r = None
    H_r = None
    if cfg['run_info'] == 'results' or cfg['run_info'] == 1:
        cfg['print_lvl'] = 1
    elif cfg['run_info'] == 'run' or cfg['run_info'] == 2:
        cfg['print_lvl'] = 2
    else:
        cfg['print_lvl'] = 0
    if cfg['print_lvl'] > 0:
        print('Generating...')
    if cfg['load_data'] == 'uci' or cfg['load_data'] == 2:
        V, vocab = load_uci(cfg['data_name'], cfg)
        V = normalize_cols(V)
        N, M = V.shape
        cfg['N'], cfg['M'] = V.shape
        print('Size:', N, M)
    elif cfg['load_data'] == 'csv' or cfg['load_data'] == 1:
        _, W_r, H_r = load_csv(cfg['gen_name'], cfg)
        #plt.matshow(1-W_r, cmap=plt.cm.gray)
        #plt.title('real')
        V, vocab = load_uci(cfg['gen_name'], cfg)
        V = normalize_cols(V)
        N, M = V.shape
        cfg['N'], cfg['M'] = V.shape
        print('Size:', N, M)
        cfg['T_0'] = W_r.shape[1]
    else:
        V, W_r, H_r = gen_real(cfg)
    print('Checking assumption on V:', np.sum(V, axis=0).max())

    #tp = '0_5_100_16_500'
    #V_filename = 'datasets/V.' + tp + '.txt.csv'
    #W_filename = 'datasets/W.' + tp + '.txt.csv'
    #H_filename = 'datasets/H.' + tp + '.txt.csv'

    #V = np.loadtxt(V_filename, delimiter=',')
    #W_r = np.loadtxt(W_filename, delimiter=',')
    #H_r = np.loadtxt(H_filename, delimiter=',')
    #show_matrices(W_r, H_r)
    #plt.savefig('tm_tests/real' + tp + '.eps', format='eps')

    res = [0] * cfg['runs']
    finals = [0] * cfg['runs']
    hdist_runs = [0] * cfg['runs']
    exp_time = [0] * cfg['runs']
    meas = cfg['measure'].split(',')
    meas_name = [''] * len(meas)
    for i, f_name in enumerate(meas):
        f = getattr(measure, f_name + '_name')
        meas_name[i] = f()
    print('Measures:', meas_name)
    if cfg['compare_methods']:
        methods = cfg['schedule'].split(',')
        nmethods = len(methods)
    for r in range(cfg['runs']):
        if cfg['print_lvl'] > 0:
            print('Run', r + 1)
        #(W, H) = gen_init(cfg)
        if cfg['print_lvl'] > 0:
            print('  Starting...')

        labels = None
        st = time()
        if r >= cfg['prepare'] and cfg['prepare'] >= 0 and cfg[
                'prepare_method'] > 0:
            print('Preparing data...')
            if cfg['prepare_method'] == 1:
                W = anchor_words(V, 'L2', cfg)
                print('Solving for H')
                H = linalg.solve(
                    np.dot(W.T, W) + np.eye(W.shape[1]) * eps, np.dot(W.T, V))
                H[H < eps] = 0
                H = normalize_cols(H)
            elif cfg['prepare_method'] == 2:
                centroids, labels = reduce_cluster(V.T, cfg['T'], cfg)
                W = centroids.T
                W[W < eps] = 0
                W = normalize_cols(W)
                print('Solving for H')
                H = linalg.solve(
                    np.dot(W.T, W) + np.eye(W.shape[1]) * eps, np.dot(W.T, V))
                H[H < eps] = 0
                H = normalize_cols(H)
            elif cfg['prepare_method'] == 3:
                centroids, labels = reduce_cluster(V, cfg['num_clusters'], cfg)
                W = anchor_words(centroids, 'L2', cfg)
                print('Solving for H')
                H = linalg.solve(
                    np.dot(W.T, W) + np.eye(W.shape[1]) * eps,
                    np.dot(W.T, normalize_cols(centroids)))
                H[H < eps] = 0
                H = normalize_cols(H)
                W = restore_cluster(W, labels, cfg)
            elif cfg['prepare_method'] >= 4 and cfg['prepare_method'] <= 6:
                if cfg['prepare_method'] == 4:
                    red = reduce_tsne(V, to_dim=4)
                elif cfg['prepare_method'] == 5:
                    red = reduce_tsne(V, to_dim=3)
                elif cfg['prepare_method'] == 6:
                    red = reduce_tsne(V, to_dim=2)
                centroids, labels = reduce_cluster(red, cfg['num_clusters'],
                                                   cfg)
                nearest_words = find_nearest(red, centroids, labels)
                V_reduced = normalize_cols(V[nearest_words, :])
                W = anchor_words(V_reduced, 'L2', cfg)
                print('Solving for H')
                H = linalg.solve(
                    np.dot(W.T, W) + np.eye(W.shape[1]) * eps,
                    np.dot(W.T, V_reduced))
                H[H < eps] = 0
                H = normalize_cols(H)
                W = restore_cluster(W, labels, cfg)
            elif cfg['prepare_method'] == 10:
                centroids, labels = reduce_multi_cluster(
                    V, cfg['num_clusters'], cfg)
                W = anchor_words(centroids, 'L2', cfg)
                print('Solving for H')
                H = linalg.solve(
                    np.dot(W.T, W) + np.eye(W.shape[1]) * eps,
                    np.dot(W.T, normalize_cols(centroids)))
                H[H < eps] = 0
                H = normalize_cols(H)
                #W = restore_multi_cluster(W, labels, cfg)
                W = linalg.solve(
                    dot(H, H.T) + eye(H.shape[0]) * eps, dot(H, V.T)).T
                W[W < eps] = 0
                W = normalize_cols(W)
        else:
            (W, H) = gen_init(cfg)
            #cur_frob = measure.frobenius(V, np.dot(W, H))
            #for init_it in xrange(200):
            #    W_new, H_new = gen_init(cfg)
            #    if measure.frobenius(V, np.dot(W_new, H_new)) #< cur_frob:
            #        W = deepcopy(W_new)
            #        H = deepcopy(H_new)
        se = time() - st
        print('Preparing took time:', timedelta(seconds=se))
        #labels=None
        #print('Preparing data...')
        #centroids, labels = reduce_cluster(V, cfg['T'], cfg)
        #H = centroids
        #H[H < eps] = 0
        #H = normalize_cols(H)
        #print('Solving for W')
        #W = linalg.solve(dot(H, H.T) + eye(H.shape[0]) * eps, dot(H, V.T)).T
        #W[W < eps] = 0
        #W = normalize_cols(W)

        #centroids, labels = reduce_cluster(V, cfg['num_clusters'], cfg)
        #W = anchor_words(centroids, 'L2', cfg)
        #print('Solving for H')
        #H = linalg.solve(np.dot(W.T, W) + np.eye(W.shape[1]) * eps, np.dot(W.T, normalize_cols(centroids)))
        #H[H < eps] = 0
        #H = normalize_cols(H)
        #W = restore_cluster(W, labels, cfg)

        #W = anchor_words(V, 'L2', cfg)
        #print('Solving for H')
        #H = linalg.solve(np.dot(W.T, W) + np.eye(W.shape[1]) * eps, np.dot(W.T, V))
        #H[H < eps] = 0
        #H = normalize_cols(H)

        #red = reduce_tsne(V, to_dim=3)
        #centroids, labels = reduce_cluster(red, cfg['num_clusters'], cfg)
        #print('c', centroids.shape, 'l', labels.shape)
        #nearest_words = find_nearest(red, centroids, labels)
        #print('nw:', nearest_words.shape)
        #V_reduced = V[nearest_words, :]
        #print('Vr', V_reduced.shape)
        #W = anchor_words(V_reduced, 'L2', cfg)
        #print('Solving for H')
        #H = linalg.solve(np.dot(W.T, W) + np.eye(W.shape[1]) * eps, np.dot(W.T, V_reduced))
        #H[H < eps] = 0
        #H = normalize_cols(H)
        #W = restore_cluster(W, labels, cfg)

        if cfg['compare_prepare'] > 0:
            if r > 0:
                print('Preparing data...')
                if r == 1:
                    W = anchor_words(V, 'L2', cfg)
                    print('Solving for H')
                    H = linalg.solve(
                        np.dot(W.T, W) + np.eye(W.shape[1]) * eps,
                        np.dot(W.T, V))
                    H[H < eps] = 0
                    H = normalize_cols(H)
                elif r == 2:
                    centroids, labels = reduce_cluster(V, cfg['T'], cfg)
                    H = centroids
                    H[H < eps] = 0
                    H = normalize_cols(H)
                    print('Solving for W')
                    W = linalg.solve(
                        dot(H, H.T) + eye(H.shape[0]) * eps, dot(H, V.T)).T
                    W[W < eps] = 0
                    W = normalize_cols(W)
                elif r == 3:
                    centroids, labels = reduce_cluster(V, cfg['num_clusters'],
                                                       cfg)
                    W = anchor_words(centroids, 'L2', cfg)
                    print('Solving for H')
                    H = linalg.solve(
                        np.dot(W.T, W) + np.eye(W.shape[1]) * eps,
                        np.dot(W.T, normalize_cols(centroids)))
                    H[H < eps] = 0
                    H = normalize_cols(H)
                    W = restore_cluster(W, labels, cfg)
                elif r >= 4 and r <= 6:
                    if r == 4:
                        red = reduce_tsne(V, to_dim=4)
                    elif r == 5:
                        red = reduce_tsne(V, to_dim=3)
                    elif r == 6:
                        red = reduce_tsne(V, to_dim=2)
                    centroids, labels = reduce_cluster(red,
                                                       cfg['num_clusters'],
                                                       cfg)
                    nearest_words = find_nearest(red, centroids, labels)
                    V_reduced = V[nearest_words, :]
                    W = anchor_words(V_reduced, 'L2', cfg)
                    print('Solving for H')
                    H = linalg.solve(
                        np.dot(W.T, W) + np.eye(W.shape[1]) * eps,
                        np.dot(W.T, V_reduced))
                    H[H < eps] = 0
                    H = normalize_cols(H)
                    W = restore_cluster(W, labels, cfg)
        if cfg['compare_methods'] > 0:
            cfg['schedule'] = methods[r % nmethods]
        start = time()
        (val, hdist, it, W, H, status) = run(V, W, H, W_r, H_r, cfg)
        stop = time()
        print('Run time:', timedelta(seconds=stop - start))
        exp_time[r] = stop - start
        res[r] = val
        hdist_runs[r] = hdist
        if cfg['print_lvl'] > 0:
            print('  Result:', val[-1, :])
        for i, fun_name in enumerate(cfg['finals'].split(',')):
            #val = np.array([r[:, i] for r in res])
            fun = getattr(measure, fun_name)
            name, val = fun(W, H)
            print(name, ':', val)
    print(cfg['experiment'])
    if cfg['experiment'] == '':
        exp_name = 'test'
    else:
        exp_name = cfg['experiment']
    #if cfg['save_results']:
    #    if cfg['save_file']:
    #        results_file = cfg['save_file']
    #    with open(results_file, 'w') as rf:
    #        print_head(rf)
    #        print('# Generated on {}'.format(datetime.today()), file=rf)
    #        print('# Experiments config:', file=rf)
    #        print('#   Number of experiments: {}'.format(cfg['runs']), file=rf)
    #        print('#   Methods schedule: {}'.format(cfg['schedule']), file=rf)
    #        print('#   Iterations number: {}'.format(cfg['max_iter']), file=rf)
    #        print('#   All experiments done in {}'.format(
    #            timedelta(seconds=sum(exp_time))), file=rf)
    #        print_head(rf)
    #        for r in range(cfg['runs']):
    #            print('# Run #{}. Done in {}'.format(r+1,
    #                timedelta(seconds=exp_time[r])), file=rf)
    #            [print(val, file=rf) for val in res[:, r]]
    #            print_head(rf)
    if cfg['show_results']:
        if not os.path.exists(cfg['result_dir']):
            os.makedirs(cfg['result_dir'])
        np.savetxt(join(cfg['result_dir'], cfg['experiment'] + '_W.csv'), W)
        #show_topics(W, 25, vocab=vocab)
        save_topics(W,
                    join(cfg['result_dir'], cfg['experiment'] + '_topics.txt'),
                    vocab)
        #plot_matrix(V, 'Documents', labels=labels, vocab=vocab)
        #filename = os.path.join(cfg['result_dir'], cfg['experiment']+'_V.eps')
        #plt.savefig(filename, format='eps')
        #plot_matrix(W, u'Распределение слов в темах', labels, vocab)
        #filename = os.path.join(cfg['result_dir'], cfg['experiment']+'_W.pdf')
        #plt.savefig(filename, format='pdf')

        for i, fun_name in enumerate(cfg['measure'].split(',')):
            val = np.array([r[:, i] for r in res])
            fun = getattr(measure, fun_name + '_name')
            plot_measure(val.T, fun())
            filename = os.path.join(
                cfg['result_dir'], cfg['experiment'] + '_' + fun_name + '.pdf')
            plt.savefig(filename, format='pdf')
        if cfg['compare_real']:
            print('Hellinger res:', hdist_runs[0][-1, 0])
            plot_measure(
                np.array([r[:, 0] for r in hdist_runs]).T,
                measure.hellinger_name())
            show_matrices_recovered(W_r, H_r, W, H, cfg, permute=True)
            #plt.savefig('tm_tests/recovered_cnmf_' + tp + '.eps', format='eps')
        #plt.show()
    return res
Example #11
def plsa(V, W, H, post='', cfg=config.default_config()):
    eps = cfg['eps']
    tmp = V / maximum(dot(W, H), eps)
    H = normalize_cols(H * dot(W.T, tmp))
    W = normalize_cols(W * dot(tmp, H.T))
    return W, H
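A minimal EM loop over these multiplicative updates (note H is refreshed first and W then uses the new H; dot and maximum inside plsa are numpy's, imported at module level in the package). The toy setup mirrors the grad_desc sketch in Example #5 and again assumes the hypothetical normalize_cols:

import numpy as np

V = normalize_cols(np.random.rand(200, 100))   # column-stochastic term-document matrix
W = normalize_cols(np.random.rand(200, 10))
H = normalize_cols(np.random.rand(10, 100))
for _ in range(50):
    W, H = plsa(V, W, H, cfg={'eps': 1e-12})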
Example #12
def restore_cluster(W_reduced, labels, params):
    W = zeros((params['N'], params['T']))
    for word, label in enumerate(labels):
        W[word, :] = W_reduced[label, :]
    return normalize_cols(W)
Example #13
def plsa(F, Phi, Theta, post='', cfg=config.default_config()):
    eps = cfg['eps']
    tmp = F / maximum(dot(Phi, Theta), eps)
    Theta, Phi = normalize_cols(Theta * dot(Phi.T, tmp)), normalize_cols(Phi * dot(tmp, Theta.T))
    return Phi, Theta
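Note the tuple assignment: both updates here are computed from the old Phi and Theta, i.e. a simultaneous EM step, whereas Example #11 updates H first and then uses the new H when updating W.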
Example #14
def initialize_matrices(i, F, cfg=config.default_config()):
    """Initialize matrices Phi Theta.
       - Return:
       Phi
       Theta
       - Used params:
       prepare_method
    """
    if (int(cfg['prepare_method'].split(',')[i]) == 1):
        print("Arora")
        eps = cfg['eps']
        F_norm = normalize_cols(F)
        Phi = prepare.anchor_words(F_norm, 'L2', cfg)
        print('Solving for Theta')
        Theta = np.linalg.solve(np.dot(Phi.T, Phi) + np.eye(Phi.shape[1]) * eps, np.dot(Phi.T, F_norm))
        Theta[Theta < eps] = 0
        Theta = normalize_cols(Theta)
        return Phi, Theta
    elif (int(cfg['prepare_method'].split(',')[i]) == 2):
        print("Random rare")
        cfg['phi_sparsity'] = 0.05
        cfg['theta_sparsity'] = 0.1
        return gen_init(cfg)
    elif (int(cfg['prepare_method'].split(',')[i]) == 3):
        print("Random uniform")
        cfg['phi_sparsity'] = 1.
        cfg['theta_sparsity'] = 1.
        return gen_init(cfg)
    elif (int(cfg['prepare_method'].split(',')[i]) == 4):
        eps = cfg['eps']
        F_norm = normalize_cols(F)
        print("Clustering of words")
        centroids, labels = prepare.reduce_cluster(F_norm, cfg['T'], cfg)
        Theta = centroids
        Theta[Theta < eps] = 0
        Theta = normalize_cols(Theta)
        print('Solving for Phi')
        Phi = np.transpose(np.linalg.solve(np.dot(Theta, Theta.T) + np.eye((Theta.T).shape[1]) * eps, np.dot(Theta, F_norm.T)))
        Phi[Phi < eps] = 0
        Phi = normalize_cols(Phi)
        return Phi, Theta
    elif (int(cfg['prepare_method'].split(',')[i]) == 5):
        eps = cfg['eps']
        F_norm = normalize_cols(F)
        print("SVD init")
        U, s, V = np.linalg.svd(F_norm)
        Phi, Theta = construct_from_svd(U, s, V, cfg)
        return Phi, Theta
    elif (int(cfg['prepare_method'].split(',')[i]) == 6):
        eps = cfg['eps']
        transformer = TfidfTransformer()
        transformer.fit(F)
        F_tfidf = (transformer.transform(F)).toarray()
        print("Clustering of tf-idf")
        centroids, labels = prepare.reduce_cluster(F_tfidf, cfg['T'], cfg)
        Theta = centroids
        Theta[Theta < eps] = 0
        Theta = normalize_cols(Theta)
        print('Solving for Phi')
        Phi = np.transpose(np.linalg.solve(np.dot(Theta, Theta.T) + np.eye((Theta.T).shape[1]) * eps, np.dot(Theta, F_tfidf.T)))
        Phi[Phi < eps] = 0
        Phi = normalize_cols(Phi)
        return Phi, Theta
    elif (int(cfg['prepare_method'].split(',')[i]) == 7):
        eps = cfg['eps']
        F_norm = normalize_cols(F)
        print("Clustering of words mixed")
        centroids, labels = prepare.reduce_cluster(F_norm, cfg['T'], cfg)
        Theta = centroids
        Theta[Theta < eps] = 0
        Theta = normalize_cols(Theta)
        print('Solving for Phi')
        Phi = np.transpose(np.linalg.solve(np.dot(Theta, Theta.T) + np.eye((Theta.T).shape[1]) * eps, np.dot(Theta, F_norm.T)))
        Phi[Phi < eps] = 0
        Phi = normalize_cols(Phi)
        cfg['phi_sparsity'] = 1.
        cfg['theta_sparsity'] = 1.
        Phi1, Theta1 = gen_init(cfg)
        zzz = 0.3
        return zzz*Phi1+(1.-zzz)*Phi, zzz*Theta1+(1.-zzz)*Theta
    elif (int(cfg['prepare_method'].split(',')[i]) == 8):
        print("Arora mixed")
        eps = cfg['eps']
        F_norm = normalize_cols(F)
        Phi = prepare.anchor_words(F_norm, 'L2', cfg)
        print('Solving for Theta')
        Theta = np.linalg.solve(np.dot(Phi.T, Phi) + np.eye(Phi.shape[1]) * eps, np.dot(Phi.T, F_norm))
        Theta[Theta < eps] = 0
        Theta = normalize_cols(Theta)
        cfg['phi_sparsity'] = 1.
        cfg['theta_sparsity'] = 1.
        Phi1, Theta1 = gen_init(cfg)
        zzz = 0.3
        return zzz*Phi1+(1.-zzz)*Phi, zzz*Theta1+(1.-zzz)*Theta
    elif (int(cfg['prepare_method'].split(',')[i]) == 9):
        print("Arora unifrom")
        eps = cfg['eps']
        F_norm = normalize_cols(F)
        Phi = prepare.anchor_words(F_norm, 'L2', cfg)
        print('Solving for Theta')
        Theta = np.ones((Phi.shape[1], F.shape[1]))
        Theta = normalize_cols(Theta)
        return Phi, Theta
    elif (int(cfg['prepare_method'].split(',')[i]) == 10):
        eps = cfg['eps']
        F_norm = normalize_cols(F)
        print("Clustering of docs")
        centroids, labels = prepare.reduce_cluster(F_norm.T, cfg['T'], cfg)
        Phi = centroids.T
        Phi[Phi < eps] = 0
        Phi = normalize_cols(Phi)
        print('Solving for Theta')
        Theta = np.linalg.solve(np.dot(Phi.T, Phi) + np.eye(Phi.shape[1]) * eps, np.dot(Phi.T, F_norm))
        Theta[Theta < eps] = 0
        Theta = normalize_cols(Theta)
        return Phi, Theta
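'prepare_method' is read as a comma-separated list indexed by the run number i, so successive runs can start from different initializations. A usage sketch; the concrete values are illustrative assumptions:

cfg = config.default_config()
cfg['prepare_method'] = '1,3,5'   # run 0: Arora anchors, run 1: random uniform, run 2: SVD init
cfg['T'] = 10                     # number of topics
Phi, Theta = initialize_matrices(0, F, cfg)   # F is the word-by-document count matrix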
Example #15
def restore_multi_cluster(W_reduced, labels, params):
    W = zeros((params['N'], params['T']))
    for word in range(W.shape[0]):
        # Average the reduced topic rows over all clusters assigned
        # to this word (axis=0 keeps the topic dimension).
        W[word, :] = mean(W_reduced[labels[word, :], :], axis=0)
    return normalize_cols(W)