def write_cv_data(K, data_dir, idx, W, L, T, D, N, phi, alpha, beta, chains):
    """Generate a corpus and write K cross-validation splits of it.

    A count matrix is produced by ``generate_lda``.  For every row, each
    token occurrence is assigned to one of ``K`` folds; fold ``k`` then has
    a training count matrix (all tokens outside the fold) and a held-out
    count matrix (the tokens inside the fold).  The held-out matrices are
    pickled to ``counts_<k>.pkl`` in ``data_dir``; the training matrices are
    handed to the ``write_*_cv`` writers with ``write_params=False``.

    Args:
        K: number of folds.
        data_dir: directory the pickles are written to; also forwarded to
            the writers.
        idx: forwarded unchanged to the writers.
        W: number of columns of the count matrices (term indices).
        L: unused by this function.
        T, N, phi: forwarded to ``generate_lda``.
        D: number of rows of the count matrices (one row per document).
        alpha, beta: forwarded to ``generate_lda`` (alpha) and the writers.
        chains: forwarded to ``write_stan_cv``.

    NOTE(review): ``KFold`` is called once per row with that row's token
    count; rows with fewer than ``K`` tokens are presumably rejected by
    ``KFold`` -- confirm against the scikit-learn version in use.
    """
    B = generate_lda(T, W, D, N, phi, alpha)
    # split cv data
    B_sparse = csr_matrix(B)
    Bs = [dok_matrix((D, W), dtype=np.float32) for _ in range(K)]
    test_counts = [dok_matrix((D, W), dtype=np.float32) for _ in range(K)]
    for d in range(B_sparse.shape[0]):
        crow = B_sparse[d, :].tocoo()
        # Expand the row into one list entry per token occurrence so that
        # individual tokens (not whole terms) are distributed over folds.
        list_of_tokens = []
        for term_idx, count in zip(crow.col, crow.data):
            # int() keeps list repetition valid if the counts arrive with a
            # floating-point dtype.
            list_of_tokens += [term_idx] * int(count)
        list_of_tokens = list(np.random.permutation(np.array(list_of_tokens)))
        kf = KFold(len(list_of_tokens), n_folds=K)
        for k, (train, test) in enumerate(kf):
            # Tally the tokens of each side of the split back into counts.
            for target, indices in ((Bs[k], train), (test_counts[k], test)):
                tally = collections.Counter(list_of_tokens[i] for i in indices)
                for w, count in tally.items():
                    target[d, w] = count
    Bs = [csr_matrix(m) for m in Bs]
    test_counts = [csr_matrix(m) for m in test_counts]
    for i, counts in enumerate(test_counts):
        pkl_path = os.path.join(data_dir, 'counts_{}.pkl'.format(i))
        # Binary mode plus a context manager: the handle is always closed
        # and the pickle stream is never subject to text-mode translation.
        with open(pkl_path, 'wb') as fh:
            pickle.dump(counts, fh)
    write_pb_cv(data_dir, idx, W, T, D, alpha, beta, Bs, write_params=False)
    write_stan_cv(data_dir, idx, W, T, D, alpha, beta, Bs, chains=chains, write_params=False)
    write_prism_cv(data_dir, idx, W, T, D, alpha, beta, Bs, write_params=False)
    write_txt_cv(data_dir, idx, Bs, T, alpha, beta, write_params=False)
def write_data(data_dir, idx, W, L, T, D, N, phi, alpha, beta, chains):
    """Generate a corpus, pickle its counts, and write it in every format.

    The count matrix returned by ``generate_lda`` is pickled to
    ``counts.pkl`` in ``data_dir`` and then passed to the pb, Stan, PRISM
    and text writers using their default ``write_params`` setting.

    Args:
        data_dir: output directory for the pickle; also forwarded to the
            writers.
        idx: forwarded unchanged to the writers.
        W, T, D, alpha, beta: forwarded to ``generate_lda`` and/or the
            writers.
        L: unused by this function.
        N, phi: forwarded to ``generate_lda``.
        chains: forwarded to ``write_stan``.

    NOTE(review): another ``write_data`` with the same signature is defined
    directly after this one in the file and rebinds the name, so this
    version is not reachable by name after import -- confirm which
    definition is intended.
    """
    B = generate_lda(T, W, D, N, phi, alpha)
    # Binary mode plus a context manager: the handle is always closed and
    # the pickle stream is never subject to text-mode translation.
    with open(os.path.join(data_dir, 'counts.pkl'), 'wb') as fh:
        pickle.dump(B, fh)
    write_pb(data_dir, idx, W, T, D, alpha, beta, B)
    write_stan(data_dir, idx, W, T, D, alpha, beta, B, chains=chains)
    write_prism(data_dir, idx, W, T, D, alpha, beta, B)
    write_txt(data_dir, idx, B, T, alpha, beta)
def write_data(data_dir, idx, W, L, T, D, N, phi, alpha, beta, chains):
    """Generate a corpus and write it in every format without parameters.

    The count matrix from ``generate_lda`` is passed to the pb, Stan, PRISM
    and text writers, each called with ``write_params=False``.  ``L`` is
    accepted but not used.  ``chains`` is forwarded to ``write_stan`` only.
    """
    corpus = generate_lda(T, W, D, N, phi, alpha)
    # pb, Stan and PRISM writers share the same leading positional arguments.
    shared_args = (data_dir, idx, W, T, D, alpha, beta, corpus)
    write_pb(*shared_args, write_params=False)
    write_stan(*shared_args, chains=chains, write_params=False)
    write_prism(*shared_args, write_params=False)
    # The text writer takes its arguments in a different order.
    write_txt(data_dir, idx, corpus, T, alpha, beta, write_params=False)
def write_pb_church(data_dir, idx, W, L, T, D, N, phi, alpha, beta, n_samples, lag):
    """Generate a corpus and write it for the pb and both Church writers.

    The count matrix from ``generate_lda`` is passed to ``write_pb``,
    ``write_church`` and ``write_church2``.  ``L`` is accepted but not used.
    ``N`` is forwarded to ``write_church`` but not to ``write_church2``.
    """
    corpus = generate_lda(T, W, D, N, phi, alpha)
    write_pb(data_dir, idx, W, T, D, alpha, beta, corpus)
    # Both Church writers end with the same sampling arguments.
    sampling_args = (n_samples, lag)
    write_church(data_dir, idx, corpus, alpha, beta, D, T, W, N, *sampling_args)
    write_church2(data_dir, idx, corpus, alpha, beta, D, T, W, *sampling_args)