def data_gen():
    """Build the ground-truth topics and write the LDA data sets.

    Every setting is fixed inside the function.  The topics ``phi`` are
    L x L images: the first L topics each have one row filled with the
    uniform value 1/L, the remaining L topics each have one column filled
    with 1/L.  ``data_dir`` is handed to ``rewrite_dir`` and then
    ``write_data`` is called once per experiment index.
    """
    # --- experiment settings ---
    np.random.seed(1234)
    data_dir = 'data'
    n_exp = 1                   # number of experiments

    # --- LDA settings ---
    W = 25                      # word vocabulary
    L = int(np.sqrt(W))         # image size (side length)
    T = 2*L                     # topics
    D = 1000                    # documents
    N = 100                     # words per document
    alpha = 1.                  # hyper-param for mixture of topics (theta)
    beta = 1.                   # hyper-param for topic distribs (phi),
                                # used only as param in pb

    # --- stan ---
    chains = 1

    # phi holds the horizontal, then the vertical, topics on the L x L images
    weight = 1./L
    phi = []
    for t in range(T):
        topic = np.zeros((L, L))
        if t < L:
            # horizontal topic: row t carries all the mass
            topic[t] = weight
        else:
            # vertical topic: column (t - L) carries all the mass
            topic[:, t - L] = weight
        phi.append(topic)

    rewrite_dir(data_dir)
    for i in range(n_exp):
        write_data(data_dir, i, W, L, T, D, N, phi, alpha, beta, chains)
def data_gen():
    """Prepare the sushi3 data set and write it out as a ``.pb`` file.

    Passes 'data' to ``rewrite_dir``, runs ``get_data.sh`` through
    ``call_cmd`` (presumably this fetches the raw files -- the script is not
    visible here), reads the order file with ``read_data`` and hands the
    result to ``write_pb`` together with the fixed parameters K, beta and
    gamma.
    """
    # Input / output locations.
    source_path = 'data/sushi3/sushi3a.5000.10.order'
    target_path = 'data/sushi3/sushi3.pb'

    # Fixed model parameters; beta is derived from K.
    K = 6
    gamma = 0.1
    beta = 50./float(K)

    rewrite_dir('data')
    call_cmd('sh get_data.sh')

    N, data = read_data(source_path)
    write_pb(target_path, N, data, K, beta, gamma)
def data_gen(seed, n_exp, W, L, T, D, N, alpha, beta):
    """Build the ground-truth topics, plot them and write the data sets.

    Seeds NumPy's global random generator with ``seed``, then builds ``T``
    topics as L x L arrays: topic ``t`` has row ``t`` filled with 1/L while
    ``t < L``, and column ``t - L`` filled with 1/L afterwards.  The output
    directory 'data' is handed to ``rewrite_dir``, the topics are passed to
    ``plot_topics`` and ``write_pb_txt`` is called once for each experiment
    index in ``range(n_exp)``.

    W, D, N, alpha and beta are not used here; they are forwarded unchanged
    to ``write_pb_txt``.
    """
    np.random.seed(seed)
    out_dir = 'data'

    # phi holds the horizontal, then the vertical, topics on the L x L images
    phi = []
    for t in range(T):
        topic = np.zeros((L, L))
        if t < L:
            # horizontal topic: fill row t
            topic[t] = 1./L*np.ones(L)
        else:
            # vertical topic: fill column (t - L)
            col = int(t - L)
            topic[:, col] = 1./L*np.ones(L)
        phi.append(topic)

    rewrite_dir(out_dir)
    plot_topics(T, phi, 'data/lda_ground_phi')
    for i in range(n_exp):
        write_pb_txt(out_dir, i, W, L, T, D, N, phi, alpha, beta)