def train(args):
    """Train an RBM on 1D transverse-field Ising model (TFIM) data."""

    # Simulation parameters
    B = args.B                      # magnetic field
    num_visible = args.L            # number of visible nodes
    num_hidden = args.nH            # number of hidden nodes
    nsteps = args.steps             # training steps
    bsize = args.bs                 # batch size
    learning_rate_b = args.lr       # base learning rate
    num_gibbs = args.CD             # number of Gibbs iterations
    num_samples = args.nC           # number of chains in PCD
    weights = None                  # weights
    visible_bias = None             # visible bias
    hidden_bias = None              # hidden bias
    bcount = 0                      # batch counter
    epochs_done = 1                 # epochs counter
 
    # Loading the data
    train_dir = '../data/tfim1d/datasets/'  # location of training data
    trainName = train_dir + 'tfim1d_L' + str(args.L) + '_B' + str(B) + '_train.txt'
    testName = train_dir + 'tfim1d_L' + str(args.L) + '_B' + str(B) + '_test.txt'
    xtrain = np.loadtxt(trainName)
    xtest = np.loadtxt(testName)
    
    ept = np.random.permutation(xtrain)  # random permutation of training data
    epv = np.random.permutation(xtest)   # random permutation of test data
    iterations_per_epoch = xtrain.shape[0] // bsize  # integer division, so the epoch check below is exact

    # Initialize RBM class
    rbm = RBM(num_hidden=num_hidden, num_visible=num_visible,
              weights=weights, visible_bias=visible_bias,
              hidden_bias=hidden_bias, num_samples=num_samples)
    
    # Initialize operations and placeholders classes
    ops = Ops()
    placeholders = Placeholders()

    placeholders.visible_samples = tf.placeholder(tf.float32, shape=(None, num_visible), name='v') # placeholder for training data

    total_iterations = 0 # starts at zero 
    ops.global_step = tf.Variable(total_iterations, name='global_step_count', trainable=False)

    learning_rate = tf.train.exponential_decay(
        learning_rate_b,
        ops.global_step,
        100 * xtrain.shape[0] / bsize,  # decay steps
        1.0  # decay rate = 1 means no decay
    )

    cost = rbm.neg_log_likelihood_grad(placeholders.visible_samples, num_gibbs=num_gibbs)
    optimizer = tf.train.AdamOptimizer(learning_rate, epsilon=1e-2)

    # define operations
    ops.lr = learning_rate
    ops.train = optimizer.minimize(cost, global_step=ops.global_step)
    ops.init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    
    logZ = rbm.exact_log_partition_function()
    placeholders.logZ = tf.placeholder(tf.float32) 
    NLL = rbm.neg_log_likelihood(placeholders.visible_samples, placeholders.logZ)
    path_to_wf = '../data/tfim1d/wavefunctions/wavefunction_tfim1d_L'+str(args.L)+'_B'+str(B)+'.txt'
    wf = np.loadtxt(path_to_wf)  # exact ground-state wavefunction
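    # psi_x below is the unnormalized RBM amplitude exp(F(v)/2) (with this
    # code's sign convention for free_energy); it is divided by sqrt(Z) later.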
    psi_x = tf.exp(0.5*rbm.free_energy(placeholders.visible_samples))
    all_v_states = np.array(list(it.product([0, 1], repeat=num_visible)), dtype=np.float32)
    
    # Observer file (line-buffered so results are flushed as training runs)
    observer_file = open('../data/tfim1d/observables/training_observer.txt', 'w', 1)
    observer_file.write('#      O')
    observer_file.write('        NLL')
    observer_file.write('         <H>')
    observer_file.write('     <|Sz|>')
    observer_file.write('       <Sx>')
    observer_file.write('\n')
    gibb_updates = 10
    observer_samples = rbm.observer_sampling(gibb_updates)
    nbins = 100
    
    e = np.zeros(num_samples)   # per-chain energies
    sX = np.zeros(num_samples)  # per-chain transverse magnetizations
    L = num_visible
    
    with tf.Session() as sess:
        sess.run(ops.init)
        
        for ii in range(nsteps):
            if bcount*bsize + bsize >= xtrain.shape[0]:
                bcount = 0
                ept = np.random.permutation(xtrain)

            batch = ept[bcount*bsize: bcount*bsize + bsize, :]
            bcount += 1
            feed_dict = {placeholders.visible_samples: batch}
            
            _, num_steps = sess.run([ops.train, ops.global_step], feed_dict=feed_dict)

            if num_steps % iterations_per_epoch == 0:
                print('Epoch = %d     ' % epochs_done, end='')
                lz = sess.run(logZ)
                nll = sess.run(NLL, feed_dict={placeholders.visible_samples: epv, placeholders.logZ: lz})
                psix = sess.run(psi_x, feed_dict={placeholders.visible_samples: all_v_states})
 
                Ov = 0.0
                E = 0.0
                Sz = 0.0
                Sx = 0.0
                SzSz = np.zeros((L,L))

                # Normalize the RBM amplitudes by sqrt(Z) and accumulate the
                # overlap Ov = sum_v psi_target(v) * psi_RBM(v) with the exact
                # ground-state wavefunction loaded from file
                for i in range(1 << num_visible):
                    psix[i] /= m.exp(0.5*lz)
                    Ov += wf[i]*psix[i]
                
                for ibin in range(nbins):

                    # Gibbs sampling
                    samples = sess.run(observer_samples)
                    spins = np.asarray(2*samples - 1)
                    
                    # Compute average of longitudinal magnetizations
                    sZ_avg = np.mean(np.absolute(np.sum(spins,axis=1)))
                    
                    # Compute averages of energies
                    e.fill(0.0)
                    sX.fill(0.0)

                    for k in range(num_samples):
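                        # Pack the binary configuration samples[k] into an integer
                        # (big-endian bit string) to index its amplitude in psix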
                        state = int(samples[k].dot(1 << np.arange(samples[k].size)[::-1]))
                        
                        # Compute the average of transverse magnetization
                        for i in range(L):
                            samples[k,i] = 1 - samples[k,i]
                            state_flip = int(samples[k].dot(1 << np.arange(samples[k].size)[::-1]))
                            sX[k] += float(psix[state_flip])/float(psix[state])
                            samples[k,i] = 1 - samples[k,i]
                        
                        # Compute the correlations ZZ
                        for i in range(L):
                            for j in range(L):
                                SzSz[i,j] += spins[k,i]*spins[k,j]/float(num_samples*nbins)

                        # Compute the Energy
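                        # (diagonal bond terms -s_i s_{i+1}, plus the off-diagonal
                        # transverse-field term estimated from wavefunction ratios
                        # psi(v')/psi(v) for single spin flips; the periodic
                        # boundary bond (L-1, 0) is handled after this loop)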
                        for i in range(L-1):
                            e[k] += -spins[k,i]*spins[k,i+1]
                            samples[k,i] = 1 - samples[k,i]
                            state_flip = int(samples[k].dot(1 << np.arange(samples[k].size)[::-1]))
                            e[k] += -B*psix[state_flip]/psix[state]
                            samples[k,i] = 1 - samples[k,i]
                        e[k] += -spins[k,L-1]*spins[k,0]
                        samples[k,L-1] = 1 - samples[k,L-1]
                        state_flip = int(samples[k].dot(1 << np.arange(samples[k].size)[::-1]))
                        e[k] += -B*psix[state_flip]/psix[state]
                        samples[k,L-1] = 1 - samples[k,L-1]
                    sX_avg = np.mean(sX) 
                    e_avg = np.mean(e)
                    
                    # running means over bins; use ibin, not the inner loop index i
                    E += (e_avg - E)/float(ibin + 1)
                    Sz += (sZ_avg - Sz)/float(ibin + 1)
                    Sx += (sX_avg - Sx)/float(ibin + 1)
 
                               
                # Print observer on screen
                print('Ov = %.6f     ' % Ov, end='')
                print('NLL = %.6f     ' % nll, end='')
                print('<H> = %.6f     ' % (E/float(L)), end='')
                print('<|Sz|> = %.6f     ' % (Sz/float(L)), end='')
                print('<Sx> = %.6f     ' % (Sx/float(L)), end='')
                print()
 
                # Save observer on file
                observer_file.write('%.6f   ' % Ov)
                observer_file.write('%.6f   ' % nll)
                observer_file.write('%.6f   ' % (E/float(L)))
                observer_file.write('%.6f   ' % (Sz/float(L)))
                observer_file.write('%.6f   ' % (Sx/float(L)))
                observer_file.write('\n')
                
                #observer_corr_file=open('../data/tfim1d/observables/training_observer_corr.txt','w')
                #for i in range(L):
                #    for j in range(L):
                #        observer_corr_file.write('%.6f  ' % SzSz[i,j])
                #    observer_corr_file.write('\n')

                epochs_done += 1
def train(args):
    """Train an RBM on 2D classical Ising model data at temperature T.

    Note: shares its name with the TFIM train() above; the two presumably
    live in separate driver scripts.
    """

    # Simulation parameters
    T = args.T                      # temperature
    num_visible = args.L*args.L     # number of visible nodes
    num_hidden = args.nH            # number of hidden nodes
    nsteps = args.steps             # training steps
    bsize = args.bs                 # batch size
    learning_rate_b = args.lr       # base learning rate
    num_gibbs = args.CD             # number of Gibbs iterations
    num_samples = args.nC           # number of chains in PCD
    weights = None                  # weights
    visible_bias = None             # visible bias
    hidden_bias = None              # hidden bias
    bcount = 0                      # batch counter
    epochs_done = 1                 # epochs counter
 
    # Loading the data
    train_dir = '../data/ising2d/datasets/'  # location of training data
    trainName = train_dir + 'ising2d_L' + str(args.L) + '_T' + str(T) + '_train.txt'
    testName = train_dir + 'ising2d_L' + str(args.L) + '_T' + str(T) + '_test.txt'
    xtrain = np.loadtxt(trainName)
    xtest = np.loadtxt(testName)
    
    ept = np.random.permutation(xtrain)  # random permutation of training data
    epv = np.random.permutation(xtest)   # random permutation of test data
    iterations_per_epoch = xtrain.shape[0] // bsize  # integer division, so the epoch check below is exact

    # Initialize RBM class
    rbm = RBM(num_hidden=num_hidden, num_visible=num_visible,
              weights=weights, visible_bias=visible_bias,
              hidden_bias=hidden_bias, num_samples=num_samples)
    
    # Initialize operations and placeholders classes
    ops = Ops()
    placeholders = Placeholders()
    placeholders.visible_samples = tf.placeholder(tf.float32, shape=(None, num_visible), name='v') # placeholder for training data

    total_iterations = 0 # starts at zero 
    ops.global_step = tf.Variable(total_iterations, name='global_step_count', trainable=False)
    learning_rate = tf.train.exponential_decay(
        learning_rate_b,
        ops.global_step,
        100 * xtrain.shape[0] / bsize,  # decay steps
        1.0  # decay rate = 1 means no decay
    )
    
    cost = rbm.neg_log_likelihood_grad(placeholders.visible_samples, num_gibbs=num_gibbs)
    optimizer = tf.train.AdamOptimizer(learning_rate, epsilon=1e-2)
    ops.lr = learning_rate
    ops.train = optimizer.minimize(cost, global_step=ops.global_step)
    ops.init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    logZ = rbm.exact_log_partition_function()
    placeholders.logZ = tf.placeholder(tf.float32)
    NLL = rbm.neg_log_likelihood(placeholders.visible_samples, placeholders.logZ)
    # exact Boltzmann distribution (note: file name is hardcoded to L=4)
    path_to_distr = '../data/ising2d/boltzmann_distributions/distribution_ising2d_L4_T'+str(T)+'.txt'
    boltz_distr = np.loadtxt(path_to_distr)
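    # p_x below is the unnormalized RBM probability exp(F(v)) (with this
    # code's sign convention for free_energy); log Z is subtracted when used.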
    p_x = tf.exp(rbm.free_energy(placeholders.visible_samples))
    all_v_states = np.array(list(it.product([0, 1], repeat=num_visible)), dtype=np.float32)
    
    # Observer file
    observer_file = open('../data/ising2d/observables/training_observer.txt', 'w', 1)  # line-buffered
    observer_file.write('#      O')
    observer_file.write('         NLL')
    observer_file.write('         <E>')
    observer_file.write('      <|M|>')
    observer_file.write('\n')
    gibb_updates = 10
    observer_samples = rbm.observer_sampling(gibb_updates)
    nbins = 100
    
    # Build lattice
    path_to_lattice = '../data/ising2d/lattice2d_L'+str(args.L)+'.txt'
    nn = np.loadtxt(path_to_lattice)  # nn[i,0], nn[i,1]: two neighbours of site i, so each bond enters the energy once
    e = np.zeros(num_samples)  # per-chain energies
    N = num_visible
    E = 0.0
    M = 0.0
    
    with tf.Session() as sess:
        sess.run(ops.init)
        
        for ii in range(nsteps):
            if bcount*bsize + bsize >= xtrain.shape[0]:
                bcount = 0
                ept = np.random.permutation(xtrain)

            batch = ept[bcount*bsize: bcount*bsize + bsize, :]
            bcount += 1
            feed_dict = {placeholders.visible_samples: batch}
            
            _, num_steps = sess.run([ops.train, ops.global_step], feed_dict=feed_dict)

            if num_steps % iterations_per_epoch == 0:
                print('Epoch = %d     ' % epochs_done, end='')
                lz = sess.run(logZ)
                nll = sess.run(NLL, feed_dict={placeholders.visible_samples: epv, placeholders.logZ: lz})
                px = sess.run(p_x, feed_dict={placeholders.visible_samples: all_v_states})
                
                Ov = 0.0
                E = 0.0
                M = 0.0
                
                # Overlap observable: KL divergence between the exact Boltzmann
                # distribution and the RBM, Ov = sum_v p(v) [log p(v) - log p_RBM(v)]
                for i in range(1 << num_visible):
                    Ov += boltz_distr[i]*m.log(boltz_distr[i])
                    Ov += -boltz_distr[i]*(m.log(px[i]) - lz)
                
                for ibin in range(nbins):

                    # Gibbs sampling
                    samples = sess.run(observer_samples)
                    spins = np.asarray(2*samples - 1)

                    # Compute averages of magnetizations
                    m_avg = np.mean(np.absolute(np.sum(spins, axis=1)))

                    # Compute averages of energies
                    e.fill(0.0)
                    for k in range(num_samples):
                        for i in range(N):
                            e[k] += -spins[k,i]*(spins[k,int(nn[i,0])] + spins[k,int(nn[i,1])])
                    e_avg = np.mean(e)
                    # running means over bins; use ibin, not the inner loop index i
                    E += (e_avg - E)/float(ibin + 1)
                    M += (m_avg - M)/float(ibin + 1)
              
                # Print observer on screen
                print('Ov = %.6f     ' % Ov, end='')
                print('NLL = %.6f     ' % nll, end='')
                print('<E> = %.6f     ' % (E/float(N)), end='')
                print('<|M|> = %.6f     ' % (M/float(N)), end='')
                
                # Save observer on file
                observer_file.write('%.6f   ' % Ov)
                observer_file.write('%.6f   ' % nll)
                observer_file.write('%.6f   ' % (E/float(N)))
                observer_file.write('%.6f   ' % (M/float(N)))
                observer_file.write('\n')
 
                print()
                #save_parameters(sess, rbm)
                epochs_done += 1
def sample(args):
    """Sample a trained TFIM RBM and measure observables."""

    # Architecture
    B = args.B                      # magnetic field
    num_visible = args.L            # number of visible nodes
    num_hidden = args.nH            # number of hidden nodes
    
    # Load the RBM parameters
    path_to_params = '../data/tfim1d/parameters/parameters_nH'+str(num_hidden)+'_L'+str(args.L)+'_B'+str(B)+'.npz'
    params = np.load(path_to_params)
    weights = params['weights']
    visible_bias = params['visible_bias']
    hidden_bias = params['hidden_bias']
    hidden_bias = np.reshape(hidden_bias, (hidden_bias.shape[0], 1))
    visible_bias = np.reshape(visible_bias, (visible_bias.shape[0], 1))
    
    # Sampling parameters
    num_samples = 1000  # how many independent chains will be sampled
    gibb_updates = 10   # how many Gibbs updates per call to the Gibbs sampler
    nbins = 1000        # number of calls to the RBM sampler

    # Initialize RBM class
    rbm = RBM(num_hidden=num_hidden, num_visible=num_visible,
              weights=weights, visible_bias=visible_bias,
              hidden_bias=hidden_bias, num_samples=num_samples)
    hsamples, vsamples = rbm.stochastic_maximum_likelihood(gibb_updates)
    placeholders = Placeholders()
    placeholders.visible_samples = tf.placeholder(tf.float32, shape=(None, num_visible), name='v')  # placeholder for visible configurations

    # Initialize tensorflow
    init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    logZ = rbm.exact_log_partition_function()
    placeholders.logZ = tf.placeholder(tf.float32) 
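    # psi_x: unnormalized RBM amplitude exp(F(v)/2) (with this code's sign
    # convention for free_energy); divided by sqrt(Z) after evaluation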
    psi_x = tf.exp(0.5*rbm.free_energy(placeholders.visible_samples))
    all_v_states = np.array(list(it.product([0, 1], repeat=num_visible)), dtype=np.float32)

    sX = np.zeros(num_samples)  # per-chain transverse magnetizations
    L = num_visible
    e = np.zeros(num_samples)   # per-chain energies
    with tf.Session() as sess:
        sess.run(init)
        lz = sess.run(logZ)
        psix = sess.run(psi_x, feed_dict={placeholders.visible_samples: all_v_states})
        # Ov = 0.0
        for i in range(1 << num_visible):
            psix[i] /= m.exp(0.5*lz)  # normalize amplitudes by sqrt(Z)
        #     Ov += wf[i]*psix[i]  # overlap (requires the target wavefunction wf)
 
        for ibin in range(nbins):
            print('bin %d\t\t' % ibin, end='')

            # Gibbs sampling
            _, samples = sess.run([hsamples, vsamples])
            spins = np.asarray(2*samples - 1)
            
            # Compute average of longitudinal magnetizations
            sZ_avg = np.mean(np.absolute(np.sum(spins,axis=1)))
            
            # Compute averages of energies
            e.fill(0.0)
            sX.fill(0.0)
            for k in range(num_samples):
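                # Pack the binary configuration samples[k] into an integer
                # (big-endian bit string) to index its amplitude in psix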
                state = int(samples[k].dot(1 << np.arange(samples[k].size)[::-1]))
                # Compute the average of transverse magnetization
                for i in range(L):
                    samples[k,i] = 1 - samples[k,i]
                    state_flip = int(samples[k].dot(1 << np.arange(samples[k].size)[::-1]))
                    sX[k] += float(psix[state_flip])/float(psix[state])
                    samples[k,i] = 1 - samples[k,i]
            
                # Compute the Energy
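                # (diagonal bond terms -s_i s_{i+1}, plus the off-diagonal
                # transverse-field term estimated from wavefunction ratios
                # psi(v')/psi(v) for single spin flips; the periodic boundary
                # bond (L-1, 0) is handled after this loop)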
                for i in range(L-1):
                    e[k] += -spins[k,i]*spins[k,i+1]
                    samples[k,i] = 1 - samples[k,i]
                    state_flip = int(samples[k].dot(1 << np.arange(samples[k].size)[::-1]))
                    e[k] += -B*psix[state_flip]/psix[state]
                    samples[k,i] = 1 - samples[k,i]
                e[k] += -spins[k,L-1]*spins[k,0]
                samples[k,L-1] = 1 - samples[k,L-1]
                state_flip = int(samples[k].dot(1 << np.arange(samples[k].size)[::-1]))
                e[k] += -B*psix[state_flip]/psix[state]
                samples[k,L-1] = 1 - samples[k,L-1]
            
            e_avg = np.mean(e)
            sX_avg = np.mean(sX) 
 
            # Print observer on screen
            print('<H> = %.6f     ' % (e_avg/float(L)), end='')
            print('<|Sz|> = %.6f     ' % (sZ_avg/float(L)), end='')
            print('<Sx> = %.6f     ' % (sX_avg/float(L)), end='')
            print()
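

# A minimal sketch of a command-line entry point for the sampling routine,
# assuming this script defines sample() as above. The flag names mirror the
# args.* attributes the code reads (args.L, args.B, args.nH); the defaults
# here are illustrative assumptions, not values from the original scripts.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('-L', type=int, default=10, help='number of visible spins')
    parser.add_argument('-B', type=float, default=1.0, help='transverse magnetic field')
    parser.add_argument('-nH', type=int, default=10, help='number of hidden units')
    sample(parser.parse_args())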