Ejemplo n.º 1
0
    def run_mbar(self, test_overlap = True):
        r"""Run the MBAR free energy estimate and cache the results on ``self``.

        An ``MBAR`` object is built from ``self._u_kln`` and ``self._N_k`` and
        the following attributes are (re)written:

        * ``_f_k``: the ``f_k`` attribute of the MBAR object.
        * ``_deltaF_mbar`` / ``_dDeltaF_mbar``: free energy difference between
          the first and the last lambda state, and its uncertainty.
        * ``_pmf_mbar``: one row per lambda with columns
          ``(lambda, f_k, uncertainty relative to state 0)``.
        * ``_pairwise_F``: one row per pair of neighbouring lambdas with
          columns ``(lambda_i, lambda_i+1, DeltaF, dDeltaF)``.
        * ``_overlap_matrix``: element ``[2]`` of ``computeOverlap()``; only
          written when ``test_overlap`` is true.

        Parameters
        ----------
        test_overlap : bool, default True
            When true, also compute and store the overlap matrix.
        """
        MBAR_obj = MBAR(self._u_kln, self._N_k, verbose=True)
        self._f_k = MBAR_obj.f_k
        try:
            (deltaF_ij, dDeltaF_ij, theta_ij) = MBAR_obj.getFreeEnergyDifferences()
        except ValueError:
            # The call returned something other than three values, so the
            # unpacking failed; ask for theta explicitly instead. Any other
            # error is a genuine failure and is allowed to propagate.
            (deltaF_ij, dDeltaF_ij, theta_ij) = MBAR_obj.getFreeEnergyDifferences(return_theta=True)

        n_lambda = self._lambda_array.shape[0]
        self._deltaF_mbar = deltaF_ij[0, n_lambda - 1]
        self._dDeltaF_mbar = dDeltaF_ij[0, n_lambda - 1]

        # Columns: lambda, f_k, uncertainty of the difference to state 0.
        self._pmf_mbar = numpy.zeros(shape=(n_lambda, 3))
        self._pmf_mbar[:, 0] = self._lambda_array
        self._pmf_mbar[:, 1] = self._f_k
        self._pmf_mbar[:, 2] = dDeltaF_ij[0]

        # First super-diagonal = differences between neighbouring states.
        self._pairwise_F = numpy.zeros(shape=(n_lambda - 1, 4))
        self._pairwise_F[:, 0] = self._lambda_array[:-1]
        self._pairwise_F[:, 1] = self._lambda_array[1:]
        self._pairwise_F[:, 2] = numpy.diag(deltaF_ij, 1)
        self._pairwise_F[:, 3] = numpy.diag(dDeltaF_ij, 1)

        # Optional data-overlap test.
        if test_overlap:
            overlap_matrix = MBAR_obj.computeOverlap()
            self._overlap_matrix = overlap_matrix[2]
Ejemplo n.º 2
0
def calcTension(energy_data, verbose=False):
    """Estimate a tension and its error from a two-state MBAR calculation.

    Columns 1 and 2 of ``energy_data`` are taken relative to column 0,
    divided by the module-level ``kTkJmol`` and used as the off-diagonal
    reduced energies of a 2-state ``u_kln`` array. Each state is subsampled
    with ``timeseries`` before MBAR is run.

    Parameters
    ----------
    energy_data : 2-D array with at least three columns
    verbose : bool
        Print progress information and the final result.

    Returns
    -------
    (tension, tensionError) : ``DeltaF_ij[0, 1]`` and ``dDeltaF_ij[0, 1]``
        scaled by ``1e18 * kT / da``.
    """
    shift_1 = energy_data[:, 1] - energy_data[:, 0]
    shift_2 = energy_data[:, 2] - energy_data[:, 0]
    reduced_1 = shift_1 / kTkJmol
    reduced_2 = shift_2 / kTkJmol

    nstates = 2
    nframes = len(shift_1)
    u_kln = np.zeros([nstates, nstates, nframes], np.float64)
    u_kln[0, 1, :] = reduced_1
    u_kln[1, 0, :] = reduced_2

    # Number of uncorrelated samples kept for each state.
    N_k = np.zeros([nstates], np.int32)
    for state in range(nstates):
        _, g, _ = timeseries.detectEquilibration(u_kln[state, state, :])
        kept = timeseries.subsampleCorrelatedData(u_kln[state, state, :], g=g)
        n_kept = len(kept)
        N_k[state] = n_kept
        # Pack the kept frames at the front of the frame axis.
        u_kln[state, :, 0:n_kept] = u_kln[state, :, kept].T
    if verbose:
        print("...found {} uncorrelated samples out of {} total samples...".
              format(N_k, nframes))
        print("=== Computing free energy differences ===")

    estimator = MBAR(u_kln, N_k)
    DeltaF_ij, dDeltaF_ij, _ = estimator.getFreeEnergyDifferences()

    # (in J/m^2). note da already has a factor of two for the two areas!
    tension = DeltaF_ij[0, 1] / da * 1e18 * kT
    tensionError = dDeltaF_ij[0, 1] / da * 1e18 * kT
    if verbose:
        print('tension (pymbar): {} +/- {}N/m'.format(tension, tensionError))

    return tension, tensionError
Ejemplo n.º 3
0
    def update_logZ_with_mbar(self):
        """
        Use MBAR to update logZ estimates.

        Reads the ``'u_k'`` and ``'state_index'`` variables of ``self.ncfile``,
        runs MBAR on the whole stored history and overwrites ``self.logZ``
        with ``-f_k``, shifted so that ``logZ[0] == 0``. The wall-clock time
        is stored in ``self._timing['MBAR time']``.

        Raises
        ------
        Exception
            If no storage file is attached, or if the sampler's update scheme
            is not ``'global-jump'``.
        """
        if not self.ncfile:
            raise Exception("Cannot update logZ using MBAR since no NetCDF file is storing history.")

        if self.sampler.update_scheme != 'global-jump':
            raise Exception("Only global jump is implemented right now.")

        # Extract relative energies.
        if self.verbose:
            print('Updating logZ estimate with MBAR...')
        initial_time = time.time()
        from pymbar import MBAR
        first = 0  # index of the first stored iteration to use
        u_kn = np.array(self.ncfile.variables['u_k'][first:,:]).T
        # Count visits per state: one bin centred on every integer state index.
        bin_edges = np.arange(self.sampler.nstates+1) - 0.5
        N_k, _ = np.histogram(self.ncfile.variables['state_index'][first:], bins=bin_edges)
        mbar = MBAR(u_kn, N_k)
        # NOTE(review): the three returned matrices are not used below; only
        # mbar.f_k is consumed. The call is kept so behaviour is unchanged.
        Deltaf_ij, dDeltaf_ij, Theta_ij = mbar.getFreeEnergyDifferences(compute_uncertainty=True, uncertainty_method='approximate')
        self.logZ[:] = -mbar.f_k[:]
        self.logZ -= self.logZ[0]
        elapsed_time = time.time() - initial_time
        self._timing['MBAR time'] = elapsed_time
        if self.verbose:
            print('MBAR time    %8.3f s' % elapsed_time)
Ejemplo n.º 4
0
def calc_df(u_kln):
    """
    Compute MBAR free energy differences from a reduced-energy array.

    u_kln should be (nstates) x (nstates) x (nframes)
    note that u_kln should be normalized by kT already
    where each element is
        a config from frame `n` of a trajectory conducted with state `k`
        with energy recalculated using parameters of state `l`

    The input array is not modified: subsampling is done on a private copy.

    Returns
    -------
    (DeltaF_ij, dDeltaF_ij) : the first two values returned by
        ``MBAR.getFreeEnergyDifferences()``.

    Raises
    ------
    ValueError
        If the first two dimensions of ``u_kln`` differ.
    """
    dims = np.shape(u_kln)
    if dims[0] != dims[1]:
        raise ValueError(
            "dimensions {} of u_kln should be square in the first two indices".
            format(dims))
    nstates = dims[0]

    # The subsampling below compacts the kept frames to the front of the
    # frame axis in place. Doing that on the caller's array would silently
    # corrupt it (and change the result of a second call), so copy first.
    u_kln = np.array(u_kln)

    N_k = np.zeros([nstates], np.int32)  # number of uncorrelated samples
    for k in range(nstates):
        _, g, _ = timeseries.detectEquilibration(u_kln[k, k, :])
        indices = timeseries.subsampleCorrelatedData(u_kln[k, k, :], g=g)
        N_k[k] = len(indices)
        u_kln[k, :, 0:N_k[k]] = u_kln[k, :, indices].T
    # Compute free energy differences and statistical uncertainties
    mbar = MBAR(u_kln, N_k)
    DeltaF_ij, dDeltaF_ij, _ = mbar.getFreeEnergyDifferences()

    return DeltaF_ij, dDeltaF_ij
Ejemplo n.º 5
0
    def sqdeltaW(self, mu_VLE, eps_scaled):
        '''
        Return the squared difference between the summed MBAR weights of the
        two phases, one value per entry of Temp_VLE.

        Configurations with Nmol_flat > Ncut are counted as liquid, the
        others as vapor. Rows nTsim onward of self.u_kn_all are overwritten
        with the reduced energies of the (Temp_VLE, mu_VLE) state points.

        Side effects: stores row 0 of the free energy difference matrix as
        self.f_k_guess (initial guess for the next call) and keeps mbar,
        sumWliq and sumWvap on self.
        '''
        nTsim = self.nTsim
        U_flat = self.U_flat
        Nmol_flat = self.Nmol_flat
        Ncut = self.Ncut
        f_k_guess = self.f_k_guess
        Temp_VLE = self.Temp_VLE
        u_kn_all = self.u_kn_all
        N_k_all = self.N_k_all

        # Fill in the reduced energies of the non-simulated state points.
        for offset, (temperature, chem_pot) in enumerate(zip(Temp_VLE, mu_VLE)):
            u_kn_all[nTsim + offset, :] = self.U_to_u(
                eps_scaled * U_flat, temperature, chem_pot, Nmol_flat)

        mbar = MBAR(u_kn_all, N_k_all, initial_f_k=f_k_guess)

        # Only the weight columns of the non-simulated states are summed.
        sumWliq = np.sum(mbar.W_nk[:, nTsim:][Nmol_flat > Ncut], axis=0)
        sumWvap = np.sum(mbar.W_nk[:, nTsim:][Nmol_flat <= Ncut], axis=0)
        sqdeltaW_VLE = (sumWliq - sumWvap)**2

        # Remember this solution so the next MBAR call starts from it.
        self.f_k_guess = mbar.getFreeEnergyDifferences(return_theta=False)[0][0, :]
        self.mbar = mbar
        self.sumWliq = sumWliq
        self.sumWvap = sumWvap

        return sqdeltaW_VLE
Ejemplo n.º 6
0
    def build_MBAR_sim(self):
        '''
        Build an MBAR object that covers only the simulated state points.

        Reads Temp_sim, mu_sim, K_sim (snapshots per state point), Nmol_flat
        and U_flat from self, and stores on self:
        u_kn_sim: reduced energies, one row per simulated state point
        f_k_sim: row 0 of the MBAR free energy difference matrix
        sumN_k: total number of snapshots
        N_k_sim: K_sim as an array
        mbar_sim: the MBAR object itself
        '''
        Temp_sim = self.Temp_sim
        mu_sim = self.mu_sim
        nSnapshots = self.K_sim
        Nmol_flat = self.Nmol_flat
        U_flat = self.U_flat

        N_k_sim = np.array(nSnapshots)
        sumN_k = np.sum(N_k_sim)

        # One row of reduced energies per simulated (T, mu) pair.
        u_kn_sim = np.zeros([len(Temp_sim), sumN_k])
        for row, (temperature, chem_pot) in enumerate(zip(Temp_sim, mu_sim)):
            u_kn_sim[row] = self.U_to_u(U_flat, temperature, chem_pot, Nmol_flat)

        mbar_sim = MBAR(u_kn_sim, N_k_sim)

        # Free energies of every state relative to the first one.
        f_k_sim = mbar_sim.getFreeEnergyDifferences(return_theta=False)[0][0, :]

        self.u_kn_sim = u_kn_sim
        self.f_k_sim = f_k_sim
        self.sumN_k = sumN_k
        self.N_k_sim = N_k_sim
        self.mbar_sim = mbar_sim
Ejemplo n.º 7
0
    def calc_abs_press_int(self,show_plot=True):
        '''
        Fits ln(Xi) with respect to N for low-density vapor

        Ten extra, unsampled state points are added at temperature Temp_IG,
        MBAR is re-run including them, and a straight line is fitted to the
        resulting free energies (f_k_IG) against the weighted mean number of
        molecules (Nmol_IG).

        show_plot: when true, plot the points and the fitted line and print
        the slope and intercept.

        Stores on self: abs_press_int (intercept of the fit), Temp_IG,
        f_k_IG and Nmol_IG.
        '''
        Temp_sim, u_kn_sim,f_k_sim,sumN_k = self.Temp_sim, self.u_kn_sim,self.f_k_sim,self.sumN_k
        nTsim, U_flat, Nmol_flat,Ncut = self.nTsim, self.U_flat, self.Nmol_flat, self.Ncut

        # Lowest temperature among the simulations run at the minimum mu_sim.
        Temp_IG = np.min(Temp_sim[self.mu_sim == self.mu_sim.min()])

        # Ten chemical potentials between 2x and 5x the mu_opt entry whose
        # Temp_VLE equals Temp_IG.
        mu_IG = np.linspace(2.*self.mu_opt[self.Temp_VLE==Temp_IG],5.*self.mu_opt[self.Temp_VLE==Temp_IG],10)

        # Slice-copy so that extending does not modify self.K_sim (assumes
        # K_sim is a list -- TODO confirm). The new states have zero samples.
        N_k_all = self.K_sim[:]
        N_k_all.extend([0]*len(mu_IG))

        # Append empty rows for the new states below the simulated ones.
        u_kn_IG = np.zeros([len(mu_IG),sumN_k])
        u_kn_all = np.concatenate((u_kn_sim,u_kn_IG))

        # Converged simulated free energies, zeros for the new states.
        f_k_guess = np.concatenate((f_k_sim,np.zeros(len(mu_IG))))

        for jT, mu in enumerate(mu_IG):

            u_kn_all[nTsim+jT,:] = self.U_to_u(U_flat,Temp_IG,mu,Nmol_flat)

        mbar = MBAR(u_kn_all,N_k_all,initial_f_k=f_k_guess)

        # Sum of the weights, per new state, over configurations with
        # Nmol_flat < Ncut.
        sumW_IG = np.sum(mbar.W_nk[:,nTsim:][Nmol_flat<Ncut],axis=0)

        # Weighted mean number of molecules over the same configurations.
        Nmol_IG = np.sum(mbar.W_nk[:,nTsim:][Nmol_flat<Ncut].T*Nmol_flat[Nmol_flat<Ncut],axis=1)/sumW_IG
        # Free energy differences between each new state and state 0
        # (column 0 of the rows nTsim onward).
        Deltaf_ij = mbar.getFreeEnergyDifferences(return_theta=False)[0]
        f_k_IG = Deltaf_ij[nTsim:,0]

        # Fit only the points whose mu_IG is below twice the minimum mu_sim.
        fit=stats.linregress(Nmol_IG[mu_IG<2.*self.mu_sim.min()],f_k_IG[mu_IG<2.*self.mu_sim.min()])

        if show_plot:

            Nmol_plot = np.linspace(Nmol_IG.min(),Nmol_IG.max(),50)
            lnXi_plot = fit.intercept + fit.slope*Nmol_plot

            plt.figure(figsize=[6,6])
            plt.plot(Nmol_IG,f_k_IG,'bo',mfc='None',label='MBAR-GCMC')
            plt.plot(Nmol_plot,lnXi_plot,'k-',label='Linear fit')
            plt.xlabel('Number of Molecules')
            plt.ylabel(r'$\ln(\Xi)$')
            plt.legend()
            plt.show()

            print('Slope for ideal gas is 1, actual slope is: '+str(fit.slope))
            print('Intercept for absolute pressure is:'+str(fit.intercept))

        self.abs_press_int, self.Temp_IG, self.f_k_IG, self.Nmol_IG = fit.intercept, Temp_IG, f_k_IG, Nmol_IG
Ejemplo n.º 8
0
 def run_mbar(self):
     r"""Run MBAR and cache the free energies on the instance.

     Writes ``_f_k``, the first-to-last state difference and its
     uncertainty (``_deltaF_mbar``, ``_dDeltaF_mbar``) and ``_pmf_mbar``,
     a two-column table of (lambda, f_k).
     """
     estimator = MBAR(self._u_kln, self._N_k, verbose=True)
     self._f_k = estimator.f_k
     deltaF_ij, dDeltaF_ij, _ = estimator.getFreeEnergyDifferences()

     # Index of the last lambda state.
     last = self._lambda_array.shape[0] - 1
     self._deltaF_mbar = deltaF_ij[0, last]
     self._dDeltaF_mbar = dDeltaF_ij[0, last]

     self._pmf_mbar = pmf = np.zeros(shape=(self._lambda_array.shape[0], 2))
     pmf[:, 0] = self._lambda_array
     pmf[:, 1] = self._f_k
Ejemplo n.º 9
0
    def gather_dg(self, u_kln, nstates):
        """Subsample each state's time series and run MBAR.

        Parameters
        ----------
        u_kln : array indexed as [k, l, n]
            Reduced energies; not modified (a private copy is subsampled).
        nstates : int
            Number of states to process along the first axis.

        Returns
        -------
        (DeltaF_ij, dDeltaF_ij) : the first two values returned by
            ``MBAR.getFreeEnergyDifferences()``.
        """
        # The loop below compacts the kept frames in place; copy so that the
        # caller's array is not left half-overwritten.
        u_kln = np.array(u_kln)

        # Subsample data to extract uncorrelated equilibrium timeseries
        N_k = np.zeros([nstates], np.int32)  # number of uncorrelated samples
        for k in range(nstates):
            [_, g, __] = timeseries.detectEquilibration(u_kln[k, k, :])
            indices = timeseries.subsampleCorrelatedData(u_kln[k, k, :], g=g)
            N_k[k] = len(indices)
            u_kln[k, :, 0:N_k[k]] = u_kln[k, :, indices].T
        # Compute free energy differences and statistical uncertainties
        mbar = MBAR(u_kln, N_k)
        [DeltaF_ij, dDeltaF_ij, _] = mbar.getFreeEnergyDifferences()
        print("Number of uncorrelated samples per state: {}".format(N_k))

        return DeltaF_ij, dDeltaF_ij
Ejemplo n.º 10
0
def test_mbar_free_energies():
    """Check MBAR free energy differences against the analytical values.

    Also checks that the dict and tuple return forms agree.
    """
    for make_system in system_generators:
        name, test = make_system()
        _x_n, u_kn, sampled_N_k, _s_n = test.sample(N_k, mode='u_kn')
        eq(N_k, sampled_N_k)
        mbar = MBAR(u_kn, N_k)

        # Same quantities requested in both return styles.
        as_dict = mbar.getFreeEnergyDifferences(return_dict=True)
        tuple_fe, tuple_sigma = mbar.getFreeEnergyDifferences(return_dict=False)
        dict_fe = as_dict['Delta_f']
        dict_sigma = as_dict['dDelta_f']

        eq(dict_fe, tuple_fe)
        eq(dict_sigma, tuple_sigma)

        # Differences of states 1.. relative to state 0.
        estimate = dict_fe[0, 1:]
        sigma = dict_sigma[0, 1:]

        analytical = test.analytical_free_energies()
        reference = analytical[1:] - analytical[0]

        z = (estimate - reference) / sigma
        eq(z / z_scale_factor, np.zeros(len(z)), decimal=0)
def test_exponential_mbar_free_energies():
    """Exponential distributions: MBAR differences must match the analytical ones."""
    case = exponential_distributions.ExponentialTestCase(rates)
    _x_kn, u_kln, sampled_N_k = case.sample(N_k, mode='u_kln')
    eq(N_k, sampled_N_k)

    estimator = MBAR(u_kln, N_k)
    delta_f, delta_f_sigma = estimator.getFreeEnergyDifferences()
    # Differences of states 1.. relative to state 0.
    estimate = delta_f[0, 1:]
    sigma = delta_f_sigma[0, 1:]

    analytical = case.analytical_free_energies()
    reference = analytical[1:] - analytical[0]

    z = (estimate - reference) / sigma
    eq(z / z_scale_factor, np.zeros(len(z)), decimal=0)
def test_harmonic_oscillators_mbar_free_energies():
    """Harmonic oscillators: MBAR differences must match the analytical ones."""
    case = harmonic_oscillators.HarmonicOscillatorsTestCase(O_k, k_k)
    _x_kn, u_kln, sampled_N_k = case.sample(N_k)

    eq(N_k, sampled_N_k)

    estimator = MBAR(u_kln, N_k)
    delta_f, delta_f_sigma = estimator.getFreeEnergyDifferences()
    # Differences of states 1.. relative to state 0.
    estimate = delta_f[0, 1:]
    sigma = delta_f_sigma[0, 1:]

    analytical = case.analytical_free_energies()
    reference = analytical[1:] - analytical[0]

    z = (estimate - reference) / sigma
    eq(z / z_scale_factor, np.zeros(len(z)), decimal=0)
def test_harmonic_oscillators_mbar_free_energies():
    """Harmonic oscillators, with the samples converted via convert_ukn_to_uijn."""
    case = harmonic_oscillators.HarmonicOscillatorsTestCase(O_k, k_k)
    _x_n, u_kn, _origin = case.sample(N_k)
    u_ijn, sampled_N_k = convert_ukn_to_uijn(u_kn)

    eq(N_k, sampled_N_k.values)

    estimator = MBAR(u_ijn.values, N_k)
    delta_f, delta_f_sigma = estimator.getFreeEnergyDifferences()
    # Row 0: every state relative to the first one.
    estimate = delta_f[0]
    sigma = delta_f_sigma[0]

    reference = case.analytical_free_energies()

    # First component is undetermined.
    z = ((estimate - reference) / sigma)[1:]
    eq(z / z_scale_factor, np.zeros(len(z)), decimal=0)
def test_exponential_mbar_free_energies():
    """Exponential distributions, with the samples converted via convert_ukn_to_uijn."""
    case = exponential_distributions.ExponentialTestCase(rates)
    _x_n, u_kn, _origin = case.sample(N_k)
    u_ijn, sampled_N_k = convert_ukn_to_uijn(u_kn)

    eq(N_k, sampled_N_k.values)

    estimator = MBAR(u_ijn.values, N_k)
    delta_f, delta_f_sigma = estimator.getFreeEnergyDifferences()
    # Row 0: every state relative to the first one.
    estimate = delta_f[0]
    sigma = delta_f_sigma[0]

    reference = case.analytical_free_energies()

    # First component is undetermined.
    z = ((estimate - reference) / sigma)[1:]
    eq(z / z_scale_factor, np.zeros(len(z)), decimal=0)
Ejemplo n.º 15
0
def test_mbar_free_energies():
    """Check MBAR free energy differences against the analytical values."""
    for make_system in system_generators:
        name, test = make_system()
        _x_n, u_kn, sampled_N_k, _s_n = test.sample(N_k, mode='u_kn')
        eq(N_k, sampled_N_k)
        estimator = MBAR(u_kn, N_k)

        delta_f, delta_f_sigma, _theta = estimator.getFreeEnergyDifferences()
        # Differences of states 1.. relative to state 0.
        estimate = delta_f[0, 1:]
        sigma = delta_f_sigma[0, 1:]

        analytical = test.analytical_free_energies()
        reference = analytical[1:] - analytical[0]

        z = (estimate - reference) / sigma
        eq(z / z_scale_factor, np.zeros(len(z)), decimal=0)
Ejemplo n.º 16
0
def test_mbar_free_energies():
    """Check MBAR free energy differences against the analytical values."""
    for make_system in system_generators:
        name, test = make_system()
        _x_n, u_kn, sampled_N_k, _s_n = test.sample(N_k, mode='u_kn')
        eq(N_k, sampled_N_k)
        estimator = MBAR(u_kn, N_k)

        delta_f, delta_f_sigma, _theta = estimator.getFreeEnergyDifferences()
        # Differences of states 1.. relative to state 0.
        estimate = delta_f[0, 1:]
        sigma = delta_f_sigma[0, 1:]

        analytical = test.analytical_free_energies()
        reference = analytical[1:] - analytical[0]

        z = (estimate - reference) / sigma
        eq(z / z_scale_factor, np.zeros(len(z)), decimal=0)
Ejemplo n.º 17
0
    def gather_dg(self, u_kln, nstates):
        """Return the first-to-last state free energy change and its error.

        ``u_kln`` is stacked with ``np.vstack``, each state's diagonal time
        series is subsampled, and MBAR is run on the result. Both returned
        values are multiplied by ``self.kTtokcal``.
        """
        u_kln = np.vstack(u_kln)

        # Subsample data to extract uncorrelated equilibrium timeseries.
        N_k = np.zeros([nstates], np.int32)
        for state in range(nstates):
            _, g, _ = timeseries.detectEquilibration(u_kln[state, state, :])
            kept = timeseries.subsampleCorrelatedData(u_kln[state, state, :], g=g)
            n_kept = len(kept)
            N_k[state] = n_kept
            # Pack the kept frames at the front of the frame axis.
            u_kln[state, :, 0:n_kept] = u_kln[state, :, kept].T

        # Free energy differences and statistical uncertainties.
        estimator = MBAR(u_kln, N_k)
        DeltaF_ij, dDeltaF_ij, _ = estimator.getFreeEnergyDifferences()
        logger.debug(
            "Number of uncorrelated samples per state: {}".format(N_k))

        last = nstates - 1
        message = "Relative free energy change for {0} = {1} +- {2}".format(
            self.name, DeltaF_ij[0, last] * self.kTtokcal,
            dDeltaF_ij[0, last] * self.kTtokcal)
        logger.debug(message)

        free_energy = DeltaF_ij[0, last] * self.kTtokcal
        uncertainty = dDeltaF_ij[0, last] * self.kTtokcal
        return free_energy, uncertainty
Ejemplo n.º 18
0
    def run_mbar(self, ndiscard=0, nuse=None):
        """Estimate free energies of all alchemical states.

        The energies come from ``self.get_u_kln()`` and are passed through
        ``self.equilibrate_and_subsample()`` before MBAR is run. Both result
        matrices are logged at INFO level.

        Parameters
        ----------
        ndiscard : int, optional, default=0
            number of iterations to discard to equilibration
        nuse : int, optional, default=None
            maximum number of iterations to use (after discarding)

        Returns
        -------

        Deltaf_ij : np.ndarray, shape=(n_states, n_states)
            The statewise free energy differences

        dDeltaf_ij : np.ndarray, shape=(n_states, n_states)
            The statewise free energy difference uncertainties

        """

        u_kln_replica, u_kln, u_n = self.get_u_kln()

        u_kln_replica, u_kln, u_n, N_k, N = self.equilibrate_and_subsample(u_kln_replica, u_kln, u_n, ndiscard=ndiscard, nuse=nuse)

        logger.info("Initialing MBAR and computing free energy differences...")
        mbar = MBAR(u_kln, N_k, verbose = False, method = 'self-consistent-iteration', maximum_iterations = 50000) # use slow self-consistent-iteration (the default)

        # Get matrix of dimensionless free energy differences and uncertainty estimate.
        logger.info("Computing covariance matrix...")
        (Deltaf_ij, dDeltaf_ij) = mbar.getFreeEnergyDifferences(uncertainty_method='svd-ew')

        logger.info("\n%-24s %16s\n%s" % ("Deltaf_ij", "current state", pd.DataFrame(Deltaf_ij).to_string()))
        # The second table holds the uncertainties, so label it as such.
        logger.info("\n%-24s %16s\n%s" % ("dDeltaf_ij", "current state", pd.DataFrame(dDeltaf_ij).to_string()))

        return (Deltaf_ij, dDeltaf_ij)
Ejemplo n.º 19
0
#      print 'E_%d evaluated in model_%d'%(k,l), u_kln[k,l,n]


# Initialize MBAR with reduced energies u_kln and number of uncorrelated configurations from each state N_k.
#
# u_kln[k,l,n] is the reduced potential energy beta*U_l(x_kn), where U_l(x) is the potential energy function for state l,
# beta is the inverse temperature, and x_kn denotes uncorrelated configuration n from state k.
#
# N_k[k] is the number of configurations from state k stored in u_kln
#
# Note that this step may take some time, as the relative dimensionless free energies f_k are determined at this point.
mbar = MBAR(u_kln, N_k)

# Extract dimensionless free energy differences and their statistical uncertainties.
(Deltaf_ij, dDeltaf_ij, Theta_ij) = mbar.getFreeEnergyDifferences(uncertainty_method='approximate')
beta = 1.0 # keep in units kT
f_df = np.zeros( (nlambda, 2) )  # first column is Deltaf_ij[0,:], second column is dDeltaf_ij[0,:]
f_df[:,0] = Deltaf_ij[0,:]
f_df[:,1] = dDeltaf_ij[0,:]
# Single-argument print calls produce the same output on Python 2 and 3.
print('Writing %s...' % args.bayesfactorfile)
savetxt(args.bayesfactorfile, f_df)
print('...Done.')


# Compute the expectation of some observable A(x) at each state i, and associated uncertainty matrix.
# Here, A_kn[k,n] = A(x_{kn})
#(A_k, dA_k) = mbar.computeExpectations(A_kn)
Ejemplo n.º 20
0
def estimate_free_energies(ncfile, ndiscard=0, nuse=None, g=None):
    """
    Estimate free energies of all alchemical states.

    Parameters
    ----------
    ncfile : NetCDF
       Input YANK netcdf file
    ndiscard : int, optional, default=0
       Number of iterations to discard to equilibration
    nuse : int, optional, default=None
       Maximum number of iterations to use (after discarding)
    g : int, optional, default=None
       Statistical inefficiency to use if desired; if None, will be computed.

    Returns
    -------
    (Deltaf_ij, dDeltaf_ij) : tuple
       Free energy difference matrix and the matching uncertainty matrix,
       as returned by ``MBAR.getFreeEnergyDifferences()``.

    Notes
    -----
    As a debugging aid, the per-iteration total reduced energy is written to
    ``u_n.out`` in the current working directory.

    TODO
    ----
    * Automatically determine 'ndiscard'.

    """

    # Get current dimensions.
    niterations = ncfile.variables['energies'].shape[0]
    nstates = ncfile.variables['energies'].shape[1]

    # Extract energies.
    logger.info("Reading energies...")
    energies = ncfile.variables['energies']
    u_kln_replica = np.zeros([nstates, nstates, niterations], np.float64)
    for n in range(niterations):
        u_kln_replica[:, :, n] = energies[n, :, :]
    logger.info("Done.")

    # Deconvolute replicas: re-index each iteration by thermodynamic state.
    logger.info("Deconvoluting replicas...")
    u_kln = np.zeros([nstates, nstates, niterations], np.float64)
    for iteration in range(niterations):
        state_indices = ncfile.variables['states'][iteration, :]
        u_kln[state_indices, :, iteration] = energies[iteration, :, :]
    logger.info("Done.")

    # Compute total negative log probability over all iterations.
    u_n = np.zeros([niterations], np.float64)
    for iteration in range(niterations):
        u_n[iteration] = np.sum(np.diagonal(u_kln[:, :, iteration]))

    # DEBUG: dump the time series; the context manager closes the file even
    # if a write fails.
    with open('u_n.out', 'w') as outfile:
        for iteration in range(niterations):
            outfile.write("%8d %24.3f\n" % (iteration, u_n[iteration]))

    # Discard initial data to equilibration.
    u_kln_replica = u_kln_replica[:, :, ndiscard:]
    u_kln = u_kln[:, :, ndiscard:]
    u_n = u_n[ndiscard:]

    # Truncate to number of specified conformations to use
    if nuse:
        u_kln_replica = u_kln_replica[:, :, 0:nuse]
        u_kln = u_kln[:, :, 0:nuse]
        u_n = u_n[0:nuse]

    # Subsample data to obtain uncorrelated samples
    N_k = np.zeros(nstates, np.int32)
    indices = timeseries.subsampleCorrelatedData(
        u_n, g=g)  # indices of uncorrelated samples
    N = len(indices)  # number of uncorrelated samples
    N_k[:] = N
    # Pack the kept iterations at the front of the iteration axis.
    u_kln[:, :, 0:N] = u_kln[:, :, indices]
    logger.info("number of uncorrelated samples:")
    logger.info(N_k)
    logger.info("")

    #===================================================================================================
    # Estimate free energy difference with MBAR.
    #===================================================================================================

    # Initialize MBAR (computing free energy estimates, which may take a while)
    logger.info("Computing free energy differences...")
    mbar = MBAR(u_kln, N_k)

    # Get matrix of dimensionless free energy differences and uncertainty estimate.
    logger.info("Computing covariance matrix...")

    # The number of returned values differs between pymbar releases; a
    # failed two-way unpack raises ValueError and triggers the three-way one.
    try:
        (Deltaf_ij, dDeltaf_ij) = mbar.getFreeEnergyDifferences()
    except ValueError:
        (Deltaf_ij, dDeltaf_ij, theta_ij) = mbar.getFreeEnergyDifferences()

    # Log both matrices row by row: the differences, then their uncertainties
    # (expected standard deviations of the estimator about the true value).
    for title, matrix in (("Deltaf_ij:", Deltaf_ij), ("dDeltaf_ij:", dDeltaf_ij)):
        logger.info(title)
        for i in range(nstates):
            logger.info("".join("%8.3f" % matrix[i, j] for j in range(nstates)))

    # Return free energy differences and an estimate of the covariance.
    return (Deltaf_ij, dDeltaf_ij)
Ejemplo n.º 21
0
def compute_hydration_energy(entry, parameters, hydration_factory_parameters, platform_name="Reference"):
    """
    Compute hydration energy of a single molecule given a GBSA parameter set.

    The stored samples are re-evaluated in vacuum and in implicit solvent,
    and MBAR is used to estimate the free energy difference between those
    two states.

    ARGUMENTS

    entry (dict) - database entry; the keys 'molecule', 'iupac', 'system',
        'temperature', 'x_n' and 'u_n' are read
    parameters (dict) - GBSA parameters, keyed '<gbsa_type>_radius' (taken
        to be in angstroms) and '<gbsa_type>_scalingFactor'
    hydration_factory_parameters (dict) - the .value of its 'gbmodel' entry
        selects the GB model: None uses openmm.GBSAOBCForce, 0-4 use the
        customgbforces HCT, OBC1, OBC2, GBn and GBn2 forces respectively
    platform_name (str) - name of the OpenMM platform to run on

    RETURNS

    energy (float) - hydration energy in kcal/mol, or np.inf when the MBAR
        uncertainty calculation raises linalg.LinAlgError

    RAISES

    ValueError - if gbmodel is not None and not one of 0-4

    """

    platform = openmm.Platform.getPlatformByName(platform_name)

    from pymbar import MBAR

    gbmodel = hydration_factory_parameters['gbmodel'].value

    molecule = entry['molecule']
    iupac_name = entry['iupac']
    cid = molecule.GetData('cid')

    # Retrieve OpenMM System; the solvent system is a copy so that adding the
    # GBSA force leaves the vacuum system untouched.
    vacuum_system = entry['system']
    solvent_system = copy.deepcopy(entry['system'])

    # Get nonbonded force.
    forces = {}
    for index in range(solvent_system.getNumForces()):
        force = solvent_system.getForce(index)
        forces[force.__class__.__name__] = force
    nonbonded_force = forces['NonbondedForce']

    # Add GBSA force.
    from simtk.openmm.app.internal import customgbforces
    if gbmodel is None:
        gbsa_force = openmm.GBSAOBCForce()
        gbsa_force.setNonbondedMethod(openmm.GBSAOBCForce.NoCutoff) # set no cutoff
        gbsa_force.setSoluteDielectric(1)
        gbsa_force.setSolventDielectric(78)
    elif gbmodel == 0:
        gbsa_force = customgbforces.GBSAHCTForce(SA='ACE')
    elif gbmodel == 1:
        gbsa_force = customgbforces.GBSAOBC1Force(SA='ACE')
    elif gbmodel == 2:
        gbsa_force = customgbforces.GBSAOBC2Force(SA='ACE')
    elif gbmodel == 3:
        gbsa_force = customgbforces.GBSAGBnForce(SA='ACE')
    elif gbmodel == 4:
        gbsa_force = customgbforces.GBSAGBn2Force(SA='ACE')
    else:
        # Without a force object nothing below can work, so fail here with a
        # clear message instead of a later error about an unbound name.
        raise ValueError("GBmodel %r out of range" % (gbmodel,))

    # Build indexable list of atoms.
    atoms = [atom for atom in molecule.GetAtoms()]

    # Assign GBSA parameters.
    for (atom_index, atom) in enumerate(atoms):
        [charge, sigma, epsilon] = nonbonded_force.getParticleParameters(atom_index)
        atomtype = atom.GetStringData("gbsa_type") # GBSA atomtype
        radius = parameters['%s_%s' % (atomtype, 'radius')] * units.angstroms
        scalingFactor = parameters['%s_%s' % (atomtype, 'scalingFactor')]
        # The built-in force takes positional arguments, the custom forces a list.
        if gbmodel is None:
            gbsa_force.addParticle(charge, radius, scalingFactor)
        else:
            gbsa_force.addParticle([charge, radius, scalingFactor])

    # Add the force to the system.
    solvent_system.addForce(gbsa_force)

    # Create context for solvent system.
    timestep = 2.0 * units.femtosecond
    solvent_integrator = openmm.VerletIntegrator(timestep)
    solvent_context = openmm.Context(solvent_system, solvent_integrator, platform)

    # Create context for vacuum system.
    vacuum_integrator = openmm.VerletIntegrator(timestep)
    vacuum_context = openmm.Context(vacuum_system, vacuum_integrator, platform)

    # Compute energy differences.
    temperature = entry['temperature']
    kT = kB * temperature
    beta = 1.0 / kT

    x_n = entry['x_n']
    u_n = entry['u_n']
    nsamples = len(u_n)
    nstates = 3 # number of thermodynamic states
    # Only row 0 is filled: all samples come from state 0 and are re-evaluated
    # in state 1 (vacuum) and state 2 (solvent).
    u_kln = np.zeros([3,3,nsamples], np.float64)
    for sample in range(nsamples):
        positions = units.Quantity(x_n[sample,:,:], units.nanometers)

        u_kln[0,0,sample] = u_n[sample]

        vacuum_context.setPositions(positions)
        vacuum_state = vacuum_context.getState(getEnergy=True)
        u_kln[0,1,sample] = beta * vacuum_state.getPotentialEnergy()

        solvent_context.setPositions(positions)
        solvent_state = solvent_context.getState(getEnergy=True)
        u_kln[0,2,sample] = beta * solvent_state.getPotentialEnergy()

    N_k = np.zeros([nstates], np.int32)
    N_k[0] = nsamples

    mbar = MBAR(u_kln, N_k)
    try:
        df_ij, ddf_ij, _ = mbar.getFreeEnergyDifferences()
    except linalg.LinAlgError:
        return np.inf

    DeltaG_in_kT = df_ij[1,2]
    dDeltaG_in_kT = ddf_ij[1,2]

    # Clean up.
    del solvent_context, solvent_integrator
    del vacuum_context, vacuum_integrator

    energy = kT * DeltaG_in_kT

    # gbmodel may be None, which %d cannot format; %s prints integers the same way.
    print("%48s | %48s | DeltaG = %.3f +- %.3f kT | gbmodel = %s" % (cid, iupac_name, DeltaG_in_kT, dDeltaG_in_kT, gbmodel))

    return energy / units.kilocalories_per_mole
Ejemplo n.º 22
0
  # Generate independent data samples from K one-dimensional harmonic oscillators centered at q = 0.
  #=============================================================================================

  randomsample = testsystems.harmonic_oscillators.HarmonicOscillatorsTestCase(O_k=O_k, K_k=K_k, beta=beta)
  # N_k is passed in as the requested sample counts and rebound to the counts
  # that sample() returns.
  [x_kn,u_kln,N_k] = randomsample.sample(N_k,mode='u_kln')

  # get the unreduced energies
  U_kln = u_kln/beta

  #=============================================================================================
  # Estimate free energies and expectations.
  #=============================================================================================

  # Initialize the MBAR class, determining the free energies.
  mbar = MBAR(u_kln, N_k, relative_tolerance=1.0e-10,verbose=False) # use fast Newton-Raphson solver
  # NOTE(review): the result is indexed by key below, so this relies on
  # getFreeEnergyDifferences() returning a dict by default -- confirm against
  # the pymbar version in use.
  results = mbar.getFreeEnergyDifferences()
  Deltaf_ij_estimated = results['Delta_f']
  dDeltaf_ij_estimated = results['dDelta_f']

  # Compute error from analytical free energy differences.
  Deltaf_ij_error = Deltaf_ij_estimated - Deltaf_ij_analytical

  # Estimate the expectation of the mean-squared displacement at each condition.
  if observe == 'RMS displacement':
    # A_kn[k,l,n]: squared distance of sample n drawn from state k to the
    # centre O_k[l] of state l; entries beyond N_k[k] stay zero.
    A_kn = numpy.zeros([K,K,N_max], dtype = numpy.float64);
    for k in range(0,K):
      for l in range(0,K):
        A_kn[k,l,0:N_k[k]] = (x_kn[k,0:N_k[k]] - O_k[l])**2 # observable is the squared displacement

  # observable is the potential energy, a 3D array since the potential energy is a function of 
  # thermodynamic state
Ejemplo n.º 23
0
    def MBAR_analysis(self, debug = False):
        """MBAR analysis for populations and BICePs score.

        Loads the trajectories with ``self.load_data()``, re-scores every
        snapshot of every ensemble ``k`` with ``self.sampler[l].neglogP`` for
        every ensemble ``l``, and runs MBAR on the resulting energies.

        Results stored on ``self``:

        * ``K``: number of thermodynamic ensembles (``self.nlambda``).
        * ``f_df``: shape (nlambda, 2); column 0 is ``Deltaf_ij[0,:]`` and
          column 1 is ``dDeltaf_ij[0,:]``.
        * ``P_dP``: shape (nstates, 2*K); the left K columns hold the
          expectation of each state's indicator function in every ensemble,
          the right K columns the matching uncertainties.

        Finally ``self.save_MBAR()`` is called.

        Parameters
        ----------
        debug : bool
            Print extra per-snapshot information and a table header.
        """
        # load necessary data first
        self.load_data()

        # Suppose the energies sampled from each simulation are u_kln, where u_kln[k,l,n] is the reduced potential energy
        #   of snapshot n \in 1,...,N_k of simulation k \in 1,...,K evaluated at reduced potential for state l.
        self.K = self.nlambda   # number of thermodynamic ensembles
        # N_k[k] will denote the number of correlated snapshots from state k
        N_k = np.array( [len(self.traj[i]['trajectory']) for i in range(self.nlambda)] )
        nsnaps = N_k.max()
        u_kln = np.zeros( (self.K, self.K, nsnaps) )
        nstates = int(self.states)
        print('nstates %s' % (nstates,))
        states_kn = np.zeros( (self.K, nsnaps) )

        # Get snapshot energies rescored in the different ensembles.
        # Each trajectory record is laid out as
        #   [step, E, accept, state, sigma_noe_index, sigma_J_index,
        #    sigma_cs_H_index, sigma_cs_Ha_index, sigma_cs_N_index,
        #    sigma_cs_Ca_index, sigma_pf_index, gamma_index]
        for n in range(nsnaps):
            for k in range(self.K):
                # The arrays are padded to the longest trajectory; shorter
                # trajectories have no snapshot n, so leave the padding as zeros.
                if n >= N_k[k]:
                    continue
                record = self.traj[k]['trajectory'][n]
                for l in range(self.K):
                    # Collect the pieces of one output line; they are joined
                    # with single spaces below.
                    pieces = []
                    if debug:
                        pieces.append('step')
                        pieces.append(str(record[0]))
                    if k==l:
                        pieces.append('E%d evaluated in model %d'%(k,k))
                        pieces.append(str(record[1]))
                        u_kln[k,k,n] = record[1]
                    # IMPORTANT: make sure the order of these parameters is the same as the way they are saved in PosteriorSampler
                    state, sigma_noe_index, sigma_J_index, sigma_cs_H_index, sigma_cs_Ha_index, sigma_cs_N_index, sigma_cs_Ca_index, sigma_pf_index, gamma_index = record[3:]
                    pieces.append('state, sigma_noe_index, sigma_J_index, sigma_cs_H_index, sigma_cs_Ha_index, sigma_cs_N_index, sigma_cs_Ca_index, sigma_pf_index, gamma_index')
                    pieces.extend(str(value) for value in (state, sigma_noe_index, sigma_J_index, sigma_cs_H_index, sigma_cs_Ha_index, sigma_cs_N_index, sigma_cs_Ca_index, sigma_pf_index, gamma_index))
                    print(' '.join(pieces))
                    states_kn[k,n] = state
                    sigma_noe = self.traj[k]['allowed_sigma_noe'][sigma_noe_index]
                    sigma_J = self.traj[k]['allowed_sigma_J'][sigma_J_index]
                    sigma_cs_H = self.traj[k]['allowed_sigma_cs_H'][sigma_cs_H_index]
                    sigma_cs_Ha = self.traj[k]['allowed_sigma_cs_Ha'][sigma_cs_Ha_index]
                    sigma_cs_N = self.traj[k]['allowed_sigma_cs_N'][sigma_cs_N_index]
                    sigma_cs_Ca = self.traj[k]['allowed_sigma_cs_Ca'][sigma_cs_Ca_index]
                    sigma_pf = self.traj[k]['allowed_sigma_pf'][sigma_pf_index]
                    u_kln[k,l,n] = self.sampler[l].neglogP(0, state, sigma_noe, sigma_J, sigma_cs_H, sigma_cs_Ha, sigma_cs_N, sigma_cs_Ca, sigma_pf, gamma_index)
                    if debug:
                        print('E_%d evaluated in model_%d %s' % (k, l, u_kln[k,l,n]))

        # Initialize MBAR with reduced energies u_kln and number of uncorrelated configurations from each state N_k.
        # u_kln[k,l,n] is the reduced potential energy beta*U_l(x_kn), where U_l(x) is the potential energy function for state l,
        # beta is the inverse temperature, and x_kn denotes uncorrelated configuration n from state k.
        # N_k[k] is the number of configurations from state k stored in u_kln
        # Note that this step may take some time, as the relative dimensionless free energies f_k are determined at this point.
        mbar = MBAR(u_kln, N_k)

        # Extract dimensionless free energy differences and their statistical uncertainties (kept in units of kT).
        (Deltaf_ij, dDeltaf_ij, Theta_ij) = mbar.getFreeEnergyDifferences(uncertainty_method='approximate')
        self.f_df = np.zeros( (self.nlambda, 2) )  # first column is Deltaf_ij[0,:], second column is dDeltaf_ij[0,:]
        self.f_df[:,0] = Deltaf_ij[0,:]
        self.f_df[:,1] = dDeltaf_ij[0,:]

        # Compute the expectation of each state's indicator function in every
        # ensemble, and the associated uncertainty.
        self.P_dP = np.zeros( (nstates, 2*self.K) )  # left columns are P, right columns are dP
        if debug:
            print('state\tP\tdP')
        for i in range(nstates):
            A_kn = np.where(states_kn==i,1,0)
            (p_i, dp_i) = mbar.computeExpectations(A_kn, uncertainty_method='approximate')
            self.P_dP[i,0:self.K] = p_i
            self.P_dP[i,self.K:2*self.K] = dp_i
            print(i)
            # All populations followed by all uncertainties on one line.
            print(' '.join(str(value) for value in list(p_i) + list(dp_i)))

        # save results
        self.save_MBAR()
Ejemplo n.º 24
0
def SimulateAlchemy(path, niter, nsteps_per_iter, nlambda):
    """Calculate the alchemical free energy of the ligand named 'UNL' with MBAR.

    The complex is read from ``{path}/com.prmtop`` and ``{path}/com.inpcrd``,
    built with GBn2 implicit solvent, and simulated at 300 K on the CUDA
    platform. Each of the ``nlambda`` states (the ``lambda`` context parameter,
    evenly spaced from 1.0 down to 0.0) is sampled ``niter`` times, and every
    sample is re-evaluated at all lambda values to fill ``u_kln`` for MBAR.

    Parameters
    ----------
    path : str
        Directory containing ``com.prmtop`` and ``com.inpcrd``.
    niter : int
        Number of energy samples collected per lambda state.
    nsteps_per_iter : int
        Integrator steps between two samples. One step corresponds to two
        femtoseconds.
    nlambda : int
        Number of lambda states.

    Returns
    -------
    tuple
        ``(DeltaF_ij[0][-1], dDeltaF_ij[0][-1])``: the MBAR free energy
        difference between the first (lambda = 1.0) and the last
        (lambda = 0.0) state and its uncertainty estimate, in units of kT.
    """
    prmtop = app.AmberPrmtopFile(f'{path}/com.prmtop')
    inpcrd = app.AmberInpcrdFile(f'{path}/com.inpcrd')
    system = prmtop.createSystem(implicitSolvent=app.GBn2,
                                 nonbondedMethod=app.CutoffNonPeriodic,
                                 nonbondedCutoff=1.0 * unit.nanometers,
                                 constraints=app.HBonds,
                                 rigidWater=True,
                                 ewaldErrorTolerance=0.0005)

    # Detect ligand indices; OpenEye names the ligand residue 'UNL'.
    ligand_ind = {
        atm.index
        for atm in prmtop.topology.atoms()
        if atm.residue.name == 'UNL'
    }
    AddAlchemyForces(system, ligand_ind)

    integrator = mm.LangevinIntegrator(300 * unit.kelvin,
                                       1.0 / unit.picoseconds,
                                       2.0 * unit.femtoseconds)
    integrator.setConstraintTolerance(0.00001)
    # TODO: The issues here are the same as the mmgbsa.py script
    # TODO: This should just recognize whatever the computer is capable of, not force CUDA.
    # TODO: I am not sure if mixed precision is necessary. Just need to be consistent
    platform = mm.Platform.getPlatformByName('CUDA')
    properties = {'CudaPrecision': 'mixed'}
    # The platform properties must be handed to the Simulation, otherwise the
    # requested precision is silently ignored.
    simulation = app.Simulation(prmtop.topology, system, integrator, platform,
                                properties)
    simulation.context.setPositions(inpcrd.positions)
    simulation.minimizeEnergy()

    ### Now simulate system
    import numpy as np
    from pymbar import MBAR, timeseries
    lambdas = np.linspace(1.0, 0.0, nlambda)
    # Reduced potential energies for MBAR: u_kln[k, l, i] is sample i drawn at
    # lambdas[k] and evaluated at lambdas[l], divided by kT.
    u_kln = np.zeros([nlambda, nlambda, niter])
    kT = (unit.AVOGADRO_CONSTANT_NA * unit.BOLTZMANN_CONSTANT_kB *
          integrator.getTemperature())
    # TODO: This runs in series. Someone comfortable with MPI should help parallelize this.
    for k in range(nlambda):
        for i in range(niter):
            print('state %5d iteration %5d / %5d' % (k, i, niter))
            # Propagate at the sampled state ...
            simulation.context.setParameter('lambda', lambdas[k])
            integrator.step(nsteps_per_iter)
            # ... then re-evaluate the same configuration at every state.
            for l in range(nlambda):
                simulation.context.setParameter('lambda', lambdas[l])
                u_kln[k, l, i] = simulation.context.getState(
                    getEnergy=True).getPotentialEnergy() / kT

    # Subsample to reduce variation
    N_k = np.zeros([nlambda], np.int32)  # number of uncorrelated samples
    for k in range(nlambda):
        _, g, _ = timeseries.detectEquilibration(u_kln[k, k, :])
        # TODO: maybe should use 't0:' instead of ':' in third index
        indices = timeseries.subsampleCorrelatedData(u_kln[k, k, :], g=g)
        N_k[k] = len(indices)
        # Pack the retained samples into the leading N_k[k] columns of row k.
        u_kln[k, :, 0:N_k[k]] = u_kln[k, :, indices].T

    # Calculate the energy difference
    mbar = MBAR(u_kln, N_k)
    DeltaF_ij, dDeltaF_ij, _ = mbar.getFreeEnergyDifferences()
    return DeltaF_ij[0][-1], dDeltaF_ij[0][-1]
    weights = mbar.getWeights()
    # Store the weights for plot comparisons later on
    for sample in range(0, len(weights)):
        for state in range(0, num_states):
            #   print(weights_for_each_num_states[num_states_index][state][sample])
            weights_for_each_num_states[num_states_index][state][
                sample] = weights[sample][state]

#############
#
# 5) Calculate dimensionless free energies with MBAR
#
#############

# Get the dimensionless free energy differences
    free_energies, uncertainty_free_energies = mbar.getFreeEnergyDifferences(
    )[0], mbar.getFreeEnergyDifferences()[1]
    # print("With "+str(num_states)+" states the free energies are:")
    # Save the free energies for this number of states for comparison plots later on
    for sample in range(0, len(free_energies)):
        for state in range(0, num_states):
            free_energies_for_each_num_states[num_states_index][sample][
                state] = free_energies[sample][state]
            state_temps_for_each_num_states[num_states_index][
                state] = T_state_center[state]
            state_energies[
                state] = state_energies[state] + free_energies[sample][state]
# Calculate the average total energy for the samples within each state (temperature window)
    for state in range(0, num_states):
        state_energies_for_each_num_states[num_states_index][
            state] = state_energies[state] / len(free_energies)
Ejemplo n.º 26
0
def estimate_free_energies(ncfile, ndiscard = 0, nuse = None):
    """Estimate free energies of all alchemical states.

    ARGUMENTS
       ncfile (NetCDF) - input YANK netcdf file

    OPTIONAL ARGUMENTS
       ndiscard (int) - number of iterations to discard to equilibration
       nuse (int) - maximum number of iterations to use (after discarding)

    TODO: Automatically determine 'ndiscard'.
    """

    # Get current dimensions.
    niterations = ncfile.variables['energies'].shape[0]
    nstates = ncfile.variables['energies'].shape[1]
    natoms = ncfile.variables['energies'].shape[2]

    # Extract energies.
    print "Reading energies..."
    energies = ncfile.variables['energies']
    u_kln_replica = zeros([nstates, nstates, niterations], float64)
    for n in range(niterations):
        u_kln_replica[:,:,n] = energies[n,:,:]
    print "Done."

    # Deconvolute replicas
    print "Deconvoluting replicas..."
    u_kln = zeros([nstates, nstates, niterations], float64)
    for iteration in range(niterations):
        state_indices = ncfile.variables['states'][iteration,:]
        u_kln[state_indices,:,iteration] = energies[iteration,:,:]
    print "Done."

    # Compute total negative log probability over all iterations.
    u_n = zeros([niterations], float64)
    for iteration in range(niterations):
        u_n[iteration] = sum(diagonal(u_kln[:,:,iteration]))
    #print u_n

    # DEBUG
    outfile = open('u_n.out', 'w')
    for iteration in range(niterations):
        outfile.write("%8d %24.3f\n" % (iteration, u_n[iteration]))
    outfile.close()

    # Discard initial data to equilibration.
    u_kln_replica = u_kln_replica[:,:,ndiscard:]
    u_kln = u_kln[:,:,ndiscard:]
    u_n = u_n[ndiscard:]

    # Truncate to number of specified conforamtions to use
    if (nuse):
        u_kln_replica = u_kln_replica[:,:,0:nuse]
        u_kln = u_kln[:,:,0:nuse]
        u_n = u_n[0:nuse]
    
    # Subsample data to obtain uncorrelated samples
    N_k = zeros(nstates, int32)
    indices = timeseries.subsampleCorrelatedData(u_n) # indices of uncorrelated samples
    #indices = range(0,u_n.size) # DEBUG - assume samples are uncorrelated
    N = len(indices) # number of uncorrelated samples
    N_k[:] = N      
    u_kln[:,:,0:N] = u_kln[:,:,indices]
    print "number of uncorrelated samples:"
    print N_k
    print ""

    #===================================================================================================
    # Estimate free energy difference with MBAR.
    #===================================================================================================   
   
    # Initialize MBAR (computing free energy estimates, which may take a while)
    print "Computing free energy differences..."
    mbar = MBAR(u_kln, N_k, verbose = False, method = 'adaptive', maximum_iterations = 50000) # use slow self-consistent-iteration (the default)
    #mbar = MBAR(u_kln, N_k, verbose = False, method = 'self-consistent-iteration', maximum_iterations = 50000) # use slow self-consistent-iteration (the default)
    #mbar = MBAR(u_kln, N_k, verbose = True, method = 'Newton-Raphson') # use faster Newton-Raphson solver

    # Get matrix of dimensionless free energy differences and uncertainty estimate.
    print "Computing covariance matrix..."
    (Deltaf_ij, dDeltaf_ij) = mbar.getFreeEnergyDifferences(uncertainty_method='svd-ew')
   
#    # Matrix of free energy differences
    print "Deltaf_ij:"
    for i in range(nstates):
        for j in range(nstates):
            print "%8.3f" % Deltaf_ij[i,j],
        print ""        
    
#    print Deltaf_ij
#    # Matrix of uncertainties in free energy difference (expectations standard deviations of the estimator about the true free energy)
    print "dDeltaf_ij:"
    for i in range(nstates):
        for j in range(nstates):
            print "%8.3f" % dDeltaf_ij[i,j],
        print ""        

    # Return free energy differences and an estimate of the covariance.
    return (Deltaf_ij, dDeltaf_ij)
Ejemplo n.º 27
0
# Subsample each state's timeseries, run MBAR on the result and report the
# tension derived from the free energy difference between states 0 and 1.

# get uncorrelated samples
print("=== Getting uncorrelated samples===")
N_k = np.zeros([nstates], np.int32)  # number of uncorrelated samples
for k in range(nstates):
    # Only the statistical inefficiency g is used below; nequil is not used to
    # discard any data in this fragment.
    [nequil, g, Neff_max] = timeseries.detectEquilibration(u_kln[k, k, :])
    indices = timeseries.subsampleCorrelatedData(u_kln[k, k, :], g=g)
    N_k[k] = len(indices)
    # Pack the retained samples into the leading N_k[k] columns of row k.
    u_kln[k, :, 0:N_k[k]] = u_kln[k, :, indices].T
print("...found {} uncorrelated samples...".format(N_k))

# Persist the subsampled energy array (np.save appends '.npy' to the name).
np.save('{}_ukln'.format(args.outprefix), u_kln)

# Compute free energy differences and statistical uncertainties
print("=== Computing free energy differences ===")
mbar = MBAR(u_kln, N_k)
[DeltaF_ij, dDeltaF_ij, Theta_ij] = mbar.getFreeEnergyDifferences()

np.savetxt('{}_DeltaF.dat'.format(args.outprefix), DeltaF_ij)
np.savetxt('{}_dDeltaF.dat'.format(args.outprefix), dDeltaF_ij)

# Print out one line summary
#tension = DeltaF_ij[0,1]/2/da * 1e18
#tensionError = dDeltaF_ij[0,1]/2/da * 1e18
# NOTE(review): da and kT are defined outside this fragment; the 1e18 factor
# presumably converts an area given in nm^2 to m^2 -- confirm against the caller.
tension = DeltaF_ij[
    0,
    1] / da * 1e18 * kT  #(in J/m^2). note da already has a factor of two for the two areas!
tensionError = dDeltaF_ij[0, 1] / da * 1e18 * kT
print('tension (pymbar): {} +/- {}N/m'.format(tension, tensionError))

with open('{}_results.txt'.format(args.outprefix), "a") as f:
    f.write('\nUsing pymbar:\n')
   mbar_same_total_samples = MBAR(u_kn_same_total_samples, state_counts_same_total_samples, verbose=False, relative_tolerance=1e-12)
#  Get the 'weights', or reweighted mixture distribution
   weights = mbar.getWeights()
   weights_same_total_samples = mbar_same_total_samples.getWeights()
# Store the weights for later analysis 
   weights_for_each_num_states.extend([weights])
   weights_for_each_num_states_same_total_samples.extend([weights_same_total_samples])

#############
#
# 6) Calculate dimensionless free energies with MBAR
#
#############

# Get the dimensionless free energy differences, and uncertainties in their values
   free_energies,uncertainty_free_energies = mbar.getFreeEnergyDifferences()[0],mbar.getFreeEnergyDifferences()[1]
# Save the free energies
   free_energies_for_each_num_states.extend([free_energies])
# Save the uncertainty in the free energy
   uncertainties_for_each_num_states.extend([uncertainty_free_energies])
# Get the dimensionless free energy differences and uncertainties for the uniform sampling approach
   free_energies,uncertainty_free_energies = mbar_same_total_samples.getFreeEnergyDifferences()[0],mbar_same_total_samples.getFreeEnergyDifferences()[1]
# Save the data
   free_energies_for_each_num_states_same_total_samples.extend([free_energies])
   uncertainties_for_each_num_states_same_total_samples.extend([uncertainty_free_energies])

#############
#
# 7) Calculate < R_Na-Cl > with MBAR
#
#############
Ejemplo n.º 29
0
# Subsample the per-state energy timeseries, run MBAR, and write the free
# energy of every state relative to state 0 (scaled by kT) to `Savename`.
nstates, m, k = np.shape(u_kln)
l = np.linspace(0, 1, nstates)

# Subsample data to extract uncorrelated equilibrium timeseries
N_k = np.zeros([nstates], np.int32)  # number of uncorrelated samples

for k in range(nstates):
    [nequil, g, Neff_max] = timeseries.detectEquilibration(u_kln[k, k, :])
    indices = timeseries.subsampleCorrelatedData(u_kln[k, k, :], g=g)
    N_k[k] = len(indices)
    # Pack the retained samples into the leading N_k[k] columns of row k.
    u_kln[k, :, 0:N_k[k]] = u_kln[k, :, indices].T

# Compute free energy differences and statistical uncertainties
mbar = MBAR(u_kln, N_k)
[DeltaF_ij, dDeltaF_ij,
 Theta_ij] = mbar.getFreeEnergyDifferences(return_theta=True)
# Dict-style alternative to the call above:
#results = mbar.getFreeEnergyDifferences(return_dict=True,return_theta=True)
#DeltaF_ij = results['Delta_f']
#dDeltaF_ij = results['dDelta_f']
#Theta_ij = results['Theta']
ODeltaF_ij = mbar.computeOverlap()['matrix']

# Print results; 'with' closes the file even if formatting a row fails.
with open(Savename, 'w') as f:
    for i in range(nstates):
        f.write("%.2f: %9.4f +- %.4f\n" %
                (l[i], DeltaF_ij[i, 0] * kT, dDeltaF_ij[i, 0] * kT))

# Plot Overlap
fig1, ax1 = plt.subplots()
Ejemplo n.º 30
0
      print 'E_%d evaluated in model_%d'%(k,l), u_kln[k,l,n]


# Initialize MBAR with reduced energies u_kln and number of uncorrelated configurations from each state N_k.
# 
# u_kln[k,l,n] is the reduced potential energy beta*U_l(x_kn), where U_l(x) is the potential energy function for state l,
# beta is the inverse temperature, and x_kn denotes uncorrelated configuration n from state k.
#
# N_k[k] is the number of configurations from state k stored in u_kln
# 
# Note that this step may take some time, as the relative dimensionless free energies f_k are determined at this point.
mbar = MBAR(u_kln, N_k)

# Extract dimensionless free energy differences and their statistical uncertainties.
(Deltaf_ij, dDeltaf_ij) = mbar.getFreeEnergyDifferences()
print 'Deltaf_ij', Deltaf_ij
print 'dDeltaf_ij', dDeltaf_ij
beta = 1.0 # keep in units kT
print 'Unit-bearing (units kT) free energy difference f_1K = f_K - f_1: %f +- %f' % ( (1./beta) * Deltaf_ij[0,K-1], (1./beta) * dDeltaf_ij[0,K-1])
f_df = np.zeros( (nlambda, 2) )  # first column is Deltaf_ij[0,:], second column is dDeltaf_ij[0,:]
f_df[:,0] = Deltaf_ij[0,:]
f_df[:,1] = dDeltaf_ij[0,:]
print 'Writing %s...'%args.bayesfactorfile
savetxt(args.bayesfactorfile, f_df)
print '...Done.'


# Compute the expectation of some observable A(x) at each state i, and associated uncertainty matrix.
# Here, A_kn[k,n] = A(x_{kn})
#(A_k, dA_k) = mbar.computeExpectations(A_kn)
Ejemplo n.º 31
0
# Sample every alchemical state in turn, record the reduced potential energy of
# each sample at all states, then estimate free energy differences with MBAR.
# Molar thermal energy at the integrator temperature; used to reduce energies.
kT = unit.AVOGADRO_CONSTANT_NA * unit.BOLTZMANN_CONSTANT_kB * integrator.getTemperature()
for k in range(nstates):
    for iteration in range(niterations):
        print('state %5d iteration %5d / %5d' % (k, iteration, niterations))
        # Set alchemical state
        context.setParameter('lambda', lambdas[k])
        # Run some dynamics
        integrator.step(nsteps)
        # Compute energies at all alchemical states
        # (the context is left at lambdas[-1] afterwards; the next iteration
        # resets it to lambdas[k] before stepping)
        for l in range(nstates):
            context.setParameter('lambda', lambdas[l])
            u_kln[k,l,iteration] = context.getState(getEnergy=True).getPotentialEnergy() / kT

# Estimate free energy of Lennard-Jones particle insertion
from pymbar import MBAR, timeseries
# Subsample data to extract uncorrelated equilibrium timeseries
N_k = np.zeros([nstates], np.int32) # number of uncorrelated samples
for k in range(nstates):
    # Only the statistical inefficiency g is used; nequil is not used to
    # discard data in this fragment.
    [nequil, g, Neff_max] = timeseries.detectEquilibration(u_kln[k,k,:])
    indices = timeseries.subsampleCorrelatedData(u_kln[k,k,:], g=g)
    N_k[k] = len(indices)
    # Pack the retained samples into the leading N_k[k] columns of row k.
    u_kln[k,:,0:N_k[k]] = u_kln[k,:,indices].T
# Compute free energy differences and statistical uncertainties
mbar = MBAR(u_kln, N_k)
[DeltaF_ij, dDeltaF_ij, Theta_ij] = mbar.getFreeEnergyDifferences()

print('DeltaF_ij (kT):')
print(DeltaF_ij)
print('dDeltaF_ij (kT):')
print(dDeltaF_ij)
Ejemplo n.º 32
0
def estimate_free_energies(ncfile, ndiscard=0, nuse=None, g=1.0, replicas=None):
    """Estimate free energies of all alchemical states.

    ARGUMENTS
       ncfile (NetCDF) - input YANK netcdf file

    OPTIONAL ARGUMENTS
       ndiscard (int) - number of iterations to discard to equilibration (default: 0)
       nuse (int) - maximum number of iterations to use (after discarding) (default: None)
       g (float) - statistical inefficiency to use for subsampling (default: 1.0)
       replicas (list of int) - if specified, only use these replicas for estimating the free energies (default: None)

    RETURNS
       (Deltaf_ij, dDeltaf_ij) - the two matrices returned by
       MBAR.getFreeEnergyDifferences: dimensionless free energy differences
       between states and the estimate of their uncertainties

    TODO: Automatically determine 'ndiscard'.
    """

    # Look the NetCDF variables up once instead of on every loop iteration.
    energies = ncfile.variables['energies']
    states = ncfile.variables['states']

    # Get current dimensions.
    niterations = energies.shape[0]
    nstates = energies.shape[1]

    # Extract energies, indexed [replica, state, iteration].
    u_kln_replica = numpy.zeros([nstates, nstates, niterations], numpy.float64)
    for n in range(niterations):
        u_kln_replica[:,:,n] = energies[n,:,:]

    # Extract states, indexed [replica, iteration].
    states_kn_replica = numpy.zeros([nstates, niterations], numpy.int32)
    for n in range(niterations):
        states_kn_replica[:,n] = states[n,:]

    # Discard initial data to equilibration.
    u_kln_replica = u_kln_replica[:,:,ndiscard:]
    states_kn_replica = states_kn_replica[:,ndiscard:]

    # If specified, truncate to number of specified conformations to use.
    if nuse:
        u_kln_replica = u_kln_replica[:,:,0:nuse]
        states_kn_replica = states_kn_replica[:,0:nuse]

    # Subsample data to obtain uncorrelated samples; the retained iterations
    # are packed into the leading N columns of both arrays.
    A_n = u_kln_replica[0,0,:]
    indices = timeseries.subsampleCorrelatedData(A_n, g=g) # indices of uncorrelated samples
    N = len(indices) # number of uncorrelated samples
    u_kln_replica[:,:,0:N] = u_kln_replica[:,:,indices]
    states_kn_replica[:,0:N] = states_kn_replica[:,indices]

    # Deconvolute replicas to obtain energies by state.
    u_kln = numpy.zeros([nstates, nstates, N], numpy.float64)
    if replicas is None:
        # Use all replicas.
        N_k = N * numpy.ones(nstates, numpy.int32)
        for n in range(N):
            state_indices = states_kn_replica[:,n]
            u_kln[state_indices,:,n] = u_kln_replica[:,:,n]
    else:
        # Use only specified replicas: append each selected replica's sample to
        # the next free slot of the state it occupied at that iteration.
        N_k = numpy.zeros(nstates, numpy.int32)
        for n in range(N):
            for replica in replicas:
                state_index = states_kn_replica[replica,n]
                u_kln[state_index,:,N_k[state_index]] = u_kln_replica[replica,:,n]
                N_k[state_index] += 1

    #===================================================================================================
    # Estimate free energy difference with MBAR.
    #===================================================================================================

    # Initialize MBAR (computing free energy estimates, which may take a while)
    mbar = MBAR(u_kln, N_k, verbose = False, maximum_iterations = 50000) # use slow self-consistent-iteration (the default)

    # Get matrix of dimensionless free energy differences and uncertainty estimate.
    (Deltaf_ij, dDeltaf_ij) = mbar.getFreeEnergyDifferences(uncertainty_method='svd-ew')

    # Return free energy differences and an estimate of the covariance.
    return (Deltaf_ij, dDeltaf_ij)
Ejemplo n.º 33
0
    def free_energy_trace(self, discard_from_start=1, n_trace=10):
        """
        Trace the free energy by keeping fewer and fewer samples in both forward and reverse direction

        For every phase, the free energy between the analyzer's reference states is
        re-estimated with MBAR on the first (forward) and on the last (reverse)
        ``trace_factor / n_trace`` fraction of the iterations, for
        ``trace_factor = n_trace, ..., 1``. One subplot is drawn per phase plus one
        for the sum over all phases.

        Parameters
        ----------
        discard_from_start : int, optional, default=1
            Number of leading iterations removed from the energies and states
            before any analysis.
        n_trace : int, optional, default=10
            Number of points in each trace.

        Returns
        -------
        free_energy_trace_figure : matplotlib.figure
            Figure showing the equilibration between both phases

        Notes
        -----
        Side effects: overwrites ``plt.rcParams['figure.figsize']`` and stores the
        forward/reverse traces as lists under
        ``self._serialized_data[phase_name]["free_energy"]``.

        """

        # Fraction of the data added by each successive trace point.
        trace_spacing = 1.0/n_trace

        def format_trace_plot(plot: plt.Axes, trace_forward: np.ndarray, trace_reverse: np.ndarray):
            # Draw both traces on `plot`; column 0 of each trace is the value and
            # column 1 its uncertainty (error bars span two uncertainties).
            x = np.arange(n_trace + 1)[1:] * trace_spacing * 100
            plot.errorbar(x, trace_forward[:, 0], yerr=2 * trace_forward[:, 1], ecolor='b',
                          elinewidth=0, mec='none', mew=0, linestyle='None',
                          zorder=10)
            plot.plot(x, trace_forward[:, 0], 'b-', marker='o', mec='b', mfc='w', label='Forward', zorder=20,)
            plot.errorbar(x, trace_reverse[:, 0], yerr=2 * trace_reverse[:, 1], ecolor='r',
                          elinewidth=0, mec='none', mew=0, linestyle='None',
                          zorder=10)
            plot.plot(x, trace_reverse[:, 0], 'r-', marker='o', mec='r', mfc='w', label='Reverse', zorder=20)
            # Shaded band: the final forward estimate (all data) +/- two uncertainties.
            y_fill_upper = [trace_forward[-1, 0] + 2 * trace_forward[-1, 1]] * 2
            y_fill_lower = [trace_forward[-1, 0] - 2 * trace_forward[-1, 1]] * 2
            xlim = [0, 100]
            plot.fill_between(xlim, y_fill_lower, y_fill_upper, color='orchid', zorder=5)
            plot.set_xlim(xlim)
            plot.legend()
            plot.set_xlabel("% Samples Analyzed", fontsize=20)
            plot.set_ylabel(r"$\Delta G$ in kcal/mol", fontsize=20)
        # Adjust figure size
        plt.rcParams['figure.figsize'] = 15, 6 * (self.nphases + 1) * 2
        plot_grid = gridspec.GridSpec(self.nphases + 1, 1)  # Vertical distribution
        free_energy_trace_figure = plt.figure()
        # Add some space between the figures
        free_energy_trace_figure.subplots_adjust(hspace=0.4)
        # Per-phase traces, kept so they can be summed for the combined plot.
        traces = {}
        for i, phase_name in enumerate(self.phase_names):
            traces[phase_name] = {}
            # Make sure the nested serialization dictionaries exist.
            if phase_name not in self._serialized_data:
                self._serialized_data[phase_name] = {}
            serial = self._serialized_data[phase_name]
            if "free_energy" not in serial:
                serial["free_energy"] = {}
            serial = serial["free_energy"]
            free_energy_trace_f = np.zeros([n_trace, 2], dtype=float)
            free_energy_trace_r = np.zeros([n_trace, 2], dtype=float)
            p = free_energy_trace_figure.add_subplot(plot_grid[i])
            analyzer = self.analyzers[phase_name]
            # Factor applied to the MBAR output below to express it in kcal/mol.
            kcal = analyzer.kT / units.kilocalorie_per_mole
            # Data crunching to get timeseries
            sampled_energies, _, _, states = analyzer.read_energies()
            n_replica, n_states, _ = sampled_energies.shape
            # Sample at index 0 is actually the minimized structure and NOT from the equilibrium distribution
            # This throws off all of the equilibrium data
            sampled_energies = sampled_energies[:, :, discard_from_start:]
            states = states[:, discard_from_start:]
            total_iterations = sampled_energies.shape[-1]
            for trace_factor in range(n_trace, 0, -1):  # Reverse order tracing
                trace_percent = trace_spacing*trace_factor
                j = trace_factor - 1  # Indexing
                kept_iterations = int(np.ceil(trace_percent*total_iterations))
                # Forward: the first kept_iterations iterations.
                u_forward = sampled_energies[:, :, :kept_iterations]
                s_forward = states[:, :kept_iterations]
                # Reverse: the last kept_iterations iterations, in reversed order.
                u_reverse = sampled_energies[:, :, -1:-kept_iterations-1:-1]
                s_reverse = states[:, -1:-kept_iterations - 1:-1]
                for energy_sub, state_sub, storage in [
                        (u_forward, s_forward, free_energy_trace_f), (u_reverse, s_reverse, free_energy_trace_r)]:
                    u_n = analyzer.get_effective_energy_timeseries(energies=energy_sub,
                                                                   replica_state_indices=state_sub)

                    # Use the entry with the largest n_effective_i to pick the
                    # equilibration length and subsampling rate.
                    i_t, g_i, n_effective_i = analyze.multistate.get_equilibration_data_per_sample(u_n)
                    i_max = n_effective_i.argmax()
                    number_equilibrated = i_t[i_max]
                    g_t = g_i[i_max]
                    if not self.use_full_trajectory:
                        energy_sub = analyze.multistate.utils.remove_unequilibrated_data(energy_sub,
                                                                                         number_equilibrated,
                                                                                         -1)
                        state_sub = analyze.multistate.utils.remove_unequilibrated_data(state_sub,
                                                                                        number_equilibrated, -1)
                        energy_sub = analyze.multistate.utils.subsample_data_along_axis(energy_sub, g_t, -1)
                        state_sub = analyze.multistate.utils.subsample_data_along_axis(state_sub, g_t, -1)
                    # Count how many retained samples fall in each state; states
                    # that were never visited keep a count of zero.
                    samples_per_state = np.zeros([n_states], dtype=int)
                    unique_sampled_states, counts = np.unique(state_sub, return_counts=True)
                    # Assign those counts to the correct range of states
                    samples_per_state[unique_sampled_states] = counts
                    mbar = MBAR(energy_sub, samples_per_state)
                    fe_data = mbar.getFreeEnergyDifferences(compute_uncertainty=True)
                    # Trap theta_ij output
                    try:
                        fe, dfe, _ = fe_data
                    except ValueError:
                        fe, dfe = fe_data
                    ref_i, ref_j = analyzer.reference_states
                    storage[j, :] = fe[ref_i, ref_j] * kcal, dfe[ref_i, ref_j] * kcal
            format_trace_plot(p, free_energy_trace_f, free_energy_trace_r)
            p.set_title("{} Phase".format(phase_name.title()), fontsize=20)
            traces[phase_name]['f'] = free_energy_trace_f
            traces[phase_name]['r'] = free_energy_trace_r
            serial['forward'] = free_energy_trace_f.tolist()
            serial['reverse'] = free_energy_trace_r.tolist()
        # Finally handle last combined plot
        combined_trace_f = np.zeros([n_trace, 2], dtype=float)
        combined_trace_r = np.zeros([n_trace, 2], dtype=float)
        for phase_name in self.phase_names:
            phase_f = traces[phase_name]['f']
            phase_r = traces[phase_name]['r']
            # Values add across phases; uncertainties add in quadrature.
            combined_trace_f[:, 0] += phase_f[:, 0]
            combined_trace_f[:, 1] = np.sqrt(combined_trace_f[:, 1]**2 + phase_f[:, 1]**2)
            combined_trace_r[:, 0] += phase_r[:, 0]
            combined_trace_r[:, 1] = np.sqrt(combined_trace_r[:, 1] ** 2 + phase_r[:, 1] ** 2)
        p = free_energy_trace_figure.add_subplot(plot_grid[-1])
        format_trace_plot(p, combined_trace_f, combined_trace_r)
        p.set_title("Combined Phases", fontsize=20)

        return free_energy_trace_figure
Ejemplo n.º 34
0
def compute_hydration_energy(entry, parameters, platform_name="CPU"):
    """
    Compute hydration energy of a single molecule given a GBSA parameter set.

    The stored samples are treated as draws from state 0 and are re-evaluated
    in the vacuum system (state 1) and in the re-parameterized GBSA system
    (state 2); MBAR then reweights them to estimate the difference between
    states 1 and 2.

    ARGUMENTS

    entry (dict) - record for one molecule; the keys read here are 'molecule'
        (atoms carry a "gbsa_type" string), 'system', 'solvated_system',
        'temperature', 'x_n' (sample positions, used in nanometers) and 'u_n'
        (reduced energies of the samples in the sampled state)
    parameters (dict) - parameters for GBSA atom types, keyed
        '<gbsa_type>_radius' (used in angstroms) and '<gbsa_type>_scalingFactor'
    platform_name (str) - name of the OpenMM platform to run on (default: "CPU")

    RETURNS

    energy (float) - MBAR estimate df_ij[1, 2] of the free energy difference
        between the vacuum and solvent states, in units of kT; np.inf if MBAR
        raises a LinAlgError

    """

    from pymbar import MBAR

    # Honour the requested platform instead of always forcing 'CPU'.
    platform = openmm.Platform.getPlatformByName(platform_name)

    timestep = 2.0 * units.femtosecond

    molecule = entry['molecule']

    # Retrieve OpenMM System. The solvated system is copied because its GBSA
    # parameters are overwritten below.
    vacuum_system = entry['system']
    solvent_system = copy.deepcopy(entry['solvated_system'])

    # Get nonbonded force.
    forces = { solvent_system.getForce(index).__class__.__name__ : solvent_system.getForce(index) for index in range(solvent_system.getNumForces()) }
    nonbonded_force = forces['NonbondedForce']
    gbsa_force = forces['CustomGBForce']

    # Assign GBSA parameters (must happen before the solvent context is created).
    for (atom_index, atom) in enumerate(molecule.GetAtoms()):
        [charge, sigma, epsilon] = nonbonded_force.getParticleParameters(atom_index)
        atomtype = atom.GetStringData("gbsa_type") # GBSA atomtype
        radius = parameters['%s_%s' % (atomtype, 'radius')] * units.angstroms
        scalingFactor = parameters['%s_%s' % (atomtype, 'scalingFactor')]
        gbsa_force.setParticleParameters(atom_index, [charge, radius, scalingFactor])

    # Create contexts for the solvent and vacuum systems.
    solvent_integrator = openmm.VerletIntegrator(timestep)
    vacuum_integrator = openmm.VerletIntegrator(timestep)
    solvent_context = openmm.Context(solvent_system, solvent_integrator, platform)
    vacuum_context = openmm.Context(vacuum_system, vacuum_integrator, platform)

    try:
        # Compute energy differences.
        temperature = entry['temperature']
        kT = kB * temperature
        beta = 1.0 / kT

        x_n = entry['x_n']
        u_n = entry['u_n']
        nsamples = len(u_n)
        nstates = 3 # number of thermodynamic states
        u_kln = np.zeros([nstates, nstates, nsamples], np.float64)
        for sample in range(nsamples):
            positions = units.Quantity(x_n[sample,:,:], units.nanometers)

            u_kln[0,0,sample] = u_n[sample]

            vacuum_context.setPositions(positions)
            vacuum_state = vacuum_context.getState(getEnergy=True)
            u_kln[0,1,sample] = beta * vacuum_state.getPotentialEnergy()

            solvent_context.setPositions(positions)
            solvent_state = solvent_context.getState(getEnergy=True)
            u_kln[0,2,sample] = beta * solvent_state.getPotentialEnergy()

        # All samples come from state 0; states 1 and 2 are unsampled.
        N_k = np.zeros([nstates], np.int32)
        N_k[0] = nsamples

        mbar = MBAR(u_kln, N_k)
        try:
            df_ij, ddf_ij, _ = mbar.getFreeEnergyDifferences()
        except linalg.LinAlgError:
            return np.inf
    finally:
        # Clean up on every exit path, including the early return above.
        del solvent_context, solvent_integrator
        del vacuum_context, vacuum_integrator

    DeltaG_in_kT = df_ij[1,2]

    # Debug output kept from the original implementation.
    print(DeltaG_in_kT)
    print(type(DeltaG_in_kT))

    return DeltaG_in_kT
Ejemplo n.º 35
0
#try an on the fly mbar estimation
# Subsample data to extract uncorrelated equilibrium timeseries
N_k = np.zeros([nstates], np.int32)  # number of uncorrelated samples
for k in range(nstates):
    [nequil, g, Neff_max] = timeseries.detectEquilibration(u_kln[k, k, :])
    indices = timeseries.subsampleCorrelatedData(u_kln[k, k, :], g=g)
    N_k[k] = len(indices)
    # Pack the retained samples into the leading N_k[k] columns of row k.
    u_kln[k, :, 0:N_k[k]] = u_kln[k, :, indices].T
# Compute free energy differences and statistical uncertainties
mbar = MBAR(u_kln,
            N_k,
            verbose=True,
            method="adaptive",
            relative_tolerance=1e-10)  #, initialize="BAR")
[DeltaF_ij, dDeltaF_ij,
 Theta_ij] = mbar.getFreeEnergyDifferences(uncertainty_method='svd-ew')

# Scale the reduced end-state difference by 298.0 * 0.001987204, the same
# factor the original used for the value it labels kcal/mol.
mbar_fe = DeltaF_ij[0, nstates - 1] * 298.0 * 0.001987204
dmbar_fe = dDeltaF_ij[0, nstates - 1] * 298.0 * 0.001987204

print('DeltaF_ij (kcal/mol):')
print(mbar_fe)

#write the free energy
with open("freenrg-MBAR.dat", "w") as mbar_file:
    mbar_file.write("\n")
    mbar_file.write("Free energy differences matrix from MBAR in reduced units:")
    # A text file only accepts str, so the matrix has to be converted first.
    mbar_file.write("\n")
    mbar_file.write(str(DeltaF_ij))
    mbar_file.write("\n")
    mbar_file.write("Free energy MBAR:  %.4f +/- %.4f\n" % (mbar_fe, dmbar_fe))
Ejemplo n.º 36
0
# Fit MBAR to the sampled energies and compare the estimated free energy
# differences with the analytical reference values.
print("======================================")
print("      Initializing MBAR               ")
print("======================================")

# Estimate free energies from simulation using MBAR.
print("Estimating relative free energies from simulation (this may take a while)...")

# Initialize the MBAR class, determining the free energies.
mbar = MBAR(u_kln, N_k, relative_tolerance=1.0e-10, verbose=True)
# Get matrix of dimensionless free energy differences and uncertainty estimate.

print("=============================================")
print("      Testing getFreeEnergyDifferences       ")
print("=============================================")

# The result is indexed by key below, so a dict-like return value is expected.
# NOTE(review): pymbar releases that return a tuple by default presumably need
# return_dict=True here -- confirm against the installed version.
results = mbar.getFreeEnergyDifferences()
Delta_f_ij_estimated = results['Delta_f']
dDelta_f_ij_estimated = results['dDelta_f']

# Compute error from analytical free energy differences.
Delta_f_ij_error = Delta_f_ij_estimated - Delta_f_ij_analytical

print("Error in free energies is:")
print(Delta_f_ij_error)
print("Uncertainty in free energies is:")
print(dDelta_f_ij_estimated)

print("Standard deviations away is:")
# mathematical manipulation to avoid dividing by zero errors; we don't care
# about the diagonals, since they are identically zero.
df_ij_mod = dDelta_f_ij_estimated + numpy.identity(K)
Ejemplo n.º 37
0
def estimate_free_energies(ncfile, ndiscard=0, nuse=None):
    """Estimate free energies of all alchemical states.

    ARGUMENTS
       ncfile (NetCDF) - input YANK netcdf file

    OPTIONAL ARGUMENTS
       ndiscard (int) - number of iterations to discard to equilibration
       nuse (int) - maximum number of iterations to use (after discarding)

    TODO: Automatically determine 'ndiscard'.
    """

    # Get current dimensions.
    niterations = ncfile.variables['energies'].shape[0]
    nstates = ncfile.variables['energies'].shape[1]
    natoms = ncfile.variables['energies'].shape[2]

    # Extract energies.
    print "Reading energies..."
    energies = ncfile.variables['energies']
    u_kln_replica = zeros([nstates, nstates, niterations], float64)
    for n in range(niterations):
        u_kln_replica[:, :, n] = energies[n, :, :]
    print "Done."

    # Deconvolute replicas
    print "Deconvoluting replicas..."
    u_kln = zeros([nstates, nstates, niterations], float64)
    for iteration in range(niterations):
        state_indices = ncfile.variables['states'][iteration, :]
        u_kln[state_indices, :, iteration] = energies[iteration, :, :]
    print "Done."

    # Compute total negative log probability over all iterations.
    u_n = zeros([niterations], float64)
    for iteration in range(niterations):
        u_n[iteration] = sum(diagonal(u_kln[:, :, iteration]))
    #print u_n

    # DEBUG
    outfile = open('u_n.out', 'w')
    for iteration in range(niterations):
        outfile.write("%8d %24.3f\n" % (iteration, u_n[iteration]))
    outfile.close()

    # Discard initial data to equilibration.
    u_kln_replica = u_kln_replica[:, :, ndiscard:]
    u_kln = u_kln[:, :, ndiscard:]
    u_n = u_n[ndiscard:]

    # Truncate to number of specified conforamtions to use
    if (nuse):
        u_kln_replica = u_kln_replica[:, :, 0:nuse]
        u_kln = u_kln[:, :, 0:nuse]
        u_n = u_n[0:nuse]

    # Subsample data to obtain uncorrelated samples
    N_k = zeros(nstates, int32)
    indices = timeseries.subsampleCorrelatedData(
        u_n)  # indices of uncorrelated samples
    #indices = range(0,u_n.size) # DEBUG - assume samples are uncorrelated
    N = len(indices)  # number of uncorrelated samples
    N_k[:] = N
    u_kln[:, :, 0:N] = u_kln[:, :, indices]
    print "number of uncorrelated samples:"
    print N_k
    print ""

    #===================================================================================================
    # Estimate free energy difference with MBAR.
    #===================================================================================================

    # Initialize MBAR (computing free energy estimates, which may take a while)
    print "Computing free energy differences..."
    mbar = MBAR(u_kln,
                N_k,
                verbose=False,
                method='adaptive',
                maximum_iterations=50000
                )  # use slow self-consistent-iteration (the default)
    #mbar = MBAR(u_kln, N_k, verbose = False, method = 'self-consistent-iteration', maximum_iterations = 50000) # use slow self-consistent-iteration (the default)
    #mbar = MBAR(u_kln, N_k, verbose = True, method = 'Newton-Raphson') # use faster Newton-Raphson solver

    # Get matrix of dimensionless free energy differences and uncertainty estimate.
    print "Computing covariance matrix..."
    (Deltaf_ij,
     dDeltaf_ij) = mbar.getFreeEnergyDifferences(uncertainty_method='svd-ew')

    #    # Matrix of free energy differences
    print "Deltaf_ij:"
    for i in range(nstates):
        for j in range(nstates):
            print "%8.3f" % Deltaf_ij[i, j],
        print ""


#    print Deltaf_ij
#    # Matrix of uncertainties in free energy difference (expectations standard deviations of the estimator about the true free energy)
    print "dDeltaf_ij:"
    for i in range(nstates):
        for j in range(nstates):
            print "%8.3f" % dDeltaf_ij[i, j],
        print ""

    # Return free energy differences and an estimate of the covariance.
    return (Deltaf_ij, dDeltaf_ij)
Ejemplo n.º 38
0
u_kln_subsampled = numpy.zeros([K, K, nprod_iterations],
                               numpy.float64)  # subsampled data
for k in range(K):
    # Get indices of uncorrelated samples.
    indices = subsampleCorrelatedData(u_kln[k, k, :])
    # Store only uncorrelated data.
    N_k[k] = len(indices)
    for l in range(K):
        u_kln_subsampled[k, l, 0:len(indices)] = u_kln[k, l, indices]
print "Number of uncorrelated samples per state:"
print N_k

# =============================================================================
# Analyze with MBAR to compute free energy differences and statistical errors.
# =============================================================================

print "Analyzing with MBAR..."
mbar = MBAR(u_kln_subsampled, N_k)
[Deltaf_ij, dDeltaf_ij] = mbar.getFreeEnergyDifferences()
print "Free energy differences (in kT)"
print Deltaf_ij
print "Statistical errors (in kT)"
print dDeltaf_ij

# =============================================================================
# Report result.
# =============================================================================

print "Free energy of inserting argon particle: %.3f +- %.3f kT" % (
    Deltaf_ij[0, K - 1], dDeltaf_ij[0, K - 1])
Ejemplo n.º 39
0
    def free_energy_trace(self, discard_from_start=1, n_trace=10):
        """
        Trace the free energy by keeping fewer and fewer samples in both forward and reverse direction

        For every phase, MBAR is re-run on the first (forward) and on the last,
        order-reversed (reverse) fraction of the iterations, for ``n_trace``
        evenly spaced fractions up to 100%. One subplot is drawn per phase, plus
        a final one in which the per-phase estimates are summed and their
        uncertainties are combined in quadrature.

        Parameters
        ----------
        discard_from_start : int, optional, default=1
            Number of leading iterations dropped from the sampled energies and
            state indices before any analysis.
        n_trace : int, optional, default=10
            Number of fractions (trace points) evaluated per direction.

        Returns
        -------
        free_energy_trace_figure : matplotlib.figure
            Figure showing the equilibration between both phases

        Notes
        -----
        Overwrites ``plt.rcParams['figure.figsize']`` and stores the forward and
        reverse traces of each phase under
        ``self._serialized_data[phase_name]['free_energy']``.

        """

        # Fraction of the data added by each successive trace point.
        trace_spacing = 1.0/n_trace

        def format_trace_plot(plot: plt.Axes, trace_forward: np.ndarray, trace_reverse: np.ndarray):
            # Draw both traces on one axes. Column 0 of each trace is the
            # estimate, column 1 its uncertainty; error bars span 2x column 1.
            x = np.arange(n_trace + 1)[1:] * trace_spacing * 100
            plot.errorbar(x, trace_forward[:, 0], yerr=2 * trace_forward[:, 1], ecolor='b',
                          elinewidth=0, mec='none', mew=0, linestyle='None',
                          zorder=10)
            plot.plot(x, trace_forward[:, 0], 'b-', marker='o', mec='b', mfc='w', label='Forward', zorder=20,)
            plot.errorbar(x, trace_reverse[:, 0], yerr=2 * trace_reverse[:, 1], ecolor='r',
                          elinewidth=0, mec='none', mew=0, linestyle='None',
                          zorder=10)
            plot.plot(x, trace_reverse[:, 0], 'r-', marker='o', mec='r', mfc='w', label='Reverse', zorder=20)
            # Horizontal band: last forward estimate +/- 2x its uncertainty.
            y_fill_upper = [trace_forward[-1, 0] + 2 * trace_forward[-1, 1]] * 2
            y_fill_lower = [trace_forward[-1, 0] - 2 * trace_forward[-1, 1]] * 2
            xlim = [0, 100]
            plot.fill_between(xlim, y_fill_lower, y_fill_upper, color='orchid', zorder=5)
            plot.set_xlim(xlim)
            plot.legend()
            plot.set_xlabel("% Samples Analyzed", fontsize=20)
            plot.set_ylabel(r"$\Delta G$ in kcal/mol", fontsize=20)
        # Adjust figure size
        plt.rcParams['figure.figsize'] = 15, 6 * (self.nphases + 1) * 2
        plot_grid = gridspec.GridSpec(self.nphases + 1, 1)  # Vertical distribution
        free_energy_trace_figure = plt.figure()
        # Add some space between the figures
        free_energy_trace_figure.subplots_adjust(hspace=0.4)
        # traces[phase_name] -> {'f': forward trace, 'r': reverse trace}
        traces = {}
        for i, phase_name in enumerate(self.phase_names):
            traces[phase_name] = {}
            # Make sure the nested serialization dict for this phase exists.
            if phase_name not in self._serialized_data:
                self._serialized_data[phase_name] = {}
            serial = self._serialized_data[phase_name]
            if "free_energy" not in serial:
                serial["free_energy"] = {}
            serial = serial["free_energy"]
            # One row per trace point: [estimate, uncertainty].
            free_energy_trace_f = np.zeros([n_trace, 2], dtype=float)
            free_energy_trace_r = np.zeros([n_trace, 2], dtype=float)
            p = free_energy_trace_figure.add_subplot(plot_grid[i])
            analyzer = self.analyzers[phase_name]
            # Factor applied to the MBAR output before plotting on the
            # "kcal/mol" axis.
            kcal = analyzer.kT / units.kilocalorie_per_mole
            # Data crunching to get timeseries
            sampled_energies, _, _, states = analyzer.read_energies()
            n_replica, n_states, _ = sampled_energies.shape
            # Sample at index 0 is actually the minimized structure and NOT from the equilibrium distribution
            # This throws off all of the equilibrium data
            sampled_energies = sampled_energies[:, :, discard_from_start:]
            states = states[:, discard_from_start:]
            total_iterations = sampled_energies.shape[-1]
            for trace_factor in range(n_trace, 0, -1):  # Reverse order tracing
                trace_percent = trace_spacing*trace_factor
                j = trace_factor - 1  # Indexing
                kept_iterations = int(np.ceil(trace_percent*total_iterations))
                # Forward: the first kept_iterations iterations.
                u_forward = sampled_energies[:, :, :kept_iterations]
                s_forward = states[:, :kept_iterations]
                # Reverse: the last kept_iterations iterations, in reversed order.
                u_reverse = sampled_energies[:, :, -1:-kept_iterations-1:-1]
                s_reverse = states[:, -1:-kept_iterations - 1:-1]
                for energy_sub, state_sub, storage in [
                        (u_forward, s_forward, free_energy_trace_f), (u_reverse, s_reverse, free_energy_trace_r)]:
                    u_n = analyzer.get_effective_energy_timeseries(energies=energy_sub,
                                                                   replica_state_indices=state_sub)

                    # Pick the candidate with the largest effective sample
                    # count and use its equilibration index and inefficiency.
                    i_t, g_i, n_effective_i = analyze.multistate.get_equilibration_data_per_sample(u_n)
                    i_max = n_effective_i.argmax()
                    number_equilibrated = i_t[i_max]
                    g_t = g_i[i_max]
                    if not self.use_full_trajectory:
                        # Drop the unequilibrated part, then subsample along
                        # the last axis using g_t.
                        energy_sub = analyze.multistate.utils.remove_unequilibrated_data(energy_sub,
                                                                                         number_equilibrated,
                                                                                         -1)
                        state_sub = analyze.multistate.utils.remove_unequilibrated_data(state_sub,
                                                                                        number_equilibrated, -1)
                        energy_sub = analyze.multistate.utils.subsample_data_along_axis(energy_sub, g_t, -1)
                        state_sub = analyze.multistate.utils.subsample_data_along_axis(state_sub, g_t, -1)
                    # Count how many retained samples were drawn from each state;
                    # states that never appear keep a count of zero.
                    samples_per_state = np.zeros([n_states], dtype=int)
                    unique_sampled_states, counts = np.unique(state_sub, return_counts=True)
                    # Assign those counts to the correct range of states
                    samples_per_state[unique_sampled_states] = counts
                    mbar = MBAR(energy_sub, samples_per_state)
                    fe_data = mbar.getFreeEnergyDifferences(compute_uncertainty=True)
                    # Trap theta_ij output
                    try:
                        fe, dfe, _ = fe_data
                    except ValueError:
                        fe, dfe = fe_data
                    ref_i, ref_j = analyzer.reference_states
                    storage[j, :] = fe[ref_i, ref_j] * kcal, dfe[ref_i, ref_j] * kcal
            format_trace_plot(p, free_energy_trace_f, free_energy_trace_r)
            p.set_title("{} Phase".format(phase_name.title()), fontsize=20)
            traces[phase_name]['f'] = free_energy_trace_f
            traces[phase_name]['r'] = free_energy_trace_r
            # Plain lists are stored for serialization.
            serial['forward'] = free_energy_trace_f.tolist()
            serial['reverse'] = free_energy_trace_r.tolist()
        # Finally handle last combined plot
        combined_trace_f = np.zeros([n_trace, 2], dtype=float)
        combined_trace_r = np.zeros([n_trace, 2], dtype=float)
        for phase_name in self.phase_names:
            phase_f = traces[phase_name]['f']
            phase_r = traces[phase_name]['r']
            # Estimates add; uncertainties add in quadrature.
            combined_trace_f[:, 0] += phase_f[:, 0]
            combined_trace_f[:, 1] = np.sqrt(combined_trace_f[:, 1]**2 + phase_f[:, 1]**2)
            combined_trace_r[:, 0] += phase_r[:, 0]
            combined_trace_r[:, 1] = np.sqrt(combined_trace_r[:, 1] ** 2 + phase_r[:, 1] ** 2)
        p = free_energy_trace_figure.add_subplot(plot_grid[-1])
        format_trace_plot(p, combined_trace_f, combined_trace_r)
        p.set_title("Combined Phases", fontsize=20)

        return free_energy_trace_figure
Ejemplo n.º 40
0
  # Generate independent data samples from K one-dimensional harmonic oscillators.
  # The test case is parameterized by O_k, K_k and beta, all defined before this point.
  #=============================================================================================
  
  randomsample = testsystems.harmonic_oscillators.HarmonicOscillatorsTestCase(O_k=O_k, K_k=K_k, beta=beta)
  # mode='u_kln' requests the samples together with u_kln and N_k; N_k is
  # rebound to the value returned by sample().
  [x_kn,u_kln,N_k] = randomsample.sample(N_k,mode='u_kln')

  # get the unreduced energies
  U_kln = u_kln/beta

  #=============================================================================================
  # Estimate free energies and expectations.
  #=============================================================================================

  # Initialize the MBAR class, determining the free energies.
  mbar = MBAR(u_kln, N_k, relative_tolerance=1.0e-10,verbose=False) # no method given: solver left at the library default
  # return_dict=True: the results are read by key below.
  results = mbar.getFreeEnergyDifferences(return_dict=True)
  Deltaf_ij_estimated = results['Delta_f']
  dDeltaf_ij_estimated = results['dDelta_f']

  # Compute error from analytical free energy differences.
  Deltaf_ij_error = Deltaf_ij_estimated - Deltaf_ij_analytical

  # Estimate the expectation of the mean-squared displacement at each condition.
  if observe == 'RMS displacement':
    A_kn = numpy.zeros([K,K,N_max], dtype = numpy.float64);
    for k in range(0,K):
      for l in range(0,K):
        # Only the first N_k[k] entries are filled; the rest stay zero.
        A_kn[k,l,0:N_k[k]] = (x_kn[k,0:N_k[k]] - O_k[l])**2 # observable is the squared displacement

  # observable is the potential energy, a 3D array since the potential energy is a function of
  # thermodynamic state
Ejemplo n.º 41
0
            # One row of expectation values per bootstrap trial.
            xbar1_bs = zeros((bootstrap_trials, xbar1.size))
            for trial in xrange(bootstrap_trials):
                # NOTE(review): presumably resample() draws a bootstrap sample
                # and solve_uwham(f1) re-solves starting from f1 -- confirm
                # against the msmle class.
                msmle.resample()
                msmle.solve_uwham(f1)
                # Store the solution shifted so that its first entry is zero.
                f1_bs[trial] = msmle.f
                f1_bs[trial] -= msmle.f[0]
                xbar1_bs[trial] = msmle.compute_expectations(test.x_jn, False)[0]
            # Spread over trials; the first column is dropped from the
            # free energy errors.
            ferr1_bs = f1_bs.std(axis=0)[1:]
            varxbar1_bs = xbar1_bs.var(axis=0)
            msmle.revert_sample()
        # Drop the first entry of f1.
        f1 = f1[1:]

        # Optional comparison against pymbar.  This is best effort: any
        # ordinary failure is reported and the MBAR results are skipped
        # (skipmbar = True) instead of aborting the run.
        if do_pymbar:
            try:
                mbar = MBAR(test.data, test.data_size)
                f2, ferr2, t = mbar.getFreeEnergyDifferences()
                # Keep row 0 without its first entry.
                f2 = f2[0][1:]
                ferr2 = ferr2[0][1:]
                xbar2, varxbar2 = mbar.computeExpectations(test.x_jn)
                skipmbar = False
            except Exception:
                # Exception (not a bare except) so that KeyboardInterrupt and
                # SystemExit still propagate.
                print('MBAR choked!')
                skipmbar = True
        else:
            skipmbar = True

        def print_float_array(msg, arr):
            """Print msg left-justified in 16 columns, then each value of arr to 4 decimals."""
            label = '%-16s ' % msg
            fields = ['% 6.4f' % value for value in arr]
            print(label + ' '.join(fields))

        # Report test.data_size under the 'samples:' label.
        print('samples:', test.data_size)
# Script stage: build an MBAR estimator and compare its free energy
# differences with the analytical reference.  u_kln, N_k, K and
# Delta_f_ij_analytical are expected to be defined before this point.
print "======================================"
print "      Initializing MBAR               "
print "======================================"

# Estimate free energies from simulation using MBAR.
print "Estimating relative free energies from simulation (this may take a while)..."

# Initialize the MBAR class, determining the free energies.
mbar = MBAR(u_kln, N_k, relative_tolerance=1.0e-10, verbose=True)
# Get matrix of dimensionless free energy differences and uncertainty estimate.

print "============================================="
print "      Testing getFreeEnergyDifferences       "
print "============================================="

# Three values are unpacked; the third (_Theta_ij) is not used below.
(Delta_f_ij_estimated, dDelta_f_ij_estimated, _Theta_ij) = mbar.getFreeEnergyDifferences()

# Compute error from analytical free energy differences.
Delta_f_ij_error = Delta_f_ij_estimated - Delta_f_ij_analytical

print "Error in free energies is:"
print Delta_f_ij_error
print "Uncertainty in free energies is:"
print dDelta_f_ij_estimated

print "Standard deviations away is:"
# Mathematical manipulation to avoid divide-by-zero errors: adding the K x K
# identity puts 1 on the diagonal of the uncertainty matrix.  We don't care
# about the diagonals, since they are identically zero.
df_ij_mod = dDelta_f_ij_estimated + numpy.identity(K)
# Absolute error expressed in units of the (modified) uncertainty.
stdevs = numpy.abs(Delta_f_ij_error/df_ij_mod)
for k in range(K):
Ejemplo n.º 43
0
class MBAR(BaseEstimator):
    """Estimator wrapper around the multi-state Bennett acceptance ratio (MBAR).

    Parameters
    ----------

    maximum_iterations : int, optional
        Upper bound on the number of solver iterations.

    relative_tolerance : float, optional
        Relative tolerance used as the convergence criterion.

    initial_f_k : np.ndarray, float, shape=(K), optional
        Initial guess for the dimensionless free energies
        (default None, which sets all f_k = 0).

    method : str, optional, default="hybr"
        Name of the optimization routine.  This can be any of the methods
        available via scipy.optimize.minimize() or scipy.optimize.root().

    verbose : bool, optional
        Set to True to request verbose debug output.

    Attributes
    ----------

    delta_f_ : DataFrame
        Estimated dimensionless free energy difference between each pair of
        states.

    d_delta_f_ : DataFrame
        Estimated statistical uncertainty (one standard deviation) of the
        dimensionless free energy differences.

    theta_ : DataFrame
        The theta matrix.

    states_ : list
        Lambda states for which free energy differences were obtained.

    """

    def __init__(self,
                 maximum_iterations=10000,
                 relative_tolerance=1.0e-7,
                 initial_f_k=None,
                 method='hybr',
                 verbose=False):

        # Filled in by fit() with the underlying pymbar.MBAR object.
        self._mbar = None

        self.maximum_iterations = maximum_iterations
        self.relative_tolerance = relative_tolerance
        self.initial_f_k = initial_f_k
        self.verbose = verbose
        # Stored as a one-step protocol list, the form handed to the solver
        # as ``solver_protocol`` in fit().
        self.method = [{'method': method}]

    def fit(self, u_nk):
        """
        Run MBAR on the reduced potentials and store the resulting free
        energy differences, their uncertainties and the theta matrix.

        Parameters
        ----------
        u_nk : DataFrame
            u_nk[n,k] is the reduced potential energy of uncorrelated
            configuration n evaluated at state k.

        Returns
        -------
        self

        """
        state_levels = u_nk.index.names[1:]

        # Order rows by state so that samples of one state form one block.
        u_nk = u_nk.sort_index(level=state_levels)

        # Number of samples drawn from each column's state (0 when a state
        # was never sampled).
        by_state = u_nk.groupby(level=state_levels)
        N_k = []
        for state in u_nk.columns:
            if state in by_state.groups:
                N_k.append(len(by_state.get_group(state)))
            else:
                N_k.append(0)

        solver_options = dict(
            maximum_iterations=self.maximum_iterations,
            relative_tolerance=self.relative_tolerance,
            initial_f_k=self.initial_f_k,
            solver_protocol=self.method,
            verbose=self.verbose,
        )
        self._mbar = MBAR_(u_nk.T, N_k, **solver_options)

        self.states_ = u_nk.columns.values.tolist()

        # Label every returned matrix with the states on both axes.
        labels = self.states_
        frames = []
        for matrix in self._mbar.getFreeEnergyDifferences(return_theta=True):
            frames.append(pd.DataFrame(matrix, columns=labels, index=labels))

        self.delta_f_, self.d_delta_f_, self.theta_ = frames

        return self

    def predict(self, u_ln):
        """Placeholder: does nothing and returns None."""

    @property
    def overlap_matrix(self):
        r"""MBAR overlap matrix.

        The estimated state overlap matrix :math:`O_{ij}` is an estimate of the
        probability of observing a sample from state :math:`i` in state :math:`j`.

        The value is recomputed on every access; keep a reference to it if it
        is needed more than once.

        See Also
        ---------
        pymbar.mbar.MBAR.computeOverlap
        """
        overlap = self._mbar.computeOverlap()
        return overlap['matrix']
Ejemplo n.º 44
0
# Total number of snapshots over all simulated temperatures.
sumN_k = nSnapshots*len(Temp_sim)
# Flattened copy of N_all (one entry per snapshot).
Nmol_kn = N_all.reshape([N_all.size])
# u_kn_sim[iT, n]: snapshot n evaluated at the conditions (Temp, mu) of state iT.
u_kn_sim = np.zeros([len(Temp_sim),nSnapshots*len(Temp_sim)])

for iT, (Temp, mu) in enumerate(zip(Temp_sim, mu_sim)):

    for jT in range(len(Temp_sim)):

        # Columns jstart:jend hold the nSnapshots snapshots of simulation jT.
        jstart = nSnapshots*jT
        jend = jstart+nSnapshots

        # NOTE(review): U_to_u presumably converts energies and molecule
        # counts into reduced potentials at (Temp, mu) -- confirm its definition.
        u_kn_sim[iT,jstart:jend] = U_to_u(U_all[jT],Temp,mu,N_all[jT])

mbar = MBAR(u_kn_sim,N_k)

# Only the first returned item (the matrix of differences) is kept.
Deltaf_ij = mbar.getFreeEnergyDifferences(return_theta=False)[0]
#print "effective sample numbers"
#print (mbar.computeEffectiveSampleNumber())
#print('\nWhich is approximately '+str(mbar.computeEffectiveSampleNumber()/sumN_k*100.)+'%')

# Free energies of all states relative to the first one (row 0 of Deltaf_ij).
f_k_sim = Deltaf_ij[0,:]

#mbar2 = MBAR(u_kn_sim,N_k,initial_f_k=f_k_sim)
#
#Deltaf_ij2 = mbar2.getFreeEnergyDifferences(return_theta=False)[0]
#print "effective sample numbers"
#print (mbar2.computeEffectiveSampleNumber())
#print('\nWhich is approximately '+str(mbar2.computeEffectiveSampleNumber()/sumN_k*100.)+'%')

#Nmolk, dNmolk = mbar.computeExpectations(Nmol_kn) # Average number of molecules
#Nmolk_alt = np.zeros(len(N_k))
  # Generate independent data samples from K one-dimensional harmonic oscillators.
  # The test case is parameterized by O_k, K_k and beta, all defined before this point.
  #=============================================================================================

  randomsample = testsystems.harmonic_oscillators.HarmonicOscillatorsTestCase(O_k=O_k, K_k=K_k, beta=beta)
  # mode='u_kln' requests the samples together with u_kln and N_k; N_k is
  # rebound to the value returned by sample().
  [x_kn,u_kln,N_k] = randomsample.sample(N_k,mode='u_kln')

  # get the unreduced energies
  U_kln = u_kln/beta

  #=============================================================================================
  # Estimate free energies and expectations.
  #=============================================================================================

  # Initialize the MBAR class, determining the free energies.
  mbar = MBAR(u_kln, N_k, method = 'adaptive',relative_tolerance=1.0e-10,verbose=False) # 'adaptive' solver
  # Keep only the first two returned items so the unpacking also works when
  # pymbar appends the Theta matrix as a third element.
  (Deltaf_ij_estimated, dDeltaf_ij_estimated) = mbar.getFreeEnergyDifferences()[:2]

  # Compute error from analytical free energy differences.
  Deltaf_ij_error = Deltaf_ij_estimated - Deltaf_ij_analytical

  # Estimate the expectation of the mean-squared displacement at each condition.
  if observe == 'RMS displacement':
    A_kn = numpy.zeros([K,K,N_max], dtype = numpy.float64)
    for k in range(0,K):
      for l in range(0,K):
        # Only the first N_k[k] entries are filled; the rest stay zero.
        A_kn[k,l,0:N_k[k]] = (x_kn[k,0:N_k[k]] - O_k[l])**2 # observable is the squared displacement

  # observable is the potential energy, a 3D array since the potential energy is a function of
  # thermodynamic state
  elif observe == 'potential energy':
    A_kn = U_kln
Ejemplo n.º 46
0
def estimate_free_energies(ncfile, ndiscard=0, nuse=None, g=None):
    """
    Estimate free energies of all alchemical states.

    Parameters
    ----------
    ncfile : NetCDF
       Input YANK netcdf file
    ndiscard : int, optional, default=0
       Number of iterations to discard to equilibration
    nuse : int, optional, default=None
       Maximum number of iterations to use (after discarding)
    g : int, optional, default=None
       Statistical inefficiency to use if desired; if None, will be computed.

    Returns
    -------
    Deltaf_ij : matrix
       Dimensionless free energy differences between states
    dDeltaf_ij : matrix
       Estimated uncertainties in ``Deltaf_ij``

    Notes
    -----
    Writes the per-iteration summed reduced potential to ``u_n.out`` in the
    current directory and logs progress and both matrices at INFO level.

    TODO
    ----
    * Automatically determine 'ndiscard'.

    """

    # Get current dimensions.
    niterations = ncfile.variables['energies'].shape[0]
    nstates = ncfile.variables['energies'].shape[1]

    # Extract energies.  Only the variable handle is needed here; the values
    # are read iteration by iteration while deconvoluting below.
    logger.info("Reading energies...")
    energies = ncfile.variables['energies']
    logger.info("Done.")

    # Deconvolute replicas: row r of energies[iteration] is stored under the
    # state index listed for replica r in the 'states' variable.
    logger.info("Deconvoluting replicas...")
    u_kln = np.zeros([nstates, nstates, niterations], np.float64)
    for iteration in range(niterations):
        state_indices = ncfile.variables['states'][iteration,:]
        u_kln[state_indices,:,iteration] = energies[iteration,:,:]
    logger.info("Done.")

    # Compute total negative log probability over all iterations
    # (sum of the diagonal u_kln[k, k, iteration] over states).
    u_n = np.zeros([niterations], np.float64)
    for iteration in range(niterations):
        u_n[iteration] = np.sum(np.diagonal(u_kln[:,:,iteration]))

    # DEBUG: dump the timeseries.  The with-block closes the file even if a
    # write fails.
    with open('u_n.out', 'w') as outfile:
        for iteration in range(niterations):
            outfile.write("%8d %24.3f\n" % (iteration, u_n[iteration]))

    # Discard initial data to equilibration.
    u_kln = u_kln[:,:,ndiscard:]
    u_n = u_n[ndiscard:]

    # Truncate to number of specified conformations to use
    if nuse:
        u_kln = u_kln[:,:,0:nuse]
        u_n = u_n[0:nuse]

    # Subsample data to obtain uncorrelated samples
    N_k = np.zeros(nstates, np.int32)
    indices = timeseries.subsampleCorrelatedData(u_n, g=g) # indices of uncorrelated samples
    N = len(indices) # number of uncorrelated samples
    N_k[:] = N
    # Pack the uncorrelated samples at the front; only the first N_k entries
    # per state are declared to MBAR.
    u_kln[:,:,0:N] = u_kln[:,:,indices]
    logger.info("number of uncorrelated samples:")
    logger.info(N_k)
    logger.info("")

    #===================================================================================================
    # Estimate free energy difference with MBAR.
    #===================================================================================================

    # Initialize MBAR (computing free energy estimates, which may take a while)
    logger.info("Computing free energy differences...")
    mbar = MBAR(u_kln, N_k)

    # Get matrix of dimensionless free energy differences and uncertainty estimate.
    logger.info("Computing covariance matrix...")

    # pymbar 2 returns two items and pymbar 3 appends theta_ij as a third.
    # Slicing the first two covers both with a single call, instead of
    # recomputing everything after a failed unpack.
    Deltaf_ij, dDeltaf_ij = mbar.getFreeEnergyDifferences()[:2]

    # Log the matrix of free energy differences, then the matrix of their
    # uncertainties (expectations standard deviations of the estimator about
    # the true free energy), one row per log record.
    for label, matrix in (("Deltaf_ij:", Deltaf_ij), ("dDeltaf_ij:", dDeltaf_ij)):
        logger.info(label)
        for i in range(nstates):
            logger.info("".join("%8.3f" % matrix[i, j] for j in range(nstates)))

    # Return free energy differences and an estimate of the covariance.
    return (Deltaf_ij, dDeltaf_ij)