Example #1
def subsample(N_k, U_kn, V_kn, N_kn, g, type):

    K = len(N_k)
    N_k_sampled = numpy.zeros(K, dtype=int)
    tempspace = numpy.zeros(numpy.max(N_k))
    for k in range(K):
        if (type != 'volume') and (type != 'number'):
            indices = timeseries.subsampleCorrelatedData(
                U_kn[k, 0:N_k[k]], g[k])
            tempspace = U_kn[k, indices].copy()
            N_k_sampled[k] = numpy.size(indices)
            U_kn[k, 0:N_k_sampled[k]] = tempspace[0:N_k_sampled[k]]
        if (type in requireV):
            indices = timeseries.subsampleCorrelatedData(
                V_kn[k, 0:N_k[k]], g[k])
            tempspace = V_kn[k, indices].copy()
            N_k_sampled[k] = numpy.size(indices)
            V_kn[k, 0:N_k_sampled[k]] = tempspace[0:N_k_sampled[k]]
        if (type in requireN):
            indices = timeseries.subsampleCorrelatedData(
                N_kn[k, 0:N_k[k]], g[k])
            tempspace = N_kn[k, indices].copy()
            N_k_sampled[k] = numpy.size(indices)
            N_kn[k, 0:N_k_sampled[k]] = tempspace[0:N_k_sampled[k]]
        print "data has been subsampled using the statistical inefficiencies"
        g[k] = 1.0
        N_k[k] = N_k_sampled[k]
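Example #1 depends on module-level names the snippet does not define (`numpy`, `timeseries`, `requireV`, `requireN`). A minimal driver sketch with illustrative values for the missing globals (the contents of `requireV`/`requireN` here are assumptions, not taken from the source):

import numpy
from pymbar import timeseries  # pymbar 3 module layout assumed

requireV = ['volume']   # assumed: ensemble types whose volume series must be subsampled
requireN = ['number']   # assumed: ensemble types whose particle-number series must be subsampled

K, N_max = 2, 5000
N_k = numpy.array([N_max, N_max])
U_kn = numpy.random.randn(K, N_max)          # synthetic stand-ins for real timeseries
V_kn = numpy.random.randn(K, N_max) + 30.0
N_kn = numpy.random.randn(K, N_max) + 100.0
g = numpy.array([timeseries.statisticalInefficiency(U_kn[k, 0:N_k[k]]) for k in range(K)])
subsample(N_k, U_kn, V_kn, N_kn, g, type='energy')  # mutates every array in place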
Example #2
def subsample(Q_n,localQ):
    print 'Subsampling the data'
    g = timeseries.statisticalInefficiency(Q_n)
    indices = numpy.array(timeseries.subsampleCorrelatedData(Q_n,g))
    print '%i uncorrelated samples found of %i original samples' %(len(indices),len(Q_n))
    localQ = localQ[:,indices]
    return localQ
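Example #2 is the core decorrelation recipe used throughout this page: estimate the statistical inefficiency g, ask for uncorrelated frame indices, and keep only those frames. A self-contained sketch on synthetic data (the AR(1) generator is illustrative, not from any example):

import numpy
from pymbar import timeseries

# Synthetic correlated series: AR(1) with coefficient 0.9, so g should come out
# near (1 + 0.9) / (1 - 0.9) = 19.
rng = numpy.random.RandomState(0)
x = numpy.zeros(5000)
for t in range(1, len(x)):
    x[t] = 0.9 * x[t - 1] + rng.randn()

g = timeseries.statisticalInefficiency(x)
indices = numpy.array(timeseries.subsampleCorrelatedData(x, g=g))
x_uncorrelated = x[indices]
print('g = %.1f, kept %d of %d samples' % (g, len(indices), len(x)))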
Example #3
def getNkandUkln(do_dhdl=False):
   """Identifies uncorrelated samples and updates the arrays of the reduced potential energy and dhdlt retaining data entries of these samples only.
      Assumes that 'dhdlt' and 'u_klt' are in memory, as well as proper values for 'sta' and 'fin', i.e. the starting and
      final snapshot positions to be read, both are arrays of dimension K."""
   u_kln = numpy.zeros([K,K,max(fin-sta)], numpy.float64) # u_kln[k,m,n] is the reduced potential energy of uncorrelated sample index n from state k evaluated at state m
   N_k = numpy.zeros(K, int) # N_k[k] is the number of uncorrelated samples from state k
   g = numpy.zeros(K,float) # autocorrelation times for the data
   if do_dhdl:
      dhdl = numpy.zeros([K,n_components,max(fin-sta)], float) #dhdl is value for dhdl for each component in the file at each time.
      print "\n\nNumber of correlated and uncorrelated samples:\n\n%6s %12s %12s %12s\n" % ('State', 'N', 'N_k', 'N/N_k') 
   for k in range(K):
      # Sum up over the energy components; note that only the relevant data is used in the third dimension.
      dhdl_sum = numpy.sum(dhdlt[k,:,sta[k]:fin[k]], axis=0)
      # Determine indices of uncorrelated samples from potential autocorrelation analysis at state k
      # (alternatively, could use the energy differences -- here, we will use total dhdl).
      g[k] = timeseries.statisticalInefficiency(dhdl_sum)
      indices = numpy.array(timeseries.subsampleCorrelatedData(dhdl_sum, g=g[k])) # indices of uncorrelated samples
      N = len(indices) # number of uncorrelated samples
      # Handle case where we end up with too few.
      if N < 50:
         if do_dhdl:
            print "WARNING: Only %s uncorrelated samples found at lambda number %s; proceeding with analysis using correlated samples..." % (N, k)
         indices = numpy.arange(len(dhdl_sum))
         N = len(indices)
      N_k[k] = N # Store the number of uncorrelated samples from state k.
      for l in range(K):
         u_kln[k,l,0:N] = u_klt[k,l,indices]
      if do_dhdl:
         print "%6s %12s %12s %12.2f" % (k, fin[k], N_k[k], g[k])
         for n in range(n_components): 
            dhdl[k,n,0:N] = dhdlt[k,n,indices]
   if do_dhdl:
      return (dhdl, N_k, u_kln)
   return (N_k, u_kln)
Example #4
def subsample_series(series, g_t=None, return_g_t=False):
    if g_t is None:
        g_t = timeseries.statisticalInefficiency(series)
    state_indices = timeseries.subsampleCorrelatedData(series, g = g_t, conservative=True)
    N_k = len(state_indices)
    transfer_series = series[state_indices]
    if return_g_t:
        return state_indices, transfer_series, g_t
    else:
        return state_indices, transfer_series
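Example #4 passes conservative=True, which makes subsampleCorrelatedData use a uniform integer stride of ceil(g), guaranteeing retained frames are at least g apart at the cost of discarding slightly more data; the default instead spaces indices as closely to g as possible. A quick comparison sketch on synthetic data:

import numpy
from pymbar import timeseries

series = numpy.cumsum(numpy.random.randn(2000)) * 0.01 + numpy.random.randn(2000)
g = timeseries.statisticalInefficiency(series)
default_idx = timeseries.subsampleCorrelatedData(series, g=g)                    # spacing ~ g
strict_idx = timeseries.subsampleCorrelatedData(series, g=g, conservative=True)  # stride = ceil(g)
print('g = %.2f: %d samples (default) vs %d (conservative)' % (g, len(default_idx), len(strict_idx)))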
Example #5
def subsample(N_k,U_kn,V_kn,N_kn,g,type):

    K = len(N_k)
    N_k_sampled = numpy.zeros(K, dtype=int)
    tempspace = numpy.zeros(numpy.max(N_k))
    for k in range(K):
        if (type != 'volume') and (type != 'number'): 
            indices = timeseries.subsampleCorrelatedData(U_kn[k,0:N_k[k]],g[k])
            tempspace = U_kn[k,indices].copy()
            N_k_sampled[k] = numpy.size(indices)
            U_kn[k,0:N_k_sampled[k]] = tempspace[0:N_k_sampled[k]]
        if (type in requireV):
            indices = timeseries.subsampleCorrelatedData(V_kn[k,0:N_k[k]],g[k])
            tempspace = V_kn[k,indices].copy()
            N_k_sampled[k] = numpy.size(indices) 
            V_kn[k,0:N_k_sampled[k]] = tempspace[0:N_k_sampled[k]]
        if (type in requireN):
            indices = timeseries.subsampleCorrelatedData(N_kn[k,0:N_k[k]],g[k])
            tempspace = N_kn[k,indices].copy()
            N_k_sampled[k] = numpy.size(indices) 
            N_kn[k,0:N_k_sampled[k]] = tempspace[0:N_k_sampled[k]]
        print "data has been subsampled using the statistical inefficiencies"
        g[k] = 1.0
        N_k[k] = N_k_sampled[k]
Example #6
def subsample(U_kn,Q_kn,K,N_max):
    assume_uncorrelated = False
    if assume_uncorrelated:
        print 'Assuming data is uncorrelated'
        N_k = numpy.zeros(K, numpy.int32)
        N_k[:] = N_max
    else:	
        print 'Subsampling the data...'
        N_k = numpy.zeros(K,numpy.int32)
        g = numpy.zeros(K,numpy.float64)
        for k in range(K):  # subsample the energies
            g[k] = timeseries.statisticalInefficiency(Q_kn[k])#,suppress_warning=True)
            indices = numpy.array(timeseries.subsampleCorrelatedData(Q_kn[k],g=g[k])) # indices of uncorrelated samples
            N_k[k] = len(indices) # number of uncorrelated samples
            U_kn[k,0:N_k[k]] = U_kn[k,indices]
            Q_kn[k,0:N_k[k]] = Q_kn[k,indices]
    return U_kn, Q_kn, N_k
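A caveat for Examples #1, #5, and #6: the in-place compaction leaves stale correlated entries beyond N_k[k] in each row, so downstream code must always slice by N_k. A usage sketch, assuming the arrays from the example above are in scope:

U_kn, Q_kn, N_k = subsample(U_kn, Q_kn, K, N_max)
for k in range(K):
    mean_U = U_kn[k, 0:N_k[k]].mean()   # correct: only the first N_k[k] entries are uncorrelated
    # U_kn[k, :].mean() would silently mix in the stale tail beyond N_k[k]
    print('state %d: <U> = %.3f from %d uncorrelated samples' % (k, mean_U, N_k[k]))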
Example #8
def subsample1D(pos_kn,N_k,ineff):
    ''' Modifies pos_kn and N_k in place.
    '''
    
    logger.info("Subsampling using given ICATS")
    K = pos_kn.shape[0]
    for i in range(K):
        indices = timeseries.subsampleCorrelatedData(pos_kn[i,0:N_k[i]], g = ineff[i])
        newN = len(indices)
        pos_kn[i,0:newN] = pos_kn[i,indices]
        logger.debug("Original %s New %s",N_k[i],newN)
        N_k[i] = newN
        if newN < 10:
            logger.warning("Very few independent samples %s", newN)
 
    logger.info("Subsampled using given ICATS")
    return pos_kn,N_k
Example #9
def subsample(observ,maxIneff):
    ''' Subsample according to the largest inefficiency.

    Parameters
    -------------
        observ: list of arrays
        maxIneff: array with the statistical inefficiency for each simulation in observ

    Return
    -----------
        newObserv: list of arrays subsampled according to maxIneff
    '''
    logger.info("Subsampling using given ICATS")
    newObserv = [] 
    for i,sim in enumerate(observ):
        
        indices = timeseries.subsampleCorrelatedData(sim[:,0], g = maxIneff[i])
        newsim = sim[indices,...]
        newObserv.append(newsim)
    logger.debug("Original %s \nNew %s",[i.shape[0] for i in observ],[i.shape[0]  for i in newObserv])
    logger.info("Subsampled using given ICATS")
    return newObserv
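Example #9 assumes a precomputed maxIneff but subsamples every column of each array by the stride of column 0. One way the caller might build that array, taking the most pessimistic inefficiency across columns so that every observable is decorrelated (a sketch of an assumed caller, not code from the source):

import numpy
from pymbar import timeseries

def max_inefficiency(observ):
    # One g per simulation: the maximum over all observable columns.
    maxIneff = numpy.zeros(len(observ))
    for i, sim in enumerate(observ):
        maxIneff[i] = max(timeseries.statisticalInefficiency(sim[:, col])
                          for col in range(sim.shape[1]))
    return maxIneff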
Example #10
   #     infile.close()
        # Parse data.
   #     n = 0
   #     for line in lines:
   #         if line[0] != '#' and line[0] != '@':
   #             tokens = line.split()            
   #             u_kn[k,n] = beta_k[k] * (float(tokens[2]) - float(tokens[1])) # reduced potential energy without umbrella restraint
   #             n += 1

    # Compute correlation times for potential energy and val
    # timeseries.  If the temperatures differ, use energies to determine samples; otherwise, use the cosine of val
            
    if (DifferentTemperatures):        
        g_k[k] = timeseries.statisticalInefficiency(u_kn[k,:], u_kn[k,:])
        print "Correlation time for set %5d is %10.3f" % (k,g_k[k])
        indices = timeseries.subsampleCorrelatedData(u_kn[k,:])
    else:
        #g_k[k] = timeseries.statisticalInefficiency(val_kn[k,:], val_kn[k,:])
        #print "Correlation time for set %5d is %10.3f" % (k,g_k[k])
        indices = timeseries.subsampleCorrelatedData(val_kn[k,0:N_k[k]], fast=True, verbose=True)
    # Subsample data.
    N_k[k] = len(indices)
    u_kn[k,0:N_k[k]] = u_kn[k,indices]
    val_kn[k,0:N_k[k]] = val_kn[k,indices]
   # print val_kn[k,0:N_k[k]]

# Set zero of u_kn -- this is arbitrary.
u_kn -= u_kn.min()

val_min = numpy.min([numpy.min(val_kn[k,0:N_k[k]]) for k in range(K)])
val_max = numpy.max([numpy.max(val_kn[k,0:N_k[k]]) for k in range(K)])
Example #11
        infile.close()
        # Parse data.
        n = 0
        for line in lines:
            if line[0] != '#' and line[0] != '@':
                tokens = line.split()            
                u_kn[k,n] = beta_k[k] * (float(tokens[2]) - float(tokens[1])) # reduced potential energy without umbrella restraint
                n += 1

    # Compute correlation times for potential energy and chi
    # timeseries.  If the temperatures differ, use energies to determine samples; otherwise, use the cosine of chi
            
    if (DifferentTemperatures):        
        g_k[k] = timeseries.statisticalInefficiency(u_kn[k,:], u_kn[k,:])
        print "Correlation time for set %5d is %10.3f" % (k,g_k[k])
        indices = timeseries.subsampleCorrelatedData(u_kn[k,:])
    else:
        g_k[k] = timeseries.statisticalInefficiency(numpy.cos(chi_kn[k,:]/(180.0/numpy.pi)),numpy.cos(chi_kn[k,:]/(180.0/numpy.pi)))
        print "Correlation time for set %5d is %10.3f" % (k,g_k[k])
        indices = timeseries.subsampleCorrelatedData(numpy.cos(chi_kn[k,:]/(180.0/numpy.pi)))
    # Subsample data.
    N_k[k] = len(indices)
    u_kn[k,0:N_k[k]] = u_kn[k,indices]
    chi_kn[k,0:N_k[k]] = chi_kn[k,indices]

# Set zero of u_kn -- this is arbitrary.
u_kn -= u_kn.min()

# Construct torsion bins
print "Binning data..."
delta = (chi_max - chi_min) / float(nbins)
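Example #11 stops right after computing the bin width; the step that normally follows assigns each subsampled torsion to a bin index. A sketch of that step, consistent with the delta defined above (the clamp on the upper edge is a defensive assumption):

N_max = numpy.max(N_k)
bin_kn = numpy.zeros([K, N_max], numpy.int32)
for k in range(K):
    for n in range(N_k[k]):
        b = int((chi_kn[k, n] - chi_min) / delta)
        bin_kn[k, n] = min(b, nbins - 1)   # clamp chi == chi_max into the last bin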
Example #12
    raise "pymbar [https://simtk.org/home/pymbar] must be installed to complete analysis of free energies."

# =============================================================================
# Subsample correlated samples to generate uncorrelated subsample.
# =============================================================================

print "Subsampling data to remove correlation..."
K = nlambda  # number of states
N_k = nprod_iterations * numpy.ones(
    [K],
    numpy.int32)  # N_k[k] is the number of uncorrelated samples at state k
u_kln_subsampled = numpy.zeros([K, K, nprod_iterations],
                               numpy.float64)  # subsampled data
for k in range(K):
    # Get indices of uncorrelated samples.
    indices = subsampleCorrelatedData(u_kln[k, k, :])
    # Store only uncorrelated data.
    N_k[k] = len(indices)
    for l in range(K):
        u_kln_subsampled[k, l, 0:len(indices)] = u_kln[k, l, indices]
print "Number of uncorrelated samples per state:"
print N_k

# =============================================================================
# Analyze with MBAR to compute free energy differences and statistical errors.
# =============================================================================

print "Analyzing with MBAR..."
mbar = MBAR(u_kln_subsampled, N_k)
[Deltaf_ij, dDeltaf_ij] = mbar.getFreeEnergyDifferences()
print "Free energy differences (in kT)"
Example #13
    if len(fep_columns) > 0:
        for i in range(len(fep_columns)):
            reduced_fep_data.append(numpy.zeros([K, N_samples], numpy.float64))
    for k in range(K):
        # Extract timeseries.
        A_t = biasing_variable_kt[0][k, :]
        # Compute statistical inefficiency.
        try:
            g = timeseries.statisticalInefficiency(A_t)
        except Exception as e:
            print str(e)
            print A_t

        # Subsample data.
        if subsample_trajectories:
            indices = timeseries.subsampleCorrelatedData(A_t, g=g)
        else:
            indices = timeseries.subsampleCorrelatedData(A_t, g=1)
        N = len(indices)  # number of uncorrelated samples
        print "k = %5d : g = %.1f, N = %d" % (k, g, N)
        for i in range(nbiases):
            biasing_variable_kn[i][k, 0:N] = biasing_variable_kt[i][k, indices]
        for i in range(nperturbations + 1):
            U_kn[i][k, 0:N] = U_kt[i][k, indices]
        if not cluster_binning:
            pmf_variable_kn_1[k, 0:N] = pmf_variable_kt_1[k, indices]
            if ndim == 2:
                pmf_variable_kn_2[k, 0:N] = pmf_variable_kt_2[k, indices]
        if cluster_binning:
            cluster_bin_kn[k, 0:N] = cluster_bin_kt[k, indices]
        if len(expectation_columns) > 0:
Example #14
    def _subsample_kln(self, u_kln):
        #Try to load in the data
        if self.save_equil_data:  #Check if we want to save/load equilibration data
            try:
                equil_data = numpy.load(
                    os.path.join(
                        self.source_directory, self.save_prefix + self.phase +
                        '_equil_data_%s.npz' % self.subsample_method))
                if self.nequil is None:
                    self.nequil = equil_data['nequil']
                elif type(self.nequil
                          ) is int and self.subsample_method == 'per-state':
                    print "WARRNING: Per-state subsampling requested with only single value for equilibration..."
                    try:
                        self.nequil = equil_data['nequil']
                        print "Loading equilibration from file with %i states read" % self.nstates
                    except:
                        print "Assuming equal equilibration per state of %i" % self.nequil
                        self.nequil = numpy.array([self.nequil] * self.nstates)
                self.g_t = equil_data['g_t']
                Neff_max = equil_data['Neff_max']
                #Do equilibration if we have not already
                if self.subsample_method == 'per-state' and (
                        len(self.g_t) < self.nstates
                        or len(self.nequil) < self.nstates):
                    equil_loaded = False
                    raise IndexError
                else:
                    equil_loaded = True
            except:
                if self.subsample_method == 'per-state':
                    self.nequil = numpy.zeros([self.nstates],
                                              dtype=numpy.int32)
                    self.g_t = numpy.zeros([self.nstates])
                    Neff_max = numpy.zeros([self.nstates])
                    for k in xrange(self.nstates):
                        if self.verbose:
                            print "Computing timeseries for state %i/%i" % (
                                k, self.nstates - 1)
                        self.nequil[k] = 0
                        self.g_t[k] = timeseries.statisticalInefficiency(
                            u_kln[k, k, :])
                        Neff_max[k] = (u_kln[k, k, :].size + 1) / self.g_t[k]
                        #[self.nequil[k], self.g_t[k], Neff_max[k]] = self._detect_equilibration(u_kln[k,k,:])
                else:
                    if self.nequil is None:
                        [self.nequil, self.g_t,
                         Neff_max] = self._detect_equilibration(self.u_n)
                    else:
                        [self.nequil_timeseries, self.g_t,
                         Neff_max] = self._detect_equilibration(self.u_n)
                equil_loaded = False
            if not equil_loaded:
                numpy.savez(os.path.join(
                    self.source_directory, self.save_prefix + self.phase +
                    '_equil_data_%s.npz' % self.subsample_method),
                            nequil=self.nequil,
                            g_t=self.g_t,
                            Neff_max=Neff_max)
        elif self.nequil is None:
            if self.subsample_method == 'per-state':
                self.nequil = numpy.zeros([self.nstates], dtype=numpy.int32)
                self.g_t = numpy.zeros([self.nstates])
                Neff_max = numpy.zeros([self.nstates])
                for k in xrange(self.nstates):
                    [self.nequil[k], self.g_t[k],
                     Neff_max[k]] = self._detect_equilibration(u_kln[k, k, :])
                    if self.verbose:
                        print "State %i equilibrated with %i samples" % (
                            k, int(Neff_max[k]))
            else:
                [self.nequil, self.g_t,
                 Neff_max] = self._detect_equilibration(self.u_n)

        if self.verbose: print [self.nequil, Neff_max]
        # 1) Discard equilibration data
        # 2) Subsample data to obtain uncorrelated samples
        self.N_k = numpy.zeros(self.nstates, numpy.int32)
        if self.subsample_method == 'per-state':
            # Discard samples
            nsamples_equil = self.niterations - self.nequil
            self.u_kln = numpy.zeros(
                [self.nstates, self.nstates,
                 nsamples_equil.max()])
            for k in xrange(self.nstates):
                self.u_kln[k, :, :nsamples_equil[k]] = u_kln[k, :,
                                                             self.nequil[k]:]
            #Subsample
            transfer_retained_indices = numpy.zeros(
                [self.nstates, nsamples_equil.max()], dtype=numpy.int32)
            for k in xrange(self.nstates):
                state_indices = timeseries.subsampleCorrelatedData(
                    self.u_kln[k, k, :], g=self.g_t[k])
                self.N_k[k] = len(state_indices)
                transfer_retained_indices[k, :self.N_k[k]] = state_indices
            transfer_kln = numpy.zeros(
                [self.nstates, self.nstates,
                 self.N_k.max()])
            self.retained_indices = numpy.zeros(
                [self.nstates, self.N_k.max()], dtype=numpy.int32)
            for k in xrange(self.nstates):
                self.retained_indices[
                    k, :self.N_k[k]] = transfer_retained_indices[
                        k, :self.N_k[k]]  #Memory reduction
                transfer_kln[k, :, :self.N_k[k]] = self.u_kln[
                    k, :, self.retained_indices[k, :self.N_k[
                        k]]].T  #Have to transpose since indexing in this way causes issues

            #Cut down on memory, once function is done, transfer_kln should be released
            self.u_kln = transfer_kln
            self.retained_iters = self.N_k
        else:
            #Discard Samples
            self.u_kln = u_kln[:, :, self.nequil:]
            self.u_n = self.u_n[self.nequil:]
            #Subsamples
            indices = timeseries.subsampleCorrelatedData(
                self.u_n, g=self.g_t)  # indices of uncorrelated samples
            self.u_kln = self.u_kln[:, :, indices]
            self.N_k[:] = len(indices)
            self.retained_indices = indices
            self.retained_iters = len(indices)
        return
Example #15
   from timeseries import subsampleCorrelatedData
   from pymbar import MBAR
except:
   raise "pymbar [https://simtk.org/home/pymbar] must be installed to complete analysis of free energies."
   
# =============================================================================
# Subsample correlated samples to generate uncorrelated subsample.
# =============================================================================

print "Subsampling data to remove correlation..."
K = nlambda # number of states
N_k = nprod_iterations*numpy.ones([K], numpy.int32) # N_k[k] is the number of uncorrelated samples at state k
u_kln_subsampled = numpy.zeros([K,K,nprod_iterations], numpy.float64) # subsampled data
for k in range(K):
   # Get indices of uncorrelated samples.
   indices = subsampleCorrelatedData(u_kln[k,k,:])
   # Store only uncorrelated data.
   N_k[k] = len(indices)
   for l in range(K):
      u_kln_subsampled[k,l,0:len(indices)] = u_kln[k,l,indices]
print "Number of uncorrelated samples per state:"
print N_k

# =============================================================================
# Analyze with MBAR to compute free energy differences and statistical errors.
# =============================================================================

print "Analyzing with MBAR..."
mbar = MBAR(u_kln_subsampled, N_k)
[Deltaf_ij, dDeltaf_ij] = mbar.getFreeEnergyDifferences()
print "Free energy differences (in kT)"
g_k = zeros([K], float64)
for k in range(K):
    # Compute statistical inefficiency for extension timeseries
    g = timeseries.statisticalInefficiency(x_kt[k,0:T_k[k]], x_kt[k,0:T_k[k]])
    # store statistical inefficiency
    g_k[k] = g
    print "timeseries %d : g = %.1f, %.0f uncorrelated samples (of %d total samples)" % (k+1, g, floor(T_k[k] / g), T_k[k])
    N_max = max(N_max, ceil(T_k[k] / g) + 1)

# Subsample trajectory position data.
x_kn = zeros([K, N_max], float64)
bin_kn = zeros([K, N_max], int32)
N_k = zeros([K], int32)
for k in range(K):
    # Compute correlation times for potential energy and chi timeseries.
    indices = timeseries.subsampleCorrelatedData(x_kt[k,0:T_k[k]])
    # Store subsampled positions.
    N_k[k] = len(indices)
    x_kn[k,0:N_k[k]] = x_kt[k,indices]
    bin_kn[k,0:N_k[k]] = bin_kt[k,indices]

# Set arbitrary zeros for external biasing potential.
x0_k = zeros([K], float64) # x position corresponding to zero of potential
for k in range(K):
    x0_k[k] = x_kn[k,0:N_k[k]].mean()
print "x0_k = "
print x0_k

# Compute bias energies in units of kT.
u_kln = zeros([K,K,N_max], float64) # u_kln[k,l,n] is the reduced (dimensionless) relative potential energy of snapshot n from umbrella simulation k evaluated at umbrella l
for k in range(K):
Example #17
def estimate_enthalpies(ncfile, ndiscard=0, nuse=None):
    """Estimate enthalpies of all alchemical states.

    ARGUMENTS
       ncfile (NetCDF) - input YANK netcdf file

    OPTIONAL ARGUMENTS
       ndiscard (int) - number of iterations to discard to equilibration
       nuse (int) - number of iterations to use (after discarding) 

    TODO: Automatically determine 'ndiscard'.
    TODO: Combine some functions with estimate_free_energies.
    """

    # Get current dimensions.
    niterations = ncfile.variables['energies'].shape[0]
    nstates = ncfile.variables['energies'].shape[1]
    natoms = ncfile.variables['energies'].shape[2]

    # Extract energies.
    print "Reading energies..."
    energies = ncfile.variables['energies']
    u_kln_replica = zeros([nstates, nstates, niterations], float64)
    for n in range(niterations):
        u_kln_replica[:, :, n] = energies[n, :, :]
    print "Done."

    # Deconvolute replicas
    print "Deconvoluting replicas..."
    u_kln = zeros([nstates, nstates, niterations], float64)
    for iteration in range(niterations):
        state_indices = ncfile.variables['states'][iteration, :]
        u_kln[state_indices, :, iteration] = energies[iteration, :, :]
    print "Done."

    # Compute total negative log probability over all iterations.
    u_n = zeros([niterations], float64)
    for iteration in range(niterations):
        u_n[iteration] = sum(diagonal(u_kln[:, :, iteration]))
    #print u_n

    # DEBUG


#    outfile = open('u_n.out', 'w')
#    for iteration in range(niterations):
#        outfile.write("%8d %24.3f\n" % (iteration, u_n[iteration]))
#    outfile.close()

    # Discard initial data to equilibration.
    u_kln_replica = u_kln_replica[:, :, ndiscard:]
    u_kln = u_kln[:, :, ndiscard:]
    u_n = u_n[ndiscard:]

    # Truncate to number of specified conformations to use
    if (nuse):
        u_kln_replica = u_kln_replica[:, :, 0:nuse]
        u_kln = u_kln[:, :, 0:nuse]
        u_n = u_n[0:nuse]

    # Subsample data to obtain uncorrelated samples
    N_k = zeros(nstates, int32)
    indices = timeseries.subsampleCorrelatedData(
        u_n)  # indices of uncorrelated samples
    #indices = range(0,u_n.size) # DEBUG - assume samples are uncorrelated
    N = len(indices)  # number of uncorrelated samples
    N_k[:] = N
    u_kln[:, :, 0:N] = u_kln[:, :, indices]
    print "number of uncorrelated samples:"
    print N_k
    print ""

    # Compute average enthalpies.
    H_k = zeros([nstates], float64)  # H_i[i] is estimated enthalpy of state i
    dH_k = zeros([nstates], float64)
    for k in range(nstates):
        H_k[k] = u_kln[k, k, :].mean()
        dH_k[k] = u_kln[k, k, :].std() / sqrt(N)

    return (H_k, dH_k)
   # Estimate the statistical inefficiency of the simulation by analyzing the timeseries of interest.
   # We use the max of cos and sin of the phi and psi timeseries because they are periodic angles.
   # The maximum over all four projections is then used below to subsample the data.
   print "Computing statistical inefficiencies..."
   g_cosphi = timeseries.statisticalInefficiencyMultiple(numpy.cos(phi_kt_replica * numpy.pi / 180.0))
   print "g_cos(phi) = %.1f" % g_cosphi
   g_sinphi = timeseries.statisticalInefficiencyMultiple(numpy.sin(phi_kt_replica * numpy.pi / 180.0))
   print "g_sin(phi) = %.1f" % g_sinphi   
   g_cospsi = timeseries.statisticalInefficiencyMultiple(numpy.cos(psi_kt_replica * numpy.pi / 180.0))
   print "g_cos(psi) = %.1f" % g_cospsi
   g_sinpsi = timeseries.statisticalInefficiencyMultiple(numpy.sin(psi_kt_replica * numpy.pi / 180.0))
   print "g_sin(psi) = %.1f" % g_sinpsi
   # Subsample data with maximum of all correlation times.
   print "Subsampling data..."
   g = numpy.max(numpy.array([g_cosphi, g_sinphi, g_cospsi, g_sinpsi]))
   indices = timeseries.subsampleCorrelatedData(U_kt[k,:], g = g)   
   print "Using g = %.1f to obtain %d uncorrelated samples per temperature" % (g, len(indices))
   N_max = int(numpy.ceil(T / g)) # max number of samples per temperature   
   U_kn = numpy.zeros([K, N_max], numpy.float64)
   phi_kn = numpy.zeros([K, N_max], numpy.float64)
   psi_kn = numpy.zeros([K, N_max], numpy.float64)
   N_k = N_max * numpy.ones([K], numpy.int32)
   for k in range(K):
      U_kn[k,:] = U_kt[k,indices]
      phi_kn[k,:] = phi_kt[k,indices]
      psi_kn[k,:] = psi_kt[k,indices]
   print "%d uncorrelated samples per temperature" % N_max
         
#===================================================================================================
# Generate a list of indices of all configurations in kn-indexing
#===================================================================================================
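statisticalInefficiencyMultiple, used above, treats each row of its input as an independent replica of the same observable and returns a single pooled g, which is why the replica-indexed phi/psi arrays are passed whole; sin and cos are both checked because either projection alone can hide slow motion of a periodic angle. A minimal sketch of the call pattern (synthetic shapes, illustrative only):

import numpy
from pymbar import timeseries

K, T = 8, 1000                                             # replicas x frames (illustrative)
angles = numpy.random.uniform(-180.0, 180.0, size=(K, T))  # degrees
g_cos = timeseries.statisticalInefficiencyMultiple(numpy.cos(angles * numpy.pi / 180.0))
g_sin = timeseries.statisticalInefficiencyMultiple(numpy.sin(angles * numpy.pi / 180.0))
g = max(g_cos, g_sin)   # keep the slowest-decorrelating projection, as the examples do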
Example #19
def estimate_enthalpies(ncfile, ndiscard = 0, nuse = None):
    """Estimate enthalpies of all alchemical states.

    ARGUMENTS
       ncfile (NetCDF) - input YANK netcdf file

    OPTIONAL ARGUMENTS
       ndiscard (int) - number of iterations to discard to equilibration
       nuse (int) - number of iterations to use (after discarding) 

    TODO: Automatically determine 'ndiscard'.
    TODO: Combine some functions with estimate_free_energies.
    """

    # Get current dimensions.
    niterations = ncfile.variables['energies'].shape[0]
    nstates = ncfile.variables['energies'].shape[1]
    natoms = ncfile.variables['energies'].shape[2]

    # Extract energies.
    print "Reading energies..."
    energies = ncfile.variables['energies']
    u_kln_replica = zeros([nstates, nstates, niterations], float64)
    for n in range(niterations):
        u_kln_replica[:,:,n] = energies[n,:,:]
    print "Done."

    # Deconvolute replicas
    print "Deconvoluting replicas..."
    u_kln = zeros([nstates, nstates, niterations], float64)
    for iteration in range(niterations):
        state_indices = ncfile.variables['states'][iteration,:]
        u_kln[state_indices,:,iteration] = energies[iteration,:,:]
    print "Done."

    # Compute total negative log probability over all iterations.
    u_n = zeros([niterations], float64)
    for iteration in range(niterations):
        u_n[iteration] = sum(diagonal(u_kln[:,:,iteration]))
    #print u_n

    # DEBUG
#    outfile = open('u_n.out', 'w')
#    for iteration in range(niterations):
#        outfile.write("%8d %24.3f\n" % (iteration, u_n[iteration]))
#    outfile.close()

    # Discard initial data to equilibration.
    u_kln_replica = u_kln_replica[:,:,ndiscard:]
    u_kln = u_kln[:,:,ndiscard:]
    u_n = u_n[ndiscard:]
    
    # Truncate to number of specified conformations to use
    if (nuse):
        u_kln_replica = u_kln_replica[:,:,0:nuse]
        u_kln = u_kln[:,:,0:nuse]
        u_n = u_n[0:nuse]

    # Subsample data to obtain uncorrelated samples
    N_k = zeros(nstates, int32)
    indices = timeseries.subsampleCorrelatedData(u_n) # indices of uncorrelated samples
    #indices = range(0,u_n.size) # DEBUG - assume samples are uncorrelated
    N = len(indices) # number of uncorrelated samples
    N_k[:] = N      
    u_kln[:,:,0:N] = u_kln[:,:,indices]
    print "number of uncorrelated samples:"
    print N_k
    print ""

    # Compute average enthalpies.
    H_k = zeros([nstates], float64) # H_i[i] is estimated enthalpy of state i
    dH_k = zeros([nstates], float64)
    for k in range(nstates):
        H_k[k] = u_kln[k,k,:].mean()
        dH_k[k] = u_kln[k,k,:].std() / sqrt(N)

    return (H_k, dH_k)
Example #20
def estimate_free_energies(ncfile, ndiscard = 0, nuse = None):
    """Estimate free energies of all alchemical states.

    ARGUMENTS
       ncfile (NetCDF) - input YANK netcdf file

    OPTIONAL ARGUMENTS
       ndiscard (int) - number of iterations to discard to equilibration
       nuse (int) - maximum number of iterations to use (after discarding)

    TODO: Automatically determine 'ndiscard'.
    """

    # Get current dimensions.
    niterations = ncfile.variables['energies'].shape[0]
    nstates = ncfile.variables['energies'].shape[1]
    natoms = ncfile.variables['energies'].shape[2]

    # Extract energies.
    print "Reading energies..."
    energies = ncfile.variables['energies']
    u_kln_replica = zeros([nstates, nstates, niterations], float64)
    for n in range(niterations):
        u_kln_replica[:,:,n] = energies[n,:,:]
    print "Done."

    # Deconvolute replicas
    print "Deconvoluting replicas..."
    u_kln = zeros([nstates, nstates, niterations], float64)
    for iteration in range(niterations):
        state_indices = ncfile.variables['states'][iteration,:]
        u_kln[state_indices,:,iteration] = energies[iteration,:,:]
    print "Done."

    # Compute total negative log probability over all iterations.
    u_n = zeros([niterations], float64)
    for iteration in range(niterations):
        u_n[iteration] = sum(diagonal(u_kln[:,:,iteration]))
    #print u_n

    # DEBUG
    outfile = open('u_n.out', 'w')
    for iteration in range(niterations):
        outfile.write("%8d %24.3f\n" % (iteration, u_n[iteration]))
    outfile.close()

    # Discard initial data to equilibration.
    u_kln_replica = u_kln_replica[:,:,ndiscard:]
    u_kln = u_kln[:,:,ndiscard:]
    u_n = u_n[ndiscard:]

    # Truncate to number of specified conformations to use
    if (nuse):
        u_kln_replica = u_kln_replica[:,:,0:nuse]
        u_kln = u_kln[:,:,0:nuse]
        u_n = u_n[0:nuse]
    
    # Subsample data to obtain uncorrelated samples
    N_k = zeros(nstates, int32)
    indices = timeseries.subsampleCorrelatedData(u_n) # indices of uncorrelated samples
    #indices = range(0,u_n.size) # DEBUG - assume samples are uncorrelated
    N = len(indices) # number of uncorrelated samples
    N_k[:] = N      
    u_kln[:,:,0:N] = u_kln[:,:,indices]
    print "number of uncorrelated samples:"
    print N_k
    print ""

    #===================================================================================================
    # Estimate free energy difference with MBAR.
    #===================================================================================================   
   
    # Initialize MBAR (computing free energy estimates, which may take a while)
    print "Computing free energy differences..."
    mbar = MBAR(u_kln, N_k, verbose = False, method = 'adaptive', maximum_iterations = 50000) # use the adaptive solver
    #mbar = MBAR(u_kln, N_k, verbose = False, method = 'self-consistent-iteration', maximum_iterations = 50000) # use slow self-consistent-iteration (the default)
    #mbar = MBAR(u_kln, N_k, verbose = True, method = 'Newton-Raphson') # use faster Newton-Raphson solver

    # Get matrix of dimensionless free energy differences and uncertainty estimate.
    print "Computing covariance matrix..."
    (Deltaf_ij, dDeltaf_ij) = mbar.getFreeEnergyDifferences(uncertainty_method='svd-ew')
   
#    # Matrix of free energy differences
    print "Deltaf_ij:"
    for i in range(nstates):
        for j in range(nstates):
            print "%8.3f" % Deltaf_ij[i,j],
        print ""        
    
#    print Deltaf_ij
#    # Matrix of uncertainties in free energy difference (expectations standard deviations of the estimator about the true free energy)
    print "dDeltaf_ij:"
    for i in range(nstates):
        for j in range(nstates):
            print "%8.3f" % dDeltaf_ij[i,j],
        print ""        

    # Return free energy differences and an estimate of the covariance.
    return (Deltaf_ij, dDeltaf_ij)
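Both estimate_* functions carry a TODO about determining ndiscard automatically. pymbar's timeseries module offers detectEquilibration, which picks the equilibration point that maximizes the number of effectively uncorrelated production samples; a sketch of how it could replace the manual ndiscard (the wiring here is an assumption, not the author's code):

from pymbar import timeseries

# u_n is the total reduced potential per iteration, as computed above.
(nequil, g, Neff_max) = timeseries.detectEquilibration(u_n)
u_kln = u_kln[:, :, nequil:]     # discard equilibration automatically
u_n = u_n[nequil:]
indices = timeseries.subsampleCorrelatedData(u_n, g=g)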
Example #21
#------------------------------------------------------------------------
# Read Data From File
#------------------------------------------------------------------------

print("")
print("Preparing data:")
T_from_file = read_simulation_temps(simulation,NumTemps)
E_from_file = read_total_energies(simulation,TE_COL_NUM)
K = len(T_from_file)
N_k = numpy.zeros(K,numpy.int32)
g = numpy.zeros(K,numpy.float64)

for k in range(K):  # subsample the energies
   g[k] = timeseries.statisticalInefficiency(E_from_file[k])
   indices = numpy.array(timeseries.subsampleCorrelatedData(E_from_file[k],g=g[k])) # indices of uncorrelated samples
   N_k[k] = len(indices) # number of uncorrelated samples
   E_from_file[k,0:N_k[k]] = E_from_file[k,indices]

#------------------------------------------------------------------------
# Insert Intermediate T's and corresponding blank U's and E's
#------------------------------------------------------------------------
Temp_k = T_from_file
minT = T_from_file[0]
maxT = T_from_file[len(T_from_file) - 1]
#beta = 1/(kB*T)
#T = 1/(kB*beta)
if dtype == 'temperature':
    minv = minT
    maxv = maxT
elif dtype == 'beta':   # actually going in the opposite direction as beta for logistical reasons
    if len(fep_columns) > 0:
        for i in range(len(fep_columns)):
            reduced_fep_data.append(numpy.zeros([K,N_samples], numpy.float64))
    for k in range(K):
        # Extract timeseries.
        A_t = biasing_variable_kt[0][k,:]
        # Compute statistical inefficiency.
        try:
            g = timeseries.statisticalInefficiency(A_t)
        except Exception as e:
            print str(e)
            print A_t

        # Subsample data.
        if subsample_trajectories:
            indices = timeseries.subsampleCorrelatedData(A_t, g=g)
        else:
            indices = timeseries.subsampleCorrelatedData(A_t, g=1)
        N = len(indices) # number of uncorrelated samples
        print "k = %5d : g = %.1f, N = %d" % (k, g, N)
        for i in range(nbiases):
            biasing_variable_kn[i][k,0:N] = biasing_variable_kt[i][k,indices]
        for i in range(nperturbations+1):
            U_kn[i][k,0:N] = U_kt[i][k,indices]
        if not cluster_binning:
            pmf_variable_kn_1[k,0:N] = pmf_variable_kt_1[k,indices]
            if ndim == 2:
                pmf_variable_kn_2[k,0:N] = pmf_variable_kt_2[k,indices]
        if cluster_binning:
            cluster_bin_kn[k,0:N] = cluster_bin_kt[k,indices]
        if len(expectation_columns) > 0:
Example #23
    for state in range(K):
        # construct timeseries
        Nstate = 0
        for t in range(T):
            if state_t[t] == state:
                #u_t_singlestate[Nstate] = u_tk[t,state]
                u_t_singlestate[Nstate] = u_t[t]
                Nstate += 1

        if Nstate > 0:
            g_state = timeseries.statisticalInefficiency(
                u_t_singlestate[0:Nstate], u_t_singlestate[0:Nstate])
            print "state %5d : g = %16.8f, N = %6d" % (state, g_state, Nstate)

    # Analyze timeseries to determine effectively uncorrelated snapshots.
    indices = timeseries.subsampleCorrelatedData(
        u_t)  # indices of uncorrelated samples
    N = len(indices)  # number of uncorrelated samples
    print "%d uncorrelated samples of %d snapshots." % (N, T)

    # DEBUG: assume all samples are uncorrelated
    #   indices = range(0,T,20)
    #   for t in range(T):
    #      print "%8d %16.8f" % (t, u_t[t])
    #   N = len(indices)

    # Count number of uncorrelated samples in each state.
    N_k = zeros(K, int32)
    for n in range(N):
        t = indices[n]
        state = state_t[t]
        N_k[state] += 1
Example #24
        infile.close()
        # Parse data.
        n = 0
        for line in lines:
            if line[0] != '#' and line[0] != '@':
                tokens = line.split()            
                u_kn[k,n] = beta_k[k] * (float(tokens[2]) - float(tokens[1])) # reduced potential energy without umbrella restraint
                n += 1

    # Compute correlation times for potential energy and chi
    # timeseries.  If the temperatures differ, use energies to determine samples; otherwise, use the cosine of chi
            
    if (DifferentTemperatures):        
        g_k[k] = timeseries.statisticalInefficiency(u_kn[k,0:N_k[k]], u_kn[k,0:N_k[k]])
        print "Correlation time for set %5d is %10.3f" % (k,g_k[k])
        indices = timeseries.subsampleCorrelatedData(u_kn[k,0:N_k[k]])
    else:
        chi_radians = chi_kn[k,0:N_k[k]]/(180.0/numpy.pi)
        g_cos = timeseries.statisticalInefficiency(numpy.cos(chi_radians))
        g_sin = timeseries.statisticalInefficiency(numpy.sin(chi_radians))
        print "g_cos = %.1f | g_sin = %.1f" % (g_cos, g_sin)
        g_k[k] = max(g_cos, g_sin)
        print "Correlation time for set %5d is %10.3f" % (k,g_k[k])
        indices = timeseries.subsampleCorrelatedData(chi_radians, g=g_k[k]) 
    # Subsample data.
    N_k[k] = len(indices)
    u_kn[k,0:N_k[k]] = u_kn[k,indices]
    chi_kn[k,0:N_k[k]] = chi_kn[k,indices]

N_max = numpy.max(N_k) # shorten the array size
u_kln = numpy.zeros([K,K,N_max], numpy.float64) # u_kln[k,l,n] is the reduced potential energy of snapshot n from umbrella simulation k evaluated at umbrella l
   u_t_singlestate = zeros([T], float64)
   for state in range(K):
      # construct timeseries
      Nstate = 0
      for t in range(T):
         if state_t[t] == state:
            #u_t_singlestate[Nstate] = u_tk[t,state]
            u_t_singlestate[Nstate] = u_t[t]
            Nstate += 1

      if Nstate > 0:
         g_state = timeseries.statisticalInefficiency(u_t_singlestate[0:Nstate], u_t_singlestate[0:Nstate])      
         print "state %5d : g = %16.8f, N = %6d" % (state, g_state, Nstate)

   # Analyze timeseries to determine effectively uncorrelated snapshots.
   indices = timeseries.subsampleCorrelatedData(u_t) # indices of uncorrelated samples
   N = len(indices) # number of uncorrelated samples
   print "%d uncorrelated samples of %d snapshots." % (N, T)

   # DEBUG: assume all samples are uncorrelated
#   indices = range(0,T,20)
#   for t in range(T):
#      print "%8d %16.8f" % (t, u_t[t])
#   N = len(indices)
         
   # Count number of uncorrelated samples in each state.
   N_k = zeros(K, int32)
   for n in range(N):
      t = indices[n]
      state = state_t[t]
      N_k[state] += 1
Example #26
def main():
    options = parse_args()
    kB = 0.00831447/4.184  #Boltzmann constant (Gas constant) in kJ/(mol*K)
    dT = 2.5              # Temperature increment for calculating Cv(T)
    
    T = numpy.loadtxt(options.tfile)
    print 'Initial temperature states are', T
    K = len(T)
  
    U_kn, Q_kn, N_max = read_data(options,T,K)

    print 'Subsampling Q...' 
    N_k = numpy.zeros(K,numpy.int32)
    g = numpy.zeros(K,numpy.float64)
    for k in range(K):  # subsample the energies
       g[k] = timeseries.statisticalInefficiency(Q_kn[k])#,suppress_warning=True)
       indices = numpy.array(timeseries.subsampleCorrelatedData(Q_kn[k],g=g[k])) # indices of uncorrelated samples
       N_k[k] = len(indices) # number of uncorrelated samples
       print '%i uncorrelated samples out of %i total samples' %(len(indices),options.N_max/options.skip)
       U_kn[k,0:N_k[k]] = U_kn[k,indices]
       Q_kn[k,0:N_k[k]] = Q_kn[k,indices]

    insert = True
    if insert: 
        #------------------------------------------------------------------------
        # Insert Intermediate T's and corresponding blank U's and E's
        #------------------------------------------------------------------------
        # Set up variables
        Temp_k = T
        currentT = T[0] + dT
        maxT = T[-1]
        i = 1
        
        print("--Inserting intermediate temperatures...")
        
        # Loop, inserting T's at which we are interested in the properties
        while (currentT < maxT) :
               if (currentT < Temp_k[i]):
                     Temp_k = numpy.insert(Temp_k, i, currentT)
                     currentT = currentT + dT
               else:
                     currentT = Temp_k[i] + dT
                     i = i + 1
                     
        # Update number of states
        K = len(Temp_k)
        
        print("--Inserting blank energies to match up with inserted temperatures...")
        
        # Loop, inserting E's into blank matrix (leaving blanks only where new Ts are inserted)
        Q_fromfile = Q_kn
        Nall_k = numpy.zeros([K], numpy.int32) # Number of samples (n) for each state (k) = number of iterations/energies
        E_kn = numpy.zeros([K, N_max], numpy.float64)
        Q_kn = numpy.zeros([K, N_max], numpy.float64)
        i = 0
        
        for k in range(K):
            if (Temp_k[k] == T[i]):
                E_kn[k,0:N_k[i]] = U_kn[i,0:N_k[i]]
                Q_kn[k,0:N_k[i]] = Q_fromfile[i,0:N_k[i]]
                Nall_k[k] = N_k[i]
                i = i + 1
    else:
        print 'Not inserting intermediate temperatures'
        Temp_k = T
        E_kn = U_kn
        Nall_k = N_k

    #------------------------------------------------------------------------
    # Compute inverse temperatures
    #------------------------------------------------------------------------
    beta_k = 1 / (kB * Temp_k)
    
    #------------------------------------------------------------------------
    # Compute reduced potential energies
    #------------------------------------------------------------------------
    
    print "--Computing reduced energies..."
    
    u_kln = numpy.zeros([K,K,N_max], numpy.float64) # u_kln is reduced pot. ener. of segment n of temp k evaluated at temp l
    
    for k in range(K):
           for l in range(K):
                 u_kln[k,l,0:Nall_k[k]] = beta_k[l] * E_kn[k,0:Nall_k[k]]
    
    #------------------------------------------------------------------------
    # Initialize MBAR
    #------------------------------------------------------------------------
    
    # Initialize MBAR with Newton-Raphson
    print ""
    print "Initializing MBAR:"
    print "--K = number of Temperatures"
    print "--L = number of Temperatures"
    print "--N = number of Energies per Temperature"
    
    # Use Adaptive Method (Both Newton-Raphson and Self-Consistent, testing which is better)
    if insert:
        mbar = pymbar.MBAR(u_kln, Nall_k, method = 'adaptive', verbose=True, relative_tolerance=1e-12)
    else:
        f_k = wham.histogram_wham(beta_k, U_kn, Nall_k, relative_tolerance = 1.0e-4)
        mbar = pymbar.MBAR(u_kln, Nall_k, initial_f_k = f_k, verbose=True)
    #------------------------------------------------------------------------
    # Compute Expectations for E_kt and E2_kt as E_expect and E2_expect
    #------------------------------------------------------------------------
    print ""
    print "Computing Expectations for E..."
    (E_expect, dE_expect) = mbar.computeExpectations(u_kln)*(beta_k)**(-1)
    print "Computing Expectations for E^2..."
    (E2_expect,dE2_expect) = mbar.computeExpectations(u_kln*u_kln)*(beta_k)**(-2)
    
    print "Computing Expectations for Q..."
    (Q,dQ) = mbar.computeExpectations(Q_kn)
    
    #------------------------------------------------------------------------
    # Compute Cv for NVT simulations as <E^2> - <E>^2 / (RT^2)
    #------------------------------------------------------------------------
    #print ""
    #print "Computing Heat Capacity as ( <E^2> - <E>^2 ) / ( R*T^2 )..."
    
    Cv_expect = numpy.zeros([K], numpy.float64)
    dCv_expect = numpy.zeros([K], numpy.float64)
    
    for i in range(K):
           Cv_expect[i] = (E2_expect[i] - (E_expect[i]*E_expect[i])) / ( kB * Temp_k[i] * Temp_k[i])
           dCv_expect[i] = 2*dE_expect[i]**2 / (kB *Temp_k[i]*Temp_k[i])   # from propagation of error
    
    #print "Temperature  dA         <E> +/- d<E>       <E^2> +/- d<E^2>       Cv +/- dCv"     
    #print "-------------------------------------------------------------------------------"
    #for k in range(K):
    #       print "%8.3f %8.3f %9.3f +/- %5.3f  %9.1f +/- %5.1f   %7.4f +/- %6.4f" % (Temp_k[k],mbar.f_k[k],E_expect[k],dE_expect[k],E2_expect[k],dE2_expect[k],Cv_expect[k], dCv_expect[k])
    #numpy.savetxt('/home/edz3fz/Qsurf_int.txt',Q)
    #numpy.savetxt('/home/edz3fz/dQsurf_int.txt',dQ)
    #numpy.savetxt('/home/edz3fz/dQsol.txt',dQ)
    
    #numpy.savetxt('/home/edz3fz/Qtemp.tt',Temp_k)
    import matplotlib.pyplot as plt
    #ncavg = numpy.average(Q_fromfile, axis=1)
    
    plt.figure(1)
    #plt.plot(T, ncavg, 'ko')
    plt.plot(Temp_k,Q,'k')
    plt.errorbar(Temp_k, Q, yerr=dQ)
    plt.xlabel('Temperature (K)')
    plt.ylabel('Q fraction native contacts')
    #plt.title('Heat Capacity from Go like model MC simulation of 1BSQ')
    plt.savefig(options.direc+'/foldingcurve.png')
    numpy.save(options.direc+'/foldingcurve',numpy.array([Temp_k, Q, dQ]))
    numpy.save(options.direc+'/heatcap',numpy.array([Temp_k, Cv_expect, dCv_expect]))
    if options.show:
        plt.show()
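The heat-capacity loop in Example #26 implements the standard NVT fluctuation formula, shown here in LaTeX together with the error propagation the code uses:

C_V(T_k) = \frac{\langle E^2 \rangle_k - \langle E \rangle_k^2}{k_B T_k^2},
\qquad
\delta C_V(T_k) = \frac{2\,(\delta \langle E \rangle_k)^2}{k_B T_k^2}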
Example #27
files = [
    '%s/energy%i.npy' % (direc, T[-2]),
    '%s/energy%i.npy' % (direc, T[-1])
]
#file=['/home/edz3fz/checkensemble_high/CE_high.txt','/home/edz3fz/checkensemble_low/CE_low.txt']
#file=[direc+'/energy426.txt',direc+'/energy442.txt']
#file = ['/home/edz3fz/surface_replica_exchange/replica0/energy300.txt', '/home/edz3fz/surface_replica_exchange/replica3/energy356.txt']
down = load(files[0])
up = load(files[1])
length = len(down)
down = down[length / 2::]
up = up[length / 2::]
#up=up[-50000::]
#down=down[-50000::]
#up=up[::100]
#down=down[::100]

g_up = timeseries.statisticalInefficiency(up)
indices_up = numpy.array(timeseries.subsampleCorrelatedData(up, g=g_up))
print len(indices_up), 'samples'

g_down = timeseries.statisticalInefficiency(down)
indices_down = numpy.array(timeseries.subsampleCorrelatedData(down, g=g_down))
print len(indices_down), 'samples'

type = 'total'
U_kn = zeros([2, len(up)])
U_kn[0, 0:len(indices_down)] = down[indices_down]
U_kn[1, 0:len(indices_up)] = up[indices_up]
#T_k=array([300.,336.8472786])
#T_k=array([426.81933819,442.13650313])
#T_k=array([424.67492585,450])
#T_k=array([437.99897735,450])
N_k = [len(indices_down), len(indices_up)]
Example #28
files = ['%s/energy%i.npy' % (direc, T[-2]), '%s/energy%i.npy' % (direc, T[-1])]
#file=['/home/edz3fz/checkensemble_high/CE_high.txt','/home/edz3fz/checkensemble_low/CE_low.txt']
#file=[direc+'/energy426.txt',direc+'/energy442.txt']
#file = ['/home/edz3fz/surface_replica_exchange/replica0/energy300.txt', '/home/edz3fz/surface_replica_exchange/replica3/energy356.txt']
down=load(files[0])
up=load(files[1])
length = len(down)
down = down[length/2::]
up = up[length/2::]
#up=up[-50000::]
#down=down[-50000::]
#up=up[::100]
#down=down[::100]

g_up = timeseries.statisticalInefficiency(up)
indices_up = numpy.array(timeseries.subsampleCorrelatedData(up,g=g_up))
print len(indices_up), 'samples'

g_down = timeseries.statisticalInefficiency(down)
indices_down = numpy.array(timeseries.subsampleCorrelatedData(down,g=g_down))
print len(indices_down), 'samples'



type='total'
U_kn=zeros([2,len(up)])
U_kn[0,0:len(indices_down)] = down[indices_down]
U_kn[1,0:len(indices_up)] = up[indices_up]
#T_k=array([300.,336.8472786])
#T_k=array([426.81933819,442.13650313])
#T_k=array([424.67492585,450])
Example #29
  # Calculate Reduced Potential 
  if aur == 'o':
    if rfc[k,0] == 0:
      tmp=np.ones([R],np.float64)*0.001
      u[0:N[k]] = np.sum(beta*tmp[0:R]*((val[0:N[k],k,0:R])**2), axis=1)
    else:
      u[0:N[k]] = np.sum(beta*rfc[k,0:R]*((val[0:N[k],k,0:R])**2), axis=1)
  else:
    if rfc[k,0] == 0:
      tmp=np.ones([R],np.float64)*0.001
      u[0:N[k]] = np.sum(beta*tmp[0:R]*((val[0:N[k],k,0:R]-req[k,0:R])**2), axis=1)
    else:
      u[0:N[k]] = np.sum(beta*rfc[k,0:R]*((val[0:N[k],k,0:R]-req[k,0:R])**2), axis=1)

  g[k] = calcg(u[0:N[k]])
  subs = timeseries.subsampleCorrelatedData(np.zeros([N[k]]),g=g[k])
  Nind[k] = len(subs)
  if Nind[k] > 100000:
    Neff[k] = 100000
  else:
    Neff[k] = Nind[k]


  print  "Processed Window %5.0f.  N= %12.0f.  g= %10.3f   Nind= %12.0f   Neff= %12.0f" % ( k, N[k], g[k], Nind[k], Neff[k] )

print  "Max Neff= %.0f" % ( np.max(Neff) )
Upot = np.zeros([K,K,np.max(Neff)], np.float64)

# Calculate Restraint Energy
for k in range(K):
  subs = timeseries.subsampleCorrelatedData(np.zeros([N[k]]),g=g[k])
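Example #29 feeds np.zeros([N[k]]) to subsampleCorrelatedData. Because g is supplied explicitly, the function never inspects the values; only the length of the series matters, so the call degenerates into generating indices spaced roughly g apart. A small demonstration of that equivalence (assuming pymbar's timeseries module):

import numpy
from pymbar import timeseries

N, g = 1000, 7.3
a = timeseries.subsampleCorrelatedData(numpy.zeros(N), g=g)                 # the trick above
b = timeseries.subsampleCorrelatedData(numpy.arange(N, dtype=float), g=g)   # any values work
assert list(a) == list(b)   # with g given, only len(series) affects the result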
Example #30
    def _subsample_kln(self, u_kln):
        #Try to load in the data
        if self.save_equil_data: #Check if we want to save/load equilibration data
            try:
                equil_data = numpy.load(os.path.join(self.source_directory, self.save_prefix + self.phase + '_equil_data_%s.npz' % self.subsample_method))
                if self.nequil is None:
                    self.nequil = equil_data['nequil']
                elif type(self.nequil) is int and self.subsample_method == 'per-state':
                    print "WARRNING: Per-state subsampling requested with only single value for equilibration..."
                    try:
                        self.nequil = equil_data['nequil']
                        print "Loading equilibration from file with %i states read" % self.nstates
                    except:
                        print "Assuming equal equilibration per state of %i" % self.nequil
                        self.nequil = numpy.array([self.nequil] * self.nstates)
                self.g_t = equil_data['g_t']
                Neff_max = equil_data['Neff_max']
                #Do equilibration if we have not already
                if self.subsample_method == 'per-state' and (len(self.g_t) < self.nstates or len(self.nequil) < self.nstates):
                    equil_loaded = False
                    raise IndexError
                else:
                    equil_loaded = True
            except:
                if self.subsample_method == 'per-state':
                    self.nequil = numpy.zeros([self.nstates], dtype=numpy.int32)
                    self.g_t = numpy.zeros([self.nstates])
                    Neff_max = numpy.zeros([self.nstates])
                    for k in xrange(self.nstates):
                        if self.verbose: print "Computing timeseries for state %i/%i" % (k,self.nstates-1)
                        self.nequil[k] = 0
                        self.g_t[k] = timeseries.statisticalInefficiency(u_kln[k,k,:])
                        Neff_max[k] = (u_kln[k,k,:].size + 1 ) / self.g_t[k]
                        #[self.nequil[k], self.g_t[k], Neff_max[k]] = self._detect_equilibration(u_kln[k,k,:])
                else:
                    if self.nequil is None:
                        [self.nequil, self.g_t, Neff_max] = self._detect_equilibration(self.u_n)
                    else:
                        [self.nequil_timeseries, self.g_t, Neff_max] = self._detect_equilibration(self.u_n)
                equil_loaded = False
            if not equil_loaded:
                numpy.savez(os.path.join(self.source_directory, self.save_prefix + self.phase + '_equil_data_%s.npz' % self.subsample_method), nequil=self.nequil, g_t=self.g_t, Neff_max=Neff_max)
        elif self.nequil is None:
            if self.subsample_method == 'per-state':
                self.nequil = numpy.zeros([self.nstates], dtype=numpy.int32)
                self.g_t = numpy.zeros([self.nstates])
                Neff_max = numpy.zeros([self.nstates])
                for k in xrange(self.nstates):
                    [self.nequil[k], self.g_t[k], Neff_max[k]] = self._detect_equilibration(u_kln[k,k,:])
                    if self.verbose: print "State %i equilibrated with %i samples" % (k, int(Neff_max[k]))
            else:
                [self.nequil, self.g_t, Neff_max] = self._detect_equilibration(self.u_n)

        if self.verbose: print [self.nequil, Neff_max]
        # 1) Discard equilibration data
        # 2) Subsample data to obtain uncorrelated samples
        self.N_k = numpy.zeros(self.nstates, numpy.int32)
        if self.subsample_method == 'per-state':
            # Discard samples
            nsamples_equil = self.niterations - self.nequil
            self.u_kln = numpy.zeros([self.nstates,self.nstates,nsamples_equil.max()])
            for k in xrange(self.nstates):
                self.u_kln[k,:,:nsamples_equil[k]] = u_kln[k,:,self.nequil[k]:]
            #Subsample
            transfer_retained_indices = numpy.zeros([self.nstates,nsamples_equil.max()], dtype=numpy.int32)
            for k in xrange(self.nstates):
                state_indices = timeseries.subsampleCorrelatedData(self.u_kln[k,k,:], g = self.g_t[k])
                self.N_k[k] = len(state_indices)
                transfer_retained_indices[k,:self.N_k[k]] = state_indices
            transfer_kln = numpy.zeros([self.nstates, self.nstates, self.N_k.max()])
            self.retained_indices = numpy.zeros([self.nstates,self.N_k.max()], dtype=numpy.int32)
            for k in xrange(self.nstates):
                self.retained_indices[k,:self.N_k[k]] = transfer_retained_indices[k,:self.N_k[k]] #Memory reduction
                transfer_kln[k,:,:self.N_k[k]] = self.u_kln[k,:,self.retained_indices[k,:self.N_k[k]]].T #Have to transpose since indexing in this way causes issues

            #Cut down on memory, once function is done, transfer_kln should be released
            self.u_kln = transfer_kln
            self.retained_iters = self.N_k
        else:
            #Discard Samples
            self.u_kln = u_kln[:,:,self.nequil:]
            self.u_n = self.u_n[self.nequil:]
            #Subsamples
            indices = timeseries.subsampleCorrelatedData(self.u_n, g=self.g_t) # indices of uncorrelated samples
            self.u_kln = self.u_kln[:,:,indices]
            self.N_k[:] = len(indices)
            self.retained_indices = indices
            self.retained_iters = len(indices)
        return
Example #31
def estimate_free_energies(ncfile, ndiscard=0, nuse=None):
    """Estimate free energies of all alchemical states.

    ARGUMENTS
       ncfile (NetCDF) - input YANK netcdf file

    OPTIONAL ARGUMENTS
       ndiscard (int) - number of iterations to discard to equilibration
       nuse (int) - maximum number of iterations to use (after discarding)

    TODO: Automatically determine 'ndiscard'.
    """

    # Get current dimensions; 'energies' has shape [niterations, nstates, nstates].
    niterations = ncfile.variables['energies'].shape[0]
    nstates = ncfile.variables['energies'].shape[1]

    # Extract energies.
    print "Reading energies..."
    energies = ncfile.variables['energies']
    u_kln_replica = zeros([nstates, nstates, niterations], float64)
    for n in range(niterations):
        u_kln_replica[:, :, n] = energies[n, :, :]
    print "Done."

    # Deconvolute replicas
    print "Deconvoluting replicas..."
    u_kln = zeros([nstates, nstates, niterations], float64)
    for iteration in range(niterations):
        state_indices = ncfile.variables['states'][iteration, :]
        u_kln[state_indices, :, iteration] = energies[iteration, :, :]
    print "Done."

    # Compute total negative log probability over all iterations.
    u_n = zeros([niterations], float64)
    for iteration in range(niterations):
        u_n[iteration] = sum(diagonal(u_kln[:, :, iteration]))
    #print u_n

    # DEBUG
    outfile = open('u_n.out', 'w')
    for iteration in range(niterations):
        outfile.write("%8d %24.3f\n" % (iteration, u_n[iteration]))
    outfile.close()

    # Discard initial data to equilibration.
    u_kln_replica = u_kln_replica[:, :, ndiscard:]
    u_kln = u_kln[:, :, ndiscard:]
    u_n = u_n[ndiscard:]

    # Truncate to the specified number of conformations to use
    if (nuse):
        u_kln_replica = u_kln_replica[:, :, 0:nuse]
        u_kln = u_kln[:, :, 0:nuse]
        u_n = u_n[0:nuse]

    # Subsample data to obtain uncorrelated samples
    N_k = zeros(nstates, int32)
    indices = timeseries.subsampleCorrelatedData(
        u_n)  # indices of uncorrelated samples
    #indices = range(0,u_n.size) # DEBUG - assume samples are uncorrelated
    N = len(indices)  # number of uncorrelated samples
    N_k[:] = N
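    # Overwrite the leading N entries of u_kln in place with the uncorrelated
    # samples; MBAR below reads only the first N_k[k] entries for each state.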
    u_kln[:, :, 0:N] = u_kln[:, :, indices]
    print "number of uncorrelated samples:"
    print N_k
    print ""

    #===================================================================================================
    # Estimate free energy difference with MBAR.
    #===================================================================================================

    # Initialize MBAR (computing free energy estimates, which may take a while)
    print "Computing free energy differences..."
    mbar = MBAR(u_kln,
                N_k,
                verbose=False,
                method='adaptive',
                maximum_iterations=50000
                )  # use the robust adaptive solver
    #mbar = MBAR(u_kln, N_k, verbose = False, method = 'self-consistent-iteration', maximum_iterations = 50000) # use slow self-consistent-iteration (the default)
    #mbar = MBAR(u_kln, N_k, verbose = True, method = 'Newton-Raphson') # use faster Newton-Raphson solver

    # Get matrix of dimensionless free energy differences and uncertainty estimate.
    print "Computing covariance matrix..."
    (Deltaf_ij,
     dDeltaf_ij) = mbar.getFreeEnergyDifferences(uncertainty_method='svd-ew')

    # Matrix of free energy differences
    print "Deltaf_ij:"
    for i in range(nstates):
        for j in range(nstates):
            print "%8.3f" % Deltaf_ij[i, j],
        print ""


    # Matrix of uncertainties in the free energy differences (expected standard deviations of the estimator about the true free energies)
    print "dDeltaf_ij:"
    for i in range(nstates):
        for j in range(nstates):
            print "%8.3f" % dDeltaf_ij[i, j],
        print ""

    # Return free energy differences and an estimate of the covariance.
    return (Deltaf_ij, dDeltaf_ij)
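
The returned Deltaf_ij matrix is dimensionless (in units of kT). A small helper sketch for converting it to physical units; the helper name and the 300 K default are assumptions for illustration:

def to_physical_units(Deltaf_ij, dDeltaf_ij, temperature=300.0):
    # Convert dimensionless free energy differences (units of kT) to kcal/mol.
    kB = 0.0019872041  # Boltzmann constant in kcal/mol/K
    kT = kB * temperature
    return (Deltaf_ij * kT, dDeltaf_ij * kT)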
Ejemplo n.º 32
0
def call_mbar(aur, temp, phase):
    ### Arguments
    # aur: restraint type flag (e.g. 't', 'o', 'r', 'l', 'p', 'b', 'u', 'd')
    temp = float(temp)  # temperature in K
    kB = 1.381e-23 * 6.022e23 / (4.184 * 1000.0)  # Boltzmann constant in kcal/mol/K
    beta = 1 / (kB * temp)  # inverse temperature
    N_max = 2000000  # max frames for any simulation window; check this if you ran long simulations

    sys.stdout = open('subs-' + aur + '.log', 'w')
    ### Determine the number of umbrella windows
    K = 0
    filename = './' + aur + '%02.0f/restraints.dat' % K
    while os.path.isfile(filename):
        K = K + 1
        filename = './' + aur + '%02.0f/restraints.dat' % K

    R = 1

    print "K= %5.0f  R= %5.0f" % (K, R)

    ### Allocate storage for simulation data
    N = np.zeros(
        [K], np.int32
    )  # N[k] is the number of snapshots read from umbrella simulation k
    Neff = np.zeros([K], np.int32)
    Nind = np.zeros([K], np.int32)
    Nprg = np.zeros([K], np.int32)
    rty = ['d'] * R  # restraint type (distance or angle)
    rfc = np.zeros([K, R], np.float64)  # restraint force constant
    rfc2 = np.zeros([K, R], np.float64)  # restraint force constant
    fcmax = np.zeros(
        [R], np.float64
    )  # full force constant value used during umbrella portion of work
    req = np.zeros([K, R], np.float64)  # restraint target value
    req2 = np.zeros([K, R], np.float64)  # restraint target value
    val = np.zeros(
        [N_max, K, R],
        np.float64)  # value of the restrained variable at each frame n
    val2 = np.zeros(
        [N_max, K, R],
        np.float64)  # value of the restrained variable at each frame n
    g = np.zeros([K], np.float64)

    ### Tmp type arrays for energy and spline fitting/integration
    u = np.zeros([N_max], np.float64)
    x = np.zeros([K], np.float64)
    y = np.zeros([K], np.float64)
    m = np.zeros([K], np.float64)
    s = np.zeros([K], np.float64)

    print "Done with array setup\n"

    ### Read the simulation data
    r = 0
    for k in range(K):
        # Read Equilibrium Value and Force Constant
        if aur == 't':
            with open('./' + aur + '%02.0f/colvar.in' % k, 'r') as f:
                for line in f:
                    if 'posit2' in line:
                        for line in f:
                            cols = line.split()
                            if len(cols) != 0 and (cols[0] == "centers"):
                                req[k, r] = float(cols[1])
                            if len(cols) != 0 and (cols[0] == "forceConstant"):
                                rfc[k, r] = float(cols[1]) / 2
                                break
                    if 'posit3' in line:
                        for line in f:
                            cols = line.split()
                            if len(cols) != 0 and (cols[0] == "centers"):
                                req2[k, r] = float(cols[1])
                            if len(cols) != 0 and (cols[0] == "forceConstant"):
                                rfc2[k, r] = float(cols[1]) / 2
                                break
        elif aur == 'o':
            with open('./' + aur + '%02.0f/colvar.in' % k, 'r') as f:
                for line in f:
                    if 'orient2' in line:
                        for line in f:
                            cols = line.split()
                            if len(cols) != 0 and (cols[0] == "centers"):
                                valstr = cols[1][1:-1]  # strip the enclosing delimiters; avoid shadowing the built-in str
                                req[k, r] = float(valstr)
                            if len(cols) != 0 and (cols[0] == "forceConstant"):
                                rfc[k, r] = float(cols[1]) / 2
                                break
        elif aur == 'r' or aur == 'l':
            with open('./' + aur + '%02.0f/colvar.in' % k, 'r') as f:
                for line in f:
                    if 'rmsd2' in line:
                        for line in f:
                            cols = line.split()
                            if len(cols) != 0 and (cols[0] == "centers"):
                                req[k, r] = float(cols[1])
                            if len(cols) != 0 and (cols[0] == "forceConstant"):
                                rfc[k, r] = float(cols[1]) / 2
                                break
        elif aur == 'p' or aur == 'b':
            with open('./' + aur + '%02.0f/colvar.in' % k, 'r') as f:
                for line in f:
                    if 'rmsd1' in line:
                        for line in f:
                            cols = line.split()
                            if len(cols) != 0 and (cols[0] == "centers"):
                                req[k, r] = float(cols[1])
                            if len(cols) != 0 and (cols[0] == "forceConstant"):
                                rfc[k, r] = float(cols[1]) / 2
                                break
        elif aur == 'u':
            with open('./' + aur + '%02.0f/colvar.in' % k, 'r') as f:
                for line in f:
                    if 'posit3' in line:
                        for line in f:
                            cols = line.split()
                            if len(cols) != 0 and (cols[0] == "centers"):
                                req[k, r] = float(cols[1])
                            if len(cols) != 0 and (cols[0] == "forceConstant"):
                                rfc[k, r] = float(cols[1]) / 2
                                break
        else:
            sys.exit("not sure about restraint type!")

        # Read in Values for restrained variables for each simulation
        filename = './' + aur + '%02.0f/restraints.dat' % k
        infile = open(filename, 'r')
        restdat = infile.readlines()  # read all lines; early frames are skipped below via from_line
        infile.close()
        # Parse Data
        n = 0
        nline = 0  # line counter; renamed so it does not clobber the spline array 's'
        from_line = 0
        if int(phase) == 0:
            from_line = 500
        for line in restdat:
            nline += 1  # only analyze the file from line from_line onward
            if line[0] != '#' and line[0] != '@' and nline > from_line:
                cols = line.split()
                if aur == 'o':
                    val[n, k, r] = math.acos(float(cols[2]))
                elif aur == 'u' or aur == 'l':
                    val[n, k, r] = float(cols[2])
                elif aur == 't':
                    val[n, k, r] = float(cols[1])
                    val2[n, k, r] = float(cols[2])
                else:
                    val[n, k, r] = float(cols[1])
                n += 1
        N[k] = n

        # Calculate Reduced Potential
        if aur == 'o':
            if rfc[k, 0] == 0:
                tmp = np.ones([R], np.float64) * 0.001
                u[0:N[k]] = np.sum(beta * tmp[0:R] *
                                   ((val[0:N[k], k, 0:R])**2),
                                   axis=1)  # only the N[k] frames read for this window are used
            else:
                u[0:N[k]] = np.sum(beta * rfc[k, 0:R] *
                                   ((val[0:N[k], k, 0:R])**2),
                                   axis=1)
        elif aur == 't':
            if rfc[k, 0] == 0 and rfc2[k, 0] != 0:
                tmp = np.ones([R], np.float64) * 0.001
                u[0:N[k]] = np.sum(beta * (tmp[0:R] * ((
                    (val[0:N[k], k, 0:R] - req[k, 0:R])**2)) + rfc2[k, 0:R] * (
                        (val2[0:N[k], k, 0:R] - req2[k, 0:R])**2)),
                                   axis=1)  # reduced restraint energy: beta * K * (x - x0)**2
            elif rfc[k, 0] != 0 and rfc2[k, 0] == 0:
                tmp = np.ones([R], np.float64) * 0.001
                u[0:N[k]] = np.sum(
                    beta *
                    (rfc[k, 0:R] *
                     (((val[0:N[k], k, 0:R] - req[k, 0:R])**2)) + tmp[0:R] *
                     ((val2[0:N[k], k, 0:R] - req2[k, 0:R])**2)),
                    axis=1)
            elif rfc[k, 0] == 0 and rfc2[k, 0] == 0:
                tmp = np.ones([R], np.float64) * 0.001
                u[0:N[k]] = np.sum(
                    beta *
                    (tmp[0:R] *
                     (((val[0:N[k], k, 0:R] - req[k, 0:R])**2)) + tmp[0:R] *
                     ((val2[0:N[k], k, 0:R] - req2[k, 0:R])**2)),
                    axis=1)
            else:
                u[0:N[k]] = np.sum(beta * (rfc[k, 0:R] * ((
                    (val[0:N[k], k, 0:R] - req[k, 0:R])**2)) + rfc2[k, 0:R] * (
                        (val2[0:N[k], k, 0:R] - req2[k, 0:R])**2)),
                                   axis=1)
        else:
            if rfc[k, 0] == 0:
                tmp = np.ones([R], np.float64) * 0.001
                u[0:N[k]] = np.sum(beta * tmp[0:R] *
                                   ((val[0:N[k], k, 0:R] - req[k, 0:R])**2),
                                   axis=1)  # reduced restraint energy: beta * K * (x - x0)**2
            else:
                u[0:N[k]] = np.sum(beta * rfc[k, 0:R] *
                                   ((val[0:N[k], k, 0:R] - req[k, 0:R])**2),
                                   axis=1)

        g[k] = calcg(u[0:N[k]])
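        # When g is supplied, subsampleCorrelatedData uses only the length of
        # its first argument to build evenly spaced indices, so a dummy zero
        # array of length N[k] suffices here.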
        subs = timeseries.subsampleCorrelatedData(np.zeros([N[k]]), g=g[k])
        Nind[k] = len(subs)
        if Nind[k] > 100000:
            Neff[k] = 100000
        else:
            Neff[k] = Nind[k]

        print "Processed Window %5.0f.  N= %12.0f.  g= %10.3f   Nind= %12.0f   Neff= %12.0f" % (
            k, N[k], g[k], Nind[k], Neff[k])

    print "Max Neff= %.0f" % (np.max(Neff))
    Upot = np.zeros([K, K, np.max(Neff)], np.float64)

    # Calculate Restraint Energy
    for k in range(K):
        #  subs = timeseries.subsampleCorrelatedData(np.zeros([N[k]]),g=g[k])
        for l in range(K):
            if aur == 'o':
                Upot[k, l, 0:Neff[k]] = np.sum(beta * rfc[l, 0:R] *
                                               ((val[0:Neff[k], k, 0:R])**2),
                                               axis=1)
            elif aur == 't':
                Upot[k, l, 0:Neff[k]] = np.sum(
                    beta * (rfc[l, 0:R] *
                            ((val[0:Neff[k], k, 0:R] - req[l, 0:R])**2) +
                            rfc2[l, 0:R] *
                            ((val2[0:Neff[k], k, 0:R] - req2[l, 0:R])**2)),
                    axis=1)
            else:
                Upot[k, l, 0:Neff[k]] = np.sum(
                    beta * rfc[l, 0:R] *
                    ((val[0:Neff[k], k, 0:R] - req[l, 0:R])**2),
                    axis=1)

    val = []  # release the large trajectory array

    prg = [100]  # percentages of the data to analyze
    for p in range(len(prg)):

        Nprg = Neff * prg[p] / 100  # integer number of effective samples at this percentage
        print "Running MBAR on %.0f percent of the data ... " % (prg[p])
        mbar = pymbar.MBAR(Upot,
                           Nprg,
                           verbose=True,
                           method='adaptive',
                           initialize='BAR')

        print "Calculate Free Energy Differences Between States"
        [Deltaf, dDeltaf] = mbar.getFreeEnergyDifferences()

        min_index = np.argmin(Deltaf[0])  # state with the lowest free energy relative to state 0; avoid shadowing built-in min

        # Write to file
        print "Free Energy Differences (in units of kcal/mol)"
        print "%9s %8s %8s %12s %12s" % ('bin', 'f', 'df', 'deq', 'dfc')
        datfile = open('subs-' + aur + '.%03.0f.dat' % prg[p], 'w')
        for k in range(K):
            if aur in ('r', 'o', 'p', 'b', 'l'):
                print "%10.5f %10.5f %10.5f %12.7f %12.7f" % (
                    rfc[k, 0] / rfc[-1, 0], Deltaf[0, k] / beta,
                    dDeltaf[0, k] / beta, req[k, 0], rfc[k, 0])
                datfile.write("%10.5f %10.5f %10.5f %12.7f %12.7f\n" %
                              (rfc[k, 0] / rfc[-1, 0], Deltaf[0, k] / beta,
                               dDeltaf[0, k] / beta, req[k, 0], rfc[k, 0]))
            elif aur == 't':
                print "%10.5f %10.5f %10.5f %12.7f %12.7f %12.7f %12.7f" % (
                    rfc[k, 0] / rfc[-1, 0], Deltaf[0, k] / beta, dDeltaf[0, k]
                    / beta, req[k, 0], req2[k, 0], rfc[k, 0], rfc2[k, 0])
                datfile.write("%10.5f %10.5f %10.5f %12.7f %12.7f %12.7f %12.7f\n" %
                              (rfc[k, 0] / rfc[-1, 0], Deltaf[0, k] / beta,
                               dDeltaf[0, k] / beta, req[k, 0], req2[k, 0],
                               rfc[k, 0], rfc2[k, 0]))
            elif aur == 'd':
                print "%9.0f %10.5f %10.5f %12.7f %12.7f" % (
                    k, Deltaf[0, k] / beta, dDeltaf[0, k] / beta, req[k, 0],
                    rfc[k, 0] / rfc[-1, 0])
                datfile.write("%9.0f %10.5f %10.5f %12.7f %12.7f\n" %
                              (k, Deltaf[0, k] / beta, dDeltaf[0, k] / beta,
                               req[k, 0], rfc[k, 0] / rfc[-1, 0]))
            else:  # 'u'
                print "%10.5f %10.5f %10.5f %12.7f %12.7f" % (
                    req[k, 0], Deltaf[0, k] / beta, dDeltaf[0, k] / beta,
                    req[k, 0], rfc[k, 0])
                datfile.write("%10.5f %10.5f %10.5f %12.7f %12.7f\n" %
                              (req[k, 0], Deltaf[0, k] / beta,
                               dDeltaf[0, k] / beta, req[k, 0], rfc[k, 0]))
        datfile.close()
        print "\n\n"
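
The Upot construction above is the standard umbrella-sampling input for MBAR: every window's samples are re-evaluated under every other window's restraint. A simplified single-restraint sketch of that step (the function name is illustrative; val has shape [N_max, K], req and rfc have shape [K]):

import numpy as np

def restraint_energy_matrix(val, req, rfc, Neff, beta):
    # Upot[k, l, n] = beta * rfc[l] * (x_{k,n} - req[l])**2 is the reduced
    # energy of frame n from window k evaluated under window l's restraint.
    K = len(req)
    Upot = np.zeros([K, K, Neff.max()], np.float64)
    for k in range(K):
        for l in range(K):
            Upot[k, l, 0:Neff[k]] = beta * rfc[l] * (val[0:Neff[k], k] - req[l])**2
    return Upot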
Ejemplo n.º 33
0
for iteration in range(niterations):
    state_indices = ncfile.variables['states'][iteration,:]
    u_kln[state_indices,:,iteration] = ncfile.variables['energies'][iteration,:,:]
ncfile.close()
# Extract log probability history.
u_n = numpy.zeros([niterations], numpy.float64)
for iteration in range(niterations):
    u_n[iteration] = 0.0
    for state in range(nstates):
        u_n[iteration] += u_kln[state,state,iteration]
# Detect equilibration.
[nequil, g, Neff] = detect_equilibration(u_n)
u_n = u_n[nequil:]
u_kln = u_kln[:,:,nequil:]
# Subsample data.
indices = timeseries.subsampleCorrelatedData(u_n, g=g)
u_n = u_n[indices]
u_kln = u_kln[:,:,indices]
N_k = len(indices) * numpy.ones([nstates], numpy.int32)
# Analyze with MBAR.
mbar = pymbar.MBAR(u_kln, N_k)
[Delta_f_ij, dDelta_f_ij] = mbar.getFreeEnergyDifferences()
# Compare with analytical results.
f_i_analytical = numpy.zeros([nstates], numpy.float64)
for (state_index, state) in enumerate(simulation.states):
    values = computeHarmonicOscillatorExpectations(K, mass, state.temperature)
    f_i_analytical[state_index] = values['free energies']['potential']
Delta_f_ij_analytical = numpy.zeros([nstates, nstates], numpy.float64)
for i in range(nstates):
    for j in range(nstates):
        Delta_f_ij_analytical[i,j] = f_i_analytical[j] - f_i_analytical[i]
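
The analytical comparison above relies on a closed-form free energy for each harmonic oscillator state. A hedged sketch of that reference value, assuming a 3D oscillator with potential U(r) = (K/2)|r|^2 at inverse temperature beta (the exact bookkeeping inside computeHarmonicOscillatorExpectations may differ):

import math

def harmonic_oscillator_free_energy(K_spring, beta):
    # Dimensionless configurational free energy f = -ln Z, with
    # Z = (2*pi / (beta*K_spring))**(3/2) for a 3D harmonic well.
    return 1.5 * math.log(beta * K_spring / (2.0 * math.pi))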
def analyze_data(store_filename, phipsi_outfile=None):
    """
    Analyze output from parallel tempering simulations.
    
    """

    temperature = 300.0 * units.kelvin # temperature
    ndiscard = 100 # number of samples to discard to equilibration

    # Allocate storage for results.
    results = dict()

    # Compute kappa
    nbins = 10
    kB = units.BOLTZMANN_CONSTANT_kB * units.AVOGADRO_CONSTANT_NA # Boltzmann constant        
    kT = (kB * temperature) # thermal energy
    beta = 1.0 / kT # inverse temperature
    delta = 360.0 / float(nbins) * units.degrees # bin spacing
    sigma = delta/3.0 # standard deviation 
    kappa = (sigma / units.radians)**(-2) # kappa parameter (unitless)

    # Open NetCDF file.
    ncfile = netcdf.Dataset(store_filename, 'r', version=2)

    # Get dimensions.
    [niterations, nstates, natoms, ndim] = ncfile.variables['positions'][:,:,:,:].shape    
    print "%d iterations, %d states, %d atoms" % (niterations, nstates, natoms)

    # Discard initial configurations to equilibration.
    print "First %d iterations will be discarded to equilibration." % ndiscard
    niterations -= ndiscard
    
    # Print summary statistics about mixing in state space.
    [tau2, dtau2] = show_mixing_statistics_with_error(ncfile)
                
    # Compute correlation time of state index.
    states = ncfile.variables['states'][:,:].copy()
    A_kn = [ states[:,k].copy() for k in range(nstates) ]
    g_states = timeseries.statisticalInefficiencyMultiple(A_kn)
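    # The statistical inefficiency relates to the integrated autocorrelation
    # time via g = 1 + 2*tau, hence tau = (g - 1)/2 below.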
    tau_states = (g_states-1.0)/2.0
    # Compute statistical error.
    nblocks = 10
    blocksize = int(niterations) / int(nblocks)
    g_states_i = numpy.zeros([nblocks], numpy.float64)
    tau_states_i = numpy.zeros([nblocks], numpy.float64)        
    for block_index in range(nblocks):
        # Extract block
        states = ncfile.variables['states'][(blocksize*block_index):(blocksize*(block_index+1)),:].copy()
        A_kn = [ states[:,k].copy() for k in range(nstates) ]
        g_states_i[block_index] = timeseries.statisticalInefficiencyMultiple(A_kn)
        tau_states_i[block_index] = (g_states_i[block_index]-1.0)/2.0            
    dg_states = g_states_i.std() / numpy.sqrt(float(nblocks))
    dtau_states = tau_states_i.std() / numpy.sqrt(float(nblocks))
    # Print.
    print "g_states = %.3f+-%.3f iterations" % (g_states, dg_states)
    print "tau_states = %.3f+-%.3f iterations" % (tau_states, dtau_states)
    del states, A_kn

    # Compute end-to-end time.
    states = ncfile.variables['states'][:,:].copy()
    [tau_end, dtau_end] = average_end_to_end_time(states)

    # Compute statistical inefficiency for reduced potential
    energies = ncfile.variables['energies'][ndiscard:,:,:].copy()
    states = ncfile.variables['states'][ndiscard:,:].copy()    
    u_n = numpy.zeros([niterations], numpy.float64)
    for iteration in range(niterations):
        u_n[iteration] = 0.0
        for replica in range(nstates):
            state = states[iteration,replica]
            u_n[iteration] += energies[iteration,replica,state]
    del energies, states
    g_u = timeseries.statisticalInefficiency(u_n)
    print "g_u = %8.1f iterations" % g_u
        
    # Compute x and y umbrellas.    
    print "Computing torsions..."
    positions = ncfile.variables['positions'][ndiscard:,:,:,:]
    coordinates = units.Quantity(numpy.zeros([natoms,ndim], numpy.float32), units.angstroms)
    phi_it = units.Quantity(numpy.zeros([nstates,niterations], numpy.float32), units.radians)
    psi_it = units.Quantity(numpy.zeros([nstates,niterations], numpy.float32), units.radians)
    for iteration in range(niterations):
        for replica in range(nstates):
            coordinates[:,:] = units.Quantity(positions[iteration,replica,:,:].copy(), units.angstroms)
            phi_it[replica,iteration] = compute_torsion(coordinates, 4, 6, 8, 14) 
            psi_it[replica,iteration] = compute_torsion(coordinates, 6, 8, 14, 16)

    # Run MBAR.
    print "Grouping torsions by state..."
    phi_state_it = numpy.zeros([nstates,niterations], numpy.float32)
    psi_state_it = numpy.zeros([nstates,niterations], numpy.float32)
    states = ncfile.variables['states'][ndiscard:,:].copy()                
    for iteration in range(niterations):
        replicas = numpy.argsort(states[iteration,:])            
        for state in range(1,nstates):
            replica = replicas[state]
            phi_state_it[state,iteration] = phi_it[replica,iteration] / units.radians
            psi_state_it[state,iteration] = psi_it[replica,iteration] / units.radians
            
    print "Evaluating reduced potential energies..."
    N_k = numpy.ones([nstates], numpy.int32) * niterations
    u_kln = numpy.zeros([nstates, nstates, niterations], numpy.float32)
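    # State 0 is the unbiased state (its column of u_kln stays zero); each
    # biased state l > 0 adds the reduced von Mises umbrella
    # u_bias = -kappa*[cos(phi - phi0_l) + cos(psi - psi0_l)].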
    for l in range(1,nstates):
        phi0 = ((numpy.floor((l-1)/nbins) + 0.5) * delta - 180.0 * units.degrees) / units.radians
        psi0 = ((numpy.remainder((l-1), nbins) + 0.5) * delta - 180.0 * units.degrees) / units.radians
        u_kln[:,l,:] = - kappa * numpy.cos(phi_state_it[:,:] - phi0) - kappa * numpy.cos(psi_state_it[:,:] - psi0)

#    print "Running MBAR..."
#    #mbar = pymbar.MBAR(u_kln, N_k, verbose=True, method='self-consistent-iteration')
#    mbar = pymbar.MBAR(u_kln[1:,1:,:], N_k[1:], verbose=True, method='adaptive', relative_tolerance=1.0e-2) # only use biased samples
#    f_k = mbar.f_k
#    mbar = pymbar.MBAR(u_kln[1:,1:,:], N_k[1:], verbose=True, method='Newton-Raphson', initial_f_k=f_k) # only use biased samples
#    #mbar = pymbar.MBAR(u_kln, N_k, verbose=True, method='Newton-Raphson', initialize='BAR')
#    print "Getting free energy differences..."
#    [df_ij, ddf_ij] = mbar.getFreeEnergyDifferences(uncertainty_method='svd-ew')
#    print df_ij
#    print ddf_ij

#    print "ln(Z_ij / Z_55):"
#    reference_bin = 4*nbins+4
#    for psi_index in range(nbins):
#        print "   [,%2d]" % (psi_index+1),
#    print ""
#    for phi_index in range(nbins):
#        print "[%2d,]" % (phi_index+1),
#        for psi_index in range(nbins):
#            print "%8.3f" % (-df_ij[reference_bin, phi_index*nbins+psi_index]),
#        print ""
#    print ""

#    print "dln(Z_ij / Z_55):"
#    reference_bin = 4*nbins+4
#    for psi_index in range(nbins):
#        print "   [,%2d]" % (psi_index+1),
#    print ""
#    for phi_index in range(nbins):
#        print "[%2d,]" % (phi_index+1),
#        for psi_index in range(nbins):
#            print "%8.3f" % (ddf_ij[reference_bin, phi_index*nbins+psi_index]),
#        print ""
#    print ""
    
    # Compute statistical inefficiencies of various functions of the timeseries data.
    print "Computing statistical infficiencies of cos(phi), sin(phi), cos(psi), sin(psi)..."
    cosphi_kn = [ numpy.cos(phi_it[replica,:] / units.radians).copy() for replica in range(1,nstates) ]
    sinphi_kn = [ numpy.sin(phi_it[replica,:] / units.radians).copy() for replica in range(1,nstates) ]
    cospsi_kn = [ numpy.cos(psi_it[replica,:] / units.radians).copy() for replica in range(1,nstates) ]
    sinpsi_kn = [ numpy.sin(psi_it[replica,:] / units.radians).copy() for replica in range(1,nstates) ]
    g_cosphi = timeseries.statisticalInefficiencyMultiple(cosphi_kn)
    g_sinphi = timeseries.statisticalInefficiencyMultiple(sinphi_kn)
    g_cospsi = timeseries.statisticalInefficiencyMultiple(cospsi_kn)
    g_sinpsi = timeseries.statisticalInefficiencyMultiple(sinpsi_kn)
    tau_cosphi = (g_cosphi-1.0)/2.0
    tau_sinphi = (g_sinphi-1.0)/2.0
    tau_cospsi = (g_cospsi-1.0)/2.0
    tau_sinpsi = (g_sinpsi-1.0)/2.0        

    # Compute relaxation times in each torsion.
    print "Relaxation times for transitions among phi or psi bins alone:"
    phibin_it = ((phi_it + 180.0 * units.degrees) / (delta + 0.1*units.degrees)).astype(numpy.int16)  # slight padding keeps the bin index below nbins at the +180-degree edge
    tau_phi = compute_relaxation_time(phibin_it, nbins)
    psibin_it = ((psi_it + 180.0 * units.degrees) / (delta + 0.1*units.degrees)).astype(numpy.int16)  # same padding for psi
    tau_psi = compute_relaxation_time(psibin_it, nbins)
    print "tau_phi = %8.1f iteration" % tau_phi
    print "tau_psi = %8.1f iteration" % tau_psi

    # Compute statistical error.
    nblocks = 10
    blocksize = int(niterations) / int(nblocks)
    g_cosphi_i = numpy.zeros([nblocks], numpy.float64)
    g_sinphi_i = numpy.zeros([nblocks], numpy.float64)
    g_cospsi_i = numpy.zeros([nblocks], numpy.float64)
    g_sinpsi_i = numpy.zeros([nblocks], numpy.float64)        
    tau_cosphi_i = numpy.zeros([nblocks], numpy.float64)
    tau_sinphi_i = numpy.zeros([nblocks], numpy.float64)
    tau_cospsi_i = numpy.zeros([nblocks], numpy.float64)
    tau_sinpsi_i = numpy.zeros([nblocks], numpy.float64)                
    for block_index in range(nblocks):
        # Extract block  
        slice_indices = range(blocksize*block_index,blocksize*(block_index+1))
        cosphi_kn = [ numpy.cos(phi_it[replica,slice_indices] / units.radians).copy() for replica in range(1,nstates) ]
        sinphi_kn = [ numpy.sin(phi_it[replica,slice_indices] / units.radians).copy() for replica in range(1,nstates) ]
        cospsi_kn = [ numpy.cos(psi_it[replica,slice_indices] / units.radians).copy() for replica in range(1,nstates) ]
        sinpsi_kn = [ numpy.sin(psi_it[replica,slice_indices] / units.radians).copy() for replica in range(1,nstates) ]
        g_cosphi_i[block_index] = timeseries.statisticalInefficiencyMultiple(cosphi_kn)
        g_sinphi_i[block_index] = timeseries.statisticalInefficiencyMultiple(sinphi_kn)
        g_cospsi_i[block_index] = timeseries.statisticalInefficiencyMultiple(cospsi_kn)
        g_sinpsi_i[block_index] = timeseries.statisticalInefficiencyMultiple(sinpsi_kn)
        tau_cosphi_i[block_index] = (g_cosphi_i[block_index]-1.0)/2.0
        tau_sinphi_i[block_index] = (g_sinphi_i[block_index]-1.0)/2.0
        tau_cospsi_i[block_index] = (g_cospsi_i[block_index]-1.0)/2.0
        tau_sinpsi_i[block_index] = (g_sinpsi_i[block_index]-1.0)/2.0

    dtau_cosphi = tau_cosphi_i.std() / numpy.sqrt(float(nblocks))
    dtau_sinphi = tau_sinphi_i.std() / numpy.sqrt(float(nblocks))
    dtau_cospsi = tau_cospsi_i.std() / numpy.sqrt(float(nblocks))
    dtau_sinpsi = tau_sinpsi_i.std() / numpy.sqrt(float(nblocks))        

    del cosphi_kn, sinphi_kn, cospsi_kn, sinpsi_kn

    print "Integrated autocorrelation times"
    print "tau_cosphi = %8.1f+-%.1f iterations" % (tau_cosphi, dtau_cosphi)
    print "tau_sinphi = %8.1f+-%.1f iterations" % (tau_sinphi, dtau_sinphi)
    print "tau_cospsi = %8.1f+-%.1f iterations" % (tau_cospsi, dtau_cospsi)
    print "tau_sinpsi = %8.1f+-%.1f iterations" % (tau_sinpsi, dtau_sinpsi)

    # Print LaTeX line.
    print ""
    print "%(store_filename)s & %(tau2).2f $\pm$ %(dtau2).2f & %(tau_states).2f $\pm$ %(dtau_states).2f & %(tau_end).2f $\pm$ %(dtau_end).2f & %(tau_cosphi).2f $\pm$ %(dtau_cosphi).2f & %(tau_sinphi).2f $\pm$ %(dtau_sinphi).2f & %(tau_cospsi).2f $\pm$ %(dtau_cospsi).2f & %(tau_sinpsi).2f $\pm$ %(dtau_sinpsi).2f \\\\" % vars()
    print ""        

    if phipsi_outfile is not None:        
        # Write uncorrelated (phi,psi) data
        outfile = open(phipsi_outfile, 'w')
        outfile.write('# alanine dipeptide 2d umbrella sampling data\n')        
        # Write umbrella restraints
        nbins = 10 # number of bins per torsion
        outfile.write('# %d x %d grid of restraints\n' % (nbins, nbins))
        outfile.write('# Each state was sampled from p_i(x) = Z_i^{-1} q(x) q_i(x) where q_i(x) = exp[kappa*cos(phi(x)-phi_i) + kappa*cos(psi(x)-psi_i)]\n')
        outfile.write('# phi(x) and psi(x) are periodic torsion angles on domain [-180, +180) degrees.\n')
        outfile.write('# kappa = %f\n' % kappa)
        outfile.write('# phi_i = [-180 + (floor(i / nbins) + 0.5) * delta] degrees\n')
        outfile.write('# psi_i = [-180 + (     (i % nbins) + 0.5) * delta] degrees\n')
        outfile.write('# where i = 0...%d, nbins = %d, and delta = %f degrees\n' % (nbins*nbins-1, nbins, delta / units.degrees))
        outfile.write('# Data has been subsampled to generate approximately uncorrelated samples.\n')        
        outfile.write('#\n')
        # write data header
        outfile.write('# ')
        for replica in range(nstates):
            outfile.write('state  %06d  ' % replica)
        outfile.write('\n')
        # write data        
        indices = timeseries.subsampleCorrelatedData(u_n, g=g_u) # indices of uncorrelated iterations
        states = ncfile.variables['states'][ndiscard:,:].copy()            
        for iteration in indices:
            outfile.write('  ')
            replicas = numpy.argsort(states[iteration,:])            
            for state in range(1,nstates):
                replica = replicas[state]
                outfile.write('%+6.1f %+6.1f  ' % (phi_it[replica,iteration] / units.degrees, psi_it[replica,iteration] / units.degrees))
            outfile.write('\n')
        outfile.close()

    return results
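
The block-averaging error estimates used twice above follow the same pattern each time. A small generic sketch (the function name is illustrative; statistic can be, e.g., timeseries.statisticalInefficiency):

import numpy

def block_standard_error(data, statistic, nblocks=10):
    # Split the timeseries into contiguous blocks, recompute the statistic on
    # each block, and report the mean and its standard error across blocks.
    blocksize = len(data) // nblocks
    values = numpy.array([statistic(data[i * blocksize:(i + 1) * blocksize])
                          for i in range(nblocks)])
    return (values.mean(), values.std() / numpy.sqrt(float(nblocks)))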