Esempio n. 1
0
 def _detect_equilibration(self, A_t):
     """
     Automatically detect equilibrated region.

     For every candidate origin t the statistical inefficiency g_t of the
     tail A_t[t:] is estimated, and the origin that maximises
     (T - t + 1) / g_t is selected.

     ARGUMENTS

     A_t (numpy.array) - timeseries

     RETURNS

     t (int) - start of equilibrated data
     g (float) - statistical inefficiency of equilibrated data
     Neff_max (float) - number of uncorrelated samples

     A constant timeseries (zero standard deviation) returns (0, 1, T)
     without any further analysis.

     """
     T = A_t.size

     # Special case if timeseries is constant.
     if A_t.std() == 0.0:
         return (0, 1, T)

     g_t = numpy.ones([T-1], numpy.float32)
     Neff_t = numpy.ones([T-1], numpy.float32)
     # One inefficiency estimate per candidate origin; no progress output is
     # written here (the loop runs T-1 times).
     for t in range(T-1):
         g_t[t] = timeseries.statisticalInefficiency(A_t[t:T])
         Neff_t[t] = (T-t+1) / g_t[t]

     # Pick the origin that leaves the most effectively uncorrelated samples.
     Neff_max = Neff_t.max()
     t = Neff_t.argmax()
     g = g_t[t]

     return (t, g, Neff_max)
Esempio n. 2
0
def detect_equilibration(A_t):
    """
    Locate the start of the equilibrated part of a timeseries.

    Each index t in 0..T-2 is tried as the origin: the statistical
    inefficiency of A_t[t:] is estimated and converted to an effective
    sample count (T - t + 1) / g.  The origin with the largest effective
    sample count wins.

    ARGUMENTS

    A_t (numpy.array) - timeseries

    RETURNS

    t (int) - start of equilibrated data
    g (float) - statistical inefficiency of equilibrated data
    Neff_max (float) - number of uncorrelated samples

    A timeseries with zero standard deviation yields (0, 1, T) directly.

    """
    n_samples = A_t.size

    # A constant series carries no correlation information.
    if A_t.std() == 0.0:
        return (0, 1, n_samples)

    n_origins = n_samples - 1
    inefficiency = numpy.ones([n_origins], numpy.float32)
    n_effective = numpy.ones([n_origins], numpy.float32)
    for start in range(n_origins):
        tail = A_t[start:n_samples]
        inefficiency[start] = timeseries.statisticalInefficiency(tail)
        n_effective[start] = (n_samples - start + 1) / inefficiency[start]

    best_start = n_effective.argmax()
    return (best_start, inefficiency[best_start], n_effective.max())
Esempio n. 3
0
def subsample(Q_n, localQ):
    """Keep only the columns of localQ that correspond to decorrelated samples of Q_n.

    The statistical inefficiency of the timeseries Q_n is estimated, the
    indices of the decorrelated samples are derived from it, and those
    indices select columns (second axis) of localQ.

    ARGUMENTS

    Q_n - timeseries used for the correlation analysis
    localQ - two-dimensional array indexed as localQ[:, n]

    RETURNS

    localQ restricted to the retained sample indices.
    """
    # Single-argument parenthesised print gives the same output on Python 2 and 3.
    print('Subsampling the data')
    g = timeseries.statisticalInefficiency(Q_n)
    indices = numpy.array(timeseries.subsampleCorrelatedData(Q_n, g))
    print('%i uncorrelated samples found of %i original samples' % (len(indices), len(Q_n)))
    return localQ[:, indices]
Esempio n. 4
0
def EXPgauss(w_F, compute_uncertainty=True, is_timeseries=False):
    """
    Estimate free energy difference using gaussian approximation to one-sided (unidirectional) exponential averaging.

    The estimate is DeltaF = <w_F> - 0.5 * var(w_F).

    ARGUMENTS
      w_F (numpy array) - w_F[t] is the forward work value from snapshot t.  t = 0...(T-1)  Length T is deduced from vector.

    OPTIONAL ARGUMENTS
      compute_uncertainty (boolean) - if False, will disable computation of the statistical uncertainty (default: True)
      is_timeseries (boolean) - if True, correlation in data is corrected for by estimation of statistical inefficiency (default: False)
                                Use this option if you are providing correlated timeseries data and have not subsampled the data to produce uncorrelated samples.

    RETURNS
      DeltaF (float) - DeltaF is the free energy difference between the two states.
      dDeltaF (float) - dDeltaF is the uncertainty, and is only returned if compute_uncertainty is set to True

    NOTE

      If you are providing correlated timeseries data, be sure to set the 'is_timeseries' flag to True

    EXAMPLES

    Compute the free energy difference given a sample of forward work values.

    >>> import testsystems
    >>> [w_F, w_R] = testsystems.gaussian_work_example(mu_F=None, DeltaF=1.0, seed=0)
    >>> [DeltaF, dDeltaF] = EXPgauss(w_F)
    >>> print 'Forward Gaussian approximated free energy difference is %.3f +- %.3f kT' % (DeltaF, dDeltaF)
    Forward Gaussian approximated free energy difference is 1.049 +- 0.089 kT
    >>> [DeltaF, dDeltaF] = EXPgauss(w_R)
    >>> print 'Reverse Gaussian approximated free energy difference is %.3f +- %.3f kT' % (DeltaF, dDeltaF)
    Reverse Gaussian approximated free energy difference is -1.073 +- 0.080 kT

    """
    n_work = float(np.size(w_F))  # number of work measurements
    var = np.var(w_F)

    # Gaussian approximation: dG = <U> - 0.5*var(U)
    DeltaF = np.average(w_F) - 0.5 * var

    if not compute_uncertainty:
        return DeltaF

    # Effective number of uncorrelated samples.
    n_eff = n_work
    if is_timeseries:
        # Shrink the sample count by the statistical inefficiency of w_F.
        import timeseries
        n_eff = n_work / timeseries.statisticalInefficiency(w_F, w_F)

    # Squared uncertainty: one term from the mean, one from the variance.
    dx2 = var / n_eff + 0.5 * var * var / (n_eff - 1)
    return (DeltaF, np.sqrt(dx2))
Esempio n. 5
0
def EXPgauss(w_F, compute_uncertainty=True, is_timeseries=False):
    """
    Gaussian-approximation estimate of a free energy difference from one-sided (unidirectional) work values.

    The estimate returned is DeltaF = mean(w_F) - 0.5 * var(w_F).

    ARGUMENTS
      w_F (numpy array) - w_F[t] is the forward work value from snapshot t.  t = 0...(T-1)  Length T is deduced from vector.

    OPTIONAL ARGUMENTS
      compute_uncertainty (boolean) - if False, will disable computation of the statistical uncertainty (default: True)
      is_timeseries (boolean) - if True, correlation in data is corrected for by estimation of statistical inefficiency (default: False)
                                Use this option if you are providing correlated timeseries data and have not subsampled the data to produce uncorrelated samples.

    RETURNS
      DeltaF (float) - DeltaF is the free energy difference between the two states.
      dDeltaF (float) - dDeltaF is the uncertainty, and is only returned if compute_uncertainty is set to True

    NOTE

      If you are providing correlated timeseries data, be sure to set the 'is_timeseries' flag to True

    EXAMPLES

    Compute the free energy difference given a sample of forward work values.

    >>> import testsystems
    >>> [w_F, w_R] = testsystems.gaussian_work_example(mu_F=None, DeltaF=1.0, seed=0)
    >>> [DeltaF, dDeltaF] = EXPgauss(w_F)
    >>> print 'Forward Gaussian approximated free energy difference is %.3f +- %.3f kT' % (DeltaF, dDeltaF)
    Forward Gaussian approximated free energy difference is 1.049 +- 0.089 kT
    >>> [DeltaF, dDeltaF] = EXPgauss(w_R)
    >>> print 'Reverse Gaussian approximated free energy difference is %.3f +- %.3f kT' % (DeltaF, dDeltaF)
    Reverse Gaussian approximated free energy difference is -1.073 +- 0.080 kT

    """
    sample_count = float(np.size(w_F))
    work_variance = np.var(w_F)
    estimate = np.average(w_F) - 0.5 * work_variance

    if compute_uncertainty:
        effective_count = sample_count
        if is_timeseries:
            # Correlated data: divide the sample count by the statistical
            # inefficiency of the work timeseries.
            import timeseries
            inefficiency = timeseries.statisticalInefficiency(w_F, w_F)
            effective_count = sample_count / inefficiency

        # Contribution of the mean plus contribution of the variance term.
        squared_error = (work_variance / effective_count
                         + 0.5 * work_variance * work_variance / (effective_count - 1))
        return (estimate, np.sqrt(squared_error))

    return estimate
def getNkandUkln(do_dhdl=False):
    """Identify uncorrelated samples and keep only those entries of the reduced potential energy and dhdl arrays.

    Reads the module-level names 'dhdlt', 'u_klt', 'K' and 'n_components',
    together with 'sta' and 'fin', the starting and final snapshot positions
    to be read (both arrays of dimension K).

    For each state k the dhdl components are summed over the window
    sta[k]:fin[k] and the statistical inefficiency of that sum decides which
    snapshots are retained.  If fewer than 50 snapshots survive, every
    snapshot of the window is used instead.

    ARGUMENTS
      do_dhdl (boolean) - if True, also build the subsampled dhdl array and
                          print a per-state summary table (default: False)

    RETURNS
      (dhdl, N_k, u_kln) if do_dhdl is True, otherwise (N_k, u_kln).
    """
    # u_kln[k,m,n] is the reduced potential energy of uncorrelated sample index n from state k evaluated at state m
    u_kln = numpy.zeros([K, K, max(fin - sta)], numpy.float64)
    N_k = numpy.zeros(K, int)  # N_k[k] is the number of uncorrelated samples from state k
    g = numpy.zeros(K, float)  # autocorrelation times for the data
    if do_dhdl:
        # dhdl is value for dhdl for each component in the file at each time.
        dhdl = numpy.zeros([K, n_components, max(fin - sta)], float)
        print("\n\nNumber of correlated and uncorrelated samples:\n\n%6s %12s %12s %12s\n" % ('State', 'N', 'N_k', 'N/N_k'))
    for k in range(K):
        # Sum up over the energy components; notice, that only the relevant data is being used in the third dimension.
        dhdl_sum = numpy.sum(dhdlt[k, :, sta[k]:fin[k]], axis=0)
        # Determine indices of uncorrelated samples from potential autocorrelation analysis at state k
        # (alternatively, could use the energy differences -- here, we will use total dhdl).
        g[k] = timeseries.statisticalInefficiency(dhdl_sum)
        # The subsample indices are relative to the sta[k]:fin[k] window, so
        # shift them by sta[k] before indexing the unsliced arrays below.
        indices = sta[k] + numpy.array(timeseries.subsampleCorrelatedData(dhdl_sum, g=g[k]))
        N = len(indices)  # number of uncorrelated samples
        # Handle case where we end up with too few.
        if N < 50:
            if do_dhdl:
                print("WARNING: Only %s uncorrelated samples found at lambda number %s; proceeding with analysis using correlated samples..." % (N, k))
            indices = sta[k] + numpy.arange(len(dhdl_sum))
            N = len(indices)
        N_k[k] = N  # Store the number of uncorrelated samples from state k.
        for l in range(K):
            u_kln[k, l, 0:N] = u_klt[k, l, indices]
        if do_dhdl:
            print("%6s %12s %12s %12.2f" % (k, fin[k], N_k[k], g[k]))
            for n in range(n_components):
                dhdl[k, n, 0:N] = dhdlt[k, n, indices]
    if do_dhdl:
        return (dhdl, N_k, u_kln)
    return (N_k, u_kln)
Esempio n. 7
0
def compute_stat_inefficiency2D(pos_xkn, pos_ykn, N_k):
    '''Compute per-simulation statistical inefficiencies for two coordinates.

    For each row i, the first N_k[i] entries of pos_xkn[i] and pos_ykn[i]
    are analysed separately; the larger of the two inefficiencies is
    reported as the combined value.

    Returns (ineff, ineffx, ineffy): the combined, x-only and y-only
    inefficiencies, each a float array with one entry per row.
    '''
    logger.info("computing IACTS")
    K = pos_xkn.shape[0]
    # The builtin float replaces the np.float alias, which newer NumPy
    # releases no longer provide.
    ineffx = np.zeros(K, dtype=float)
    ineffy = np.zeros(K, dtype=float)
    for i in range(K):
        ineffx[i] = timeseries.statisticalInefficiency(pos_xkn[i, 0:N_k[i]])
        ineffy[i] = timeseries.statisticalInefficiency(pos_ykn[i, 0:N_k[i]])
    # Take x only where it is strictly larger, otherwise y.
    ineff = np.where(ineffx > ineffy, ineffx, ineffy)
    logger.info("IACTS computed")
    return ineff, ineffx, ineffy
Esempio n. 8
0
def subsample_series(series, g_t=None, return_g_t=False):
    """Subsample a timeseries to its decorrelated entries.

    series     - timeseries to subsample; it is indexed with the retained
                 indices, so it must support index-array selection
    g_t        - statistical inefficiency to use; estimated from series
                 when None (default)
    return_g_t - if True, the inefficiency is returned as a third element

    Returns (state_indices, transfer_series), or
    (state_indices, transfer_series, g_t) when return_g_t is True.
    """
    if g_t is None:
        g_t = timeseries.statisticalInefficiency(series)
    state_indices = timeseries.subsampleCorrelatedData(series, g=g_t, conservative=True)
    transfer_series = series[state_indices]
    if return_g_t:
        return state_indices, transfer_series, g_t
    return state_indices, transfer_series
Esempio n. 9
0
def getefficiency(N_k, U_kn, V_kn, N_kn, type):
    """Return the per-state statistical inefficiency to use for subsampling.

    Depending on `type`, inefficiencies are estimated for the energy
    (U_kn), the volume (V_kn) and/or the particle number (N_kn); only the
    first N_k[k] samples of each state k are analysed.  Observables that are
    not analysed contribute an inefficiency of 1.  The result for each state
    is the largest of the three values.

    The energy is analysed unless type is 'volume' or 'number'; the volume
    when type is in the module-level `requireV`; the particle number when
    type is in the module-level `requireN`.

    Every set of estimates is printed as it is computed.

    Returns a numpy array g of length len(N_k).
    """
    K = len(N_k)

    def _estimate_and_report(A_kn, quantity):
        # Estimate the inefficiency of one observable for every state and
        # print the values in the report format used by this script.
        g_obs = numpy.ones(K)
        for k in range(K):
            g_obs[k] = timeseries.statisticalInefficiency(A_kn[k, 0:N_k[k]],
                                                          fast=False)
        print("Calculating [")
        for k in range(K):
            print(" %.3f " % (g_obs[k]))
        print("] as the statistical inefficiencies of the %s" % quantity)
        return g_obs

    ge = numpy.ones(K)
    gv = numpy.ones(K)
    gn = numpy.ones(K)

    if (type != 'volume') and (type != 'number'):
        ge = _estimate_and_report(U_kn, "energy")
    if type in requireV:
        gv = _estimate_and_report(V_kn, "volume")
    if type in requireN:
        gn = _estimate_and_report(N_kn, "particle number")

    # Element-wise maximum over the three observables.
    g = numpy.max([ge, gv, gn], axis=0)
    print("Using [")
    for k in range(K):
        print(" %.3f " % (g[k]))
    print("] as the statistical inefficiencies")
    return g
Esempio n. 10
0
def compute_stat_inefficiency1D(pos_kn, N_k):
    '''Compute the statistical inefficiency of each row of pos_kn.

    Only the first N_k[i] entries of row i are analysed.  Returns a float
    array with one inefficiency per row.
    '''
    logger.info("computing IACTS")
    K = pos_kn.shape[0]
    # The builtin float replaces the np.float alias, which newer NumPy
    # releases no longer provide.
    ineff = np.zeros(K, dtype=float)
    for i in range(K):
        ineff[i] = timeseries.statisticalInefficiency(pos_kn[i, 0:N_k[i]])
        logger.debug("%d %f\n", i, ineff[i])
    logger.info("IACTS computed")
    return ineff
Esempio n. 11
0
def getefficiency(N_k, U_kn, V_kn, N_kn, type):
    """Pick, for every state, the largest statistical inefficiency among the analysed observables.

    N_k[k] gives the number of valid samples of state k; U_kn, V_kn and N_kn
    hold the energy, volume and particle-number timeseries.  Which of them
    are analysed depends on `type`:

      - energy: unless type is 'volume' or 'number'
      - volume: when type is in the module-level `requireV`
      - particle number: when type is in the module-level `requireN`

    An observable that is not analysed counts as inefficiency 1.  All
    estimates are printed.

    Returns a numpy array with one inefficiency per state.
    """
    K = len(N_k)

    def _report(values, closing_line):
        # One value per line between an opening and a closing bracket line.
        print("Calculating [")
        for k in range(K):
            print(" %.3f " % (values[k]))
        print(closing_line)

    def _estimate(A_kn):
        # Inefficiency of the first N_k[k] samples of every state.
        estimates = numpy.ones(K)
        for k in range(K):
            estimates[k] = timeseries.statisticalInefficiency(A_kn[k, 0:N_k[k]], fast=False)
        return estimates

    ge = numpy.ones(K)
    gv = numpy.ones(K)
    gn = numpy.ones(K)

    if (type != 'volume') and (type != 'number'):
        ge = _estimate(U_kn)
        _report(ge, "] as the statistical inefficiencies of the energy")
    if type in requireV:
        gv = _estimate(V_kn)
        _report(gv, "] as the statistical inefficiencies of the volume")
    if type in requireN:
        gn = _estimate(N_kn)
        _report(gn, "] as the statistical inefficiencies of the particle number")

    # Element-wise maximum over the three observables.
    g = numpy.max([ge, gv, gn], axis=0)
    print("Using [")
    for k in range(K):
        print(" %.3f " % (g[k]))
    print("] as the statistical inefficiencies")
    return g
Esempio n. 12
0
def compute_stat_inefficiency(observ):
    '''Compute the statistical inefficiency of every column of every simulation.

    observ is an iterable of two-dimensional arrays; for each array `sim`
    the inefficiency of sim[:, j] is estimated for every column j.

    Returns a float numpy array built from the nested per-simulation,
    per-column results.
    '''
    logger.info("computing IACTS")
    per_simulation = [
        [timeseries.statisticalInefficiency(sim[:, j]) for j in range(sim.shape[1])]
        for sim in observ
    ]
    # The builtin float replaces the np.float alias, which newer NumPy
    # releases no longer provide.
    ineff = np.array(per_simulation, dtype=float)
    logger.info("IACTS computed")
    return ineff
Esempio n. 13
0
def subsample(U_kn, Q_kn, K, N_max, assume_uncorrelated=False):
    """Subsample U_kn and Q_kn in place so each state keeps only decorrelated samples.

    U_kn, Q_kn - arrays indexed [k, n]; both are modified in place
    K - number of states (rows) to process
    N_max - sample count assigned to every state when the data is taken as
            uncorrelated
    assume_uncorrelated - if True, skip the correlation analysis and report
            N_max samples for every state (default: False, which matches the
            previously hard-coded behaviour)

    For each state k the statistical inefficiency of Q_kn[k] selects the
    retained indices, and the retained entries are packed into the leading
    N_k[k] slots of U_kn[k] and Q_kn[k]; later slots keep their old content.

    Returns (U_kn, Q_kn, N_k) with N_k an int32 array of retained counts.
    """
    N_k = numpy.zeros(K, numpy.int32)
    if assume_uncorrelated:
        print('Assuming data is uncorrelated')
        N_k[:] = N_max
        return U_kn, Q_kn, N_k

    print('Subsampling the data...')
    for k in range(K):  # subsample the energies
        g_k = timeseries.statisticalInefficiency(Q_kn[k])
        # indices of uncorrelated samples
        indices = numpy.array(timeseries.subsampleCorrelatedData(Q_kn[k], g=g_k))
        N_k[k] = len(indices)  # number of uncorrelated samples
        U_kn[k, 0:N_k[k]] = U_kn[k, indices]
        Q_kn[k, 0:N_k[k]] = Q_kn[k, indices]
    return U_kn, Q_kn, N_k
Esempio n. 14
0
def subsample(U_kn, Q_kn, K, N_max, assume_uncorrelated=False):
    """Reduce U_kn and Q_kn, in place, to the decorrelated samples of each state.

    U_kn, Q_kn - arrays indexed [k, n]; both are overwritten in place
    K - number of states (rows) to process
    N_max - number of samples reported per state when no correlation
            analysis is done
    assume_uncorrelated - when True the data is used as-is and every state
            reports N_max samples; the default False keeps the behaviour
            that used to be hard-coded

    In the default mode, the statistical inefficiency of Q_kn[k] decides
    which sample indices of state k are kept; the kept entries are moved to
    the front of U_kn[k] and Q_kn[k], and entries past N_k[k] are left as
    they were.

    Returns (U_kn, Q_kn, N_k) where N_k is an int32 array of kept counts.
    """
    if assume_uncorrelated:
        print('Assuming data is uncorrelated')
        N_k = numpy.zeros(K, numpy.int32)
        N_k[:] = N_max
    else:
        print('Subsampling the data...')
        N_k = numpy.zeros(K, numpy.int32)
        for k in range(K):  # subsample the energies
            inefficiency = timeseries.statisticalInefficiency(Q_kn[k])
            # indices of uncorrelated samples
            kept = numpy.array(timeseries.subsampleCorrelatedData(Q_kn[k], g=inefficiency))
            n_kept = len(kept)
            N_k[k] = n_kept
            U_kn[k, 0:n_kept] = U_kn[k, kept]
            Q_kn[k, 0:n_kept] = Q_kn[k, kept]
    return U_kn, Q_kn, N_k
Esempio n. 15
0
def find_g_t_states(u_kln, states=None, nequil=None):
    """Estimate the statistical inefficiency of several states independently.

    For each requested state s the timeseries u_kln[s, s, :] is analysed,
    starting at the equilibration offset given for that state.

    u_kln  - array indexed [k, l, n]
    states - sequence of state indices to analyse; defaults to every index
             along the first axis of u_kln
    nequil - sequence of equilibration offsets, one per entry of `states`;
             defaults to zero for every state

    Returns (g_t, Neff_max): two float arrays with one entry per requested
    state, in the order of `states`.  Neff_max is the length of the full
    series plus one, divided by g_t.

    Raises ValueError if nequil and states differ in length.
    """
    # Subsample multiple states, this assumes you want to subsample independent of what was fed in
    if states is None:
        states = numpy.arange(u_kln.shape[0])
    num_sample = len(states)
    if nequil is None:
        nequil = numpy.zeros(num_sample, dtype=numpy.int32)
    elif len(nequil) != num_sample:
        raise ValueError("nequil length needs to be the same as length as states!")
    g_t = numpy.zeros([num_sample])
    Neff_max = numpy.zeros([num_sample])
    # Results are stored by position in `states` so that an arbitrary subset
    # of state indices fits the num_sample-long output arrays.
    for i, state in enumerate(states):
        series = u_kln[state, state, :]
        g_t[i] = timeseries.statisticalInefficiency(series[nequil[i]:])
        Neff_max[i] = (series.size + 1) / g_t[i]
    return g_t, Neff_max
Esempio n. 16
0
def EXP(w_F, compute_uncertainty=True, is_timeseries=False):
    """Estimate free energy difference using one-sided (unidirectional) exponential averaging (EXP).

    The estimate is DeltaF = -log < exp(-w_F) >, evaluated with logsumexp.

    Parameters
    ----------
    w_F : np.ndarray, float
        w_F[t] is the forward work value from snapshot t.  t = 0...(T-1)  Length T is deduced from vector.
    compute_uncertainty : bool, optional, default=True
        if False, will disable computation of the statistical uncertainty (default: True)
    is_timeseries : bool, default=False
        if True, correlation in data is corrected for by estimation of statistical inefficiency (default: False)
        Use this option if you are providing correlated timeseries data and have not subsampled the data to produce uncorrelated samples.

    Returns
    -------
    DeltaF : float
        DeltaF is the free energy difference between the two states.
    dDeltaF : float
        dDeltaF is the uncertainty, and is only returned if compute_uncertainty is set to True

    Notes
    -----
    If you are providing correlated timeseries data, be sure to set the 'is_timeseries' flag to True

    Examples
    --------

    Compute the free energy difference given a sample of forward work values.

    >>> from pymbar import testsystems
    >>> [w_F, w_R] = testsystems.gaussian_work_example(mu_F=None, DeltaF=1.0, seed=0)
    >>> [DeltaF, dDeltaF] = EXP(w_F)
    >>> print('Forward free energy difference is %.3f +- %.3f kT' % (DeltaF, dDeltaF))
    Forward free energy difference is 1.088 +- 0.076 kT
    >>> [DeltaF, dDeltaF] = EXP(w_R)
    >>> print('Reverse free energy difference is %.3f +- %.3f kT' % (DeltaF, dDeltaF))
    Reverse free energy difference is -1.073 +- 0.082 kT

    """
    n_work = float(np.size(w_F))  # number of work measurements
    minus_w = -w_F

    # DeltaF = - log < exp(-w_F) >
    DeltaF = -(logsumexp(minus_w) - np.log(n_work))

    if not compute_uncertainty:
        return DeltaF

    # x_i = exp(-w_F_i - max_arg); shifting by the largest argument keeps
    # the exponentials from overflowing.
    x = np.exp(minus_w - np.max(minus_w))

    # Statistical inefficiency of the x timeseries (1 for uncorrelated data).
    g = 1.0
    if is_timeseries:
        import timeseries
        g = timeseries.statisticalInefficiency(x, x)

    # Standard error of <x> with T/g effective samples.
    dx = np.std(x) / np.sqrt(n_work / g)

    # dDeltaF = <x>^-1 dx
    return (DeltaF, dx / x.mean())
Esempio n. 17
0
# Work on the last two entries of T and their energy files.
T_k = T[-2::]
files = ['%s/energy%i.npy' % (direc, T[-2]), '%s/energy%i.npy' % (direc, T[-1])]
#file=['/home/edz3fz/checkensemble_high/CE_high.txt','/home/edz3fz/checkensemble_low/CE_low.txt']
#file=[direc+'/energy426.txt',direc+'/energy442.txt']
#file = ['/home/edz3fz/surface_replica_exchange/replica0/energy300.txt', '/home/edz3fz/surface_replica_exchange/replica3/energy356.txt']
down = load(files[0])
up = load(files[1])
length = len(down)
# Keep the second half of each series; floor division keeps the slice
# index an integer on Python 3 as well.
down = down[length // 2::]
up = up[length // 2::]
#up=up[-50000::]
#down=down[-50000::]
#up=up[::100]
#down=down[::100]

g_up = timeseries.statisticalInefficiency(up)
indices_up = numpy.array(timeseries.subsampleCorrelatedData(up, g=g_up))
print('%s samples' % len(indices_up))

g_down = timeseries.statisticalInefficiency(down)
# Subsample `down` with its own inefficiency: these indices are used to
# index `down` below.
indices_down = numpy.array(timeseries.subsampleCorrelatedData(down, g=g_down))
print('%s samples' % len(indices_down))



type = 'total'
# Row 0 holds the subsampled `down` series, row 1 the subsampled `up` series.
U_kn = zeros([2, len(up)])
U_kn[0, 0:len(indices_down)] = down[indices_down]
U_kn[1, 0:len(indices_up)] = up[indices_up]
#T_k=array([300.,336.8472786])
#T_k=array([426.81933819,442.13650313])
Esempio n. 18
0
        #        dtau_end = tau_end_i.std() / numpy.sqrt(float(nblocks))
        # Print.
        print "tau_end = %.3f+-%.3f iterations" % (tau_end, dtau_end)
        del states

        # Compute statistical inefficiency for reduced potential
        energies = ncfile.variables['energies'][:, :, :].copy()
        states = ncfile.variables['states'][:, :].copy()
        u_n = numpy.zeros([niterations], numpy.float64)
        for iteration in range(niterations):
            u_n[iteration] = 0.0
            for replica in range(nstates):
                state = states[iteration, replica]
                u_n[iteration] += energies[iteration, replica, state]
        del energies, states
        g_u = timeseries.statisticalInefficiency(u_n)
        tau_u = (g_u - 1.0) / 2.0
        print "g_u = %8.1f iterations" % g_u
        print "tau_u = %8.1f iterations" % tau_u

        # DEBUG for lactalbumin
        #continue

        # Compute torsions.
        print "Computing torsions..."
        positions = ncfile.variables['positions'][:, :, :, :]
        coordinates = units.Quantity(
            numpy.zeros([natoms, ndim], numpy.float32), units.angstroms)
        phi_it = units.Quantity(
            numpy.zeros([nstates, niterations], numpy.float32), units.radians)
        psi_it = units.Quantity(
Esempio n. 19
0
    )  # N_k[k] is the number of uncorrelated samples from simulation index k
    reduced_expectation_data = []
    if len(expectation_columns) > 0:
        for i in range(len(expectation_columns)):
            reduced_expectation_data.append(
                numpy.zeros([K, N_samples], numpy.float64))
    reduced_fep_data = []
    if len(fep_columns) > 0:
        for i in range(len(fep_columns)):
            reduced_fep_data.append(numpy.zeros([K, N_samples], numpy.float64))
    for k in range(K):
        # Extract timeseries.
        A_t = biasing_variable_kt[0][k, :]
        # Compute statistical inefficiency.
        try:
            g = timeseries.statisticalInefficiency(A_t)
        except Exception as e:
            print str(e)
            print A_t

        # Subsample data.
        if subsample_trajectories:
            indices = timeseries.subsampleCorrelatedData(A_t, g=g)
        else:
            indices = timeseries.subsampleCorrelatedData(A_t, g=1)
        N = len(indices)  # number of uncorrelated samples
        print "k = %5d : g = %.1f, N = %d" % (k, g, N)
        for i in range(nbiases):
            biasing_variable_kn[i][k, 0:N] = biasing_variable_kt[i][k, indices]
        for i in range(nperturbations + 1):
            U_kn[i][k, 0:N] = U_kt[i][k, indices]
Esempio n. 20
0
#!/usr/bin/python
import string
import os
import sys
import numpy as np
import timeseries

# Load the two timeseries to compare.
dat1 = np.loadtxt("t1.dat", delimiter=None)
dat2 = np.loadtxt("t2.dat", delimiter=None)

# Statistical inefficiency of each series.
g1 = timeseries.statisticalInefficiency(dat1, fast=True)
g2 = timeseries.statisticalInefficiency(dat2, fast=True)

# Effective number of uncorrelated samples; each series uses its own length.
nsamp1 = dat1.size / g1
nsamp2 = dat2.size / g2

avg1 = np.mean(dat1)
avg2 = np.mean(dat2)

# Standard error of the mean based on the effective sample count.
sig1 = np.power(np.var(dat1) / nsamp1, 0.5)
sig2 = np.power(np.var(dat2) / nsamp2, 0.5)

# Report each mean followed by three standard errors.
print("%s %s" % (avg1, 3. * sig1))
print("%s %s" % (avg2, 3. * sig2))
Esempio n. 21
0
    def _subsample_kln(self, u_kln):
        """Discard equilibration samples from u_kln and subsample what remains.

        The equilibration offset(s) and statistical inefficiency are obtained
        in one of three ways:

        * self.save_equil_data is true: they are loaded from an .npz file in
          self.source_directory named from self.save_prefix, self.phase and
          self.subsample_method.  If anything in the loading step raises,
          they are computed instead, and the result is written back to that
          file.
        * self.save_equil_data is false and self.nequil is None: they are
          computed with self._detect_equilibration.
        * otherwise nothing is computed here.

        Afterwards the samples before the offset are dropped and the rest is
        subsampled with timeseries.subsampleCorrelatedData, either per state
        (self.subsample_method == 'per-state') or from the single series
        self.u_n.

        Sets self.N_k, self.u_kln, self.retained_indices and
        self.retained_iters (plus self.u_n in the non-per-state branch, and
        self.nequil / self.g_t when they are loaded or computed).
        Returns None.
        """
        #Try to load in the data
        if self.save_equil_data:  #Check if we want to save/load equilibration data
            try:
                equil_data = numpy.load(
                    os.path.join(
                        self.source_directory, self.save_prefix + self.phase +
                        '_equil_data_%s.npz' % self.subsample_method))
                if self.nequil is None:
                    self.nequil = equil_data['nequil']
                elif type(self.nequil
                          ) is int and self.subsample_method == 'per-state':
                    print "WARRNING: Per-state subsampling requested with only single value for equilibration..."
                    try:
                        self.nequil = equil_data['nequil']
                        print "Loading equilibration from file with %i states read" % self.nstates
                    except:
                        # Fall back to using the single supplied value for every state.
                        print "Assuming equal equilibration per state of %i" % self.nequil
                        self.nequil = numpy.array([self.nequil] * self.nstates)
                self.g_t = equil_data['g_t']
                Neff_max = equil_data['Neff_max']
                #Do equilibration if we have not already
                if self.subsample_method == 'per-state' and (
                        len(self.g_t) < self.nstates
                        or len(self.nequil) < self.nstates):
                    equil_loaded = False
                    # Raised on purpose: the bare except below catches it and
                    # recomputes the equilibration data.
                    raise IndexError
                else:
                    equil_loaded = True
            except:
                # Reached for any exception raised in the loading step above,
                # including the deliberate IndexError.
                if self.subsample_method == 'per-state':
                    self.nequil = numpy.zeros([self.nstates],
                                              dtype=numpy.int32)
                    self.g_t = numpy.zeros([self.nstates])
                    Neff_max = numpy.zeros([self.nstates])
                    for k in xrange(self.nstates):
                        if self.verbose:
                            print "Computing timeseries for state %i/%i" % (
                                k, self.nstates - 1)
                        # No equilibration is discarded here: the offset is
                        # fixed at 0 and g_t comes from the full series.
                        self.nequil[k] = 0
                        self.g_t[k] = timeseries.statisticalInefficiency(
                            u_kln[k, k, :])
                        Neff_max[k] = (u_kln[k, k, :].size + 1) / self.g_t[k]
                        #[self.nequil[k], self.g_t[k], Neff_max[k]] = self._detect_equilibration(u_kln[k,k,:])
                else:
                    if self.nequil is None:
                        [self.nequil, self.g_t,
                         Neff_max] = self._detect_equilibration(self.u_n)
                    else:
                        # NOTE(review): the detected offset is stored in
                        # self.nequil_timeseries, so the existing self.nequil
                        # is kept -- confirm this is intended.
                        [self.nequil_timeseries, self.g_t,
                         Neff_max] = self._detect_equilibration(self.u_n)
                equil_loaded = False
            if not equil_loaded:
                # Cache the freshly computed values for the next run.
                numpy.savez(os.path.join(
                    self.source_directory, self.save_prefix + self.phase +
                    '_equil_data_%s.npz' % self.subsample_method),
                            nequil=self.nequil,
                            g_t=self.g_t,
                            Neff_max=Neff_max)
        # NOTE(review): when save_equil_data is false and self.nequil is
        # already set, neither branch assigns self.g_t or Neff_max; the
        # verbose print further down would then reference an unbound
        # Neff_max -- TODO confirm how callers prepare that case.
        elif self.nequil is None:
            if self.subsample_method == 'per-state':
                self.nequil = numpy.zeros([self.nstates], dtype=numpy.int32)
                self.g_t = numpy.zeros([self.nstates])
                Neff_max = numpy.zeros([self.nstates])
                for k in xrange(self.nstates):
                    [self.nequil[k], self.g_t[k],
                     Neff_max[k]] = self._detect_equilibration(u_kln[k, k, :])
                    if self.verbose:
                        print "State %i equilibrated with %i samples" % (
                            k, int(Neff_max[k]))
            else:
                [self.nequil, self.g_t,
                 Neff_max] = self._detect_equilibration(self.u_n)

        if self.verbose: print[self.nequil, Neff_max]
        # 1) Discard equilibration data
        # 2) Subsample data to obtain uncorrelated samples
        self.N_k = numpy.zeros(self.nstates, numpy.int32)
        if self.subsample_method == 'per-state':
            # Discard samples
            # Per-state count of samples left after the equilibration offset.
            nsamples_equil = self.niterations - self.nequil
            self.u_kln = numpy.zeros(
                [self.nstates, self.nstates,
                 nsamples_equil.max()])
            for k in xrange(self.nstates):
                self.u_kln[k, :, :nsamples_equil[k]] = u_kln[k, :,
                                                             self.nequil[k]:]
            #Subsample
            # Scratch array sized for the longest post-equilibration series;
            # row k is filled with the retained indices of state k.
            transfer_retained_indices = numpy.zeros(
                [self.nstates, nsamples_equil.max()], dtype=numpy.int32)
            for k in xrange(self.nstates):
                state_indices = timeseries.subsampleCorrelatedData(
                    self.u_kln[k, k, :], g=self.g_t[k])
                self.N_k[k] = len(state_indices)
                transfer_retained_indices[k, :self.N_k[k]] = state_indices
            # Final arrays are trimmed to the largest retained count.
            transfer_kln = numpy.zeros(
                [self.nstates, self.nstates,
                 self.N_k.max()])
            self.retained_indices = numpy.zeros(
                [self.nstates, self.N_k.max()], dtype=numpy.int32)
            for k in xrange(self.nstates):
                self.retained_indices[
                    k, :self.N_k[k]] = transfer_retained_indices[
                        k, :self.N_k[k]]  #Memory reduction
                transfer_kln[k, :, :self.N_k[k]] = self.u_kln[
                    k, :, self.retained_indices[k, :self.N_k[
                        k]]].T  #Have to transpose since indexing in this way causes issues

            #Cut down on memory, once function is done, transfer_kln should be released
            self.u_kln = transfer_kln
            self.retained_iters = self.N_k
        else:
            #Discard Samples
            self.u_kln = u_kln[:, :, self.nequil:]
            self.u_n = self.u_n[self.nequil:]
            #Subsamples
            indices = timeseries.subsampleCorrelatedData(
                self.u_n, g=self.g_t)  # indices of uncorrelated samples
            self.u_kln = self.u_kln[:, :, indices]
            # Every state keeps the same set of retained samples.
            self.N_k[:] = len(indices)
            self.retained_indices = indices
            self.retained_iters = len(indices)
        return
    mask_kt[k,0:T_k[k]] = True
# Create a list from this mask.
all_data_indices = where(mask_kt)

# Construct equal-frequency extension bins
print("binning data...")
bin_kt = zeros([K, T_max], int32)
(bin_left_boundary_i, bin_center_i, bin_width_i, bin_assignments) = construct_nonuniform_bins(x_kt[all_data_indices], nbins)
bin_kt[all_data_indices] = bin_assignments

# Compute correlation times.
N_max = 0
g_k = zeros([K], float64)
for k in range(K):
    # Compute statistical inefficiency for extension timeseries
    g = timeseries.statisticalInefficiency(x_kt[k,0:T_k[k]], x_kt[k,0:T_k[k]])
    # store statistical inefficiency
    g_k[k] = g
    print("timeseries %d : g = %.1f, %.0f uncorrelated samples (of %d total samples)" % (k+1, g, floor(T_k[k] / g), T_k[k]))
    # ceil() returns a float; convert it so N_max stays a valid array dimension.
    N_max = max(N_max, int(ceil(T_k[k] / g)) + 1)

# Subsample trajectory position data.
x_kn = zeros([K, N_max], float64)
bin_kn = zeros([K, N_max], int32)
N_k = zeros([K], int32)
for k in range(K):
    # Reuse the statistical inefficiency stored above instead of estimating
    # it a second time from the same timeseries.
    indices = timeseries.subsampleCorrelatedData(x_kt[k,0:T_k[k]], g=g_k[k])
    # Store subsampled positions.
    N_k[k] = len(indices)
    x_kn[k,0:N_k[k]] = x_kt[k,indices]
Esempio n. 23
0
#!/usr/bin/env python

#Usage: calc_acf file [colnr [ndiscard] ]

import sys, timeseries
from numpy import *

# Command-line handling: calc_acf file [colnr [ndiscard] ]
if len(sys.argv) < 2:
    # Fail with a readable message instead of an IndexError traceback.
    sys.exit("Usage: calc_acf file [colnr [ndiscard] ]")

# 1-based index of the column to analyse (defaults to the first column).
colnr = int(sys.argv[2]) if len(sys.argv) > 2 else 1

# Number of leading non-comment lines to drop (defaults to none).
ndiscard = int(sys.argv[3]) if len(sys.argv) > 3 else 0

# Read all lines that do not start with '#'; the 'with' block guarantees the
# file handle is closed even if reading fails.
with open(sys.argv[1]) as infile:
    lines = [line for line in infile if line[0] != '#']

# Blank lines have no columns to parse, so they are skipped here (after the
# discard, so that ndiscard keeps counting exactly the lines it used to).
f = array([float(line.split()[colnr - 1]) for line in lines[ndiscard:] if line.strip()])

# Print the statistical inefficiency of the selected column.
print(timeseries.statisticalInefficiency(f))
Esempio n. 24
0
        infile = open(filename, 'r')
        lines = infile.readlines()
        infile.close()
        # Parse data.
        n = 0
        for line in lines:
            if line[0] != '#' and line[0] != '@':
                tokens = line.split()            
                u_kn[k,n] = beta_k[k] * (float(tokens[2]) - float(tokens[1])) # reduced potential energy without umbrella restraint
                n += 1

    # Compute correlation times for potential energy and chi
    # timeseries.  If the temperatures differ, use energies to determine samples; otherwise, use the cosine of chi
            
    # Choose the observable used to decide which samples are uncorrelated:
    # reduced potential energies when the temperatures differ, otherwise the
    # slower of cos(chi) and sin(chi).
    if (DifferentTemperatures):
        # Both arguments must cover the same samples: restrict each to the
        # first N_k[k] entries, matching the slice that is subsampled below.
        g_k[k] = timeseries.statisticalInefficiency(u_kn[k,0:N_k[k]], u_kn[k,0:N_k[k]])
        print("Correlation time for set %5d is %10.3f" % (k,g_k[k]))
        # Reuse the inefficiency just computed instead of estimating it again.
        indices = timeseries.subsampleCorrelatedData(u_kn[k,0:N_k[k]], g=g_k[k])
    else:
        # chi_kn is divided by 180/pi, i.e. converted from degrees to radians.
        chi_radians = chi_kn[k,0:N_k[k]]/(180.0/numpy.pi)
        g_cos = timeseries.statisticalInefficiency(numpy.cos(chi_radians))
        g_sin = timeseries.statisticalInefficiency(numpy.sin(chi_radians))
        print("g_cos = %.1f | g_sin = %.1f" % (g_cos, g_sin))
        # Be conservative: use the larger of the two inefficiencies.
        g_k[k] = max(g_cos, g_sin)
        print("Correlation time for set %5d is %10.3f" % (k,g_k[k]))
        indices = timeseries.subsampleCorrelatedData(chi_radians, g=g_k[k])
    # Subsample data.
    N_k[k] = len(indices)
    u_kn[k,0:N_k[k]] = u_kn[k,indices]
    chi_kn[k,0:N_k[k]] = chi_kn[k,indices]
Esempio n. 25
0
   #     infile = open(filename, 'r')
   #     lines = infile.readlines()
   #     infile.close()
        # Parse data.
   #     n = 0
   #     for line in lines:
   #         if line[0] != '#' and line[0] != '@':
   #             tokens = line.split()            
   #             u_kn[k,n] = beta_k[k] * (float(tokens[2]) - float(tokens[1])) # reduced potential energy without umbrella restraint
   #             n += 1

    # Compute correlation times for potential energy and val
    # timeseries.  If the temperatures differ, use energies to determine samples; otherwise, use the cosine of val
            
    if (DifferentTemperatures):        
        g_k[k] = timeseries.statisticalInefficiency(u_kn[k,:], u_kn[k,:])
        print "Correlation time for set %5d is %10.3f" % (k,g_k[k])
        indices = timeseries.subsampleCorrelatedData(u_kn[k,:])
    else:
        #g_k[k] = timeseries.statisticalInefficiency(val_kn[k,:], val_kn[k,:])
        #print "Correlation time for set %5d is %10.3f" % (k,g_k[k])
        indices = timeseries.subsampleCorrelatedData(val_kn[k,0:N_k[k]], fast=True, verbose=True)
    # Subsample data.
    N_k[k] = len(indices)
    u_kn[k,0:N_k[k]] = u_kn[k,indices]
    val_kn[k,0:N_k[k]] = val_kn[k,indices]
   # print val_kn[k,0:N_k[k]]

# Set zero of u_kn -- this is arbitrary.
u_kn -= u_kn.min()
        cluster_bin_kn = -1*numpy.ones([K,N_samples], numpy.int32) # cluster_bin_kn[k,n] is the cluster bin index of snapshot n of umbrella simulation k
    N_k = numpy.zeros([K], numpy.int32) # N_k[k] is the number of uncorrelated samples from simulation index k
    reduced_expectation_data = []
    if len(expectation_columns) > 0:
        for i in range(len(expectation_columns)):
            reduced_expectation_data.append(numpy.zeros([K,N_samples], numpy.float64))
    reduced_fep_data = []
    if len(fep_columns) > 0:
        for i in range(len(fep_columns)):
            reduced_fep_data.append(numpy.zeros([K,N_samples], numpy.float64))
    # For each simulation k: estimate the statistical inefficiency of the
    # first biasing variable, pick sample indices, and copy the selected
    # samples of every biasing variable and potential into the *_kn arrays.
    for k in range(K):
        # Extract timeseries.
        A_t = biasing_variable_kt[0][k,:]
        # Compute statistical inefficiency.
        try:
            g = timeseries.statisticalInefficiency(A_t)
        except Exception as e:
            # Show the offending timeseries, then propagate the error.
            # Continuing would either raise a NameError on g (first
            # iteration) or silently reuse g from the previous simulation.
            print(str(e))
            print(A_t)
            raise

        # Subsample data.
        if subsample_trajectories:
            indices = timeseries.subsampleCorrelatedData(A_t, g=g)
        else:
            # g=1 is passed when subsampling is disabled.
            indices = timeseries.subsampleCorrelatedData(A_t, g=1)
        N = len(indices) # number of retained samples
        print("k = %5d : g = %.1f, N = %d" % (k, g, N))
        for i in range(nbiases):
            biasing_variable_kn[i][k,0:N] = biasing_variable_kt[i][k,indices]
        for i in range(nperturbations+1):
            U_kn[i][k,0:N] = U_kt[i][k,indices]
Esempio n. 27
0
    '%s/energy%i.npy' % (direc, T[-1])
]
#file=['/home/edz3fz/checkensemble_high/CE_high.txt','/home/edz3fz/checkensemble_low/CE_low.txt']
#file=[direc+'/energy426.txt',direc+'/energy442.txt']
#file = ['/home/edz3fz/surface_replica_exchange/replica0/energy300.txt', '/home/edz3fz/surface_replica_exchange/replica3/energy356.txt']
# Load the two energy timeseries (files[0] -> "down", files[1] -> "up").
down = load(files[0])
up = load(files[1])
length = len(down)
# Keep only the part of each series from index length // 2 onwards. Floor
# division keeps the slice index an integer on every Python version.
down = down[length // 2::]
up = up[length // 2::]
#up=up[-50000::]
#down=down[-50000::]
#up=up[::100]
#down=down[::100]

# Subsample each series with its own statistical inefficiency.
g_up = timeseries.statisticalInefficiency(up)
indices_up = numpy.array(timeseries.subsampleCorrelatedData(up, g=g_up))
print('%d samples' % len(indices_up))

g_down = timeseries.statisticalInefficiency(down)
# The indices for "down" must be derived from the "down" series itself
# (previously "up" was passed here, which is only equivalent when both
# series have the same length).
indices_down = numpy.array(timeseries.subsampleCorrelatedData(down, g=g_down))
print('%d samples' % len(indices_down))

type = 'total'
# Row 0 holds the subsampled "down" energies, row 1 the subsampled "up"
# energies; size the array for the longer of the two series so that both fit.
U_kn = zeros([2, max(len(up), len(down))])
U_kn[0, 0:len(indices_down)] = down[indices_down]
U_kn[1, 0:len(indices_up)] = up[indices_up]
#T_k=array([300.,336.8472786])
#T_k=array([426.81933819,442.13650313])
#T_k=array([424.67492585,450])
#T_k=array([437.99897735,450])
Esempio n. 28
0
    n_samples, n_bins = bp.shape
    bi = np.arange(n_bins)

    pmf = -0.6*np.log(bp)
    pmf_mean = pmf.mean(axis=0)
    pmf_mean -= np.min(pmf_mean)

    pmf_std = pmf.std(axis=0)

    # Calculate statistical inefficiency
    try:
        g = np.load(stat_ineff_file)
    except:
        g = np.zeros((n_bins,))
        for k in xrange(n_bins):
            g[k] = timeseries.statisticalInefficiency(pmf[:,k])

        np.save(stat_ineff_file, g)

    pmf_err = 1.96*g*pmf_std/np.sqrt(n_samples)

    offset = np.min(pmf_mean - pmf_err)
    pmf_mean -= offset

    fig = plt.figure(1, figsize=fsize)
    ax = fig.add_subplot(111)
    ax.plot(bi, pmf_mean, color='black', lw=2, zorder=1000)
    ax.fill_between(bi, pmf_mean + pmf_err, pmf_mean - pmf_err, alpha=.4, facecolor='gray', edgecolor='black', lw=1, zorder=900)
    ax.set_xlabel(r'$\alpha$')
    ax.set_ylabel(r'$G_{\alpha}$ (kcal/mol)')
Esempio n. 29
0
def EXP(w_F, compute_uncertainty=True, is_timeseries=False):
    """
  One-sided (unidirectional) exponential averaging (EXP) estimate of a free energy difference.

  ARGUMENTS
    w_F (numpy array) - forward work values; w_F[t] belongs to snapshot t, t = 0...(T-1).  T is taken from the array size.

  OPTIONAL ARGUMENTS
    compute_uncertainty (boolean) - when False, only the estimate is computed and returned (default: True)
    is_timeseries (boolean) - when True, the statistical inefficiency of the data is estimated and used to
                              reduce the effective sample count (default: False).  Set this for correlated
                              timeseries data that has not already been subsampled.

  RETURNS
    DeltaF (float) - estimated free energy difference between the two states.
    dDeltaF (float) - uncertainty of DeltaF; returned (as the second tuple element) only if compute_uncertainty is True

  NOTE

    If you are providing correlated timeseries data, be sure to set the 'is_timeseries' flag to True

  EXAMPLES

  Compute the free energy difference given a sample of forward work values.

  >>> import testsystems
  >>> [w_F, w_R] = testsystems.gaussian_work_example(mu_F=None, DeltaF=1.0, seed=0)
  >>> [DeltaF, dDeltaF] = EXP(w_F)
  >>> print 'Forward free energy difference is %.3f +- %.3f kT' % (DeltaF, dDeltaF)
  Forward free energy difference is 1.088 +- 0.076 kT
  >>> [DeltaF, dDeltaF] = EXP(w_R)
  >>> print 'Reverse free energy difference is %.3f +- %.3f kT' % (DeltaF, dDeltaF)
  Reverse free energy difference is -1.073 +- 0.082 kT
  
  """

    # Sample count, kept as a float for the divisions below.
    n_work = float(np.size(w_F))

    # DeltaF = - log < exp(-w_F) >, evaluated through a log-sum-exp.
    free_energy = -(_logsum(-w_F) - np.log(n_work))

    # Nothing more to do when the caller does not want an error estimate.
    if not compute_uncertainty:
        return free_energy

    # Shift the exponent by its maximum so the largest weight is exp(0).
    shift = np.max(-w_F)
    weights = np.exp(-w_F - shift)
    mean_weight = weights.mean()

    # Statistical inefficiency: 1 for uncorrelated data, estimated otherwise.
    if is_timeseries:
        import timeseries
        inefficiency = timeseries.statisticalInefficiency(weights, weights)
    else:
        inefficiency = 1.0

    # Standard error of the mean weight with n_work / inefficiency effective samples.
    weight_stderr = np.std(weights) / np.sqrt(n_work / inefficiency)

    # First-order error propagation through the logarithm: dDeltaF = dx / <x>.
    uncertainty = (weight_stderr / mean_weight)

    return (free_energy, uncertainty)
Esempio n. 30
0
def main():
    # Entry point: estimate <Q>(T) and the heat capacity Cv(T) with MBAR.
    #
    # Steps, in order:
    #   1. parse options and load the simulated temperatures from options.tfile
    #   2. read energies U_kn and the observable Q_kn via read_data()
    #   3. subsample every state using the statistical inefficiency of Q
    #   4. insert intermediate temperatures spaced by dT (these get no samples)
    #   5. build reduced potentials u_kln and initialize MBAR
    #   6. compute expectations of E, E^2 and Q, then Cv at every temperature
    #   7. plot Q versus temperature and save the curves under options.direc
    #
    # Takes no arguments and returns nothing; results are written to disk and,
    # if options.show is set, displayed.
    options = parse_args()
    kB = 0.00831447/4.184  # gas constant: 0.00831447 kJ/(mol*K) divided by 4.184 kJ/kcal, i.e. in kcal/(mol*K)
    dT = 2.5              # Temperature increment for calculating Cv(T)
    
    T = numpy.loadtxt(options.tfile)
    print 'Initial temperature states are', T
    K = len(T)
  
    U_kn, Q_kn, N_max = read_data(options,T,K)

    print 'Subsampling Q...' 
    N_k = numpy.zeros(K,numpy.int32)
    g = numpy.zeros(K,numpy.float64)
    for k in range(K):  # subsample every state, using the correlation of Q to pick the samples
       g[k] = timeseries.statisticalInefficiency(Q_kn[k])#,suppress_warning=True)
       indices = numpy.array(timeseries.subsampleCorrelatedData(Q_kn[k],g=g[k])) # indices of uncorrelated samples
       N_k[k] = len(indices) # number of uncorrelated samples
       print '%i uncorrelated samples out of %i total samples' %(len(indices),options.N_max/options.skip)
       # Pack the retained samples at the front of each row; entries past N_k[k] are stale.
       U_kn[k,0:N_k[k]] = U_kn[k,indices]
       Q_kn[k,0:N_k[k]] = Q_kn[k,indices]

    # NOTE: insert is hard-coded to True, so the 'else' branches below are never taken.
    insert = True
    if insert: 
        #------------------------------------------------------------------------
        # Insert Intermediate T's and corresponding blank U's and E's
        #------------------------------------------------------------------------
        # Set up variables
        Temp_k = T
        currentT = T[0] + dT
        maxT = T[-1]
        i = 1
        
        print("--Inserting intermediate temperatures...")
        
        # Loop, inserting T's at which we are interested in the properties
        # currentT walks upward in steps of dT; whenever it reaches or passes the
        # next existing temperature Temp_k[i], it restarts from that temperature.
        while (currentT < maxT) :
               if (currentT < Temp_k[i]):
                     Temp_k = numpy.insert(Temp_k, i, currentT)
                     currentT = currentT + dT
               else:
                     currentT = Temp_k[i] + dT
                     i = i + 1
                     
        # Update number of states
        K = len(Temp_k)
        
        print("--Inserting blank energies to match up with inserted temperatures...")
        
        # Loop, inserting E's into blank matrix (leaving blanks only where new Ts are inserted)
        Q_fromfile = Q_kn
        Nall_k = numpy.zeros([K], numpy.int32) # Number of samples (n) for each state (k) = number of iterations/energies
        E_kn = numpy.zeros([K, N_max], numpy.float64)
        Q_kn = numpy.zeros([K, N_max], numpy.float64)
        i = 0
        
        # i indexes the original (simulated) temperatures; rows for inserted
        # temperatures stay zero with Nall_k[k] == 0.
        for k in range(K):
            if (Temp_k[k] == T[i]):
                E_kn[k,0:N_k[i]] = U_kn[i,0:N_k[i]]
                Q_kn[k,0:N_k[i]] = Q_fromfile[i,0:N_k[i]]
                Nall_k[k] = N_k[i]
                i = i + 1
    else:
        print 'Not inserting intermediate temperatures'
        Temp_k = T
        E_kn = U_kn
        Nall_k = N_k

    #------------------------------------------------------------------------
    # Compute inverse temperatures
    #------------------------------------------------------------------------
    beta_k = 1 / (kB * Temp_k)
    
    #------------------------------------------------------------------------
    # Compute reduced potential energies
    #------------------------------------------------------------------------
    
    print "--Computing reduced energies..."
    
    u_kln = numpy.zeros([K,K,N_max], numpy.float64) # u_kln is reduced pot. ener. of segment n of temp k evaluated at temp l
    
    for k in range(K):
           for l in range(K):
                 u_kln[k,l,0:Nall_k[k]] = beta_k[l] * E_kn[k,0:Nall_k[k]]
    
    #------------------------------------------------------------------------
    # Initialize MBAR
    #------------------------------------------------------------------------
    
    # Initialize MBAR with Newton-Raphson
    print ""
    print "Initializing MBAR:"
    print "--K = number of Temperatures"
    print "--L = number of Temperatures"
    print "--N = number of Energies per Temperature"
    
    # Use Adaptive Method (Both Newton-Raphson and Self-Consistent, testing which is better)
    if insert:
        mbar = pymbar.MBAR(u_kln, Nall_k, method = 'adaptive', verbose=True, relative_tolerance=1e-12)
    else:
        # Seed MBAR with free energies from histogram WHAM (unreachable while insert is True).
        f_k = wham.histogram_wham(beta_k, U_kn, Nall_k, relative_tolerance = 1.0e-4)
        mbar = pymbar.MBAR(u_kln, Nall_k, initial_f_k = f_k, verbose=True)
    #------------------------------------------------------------------------
    # Compute Expectations for E_kt and E2_kt as E_expect and E2_expect
    #------------------------------------------------------------------------
    print ""
    print "Computing Expectations for E..."
    # NOTE(review): presumably computeExpectations returns a (value, uncertainty) pair, which
    # the multiplication rescales by beta^-1 (beta^-2 for E^2) before unpacking; confirm against pymbar.
    (E_expect, dE_expect) = mbar.computeExpectations(u_kln)*(beta_k)**(-1)
    print "Computing Expectations for E^2..."
    (E2_expect,dE2_expect) = mbar.computeExpectations(u_kln*u_kln)*(beta_k)**(-2)
    
    print "Computing Expectations for Q..."
    (Q,dQ) = mbar.computeExpectations(Q_kn)
    
    #------------------------------------------------------------------------
    # Compute Cv for NVT simulations as ( <E^2> - <E>^2 ) / ( kB*T^2 )
    #------------------------------------------------------------------------
    #print ""
    #print "Computing Heat Capacity as ( <E^2> - <E>^2 ) / ( R*T^2 )..."
    
    Cv_expect = numpy.zeros([K], numpy.float64)
    dCv_expect = numpy.zeros([K], numpy.float64)
    
    for i in range(K):
           Cv_expect[i] = (E2_expect[i] - (E_expect[i]*E_expect[i])) / ( kB * Temp_k[i] * Temp_k[i])
           # NOTE(review): the uncertainty uses only dE_expect (dE2_expect is unused); confirm this is the intended estimate.
           dCv_expect[i] = 2*dE_expect[i]**2 / (kB *Temp_k[i]*Temp_k[i])   # from propagation of error
    
    #print "Temperature  dA         <E> +/- d<E>       <E^2> +/- d<E^2>       Cv +/- dCv"     
    #print "-------------------------------------------------------------------------------"
    #for k in range(K):
    #       print "%8.3f %8.3f %9.3f +/- %5.3f  %9.1f +/- %5.1f   %7.4f +/- %6.4f" % (Temp_k[k],mbar.f_k[k],E_expect[k],dE_expect[k],E2_expect[k],dE2_expect[k],Cv_expect[k], dCv_expect[k])
    #numpy.savetxt('/home/edz3fz/Qsurf_int.txt',Q)
    #numpy.savetxt('/home/edz3fz/dQsurf_int.txt',dQ)
    #numpy.savetxt('/home/edz3fz/dQsol.txt',dQ)
    
    #numpy.savetxt('/home/edz3fz/Qtemp.tt',Temp_k)
    import matplotlib.pyplot as plt
    #ncavg = numpy.average(Q_fromfile, axis=1)
    
    # Plot Q against temperature with error bars, then save the figure and the raw curves.
    plt.figure(1)
    #plt.plot(T, ncavg, 'ko')
    plt.plot(Temp_k,Q,'k')
    plt.errorbar(Temp_k, Q, yerr=dQ)
    plt.xlabel('Temperature (K)')
    plt.ylabel('Q fraction native contacts')
    #plt.title('Heat Capacity from Go like model MC simulation of 1BSQ')
    plt.savefig(options.direc+'/foldingcurve.png')
    # Each saved array stacks three rows: temperatures, values, uncertainties.
    numpy.save(options.direc+'/foldingcurve',numpy.array([Temp_k, Q, dQ]))
    numpy.save(options.direc+'/heatcap',numpy.array([Temp_k, Cv_expect, dCv_expect]))
    if options.show:
        plt.show()
Esempio n. 31
0
        # get state and reduced potentials
        line = lines[t]
        elements = line.split()
        state = int(elements[1]) - 1  # state in range(K)
        current_reduced_potential = float(elements[2])
        for k in range(K):
            u_tk[t, k] = float(elements[3 + k])
        # store
        state_t[t] = state
        u_t[t] = current_reduced_potential - g_k[state]
    print "u_t = "
    #   outfile = open('%s.out' % datafile_directory,'w')
    #   for t in range(T):
    #      outfile.write("%16d %16.8f\n" % (t, u_t[t]))
    #   outfile.close()
    g = timeseries.statisticalInefficiency(u_t, u_t)
    print "g = %16.8f" % g
    # compute correlation function
    print "Computing correlation function..."
    C_t = timeseries.normalizedFluctuationCorrelationFunction(
        u_t, u_t, int(3 * g))
    #   outfile = open('corrfun-%s.dat' % datafile_directory, 'w')
    #   for t in range(len(C_t)):
    #      outfile.write('%8d %16.8f\n' % (t, C_t[t]))
    #   outfile.close()

    # Test MRS's hypothesis.
    u_t_singlestate = zeros([T], float64)
    for state in range(K):
        # construct timeseries
        Nstate = 0
Esempio n. 32
0
#========================================================================

#------------------------------------------------------------------------
# Read Data From File
#------------------------------------------------------------------------

# Load temperatures and total energies, then subsample the energies of every
# temperature down to uncorrelated samples.
print("")
print("Preparing data:")
T_from_file = read_simulation_temps(simulation,NumTemps)
E_from_file = read_total_energies(simulation,TE_COL_NUM)
K = len(T_from_file)  # number of temperatures read from file
N_k = numpy.zeros(K,numpy.int32)  # N_k[k]: number of samples kept for temperature k
g = numpy.zeros(K,numpy.float64)  # g[k]: statistical inefficiency of the energies at temperature k

for k in range(K):  # subsample the energies
   g[k] = timeseries.statisticalInefficiency(E_from_file[k])
   indices = numpy.array(timeseries.subsampleCorrelatedData(E_from_file[k],g=g[k])) # indices of uncorrelated samples
   N_k[k] = len(indices) # number of uncorrelated samples
   # Pack the retained samples at the front of the row; entries past N_k[k] are stale.
   E_from_file[k,0:N_k[k]] = E_from_file[k,indices]

#------------------------------------------------------------------------
# Insert Intermediate T's and corresponding blank U's and E's
#------------------------------------------------------------------------
Temp_k = T_from_file
# First and last temperatures read from file.
# NOTE(review): presumably the list is sorted ascending, making these the extremes; confirm.
minT = T_from_file[0]
maxT = T_from_file[len(T_from_file) - 1]
#beta = 1/(k*BT)
#T = 1/(kB*beta)
if dtype == 'temperature':
	minv = minT
	maxv = maxT
      # get state and reduced potentials
      line = lines[t]
      elements = line.split()
      state = int(elements[1]) - 1 # state in range(K)
      current_reduced_potential = float(elements[2])
      for k in range(K):
         u_tk[t,k] = float(elements[3 + k])
      # store
      state_t[t] = state
      u_t[t] = current_reduced_potential - g_k[state]
   print "u_t = "
#   outfile = open('%s.out' % datafile_directory,'w')
#   for t in range(T):
#      outfile.write("%16d %16.8f\n" % (t, u_t[t]))
#   outfile.close()
   g = timeseries.statisticalInefficiency(u_t, u_t)
   print "g = %16.8f" % g
   # compute correlation function
   print "Computing correlation function..."
   C_t = timeseries.normalizedFluctuationCorrelationFunction(u_t, u_t, int(3 * g))
#   outfile = open('corrfun-%s.dat' % datafile_directory, 'w')
#   for t in range(len(C_t)):
#      outfile.write('%8d %16.8f\n' % (t, C_t[t]))
#   outfile.close()
   
   # Test MRS's hypothesis.
   u_t_singlestate = zeros([T], float64)
   for state in range(K):
      # construct timeseries
      Nstate = 0
      for t in range(T):
Esempio n. 34
0
def EXP(w_F, compute_uncertainty=True, is_timeseries=False):
    """One-sided (unidirectional) exponential averaging (EXP) estimate of a free energy difference.

    Parameters
    ----------
    w_F : np.ndarray, float
        Forward work values; w_F[t] belongs to snapshot t, t = 0...(T-1).  T is taken from the array size.
    compute_uncertainty : bool, optional, default=True
        When False, the uncertainty is not computed and 'dDelta_f' is omitted from the result.
    is_timeseries : bool, default=False
        When True, the statistical inefficiency of the data is estimated and used to reduce the
        effective number of samples.  Set this for correlated timeseries data that has not
        already been subsampled to uncorrelated samples.

    Returns
    -------
    result_vals : dictionary

    Possible keys in the result_vals dictionary

    'Delta_f' : float
        Free energy difference
    'dDelta_f': float
        Estimated standard deviation of free energy difference (only when compute_uncertainty is True)

    Notes
    -----
    If you are providing correlated timeseries data, be sure to set the 'is_timeseries' flag to True

    Examples
    --------

    Compute the free energy difference given a sample of forward work values.

    >>> from pymbar import testsystems
    >>> [w_F, w_R] = testsystems.gaussian_work_example(mu_F=None, DeltaF=1.0, seed=0)
    >>> results = EXP(w_F)
    >>> print('Forward free energy difference is %.3f +- %.3f kT' % (results['Delta_f'], results['dDelta_f']))
    Forward free energy difference is 1.088 +- 0.076 kT
    >>> results = EXP(w_R)
    >>> print('Reverse free energy difference is %.3f +- %.3f kT' % (results['Delta_f'], results['dDelta_f']))
    Reverse free energy difference is -1.073 +- 0.082 kT

    """

    # Sample count, kept as a float for the divisions below.
    n_work = float(np.size(w_F))
    neg_work = -w_F

    # DeltaF = - log < exp(-w_F) >, evaluated through a log-sum-exp.
    result_vals = {'Delta_f': -(logsumexp(neg_work) - np.log(n_work))}

    if compute_uncertainty:
        # Shift the exponent by its maximum so the largest weight is exp(0).
        weights = np.exp(neg_work - np.max(neg_work))
        mean_weight = weights.mean()

        # Statistical inefficiency: 1 for uncorrelated data, estimated otherwise.
        if is_timeseries:
            import timeseries
            inefficiency = timeseries.statisticalInefficiency(weights, weights)
        else:
            inefficiency = 1.0

        # Standard error of the mean weight with n_work / inefficiency effective samples.
        weight_stderr = np.std(weights) / np.sqrt(n_work / inefficiency)

        # First-order error propagation through the logarithm: dDeltaF = dx / <x>.
        result_vals['dDelta_f'] = (weight_stderr / mean_weight)

    return result_vals
Esempio n. 35
0
def EXPGauss(w_F, compute_uncertainty=True, is_timeseries=False):
    """Gaussian approximation to the one-sided (unidirectional) exponential averaging free energy estimate.

    Parameters
    ----------
    w_F : np.ndarray, float
        Forward work values; w_F[t] belongs to snapshot t, t = 0...(T-1).  T is taken from the array size.
    compute_uncertainty : bool, optional, default=True
        When False, the uncertainty is not computed and 'dDelta_f' is omitted from the result.
    is_timeseries : bool, default=False
        When True, the statistical inefficiency of the data is estimated and used to reduce the
        effective number of samples.  Set this for correlated timeseries data that has not
        already been subsampled to uncorrelated samples.

    Returns
    -------
    result_vals : dictionary

    Possible keys in the result_vals dictionary

    'Delta_f' : float
        Free energy difference between the two states
    'dDelta_f': float
        Estimated standard deviation of free energy difference between the two states
        (only when compute_uncertainty is True).

    Notes
    -----
    If you are providing correlated timeseries data, be sure to set the 'is_timeseries' flag to True

    Examples
    --------
    Compute the free energy difference given a sample of forward work values.

    >>> from pymbar import testsystems
    >>> [w_F, w_R] = testsystems.gaussian_work_example(mu_F=None, DeltaF=1.0, seed=0)
    >>> results = EXPGauss(w_F)
    >>> print('Forward Gaussian approximated free energy difference is %.3f +- %.3f kT' % (results['Delta_f'], results['dDelta_f']))
    Forward Gaussian approximated free energy difference is 1.049 +- 0.089 kT
    >>> results = EXPGauss(w_R)
    >>> print('Reverse Gaussian approximated free energy difference is %.3f +- %.3f kT' % (results['Delta_f'], results['dDelta_f']))
    Reverse Gaussian approximated free energy difference is -1.073 +- 0.080 kT

    """

    # Sample count, kept as a float for the divisions below.
    n_work = float(np.size(w_F))
    work_var = np.var(w_F)

    # Gaussian approximation: dG = <U> - 0.5*var(U)
    result_vals = {'Delta_f': np.average(w_F) - 0.5 * work_var}
    if not compute_uncertainty:
        return result_vals

    # Effective number of uncorrelated samples: all of them unless the data
    # is declared a correlated timeseries.
    n_eff = n_work
    if is_timeseries:
        import timeseries
        n_eff = n_work / timeseries.statisticalInefficiency(w_F, w_F)

    # Squared uncertainty: a var / n_eff term plus a 0.5 * var^2 / (n_eff - 1) term.
    squared_error = work_var / n_eff + 0.5 * work_var * work_var / (n_eff - 1)
    result_vals['dDelta_f'] = np.sqrt(squared_error)
    return result_vals
Esempio n. 36
0
        infile = open(filename, 'r')
        lines = infile.readlines()
        infile.close()
        # Parse data.
        n = 0
        for line in lines:
            if line[0] != '#' and line[0] != '@':
                tokens = line.split()            
                u_kn[k,n] = beta_k[k] * (float(tokens[2]) - float(tokens[1])) # reduced potential energy without umbrella restraint
                n += 1

    # Compute correlation times for potential energy and chi
    # timeseries.  If the temperatures differ, use energies to determine samples; otherwise, use the cosine of chi
            
    if (DifferentTemperatures):        
        g_k[k] = timeseries.statisticalInefficiency(u_kn[k,:], u_kn[k,:])
        print "Correlation time for set %5d is %10.3f" % (k,g_k[k])
        indices = timeseries.subsampleCorrelatedData(u_kn[k,:])
    else:
        g_k[k] = timeseries.statisticalInefficiency(numpy.cos(chi_kn[k,:]/(180.0/numpy.pi)),numpy.cos(chi_kn[k,:]/(180.0/numpy.pi)))
        print "Correlation time for set %5d is %10.3f" % (k,g_k[k])
        indices = timeseries.subsampleCorrelatedData(numpy.cos(chi_kn[k,:]/(180.0/numpy.pi)))
    # Subsample data.
    N_k[k] = len(indices)
    u_kn[k,0:N_k[k]] = u_kn[k,indices]
    chi_kn[k,0:N_k[k]] = chi_kn[k,indices]

# Set zero of u_kn -- this is arbitrary.
u_kn -= u_kn.min()

# Construct torsion bins
def analyze_data(store_filename, phipsi_outfile=None):
    """
    Analyze output from parallel tempering simulations.

    Opens the NetCDF store, prints mixing / correlation-time diagnostics for
    the state indices, the summed reduced potential and functions of the
    phi/psi torsions, prints a LaTeX table row summarising them, and
    optionally writes subsampled (phi, psi) pairs to a text file.

    ARGUMENTS

    store_filename (string) - path of the NetCDF file to read; the variables
        'positions', 'states' and 'energies' are accessed
    phipsi_outfile (string) - if not None, path of a text file that receives
        the subsampled torsions (written in degrees)

    RETURNS

    results (dict) - allocated here but never filled, so it is always empty

    """

    temperature = 300.0 * units.kelvin # temperature
    ndiscard = 100 # number of samples to discard to equilibration

    # Allocate storage for results.
    results = dict()

    # Compute kappa
    nbins = 10 # number of bins per torsion
    kB = units.BOLTZMANN_CONSTANT_kB * units.AVOGADRO_CONSTANT_NA # Boltzmann constant
    kT = (kB * temperature) # thermal energy
    beta = 1.0 / kT # inverse temperature
    delta = 360.0 / float(nbins) * units.degrees # bin spacing
    sigma = delta/3.0 # standard deviation
    kappa = (sigma / units.radians)**(-2) # kappa parameter (unitless)

    # Open NetCDF file.
    ncfile = netcdf.Dataset(store_filename, 'r', version=2)
    # Everything that touches the store lives in the try block so that the
    # file handle is released even if one of the analysis steps raises.
    try:
        # Get dimensions.
        [niterations, nstates, natoms, ndim] = ncfile.variables['positions'][:,:,:,:].shape
        print("%d iterations, %d states, %d atoms" % (niterations, nstates, natoms))

        # Discard initial configurations to equilibration.
        print("First %d iterations will be discarded to equilibration." % ndiscard)
        niterations -= ndiscard

        # Print summary statistics about mixing in state space.
        [tau2, dtau2] = show_mixing_statistics_with_error(ncfile)

        # Compute correlation time of state index.
        # NOTE(review): this block reads 'states' from iteration 0, i.e. it does
        # not skip the first ndiscard iterations the way the energy and torsion
        # analysis below does -- confirm whether that is intended.
        states = ncfile.variables['states'][:,:].copy()
        A_kn = [ states[:,k].copy() for k in range(nstates) ]
        g_states = timeseries.statisticalInefficiencyMultiple(A_kn)
        tau_states = (g_states-1.0)/2.0
        # Compute statistical error.
        nblocks = 10
        # Floor division keeps blocksize an integer (it is used as a slice bound).
        blocksize = int(niterations) // int(nblocks)
        g_states_i = numpy.zeros([nblocks], numpy.float64)
        tau_states_i = numpy.zeros([nblocks], numpy.float64)
        for block_index in range(nblocks):
            # Extract block
            states = ncfile.variables['states'][(blocksize*block_index):(blocksize*(block_index+1)),:].copy()
            A_kn = [ states[:,k].copy() for k in range(nstates) ]
            g_states_i[block_index] = timeseries.statisticalInefficiencyMultiple(A_kn)
            tau_states_i[block_index] = (g_states_i[block_index]-1.0)/2.0
        dg_states = g_states_i.std() / numpy.sqrt(float(nblocks))
        dtau_states = tau_states_i.std() / numpy.sqrt(float(nblocks))
        # Print.
        print("g_states = %.3f+-%.3f iterations" % (g_states, dg_states))
        print("tau_states = %.3f+-%.3f iterations" % (tau_states, dtau_states))
        del states, A_kn

        # Compute end-to-end time.
        states = ncfile.variables['states'][:,:].copy()
        [tau_end, dtau_end] = average_end_to_end_time(states)

        # Compute statistical inefficiency for reduced potential
        energies = ncfile.variables['energies'][ndiscard:,:,:].copy()
        states = ncfile.variables['states'][ndiscard:,:].copy()
        u_n = numpy.zeros([niterations], numpy.float64)
        for iteration in range(niterations):
            # Sum, over replicas, the energy of each replica at its current state.
            u_n[iteration] = 0.0
            for replica in range(nstates):
                state = states[iteration,replica]
                u_n[iteration] += energies[iteration,replica,state]
        del energies, states
        g_u = timeseries.statisticalInefficiency(u_n)
        print("g_u = %8.1f iterations" % g_u)

        # Compute x and y umbrellas.
        print("Computing torsions...")
        positions = ncfile.variables['positions'][ndiscard:,:,:,:]
        coordinates = units.Quantity(numpy.zeros([natoms,ndim], numpy.float32), units.angstroms)
        phi_it = units.Quantity(numpy.zeros([nstates,niterations], numpy.float32), units.radians)
        psi_it = units.Quantity(numpy.zeros([nstates,niterations], numpy.float32), units.radians)
        for iteration in range(niterations):
            for replica in range(nstates):
                coordinates[:,:] = units.Quantity(positions[iteration,replica,:,:].copy(), units.angstroms)
                phi_it[replica,iteration] = compute_torsion(coordinates, 4, 6, 8, 14)
                psi_it[replica,iteration] = compute_torsion(coordinates, 6, 8, 14, 16)

        # Run MBAR.
        print("Grouping torsions by state...")
        phi_state_it = numpy.zeros([nstates,niterations], numpy.float32)
        psi_state_it = numpy.zeros([nstates,niterations], numpy.float32)
        states = ncfile.variables['states'][ndiscard:,:].copy()
        for iteration in range(niterations):
            # replicas[s] is the replica currently occupying state s.
            replicas = numpy.argsort(states[iteration,:])
            # State 0 is skipped here; its row stays zero.
            for state in range(1,nstates):
                replica = replicas[state]
                phi_state_it[state,iteration] = phi_it[replica,iteration] / units.radians
                psi_state_it[state,iteration] = psi_it[replica,iteration] / units.radians

        # u_kln and N_k are only consumed by the MBAR analysis that is
        # commented out below; they are still evaluated as in the original.
        print("Evaluating reduced potential energies...")
        N_k = numpy.ones([nstates], numpy.int32) * niterations
        u_kln = numpy.zeros([nstates, nstates, niterations], numpy.float32)
        for l in range(1,nstates):
            phi0 = ((numpy.floor((l-1)/nbins) + 0.5) * delta - 180.0 * units.degrees) / units.radians
            psi0 = ((numpy.remainder((l-1), nbins) + 0.5) * delta - 180.0 * units.degrees) / units.radians
            u_kln[:,l,:] = - kappa * numpy.cos(phi_state_it[:,:] - phi0) - kappa * numpy.cos(psi_state_it[:,:] - psi0)

#    print "Running MBAR..."
#    #mbar = pymbar.MBAR(u_kln, N_k, verbose=True, method='self-consistent-iteration')
#    mbar = pymbar.MBAR(u_kln[1:,1:,:], N_k[1:], verbose=True, method='adaptive', relative_tolerance=1.0e-2) # only use biased samples
#    f_k = mbar.f_k
#    mbar = pymbar.MBAR(u_kln[1:,1:,:], N_k[1:], verbose=True, method='Newton-Raphson', initial_f_k=f_k) # only use biased samples
#    #mbar = pymbar.MBAR(u_kln, N_k, verbose=True, method='Newton-Raphson', initialize='BAR')
#    print "Getting free energy differences..."
#    [df_ij, ddf_ij] = mbar.getFreeEnergyDifferences(uncertainty_method='svd-ew')
#    print df_ij
#    print ddf_ij

#    print "ln(Z_ij / Z_55):"
#    reference_bin = 4*nbins+4
#    for psi_index in range(nbins):
#        print "   [,%2d]" % (psi_index+1),
#    print ""
#    for phi_index in range(nbins):
#        print "[%2d,]" % (phi_index+1),
#        for psi_index in range(nbins):
#            print "%8.3f" % (-df_ij[reference_bin, phi_index*nbins+psi_index]),
#        print ""
#    print ""

#    print "dln(Z_ij / Z_55):"
#    reference_bin = 4*nbins+4
#    for psi_index in range(nbins):
#        print "   [,%2d]" % (psi_index+1),
#    print ""
#    for phi_index in range(nbins):
#        print "[%2d,]" % (phi_index+1),
#        for psi_index in range(nbins):
#            print "%8.3f" % (ddf_ij[reference_bin, phi_index*nbins+psi_index]),
#        print ""
#    print ""

        # Compute statistical inefficiencies of various functions of the timeseries data.
        print("Computing statistical infficiencies of cos(phi), sin(phi), cos(psi), sin(psi)...")
        cosphi_kn = [ numpy.cos(phi_it[replica,:] / units.radians).copy() for replica in range(1,nstates) ]
        sinphi_kn = [ numpy.sin(phi_it[replica,:] / units.radians).copy() for replica in range(1,nstates) ]
        cospsi_kn = [ numpy.cos(psi_it[replica,:] / units.radians).copy() for replica in range(1,nstates) ]
        sinpsi_kn = [ numpy.sin(psi_it[replica,:] / units.radians).copy() for replica in range(1,nstates) ]
        g_cosphi = timeseries.statisticalInefficiencyMultiple(cosphi_kn)
        g_sinphi = timeseries.statisticalInefficiencyMultiple(sinphi_kn)
        g_cospsi = timeseries.statisticalInefficiencyMultiple(cospsi_kn)
        g_sinpsi = timeseries.statisticalInefficiencyMultiple(sinpsi_kn)
        tau_cosphi = (g_cosphi-1.0)/2.0
        tau_sinphi = (g_sinphi-1.0)/2.0
        tau_cospsi = (g_cospsi-1.0)/2.0
        tau_sinpsi = (g_sinpsi-1.0)/2.0

        # Compute relaxation times in each torsion.
        print("Relaxation times for transitions among phi or psi bins alone:")
        phibin_it = ((phi_it + 180.0 * units.degrees) / (delta + 0.1*units.degrees)).astype(numpy.int16)
        tau_phi = compute_relaxation_time(phibin_it, nbins)
        psibin_it = ((psi_it + 180.0 * units.degrees) / (delta + 0.1*units.degrees)).astype(numpy.int16)
        tau_psi = compute_relaxation_time(psibin_it, nbins)
        print("tau_phi = %8.1f iteration" % tau_phi)
        print("tau_psi = %8.1f iteration" % tau_psi)

        # Compute statistical error.
        nblocks = 10
        blocksize = int(niterations) // int(nblocks)
        g_cosphi_i = numpy.zeros([nblocks], numpy.float64)
        g_sinphi_i = numpy.zeros([nblocks], numpy.float64)
        g_cospsi_i = numpy.zeros([nblocks], numpy.float64)
        g_sinpsi_i = numpy.zeros([nblocks], numpy.float64)
        tau_cosphi_i = numpy.zeros([nblocks], numpy.float64)
        tau_sinphi_i = numpy.zeros([nblocks], numpy.float64)
        tau_cospsi_i = numpy.zeros([nblocks], numpy.float64)
        tau_sinpsi_i = numpy.zeros([nblocks], numpy.float64)
        for block_index in range(nblocks):
            # Extract block
            slice_indices = range(blocksize*block_index,blocksize*(block_index+1))
            cosphi_kn = [ numpy.cos(phi_it[replica,slice_indices] / units.radians).copy() for replica in range(1,nstates) ]
            sinphi_kn = [ numpy.sin(phi_it[replica,slice_indices] / units.radians).copy() for replica in range(1,nstates) ]
            cospsi_kn = [ numpy.cos(psi_it[replica,slice_indices] / units.radians).copy() for replica in range(1,nstates) ]
            sinpsi_kn = [ numpy.sin(psi_it[replica,slice_indices] / units.radians).copy() for replica in range(1,nstates) ]
            g_cosphi_i[block_index] = timeseries.statisticalInefficiencyMultiple(cosphi_kn)
            g_sinphi_i[block_index] = timeseries.statisticalInefficiencyMultiple(sinphi_kn)
            g_cospsi_i[block_index] = timeseries.statisticalInefficiencyMultiple(cospsi_kn)
            g_sinpsi_i[block_index] = timeseries.statisticalInefficiencyMultiple(sinpsi_kn)
            tau_cosphi_i[block_index] = (g_cosphi_i[block_index]-1.0)/2.0
            tau_sinphi_i[block_index] = (g_sinphi_i[block_index]-1.0)/2.0
            tau_cospsi_i[block_index] = (g_cospsi_i[block_index]-1.0)/2.0
            tau_sinpsi_i[block_index] = (g_sinpsi_i[block_index]-1.0)/2.0

        dtau_cosphi = tau_cosphi_i.std() / numpy.sqrt(float(nblocks))
        dtau_sinphi = tau_sinphi_i.std() / numpy.sqrt(float(nblocks))
        dtau_cospsi = tau_cospsi_i.std() / numpy.sqrt(float(nblocks))
        dtau_sinpsi = tau_sinpsi_i.std() / numpy.sqrt(float(nblocks))

        del cosphi_kn, sinphi_kn, cospsi_kn, sinpsi_kn

        print("Integrated autocorrelation times")
        print("tau_cosphi = %8.1f+-%.1f iterations" % (tau_cosphi, dtau_cosphi))
        print("tau_sinphi = %8.1f+-%.1f iterations" % (tau_sinphi, dtau_sinphi))
        print("tau_cospsi = %8.1f+-%.1f iterations" % (tau_cospsi, dtau_cospsi))
        print("tau_sinpsi = %8.1f+-%.1f iterations" % (tau_sinpsi, dtau_sinpsi))

        # Print LaTeX line.  The values are pulled from the local namespace by
        # name through vars(); backslashes are escaped explicitly so the emitted
        # text is "$\pm$" and a trailing "\\".
        print("")
        print("%(store_filename)s & %(tau2).2f $\\pm$ %(dtau2).2f & %(tau_states).2f $\\pm$ %(dtau_states).2f & %(tau_end).2f $\\pm$ %(dtau_end).2f & %(tau_cosphi).2f $\\pm$ %(dtau_cosphi).2f & %(tau_sinphi).2f $\\pm$ %(dtau_sinphi).2f & %(tau_cospsi).2f $\\pm$ %(dtau_cospsi).2f & %(tau_sinpsi).2f $\\pm$ %(dtau_sinpsi).2f \\\\" % vars())
        print("")

        if phipsi_outfile is not None:
            # Write uncorrelated (phi,psi) data; the context manager closes the
            # file even if a write fails part-way through.
            with open(phipsi_outfile, 'w') as outfile:
                outfile.write('# alanine dipeptide 2d umbrella sampling data\n')
                # Write umbrella restraints
                outfile.write('# %d x %d grid of restraints\n' % (nbins, nbins))
                outfile.write('# Each state was sampled from p_i(x) = Z_i^{-1} q(x) q_i(x) where q_i(x) = exp[kappa*cos(phi(x)-phi_i) + kappa*cos(psi(x)-psi_i)]\n')
                outfile.write('# phi(x) and psi(x) are periodic torsion angles on domain [-180, +180) degrees.\n')
                outfile.write('# kappa = %f\n' % kappa)
                outfile.write('# phi_i = [-180 + (floor(i / nbins) + 0.5) * delta] degrees\n')
                outfile.write('# psi_i = [-180 + (     (i % nbins) + 0.5) * delta] degrees\n')
                outfile.write('# where i = 0...%d, nbins = %d, and delta = %f degrees\n' % (nbins*nbins-1, nbins, delta / units.degrees))
                outfile.write('# Data has been subsampled to generate approximately uncorrelated samples.\n')
                outfile.write('#\n')
                # write data header
                # NOTE(review): the header names nstates columns while each data
                # row below holds nstates-1 entries (state 0 is skipped) -- confirm.
                outfile.write('# ')
                for replica in range(nstates):
                    outfile.write('state  %06d  ' % replica)
                outfile.write('\n')
                # write data
                indices = timeseries.subsampleCorrelatedData(u_n, g=g_u) # indices of uncorrelated iterations
                states = ncfile.variables['states'][ndiscard:,:].copy()
                for iteration in indices:
                    outfile.write('  ')
                    replicas = numpy.argsort(states[iteration,:])
                    for state in range(1,nstates):
                        replica = replicas[state]
                        outfile.write('%+6.1f %+6.1f  ' % (phi_it[replica,iteration] / units.degrees, psi_it[replica,iteration] / units.degrees))
                    outfile.write('\n')
    finally:
        ncfile.close()

    return results
#        dtau_end = tau_end_i.std() / numpy.sqrt(float(nblocks))
        # Print.
        print "tau_end = %.3f+-%.3f iterations" % (tau_end, dtau_end)
        del states

        # Compute statistical inefficiency for reduced potential
        energies = ncfile.variables['energies'][:,:,:].copy()
        states = ncfile.variables['states'][:,:].copy()    
        u_n = numpy.zeros([niterations], numpy.float64)
        for iteration in range(niterations):
            u_n[iteration] = 0.0
            for replica in range(nstates):
                state = states[iteration,replica]
                u_n[iteration] += energies[iteration,replica,state]
        del energies, states
        g_u = timeseries.statisticalInefficiency(u_n)
        tau_u = (g_u-1.0)/2.0
        print "g_u = %8.1f iterations" % g_u
        print "tau_u = %8.1f iterations" % tau_u

        # DEBUG for lactalbumin
        #continue

        # Compute torsions.
        print "Computing torsions..."
        positions = ncfile.variables['positions'][:,:,:,:]
        coordinates = units.Quantity(numpy.zeros([natoms,ndim], numpy.float32), units.angstroms)
        phi_it = units.Quantity(numpy.zeros([nstates,niterations], numpy.float32), units.radians)
        psi_it = units.Quantity(numpy.zeros([nstates,niterations], numpy.float32), units.radians)
        for iteration in range(niterations):
            for replica in range(nstates):
Esempio n. 39
0
    def _subsample_kln(self, u_kln):
        """
        Discard equilibration and subsample u_kln to uncorrelated samples.

        The equilibration length (self.nequil) and statistical inefficiency
        (self.g_t) are, depending on self.save_equil_data and self.nequil,
        loaded from a cached .npz file, computed here, or left as already set
        on the instance.  The energies are then trimmed and subsampled either
        independently for every state (subsample_method == 'per-state') or
        with one common set of indices derived from self.u_n.

        ARGUMENTS

        u_kln (numpy.array) - energies indexed [k,l,n]; the last axis is the
            one that is trimmed and subsampled

        RETURNS

        None.  Results are stored on the instance: self.u_kln, self.N_k,
        self.retained_indices and self.retained_iters (plus self.u_n when a
        common set of indices is used).

        """
        # Bound up front so the verbose summary below cannot hit an unbound
        # local when equilibration is neither loaded nor detected here
        # (save_equil_data false and nequil already provided).
        Neff_max = None

        #Try to load in the data
        if self.save_equil_data: #Check if we want to save/load equilibration data
            try:
                equil_data = numpy.load(os.path.join(self.source_directory, self.save_prefix + self.phase + '_equil_data_%s.npz' % self.subsample_method))
                if self.nequil is None:
                    self.nequil = equil_data['nequil']
                elif type(self.nequil) is int and self.subsample_method == 'per-state':
                    print("WARRNING: Per-state subsampling requested with only single value for equilibration...")
                    try:
                        self.nequil = equil_data['nequil']
                        print("Loading equilibration from file with %i states read" % self.nstates)
                    except Exception:
                        # Cached value unavailable: replicate the scalar for every state.
                        print("Assuming equal equilibration per state of %i" % self.nequil)
                        self.nequil = numpy.array([self.nequil] * self.nstates)
                self.g_t = equil_data['g_t']
                Neff_max = equil_data['Neff_max']
                #Do equilibration if we have not already
                if self.subsample_method == 'per-state' and (len(self.g_t) < self.nstates or len(self.nequil) < self.nstates):
                    # Cached arrays do not cover every state; jump to the
                    # recomputation branch below.
                    equil_loaded = False
                    raise IndexError
                else:
                    equil_loaded = True
            except Exception:
                # Any failure while loading (missing file, missing key, short
                # arrays, ...) falls back to computing the data from scratch.
                if self.subsample_method == 'per-state':
                    self.nequil = numpy.zeros([self.nstates], dtype=numpy.int32)
                    self.g_t = numpy.zeros([self.nstates])
                    Neff_max = numpy.zeros([self.nstates])
                    for k in range(self.nstates):
                        if self.verbose: print("Computing timeseries for state %i/%i" % (k,self.nstates-1))
                        self.nequil[k] = 0
                        self.g_t[k] = timeseries.statisticalInefficiency(u_kln[k,k,:])
                        Neff_max[k] = (u_kln[k,k,:].size + 1 ) / self.g_t[k]
                        #[self.nequil[k], self.g_t[k], Neff_max[k]] = self._detect_equilibration(u_kln[k,k,:])
                else:
                    if self.nequil is None:
                        [self.nequil, self.g_t, Neff_max] = self._detect_equilibration(self.u_n)
                    else:
                        # A preset nequil is kept; the detected start is stored separately.
                        [self.nequil_timeseries, self.g_t, Neff_max] = self._detect_equilibration(self.u_n)
                equil_loaded = False
            if not equil_loaded:
                numpy.savez(os.path.join(self.source_directory, self.save_prefix + self.phase + '_equil_data_%s.npz' % self.subsample_method), nequil=self.nequil, g_t=self.g_t, Neff_max=Neff_max)
        elif self.nequil is None:
            if self.subsample_method == 'per-state':
                self.nequil = numpy.zeros([self.nstates], dtype=numpy.int32)
                self.g_t = numpy.zeros([self.nstates])
                Neff_max = numpy.zeros([self.nstates])
                for k in range(self.nstates):
                    [self.nequil[k], self.g_t[k], Neff_max[k]] = self._detect_equilibration(u_kln[k,k,:])
                    if self.verbose: print("State %i equilibrated with %i samples" % (k, int(Neff_max[k])))
            else:
                [self.nequil, self.g_t, Neff_max] = self._detect_equilibration(self.u_n)

        if self.verbose: print([self.nequil, Neff_max])
        # 1) Discard equilibration data
        # 2) Subsample data to obtain uncorrelated samples
        self.N_k = numpy.zeros(self.nstates, numpy.int32)
        if self.subsample_method == 'per-state':
            # Discard samples
            nsamples_equil = self.niterations - self.nequil
            self.u_kln = numpy.zeros([self.nstates,self.nstates,nsamples_equil.max()])
            for k in range(self.nstates):
                self.u_kln[k,:,:nsamples_equil[k]] = u_kln[k,:,self.nequil[k]:]
            #Subsample
            transfer_retained_indices = numpy.zeros([self.nstates,nsamples_equil.max()], dtype=numpy.int32)
            for k in range(self.nstates):
                state_indices = timeseries.subsampleCorrelatedData(self.u_kln[k,k,:], g = self.g_t[k])
                self.N_k[k] = len(state_indices)
                transfer_retained_indices[k,:self.N_k[k]] = state_indices
            transfer_kln = numpy.zeros([self.nstates, self.nstates, self.N_k.max()])
            self.retained_indices = numpy.zeros([self.nstates,self.N_k.max()], dtype=numpy.int32)
            for k in range(self.nstates):
                self.retained_indices[k,:self.N_k[k]] = transfer_retained_indices[k,:self.N_k[k]] #Memory reduction
                transfer_kln[k,:,:self.N_k[k]] = self.u_kln[k,:,self.retained_indices[k,:self.N_k[k]]].T #Have to transpose since indexing in this way causes issues

            #Cut down on memory, once function is done, transfer_kln should be released
            self.u_kln = transfer_kln
            self.retained_iters = self.N_k
        else:
            #Discard Samples
            self.u_kln = u_kln[:,:,self.nequil:]
            self.u_n = self.u_n[self.nequil:]
            #Subsamples
            indices = timeseries.subsampleCorrelatedData(self.u_n, g=self.g_t) # indices of uncorrelated samples
            self.u_kln = self.u_kln[:,:,indices]
            self.N_k[:] = len(indices)
            self.retained_indices = indices
            self.retained_iters = len(indices)
        return
Esempio n. 40
0
def EXPGauss(w_F, compute_uncertainty=True, is_timeseries=False):
    """Estimate a free energy difference from one-sided work values via the Gaussian approximation.

    The estimate is the second-order cumulant expression
    ``mean(w_F) - 0.5 * var(w_F)``.

    Parameters
    ----------
    w_F : np.ndarray, float
        w_F[t] is the forward work value from snapshot t, t = 0...(T-1).
        The length T is taken from the size of the array.
    compute_uncertainty : bool, optional, default=True
        If False, the statistical uncertainty is not computed and only
        'Delta_f' is returned.
    is_timeseries : bool, default=False
        If True, the number of samples is divided by the statistical
        inefficiency of ``w_F`` before the uncertainty is evaluated.
        Use this when passing correlated timeseries data that has not been
        subsampled to uncorrelated samples.

    Returns
    -------
    result_vals : dictionary

    Possible keys in the result_vals dictionary

    'Delta_f' : float
        Free energy difference between the two states
    'dDelta_f': float
        Estimated standard deviation of the free energy difference
        (present only when ``compute_uncertainty`` is True).

    Notes
    -----
    If you are providing correlated timeseries data, be sure to set the
    ``is_timeseries`` flag to True.

    Examples
    --------
    Compute the free energy difference given a sample of forward work values.

    >>> from pymbar import testsystems
    >>> [w_F, w_R] = testsystems.gaussian_work_example(mu_F=None, DeltaF=1.0, seed=0)
    >>> results = EXPGauss(w_F)
    >>> print('Forward Gaussian approximated free energy difference is %.3f +- %.3f kT' % (results['Delta_f'], results['dDelta_f']))
    Forward Gaussian approximated free energy difference is 1.049 +- 0.089 kT
    >>> results = EXPGauss(w_R)
    >>> print('Reverse Gaussian approximated free energy difference is %.3f +- %.3f kT' % (results['Delta_f'], results['dDelta_f']))
    Reverse Gaussian approximated free energy difference is -1.073 +- 0.080 kT

    """
    n_samples = float(np.size(w_F))  # number of work measurements
    variance = np.var(w_F)

    # Gaussian approximation: dG = <U> - 0.5*var(U)
    result_vals = {'Delta_f': np.average(w_F) - 0.5 * variance}
    if not compute_uncertainty:
        return result_vals

    # Effective number of uncorrelated samples; equals the raw count unless
    # the caller flagged the data as a correlated timeseries.
    n_effective = n_samples
    if is_timeseries:
        import timeseries
        inefficiency = timeseries.statisticalInefficiency(w_F, w_F)
        n_effective = n_samples / inefficiency

    # Squared standard error: a var/N term plus a 0.5*var^2/(N-1) term.
    squared_error = variance / n_effective + 0.5 * variance * variance / (n_effective - 1)
    result_vals['dDelta_f'] = np.sqrt(squared_error)
    return result_vals