Example #1
0
def get_equilibration_points(df, column_name=None):
    """Run pymbar's equilibration detection on every column of ``df``.

    Directly uses pymbar's timeseries utility.
    source: https://github.com/choderalab/pymbar
    https://pymbar.readthedocs.io/en/master/timeseries.html
    and references therein.

    Parameters
    ----------
    df : pandas.DataFrame
        One timeseries per column.
    column_name : optional
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    dict
        Maps each column name to ``[t, g, Neff_max]`` where

        * t - starting point of the equilibrated part of the series
        * g - the statistical inefficiency = 2*correlationtime + 1
        * Neff_max - effective number of uncorrelated samples

        A column holding complex values yields two entries instead,
        keyed ``name + "_real"`` and ``name + "_imag"``.

    Notes
    -----
    The collected dictionary is also printed before it is returned.
    """
    equilibration_data = {}
    for name in df.columns.values:
        values = np.asarray(df.loc[:, name])
        # Inspect the first element by position: the previous label lookup
        # (`df.loc[0, name]`) failed for any frame whose index has no row
        # labelled 0.  np.complexfloating additionally covers NumPy complex
        # scalars that do not subclass the builtin `complex`.
        if isinstance(values[0], (complex, np.complexfloating)):
            real_part = np.array([x.real for x in values])
            t, g, Neff_max = timeseries.detectEquilibration(real_part)
            equilibration_data[name + "_real"] = [t, g, Neff_max]

            imag_part = np.array([x.imag for x in values])
            t, g, Neff_max = timeseries.detectEquilibration(imag_part)
            equilibration_data[name + "_imag"] = [t, g, Neff_max]
        else:
            t, g, Neff_max = timeseries.detectEquilibration(values)
            equilibration_data[name] = [t, g, Neff_max]
    print(equilibration_data)
    return equilibration_data
def test_detectEquil_constant_trailing():
    # This explicitly tests issue #122, see https://github.com/choderalab/pymbar/issues/122
    x = np.random.normal(size=100) * 0.01
    x[50:] = 3.0
    # The input data is some MCMC chain where the trailing end of the chain is a constant sequence.
    (t, g, Neff_max) = timeseries.detectEquilibration(x)
    """
Example #3
0
def test_detectEquil_constant_trailing():
    # This explicitly tests issue #122, see https://github.com/choderalab/pymbar/issues/122
    x = np.random.normal(size=100) * 0.01
    x[50:] = 3.0
    # The input data is some MCMC chain where the trailing end of the chain is a constant sequence.
    (t, g, Neff_max) = timeseries.detectEquilibration(x)
    """
Example #4
0
def is_converged(series: Series, frac_min=0.5):
    '''
    Decide whether a time series has converged.

    Equilibration detection is run on the values of ``series`` with a
    stride of ``max(1, len(series) // 100)``.

    Parameters
    ----------
    series : Series
        Time series
    frac_min : float
        The series counts as converged only if the detected start of the
        equilibrated region is not later than ``len(series) * (1 - frac_min)``

    Returns
    -------
    converged : bool
        Converged or not
    when
        Index label of ``series`` at the detected equilibration start
    '''
    from pymbar import timeseries

    total = len(series)
    stride = max(1, total // 100)
    start, _, _ = timeseries.detectEquilibration(np.array(series),
                                                 nskip=stride)
    latest_allowed_start = total * (1 - frac_min)
    converged = not (start > latest_allowed_start)
    return converged, series.index[start]
Example #5
0
def calc_df(u_kln):
    """
    Estimate free energy differences between states with MBAR.

    u_kln should be (nstates) x (nstates) x (nframes)
    note that u_kln should be normalized by kT already
    where each element is
        a config from frame `n` of a trajectory conducted with state `k`
        with energy recalculated using parameters of state `l`

    The subsampled frames of every state are packed into the leading
    columns of ``u_kln`` itself, so the caller's array is modified in place.

    Returns the matrix of free energy differences and the matrix of their
    uncertainties as reported by ``MBAR.getFreeEnergyDifferences``.

    Raises ValueError when the first two dimensions differ.
    """
    shape = u_kln.shape
    if shape[0] != shape[1]:
        message = "dimensions {} of u_kln should be square in the first two indices"
        raise ValueError(message.format(shape))
    n_states = shape[0]

    # number of uncorrelated samples kept for each state
    sample_counts = np.zeros([n_states], np.int32)
    for state in range(n_states):
        own_energies = u_kln[state, state, :]
        _, ineff, _ = timeseries.detectEquilibration(own_energies)
        keep = timeseries.subsampleCorrelatedData(own_energies, g=ineff)
        n_keep = len(keep)
        sample_counts[state] = n_keep
        # move the retained frames to the front of this state's rows
        u_kln[state, :, 0:n_keep] = u_kln[state, :, keep].T

    # Compute free energy differences and statistical uncertainties
    estimator = MBAR(u_kln, sample_counts)
    delta_f, d_delta_f, _theta = estimator.getFreeEnergyDifferences()

    return delta_f, d_delta_f
Example #6
0
def get_decorrelated_samples(replica_positions, replica_energies,
                             temperature_list):
    """
        Collect decorrelated samples from a set of replica exchange trajectories, using pymbar's detectEquilibration and timeseries.subsampleCorrelatedData.

        For every replica index ``i`` the energy series ``replica_energies[i][i]`` is analysed: frames before the detected equilibration start are dropped, and the remaining frames are subsampled.

        :param replica_positions: Positions for each replica, indexable first by replica and then by frame
        :type replica_positions: `Quantity() <http://docs.openmm.org/development/api-python/generated/simtk.unit.quantity.Quantity.html>`_ ( np.array( [n_replicas,cgmodel.num_beads,3] ), simtk.unit )

        :param replica_energies: Energies indexed as ``[replica][replica]``, each entry being a series over simulation steps
        :type replica_energies: List( List( float * simtk.unit.energy for simulation_steps ) for num_replicas )

        :param temperature_list: List of temperatures for the simulation data (not used by this function).
        :type temperature_list: List( float * simtk.unit.temperature )

        :returns:
           - configurations ( List ) - The positions of the decorrelated samples, pooled over all replicas
           - energies ( np.array( float ) ) - The energies of the decorrelated samples, converted with ``float()``

        """
    pooled_poses = []
    pooled_energies = []

    n_replicas = len(replica_positions)
    for rep in range(n_replicas):
        rep_energies = replica_energies[rep][rep]
        start, _, _ = timeseries.detectEquilibration(rep_energies)
        production_energies = rep_energies[start:]
        production_poses = replica_positions[rep][start:]
        for frame in timeseries.subsampleCorrelatedData(production_energies):
            pooled_energies.append(production_energies[frame])
            pooled_poses.append(production_poses[frame])

    energy_array = np.array([float(value) for value in pooled_energies])

    return (pooled_poses, energy_array)
Example #7
0
def collatedata(dictionary):
    """Summarise the equilibrated part of every Markov chain.

    ``dictionary`` is indexed as ``dictionary[simstep][timestep][chain]``.
    For each chain and each simulation step the values over all timesteps
    form one series; the part after the detected equilibration start is
    reduced to its mean and standard deviation (rounded to 3 decimals).

    Returns a dict mapping each simulation step to
    ``{'Ave': [...], 'Std': [...]}`` with one entry per parallel chain.
    Progress information is printed along the way.
    """
    simsteps = list(dictionary.keys())
    print(simsteps)
    timesteps = list(dictionary[simsteps[0]].keys())
    print(timesteps)
    # the number of parallel chains is taken from the very first entry
    parallelsims = len(dictionary[simsteps[0]][timesteps[0]])
    print(dictionary[simsteps[0]][timesteps[0]])

    resultstot = {step: {'Ave': [], 'Std': []} for step in simsteps}

    for chain in range(parallelsims):
        print(chain)
        for step in simsteps:
            per_step = dictionary[step]
            series = np.asarray([per_step[tstep][chain] for tstep in timesteps],
                                dtype=float)
            start, _, _ = timeseries.detectEquilibration(series)
            equilibrated = series[start:]
            summary = resultstot[step]
            summary['Ave'].append(round(np.mean(equilibrated), 3))
            summary['Std'].append(round(np.std(equilibrated), 3))
    return resultstot
Example #8
0
def calcTension(energy_data, verbose=False):
    """Estimate a tension and its uncertainty from perturbation energies via MBAR.

    Column 0 of ``energy_data`` is the reference energy; columns 1 and 2 are
    the energies whose differences from column 0 are fed to MBAR after
    division by the module-level ``kTkJmol``.  The result is scaled by the
    module-level ``da`` and ``kT``.

    Returns ``(tension, tensionError)``.
    """
    reference = energy_data[:, 0]
    BdE1 = (energy_data[:, 1] - reference) / kTkJmol
    BdE2 = (energy_data[:, 2] - reference) / kTkJmol

    nstates = 2
    nframes = len(BdE1)
    u_kln = np.zeros([nstates, nstates, nframes], np.float64)
    u_kln[0, 1, :] = BdE1
    u_kln[1, 0, :] = BdE2

    # NOTE(review): the diagonal u_kln[k, k, :] is never filled and so stays
    # zero; the subsampling below therefore runs on a constant series.
    counts = np.zeros([nstates], np.int32)  # number of uncorrelated samples
    for state in range(nstates):
        diagonal = u_kln[state, state, :]
        _, ineff, _ = timeseries.detectEquilibration(diagonal)
        keep = timeseries.subsampleCorrelatedData(diagonal, g=ineff)
        n_keep = len(keep)
        counts[state] = n_keep
        u_kln[state, :, 0:n_keep] = u_kln[state, :, keep].T

    if verbose:
        print("...found {} uncorrelated samples out of {} total samples...".
              format(counts, nframes))
        print("=== Computing free energy differences ===")

    estimator = MBAR(u_kln, counts)
    DeltaF_ij, dDeltaF_ij, _theta = estimator.getFreeEnergyDifferences()

    # (in J/m^2). note da already has a factor of two for the two areas!
    tension = DeltaF_ij[0, 1] / da * 1e18 * kT
    tensionError = dDeltaF_ij[0, 1] / da * 1e18 * kT
    if verbose:
        print('tension (pymbar): {} +/- {}N/m'.format(tension, tensionError))

    return tension, tensionError
Example #9
0
    def production(self):
        """Run the production simulation until the density estimate converges.

        Does nothing when both the production trajectory and the production
        data file already exist.  Otherwise a system is built from the Amber
        prmtop, started from the equilibrated PDB, and stepped in chunks of
        ``N_STEPS``.  After every chunk the reported densities are re-read
        and the loop stops once the standard error of the mean density
        (after discarding the detected equilibration period) drops below
        ``STD_ERROR_TOLERANCE``.
        """
        outputs_exist = (os.path.exists(self.production_dcd_filename)
                         and os.path.exists(self.production_data_filename))
        if outputs_exist:
            return

        prmtop = app.AmberPrmtopFile(self.prmtop_filename)
        pdb = app.PDBFile(self.equil_pdb_filename)

        system = prmtop.createSystem(nonbondedMethod=app.PME,
                                     nonbondedCutoff=CUTOFF,
                                     constraints=app.HBonds)

        integrator = mm.LangevinIntegrator(self.temperature, FRICTION, TIMESTEP)
        barostat = mm.MonteCarloBarostat(PRESSURE, self.temperature, BAROSTAT_FREQUENCY)
        system.addForce(barostat)

        simulation = app.Simulation(prmtop.topology, system, integrator)

        context = simulation.context
        context.setPositions(pdb.positions)
        context.setPeriodicBoxVectors(*pdb.topology.getPeriodicBoxVectors())
        context.setVelocitiesToTemperature(self.temperature)

        print('Production.')

        trajectory_reporter = app.DCDReporter(self.production_dcd_filename, OUTPUT_FREQUENCY)
        simulation.reporters.append(trajectory_reporter)
        data_reporter = app.StateDataReporter(self.production_data_filename,
                                              OUTPUT_DATA_FREQUENCY,
                                              step=True,
                                              potentialEnergy=True,
                                              temperature=True,
                                              density=True)
        simulation.reporters.append(data_reporter)

        column_names = ["step", "U", "Temperature", "Density"]
        while True:
            simulation.step(N_STEPS)
            # re-read everything reported so far, skipping the header row
            table = pd.read_csv(self.production_data_filename, names=column_names, skiprows=1)
            densities = np.array(table.Density)
            t0, g, Neff = ts.detectEquilibration(densities, nskip=1000)
            equilibrated = densities[t0:]
            density_mean_stderr = equilibrated.std() / np.sqrt(Neff)
            if density_mean_stderr < STD_ERROR_TOLERANCE:
                break
Example #10
0
def compute_timeseries(reduced_potentials):
    """
    Use pymbar timeseries to find the uncorrelated samples in an array of reduced potentials.

    Arguments
    ---------
    reduced_potentials : np.array of floats
        reduced potentials from which a timeseries is to be extracted

    Returns
    -------
    A list ``[t0, g, Neff_max, A_t, full_uncorrelated_indices]`` with

    t0 : int
        production region index
    g : float
        statistical inefficiency
    Neff_max : int
        effective number of samples in production region
    A_t : np.array of floats
        the uncorrelated reduced potentials taken from the production region
    full_uncorrelated_indices : list of ints
        uncorrelated indices, expressed relative to the full input array

    """
    from pymbar import timeseries

    t0, g, Neff_max = timeseries.detectEquilibration(reduced_potentials)
    production = reduced_potentials[t0:]
    # these indices are relative to the production slice
    kept = timeseries.subsampleCorrelatedData(production, g=g)
    decorrelated = production[kept]
    # shift back so the indices address the original array
    shifted = [i + t0 for i in kept]

    return [t0, g, Neff_max, decorrelated, shifted]
Example #11
0
def equilibrium_detection(df, series=None, lower=None, upper=None, step=None):
    """Subsample a DataFrame using automated equilibrium detection on a timeseries.

    If `series` is ``None``, then this function will behave the same as
    :func:`slicing`.

    Parameters
    ----------
    df : DataFrame
        DataFrame to subsample according to equilibrium detection on `series`.
    series : Series
        Series to detect equilibration on. If ``None``, no equilibrium
        detection-based subsampling will be performed.
    lower : float
        Lower bound to pre-slice `series` data from.
    upper : float
        Upper bound to pre-slice `series` to (inclusive).
    step : int
        Step between `series` items to pre-slice by.

    Returns
    -------
    DataFrame
        `df` subsampled according to subsampled `series`.

    Raises
    ------
    KeyError
        If `df` has duplicate time values or is not sorted by time.

    See Also
    --------
    pymbar.timeseries.detectEquilibration : detailed background

    """
    if _check_multiple_times(df):
        raise KeyError("Duplicate time values found; equilibrium detection "
                       "is only meaningful for a single, contiguous, "
                       "and sorted timeseries.")

    if not _check_sorted(df):
        raise KeyError("Equilibrium detection only works as expected if "
                       "values are sorted by time, increasing.")

    if series is None:
        return slicing(df, lower=lower, upper=upper, step=step)

    series = slicing(series, lower=lower, upper=upper, step=step)

    # detectEquilibration supplies both the equilibration start and the
    # statistical inefficiency.  A separate statisticalInefficiency() call
    # used to run here, but its result was overwritten straight away, so it
    # only cost time.
    t, statinef, _ = detectEquilibration(series.values)

    # np.rint rounds to the nearest integer (it does not round up)
    stride = int(np.rint(statinef))

    # drop the equilibration period, then keep every `stride`-th sample
    series = series.iloc[t::stride]

    return df.loc[series.index]
Example #12
0
def get_equilibration_data(timeseries_to_analyze):
    """
    Compute the equilibration data of a timeseries.

    Thin wrapper that returns the three values produced by
    ``pymbar.timeseries.detectEquilibration`` as a tuple
    ``(n_equilibration, g_t, n_effective_max)``; see that function for
    full documentation.
    """
    detected = timeseries.detectEquilibration(timeseries_to_analyze)
    n_equilibration, g_t, n_effective_max = detected
    return n_equilibration, g_t, n_effective_max
Example #13
0
def is_equilibrated(data, threshold_fraction=0.50, threshold_neff=50, nskip=1):
    """Check if a dataset is equilibrated based on a fraction of equil data.

    Using the `pymbar.timeseries` module, decide whether a timeseries has
    enough equilibrated data, judged by two thresholds.  `threshold_fraction`
    is the minimum fraction of `data` that must lie in the 'production'
    region; `threshold_neff` is the minimum number of effectively
    uncorrelated samples required.

    `pymbar.timeseries` reports the starting index `t0` of the 'production'
    region.  The production fraction is ``1 - t0 / len(data)``.  When both
    thresholds are met the result is ``[True, t0, g, Neff]``, otherwise
    ``[False, None, None, None]``.

    Parameters
    ----------
    data : numpy.typing.Arraylike
        1-D time dependent data to check for equilibration.
    threshold_fraction : float, optional, default=0.50
        Fraction of data expected to be equilibrated.
    threshold_neff : int, optional, default=50
        Minimum amount of effectively uncorrelated samples to consider
        `data` 'equilibrated'.  Converted with ``int()`` before use.
    nskip : int, optional, default=1
        Since the statistical inefficiency is computed for every time origin
        in a call to timeseries.detectEquilibration, for larger datasets
        (> few hundred), increasing nskip might speed this up, while
        discarding more data.

    Returns
    -------
    list : [True, t0, g, Neff]
        If the data set is considered properly equilibrated
    list : [False, None, None, None]
        If the data set is not considered properly equilibrated

    Raises
    ------
    ValueError
        If `threshold_fraction` lies outside 0.0-1.0 or `threshold_neff`
        is smaller than 1.

    """
    fraction_out_of_range = threshold_fraction < 0.0 or threshold_fraction > 1.0
    if fraction_out_of_range:
        raise ValueError(
            f"Passed 'threshold_fraction' value: {threshold_fraction}, "
            "expected value between 0.0-1.0."
        )

    threshold_neff = int(threshold_neff)
    if threshold_neff < 1:
        raise ValueError(
            f"Passed 'threshold_neff' value: {threshold_neff}, expected value "
            "1 or greater."
        )

    t0, g, Neff = timeseries.detectEquilibration(data, nskip=nskip)
    n_total = np.shape(data)[0]
    frac_equilibrated = 1.0 - (t0 / n_total)

    enough_production = frac_equilibrated >= threshold_fraction
    passed = enough_production and (Neff >= threshold_neff)
    if not passed:
        return [False, None, None, None]
    return [True, t0, g, Neff]
Example #14
0
def is_converged(series: Series, frac_min=0.5) -> (bool, float):
    """Decide whether a time series has converged.

    Equilibration detection runs on the values of ``series`` with a stride
    of ``max(1, len(series) // 100)``.  The series counts as converged
    unless the detected start lies beyond ``len(series) * (1 - frac_min)``.

    Returns ``(converged, when)`` where ``when`` is the index label of
    ``series`` at the detected equilibration start.
    """
    from pymbar import timeseries

    total = len(series)
    stride = max(1, total // 100)
    start, _, _ = timeseries.detectEquilibration(np.array(series), nskip=stride)
    too_late = start > total * (1 - frac_min)
    return (not too_late), series.index[start]
Example #15
0
def subsample(enthalpies):
    """
    Subsamples the enthalpies using John Chodera's code.
    This is probably better than the simple cutoff we normally use.

    Automatic equilibration detection from pymbar.timeseries discards the
    initial part of the series; the remainder is subsampled with the
    detected statistical inefficiency.

    Returns the indices chosen by ``subsampleCorrelatedData``.  They are
    relative to the equilibrated slice ``enthalpies[t0:]``, not to the
    original input, and the caller's sequence is not modified.
    """
    start, ineff, _ = timeseries.detectEquilibration(enthalpies)
    equilibrated = enthalpies[start:]
    return timeseries.subsampleCorrelatedData(equilibrated, g=ineff)
    def production(self):
        """Run the production simulation and save its final frame as a PDB.

        Output paths under ``production/`` are derived from
        ``self.identifier`` and stored on the instance.  Nothing is run when
        the production PDB already exists.  The starting structure is the
        equilibrated PDB when ``self.ran_equilibrate`` is set, otherwise the
        first frame of ``self.packed_trj`` (which is then energy-minimised).
        The simulation is advanced in chunks of ``self.n_steps`` until the
        standard error of the mean density, computed after discarding the
        detected equilibration period, falls below ``self.stderr_tolerance``.
        """
        utils.make_path('production/')
        stem = "production/" + self.identifier
        self.production_dcd_filename = stem + "_production.dcd"
        self.production_pdb_filename = stem + "_production.pdb"
        self.production_data_filename = stem + "_production.csv"

        utils.make_path(self.production_dcd_filename)

        if os.path.exists(self.production_pdb_filename):
            return

        started_from_equil = self.ran_equilibrate
        if started_from_equil:
            pdb = app.PDBFile(self.equil_pdb_filename)
            topology = pdb.topology
            positions = pdb.positions
        else:
            positions = self.packed_trj.openmm_positions(0)
            topology = self.packed_trj.top.to_openmm()
            box_lengths = mm.Vec3(*self.packed_trj.unitcell_lengths[0]) * u.nanometer
            topology.setUnitCellDimensions(box_lengths)

        forcefield = self.ffxml

        system = forcefield.createSystem(topology,
                                         nonbondedMethod=app.PME,
                                         nonbondedCutoff=self.cutoff,
                                         constraints=app.HBonds)
        integrator = mm.LangevinIntegrator(self.temperature, self.friction, self.timestep)
        barostat = mm.MonteCarloBarostat(self.pressure, self.temperature, self.barostat_frequency)
        system.addForce(barostat)

        simulation = app.Simulation(topology, system, integrator)
        simulation.context.setPositions(positions)

        if not self.ran_equilibrate:
            print('Minimizing.')
            simulation.minimizeEnergy()

        simulation.context.setVelocitiesToTemperature(self.temperature)
        print('Production.')
        trajectory_reporter = app.DCDReporter(self.production_dcd_filename, self.output_frequency)
        simulation.reporters.append(trajectory_reporter)
        data_reporter = app.StateDataReporter(self.production_data_filename,
                                              self.output_data_frequency,
                                              step=True,
                                              potentialEnergy=True,
                                              temperature=True,
                                              density=True)
        simulation.reporters.append(data_reporter)

        column_names = ["step", "U", "Temperature", "Density"]
        while True:
            simulation.step(self.n_steps)
            # re-read everything reported so far, skipping the header row
            table = pd.read_csv(self.production_data_filename, names=column_names, skiprows=1)
            densities = np.array(table.Density)
            t0, g, Neff = ts.detectEquilibration(densities, nskip=1000)
            equilibrated = densities[t0:]
            density_mean_stderr = equilibrated.std() / np.sqrt(Neff)
            if density_mean_stderr < self.stderr_tolerance:
                break

        # NOTE(review): presumably dropped here so the reporters release
        # their files before the trajectory is loaded - confirm.
        del simulation
        if self.ran_equilibrate:
            reference_top = self.equil_pdb_filename
        else:
            reference_top = self.box_pdb_filename
        final_frame = md.load(self.production_dcd_filename, top=reference_top)[-1]
        final_frame.save(self.production_pdb_filename)
Example #17
0
def calc_statistics(_data):
    """Summarise the decorrelated, equilibrated part of ``_data``.

    The series is cut at the detected equilibration start and subsampled
    with the detected statistical inefficiency.

    Returns ``[avg, std, err, t0, g, Neff]`` where ``avg`` and ``std`` are
    taken over the subsampled values and ``err`` is ``std`` divided by the
    square root of the number of subsampled values.
    """
    t0, g, Neff = timeseries.detectEquilibration(_data)
    production = _data[t0:]
    picks = timeseries.subsampleCorrelatedData(production, g=g)
    decorrelated = production[picks]

    mean_value = decorrelated.mean()
    spread = decorrelated.std()
    standard_error = spread / np.sqrt(len(picks))
    return [mean_value, spread, standard_error, t0, g, Neff]
Example #18
0
 def _construct_decorrelation_mask(self, sim_collection, rep, skip):
     """Return the indices of decorrelated samples for one replica.

     Reduced potentials are computed for replica ``rep`` of
     ``sim_collection``; equilibration is detected with stride ``skip`` and
     the remainder is subsampled using ``skip * g``.  One summary line is
     printed.  The returned indices refer to the full series, i.e. they are
     already shifted by the equilibration start.
     """
     energies = sim_collection.reps_energies[rep]
     order_params = sim_collection.reps_order_params[rep]
     steps = energies.steps
     reduced = utility.calc_reduced_potentials(energies, order_params,
                                               sim_collection.conditions)
     start_i, g, Neff = timeseries.detectEquilibration(reduced, nskip=skip)

     row = '{:<8} {:<8} {:<3} {:<4.1f} {:<.1f}'.format(
         sim_collection.conditions.fileformat, steps, start_i, g, Neff)
     print(row)

     production = reduced[start_i:]
     picked = timeseries.subsampleCorrelatedData(production, g=skip*g)
     # indices from the subsampler are relative to the production slice
     return [offset + start_i for offset in picked]
    def decorrelate_data(self, dHdl_data=None, u_nk_data=None):
        """Decorrelate dHdl and/or u_nk data using their first column.

        Each data set that is given is subsampled with
        ``equilibrium_detection`` on its first column.  The statistical
        inefficiency of that column is then attached to the resulting
        frame as the attribute ``statineff``.

        Parameters
        ----------
        dHdl_data : DataFrame, optional
            dHdl data to decorrelate.
        u_nk_data : DataFrame, optional
            u_nk data to decorrelate.

        Returns
        -------
        tuple
            ``(dHdl, u_nk)``; an entry is an empty list when the
            corresponding input was ``None``.

        Notes
        -----
        A file named ``temporary.xvg`` in the current working directory is
        deleted if it exists.
        """
        dHdl, u_nk = [], []
        if dHdl_data is not None:
            dHdl.append(equilibrium_detection(dHdl_data, dHdl_data.iloc[:, 0]))
            _, g1, _ = detectEquilibration(dHdl_data.iloc[:, 0].values)
            dHdl = pd.concat(dHdl)
            setattr(dHdl, 'statineff', g1)

        if u_nk_data is not None:
            u_nk.append(equilibrium_detection(u_nk_data, u_nk_data.iloc[:, 0]))
            _, g2, _ = detectEquilibration(u_nk_data.iloc[:, 0].values)
            u_nk = pd.concat(u_nk)
            setattr(u_nk, 'statineff', g2)

        logger("Data preprocessing completed!\n")
        # os.remove replaces the former `os.system("rm ...")`: no shell is
        # spawned and it also works where no `rm` command exists.
        if os.path.isfile('temporary.xvg'):
            os.remove('temporary.xvg')

        return dHdl, u_nk
Example #20
0
def compute_timeseries(reduced_potentials: np.array) -> list:
    """
    Use pymbar timeseries to find the uncorrelated samples in an array of reduced potentials.

    Returns the list ``[t0, g, Neff_max, A_t, full_uncorrelated_indices]``:
    the production region start, the statistical inefficiency, the
    effective number of samples, the uncorrelated reduced potentials, and
    their indices relative to the full input array.
    """
    from pymbar import timeseries

    t0, g, Neff_max = timeseries.detectEquilibration(reduced_potentials)
    production = reduced_potentials[t0:]
    # these indices are relative to the production slice
    kept = timeseries.subsampleCorrelatedData(production, g=g)
    decorrelated = production[kept]
    # shift back so the indices address the original array
    shifted = [i + t0 for i in kept]

    return [t0, g, Neff_max, decorrelated, shifted]
    def _calc_stat_neff(self):
        """
        Estimate the statistical inefficiency of the salt occupancy.

        Returns
        -------
        numpy.ndarray of float
            One statistical inefficiency per series in ``self.nsalt``.
            Values equal to 1.0 are replaced by ``np.inf``.
        """
        # Force a float array.  With an inferred dtype, a list made up only
        # of integers would give an integer array, and writing np.inf into
        # it raises.  NOTE(review): this presumably happens when every
        # series is constant - confirm against pymbar's return values.
        stat_ineff = np.array(
            [ts.detectEquilibration(counts, fast=True)[1] for counts in self.nsalt],
            dtype=float,
        )

        # A statistical inefficiency of exactly 1.0 is what comes back when
        # there were no acceptances; mark those entries as infinite instead.
        stat_ineff[stat_ineff == 1.0] = np.inf

        return stat_ineff
Example #22
0
    def gather_dg(self, u_kln, nstates):
        """Compute MBAR free energy differences from ``u_kln``.

        For each of the ``nstates`` states the series ``u_kln[k, k, :]`` is
        subsampled and the retained frames are packed into the leading
        columns of ``u_kln`` (the array is modified in place).  The number
        of retained samples per state is printed.

        Returns the free energy difference matrix and its uncertainty matrix.
        """
        # Subsample data to extract uncorrelated equilibrium timeseries
        counts = np.zeros([nstates], np.int32)  # number of uncorrelated samples
        for state in range(nstates):
            diagonal = u_kln[state, state, :]
            _, ineff, _ = timeseries.detectEquilibration(diagonal)
            keep = timeseries.subsampleCorrelatedData(diagonal, g=ineff)
            n_keep = len(keep)
            counts[state] = n_keep
            u_kln[state, :, 0:n_keep] = u_kln[state, :, keep].T

        # Compute free energy differences and statistical uncertainties
        estimator = MBAR(u_kln, counts)
        DeltaF_ij, dDeltaF_ij, _theta = estimator.getFreeEnergyDifferences()
        print("Number of uncorrelated samples per state: {}".format(counts))

        return DeltaF_ij, dDeltaF_ij
Example #23
0
def get_stats(data):
    """
    Summarise the decorrelated, equilibrated part of ``data``.

    Returns the tuple ``(avg, std, err, t0, g, Neff, sub_data)`` where
    ``sub_data`` holds the subsampled values taken after the detected
    equilibration start and ``err`` is ``std / sqrt(len(sub_data indices))``.

    later, can generalize, to use one column for decorrelating and getting reference indices
    """
    t0, g, Neff = timeseries.detectEquilibration(data)
    production = data[t0:]
    picks = timeseries.subsampleCorrelatedData(production, g=g)
    decorrelated = production[picks]

    mean_value = decorrelated.mean()
    spread = decorrelated.std()
    standard_error = spread / np.sqrt(len(picks))

    return mean_value, spread, standard_error, t0, g, Neff, decorrelated
Example #24
0
def test_analyze_time_series():
    """Check that ``analyze_time_series`` agrees with ``pymbar`` on a
    seeded array of ten random numbers."""

    np.random.seed(4)
    samples = np.random.rand(10)

    statistics = analyze_time_series(samples, minimum_samples=3)
    reference = detectEquilibration(samples, fast=False)
    expected_index, expected_value, _ = reference

    # agreement with the pymbar reference
    assert expected_index == statistics.equilibration_index
    assert np.isclose(statistics.statistical_inefficiency, expected_value)

    # basic sanity of the reported counts
    assert statistics.n_total_points == 10
    assert 0 < statistics.n_uncorrelated_points <= 10
    assert 0 <= statistics.equilibration_index < 10
Example #25
0
def production(in_top, in_pdb, out_dcd, out_csv, temperature):
    """Run a production simulation until the density estimate converges.

    The system is built from the Gromacs topology ``in_top`` with the box
    vectors and starting positions taken from ``in_pdb``.  The trajectory is
    written to ``out_dcd`` and the state data to ``out_csv``.  ``temperature``
    is multiplied by ``u.kelvin`` before use.  The simulation advances in
    chunks of ``N_STEPS`` until the standard error of the mean density
    (after discarding the detected equilibration period) is below
    ``STD_ERROR_TOLERANCE``.
    """
    temperature = temperature * u.kelvin  # TODO: recycle John's simtk.unit parser

    pdb = app.PDBFile(in_pdb)

    top = app.GromacsTopFile(in_top)
    top.topology.setPeriodicBoxVectors(pdb.topology.getPeriodicBoxVectors())

    system = top.createSystem(nonbondedMethod=app.PME,
                              nonbondedCutoff=CUTOFF,
                              constraints=app.HBonds)

    integrator = mm.LangevinIntegrator(temperature, FRICTION, TIMESTEP)
    barostat = mm.MonteCarloBarostat(PRESSURE, temperature, BAROSTAT_FREQUENCY)
    system.addForce(barostat)

    simulation = app.Simulation(top.topology, system, integrator)

    context = simulation.context
    context.setPositions(pdb.positions)
    context.setPeriodicBoxVectors(*pdb.topology.getPeriodicBoxVectors())
    context.setVelocitiesToTemperature(temperature)

    print('Production.')

    simulation.reporters.append(app.DCDReporter(out_dcd, OUTPUT_FREQUENCY))
    data_reporter = app.StateDataReporter(out_csv,
                                          OUTPUT_DATA_FREQUENCY,
                                          step=True,
                                          potentialEnergy=True,
                                          temperature=True,
                                          density=True)
    simulation.reporters.append(data_reporter)

    column_names = ["step", "U", "Temperature", "Density"]
    while True:
        simulation.step(N_STEPS)
        # re-read everything reported so far, skipping the header row
        table = pd.read_csv(out_csv, names=column_names, skiprows=1)
        densities = np.array(table.Density)
        t0, g, Neff = ts.detectEquilibration(densities, nskip=1000)
        equilibrated = densities[t0:]
        density_mean_stderr = equilibrated.std() / np.sqrt(Neff)
        if density_mean_stderr < STD_ERROR_TOLERANCE:
            break
Example #26
0
def find_equilibrium(all_timeseries, column_name = None):
  """
  Run pymbar's equilibration detection on every entry of ``all_timeseries``.

  directly uses pymbar's timeseries utility!
  source: https://github.com/choderalab/pymbar
  https://pymbar.readthedocs.io/en/master/timeseries.html
  and references therein

  ``column_name`` is not used.  Returns a dict mapping each key of
  ``all_timeseries`` to ``[t, g, Neff_max]``.
  """
  results = {}
  for key in all_timeseries:
    samples = np.array(all_timeseries[key])
    # start  - t_0, starting point of the equilibrated part of the series
    # ineff  - the statistical inefficiency = 2*correlationtime + 1
    # n_eff  - effective number of uncorrelated samples
    start, ineff, n_eff = timeseries.detectEquilibration(samples)
    results[key] = [start, ineff, n_eff]
  return results
def read_concentration(files, discard=10, fast=False):
    """
    Calculate the mean concentration and standard error from numerous simulations, where each simulation has
    a fixed chemical potential. Timeseries analysis is used to determine equilibrium properties.

    Parameters
    ----------
    files: list of str
        the path to each results file that will be analysed.
    discard: int
        the initial amount of data to throw away
    fast: bool
        whether to perform the fast variant of the time series analysis

    Returns
    -------
    concentration: numpy.ndarray
        mean concentration of the equilibrated data, one entry per file
    standard_error: numpy.ndarray
        standard deviation of the bootstrap estimates, one entry per file
    delta_mu: numpy.ndarray
        the first 'delta_chem' control parameter of each file
    lower, upper: numpy.ndarray
        2.5th and 97.5th percentiles of the bootstrap estimates
    """
    n_files = len(files)
    concentration = np.zeros(n_files)
    standard_error = np.zeros(n_files)
    delta_mu = np.zeros(n_files)
    lower = np.zeros(n_files)
    upper = np.zeros(n_files)
    for i, path in enumerate(files):
        ncfile = Dataset(path, 'r')
        # Close the file even when one of the reads below raises, so a bad
        # file cannot leak an open handle.
        try:
            sample_state = ncfile.groups['Sample state data']
            volume = sample_state['volume'][:]
            # NOTE(review): the slice 1:2 selects a single column, so the
            # minimum over axis 1 is just that column - confirm whether two
            # species columns (1:3) were intended.
            nsalt = np.min(sample_state['species counts'][:, 1:2], axis=1)
            delta_mu[i] = ncfile.groups['Control parameters']['delta_chem'][0]
        finally:
            ncfile.close()

        # Get the concentration in Molarity
        c = 1.0 * nsalt / volume * 1.66054

        # Estimate the mean and standard error with timeseries analysis
        t_equil, stat_ineff, _ = timeseries.detectEquilibration(c[discard:], fast=fast)
        # t_equil is relative to the series that already had `discard` removed
        c_equil = c[(discard + t_equil):]
        concentration[i] = np.mean(c_equil)
        independent_inds = timeseries.subsampleCorrelatedData(c_equil, g=stat_ineff, conservative=True)
        mu_samps = misc_tools.bootstrap_estimates(c_equil[independent_inds])
        lower[i] = np.percentile(mu_samps, 2.5)
        upper[i] = np.percentile(mu_samps, 97.5)
        standard_error[i] = mu_samps.std()

    return concentration, standard_error, delta_mu, lower, upper
Example #28
0
def equilibrate(traj, verbose=False, name=None):
    """Discard the burn-in part of one timeseries or of a pair of timeseries.

    ``traj`` is converted to an array.  A 1-D input is treated as a single
    series; a 2-D input must have exactly two rows, which are cut at the
    later of their two detected equilibration starts.  When detection
    reports a start of 0 and there are more than 10 frames, detection is
    repeated with the first 10 frames removed
    (see https://github.com/choderalab/pymbar/issues/277).

    With ``verbose`` a one-line summary is printed, labelled with ``name``
    (``'Trajectory'`` when no name is given).

    Returns the array with the burn-in frames removed.
    Raises NotImplementedError for any other input shape.
    """
    traj = np.array(traj)
    n_dims = traj.ndim

    # reject unsupported shapes before doing any analysis
    if n_dims == 2 and traj.shape[0] != 2:
        raise NotImplementedError(
            'trajectory.equilibrate() in 2 dimensions is only '
            'implemented for exactly two timeseries.')
    if n_dims not in (1, 2):
        raise NotImplementedError(
            'trajectory.equilibrate() is not implemented for '
            'trajectories with more than 2 dimensions.')

    single = n_dims == 1
    rows = [traj] if single else [traj[0], traj[1]]
    n = traj.size if single else traj.shape[1]

    t0 = max(timeseries.detectEquilibration(row)[0] for row in rows)
    if t0 == 0 and n > 10:
        # See https://github.com/choderalab/pymbar/issues/277
        t0x = max(timeseries.detectEquilibration(row[10:])[0] for row in rows)
        if t0x != 0:
            t0 = t0x + 10

    res = traj[t0:] if single else traj[:, t0:]

    if verbose:
        label = name if name else 'Trajectory'
        if t0 == 0:
            message = '{:s} equilibration: No frames discarded for burn-in.'.format(
                label)
        elif t0 == 1:
            message = ('{:s} equilibration: First frame ({:.1%} of '
                       'trajectory) discarded for burn-in.'.format(label, 1 / n))
        else:
            message = ('{:s} equilibration: First {:d} frames ({:.1%} of '
                       'trajectory) discarded for burn-in.'.format(
                           label, t0, t0 / n))
        print(message)

    return res
    def __init__(self,
                 ani_model: AlchemicalANI,
                 ani_trajs: list,
                 potential_energy_trajs: list,
                 lambdas,
                 max_snapshots_per_window=50,
                 ):
        """Collect thinned snapshots from every lambda window and build an MBAR estimator.

        For each window the potential energy series decides how many
        initial frames are skipped and how strongly the trajectory is
        thinned; at most ``max_snapshots_per_window`` snapshots are kept per
        window.  Every kept snapshot is then re-evaluated at every lambda
        value to fill the reduced potential matrix passed to ``MBAR``.

        ``ani_trajs`` and ``potential_energy_trajs`` must each hold one
        entry per value in ``lambdas``.
        """
        K = len(lambdas)
        assert (len(ani_trajs) == K)
        assert (len(potential_energy_trajs) == K)

        self.ani_model = ani_model
        self.ani_trajs = ani_trajs
        self.potential_energy_trajs = potential_energy_trajs
        self.lambdas = lambdas

        # thin each window based on automatic equilibration detection
        samples_per_window = []
        snapshots = []
        for window in range(K):
            traj = self.ani_trajs[window]

            equil, g = detectEquilibration(self.potential_energy_trajs[window])[:2]
            stride = int(g)
            if len(traj[equil::stride]) > max_snapshots_per_window:
                # widen the stride so that roughly max_snapshots_per_window
                # frames remain after the equilibration start
                stride = int((len(traj) - equil) / max_snapshots_per_window)

            coordinates = traj[equil::stride].xyz * unit.nanometer
            window_snapshots = list(coordinates)[:max_snapshots_per_window]
            samples_per_window.append(len(window_snapshots))
            snapshots.extend(window_snapshots)

        self.snapshots = snapshots
        N = len(snapshots)
        u_kn = np.zeros((K, N))
        for k, lamb in enumerate(lambdas):
            self.ani_model.lambda_value = lamb
            for n, snapshot in enumerate(snapshots):
                u_kn[k, n] = self.ani_model.calculate_energy(snapshot) / kT
        self.mbar = MBAR(u_kn, samples_per_window)
def test_compare_detectEquil(show_hist=False):
    """
    Compare detectEquilibration implementations (with and without binary search + fft).

    Builds 100 synthetic correlated series with a flat trailing region and
    checks that the most common difference between the equilibration start
    reported by the two implementations is zero.

    Parameters
    ----------
    show_hist : bool, optional, default=False
        If True, plot a histogram of the differences with matplotlib.
    """
    t_res = []
    N = 100
    # range() behaves the same on Python 2 and 3; xrange() only exists on Python 2.
    for _ in range(100):
        A_t = testsystems.correlated_timeseries_example(N=N, tau=5.0) + 2.0
        B_t = testsystems.correlated_timeseries_example(N=N, tau=5.0) + 1.0
        C_t = testsystems.correlated_timeseries_example(N=N * 2, tau=5.0)
        # Concatenate and add a flat region to one end (common in MC data).
        D_t = np.concatenate([A_t, B_t, C_t, np.zeros(20)])
        bs_de = timeseries.detectEquilibration_binary_search(D_t, bs_nodes=10)
        std_de = timeseries.detectEquilibration(D_t, fast=False, nskip=1)
        t_res.append(bs_de[0] - std_de[0])
    # Older SciPy returns the mode as a length-1 array, newer SciPy as a scalar;
    # atleast_1d makes the indexing valid for both.
    t_res_mode = float(np.atleast_1d(stats.mode(t_res)[0])[0])
    eq(t_res_mode, 0., decimal=1)
    if show_hist:
        import matplotlib.pyplot as plt
        plt.hist(t_res)
        plt.show()
Example #31
0
def test_compare_detectEquil(show_hist=False):
    """
    Compare detectEquilibration implementations (with and without binary search + fft).

    Builds 100 synthetic correlated series and checks that the most common
    difference between the equilibration start reported by the two
    implementations is zero.

    Parameters
    ----------
    show_hist : bool, optional, default=False
        If True, plot a histogram of the differences with matplotlib.
    """
    t_res = []
    N = 100
    # range() behaves the same on Python 2 and 3; xrange() only exists on Python 2.
    for _ in range(100):
        A_t = testsystems.correlated_timeseries_example(N=N, tau=5.0) + 2.0
        B_t = testsystems.correlated_timeseries_example(N=N, tau=5.0) + 1.0
        C_t = testsystems.correlated_timeseries_example(N=N * 2, tau=5.0)
        D_t = np.concatenate([A_t, B_t, C_t])
        bs_de = timeseries.detectEquilibration_binary_search(D_t, bs_nodes=10)
        std_de = timeseries.detectEquilibration(D_t, fast=False, nskip=1)
        t_res.append(bs_de[0] - std_de[0])
    # Older SciPy returns the mode as a length-1 array, newer SciPy as a scalar;
    # atleast_1d makes the indexing valid for both.
    t_res_mode = float(np.atleast_1d(stats.mode(t_res)[0])[0])
    eq(t_res_mode, 0., decimal=1)
    if show_hist:
        import matplotlib.pyplot as plt
        plt.hist(t_res)
        plt.show()
def individual_analysis_procedure(temperature):
    """
    Analyze the timeseries for 'temperature' and write decorrelated samples.

    Reads ``sim_data.dat`` and ``coordinates.pdb`` from the directory
    ``output_dir + str(temperature)``, detects the equilibrated region of the
    total energy series, subsamples it, and writes
    ``uncorrelated_total_energies.dat`` and ``uncorrelated_distances.dat`` to
    the same directory. These samples are used in later sampling to generate
    a free energy surface.

    The work is skipped when ``search_for_existing_data`` is true and
    ``uncorrelated_distances.dat`` already exists.

    Relies on the module-level names ``output_dir``,
    ``search_for_existing_data``, ``simulation_steps`` and ``nskip``.

    Parameters
    ----------
    temperature
        Value whose ``str()`` names the per-temperature directory.
    """
    run_dir = output_dir + str(temperature)

    # Nothing to do when reuse is requested and the result is already on disk.
    if search_for_existing_data and os.path.exists(
            str(run_dir + "/uncorrelated_distances.dat")):
        return

    # Field 3 of every comma-separated row holds the total energy. The context
    # manager closes the file even if a row is malformed.
    with open(str(run_dir + "/sim_data.dat"), 'r') as output_obj:
        E_total_all_temp = np.array(
            [line.split(',')[3] for line in output_obj])

    # Read in the distances.
    distances = util.get_distances(
        str(run_dir + "/coordinates.pdb"), simulation_steps)

    # Drop the first row, then re-type the remaining entries as floats.
    E_total_all = np.array(np.delete(E_total_all_temp, 0, 0), dtype=float)

    # t0 is the start of the equilibrated region, g the statistical inefficiency.
    [t0, g, Neff_max] = timeseries.detectEquilibration(
        E_total_all, nskip=nskip)

    # Truncate the energy series before the equilibration index.
    E_total_equil = E_total_all[t0:]

    # Indices of uncorrelated samples, relative to the truncated series.
    uncorrelated_energy_indices = timeseries.subsampleCorrelatedData(
        E_total_equil, g=g)

    # Write uncorrelated total energies to file.
    np.savetxt(
        str(run_dir + '/uncorrelated_total_energies.dat'),
        E_total_equil[uncorrelated_energy_indices])

    # Write uncorrelated Na-Cl distances to file.
    # NOTE(review): the indices are relative to E_total_equil (offset by t0)
    # but are applied here to the untruncated distances; if distances is
    # aligned frame-by-frame with E_total_all this should presumably be
    # distances[t0:][...] -- confirm against util.get_distances.
    np.savetxt(
        str(run_dir + '/uncorrelated_distances.dat'),
        distances[uncorrelated_energy_indices])
    return
    def gather_dg(self, u_kln, nstates):
        """Estimate the free energy difference between the first and last state.

        Each state's own energy series is subsampled with pymbar's timeseries
        tools, the kept samples are packed to the front of that state's rows,
        and MBAR is run on the result.

        Parameters
        ----------
        u_kln
            Energies; stacked with ``np.vstack`` and then indexed as
            ``[k, l, n]``.
        nstates : int
            Number of states.

        Returns
        -------
        tuple
            ``(dg, ddg)``: the MBAR free energy difference between state 0 and
            state ``nstates - 1`` and its uncertainty, both multiplied by
            ``self.kTtokcal``.
        """
        u_kln = np.vstack(u_kln)

        # Number of uncorrelated samples kept for each state.
        N_k = np.zeros([nstates], np.int32)
        for state in range(nstates):
            own_series = u_kln[state, state, :]
            _, inefficiency, _ = timeseries.detectEquilibration(own_series)
            keep = timeseries.subsampleCorrelatedData(own_series,
                                                      g=inefficiency)
            n_keep = len(keep)
            N_k[state] = n_keep
            u_kln[state, :, 0:n_keep] = u_kln[state, :, keep].T

        # Free energy differences and their statistical uncertainties.
        mbar = MBAR(u_kln, N_k)
        DeltaF_ij, dDeltaF_ij, _ = mbar.getFreeEnergyDifferences()

        last = nstates - 1
        dg = DeltaF_ij[0, last] * self.kTtokcal
        ddg = dDeltaF_ij[0, last] * self.kTtokcal

        logger.debug(
            "Number of uncorrelated samples per state: {}".format(N_k))
        logger.debug("Relative free energy change for {0} = {1} +- {2}".format(
            self.name, dg, ddg))

        return dg, ddg
Example #34
0
def equilibrate(traj, verbose=False, name=None):
    """Drop the burn-in frames from one timeseries or from a pair of timeseries.

    The burn-in length comes from ``timeseries.detectEquilibration``. For a
    pair of series (a 2-D input with exactly two rows) the larger of the two
    detected start indices is applied to both rows.

    Parameters
    ----------
    traj : array-like
        A 1-D series, or a 2-D array holding exactly two series as rows.
    verbose : bool, default=False
        If True, print how many frames were discarded.
    name : str, optional
        Label used in the printed message; 'Trajectory' when not given.

    Returns
    -------
    numpy.ndarray
        The input with the first ``t0`` frames removed.

    Raises
    ------
    NotImplementedError
        For 2-D input that does not have exactly two rows, and for input that
        is neither 1-D nor 2-D.
    """
    traj = np.array(traj)

    # Reject unsupported shapes before any analysis is attempted.
    if traj.ndim not in (1, 2):
        raise NotImplementedError('trajectory.equilibrate() is not implemented for '
                                  'trajectories with more than 2 dimensions.')
    if traj.ndim == 2 and traj.shape[0] != 2:
        raise NotImplementedError('trajectory.equilibrate() in 2 dimensions is only '
                                  'implemented for exactly two timeseries.')

    def latest_start(series_group):
        # Largest equilibration start index over all series in the group.
        return max(timeseries.detectEquilibration(series)[0]
                   for series in series_group)

    paired = traj.ndim == 2
    # Treat a single series as a one-row group so both cases share one path.
    rows = traj if paired else traj[np.newaxis]
    n = rows.shape[1]

    t0 = latest_start(rows)
    if t0 == 0 and n > 10:
        # See https://github.com/choderalab/pymbar/issues/277
        retry = latest_start(rows[:, 10:])
        if retry != 0:
            t0 = retry + 10

    res = traj[:, t0:] if paired else traj[t0:]

    if verbose:
        label = name if name else 'Trajectory'
        if t0 == 0:
            message = '{:s} equilibration: No frames discarded for burn-in.'.format(label)
        elif t0 == 1:
            message = ('{:s} equilibration: First frame ({:.1%} of '
                       'trajectory) discarded for burn-in.'.format(label, 1 / n))
        else:
            message = ('{:s} equilibration: First {:d} frames ({:.1%} of '
                       'trajectory) discarded for burn-in.'.format(label, t0, t0 / n))
        print(message)

    return res
Example #35
0
def run_endpoint_perturbation(lambda_thermodynamic_state, nonalchemical_thermodynamic_state, initial_hybrid_sampler_state, mc_move, n_iterations, factory,
    lambda_index=0, print_work=False, write_system=False, write_state=False, write_trajectories=False):
    """
    Sample the hybrid system at a lambda endpoint and estimate the free energy
    of perturbing each sample to the corresponding nonalchemical system.

    Parameters
    ----------
    lambda_thermodynamic_state : ThermodynamicState
        The thermodynamic state corresponding to the hybrid system at a lambda endpoint
    nonalchemical_thermodynamic_state : ThermodynamicState
        The nonalchemical thermodynamic state for the relevant endpoint
    initial_hybrid_sampler_state : SamplerState
        Starting positions for the sampler. Must be compatible with lambda_thermodynamic_state
    mc_move : MCMCMove
        The MCMove that will be used for sampling at the lambda endpoint
    n_iterations : int
        The number of iterations
    factory : HybridTopologyFactory
        The hybrid topology factory
    lambda_index : int, optional, default=0
        The index, 0 or 1, at which to retrieve nonalchemical positions
    print_work : bool, optional, default=False
        If True, will print work values
    write_system : bool, optional, default=False
        If True, will write alchemical and nonalchemical System XML files
    write_state : bool, optional, default=False
        If True, write alchemical (hybrid) State XML files each iteration
    write_trajectories : bool, optional, default=False
        If True, will write trajectories

    Returns
    -------
    results : list
        ``[df, ddf_corrected, t0, Neff_max]`` where ``df`` is the EXP estimate
        computed from the work values after the detected equilibration index
        ``t0``, ``ddf_corrected`` is the EXP uncertainty multiplied by
        ``sqrt(g)``, and ``Neff_max`` is the third value returned by
        ``timeseries.detectEquilibration``.
    non_potential : np.ndarray, shape (n_iterations,)
        Nonalchemical reduced potential at every iteration.
    hybrid_potential : np.ndarray, shape (n_iterations,)
        Hybrid reduced potential at every iteration.

    Raises
    ------
    ValueError
        If ``lambda_index`` is neither 0 nor 1 (raised inside the sampling loop).
    """
    import mdtraj as md

    #run an initial minimization:
    mcmc_sampler = mcmc.MCMCSampler(lambda_thermodynamic_state, initial_hybrid_sampler_state, mc_move)
    mcmc_sampler.minimize(max_iterations=20)
    new_sampler_state = mcmc_sampler.sampler_state

    if write_system:
        with open(f'hybrid{lambda_index}-system.xml', 'w') as outfile:
            outfile.write(openmm.XmlSerializer.serialize(lambda_thermodynamic_state.system))
        with open(f'nonalchemical{lambda_index}-system.xml', 'w') as outfile:
            outfile.write(openmm.XmlSerializer.serialize(nonalchemical_thermodynamic_state.system))

    #initialize work array
    w = np.zeros([n_iterations])
    non_potential = np.zeros([n_iterations])
    hybrid_potential = np.zeros([n_iterations])

    #run n_iterations of the endpoint perturbation:
    # Both position buffers are allocated whether or not write_trajectories is set;
    # they are only filled and saved when it is.
    hybrid_trajectory = unit.Quantity(np.zeros([n_iterations, lambda_thermodynamic_state.system.getNumParticles(), 3]), unit.nanometers) # DEBUG
    nonalchemical_trajectory = unit.Quantity(np.zeros([n_iterations, nonalchemical_thermodynamic_state.system.getNumParticles(), 3]), unit.nanometers) # DEBUG
    for iteration in range(n_iterations):
        # Generate a new sampler state for the hybrid system
        mc_move.apply(lambda_thermodynamic_state, new_sampler_state)

        # Compute the hybrid reduced potential at the new sampler state
        # (the integrator returned alongside the context is not used)
        hybrid_context, integrator = cache.global_context_cache.get_context(lambda_thermodynamic_state)
        new_sampler_state.apply_to_context(hybrid_context, ignore_velocities=True)
        hybrid_reduced_potential = lambda_thermodynamic_state.reduced_potential(hybrid_context)

        if write_state:
            state = hybrid_context.getState(getPositions=True, getParameters=True)
            state_xml = openmm.XmlSerializer.serialize(state)
            with open(f'state{iteration}_l{lambda_index}.xml', 'w') as outfile:
                outfile.write(state_xml)

        # Construct a sampler state for the nonalchemical system
        # lambda_index 0 maps to the factory's old positions, 1 to its new positions
        if lambda_index == 0:
            nonalchemical_positions = factory.old_positions(new_sampler_state.positions)
        elif lambda_index == 1:
            nonalchemical_positions = factory.new_positions(new_sampler_state.positions)
        else:
            raise ValueError("The lambda index needs to be either one or zero for this to be meaningful")
        nonalchemical_sampler_state = SamplerState(nonalchemical_positions, box_vectors=new_sampler_state.box_vectors)

        if write_trajectories:
            state = hybrid_context.getState(getPositions=True)
            hybrid_trajectory[iteration,:,:] = state.getPositions(asNumpy=True)
            nonalchemical_trajectory[iteration,:,:] = nonalchemical_positions

        # Compute the nonalchemical reduced potential
        nonalchemical_context, integrator = cache.global_context_cache.get_context(nonalchemical_thermodynamic_state)
        nonalchemical_sampler_state.apply_to_context(nonalchemical_context, ignore_velocities=True)
        nonalchemical_reduced_potential = nonalchemical_thermodynamic_state.reduced_potential(nonalchemical_context)

        # Compute and store the work
        w[iteration] = nonalchemical_reduced_potential - hybrid_reduced_potential
        non_potential[iteration] = nonalchemical_reduced_potential
        hybrid_potential[iteration] = hybrid_reduced_potential

        if print_work:
            print(f'{iteration:8d} {hybrid_reduced_potential:8.3f} {nonalchemical_reduced_potential:8.3f} => {w[iteration]:8.3f}')

    if write_trajectories:
        # Save both trajectories as PDB files named after lambda_index
        if lambda_index == 0:
            nonalchemical_mdtraj_topology = md.Topology.from_openmm(factory._topology_proposal.old_topology)
        elif lambda_index == 1:
            nonalchemical_mdtraj_topology = md.Topology.from_openmm(factory._topology_proposal.new_topology)
        md.Trajectory(hybrid_trajectory / unit.nanometers, factory.hybrid_topology).save(f'hybrid{lambda_index}.pdb')
        md.Trajectory(nonalchemical_trajectory / unit.nanometers, nonalchemical_mdtraj_topology).save(f'nonalchemical{lambda_index}.pdb')

    # Analyze data and return results
    # Discard the work values before the detected equilibration index
    [t0, g, Neff_max] = timeseries.detectEquilibration(w)
    w_burned_in = w[t0:]
    [df, ddf] = pymbar.EXP(w_burned_in)
    # Scale the uncertainty by sqrt(g) to correct for correlation between samples
    ddf_corrected = ddf * np.sqrt(g)
    results = [df, ddf_corrected, t0, Neff_max]

    return results, non_potential, hybrid_potential
Example #36
0
def overlap_check(reference_system, positions, platform_name=None, precision=None, nsteps=50, nsamples=200, factory_args=None, cached_trajectory_filename=None):
    """
    Test overlap between reference system and alchemical system by running a short simulation.

    Parameters
    ----------
    reference_system : simtk.openmm.System
       The reference System object to compare with
    positions : simtk.unit.Quantity with units compatible with nanometers
       The positions to assess energetics for.
    platform_name : str, optional, default=None
       The name of the platform to use for benchmarking.
    precision : optional, default=None
       Accepted for interface compatibility; not used by this function.
    nsteps : int, optional, default=50
       Number of molecular dynamics steps between samples.
    nsamples : int, optional, default=200
       Number of samples to collect.
    factory_args : dict, optional, default=None
       Keyword arguments passed to AbsoluteAlchemicalFactory. None means no
       extra arguments.
    cached_trajectory_filename : str, optional, default=None
       If specified, attempt to cache (or reuse) trajectory.

    Raises
    ------
    Exception
       If a reference or alchemical potential is NaN, or if the EXP
       uncertainty estimate exceeds 3 kT.

    """

    # The default of None cannot be expanded with ``**``; fall back to no arguments.
    if factory_args is None:
        factory_args = {}

    # Create a fully-interacting alchemical state.
    factory = AbsoluteAlchemicalFactory(reference_system, **factory_args)
    alchemical_state = AlchemicalState()
    alchemical_system = factory.createPerturbedSystem(alchemical_state)

    temperature = 300.0 * unit.kelvin
    collision_rate = 5.0 / unit.picoseconds
    timestep = 2.0 * unit.femtoseconds
    kT = (kB * temperature)

    # Select platform.
    platform = None
    if platform_name:
        platform = openmm.Platform.getPlatformByName(platform_name)

    # Create integrators.
    reference_integrator = openmm.LangevinIntegrator(temperature, collision_rate, timestep)
    alchemical_integrator = openmm.VerletIntegrator(timestep)

    # Create contexts.
    if platform:
        reference_context = openmm.Context(reference_system, reference_integrator, platform)
        alchemical_context = openmm.Context(alchemical_system, alchemical_integrator, platform)
    else:
        reference_context = openmm.Context(reference_system, reference_integrator)
        alchemical_context = openmm.Context(alchemical_system, alchemical_integrator)

    ncfile = None
    if cached_trajectory_filename:
        cache_mode = 'write'

        # Try reading from cache
        from netCDF4 import Dataset
        if os.path.exists(cached_trajectory_filename):
            try:
                ncfile = Dataset(cached_trajectory_filename, 'r')
                if (ncfile.variables['positions'].shape == (nsamples, reference_system.getNumParticles(), 3)):
                    # Read the cache if everything matches
                    cache_mode = 'read'
            except Exception as e:
                # An unreadable cache is not fatal; it is regenerated below.
                logger.info(str(e))

        if cache_mode == 'write':
            # Release a read handle left over from a cache that did not match,
            # so the file can be reopened for writing.
            if ncfile is not None:
                ncfile.close()
                ncfile = None

            # If anything went wrong, create a new cache.
            try:
                (pathname, filename) = os.path.split(cached_trajectory_filename)
                # pathname is empty for a bare filename; makedirs('') would raise.
                if pathname and not os.path.exists(pathname): os.makedirs(pathname)
                ncfile = Dataset(cached_trajectory_filename, 'w', format='NETCDF4')
                ncfile.createDimension('samples', 0)
                ncfile.createDimension('atoms', reference_system.getNumParticles())
                ncfile.createDimension('spatial', 3)
                ncfile.createVariable('positions', 'f4', ('samples', 'atoms', 'spatial'))
            except Exception as e:
                logger.info(str(e))
                logger.info('Could not create a trajectory cache (%s).' % cached_trajectory_filename)
                ncfile = None

    # Collect simulation data.
    reference_context.setPositions(positions)
    # du_n[n] is the reduced energy difference (alchemical - reference) of sample n.
    du_n = np.zeros([nsamples], np.float64)
    print()
    import click
    try:
        with click.progressbar(range(nsamples)) as bar:
            for sample in bar:
                if cached_trajectory_filename and (cache_mode == 'read'):
                    # Load cached frames.
                    positions = unit.Quantity(ncfile.variables['positions'][sample,:,:], unit.nanometers)
                    reference_context.setPositions(positions)
                else:
                    # Run dynamics.
                    reference_integrator.step(nsteps)

                # Get reference energies.
                reference_state = reference_context.getState(getEnergy=True, getPositions=True)
                reference_potential = reference_state.getPotentialEnergy()
                if np.isnan(reference_potential/kT):
                    raise Exception("Reference potential is NaN")

                # Get alchemical energies.
                alchemical_context.setPositions(reference_state.getPositions(asNumpy=True))
                alchemical_state = alchemical_context.getState(getEnergy=True)
                alchemical_potential = alchemical_state.getPotentialEnergy()
                if np.isnan(alchemical_potential/kT):
                    raise Exception("Alchemical potential is NaN")

                du_n[sample] = (alchemical_potential - reference_potential) / kT

                if cached_trajectory_filename and (cache_mode == 'write') and (ncfile is not None):
                    ncfile.variables['positions'][sample,:,:] = reference_state.getPositions(asNumpy=True) / unit.nanometers
    finally:
        # Clean up, also when a NaN aborts the sampling loop.
        del reference_context, alchemical_context
        if ncfile is not None:
            ncfile.close()

    # Discard data to equilibration and subsample.
    from pymbar import timeseries
    [t0, g, Neff] = timeseries.detectEquilibration(du_n)
    # Drop the burn-in samples before t0; g describes the remaining series.
    du_n = du_n[t0:]
    indices = timeseries.subsampleCorrelatedData(du_n, g=g)
    du_n = du_n[indices]

    # Compute statistics.
    from pymbar import EXP
    [DeltaF, dDeltaF] = EXP(du_n)

    # Raise an exception if the error is larger than 3kT.
    MAX_DEVIATION = 3.0 # kT
    if (dDeltaF > MAX_DEVIATION):
        report = "DeltaF = %12.3f +- %12.3f kT (%5d samples, g = %6.1f)" % (DeltaF, dDeltaF, Neff, g)
        raise Exception(report)

    return
from pymbar import timeseries
# Find where the equilibrated part of the series begins (t), together with
# the statistical inefficiency (g) and the effective number of uncorrelated
# samples (Neff_max).
t, g, Neff_max = timeseries.detectEquilibration(A_t)
# Keep only the samples from index t onward.
A_t_equilibrated = A_t[t:]


Example #38
0
            print "%16s %8d" % (dimension_name, len(ncfile.dimensions[dimension_name]))
    
        # Read dimensions.
        niterations = ncfile.variables['positions'].shape[0]
        nstates = ncfile.variables['positions'].shape[1]
        natoms = ncfile.variables['positions'].shape[2]
        print "Read %(niterations)d iterations, %(nstates)d states" % vars()

        # Read reference PDB file.
        reference_pdb_filename = os.path.join(source_directory, "complex.pdb")
        atoms = read_pdb(reference_pdb_filename)

        # Choose number of samples to discard to equilibration
        u_n = extract_u_n(ncfile)
        if numpy.any(numpy.isnan(u_n)): continue
        nskip = int(len(u_n) / 100.0)
        [nequil, g_t, Neff_max] = timeseries.detectEquilibration(u_n, nskip)
        print [nequil, Neff_max]

        # Resample configurations for state 0.
        state = 0
        nsamples = 5000
        output_pdb_filename = os.path.join(source_directory, 'resampled.pdb')
        write_pdb_resampled(ncfile, atoms, output_pdb_filename, state, nequil, nsamples)

        # Close input NetCDF file.
        ncfile.close()
    #except Exception as e:
    #    print str(e)
    #    pass
Example #39
0
kT = unit.AVOGADRO_CONSTANT_NA * unit.BOLTZMANN_CONSTANT_kB * integrator.getTemperature()
for k in range(nstates):
    for iteration in range(niterations):
        print('state %5d iteration %5d / %5d' % (k, iteration, niterations))
        # Switch the context to alchemical state k and propagate it.
        context.setParameter('lambda', lambdas[k])
        integrator.step(nsteps)
        # Re-evaluate the current configuration at every alchemical state,
        # storing each potential energy divided by kT.
        for l in range(nstates):
            context.setParameter('lambda', lambdas[l])
            potential = context.getState(getEnergy=True).getPotentialEnergy()
            u_kln[k, l, iteration] = potential / kT

# Estimate free energy of Lennard-Jones particle insertion
from pymbar import MBAR, timeseries
# N_k[k] counts the uncorrelated samples kept for state k.
N_k = np.zeros([nstates], np.int32)
for k in range(nstates):
    # Subsample using state k's own energy series, then pack the kept
    # samples to the front of that state's rows.
    own_series = u_kln[k, k, :]
    nequil, g, Neff_max = timeseries.detectEquilibration(own_series)
    indices = timeseries.subsampleCorrelatedData(own_series, g=g)
    N_k[k] = len(indices)
    u_kln[k, :, 0:N_k[k]] = u_kln[k, :, indices].T
# Free energy differences and their statistical uncertainties.
mbar = MBAR(u_kln, N_k)
DeltaF_ij, dDeltaF_ij, Theta_ij = mbar.getFreeEnergyDifferences()

for heading, matrix in (('DeltaF_ij (kT):', DeltaF_ij),
                        ('dDeltaF_ij (kT):', dDeltaF_ij)):
    print(heading)
    print(matrix)
Example #40
0
def analyze(source_directory):
    """
    Analyze contents of store files to compute free energy differences.

    For every ``<phase>-<suffix>.nc`` file found in ``source_directory``
    (phases 'solvent' and 'complex', suffixes 'explicit' and 'implicit'),
    estimates free energies and enthalpies, then logs the binding free energy
    and binding enthalpy.

    Parameters
    ----------
    source_directory : string
       The location of the NetCDF simulation storage files.

    Raises
    ------
    Exception
       If a NetCDF file cannot be opened, if no NetCDF files are found, or if
       no standard state correction was read from a 'complex' phase file.

    """

    # Storage for different phases.
    data = dict()

    phase_prefixes = ['solvent', 'complex']
    suffixes = ['explicit', 'implicit']

    DeltaF_restraints = None

    # Process each netcdf file.
    netcdf_files_found = 0
    for phase in phase_prefixes:
        for suffix in suffixes:
            # Construct full path to NetCDF file.
            fullpath = os.path.join(source_directory, '%s-%s.nc' % (phase, suffix))
            logger.debug("Attempting to open %s..." % fullpath)

            # Skip if the file doesn't exist.
            if (not os.path.exists(fullpath)): continue

            # Open NetCDF file for reading.
            logger.info("Opening NetCDF trajectory file '%(fullpath)s' for reading..." % vars())
            try:
                ncfile = netcdf.Dataset(fullpath, 'r')
            except Exception as e:
                # Exceptions have no ``message`` attribute on Python 3; using it
                # here would hide the real error behind an AttributeError.
                logger.error(str(e))
                raise Exception("Error opening NetCDF trajectory file '%(fullpath)s' for reading..." % vars())

            # Close the file even if the analysis of this phase fails.
            try:
                # DEBUG
                logger.info("dimensions:")
                for dimension_name in ncfile.dimensions.keys():
                    logger.info("%16s %8d" % (dimension_name, len(ncfile.dimensions[dimension_name])))

                # Read dimensions.
                niterations = ncfile.variables['positions'].shape[0]
                nstates = ncfile.variables['positions'].shape[1]
                logger.info("Read %(niterations)d iterations, %(nstates)d states" % vars())

                # Increment number of netcdf files found.
                netcdf_files_found += 1

                # Read standard state correction free energy.
                if phase == 'complex':
                    DeltaF_restraints = ncfile.groups['metadata'].variables['standard_state_correction'][0]

                # Choose number of samples to discard to equilibration
                MIN_ITERATIONS = 10 # minimum number of iterations to use automatic detection
                if niterations > MIN_ITERATIONS:
                    from pymbar import timeseries
                    u_n = extract_u_n(ncfile)
                    u_n = u_n[1:] # discard initial frame of zero energies TODO: Get rid of initial frame of zero energies
                    [nequil, g_t, Neff_max] = timeseries.detectEquilibration(u_n)
                    nequil += 1 # account for initial frame of zero energies
                    logger.info([nequil, Neff_max])
                else:
                    nequil = 1 # discard first frame
                    g_t = 1
                    Neff_max = niterations

                # Examine acceptance probabilities.
                show_mixing_statistics(ncfile, cutoff=0.05, nequil=nequil)

                # Estimate free energies.
                (Deltaf_ij, dDeltaf_ij) = estimate_free_energies(ncfile, ndiscard = nequil, g=g_t)

                # Estimate average enthalpies
                (DeltaH_i, dDeltaH_i) = estimate_enthalpies(ncfile, ndiscard = nequil, g=g_t)

                # Accumulate free energy differences.
                # A later suffix for the same phase overwrites an earlier one.
                entry = dict()
                entry['DeltaF'] = Deltaf_ij[0,nstates-1]
                entry['dDeltaF'] = dDeltaf_ij[0,nstates-1]
                entry['DeltaH'] = DeltaH_i[nstates-1] - DeltaH_i[0]
                entry['dDeltaH'] = np.sqrt(dDeltaH_i[0]**2 + dDeltaH_i[nstates-1]**2)
                data[phase] = entry

                # Get temperatures.
                ncvar = ncfile.groups['thermodynamic_states'].variables['temperatures']
                temperature = ncvar[0] * units.kelvin
                kT = kB * temperature
            finally:
                # Close input NetCDF file.
                ncfile.close()

    # Give the user a useful warning if no NetCDF files found.
    if netcdf_files_found == 0:
        raise Exception("No YANK output files were found in the specified store directory (%s)" % source_directory)

    if DeltaF_restraints is None:
        raise Exception("DeltaF_restraints not found.")

    # Compute binding free energy.
    DeltaF = data['solvent']['DeltaF'] - DeltaF_restraints - data['complex']['DeltaF']
    dDeltaF = np.sqrt(data['solvent']['dDeltaF']**2 + data['complex']['dDeltaF']**2)
    logger.info("")
    logger.info("Binding free energy : %16.3f +- %.3f kT (%16.3f +- %.3f kcal/mol)" % (DeltaF, dDeltaF, DeltaF * kT / units.kilocalories_per_mole, dDeltaF * kT / units.kilocalories_per_mole))
    logger.info("")
    logger.info("DeltaG solvent      : %16.3f +- %.3f kT" % (data['solvent']['DeltaF'], data['solvent']['dDeltaF']))
    logger.info("DeltaG complex      : %16.3f +- %.3f kT" % (data['complex']['DeltaF'], data['complex']['dDeltaF']))
    logger.info("DeltaG restraint    : %16.3f          kT" % DeltaF_restraints)
    logger.info("")

    # Compute binding enthalpy
    # NOTE(review): the restraint *free energy* is subtracted from an enthalpy
    # difference here -- confirm this is intended.
    DeltaH = data['solvent']['DeltaH'] - DeltaF_restraints - data['complex']['DeltaH']
    dDeltaH = np.sqrt(data['solvent']['dDeltaH']**2 + data['complex']['dDeltaH']**2)
    logger.info("Binding enthalpy    : %16.3f +- %.3f kT (%16.3f +- %.3f kcal/mol)" % (DeltaH, dDeltaH, DeltaH * kT / units.kilocalories_per_mole, dDeltaH * kT / units.kilocalories_per_mole))
def test_detectEquil():
    """Smoke test: detectEquilibration runs on 10000 normally distributed samples."""
    samples = np.random.normal(size=10000)
    t, g, Neff_max = timeseries.detectEquilibration(samples)
Example #42
0
def generate_simulation_data(database, parameters, cid):
    """
    Regenerate simulation data for given parameters.

    Runs a Langevin simulation for the molecule ``database[cid]``, discards
    the initial transient, subsamples the remaining frames, and stores the
    resulting positions and reduced energies on the database entry.

    ARGUMENTS

    database (dict) - database of molecules
    parameters (dict) - dictionary of GBSA parameters keyed on GBSA atom types
    cid - key of the molecule in database to simulate

    RETURNS

    [cid, entry] (list) - the key and its database entry, with entry["x_n"]
        (subsampled positions, in nm) and entry["u_n"] (subsampled reduced
        potential energies) filled in

    RAISES

    Exception - if any sampled energy is NaN

    """

    platform = openmm.Platform.getPlatformByName("Reference")

    from pymbar import timeseries

    entry = database[cid]
    molecule = entry["molecule"]
    iupac_name = entry["iupac"]

    # Retrieve vacuum system.
    vacuum_system = copy.deepcopy(entry["system"])

    # Retrieve OpenMM System.
    solvent_system = copy.deepcopy(entry["system"])

    # Get nonbonded force.
    forces = {
        solvent_system.getForce(index).__class__.__name__: solvent_system.getForce(index)
        for index in range(solvent_system.getNumForces())
    }
    nonbonded_force = forces["NonbondedForce"]

    # Add GBSA term
    gbsa_force = openmm.GBSAOBCForce()
    gbsa_force.setNonbondedMethod(openmm.GBSAOBCForce.NoCutoff)  # set no cutoff
    gbsa_force.setSoluteDielectric(1)
    gbsa_force.setSolventDielectric(78)

    # Build indexable list of atoms.
    atoms = list(molecule.GetAtoms())
    natoms = len(atoms)

    # Assign GBSA parameters.
    for (atom_index, atom) in enumerate(atoms):
        # Only the charge is reused; sigma and epsilon are not needed here.
        [charge, _sigma, _epsilon] = nonbonded_force.getParticleParameters(atom_index)
        atomtype = atom.GetStringData("gbsa_type")  # GBSA atomtype
        radius = parameters["%s_%s" % (atomtype, "radius")] * units.angstroms
        scalingFactor = parameters["%s_%s" % (atomtype, "scalingFactor")]
        gbsa_force.addParticle(charge, radius, scalingFactor)

    # Add the force to the system.
    solvent_system.addForce(gbsa_force)

    # Create the simulation context.
    # NOTE(review): the context is built from vacuum_system; solvent_system
    # (with the GBSA force added above) is not used again in this function --
    # confirm that sampling in vacuum is intended.
    timestep = 2.0 * units.femtosecond
    collision_rate = 20.0 / units.picoseconds
    temperature = entry["temperature"]
    integrator = openmm.LangevinIntegrator(temperature, collision_rate, timestep)
    context = openmm.Context(vacuum_system, integrator, platform)

    # Set the coordinates.
    positions = entry["positions"]
    context.setPositions(positions)

    # Minimize.
    openmm.LocalEnergyMinimizer.minimize(context)

    # Simulate, saving periodic snapshots of configurations.
    kT = kB * temperature
    beta = 1.0 / kT

    initial_time = time.time()
    nsteps_per_iteration = 2500
    niterations = 200
    x_n = np.zeros([niterations, natoms, 3], np.float32)  # positions, in nm
    u_n = np.zeros([niterations], np.float64)  # reduced potential energies (beta * U)
    for iteration in range(niterations):
        integrator.step(nsteps_per_iteration)
        state = context.getState(getEnergy=True, getPositions=True)
        x_n[iteration, :, :] = state.getPositions(asNumpy=True) / units.nanometers
        u_n[iteration] = beta * state.getPotentialEnergy()

    if np.any(np.isnan(u_n)):
        raise Exception("Encountered NaN for molecule %s | %s" % (cid, iupac_name))

    final_time = time.time()
    elapsed_time = final_time - initial_time

    # Clean up.
    del context, integrator

    # Discard initial transient to equilibration.
    [t0, g, Neff_max] = timeseries.detectEquilibration(u_n)
    x_n = x_n[t0:, :, :]
    u_n = u_n[t0:]

    # Subsample to remove correlation.
    indices = timeseries.subsampleCorrelatedData(u_n, g=g)
    x_n = x_n[indices, :, :]
    u_n = u_n[indices]

    # Store data.
    entry["x_n"] = x_n
    entry["u_n"] = u_n

    # Calling print with one pre-formatted string works on Python 2 and 3 alike.
    print("%48s | %64s | simulation %12.3f s | %5d samples discarded | %5d independent samples remain" % (
        cid,
        iupac_name,
        elapsed_time,
        t0,
        len(indices),
    ))

    return [cid, entry]
Example #43
0
def analyze(source_directory, verbose=False):
    """
    Analyze contents of store files to compute free energy differences.

    Every ``<phase>-<suffix>.nc`` file found in `source_directory` (phases
    'solvent' and 'complex', suffixes 'explicit' and 'implicit') is processed:
    the number of iterations to discard is chosen with
    pymbar.timeseries.detectEquilibration, mixing statistics are shown, and
    free energies and enthalpies are estimated. The binding free energy and
    enthalpy are then reported through the module logger.

    Parameters
    ----------
    source_directory : string
       The location of the NetCDF simulation storage files.
    verbose : bool, optional, default=False
       If True, verbose output will be generated.

    Notes
    -----
    Results are stored per phase, so if both suffixes exist for a phase the
    file processed last ('implicit') replaces the earlier one. The final
    report needs results for both 'solvent' and 'complex', and uses the kT of
    the last file processed.

    """
    # Turn on debug info.
    # TODO: Control verbosity of logging output using verbose optional flag.
    logging.basicConfig(level=logging.DEBUG)

    # Storage for different phases.
    data = dict()

    phase_prefixes = ['solvent', 'complex']
    suffixes = ['explicit', 'implicit']

    # Process each netcdf file.
    for phase in phase_prefixes:
        for suffix in suffixes:
            # Construct full path to NetCDF file.
            fullpath = os.path.join(source_directory, '%s-%s.nc' % (phase, suffix))
            if verbose:
                # Call form with a single argument prints the same text on Python 2 and 3.
                print("Attempting to open %s..." % fullpath)

            # Skip if the file doesn't exist.
            if not os.path.exists(fullpath):
                continue

            # Open NetCDF file for reading.
            logger.info("Opening NetCDF trajectory file '%s' for reading..." % fullpath)
            ncfile = netcdf.Dataset(fullpath, 'r')
            try:
                # DEBUG
                logger.info("dimensions:")
                for dimension_name in ncfile.dimensions.keys():
                    logger.info("%16s %8d" % (dimension_name, len(ncfile.dimensions[dimension_name])))

                # Read dimensions.
                niterations = ncfile.variables['positions'].shape[0]
                nstates = ncfile.variables['positions'].shape[1]
                logger.info("Read %d iterations, %d states" % (niterations, nstates))

                # Read reference PDB file.
                #if phase in ['vacuum', 'solvent']:
                #    reference_pdb_filename = os.path.join(source_directory, "ligand.pdb")
                #else:
                #    reference_pdb_filename = os.path.join(source_directory, "complex.pdb")
                #atoms = read_pdb(reference_pdb_filename)

                # Check to make sure no self-energies go nan.
                #check_energies(ncfile, atoms)

                # Check to make sure no positions are nan
                #check_positions(ncfile)

                # Choose number of samples to discard to equilibration
                # TODO: Switch to pymbar.timeseries module.
                from pymbar import timeseries
                u_n = extract_u_n(ncfile)
                [nequil, g_t, Neff_max] = timeseries.detectEquilibration(u_n)
                logger.info([nequil, Neff_max])

                # Examine acceptance probabilities.
                show_mixing_statistics(ncfile, cutoff=0.05, nequil=nequil)

                # Estimate free energies.
                (Deltaf_ij, dDeltaf_ij) = estimate_free_energies(ncfile, ndiscard=nequil)

                # Estimate average enthalpies
                (DeltaH_i, dDeltaH_i) = estimate_enthalpies(ncfile, ndiscard=nequil)

                # Accumulate free energy differences between the first and last state.
                entry = dict()
                entry['DeltaF'] = Deltaf_ij[0, nstates-1]
                entry['dDeltaF'] = dDeltaf_ij[0, nstates-1]
                entry['DeltaH'] = DeltaH_i[nstates-1] - DeltaH_i[0]
                entry['dDeltaH'] = np.sqrt(dDeltaH_i[0]**2 + dDeltaH_i[nstates-1]**2)
                data[phase] = entry

                # Get temperatures.
                ncvar = ncfile.groups['thermodynamic_states'].variables['temperatures']
                temperature = ncvar[0] * units.kelvin
                kT = kB * temperature
            finally:
                # Close input NetCDF file even if one of the analysis steps raised.
                ncfile.close()

    # Compute hydration free energy (free energy of transfer from vacuum to water)
    #DeltaF = data['vacuum']['DeltaF'] - data['solvent']['DeltaF']
    #dDeltaF = numpy.sqrt(data['vacuum']['dDeltaF']**2 + data['solvent']['dDeltaF']**2)
    #print "Hydration free energy: %.3f +- %.3f kT (%.3f +- %.3f kcal/mol)" % (DeltaF, dDeltaF, DeltaF * kT / units.kilocalories_per_mole, dDeltaF * kT / units.kilocalories_per_mole)

    # Compute enthalpy of transfer from vacuum to water
    #DeltaH = data['vacuum']['DeltaH'] - data['solvent']['DeltaH']
    #dDeltaH = numpy.sqrt(data['vacuum']['dDeltaH']**2 + data['solvent']['dDeltaH']**2)
    #print "Enthalpy of hydration: %.3f +- %.3f kT (%.3f +- %.3f kcal/mol)" % (DeltaH, dDeltaH, DeltaH * kT / units.kilocalories_per_mole, dDeltaH * kT / units.kilocalories_per_mole)

    # Read standard state correction free energy.
    # NOTE(review): this opens '<phase>.nc' whereas the per-phase files read
    # above are named '<phase>-<suffix>.nc' -- confirm the intended file name.
    phase = 'complex'
    fullpath = os.path.join(source_directory, phase + '.nc')
    ncfile = netcdf.Dataset(fullpath, 'r')
    try:
        DeltaF_restraints = ncfile.groups['metadata'].variables['standard_state_correction'][0]
    finally:
        ncfile.close()

    # Compute binding free energy.
    DeltaF = data['solvent']['DeltaF'] - DeltaF_restraints - data['complex']['DeltaF']
    dDeltaF = np.sqrt(data['solvent']['dDeltaF']**2 + data['complex']['dDeltaF']**2)
    logger.info("")
    logger.info("Binding free energy : %16.3f +- %.3f kT (%16.3f +- %.3f kcal/mol)" % (DeltaF, dDeltaF, DeltaF * kT / units.kilocalories_per_mole, dDeltaF * kT / units.kilocalories_per_mole))
    logger.info("")
    #logger.info("DeltaG vacuum       : %16.3f +- %.3f kT" % (data['vacuum']['DeltaF'], data['vacuum']['dDeltaF']))
    logger.info("DeltaG solvent      : %16.3f +- %.3f kT" % (data['solvent']['DeltaF'], data['solvent']['dDeltaF']))
    logger.info("DeltaG complex      : %16.3f +- %.3f kT" % (data['complex']['DeltaF'], data['complex']['dDeltaF']))
    logger.info("DeltaG restraint    : %16.3f          kT" % DeltaF_restraints)
    logger.info("")

    # Compute binding enthalpy
    DeltaH = data['solvent']['DeltaH'] - DeltaF_restraints - data['complex']['DeltaH']
    dDeltaH = np.sqrt(data['solvent']['dDeltaH']**2 + data['complex']['dDeltaH']**2)
    logger.info("Binding enthalpy    : %16.3f +- %.3f kT (%16.3f +- %.3f kcal/mol)" % (DeltaH, dDeltaH, DeltaH * kT / units.kilocalories_per_mole, dDeltaH * kT / units.kilocalories_per_mole))
Example #44
0
def overlap_check(reference_system, positions, receptor_atoms, ligand_atoms, platform_name=None, annihilate_electrostatics=True, annihilate_sterics=False, precision=None, nsteps=50, nsamples=200):
    """
    Test overlap between reference system and alchemical system by running a short simulation.

    The reference system is propagated with Langevin dynamics; after every
    `nsteps` steps the potential energy of the current configuration is
    evaluated in both the reference and the alchemical system. The resulting
    reduced energy differences are equilibrated, decorrelated and passed to
    pymbar's EXP estimator.

    Parameters
    ----------
    reference_system : simtk.openmm.System
       The reference System object to compare with
    positions : simtk.unit.Quantity with units compatible with nanometers
       The positions to assess energetics for.
    receptor_atoms : list of int
       The list of receptor atoms. Currently not used by this function.
    ligand_atoms : list of int
       The list of ligand atoms to alchemically modify.
    platform_name : str, optional, default=None
       The name of the platform to use for benchmarking.
    annihilate_electrostatics : bool, optional, default=True
       If True, electrostatics will be annihilated; if False, decoupled.
       Currently not used by this function.
    annihilate_sterics : bool, optional, default=False
       If True, sterics will be annihilated; if False, decoupled.
       Currently not used by this function.
    precision : optional, default=None
       Currently not used by this function.
    nsteps : int, optional, default=50
       Number of molecular dynamics steps between samples.
    nsamples : int, optional, default=200
       Number of samples to collect.

    Raises
    ------
    Exception
       If the estimated uncertainty of the free energy difference exceeds 3 kT.

    """

    # Create a fully-interacting alchemical state.
    factory = AbsoluteAlchemicalFactory(reference_system, ligand_atoms=ligand_atoms)
    alchemical_state = AlchemicalState()
    alchemical_system = factory.createPerturbedSystem(alchemical_state)

    temperature = 300.0 * units.kelvin
    collision_rate = 5.0 / units.picoseconds
    timestep = 2.0 * units.femtoseconds
    kT = (kB * temperature)

    # Select platform.
    platform = None
    if platform_name:
        platform = openmm.Platform.getPlatformByName(platform_name)

    # Create integrators.
    # Only the reference integrator is ever stepped; the alchemical context is
    # used solely to evaluate energies of reference configurations.
    reference_integrator = openmm.LangevinIntegrator(temperature, collision_rate, timestep)
    alchemical_integrator = openmm.VerletIntegrator(timestep)

    # Create contexts.
    if platform:
        reference_context = openmm.Context(reference_system, reference_integrator, platform)
        alchemical_context = openmm.Context(alchemical_system, alchemical_integrator, platform)
    else:
        reference_context = openmm.Context(reference_system, reference_integrator)
        alchemical_context = openmm.Context(alchemical_system, alchemical_integrator)

    # Collect simulation data.
    reference_context.setPositions(positions)
    # du_n[n] is the (alchemical - reference) potential energy difference of sample n, in kT
    du_n = np.zeros([nsamples], np.float64)
    for sample in range(nsamples):
        # Run dynamics.
        reference_integrator.step(nsteps)

        # Get reference energies.
        reference_state = reference_context.getState(getEnergy=True, getPositions=True)
        reference_potential = reference_state.getPotentialEnergy()

        # Get alchemical energies for the same configuration.
        alchemical_context.setPositions(reference_state.getPositions())
        alchemical_energy_state = alchemical_context.getState(getEnergy=True)
        alchemical_potential = alchemical_energy_state.getPotentialEnergy()

        du_n[sample] = (alchemical_potential - reference_potential) / kT

    # Clean up.
    del reference_context, alchemical_context

    # Discard data to equilibration and subsample.
    from pymbar import timeseries
    [t0, g, Neff] = timeseries.detectEquilibration(du_n)
    # g is estimated for the series starting at t0, so the initial transient
    # has to be removed before subsampling with it.
    du_n = du_n[t0:]
    indices = timeseries.subsampleCorrelatedData(du_n, g=g)
    du_n = du_n[indices]

    # Compute statistics.
    from pymbar import EXP
    [DeltaF, dDeltaF] = EXP(du_n)

    # Raise an exception if the error is larger than 3kT.
    MAX_DEVIATION = 3.0 # kT
    if (dDeltaF > MAX_DEVIATION):
        report = "DeltaF = %12.3f +- %12.3f kT (%5d samples, g = %6.1f)" % (DeltaF, dDeltaF, Neff, g)
        raise Exception(report)

    return
Example #45
0
def analyze(source_directory):
    """
    Analyze contents of store files to compute free energy differences.

    The list of phases and their signs is read from ``analysis.yaml`` in
    `source_directory`; each phase is expected in ``<phase>.nc``. For every
    phase the equilibration time is chosen, mixing statistics are shown, and
    free energies and enthalpies are estimated with MBAR. The signed sums over
    phases are reported through the module logger.

    Parameters
    ----------
    source_directory : string
       The location of the NetCDF simulation storage files.

    Raises
    ------
    RuntimeError
       If ``analysis.yaml`` cannot be found in `source_directory`.

    """
    analysis_script_path = os.path.join(source_directory, 'analysis.yaml')
    if not os.path.isfile(analysis_script_path):
        err_msg = 'Cannot find analysis.yaml script in {}'.format(source_directory)
        logger.error(err_msg)
        raise RuntimeError(err_msg)
    with open(analysis_script_path, 'r') as f:
        # NOTE(review): yaml.load without an explicit Loader can construct
        # arbitrary Python objects and is rejected by recent PyYAML releases;
        # consider yaml.safe_load if analysis.yaml only holds plain lists.
        analysis = yaml.load(f)
    phases = [phase_name for phase_name, sign in analysis]

    # Storage for different phases.
    data = dict()

    # Process each netcdf file.
    for phase in phases:
        ncfile_path = os.path.join(source_directory, phase + '.nc')

        # Open NetCDF file for reading.
        logger.info("Opening NetCDF trajectory file %s for reading..." % ncfile_path)
        # Open before entering the try block: if the open itself fails there is
        # nothing to close, and the original error must not be masked by the
        # cleanup code touching an unbound (or previously closed) handle.
        ncfile = netcdf.Dataset(ncfile_path, 'r')
        try:
            logger.debug("dimensions:")
            for dimension_name in ncfile.dimensions.keys():
                logger.debug("%16s %8d" % (dimension_name, len(ncfile.dimensions[dimension_name])))

            # Read dimensions.
            niterations = ncfile.variables['positions'].shape[0]
            nstates = ncfile.variables['positions'].shape[1]
            logger.info("Read %d iterations, %d states" % (niterations, nstates))

            DeltaF_restraints = 0.0
            if 'metadata' in ncfile.groups:
                # Read phase direction and standard state correction free energy.
                # Yank sets correction to 0 if there are no restraints
                DeltaF_restraints = ncfile.groups['metadata'].variables['standard_state_correction'][0]

            # Choose number of samples to discard to equilibration
            MIN_ITERATIONS = 10 # minimum number of iterations to use automatic detection
            if niterations > MIN_ITERATIONS:
                from pymbar import timeseries
                u_n = extract_u_n(ncfile)
                u_n = u_n[1:] # discard initial frame of zero energies TODO: Get rid of initial frame of zero energies
                [nequil, g_t, Neff_max] = timeseries.detectEquilibration(u_n)
                nequil += 1 # account for initial frame of zero energies
                logger.info([nequil, Neff_max])
            else:
                nequil = 1  # discard first frame
                g_t = 1
                Neff_max = niterations

            # Examine acceptance probabilities.
            show_mixing_statistics(ncfile, cutoff=0.05, nequil=nequil)

            # Extract equilibrated, decorrelated energies, check for fully interacting state
            (u_kln, N_k, u_n) = extract_ncfile_energies(ncfile, ndiscard=nequil, g=g_t)

            # Create MBAR object to use for free energy and entropy states
            mbar = initialize_MBAR(ncfile, u_kln=u_kln, N_k=N_k)

            # Estimate free energies, use fully interacting state if present
            (Deltaf_ij, dDeltaf_ij) = estimate_free_energies(ncfile, mbar=mbar)

            # Estimate average enthalpies
            (DeltaH_i, dDeltaH_i) = estimate_enthalpies(ncfile, mbar=mbar)

            # Accumulate free energy differences between the first and last state.
            entry = dict()
            entry['DeltaF'] = Deltaf_ij[0, -1]
            entry['dDeltaF'] = dDeltaf_ij[0, -1]
            entry['DeltaH'] = DeltaH_i[0, -1]
            entry['dDeltaH'] = dDeltaH_i[0, -1]
            entry['DeltaF_restraints'] = DeltaF_restraints
            data[phase] = entry

            # Get temperatures. kT of the last phase processed is used for the report below.
            ncvar = ncfile.groups['thermodynamic_states'].variables['temperatures']
            temperature = ncvar[0] * units.kelvin
            kT = kB * temperature

        finally:
            ncfile.close()

    # Compute free energy and enthalpy as signed sums over phases;
    # uncertainties are combined in quadrature.
    DeltaF = 0.0
    dDeltaF = 0.0
    DeltaH = 0.0
    dDeltaH = 0.0
    for phase, sign in analysis:
        DeltaF -= sign * (data[phase]['DeltaF'] + data[phase]['DeltaF_restraints'])
        dDeltaF += data[phase]['dDeltaF']**2
        DeltaH -= sign * (data[phase]['DeltaH'] + data[phase]['DeltaF_restraints'])
        dDeltaH += data[phase]['dDeltaH']**2
    dDeltaF = np.sqrt(dDeltaF)
    dDeltaH = np.sqrt(dDeltaH)

    # Attempt to guess type of calculation from the phase names.
    calculation_type = ''
    for phase in phases:
        if 'complex' in phase:
            calculation_type = ' of binding'
        elif 'solvent1' in phase:
            calculation_type = ' of solvation'

    # Print energies
    logger.info("")
    logger.info("Free energy{}: {:16.3f} +- {:.3f} kT ({:16.3f} +- {:.3f} kcal/mol)".format(
        calculation_type, DeltaF, dDeltaF, DeltaF * kT / units.kilocalories_per_mole,
        dDeltaF * kT / units.kilocalories_per_mole))
    logger.info("")

    for phase in phases:
        logger.info("DeltaG {:<25} : {:16.3f} +- {:.3f} kT".format(phase, data[phase]['DeltaF'],
                                                                   data[phase]['dDeltaF']))
        if data[phase]['DeltaF_restraints'] != 0.0:
            logger.info("DeltaG {:<25} : {:25.3f} kT".format('restraint',
                                                             data[phase]['DeltaF_restraints']))
    logger.info("")
    logger.info("Enthalpy{}: {:16.3f} +- {:.3f} kT ({:16.3f} +- {:.3f} kcal/mol)".format(
        calculation_type, DeltaH, dDeltaH, DeltaH * kT / units.kilocalories_per_mole,
        dDeltaH * kT / units.kilocalories_per_mole))
Example #46
0
def extract_trajectory(output_path, nc_path, state_index=None, replica_index=None,
                       start_frame=0, end_frame=-1, skip_frame=1, keep_solvent=True,
                       discard_equilibration=False, image_molecules=False):
    """Extract phase trajectory from the NetCDF4 file.

    Parameters
    ----------
    output_path : str
        Path to the trajectory file to be created. The extension of the file
        determines the format.
    nc_path : str
        Path to the NetCDF4 file containing the trajectory.
    state_index : int, optional
        The index of the alchemical state for which to extract the trajectory.
        One and only one between state_index and replica_index must be not None
        (default is None).
    replica_index : int, optional
        The index of the replica for which to extract the trajectory. One and
        only one between state_index and replica_index must be not None (default
        is None).
    start_frame : int, optional
        Index of the first frame to include in the trajectory. Values <= 0 are
        replaced by 1, so the first stored frame is always skipped (default
        is 0).
    end_frame : int, optional
        End of the frame range. A non-negative value is used as the exclusive
        upper bound of the range; a negative value counts from the end, with
        -1 selecting through the last iteration (default is -1).
    skip_frame : int, optional
        Extract one frame every skip_frame (default is 1).
    keep_solvent : bool, optional
        If False, solvent molecules are ignored (default is True).
    discard_equilibration : bool, optional
        If True, initial equilibration frames are discarded (see the method
        pymbar.timeseries.detectEquilibration() for details, default is False).
    image_molecules : bool, optional
        If True, mdtraj's Trajectory.image_molecules() is applied in place
        before the trajectory is saved (default is False).

    Raises
    ------
    ValueError
        If not exactly one of state_index and replica_index is given, if
        nc_path is not a file, if no frames are selected, or if the output
        format cannot be detected from the extension of output_path.

    """
    # Check correct input
    if (state_index is None) == (replica_index is None):
        raise ValueError('One and only one between "state_index" and '
                         '"replica_index" must be specified.')
    if not os.path.isfile(nc_path):
        raise ValueError('Cannot find file {}'.format(nc_path))

    # Import simulation data.
    # The file is opened before the try block so that a failed open is reported
    # as is, instead of being masked by the cleanup touching an unbound name.
    nc_file = netcdf.Dataset(nc_path, 'r')
    try:
        # Extract topology and system serialization
        serialized_system = nc_file.groups['metadata'].variables['reference_system'][0]
        serialized_topology = nc_file.groups['metadata'].variables['topology'][0]

        # Determine if system is periodic
        from simtk import openmm
        reference_system = openmm.XmlSerializer.deserialize(str(serialized_system))
        is_periodic = reference_system.usesPeriodicBoundaryConditions()
        logger.info('Detected periodic boundary conditions: {}'.format(is_periodic))

        # Get dimensions
        n_iterations = nc_file.variables['positions'].shape[0]
        n_atoms = nc_file.variables['positions'].shape[2]
        logger.info('Number of iterations: {}, atoms: {}'.format(n_iterations, n_atoms))

        # Determine frames to extract
        if start_frame <= 0:
            # TODO yank saves first frame with 0 energy!
            start_frame = 1
        if end_frame < 0:
            end_frame = n_iterations + end_frame + 1
        frame_indices = range(start_frame, end_frame, skip_frame)
        if len(frame_indices) == 0:
            raise ValueError('No frames selected')
        logger.info('Extracting frames from {} to {} every {}'.format(
            start_frame, end_frame, skip_frame))

        # Discard equilibration samples
        if discard_equilibration:
            u_n = extract_u_n(nc_file)[frame_indices]
            n_equil, g, n_eff = timeseries.detectEquilibration(u_n)
            logger.info(("Discarding initial {} equilibration samples (leaving {} "
                         "effectively uncorrelated samples)...").format(n_equil, n_eff))
            # NOTE(review): besides the first n_equil frames, this slice also
            # drops the last selected frame -- confirm that this is intended.
            frame_indices = frame_indices[n_equil:-1]

        n_frames = len(frame_indices)

        # Decide, for every selected frame, which replica to read from.
        if state_index is not None:
            logger.info('Extracting positions of state {}...'.format(state_index))

            # Deconvolute state indices: find the replica that is in the
            # requested state at each iteration. The array must be integer
            # typed because its entries are used as array indices below.
            replica_per_frame = np.zeros(n_frames, dtype=int)
            for i, iteration in enumerate(frame_indices):
                states_of_replicas = nc_file.variables['states'][iteration, :]
                replica_per_frame[i] = np.where(states_of_replicas == state_index)[0][0]
        else:
            logger.info('Extracting positions of replica {}...'.format(replica_index))
            replica_per_frame = [replica_index] * n_frames

        # Extract positions and box vectors
        positions = np.zeros((n_frames, n_atoms, 3))
        if is_periodic:
            box_vectors = np.zeros((n_frames, 3, 3))
        for i, iteration in enumerate(frame_indices):
            source_replica = replica_per_frame[i]
            positions[i, :, :] = nc_file.variables['positions'][iteration, source_replica, :, :].astype(np.float32)
            if is_periodic:
                box_vectors[i, :, :] = nc_file.variables['box_vectors'][iteration, source_replica, :, :].astype(np.float32)
    finally:
        nc_file.close()

    # Create trajectory object
    logger.info('Creating trajectory object...')
    topology = utils.deserialize_topology(serialized_topology)
    trajectory = mdtraj.Trajectory(positions, topology)
    if is_periodic:
        trajectory.unitcell_vectors = box_vectors

    # Force periodic boundary conditions to molecules positions
    if image_molecules:
        logger.info('Applying periodic boundary conditions to molecules positions...')
        trajectory.image_molecules(inplace=True)

    # Remove solvent
    if not keep_solvent:
        logger.info('Removing solvent molecules...')
        trajectory = trajectory.remove_solvent()

    # Detect format: mdtraj exposes one save_<extension> method per supported format.
    extension = os.path.splitext(output_path)[1][1:]  # remove dot
    try:
        save_function = getattr(trajectory, 'save_' + extension)
    except AttributeError:
        raise ValueError('Cannot detect format from extension of file {}'.format(output_path))

    # Create output directory and save trajectory
    logger.info('Creating trajectory file: {}'.format(output_path))
    output_dir = os.path.dirname(output_path)
    if output_dir != '' and not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    save_function(output_path)
# Write per-molecule log D estimates and MCMC sampling details to text files.
# NOTE(review): `out` and `used_samples` are not closed within this excerpt --
# confirm they are closed once the loop below has finished.
out = open("logd_bayes.txt", 'w')
used_samples = open("mcmc_sampling_details.txt","w")
# Every header and record ends with a newline so that each file holds one
# entry per line instead of a single run-together line.
out.write("Molecule, Log D +/-, HPD95%[low, high]\n")
debug.write("Molecule mean - median = difference\n")
used_samples.write("Molecule, equilibration, N samples\n")
# curdir = os.getcwd()
# os.makedirs("plots", )
# os.chdir("plots")


for mol in sorted(x.logd.keys()):
    print("Processing {}".format(mol))
    # sns.plt.figure()
    trace = numpy.asarray(mc.trace("LogD_{}".format(mol))[:])
    # Burn in and thinning estimated using pymbar
    burnin = detectEquilibration(trace)[0]
    trace = trace[burnin:]
    uncorrelated_indices = subsampleCorrelatedData(trace)
    trace = trace[uncorrelated_indices]

    # Summary statistics of the equilibrated, decorrelated trace.
    median = pymc.utils.quantiles(trace)[50]
    mean = numpy.mean(trace)
    lower, upper = pymc.utils.hpd(trace, 0.05)
    lower_s = to_precision(lower,2) # string of number with 2 sig digits
    upper_s = to_precision(upper,2)
    logd = ufloat(mean, numpy.std(trace))

    # Formats the mean and error by the correct amount of significant digits
    out.write("{0}, {1:.1u}, [{2}, {3}]\n".format(mol, logd, lower_s, upper_s ))
    debug.write("{}: {} - {} = {}\n".format(mol, mean, median, mean-median))
    used_samples.write("{}, {}, {}\n".format(mol, burnin, len(uncorrelated_indices)))