Example 1
def get_energy(paths, listRes=['Ligand']):
    """
    Read Energies from .edr files and
    use the `panedr` library to parse them.

    :params paths:  Path to the edr files.
    :returns: Pandas dataframe.
    """
    if not isinstance(paths, list):
        df = edr_to_df(paths)
    else:
        # concatenate the data frames and reduce with the mean function
        rs = [edr_to_df(p) for p in paths]
        df = pandas.concat(rs)
        df = df.groupby(df.index).mean()

    # Reindex dataframe using sequential integers
    df.reset_index(inplace=True)

    # Electrostatic Energy
    df['ele'] = sum_available_columns(
        df, ['Coulomb-14', 'Coulomb (SR)', 'Coulomb (LR)', 'Coul. recip.'])

    # Van der Waals terms
    df['vdw'] = sum_available_columns(
        df, ['LJ-14', 'LJ (SR)', 'LJ (LR)'])

    return extract_ligand_info(df, listRes)
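
Example 1 assumes a `sum_available_columns` helper that is not shown. A minimal sketch of what such a helper could look like, assuming it sums whichever of the requested columns are present in the dataframe and falls back to zero when none are (the real project may define it differently):

import pandas


def sum_available_columns(df: pandas.DataFrame, columns: list) -> pandas.Series:
    """Sum only the requested columns that actually exist in ``df``.

    Hypothetical helper matching the way get_energy calls it above.
    """
    present = [col for col in columns if col in df.columns]
    if not present:
        # None of the requested energy terms were written to the .edr file
        return pandas.Series(0.0, index=df.index)
    return df[present].sum(axis=1)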
Example 2
    def get_dDens_from_para(self, k) -> float:
        os.chdir(self.dir_npt)

        # energy and Hvap after diff
        try:
            df = panedr.edr_to_df('diff1.%s.edr' % k)
        except Exception:
            raise Exception('File does not exist: ' +
                            os.path.abspath('diff1.%s.edr' % k))
        pene_array_diff_p = np.array(df.Potential)

        # try:
        #     df = panedr.edr_to_df('diff-1.%s.edr' % k)
        # except:
        #     raise Exception('File not exist: ' + os.path.abspath('diff-1.%s.edr' % k))
        # pene_array_diff_n = np.array(df.Potential)
        try:
            df = panedr.edr_to_df('npt.edr')
        except Exception:
            raise Exception('File does not exist: ' + os.path.abspath('npt.edr'))
        pene_array = np.array(df.Potential)

        # calculate the derivative series dA/dp
        delta = get_delta_for_para(k)
        # dPene_array = (pene_array_diff_p - pene_array_diff_n) / delta / 2
        dPene_array = (pene_array_diff_p - pene_array) / delta

        # calculate the derivative dA/dp according to ForceBalance
        # TODO To accurately calculate the covariance, use dens_array.mean() instead of dens_series.mean()
        dDdp = -1 / self.RT * ((self.dens_array * dPene_array).mean() -
                               self.dens_array.mean() * dPene_array.mean())

        return dDdp
Example 3
def get_gmx_energy(edrfile):
    """ Parse and canonicalize energies from gromacs edr file 
    
    Notes
    -----
    gromacs energy units are kJ/mol
    """
    gmx_force_groups = {'gromacs': {}}
    key_to_col = {
        'bond': ['Bond'],
        'angle': ['Angle'],
        'dihedral': ['Proper Dih.', 'Ryckaert-Bell.'],
        'LJ': ['LJ-14', 'LJ (SR)'],
        'QQ': ['Coulomb-14', 'Coulomb (SR)'],
        'nonbond': ['LJ-14', 'Coulomb-14', 'LJ (SR)', 'Coulomb (SR)'],
        'all': ['Potential']
    }

    edr_df = panedr.edr_to_df(edrfile)  # From the edr

    for canonical_name, df_cols in key_to_col.items():
        gmx_force_groups['gromacs'][canonical_name] = sum(
            [edr_df.iloc[0][col] for col in df_cols if col in edr_df.columns])

    return gmx_force_groups
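
A short usage sketch for `get_gmx_energy`; note that it only reads the first frame of the energy file (`iloc[0]`). The file name below is hypothetical:

# Hypothetical call; "ener.edr" stands in for any .edr file from a GROMACS run.
force_groups = get_gmx_energy("ener.edr")
for name, value in force_groups["gromacs"].items():
    print("%10s: %.3f kJ/mol" % (name, value))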
Example 4
def edr(request):
    edrfile, xvgfile = request.param
    df = panedr.edr_to_df(edrfile)
    xvgdata, xvgnames, xvgprec = read_xvg(xvgfile)
    xvgtime = xvgdata[:, 0]
    xvgdata = xvgdata[:, 1:]
    return EDR_Data(df, xvgdata, xvgtime, xvgnames, xvgprec, edrfile, xvgfile)
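
The fixture above relies on an `EDR_Data` container and a `read_xvg` helper defined elsewhere in the test suite. A minimal sketch of the container, assuming it is a plain namedtuple whose fields follow the positional arguments used in the fixture (the real test module may differ):

from collections import namedtuple

# Field order mirrors the call in the fixture above.
EDR_Data = namedtuple(
    'EDR_Data',
    ['df', 'xvgdata', 'xvgtime', 'xvgnames', 'xvgprec', 'edrfile', 'xvgfile'])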
Example 5
 def test_progress(self):
     """
     Test the progress meter displays what is expected.
     """
     output = StringIO()
     with redirect_stderr(output):
         df = panedr.edr_to_df(EDR, verbose=True)
     progress = output.getvalue().split('\n')[0].split('\r')
     print(progress)
     dt = 2000.0
     # We can already iterate on `progress`, but I want to keep the cursor
     # position from one for loop to the other.
     progress_iter = iter(progress)
     self.assertEqual('', next(progress_iter))
     self._assert_progress_range(progress_iter, dt, 0, 21, 1)
     self._assert_progress_range(progress_iter, dt, 30, 201, 10)
     self._assert_progress_range(progress_iter, dt, 300, 2001, 100)
     self._assert_progress_range(progress_iter, dt, 3000, 14101, 1000)
     # Check the last line
     print(df.iloc[-1, 0])
     ref_line = 'Last Frame read : 14099, time : 28198000.0 ps'
     last_line = next(progress_iter)
     self.assertEqual(ref_line, last_line)
     # Did we leave stderr clean with a nice new line at the end?
     self.assertTrue(output.getvalue().endswith('\n'),
                     'The new line is missing at the end.')
def dA_endpoint_MBAR(polymorphs='p1 p2', Molecules=72, Independent=4, Temp=200):
    # Setting constants
    kJ_to_kcal = 1/4.184  # Converting kJ to kcal
    kB = 0.0019872041  # Boltzmann constant in kcal/(mol*K)

    # Getting the polymorph names
    polymorphs = polymorphs.split()

    # Place to store the free energy differences
    dA = np.zeros(len(polymorphs))
    ddA = np.zeros(len(polymorphs))

    for i, poly in enumerate(polymorphs):
        if os.path.isfile(poly + '/interactions/100/PROD.edr') and os.path.isfile(poly + '/interactions/100/END.edr'):
            # Loading in the difference between the endpoint and production files
            dU = panedr.edr_to_df(poly + '/interactions/100/END.edr')['Potential'].values - panedr.edr_to_df(poly + '/interactions/100/PROD.edr')['Potential'].values

            # Converting the energy differences to go into pymbar
            dW = dU / Molecules * kJ_to_kcal / (kB * Temp)

            # Getting the energy differences with MBAR using Exponential Averaging
            da = np.array(pymbar.EXP(dW)) * kB * Temp

            dA[i] = -da[0]
            ddA[i] = da[1]
        else:
            dA[i] = np.nan
            ddA[i] = np.nan

    # Check to see if there are any nan values
    if np.any(np.isnan(dA)):
        dA[:] = 0.
        ddA[:] = 0.

    return dA, ddA
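
`pymbar.EXP` implements the Zwanzig exponential-averaging estimator, dA = -kT * ln( <exp(-dU/kT)> ), applied here to the reduced work values `dW`. A plain NumPy sketch of the same estimate, shown only to make the math explicit; the uncertainty below is a simple standard error and ignores time correlation, unlike pymbar:

import numpy as np


def exp_averaging(dW):
    """Exponential-averaging estimate of a reduced free-energy difference.

    Simplified stand-in for pymbar.EXP: returns (DeltaF, dDeltaF) in
    reduced (kT) units for the reduced work values dW.
    """
    x = np.exp(-np.asarray(dW, dtype=float))
    delta_f = -np.log(x.mean())
    # First-order propagation of the standard error of <exp(-dW)>
    d_delta_f = x.std(ddof=1) / (x.mean() * np.sqrt(len(x)))
    return delta_f, d_delta_f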
Example 7
 def test_progress(self):
     """
     Test the progress meter displays what is expected.
     """
     output = StringIO()
     with redirect_stderr(output):
         df = panedr.edr_to_df(EDR, verbose=True)
     progress = output.getvalue().split('\n')[0].split('\r')
     print(progress)
     dt = 2000.0
     # We can already iterate on `progress`, but I want to keep the cursor
     # position from one for loop to the other.
     progress_iter = iter(progress)
     assert '' == next(progress_iter)
     self._assert_progress_range(progress_iter, dt, 0, 21, 1)
     self._assert_progress_range(progress_iter, dt, 30, 201, 10)
     self._assert_progress_range(progress_iter, dt, 300, 2001, 100)
     self._assert_progress_range(progress_iter, dt, 3000, 14101, 1000)
     # Check the last line
     print(df.iloc[-1, 0])
     ref_line = 'Last Frame read : 14099, time : 28198000.0 ps'
     last_line = next(progress_iter)
     assert ref_line == last_line
     # Did we leave stderr clean with a nice new line at the end?
     assert output.getvalue().endswith('\n'), \
            'New line missing at the end of output.'
Example 8
def calculate_density(job):
    """Calculate the density"""

    import panedr
    import numpy as np
    from block_average import block_average

    # Load the thermo data
    df = panedr.edr_to_df(job.fn("prod.edr"))

    # pull density and take average
    density = df[df.Time > 500.0].Density.values
    ave = np.mean(density)

    # save average density
    job.doc.density = ave

    (means_est, vars_est, vars_err) = block_average(density)

    with open(job.fn("density_blk_avg.txt"), "w") as ferr:
        ferr.write("# nblk_ops, mean, vars, vars_err\n")
        for nblk_ops, (mean_est, var_est,
                       var_err) in enumerate(zip(means_est, vars_est,
                                                 vars_err)):
            ferr.write("{}\t{}\t{}\t{}\n".format(nblk_ops, mean_est, var_est,
                                                 var_err))

    job.doc.density_unc = np.max(np.sqrt(vars_est))
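
`calculate_density` imports `block_average` from a local module that is not shown. A hedged sketch of a blocking analysis with the same return shape (one mean estimate, one variance-of-the-mean estimate, and its error per blocking operation), in the spirit of Flyvbjerg-Petersen blocking; the real module may differ:

import numpy as np


def block_average(data):
    """Blocking analysis of a correlated time series.

    Hypothetical stand-in: at each blocking operation, neighbouring points
    are averaged pairwise, and the variance of the overall mean is
    re-estimated from the blocked series.
    """
    data = np.asarray(data, dtype=float)
    means, var_means, var_errs = [], [], []
    while len(data) >= 4:
        n = len(data)
        means.append(data.mean())
        var_of_mean = data.var(ddof=0) / (n - 1)
        var_means.append(var_of_mean)
        var_errs.append(var_of_mean * np.sqrt(2.0 / (n - 1)))
        # One blocking operation: average neighbouring pairs
        data = 0.5 * (data[0:2 * (n // 2):2] + data[1:2 * (n // 2):2])
    return np.array(means), np.array(var_means), np.array(var_errs)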
    def fileedr(self):
        if self._file_handler is None:
            try:
                self._file_handler = panedr.edr_to_df(self.mainfile)
            except Exception:
                self.logger.error('Error reading edr file.')

        return self._file_handler
Example 10
    def get_hvap(self) -> float:
        os.chdir(self.dir_npt)
        print(os.getcwd())

        if not self.need_vacuum:
            df = panedr.edr_to_df('hvap.edr')
            hvap = self.RT - df.Potential.mean() / self.n_mol
        else:
            df = panedr.edr_to_df('npt.edr')
            pe_liq = df.Potential.mean()
            os.chdir(self.dir_vacuum)
            print(os.getcwd())

            df = panedr.edr_to_df('nvt.edr')
            pe_gas = df.Potential.mean()
            hvap = self.RT + pe_gas - pe_liq / self.n_mol

        return hvap
Example 11
    def get_density(self) -> float:
        os.chdir(self.dir_npt)
        print(os.getcwd())

        df = panedr.edr_to_df('npt.edr')
        density = df.Density.mean() / 1000  # convert to g/mL

        self.sim_dens = density  # save self.sim_dens for calculating expansivity
        return density
Example 12
 def test_times(self):
     """
     Test that the time is read correctly when dt is regular.
     """
     df = panedr.edr_to_df(EDR)
     xvg = read_xvg(EDR_XVG)
     ref_time = xvg[:, 0]
     time = df[u'Time'].to_numpy()
     self.assertTrue(numpy.allclose(ref_time, time, atol=5e-7))
Example 13
 def test_verbosity(self):
     """
     Make sure the verbose mode does not alter the results.
     """
     with redirect_stderr(sys.stdout):
         df = panedr.edr_to_df(EDR, verbose=True)
     ref_content, _, prec = read_xvg(EDR_XVG)
     content = df.values
     print(ref_content - content)
     assert_allclose(ref_content, content, atol=prec/2)
Example 14
def edr(request):
    edrfile, xvgfile = request.param
    df = panedr.edr_to_df(edrfile)
    edr_dict = pyedr.edr_to_dict(edrfile)
    xvgdata, xvgnames, xvgprec = read_xvg(xvgfile)
    xvgtime = xvgdata[:, 0]
    xvgdata = xvgdata[:, 1:]
    xvgcols = np.insert(xvgnames, 0, u'Time')
    return EDR_Data(df, edr_dict, xvgdata, xvgtime, xvgnames,
                    xvgcols, xvgprec, edrfile, xvgfile)
Example 15
 def test_verbosity(self):
     """
     Make sure the verbose mode does not alter the results.
     """
     with redirect_stderr(sys.stdout):
         df = panedr.edr_to_df(EDR, verbose=True)
     ref_content = read_xvg(EDR_XVG)
     content = df.to_numpy()
     print(ref_content - content)
     self.assertTrue(numpy.allclose(ref_content, content, atol=5e-7))
Example 16
 def test_content(self):
     """
     Test that the content of the DataFrame is the expected one.
     """
     df = panedr.edr_to_df(EDR)
     xvg = read_xvg(EDR_XVG)
     ref_content = xvg[:, 1:]  # The time column is tested separately
     content = df.iloc[:, 1:].to_numpy()
     print(ref_content - content)
     self.assertTrue(numpy.allclose(ref_content, content, atol=5e-7))
Example 17
    def get_dHvap_list_from_paras(self, paras: OrderedDict):
        os.chdir(self.dir_npt)

        if not self.need_vacuum:
            df = panedr.edr_to_df('hvap.edr')
            self.hvap_array = self.RT - np.array(df.Potential) / self.n_mol
        else:
            df = panedr.edr_to_df('npt.edr')
            self.pe_liq_array = np.array(df.Potential)

            os.chdir(self.dir_vacuum)
            df = panedr.edr_to_df('nvt.edr')
            self.pe_gas_array = np.array(df.Potential)

        # dHdp_list = [self.get_dHvap_from_para(k) for k in paras.keys()]
        from multiprocessing import Pool
        with Pool(len(paras)) as p:
            dHdp_list = p.map(wrapper_target, [(self, 'get_dHvap_from_para', k)
                                               for k in paras.keys()])

        return dHdp_list
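
This method (and `get_dDens_list_from_paras` in Example 20) hands a `(self, method name, key)` tuple to a module-level `wrapper_target` function so the call can be dispatched through `multiprocessing.Pool`. That helper is not shown; a plausible sketch, assuming it does nothing more than unpack the tuple and invoke the named method:

def wrapper_target(args):
    """Unpack (instance, method_name, key) and call the named method with key.

    Hypothetical stand-in for the module-level helper used with
    multiprocessing.Pool above; the real project may define it differently.
    """
    obj, method_name, key = args
    return getattr(obj, method_name)(key)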
Example 18
def _parse_gmx_energy(edr_path: str) -> EnergyReport:
    """Parse an `.edr` file written by `gmx energy`."""
    import panedr

    if TYPE_CHECKING:
        from pandas import DataFrame

    df: DataFrame = panedr.edr_to_df(edr_path)
    energies_dict: Dict = df.to_dict("index")  # type: ignore[assignment]
    energies = energies_dict[0.0]
    energies.pop("Time")

    for key in energies:
        energies[key] *= kj_mol

    # TODO: Better way of filling in missing fields
    # GROMACS may not populate all keys
    for required_key in ["Bond", "Angle", "Proper Dih."]:
        if required_key not in energies:
            energies[required_key] = 0.0 * kj_mol

    keys_to_drop = [
        "Kinetic En.",
        "Temperature",
        "Pres. DC",
        "Pressure",
        "Vir-XX",
        "Vir-YY",
        "Vir-ZZ",
        "Vir-YX",
        "Vir-XY",
        "Vir-YZ",
        "Vir-XZ",
    ]
    for key in keys_to_drop:
        if key in energies.keys():
            energies.pop(key)

    report = EnergyReport()

    report.update_energies({
        "Bond": energies["Bond"],
        "Angle": energies["Angle"],
        "Torsion": _get_gmx_energy_torsion(energies),
        "vdW": _get_gmx_energy_vdw(energies),
        "Electrostatics": _get_gmx_energy_coul(energies),
    })

    return report
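
`_parse_gmx_energy` delegates the grouping of torsion, van der Waals, and electrostatic terms to helpers that are not shown. A hedged sketch of how such helpers could be written, assuming they simply sum whichever of the relevant GROMACS terms were reported; `kj_mol` is assumed to be the unit constant already used in the surrounding module, and the real openff-interchange helpers may group terms differently:

def _sum_reported(energies, keys):
    """Sum the terms from ``keys`` that GROMACS actually wrote."""
    total = 0.0 * kj_mol  # assumed kJ/mol unit constant from the module above
    for key in keys:
        if key in energies:
            total += energies[key]
    return total


def _get_gmx_energy_torsion(energies):
    # Proper/improper torsions and Ryckaert-Bellemans dihedrals
    return _sum_reported(energies, ["Proper Dih.", "Improper Dih.", "Ryckaert-Bell."])


def _get_gmx_energy_vdw(energies):
    # Short-range LJ, 1-4 LJ, and the dispersion correction when present
    return _sum_reported(energies, ["LJ (SR)", "LJ-14", "Disper. corr."])


def _get_gmx_energy_coul(energies):
    # Short-range Coulomb, 1-4 Coulomb, and the reciprocal-space part
    return _sum_reported(energies, ["Coulomb (SR)", "Coulomb-14", "Coul. recip."])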
Example 19
 def test_columns(self):
     """
     Test that the column names and order match.
     """
     df = panedr.edr_to_df(EDR)
     ref_columns = numpy.array([u'Time', u'Bond', u'G96Angle',
                                u'Improper Dih.', u'LJ (SR)',
                                u'Coulomb (SR)', u'Potential',
                                u'Kinetic En.', u'Total Energy',
                                u'Temperature', u'Pressure',
                                u'Constr. rmsd', u'Box-X', u'Box-Y',
                                u'Box-Z', u'Volume', u'Density', u'pV',
                                u'Enthalpy', u'Vir-XX', u'Vir-XY',
                                u'Vir-XZ', u'Vir-YX', u'Vir-YY', u'Vir-YZ',
                                u'Vir-ZX', u'Vir-ZY', u'Vir-ZZ', u'Pres-XX',
                                u'Pres-XY', u'Pres-XZ', u'Pres-YX',
                                u'Pres-YY', u'Pres-YZ', u'Pres-ZX',
                                u'Pres-ZY', u'Pres-ZZ', u'#Surf*SurfTen',
                                u'Box-Vel-XX', u'Box-Vel-YY', u'Box-Vel-ZZ',
                                u'Mu-X', u'Mu-Y', u'Mu-Z',
                                u'Coul-SR:water-water',
                                u'LJ-SR:water-water', u'Coul-SR:water-DPPC',
                                u'LJ-SR:water-DPPC', u'Coul-SR:water-DUPC',
                                u'LJ-SR:water-DUPC', u'Coul-SR:water-CHOL',
                                u'LJ-SR:water-CHOL', u'Coul-SR:water-OCO',
                                u'LJ-SR:water-OCO', u'Coul-SR:DPPC-DPPC',
                                u'LJ-SR:DPPC-DPPC', u'Coul-SR:DPPC-DUPC',
                                u'LJ-SR:DPPC-DUPC', u'Coul-SR:DPPC-CHOL',
                                u'LJ-SR:DPPC-CHOL', u'Coul-SR:DPPC-OCO',
                                u'LJ-SR:DPPC-OCO', u'Coul-SR:DUPC-DUPC',
                                u'LJ-SR:DUPC-DUPC', u'Coul-SR:DUPC-CHOL',
                                u'LJ-SR:DUPC-CHOL', u'Coul-SR:DUPC-OCO',
                                u'LJ-SR:DUPC-OCO', u'Coul-SR:CHOL-CHOL',
                                u'LJ-SR:CHOL-CHOL', u'Coul-SR:CHOL-OCO',
                                u'LJ-SR:CHOL-OCO', u'Coul-SR:OCO-OCO',
                                u'LJ-SR:OCO-OCO', u'T-non_water',
                                u'T-water', u'Lamb-non_water',
                                u'Lamb-water'], dtype='object')
     columns = df.columns.values
     if columns.shape[0] == ref_columns.shape[0]:
         print('These columns differ from the reference (displayed as read):')
         print(columns[ref_columns != columns])
         print('The corresponding names displayed as reference:')
         print(ref_columns[ref_columns != columns])
     self.assertTrue(ref_columns.shape == columns.shape,
                     'The number of columns read is unexpected.')
     self.assertTrue(numpy.all(ref_columns == columns),
                     'At least one column name was misread.')
Example 20
    def get_dDens_list_from_paras(self, paras: OrderedDict):
        os.chdir(self.dir_npt)

        df = panedr.edr_to_df('npt.edr')
        # TODO Because of floating-point error in the gmx edr file, the Series index is unreliable; convert to an array
        self.dens_array = np.array(df.Density) / 1000  # convert to g/mL

        # dDdp_list = [self.get_dDens_from_para(k) for k in paras.keys()]
        from multiprocessing import Pool
        with Pool(len(paras)) as p:
            dDdp_list = p.map(wrapper_target, [(self, 'get_dDens_from_para', k)
                                               for k in paras.keys()])

        self.dDdp_array = np.array(
            dDdp_list)  # save dDdp_array for calculating expansivity
        return dDdp_list
Example 21
    def get_properties(self,
                       deffnm,
                       cwd='.',
                       stride=1,
                       mdp=None,
                       mindist=True):
        if mdp is None:
            mdp = self.mdp

        if mindist:
            self.call_gmx(cmd='mindist',
                          stdin='Protein',
                          cwd=cwd,
                          f=f'{deffnm}.xtc',
                          s=f'{deffnm}.tpr',
                          od=f'{deffnm}.mindist.xvg',
                          pi=True,
                          dt=float(mdp.nstenergy) * float(mdp.dt) * stride)
            with open(f'{cwd}/{deffnm}.mindist.xvg') as f:
                data = np.array([[float(s) for s in l.split()] for l in f
                                 if l[0] not in '#@']).T
        df = edr_to_df(f'{cwd}/{deffnm}.edr')[::stride]
        traj = self.load_xtc(f'{cwd}/{deffnm}.vis.xtc', stride=stride)

        if mindist:
            minlen = min(data.shape[1], len(df), len(traj))
        else:
            minlen = min(len(df), len(traj))

        df = df.head(minlen)
        if mindist:
            data = data[..., :minlen]
        traj = traj[:minlen]
        if not (((not mindist) or np.array_equal(df['Time'], data[0]))
                and np.array_equal(df['Time'], traj.time)):
            raise ValueError("Could not match times across different inputs")

        calpha_atom_indices = traj.top.select_atom_indices('alpha')
        rmsd = md.rmsd(traj, self.traj, atom_indices=calpha_atom_indices)

        if mindist:
            df['Min. PI dist'] = data[1]
            df['Max. int dist'] = data[2]
        df['RMSD'] = rmsd

        return (traj, df)
Example 22
def get_energies(in_base_name: str = 'npt_PT_out') -> pd.DataFrame:
    """Import the energies of GROMACS REMD trajectories.

    :param in_base_name: The base name for the output energy files
    :return: The MultiIndexed DataFrame of all the time-step energies
    :rtype: pd.DataFrame
    """
    in_files = glob.glob(in_base_name+'*.edr')
    in_files.sort()
    in_files.sort(key=len)
    dfs = dict()
    for edr_file in in_files:
        try:
            number = int(re.match(r'.+?(\d+)\.edr', edr_file).group(1))
        except AttributeError:
            raise ValueError('Unable to parse edr file name '
                             '"{}"'.format(edr_file))
        df = panedr.edr_to_df(edr_file)
        dfs[number] = df
    return pd.concat(dfs, names=['replica', 'time'])
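
Because the frames are concatenated with `names=['replica', 'time']`, individual replicas and per-replica averages can be pulled straight off the MultiIndex. A small hedged usage sketch (column names such as 'Potential' depend on what the runs wrote to the .edr files):

# Hypothetical follow-up to get_energies()
energies = get_energies('npt_PT_out')
replica_0 = energies.xs(0, level='replica')                   # one replica
mean_potential = energies['Potential'].groupby(level='replica').mean()
print(mean_potential)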
Example 23

gaff_ff = foyer.forcefields.load_GAFF()
typed_compound = gaff_ff.apply(box_of_compounds, assert_dihedral_params=False)

charge_structure = apply_charges(box_structure=typed_compound,
                                 single_compound=compound,
                                 n_atoms=compound.n_particles,
                                 ff=gaff_ff)

# Handed back to our backend GMSO
topology = gmso.external.from_parmed(charge_structure)
topology.name = compound.name

gmso.formats.write_top(topology,
                       "simulation/topol.top",
                       top_vars={
                           "fudgeLJ": 0.5,
                           "fudgeQQ": 0.8,
                           "comb-rule": "geometric"
                       })

gmso.formats.write_gro(topology, "simulation/conf.gro")

# Run the simulation with gromacs
run_energy_minimization()
run_nvt()

# Data analysis
sim_data = panedr.edr_to_df("simulation/ener.edr")
def dA_Gamma_MBAR(plot_out=True,
                  MINGAMMA=0,
                  MAXGAMMA=100,
                  GSPACING=10,
                  LAMBDA=100,
                  exponent=2,
                  polymorphs='p1 p2',
                  Molecules=72,
                  Independent=4,
                  Temp=200,
                  Pressure=1,
                  k=1000,
                  ignoreframes=500,
                  includeframes=100000,
                  potential='oplsaa',
                  bonds=False,
                  hinge='DefaultHinge'):

    if (plot_out):
        import matplotlib  # for making plots, version 'matplotlib-1.1.0-1'; errors may pop up when using earlier versions
        import matplotlib.pyplot as plt
        font = {'family': 'normal', 'weight': 'normal', 'size': 16}
        matplotlib.rc('font', **font)

    # =============================================================================================
    # ENSURE THAT USER INPUTS ARE SENSIBLE
    # =============================================================================================
    # TEMPERATURE
    if Temp < 0:
        print("Invalid Temperature: " + str(Temp))
        sys.exit()

    # GAMMA
    if (MINGAMMA == -1) and (MAXGAMMA == -1) and (GSPACING == -1) and (exponent
                                                                       == 1):
        print("Using default values!")
        # The Gamma points sampled
        Gammas = [
            '000L', '010L', '020L', '030L', '040L', '050L', '060L', '070L',
            '080L', '090L', '100L'
        ]
    elif MINGAMMA < 0 or MAXGAMMA < 0 or GSPACING < 0 or MINGAMMA > MAXGAMMA:
        print("Invalid Gamma Specifications")
        sys.exit()
    else:
        RawGamma = MINGAMMA
        Gammas = []
        Gamma_names = []
        gamma_names = np.arange(MINGAMMA, MAXGAMMA + GSPACING, GSPACING)
        while RawGamma < MAXGAMMA:
            if exponent >= 0:
                Gamma = int(100 *
                            (float(RawGamma) / float(MAXGAMMA))**abs(exponent))
            else:
                Gamma = int(100 * (1 - (float(MAXGAMMA - RawGamma) /
                                        float(MAXGAMMA))**abs(exponent)))
            Gammas.append(Gamma)
            # Format the gamma point name
            if RawGamma < 10:
                Gamma_names.append('00' + str(int(RawGamma)) + 'G')
            elif RawGamma < 100:
                Gamma_names.append('0' + str(int(RawGamma)) + 'G')
            else:
                Gamma_names.append('100G')
            RawGamma = RawGamma + GSPACING
        # Catch the final gamma point
        Gammas.append(int(MAXGAMMA))
        if MAXGAMMA < 10:
            Gamma_names.append('00' + str(int(MAXGAMMA)) + 'G')
        elif MAXGAMMA < 100:
            Gamma_names.append('0' + str(int(MAXGAMMA)) + 'G')
        else:
            Gamma_names.append('100G')

    # LAMBDA
    if LAMBDA < 0 or LAMBDA > 100:
        print("Invalid Lambda Point: " + str(LAMBDA))
        sys.exit()

    # POLYMORPH
    polymorphs = polymorphs.split()
    polymorph = []
    polymorph_short = []
    for i, token in enumerate(polymorphs):
        polymorph.append('Polymorph ' + str(token))
        polymorph_short.append(token)

    # POTENTIAL
    if potential != "oplsaa" and potential != "gromos" and potential != "designeda" and potential != "oplsaafakeg" and \
                    potential != "oplsaafakea":
        print("Invalid Potential")
        print(
            "Supported potentials: oplsaa gromos designeda oplsaafakeg oplsaafakea"
        )
        sys.exit()

    # =============================================================================================
    # FORMAT INPUTS
    # =============================================================================================
    # POTENTIAL
    PotNAME = ""
    if potential == "oplsaa":
        PotNAME = "OPLS"
    elif potential == "gromos":
        PotNAME = "GROM"
    elif potential == "designeda":
        PotNAME = "DESA"
    elif potential == "oplsaafakeg":
        PotNAME = "FAKEG"
    elif potential == "oplsaafakea":
        PotNAME = "FAKEA"

    # OPTIONAL HINGE
    if hinge == "DefaultHinge":
        hinges = ['_G']
    else:
        # Read in each job
        hinges = []
        hingevect = hinge.split()
        for i, token in enumerate(hingevect):
            hinges.append("_G_" + str(token))

    # =============================================================================================
    # READ IN RAW DATA
    # =============================================================================================
    # Constants.
    kB = 1.3806488e-23 * 6.0221413e23 / (1000.0 * 4.184)  # Boltzmann constant in kcal/(mol*K)
    omitT = []  # Temperatures to be omitted from the analysis

    # Parameters
    T_k = Temp * np.ones(len(Gammas), float)  # Convert temperatures to floats
    print(T_k)
    print(Gammas)

    g_k = np.zeros([len(Gammas)], float)
    K = len(Gammas)  # How many states?

    # total number of states examined; 0 are unsampled if bonds are left on, 1 is unsampled if the bonds are removed
    if bonds == True:
        Kbig = K
        dhdl_placement = 6
    else:
        Kbig = K
        dhdl_placement = 5

    # maximum number of snapshots/simulation (could make this automated) - doesn't matter, as long as it's long enough.
    N_max = 200000

    # beta factor for the different temperatures
    beta_k = 1.0 / (kB * T_k)
    dA = np.zeros([len(polymorph), Kbig], float)
    ddA = np.zeros([len(polymorph), Kbig], float)
    convert_units = 0.2390057 * np.ones(
        Kbig, float)  # Convert all energies to kcal/mol

    # Allocate storage for simulation data
    for i, poly in enumerate(polymorph):
        # N_k[k] is the total number of snapshots from alchemical state k
        N_k = np.zeros([Kbig], np.int32)

        # N_k_s[k,s] is the total number of snapshots from alchemical state k from seed s
        N_k_s = np.zeros([Kbig, len(hinges)], np.int32)

        # u_kln[k,l,n] is the adjusted energy of snapshot n from simulation k
        u_kln = np.zeros([K, Kbig, N_max], np.float64)

        # dhdl_kn[k,n] is the derivative of energy with respect to lambda of snapshot n from simulation k
        dhdl_kn = np.zeros([K, N_max], np.float64)

        #Load in the data for each run
        for k in range(K):
            n = 0
            for s, hinge in enumerate(hinges):

                # cycle through all the input total energy data
                dirpath = polymorph_short[i] + '/interactions/' + str(
                    gamma_names[k])

                fname = dirpath + '/PROD.edr'
                dhdlname = dirpath + '/dhdl_PROD.xvg'

                if k not in omitT:
                    potential_energy = panedr.edr_to_df(
                        fname)['Potential'].values
                    print("loading " + fname)

                    dhdl_energy = np.loadtxt(dhdlname,
                                             comments=['#', '$', '@', '!'])
                    print("loading " + dhdlname)

                    # Removing any non-equilibrated points of the simulation
                    [start_production, _,
                     _] = timeseries.detectEquilibration(potential_energy)
                    potential_energy = potential_energy[start_production:]
                    dhdl_energy = dhdl_energy[start_production:]

                    # the energy of every configuration from each state evaluated at its sampled state
                    n = len(potential_energy)
                    u_kln[k, :K, :n] = (potential_energy.reshape((n, 1)) + dhdl_energy[:, dhdl_placement:]).T * \
                                       convert_units[k]
                    dhdl_kn[k, :n] = (float(Independent) / Molecules) * \
                                     np.sum(dhdl_energy[:, 2:dhdl_placement], axis=1) * convert_units[k]

                if s == 0:
                    N_k_s[k, s] = n
                else:
                    N_k_s[k, s] = n - sum(N_k_s[k, 0:s])
            N_k[k] = n

        # convert to nondimensional units from kcal/mol

        u_kln *= beta_k[0]
        # all data loaded from the three sets

        u_kln_save = u_kln.copy()
        N_k_save = N_k.copy()
        g_k = np.zeros([K])

        print("Number of retained samples")
        print(N_k)
        print("Number of retained samples from each seed")
        print(N_k_s)

        # =============================================================================================
        # COMPUTE FREE ENERGY DIFFERENCE USING MBAR
        # =============================================================================================

        # Initialize MBAR.
        print("Running MBAR...")

        # generate the weights of each of the umbrella set
        mbar = pymbar.MBAR(u_kln,
                           N_k,
                           verbose=True,
                           subsampling_protocol=[{
                               'method': 'L-BFGS-B'
                           }])

        print("MBAR Converged...")
        # testing

        for k in range(Kbig):
            w = np.exp(mbar.Log_W_nk[:, k])
            print("max weight in state %d is %12.7f" % (k, np.max(w)))
            neff = 1 / np.sum(w**2)
            print("Effective number of sample in state %d is %10.3f" %
                  (k, neff))
            print("Efficiency for state %d is %d/%d = %10.4f" %
                  (k, neff, len(w), neff / len(w)))

        # extract self-consistent weights and uncertainties
        (df_i, ddf_i, theta_i) = mbar.getFreeEnergyDifferences()

        print("Free Energies Optained...")

        # convert PMF to kcal/mol and normalize by the number of molecules
        df_i /= (beta_k[0] * float(Independent))
        ddf_i /= (beta_k[0] * float(Independent))

        dA[i, :] = df_i[-1]

        # =============================================================================================
        # COMPUTE UNCERTAINTY USING THE UNCORRELATED DATA
        # =============================================================================================

        for k in range(K):
            N_k[k] = 0
            n_old = 0
            if k not in omitT:
                for s in range(len(hinges)):
                    g_k[k] = timeseries.statisticalInefficiency(
                        dhdl_kn[k, n_old:(n_old + N_k_s[k, s])])
                    print("Correlation time for sampled state %d is %10.3f" %
                          (k, g_k[k]))
                    # subsample the data to get statistically uncorrelated data
                    indices = np.array(
                        timeseries.subsampleCorrelatedData(
                            u_kln[k, k, n_old:(n_old + N_k_s[k, s])],
                            g=g_k[k]))  # subsample

                    # not sure why we have to transpose
                    u_kln[k, :,
                          N_k[k]:(N_k[k] + len(indices))] = u_kln_save[k, :, (
                              indices + n_old)].transpose()
                    N_k[k] = N_k[k] + len(indices)
                    n_old += N_k_s[k, s]
        print("Number of retained samples")
        print(N_k)
        print("Number of retained samples from each seed")
        print(N_k_s)

        # generate the weights of each of the umbrella set
        mbar = pymbar.MBAR(u_kln,
                           N_k,
                           verbose=True,
                           subsampling_protocol=[{
                               'method': 'L-BFGS-B'
                           }])

        print("MBAR Converged...")
        # testing

        # extract self-consistent weights and uncertainties
        (df_u, ddf_u, theta_i) = mbar.getFreeEnergyDifferences()

        print("Free Energies Optained...")

        # convert PMF to kcal/mol and normalize by the number of molecules
        df_u /= (beta_k[0] * float(Independent))
        ddf_u /= (beta_k[0] * float(Independent))

        ddA[i, :] = ddf_u[-1]

        # Write out free energy differences
        print("Free Energy Difference (in units of kcal/mol)")
        print("  dA(Gamma) = A(Gamma) - A(Interactions Off)")
        for k in range(Kbig):
            print("%8.3f %8.3f" % (df_i[k, -1], ddf_u[k, -1]))

    # =============================================================================================
    # PRINT THE FINAL DATA
    # =============================================================================================

    out_dA = np.zeros(len(polymorph))
    out_ddA = np.zeros(len(polymorph))
    for i, poly in enumerate(polymorph):
        out_dA[i] = dA[i, 0]  #Kbig - 1]
        out_ddA[i] = ddA[i, 0]  #Kbig - 1]

    # =============================================================================================
    # PLOT THE FINAL DATA
    # =============================================================================================

#    if (plot_out) and polymorphs == 'all':
#        # now plot the free energy change as a function of temperature
#        fig = plt.figure(4)
#        ax = fig.add_subplot(111)
#        xlabel = 'Interaction Strength, $\gamma$'
#        ylabel = 'Relative Free Energy (kcal/mol)'
#        plt.xlabel(xlabel)
#        plt.ylabel(ylabel)
#        Xaxis = [float(j / 100.0) for j in Gammas]
#
#        if os.path.isfile('BootstrapStd_' + PotNAME + '_Polymorph1_' + str(Molecules) + '_' + Tname + '_' + Pname +
#                                  '_dAvsG_All'):
#            ddA[0, :] = MBARBootstrap.ExtractBootstrap('BootstrapStd_' + PotNAME + '_Polymorph1_' + str(Molecules) + '_' +
#                                                       Tname + '_' + Pname + '_dAvsG_All')
#        elif os.path.isfile('BootstrapStd_' + PotNAME + '_Polymorph2_' + str(Molecules) + '_' + Tname + '_' + Pname +
#                                    '_dAvsG_All'):
#            ddA[1, :] = MBARBootstrap.ExtractBootstrap('BootstrapStd_' + PotNAME + '_Polymorph2_' + str(Molecules) + '_' +
#                                                       Tname + '_' + Pname + '_dAvsG_All')
#        elif os.path.isfile('BootstrapStd_' + PotNAME + '_Polymorph2_' + str(Molecules) + '_' + Tname + '_' + Pname +
#                                    '_dAvsG_All'):
#            ddA[2, :] = MBARBootstrap.ExtractBootstrap('BootstrapStd_' + PotNAME + '_Polymorph3_' + str(Molecules) + '_' +
#                                                       Tname + '_' + Pname + '_dAvsG_All')
#
#        ax.errorbar(Xaxis, dA[0, :], color='b', yerr=ddA[0, :], label='Benzene I')
#        ax.errorbar(Xaxis, dA[1, :], color='g', yerr=ddA[1, :], label='Benzene II')
#        ax.errorbar(Xaxis, dA[2, :], color='r', yerr=ddA[2, :], label='Benzene III')
#        plt.legend(loc='upper right')
#
#        if len(hinges) > 1:
#            filename = PotNAME + '_' + str(Molecules) + '_' + Tname + '_dAvsG.pdf'
#        else:
#            filename = PotNAME + '_' + str(Molecules) + '_' + Tname + hinge + '_dAvsG.pdf'
#        plt.savefig(filename, bbox_inches='tight')

    print(out_dA, out_ddA)
    sys.exit()
Example 25
 def test_output_type(self):
     """
     Test that the function returns a pandas DataFrame.
     """
     df = panedr.edr_to_df(EDR)
     self.assertIsInstance(df, pandas.DataFrame)
Example 26
def dA_Lambda_MBAR(plot_out=True,
                   MinL=0,
                   MaxL=100,
                   dL=5,
                   GAMMA=100,
                   exponent=4,
                   polymorphs='p1 p2',
                   Molecules=72,
                   Independent=4,
                   Temp=200,
                   Pressure=1,
                   potential='oplsaa',
                   hinge='DefaultHinge'):
    if (plot_out):
        import matplotlib  # for making plots, version 'matplotlib-1.1.0-1'; errors may pop up when using earlier versions
        import matplotlib.pyplot as plt
        font = {'family': 'normal', 'weight': 'normal', 'size': 16}
        matplotlib.rc('font', **font)

    # =============================================================================================
    # ENSURE THAT USER INPUTS ARE SENSIBLE
    # =============================================================================================
    # TEMPERATURE
    if Temp < 0:
        print("Invalid Temperature: " + str(Temp))
        sys.exit()

    if Pressure < 0:
        print("Invalid Pressure: " + str(Pressure))
        sys.exit()

    # LAMBDA
    if (MinL == -1) and (MaxL == -1) and (dL == -1) and (exponent == 1):
        print("Using default values!")

        # The Lambda points sampled
        Lambdas = [
            '000L', '010L', '020L', '030L', '040L', '050L', '060L', '070L',
            '080L', '090L', '100L'
        ]
    elif MinL < 0 or MaxL < 0 or dL < 0 or MinL > MaxL:
        print("Invalid Lambda Specifications")
        sys.exit()
    else:
        RawLambda = 0
        Lambdas = []
        lambda_names = np.arange(MinL, MaxL + dL, dL)
        Lambda_names = []
        Lambda_indicies = []
        index = 0
        while RawLambda < MaxL:
            if RawLambda >= MinL:
                Lambda_indicies.append(index)
                index += 1
            else:
                index += 1
                RawLambda = RawLambda + dL
                continue
            if exponent >= 0:
                Lambda = int(100 *
                             (float(RawLambda) / float(MaxL))**abs(exponent))
            else:
                Lambda = int(
                    100 *
                    (1 -
                     (float(MaxL - RawLambda) / float(MaxL))**abs(exponent)))
            Lambdas.append(Lambda)
            # Format the lambda point name
            if RawLambda < 10:
                Lambda_names.append('00' + str(int(RawLambda)) + 'L')
            elif RawLambda < 100:
                Lambda_names.append('0' + str(int(RawLambda)) + 'L')
            else:
                Lambda_names.append('100L')
            RawLambda = RawLambda + dL

        # Catch the final lambda point
        Lambdas.append(MaxL)
        Lambda_indicies.append(index)
        if MaxL < 10:
            Lambda_names.append('00' + str(int(MaxL)) + 'L')
        elif MaxL < 100:
            Lambda_names.append('0' + str(int(MaxL)) + 'L')
        else:
            Lambda_names.append('100L')

    # GAMMA
    if GAMMA < 0 or GAMMA > 100:
        print("Invalid Gamma Point: " + str(GAMMA))
        sys.exit()

    # POLYMORPH
    polymorphs = polymorphs.split()
    polymorph = []
    polymorph_short = []
    for i, token in enumerate(polymorphs):
        polymorph.append('Polymorph ' + str(token))
        polymorph_short.append(token)

    # POTENTIAL
    if potential not in [
            "oplsaa", "gromos", "designeda", "oplsaafakeg", "oplsaafakea"
    ]:
        print("Invalid Potential")
        print(
            "Supported potentials: oplsaa gromos designeda oplsaafakeg oplsaafakea"
        )
        sys.exit()

    # =============================================================================================
    # FORMAT INPUTS
    # =============================================================================================
    # POTENTIAL
    PotNAME = ""
    if potential == "oplsaa":
        PotNAME = "OPLS"
    elif potential == "gromos":
        PotNAME = "GROM"
    elif potential == "designeda":
        PotNAME = "DESA"
    elif potential == "oplsaafakeg":
        PotNAME = "FAKEG"
    elif potential == "oplsaafakea":
        PotNAME = "FAKEA"

    # OPTIONAL HINGE
    if str(GAMMA) == "100":
        hingeLetter = "L"
    else:
        hingeLetter = "R"

    if hinge == "DefaultHinge":
        hinges = ["_" + hingeLetter]
    else:
        # Read in each job
        hinges = []
        hingevect = hinge.split()
        for i, token in enumerate(hingevect):
            hinges.append("_" + hingeLetter + "_" + str(token))

    # =============================================================================================
    # READ IN RAW DATA
    # =============================================================================================
    # Constants.
    kB = 1.3806488e-23 * 6.0221413e23 / (1000.0 * 4.184)  # Boltzmann constant in kcal/(mol*K)
    omitK = []

    # Parameters
    T_k = Temp * np.ones(len(Lambdas), float)  # Convert temperatures to floats
    g_k = np.zeros([len(Lambdas)], float)
    K = len(Lambdas)  # How many states?

    # total number of states examined; none are unsampled
    Kbig = K + 0

    # maximum number of snapshots/simulation (could make this automated) - doesn't matter, as long as it's long enough.
    N_max = 200000

    # beta factor for the different temperatures
    beta_k = 1.0 / (kB * T_k)
    dA = np.zeros([len(polymorph), len(Lambdas)], float)
    ddA = np.zeros([len(polymorph), len(Lambdas)], float)
    convert_units = (0.2390057) * np.ones(
        len(Lambdas), float)  # Convert all energies to kcal/mol

    # Lines to ignore when reading in energies
    for i, poly in enumerate(polymorph):
        # Allocate storage for simulation data
        # N_k[k] is the total number of snapshots from alchemical state k
        N_k = np.zeros([Kbig], np.int32)

        # N_k_s[k,s] is the total number of snapshots from alchemical state k from seed s in 'unflipped segment j'
        N_ksj = np.zeros([Kbig, len(hinges), 100], np.int32)

        # u_kln[k,l,n] is the adjusted energy of snapshot n from simulation k
        u_kln = np.zeros([K, Kbig, N_max], np.float64)

        # dhdl_kln[k,l,n] is the restraint energy value of snapshot n from simulation k
        dhdl_kln = np.zeros([K, Kbig, N_max], np.float64)

        # dhdl_kn[k,n] is the derivative of energy with respect to lambda of snapshot n from simulation k
        dhdl_kn = np.zeros([K, N_max], np.float64)

        # Load in the data for each run
        for k in range(K):
            n = 0
            for s, hinge in enumerate(hinges):
                keepconfigs = np.arange(
                    N_max
                )  # The index of each configuration to keep in the MBAR analysis

                # cycle through all the input total energy data
                dirpath = polymorph_short[i] + '/restraints/' + str(
                    lambda_names[k])
                fname = dirpath + '/PROD.edr'
                dhdlname = dirpath + '/dhdl_PROD.xvg'

                if k not in omitK:
                    potential_energy = panedr.edr_to_df(
                        fname)['Potential'].values
                    print("loading " + fname)

                    dhdl_energy = np.loadtxt(dhdlname,
                                             comments=['#', '$', '@', '!'])
                    print("loading " + dhdlname)

                    # Removing any non-equilibrated points of the simulation
                    [start_production, _,
                     _] = timeseries.detectEquilibration(potential_energy)
                    potential_energy = potential_energy[start_production:]
                    dhdl_energy = dhdl_energy[start_production:]

                    # the energy of every configuration from each state evaluated at its sampled state
                    n = len(potential_energy)
                    u_kln[k, :, :n] = (float(Independent) / Molecules) * (
                        potential_energy.reshape(
                            (n, 1)) + dhdl_energy[:, 5:]).T * convert_units[k]
                    dhdl_kln[k, :, :n] = dhdl_energy[:,
                                                     5:].T * convert_units[k]
                    dhdl_kn[k, :n] = (
                        float(Independent) /
                        Molecules) * dhdl_energy[:, 4].T * convert_units[k]

                    # NSA: Can this go?
                    symbolcounter = 0

                    # Truncate the kept configuration list to be less than n
                    keepconfigs = [
                        j for j in keepconfigs
                        if j < (len(potential_energy) -
                                symbolcounter) and j >= 0
                    ]

                    # Split up the retained configurations into connected segments
                    j = 0
                    for a in range(len(keepconfigs)):
                        if a == 0:
                            continue
                        elif int(keepconfigs[a - 1]) + 1 != int(
                                keepconfigs[a]):
                            N_ksj[k, s, j] = a - (sum(N_ksj[k, s, 0:j]))
                            j += 1
                    # Catch the final segment
                    N_ksj[k, s, j] = len(keepconfigs) - sum(N_ksj[k, s, 0:j])
                    j += 1

            N_k[k] = n

        # convert to nondimensional units from kcal/mol
        u_kln *= beta_k[0]

        # all data loaded from the three sets
        u_kln_save = u_kln.copy()
        g_k = np.zeros([K])

        # Ignore the first state due to jumping
        print("Number of retained samples")
        print(N_k)

        # =============================================================================================
        # COMPUTE FREE ENERGY DIFFERENCE USING MBAR
        # =============================================================================================

        # Initialize MBAR.
        print("Running MBAR...")

        # generate the weights of each of the umbrella set
        mbar = pymbar.MBAR(u_kln,
                           N_k,
                           verbose=True,
                           subsampling_protocol=[{
                               'method': 'L-BFGS-B'
                           }])
        print("MBAR Converged...")

        for k in range(Kbig):
            w = np.exp(mbar.Log_W_nk[:, k])
            print("max weight in state %d is %12.7f" % (k, np.max(w)))
            neff = 1 / np.sum(w**2)
            print("Effective number of sample in state %d is %10.3f" %
                  (k, neff))
            print("Efficiency for state %d is %d/%d = %10.4f" %
                  (k, neff, len(w), neff / len(w)))

        # extract self-consistent weights and uncertainties
        (df_i, ddf_i, theta_i) = mbar.getFreeEnergyDifferences()

        print("Free Energies Optained...")

        # convert PMF to kcal/mol and normalize by the number of molecules
        df_i /= (beta_k[0] * float(Independent))
        ddf_i /= (beta_k[0] * float(Independent))

        dA[i, :] = df_i[-1]

        # =============================================================================================
        # COMPUTE UNCERTAINTY USING THE UNCORRELATED DATA
        # =============================================================================================
        for k in range(K):  # For each restraint state
            N_k[k] = 0
            n_old = 0
            if k not in omitK:
                for s in range(
                        len(hinges)
                ):  # For each independent trajectory of this restraint state
                    for j in range(
                            100
                    ):  # For each untossed segment of each independent trajectory of this restraint state
                        if N_ksj[k, s, j] == 0:
                            continue
                        # Feed in the segment and calculate correlation time
                        g_k[k] = timeseries.statisticalInefficiency(
                            dhdl_kn[k, n_old:(n_old + N_ksj[k, s, j])])
                        print(
                            "Correlation time for sampled state %d is %10.3f" %
                            (k, g_k[k]))

                        # subsample the data to get statistically uncorrelated data
                        # subsample indices within the segment
                        indices = np.array(
                            timeseries.subsampleCorrelatedData(
                                u_kln[k, k, n_old:(n_old + N_ksj[k, s, j])],
                                g=g_k[k])).astype(int)

                        # Append the uncorrelated configurations in the segment to the u_kln matrix
                        u_kln[k, :, N_k[k]:(N_k[k] +
                                            len(indices))] = u_kln_save[k, :, (
                                                indices + n_old)].transpose()
                        N_k[k] = N_k[k] + len(indices)
                        n_old += N_ksj[k, s, j]

        print("Number of retained samples")
        print(N_k)
        print("Number of retained samples from each seed")
        print(N_ksj)

        # generate the weights of each of the umbrella set
        mbar = pymbar.MBAR(u_kln,
                           N_k,
                           verbose=True,
                           subsampling_protocol=[{
                               'method': 'L-BFGS-B'
                           }])

        print("MBAR Converged...")
        # testing

        # extract self-consistent weights and uncertainties
        (df_u, ddf_u, theta_i) = mbar.getFreeEnergyDifferences()

        print("Free Energies Optained...")

        # convert PMF to kcal/mol and normalize by the number of molecules
        df_u /= (beta_k[0] * float(Independent))
        ddf_u /= (beta_k[0] * float(Independent))

        ddA[i, :] = ddf_u[-1]

        # Write out free energy differences
        print("Free Energy Difference (in units of kcal/mol)")
        print("  dA(Lambda) = A(Lambda) - A(Fully Restrained)")
        for k in range(Kbig):
            print("%8.3f %8.3f" % (df_i[k, -1], ddf_u[k, -1]))

    # =============================================================================================
    # PRINT THE FINAL DATA
    # =============================================================================================
    out_dA = np.zeros(len(polymorph))
    out_ddA = np.zeros(len(polymorph))
    for i, poly in enumerate(polymorph):
        out_dA[i] = dA[i, 0]  #Kbig - 1]
        out_ddA[i] = ddA[i, 0]  #Kbig - 1]

    # =============================================================================================
    # PLOT THE FINAL DATA
    # =============================================================================================
    if (plot_out) and polymorphs == 'all':
        # now plot the free energy change as a function of temperature
        fig = plt.figure(4)
        ax = fig.add_subplot(111)
        xlabel = r'Restraint Strength, $\lambda$'
        ylabel = 'Relative Free Energy (kcal/mol)'
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        Xaxis = [float(j / 100.0) for j in Lambdas]

        if os.path.isfile('BootstrapStd_' + PotNAME + '_Polymorph1_' +
                          str(Molecules) + '_' + Tname + '_' + Pname +
                          '_dAvsL_All'):
            ddA[0, :] = MBARBootstrap.ExtractBootstrap('BootstrapStd_' +
                                                       PotNAME +
                                                       '_Polymorph1_' +
                                                       str(Molecules) + '_' +
                                                       Tname + '_' + Pname +
                                                       '_dAvsL_All')
        elif os.path.isfile('BootstrapStd_' + PotNAME + '_Polymorph2_' +
                            str(Molecules) + '_' + Tname + '_' + Pname +
                            '_dAvsL_All'):
            ddA[1, :] = MBARBootstrap.ExtractBootstrap('BootstrapStd_' +
                                                       PotNAME +
                                                       '_Polymorph2_' +
                                                       str(Molecules) + '_' +
                                                       Tname + '_' + Pname +
                                                       '_dAvsL_All')
        elif os.path.isfile('BootstrapStd_' + PotNAME + '_Polymorph2_' +
                            str(Molecules) + '_' + Tname + '_' + Pname +
                            '_dAvsL_All'):
            ddA[2, :] = MBARBootstrap.ExtractBootstrap('BootstrapStd_' +
                                                       PotNAME +
                                                       '_Polymorph3_' +
                                                       str(Molecules) + '_' +
                                                       Tname + '_' + Pname +
                                                       '_dAvsL_All')

        ax.errorbar(Xaxis,
                    dA[0, :],
                    color='b',
                    yerr=ddA[0, :],
                    label='Benzene I')
        ax.errorbar(Xaxis,
                    dA[1, :],
                    color='g',
                    yerr=ddA[1, :],
                    label='Benzene II')
        ax.errorbar(Xaxis,
                    dA[2, :],
                    color='r',
                    yerr=ddA[2, :],
                    label='Benzene III')
        plt.legend(loc='upper left')

        if len(hinges) > 1:
            filename = PotNAME + '_' + str(
                Molecules) + '_' + Tname + '_dAvsL.pdf'
        else:
            filename = PotNAME + '_' + str(
                Molecules) + '_' + Tname + hinge + '_dAvsL.pdf'
        plt.show()
    return out_dA, out_ddA
def dA_MBAR(minimum=0,
            maximum=100,
            spacing=10,
            exponent=2,
            polymorphs='p1 p2',
            Molecules=72,
            Independent=4,
            Temp=200,
            bonds=False,
            primary_directory='.',
            added_directories=[]):
    # =============================================================================================
    # Setting up the values for gamma or lambda states
    # =============================================================================================
    #    raw_value = minimum
    #    values = []
    directory_names = np.arange(minimum, maximum + spacing, spacing)
    directory_names = np.sort(np.append(directory_names, added_directories))

    #    while raw_value <= maximum:
    #        if exponent >= 0:
    #            value = int(100 * (float(raw_value) / float(maximum)) ** abs(exponent))
    #        else:
    #            value = int(100 * (1 - (float(maximum - raw_value) / float(maximum)) ** abs(exponent)))
    #        values.append(value)
    #        raw_value = raw_value + spacing
    #    print(values)
    #    print(directory_names)
    #    exit()

    # POLYMORPH
    polymorphs = polymorphs.split()

    # =============================================================================================
    # READ IN RAW DATA
    # =============================================================================================
    # Constants.
    kB = 1.3806488e-23 * 6.0221413e23 / (1000.0 * 4.184)  # Boltzmann constant in kcal/(mol*K)

    # Parameters
    T_k = Temp * np.ones(len(directory_names),
                         float)  # Convert temperatures to floats
    print(T_k)
    #  print(values)

    K = len(directory_names)  # How many states?

    # total number of states examined; 0 are unsampled if bonds are left on, 1 is unsampled if the bonds are removed
    Kbig = K

    # maximum number of snapshots/simulation (could make this automated) - doesn't matter, as long as it's long enough.
    N_max = 5000

    # beta factor for the different temperatures
    beta_k = 1.0 / (kB * T_k)
    dA = np.zeros([len(polymorphs), Kbig], float)
    ddA = np.zeros([len(polymorphs), Kbig], float)
    convert_units = 0.2390057 * np.ones(
        Kbig, float)  # Convert all energies to kcal/mol

    # Allocate storage for simulation data
    for i, poly in enumerate(polymorphs):
        # N_k[k] is the total number of snapshots from alchemical state k
        N_k = np.zeros([Kbig], np.int32)

        # N_k_s[k,s] is the total number of snapshots from alchemical state k from seed s
        N_k_s = np.zeros([Kbig], np.int32)

        # u_kln[k,l,n] is the adjusted energy of snapshot n from simulation k
        u_kln = np.zeros([K, Kbig, N_max], np.float64)

        # dhdl_kn[k,n] is the derivative of energy with respect to lambda of snapshot n from simulation k
        dhdl_kn = np.zeros([K, N_max], np.float64)

        # Load in the data for each run
        for k in range(K):
            n = 0

            # cycle through all the input total energy data
            if directory_names[k] == int(directory_names[k]):
                dirpath = polymorphs[i] + '/' + primary_directory + '/' + str(
                    int(directory_names[k]))
            else:
                dirpath = polymorphs[i] + '/' + primary_directory + '/' + str(
                    directory_names[k])
            if os.path.isdir(dirpath):
                fname = dirpath + '/PROD.edr'
                dhdlname = dirpath + '/dhdl_PROD.xvg'

                potential_energy = panedr.edr_to_df(fname)['Potential'].values
                print("loading " + fname)

                dhdl_energy = np.loadtxt(dhdlname,
                                         comments=['#', '$', '@', '!'])
                print("loading " + dhdlname)

                # Removing any non-equilibrated points of the simulation
                [start_production, _,
                 _] = timeseries.detectEquilibration(potential_energy)
                potential_energy = potential_energy[start_production:]
                dhdl_energy = dhdl_energy[start_production:, :]

                # Cutting points if they exceed N_max
                if len(potential_energy) > N_max:
                    potential_energy = potential_energy[len(potential_energy) -
                                                        N_max:]
                    dhdl_energy = dhdl_energy[len(dhdl_energy) - N_max:, :]

                # the energy of every configuration from each state evaluated at its sampled state
                n = len(potential_energy)
                dhdl_placement = len(dhdl_energy[0, :]) - K
                u_kln[k, :K, :n] = (potential_energy.reshape(
                    (n, 1)) + dhdl_energy[:, dhdl_placement:]
                                    ).T * convert_units[k]
                dhdl_kn[k, :n] = (float(Independent) / Molecules) * \
                                 np.sum(dhdl_energy[:, 2:dhdl_placement], axis=1) * convert_units[k]

                N_k_s[k] = n
                N_k[k] = n

        # convert to nondimensional units from kcal/mol
        u_kln *= beta_k[0]

        u_kln_save = u_kln.copy()
        g_k = np.zeros([K])

        print("Number of retained samples")
        print(N_k)
        print("Number of retained samples from each seed")
        print(N_k_s)

        # =============================================================================================
        # COMPUTE FREE ENERGY DIFFERENCE USING MBAR
        # =============================================================================================

        # Initialize MBAR.
        print("Running MBAR...")

        # generate the weights of each of the umbrella set
        mbar = pymbar.MBAR(u_kln,
                           N_k,
                           verbose=True,
                           subsampling_protocol=[{
                               'method': 'L-BFGS-B'
                           }])

        print("MBAR Converged...")
        # testing

        for k in range(Kbig):
            w = np.exp(mbar.Log_W_nk[:, k])
            print("max weight in state %d is %12.7f" % (k, np.max(w)))
            neff = 1 / np.sum(w**2)
            print("Effective number of sample in state %d is %10.3f" %
                  (k, neff))
            print("Efficiency for state %d is %d/%d = %10.4f" %
                  (k, neff, len(w), neff / len(w)))

        # extract self-consistent weights and uncertainties
        (df_i, ddf_i, theta_i) = mbar.getFreeEnergyDifferences()

        print("Free Energies Optained...")

        # convert PMF to kcal/mol and normalize by the number of molecules
        df_i /= (beta_k[0] * float(Independent))
        ddf_i /= (beta_k[0] * float(Independent))

        dA[i, :] = df_i[-1]

        # =============================================================================================
        # COMPUTE UNCERTAINTY USING THE UNCORRELATED DATA
        # =============================================================================================

        for k in range(K):
            N_k[k] = 0
            n_old = 0

            g_k[k] = timeseries.statisticalInefficiency(
                dhdl_kn[k, n_old:(n_old + N_k_s[k])])
            print("Correlation time for sampled state %d is %10.3f" %
                  (k, g_k[k]))
            # subsample the data to get statistically uncorrelated data
            indices = np.array(
                timeseries.subsampleCorrelatedData(u_kln[k, k,
                                                         n_old:(n_old +
                                                                N_k_s[k])],
                                                   g=g_k[k]))  # subsample

            # not sure why we have to transpose
            if len(indices) > 0:
                u_kln[k, :,
                      N_k[k]:(N_k[k] +
                              len(indices))] = u_kln_save[k, :,
                                                          (indices +
                                                           n_old)].transpose()
                N_k[k] = N_k[k] + len(indices)
                n_old += N_k_s[k]

        print("Number of retained samples")
        print(N_k)
        print("Number of retained samples from each seed")
        print(N_k_s)

        # generate the weights of each of the umbrella set
        mbar = pymbar.MBAR(u_kln,
                           N_k,
                           verbose=True,
                           subsampling_protocol=[{
                               'method': 'L-BFGS-B'
                           }])

        print("MBAR Converged...")

        # extract self-consistent weights and uncertainties
        try:
            (df_u, ddf_u, theta_i) = mbar.getFreeEnergyDifferences()
        except ValueError:
            pass

        print("Free Energies Optained...")

        # convert PMF to kcal/mol and normalize by the number of molecules
        df_u /= (beta_k[0] * float(Independent))
        ddf_u /= (beta_k[0] * float(Independent))

        ddA[i, :] = ddf_u[-1]
        #        ddA[i, :] = ddf_i[-1]

        # Write out free energy differences
        print("Free Energy Difference (in units of kcal/mol)")
        print("  dA(Gamma) = A(Gamma) - A(Interactions Off)")
        for k in range(Kbig):
            print("%8.3f %8.3f" % (df_i[k, -1], ddf_u[k, -1]))

        del N_k
        del N_k_s
        del u_kln
        del dhdl_kn

    out_dA = np.zeros(len(polymorphs))
    out_ddA = np.zeros(len(polymorphs))
    for i, poly in enumerate(polymorphs):
        out_dA[i] = dA[i, 0]
        out_ddA[i] = ddA[i, 0]

    return out_dA, out_ddA
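

# Minimal usage sketch for dA_MBAR above (hypothetical parameters and directory
# name; it assumes each polymorph folder holds <primary_directory>/<gamma>/PROD.edr
# and dhdl_PROD.xvg, exactly as read in the loop above):
def example_dA_MBAR_usage():
    dA, ddA = dA_MBAR(minimum=0, maximum=100, spacing=10, exponent=2,
                      polymorphs='p1 p2 p3', Molecules=72, Independent=4,
                      Temp=200, primary_directory='restraints')
    for poly, a, da in zip(['p1', 'p2', 'p3'], dA, ddA):
        print('%s: dA = %.3f +/- %.3f kcal/mol' % (poly, a, da))

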
def compute_COV_dGref(refT_cov, Temperatures_MD, Molecules, Polymorphs):
    # setting a place to store the reference free energy differences
    refdG = np.zeros((len(refT_cov), len(Polymorphs)))

    # setting key variables for QHA
    natoms = len(
        md.load(Polymorphs[0] + '/temperature/0/pre_EQ.gro').xyz[0, :, 0])
    nmodes = natoms * 3

    # boltzmann constant in kcal/(mol * K)
    kB = 0.0019872041

    # converting kcal to g*nm**2 / (ps**2)
    ekcal = 418.4

    # speed of light in cm / ps
    speed_of_light = 0.0299792458

    # Reduced Planck's constant
    h_bar = 2.520 * 10**(-38)

    # Avogadro's number
    Na = 6.022 * 10**23

    for i, t in enumerate(refT_cov):
        # determining what directory to look into for this temperature
        directory = '/temperature/' + str(np.where(t == Temperatures_MD)[0][0])
        for j, p in enumerate(Polymorphs):
            path = p + directory
            edr = panedr.edr_to_df(path + '/PROD.edr')

            if not os.path.isfile(path + '/eigenvalues.xvg'):
                # Generating the eigenvalues from the covariance matrix
                c = subprocess.Popen(['echo', '0', ';', 'echo', '0'],
                                     stdout=subprocess.PIPE)
                output = subprocess.check_output([
                    'gmx', 'covar', '-f', path + '/PROD.trr', '-s',
                    path + '/PROD_0.tpr', '-o', path + '/eigenvalues.xvg',
                    '-mwa', 'yes', '-pbc', 'yes', '-last',
                    str(nmodes)
                ],
                                                 stdin=c.stdout)
                c.wait()

                # Removing excess files that take up too much space
                subprocess.call(
                    ['rm', 'eigenvec.trr', 'covar.log', 'average.pdb'])

            # Loading in eigenvalues and converting them to wavenumbers
            wavenumbers = np.loadtxt(path + '/eigenvalues.xvg',
                                     comments=['#', '@'])[:, 1]
            #wavenumbers = kB * t / (np.absolute(wavenumbers[np.where(wavenumbers > 0.)])*100)
            wavenumbers = kB * t / (np.absolute(wavenumbers) * 100)
            wavenumbers = np.sort(
                np.sqrt(wavenumbers[3:] * ekcal) /
                (2 * np.pi * speed_of_light))
            print(len(wavenumbers))
            # Getting the potential energy
            U = np.average(edr['Potential'].values) / 4.184

            # Computing the vibrational (classical harmonic) free energy
            Av = kB * t * np.sum(
                np.log(Na * h_bar * wavenumbers * speed_of_light * 10**12 /
                       (kB * t)))
            print(
                U / Molecules, Av / Molecules,
                np.average(edr['Volume'].values) * Na * 0.024201 * 10**(-24) /
                Molecules)
            # Computing the free energy
            refdG[i, j] = (U + Av + np.average(edr['Volume'].values) * Na *
                           0.024201 * 10**(-24)) / Molecules

    refdG -= refdG[:, 0]
    print(refdG, refT_cov)
    return np.array(refT_cov), refdG
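

# Side note on the vibrational term computed above: per mode of frequency nu_i the
# classical harmonic free energy is A_vib = kB*T * sum_i ln(h*nu_i / (kB*T)), and the
# routine above drops the first three (translational) modes before summing. Below is
# a compact sketch of that sum written directly in terms of wavenumbers (cm^-1) and
# standard constants, rather than the mixed unit factors used above (np is numpy, as
# elsewhere in these examples):
def classical_harmonic_Avib(wavenumbers_cm, T):
    kB = 0.0019872041    # Boltzmann constant, kcal/(mol K)
    h = 9.53707e-14      # Planck constant, kcal s/mol (6.62607e-34 J s * 6.0221413e23 / 4184)
    c = 2.99792458e10    # speed of light, cm/s
    nu = np.asarray(wavenumbers_cm, dtype=float)
    # h*c*nu is the energy quantum of each mode in kcal/mol
    return kB * T * np.sum(np.log(h * c * nu / (kB * T)))
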
Esempio n. 29
0
import numpy as np
import panedr as pdr
from pymbar.timeseries import detectEquilibration  # imports assumed; both libraries are used throughout these examples


def load_potenergy(fil):
    U = pdr.edr_to_df(fil)
    U = np.array(U['Potential'])
    [t0, _, _] = detectEquilibration(U)
    return U[t0:]
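

# Usage sketch for load_potenergy (the file name is illustrative; PROD.edr is the
# production energy file used throughout these examples):
def example_load_potenergy():
    U_eq = load_potenergy('PROD.edr')
    print('%d equilibrated frames, <U> = %.2f kJ/mol' % (len(U_eq), U_eq.mean()))
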
def dGvsT_QHA(Temperatures_MD=np.array([100, 200, 300]),
              Temperatures_unsampled=[],
              Molecules=72,
              molecule='benzene',
              Independent=0,
              potential='oplsaa',
              spacing=1,
              phase='solid',
              Polymorphs=['p1', 'p2', 'p3'],
              refdG_type='QHA',
              output_directory='output_QHA',
              refT_files=['', '', ''],
              refG_files=['', '', ''],
              refT_cov=[]):

    if not os.path.isdir(output_directory):
        subprocess.call(['mkdir', output_directory])

    # Setting up whether the simulation is supposed to use QHA or the covariance matrix for the reference dG
    if refdG_type == 'QHA':
        # Loading in the longest string of temperatures for refT
        refT = []
        for i in refT_files:
            temp_T = np.load(i)
            if len(temp_T) > len(refT):
                refT = np.load(i)

        # Cutting off any zero values from refT
        if refT[0] == 0.:
            refT = refT[1:]

        # Adding in the reference free energy differences for each temperature
        refdG = np.zeros((len(refT), len(Polymorphs)))
        for i in range(len(Polymorphs)):
            G0 = np.load(refG_files[0])
            G1 = np.load(refG_files[i])

            T0 = np.load(refT_files[0])
            T1 = np.load(refT_files[i])
            for j, t in enumerate(refT):
                placement_0 = np.where(T0 == t)
                placement_1 = np.where(T1 == t)
                if (len(placement_0[0]) == 1) and (len(placement_1[0]) == 1):
                    refdG[j, i] = G1[placement_1[0]] - G0[placement_0[0]]
                else:
                    refdG[j, i] = np.nan

    elif refdG_type == 'COV':
        if output_directory == 'output_QHA':
            output_directory = 'output_COV'
        refT, refdG = compute_COV_dGref(refT_cov, Temperatures_MD, Molecules,
                                        Polymorphs)

    else:
        print("ERROR: refdG_type " + refdG_type + " is not a valid input.")
        exit()

    if Independent == 0:
        Independent = Molecules
    # =============================================================================================
    # Load reference free energy differences
    # =============================================================================================

    # Hard set from old dictionary function
    refPot = 0
    ExtraPressures = []
    Temperatures = np.sort(np.append(Temperatures_MD, Temperatures_unsampled))
    Temperatures = np.sort(np.unique(np.append(Temperatures, refT)))
    Pressures = np.ones(len(Temperatures), int)
    Pressures[len(Pressures) -
              len(ExtraPressures):len(Pressures)] = ExtraPressures
    Potentials = [potential]

    # =============================================================================================
    # FORMAT INPUTS
    # =============================================================================================
    # TEMPERATURE
    refk = []
    for k, temp in enumerate(refT):
        refk.append(np.where(temp == Temperatures)[0][0])

    # =============================================================================================
    # READ IN RAW DATA
    # =============================================================================================
    # Constants.
    kB = 1.3806488e-23 * 6.0221413e23 / (1000.0 * 4.184)  # Boltzmann constant in kcal/(mol K)

    # Parameters
    # How many states?
    K = len(Potentials) * len(Temperatures)

    #  maximum number of snapshots/simulation (could make this automated) - doesn't matter, as long as it's long enough.
    N_max = 30000

    # beta factor for the different temperatures
    beta_k = 1.0 / (kB * Temperatures)
    beta_k = np.tile(beta_k, (1, len(Potentials)))[0]

    # Conversion from kJ to kcal
    kJ_to_kcal = 0.2390057

    # This is the sampling efficiency for each potential in each combination of potentials
    Efficiency = np.zeros(K, float)

    # N_k[k] is the total number of snapshots from alchemical state k
    N_k = np.zeros(K, np.int32)

    # dA[r, p, i, k] is the free energy between potential 0 and state k for spacing i in polymorph p, for reference temperature refT[r]
    dA = np.zeros([len(refT), len(Polymorphs), spacing + 1, K], float)

    # ddA[r, p, i, k] is the uncertainty in the free energy between potential 0 and state k for spacing i in polymorph p, for reference temperature refT[r]
    ddA = np.zeros([len(refT), len(Polymorphs), spacing + 1, K], float)

    run_dA_analysis = True

    if os.path.isfile(output_directory +
                      '/dA_raw.npy') and os.path.isfile(output_directory +
                                                        '/ddA_raw.npy'):
        hold_dA = np.load(output_directory + '/dA_raw.npy')
        if np.shape(hold_dA) == np.shape(dA):
            dA = np.load(output_directory + '/dA_raw.npy')
            ddA = np.load(output_directory + '/ddA_raw.npy')
            run_dA_analysis = False

    # dG[r, p, i, t] is the free energy between polymorph 1 and polymorph p for spacing i and temperature t, for reference temperature refT[r]
    dG = np.zeros([len(refT), len(Polymorphs), spacing + 1, len(Temperatures)])

    # ddG[p, i, t] is the uncertainty in the free energy between polymorph 1 and polymorph p for spacing i and temperature t
    ddG = np.zeros([len(Polymorphs), spacing + 1, len(Temperatures)])

    # dS[r, p, i, t] is the relative entropy between polymorph 1 and polymorph p for spacing i and temperature t, for reference temperature refT[r]
    dS = np.zeros([len(refT), len(Polymorphs), spacing + 1, len(Temperatures)])

    # ddS[p, i, t] is the uncertainty in the relative entropy between polymorph 1 and polymorph p for spacing i and temperature t
    ddS = np.zeros([len(Polymorphs), spacing + 1, len(Temperatures)])

    dS_mbar = np.zeros([len(Polymorphs), spacing + 1, len(Temperatures)])
    ddS_mbar = np.zeros([len(Polymorphs), spacing + 1, len(Temperatures)])
    dH_mbar = np.zeros([len(Polymorphs), spacing + 1, len(Temperatures)])

    # O_pij[p,i,j] is the overlap within polymorph p between temperature state i and temperature state j
    O_pij = np.zeros([len(Polymorphs), len(Temperatures), len(Temperatures)])
    dU = np.zeros([len(Polymorphs), len(Temperatures)])
    ddU = np.zeros([len(Polymorphs), len(Temperatures)])

    # u_kln[k,l,n] is the reduced potential energy of configuration n from potential k in potential l
    u_kln = np.zeros([K, K, N_max], np.float64)

    # V_pkn is the volume of configuration n of polymorph p at temperature k
    V_pkn = np.zeros([len(Polymorphs), len(Temperatures), N_max], float)

    # V_avg is the average volume of polymorph p at temperature k
    V_avg = np.zeros([len(Polymorphs), len(Temperatures)], float)

    # ddV_avg is the standard deviation of the volume of polymorph p at temperature k
    ddV_avg = np.zeros([len(Polymorphs), len(Temperatures)], float)

    # C_pkn is the lattice tensor of the polymorph p at temperature k
    box_place = np.matrix([[0, 0], [1, 1], [2, 2], [0, 1], [0, 2], [1, 2]])
    C_pkn = np.zeros([len(Polymorphs), len(Temperatures), N_max, 3, 3], float)

    # h_avg is the average lattice parameters of polymorph p at temperature k
    h_avg = np.zeros([len(Polymorphs), len(Temperatures), 6], float)

    # dh is the standard deviation of the lattice parameters of polymorph p at temperature k
    dh = np.zeros([len(Polymorphs), len(Temperatures), 6], float)

    # Cycle through all polymorphs
    if run_dA_analysis:
        for p, polymorph in enumerate(Polymorphs):
            # Cycle through all sampled potentials
            for i, potential_k in enumerate(Potentials):
                count = 0
                for t in range(len(Temperatures)):
                    k = len(Temperatures) * i + t
                    # Cycle through all evaluated potentials
                    for j, potential_l in enumerate(Potentials):
                        l = len(Temperatures) * j

                        dirpath = polymorph + '/temperature/' + str(
                            count) + '/'
                        if os.path.isfile(dirpath + 'PROD.edr') and (
                                Temperatures[t] in Temperatures_MD):
                            count += 1
                            print("loading " + dirpath + 'PROD.edr')
                            all_energy = panedr.edr_to_df(dirpath + 'PROD.edr')
                            if len(all_energy['Potential'].values) > N_max:
                                [start_production, _,
                                 _] = timeseries.detectEquilibration(
                                     all_energy['Potential'].values[::10])
                                start_production *= 10
                            else:
                                [start_production, _,
                                 _] = timeseries.detectEquilibration(
                                     all_energy['Potential'].values)

                            # Now read in the lattice tensor and average them
                            if 'Box-XX' in list(all_energy):
                                box_letters = [
                                    'XX', 'YY', 'ZZ', 'YX', 'ZX', 'ZY'
                                ]
                            else:
                                box_letters = ['X', 'Y', 'Z']

                            for b in range(len(box_letters)):
                                if len(all_energy['Potential'].values) > N_max:
                                    [hold, _,
                                     _] = timeseries.detectEquilibration(
                                         all_energy[
                                             'Box-' +
                                             box_letters[b]].values[::10])
                                    hold *= 10
                                else:
                                    [hold, _,
                                     _] = timeseries.detectEquilibration(
                                         all_energy['Box-' +
                                                    box_letters[b]].values)

                                if hold > start_production:
                                    start_production = hold

                            if len(all_energy['Total Energy'].
                                   values[start_production:]) > N_max:
                                start_production = len(
                                    all_energy['Total Energy'].values) - N_max

                            # Setting the end point of the simulation
                            N = len(all_energy['Total Energy'].
                                    values[start_production:])
                            N_k[k] = N

                            u_kln[k, l, :N] = all_energy['Potential'].values[
                                start_production:]

                            # Now set these energies over all temperatures
                            u_kln[k,
                                  l:(l + len(Temperatures)), :N] = u_kln[k,
                                                                         l, :N]

                            # Now read in the volumes and average them
                            V_pkn[p, t, :N] = all_energy['Volume'].values[
                                start_production:]
                            V_avg[p, t] = np.average(
                                V_pkn[p, t, :N]) / float(Independent)
                            ddV_avg[p, t] = np.std(
                                V_pkn[p, t, :N]) / float(Independent)

                            # Making sure the lattice tensor elements keep the correct sign over time
                            if count == 1:
                                sign = np.sign(
                                    md.load(
                                        dirpath +
                                        'pre_EQ.gro').unitcell_vectors[0].T)
                                for s in range(3):
                                    for j in range(3):
                                        if sign[s, j] == 0.:
                                            # Correcting for the sign of the lattice parameters
                                            sign[s, j] = 1.

                            for b in range(len(box_letters)):
                                C_pkn[p, t, :N, box_place[b, 0], box_place[b, 1]] = np.absolute(all_energy['Box-' + box_letters[b]].values[start_production:]) * \
                                        sign[box_place[b, 0], box_place[b, 1]] * 10
                            C_avg = np.average(C_pkn[p, t, :N], axis=0)
                            dC = np.std(C_pkn[p, t, :N], axis=0)
                            h_avg[p, t] = crystal_matrix_to_lattice_parameters(
                                C_avg)
                            dh[p, t] = np.absolute(
                                crystal_matrix_to_lattice_parameters(C_avg +
                                                                     dC) -
                                h_avg[p, t])
                        else:
                            N_k[k] = 0
                            V_avg[p, t] = np.nan
                            ddV_avg[p, t] = np.nan
                            h_avg[p, t] = np.nan
                            dh[p, t] = np.nan

            print("Start1")
            # Convert all units to kcal
            #u_pklnT[p, :, :, :] *= kJ_to_kcal
            u_kln *= kJ_to_kcal

            print("Start2")
            # If this was already in kcal or already fully independent, revert
            for j in range(len(Potentials)):
                if Potentials[j][:6] == "amoeba":
                    #u_pklnT[p, :, j * len(Temperatures):(j + 1) * len(Temperatures), :, :] /= kJ_to_kcal
                    u_kln[:, j * len(Temperatures):(j + 1) *
                          len(Temperatures), :] /= kJ_to_kcal

            print("Start3")
            # Remove dependent molecules
            for j in range(len(Potentials)):
                if Potentials[j][:6] != "amoeba":
                    #u_pklnT[p, :, j * len(Temperatures):(j + 1) * len(Temperatures), :, :] *= float(Independent) / Molecules
                    u_kln[:, j * len(Temperatures):(j + 1) *
                          len(Temperatures), :] *= float(
                              Independent) / Molecules

            print("Start4")
            # Now average together the energies and volumes at each state
            for t in range(len(Temperatures)):
                dU[p,
                   t] = np.average(u_kln[t, t, :N_k[t]]) / float(Independent)
                ddU[p, t] = np.std(
                    u_kln[t, t, :N_k[t]]) / N_k[t]**0.5 / float(Independent)
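            # ddU above is the standard error of the mean potential energy,
            # sigma/sqrt(N), normalized per independent molecule; note that it does
            # not account for time correlation between frames.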

            print("Start5")
            # convert to nondimensional units from kcal/mol
            for k, beta in enumerate(beta_k):
                u_kln[:, k, :] *= beta

            u_kln_save = u_kln.copy()
            N_k_save = N_k.copy()
            print("End!")

            print("Number of retained samples")
            print(N_k)

            # Now create the full N_k matrix including the roll-backs as well as the free energy container
            # N_k_matrix[i,k] is the total number of snapshots from alchemical state k using in spacing i
            N_k_matrix = np.zeros([spacing + 1, K], np.int32)
            for i in range(spacing + 1):
                N_k_matrix[i, :] = N_k_save.copy()
                N_k_matrix[i, 0:len(Temperatures)] = N_k_matrix[
                    i, 0:len(Temperatures)] * float(i) / float(spacing)

            # =============================================================================================
            # COMPUTE FREE ENERGY DIFFERENCE USING MBAR FOR EACH SPACING
            # =============================================================================================
            for i in range(spacing + 1):
                if i == 0 and len(Potentials) == 1:
                    continue
                # Initialize MBAR.
                print("Running MBAR...")

                # generate the weights of each of the umbrella set
                mbar = pymbar.MBAR(u_kln, N_k_matrix[i, :], verbose=True)
                print("MBAR Converged...")

                hold = mbar.computeEffectiveSampleNumber(verbose=True)
                print(hold)

                # extract self-consistent weights and uncertainties
                (df_i, ddf_i, theta_i) = mbar.getFreeEnergyDifferences()

                # extract entropy
                [_, _, Delta_u_ij, _, Delta_s_ij,
                 dDelta_s_ij] = mbar.computeEntropyAndEnthalpy()
                print("Free Energies Optained...")

                # Store the dimensionless results in the dA container
                dA[:, p, i, :] = df_i[refk]
                dH_mbar[p, i, :] = Delta_u_ij[0]
                dS_mbar[p, i, :] = Delta_s_ij[0]
                ddS_mbar[p, i, :] = dDelta_s_ij[0]
                print(dA)

            # =============================================================================================
            # COMPUTE UNCERTAINTY USING MBAR
            # =============================================================================================
            g_k = np.zeros([K])
            for i in range(spacing + 1):
                if i == 0 and len(Potentials) == 1:
                    continue

                for k in range(K):
                    # subsample correlated data - for now, use energy from current state
                    if N_k_matrix[i, k] > 0:
                        print(N_k_matrix[i, k])
                        g_k[k] = timeseries.statisticalInefficiency(
                            u_kln_save[k, k, 0:100])
                        print(
                            "Correlation time for phase (%s), sampled state %d is %10.3f"
                            % (phase, k, g_k[k]))

                        # subsample the data to get statistically uncorrelated data
                        indices = np.array(
                            timeseries.subsampleCorrelatedData(
                                u_kln_save[k, k, 0:N_k_matrix[i,
                                                              k]], g=g_k[k]))
                        N_k_matrix[i, k] = len(indices)
                        u_kln[k, :, 0:N_k_matrix[i, k]] = u_kln_save[
                            k, :, indices].transpose(
                            )  # not sure why we have to transpose

                print("Number of retained samples")
                print(N_k)

                print("Running MBAR...")

                # generate the weights of each state
                mbar = pymbar.MBAR(u_kln, N_k_matrix[i, :], verbose=True)
                print("MBAR Converged...")

                # extract self-consistent weights and uncertainties
                (df_u, ddf_u, theta_u) = mbar.getFreeEnergyDifferences()

                # calculate the overlap if necessary
                if len(Temperatures) == 2:
                    O_pij[p, :, :] = mbar.computeOverlap()[2]

                # testing
                weights_in_gromos = np.zeros(K, float)
                for k in range(K):
                    w = np.exp(mbar.Log_W_nk[:, k])
                    print("max weight in state %d is %12.7f" % (k, np.max(w)))
                    neff = 1 / np.sum(w**2)

                    print("Effective number of sample in state %d is %10.3f" %
                          (k, neff))
                    print("Efficiency for state %d is %d/%d = %10.4f" %
                          (k, neff, len(w), neff / len(w)))
                    Efficiency[k] = neff / len(w)  # Store the efficiency
                    w_0 = np.exp(mbar.Log_W_nk[:, 0])  # Weights in gromos
                    initial_configs = np.sum(N_k[0:k])
                    final_configs = np.sum(N_k[0:k + 1])

                    print("Total weight in gromos " +
                          str(np.sum(w_0[initial_configs:final_configs])))
                    weights_in_gromos[k] = np.sum(
                        w_0[initial_configs:final_configs])

                # Write out free energy differences
                print("Free Energy Difference (in units of kcal/mol)")
                for k in range(K):
                    print("%8.3f %8.3f" % (-df_i[k, 0], ddf_u[k, 0]))

                # Store the dimensionless results in the ddA container
                ddA[:, p, i, :] = ddf_u[refk]
#            ddA[:, p, i, :] = ddf_i[refk]

        # Saving the files if needed for QHA
        if refdG_type == 'QHA':
            np.save(output_directory + '/dA_raw.npy', dA)
            np.save(output_directory + '/ddA_raw.npy', ddA)

    # =============================================================================================
    # FINALIZE THE RELATIVE FREE ENERGY AND ENTROPY
    # =============================================================================================
    for k in range(len(refT)):
        for i in range(spacing + 1):
            for t, T in enumerate(Temperatures):
                for p in range(len(Polymorphs)):
                    dG[k, p, i, t] = (dA[k, p, i, t] - dA[k, 0, i, t]) / (
                        beta_k[t] * float(Independent)) + float(T) / float(
                            refT[k]) * refdG[k, p]
                    if p == 0:
                        continue
                    dS[k, p, i,
                       t] = (dU[p, t] - dU[0, t] - dG[k, p, i, t]) / float(T)
                    if k == 0:
                        ddG[p, i,
                            t] = ((ddA[k, p, i, t]**2 + ddA[k, 0, i, t]**2) /
                                  (beta_k[t] * float(Independent))**2)**0.5
                        ddS[p, i, t] = (ddU[p, t]**2 + ddU[0, t]**2 +
                                        ddG[p, i, t]**2)**0.5 / float(T)

    # =============================================================================================
    # PLOT THE RELATIVE FREE ENERGY VS TEMPERATURE
    # =============================================================================================

    PlotPress = 1  # Pressure to plot the dGvT curve at
    Temperatures_P = Temperatures[Pressures == PlotPress]

    np.save(output_directory + '/T_' + molecule + '_' + potential,
            Temperatures_P)
    for p, Poly in enumerate(Polymorphs):
        np.save(
            output_directory + '/dGvT_' + molecule + '_' + Poly + '_' +
            potential, dG[:, p, spacing, Pressures == PlotPress])
        np.save(
            output_directory + '/ddGvT_' + molecule + '_' + Poly + '_' +
            potential, ddG[p, spacing, Pressures == PlotPress])
        if len(Potentials) > 1:
            np.save(
                output_directory + '/dGvT_' + molecule + '_' + Poly + '_' +
                potential + '_indirect', dG[:, p, 0, :])
            np.save(
                output_directory + '/ddGvT_' + molecule + '_' + Poly + '_' +
                potential + '_indirect', ddG[p, 0, :])
            if spacing > 1:
                np.save(
                    output_directory + '/dGvT_' + molecule + '_' + Poly + '_' +
                    potential + '_convergence', dG[:, p, :, :])
                np.save(
                    output_directory + '/ddGvT_' + molecule + '_' + Poly +
                    '_' + potential + '_convergence', ddG[p, :, :])
        np.save(
            output_directory + '/dS_' + molecule + '_' + Poly + '_' +
            potential, dS[:, p, spacing, :])
        np.save(
            output_directory + '/ddS_' + molecule + '_' + Poly + '_' +
            potential, ddS[p, spacing, :])

    for p, Poly in enumerate(Polymorphs):
        np.save(
            output_directory + '/UvT_' + molecule + '_' + Poly + '_' +
            potential, dU[p, :])

    for p, Poly in enumerate(Polymorphs):
        np.save(
            output_directory + '/VvT_' + molecule + '_' + Poly + '_' +
            potential, V_avg[p, :])
        np.save(
            output_directory + '/dVvT_' + molecule + '_' + Poly + '_' +
            potential, ddV_avg[p, :])

    # =============================================================================================
    # SAVE THE AVERAGE BOX VECTORS AND ANGLES VS TEMPERATURE
    # =============================================================================================

    for p, Poly in enumerate(Polymorphs):
        np.save(
            output_directory + '/hvT_' + molecule + '_' + Poly + '_' +
            potential, h_avg[p, :])
        np.save(
            output_directory + '/dhvT_' + molecule + '_' + Poly + '_' +
            potential, dh[p, :])

    # Save the data for future use.
    for p, Poly in enumerate(Polymorphs):
        np.save(
            output_directory + '/dUvT_' + molecule + '_' + Poly + '_' +
            potential, dU[p, :] - dU[0, :])
        np.save(
            output_directory + '/ddUvT_' + molecule + '_' + Poly + '_' +
            potential, (ddU[p, :]**2 + ddU[0, :]**2)**0.5)
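

# The two relations applied in the FINALIZE section of dGvsT_QHA above, written out
# for a single polymorph/temperature pair as an illustrative sketch (the scalar
# arguments stand in for the array slices used above):
def combine_reference_dG(dA_p, dA_0, ddA_p, ddA_0, beta_T, N_indep, T, T_ref, dG_ref):
    # dG(T) = [dA_p(T) - dA_0(T)] / (beta_T * N_indep) + (T / T_ref) * dG_ref
    dG = (dA_p - dA_0) / (beta_T * N_indep) + (T / T_ref) * dG_ref
    # uncertainty propagated from the two MBAR free energies only
    ddG = ((ddA_p**2 + ddA_0**2) / (beta_T * N_indep)**2)**0.5
    return dG, ddG


def relative_entropy(dU_p, dU_0, dG, T):
    # dS(T) = (dU_p - dU_0 - dG) / T, i.e. S = (U - G) / T applied to the differences
    return (dU_p - dU_0 - dG) / T
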
Esempio n. 31
0
    def get_dHvap_from_para(self, k) -> float:
        os.chdir(self.dir_npt)

        # energy and Hvap after diff
        try:
            df = panedr.edr_to_df('diff1.%s.edr' % k)
        except:
            raise Exception('File not exist: ' +
                            os.path.abspath('diff1.%s.edr' % k))
        pene_array_diff_p = np.array(df.Potential)

        # try:
        #     df = panedr.edr_to_df('diff-1.%s.edr' % k)
        # except:
        #     raise Exception('File not exist: ' + os.path.abspath('diff-1.%s.edr' % k))
        # pene_array_diff_n = np.array(df.Potential)
        try:
            df = panedr.edr_to_df('npt.edr')
        except:
            raise Exception('File not exist: ' + os.path.abspath('npt.edr'))
        pene_array = np.array(df.Potential)

        # calculate the derivative series dA/dp
        delta = get_delta_for_para(k)
        # dPene_array = (pene_array_diff_p - pene_array_diff_n) / delta / 2
        dPene_array = (pene_array_diff_p - pene_array) / delta

        if not self.need_vacuum:
            try:
                df = panedr.edr_to_df('diff1.%s-hvap.edr' % k)
            except:
                raise Exception('File not exist: ' +
                                os.path.abspath('diff1.%s-hvap.edr' % k))
            hvap_array_diff_p = self.RT - np.array(df.Potential) / self.n_mol

            # try:
            #     df = panedr.edr_to_df('diff-1.%s-hvap.edr' % k)
            # except:
            #     raise Exception('File not exist: ' + os.path.abspath('diff-1.%s-hvap.edr' % k))
            # hvap_array_diff_n = self.RT - np.array(df.Potential) / self.n_mol
            try:
                df = panedr.edr_to_df('hvap.edr')
            except:
                raise Exception('File not exist: ' +
                                os.path.abspath('hvap.edr'))
            hvap_array = self.RT - np.array(df.Potential) / self.n_mol

            # dHvap_array = (hvap_array_diff_p - hvap_array_diff_n) / delta / 2
            dHvap_array = (hvap_array_diff_p - hvap_array) / delta

            dHdp = dHvap_array.mean() - 1 / self.RT * (
                (self.hvap_array * dPene_array).mean() -
                self.hvap_array.mean() * dPene_array.mean())
        else:
            dELIQdp = dPene_array.mean() - 1 / self.RT * (
                (self.pe_liq_array * dPene_array).mean() -
                self.pe_liq_array.mean() * dPene_array.mean())

            os.chdir(self.dir_vacuum)

            try:
                df = panedr.edr_to_df('diff1.%s.edr' % k)
            except:
                raise Exception('File not exist: ' +
                                os.path.abspath('diff1.%s.edr' % k))
            pene_array_diff_p = np.array(df.Potential)

            # try:
            #     df = panedr.edr_to_df('diff-1.%s.edr' % k)
            # except:
            #     raise Exception('File not exist: ' + os.path.abspath('diff-1.%s.edr' % k))
            # pene_array_diff_n = np.array(df.Potential)
            try:
                df = panedr.edr_to_df('nvt.edr')
            except:
                raise Exception('File not exist: ' +
                                os.path.abspath('nvt.edr'))
            pene_array = np.array(df.Potential)

            # dPene_array = (pene_array_diff_p - pene_array_diff_n) / delta / 2
            dPene_array = (pene_array_diff_p - pene_array) / delta

            dEGASdp = dPene_array.mean() - 1 / self.RT * (
                (self.pe_gas_array * dPene_array).mean() -
                self.pe_gas_array.mean() * dPene_array.mean())

            dHdp = dEGASdp - dELIQdp / self.n_mol

        return dHdp
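

# Standalone sketch of the estimator used in get_dHvap_from_para above: for an NPT
# ensemble average <A>, the derivative with respect to a force-field parameter p is
#   d<A>/dp = <dA/dp> - (1/RT) * (<A * dU/dp> - <A> * <dU/dp>),
# with dU/dp approximated by the forward finite difference (U(p + delta) - U(p)) / delta.
# Illustrative helper only; frame-by-frame alignment of the input arrays is assumed:
def fluctuation_derivative(A, dA_dp_mean, dU_dp, RT):
    import numpy as np
    A = np.asarray(A, dtype=float)
    dU_dp = np.asarray(dU_dp, dtype=float)
    return dA_dp_mean - 1.0 / RT * ((A * dU_dp).mean() - A.mean() * dU_dp.mean())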