def get_energy(paths, listRes=None):
    """
    Read Energies from .edr files and use the `panedr` library to parse them.

    :params paths: Path to a single edr file, or a list of such paths. When
        a list is given the individual frames are concatenated and rows
        sharing the same index value are averaged.
    :params listRes: Names forwarded unchanged to `extract_ligand_info`;
        defaults to ``['Ligand']``.
    :returns: Pandas dataframe (whatever `extract_ligand_info` produces).
    """
    # Build the default per call so that no list object is shared between
    # invocations (mutable default argument pitfall).
    if listRes is None:
        listRes = ['Ligand']

    if not isinstance(paths, list):
        df = edr_to_df(paths)
    else:
        # concatenate the data frames and reduce with the mean function
        frames = [edr_to_df(p) for p in paths]
        df = pandas.concat(frames)
        # groupby(...).mean() returns a NEW frame; without the assignment
        # the averaged result is thrown away and the raw concatenation is
        # used instead.
        df = df.groupby(df.index).mean()

    # Reindex dataframe using sequential integers
    df.reset_index(inplace=True)

    # Electrostatic Energy
    df['ele'] = sum_available_columns(
        df, ['Coulomb-14', 'Coulomb (SR)', 'Coulomb (LR)', 'Coul. recip.'])

    # Van der Waals terms
    df['vdw'] = sum_available_columns(
        df, ['LJ-14', 'LJ (SR)', 'LJ (LR)'])

    return extract_ligand_info(df, listRes)
def get_dDens_from_para(self, k) -> float:
    """Return the derivative of the density with respect to parameter ``k``.

    Changes the working directory to ``self.dir_npt``, reads the potential
    energy series from ``diff1.<k>.edr`` (perturbed run) and ``npt.edr``
    (reference run), forms a forward finite difference of the potential
    energy, and combines it with ``self.dens_array`` through a covariance
    expression scaled by ``-1 / self.RT``.

    :param k: parameter identifier; used in the perturbed file name and
        passed to ``get_delta_for_para`` to obtain the step size.
    :returns: a single number (the original annotation claimed a 2-tuple,
        but only one value has ever been returned).
    :raises Exception: when either edr file cannot be read; the underlying
        error is chained as ``__cause__``.
    """
    os.chdir(self.dir_npt)

    # energy and Hvap after diff
    perturbed_edr = 'diff1.%s.edr' % k
    try:
        df = panedr.edr_to_df(perturbed_edr)
    except Exception as err:
        # ``except Exception`` (not a bare ``except``) so that Ctrl-C and
        # SystemExit are not converted into a misleading "file" error.
        raise Exception('File not exist: ' + os.path.abspath(perturbed_edr)) from err
    pene_array_diff_p = np.array(df.Potential)

    # NOTE: a central-difference variant based on 'diff-1.<k>.edr' was left
    # disabled in the original code; only the forward difference is used.

    try:
        df = panedr.edr_to_df('npt.edr')
    except Exception as err:
        raise Exception('File not exist: ' + os.path.abspath('npt.edr')) from err
    pene_array = np.array(df.Potential)

    # calculate the derivative series dA/dp
    delta = get_delta_for_para(k)
    dPene_array = (pene_array_diff_p - pene_array) / delta

    # calculate the derivative dA/dp according to ForceBalance
    # TODO To accurately calculate the covariant, using dens_array.mean() instead of dens_series.mean()
    dDdp = -1 / self.RT * ((self.dens_array * dPene_array).mean()
                           - self.dens_array.mean() * dPene_array.mean())
    return dDdp
def get_gmx_energy(edrfile):
    """
    Parse and canonicalize energies from gromacs edr file

    For every canonical force group the matching columns of the FIRST frame
    of the edr file are summed; columns absent from the file are skipped.

    Notes
    -----
    gromacs energy units are kJ/mol
    """
    # canonical group name -> edr columns that contribute to it
    columns_by_group = {
        'bond': ['Bond'],
        'angle': ['Angle'],
        'dihedral': ['Proper Dih.', 'Ryckaert-Bell.'],
        'LJ': ['LJ-14', 'LJ (SR)'],
        'QQ': ['Coulomb-14', 'Coulomb (SR)'],
        'nonbond': ['LJ-14', 'Coulomb-14', 'LJ (SR)', 'Coulomb (SR)'],
        'all': ['Potential'],
    }

    frame = panedr.edr_to_df(edrfile)

    # From the edr
    group_totals = {
        group: sum(
            frame.iloc[0][column]
            for column in columns
            if column in frame.columns
        )
        for group, columns in columns_by_group.items()
    }
    return {'gromacs': group_totals}
def edr(request):
    """Bundle the parsed edr frame and its xvg reference for one parameter.

    ``request.param`` is unpacked as an ``(edrfile, xvgfile)`` pair. The
    first xvg column is split off as the time axis; the remaining columns
    are kept as the reference data.
    """
    edr_path, xvg_path = request.param
    frame = panedr.edr_to_df(edr_path)
    table, names, precision = read_xvg(xvg_path)
    times, values = table[:, 0], table[:, 1:]
    return EDR_Data(frame, values, times, names, precision,
                    edr_path, xvg_path)
def test_progress(self):
    """
    Check that verbose parsing writes the expected progress lines to stderr.
    """
    captured = StringIO()
    with redirect_stderr(captured):
        df = panedr.edr_to_df(EDR, verbose=True)

    # Progress updates share one physical line, separated by '\r'.
    first_line = captured.getvalue().split('\n')[0]
    updates = first_line.split('\r')
    print(updates)
    dt = 2000.0

    # A single iterator is shared by all range checks so that each check
    # resumes where the previous one stopped.
    cursor = iter(updates)
    self.assertEqual('', next(cursor))
    expected_ranges = (
        (0, 21, 1),
        (30, 201, 10),
        (300, 2001, 100),
        (3000, 14101, 1000),
    )
    for start, stop, step in expected_ranges:
        self._assert_progress_range(cursor, dt, start, stop, step)

    # Check the last line
    print(df.iloc[-1, 0])
    expected_last = 'Last Frame read : 14099, time : 28198000.0 ps'
    self.assertEqual(expected_last, next(cursor))

    # Did we leave stderr clean with a nice new line at the end?
    self.assertTrue(captured.getvalue().endswith('\n'),
                    'The new line is missing at the end.')
def dA_endpoint_MBAR(polymorphs='p1 p2', Molecules=72, Independent=4, Temp=200):
    """Free-energy correction between the production and endpoint potentials.

    For every polymorph directory, the potential energies stored in
    ``<poly>/interactions/100/END.edr`` and ``.../PROD.edr`` are subtracted,
    reduced by ``kB * Temp`` per molecule, and passed to ``pymbar.EXP``.

    :param polymorphs: whitespace-separated polymorph directory names.
    :param Molecules: divisor applied to the energy difference.
    :param Independent: accepted for signature compatibility; not used here.
    :param Temp: temperature entering ``kB * Temp`` (kB is in kcal/(mol*K)).
    :returns: ``(dA, ddA)`` arrays with one entry per polymorph. If the edr
        files of ANY polymorph are missing, both arrays are all zeros.
    """
    # Setting constants
    kJ_to_kcal = 1/4.184  # Converting kJ to kcal
    kB = 0.0019872041  # boltzman constant in kcal/(mol*K)

    # Getting the polymorph names
    polymorphs = polymorphs.split()

    # Place to store the free energy differences
    dA = np.zeros(len(polymorphs))
    ddA = np.zeros(len(polymorphs))

    for i, poly in enumerate(polymorphs):
        prod_edr = poly + '/interactions/100/PROD.edr'
        end_edr = poly + '/interactions/100/END.edr'
        if os.path.isfile(prod_edr) and os.path.isfile(end_edr):
            # Loading in the difference between the endpoint and production files
            dU = (panedr.edr_to_df(end_edr)['Potential'].values
                  - panedr.edr_to_df(prod_edr)['Potential'].values)

            # Converting the energy differences to go into pymbar
            dW = dU / Molecules * kJ_to_kcal / (kB * Temp)

            # Getting the energy differences with MBAR using Exponential Averaging
            da = np.array(pymbar.EXP(dW)) * kB * Temp
            dA[i] = -da[0]
            ddA[i] = da[1]
        else:
            dA[i] = np.nan
            ddA[i] = np.nan

    # Check to see if there are any nan values.
    # NaN never compares equal to anything (including itself), so the
    # previous ``dA == np.nan`` test could not fire; np.isnan is required.
    if np.isnan(dA).any():
        dA[:] = 0.
        ddA[:] = 0.
    return dA, ddA
def test_progress(self):
    """
    Check that verbose parsing writes the expected progress lines to stderr.
    """
    captured = StringIO()
    with redirect_stderr(captured):
        df = panedr.edr_to_df(EDR, verbose=True)

    # Progress updates share one physical line, separated by '\r'.
    first_line = captured.getvalue().split('\n')[0]
    updates = first_line.split('\r')
    print(updates)
    dt = 2000.0

    # A single iterator is shared by all range checks so that each check
    # resumes where the previous one stopped.
    cursor = iter(updates)
    assert '' == next(cursor)
    expected_ranges = (
        (0, 21, 1),
        (30, 201, 10),
        (300, 2001, 100),
        (3000, 14101, 1000),
    )
    for start, stop, step in expected_ranges:
        self._assert_progress_range(cursor, dt, start, stop, step)

    # Check the last line
    print(df.iloc[-1, 0])
    expected_last = 'Last Frame read : 14099, time : 28198000.0 ps'
    observed_last = next(cursor)
    assert expected_last == observed_last

    # Did we leave stderr clean with a nice new line at the end?
    assert captured.getvalue().endswith('\n'), \
        'New line missing at the end of output.'
def calculate_density(job):
    """Average the production density of ``job`` and store block statistics.

    Reads ``prod.edr`` from the job workspace, keeps the rows whose ``Time``
    exceeds 500.0, and records in the job document the mean density
    (``density``) and the largest block standard deviation
    (``density_unc``). The per-block estimates are written to
    ``density_blk_avg.txt`` in the job workspace.
    """
    import panedr
    import numpy as np
    from block_average import block_average

    # Load the thermo data and drop everything up to Time == 500.0
    thermo = panedr.edr_to_df(job.fn("prod.edr"))
    production = thermo[thermo.Time > 500.0]
    density = production.Density.values

    # save average density
    job.doc.density = np.mean(density)

    means_est, vars_est, vars_err = block_average(density)

    row_template = "{}\t{}\t{}\t{}\n"
    with open(job.fn("density_blk_avg.txt"), "w") as ferr:
        ferr.write("# nblk_ops, mean, vars, vars_err\n")
        ferr.writelines(
            row_template.format(nblk_ops, *estimates)
            for nblk_ops, estimates in enumerate(
                zip(means_est, vars_est, vars_err))
        )

    job.doc.density_unc = np.max(np.sqrt(vars_est))
def fileedr(self):
    """Return the parsed edr frame for ``self.mainfile``, loading it lazily.

    The frame is cached in ``self._file_handler``. If parsing fails, an
    error is logged and ``None`` is returned, so a later call retries.
    """
    if self._file_handler is not None:
        return self._file_handler

    try:
        self._file_handler = panedr.edr_to_df(self.mainfile)
    except Exception:
        self.logger.error('Error reading edr file.')
    return self._file_handler
def get_hvap(self) -> float:
    """Compute the heat of vaporisation from the stored edr files.

    Changes the working directory to ``self.dir_npt`` (and to
    ``self.dir_vacuum`` when a vacuum run is needed), printing each
    directory it enters.

    Without a vacuum run the result is ``RT - <Potential>/n_mol`` from
    ``hvap.edr``; otherwise it is ``RT + <pe_gas> - <pe_liq>/n_mol`` using
    ``nvt.edr`` (vacuum) and ``npt.edr`` (liquid).
    """
    os.chdir(self.dir_npt)
    print(os.getcwd())

    if self.need_vacuum:
        liquid_pe = panedr.edr_to_df('npt.edr').Potential.mean()

        os.chdir(self.dir_vacuum)
        print(os.getcwd())
        gas_pe = panedr.edr_to_df('nvt.edr').Potential.mean()

        return self.RT + gas_pe - liquid_pe / self.n_mol

    hvap_frame = panedr.edr_to_df('hvap.edr')
    return self.RT - hvap_frame.Potential.mean() / self.n_mol
def get_density(self) -> float:
    """Return the mean ``Density`` of ``npt.edr`` divided by 1000.

    Changes the working directory to ``self.dir_npt`` (and prints it). The
    result is also stored on ``self.sim_dens`` for the expansivity
    calculation.
    """
    os.chdir(self.dir_npt)
    print(os.getcwd())

    npt_frame = panedr.edr_to_df('npt.edr')
    mean_density = npt_frame.Density.mean() / 1000  # convert to g/mL

    # save self.sim_dens for calculating expansivity
    self.sim_dens = mean_density
    return mean_density
def test_times(self):
    """
    Test that the time is read correctly when dt is regular.
    """
    df = panedr.edr_to_df(EDR)
    xvg = read_xvg(EDR_XVG)
    ref_time = xvg[:, 0]
    # ``as_matrix()`` was removed in pandas 1.0; ``.values`` returns the
    # same ndarray and works on old and new pandas alike.
    time = df[u'Time'].values
    self.assertTrue(numpy.allclose(ref_time, time, atol=5e-7))
def test_verbosity(self):
    """
    Parsing in verbose mode must yield the same values as the xvg reference.
    """
    # Send the progress output to stdout so it does not pollute stderr.
    with redirect_stderr(sys.stdout):
        frame = panedr.edr_to_df(EDR, verbose=True)

    expected, _names, precision = read_xvg(EDR_XVG)
    observed = frame.values
    print(expected - observed)
    assert_allclose(expected, observed, atol=precision/2)
def edr(request):
    """Bundle both edr parsers' output with the xvg reference data.

    ``request.param`` is unpacked as an ``(edrfile, xvgfile)`` pair. The edr
    file is read once through ``panedr`` (DataFrame) and once through
    ``pyedr`` (dict). The first xvg column is split off as the time axis,
    and the expected column list is the xvg names with ``'Time'`` prepended.
    """
    edr_path, xvg_path = request.param
    frame = panedr.edr_to_df(edr_path)
    as_dict = pyedr.edr_to_dict(edr_path)

    table, names, precision = read_xvg(xvg_path)
    times, values = table[:, 0], table[:, 1:]
    columns = np.insert(names, 0, u'Time')

    return EDR_Data(frame, as_dict, values, times, names, columns,
                    precision, edr_path, xvg_path)
def test_verbosity(self):
    """
    Make sure the verbose mode does not alter the results.
    """
    # Send the progress output to stdout so it does not pollute stderr.
    with redirect_stderr(sys.stdout):
        df = panedr.edr_to_df(EDR, verbose=True)
    ref_content = read_xvg(EDR_XVG)
    # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values``
    # returns the same ndarray and works on old and new pandas alike.
    content = df.values
    print(ref_content - content)
    self.assertTrue(numpy.allclose(ref_content, content, atol=5e-7))
def test_content(self):
    """
    Test that the content of the DataFrame is the expected one.
    """
    df = panedr.edr_to_df(EDR)
    xvg = read_xvg(EDR_XVG)
    ref_content = xvg[:, 1:]  # The time column is tested separately
    # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values``
    # returns the same ndarray and works on old and new pandas alike.
    content = df.iloc[:, 1:].values
    print(ref_content - content)
    self.assertTrue(numpy.allclose(ref_content, content, atol=5e-7))
def get_dHvap_list_from_paras(self, paras: OrderedDict):
    """Evaluate ``get_dHvap_from_para`` for every key of ``paras`` in parallel.

    Changes the working directory to ``self.dir_npt`` (and to
    ``self.dir_vacuum`` when a vacuum run is needed) and caches the energy
    series the workers rely on: ``self.hvap_array`` without a vacuum run,
    or ``self.pe_liq_array`` / ``self.pe_gas_array`` with one.

    :param paras: mapping whose keys identify the parameters to
        differentiate; one worker process is started per key.
    :returns: list with one result per key, in key order; an empty list
        when ``paras`` is empty.
    """
    os.chdir(self.dir_npt)
    if not self.need_vacuum:
        df = panedr.edr_to_df('hvap.edr')
        self.hvap_array = self.RT - np.array(df.Potential) / self.n_mol
    else:
        df = panedr.edr_to_df('npt.edr')
        self.pe_liq_array = np.array(df.Potential)

        os.chdir(self.dir_vacuum)
        df = panedr.edr_to_df('nvt.edr')
        self.pe_gas_array = np.array(df.Potential)

    # Pool(0) raises ValueError, so an empty parameter set is answered
    # directly instead of starting a pool.
    if not paras:
        return []

    # Serial equivalent:
    #   dHdp_list = [self.get_dHvap_from_para(k) for k in paras.keys()]
    from multiprocessing import Pool
    with Pool(len(paras)) as p:
        dHdp_list = p.map(wrapper_target,
                          [(self, 'get_dHvap_from_para', k) for k in paras.keys()])
    return dHdp_list
def _parse_gmx_energy(edr_path: str) -> EnergyReport:
    """Parse a GROMACS `.edr` energy file into an ``EnergyReport``.

    Only the frame indexed at time ``0.0`` is used. Every energy term is
    multiplied by ``kj_mol``, the bonded terms GROMACS may omit are filled
    in as zero, and the terms are grouped into Bond / Angle / Torsion /
    vdW / Electrostatics.

    :param edr_path: path of the `.edr` file to read.
    """
    import panedr

    if TYPE_CHECKING:
        from pandas import DataFrame

    # Read the file the caller asked for; the path used to be hard-coded
    # to "out.edr", silently ignoring ``edr_path``.
    df: DataFrame = panedr.edr_to_df(edr_path)
    energies_dict: Dict = df.to_dict("index")  # type: ignore[assignment]
    energies = energies_dict[0.0]
    energies.pop("Time")

    for key in energies:
        energies[key] *= kj_mol

    # TODO: Better way of filling in missing fields
    # GROMACS may not populate all keys
    for required_key in ["Bond", "Angle", "Proper Dih."]:
        if required_key not in energies:
            energies[required_key] = 0.0 * kj_mol

    keys_to_drop = [
        "Kinetic En.",
        "Temperature",
        "Pres. DC",
        "Pressure",
        "Vir-XX",
        "Vir-YY",
        "Vir-ZZ",
        "Vir-YX",
        "Vir-XY",
        "Vir-YZ",
        "Vir-XZ",
    ]
    for key in keys_to_drop:
        # Absent keys are simply skipped.
        energies.pop(key, None)

    report = EnergyReport()

    report.update_energies({
        "Bond": energies["Bond"],
        "Angle": energies["Angle"],
        "Torsion": _get_gmx_energy_torsion(energies),
        "vdW": _get_gmx_energy_vdw(energies),
        "Electrostatics": _get_gmx_energy_coul(energies),
    })

    return report
def test_columns(self):
    """
    The DataFrame must expose the reference column names in reference order.
    """
    df = panedr.edr_to_df(EDR)
    ref_columns = numpy.array([
        u'Time', u'Bond', u'G96Angle', u'Improper Dih.', u'LJ (SR)',
        u'Coulomb (SR)', u'Potential', u'Kinetic En.', u'Total Energy',
        u'Temperature', u'Pressure', u'Constr. rmsd', u'Box-X', u'Box-Y',
        u'Box-Z', u'Volume', u'Density', u'pV', u'Enthalpy',
        u'Vir-XX', u'Vir-XY', u'Vir-XZ',
        u'Vir-YX', u'Vir-YY', u'Vir-YZ',
        u'Vir-ZX', u'Vir-ZY', u'Vir-ZZ',
        u'Pres-XX', u'Pres-XY', u'Pres-XZ',
        u'Pres-YX', u'Pres-YY', u'Pres-YZ',
        u'Pres-ZX', u'Pres-ZY', u'Pres-ZZ',
        u'#Surf*SurfTen', u'Box-Vel-XX', u'Box-Vel-YY', u'Box-Vel-ZZ',
        u'Mu-X', u'Mu-Y', u'Mu-Z',
        u'Coul-SR:water-water', u'LJ-SR:water-water',
        u'Coul-SR:water-DPPC', u'LJ-SR:water-DPPC',
        u'Coul-SR:water-DUPC', u'LJ-SR:water-DUPC',
        u'Coul-SR:water-CHOL', u'LJ-SR:water-CHOL',
        u'Coul-SR:water-OCO', u'LJ-SR:water-OCO',
        u'Coul-SR:DPPC-DPPC', u'LJ-SR:DPPC-DPPC',
        u'Coul-SR:DPPC-DUPC', u'LJ-SR:DPPC-DUPC',
        u'Coul-SR:DPPC-CHOL', u'LJ-SR:DPPC-CHOL',
        u'Coul-SR:DPPC-OCO', u'LJ-SR:DPPC-OCO',
        u'Coul-SR:DUPC-DUPC', u'LJ-SR:DUPC-DUPC',
        u'Coul-SR:DUPC-CHOL', u'LJ-SR:DUPC-CHOL',
        u'Coul-SR:DUPC-OCO', u'LJ-SR:DUPC-OCO',
        u'Coul-SR:CHOL-CHOL', u'LJ-SR:CHOL-CHOL',
        u'Coul-SR:CHOL-OCO', u'LJ-SR:CHOL-OCO',
        u'Coul-SR:OCO-OCO', u'LJ-SR:OCO-OCO',
        u'T-non_water', u'T-water',
        u'Lamb-non_water', u'Lamb-water',
    ], dtype='object')
    columns = df.columns.values

    # The element-wise diagnostic is only possible when both arrays have
    # the same length.
    if columns.shape[0] == ref_columns.shape[0]:
        mismatch = ref_columns != columns
        print('These columns differ from the reference (displayed as read):')
        print(columns[mismatch])
        print('The corresponding names displayed as reference:')
        print(ref_columns[mismatch])

    self.assertTrue(ref_columns.shape == columns.shape,
                    'The number of column read is unexpected.')
    self.assertTrue(numpy.all(ref_columns == columns),
                    'At least one column name was misread.')
def get_dDens_list_from_paras(self, paras: OrderedDict):
    """Evaluate ``get_dDens_from_para`` for every key of ``paras`` in parallel.

    Changes the working directory to ``self.dir_npt``, caches the density
    series of ``npt.edr`` (divided by 1000) on ``self.dens_array`` for the
    workers, and stores the collected derivatives on ``self.dDdp_array``.

    :param paras: mapping whose keys identify the parameters to
        differentiate; one worker process is started per key.
    :returns: list with one result per key, in key order; an empty list
        when ``paras`` is empty.
    """
    os.chdir(self.dir_npt)
    df = panedr.edr_to_df('npt.edr')
    # TODO Because of the float error in gmx edr file, the index in Series is erroneous. Convert to array
    self.dens_array = np.array(df.Density) / 1000  # convert to g/mL

    if paras:
        # Serial equivalent:
        #   dDdp_list = [self.get_dDens_from_para(k) for k in paras.keys()]
        from multiprocessing import Pool
        with Pool(len(paras)) as p:
            dDdp_list = p.map(wrapper_target,
                              [(self, 'get_dDens_from_para', k) for k in paras.keys()])
    else:
        # Pool(0) raises ValueError; an empty parameter set has no work.
        dDdp_list = []

    # save dDdp_array for calculating expansivity
    self.dDdp_array = np.array(dDdp_list)
    return dDdp_list
def get_properties(self, deffnm, cwd='.', stride=1, mdp=None, mindist=True):
    """Collect per-frame properties of the run named ``deffnm``.

    Loads the energy table (``<deffnm>.edr``, every ``stride``-th row) and
    the trajectory (``<deffnm>.vis.xtc``), truncates all inputs to their
    common length, verifies that their time axes agree, and adds an
    ``RMSD`` column computed on the atoms selected with ``'alpha'``
    against ``self.traj``. With ``mindist`` enabled, ``gmx mindist`` is run
    first and rows 1 and 2 of its output become the ``'Min. PI dist'`` and
    ``'Max. int dist'`` columns.

    :param mdp: run parameters; falls back to ``self.mdp`` when ``None``.
    :returns: ``(traj, df)`` tuple.
    :raises ValueError: when the time axes of the inputs do not match.
    """
    if mdp is None:
        mdp = self.mdp

    prefix = f'{cwd}/{deffnm}'

    if mindist:
        frame_spacing = float(mdp.nstenergy) * float(mdp.dt) * stride
        self.call_gmx(cmd='mindist', stdin='Protein', cwd=cwd,
                      f=f'{deffnm}.xtc', s=f'{deffnm}.tpr',
                      od=f'{deffnm}.mindist.xvg', pi=True, dt=frame_spacing)
        # Lines starting with '#' or '@' are xvg comments/directives.
        with open(f'{prefix}.mindist.xvg') as xvg:
            rows = [[float(token) for token in line.split()]
                    for line in xvg if line[0] not in '#@']
        data = np.array(rows).T

    df = edr_to_df(f'{prefix}.edr')[::stride]
    traj = self.load_xtc(f'{prefix}.vis.xtc', stride=stride)

    # Truncate every input to the shortest one.
    lengths = [len(df), len(traj)]
    if mindist:
        lengths.insert(0, data.shape[1])
    minlen = min(lengths)

    df = df.head(minlen)
    if mindist:
        data = data[..., :minlen]
    traj = traj[:minlen]

    times = df['Time']
    if mindist and not np.array_equal(times, data[0]):
        raise ValueError("Could not match times across different inputs")
    if not np.array_equal(times, traj.time):
        raise ValueError("Could not match times across different inputs")

    alpha_indices = traj.top.select_atom_indices('alpha')
    rmsd = md.rmsd(traj, self.traj, atom_indices=alpha_indices)

    if mindist:
        df['Min. PI dist'] = data[1]
        df['Max. int dist'] = data[2]
    df['RMSD'] = rmsd
    return (traj, df)
def get_energies(in_base_name: str = 'npt_PT_out') -> pd.DataFrame:
    """Load the energies of every replica of a GROMACS REMD run.

    Files matching ``<in_base_name>*.edr`` are collected; the replica number
    is the run of digits directly before the ``.edr`` suffix.

    :param in_base_name: The base name for the output energy files
    :return: The MultiIndexed DataFrame of all the time-step energies
    :rtype: pd.DataFrame
    :raises ValueError: if a matching file name carries no replica number
        (or, from pandas, if no file matches at all).
    """
    # Shorter names first, ties broken alphabetically.
    edr_paths = sorted(glob.glob(in_base_name+'*.edr'),
                       key=lambda name: (len(name), name))

    replica_pattern = r'.+?(\d+)\.edr'
    frames_by_replica = dict()
    for edr_path in edr_paths:
        try:
            replica = int(re.match(replica_pattern, edr_path).group(1))
        except AttributeError:
            # re.match returned None: no digits before the suffix.
            raise ValueError('Unable to parse edr file name '
                             '"{}"'.format(edr_path))
        frames_by_replica[replica] = panedr.edr_to_df(edr_path)

    return pd.concat(frames_by_replica, names=['replica', 'time'])
# Load the GAFF force field through foyer and apply it to the box of
# compounds; dihedral-parameter assertions are switched off for this call.
gaff_ff = foyer.forcefields.load_GAFF()
typed_compound = gaff_ff.apply(box_of_compounds, assert_dihedral_params=False)
# apply_charges is defined elsewhere; presumably it assigns partial charges
# to the typed box using the single compound as template -- TODO confirm.
charge_structure = apply_charges(box_structure=typed_compound, single_compound=compound, n_atoms=compound.n_particles, ff=gaff_ff)
# Handed back to our backend GMSO
topology = gmso.external.from_parmed(charge_structure)
topology.name = compound.name
# Write the GROMACS topology (with explicit fudge factors and combination
# rule) and the coordinate file into the simulation directory.
gmso.formats.write_top(topology, "simulation/topol.top", top_vars={ "fudgeLJ": 0.5, "fudgeQQ": 0.8, "comb-rule": "geometric" })
gmso.formats.write_gro(topology, "simulation/conf.gro")
# Run the simulation with gromacs
run_energy_minimization()
run_nvt()
# Data analysis: load the resulting energy file as a pandas DataFrame
sim_data = panedr.edr_to_df("simulation/ener.edr")
def dA_Gamma_MBAR(plot_out=True, MINGAMMA=0, MAXGAMMA=100, GSPACING=10, LAMBDA=100, exponent=2, polymorphs='p1 p2',
                  Molecules=72, Independent=4, Temp=200, Pressure=1, k=1000, ignoreframes=500, includeframes=100000,
                  potential='oplsaa', bonds=False, hinge='DefaultHinge'):
    """Compute free-energy differences along the gamma path with MBAR.

    For every polymorph, the potential energies
    (``<poly>/interactions/<gamma>/PROD.edr``) and the dH/dl data
    (``.../dhdl_PROD.xvg``) of each gamma state are loaded, the
    non-equilibrated head of each series is discarded, and MBAR is run
    twice: once on all samples for the free energies and once on
    decorrelated samples for the uncertainties. Results are printed.

    NOTE: this function never returns normally. It finishes with
    ``sys.exit()`` after printing the results, and it also exits early on
    invalid input.

    :param plot_out: when true, matplotlib is imported and its font
        configured (the plotting itself is currently disabled).
    :param MINGAMMA, MAXGAMMA, GSPACING: range and spacing of the sampled
        gamma points; their values also name the state directories.
    :param LAMBDA: only validated to lie in [0, 100].
    :param exponent: exponent of the gamma schedule (sign selects the form).
    :param polymorphs: whitespace-separated polymorph directory names.
    :param Molecules, Independent: normalisation factors for the dH/dl
        series and the reported free energies.
    :param Temp: temperature; must not be negative.
    :param Pressure, ignoreframes, includeframes: accepted but unused here.
    :param k: accepted but unused; the name is reused as a loop index.
    :param potential: one of oplsaa, gromos, designeda, oplsaafakeg,
        oplsaafakea.
    :param bonds: selects the first dH/dl column holding state energies.
    :param hinge: ``'DefaultHinge'`` or a whitespace-separated hinge list.
    """
    if (plot_out):
        import matplotlib  # for making plots, version 'matplotlib-1.1.0-1'; errors may pop up when using earlier versions
        import matplotlib.pyplot as plt
        font = {'family': 'normal', 'weight': 'normal', 'size': 16}
        matplotlib.rc('font', **font)

    # =============================================================================================
    # ENSURE THAT USER INPUTS ARE SENSIBLE
    # =============================================================================================
    # TEMPERATURE
    if Temp < 0:
        print("Invalid Temperature: " + str(Temp))
        sys.exit()

    # GAMMA
    if (MINGAMMA == -1) and (MAXGAMMA == -1) and (GSPACING == -1) and (exponent == 1):
        print("Using default values!")

        # The Gamma points sampled
        # NOTE(review): this branch does not define ``gamma_names``, which
        # the loading loop below needs, so it cannot currently run to
        # completion -- confirm the intended default directory names.
        Gammas = [
            '000L', '010L', '020L', '030L', '040L', '050L', '060L', '070L', '080L', '090L', '100L'
        ]
    elif MINGAMMA < 0 or MAXGAMMA < 0 or GSPACING < 0 or MINGAMMA > MAXGAMMA:
        print("Invalid Gamma Specifications")
        sys.exit()
    else:
        RawGamma = MINGAMMA
        Gammas = []
        Gamma_names = []
        # Raw gamma values; these (not Gamma_names) name the directories.
        gamma_names = np.arange(MINGAMMA, MAXGAMMA + GSPACING, GSPACING)
        while RawGamma < MAXGAMMA:
            if exponent >= 0:
                Gamma = int(100 * (float(RawGamma) / float(MAXGAMMA))**abs(exponent))
            else:
                Gamma = int(100 * (1 - (float(MAXGAMMA - RawGamma) / float(MAXGAMMA))**abs(exponent)))
            Gammas.append(Gamma)

            # Format the gamma point name
            if RawGamma < 10:
                Gamma_names.append('00' + str(int(RawGamma)) + 'G')
            elif RawGamma < 100:
                Gamma_names.append('0' + str(int(RawGamma)) + 'G')
            else:
                Gamma_names.append('100G')
            RawGamma = RawGamma + GSPACING

        # Catch the final gamma point
        Gammas.append(int(MAXGAMMA))
        if MAXGAMMA < 10:
            Gamma_names.append('00' + str(int(MAXGAMMA)) + 'G')
        elif MAXGAMMA < 100:
            Gamma_names.append('0' + str(int(MAXGAMMA)) + 'G')
        else:
            Gamma_names.append('100G')

    # LAMBDA
    if LAMBDA < 0 or LAMBDA > 100:
        print("Invalid Lambda Point: " + str(LAMBDA))
        sys.exit()

    # POLYMORPH
    polymorphs = polymorphs.split()
    polymorph = []
    polymorph_short = []
    for i, token in enumerate(polymorphs):
        polymorph.append('Polymorph ' + str(token))
        polymorph_short.append(token)

    # POTENTIAL
    if potential != "oplsaa" and potential != "gromos" and potential != "designeda" and potential != "oplsaafakeg" and \
            potential != "oplsaafakea":
        print("Invalid Potential")
        print(
            "Supported potentials: oplsaa gromos designeda oplsaafakeg oplsaafakea"
        )
        sys.exit()

    # =============================================================================================
    # FORMAT INPUTS
    # =============================================================================================
    # POTENTIAL
    PotNAME = ""
    if potential == "oplsaa":
        PotNAME = "OPLS"
    elif potential == "gromos":
        PotNAME = "GROM"
    elif potential == "designeda":
        PotNAME = "DESA"
    elif potential == "oplsaafakeg":
        PotNAME = "FAKEG"
    elif potential == "oplsaafakea":
        PotNAME = "FAKEA"

    # OPTIONAL HINGE
    if hinge == "DefaultHinge":
        hinges = ['_G']
    else:
        # Read in each job.
        # The hinge list comes from the ``hinge`` argument; the previous
        # ``options.hinge`` referred to a global that does not exist here
        # and raised NameError for every non-default hinge.
        hinges = []
        hingevect = hinge.split()
        for i, token in enumerate(hingevect):
            hinges.append("_G_" + str(token))

    # =============================================================================================
    # READ IN RAW DATA
    # =============================================================================================
    # Constants.
    kB = 1.3806488e-23 * 6.0221413e23 / (1000.0 * 4.184)  # Boltzmann constant in kcal/mol

    omitT = []  # Temperatures to be omitted from the analysis

    # Parameters
    T_k = Temp * np.ones(len(Gammas), float)  # Convert temperatures to floats
    print(T_k)
    print(Gammas)

    g_k = np.zeros([len(Gammas)], float)
    K = len(Gammas)  # How many states?

    # total number of states examined; 0 are unsampled if bonds are left on, 1 is unsampled if the bonds are removed
    if bonds == True:
        Kbig = K
        dhdl_placement = 6
    else:
        Kbig = K
        dhdl_placement = 5

    # maximum number of snapshots/simulation (could make this automated) - doesn't matter, as long as it's long enough.
    N_max = 200000

    # beta factor for the different temperatures
    beta_k = 1.0 / (kB * T_k)
    dA = np.zeros([len(polymorph), Kbig], float)
    ddA = np.zeros([len(polymorph), Kbig], float)
    convert_units = 0.2390057 * np.ones(Kbig, float)  # Convert all energies to kcal/mol

    # Allocate storage for simulation data
    for i, poly in enumerate(polymorph):
        # N_k[k] is the total number of snapshots from alchemical state k
        N_k = np.zeros([Kbig], np.int32)

        # N_k_s[k,s] is the total number of snapshots from alchemical state k from seed s
        N_k_s = np.zeros([Kbig, len(hinges)], np.int32)

        # u_kln[k,l,n] is the adjusted energy of snapshot n from simulation k
        u_kln = np.zeros([K, Kbig, N_max], np.float64)

        # dhdl_kn[k,n] is the derivative of energy with respect to lambda of snapshot n from simulation k
        dhdl_kn = np.zeros([K, N_max], np.float64)

        # Load in the data for each run
        for k in range(K):
            n = 0
            for s, hinge_tag in enumerate(hinges):
                # cycle through all the input total energy data
                dirpath = polymorph_short[i] + '/interactions/' + str(gamma_names[k])
                fname = dirpath + '/PROD.edr'
                dhdlname = dirpath + '/dhdl_PROD.xvg'

                if k not in omitT:
                    potential_energy = panedr.edr_to_df(fname)['Potential'].values
                    print("loading " + fname)

                    dhdl_energy = np.loadtxt(dhdlname, comments=['#', '$', '@', '!'])
                    print("loading " + dhdlname)

                    # Removing any non-equilibrated points of the simulation
                    [start_production, _, _] = timeseries.detectEquilibration(potential_energy)
                    potential_energy = potential_energy[start_production:]
                    dhdl_energy = dhdl_energy[start_production:]

                    # the energy of every configuration from each state evaluated at its sampled state
                    n = len(potential_energy)
                    u_kln[k, :K, :n] = (potential_energy.reshape((n, 1)) + dhdl_energy[:, dhdl_placement:]).T * \
                        convert_units[k]
                    dhdl_kn[k, :n] = (float(Independent) / Molecules) * \
                        np.sum(dhdl_energy[:, 2:dhdl_placement], axis=1) * convert_units[k]

                    if s == 0:
                        N_k_s[k, s] = n
                    else:
                        N_k_s[k, s] = n - sum(N_k_s[k, 0:s])
                    N_k[k] = n

        # convert to nondimensional units from kcal/mol
        u_kln *= beta_k[0]

        # all data loaded from the three sets
        u_kln_save = u_kln.copy()
        N_k_save = N_k.copy()
        g_k = np.zeros([K])

        print("Number of retained samples")
        print(N_k)
        print("Number of retained samples from each seed")
        print(N_k_s)

        # =============================================================================================
        # COMPUTE FREE ENERGY DIFFERENCE USING MBAR
        # =============================================================================================
        # Initialize MBAR.
        print("Running MBAR...")

        # generate the weights of each of the umbrella set
        mbar = pymbar.MBAR(u_kln, N_k, verbose=True, subsampling_protocol=[{'method': 'L-BFGS-B'}])
        print("MBAR Converged...")

        # testing
        for k in range(Kbig):
            w = np.exp(mbar.Log_W_nk[:, k])
            print("max weight in state %d is %12.7f" % (k, np.max(w)))
            neff = 1 / np.sum(w**2)
            print("Effective number of sample in state %d is %10.3f" % (k, neff))
            print("Efficiency for state %d is %d/%d = %10.4f" % (k, neff, len(w), neff / len(w)))

        # extract self-consistent weights and uncertainties
        (df_i, ddf_i, theta_i) = mbar.getFreeEnergyDifferences()
        print("Free Energies Optained...")

        # convert PMF to kcal/mol and normalize by the number of molecules
        df_i /= (beta_k[0] * float(Independent))
        ddf_i /= (beta_k[0] * float(Independent))

        dA[i, :] = df_i[-1]

        # =============================================================================================
        # COMPUTE UNCERTAINTY USING THE UNCORRELATED DATA
        # =============================================================================================
        for k in range(K):
            N_k[k] = 0
            n_old = 0
            if k not in omitT:
                for s in range(len(hinges)):
                    g_k[k] = timeseries.statisticalInefficiency(
                        dhdl_kn[k, n_old:(n_old + N_k_s[k, s])])
                    print("Correlation time for sampled state %d is %10.3f" % (k, g_k[k]))

                    # subsample the data to get statistically uncorrelated data
                    indices = np.array(
                        timeseries.subsampleCorrelatedData(
                            u_kln[k, k, n_old:(n_old + N_k_s[k, s])], g=g_k[k]))  # subsample

                    # not sure why we have to transpose
                    u_kln[k, :, N_k[k]:(N_k[k] + len(indices))] = u_kln_save[k, :, (indices + n_old)].transpose()
                    N_k[k] = N_k[k] + len(indices)
                    n_old += N_k_s[k, s]

        print("Number of retained samples")
        print(N_k)
        print("Number of retained samples from each seed")
        print(N_k_s)

        # generate the weights of each of the umbrella set
        mbar = pymbar.MBAR(u_kln, N_k, verbose=True, subsampling_protocol=[{'method': 'L-BFGS-B'}])
        print("MBAR Converged...")  # testing

        # extract self-consistent weights and uncertainties
        (df_u, ddf_u, theta_i) = mbar.getFreeEnergyDifferences()
        print("Free Energies Optained...")

        # convert PMF to kcal/mol and normalize by the number of molecules
        df_u /= (beta_k[0] * float(Independent))
        ddf_u /= (beta_k[0] * float(Independent))

        ddA[i, :] = ddf_u[-1]

        # Write out free energy differences
        print("Free Energy Difference (in units of kcal/mol)")
        print("  dA(Gamma) = A(Gamma) - A(Interactions Off)")
        for k in range(Kbig):
            print("%8.3f %8.3f" % (df_i[k, -1], ddf_u[k, -1]))

    # =============================================================================================
    # PRINT THE FINAL DATA
    # =============================================================================================
    out_dA = np.zeros(len(polymorph))
    out_ddA = np.zeros(len(polymorph))
    for i, poly in enumerate(polymorph):
        # Index 0 is reported; the original code notes ``Kbig - 1`` as the
        # alternative index.
        out_dA[i] = dA[i, 0]
        out_ddA[i] = ddA[i, 0]

    # NOTE: the dA-vs-gamma plotting that used to follow was entirely
    # commented out and referenced names that are not defined in this
    # function (Tname, Pname, MBARBootstrap); it has been dropped.

    print(out_dA, out_ddA)
    sys.exit()
def test_output_type(self):
    """
    ``edr_to_df`` must hand back a pandas DataFrame.
    """
    parsed = panedr.edr_to_df(EDR)
    self.assertIsInstance(parsed, pandas.DataFrame)
def dA_Lambda_MBAR(plot_out=True,
                   MinL=0,
                   MaxL=100,
                   dL=5,
                   GAMMA=100,
                   exponent=4,
                   polymorphs='p1 p2',
                   Molecules=72,
                   Independent=4,
                   Temp=200,
                   Pressure=1,
                   potential='oplsaa',
                   hinge='DefaultHinge'):
    """
    Compute the restraint free energy of each polymorph with MBAR.

    For every polymorph, the potential energy (``PROD.edr``) and the dH/dlambda
    table (``dhdl_PROD.xvg``) of each restraint state are read from
    ``<polymorph>/restraints/<state>/``. The samples before the index returned
    by ``timeseries.detectEquilibration`` are discarded and MBAR is run twice:
    on all retained samples for the free energies, and on a decorrelated
    subsample for the uncertainties.

    Invalid input is reported with ``print`` followed by ``sys.exit()``.

    :param plot_out: When true, matplotlib is imported and its font is
        configured. NOTE(review): the plotting section at the end compares the
        already-split polymorph list with the string ``'all'`` and therefore
        never runs.
    :param MinL: Smallest raw lambda to analyse.
    :param MaxL: Largest raw lambda to analyse.
    :param dL: Raw lambda step; must be positive.
    :param GAMMA: Must lie in [0, 100]; 100 selects hinge letter ``L``,
        anything else ``R``.
    :param exponent: Exponent mapping raw lambda onto the ``Lambdas`` values
        (these only size the arrays and label the plot axis).
    :param polymorphs: Whitespace separated polymorph directory names.
    :param Molecules: Energies are scaled by ``Independent / Molecules``.
    :param Independent: See ``Molecules``; the free energies are also divided
        by this number.
    :param Temp: Temperature used for the reduced energies.
    :param Pressure: Only validated to be non-negative.
    :param potential: One of oplsaa, gromos, designeda, oplsaafakeg,
        oplsaafakea.
    :param hinge: ``'DefaultHinge'`` or whitespace separated hinge tokens.
    :returns: ``(out_dA, out_ddA)``, two arrays with one entry per polymorph:
        the free energy difference between the first and the last state and
        its uncertainty (printed as kcal/mol).
    """
    if plot_out:
        # Imported lazily so that plot_out=False does not require matplotlib.
        import matplotlib
        import matplotlib.pyplot as plt
        font = {'family': 'normal', 'weight': 'normal', 'size': 16}
        matplotlib.rc('font', **font)

    # =========================================================================
    # ENSURE THAT USER INPUTS ARE SENSIBLE
    # =========================================================================
    # TEMPERATURE
    if Temp < 0:
        print("Invalid Temperature: " + str(Temp))
        sys.exit()

    if Pressure < 0:
        print("Invalid Pressure: " + str(Pressure))
        sys.exit()

    # LAMBDA
    if (MinL == -1) and (MaxL == -1) and (dL == -1) and (exponent == 1):
        print("Using default values!")
        # The Lambda points sampled
        Lambdas = [
            '000L', '010L', '020L', '030L', '040L', '050L', '060L', '070L',
            '080L', '090L', '100L'
        ]
        # The state directories are looked up through `lambda_names`, which
        # this branch used to leave undefined (NameError further down).
        # NOTE(review): assumes the default states use the same numeric
        # directory names (0, 10, ..., 100) as the explicit branch - confirm.
        lambda_names = np.arange(0, 110, 10)
    elif MinL < 0 or MaxL < 0 or dL <= 0 or MinL > MaxL:
        # dL == 0 is rejected as well: it cannot define a lambda grid.
        print("Invalid Lambda Specifications")
        sys.exit()
    else:
        # Directory name of every state that is read from disk
        lambda_names = np.arange(MinL, MaxL + dL, dL)
        Lambdas = []
        RawLambda = 0
        while RawLambda < MaxL:
            if RawLambda >= MinL:
                if exponent >= 0:
                    Lambda = int(100 * (float(RawLambda) / float(MaxL))
                                 **abs(exponent))
                else:
                    Lambda = int(100 * (1 - (float(MaxL - RawLambda) /
                                             float(MaxL))**abs(exponent)))
                Lambdas.append(Lambda)
            RawLambda = RawLambda + dL
        # Catch the final lambda point
        Lambdas.append(MaxL)

    # GAMMA
    if GAMMA < 0 or GAMMA > 100:
        print("Invalid Gamma Point: " + str(GAMMA))
        sys.exit()

    # POLYMORPH
    polymorphs = polymorphs.split()
    polymorph = ['Polymorph ' + str(token) for token in polymorphs]
    polymorph_short = list(polymorphs)

    # POTENTIAL (maps the user keyword onto the tag used in file names)
    potential_names = {
        "oplsaa": "OPLS",
        "gromos": "GROM",
        "designeda": "DESA",
        "oplsaafakeg": "FAKEG",
        "oplsaafakea": "FAKEA",
    }
    if potential not in list(potential_names):
        print("Invalid Potential")
        print(
            "Supported potentials: oplsaa gromos designeda oplsaafakeg oplsaafakea"
        )
        sys.exit()

    # =========================================================================
    # FORMAT INPUTS
    # =========================================================================
    PotNAME = potential_names[potential]

    # OPTIONAL HINGE
    if str(GAMMA) == "100":
        hingeLetter = "L"
    else:
        hingeLetter = "R"

    if hinge == "DefaultHinge":
        hinges = ["_" + hingeLetter]
    else:
        # Read in each job
        hinges = [
            "_" + hingeLetter + "_" + str(token) for token in hinge.split()
        ]

    # =========================================================================
    # READ IN RAW DATA
    # =========================================================================
    # Boltzmann constant in kcal/mol
    kB = 1.3806488e-23 * 6.0221413e23 / (1000.0 * 4.184)
    # States listed here are skipped when loading and subsampling
    omitK = []

    # How many states?
    K = len(Lambdas)
    # total number of states examined; none are unsampled
    Kbig = K
    # maximum number of snapshots/simulation; longer runs keep their last
    # N_max samples
    N_max = 200000

    # Convert temperatures to floats
    T_k = Temp * np.ones(K, float)
    # beta factor for the different temperatures
    beta_k = 1.0 / (kB * T_k)
    dA = np.zeros([len(polymorph), K], float)
    ddA = np.zeros([len(polymorph), K], float)
    # Convert all energies to kcal/mol
    convert_units = 0.2390057 * np.ones(K, float)

    for i, poly in enumerate(polymorph):
        # N_k[k] is the total number of snapshots from alchemical state k
        N_k = np.zeros([Kbig], np.int32)
        # N_ksj[k,s,j] is the number of snapshots from state k, hinge s, in
        # contiguous segment j
        N_ksj = np.zeros([Kbig, len(hinges), 100], np.int32)
        # u_kln[k,l,n] is the adjusted energy of snapshot n from simulation k
        u_kln = np.zeros([K, Kbig, N_max], np.float64)
        # dhdl_kn[k,n] is the derivative of energy with respect to lambda of
        # snapshot n from simulation k
        dhdl_kn = np.zeros([K, N_max], np.float64)

        # Load in the data for each run
        for k in range(K):
            n = 0
            # NOTE(review): `dirpath` does not depend on the hinge, so every
            # hinge re-reads the same files and overwrites the same rows.
            # The loop variable deliberately keeps the name `hinge`; the
            # plotting section reads it afterwards.
            for s, hinge in enumerate(hinges):
                dirpath = polymorph_short[i] + '/restraints/' + str(
                    lambda_names[k])
                fname = dirpath + '/PROD.edr'
                dhdlname = dirpath + '/dhdl_PROD.xvg'
                if k in omitK:
                    continue

                potential_energy = panedr.edr_to_df(fname)['Potential'].values
                print("loading " + fname)
                dhdl_energy = np.loadtxt(dhdlname,
                                         comments=['#', '$', '@', '!'])
                print("loading " + dhdlname)

                # Removing any non-equilibrated points of the simulation
                [start_production, _,
                 _] = timeseries.detectEquilibration(potential_energy)
                potential_energy = potential_energy[start_production:]
                dhdl_energy = dhdl_energy[start_production:]

                # Keep the tail of runs that would overflow the storage
                # (same policy as dA_MBAR)
                if len(potential_energy) > N_max:
                    potential_energy = potential_energy[-N_max:]
                    dhdl_energy = dhdl_energy[-N_max:]

                # the energy of every configuration from each state evaluated
                # at its sampled state
                n = len(potential_energy)
                u_kln[k, :, :n] = (float(Independent) / Molecules) * (
                    potential_energy.reshape(
                        (n, 1)) + dhdl_energy[:, 5:]).T * convert_units[k]
                dhdl_kn[k, :n] = (float(Independent) / Molecules
                                  ) * dhdl_energy[:, 4] * convert_units[k]

                # Every retained configuration is kept, so the samples form a
                # single contiguous segment.
                N_ksj[k, s, 0] = n
            N_k[k] = n

        # convert to nondimensional units from kcal/mol
        u_kln *= beta_k[0]
        # all data loaded from the three sets
        u_kln_save = u_kln.copy()
        g_k = np.zeros([K])

        print("Number of retained samples")
        print(N_k)

        # =====================================================================
        # COMPUTE FREE ENERGY DIFFERENCE USING MBAR
        # =====================================================================
        print("Running MBAR...")
        # generate the weights of each of the umbrella set
        mbar = pymbar.MBAR(u_kln,
                           N_k,
                           verbose=True,
                           subsampling_protocol=[{
                               'method': 'L-BFGS-B'
                           }])
        print("MBAR Converged...")

        for k in range(Kbig):
            w = np.exp(mbar.Log_W_nk[:, k])
            print("max weight in state %d is %12.7f" % (k, np.max(w)))
            neff = 1 / np.sum(w**2)
            print("Effective number of sample in state %d is %10.3f" %
                  (k, neff))
            print("Efficiency for state %d is %d/%d = %10.4f" %
                  (k, neff, len(w), neff / len(w)))

        # extract self-consistent weights and uncertainties
        (df_i, ddf_i, theta_i) = mbar.getFreeEnergyDifferences()
        print("Free Energies Optained...")

        # convert PMF to kcal/mol and normalize by the number of molecules
        df_i /= (beta_k[0] * float(Independent))
        ddf_i /= (beta_k[0] * float(Independent))
        dA[i, :] = df_i[-1]

        # =====================================================================
        # COMPUTE UNCERTAINTY USING THE UNCORRELATED DATA
        # =====================================================================
        for k in range(K):  # For each restraint state
            N_k[k] = 0
            n_old = 0
            if k in omitK:
                continue
            for s in range(len(hinges)):  # For each independent trajectory
                for j in range(100):  # For each segment of that trajectory
                    if N_ksj[k, s, j] == 0:
                        continue
                    segment_end = n_old + N_ksj[k, s, j]

                    # Feed in the segment and calculate correlation time
                    g_k[k] = timeseries.statisticalInefficiency(
                        dhdl_kn[k, n_old:segment_end])
                    print("Correlation time for sampled state %d is %10.3f" %
                          (k, g_k[k]))

                    # subsample indices within the segment
                    indices = np.array(
                        timeseries.subsampleCorrelatedData(
                            u_kln[k, k, n_old:segment_end],
                            g=g_k[k])).astype(int)

                    # Append the uncorrelated configurations of the segment.
                    # The scalar and the array index are separated by a slice,
                    # so NumPy puts the sample axis first; the transpose
                    # restores the (state, sample) layout.
                    u_kln[k, :, N_k[k]:(N_k[k] + len(indices))] = u_kln_save[
                        k, :, (indices + n_old)].transpose()
                    N_k[k] = N_k[k] + len(indices)
                    n_old += N_ksj[k, s, j]

        print("Number of retained samples")
        print(N_k)
        print("Number of retained samples from each seed")
        print(N_ksj)

        # generate the weights of each of the umbrella set
        mbar = pymbar.MBAR(u_kln,
                           N_k,
                           verbose=True,
                           subsampling_protocol=[{
                               'method': 'L-BFGS-B'
                           }])
        print("MBAR Converged...")

        # extract self-consistent weights and uncertainties
        (df_u, ddf_u, theta_i) = mbar.getFreeEnergyDifferences()
        print("Free Energies Optained...")

        # convert PMF to kcal/mol and normalize by the number of molecules
        df_u /= (beta_k[0] * float(Independent))
        ddf_u /= (beta_k[0] * float(Independent))
        ddA[i, :] = ddf_u[-1]

        # Write out free energy differences
        print("Free Energy Difference (in units of kcal/mol)")
        print(" dA(Lambda) = A(Lambda) - A(Fully Restrained)")
        for k in range(Kbig):
            print("%8.3f %8.3f" % (df_i[k, -1], ddf_u[k, -1]))

    # =========================================================================
    # PRINT THE FINAL DATA
    # =========================================================================
    out_dA = np.zeros(len(polymorph))
    out_ddA = np.zeros(len(polymorph))
    for i, poly in enumerate(polymorph):
        out_dA[i] = dA[i, 0]
        out_ddA[i] = ddA[i, 0]

    # =========================================================================
    # PLOT THE FINAL DATA
    # =========================================================================
    # NOTE(review): `polymorphs` was rebound to a list above, so this
    # comparison with a string is always False and the section never runs.
    # It also relies on `Tname`, `Pname` and `MBARBootstrap`, none of which
    # is defined inside this function.
    if (plot_out) and polymorphs == 'all':
        # now plot the free energy change as a function of lambda
        fig = plt.figure(4)
        ax = fig.add_subplot(111)
        # raw string: '\l' is not a valid escape sequence
        xlabel = r'Restraint Strength, $\lambda$'
        ylabel = 'Relative Free Energy (kcal/mol)'
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        Xaxis = [float(j / 100.0) for j in Lambdas]

        # Only the first bootstrap file found is loaded (elif semantics of
        # the original). The third check used to test the Polymorph2 file
        # again while loading the Polymorph3 one.
        for row, tag in enumerate(('Polymorph1', 'Polymorph2',
                                   'Polymorph3')):
            bootstrap_file = ('BootstrapStd_' + PotNAME + '_' + tag + '_' +
                              str(Molecules) + '_' + Tname + '_' + Pname +
                              '_dAvsL_All')
            if os.path.isfile(bootstrap_file):
                ddA[row, :] = MBARBootstrap.ExtractBootstrap(bootstrap_file)
                break

        ax.errorbar(Xaxis,
                    dA[0, :],
                    color='b',
                    yerr=ddA[0, :],
                    label='Benzene I')
        ax.errorbar(Xaxis,
                    dA[1, :],
                    color='g',
                    yerr=ddA[1, :],
                    label='Benzene II')
        ax.errorbar(Xaxis,
                    dA[2, :],
                    color='r',
                    yerr=ddA[2, :],
                    label='Benzene III')
        plt.legend(loc='upper left')
        plt.show()
    return out_dA, out_ddA
def dA_MBAR(minimum=0,
            maximum=100,
            spacing=10,
            exponent=2,
            polymorphs='p1 p2',
            Molecules=72,
            Independent=4,
            Temp=200,
            bonds=False,
            primary_directory='.',
            added_directories=[]):
    """
    Compute the free energy difference along a set of states with MBAR.

    The states are the directories
    ``<polymorph>/<primary_directory>/<name>`` where ``name`` runs over
    ``np.arange(minimum, maximum + spacing, spacing)`` merged with
    ``added_directories``. Each existing directory must contain ``PROD.edr``
    and ``dhdl_PROD.xvg``; missing directories contribute zero samples.
    MBAR is run on all retained samples for the free energies and on a
    decorrelated subsample for the uncertainties.

    :param minimum: First state name.
    :param maximum: Last state name.
    :param spacing: Step between state names.
    :param exponent: Not used by this function.
    :param polymorphs: Whitespace separated polymorph directory names.
    :param Molecules: dH/dlambda is scaled by ``Independent / Molecules``.
    :param Independent: See ``Molecules``; the free energies are also divided
        by this number.
    :param Temp: Temperature used for the reduced energies.
    :param bonds: Not used by this function.
    :param primary_directory: Sub-directory of each polymorph holding the
        state directories.
    :param added_directories: Extra state names. The argument is only read
        (``np.append`` copies), so the shared default list is never mutated.
    :returns: ``(out_dA, out_ddA)``, two arrays with one entry per polymorph:
        the free energy difference between the first and the last state and
        its uncertainty (printed as kcal/mol).
    """
    # =========================================================================
    # Setting up the values for gamma or lambda states
    # =========================================================================
    directory_names = np.arange(minimum, maximum + spacing, spacing)
    directory_names = np.sort(np.append(directory_names, added_directories))

    # POLYMORPH
    polymorphs = polymorphs.split()

    # =========================================================================
    # READ IN RAW DATA
    # =========================================================================
    # Boltzmann constant in kcal/mol
    kB = 1.3806488e-23 * 6.0221413e23 / (1000.0 * 4.184)

    # Convert temperatures to floats
    T_k = Temp * np.ones(len(directory_names), float)
    print(T_k)
    # How many states?
    K = len(directory_names)
    # total number of states examined
    Kbig = K
    # maximum number of snapshots/simulation; longer runs keep their last
    # N_max samples
    N_max = 5000

    # beta factor for the different temperatures
    beta_k = 1.0 / (kB * T_k)
    dA = np.zeros([len(polymorphs), Kbig], float)
    ddA = np.zeros([len(polymorphs), Kbig], float)
    # Convert all energies to kcal/mol
    convert_units = 0.2390057 * np.ones(Kbig, float)

    for i, poly in enumerate(polymorphs):
        # N_k[k] is the total number of snapshots from alchemical state k
        N_k = np.zeros([Kbig], np.int32)
        # N_k_s[k] is the number of snapshots loaded for state k (kept
        # unchanged while N_k is overwritten by the subsampling step)
        N_k_s = np.zeros([Kbig], np.int32)
        # u_kln[k,l,n] is the adjusted energy of snapshot n from simulation k
        u_kln = np.zeros([K, Kbig, N_max], np.float64)
        # dhdl_kn[k,n] is the derivative of energy with respect to lambda of
        # snapshot n from simulation k
        dhdl_kn = np.zeros([K, N_max], np.float64)

        # Load in the data for each run
        for k in range(K):
            # Whole-number states are stored without a decimal point
            if directory_names[k] == int(directory_names[k]):
                state_name = str(int(directory_names[k]))
            else:
                state_name = str(directory_names[k])
            dirpath = polymorphs[i] + '/' + primary_directory + '/' + state_name
            if not os.path.isdir(dirpath):
                continue

            fname = dirpath + '/PROD.edr'
            dhdlname = dirpath + '/dhdl_PROD.xvg'
            potential_energy = panedr.edr_to_df(fname)['Potential'].values
            print("loading " + fname)
            dhdl_energy = np.loadtxt(dhdlname, comments=['#', '$', '@', '!'])
            print("loading " + dhdlname)

            # Removing any non-equilibrated points of the simulation
            [start_production, _,
             _] = timeseries.detectEquilibration(potential_energy)
            potential_energy = potential_energy[start_production:]
            dhdl_energy = dhdl_energy[start_production:, :]

            # Cutting points if they exceed N_max
            if len(potential_energy) > N_max:
                potential_energy = potential_energy[-N_max:]
                dhdl_energy = dhdl_energy[-N_max:, :]

            # the energy of every configuration from each state evaluated at
            # its sampled state; the last K columns of the dhdl table are
            # added to the potential energy
            n = len(potential_energy)
            dhdl_placement = len(dhdl_energy[0, :]) - K
            u_kln[k, :K, :n] = (potential_energy.reshape(
                (n, 1)) + dhdl_energy[:, dhdl_placement:]).T * convert_units[k]
            dhdl_kn[k, :n] = (float(Independent) / Molecules) * \
                np.sum(dhdl_energy[:, 2:dhdl_placement], axis=1) * convert_units[k]
            N_k_s[k] = n
            N_k[k] = n

        # convert to nondimensional units from kcal/mol
        u_kln *= beta_k[0]
        g_k = np.zeros([K])

        print("Number of retained samples")
        print(N_k)
        print("Number of retained samples from each seed")
        print(N_k_s)

        # =====================================================================
        # COMPUTE FREE ENERGY DIFFERENCE USING MBAR
        # =====================================================================
        print("Running MBAR...")
        # generate the weights of each of the umbrella set
        mbar = pymbar.MBAR(u_kln,
                           N_k,
                           verbose=True,
                           subsampling_protocol=[{
                               'method': 'L-BFGS-B'
                           }])
        print("MBAR Converged...")

        for k in range(Kbig):
            w = np.exp(mbar.Log_W_nk[:, k])
            print("max weight in state %d is %12.7f" % (k, np.max(w)))
            neff = 1 / np.sum(w**2)
            print("Effective number of sample in state %d is %10.3f" %
                  (k, neff))
            print("Efficiency for state %d is %d/%d = %10.4f" %
                  (k, neff, len(w), neff / len(w)))

        # extract self-consistent weights and uncertainties
        (df_i, ddf_i, theta_i) = mbar.getFreeEnergyDifferences()
        print("Free Energies Optained...")

        # convert PMF to kcal/mol and normalize by the number of molecules
        df_i /= (beta_k[0] * float(Independent))
        ddf_i /= (beta_k[0] * float(Independent))
        dA[i, :] = df_i[-1]

        # =====================================================================
        # COMPUTE UNCERTAINTY USING THE UNCORRELATED DATA
        # =====================================================================
        for k in range(K):
            g_k[k] = timeseries.statisticalInefficiency(
                dhdl_kn[k, 0:N_k_s[k]])
            print("Correlation time for sampled state %d is %10.3f" %
                  (k, g_k[k]))

            # subsample the data to get statistically uncorrelated data
            indices = np.asarray(timeseries.subsampleCorrelatedData(
                u_kln[k, k, 0:N_k_s[k]], g=g_k[k]),
                                 dtype=int)
            N_k[k] = 0
            # An explicit size test replaces `indices != []`, whose truth
            # value is ambiguous for NumPy arrays.
            if indices.size > 0:
                # The scalar and the array index are separated by a slice, so
                # NumPy puts the sample axis first; the transpose restores the
                # (state, sample) layout. Fancy indexing copies the samples
                # before the assignment, so compacting in place is safe.
                u_kln[k, :, 0:len(indices)] = u_kln[k, :, indices].transpose()
                N_k[k] = len(indices)

        print("Number of retained samples")
        print(N_k)
        print("Number of retained samples from each seed")
        print(N_k_s)

        # generate the weights of each of the umbrella set
        mbar = pymbar.MBAR(u_kln,
                           N_k,
                           verbose=True,
                           subsampling_protocol=[{
                               'method': 'L-BFGS-B'
                           }])
        print("MBAR Converged...")

        # extract self-consistent weights and uncertainties
        try:
            (_, ddf_u, theta_i) = mbar.getFreeEnergyDifferences()
        except ValueError as err:
            # Previously the error was swallowed and the code went on with an
            # undefined (first polymorph) or stale (later polymorphs) ddf_u.
            # Fall back to the uncertainties of the full data set, which are
            # already in kcal/mol.
            print("WARNING: uncertainties of the subsampled data are "
                  "unavailable (" + str(err) + "); using the uncertainties "
                  "of the correlated data instead")
            ddf_u = ddf_i
        else:
            # convert to kcal/mol and normalize by the number of molecules
            ddf_u /= (beta_k[0] * float(Independent))
        print("Free Energies Optained...")
        ddA[i, :] = ddf_u[-1]

        # Write out free energy differences
        print("Free Energy Difference (in units of kcal/mol)")
        print(" dA(Gamma) = A(Gamma) - A(Interactions Off)")
        for k in range(Kbig):
            print("%8.3f %8.3f" % (df_i[k, -1], ddf_u[k, -1]))

        # Release the large per-polymorph arrays before the next allocation
        del N_k
        del N_k_s
        del u_kln
        del dhdl_kn

    out_dA = np.zeros(len(polymorphs))
    out_ddA = np.zeros(len(polymorphs))
    for i, poly in enumerate(polymorphs):
        out_dA[i] = dA[i, 0]
        out_ddA[i] = ddA[i, 0]
    return out_dA, out_ddA
def compute_COV_dGref(refT_cov, Temperatures_MD, Molecules, Polymorphs):
    """
    Compute reference free energies from a covariance (QHA-style) analysis.

    For every reference temperature and polymorph the eigenvalues of the
    covariance matrix are read from ``eigenvalues.xvg`` (generated with
    ``gmx covar`` when the file is missing), converted to wavenumbers and
    combined with the average potential energy and volume taken from
    ``PROD.edr``.

    :param refT_cov: Reference temperatures; each must be present in
        ``Temperatures_MD``.
    :param Temperatures_MD: Simulated temperatures; the position of a
        temperature in this sequence is the name of its directory under
        ``<polymorph>/temperature/``.
    :param Molecules: Number of molecules the energies are divided by.
    :param Polymorphs: Polymorph directory names; the first one is the
        reference.
    :returns: ``(np.array(refT_cov), refdG)`` where ``refdG[i, j]`` is the
        value of polymorph ``j`` at temperature ``i`` minus the value of the
        first polymorph at the same temperature.
    :raises IndexError: If a reference temperature is not in
        ``Temperatures_MD``.
    """
    # setting a place to store the reference free energy differences
    refdG = np.zeros((len(refT_cov), len(Polymorphs)))

    # setting key variables for QHA
    natoms = len(
        md.load(Polymorphs[0] + '/temperature/0/pre_EQ.gro').xyz[0, :, 0])
    nmodes = natoms * 3

    # boltzmann constant in kcal/(mol * K)
    kB = 0.0019872041
    # converting kcal to g*nm**2 / (ps**2)
    ekcal = 418.4
    # speed of light in cm / ps
    speed_of_light = 0.0299792458
    # Reduced planks constant
    h_bar = 2.520 * 10**(-38)
    # Avogadro's number
    Na = 6.022 * 10**23

    # An array makes the equality test below element-wise even when a plain
    # list is passed in.
    temperatures_md = np.asarray(Temperatures_MD)

    for i, t in enumerate(refT_cov):
        # determining what directory to look into for this temperature
        directory = '/temperature/' + str(
            np.where(t == temperatures_md)[0][0])
        for j, p in enumerate(Polymorphs):
            path = p + directory
            edr = panedr.edr_to_df(path + '/PROD.edr')
            if not os.path.isfile(path + '/eigenvalues.xvg'):
                # Generating the eigenvalues from the covariance matrix.
                # The two group prompts are answered with "0" through stdin;
                # the former Popen(['echo', '0', ';', 'echo', '0']) ran
                # without a shell and emitted the literal text "0 ; echo 0".
                subprocess.check_output([
                    'gmx', 'covar', '-f', path + '/PROD.trr', '-s',
                    path + '/PROD_0.tpr', '-o', path + '/eigenvalues.xvg',
                    '-mwa', 'yes', '-pbc', 'yes', '-last',
                    str(nmodes)
                ],
                                        input=b'0\n0\n')
                # Removing excess files that take up too much space
                for leftover in ('eigenvec.trr', 'covar.log', 'average.pdb'):
                    if os.path.isfile(leftover):
                        os.remove(leftover)

            # Loading in eigenvalues and converting them to wavenumbers
            wavenumbers = np.loadtxt(path + '/eigenvalues.xvg',
                                     comments=['#', '@'])[:, 1]
            wavenumbers = kB * t / (np.absolute(wavenumbers) * 100)
            # the first three entries are dropped before the conversion
            wavenumbers = np.sort(
                np.sqrt(wavenumbers[3:] * ekcal) /
                (2 * np.pi * speed_of_light))
            print(len(wavenumbers))

            # Getting the potential energy
            U = np.average(edr['Potential'].values) / 4.184

            # Computing the vibrational energy
            Av = kB * t * np.sum(
                np.log(Na * h_bar * wavenumbers * speed_of_light * 10**12 /
                       (kB * t)))

            # Volume term
            pV = np.average(edr['Volume'].values) * Na * 0.024201 * 10**(-24)
            print(U / Molecules, Av / Molecules, pV / Molecules)

            # Computing the free energy
            refdG[i, j] = (U + Av + pV) / Molecules

    # Reference every polymorph to the first one at the same temperature.
    # The column is kept two-dimensional so it broadcasts across polymorphs;
    # `refdG[:, 0]` alone would be aligned with the polymorph axis.
    refdG = refdG - refdG[:, [0]]
    print(refdG, refT_cov)
    # A stray exit() used to sit here, making the return unreachable.
    return np.array(refT_cov), refdG
def load_potenergy(fil):
    """
    Return the equilibrated part of the potential energy of an edr file.

    :params fil: Path handed to ``pdr.edr_to_df``.
    :returns: Array of the ``'Potential'`` column starting at the index that
        ``detectEquilibration`` reports as its first return value.
    """
    frame = pdr.edr_to_df(fil)
    potential = np.array(frame['Potential'])
    t0, _, _ = detectEquilibration(potential)
    return potential[t0:]
def dGvsT_QHA(Temperatures_MD=np.array([100, 200, 300]), Temperatures_unsampled=[], Molecules=72, molecule='benzene', Independent=0, potential='oplsaa', spacing=1, phase='solid', Polymorphs=['p1', 'p2', 'p3'], refdG_type='QHA', output_directory='output_QHA', refT_files=['', '', ''], refG_files=['', '', ''], refT_cov=[]): if not os.path.isdir(output_directory): subprocess.call(['mkdir', output_directory]) # Setting-up if the simulation is suppose to use QHA or covarience for dG ref if refdG_type == 'QHA': # Loading in the longest string of temperatures for refT refT = [] for i in refT_files: temp_T = np.load(i) if len(temp_T) > len(refT): refT = np.load(i) # Cutting off any zero values form refT if refT[0] == 0.: refT = refT[1:] # Adding in the reference free energy differences for each temperature refdG = np.zeros((len(refT), len(Polymorphs))) for i in range(len(Polymorphs)): G0 = np.load(refG_files[0]) G1 = np.load(refG_files[i]) T0 = np.load(refT_files[0]) T1 = np.load(refT_files[i]) for j, t in enumerate(refT): placement_0 = np.where(T0 == t) placement_1 = np.where(T1 == t) if (len(placement_0[0]) == 1) and (len(placement_1[0]) == 1): refdG[j, i] = G1[placement_1[0]] - G0[placement_0[0]] else: refdG[j, i] = np.nan elif refdG_type == 'COV': if output_directory == 'output_QHA': output_directory = 'output_COV' refT, refdG = compute_COV_dGref(refT_cov, Temperatures_MD, Molecules, Polymorphs) else: print("ERROR: refdG_type " + refdG_type + " is not a valid input.") exit() if Independent == 0: Independent = Molecules # ============================================================================================= # Load reference free energy differences # ============================================================================================= # Hard set from old dictionary funciton refPot = 0 ExtraPressures = [] Temperatures = np.sort(np.append(Temperatures_MD, Temperatures_unsampled)) Temperatures = np.sort(np.unique(np.append(Temperatures, refT))) Pressures = 
np.ones(len(Temperatures), int) Pressures[len(Pressures) - len(ExtraPressures):len(Pressures)] = ExtraPressures Potentials = [potential] # ============================================================================================= # FORMAT INPUTS # ============================================================================================= # TEMPERATURE refk = [] for k, temp in enumerate(refT): refk.append(np.where(temp == Temperatures)[0][0]) # ============================================================================================= # READ IN RAW DATA # ============================================================================================= # Constants. kB = 1.3806488e-23 * 6.0221413e23 / (1000.0 * 4.184 ) # Boltzmann constant in kcal/mol/K # Parameters # How many states? K = len(Potentials) * len(Temperatures) # maximum number of snapshots/simulation (could make this automated) - doesn't matter, as long as it's long enough. N_max = 30000 # beta factor for the different temperatures beta_k = 1.0 / (kB * Temperatures) beta_k = np.tile(beta_k, (1, len(Potentials)))[0] # Conversion from kJ to kcal kJ_to_kcal = 0.2390057 # This is the sampling efficiency for each potential in each combination of potentials Efficiency = np.zeros(K, float) # N_k[k] is the total number of snapshots from alchemical state k N_k = np.zeros(K, np.int32) # dA[p,i,k] is the p.interp(Tr, T1, G1)free energy between potential 0 and state k for spacing i in polymorph p dA = np.zeros([len(refT), len(Polymorphs), spacing + 1, K], float) # ddA[p,i,k] is the uncertainty in the free energy between potential 0 and state k for spacing i in polymorph p ddA = np.zeros([len(refT), len(Polymorphs), spacing + 1, K], float) run_dA_analysis = True if os.path.isdir(output_directory + '/dA_raw.npy') and os.path.isdir(output_directory + '/ddA_raw.npy'): hold_dA = np.load(output_directory + '/dA_raw.npy') if np.shape(hold_dA) == np.shape(dA): dA = np.load(output_directory + '/dA_raw.npy') ddA = 
np.load(output_directory + '/ddA_raw.npy') run_dA_analysis = False # dG[p,i,t] is the free energy between polymorph 1 and polymorph p for spacing i and temperature t dG = np.zeros([len(refT), len(Polymorphs), spacing + 1, len(Temperatures)]) # ddG[p,i,t] is the uncertanity in the free energy between polymorph 1 and polymorph p for spacing i and temperature t ddG = np.zeros([len(Polymorphs), spacing + 1, len(Temperatures)]) # dS[p,i,t] is the relative entropy between polymorph 1 and polymorph p for spacing i and temperature t dS = np.zeros([len(refT), len(Polymorphs), spacing + 1, len(Temperatures)]) # ddS[p,i,t] is the uncertanity in the relative entropy between polymorph 1 and polymorph p for spacing i and temperature t ddS = np.zeros([len(Polymorphs), spacing + 1, len(Temperatures)]) dS_mbar = np.zeros([len(Polymorphs), spacing + 1, len(Temperatures)]) ddS_mbar = np.zeros([len(Polymorphs), spacing + 1, len(Temperatures)]) dH_mbar = np.zeros([len(Polymorphs), spacing + 1, len(Temperatures)]) # O_pij[p,i,j] is the overlap within polymorph p between temperature state i and temperature state j O_pij = np.zeros([len(Polymorphs), len(Temperatures), len(Temperatures)]) dU = np.zeros([len(Polymorphs), len(Temperatures)]) ddU = np.zeros([len(Polymorphs), len(Temperatures)]) # u_kln[k,l,n] is the reduced potential energy of configuration n from potential k in potential l u_kln = np.zeros([K, K, N_max], np.float64) # V_pkn is the volume of configuration n of polymorph p at temperature k V_pkn = np.zeros([len(Polymorphs), len(Temperatures), N_max], float) # V_avg is the average volume of polymorph p at temperature k V_avg = np.zeros([len(Polymorphs), len(Temperatures)], float) # ddV_avg is the standard deviation of the volume of polymorph p at temperature k ddV_avg = np.zeros([len(Polymorphs), len(Temperatures)], float) # C_pkn is the lattice tensor of the polymorph p at temperature k box_place = np.matrix([[0, 0], [1, 1], [2, 2], [0, 1], [0, 2], [1, 2]]) C_pkn = 
np.zeros([len(Polymorphs), len(Temperatures), N_max, 3, 3], float) # h_avg is the average lattice parameters of polymorph p at temperature k h_avg = np.zeros([len(Polymorphs), len(Temperatures), 6], float) # dh is the standard deviation of the lattice parameters of polymorph p at temperature k dh = np.zeros([len(Polymorphs), len(Temperatures), 6], float) # Cycle through all polymorphs if run_dA_analysis: for p, polymorph in enumerate(Polymorphs): # Cycle through all sampled potentials for i, potential_k in enumerate(Potentials): count = 0 for t in range(len(Temperatures)): k = len(Temperatures) * i + t # Cycle through all evaluated potentials for j, potential_l in enumerate(Potentials): l = len(Temperatures) * j dirpath = polymorph + '/temperature/' + str( count) + '/' if os.path.isfile(dirpath + 'PROD.edr') and ( Temperatures[t] in Temperatures_MD): count += 1 print("loading " + dirpath + 'PROD.edr') all_energy = panedr.edr_to_df(dirpath + 'PROD.edr') if len(all_energy['Potential'].values) > N_max: [start_production, _, _] = timeseries.detectEquilibration( all_energy['Potential'].values[::10]) start_production *= 10 else: [start_production, _, _] = timeseries.detectEquilibration( all_energy['Potential'].values) # Now read in the lattice tensor and average them if 'Box-XX' in list(all_energy): box_letters = [ 'XX', 'YY', 'ZZ', 'YX', 'ZX', 'ZY' ] else: box_letters = ['X', 'Y', 'Z'] for b in range(len(box_letters)): if len(all_energy['Potential'].values) > N_max: [hold, _, _] = timeseries.detectEquilibration( all_energy[ 'Box-' + box_letters[b]].values[::10]) hold *= 10 else: [hold, _, _] = timeseries.detectEquilibration( all_energy['Box-' + box_letters[b]].values) if hold > start_production: start_production = hold if len(all_energy['Total Energy']. values[start_production:]) > N_max: start_production = len( all_energy['Total Energy'].values) - N_max # Setting the end point of the simulation N = len(all_energy['Total Energy']. 
values[start_production:]) N_k[k] = N u_kln[k, l, :N] = all_energy['Potential'].values[ start_production:] # Now set these energies over all temperatures u_kln[k, l:(l + len(Temperatures)), :N] = u_kln[k, l, :N] # Now read in the volumes and average them V_pkn[p, t, :N] = all_energy['Volume'].values[ start_production:] V_avg[p, t] = np.average( V_pkn[p, t, :N]) / float(Independent) ddV_avg[p, t] = np.std( V_pkn[p, t, :N]) / float(Independent) # Making the lattice tensor all the correct sign with time if count == 1: sign = np.sign( md.load( dirpath + 'pre_EQ.gro').unitcell_vectors[0].T) for s in range(3): for j in range(3): if sign[s, j] == 0.: # Correcting for the sign of the lattice parameters sign[s, j] = 1. for b in range(len(box_letters)): C_pkn[p, t, :N, box_place[b, 0], box_place[b, 1]] = np.absolute(all_energy['Box-' + box_letters[b]].values[start_production:]) * \ sign[box_place[b, 0], box_place[b, 1]] * 10 C_avg = np.average(C_pkn[p, t, :N], axis=0) dC = np.std(C_pkn[p, t, :N], axis=0) h_avg[p, t] = crystal_matrix_to_lattice_parameters( C_avg) dh[p, t] = np.absolute( crystal_matrix_to_lattice_parameters(C_avg + dC) - h_avg[p, t]) else: N_k[k] = 0 V_avg[p, t] = np.nan ddV_avg[p, t] = np.nan h_avg[p, t] = np.nan dh[p, t] = np.nan print("Start1") # Convert all units to kcal #u_pklnT[p, :, :, :] *= kJ_to_kcal u_kln *= kJ_to_kcal print("Start2") # If this was already in kcal or already fully independent, revert for j in range(len(Potentials)): if Potentials[j][:6] == "amoeba": #u_pklnT[p, :, j * len(Temperatures):(j + 1) * len(Temperatures), :, :] /= kJ_to_kcal u_kln[:, j * len(Temperatures):(j + 1) * len(Temperatures), :] /= kJ_to_kcal print("Start3") # Remove dependent molecules for j in range(len(Potentials)): if Potentials[j][:6] != "amoeba": #u_pklnT[p, :, j * len(Temperatures):(j + 1) * len(Temperatures), :, :] *= float(Independent) / Molecules u_kln[:, j * len(Temperatures):(j + 1) * len(Temperatures), :] *= float( Independent) / Molecules 
print("Start4") # Now average together the energies and volumes at each state for t in range(len(Temperatures)): dU[p, t] = np.average(u_kln[t, t, :N_k[t]]) / float(Independent) ddU[p, t] = np.std( u_kln[t, t, :N_k[t]]) / N_k[t]**0.5 / float(Independent) print("Start5") # convert to nondimensional units from kcal/mol for k, beta in enumerate(beta_k): u_kln[:, k, :] *= beta u_kln_save = u_kln.copy() N_k_save = N_k.copy() print("End!") print("Number of retained samples") print(N_k) # Now create the full N_k matrix including the roll-backs as well as the free energy container # N_k_matrix[i,k] is the total number of snapshots from alchemical state k using in spacing i N_k_matrix = np.zeros([spacing + 1, K], np.int32) for i in range(spacing + 1): N_k_matrix[i, :] = N_k_save.copy() N_k_matrix[i, 0:len(Temperatures)] = N_k_matrix[ i, 0:len(Temperatures)] * float(i) / float(spacing) # ============================================================================================= # COMPUTE FREE ENERGY DIFFERENCE USING MBAR FOR EACH SPACING # ============================================================================================= for i in range(spacing + 1): if i == 0 and len(Potentials) == 1: continue # Initialize MBAR. 
print("Running MBAR...") # generate the weights of each of the umbrella set mbar = pymbar.MBAR(u_kln, N_k_matrix[i, :], verbose=True) print("MBAR Converged...") hold = mbar.computeEffectiveSampleNumber(verbose=True) print(hold) # extract self-consistent weights and uncertainties (df_i, ddf_i, theta_i) = mbar.getFreeEnergyDifferences() # extract entropy [_, _, Delta_u_ij, _, Delta_s_ij, dDelta_s_ij] = mbar.computeEntropyAndEnthalpy() print("Free Energies Optained...") # Store the dimensionless results in the dA container dA[:, p, i, :] = df_i[refk] dH_mbar[p, i, :] = Delta_u_ij[0] dS_mbar[p, i, :] = Delta_s_ij[0] ddS_mbar[p, i, :] = dDelta_s_ij[0] print(dA) # ============================================================================================= # COMPUTE UNCERTAINTY USING MBAR # ============================================================================================= g_k = np.zeros([K]) for i in range(spacing + 1): if i == 0 and len(Potentials) == 1: continue for k in range(K): # subsample correlated data - for now, use energy from current state if N_k_matrix[i, k] > 0: print(N_k_matrix[i, k]) g_k[k] = timeseries.statisticalInefficiency( u_kln_save[k, k, 0:100]) print( "Correlation time for phase (%s), sampled state %d is %10.3f" % (phase, k, g_k[k])) # subsample the data to get statistically uncorrelated data indices = np.array( timeseries.subsampleCorrelatedData( u_kln_save[k, k, 0:N_k_matrix[i, k]], g=g_k[k])) N_k_matrix[i, k] = len(indices) u_kln[k, :, 0:N_k_matrix[i, k]] = u_kln_save[ k, :, indices].transpose( ) # not sure why we have to transpose print("Number of retained samples") print(N_k) print("Running MBAR...") # generate the weights of each state mbar = pymbar.MBAR(u_kln, N_k_matrix[i, :], verbose=True) print("MBAR Converged...") # extract self-consistent weights and uncertainties (df_u, ddf_u, theta_u) = mbar.getFreeEnergyDifferences() # calculate the overlap it necessary if len(Temperatures) == 2: O_pij[p, :, :] = mbar.computeOverlap()[2] # 
testing weights_in_gromos = np.zeros(K, float) for k in range(K): w = np.exp(mbar.Log_W_nk[:, k]) print("max weight in state %d is %12.7f" % (k, np.max(w))) neff = 1 / np.sum(w**2) print("Effective number of sample in state %d is %10.3f" % (k, neff)) print("Efficiency for state %d is %d/%d = %10.4f" % (k, neff, len(w), neff / len(w))) Efficiency[k] = neff / len(w) # Store the efficiency w_0 = np.exp(mbar.Log_W_nk[:, 0]) # Weights in gromos initial_configs = np.sum(N_k[0:k]) final_configs = np.sum(N_k[0:k + 1]) print("Total weight in gromos " + str(np.sum(w_0[initial_configs:final_configs]))) weights_in_gromos[k] = np.sum( w_0[initial_configs:final_configs]) # Write out free energy differences print("Free Energy Difference (in units of kcal/mol)") for k in range(K): print("%8.3f %8.3f" % (-df_i[k, 0], ddf_u[k, 0])) # Store the dimensionless results in the ddA container ddA[:, p, i, :] = ddf_u[refk] # ddA[:, p, i, :] = ddf_i[refk] # Saving the files if needed for QHA if refdG_type == 'QHA': np.save(output_directory + '/dA_raw.npy', dA) np.save(output_directory + '/ddA_raw.npy', ddA) # ============================================================================================= # FINALIZE THE RELATIVE FREE ENERGY AND ENTROPY # ============================================================================================= for k in range(len(refT)): for i in range(spacing + 1): for t, T in enumerate(Temperatures): for p in range(len(Polymorphs)): dG[k, p, i, t] = (dA[k, p, i, t] - dA[k, 0, i, t]) / ( beta_k[t] * float(Independent)) + float(T) / float( refT[k]) * refdG[k, p] if p == 0: continue dS[k, p, i, t] = (dU[p, t] - dU[0, t] - dG[k, p, i, t]) / float(T) if k == 0: ddG[p, i, t] = ((ddA[k, p, i, t]**2 + ddA[k, 0, i, t]**2) / (beta_k[t] * float(Independent))**2)**0.5 ddS[p, i, t] = (ddU[p, t]**2 + ddU[p, t]**2 + ddG[p, i, t]**2)**0.5 / float(T) # ============================================================================================= # PLOT THE RELATIVE FREE 
ENERGY VS TEMPERATURE # ============================================================================================= PlotPress = 1 # Pressure to plot the dGvT curve at Temperatures_P = Temperatures[Pressures == PlotPress] np.save(output_directory + '/T_' + molecule + '_' + potential, Temperatures_P) for p, Poly in enumerate(Polymorphs): np.save( output_directory + '/dGvT_' + molecule + '_' + Poly + '_' + potential, dG[:, p, spacing, Pressures == PlotPress]) np.save( output_directory + '/ddGvT_' + molecule + '_' + Poly + '_' + potential, ddG[p, spacing, Pressures == PlotPress]) if len(Potentials) > 1: np.save( output_directory + '/dGvT_' + molecule + '_' + Poly + '_' + potential + '_indirect', dG[:, p, 0, :]) np.save( output_directory + '/ddGvT_' + molecule + '_' + Poly + '_' + potential + '_indirect', ddG[p, 0, :]) if spacing > 1: np.save( output_directory + '/dGvT_' + molecule + '_' + Poly + '_' + potential + '_convergence', dG[:, p, :, :]) np.save( output_directory + '/ddGvT_' + molecule + '_' + Poly + '_' + potential + '_convergence', ddG[p, :, :]) np.save( output_directory + '/dS_' + molecule + '_' + Poly + '_' + potential, dS[:, p, spacing, :]) np.save( output_directory + '/ddS_' + molecule + '_' + Poly + '_' + potential, ddS[p, spacing, :]) for p, Poly in enumerate(Polymorphs): np.save( output_directory + '/UvT_' + molecule + '_' + Poly + '_' + potential, dU[p, :]) for p, Poly in enumerate(Polymorphs): np.save( output_directory + '/VvT_' + molecule + '_' + Poly + '_' + potential, V_avg[p, :]) np.save( output_directory + '/dVvT_' + molecule + '_' + Poly + '_' + potential, ddV_avg[p, :]) # ============================================================================================= # SAVE THE AVERAGE BOX VECTORS AND ANGLES VS TEMPERATURE # ============================================================================================= for p, Poly in enumerate(Polymorphs): np.save( output_directory + '/hvT_' + molecule + '_' + Poly + '_' + potential, h_avg[p, :]) 
np.save( output_directory + '/dhvT_' + molecule + '_' + Poly + '_' + potential, dh[p, :]) # Save the data for future use. for p, Poly in enumerate(Polymorphs): np.save( output_directory + '/dUvT_' + molecule + '_' + Poly + '_' + potential, dU[p, :] - dU[0, :]) np.save( output_directory + '/ddUvT_' + molecule + '_' + Poly + '_' + potential, (ddU[p, :]**2 + ddU[0, :]**2)**0.5)
def get_dHvap_from_para(self, k) -> float:
    """Return the derivative of the vaporization enthalpy w.r.t. parameter ``k``.

    The potential-energy series of the reference run and of the run
    re-evaluated with parameter ``k`` shifted (``diff1.<k>*.edr``) are read
    from edr files, differenced and divided by ``get_delta_for_para(k)``.
    The result is the mean of that difference series minus
    ``1 / self.RT`` times the covariance between the stored reference
    series on ``self`` and the liquid potential-energy difference series.

    NOTE(review): ``diff1`` presumably means a ``+delta`` perturbation, which
    would make this a forward finite difference -- confirm against the code
    that writes those files.  A central difference using ``diff-1.*`` files
    was sketched in earlier revisions but is not used here.

    Side effects: changes the process working directory to ``self.dir_npt``
    (and then to ``self.dir_vacuum`` when ``self.need_vacuum`` is true) and
    does not restore it.

    :param k: parameter identifier; interpolated into the ``diff1.%s`` edr
        file names and passed to ``get_delta_for_para``.
    :returns: the scalar derivative ``dHdp``.
    :raises Exception: with message ``'File not exist: <abs path>'`` when an
        edr file cannot be loaded; the original error is chained as the cause.
    """

    def _read_potential(edr_name):
        # Load the ``Potential`` column of an edr file located in the current
        # working directory, as a numpy array.
        try:
            df = panedr.edr_to_df(edr_name)
        except Exception as err:
            # Keep the historical exception type and message so existing
            # callers still match, but preserve the real cause for debugging.
            raise Exception(
                'File not exist: ' + os.path.abspath(edr_name)) from err
        return np.array(df.Potential)

    os.chdir(self.dir_npt)

    # Liquid potential energy: perturbed parameters vs. the reference run.
    pene_array_diff_p = _read_potential('diff1.%s.edr' % k)
    pene_array = _read_potential('npt.edr')

    # Derivative series of the potential energy w.r.t. the parameter.
    delta = get_delta_for_para(k)
    dPene_array = (pene_array_diff_p - pene_array) / delta

    if not self.need_vacuum:
        # Per-frame Hvap computed from the "-hvap" energy files.
        hvap_array_diff_p = (
            self.RT - _read_potential('diff1.%s-hvap.edr' % k) / self.n_mol)
        hvap_array = self.RT - _read_potential('hvap.edr') / self.n_mol
        dHvap_array = (hvap_array_diff_p - hvap_array) / delta

        # Mean derivative minus the covariance (fluctuation) term.
        dHdp = dHvap_array.mean() - 1 / self.RT * (
            (self.hvap_array * dPene_array).mean()
            - self.hvap_array.mean() * dPene_array.mean())
    else:
        # Liquid contribution, using the liquid series stored on self.
        dELIQdp = dPene_array.mean() - 1 / self.RT * (
            (self.pe_liq_array * dPene_array).mean()
            - self.pe_liq_array.mean() * dPene_array.mean())

        # Gas contribution from the vacuum simulation directory.
        os.chdir(self.dir_vacuum)
        pene_array_diff_p = _read_potential('diff1.%s.edr' % k)
        # The original formatted 'nvt.edr' % k in its error path, which
        # raised TypeError instead of the intended "File not exist" error.
        pene_array = _read_potential('nvt.edr')
        dPene_array = (pene_array_diff_p - pene_array) / delta

        dEGASdp = dPene_array.mean() - 1 / self.RT * (
            (self.pe_gas_array * dPene_array).mean()
            - self.pe_gas_array.mean() * dPene_array.mean())

        # Liquid energy is divided by the number of molecules, gas is not.
        dHdp = dEGASdp - dELIQdp / self.n_mol

    return dHdp