def reweight_direct(nodes, options):
    """Assign node weights with the direct free-energy ansatz.

    For every node the phi-weighted energies (loaded energies plus the result
    of get_phi_potential) are computed; their frame-weighted mean and standard
    deviation are stored in n.obs.mean_V and n.obs.std_V. Frames whose
    phi-weighted energy lies closer to the node's mean than the energy region
    (the standard deviation of the per-node means) are the node's refpoints.
    The node weight is n_frames / sum(exp(beta * V_ref)) over its refpoints.
    n.obs.S and n.obs.A are set to 0.0.

    @param nodes: non-empty sequence of nodes; beta is read from nodes[0].pool.thermo_beta
    @param options: object providing e_bonded, e_nonbonded, custom_energy and save_refpoints
    @raise ZeroDivisionError: if a node has no refpoints inside the energy region
    """
    print("Direct free energy reweighting: see Klimm, Bujotzek, Weber 2011")

    custom_energy_terms = None
    if options.e_nonbonded in ("run_custom", "rerun_custom"):
        assert path.exists(options.custom_energy)
        # one energy term per line, blank lines are skipped
        with open(options.custom_energy) as custom_file:
            custom_energy_terms = [entry.strip() for entry in custom_file if entry != "\n"]

    beta = nodes[0].pool.thermo_beta

    # First pass: per-node energy statistics. The energy arrays are kept per
    # node because the second pass must evaluate every node against its OWN
    # energies (not against whatever the last iteration left behind).
    node_energies = []
    for n in nodes:
        # get potential V and subtract penalty potential
        energies = load_energy(n, options.e_bonded, options.e_nonbonded, custom_energy_terms)
        frame_weights = n.frameweights
        phi_weighted_energies = energies + get_phi_potential(n.trajectory, n)

        # define evaluation region where sampling is rather dense, e.g. around
        # mean potential energy with standard deviation of potential energy
        n.obs.mean_V = np.average(phi_weighted_energies, weights=frame_weights)
        n.obs.std_V = np.sqrt(np.average(np.square(phi_weighted_energies - n.obs.mean_V), weights=frame_weights))
        n.tmp['weight'] = 0.0
        node_energies.append((energies, phi_weighted_energies))

    # new part
    mean_mean_V = np.mean([n.obs.mean_V for n in nodes])
    std_mean_V = np.sqrt(np.mean([np.square(n.obs.mean_V - mean_mean_V) for n in nodes]))

    print("### original std_mean_V: %f" % std_mean_V)
    print("### mean over obs.std_V: %f" % np.mean([n.obs.std_V for n in nodes]))  # TODO decide upon one way to calculate std_mean_V
    energy_region = std_mean_V

    # Second pass: select refpoints and compute the weight of each node.
    for n, (energies, phi_weighted_energies) in zip(nodes, node_energies):
        refpoints = np.where(np.abs(phi_weighted_energies - n.obs.mean_V) < energy_region)[0]
        n.tmp['n_refpoints'] = len(refpoints)

        # using separate log-file; the with-block closes it on errors, too
        with open(n.reweighting_log_fn, "a") as log:
            def output(message):
                print(message)
                log.write(message + "\n")

            output("======= Starting node reweighting %s" % datetime.now())
            output(" unweighted mean V: %s [kJ/mol], without penalty potential" % np.mean(energies))
            output(" phi-weighted mean V: %s [kJ/mol], without penalty potential" % np.mean(phi_weighted_energies))
            output(" weighted mean V: %f [kJ/mol]" % n.obs.mean_V)
            # NOTE: the label below says "tenth of weighted V standard deviaton",
            # but the value printed is std_mean_V (spread of the per-node means).
            output(" energy region (=tenth of weighted V standard deviaton): %f [kJ/mol]" % energy_region)
            output(" number of refpoints: %d" % n.tmp['n_refpoints'])

            if len(refpoints) == 0:
                # would otherwise surface as a context-free "float division by zero"
                raise ZeroDivisionError("Zero refpoints for " + n.name + ".")

            # calculate weights with direct ansatz
            boltzmann_sum = np.sum(np.exp(beta * phi_weighted_energies[refpoints]))
            n.tmp['weight'] = float(n.trajectory.n_frames) / float(boltzmann_sum)
            n.obs.S = 0.0
            n.obs.A = 0.0
            if options.save_refpoints:
                n.obs.refpoints = refpoints
def reweight_presampling(nodes, presamp_temp, moi_energies, sol_energy):
    """Assign node weights from a presampling analysis.

    For every node the frame-weighted mean and standard deviation of the
    phi-weighted energies are stored in n.obs.mean_V / n.obs.std_V, a
    presampling free energy is derived from the summed get_phi values of the
    presampling trajectory, and both are combined into the free energy
    n.obs.A (n.obs.S is set to 0.0). Afterwards the node list is sorted IN
    PLACE by ascending n.obs.A and the weights are chained so that the node
    with the lowest free energy keeps weight 1.0.

    @param nodes: non-empty list of nodes; sorted in place by free energy
    @param presamp_temp: presampling temperature, used to compute the presampling beta
    @param moi_energies: passed to load_energies as with_moi_energies
    @param sol_energy: passed to load_energies as with_sol
    """
    print("Presampling analysis reweighting: see formula 18 in Fackeldey, Durmaz, Weber 2011")

    # presampling data
    presampling_internals = nodes[0].pool.root.trajectory  # alternatively pool[0].trajectory

    # presampling and sampling beta
    beta_samp = nodes[0].pool.thermo_beta
    beta_presamp = 1.0 / (presamp_temp * BOLTZMANN * AVOGADRO)

    # calculate free energy per node
    for n in nodes:
        # using separate log-file; the with-block closes it on errors, too
        with open(n.reweighting_log_fn, "a") as log:
            def output(message):
                print(message)
                log.write(message + "\n")

            output("======= Starting node reweighting %s" % datetime.now())

            # get potential V and subtract penalty potential
            energies = load_energies(n, with_penalty=False, with_sol=sol_energy, with_moi_energies=moi_energies)
            frame_weights = n.frameweights
            # NOTE(review): value is unused below; the attribute access is kept in
            # case it triggers lazy loading on the node - confirm, then remove.
            phi_values = n.phi_values
            phi_weighted_energies = energies + get_phi_potential(n.trajectory, n)

            # calculate mean V and standard deviation
            n.obs.mean_V = np.average(phi_weighted_energies, weights=frame_weights)
            n.tmp['weight'] = 1.0
            n.obs.std_V = np.sqrt(np.average(np.square(phi_weighted_energies - n.obs.mean_V), weights=frame_weights))

            # number of presampling points in node i => free energy at high temperature
            n.tmp['presamp_weight'] = np.sum(get_phi(presampling_internals, n))
            n.tmp['presamp_A'] = -1.0 / beta_presamp * np.log(n.tmp['presamp_weight'])

            # estimate global optimum potential energy
            factor = 1.0
            n.tmp['opt_pot_energy'] = n.obs.mean_V - 3.0 * factor * n.obs.std_V

            # compute free energy and entropy at sampling temperature
            n.obs.S = 0.0  # TODO can we get separate entropy from the term below?
            n.obs.A = ((beta_samp - beta_presamp) / beta_samp * n.tmp['opt_pot_energy']
                       + np.log(beta_samp / beta_presamp) * factor * n.obs.std_V
                       + (beta_presamp / beta_samp) * n.tmp['presamp_A'])
            # this method produces no refpoints; drop any stored earlier
            if 'refpoints' in n.obs:
                del n.obs['refpoints']

    nodes.sort(key=lambda n: n.obs.A)  # sort in ascending order by free energy values
    # Chain the weights along the sorted list: each node's weight is its
    # predecessor's weight scaled by exp(-beta * (A_i - A_(i-1))).
    for (n1, n2) in zip(nodes[1:], nodes[:-1]):
        n1.tmp['weight'] = np.exp(-nodes[0].pool.thermo_beta * (n1.obs.A - n2.obs.A)) * n2.tmp['weight']
def _select_energy_terms(e_bonded_type, e_nonbonded_type, custom_e_terms=None):
    """Return the list of g_energy term names selected by the two energy method names."""
    moi_terms = ["Coul-SR:MOI-MOI", "LJ-SR:MOI-MOI", "LJ-LR:MOI-MOI", "Coul-14:MOI-MOI", "LJ-14:MOI-MOI"]
    bonded_terms = {
        "run_standard_potential": ["Potential"],
        "run_standard_bondedterms": ["Bond", "Angle", "Proper-Dih.", "Ryckaert-Bell.", "Improper-Dih."],
    }
    nonbonded_terms = {
        "run_standard_nonbondedterms": ["LJ-14", "Coulomb-14", "LJ-(SR)", "LJ-(LR)", "Disper.-corr.", "Coulomb-(SR)", "Coul.-recip."],
        "run_moi": moi_terms,
        "run_moi_sol_interact": ["Coul-SR:MOI-SOL", "LJ-SR:MOI-SOL"],
        # uses the SOL-UNK group names; a former variant used
        # "Coul-SR:MOI-SOL", "LJ-SR:MOI-SOL", "LJ-LR:MOI-SOL"
        "run_moi_sol_interact_withLR": ["Coul-SR:SOL-UNK", "LJ-SR:SOL-UNK", "LJ-LR:SOL-UNK"],
        "run_moi_sol_sr": moi_terms + ["Coul-SR:MOI-SOL", "LJ-SR:MOI-SOL"],
        "run_moi_sol_lr": moi_terms + ["Coul-SR:MOI-SOL", "LJ-SR:MOI-SOL", "LJ-LR:MOI-SOL"],
    }

    def canonical(method):
        # "rerun_x" selects the same terms as "run_x"
        return method[2:] if method.startswith("rerun_") else method

    e_terms = []
    if e_bonded_type != "none":  # get bonded energy
        selected = bonded_terms.get(canonical(e_bonded_type))
        if selected is None:
            raise Exception("Method unkown: " + e_bonded_type)
        e_terms += selected

    if e_nonbonded_type != "none":  # get non-bonded energy
        method = canonical(e_nonbonded_type)
        if method == "run_custom":
            if not custom_e_terms:
                # explicit raise so the check also runs under "python -O"
                raise AssertionError("custom energy terms are required for " + e_nonbonded_type)
            e_terms += custom_e_terms
        elif method in nonbonded_terms:
            e_terms += nonbonded_terms[method]
        else:
            raise Exception("Method unkown: " + e_nonbonded_type)
    return e_terms


def min_per_node(mins_dir, partition, nodes, e_bonded_type, e_nonbonded_type, custom_e_terms=None):
    """ Calculates the minimum energy of the presampling frames belonging to the according node.
    A frame belongs to the node to which it has the strongest membership.

    For presampling frame i the energy is read from mins_dir + "/ener<i>.edr" via g_energy
    (sum of the selected terms, last row of the produced xvg-file), the value of
    get_phi_potential for that frame and its node is added, and the smallest such value per
    node is stored in nodes[k].tmp["opt_pot_e"].

    @param mins_dir: directory in which the minimizations required for the presampling reweighting are performed
    @param partition: 1D array, at index i is the index of the node corresponding to presampling frame i
    @param nodes: sequence of nodes; nodes[0].pool.root.trajectory provides the presampling frames
    @param e_bonded_type: bonded energy method name, or "none"
    @param e_nonbonded_type: non-bonded energy method name, or "none"
    @param custom_e_terms: list of term names, required for the "run_custom"/"rerun_custom" methods
    @raise Exception: if an energy method name is unknown
    @raise ValueError: if a node has no presampling frame assigned to it"""
    # The selected terms do not depend on the frame: resolve (and validate) them once.
    e_terms = _select_energy_terms(e_bonded_type, e_nonbonded_type, custom_e_terms)

    # one independent list per node ("[[]] * k" would alias a single list k times)
    mins = [[] for _ in nodes]
    presamp_int = nodes[0].pool.root.trajectory

    for i in range(partition.size):
        node_index = partition[i]
        if e_terms:
            edr_fn = mins_dir + "/ener" + str(i) + ".edr"
            xvg_fn = mktemp(suffix=".xvg", dir=mins_dir)
            cmd = ["g_energy", "-dp", "-f", edr_fn, "-o", xvg_fn, "-sum"]
            print("Calling: " + (" ".join(cmd)))
            p = Popen(cmd, stdin=PIPE)
            # the term names are handed to g_energy on stdin, one per line
            p.communicate(input=("\n".join(e_terms) + "\n"))
            assert (p.wait() == 0)
            try:
                # skip the first 10 lines and all "@" lines of the xvg-file,
                # then take column 1 of the last row
                e = np.loadtxt(xvg_fn, comments="@", usecols=(1,), skiprows=10)[-1]
            finally:
                os.remove(xvg_fn)
        else:
            # no energy terms requested: only the phi potential contributes
            e = 0

        mins[node_index].append(e + get_phi_potential(presamp_int.getframes([i]), nodes[node_index])[0])

    for i, node in enumerate(nodes):
        if not mins[i]:
            raise ValueError("No presampling frame belongs to node with index %d." % i)
        node.tmp["opt_pot_e"] = min(mins[i])
def reweight_entropy(nodes, options):
    """Assign node weights from an entropy estimate based on nearpoint counts.

    For every node the frame-weighted mean and standard deviation of the
    phi-weighted energies are stored in n.obs.mean_V / n.obs.std_V. Frames
    whose energy is closer to the mean than one standard deviation are the
    refpoints. For each refpoint the frames whose norm2 distance to it is
    below the conjugate variance are counted as nearpoints; the median of
    n_frames / nearpoints yields the entropy n.obs.S, and
    n.obs.A = mean_V - T * S. Afterwards the node list is sorted IN PLACE by
    ascending n.obs.A and the weights are chained so that the node with the
    lowest free energy keeps weight 1.0.

    @param nodes: non-empty list of nodes; sorted in place by free energy
    @param options: object providing e_bonded, e_nonbonded, custom_energy and save_refpoints
    @raise Exception: if a node has no refpoints
    """
    print("Entropy reweighting: see Klimm, Bujotzek, Weber 2011")

    custom_energy_terms = None
    if options.e_nonbonded in ("run_custom", "rerun_custom"):
        assert path.exists(options.custom_energy)
        # one energy term per line, blank lines are skipped
        with open(options.custom_energy) as custom_file:
            custom_energy_terms = [entry.strip() for entry in custom_file if entry != "\n"]

    # calculate variance of internal coordinates
    conjugate_var = np.mean([n.trajectory.merged_var_weighted() for n in nodes])  # this be our evaluation region

    # find refpoints and calculate nearpoints
    for n in nodes:
        # using separate log-file; the with-block closes it on errors, too
        with open(n.reweighting_log_fn, "a") as log:
            def output(message):
                print(message)
                log.write(message + "\n")

            output("======= Starting node reweighting %s" % datetime.now())

            # get potential V and subtract penalty potential
            energies = load_energy(n, options.e_bonded, options.e_nonbonded, custom_energy_terms)
            frame_weights = n.frameweights
            phi_weighted_energies = energies + get_phi_potential(n.trajectory, n)

            # calculate mean V
            n.obs.mean_V = np.average(phi_weighted_energies, weights=frame_weights)
            n.tmp['weight'] = 1.0
            n.obs.std_V = np.sqrt(np.average(np.square(phi_weighted_energies - n.obs.mean_V), weights=frame_weights))

            # every frame within this region is considered refpoint
            energy_region = n.obs.std_V
            refpoints = np.where(np.abs(phi_weighted_energies - n.obs.mean_V) < energy_region)[0]

            output(" unweighted mean V: %s [kJ/mol], without penalty potential" % np.mean(energies))
            output(" phi-weighted mean V: %s [kJ/mol], without penalty potential" % np.mean(phi_weighted_energies))
            output(" weighted mean V: %f [kJ/mol]" % n.obs.mean_V)
            output(" energy region (=weighted V standard deviation): %f [kJ/mol]" % energy_region)
            output(" evaluation region (=conjugate variance): %f" % conjugate_var)
            output(" number of refpoints: %d" % len(refpoints))

            if len(refpoints) == 0:
                raise Exception("Zero refpoints for " + n.name + " [" + n.trr_fn + "].")

            norm_inv_nearpoints = []
            for ref in refpoints:  # for each refpoint count nearpoints
                diffs = (n.trajectory - n.trajectory.getframe(ref)).norm2()  # TODO -> needs Marcus-check -> Do we have to consider Frame-weights here?
                nearpoints = np.sum(diffs < conjugate_var)
                # output(" refpoint %d with energy %f has %d nearpoints" % (ref, phi_weighted_energies[ref], nearpoints))
                if nearpoints == 1:
                    # the only nearpoint is the refpoint itself
                    output("WARNING: No nearpoints found for refpoint %d! (%s)" % (ref, n.name))
                # new calculation formula (see wiki), +1 is implicit as refpoint counts as nearpoint
                norm_inv_nearpoints.append(float(n.trajectory.n_frames) / float(nearpoints))

            n.tmp['medi_inv_nearpoints'] = np.median(norm_inv_nearpoints)
            n.obs.S = AVOGADRO * BOLTZMANN * np.log(n.tmp['medi_inv_nearpoints'])  # [kJ/mol*K]
            n.obs.A = n.obs.mean_V - nodes[0].pool.temperature * n.obs.S  # [kJ/mol]
            if options.save_refpoints:
                n.obs.refpoints = refpoints

    nodes.sort(key=lambda n: n.obs.A)  # sort in ascending order by free energy values
    # Chain the weights along the sorted list: each node's weight is its
    # predecessor's weight scaled by exp(-beta * (A_i - A_(i-1))).
    for (n1, n2) in zip(nodes[1:], nodes[:-1]):
        n1.tmp['weight'] = np.exp(-nodes[0].pool.thermo_beta * (n1.obs.A - n2.obs.A)) * n2.tmp['weight']
def reweight_direct(nodes, options):
    """Assign node weights with the direct free-energy ansatz.

    For every node the phi-weighted energies (loaded energies plus the result
    of get_phi_potential) are computed; their frame-weighted mean and standard
    deviation are stored in n.obs.mean_V and n.obs.std_V. Frames whose
    phi-weighted energy lies closer to the node's mean than the energy region
    (the standard deviation of the per-node means) are the node's refpoints.
    The node weight is n_frames / sum(exp(beta * V_ref)) over its refpoints.
    n.obs.S and n.obs.A are set to 0.0.

    @param nodes: non-empty sequence of nodes; beta is read from nodes[0].pool.thermo_beta
    @param options: object providing e_bonded, e_nonbonded, custom_energy and save_refpoints
    @raise ZeroDivisionError: if a node has no refpoints inside the energy region
    """
    print("Direct free energy reweighting: see Klimm, Bujotzek, Weber 2011")

    custom_energy_terms = None
    if options.e_nonbonded in ("run_custom", "rerun_custom"):
        assert path.exists(options.custom_energy)
        # one energy term per line, blank lines are skipped
        with open(options.custom_energy) as custom_file:
            custom_energy_terms = [entry.strip() for entry in custom_file if entry != "\n"]

    beta = nodes[0].pool.thermo_beta

    # First pass: per-node energy statistics. The energy arrays are kept per
    # node because the second pass must evaluate every node against its OWN
    # energies (not against whatever the last iteration left behind).
    node_energies = []
    for n in nodes:
        # get potential V and subtract penalty potential
        energies = load_energy(n, options.e_bonded, options.e_nonbonded, custom_energy_terms)
        frame_weights = n.frameweights
        phi_weighted_energies = energies + get_phi_potential(n.trajectory, n)

        # define evaluation region where sampling is rather dense, e.g. around
        # mean potential energy with standard deviation of potential energy
        n.obs.mean_V = np.average(phi_weighted_energies, weights=frame_weights)
        n.obs.std_V = np.sqrt(np.average(np.square(phi_weighted_energies - n.obs.mean_V), weights=frame_weights))
        n.tmp['weight'] = 0.0
        node_energies.append((energies, phi_weighted_energies))

    # new part
    mean_mean_V = np.mean([n.obs.mean_V for n in nodes])
    std_mean_V = np.sqrt(np.mean([np.square(n.obs.mean_V - mean_mean_V) for n in nodes]))

    print("### original std_mean_V: %f" % std_mean_V)
    print("### mean over obs.std_V: %f" % np.mean([n.obs.std_V for n in nodes]))  # TODO decide upon one way to calculate std_mean_V
    energy_region = std_mean_V

    # Second pass: select refpoints and compute the weight of each node.
    for n, (energies, phi_weighted_energies) in zip(nodes, node_energies):
        refpoints = np.where(np.abs(phi_weighted_energies - n.obs.mean_V) < energy_region)[0]
        n.tmp['n_refpoints'] = len(refpoints)

        # using separate log-file; the with-block closes it on errors, too
        with open(n.reweighting_log_fn, "a") as log:
            def output(message):
                print(message)
                log.write(message + "\n")

            output("======= Starting node reweighting %s" % datetime.now())
            output(" unweighted mean V: %s [kJ/mol], without penalty potential" % np.mean(energies))
            output(" phi-weighted mean V: %s [kJ/mol], without penalty potential" % np.mean(phi_weighted_energies))
            output(" weighted mean V: %f [kJ/mol]" % n.obs.mean_V)
            # NOTE: the label below says "tenth of weighted V standard deviaton",
            # but the value printed is std_mean_V (spread of the per-node means).
            output(" energy region (=tenth of weighted V standard deviaton): %f [kJ/mol]" % energy_region)
            output(" number of refpoints: %d" % n.tmp['n_refpoints'])

            if len(refpoints) == 0:
                # would otherwise surface as a context-free "float division by zero"
                raise ZeroDivisionError("Zero refpoints for " + n.name + ".")

            # calculate weights with direct ansatz
            boltzmann_sum = np.sum(np.exp(beta * phi_weighted_energies[refpoints]))
            n.tmp['weight'] = float(n.trajectory.n_frames) / float(boltzmann_sum)
            n.obs.S = 0.0
            n.obs.A = 0.0
            if options.save_refpoints:
                n.obs.refpoints = refpoints
def _select_energy_terms(e_bonded_type, e_nonbonded_type, custom_e_terms=None):
    """Return the list of g_energy term names selected by the two energy method names."""
    moi_terms = ["Coul-SR:MOI-MOI", "LJ-SR:MOI-MOI", "LJ-LR:MOI-MOI", "Coul-14:MOI-MOI", "LJ-14:MOI-MOI"]
    bonded_terms = {
        "run_standard_potential": ["Potential"],
        "run_standard_bondedterms": ["Bond", "Angle", "Proper-Dih.", "Ryckaert-Bell.", "Improper-Dih."],
    }
    nonbonded_terms = {
        "run_standard_nonbondedterms": ["LJ-14", "Coulomb-14", "LJ-(SR)", "LJ-(LR)", "Disper.-corr.", "Coulomb-(SR)", "Coul.-recip."],
        "run_moi": moi_terms,
        "run_moi_sol_interact": ["Coul-SR:MOI-SOL", "LJ-SR:MOI-SOL"],
        # uses the SOL-UNK group names; a former variant used
        # "Coul-SR:MOI-SOL", "LJ-SR:MOI-SOL", "LJ-LR:MOI-SOL"
        "run_moi_sol_interact_withLR": ["Coul-SR:SOL-UNK", "LJ-SR:SOL-UNK", "LJ-LR:SOL-UNK"],
        "run_moi_sol_sr": moi_terms + ["Coul-SR:MOI-SOL", "LJ-SR:MOI-SOL"],
        "run_moi_sol_lr": moi_terms + ["Coul-SR:MOI-SOL", "LJ-SR:MOI-SOL", "LJ-LR:MOI-SOL"],
    }

    def canonical(method):
        # "rerun_x" selects the same terms as "run_x"
        return method[2:] if method.startswith("rerun_") else method

    e_terms = []
    if e_bonded_type != "none":  # get bonded energy
        selected = bonded_terms.get(canonical(e_bonded_type))
        if selected is None:
            raise Exception("Method unkown: " + e_bonded_type)
        e_terms += selected

    if e_nonbonded_type != "none":  # get non-bonded energy
        method = canonical(e_nonbonded_type)
        if method == "run_custom":
            if not custom_e_terms:
                # explicit raise so the check also runs under "python -O"
                raise AssertionError("custom energy terms are required for " + e_nonbonded_type)
            e_terms += custom_e_terms
        elif method in nonbonded_terms:
            e_terms += nonbonded_terms[method]
        else:
            raise Exception("Method unkown: " + e_nonbonded_type)
    return e_terms


def min_per_node(mins_dir, partition, nodes, e_bonded_type, e_nonbonded_type, custom_e_terms=None):
    """ Calculates the minimum energy of the presampling frames belonging to the according node.
    A frame belongs to the node to which it has the strongest membership.

    For presampling frame i the energy is read from mins_dir + "/ener<i>.edr" via g_energy
    (sum of the selected terms, last row of the produced xvg-file), the value of
    get_phi_potential for that frame and its node is added, and the smallest such value per
    node is stored in nodes[k].tmp["opt_pot_e"].

    @param mins_dir: directory in which the minimizations required for the presampling reweighting are performed
    @param partition: 1D array, at index i is the index of the node corresponding to presampling frame i
    @param nodes: sequence of nodes; nodes[0].pool.root.trajectory provides the presampling frames
    @param e_bonded_type: bonded energy method name, or "none"
    @param e_nonbonded_type: non-bonded energy method name, or "none"
    @param custom_e_terms: list of term names, required for the "run_custom"/"rerun_custom" methods
    @raise Exception: if an energy method name is unknown
    @raise ValueError: if a node has no presampling frame assigned to it"""
    # The selected terms do not depend on the frame: resolve (and validate) them once.
    e_terms = _select_energy_terms(e_bonded_type, e_nonbonded_type, custom_e_terms)

    # one independent list per node ("[[]] * k" would alias a single list k times)
    mins = [[] for _ in nodes]
    presamp_int = nodes[0].pool.root.trajectory

    for i in range(partition.size):
        node_index = partition[i]
        if e_terms:
            edr_fn = mins_dir + "/ener" + str(i) + ".edr"
            xvg_fn = mktemp(suffix=".xvg", dir=mins_dir)
            cmd = ["g_energy", "-dp", "-f", edr_fn, "-o", xvg_fn, "-sum"]
            print("Calling: " + (" ".join(cmd)))
            p = Popen(cmd, stdin=PIPE)
            # the term names are handed to g_energy on stdin, one per line
            p.communicate(input=("\n".join(e_terms) + "\n"))
            assert (p.wait() == 0)
            try:
                # skip the first 10 lines and all "@" lines of the xvg-file,
                # then take column 1 of the last row
                e = np.loadtxt(xvg_fn, comments="@", usecols=(1,), skiprows=10)[-1]
            finally:
                os.remove(xvg_fn)
        else:
            # no energy terms requested: only the phi potential contributes
            e = 0

        mins[node_index].append(e + get_phi_potential(presamp_int.getframes([i]), nodes[node_index])[0])

    for i, node in enumerate(nodes):
        if not mins[i]:
            raise ValueError("No presampling frame belongs to node with index %d." % i)
        node.tmp["opt_pot_e"] = min(mins[i])
def reweight_entropy(nodes, options):
    """Assign node weights from an entropy estimate based on nearpoint counts.

    For every node the frame-weighted mean and standard deviation of the
    phi-weighted energies are stored in n.obs.mean_V / n.obs.std_V. Frames
    whose energy is closer to the mean than one standard deviation are the
    refpoints. For each refpoint the frames whose norm2 distance to it is
    below the conjugate variance are counted as nearpoints; the median of
    n_frames / nearpoints yields the entropy n.obs.S, and
    n.obs.A = mean_V - T * S. Afterwards the node list is sorted IN PLACE by
    ascending n.obs.A and the weights are chained so that the node with the
    lowest free energy keeps weight 1.0.

    @param nodes: non-empty list of nodes; sorted in place by free energy
    @param options: object providing e_bonded, e_nonbonded, custom_energy and save_refpoints
    @raise Exception: if a node has no refpoints
    """
    print("Entropy reweighting: see Klimm, Bujotzek, Weber 2011")

    custom_energy_terms = None
    if options.e_nonbonded in ("run_custom", "rerun_custom"):
        assert path.exists(options.custom_energy)
        # one energy term per line, blank lines are skipped
        with open(options.custom_energy) as custom_file:
            custom_energy_terms = [entry.strip() for entry in custom_file if entry != "\n"]

    # calculate variance of internal coordinates
    conjugate_var = np.mean([n.trajectory.merged_var_weighted() for n in nodes])  # this be our evaluation region

    # find refpoints and calculate nearpoints
    for n in nodes:
        # using separate log-file; the with-block closes it on errors, too
        with open(n.reweighting_log_fn, "a") as log:
            def output(message):
                print(message)
                log.write(message + "\n")

            output("======= Starting node reweighting %s" % datetime.now())

            # get potential V and subtract penalty potential
            energies = load_energy(n, options.e_bonded, options.e_nonbonded, custom_energy_terms)
            frame_weights = n.frameweights
            phi_weighted_energies = energies + get_phi_potential(n.trajectory, n)

            # calculate mean V
            n.obs.mean_V = np.average(phi_weighted_energies, weights=frame_weights)
            n.tmp['weight'] = 1.0
            n.obs.std_V = np.sqrt(np.average(np.square(phi_weighted_energies - n.obs.mean_V), weights=frame_weights))

            # every frame within this region is considered refpoint
            energy_region = n.obs.std_V
            refpoints = np.where(np.abs(phi_weighted_energies - n.obs.mean_V) < energy_region)[0]

            output(" unweighted mean V: %s [kJ/mol], without penalty potential" % np.mean(energies))
            output(" phi-weighted mean V: %s [kJ/mol], without penalty potential" % np.mean(phi_weighted_energies))
            output(" weighted mean V: %f [kJ/mol]" % n.obs.mean_V)
            output(" energy region (=weighted V standard deviation): %f [kJ/mol]" % energy_region)
            output(" evaluation region (=conjugate variance): %f" % conjugate_var)
            output(" number of refpoints: %d" % len(refpoints))

            if len(refpoints) == 0:
                raise Exception("Zero refpoints for " + n.name + " [" + n.trr_fn + "].")

            norm_inv_nearpoints = []
            for ref in refpoints:  # for each refpoint count nearpoints
                diffs = (n.trajectory - n.trajectory.getframe(ref)).norm2()  # TODO -> needs Marcus-check -> Do we have to consider Frame-weights here?
                nearpoints = np.sum(diffs < conjugate_var)
                # output(" refpoint %d with energy %f has %d nearpoints" % (ref, phi_weighted_energies[ref], nearpoints))
                if nearpoints == 1:
                    # the only nearpoint is the refpoint itself
                    output("WARNING: No nearpoints found for refpoint %d! (%s)" % (ref, n.name))
                # new calculation formula (see wiki), +1 is implicit as refpoint counts as nearpoint
                norm_inv_nearpoints.append(float(n.trajectory.n_frames) / float(nearpoints))

            n.tmp['medi_inv_nearpoints'] = np.median(norm_inv_nearpoints)
            n.obs.S = AVOGADRO * BOLTZMANN * np.log(n.tmp['medi_inv_nearpoints'])  # [kJ/mol*K]
            n.obs.A = n.obs.mean_V - nodes[0].pool.temperature * n.obs.S  # [kJ/mol]
            if options.save_refpoints:
                n.obs.refpoints = refpoints

    nodes.sort(key=lambda n: n.obs.A)  # sort in ascending order by free energy values
    # Chain the weights along the sorted list: each node's weight is its
    # predecessor's weight scaled by exp(-beta * (A_i - A_(i-1))).
    for (n1, n2) in zip(nodes[1:], nodes[:-1]):
        n1.tmp['weight'] = np.exp(-nodes[0].pool.thermo_beta * (n1.obs.A - n2.obs.A)) * n2.tmp['weight']