def convert(subdir):
    """Run the cube-file conversion inside one snapshot sub-directory.

    The substitution table is read via ``read_atom_numbers``.  Within
    ``subdir`` the first ``.pdb`` and ``.top`` matches returned by ``find``
    are used, and ``smamp.aa2ua_cube.aa2ua_cube`` is called once for the
    ``esp`` and once for the ``rho`` cube file, with ``esp_ua.cube`` and
    ``rho_ua.cube`` as output names.

    NOTE(review): the previous summary said "united atom to all atom", but
    the callee name (``aa2ua``) and the ``_ua`` output suffix suggest the
    opposite direction -- please confirm.

    Args:
        subdir: directory that is entered (via ``cd``) for the conversion.
    """
    # Number of implicit hydrogens per atom, taken from the table
    implicit_hydrogens = read_atom_numbers()

    with cd(subdir):
        structure = find(path='..', folder_keyword='initial',
                         file_keyword='.pdb')[0]
        topology = find(path='..', folder_keyword='initial',
                        file_keyword='.top')[0]

        for kind in ('esp', 'rho'):
            cube = find(path='..', folder_keyword='dft_calculations',
                        file_keyword=kind + '.cube')[0]

            # Hand everything over to Johannes' conversion script
            smamp.aa2ua_cube.aa2ua_cube(
                infile_pdb=structure,
                infile_top=topology,
                infile_cube=cube,
                outfile_cube=kind + '_ua.cube',
                implicitHbondingPartners=implicit_hydrogens)
def main():
    """Average the snapshot cost functions for every (lnrho, sigma) pair.

    Cost-function files are located with ``find`` below the current
    directory and the two sweep parameters are parsed from file names of the
    form ``cost_<lnrho>_<sigma>.h5``.  For each parameter combination the
    matching files are passed to ``collect_matrices`` and ``average``; one of
    the input files is copied to ``./horton_charges/sweep_rhoref`` as a
    template and the averaged quantities are handed to ``export`` together
    with the path of that copy.
    """
    print('This is {}.'.format(__file__))

    # Create a folder for the averaged cost function (kept if it exists)
    chargepath = './horton_charges/sweep_rhoref'
    if not os.path.isdir(chargepath):
        os.makedirs(chargepath)

    cost_paths = find(path='.',
                      folder_keyword='4_horton_cost_function/lnrho_sweep',
                      file_keyword='cost',
                      nr_occ=None)

    lnrho_range = set()
    sigma_range = set()
    for charge_file in cost_paths:
        # Parse the parameters from the complete file stem.  A fixed-width
        # slice of the path would silently cut off long parameter strings.
        stem = os.path.splitext(os.path.basename(charge_file))[0]
        fields = stem.split('_')
        if len(fields) < 3:
            # Not of the form cost_<lnrho>_<sigma>; nothing to parse.
            continue
        lnrho_range.add(fields[-2])
        sigma_range.add(fields[-1])

    # Sorted iteration makes the processing order reproducible between runs.
    for lnrho in sorted(lnrho_range):
        print('lnrho = {}'.format(lnrho))
        for sigma in sorted(sigma_range):
            filename = 'cost_{}_{}.h5'.format(lnrho, sigma)
            cost_function_paths = find(path='.',
                                       folder_keyword='lnrho_sweep',
                                       file_keyword=filename,
                                       nr_occ=None)

            if len(cost_function_paths) == 0:
                print('WARNING: No cost functions found! Continue ..')
                continue

            # Extract cost function As and Bs
            A_matrices, B_vectors = collect_matrices(cost_function_paths)

            # Average over all matrices & vectors
            average_A, average_B = average(A_matrices, B_vectors)

            # Keep one HDF5 file as a template for writing into later
            out_name = 'costfunction_average_{}_{}.h5'.format(lnrho, sigma)
            template_path = os.path.join(chargepath, out_name)
            shutil.copyfile(cost_function_paths[0], template_path)

            # Export matrices to hdf5
            export(average_A, average_B, template_path=template_path)

    print('Done.')
def main():
    """Run ``bader`` on every density file and extract the charges.

    For each density file returned by ``find`` the sibling folder
    ``5_bader_charges`` is entered.  Unless ``bader_charges.csv`` already
    exists there, ``bader -p atom_index`` is executed on the density file
    with its output redirected to ``bader.log``, and
    ``smamp.extract_bader_charges.extract`` is called with the structure and
    topology files of the same snapshot.
    """
    print('This is {}.'.format(__file__))

    rho_paths = find(path='./', folder_keyword='dft', file_keyword='rho')
    for path in rho_paths:
        # Get the exact density file name
        folder_path, file_name = os.path.split(path)

        # Go to the bader directory to place the bader analysis output there
        topdir = os.path.split(folder_path)[0]
        bader_dir = os.path.join(topdir, '5_bader_charges')

        with cd(bader_dir):
            print('Moving to {}'.format(bader_dir))

            # Check for existing results first, so that finished snapshots
            # do not trigger another directory crawl.
            if os.path.exists('bader_charges.csv'):
                print('File exists, skipping.')
                continue

            # Find structure and topology files of the same snapshot
            snapshot_path = find(path='..', folder_keyword='initial',
                                 file_keyword='.pdb')[0]
            top_path = find(path='..', folder_keyword='initial',
                            file_keyword='.top')[0]

            # Pass the command as an argument list (no shell), so unusual
            # characters in the file name cannot change the command.
            command = ['bader', '-p', 'atom_index',
                       os.path.join('../2_dft_calculations/', file_name)]

            # Write output to logfile and wait for bader to finish
            with open('bader.log', 'w') as logfile:
                print('Running bader ...')
                returncode = subprocess.call(command,
                                             stdout=logfile,
                                             stderr=subprocess.STDOUT)

            if returncode != 0:
                print('WARNING: bader exited with code {}, see bader.log.'
                      .format(returncode))

            # Extract charges from the bader analysis output to .csv
            print('Bader done. Extracting bader charges ...')
            smamp.extract_bader_charges.extract(snapshot_path, top_path)
            print('Extraction done.')

    print('Done.')
def collect_avg():
    """Collect the charge files of the averaged cost functions.

    Files are located with ``find``; the parameters are parsed from file
    names of the form ``<prefix>_<lnrho>_<sigma>.csv`` and only the sigma
    values listed below are kept.  Each file gets the columns ``lnrhoref``
    and ``sigma`` (as floats) and ``diff``, the squared difference of its
    ``q`` and ``q_unconstrained`` columns.

    Returns:
        Long-format DataFrame (``pd.melt``) with id columns ``atom``,
        ``residue``, ``lnrhoref``, ``sigma`` and ``diff`` as value.
    """
    import os  # local import: only needed for parsing the file name

    sarah_wish_sigma = ('0.2', '0.4', '0.6', '0.8', '1.0', '1.2', '1.4')

    cost_paths = find(path='.',
                      folder_keyword='horton_charges/sweep_rhoref',
                      file_keyword='charges',
                      nr_occ=None)

    frames = []
    for charge_file in cost_paths:
        # Parse parameters from the complete file stem; a fixed-width slice
        # would silently truncate long parameter strings.
        stem = os.path.splitext(os.path.basename(charge_file))[0]
        fields = stem.split('_')
        if len(fields) < 3:
            continue
        lnrho, sigma = fields[-2:]
        if sigma not in sarah_wish_sigma:
            continue

        df = pd.read_csv(charge_file)

        # Paste the parameters into the dataframe
        df['lnrhoref'] = float(lnrho)
        df['sigma'] = float(sigma)
        df['diff'] = (df.q - df.q_unconstrained)**2
        frames.append(df)

    # Concatenate once: DataFrame.append copied all rows on every call and
    # no longer exists in pandas >= 2.0.
    collect_df = pd.concat(frames) if frames else pd.DataFrame()

    collect_df = pd.melt(collect_df,
                         id_vars=['atom', 'residue', 'lnrhoref', 'sigma'],
                         value_vars=['diff'])
    return collect_df
def collect_snapshots(plot_range=[-9, -5]):
    """Collect the per-snapshot charge files of the lnrho sweep.

    Files are located with ``find``; the parameters are parsed from file
    names of the form ``<prefix>_<lnrho>_<sigma>.csv`` and only the sigma
    values listed below are kept.  Each file gets the columns ``lnrhoref``
    and ``sigma`` (floats), ``snapshot`` (first run of digits in the file
    path) and ``diff``, the squared difference of ``q`` and
    ``q_unconstrained``.

    Args:
        plot_range: not used by this function; kept so existing callers keep
            working.  The list default is never mutated here.

    Returns:
        Long-format DataFrame (``pd.melt``) with id columns ``atom``,
        ``residue``, ``snapshot``, ``lnrhoref``, ``sigma`` and ``diff`` as
        value.
    """
    import os  # local import: only needed for parsing the file name

    sarah_wish_sigma = ('0.2', '0.4', '0.6', '0.8', '1.0', '1.2', '1.4')

    cost_paths = find(path='.',
                      folder_keyword='4_horton_cost_function/lnrho_sweep',
                      file_keyword='charge',
                      nr_occ=None)

    frames = []
    for charge_file in cost_paths:
        # Parse parameters from the complete file stem; a fixed-width slice
        # would silently truncate long parameter strings.
        stem = os.path.splitext(os.path.basename(charge_file))[0]
        fields = stem.split('_')
        if len(fields) < 3:
            continue
        lnrho, sigma = fields[-2:]
        if sigma not in sarah_wish_sigma:
            continue

        df = pd.read_csv(charge_file)

        # Paste the parameters into the dataframe
        df['lnrhoref'] = float(lnrho)
        df['sigma'] = float(sigma)

        # Also note the snapshot identifier
        df['snapshot'] = re.findall(r'\d+', charge_file)[0]

        df['diff'] = (df.q - df.q_unconstrained)**2
        frames.append(df)

    # Concatenate once: DataFrame.append copied all rows on every call and
    # no longer exists in pandas >= 2.0.
    collect_df = pd.concat(frames) if frames else pd.DataFrame()

    collect_df = pd.melt(
        collect_df,
        id_vars=['atom', 'residue', 'snapshot', 'lnrhoref', 'sigma'],
        value_vars=['diff'])
    return collect_df
def collect_snapshots():
    """Collect the charges of all snapshots of the lnrho sweep.

    Files are located with ``find``; the parameters are parsed from file
    names of the form ``<prefix>_<lnrho>_<sigma>.csv`` and only the sigma
    values listed below are kept.  Each file gets the columns ``lnrho`` and
    ``sigma`` (kept as strings) and ``snapshot`` (first run of digits in the
    file path).

    Returns:
        Long-format DataFrame (``pd.melt``) with id columns ``atom``,
        ``residue``, ``lnrho``, ``sigma``, ``snapshot`` and ``q`` as value.
    """
    import os  # local import: only needed for parsing the file name

    accepted_sigmas = ('0.2', '0.4', '0.6', '0.8', '1.0', '1.2', '1.4')

    cost_paths = find(path='.',
                      folder_keyword='4_horton_cost_function/lnrho_sweep',
                      file_keyword='charges',
                      nr_occ=None)

    frames = []
    for charge_file in cost_paths:
        # Parse parameters from the complete file stem; a fixed-width slice
        # would silently truncate long parameter strings.
        stem = os.path.splitext(os.path.basename(charge_file))[0]
        fields = stem.split('_')
        if len(fields) < 3:
            continue
        lnrho, sigma = fields[-2:]
        if sigma not in accepted_sigmas:
            continue

        df = pd.read_csv(charge_file)

        # Paste the parameters into the dataframe
        df['lnrho'] = lnrho
        df['sigma'] = sigma

        # Also note the snapshot identifier
        df['snapshot'] = re.findall(r'\d+', charge_file)[0]
        frames.append(df)

    # Concatenate once: DataFrame.append copied all rows on every call and
    # no longer exists in pandas >= 2.0.
    collect_df = pd.concat(frames) if frames else pd.DataFrame()

    collect_df = pd.melt(
        collect_df,
        id_vars=['atom', 'residue', 'lnrho', 'sigma', 'snapshot'],
        value_vars=['q'])
    return collect_df
def collect_bfgs_energies():
    """Find BFGS energies and put them in one dataframe.

    Every file returned by ``find`` is cleaned of bracketed text, asterisks
    and the ``BFGSLineSearch: `` prefix, then parsed as a whitespace-separated
    table.  The integer obtained from the name of the folder two levels above
    the file (``<time>_ps_snapshot``) is stored in the ``Timestamp`` column.

    Returns:
        The concatenated DataFrame, or ``None`` if no file was found.
    """
    # Crawl the directory structure
    bfgs_list = find(path='./',
                     folder_keyword='dft',
                     file_keyword='BFGS',
                     nr_occ=None)

    frames = []
    for energy_file in bfgs_list:
        print('Moving to {}'.format(energy_file))

        # The snapshot folder is two levels above the energy file
        topdir = os.path.split(os.path.split(energy_file)[0])[0]
        timestamp = int(topdir.replace('./', '').replace('_ps_snapshot', ''))

        with open(energy_file) as data:
            text = data.read()

        # Strip everything that would confuse the table parser
        text = re.sub(r"\[.+\]", "", text)
        text = text.replace('*', '')
        text = text.replace('BFGSLineSearch: ', '')

        # Raw string: '\s' in a normal literal is an invalid escape sequence.
        df = pd.read_csv(StringIO(text), sep=r'\s+', engine='python',
                         skiprows=[1])
        df['Timestamp'] = timestamp
        frames.append(df)

    if not frames:
        return None

    # Concatenate once: DataFrame.append copied all rows on every call and
    # no longer exists in pandas >= 2.0.
    return pd.concat(frames)
def collect_averages():
    """Collect the averaged charges for sigma = 0.8.

    Files are located with ``find``; the parameters are parsed from file
    names of the form ``<prefix>_<lnrho>_<sigma>.csv``.  For every file with
    sigma ``'0.8'`` the file ``charges_<lnrho>_<sigma>.csv`` is looked up
    again, read, and tagged with the float column ``lnrho``.

    Returns:
        Long-format DataFrame (``pd.melt``) with id columns ``atom``,
        ``residue``, ``lnrho`` and ``q`` as value.
    """
    import os  # local import: only needed for parsing the file name

    cost_paths = find(path='.',
                      folder_keyword='horton_charges/sweep_rhoref',
                      file_keyword='charge',
                      nr_occ=None)

    frames = []
    for charge_file in cost_paths:
        # Parse parameters from the complete file stem; a fixed-width slice
        # would silently truncate long parameter strings.
        stem = os.path.splitext(os.path.basename(charge_file))[0]
        fields = stem.split('_')
        if len(fields) < 3:
            continue
        lnrho, sigma = fields[-2:]
        if sigma != '0.8':
            continue

        charge_file = find(
            path='.',
            folder_keyword='horton_charges/sweep_rhoref',
            file_keyword='charges_{}_{}.csv'.format(lnrho, sigma),
            nr_occ=1)[0]
        df = pd.read_csv(charge_file)
        df['lnrho'] = float(lnrho)
        frames.append(df)

    # Concatenate once: DataFrame.append copied all rows on every call and
    # no longer exists in pandas >= 2.0.
    collect_df = pd.concat(frames) if frames else pd.DataFrame()

    collect_df = pd.melt(collect_df,
                         id_vars=['atom', 'residue', 'lnrho'],
                         value_vars=['q'])
    return collect_df
def read_input_files():
    """Search for and read input files (with implicit H-atoms).

    The first ``.pdb`` and ``.top`` matches returned by ``find`` are used.

    Returns:
        Tuple ``(ase_struct, pmd_struct, pmd_top)``: the structure file read
        with ``ase.io.read`` and with ``pmd.load_file``, and the topology
        loaded as ``gromacs.GromacsTopologyFile`` (``parametrize=False``).

    Raises:
        RuntimeError: if ``find`` returns no structure or no topology file.
            Previously this surfaced as an IndexError, because the
            "not found" checks could never trigger and referred to an
            undefined name.
    """
    search_root = '..'

    pdb_matches = find(path=search_root,
                       folder_keyword='initial_structure',
                       file_keyword='.pdb')
    if not pdb_matches:
        raise RuntimeError(
            'structure file (.pdb) not found in {}'.format(search_root))

    top_matches = find(path=search_root,
                       folder_keyword='initial_structure',
                       file_keyword='.top')
    if not top_matches:
        raise RuntimeError(
            'topology file (.top) not found in {}'.format(search_root))

    pdb_file = pdb_matches[0]
    top_file = top_matches[0]

    ase_struct = ase.io.read(pdb_file)
    pmd_struct = pmd.load_file(pdb_file)
    pmd_top = gromacs.GromacsTopologyFile(top_file, parametrize=False)

    return ase_struct, pmd_struct, pmd_top
"""Visualize the convergence of potential differences. Copyright 2019 Simulation Lab University of Freiburg Author: Lukas Elflein <*****@*****.**> """ import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns from smamp.tools import find print('Load data (wide format) ...') data_paths = find(path='.', folder_keyword='/data', file_keyword='csv', nr_occ=None) collect_df = pd.DataFrame() for data_file in data_paths: df = pd.read_csv(data_file) collect_df = collect_df.append(df) print('Manipulating data ...') df = collect_df.reset_index(drop=True) df.columns = df.columns.map(str.strip) df['lnrho'] = pd.to_numeric(df['lnrho']) df['sigma'] = df['sigma'].apply(lambda x: str(x)) df = df.loc[df.lnrho < -3] df_0 = df.loc[df.charge == 0] df_1 = df.loc[df.charge == 1]
def main():
    """Write the ESP error of every point-charge set to ``data/``.

    For each total charge in (0, 1, 2) the charge files below
    ``../<charge>_charge/1_charge_cycle`` are located with ``find``.  For
    every file whose sigma is in the accepted list, ``calc_error`` is called
    with the ``esp.cube`` of the same snapshot and the result is written to
    ``data/pot_err_<charge>_<time>_<lnrho>_<sigma>.csv``.  Existing output
    files are skipped.

    The charges and paths are processed in random order.
    NOTE(review): presumably so that several concurrent runs rarely work on
    the same file -- confirm.
    """
    print('This is {}.'.format(__file__))

    accepted_sigmas = ('0.2', '0.4', '0.6', '0.8', '1.0', '1.2', '1.4', '1.6')

    # Make sure the output folder exists before any expensive work is done;
    # otherwise the first write fails only after the error was computed.
    out_dir = 'data'
    if not os.path.isdir(out_dir):
        try:
            os.makedirs(out_dir)
        except OSError:
            # Another process may have created the folder in the meantime.
            if not os.path.isdir(out_dir):
                raise

    charges = [0, 1, 2]
    random.shuffle(charges)
    for charge in charges:
        # Handed to and returned by calc_error; reset for every charge
        pmd2ase = None

        root = '../{}_charge/1_charge_cycle'.format(charge)
        pdb_path = find(path=root,
                        folder_keyword='0_initial_structure',
                        file_keyword='.pdb')[0]
        top_path = find(path=root,
                        folder_keyword='0_initial_structure',
                        file_keyword='.top')[0]
        hyd_path = find(path='../{}_charge'.format(charge),
                        folder_keyword='fitting',
                        file_keyword='hydrogen_per_atom.csv')[0]

        # Find all files with point charges
        charge_paths = find(path=root,
                            folder_keyword='4_horton_cost_function/lnrho',
                            file_keyword='charges',
                            nr_occ=None)
        print('{} paths found.'.format(len(charge_paths)))
        random.shuffle(charge_paths)

        for i, charge_path in enumerate(charge_paths, start=1):
            if i % 10 == 0:
                print('{} of {}'.format(i, len(charge_paths)))

            # Parse parameters from the complete file stem; a fixed-width
            # slice would silently truncate long parameter strings.
            stem = os.path.splitext(os.path.basename(charge_path))[0]
            fields = stem.split('_')
            if len(fields) < 3:
                # Not of the form <prefix>_<lnrho>_<sigma>; nothing to do.
                continue
            lnrho, sigma = fields[-2:]
            if sigma not in accepted_sigmas:
                print('skipping sigma {}'.format(sigma))
                continue

            # The fourth path component is the snapshot folder; the part
            # before its first underscore is used as time label.
            timestamp = charge_path.split('/')[3].split('_')[0]

            out_path = 'data/pot_err_{}_{}_{}_{}.csv'.format(
                charge, timestamp, lnrho, sigma)
            if os.path.exists(out_path):
                print('exists, skipping.')
                continue

            # Only crawl for the cube file once we know there is work to do.
            work_dir = os.path.split(
                os.path.split(os.path.split(charge_path)[0])[0])[0]
            cube_path = find(path=work_dir,
                             folder_keyword='2_dft_calculations',
                             file_keyword='esp.cube')[0]

            # Calculate the error between DFT-ESP and point-charge ESP
            rrmsd, pmd2ase = calc_error(cube_path,
                                        pdb_path,
                                        top_path,
                                        hyd_path,
                                        charge_path,
                                        pmd2ase=pmd2ase,
                                        n_samples=40000)

            # Write to file
            with open(out_path, 'w') as outfile:
                outfile.write('charge, time, lnrho, sigma, rrmsd\n')
                outfile.write('{}, {}, {}, {}, {}\n'.format(
                    charge, timestamp, lnrho, sigma, rrmsd))

    print('Done.')
def main():
    """Fit charges for every cost function of the lnrho sweep.

    The sweep parameters are parsed from file names of the form
    ``cost_<lnrho>_<sigma>.h5``.  For each parameter combination the snapshot
    cost functions and the averaged cost function are located with ``find``,
    and ``calc_charges`` is called for each of them unless the output file
    ``charges_<lnrho>_<sigma>.csv`` already exists next to the cost function.
    """
    print('This is {}.'.format(__file__))

    # We need any one top and pdb file for the ordering of atom-names;
    # the exact snapshot we use does not matter as the ordering is invariant
    pdb_file = find(path='.',
                    folder_keyword='0_initial_structure',
                    file_keyword='.pdb')[0]
    top_file = find(path='.',
                    folder_keyword='0_initial_structure',
                    file_keyword='.top')[0]
    hyd_file = find(path='..',
                    folder_keyword='fitting',
                    file_keyword='hydrogen_per_atom.csv')[0]

    all_cost_paths = find(path='.',
                          folder_keyword='4_horton_cost_function/lnrho_sweep',
                          file_keyword='cost',
                          nr_occ=None)

    lnrho_range = set()
    sigma_range = set()
    for charge_file in all_cost_paths:
        # Parse the parameters from the complete file stem.  A fixed-width
        # slice of the path would silently cut off long parameter strings.
        stem = os.path.splitext(os.path.basename(charge_file))[0]
        fields = stem.split('_')
        if len(fields) < 3:
            # Not of the form cost_<lnrho>_<sigma>; nothing to parse.
            continue
        lnrho_range.add(fields[-2])
        sigma_range.add(fields[-1])

    # Sorted iteration makes the processing order reproducible between runs.
    for lnrho in sorted(lnrho_range):
        print('lnrho = {} ...'.format(lnrho))
        for sigma in sorted(sigma_range):
            # Find the paths for the unaveraged snapshot cost functions
            snapshot_costs = find(
                path='.',
                folder_keyword='4_horton_cost_function/lnrho_sweep',
                file_keyword='cost_{}_{}.h5'.format(lnrho, sigma),
                nr_occ=None)

            # Find the path for the average cost function
            cost_avg = find(
                path='.',
                folder_keyword='horton_charges/sweep_rhoref',
                file_keyword='costfunction_average_{}_{}.h5'.format(
                    lnrho, sigma),
                nr_occ=None)

            for cost_file in list(snapshot_costs) + list(cost_avg):
                folder = os.path.split(cost_file)[0]
                output_file = os.path.join(
                    folder, 'charges_{}_{}.csv'.format(lnrho, sigma))

                if os.path.exists(output_file):
                    continue

                print('Optimizing charges for {}.'.format(cost_file[:18]))
                calc_charges(pdb_file,
                             top_file,
                             hyd_file,
                             cost_file,
                             output_file=output_file)

    print('Done.')
def _find_constraint_file(file_name):
    """Return the first match for `file_name` found by ``find``, else None."""
    try:
        return find('..', 'fitting_constraint_files', file_name)[0]
    except Exception:
        # Best effort: a missing file simply means "no such constraint".
        # Unlike a bare ``except``, this lets Ctrl-C and SystemExit through.
        return None


def calc_charges(pdb_infile,
                 top_infile,
                 hydrogen_file,
                 horton_cost_function,
                 output_file,
                 charge_group_file=None,
                 charge_group_charges_file=None,
                 symmetry_file=None):
    """Wraps fitESPconstrained.py

    Builds the structure, reads the HORTON cost function, assembles the
    constraints, runs the constrained and the unconstrained minimization and
    passes both charge sets to ``write_charges``.

    Args:
        pdb_infile: structure file, passed to ``create_structure``.
        top_infile: topology file, passed to ``create_structure``.
        hydrogen_file: hydrogen table, passed to ``create_structure``.
        horton_cost_function: file read by ``read_horton_cost_function``.
        output_file: passed to ``write_charges`` as ``out_name``.
        charge_group_file: constraint file; if None,
            ``atoms_in_charge_group.csv`` is searched for.
        charge_group_charges_file: constraint file; if None,
            ``charge_group_total_charge.csv`` is searched for.
        symmetry_file: constraint file; if None,
            ``atoms_of_same_charge.csv`` is searched for.

    A constraint file that is neither given nor found stays None.
    """
    # Look up the relationship between ASE indices, atom names
    pmd_struct, pmd_top, ase2pmd = create_structure(pdb_infile, top_infile,
                                                    hydrogen_file)
    print('Atomic structure built.')

    # Import A and B matrices from HORTON
    A, B = read_horton_cost_function(horton_cost_function)

    # Fall back to searching for every constraint file that was not given
    if charge_group_file is None:
        charge_group_file = _find_constraint_file(
            'atoms_in_charge_group.csv')
    if charge_group_charges_file is None:
        charge_group_charges_file = _find_constraint_file(
            'charge_group_total_charge.csv')
    if symmetry_file is None:
        symmetry_file = _find_constraint_file('atoms_of_same_charge.csv')

    # Calculate constraints
    logic_constraints, charge_constraints = get_constraints(
        args=None,
        ase2pmd=ase2pmd,
        charge_group_file=charge_group_file,
        charge_group_charges_file=charge_group_charges_file,
        symmetry_file=symmetry_file,
        debug=False)
    print('Constraints calculated: {} non-redunant.'.format(
        logic_constraints.shape[0]))

    # Run the constrained minimization; the second return value (named
    # Lagrange forces in the original) is not used here
    q, _lagrange_forces = constrained_minimize(A, B, logic_constraints,
                                               charge_constraints)

    q_unconstrained = unconstrained_minimize(A, B)

    # Save charges
    write_charges(q,
                  q_unconstrained,
                  ase2pmd,
                  out_name=output_file,
                  plot=False)

    print('Charges written to {}.'.format(output_file))
def main():
    """Fit point charges in every cost-function folder below the cwd.

    Walks the directory tree in sorted order.  Folders whose path contains
    ``template``, ``exclude`` or ``sweep`` are ignored; folders whose path
    contains ``horton_cost_function`` or ``horton_charges`` are entered and
    ``calc_charges`` is called there, unless ``fitted_point_charges.csv``
    already exists.  For ``horton_charges`` folders the averaged cost
    function and a constraint folder one level closer are used.
    """
    print('This is {}.'.format(__file__))

    # Remember where we started
    topdir = os.getcwd()
    print('Current working dir: {}'.format(topdir))

    # Total charge of the system (the value is not used further below)
    qtot = read_total_charge(
        path='../fitting_constraint_files/total_charge.csv')

    ignored = ('template', 'exclude', 'sweep')

    for subdir, dirs, files in sorted(os.walk('.')):
        # Template folders and the like are not part of the search
        if any(word in subdir for word in ignored):
            continue

        # Only folders holding a cost function are of interest
        averaged = 'horton_charges' in subdir
        if not averaged and 'horton_cost_function' not in subdir:
            continue

        print('Moving to {}'.format(subdir))
        with cd(subdir):
            # The .pdb and .top files live in the folders above
            located = {}
            for suffix in ('.pdb', '.top'):
                located[suffix] = find('..',
                                       folder_keyword='initial',
                                       file_keyword=suffix,
                                       nr_occ=1,
                                       exclude_kw=list(ignored))[0]

            if averaged:
                constraint_path = '../../fitting_constraint_files/'
                cost_file = 'costfunction_average.h5'
            else:
                constraint_path = '../../../fitting_constraint_files/'
                cost_file = 'cost.h5'

            output_file = 'fitted_point_charges.csv'
            if os.path.isfile(output_file):
                print('File exists, skipping: {}'.format(
                    os.path.join(subdir, output_file)))
                continue

            calc_charges(
                pdb_infile=located['.pdb'],
                top_infile=located['.top'],
                horton_cost_function=cost_file,
                output_file=output_file,
                hydrogen_file=os.path.join(constraint_path,
                                           'hydrogen_per_atom.csv'),
                charge_group_file=os.path.join(
                    constraint_path, 'atoms_in_charge_group.csv'),
                charge_group_charges_file=os.path.join(
                    constraint_path, 'charge_group_total_charge.csv'),
                symmetry_file=os.path.join(constraint_path,
                                           'atoms_of_same_charge.csv'))