Python find Beispiele, smamp.tools.find Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: loop_convert_AA_to_UA.py Projekt: lukaselflein/charge_optimization_folderstructure

def convert(subdir):
    """Run the aa2ua cube conversion for the ESP and density cubes of a folder.

    Inside ``subdir`` (entered via the ``cd`` context manager) the structure
    (.pdb) and topology (.top) files are looked up in a folder matching
    'initial', searching from '..'. For each of the cube types 'esp' and
    'rho', the matching cube file from a 'dft_calculations' folder is passed
    to ``smamp.aa2ua_cube.aa2ua_cube`` with ``<type>_ua.cube`` as the output
    file name.

    Args:
        subdir: folder to work in; presumably a snapshot sub-folder
            (NOTE(review): confirm against callers).
    """
    # Substitution numbers (hydrogens per atom) are read from a table
    implicit_hydrogens = read_atom_numbers()

    with cd(subdir):
        structure_file = find(path='..',
                              folder_keyword='initial',
                              file_keyword='.pdb')[0]
        topology_file = find(path='..',
                             folder_keyword='initial',
                             file_keyword='.top')[0]

        for cube_kind in ('esp', 'rho'):
            cube_name = '{}.cube'.format(cube_kind)
            cube_in = find(path='..',
                           folder_keyword='dft_calculations',
                           file_keyword=cube_name)[0]

            # Hand everything over to Johannes' conversion script
            smamp.aa2ua_cube.aa2ua_cube(
                infile_pdb=structure_file,
                infile_top=topology_file,
                infile_cube=cube_in,
                outfile_cube=cube_kind + '_ua' + '.cube',
                implicitHbondingPartners=implicit_hydrogens)

Beispiel #2

0

Datei anzeigen

def main():
    """Average the cost functions of all snapshots per (lnrho, sigma) pair.

    The distinct lnrho and sigma values are parsed from the names of the
    cost files found below '4_horton_cost_function/lnrho_sweep'. For every
    combination, the matching cost files are collected, averaged, and
    exported into './horton_charges/sweep_rhoref'.
    """
    print('This is {}.'.format(__file__))

    # Target folder for the averaged cost functions; an existing one is reused
    chargepath = './horton_charges/sweep_rhoref'
    if not os.path.isdir(chargepath):
        os.makedirs(chargepath)

    sweep_files = find(path='.',
                       folder_keyword='4_horton_cost_function/lnrho_sweep',
                       file_keyword='cost',
                       nr_occ=None)

    # Gather the distinct parameter values encoded in the file names
    lnrho_range = set()
    sigma_range = set()
    for sweep_file in sweep_files:
        # The tail of the path (minus the '.h5' extension) ends in
        # '<lnrho>_<sigma>'
        lnrho, sigma = sweep_file[-15:-3].split('_')[-2:]
        lnrho_range.add(lnrho)
        sigma_range.add(sigma)

    for lnrho in lnrho_range:
        print('lnrho = {}'.format(lnrho))
        for sigma in sigma_range:
            matches = find(path='.',
                           folder_keyword='lnrho_sweep',
                           file_keyword='cost_{}_{}.h5'.format(lnrho, sigma),
                           nr_occ=None)

            if len(matches) == 0:
                print('WARNING: No cost functions found! Continue ..')
                continue

            # Extract the cost function As and Bs, then average them
            A_matrices, B_vectors = collect_matrices(matches)
            average_A, average_B = average(A_matrices, B_vectors)

            # Copy one HDF5 file as a template that export() writes into
            template_path = os.path.join(
                chargepath,
                'costfunction_average_{}_{}.h5'.format(lnrho, sigma))
            shutil.copyfile(matches[0], template_path)

            # Export matrices to hdf5
            export(average_A, average_B, template_path=template_path)

    print('Done.')

Beispiel #3

0

Datei anzeigen

Datei: loop_bader.py Projekt: lukaselflein/charge_optimization_folderstructure

def main():
    """Run the bader program on every density file and extract the charges.

    For each file matching 'rho' in a 'dft' folder, the sibling folder
    '5_bader_charges' is entered (via ``cd``), ``bader`` is executed through
    the shell with its output redirected to 'bader.log', and the charges are
    extracted with ``smamp.extract_bader_charges.extract``. Folders that
    already contain 'bader_charges.csv' are skipped.
    """
    print('This is {}.'.format(__file__))

    for rho_path in find(path='./', folder_keyword='dft', file_keyword='rho'):
        # Split off the exact density file name
        dft_dir, cube_name = os.path.split(rho_path)

        # The bader output goes into a sibling folder of the DFT folder
        snapshot_dir = os.path.split(dft_dir)[0]
        bader_dir = os.path.join(snapshot_dir, '5_bader_charges')

        with cd(bader_dir):
            print('Moving to {}'.format(bader_dir))

            # Structure and topology files belonging to the same snapshot
            snapshot_path = find(path='..',
                                 folder_keyword='initial',
                                 file_keyword='.pdb')[0]
            top_path = find(path='..',
                            folder_keyword='initial',
                            file_keyword='.top')[0]

            if os.path.exists('bader_charges.csv'):
                print('File exists, skipping.')
                continue

            # Shell command for the bader program
            command = 'bader -p atom_index ' + os.path.join(
                '../2_dft_calculations/', cube_name)

            # stdout and stderr of bader both end up in the logfile
            with open('bader.log', 'w') as logfile:
                print('Running bader ...')
                process = subprocess.Popen(command,
                                           shell=True,
                                           stdout=logfile,
                                           stderr=subprocess.STDOUT)

                # Block until the shell command has finished
                process.communicate()

                # Extract charges from the bader analysis output to .csv
                print('Bader done. Extracting bader charges ...')
                smamp.extract_bader_charges.extract(snapshot_path, top_path)
                print('Extraction done.')

    print('Done.')

Beispiel #4

0

Datei anzeigen

def collect_avg():
    """Collect squared charge differences from the averaged charge files.

    Reads every file matching 'charges' in 'horton_charges/sweep_rhoref',
    keeps those whose sigma (parsed from the file name) is in the selected
    list, and adds the columns 'lnrhoref', 'sigma' and
    'diff' = (q - q_unconstrained)**2.

    Returns:
        The long-format result of ``pd.melt`` with id columns
        'atom', 'residue', 'lnrhoref', 'sigma' and value column 'diff'.
    """
    cost_paths = find(path='.',
                      folder_keyword='horton_charges/sweep_rhoref',
                      file_keyword='charges',
                      nr_occ=None)

    # Loop-invariant selection of sigma values, hoisted out of the loop
    sarah_wish_sigma = ('0.2', '0.4', '0.6', '0.8', '1.0', '1.2', '1.4')

    # DataFrame.append was removed in pandas 2.0 (and copied all rows on each
    # call); gather the frames and concatenate once instead.
    frames = []
    for charge_file in cost_paths:
        # Parse parameters from filename: the tail (minus '.csv') ends in
        # '<lnrho>_<sigma>'
        lnrho, sigma = charge_file[-15:-4].split('_')[-2:]
        if sigma not in sarah_wish_sigma:
            continue

        df = pd.read_csv(charge_file)

        # Paste the sweep parameters into the dataframe
        df['lnrhoref'] = float(lnrho)
        df['sigma'] = float(sigma)

        # Squared deviation between constrained and unconstrained charges
        df['diff'] = (df.q - df.q_unconstrained)**2

        frames.append(df)

    # With no matching file an empty frame is handed to pd.melt, exactly as
    # the original implementation did.
    collect_df = pd.concat(frames) if frames else pd.DataFrame()

    return pd.melt(collect_df,
                   id_vars=['atom', 'residue', 'lnrhoref', 'sigma'],
                   value_vars=['diff'])

Beispiel #5

0

Datei anzeigen

def collect_snapshots(plot_range=[-9, -5]):
    """Collect squared charge differences from the per-snapshot charge files.

    Reads every file matching 'charge' in
    '4_horton_cost_function/lnrho_sweep', keeps those whose sigma (parsed
    from the file name) is in the selected list, and adds the columns
    'lnrhoref', 'sigma', 'snapshot' and 'diff' = (q - q_unconstrained)**2.

    Args:
        plot_range: not used by this function; kept for interface
            compatibility. The list default is never mutated here.

    Returns:
        The long-format result of ``pd.melt`` with id columns 'atom',
        'residue', 'snapshot', 'lnrhoref', 'sigma' and value column 'diff'.
    """
    cost_paths = find(path='.',
                      folder_keyword='4_horton_cost_function/lnrho_sweep',
                      file_keyword='charge',
                      nr_occ=None)

    # Loop-invariant selection of sigma values, hoisted out of the loop
    sarah_wish_sigma = ('0.2', '0.4', '0.6', '0.8', '1.0', '1.2', '1.4')

    # DataFrame.append was removed in pandas 2.0 (and copied all rows on each
    # call); gather the frames and concatenate once instead.
    frames = []
    for charge_file in cost_paths:
        # Parse parameters from filename: the tail (minus '.csv') ends in
        # '<lnrho>_<sigma>'
        lnrho, sigma = charge_file[-15:-4].split('_')[-2:]
        if sigma not in sarah_wish_sigma:
            continue

        df = pd.read_csv(charge_file)

        # Paste the sweep parameters into the dataframe
        df['lnrhoref'] = float(lnrho)
        df['sigma'] = float(sigma)

        # The first run of digits in the path serves as snapshot identifier
        df['snapshot'] = re.findall(r'\d+', charge_file)[0]

        # Squared deviation between constrained and unconstrained charges
        df['diff'] = (df.q - df.q_unconstrained)**2

        frames.append(df)

    # With no matching file an empty frame is handed to pd.melt, exactly as
    # the original implementation did.
    collect_df = pd.concat(frames) if frames else pd.DataFrame()

    return pd.melt(
        collect_df,
        id_vars=['atom', 'residue', 'snapshot', 'lnrhoref', 'sigma'],
        value_vars=['diff'])

Beispiel #6

0

Datei anzeigen

def collect_snapshots():
    """Collect the charges of all snapshots into one long-format dataframe.

    Reads every file matching 'charges' in
    '4_horton_cost_function/lnrho_sweep', keeps those whose sigma (parsed
    from the file name) is in the selected list, and tags each row with
    'lnrho', 'sigma' (both kept as strings) and 'snapshot'.

    Returns:
        The long-format result of ``pd.melt`` with id columns 'atom',
        'residue', 'lnrho', 'sigma', 'snapshot' and value column 'q'.
    """
    cost_paths = find(path='.',
                      folder_keyword='4_horton_cost_function/lnrho_sweep',
                      file_keyword='charges',
                      nr_occ=None)

    # Loop-invariant selection of sigma values, hoisted out of the loop
    selected_sigmas = ('0.2', '0.4', '0.6', '0.8', '1.0', '1.2', '1.4')

    # DataFrame.append was removed in pandas 2.0 (and copied all rows on each
    # call); gather the frames and concatenate once instead.
    frames = []
    for charge_file in cost_paths:
        # Parse parameters from filename: the tail (minus '.csv') ends in
        # '<lnrho>_<sigma>'
        lnrho, sigma = charge_file[-15:-4].split('_')[-2:]
        if sigma not in selected_sigmas:
            continue

        df = pd.read_csv(charge_file)

        # Paste the sweep parameters into the dataframe
        df['lnrho'] = lnrho
        df['sigma'] = sigma

        # The first run of digits in the path serves as snapshot identifier
        df['snapshot'] = re.findall(r'\d+', charge_file)[0]

        frames.append(df)

    # With no matching file an empty frame is handed to pd.melt, exactly as
    # the original implementation did.
    collect_df = pd.concat(frames) if frames else pd.DataFrame()

    return pd.melt(collect_df,
                   id_vars=['atom', 'residue', 'lnrho', 'sigma', 'snapshot'],
                   value_vars=['q'])

Beispiel #7

0

Datei anzeigen

Datei: convergence_analysis.py Projekt: lukaselflein/charge_optimization_folderstructure

def collect_bfgs_energies():
    """Find BFGS energy logs and put them in one dataframe.

    Every file matching 'BFGS' in a 'dft' folder is cleaned of bracketed
    fields, asterisks and the 'BFGSLineSearch:  ' prefix, parsed as a
    whitespace-separated table (skipping the second line), and tagged with a
    'Timestamp' column taken from the name of the folder two levels up.

    Returns:
        The concatenated dataframe, or None if no log file was found.
    """
    # Crawl the directory structure
    bfgs_list = find(path='./',
                     folder_keyword='dft',
                     file_keyword='BFGS',
                     nr_occ=None)

    # DataFrame.append was removed in pandas 2.0; gather and concatenate once
    frames = []
    for energy_file in bfgs_list:
        print('Moving to {}'.format(energy_file))

        # The folder two levels up is named '<time>_ps_snapshot'
        topdir = os.path.split(os.path.split(energy_file)[0])[0]
        timestamp = int(topdir.replace('./', '').replace('_ps_snapshot', ''))

        with open(energy_file) as data:
            text = data.read()

        # Strip everything that would confuse the table parser
        text = re.sub(r"\[.+\]", "", text)
        text = re.sub(r"\*", "", text)
        text = re.sub('BFGSLineSearch:  ', '', text)

        # Raw string: '\s' in a plain literal is an invalid escape sequence
        df = pd.read_csv(StringIO(text),
                         sep=r'\s+',
                         engine='python',
                         skiprows=[1])
        df['Timestamp'] = timestamp
        frames.append(df)

    # Callers receive None, not an empty frame, when nothing was found
    if not frames:
        return None
    return pd.concat(frames)

Beispiel #8

0

Datei anzeigen

def collect_averages():
    """Collect the averaged charges for sigma == '0.8' in long format.

    Scans the files matching 'charge' in 'horton_charges/sweep_rhoref',
    and for every file whose parsed sigma equals '0.8' reads
    'charges_<lnrho>_<sigma>.csv' from the same folder keyword and tags the
    rows with a float 'lnrho' column.

    Returns:
        The long-format result of ``pd.melt`` with id columns
        'atom', 'residue', 'lnrho' and value column 'q'.
    """
    cost_paths = find(path='.',
                      folder_keyword='horton_charges/sweep_rhoref',
                      file_keyword='charge',
                      nr_occ=None)

    # DataFrame.append was removed in pandas 2.0 (and copied all rows on each
    # call); gather the frames and concatenate once instead.
    frames = []
    for charge_file in cost_paths:
        # Parse parameters from filename: the tail (minus '.csv') ends in
        # '<lnrho>_<sigma>'
        lnrho, sigma = charge_file[-15:-4].split('_')[-2:]
        if sigma != '0.8':
            continue

        # Look the file up again by its exact name, requesting one occurrence
        charge_file = find(path='.',
                           folder_keyword='horton_charges/sweep_rhoref',
                           file_keyword='charges_{}_{}.csv'.format(
                               lnrho, sigma),
                           nr_occ=1)[0]
        df = pd.read_csv(charge_file)
        df['lnrho'] = float(lnrho)
        frames.append(df)

    # With no matching file an empty frame is handed to pd.melt, exactly as
    # the original implementation did.
    collect_df = pd.concat(frames) if frames else pd.DataFrame()

    return pd.melt(collect_df,
                   id_vars=['atom', 'residue', 'lnrho'],
                   value_vars=['q'])

Beispiel #9

0

Datei anzeigen

def read_input_files():
    """Locate and load the structure and topology input files.

    The first '.pdb' and '.top' files found in an 'initial_structure'
    folder (searching from '..') are loaded: the structure with both ASE
    and ParmEd, the topology as a GROMACS topology without parametrization.

    Returns:
        Tuple ``(ase_struct, pmd_struct, pmd_top)``.

    Raises:
        RuntimeError: if the loaded ASE structure or the topology is None.
    """
    search_args = {'path': '..', 'folder_keyword': 'initial_structure'}
    pdb_file = find(file_keyword='.pdb', **search_args)[0]
    top_file = find(file_keyword='.top', **search_args)[0]

    ase_struct = ase.io.read(pdb_file)
    pmd_struct = pmd.load_file(pdb_file)
    pmd_top = gromacs.GromacsTopologyFile(top_file, parametrize=False)

    # Make sure we actually got everything we need
    # NOTE(review): input_dir is not defined inside this function;
    # presumably a module-level name - confirm.
    missing_message = None
    if ase_struct is None:
        missing_message = 'structure file (.pdb) not found in {}'
    elif pmd_top is None:
        missing_message = 'topology file (.top) not found in {}'
    if missing_message is not None:
        raise RuntimeError(missing_message.format(input_dir))

    return ase_struct, pmd_struct, pmd_top

Beispiel #10

0

Datei anzeigen

"""Visualize the convergence of potential differences.
Copyright 2019 Simulation Lab
University of Freiburg
Author: Lukas Elflein <*****@*****.**>
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from smamp.tools import find

# Load every csv file found in a '/data' folder into one wide dataframe.
print('Load data (wide format) ...')
data_paths = find(path='.',
                  folder_keyword='/data',
                  file_keyword='csv',
                  nr_occ=None)

# DataFrame.append was removed in pandas 2.0 (and copied all rows on each
# call); gather the frames and concatenate once instead.
frames = []
for data_file in data_paths:
    frames.append(pd.read_csv(data_file))
collect_df = pd.concat(frames) if frames else pd.DataFrame()

print('Manipulating data ...')
df = collect_df.reset_index(drop=True)
# Column names may carry surrounding whitespace; strip it
df.columns = df.columns.map(str.strip)
df['lnrho'] = pd.to_numeric(df['lnrho'])
df['sigma'] = df['sigma'].apply(str)
# Keep only the rows with lnrho below -3, then split by charge
df = df.loc[df.lnrho < -3]
df_0 = df.loc[df.charge == 0]
df_1 = df.loc[df.charge == 1]

Beispiel #11

0

Datei anzeigen

Datei: crawl_calc.py Projekt: lukaselflein/potential_error

def main():
    """Compute the potential error for every point-charge file found.

    For each total charge in (0, 1, 2), visited in random order, the
    structure, topology and hydrogen table are located once. Then every
    charge file of the lnrho sweep (also in random order) is compared against
    the 'esp.cube' of its snapshot through ``calc_error``, and the result is
    written to 'data/pot_err_<charge>_<time>_<lnrho>_<sigma>.csv'. Existing
    output files and sigma values outside the selected list are skipped.
    """
    print('This is {}.'.format(__file__))

    selected_sigmas = ['0.2', '0.4', '0.6', '0.8', '1.0', '1.2', '1.4', '1.6']

    charges = [0, 1, 2]
    random.shuffle(charges)
    for charge in charges:
        # Handed back by calc_error and passed in again on the next call
        pmd2ase = None

        root = '../{}_charge/1_charge_cycle'.format(charge)
        pdb_path = find(path=root,
                        folder_keyword='0_initial_structure',
                        file_keyword='.pdb')[0]
        top_path = find(path=root,
                        folder_keyword='0_initial_structure',
                        file_keyword='.top')[0]
        hyd_path = find(path='../{}_charge'.format(charge),
                        folder_keyword='fitting',
                        file_keyword='hydrogen_per_atom.csv')[0]

        # Find all files with point charges
        charge_paths = find(path=root,
                            folder_keyword='4_horton_cost_function/lnrho',
                            file_keyword='charges',
                            nr_occ=None)
        n_paths = len(charge_paths)
        print('{} paths found.'.format(n_paths))
        random.shuffle(charge_paths)

        for count, charge_path in enumerate(charge_paths, start=1):
            # Progress report on every tenth file
            if count % 10 == 0:
                print('{} of {}'.format(count, len(charge_paths)))

            # The snapshot folder lies three levels above the charge file
            work_dir = charge_path
            for _ in range(3):
                work_dir = os.path.split(work_dir)[0]
            cube_path = find(path=work_dir,
                             folder_keyword='2_dft_calculations',
                             file_keyword='esp.cube')[0]

            # Parse parameters from the file name and the path
            lnrho, sigma = charge_path[-15:-4].split('_')[-2:]
            time = charge_path.split('/')[3].split('_')[0]

            if sigma not in selected_sigmas:
                print('skipping sigma {}'.format(sigma))
                continue

            out_path = 'data/pot_err_{}_{}_{}_{}.csv'.format(
                charge, time, lnrho, sigma)
            if os.path.exists(out_path):
                print('exists, skipping.')
                continue

            # Calculate the error between DFT-ESP and point-charge ESP
            rrmsd, pmd2ase = calc_error(cube_path,
                                        pdb_path,
                                        top_path,
                                        hyd_path,
                                        charge_path,
                                        pmd2ase=pmd2ase,
                                        n_samples=40000)

            # Write header and the single result row
            row = '{}, {}, {}, {}, {}\n'.format(
                charge, time, lnrho, sigma, rrmsd)
            with open(out_path, 'w') as outfile:
                outfile.write('charge, time, lnrho, sigma, rrmsd\n')
                outfile.write(row)

    print('Done.')

Beispiel #12

0

Datei anzeigen

Datei: sweep_charges.py Projekt: lukaselflein/charge_optimization_folderstructure

def main():
    """Optimize charges for every cost function of the parameter sweep.

    The lnrho and sigma values are parsed from the cost files of the
    '4_horton_cost_function/lnrho_sweep' folders. For each combination, all
    snapshot cost functions plus the averaged one are passed to
    ``calc_charges``, writing 'charges_<lnrho>_<sigma>.csv' next to the cost
    file unless that file already exists.
    """
    print('This is {}.'.format(__file__))

    # We need any one top and pdb file for the ordering of atom-names;
    # the exact snapshot we use does not matter as the ordering is invariant
    pdb_file = find(path='.',
                    folder_keyword='0_initial_structure',
                    file_keyword='.pdb')[0]
    top_file = find(path='.',
                    folder_keyword='0_initial_structure',
                    file_keyword='.top')[0]
    hyd_file = find(path='..',
                    folder_keyword='fitting',
                    file_keyword='hydrogen_per_atom.csv')[0]

    sweep_folder = '4_horton_cost_function/lnrho_sweep'
    all_cost_files = find(path='.',
                          folder_keyword=sweep_folder,
                          file_keyword='cost',
                          nr_occ=None)

    # Distinct parameter values, parsed from the file names
    lnrho_range = set()
    sigma_range = set()
    for sweep_file in all_cost_files:
        lnrho, sigma = sweep_file[-15:-3].split('_')[-2:]
        lnrho_range.add(lnrho)
        sigma_range.add(sigma)

    for lnrho in lnrho_range:
        print('lnrho = {} ...'.format(lnrho))
        for sigma in sigma_range:
            # Paths of the unaveraged snapshot cost functions
            cost_paths = find(
                path='.',
                folder_keyword=sweep_folder,
                file_keyword='cost_{}_{}.h5'.format(lnrho, sigma),
                nr_occ=None)

            # The averaged cost function is treated like one more snapshot
            cost_paths += find(
                path='.',
                folder_keyword='horton_charges/sweep_rhoref',
                file_keyword='costfunction_average_{}_{}.h5'.format(
                    lnrho, sigma),
                nr_occ=None)

            charges_name = 'charges_{}_{}.csv'.format(lnrho, sigma)
            for cost_file in cost_paths:
                # The charges are written next to their cost function
                output_file = os.path.join(
                    os.path.split(cost_file)[0], charges_name)

                # Already computed in an earlier run
                if os.path.exists(output_file):
                    continue

                print('Optimizing charges for {}.'.format(cost_file[:18]))
                calc_charges(pdb_file,
                             top_file,
                             hyd_file,
                             cost_file,
                             output_file=output_file)

    print('Done.')

Beispiel #13

0

Datei anzeigen

Datei: sweep_charges.py Projekt: lukaselflein/charge_optimization_folderstructure

def _find_constraint_file(file_name):
    """Return the first match for a constraint file, or None if the lookup fails."""
    try:
        return find('..', 'fitting_constraint_files', file_name)[0]
    except Exception:
        # Deliberate best effort: a missing constraint file is not an error.
        # Unlike a bare except, this lets KeyboardInterrupt and SystemExit
        # propagate.
        return None


def calc_charges(pdb_infile,
                 top_infile,
                 hydrogen_file,
                 horton_cost_function,
                 output_file,
                 charge_group_file=None,
                 charge_group_charges_file=None,
                 symmetry_file=None):
    """Fit constrained point charges to a HORTON cost function.

    Wraps fitESPconstrained.py: builds the atomic structure, reads the cost
    function matrices, assembles the constraints, runs the constrained and
    the unconstrained minimization, and writes both sets of charges.

    Args:
        pdb_infile: structure file passed to ``create_structure``.
        top_infile: topology file passed to ``create_structure``.
        hydrogen_file: hydrogen table passed to ``create_structure``.
        horton_cost_function: cost function file read by
            ``read_horton_cost_function``.
        output_file: output name passed to ``write_charges``.
        charge_group_file: constraint file; if None,
            'atoms_in_charge_group.csv' is searched for.
        charge_group_charges_file: constraint file; if None,
            'charge_group_total_charge.csv' is searched for.
        symmetry_file: constraint file; if None,
            'atoms_of_same_charge.csv' is searched for.

    A constraint file that is neither given nor found stays None and is
    passed on to ``get_constraints`` as such.
    """
    # Look up the relationship between ASE indices, atom names
    pmd_struct, pmd_top, ase2pmd = create_structure(pdb_infile, top_infile,
                                                    hydrogen_file)
    print('Atomic structure built.')

    # Import A and B matrices from HORTON
    A, B = read_horton_cost_function(horton_cost_function)

    # Fall back to the default constraint files where none were given
    if charge_group_file is None:
        charge_group_file = _find_constraint_file('atoms_in_charge_group.csv')
    if charge_group_charges_file is None:
        charge_group_charges_file = _find_constraint_file(
            'charge_group_total_charge.csv')
    if symmetry_file is None:
        symmetry_file = _find_constraint_file('atoms_of_same_charge.csv')

    # Calculate constraints
    logic_constraints, charge_constraints = get_constraints(
        args=None,
        ase2pmd=ase2pmd,
        charge_group_file=charge_group_file,
        charge_group_charges_file=charge_group_charges_file,
        symmetry_file=symmetry_file,
        debug=False)

    print('Constraints calculated: {} non-redunant.'.format(
        logic_constraints.shape[0]))

    # Run the constrained minimization; the second return value (named f in
    # the original, for the Lagrange forces) is currently not written out
    q, _lagrange_forces = constrained_minimize(A, B, logic_constraints,
                                               charge_constraints)

    q_unconstrained = unconstrained_minimize(A, B)

    # Save charges
    write_charges(q,
                  q_unconstrained,
                  ase2pmd,
                  out_name=output_file,
                  plot=False)

    print('Charges written to {}.'.format(output_file))

Beispiel #14

0

Datei anzeigen

Datei: loop_charges.py Projekt: lukaselflein/charge_optimization_folderstructure

def main():
    """Fit point charges in every cost-function folder below the cwd.

    Walks the directory tree from '.', skips every folder whose path contains
    'template', 'exclude' or 'sweep', and for each folder whose path contains
    'horton_cost_function' or 'horton_charges' calls ``calc_charges`` with
    the structure, topology, cost-function and constraint files. Folders
    that already contain 'fitted_point_charges.csv' are skipped.
    """
    print('This is {}.'.format(__file__))

    # Save the working dir
    topdir = os.getcwd()
    print('Current working dir: {}'.format(topdir))

    # Read the file containing the info on the total charge of the system
    # NOTE(review): qtot is not used again in the visible part of this
    # function - confirm whether the call is only a sanity check.
    qtot = read_total_charge(
        path='../fitting_constraint_files/total_charge.csv')

    # Crawl the directory structure; sorted() consumes the whole walk up
    # front and orders the entries by folder path
    for subdir, dirs, files in sorted(os.walk('.')):

        # Exclude template folders from search
        if 'template' in subdir or 'exclude' in subdir or 'sweep' in subdir:
            continue

        # Select the folders with cost function
        if 'horton_cost_function' in subdir or 'horton_charges' in subdir:

            print('Moving to {}'.format(subdir))
            with cd(subdir):
                # Search for the .pdb and .top file in the folders above
                input_path = '..'
                pdb_path = find(input_path,
                                folder_keyword='initial',
                                file_keyword='.pdb',
                                nr_occ=1,
                                exclude_kw=['template', 'exclude', 'sweep'])[0]
                top_path = find(input_path,
                                folder_keyword='initial',
                                file_keyword='.top',
                                nr_occ=1,
                                exclude_kw=['template', 'exclude', 'sweep'])[0]

                # Defaults apply to the 'horton_cost_function' folders
                output_file = 'fitted_point_charges.csv'
                cost_file = 'cost.h5'
                constraint_path = '../../../fitting_constraint_files/'
                # 'horton_charges' folders use a relative constraint path
                # with one level less and the averaged cost function
                if 'horton_charges' in subdir:
                    constraint_path = '../../fitting_constraint_files/'
                    cost_file = 'costfunction_average.h5'

                hydrogen_file = os.path.join(constraint_path,
                                             'hydrogen_per_atom.csv')
                charge_group_file = os.path.join(constraint_path,
                                                 'atoms_in_charge_group.csv')
                charge_group_charges_file = os.path.join(
                    constraint_path, 'charge_group_total_charge.csv')
                symmetry_file = os.path.join(constraint_path,
                                             'atoms_of_same_charge.csv')

                # Do not recompute charges that already exist; the continue
                # leaves the cd() block and moves on to the next folder
                if os.path.isfile(output_file):
                    print('File exists, skipping: {}'.format(
                        os.path.join(subdir, output_file)))
                    continue

                calc_charges(
                    pdb_infile=pdb_path,
                    top_infile=top_path,
                    horton_cost_function=cost_file,
                    output_file=output_file,
                    hydrogen_file=hydrogen_file,
                    charge_group_file=charge_group_file,
                    charge_group_charges_file=charge_group_charges_file,
                    symmetry_file=symmetry_file)