def test_parse_lammps_dumps(self):
    """Parse the gzipped and the wildcard dump fixtures and check each.

    For both inputs the parsed timesteps must run from 0 to 100 at the
    expected stride, and the last frame's data table must be 21 x 5.
    """
    # (fixture file name or glob, expected spacing between timesteps)
    cases = (
        ("dump.rdx.gz", 10),    # gzipped
        ("dump.rdx_wc.*", 25),  # wildcard
    )
    for fixture, stride in cases:
        pattern = os.path.join(test_dir, fixture)
        frames = list(parse_lammps_dumps(file_pattern=pattern))
        np.testing.assert_array_equal(
            [frame.timestep for frame in frames],
            np.arange(0, 101, stride),
        )
        self.assertTupleEqual(frames[-1].data.shape, (21, 5))
def dump_to_df(filename, write_csv=True, output="data.csv"):
    """
    Load every frame of a lammps dump into one Pandas DataFrame.

    Each frame's table gets a 'Timestep' column holding that frame's
    timestep; the tables are then concatenated, sorted by timestep and
    particle id, and re-indexed from zero.

    Writing the result to CSV is recommended because:
        (1) CSV files take up less space than dump files.
        (2) A single CSV can be read back as one DataFrame and processed
            entirely in Pandas, which is far more efficient for large
            amounts of lammps data than handling a list of per-frame
            DataFrames.
        (3) The particles are pre-sorted. LAMMPS does not retain particle
            order in the dump file; here the rows of each timestep are
            listed in order of their id.

    Args:
        filename: (str) file name of the lammps dump.
        write_csv: (bool) Whether or not to write csv file
        output: (str) file name to output the csv. Include ".csv" in your
            filename.

    Returns:
        Pandas Dataframe of the dump
    """
    tables = []
    for snapshot in parse_lammps_dumps(filename):
        table = snapshot.data
        # Tag every row with the frame it came from.
        table['Timestep'] = snapshot.timestep
        tables.append(table)

    merged = pd.concat(tables)
    merged = merged.sort_values(by=['Timestep', 'id'])
    merged = merged.reset_index(drop=True)

    if write_csv:
        merged.to_csv(output)
    return merged
def readLammps(desired_return):
    """Collect results of a LAMMPS run from files in the working directory.

    Reads the last thermo table of ``log.lammps`` and the last frame of
    ``dump.atoms`` and assembles:

    * ``"energies"``   - last ``PotEng`` value of the log,
    * ``"structures"`` - a pymatgen ``Structure`` built from the last dump
      frame (cartesian ``x, y, z``; species guessed from the ``mass`` column),
    * ``"forces"``     - per-atom ``[fx, fy, fz]`` of the last dump frame,
    * ``"stresses"``   - a pymatgen ``Stress`` built from the last values of
      ``c_press[1]`` .. ``c_press[6]``, each scaled by 0.1.

    Per-atom data is ordered by ascending atom ``id``.

    Args:
        desired_return: iterable of result names to return.

    Returns:
        dict mapping every requested name to its value. If the parsed log
        contains no thermo table, every requested name maps to ``None``.

    Raises:
        KeyError: if a requested name is unknown, or if ``"structures"`` /
            ``"forces"`` is requested but the dump file holds no frame.
    """
    from pymatgen.io.lammps.outputs import parse_lammps_dumps, parse_lammps_log
    # Submodule imports work on old and new pymatgen alike; the root-level
    # ``from pymatgen import Structure, Element`` no longer does.
    from pymatgen.core.structure import Structure
    from pymatgen.core.periodic_table import Element
    from pymatgen.analysis.elasticity.stress import Stress
    from numpy import unique, array, argmin

    try:
        log = parse_lammps_log(filename="log.lammps")[-1]
    except IndexError:
        return {ret: None for ret in desired_return}

    result_dict = {"energies": list(log['PotEng'])[-1]}

    # Only the final frame is used; walk the generator to reach it.
    last_dump = None
    for last_dump in parse_lammps_dumps("dump.atoms"):
        pass

    if last_dump is not None:
        # Sorting by id puts atom k at position k-1 without assuming the ids
        # are exactly 1..N or that the table has a default integer index.
        atoms = last_dump.data.sort_values(by="id")
        coords = atoms[["x", "y", "z"]].values.tolist()
        forces = atoms[["fx", "fy", "fz"]].values.tolist()
        masses = atoms["mass"].values

        # Match each distinct mass to the element with the closest atomic
        # mass. Index straight into the enumerated elements instead of
        # converting "position + 1" to an atomic number, which is only
        # correct if Element happens to iterate in Z order.
        elements = list(Element)
        ref_masses = array([el.atomic_mass.real for el in elements])
        unique_masses = unique(masses)
        nearest = argmin(abs(ref_masses - unique_masses[:, None]), axis=1)
        species_map = {
            mass: elements[k].symbol
            for mass, k in zip(unique_masses, nearest)
        }
        atom_species = [species_map[mass] for mass in masses]

        result_dict["structures"] = Structure(last_dump.box.to_lattice(),
                                              atom_species,
                                              coords,
                                              coords_are_cartesian=True)
        result_dict["forces"] = forces

    pressure = [
        1e-1 * list(log['c_press[{}]'.format(i)])[-1] for i in range(1, 7)
    ]
    # NOTE(review): component order presumably follows LAMMPS
    # (xx, yy, zz, xy, xz, yz) - confirm against the input script.
    pxx, pyy, pzz, pxy, pxz, pyz = pressure
    result_dict["stresses"] = Stress([[pxx, pxy, pxz],
                                      [pxy, pyy, pyz],
                                      [pxz, pyz, pzz]])

    return {ret: result_dict[ret] for ret in desired_return}
def read_lammps_dump_file(filename='', element_map=None):
    '''
    Build a pymatgen structure from the last frame of a LAMMPS dump file.

    Atoms are ordered by their ``id``. The scaled coordinate columns
    ``xs, ys, zs`` are passed to ``Structure`` as its coordinates, and the
    lattice comes from the frame's simulation box.

    Args:
        filename: path of a LAMMPS dump file
        element_map: optional dict mapping the integer LAMMPS atom type to
            an element symbol. Defaults to ``{1: 'Al', 2: 'N', 3: 'Ti'}``.

    Returns:
        pymatgen structure of final relaxed structure.

    Raises:
        ValueError: if the dump contains no frame, or if it contains an
            atom type that is missing from ``element_map``.
    '''
    # Works on both old and new pymatgen releases (root-level re-exports
    # such as ``pymatgen.Structure`` were removed in newer versions).
    from pymatgen.core.structure import Structure

    if element_map is None:
        element_map = {1: 'Al', 2: 'N', 3: 'Ti'}

    # Walk the generator; only the final frame is needed.
    final_frame = None
    for final_frame in parse_lammps_dumps(filename):
        pass
    if final_frame is None:
        raise ValueError(
            'No frames found in LAMMPS dump file: {!r}'.format(filename))

    atoms = final_frame.data.sort_values(by=['id'])
    atom_types = atoms['type'].tolist()

    # Fail loudly instead of silently dropping atoms, which would leave the
    # species list shorter than the coordinate array.
    unknown = sorted(set(atom_types) - set(element_map))
    if unknown:
        raise ValueError(
            'Atom type(s) {} have no entry in element_map'.format(unknown))

    species = [element_map[atom_type] for atom_type in atom_types]
    coords = atoms[['xs', 'ys', 'zs']].values
    return Structure(final_frame.box.to_lattice(), species, coords)
import shutil
import numpy as np
import pandas as pd
from subprocess import run
from pymatgen.io.lammps.outputs import parse_lammps_dumps

'''
This script converts a single dump file (with one or more frames) into
dump files (with a single frame) and xyz files (with a single frame).

It only acts when exactly one file matching 'dump.*.dump' exists in the
current working directory. Per-frame trajectory files are moved to
'trj_files', per-frame xyz files to 'xyz_files', and frames whose timestep
is a multiple of RDF_COPY_INTERVAL are additionally copied to
'trj_files/rdf_files'.
'''

TRJ_DIR = 'trj_files'
RDF_DIR = os.path.join(TRJ_DIR, 'rdf_files')
XYZ_DIR = 'xyz_files'
# Frames at multiples of this timestep are also copied into RDF_DIR.
RDF_COPY_INTERVAL = 500000

cwd = os.getcwd()
dump_file_path_pattern = os.path.join(cwd, 'dump.*.dump')
Dump_files = glob.glob(dump_file_path_pattern)

if len(Dump_files) == 1:
    # Create the output folders in-process: no dependency on shell tools,
    # safe to re-run, and real failures raise instead of passing silently.
    os.makedirs(RDF_DIR, exist_ok=True)
    os.makedirs(XYZ_DIR, exist_ok=True)

    Dump_file = Dump_files[0]
    wd, Dump_file_name = os.path.split(Dump_file)
    # The glob guarantees the name ends in 'dump'; drop those four
    # characters but keep the dot in front of them.
    name_prefix = Dump_file_name[:-4]

    Dumps = parse_lammps_dumps(Dump_file)
    for Dump in Dumps:
        trj_name = name_prefix + str(Dump.timestep) + '.lammpstrj'
        xyz_name = name_prefix + 'alt.' + str(Dump.timestep) + '.xyz'
        Dump.as_txt_file(trj_name, output=True)
        Dump.as_txt_file(xyz_name, convert='xyz', output=True)

        if Dump.timestep % RDF_COPY_INTERVAL == 0:
            shutil.copy(trj_name, RDF_DIR)
        # Give the full destination file name so an existing file from an
        # earlier run is replaced, as `mv` would do.
        shutil.move(trj_name, os.path.join(TRJ_DIR, trj_name))
        shutil.move(xyz_name, os.path.join(XYZ_DIR, xyz_name))
# Use the MSD type given on the command line; an empty or missing value
# falls back to 'com'.
msd_type = args.msd_type or 'com'

# Values used for testing
# file_pattern = 'trj_files/msd_files/dump.0.5_dhps_2.5_naoh_spce.nvt_production.*.lammpstrj'
# fs_per_step = 0.5
# msd_type = 'allatom'

# Report the settings this run uses.
print('File pattern used: ' + file_pattern)
print('Timestep used: ' + str(fs_per_step) + ' fs')
print('Cutoff time used: ' + str(cutoff_time) + ' ps')
print('MSD type used: ' + msd_type)

# Convert dump files to a list of LammpsDump objects. A list (not the bare
# generator) is needed because the frames are counted below.
# file_pattern = 'trj_files/msd_files/dump.0.5_dhps_2.5_naoh_spce.nvt_production.*.lammpstrj'
Dumps = list(parse_lammps_dumps(file_pattern))
print('Number of frames found: ' + str(len(Dumps)))

# ps per timestep conversion factor
ps_per_step = fs_per_step / 1000

# Instantiate data object. Time is in ps, msd is in square Angstroms
Data = pd.DataFrame(columns=['Time', 'msd_x', 'msd_y', 'msd_z', 'msd'])

for i, dump in enumerate(Dumps):
    print('Processing frame number ' + str(i))
    # Sort rows by atom id, then renumber them; the previous index is kept
    # as a column because reset_index is called without drop.
    dump.data = dump.data.sort_values(by=['id']).reset_index()