def g09log_to_hdf5(f, fn_log):
    """Convert a Gaussian09 BOMD log file to Yaff HDF5 format.

       **Arguments:**

       f
            An open and writable HDF5 file.

       fn_log
            The name of the Gaussian log file.
    """
    with log.section('G09H5'):
        if log.do_medium:
            log('Loading Gaussian 09 file \'%s\' into \'trajectory\' of HDF5 file \'%s\'' % (
                fn_log, f.filename
            ))

        # First make sure the HDF5 file has a system description that is consistent
        # with the log file.
        if 'system' not in f:
            raise ValueError('The HDF5 file must contain a system group.')
        if 'numbers' not in f['system']:
            raise ValueError('The HDF5 file must have a system group with atomic numbers.')
        natom = f['system/numbers'].shape[0]

        # Take care of the trajectory group
        tgrp = get_trajectory_group(f)

        # Take care of the pos, vel, frc and energy datasets
        dss = get_trajectory_datasets(
            tgrp,
            ('pos', (natom, 3)),
            ('vel', (natom, 3)),
            ('frc', (natom, 3)),
            ('time', (1,)),
            ('step', (1,)),
            ('epot', (1,)),
            ('ekin', (1,)),
            ('etot', (1,)),
        )
        ds_pos, ds_vel, ds_frc, ds_time, ds_step, ds_epot, ds_ekin, ds_etot = dss

        # Load frame by frame
        row = get_last_trajectory_row(dss)
        for numbers, pos, vel, frc, time, step, epot, ekin, etot in _iter_frames_g09(fn_log):
            if (numbers != f['system/numbers']).any():
                log.warn('The element numbers of the HDF5 and LOG file do not match.')
            write_to_dataset(ds_pos, pos, row)
            write_to_dataset(ds_vel, vel, row)
            write_to_dataset(ds_frc, frc, row)
            write_to_dataset(ds_time, time, row)
            write_to_dataset(ds_step, step, row)
            write_to_dataset(ds_epot, epot, row)
            write_to_dataset(ds_ekin, ekin, row)
            write_to_dataset(ds_etot, etot, row)
            row += 1

        # Check number of rows
        check_trajectory_rows(tgrp, dss, row)

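# -- Illustrative usage sketch (not part of the original module) --------------
# A minimal example of how g09log_to_hdf5 could be driven from a script. The
# helper name and file names below are placeholders, and the HDF5 file is
# assumed to already contain a 'system' group (e.g. created beforehand with the
# ``to_hdf5`` method of the System class, as recommended for the other converters).
def _example_g09log_conversion(fn_h5='bomd.h5', fn_log='bomd.log'):
    import h5py  # assumed available; all converters here operate on h5py File objects
    with h5py.File(fn_h5, 'a') as f:
        # Append all frames from the Gaussian09 BOMD log to the trajectory group.
        g09log_to_hdf5(f, fn_log)
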
def cp2k_ener_to_hdf5(f, fn_ener, sub=slice(None)):
    """Convert a CP2K energy trajectory file to Yaff HDF5 format.

       **Arguments:**

       f
            An open and writable HDF5 file.

       fn_ener
            The filename of the CP2K energy trajectory file.

       **Optional arguments:**

       sub
            This must be a slice object that defines the sub-sampling of the
            CP2K energy file. By default all time steps are read.

       This routine will also test the consistency of the row attribute of the
       trajectory group. If some trajectory data is already present, it will be
       replaced by the new data. Furthermore, this routine also checks the
       header of the CP2K energy file to make sure the values are interpreted
       correctly.

       It is highly recommended to first initialize the HDF5 file with the
       ``to_hdf5`` method of the System class.
    """
    with log.section('CP2KEH5'):
        if log.do_medium:
            log('Loading CP2K energy file \'%s\' into \'trajectory\' of HDF5 file \'%s\'' % (
                fn_ener, f.filename
            ))

        # Take care of the data group
        tgrp = get_trajectory_group(f)

        # Take care of the datasets
        dss = get_trajectory_datasets(
            tgrp,
            ('step', (1,)),
            ('time', (1,)),
            ('ekin', (1,)),
            ('temp', (1,)),
            ('epot', (1,)),
            ('econs', (1,)),
        )
        ds_step, ds_time, ds_ke, ds_temp, ds_pe, ds_cq = dss

        # Fill the datasets with data.
        row = get_last_trajectory_row(dss)
        counter = 0
        with open(fn_ener) as fin:
            # Check the header line.
            line = next(fin)
            words = line.split()
            if words[0] != '#':
                raise ValueError('The first line in the energies file should be a header line starting with #.')
            if words[3] != 'Time[fs]' or words[4] != 'Kin.[a.u.]' or \
               words[5] != 'Temp[K]' or words[6] != 'Pot.[a.u.]' or \
               words[7] + ' ' + words[8] != 'Cons Qty[a.u.]':
                raise ValueError('The fields in the header line indicate that this file contains unsupported data.')

            # Load the remaining lines.
            for line in fin:
                if slice_match(sub, counter):
                    words = line.split()
                    write_to_dataset(ds_step, float(words[0]), row)
                    write_to_dataset(ds_time, float(words[1])*femtosecond, row)
                    write_to_dataset(ds_ke, float(words[2]), row)
                    write_to_dataset(ds_temp, float(words[3]), row)
                    write_to_dataset(ds_pe, float(words[4]), row)
                    write_to_dataset(ds_cq, float(words[5]), row)
                    row += 1
                counter += 1

        # Check number of rows
        check_trajectory_rows(tgrp, dss, row)

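# -- Illustrative usage sketch (not part of the original module) --------------
# Shows how the ``sub`` slice can be used to sub-sample the CP2K energy file,
# e.g. keeping every tenth row so it stays consistent with a sub-sampled
# position trajectory. The helper name and file names are placeholders.
def _example_cp2k_ener_conversion(fn_h5='md.h5', fn_ener='md-1.ener'):
    import h5py  # assumed available
    with h5py.File(fn_h5, 'a') as f:
        # Read every tenth time step from the energy file.
        cp2k_ener_to_hdf5(f, fn_ener, sub=slice(None, None, 10))
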
def dlpoly_history_to_hdf5(f, fn_history, sub=slice(None), pos_unit=angstrom,
                           vel_unit=angstrom/picosecond, frc_unit=amu*angstrom/picosecond**2,
                           time_unit=picosecond, mass_unit=amu):
    """Convert a DLPoly History trajectory file to Yaff HDF5 format.

       **Arguments:**

       f
            An open and writable HDF5 file.

       fn_history
            The filename of the DLPOLY history file.

       **Optional arguments:**

       sub
            The sub argument for the DLPolyHistoryReader. This must be a slice
            object that defines the subsampling of the samples from the history
            file. By default all frames are read.

       pos_unit, vel_unit, frc_unit, time_unit and mass_unit
            The units used in the DLPOLY history file. The default values
            correspond to the defaults used in DLPOLY.

       This routine will also test the consistency of the row attribute of the
       trajectory group. If some trajectory data is already present, it will be
       replaced by the new data. It is highly recommended to first initialize
       the HDF5 file with the ``to_hdf5`` method of the System class.
    """
    with log.section('DPH5'):
        if log.do_medium:
            log('Loading DLPOLY history file \'%s\' into \'trajectory\' of HDF5 file \'%s\'' % (
                fn_history, f.filename
            ))

        # Take care of the data group
        tgrp = get_trajectory_group(f)

        # Open the history file for reading
        hist_reader = DLPolyHistoryReader(fn_history, sub, pos_unit, vel_unit,
                                          frc_unit, time_unit, mass_unit)

        # Take care of the datasets that should always be present
        natom = hist_reader.num_atoms
        dss = get_trajectory_datasets(
            tgrp,
            ('step', (1,)),
            ('time', (1,)),
            ('cell', (3, 3)),
            ('pos', (natom, 3)),
        )
        ds_step, ds_time, ds_cell, ds_pos = dss

        # Take care of optional datasets
        if hist_reader.keytrj > 0:
            ds_vel = get_trajectory_datasets(tgrp, ('vel', (natom, 3)))[0]
            dss.append(ds_vel)
        if hist_reader.keytrj > 1:
            ds_frc = get_trajectory_datasets(tgrp, ('frc', (natom, 3)))[0]
            dss.append(ds_frc)

        # Decide on the first row to start writing data
        row = get_last_trajectory_row(dss)

        # Load data frame by frame
        for frame in hist_reader:
            write_to_dataset(ds_step, frame["step"], row)
            write_to_dataset(ds_time, frame["time"], row)
            write_to_dataset(ds_cell, frame["cell"].T, row)
            write_to_dataset(ds_pos, frame["pos"], row)
            if hist_reader.keytrj > 0:
                write_to_dataset(ds_vel, frame["vel"], row)
            if hist_reader.keytrj > 1:
                write_to_dataset(ds_frc, frame["frc"], row)
            row += 1

        # Check number of rows
        check_trajectory_rows(tgrp, dss, row)

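# -- Illustrative usage sketch (not part of the original module) --------------
# Shows a typical call with the default DLPOLY units and a sub-sampling slice.
# The helper name and file names are placeholders; 'HISTORY' is only used here
# as the conventional DLPOLY output file name.
def _example_dlpoly_conversion(fn_h5='md.h5', fn_history='HISTORY'):
    import h5py  # assumed available
    with h5py.File(fn_h5, 'a') as f:
        # Keep every tenth frame; the omitted unit arguments fall back to the
        # DLPOLY defaults declared in the signature above.
        dlpoly_history_to_hdf5(f, fn_history, sub=slice(None, None, 10))
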
def xyz_to_hdf5(f, fn_xyz, sub=slice(None), file_unit=angstrom, name='pos'):
    """Convert an XYZ trajectory file to Yaff HDF5 format.

       **Arguments:**

       f
            An open and writable HDF5 file.

       fn_xyz
            The filename of the XYZ trajectory file.

       **Optional arguments:**

       sub
            The sub argument for the XYZReader. This must be a slice object
            that defines the subsampling of the XYZ file reader. By default
            all frames are read.

       file_unit
            The unit of the data in the XYZ file. [default=angstrom]

       name
            The name of the HDF5 dataset where the trajectory is stored. This
            array is stored in the 'trajectory' group.

       This routine will also test the consistency of the row attribute of the
       trajectory group. If some trajectory data is already present, it will be
       replaced by the new data. It is highly recommended to first initialize
       the HDF5 file with the ``to_hdf5`` method of the System class.
    """
    with log.section('XYZH5'):
        if log.do_medium:
            log('Loading XYZ file \'%s\' into \'trajectory/%s\' of HDF5 file \'%s\'' % (
                fn_xyz, name, f.filename
            ))

        # First make sure the HDF5 file has a system description that is consistent
        # with the XYZ file.
        if 'system' not in f:
            raise ValueError('The HDF5 file must contain a system group.')
        if 'numbers' not in f['system']:
            raise ValueError('The HDF5 file must have a system group with atomic numbers.')
        xyz_reader = XYZReader(fn_xyz, sub=sub, file_unit=file_unit)
        if len(xyz_reader.numbers) != len(f['system/numbers']):
            raise ValueError('The number of atoms in the HDF5 and the XYZ files does not match.')
        if (xyz_reader.numbers != f['system/numbers']).any():
            log.warn('The atomic numbers of the HDF5 and XYZ file do not match.')

        # Take care of the trajectory group
        tgrp = get_trajectory_group(f)

        # Take care of the dataset
        ds, = get_trajectory_datasets(tgrp, (name, (len(xyz_reader.numbers), 3)))

        # Fill the dataset with data.
        row = get_last_trajectory_row([ds])
        for title, coordinates in xyz_reader:
            write_to_dataset(ds, coordinates, row)
            row += 1

        # Check number of rows
        check_trajectory_rows(tgrp, [ds], row)

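# -- Illustrative usage sketch (not part of the original module) --------------
# Shows how positions and velocities from two XYZ files could be stored in
# separate datasets of the same trajectory group via the ``name`` argument.
# The helper name, file names and the velocity unit are placeholders; the HDF5
# file is assumed to already contain a 'system' group.
def _example_xyz_conversion(fn_h5='md.h5', fn_pos='md-pos.xyz', fn_vel='md-vel.xyz'):
    import h5py  # assumed available
    with h5py.File(fn_h5, 'a') as f:
        # Positions in angstrom (the default unit) go to 'trajectory/pos'.
        xyz_to_hdf5(f, fn_pos)
        # Velocities go to 'trajectory/vel'; the file unit here is an assumption.
        xyz_to_hdf5(f, fn_vel, file_unit=angstrom/picosecond, name='vel')
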