Python write_to_dataset Examples, yaff.conversion.common.write_to_dataset Python Examples

Example #1

0

Show file

def g09log_to_hdf5(f, fn_log):
    """Write the frames of a Gaussian09 BOMD log file to a Yaff HDF5 file.

       **Arguments:**

       f
            An open and writable HDF5 file. It must contain a ``system``
            group with a ``numbers`` dataset; a ``ValueError`` is raised
            otherwise.

       fn_log
            The name of the Gaussian log file.

       Each frame is stored in the ``pos``, ``vel``, ``frc``, ``time``,
       ``step``, ``epot``, ``ekin`` and ``etot`` datasets of the trajectory
       group, starting at the row given by ``get_last_trajectory_row``. A
       warning is logged for every frame whose element numbers differ from
       ``system/numbers``.
    """
    with log.section('G09H5'):
        if log.do_medium:
            log('Loading Gaussian 09 file \'%s\' into \'trajectory\' of HDF5 file \'%s\''
                % (fn_log, f.filename))

        # The HDF5 file must already describe the system: the number of atoms
        # is taken from its atomic numbers.
        if 'system' not in f:
            raise ValueError('The HDF5 file must contain a system group.')
        if 'numbers' not in f['system']:
            raise ValueError(
                'The HDF5 file must have a system group with atomic numbers.')
        natom = f['system/numbers'].shape[0]

        # Get the trajectory group and the datasets to fill.
        tgrp = get_trajectory_group(f)
        per_atom = (natom, 3)
        scalar = (1, )
        dss = get_trajectory_datasets(
            tgrp,
            ('pos', per_atom),
            ('vel', per_atom),
            ('frc', per_atom),
            ('time', scalar),
            ('step', scalar),
            ('epot', scalar),
            ('ekin', scalar),
            ('etot', scalar),
        )
        # Unpacking fails unless exactly eight datasets were returned.
        ds_pos, ds_vel, ds_frc, ds_time, ds_step, ds_epot, ds_ekin, ds_etot = dss

        # Store the frames one by one, one row per frame.
        row = get_last_trajectory_row(dss)
        for frame in _iter_frames_g09(fn_log):
            numbers, pos, vel, frc, time, step, epot, ekin, etot = frame
            if (numbers != f['system/numbers']).any():
                log.warn(
                    'The element numbers of the HDF5 and LOG file do not match.'
                )
            pairs = (
                (ds_pos, pos),
                (ds_vel, vel),
                (ds_frc, frc),
                (ds_time, time),
                (ds_step, step),
                (ds_epot, epot),
                (ds_ekin, ekin),
                (ds_etot, etot),
            )
            for ds, values in pairs:
                write_to_dataset(ds, values, row)
            row += 1

        # Let the helper validate the final number of rows.
        check_trajectory_rows(tgrp, dss, row)

Example #2

0

Show file

File: dlpoly.py Project: molmod/yaff

def dlpoly_history_to_hdf5(f, fn_history, sub=slice(None), pos_unit=angstrom,
                           vel_unit=angstrom/picosecond,
                           frc_unit=amu*angstrom/picosecond**2,
                           time_unit=picosecond, mass_unit=amu):
    """Write a DL_POLY history trajectory file to a Yaff HDF5 file.

       **Arguments:**

       f
            An open and writable HDF5 file.

       fn_history
            The filename of the DLPOLY history file.

       **Optional arguments:**

       sub
            The sub argument for the DLPolyHistoryReader. This must be a slice
            object that defines the subsampling of the samples from the history
            file. By default all frames are read.

       pos_unit, vel_unit, frc_unit, time_unit and mass_unit
            The units used in the dlpoly history file. The default values
            correspond to the defaults used in DLPOLY.

       The ``step``, ``time``, ``cell`` and ``pos`` datasets are always
       written; ``vel`` is added when the reader's ``keytrj`` is larger than
       zero and ``frc`` when it is larger than one. The final number of rows
       is validated with ``check_trajectory_rows``. It is highly recommended
       to first initialize the HDF5 file with the ``to_hdf5`` method of the
       System class.
    """
    with log.section('DPH5'):
        if log.do_medium:
            log('Loading DLPOLY history file \'%s\' into \'trajectory\' of HDF5 file \'%s\'' % (
                fn_history, f.filename
            ))

        # Get the trajectory group, then open the history file.
        tgrp = get_trajectory_group(f)
        hist_reader = DLPolyHistoryReader(fn_history, sub, pos_unit, vel_unit,
                                          frc_unit, time_unit, mass_unit)

        # Datasets that are present for every history file.
        natom = hist_reader.num_atoms
        per_atom = (natom, 3)
        dss = get_trajectory_datasets(
            tgrp,
            ('step', (1,)),
            ('time', (1,)),
            ('cell', (3, 3)),
            ('pos', per_atom),
        )
        ds_step, ds_time, ds_cell, ds_pos = dss

        # Optional datasets; forces (keytrj > 1) imply velocities (keytrj > 0).
        if hist_reader.keytrj > 0:
            ds_vel = get_trajectory_datasets(tgrp, ('vel', per_atom))[0]
            dss.append(ds_vel)
            if hist_reader.keytrj > 1:
                ds_frc = get_trajectory_datasets(tgrp, ('frc', per_atom))[0]
                dss.append(ds_frc)

        # First row to write to.
        # NOTE(review): earlier documentation stated that existing trajectory
        # data is replaced -- confirm against get_last_trajectory_row.
        row = get_last_trajectory_row(dss)

        # Store the frames one by one, one row per frame.
        for frame in hist_reader:
            for ds, key in ((ds_step, "step"), (ds_time, "time")):
                write_to_dataset(ds, frame[key], row)
            # The cell matrix is stored transposed.
            write_to_dataset(ds_cell, frame["cell"].T, row)
            write_to_dataset(ds_pos, frame["pos"], row)
            if hist_reader.keytrj > 0:
                write_to_dataset(ds_vel, frame["vel"], row)
                if hist_reader.keytrj > 1:
                    write_to_dataset(ds_frc, frame["frc"], row)
            row += 1

        # Let the helper validate the final number of rows.
        check_trajectory_rows(tgrp, dss, row)

Example #3

0

Show file

File: cp2k.py Project: boegel/yaff

def cp2k_ener_to_hdf5(f, fn_ener, sub=slice(None)):
    """Convert a CP2K energy trajectory file to Yaff HDF5 format.

       **Arguments:**

       f
            An open and writable HDF5 file.

       fn_ener
            The filename of the CP2K energy trajectory file.

       **Optional arguments:**

       sub
            This must be a slice object that defines the sub-sampling of the
            CP2K energy file. By default all time steps are read.

       **Raises:**

       ValueError
            When the file is empty, when its first line is not a header
            starting with a separate ``#`` word, or when the header words 4
            to 9 are not ``Time[fs] Kin.[a.u.] Temp[K] Pot.[a.u.] Cons
            Qty[a.u.]``.

       The first six columns of every selected data line are stored in the
       ``step``, ``time``, ``ekin``, ``temp``, ``epot`` and ``econs``
       datasets of the trajectory group; the time column is multiplied with
       ``femtosecond``. The final number of rows is validated with
       ``check_trajectory_rows``.

       It is highly recommended to first initialize the HDF5 file with the
       ``to_hdf5`` method of the System class.
    """
    with log.section('CP2KEH5'):
        if log.do_medium:
            log('Loading CP2K energy file \'%s\' into \'trajectory\' of HDF5 file \'%s\''
                % (fn_ener, f.filename))

        # Take care of the data group
        tgrp = get_trajectory_group(f)

        # Take care of the datasets
        dss = get_trajectory_datasets(
            tgrp,
            ('step', (1, )),
            ('time', (1, )),
            ('ekin', (1, )),
            ('temp', (1, )),
            ('epot', (1, )),
            ('econs', (1, )),
        )
        ds_step, ds_time, ds_ke, ds_temp, ds_pe, ds_cq = dss

        # Fill the datasets with data.
        row = get_last_trajectory_row(dss)
        counter = 0
        # The with-statement closes the file even when an error is raised
        # while checking the header or parsing a data line.
        with open(fn_ener) as fin:
            # Check the header line. An empty file gives an empty word list,
            # which is rejected like any other missing header.
            words = next(fin, '').split()
            if not words or words[0] != '#':
                raise ValueError(
                    'The first line in the energies file should be a header line starting with #.'
                )
            # Comparing a slice also rejects headers with too few fields.
            expected_fields = ['Time[fs]', 'Kin.[a.u.]', 'Temp[K]',
                               'Pot.[a.u.]', 'Cons', 'Qty[a.u.]']
            if words[3:9] != expected_fields:
                raise ValueError(
                    'The fields in the header line indicate that this file contains unsupported data.'
                )

            # Load lines; counter is the zero-based index of the data line
            # that is tested against sub.
            for line in fin:
                if slice_match(sub, counter):
                    words = line.split()
                    write_to_dataset(ds_step, float(words[0]), row)
                    write_to_dataset(ds_time, float(words[1]) * femtosecond, row)
                    write_to_dataset(ds_ke, float(words[2]), row)
                    write_to_dataset(ds_temp, float(words[3]), row)
                    write_to_dataset(ds_pe, float(words[4]), row)
                    write_to_dataset(ds_cq, float(words[5]), row)
                    row += 1
                counter += 1

        # Check number of rows
        check_trajectory_rows(tgrp, dss, row)

Example #4

0

Show file

File: gaussian.py Project: molmod/yaff

def g09log_to_hdf5(f, fn_log):
    """Write the frames of a Gaussian09 BOMD log file to a Yaff HDF5 file.

       **Arguments:**

       f
            An open and writable HDF5 file. A ``ValueError`` is raised when
            it has no ``system`` group or when that group has no ``numbers``
            dataset.

       fn_log
            The name of the Gaussian log file.

       Every frame fills one row of the ``pos``, ``vel``, ``frc``, ``time``,
       ``step``, ``epot``, ``ekin`` and ``etot`` datasets of the trajectory
       group. Writing starts at the row given by ``get_last_trajectory_row``
       and a warning is logged for each frame whose element numbers differ
       from ``system/numbers``.
    """
    with log.section('G09H5'):
        if log.do_medium:
            log('Loading Gaussian 09 file \'%s\' into \'trajectory\' of HDF5 file \'%s\'' % (
                fn_log, f.filename
            ))

        # A system description with atomic numbers is required; it fixes the
        # number of atoms per frame.
        if 'system' not in f:
            raise ValueError('The HDF5 file must contain a system group.')
        if 'numbers' not in f['system']:
            raise ValueError('The HDF5 file must have a system group with atomic numbers.')
        natom = f['system/numbers'].shape[0]

        # Get the trajectory group and the datasets to fill.
        tgrp = get_trajectory_group(f)
        atom_shape = (natom, 3)
        dss = get_trajectory_datasets(
            tgrp,
            ('pos', atom_shape),
            ('vel', atom_shape),
            ('frc', atom_shape),
            ('time', (1,)),
            ('step', (1,)),
            ('epot', (1,)),
            ('ekin', (1,)),
            ('etot', (1,)),
        )
        # Unpacking fails unless exactly eight datasets were returned.
        ds_pos, ds_vel, ds_frc, ds_time, ds_step, ds_epot, ds_ekin, ds_etot = dss
        targets = (ds_pos, ds_vel, ds_frc, ds_time, ds_step, ds_epot, ds_ekin, ds_etot)

        # Store the frames one by one, one row per frame.
        row = get_last_trajectory_row(dss)
        for numbers, pos, vel, frc, time, step, epot, ekin, etot in _iter_frames_g09(fn_log):
            if (numbers != f['system/numbers']).any():
                log.warn('The element numbers of the HDF5 and LOG file do not match.')
            frame_values = (pos, vel, frc, time, step, epot, ekin, etot)
            for ds, values in zip(targets, frame_values):
                write_to_dataset(ds, values, row)
            row += 1

        # Let the helper validate the final number of rows.
        check_trajectory_rows(tgrp, dss, row)

Example #5

0

Show file

File: xyz.py Project: boegel/yaff

def xyz_to_hdf5(f, fn_xyz, sub=slice(None), file_unit=angstrom, name='pos'):
    """Write the frames of an XYZ trajectory file to a Yaff HDF5 file.

       **Arguments:**

       f
            An open and writable HDF5 file. It must contain a ``system``
            group with a ``numbers`` dataset.

       fn_xyz
            The filename of the XYZ trajectory file.

       **Optional arguments:**

       sub
            The sub argument for the XYZReader. This must be a slice object that
            defines the subsampling of the XYZ file reader. By default all
            frames are read.

       file_unit
            The unit of the data in the XYZ file. [default=angstrom]

       name
            The name of the HDF5 dataset where the trajectory is stored. This
            array is stored in the 'trajectory' group.

       A ``ValueError`` is raised when the system description is missing or
       when the number of atoms in both files differs; differing atomic
       numbers only trigger a warning. The final number of rows is validated
       with ``check_trajectory_rows``. It is highly recommended to first
       initialize the HDF5 file with the ``to_hdf5`` method of the System
       class.
    """
    with log.section('XYZH5'):
        if log.do_medium:
            log('Loading XYZ file \'%s\' into \'trajectory/%s\' of HDF5 file \'%s\''
                % (fn_xyz, name, f.filename))

        # The HDF5 file must already hold a system description to compare
        # the XYZ file against.
        if 'system' not in f:
            raise ValueError('The HDF5 file must contain a system group.')
        if 'numbers' not in f['system']:
            raise ValueError(
                'The HDF5 file must have a system group with atomic numbers.')

        # A different atom count is fatal, different elements only a warning.
        xyz_reader = XYZReader(fn_xyz, sub=sub, file_unit=file_unit)
        natom = len(xyz_reader.numbers)
        if natom != len(f['system/numbers']):
            raise ValueError(
                'The number of atoms in the HDF5 and the XYZ files does not match.'
            )
        if (xyz_reader.numbers != f['system/numbers']).any():
            log.warn(
                'The atomic numbers of the HDF5 and XYZ file do not match.')

        # Get the trajectory group and the single dataset to fill.
        tgrp = get_trajectory_group(f)
        ds, = get_trajectory_datasets(tgrp, (name, (natom, 3)))

        # One row per frame; the title of each frame is not used.
        row = get_last_trajectory_row([ds])
        for _title, coordinates in xyz_reader:
            write_to_dataset(ds, coordinates, row)
            row += 1

        # Let the helper validate the final number of rows.
        check_trajectory_rows(tgrp, [ds], row)

Example #6

0

Show file

File: dlpoly.py Project: boegel/yaff

def dlpoly_history_to_hdf5(f, fn_history, sub=slice(None), pos_unit=angstrom,
                           vel_unit=angstrom / picosecond,
                           frc_unit=amu * angstrom / picosecond**2,
                           time_unit=picosecond, mass_unit=amu):
    """Write a DL_POLY history trajectory file to a Yaff HDF5 file.

       **Arguments:**

       f
            An open and writable HDF5 file.

       fn_history
            The filename of the DLPOLY history file.

       **Optional arguments:**

       sub
            The sub argument for the DLPolyHistoryReader. This must be a slice
            object that defines the subsampling of the samples from the history
            file. By default all frames are read.

       pos_unit, vel_unit, frc_unit, time_unit and mass_unit
            The units used in the dlpoly history file. The default values
            correspond to the defaults used in DLPOLY.

       Each frame fills one row of the ``step``, ``time``, ``cell`` and
       ``pos`` datasets. A ``vel`` dataset is also filled when the reader's
       ``keytrj`` exceeds zero, and a ``frc`` dataset when it exceeds one.
       The final number of rows is validated with ``check_trajectory_rows``.
       It is highly recommended to first initialize the HDF5 file with the
       ``to_hdf5`` method of the System class.
    """
    with log.section('DPH5'):
        if log.do_medium:
            log('Loading DLPOLY history file \'%s\' into \'trajectory\' of HDF5 file \'%s\''
                % (fn_history, f.filename))

        # Get the trajectory group before opening the history file.
        tgrp = get_trajectory_group(f)
        hist_reader = DLPolyHistoryReader(fn_history, sub, pos_unit, vel_unit,
                                          frc_unit, time_unit, mass_unit)

        # Datasets that are present for every history file.
        natom = hist_reader.num_atoms
        atom_shape = (natom, 3)
        scalar = (1, )
        dss = get_trajectory_datasets(
            tgrp,
            ('step', scalar),
            ('time', scalar),
            ('cell', (3, 3)),
            ('pos', atom_shape),
        )
        ds_step, ds_time, ds_cell, ds_pos = dss

        # Optional datasets, depending on the reader's keytrj.
        if hist_reader.keytrj > 0:
            ds_vel = get_trajectory_datasets(tgrp, ('vel', atom_shape))[0]
            dss.append(ds_vel)
        if hist_reader.keytrj > 1:
            ds_frc = get_trajectory_datasets(tgrp, ('frc', atom_shape))[0]
            dss.append(ds_frc)

        # First row to write to.
        # NOTE(review): earlier documentation stated that existing trajectory
        # data is replaced -- confirm against get_last_trajectory_row.
        row = get_last_trajectory_row(dss)

        # Store the frames one by one, one row per frame.
        for frame in hist_reader:
            for ds, key in ((ds_step, "step"), (ds_time, "time")):
                write_to_dataset(ds, frame[key], row)
            # The cell matrix is stored transposed.
            write_to_dataset(ds_cell, frame["cell"].T, row)
            write_to_dataset(ds_pos, frame["pos"], row)
            if hist_reader.keytrj > 0:
                write_to_dataset(ds_vel, frame["vel"], row)
            if hist_reader.keytrj > 1:
                write_to_dataset(ds_frc, frame["frc"], row)
            row += 1

        # Let the helper validate the final number of rows.
        check_trajectory_rows(tgrp, dss, row)

Example #7

0

Show file

File: cp2k.py Project: molmod/yaff

def cp2k_ener_to_hdf5(f, fn_ener, sub=slice(None)):
    """Convert a CP2K energy trajectory file to Yaff HDF5 format.

       **Arguments:**

       f
            An open and writable HDF5 file.

       fn_ener
            The filename of the CP2K energy trajectory file.

       **Optional arguments:**

       sub
            This must be a slice object that defines the sub-sampling of the
            CP2K energy file. By default all time steps are read.

       **Raises:**

       ValueError
            When the file is empty, when its first line is not a header
            starting with a separate ``#`` word, or when the header words 4
            to 9 are not ``Time[fs] Kin.[a.u.] Temp[K] Pot.[a.u.] Cons
            Qty[a.u.]``.

       The first six columns of every selected data line are stored in the
       ``step``, ``time``, ``ekin``, ``temp``, ``epot`` and ``econs``
       datasets of the trajectory group; the time column is multiplied with
       ``femtosecond``. The final number of rows is validated with
       ``check_trajectory_rows``.

       It is highly recommended to first initialize the HDF5 file with the
       ``to_hdf5`` method of the System class.
    """
    with log.section('CP2KEH5'):
        if log.do_medium:
            log('Loading CP2K energy file \'%s\' into \'trajectory\' of HDF5 file \'%s\'' % (
                fn_ener, f.filename
            ))

        # Take care of the data group
        tgrp = get_trajectory_group(f)

        # Take care of the datasets
        dss = get_trajectory_datasets(
            tgrp,
            ('step', (1,)),
            ('time', (1,)),
            ('ekin', (1,)),
            ('temp', (1,)),
            ('epot', (1,)),
            ('econs', (1,)),
        )
        ds_step, ds_time, ds_ke, ds_temp, ds_pe, ds_cq = dss

        # Fill the datasets with data.
        row = get_last_trajectory_row(dss)
        counter = 0
        with open(fn_ener) as fin:
            # Check the header line. The default of next() turns an empty
            # file into an empty word list instead of a StopIteration, so it
            # is rejected like any other missing header.
            words = next(fin, '').split()
            if not words or words[0] != '#':
                raise ValueError('The first line in the energies file should be a header line starting with #.')
            # Comparing a slice also rejects headers with too few fields,
            # which would otherwise fail with an IndexError.
            expected_fields = ['Time[fs]', 'Kin.[a.u.]', 'Temp[K]',
                               'Pot.[a.u.]', 'Cons', 'Qty[a.u.]']
            if words[3:9] != expected_fields:
                raise ValueError('The fields in the header line indicate that this file contains unsupported data.')

            # Load lines; counter is the zero-based index of the data line
            # that is tested against sub.
            for line in fin:
                if slice_match(sub, counter):
                    words = line.split()
                    write_to_dataset(ds_step, float(words[0]), row)
                    write_to_dataset(ds_time, float(words[1])*femtosecond, row)
                    write_to_dataset(ds_ke, float(words[2]), row)
                    write_to_dataset(ds_temp, float(words[3]), row)
                    write_to_dataset(ds_pe, float(words[4]), row)
                    write_to_dataset(ds_cq, float(words[5]), row)
                    row += 1
                counter += 1

        # Check number of rows
        check_trajectory_rows(tgrp, dss, row)

Example #8

0

Show file

File: xyz.py Project: molmod/yaff

def xyz_to_hdf5(f, fn_xyz, sub=slice(None), file_unit=angstrom, name='pos'):
    """Write the frames of an XYZ trajectory file to a Yaff HDF5 file.

       **Arguments:**

       f
            An open and writable HDF5 file. It must contain a ``system``
            group with a ``numbers`` dataset.

       fn_xyz
            The filename of the XYZ trajectory file.

       **Optional arguments:**

       sub
            The sub argument for the XYZReader. This must be a slice object that
            defines the subsampling of the XYZ file reader. By default all
            frames are read.

       file_unit
            The unit of the data in the XYZ file. [default=angstrom]

       name
            The name of the HDF5 dataset where the trajectory is stored. This
            array is stored in the 'trajectory' group.

       A ``ValueError`` is raised when the system description is missing or
       when the number of atoms in both files differs; differing atomic
       numbers only trigger a warning. The final number of rows is validated
       with ``check_trajectory_rows``. It is highly recommended to first
       initialize the HDF5 file with the ``to_hdf5`` method of the System
       class.
    """
    with log.section('XYZH5'):
        if log.do_medium:
            log('Loading XYZ file \'%s\' into \'trajectory/%s\' of HDF5 file \'%s\'' % (
                fn_xyz, name, f.filename
            ))

        # The HDF5 file must already hold a system description to compare
        # the XYZ file against.
        if 'system' not in f:
            raise ValueError('The HDF5 file must contain a system group.')
        if 'numbers' not in f['system']:
            raise ValueError('The HDF5 file must have a system group with atomic numbers.')

        # A different atom count is fatal, different elements only a warning.
        xyz_reader = XYZReader(fn_xyz, sub=sub, file_unit=file_unit)
        natom = len(xyz_reader.numbers)
        if natom != len(f['system/numbers']):
            raise ValueError('The number of atoms in the HDF5 and the XYZ files does not match.')
        if (xyz_reader.numbers != f['system/numbers']).any():
            log.warn('The atomic numbers of the HDF5 and XYZ file do not match.')

        # Get the trajectory group and the single dataset to fill.
        tgrp = get_trajectory_group(f)
        frame_spec = (name, (natom, 3))
        ds, = get_trajectory_datasets(tgrp, frame_spec)

        # One row per frame; the title of each frame is not used.
        row = get_last_trajectory_row([ds])
        for _title, coordinates in xyz_reader:
            write_to_dataset(ds, coordinates, row)
            row += 1

        # Let the helper validate the final number of rows.
        check_trajectory_rows(tgrp, [ds], row)