def _read_header_config(fh, header_maxlines=HEADER_MAXLINES,
                        header_comment=HEADER_COMMENT):
    """Read an ini-style config from the commented header of a text file.

    At most `header_maxlines` lines are read from `fh`. Every line which
    (after stripping whitespace) starts with `header_comment` gets that
    leading comment sign removed and is passed on to a PydosConfigParser.
    `fh` is rewound to position 0 before this function returns or raises.

    Parameters
    ----------
    fh : file handle, readable
    header_maxlines : max lines to read down from top of the file
    header_comment : comment sign w/ which the header must be prefixed

    Returns
    -------
    PydosConfigParser object

    Raises
    ------
    StandardError
        If the line following the first `header_maxlines` lines still starts
        with `header_comment`, i.e. the header seems to be longer than
        `header_maxlines`.

    Examples
    --------
    >>> !cat foo.txt
    # [array]
    # shape = 3
    # axis = -1
    1
    2
    3
    >>> with open('foo.txt') as fh:
    ...     c = _read_header_config(fh)
    """
    fn = common.get_filename(fh)
    verbose("[_read_header_config]: reading header from '%s'" %fn)
    prefix_len = len(header_comment)
    header_lines = []
    try:
        for _ in range(header_maxlines):
            try:
                line = next(fh).strip()
            except StopIteration:
                break
            if line.startswith(header_comment):
                # Cut off only the leading comment sign. A blanket
                # line.replace(header_comment, '') would also delete comment
                # signs which are part of a value.
                header_lines.append(line[prefix_len:].strip() + '\n')
        # Read one more line to see if the header is bigger than
        # header_maxlines.
        try:
            if next(fh).strip().startswith(header_comment):
                raise StandardError("header seems to be > header_maxlines (%i)"
                                    %header_maxlines)
        except StopIteration:
            pass
        c = PydosConfigParser()
        c.readfp(StringIO(''.join(header_lines)))
    finally:
        # If header_maxlines > header size, we read beyond the header into
        # the data. That causes havoc for all functions that read fh
        # afterwards, so rewind in any case, also when an error is raised.
        fh.seek(0)
    return c
def _read_header_config(fh, header_maxlines=HEADER_MAXLINES,
                        header_comment=HEADER_COMMENT):
    """Read an ini-style config from the commented header of a text file.

    At most `header_maxlines` lines are read from `fh`. Every line which
    (after stripping whitespace) starts with `header_comment` gets that
    leading comment sign removed and is passed on to a ConfigParser. `fh` is
    rewound to position 0 before this function returns or raises.

    Parameters
    ----------
    fh : file handle, readable
    header_maxlines : max lines to read down from top of the file
    header_comment : comment sign w/ which the header must be prefixed

    Returns
    -------
    ConfigParser object

    Raises
    ------
    Exception
        If the line following the first `header_maxlines` lines still starts
        with `header_comment`, i.e. the header seems to be longer than
        `header_maxlines`.

    Examples
    --------
    >>> !cat foo.txt
    # [array]
    # shape = 3
    # axis = -1
    1
    2
    3
    >>> with open('foo.txt') as fh:
    ...     c = _read_header_config(fh)
    """
    fn = common.get_filename(fh)
    verbose("[_read_header_config]: reading header from '%s'" %fn)
    prefix_len = len(header_comment)
    header_lines = []
    try:
        for _ in range(header_maxlines):
            try:
                line = next(fh).strip()
            except StopIteration:
                break
            if line.startswith(header_comment):
                # Cut off only the leading comment sign. A blanket
                # line.replace(header_comment, '') would also delete comment
                # signs which are part of a value.
                header_lines.append(line[prefix_len:].strip() + '\n')
        # Read one more line to see if the header is bigger than
        # header_maxlines.
        try:
            if next(fh).strip().startswith(header_comment):
                raise Exception("header seems to be > header_maxlines (%i)"
                                %header_maxlines)
        except StopIteration:
            pass
        c = ConfigParser()
        c.read_file(StringIO(''.join(header_lines)))
    finally:
        # If header_maxlines > header size, we read beyond the header into
        # the data. That causes havoc for all functions that read fh
        # afterwards, so rewind in any case, also when an error is raised.
        fh.seek(0)
    return c
def _write_header_config(fh, config, header_comment=HEADER_COMMENT,
                         header_maxlines=HEADER_MAXLINES):
    """Dump `config` in ini style to `fh`, each line commented out.

    `config` is first written to an in-memory buffer. Each resulting line is
    then prefixed with `header_comment` plus one space, encoded with
    str.encode() and written to `fh`.

    Parameters
    ----------
    fh : file handle, writable; receives encoded (bytes) lines
    config : object with a write(<file-like>) method, e.g. a config parser
    header_comment : comment sign to put in front of each header line
    header_maxlines : max number of lines which the header may have, checked
        with common.assert_cond()
    """
    target_name = common.get_filename(fh)
    verbose("[_write_header_config]: writing header to '%s'" %target_name)
    # Let the config object render itself into memory first ...
    buf = StringIO()
    config.write(buf)
    buf.seek(0)
    rendered = buf.readlines()
    n_rendered = len(rendered)
    common.assert_cond(n_rendered <= header_maxlines,
                       "header has more then header_maxlines (%i) lines"
                       %header_maxlines)
    # ... then copy it over to the real file, line by line, commented out.
    for raw in rendered:
        commented = header_comment + ' ' + raw
        fh.write(commented.encode())
    buf.close()
def _write_header_config(fh, config, header_comment=HEADER_COMMENT,
                         header_maxlines=HEADER_MAXLINES):
    """Dump `config` in ini style to `fh`, each line commented out.

    `config` is first written to an in-memory buffer. Each resulting line is
    then prefixed with `header_comment` plus one space and written to `fh`.

    Parameters
    ----------
    fh : file handle, writable
    config : object with a write(<file-like>) method, e.g. a config parser
    header_comment : comment sign to put in front of each header line
    header_maxlines : max number of lines which the header may have, checked
        with common.assert_cond()
    """
    target_name = common.get_filename(fh)
    verbose("[_write_header_config]: writing header to '%s'" %target_name)
    # Let the config object render itself into memory first ...
    buf = StringIO()
    config.write(buf)
    buf.seek(0)
    rendered = buf.readlines()
    n_rendered = len(rendered)
    common.assert_cond(n_rendered <= header_maxlines,
                       "header has more then header_maxlines (%i) lines"
                       %header_maxlines)
    # ... then copy it over to the real file, line by line, commented out.
    for raw in rendered:
        commented = header_comment + ' ' + raw
        fh.write(commented)
    buf.close()
def ffloat(st):
    """Convert strings representing numbers to Python floats using float().

    The returned value is a double (or whatever the float() of your Python
    installation returns).

    Especially, strings representing Fortran floats are handled. Fortran
    Reals (= single) are converted to doubles. Kind parameters (like '_10' in
    '3.0d5_10') are NOT supported, they are ignored (a warning is emitted via
    verbose()).

    Parameters
    ----------
    st : string
        Text string. A `bytes` object is accepted too and decoded as ASCII.

    Returns
    -------
    float

    Raises
    ------
    ValueError
        If `st` contains a 'd' but does not look like a Fortran float, or if
        float() cannot convert the string.
    """
    # On Python 3, text is `str` while `bytes` is a distinct type on which the
    # substring test and the (text) regex below fail. Decode real bytes
    # first. On Python 2, `bytes` is `str` and nothing is decoded.
    if isinstance(st, bytes) and not isinstance(st, str):
        st = st.decode('ascii')
    assert_cond(isinstance(st, str), "`st` must be string")
    st = st.lower()
    if 'd' not in st:
        return float(st)
    # >>> s='  40.0d+02_10  '
    # >>> m.groups()
    # ('40.0', '+', '02', '_10  ')
    # >>> s='  40.0d02  '
    # >>> m.groups()
    # ('40.0', '', '02', '  ')
    rex = re.compile(r'\s*([+-]*[0-9\.]+)d([+-]*)([0-9]+)([_]*.*)')
    m = rex.match(st)
    if m is None:
        raise ValueError("no match on string '%s'" %st)
    kind = m.group(4)
    if kind.strip() != '':
        verbose("[ffloat] WARNING: skipping kind '%s' in string '%s'"
                %(kind, st))
    # mantissa, exponent sign, exponent -> '<mantissa>e<sign><exponent>'
    ss = "%se%s%s" %m.groups()[:-1]
    return float(ss)
def writetxt(fn, arr, axis=-1, maxdim=TXT_MAXDIM, header=True):
    """Write 1d, 2d or 3d arrays to txt file.

    If 3d, write as 2d chunks. Take the 2d chunks along `axis`. Write a
    commented out ini-style header in the file with infos needed by readtxt()
    to restore the right shape.

    The file is opened in binary mode ('wb+') and closed again in any case,
    also if writing fails.

    Parameters
    ----------
    fn : filename
    arr : nd array
    axis : axis along which 2d chunks are written
    maxdim : highest number of dims that `arr` is allowed to have
    header : bool
        Write ini style header. Can be used by readtxt().
    """
    verbose("[writetxt] writing: %s" %fn)
    common.assert_cond(arr.ndim <= maxdim,
                       'no rank > %i arrays supported' %maxdim)
    with open(fn, 'wb+') as fh:
        if header:
            c = ConfigParser()
            sec = 'array'
            c.add_section(sec)
            c.set(sec, 'shape', common.seq2str(arr.shape))
            c.set(sec, 'axis', str(axis))
            _write_header_config(fh, c)
        # 1d and 2d case
        # NOTE(review): the branch is chosen by comparing with `maxdim`, not
        # with 2 -- presumably fine as long as maxdim == 3; confirm.
        if arr.ndim < maxdim:
            np.savetxt(fh, arr)
        # 3d
        else:
            # TODO get rid of loop?
            # write 2d arrays, one by one
            sl = [slice(None)]*arr.ndim
            for ind in range(arr.shape[axis]):
                sl[axis] = ind
                # Index with a tuple. Indexing with a *list* of slices is
                # deprecated in numpy and no longer means multi-dim indexing
                # in recent versions.
                np.savetxt(fh, arr[tuple(sl)])
def writetxt(fn, arr, axis=-1, maxdim=TXT_MAXDIM, header=True):
    """Write 1d, 2d or 3d arrays to txt file.

    If 3d, write as 2d chunks. Take the 2d chunks along `axis`. Write a
    commented out ini-style header in the file with infos needed by readtxt()
    to restore the right shape.

    The file is opened in text mode ('w+') and closed again in any case, also
    if writing fails.

    Parameters
    ----------
    fn : filename
    arr : nd array
    axis : axis along which 2d chunks are written
    maxdim : highest number of dims that `arr` is allowed to have
    header : bool
        Write ini style header. Can be used by readtxt().
    """
    verbose("[writetxt] writing: %s" %fn)
    common.assert_cond(arr.ndim <= maxdim,
                       'no rank > %i arrays supported' %maxdim)
    with open(fn, 'w+') as fh:
        if header:
            c = PydosConfigParser()
            sec = 'array'
            c.add_section(sec)
            c.set(sec, 'shape', common.seq2str(arr.shape))
            c.set(sec, 'axis', axis)
            _write_header_config(fh, c)
        # 1d and 2d case
        # NOTE(review): the branch is chosen by comparing with `maxdim`, not
        # with 2 -- presumably fine as long as maxdim == 3; confirm.
        if arr.ndim < maxdim:
            np.savetxt(fh, arr)
        # 3d
        else:
            # TODO get rid of loop?
            # write 2d arrays, one by one
            sl = [slice(None)]*arr.ndim
            for ind in range(arr.shape[axis]):
                sl[axis] = ind
                # Index with a tuple. Indexing with a *list* of slices is
                # deprecated in numpy and no longer means multi-dim indexing
                # in recent versions.
                np.savetxt(fh, arr[tuple(sl)])
def ffloat(st):
    """Convert strings representing numbers to Python floats using float().

    The returned value is a double (or whatever the float() of your Python
    installation returns).

    Especially, strings representing Fortran floats are handled. Fortran
    Reals (= single) are converted to doubles. Kind parameters (like '_10' in
    '3.0d5_10') are NOT supported, they are ignored (a warning is emitted via
    verbose()).

    Parameters
    ----------
    st : string
        A `str`. On Python 3, a `bytes` object is accepted too and decoded as
        ASCII.

    Returns
    -------
    float

    Raises
    ------
    ValueError
        If `st` contains a 'd' but does not look like a Fortran float, or if
        float() cannot convert the string.
    """
    # types.StringType is Python 2 only and there it is the very same object
    # as `str`, so test against `str` directly. Real `bytes` (Python 3) are
    # decoded first; on Python 2 `bytes` is `str` and nothing is decoded.
    if isinstance(st, bytes) and not isinstance(st, str):
        st = st.decode('ascii')
    assert_cond(isinstance(st, str), "`st` must be string")
    st = st.lower()
    if 'd' not in st:
        return float(st)
    # >>> s='  40.0d+02_10  '
    # >>> m.groups()
    # ('40.0', '+', '02', '_10  ')
    # >>> s='  40.0d02  '
    # >>> m.groups()
    # ('40.0', '', '02', '  ')
    rex = re.compile(r'\s*([+-]*[0-9\.]+)d([+-]*)([0-9]+)([_]*.*)')
    m = rex.match(st)
    if m is None:
        raise ValueError("no match on string '%s'" %st)
    kind = m.group(4)
    if kind.strip() != '':
        verbose("[ffloat] WARNING: skipping kind '%s' in string '%s'"
                %(kind, st))
    # mantissa, exponent sign, exponent -> '<mantissa>e<sign><exponent>'
    ss = "%se%s%s" %m.groups()[:-1]
    return float(ss)
def write(self, dct, calc_dir='calc', mode='dct'):
    """Write the target file <calc_dir>/<self.basename> (e.g. calc/0/pw.in)
    by replacing placeholders in the template self.filename (e.g.
    calc.templ/pw.in).

    Which keys are replaced depends on self.keys:

    | self.keys == []   : replace nothing, just copy the template file
    | self.keys is None : use all keys of `dct`, don't warn about
    |                     placeholders which are not found
    | else              : use self.keys, warn about placeholders which are
    |                     not found

    The template text is self.txt if that is not None, else it is read from
    self.filename.

    Parameters
    ----------
    dct : dict
        key-value pairs, dct.keys() are converted to placeholders with
        self.func()
    calc_dir : str
        the dir where to write the target file to
    mode : str, {'dct', 'sql'}
        | mode='dct': replacement values are dct[<key>]
        | mode='sql': replacement values are dct[<key>].fileval and every
        |             dct[<key>] is an SQLEntry instance
    """
    assert mode in ['dct', 'sql'], ("Wrong 'mode' kwarg, use 'dct' "
                                    "or 'sql'")
    # copy_only : bypass reading the file and passing the text thru the
    # replacement machinery and getting the text back, unchanged. While
    # this works, it is slower and useless.
    if self.keys == []:
        _keys = None
        txt = None
        copy_only = True
    else:
        if self.keys is None:
            # keys() instead of iterkeys(): same iteration, works on
            # Python 2 and 3
            _keys = dct.keys()
            warn_not_found = False
        else:
            _keys = self.keys
            warn_not_found = True
        if self.txt is None:
            txt = common.file_read(self.filename)
        else:
            txt = self.txt
        copy_only = False

    tgt = pj(calc_dir, self.basename)
    verbose("write: %s" %tgt)
    if copy_only:
        # Two values need two placeholders. With only one '%s', the string
        # formatting raises TypeError and the copy never happens.
        verbose("write: ignoring input, just copying file %s to %s"
                %(self.filename, tgt))
        shutil.copy(self.filename, tgt)
    else:
        rules = {}
        for key in _keys:
            if mode == 'dct':
                rules[self.func(key)] = dct[key]
            elif mode == 'sql':
                # dct = sql_record, a list of SQLEntry's
                rules[self.func(key)] = dct[key].fileval
            else:
                # only reachable if the assert above is disabled (python -O)
                raise StandardError("'mode' must be wrong")
        new_txt = common.template_replace(txt,
                                          rules,
                                          mode='txt',
                                          conv=True,
                                          warn_not_found=warn_not_found,
                                          warn_mult_found=False,
                                          disp=False)
        common.file_write(tgt, new_txt)
def fvacf(vel, m=None, method=2, nthreads=None):
    """Calculate the VACF with the Fortran function _flib.vacf().

    Otherwise same functionallity as pyvacf(). Use this for production
    calculations.

    Parameters
    ----------
    vel : 3d array, (nstep, natoms, 3)
        Atomic velocities.
    m : 1d array (natoms,)
        Atomic masses. If None, a dummy array is handed to the extension and
        use_m=0 is passed.
    method : int
        | 1 : loops
        | 2 : vectorized loops
    nthreads : int or None
        If int, then use this many OpenMP threads in the Fortran extension.
        Only useful if the extension was compiled with OpenMP support, of
        course. If None, then the environment variable OMP_NUM_THREADS is
        used if it is set.

    Returns
    -------
    c : 1d array (nstep,)
        VACF

    Notes
    -----
    Fortran extension::

        $ python -c "import _flib; print _flib.vacf.__doc__"
        vacf - Function signature:
          c = vacf(v,m,c,method,use_m,[nthreads,natoms,nstep])
        Required arguments:
          v : input rank-3 array('d') with bounds (natoms,3,nstep)
          m : input rank-1 array('d') with bounds (natoms)
          c : input rank-1 array('d') with bounds (nstep)
          method : input int
          use_m : input int
        Optional arguments:
          nthreads : input int
          natoms := shape(v,0) input int
          nstep := shape(v,2) input int
        Return objects:
          c : rank-1 array('d') with bounds (nstep)

    Shape of `vel`: The old array shapes were (natoms, 3, nstep), the new is
    (nstep,natoms,3). NOTE(review): the signature above still lists the old
    bounds, while this function hands `vel` over unchanged -- confirm which
    layout the compiled extension expects.

    See Also
    --------
    :mod:`pwtools._flib`
    :func:`vacf_pdos`
    """
    # f2py copies and C-order vs. Fortran-order arrays
    # ------------------------------------------------
    # With vel = np.asarray(vel, order='F'), we convert vel to F-order and a
    # copy is made by numpy. If we don't do it, the f2py wrapper code does.
    # This copy is unavoidable, unless we allocate the array vel in F-order in
    # the first place.
    #   c = _flib.vacf(np.asarray(vel, order='F'), m, c, method, use_m)
    #
    # speed
    # -----
    # The most costly step is calculating the VACF. FFTing that is only the
    # fft of a 1d-array which is fast, even if the length is not a power of
    # two. Padding is not needed.
    n_atoms = vel.shape[1]
    n_steps = vel.shape[0]
    assert vel.shape[-1] == 3, ("last dim of vel must be 3: (nstep,natoms,3)")
    # `c` as "intent(in, out)" could be "intent(out), allocatable" or so,
    # makes extension more pythonic, don't pass `c` in, let be allocated on
    # Fortran side
    c = np.zeros((n_steps,), dtype=float)
    use_m = 0 if m is None else 1
    if m is None:
        # dummy, content is irrelevant since use_m == 0
        m = np.empty((n_atoms,), dtype=float)
    verbose("calling _flib.vacf ...")
    # Possible f2py bug workaround: The f2py extension does not always set
    # the number of threads correctly according to OMP_NUM_THREADS. Catch
    # OMP_NUM_THREADS here and set number of threads using the "nthreads"
    # arg.
    env_name = 'OMP_NUM_THREADS'
    if nthreads is None and env_name in os.environ:
        nthreads = int(os.environ[env_name])
    # Pass `nthreads` only if we have a value, else let the extension decide.
    optional_args = () if nthreads is None else (nthreads,)
    c = _flib.vacf(vel, m, c, method, use_m, *optional_args)
    verbose("... ready")
    return c
def readtxt(fh, axis=None, shape=None, header_maxlines=HEADER_MAXLINES,
            header_comment=HEADER_COMMENT, maxdim=TXT_MAXDIM, **kwargs):
    """Read arrays from .txt file using np.loadtxt().

    If the file stores a 3d array as consecutive 2d arrays (e.g. output from
    molecular dynamics code) the file header (see writetxt()) is used to
    determine the shape of the original 3d array and the array is reshaped
    accordingly.

    If `axis` or `shape` is not None, then these are used instead and
    the header, if existing, will be ignored. This has the potential to shoot
    yourself in the foot. Use with care.

    If `axis` and `shape` are None, then this function does not work with
    normal text files which have no special header. Use np.loadtxt() in this
    case.

    Parameters
    ----------
    fh : file_like
    axis : int
    shape : tuple
    header_maxlines : int
        Max number of lines which the header may have. Used for parsing the
        header and for the empty-file check.
    header_comment : str
        Comment sign w/ which header lines are prefixed. Used for parsing the
        header and for the empty-file check.
    maxdim : int
        Highest number of dims that the array is allowed to have.
    **kwargs : keyword args passed to numpy.loadtxt(), e.g. comments='@@' to
        ignore weird lines etc.

    Returns
    -------
    nd array
        Empty array (np.array([])) if no data line is found within the first
        `header_maxlines` lines.
    """
    fn = common.get_filename(fh)
    verbose("[readtxt] reading: %s" %fn)
    verbose("[readtxt] axis: %s" %str(axis))
    verbose("[readtxt] shape: %s" %str(shape))
    if shape is None or axis is None:
        # Parse the header with the same settings as the empty-file check
        # below, not with the module defaults.
        c = _read_header_config(fh, header_maxlines=header_maxlines,
                                header_comment=header_comment)
        sec = 'array'
        if shape is None:
            shape = common.str2tup(c.get(sec, 'shape'))
        if axis is None:
            axis = int(c.get(sec, 'axis'))
    ndim = len(shape)
    common.assert_cond(ndim <= maxdim,
                       'no rank > %i arrays supported' %maxdim)
    # axis = -1 means the last dim
    if axis == -1:
        axis = ndim - 1

    # handle empty files (no data, only special header or nothing at all):
    # look for at least one line which is neither blank nor a header line
    has_data = False
    for _ in range(header_maxlines):
        try:
            line = next(fh).strip()
        except StopIteration:
            break
        if line != '' and not line.startswith(header_comment):
            has_data = True
            break
    # rewind, np.loadtxt() must see the file from the start
    fh.seek(0)
    if not has_data:
        verbose("[readtxt] WARNING: empty file: %s" %fn)
        return np.array([])
    read_arr = np.loadtxt(fh, **kwargs)
    # 1d and 2d
    if ndim <= 2:
        arr = read_arr
    # 3d
    else:
        arr = arr2d_to_3d(read_arr, shape=shape, axis=axis)
    verbose("[readtxt] returning shape: %s" %str(arr.shape))
    return arr
def write(self, dct, calc_dir='calc', mode='dct'):
    """Write the target file <calc_dir>/<self.basename> (e.g. calc/0/pw.in)
    by replacing placeholders in the template self.filename (e.g.
    calc.templ/pw.in).

    Which keys are replaced depends on self.keys:

    | self.keys == []   : replace nothing, just copy the template file
    | self.keys is None : use all keys of `dct`, don't warn about
    |                     placeholders which are not found
    | else              : use self.keys, warn about placeholders which are
    |                     not found

    The template text is self.txt if that is not None, else it is read from
    self.filename.

    Parameters
    ----------
    dct : dict
        key-value pairs, dct.keys() are converted to placeholders with
        self.func()
    calc_dir : str
        the dir where to write the target file to
    mode : str, {'dct', 'sql'}
        | mode='dct': replacement values are dct[<key>]
        | mode='sql': replacement values are dct[<key>].fileval and every
        |             dct[<key>] is an SQLEntry instance
    """
    assert mode in ['dct', 'sql'], ("Wrong 'mode' kwarg, use 'dct' "
                                    "or 'sql'")
    # copy_only : bypass reading the file and passing the text thru the
    # replacement machinery and getting the text back, unchanged. While
    # this works, it is slower and useless.
    if self.keys == []:
        _keys = None
        txt = None
        copy_only = True
    else:
        if self.keys is None:
            _keys = dct.keys()
            warn_not_found = False
        else:
            _keys = self.keys
            warn_not_found = True
        if self.txt is None:
            txt = common.file_read(self.filename)
        else:
            txt = self.txt
        copy_only = False

    tgt = pj(calc_dir, self.basename)
    verbose("write: %s" % tgt)
    if copy_only:
        # Two values need two placeholders. With only one '%s', the string
        # formatting raises TypeError and the copy never happens.
        verbose("write: ignoring input, just copying file %s to %s"
                % (self.filename, tgt))
        shutil.copy(self.filename, tgt)
    else:
        rules = {}
        for key in _keys:
            if mode == 'dct':
                rules[self.func(key)] = dct[key]
            elif mode == 'sql':
                # dct = sql_record, a list of SQLEntry's
                rules[self.func(key)] = dct[key].fileval
            else:
                # only reachable if the assert above is disabled (python -O)
                raise Exception("'mode' must be wrong")
        new_txt = common.template_replace(txt,
                                          rules,
                                          mode='txt',
                                          conv=True,
                                          warn_not_found=warn_not_found,
                                          warn_mult_found=False,
                                          disp=False)
        common.file_write(tgt, new_txt)
def readtxt(fh, axis=None, shape=None, header_maxlines=HEADER_MAXLINES,
            header_comment=HEADER_COMMENT, maxdim=TXT_MAXDIM, **kwargs):
    """Read arrays from .txt file using np.loadtxt().

    If the file stores a 3d array as consecutive 2d arrays (e.g. output from
    molecular dynamics code) the file header (see writetxt()) is used to
    determine the shape of the original 3d array and the array is reshaped
    accordingly.

    If `axis` or `shape` is not None, then these are used instead and
    the header, if existing, will be ignored. This has the potential to shoot
    yourself in the foot. Use with care.

    If `axis` and `shape` are None, then this function does not work with
    normal text files which have no special header. Use np.loadtxt() in this
    case.

    Parameters
    ----------
    fh : file_like
    axis : int
    shape : tuple
    header_maxlines : int
        Max number of lines which the header may have. Used for parsing the
        header and for the empty-file check.
    header_comment : str
        Comment sign w/ which header lines are prefixed. Used for parsing the
        header and for the empty-file check.
    maxdim : int
        Highest number of dims that the array is allowed to have.
    **kwargs : keyword args passed to numpy.loadtxt(), e.g. comments='@@' to
        ignore weird lines etc.

    Returns
    -------
    nd array
        Empty array (np.array([])) if no data line is found within the first
        `header_maxlines` lines.
    """
    fn = common.get_filename(fh)
    verbose("[readtxt] reading: %s" %fn)
    verbose("[readtxt] axis: %s" %str(axis))
    verbose("[readtxt] shape: %s" %str(shape))
    if shape is None or axis is None:
        # Parse the header with the same settings as the empty-file check
        # below, not with the module defaults.
        c = _read_header_config(fh, header_maxlines=header_maxlines,
                                header_comment=header_comment)
        sec = 'array'
        if shape is None:
            shape = common.str2tup(c.get(sec, 'shape'))
        if axis is None:
            axis = int(c.get(sec, 'axis'))
    ndim = len(shape)
    common.assert_cond(ndim <= maxdim,
                       'no rank > %i arrays supported' %maxdim)
    # axis = -1 means the last dim
    if axis == -1:
        axis = ndim - 1

    # handle empty files (no data, only special header or nothing at all):
    # look for at least one line which is neither blank nor a header line
    has_data = False
    for _ in range(header_maxlines):
        try:
            # next(fh) instead of fh.next(): same call on Python 2, also
            # works on Python 3
            line = next(fh).strip()
        except StopIteration:
            break
        if line != '' and not line.startswith(header_comment):
            has_data = True
            break
    # rewind, np.loadtxt() must see the file from the start
    fh.seek(0)
    if not has_data:
        verbose("[readtxt] WARNING: empty file: %s" %fn)
        return np.array([])
    read_arr = np.loadtxt(fh, **kwargs)
    # 1d and 2d
    if ndim <= 2:
        arr = read_arr
    # 3d
    else:
        arr = arr2d_to_3d(read_arr, shape=shape, axis=axis)
    verbose("[readtxt] returning shape: %s" %str(arr.shape))
    return arr
def fvacf(vel, m=None, method=2, nthreads=None):
    """Interface to Fortran function _flib.vacf(). Otherwise same
    functionallity as pyvacf(). Use this for production calculations.

    Parameters
    ----------
    vel : 3d array, (nstep, natoms, 3)
        Atomic velocities.
    m : 1d array (natoms,)
        Atomic masses. If None, a dummy array is handed to the extension and
        use_m=0 is passed.
    method : int
        | 1 : loops
        | 2 : vectorized loops
    nthreads : int or None
        If int, then use this many OpenMP threads in the Fortran extension.
        Only useful if the extension was compiled with OpenMP support, of
        course. If None, then the environment variable OMP_NUM_THREADS is
        used if it is set.

    Returns
    -------
    c : 1d array (nstep,)
        VACF

    Notes
    -----
    Fortran extension::

        $ python -c "import _flib; print _flib.vacf.__doc__"
        vacf - Function signature:
          c = vacf(v,m,c,method,use_m,[nthreads,natoms,nstep])
        Required arguments:
          v : input rank-3 array('d') with bounds (natoms,3,nstep)
          m : input rank-1 array('d') with bounds (natoms)
          c : input rank-1 array('d') with bounds (nstep)
          method : input int
          use_m : input int
        Optional arguments:
          nthreads : input int
          natoms := shape(v,0) input int
          nstep := shape(v,2) input int
        Return objects:
          c : rank-1 array('d') with bounds (nstep)

    Shape of `vel`: The old array shapes were (natoms, 3, nstep), the new is
    (nstep,natoms,3). NOTE(review): the signature above still lists the old
    bounds, while this function hands `vel` over unchanged -- confirm which
    layout the compiled extension expects.

    See Also
    --------
    _flib
    vacf_pdos()
    """
    # f2py copies and C-order vs. Fortran-order arrays
    # ------------------------------------------------
    # With vel = np.asarray(vel, order='F'), we convert vel to F-order and a
    # copy is made by numpy. If we don't do it, the f2py wrapper code does.
    # This copy is unavoidable, unless we allocate the array vel in F-order in
    # the first place.
    #   c = _flib.vacf(np.asarray(vel, order='F'), m, c, method, use_m)
    #
    # speed
    # -----
    # The most costly step is calculating the VACF. FFTing that is only the
    # fft of a 1d-array which is fast, even if the length is not a power of
    # two. Padding is not needed.
    #
    natoms = vel.shape[1]
    nstep = vel.shape[0]
    assert vel.shape[-1] == 3, ("last dim of vel must be 3: (nstep,natoms,3)")
    # `c` as "intent(in, out)" could be "intent(out), allocatable" or so,
    # makes extension more pythonic, don't pass `c` in, let be allocated on
    # Fortran side
    c = np.zeros((nstep,), dtype=float)
    if m is None:
        # dummy
        m = np.empty((natoms,), dtype=float)
        use_m = 0
    else:
        use_m = 1
    verbose("calling _flib.vacf ...")
    if nthreads is None:
        # Possible f2py bug workaround: The f2py extension does not always set
        # the number of threads correctly according to OMP_NUM_THREADS. Catch
        # OMP_NUM_THREADS here and set number of threads using the "nthreads"
        # arg.
        key = 'OMP_NUM_THREADS'
        # `in` instead of has_key(): same test on Python 2, and has_key() no
        # longer exists on Python 3.
        if key in os.environ:
            nthreads = int(os.environ[key])
    if nthreads is None:
        c = _flib.vacf(vel, m, c, method, use_m)
    else:
        c = _flib.vacf(vel, m, c, method, use_m, nthreads)
    verbose("... ready")
    return c