def test_extend_array():
    """Check ``num.extend_array`` for every axis of a random 3x3 array.

    The array is extended ``nrep`` times along axis 0, 1, 2 and -1. The test
    verifies the resulting shapes, that ``axis=2`` and ``axis=-1`` give the
    same result, and that each of the ``nrep`` slices taken along the new axis
    compares equal (via the ``equal`` helper) to the input array.
    """
    arr = rand(3, 3)
    nrep = 5
    axes = (0, 1, 2, -1)
    extended = {ax: num.extend_array(arr, nrep, axis=ax) for ax in axes}

    # The new axis of length `nrep` must show up at the requested position.
    wanted_shapes = {
        0: (nrep, 3, 3),
        1: (3, nrep, 3),
        2: (3, 3, nrep),
        -1: (3, 3, nrep),
    }
    for ax in axes:
        assert extended[ax].shape == wanted_shapes[ax]

    # For a 2d input, axis=-1 and axis=2 address the same (last) position.
    equal(extended[2], extended[-1])

    # Each slice along the new axis must reproduce the input,
    # e.g. slicetake(a0, 3, 0) -> a0[3,:,:]
    for ax in (0, 1, 2):
        for idx in range(nrep):
            equal(arr, num.slicetake(extended[ax], idx, axis=ax))
def raw_slice_get(self, attr_name, sl, axis):
    """Return ``self._<attr_name>_raw`` sliced by `sl` along `axis`, or None.

    Shortcut method:

    * call ``self.check_set_attr('_<attr_name>_raw')``
      (NOTE(review): presumably this tries to set the raw attribute and
      reports whether it is usable -- confirm against the class)
    * if that returns a true value, slice the raw attribute with
      ``num.slicetake(arr, sl, axis)`` and return the result, else return
      None

    Parameters
    ----------
    attr_name : str
        Attribute base name; the attribute read is ``_<attr_name>_raw``.
    sl : slice or int
        Passed on to ``num.slicetake``.
    axis : int
        Axis along which to slice.

    Returns
    -------
    The sliced array, its only element if the slice is 1d with length 1,
    or None if ``check_set_attr`` returned a false value.
    """
    raw_name = '_%s_raw' % attr_name
    if not self.check_set_attr(raw_name):
        return None
    sliced = num.slicetake(getattr(self, raw_name), sl, axis)
    # Unwrap results like array([10]) so that callers get a scalar.
    is_single = (sliced.ndim == 1) and (len(sliced) == 1)
    return sliced[0] if is_single else sliced
def raw_slice_get(self, attr_name, sl, axis):
    """Slice the raw attribute ``_<attr_name>_raw`` along `axis`.

    ``self.check_set_attr('_<attr_name>_raw')`` is called first. Only if it
    returns a true value is the attribute fetched and sliced with
    ``num.slicetake(arr, sl, axis)``; otherwise None is returned.
    (NOTE(review): what ``check_set_attr`` does internally is not visible
    here -- presumably it makes sure the raw attribute is set; confirm.)

    A 1d result holding exactly one element (e.g. ``array([10])``) is
    returned as that element instead of as an array.

    Parameters
    ----------
    attr_name : str
        Base name of the attribute, without the ``_`` prefix and ``_raw``
        suffix.
    sl : slice or int
        Slice specification handed to ``num.slicetake``.
    axis : int
        Axis to slice along.

    Returns
    -------
    Sliced array, a single element, or None.
    """
    key = '_%s_raw' % attr_name
    if self.check_set_attr(key):
        taken = num.slicetake(getattr(self, key), sl, axis)
        # Anything that is not a length-1 1d array is returned unchanged.
        if (taken.ndim != 1) or (len(taken) != 1):
            return taken
        return taken[0]
    return None
def smooth(data, kern, axis=0, edge='m', norm=True):
    """Smooth N-dim `data` by convolution with a kernel `kern`.

    Uses scipy.signal.fftconvolve().

    Note that due to edge effect handling (padding) and kernel normalization,
    the convolution identity convolve(data,kern) == convolve(kern,data) doesn't
    apply here. We always return an array of ``data.shape``.

    Parameters
    ----------
    data : nd array
        The data to smooth. Example: 1d (N,) or (N,K,3) for trajectory
    kern : nd array
        Convolution kernel. Example: 1d (M,) or (M,1,1) for trajectory along
        axis=0 (data length N)
    axis : int
        Axis along which to do the smoothing. That is actually not needed for
        the convolution ``fftconvolve(data, kern)`` but is used for padding the
        data along `axis` to handle edge effects before convolution.
    edge : str
        Method for edge effect handling.
            | 'm' : pad with mirror signal
            | 'c' : pad with constant values (i.e. ``data[0]`` and
            |       ``data[-1]`` in the 1d case)
    norm : bool
        Normalize kernel. Default is True. This assures that the smoothed
        signal lies within the data. Note that this is not True for kernels
        with very big spread (i.e. ``hann(N*10)`` or ``gaussian(N/2,
        std=N*10)``. Then the kernel is effectively a constant.

    Returns
    -------
    ret : data.shape
        Convolved signal.

    Raises
    ------
    ValueError
        If `edge` is neither 'm' nor 'c'.

    Examples
    --------
    >>> from pwtools.signal import welch
    >>> from numpy.random import rand
    >>> x = linspace(0,2*pi,500); a=cos(x)+rand(500)
    >>> plot(a, color='0.7')
    >>> k=scipy.signal.hann(21)
    >>> plot(signal.smooth(a,k), 'r', label='hann')
    >>> k=scipy.signal.gaussian(21, 3)
    >>> plot(signal.smooth(a,k), 'g', label='gauss')
    >>> k=welch(21)
    >>> plot(signal.smooth(a,k), 'y', label='welch')
    >>> legend()
    >>> # odd kernel [0,1,0] reproduces data exactly, i.e. convolution with
    >>> # delta peak
    >>> figure(); title('smooth with delta [0,1,0]')
    >>> x=linspace(0,2*pi,15); k=scipy.signal.hann(3)
    >>> plot(cos(x))
    >>> plot(signal.smooth(cos(x),k), 'r')
    >>> legend()
    >>> # edge effects with normal convolution
    >>> figure(); title('edge effects')
    >>> x=rand(20)+10; k=scipy.signal.hann(11);
    >>> plot(x); plot(signal.smooth(x,k),label="smooth");
    >>> plot(scipy.signal.convolve(x,k/k.sum(),'same'), label='convolve')
    >>> legend()
    >>> # edge effect methods
    >>> figure(); title('edge effect methods')
    >>> x=rand(20)+10; k=scipy.signal.hann(20);
    >>> plot(x); plot(signal.smooth(x,k,edge='m'),label="edge='m'");
    >>> plot(signal.smooth(x,k,edge='c'),label="edge='c'");
    >>> legend()
    >>> # smooth a trajectory of atomic coordinates
    >>> figure(); title('trajectory')
    >>> x = linspace(0,2*pi,500)
    >>> a = rand(500,2,3) # (nstep, natoms, 3)
    >>> a[:,0,:] += cos(x)[:,None]
    >>> a[:,1,:] += sin(x)[:,None]
    >>> k=scipy.signal.hann(21)[:,None,None]
    >>> y = signal.smooth(a,k)
    >>> plot(a[:,0,0], color='0.7'); plot(y[:,0,0],'b',
    ...                                    label='atom1 x')
    >>> plot(a[:,1,0], color='0.7'); plot(y[:,1,0],'r',
    ...                                    label='atom2 x')
    >>> legend()

    References
    ----------
    [1] http://wiki.scipy.org/Cookbook/SignalSmooth

    See Also
    --------
    :func:`welch`
    :func:`lorentz`

    Notes
    -----

    Kernels:

    Even kernels result in shifted signals, odd kernels are better.
    However, for N >> M, it doesn't make a difference really.

    Usual kernels (window functions) are created by e.g.
    ``scipy.signal.hann(M)``. For ``kern=scipy.signal.gaussian(M,
    std)``, two values are needed, namely `M` and `std`, where `M`
    determines the number of points calculated for the convolution kernel, as
    in the other cases. But what is actually important is `std`, which
    determines the "used width" of the gaussian. Say we use N=100
    and M=50. That would be a massively wide window and we would
    smooth away all details. OTOH, using ``gaussian(50,3)`` would generate a
    kernel with the same number `M` of data points, but the gauss peak which is
    effectively used for convolution is much smaller. For ``gaussian()``,
    `M` should be bigger than `std`. The convolved signal will converge
    with increasing `M`. Good values are `M=6*std` and bigger. For
    :func:`lorentz`, much wider kernels are needed such as `M=100*std` b/c
    of the long tails of the Lorentz function. Testing is mandatory!

    Edge effects:

    We use padding of the signal with ``M=len(kern)`` values at both ends such
    that the convolution with `kern` doesn't zero the `data` at the signal
    edges. We have two methods. `edge='m'`: pad with the signal mirrored at 0
    and -1 or `edge='c'`: use the constant values ``data[0]`` and
    ``data[-1]``. Many more of these variants may be thought of. The choice of
    how to extend the data essentially involves an assumption about how the
    signal *would* continue, which is signal-dependent. In practice, we usually
    have ``M << N`` (e.g. ``scipy.signal.hann(M)``) or ``std << N``
    (``scipy.signal.gaussian(M, std``). Then, both methods are identical in the
    middle and show only very small differences at the edges. Essentially, edge
    effect handling shall only ensure that the smoothed signal doesn't go to
    zero and that must be independent of the method, which is the case.

    Memory:

    For big data, fftconvolve() can easily eat up all your memory, for
    example::

    >>> # assume axis=0 is the axis along which to convolve
    >>> arr = ones((100000,200,3))
    >>> kern = scipy.signal.hann(101)
    >>> ret = scipy.signal.fftconvolve(arr, kern[:,None,None])

    Then it is better to loop over some or all of the remaining dimensions::

    >>> ret = np.empty_like(arr)
    >>> for jj in range(arr.shape[1]):
    >>>     ret[:,jj,:] = smooth(arr[:,jj,:], kern[:,None])

    or::

    >>> for jj in range(arr.shape[1]):
    >>>     for kk in range(arr.shape[2]):
    >>>         ret[:,jj,kk] = smooth(arr[:,jj,kk], kern)

    The size of the chunk over which you explicitly loop depends on the data
    of course. We do exactly this in :func:`pwtools.crys.smooth`.
    """
    # edge = 'm'
    # ----------
    #
    # Add mirror of the signal left and right to handle edge effects, up to
    # signal length N on both ends. If M > N then fill padding regions up with
    # zeros until we have sig = [(M,), (N,), (M,)]. fftconvolve(..., 'valid')
    # always returns only the signal length where sig and kern overlap
    # completely. Therefore, data at the far end doesn't influence the edge and
    # we can safely put zeros (or anything else) there. The returned length is
    # always N+M+1.
    #
    # example (M < N), add M=3 data parts left and right
    # npad   = 3
    # data   =       [1,2,3,4,5,6]
    # dleft  = [4,3,2]
    # dright =                   [5,4,3]
    # sig    = [4,3,2,1,2,3,4,5,6,5,4,3]
    # If M = 8 > N, then:
    # dleft  = [6,5,4,3,2]
    # dright =                     [5,4,3,2,1]
    # sig    = [0,0,0,6,5,4,3,2,1,2,3,4,5,6,5,4,3,2,1,0,0,0]
    #
    # edge = 'c'
    # ----------
    # The same, but all padded values are the first (left) and last (right)
    # data value.
    N = data.shape[axis]
    M = kern.shape[axis]
    if edge == 'm':
        npad = min(M, N)
        # Reversed slices which mirror the signal at index 0 and -1 without
        # repeating the edge values themselves.
        sleft = slice(npad, 0, -1)
        sright = slice(-2, -(npad + 2), -1)
        dleft = num.slicetake(data, sl=sleft, axis=axis)
        dright = num.slicetake(data, sl=sright, axis=axis)
        assert dleft.shape == dright.shape
        K = dleft.shape[axis]
        if K < M:
            # Not enough data to mirror: fill the outer part with zeros.
            dleft = pad_zeros(dleft, axis=axis, where='start', nadd=M - K)
            dright = pad_zeros(dright, axis=axis, where='end', nadd=M - K)
    elif edge == 'c':
        # Index which re-inserts a length-1 axis at position `axis`. It must be
        # a tuple: recent NumPy versions no longer interpret a list of
        # slices/None as a multidimensional index.
        sl = [slice(None)] * data.ndim
        sl[axis] = None
        sl = tuple(sl)
        dleft = np.repeat(num.slicetake(data, sl=0, axis=axis)[sl], M,
                          axis=axis)
        dright = np.repeat(num.slicetake(data, sl=-1, axis=axis)[sl], M,
                           axis=axis)
        assert dleft.shape == dright.shape
        # 1d special case: (M,1) -> (M,)
        if data.ndim == 1 and dleft.ndim == 2 and dleft.shape[1] == 1:
            dleft = dleft[:, 0]
            dright = dright[:, 0]
    else:
        raise ValueError("unknown value for edge")
    sig = np.concatenate((dleft, data, dright), axis=axis)
    kk = kern / float(kern.sum()) if norm else kern
    ret = fftconvolve(sig, kk, 'valid')
    assert ret.shape[axis] == N + M + 1, "unexpected convolve result shape"
    # The padded signal is not needed anymore, release the memory early.
    del sig
    # Cut the length-N part out of the length N+M+1 result.
    if M % 2 == 0:
        # even kernel, shift result to right
        # (slice(M//2 + 1, -(M//2)) would shift it to the left instead)
        sl = slice(M // 2, -(M // 2) - 1)
    else:
        sl = slice(M // 2 + 1, -(M // 2) - 1)
    ret = num.slicetake(ret, sl=sl, axis=axis)
    assert ret.shape == data.shape, ("ups, ret.shape (%s)!= data.shape (%s)"
                                     % (ret.shape, data.shape))
    return ret
def pdos(vel, dt=1.0, m=None, full_out=False, area=1.0, window=True,
         npad=None, tonext=False, mirr=False, method='direct'):
    """Phonon DOS by FFT of the VACF or direct FFT of atomic velocities.

    Integral area is normalized to `area`. It is possible (and recommended) to
    zero-pad the velocities (see `npad`).

    Parameters
    ----------
    vel : 3d array (nstep, natoms, 3)
        atomic velocities
    dt : time step
    m : 1d array (natoms,),
        atomic mass array, if None then mass=1.0 for all atoms is used
    full_out : bool
        return additional arrays, see Returns
    area : float
        normalize area under frequency-PDOS curve to this value
    window : bool
        use Welch windowing on data before FFT (reduces leaking effect,
        recommended)
    npad : {None, int}
        method='direct' only: Length of zero padding along `axis`. `npad=None`
        = no padding, `npad > 0` = pad by a length of ``(nstep-1)*npad``. `npad
        > 5` usually results in sufficient interpolation.
    tonext : bool
        method='direct' only: Pad `vel` with zeros along `axis` up to the next
        power of two after the array length determined by `npad`. This gives
        you speed, but variable (better) frequency resolution.
    mirr : bool
        method='vacf' only: mirror one-sided VACF at t=0 before fft
    method : str
        'direct' or 'vacf'

    Returns
    -------
    if full_out = False
        | ``(faxis, pdos)``
        | faxis : 1d array [1/unit(dt)]
        | pdos : 1d array, the phonon DOS, normalized to `area`
    if full_out = True (one flat tuple in both cases)
        | if method == 'direct':
        |     ``(faxis, pdos, full_faxis, full_pdos, split_idx)``
        | if method == 'vacf':
        |     ``(faxis, pdos, full_faxis, full_pdos, split_idx, vacf,
        |     fft_vacf)``
        |     fft_vacf : 1d complex array, result of fft(vacf) or
        |                fft(mirror(vacf))
        |     vacf : 1d array, the VACF

    Raises
    ------
    ValueError
        If `method` is neither 'direct' nor 'vacf'.

    Examples
    --------
    >>> from pwtools.constants import fs,rcm_to_Hz
    >>> tr = Trajectory(...)
    >>> # freq in [Hz] if timestep in [s]
    >>> freq,dos = pdos(tr.velocity, m=tr.mass, dt=tr.timestep*fs,
    >>>                 method='direct', npad=1)
    >>> # frequency in [1/cm]
    >>> plot(freq/rcm_to_Hz, dos)

    Notes
    -----
    padding (only method='direct'): With `npad` we pad the velocities `vel`
    with ``npad*(nstep-1)`` zeros along `axis` (the time axis) before FFT
    b/c the signal is not periodic. For `npad=1`, this gives us the exact
    same spectrum and frequency resolution as with ``pdos(...,
    method='vacf',mirr=True)`` b/c the array to be fft'ed has length
    ``2*nstep-1`` along the time axis in both cases (remember that the array
    length = length of the time axis influences the freq. resolution). FFT is
    only fast for arrays with length = a power of two. Therefore, you may get
    very different fft speeds depending on whether ``2*nstep-1`` is a power of
    two or not (in most cases it won't). Try using `tonext` but remember that
    you get another (better) frequency resolution.

    References
    ----------
    [1] Phys Rev B 47(9) 4863, 1993

    See Also
    --------
    :func:`pwtools.signal.fftsample`
    :func:`pwtools.signal.acorr`
    :func:`direct_pdos`
    :func:`vacf_pdos`
    """
    mass = m
    # assume vel.shape = (nstep,natoms,3)
    axis = 0
    assert vel.shape[-1] == 3
    # Fail early instead of silently returning None after all the work.
    if method not in ('direct', 'vacf'):
        raise ValueError("unknown method: %r, use 'direct' or 'vacf'"
                         % (method,))
    if mass is not None:
        assert len(mass) == vel.shape[1], "len(mass) != vel.shape[1]"
        # define here b/c may be used twice below
        mass_bc = mass[None, :, None]
    if window:
        # Index that keeps the time axis (':') and adds new axes elsewhere, so
        # that the window broadcasts against `vel`. Must be a tuple: recent
        # NumPy versions no longer accept a list of slices/None as index.
        sl = [None] * vel.ndim
        sl[axis] = slice(None)
        vel2 = vel * (welch(vel.shape[axis])[tuple(sl)])
    else:
        vel2 = vel
    # handle options which are mutually exclusive
    if method == 'vacf':
        assert npad in [0, None], "use npad={0,None} for method='vacf'"
    # padding
    if npad is not None:
        nadd = (vel2.shape[axis] - 1) * npad
        if tonext:
            vel2 = pad_zeros(vel2, tonext=True,
                             tonext_min=vel2.shape[axis] + nadd,
                             axis=axis)
        else:
            vel2 = pad_zeros(vel2, tonext=False, nadd=nadd, axis=axis)
    if method == 'direct':
        full_fft_vel = np.abs(fft(vel2, axis=axis))**2.0
        full_faxis = np.fft.fftfreq(vel2.shape[axis], dt)
        split_idx = len(full_faxis) // 2
        faxis = full_faxis[:split_idx]
        # First split the array, then multiply by `mass` and average. If
        # full_out, then we need full_fft_vel below, so copy before slicing
        # (the in-place multiplication below would modify it otherwise).
        arr = full_fft_vel.copy() if full_out else full_fft_vel
        fft_vel = num.slicetake(arr, slice(0, split_idx), axis=axis,
                                copy=False)
        if mass is not None:
            fft_vel *= mass_bc
        # average remaining axes, summing is enough b/c normalization is done
        # below
        # sums: (nstep, natoms, 3) -> (nstep, natoms) -> (nstep,)
        # NOTE(review): this relies on num.sum(..., keepdims=True) keeping
        # `axis` and reducing all others, unlike numpy.sum -- confirm.
        dos = num.sum(fft_vel, axis=axis, keepdims=True)
        default_out = (faxis, num.norm_int(dos, faxis, area=area))
        if not full_out:
            return default_out
        # have to re-calculate this here b/c we never calculate the full_pdos
        # normally
        if mass is not None:
            full_fft_vel *= mass_bc
        full_pdos = num.sum(full_fft_vel, axis=axis, keepdims=True)
        extra_out = (full_faxis, full_pdos, split_idx)
        return default_out + extra_out
    # method == 'vacf'
    vacf = fvacf(vel2, m=mass)
    if mirr:
        fft_vacf = fft(mirror(vacf))
    else:
        fft_vacf = fft(vacf)
    full_faxis = np.fft.fftfreq(fft_vacf.shape[axis], dt)
    full_pdos = np.abs(fft_vacf)
    split_idx = len(full_faxis) // 2
    faxis = full_faxis[:split_idx]
    dos = full_pdos[:split_idx]
    default_out = (faxis, num.norm_int(dos, faxis, area=area))
    if full_out:
        extra_out = (full_faxis, full_pdos, split_idx, vacf, fft_vacf)
        return default_out + extra_out
    return default_out
def smooth(data, kern, axis=0, edge='m', norm=True):
    """Smooth `data` by convolution with a kernel `kern`.

    Uses scipy.signal.fftconvolve().

    Note that due to edge effect handling (padding) and kernel normalization,
    the convolution identity convolve(data,kern) == convolve(kern,data) doesn't
    apply here. We always return an array of ``data.shape``.

    Parameters
    ----------
    data : nd array
        The data to smooth. Example: 1d (N,) or (N,natoms,3) for trajectory
    kern : nd array
        Convolution kernel. Example: 1d (M,) or (M,1,1) for trajectory
    axis : int
        Axis along which to do the smoothing. That is actually not needed for
        the convolution ``fftconvolve(data, kern)`` but is used for padding the
        data along `axis` to handle edge effects before convolution.
    edge : str
        Method for edge effect handling.
            | 'm' : pad with mirror signal
            | 'c' : pad with constant values (i.e. ``data[0]`` and
            |       ``data[-1]`` in the 1d case)
    norm : bool
        Normalize kernel. Default is True. This assures that the smoothed
        signal lies within the data. Note that this is not True for kernels
        with very big spread (i.e. ``hann(N*10)`` or ``gaussian(N/2,
        std=N*10)``. Then the kernel is effectively a constant.

    Returns
    -------
    ret : data.shape
        Convolved signal.

    Raises
    ------
    ValueError
        If `edge` is neither 'm' nor 'c'.

    Examples
    --------
    >>> from pwtools.signal import welch
    >>> from numpy.random import rand
    >>> x = linspace(0,2*pi,500); a=cos(x)+rand(500)
    >>> plot(a, color='0.7')
    >>> k=scipy.signal.hann(21)
    >>> plot(signal.smooth(a,k), 'r', label='hann')
    >>> k=scipy.signal.gaussian(21, 3)
    >>> plot(signal.smooth(a,k), 'g', label='gauss')
    >>> k=welch(21)
    >>> plot(signal.smooth(a,k), 'y', label='welch')
    >>> legend()
    >>> # odd kernel [0,1,0] reproduces data exactly, i.e. convolution with
    >>> # delta peak
    >>> figure(); title('smooth with delta [0,1,0]')
    >>> x=linspace(0,2*pi,15); k=scipy.signal.hann(3)
    >>> plot(cos(x))
    >>> plot(signal.smooth(cos(x),k), 'r')
    >>> legend()
    >>> # edge effects with normal convolution
    >>> figure(); title('edge effects')
    >>> x=rand(20)+10; k=scipy.signal.hann(11);
    >>> plot(x); plot(signal.smooth(x,k),label="smooth");
    >>> plot(scipy.signal.convolve(x,k/k.sum(),'same'), label='convolve')
    >>> legend()
    >>> # edge effect methods
    >>> figure(); title('edge effect methods')
    >>> x=rand(20)+10; k=scipy.signal.hann(20);
    >>> plot(x); plot(signal.smooth(x,k,edge='m'),label="edge='m'");
    >>> plot(signal.smooth(x,k,edge='c'),label="edge='c'");
    >>> legend()
    >>> # smooth a trajectory of atomic coordinates
    >>> figure(); title('trajectory')
    >>> x = linspace(0,2*pi,500)
    >>> a = rand(500,2,3) # (nstep, natoms, 3)
    >>> a[:,0,:] += cos(x)[:,None]
    >>> a[:,1,:] += sin(x)[:,None]
    >>> k=scipy.signal.hann(21)[:,None,None]
    >>> y = signal.smooth(a,k)
    >>> plot(a[:,0,0], color='0.7'); plot(y[:,0,0],'b',
    ...                                    label='atom1 x')
    >>> plot(a[:,1,0], color='0.7'); plot(y[:,1,0],'r',
    ...                                    label='atom2 x')
    >>> legend()

    References
    ----------
    [1] http://wiki.scipy.org/Cookbook/SignalSmooth

    See Also
    --------
    :func:`welch`
    :func:`lorentz`

    Notes
    -----

    Kernels:

    Even kernels result in shifted signals, odd kernels are better.
    However, for N >> M, it doesn't make a difference really.

    Usual kernels (window functions) are created by e.g.
    ``scipy.signal.hann(M)``. For ``kern=scipy.signal.gaussian(M,
    std)``, two values are needed, namely `M` and `std`, where `M`
    determines the number of points calculated for the convolution kernel, as
    in the other cases. But what is actually important is `std`, which
    determines the "used width" of the gaussian. Say we use N=100
    and M=50. That would be a massively wide window and we would
    smooth away all details. OTOH, using ``gaussian(50,3)`` would generate a
    kernel with the same number `M` of data points, but the gauss peak which is
    effectively used for convolution is much smaller. For ``gaussian()``,
    `M` should be bigger than `std`. The convolved signal will converge
    with increasing `M`. Good values are `M=6*std` and bigger. For
    :func:`lorentz`, much wider kernels are needed such as `M=100*std` b/c
    of the long tails of the Lorentz function. Testing is mandatory!

    Edge effects:

    We use padding of the signal with ``M=len(kern)`` values at both ends such
    that the convolution with `kern` doesn't zero the `data` at the signal
    edges. We have two methods. `edge='m'`: pad with the signal mirrored at 0
    and -1 or `edge='c'`: use the constant values ``data[0]`` and
    ``data[-1]``. Many more of these variants may be thought of. The choice of
    how to extend the data essentially involves an assumption about how the
    signal *would* continue, which is signal-dependent. In practice, we usually
    have ``M << N`` (e.g. ``scipy.signal.hann(M)``) or ``std << N``
    (``scipy.signal.gaussian(M, std``). Then, both methods are identical in the
    middle and show only very small differences at the edges. Essentially, edge
    effect handling shall only ensure that the smoothed signal doesn't go to
    zero and that must be independent of the method, which is the case.

    Memory:

    For big data, fftconvolve() can easily eat up all your memory, for
    example::

    >>> # assume axis=0 is the axis along which to convolve
    >>> arr = ones((100000,200,3))
    >>> kern = scipy.signal.hann(101)
    >>> ret = scipy.signal.fftconvolve(arr, kern[:,None,None])

    Then it is better to loop over some or all of the remaining dimensions::

    >>> ret = np.empty_like(arr)
    >>> for jj in range(arr.shape[1]):
    >>>     ret[:,jj,:] = smooth(arr[:,jj,:], kern[:,None])

    or::

    >>> for jj in range(arr.shape[1]):
    >>>     for kk in range(arr.shape[2]):
    >>>         ret[:,jj,kk] = smooth(arr[:,jj,kk], kern)

    The size of the chunk over which you explicitly loop depends on the data
    of course.
    """
    # edge = 'm'
    # ----------
    #
    # Add mirror of the signal left and right to handle edge effects, up to
    # signal length N on both ends. If M > N then fill padding regions up with
    # zeros until we have sig = [(M,), (N,), (M,)]. fftconvolve(..., 'valid')
    # always returns only the signal length where sig and kern overlap
    # completely. Therefore, data at the far end doesn't influence the edge and
    # we can safely put zeros (or anything else) there. The returned length is
    # always N+M+1.
    #
    # example (M < N), add M=3 data parts left and right
    # npad   = 3
    # data   =       [1,2,3,4,5,6]
    # dleft  = [4,3,2]
    # dright =                   [5,4,3]
    # sig    = [4,3,2,1,2,3,4,5,6,5,4,3]
    # If M = 8 > N, then:
    # dleft  = [6,5,4,3,2]
    # dright =                     [5,4,3,2,1]
    # sig    = [0,0,0,6,5,4,3,2,1,2,3,4,5,6,5,4,3,2,1,0,0,0]
    #
    # edge = 'c'
    # ----------
    # The same, but all padded values are the first (left) and last (right)
    # data value.
    N = data.shape[axis]
    M = kern.shape[axis]
    if edge == 'm':
        npad = min(M, N)
        # Reversed slices which mirror the signal at index 0 and -1 without
        # repeating the edge values themselves.
        sleft = slice(npad, 0, -1)
        sright = slice(-2, -(npad + 2), -1)
        dleft = num.slicetake(data, sl=sleft, axis=axis)
        dright = num.slicetake(data, sl=sright, axis=axis)
        assert dleft.shape == dright.shape
        K = dleft.shape[axis]
        if K < M:
            # Not enough data to mirror: fill the outer part with zeros.
            dleft = pad_zeros(dleft, axis=axis, where='start', nadd=M - K)
            dright = pad_zeros(dright, axis=axis, where='end', nadd=M - K)
    elif edge == 'c':
        # Index which re-inserts a length-1 axis at position `axis`. It must be
        # a tuple: recent NumPy versions no longer interpret a list of
        # slices/None as a multidimensional index.
        sl = [slice(None)] * data.ndim
        sl[axis] = None
        sl = tuple(sl)
        dleft = np.repeat(num.slicetake(data, sl=0, axis=axis)[sl], M,
                          axis=axis)
        dright = np.repeat(num.slicetake(data, sl=-1, axis=axis)[sl], M,
                           axis=axis)
        assert dleft.shape == dright.shape
        # 1d special case: (M,1) -> (M,)
        if data.ndim == 1 and dleft.ndim == 2 and dleft.shape[1] == 1:
            dleft = dleft[:, 0]
            dright = dright[:, 0]
    else:
        # StandardError exists only in Python 2. ValueError is one of its
        # subclasses there and is available in Python 3 as well.
        raise ValueError("unknown value for edge")
    sig = np.concatenate((dleft, data, dright), axis=axis)
    kk = kern / float(kern.sum()) if norm else kern
    ret = fftconvolve(sig, kk, 'valid')
    assert ret.shape[axis] == N + M + 1, "unexpected convolve result shape"
    # The padded signal is not needed anymore, release the memory early.
    del sig
    # Cut the length-N part out of the length N+M+1 result. Use floor division:
    # with true division M/2 is a float, which is not a valid slice index.
    half = M // 2
    if M % 2 == 0:
        # even kernel, shift result to right
        # (slice(half + 1, -half) would shift it to the left instead)
        sl = slice(half, -half - 1)
    else:
        sl = slice(half + 1, -half - 1)
    ret = num.slicetake(ret, sl=sl, axis=axis)
    assert ret.shape == data.shape, ("ups, ret.shape (%s)!= data.shape (%s)"
                                     % (ret.shape, data.shape))
    return ret