import numpy as np
from scipy import signal

from .utilities import Bunch

# freq_bands, _psd, _lomb_freqs, and fbndavg are defined elsewhere
# in this module.


def band_psd(t, e, cfrq, equi=True, frqosamp=1):
    """
    Band-averaged power spectral density in the vicinity of tidal
    constituents.

    Parameters
    ----------
    t : ndarray
        time [days] relative to an arbitrary origin
    e : ndarray
        residual from tidal fit, complex or real
    cfrq : ndarray
        frequencies of NR & R constituents [CPH]
    equi : bool
        True (default) if sample times are uniformly spaced
    frqosamp : int
        Lomb-Scargle frequency oversampling factor, used if `equi`
        is False

    Returns
    -------
    P : Bunch
        - P.fbnd : edges of frequency bands [CPH]
        - P.Puu : 1-sided auto-spectral density of the u error,
          real(e) [units^2/CPH]
        - P.Pvv : (complex case only) as P.Puu, but for the v error,
          imag(e)
        - P.Puv : (complex case only) cross-spectral density between
          u and v

    Note
    ----
    This is the only function in this module that is called from
    other modules.
    """
    P = Bunch(fbnd=freq_bands)

    nt = len(e)
    if nt % 2:
        e = e[:-1]
        t = t[:-1]
        nt -= 1

    hn = signal.windows.hann(nt, sym=False)

    # Spectrum of the real component:
    if equi:
        # If sampling is uniform, use the FFT-based estimate.
        # Sample interval in hours; t is in days.
        dt = 24 * (t[1] - t[0])
        fs = 1 / dt  # sampling frequency: cycles (samples) per hour
        Puu1s = _psd(np.real(e), hn, fs)
        allfrq = np.arange(nt // 2 + 1) / (nt * dt)
    else:
        # If sampling is uneven, use Lomb-Scargle.
        # Time in hours, for output in CPH and units^2 per CPH.
        lfreq = _lomb_freqs(t * 24, fbands=freq_bands, ofac=frqosamp)
        ls_spec = _psd_lomb(t * 24, e, window=hn, freq=lfreq)
        Puu1s = ls_spec.Pxx
        allfrq = ls_spec.F

    P.Puu = fbndavg(Puu1s, allfrq, cfrq)

    # If e is complex, handle the imaginary part.
    if e.dtype.kind == 'c':
        if equi:
            # Uniform sampling: Welch-style estimates.
            Pvv1s = _psd(e.imag, hn, fs)
            Puv1s = _psd(e, hn, fs)  # complex cross-periodogram
        else:
            # Uneven sampling: Lomb-Scargle.
            Pvv1s = ls_spec.Pyy
            Puv1s = ls_spec.Pxy
        P.Pvv = fbndavg(Pvv1s, allfrq, cfrq)
        # Take the co-spectrum only; ignore the quadrature
        # (imaginary) part.
        P.Puv = fbndavg(Puv1s, allfrq, cfrq).real
        # We don't want to throw away the sign of the co-spectrum,
        # which determines the quadrant in which the semi-major
        # variance ellipse lies.  If it does matter, we need to
        # check the sign convention.
        # P.Puv = np.abs(P.Puv)

    return P
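# ---------------------------------------------------------------------
# Usage sketch (not part of the module).  The synthetic residual and
# the constituent frequencies below are invented for illustration, and
# the import path assumes this module lives at utide.periodogram.
# ---------------------------------------------------------------------
import numpy as np

from utide.periodogram import band_psd  # assumed module location

# Hourly samples over 30 days; t is in days, as band_psd expects.
t_ex = np.arange(24 * 30) / 24.0
# Stand-in for the residual from a tidal fit (white noise here).
e_ex = 0.05 * np.random.default_rng(0).standard_normal(t_ex.size)
# Frequencies of two constituents, M2 and K1, in cycles per hour.
cfrq_ex = np.array([1 / 12.4206012, 1 / 23.9344696])

P_ex = band_psd(t_ex, e_ex, cfrq_ex, equi=True)
# P_ex.fbnd gives the band edges [CPH]; P_ex.Puu is the band-averaged
# residual PSD used downstream for confidence intervals.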
import utide
from utide.utilities import Bunch

# t and h are assumed to be loaded already: hourly times (days since
# `epoch`) and sea-surface heights for the HNL 2010 record.
epoch = '1700-01-01'

solve_kw = dict(epoch=epoch,
                lat=21,
                constit='auto',
                method='ols',
                conf_int='linear')

coef_all = utide.solve(t, h, **solve_kw)

sl_month = slice(24 * 31)
coef_month = utide.solve(t[sl_month], h[sl_month], **solve_kw)

sl_week = slice(24 * 7)
coef_week = utide.solve(t[sl_week], h[sl_week], **solve_kw)

# Map stripped constituent names to their row indices in coef_month.
month_index_dict = {name.strip(): i
                    for i, name in enumerate(coef_month.name)}

# Infer S2, N2, and O1 from nearby reference constituents, using
# amplitude ratios and phase offsets estimated from the month-long fit.
infer = Bunch()
infer.inferred_names = 'S2', 'N2', 'O1'
infer.reference_names = 'M2', 'M2', 'K1'
infer.amp_ratios, infer.phase_offsets = [], []
for ref, inf in zip(infer.reference_names, infer.inferred_names):
    iref = month_index_dict[ref]
    iinf = month_index_dict[inf]
    infer.amp_ratios.append(coef_month.A[iinf] / coef_month.A[iref])
    infer.phase_offsets.append(coef_month.g[iref] - coef_month.g[iinf])

coef_week_inf = utide.solve(t[sl_week], h[sl_week], infer=infer, **solve_kw)

runs = [(coef_all, 'HNL2010.mat'),
        (coef_month, 'HNL2010_Jan.mat'),
        (coef_week, 'HNL2010_Jan_week1.mat'),
        (coef_week_inf, 'HNL2010_Jan_week1_infer_S2.mat'),
        ]
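# ---------------------------------------------------------------------
# One way to check that inference helped (not part of the original
# script, and assuming utide.reconstruct accepts the same `epoch`
# keyword as solve): hindcast both week-long fits over the full record
# and compare RMS errors.
# ---------------------------------------------------------------------
import numpy as np

from utide import reconstruct

recon_plain = reconstruct(t, coef_week, epoch=epoch)
recon_infer = reconstruct(t, coef_week_inf, epoch=epoch)

# The inferred solution should track the data more closely where
# S2, N2, and O1 contribute.
rms_plain = np.sqrt(np.mean((h - recon_plain.h) ** 2))
rms_infer = np.sqrt(np.mean((h - recon_infer.h) ** 2))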
def _psd_lomb(t, x, window=None, freq=None, ofac=1):
    """
    Periodogram estimate for irregular sampling, Lomb-Scargle method.

    Parameters
    ----------
    t : ndarray, (n,)
        time, monotonic but possibly irregularly spaced
    x : ndarray, (n,)
        signal, real or complex
    window : None or ndarray, (n,)
        if not None, a uniformly-sampled data window
    freq : None or ndarray, (nfreq,)
        evaluation frequencies in cycles per unit time
    ofac : int
        oversampling factor; defaults to 1

    Returns
    -------
    P : Bunch
        - P.F : frequencies [cycles per time unit] of the estimates
        - P.Pxx : one-sided auto-spectral density estimate for real(x)

        If `x` is complex:

        - P.Pyy : as above, for imag(x)
        - P.Pxy : complex cross-spectrum between real(x) and imag(x)

    Notes
    -----
    If `freq` is None, `P.F` is calculated to coincide with the
    Fourier frequencies of a series of n uniformly distributed times
    from min(t) to max(t).  The mean and the Nyquist frequency are
    omitted because they are irrelevant in this context.

    PSD units are [x-units^2 per cycle per unit time].
    """
    out = Bunch()

    # Copy the inputs so they can be modified in place.
    x = np.array(x)
    t = np.array(t, dtype=float)

    x -= x.mean()  # remove the mean

    n = len(x)
    if window is None:
        w = np.ones(t.shape, dtype=float)
    else:
        # Interpolate the window from its uniform grid to the
        # (possibly nonuniform) sample times.
        t_uniform = np.linspace(np.min(t), np.max(t), n)
        w = np.interp(t, t_uniform, window)
    x *= w

    # Estimated record length as n delta-t, where delta-t is the
    # *average* time per sample.
    delta_t = (t[-1] - t[0]) / (n - 1)
    reclen = n * delta_t

    if freq is None:
        ofac = int(round(ofac))
        nf = n * ofac  # number of "Fourier frequencies", oversampled
        # Simplify by ignoring the 0 and Nyquist frequencies.
        # Divide by reclen to convert cycles/record to cycles/time unit.
        freq = np.arange(1, nf // 2) / reclen
    out.F = freq

    xr = np.real(x)

    # signal.lombscargle returns "(A**2) * N/4 for a harmonic signal
    # with amplitude A for sufficiently large N".  It takes *angular*
    # frequencies as its third argument.
    freq_radian = freq * 2 * np.pi
    psdnorm = 2 * delta_t * n / (w ** 2).sum()
    out.Pxx = psdnorm * signal.lombscargle(t, xr, freq_radian)

    if x.dtype.kind == 'f':
        return out

    out.Pyy = psdnorm * signal.lombscargle(t, x.imag, freq_radian)

    # If we need to limit memory usage and don't want to use Cython,
    # we can segment the frequencies and loop over the segments; the
    # speed penalty will be minimal.
    out.Pxy = psdnorm * _ls_cross(t, x, freq_radian)

    return out
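# ---------------------------------------------------------------------
# Sanity-check sketch for _psd_lomb (a private helper, so this is for
# illustration only, run from within the module): a unit-amplitude
# complex exponential at the M2 frequency, sampled at irregular times,
# should put the peak of P.Pxx and P.Pyy near 1/12.42 cycles per hour.
# ---------------------------------------------------------------------
import numpy as np

rng = np.random.default_rng(1)
t_irr = np.sort(rng.uniform(0, 240, 500))   # irregular times [hours]
x_irr = (np.exp(2j * np.pi * t_irr / 12.42)  # M2-frequency rotary signal
         + 0.3 * (rng.standard_normal(t_irr.size)
                  + 1j * rng.standard_normal(t_irr.size)))

P_ls = _psd_lomb(t_irr, x_irr, ofac=4)
imax = np.argmax(P_ls.Pxx)
# P_ls.F[imax] should fall close to 1 / 12.42 CPH; P_ls.Pxy carries
# the phase relationship between the real and imaginary components.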
import numpy as np

from .utilities import Bunch

# leverage, wfuncdict, tune_defaults, and r_normed are defined
# elsewhere in this module.


def robustfit(X, y, weight_function='bisquare', tune=None, rcond=1,
              tol=0.001, maxit=50):
    """
    Multiple linear regression via iteratively reweighted least squares.

    Parameters
    ----------
    X : ndarray (n, p)
        MLR model with `p` parameters (independent variables) at
        `n` times
    y : ndarray (n,)
        dependent variable
    weight_function : str, optional
        name of the weighting function
    tune : None or float, optional
        Tuning parameter for normalizing the residuals in the weight
        calculation; larger numbers *decrease* the sensitivity to
        outliers.  If None, a default is chosen based on
        `weight_function`.
    rcond : float, optional
        minimum condition number parameter for `np.linalg.lstsq`
    tol : float, optional
        The iteration stops when the fractional reduction in the mean
        squared weighted residuals is less than `tol`.
    maxit : int, optional
        maximum number of iterations

    Returns
    -------
    rf : `utide.utilities.Bunch`
        - rf.b : model coefficients of the solution
        - rf.w : weights used for the solution
        - rf.s : singular values for each model component
        - rf.rms_resid : rms residuals (unweighted) from the fit
        - rf.leverage : sensitivity of the OLS estimate to each point
          in `y`
        - rf.ols_b : OLS model coefficients
        - rf.ols_rms_resid : rms residuals from the OLS fit
        - rf.iterations : number of iterations completed
    """
    if tune is None:
        tune = tune_defaults[weight_function]
    _wfunc = wfuncdict[weight_function]

    if X.ndim == 1:
        X = X.reshape((X.size, 1))
    n, p = X.shape

    lev = leverage(X)
    out = Bunch(weight_function=weight_function,
                tune=tune,
                rcond=rcond,
                tol=tol,
                maxit=maxit,
                leverage=lev)

    # LJ2009 has an incorrect expression for leverage in the
    # appendix, and an incorrect version of the following
    # multiplicative factor for scaling the residuals.
    rfac = 1 / (tune * np.sqrt(1 - lev))

    # We probably only need to keep track of rmeansq, but it's cheap
    # to carry rsumsq along until we are positive.
    oldrsumsq = None
    oldrmeansq = None
    oldlstsq = None
    oldw = None
    iterations = 0  # 1-based iteration exit number

    w = np.ones(y.shape)

    for i in range(maxit):
        wX = w[:, np.newaxis] * X
        wy = w * y
        b, rsumsq, rank, sing = np.linalg.lstsq(wX, wy, rcond)
        rsumsq = rsumsq[0]
        if i == 0:
            rms_resid = np.sqrt(rsumsq / n)
            out.update(dict(ols_b=b, ols_rms_resid=rms_resid))

        # Weighted mean of squared weighted residuals:
        rmeansq = rsumsq / w.sum()

        if oldrsumsq is not None:
            # improvement = (oldrsumsq - rsumsq) / oldrsumsq
            improvement = (oldrmeansq - rmeansq) / oldrmeansq
            if improvement < 0:
                # The last iteration made things worse; back up one
                # step and stop.
                b, rsumsq, rank, sing = oldlstsq
                w = oldw
                iterations = i
                break
            if improvement < tol:
                iterations = i + 1
                break

        # Save these values in case the next iteration makes
        # things worse.
        oldlstsq = b, rsumsq, rank, sing
        oldw = w
        oldrsumsq = rsumsq
        oldrmeansq = rmeansq

        # Residuals (unweighted) from the latest fit:
        resid = y - np.dot(X, b)

        # Update the weights based on these residuals.
        w = _wfunc(r_normed(resid, rfac))

    if iterations == 0:
        iterations = maxit  # did not converge

    rms_resid = np.sqrt(np.mean(np.abs(resid) ** 2))

    out.update(dict(iterations=iterations,
                    b=b,
                    s=sing,
                    w=w,
                    rank=rank,
                    rms_resid=rms_resid,
                    ))

    return out
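# ---------------------------------------------------------------------
# Minimal sketch of what the reweighting buys, using synthetic data
# with a few gross outliers.  It assumes robustfit is importable (e.g.
# from utide.robustfit) and passes a small explicit rcond so that
# np.linalg.lstsq retains both columns of the model matrix.
# ---------------------------------------------------------------------
import numpy as np

from utide.robustfit import robustfit  # assumed module location

rng = np.random.default_rng(2)
n = 200
X_ex = np.column_stack([np.ones(n), np.linspace(0, 1, n)])
y_ex = X_ex @ np.array([1.0, 2.0]) + 0.05 * rng.standard_normal(n)
y_ex[::25] += 3.0                      # inject gross outliers

rf = robustfit(X_ex, y_ex, rcond=1e-10)
# rf.b should remain near [1, 2]; rf.ols_b is biased by the outliers,
# and rf.w is close to zero at the contaminated points.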