def test_lag_cross_correlation():
    base, lagged = lag_samples(7)
    base.to_frame().plot()
    # calculate_lag should succeed when the search window (max_lag)
    # covers the actual offset between the two series.
    max_lag = minutes(300)
    res = minutes(1)
    lag_calc = calculate_lag(base, lagged, max_lag, res,
                             interpolate_method="linear")
    # ...and raise ValueError when the window is too small to contain it.
    max_lag = minutes(100)
    res = minutes(1)
    with pytest.raises(ValueError):
        lag_calc = calculate_lag(base, lagged, max_lag, res,
                                 interpolate_method="linear")
    # The same holds for a larger lag that falls outside the window.
    base, lagged = lag_samples(20)
    max_lag = minutes(300)
    with pytest.raises(ValueError):
        lag_calc = calculate_lag(base, lagged, max_lag, res,
                                 interpolate_method="linear")
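
# For context, a standalone sketch of the cross-correlation idea the test
# above exercises. The helper naive_lag is hypothetical and not part of the
# library: it shifts the lagged series in steps of `res` within +/- max_lag
# and picks the shift with the highest correlation against the base series.
import numpy as np
import pandas as pd

def naive_lag(base, lagged, max_lag, res):
    """Hypothetical sketch: brute-force lag estimate by cross-correlation."""
    n = int(max_lag / res)
    def score(s):
        c = base.corr(lagged.shift(-s, freq=res))
        return -np.inf if np.isnan(c) else c
    best = max(range(-n, n + 1), key=score)
    return best * res

idx = pd.date_range("2020-01-01", periods=500, freq="1min")
base = pd.Series(np.sin(np.linspace(0.0, 20.0, 500)), index=idx)
lagged = base.shift(7, freq="1min")          # true lag: 7 minutes
print(naive_lag(base, lagged, pd.Timedelta("30min"), pd.Timedelta("1min")))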
def separate_species(ts, noise_thresh_min=40):
    """Separate species into subtidal, diurnal, semidiurnal and noise components

    Input:
        ts: time series to be decomposed into species, assumed to be at
            six-minute intervals. The filters used have long lengths, so
            avoid missing data and allow for four extra days' worth of data
            on each end.

    Output:
        Four regular time series, representing subtidal, diurnal,
        semidiurnal and noise components.
    """
    # The first filter eliminates noise; the residual is the part that IS noise.
    ts_denoise = cosine_lanczos(ts, cutoff_period=minutes(noise_thresh_min))
    ts_noise = ts - ts_denoise

    # The filter length assumes 6-minute data. The resulting filter is
    # 90 hours long, which is MUCH longer than the default because this
    # filter has to be really sharp.
    assert ts.index.freq == minutes(6)
    # 14.5 hours = 870 min
    ts_diurnal_and_low = cosine_lanczos(ts_denoise, cutoff_period=minutes(870),
                                        filter_len=900)
    ts_semidiurnal_and_high = ts_denoise - ts_diurnal_and_low

    # The resulting filter is again 90 hours long, which is still a bit
    # longer than the default. Again, we want this filter to be pretty sharp.
    ts_sub_tide = cosine_lanczos(ts_denoise, cutoff_period=hours(40),
                                 filter_len=900)
    ts_diurnal = ts_diurnal_and_low - ts_sub_tide
    return ts_sub_tide, ts_diurnal, ts_semidiurnal_and_high, ts_noise
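
# A minimal usage sketch of separate_species, assuming the pandas-based
# vtools API (cosine_lanczos, minutes, hours). The synthetic ten-day,
# six-minute tide below is invented purely for illustration.
import numpy as np
import pandas as pd

idx = pd.date_range("2020-01-01", periods=10 * 240, freq="6min")
t_hr = np.arange(len(idx)) * 0.1                     # 6 min = 0.1 hour
wl = (0.8 * np.cos(2 * np.pi * t_hr / 12.42)         # M2-like semidiurnal
      + 0.4 * np.cos(2 * np.pi * t_hr / 23.93)       # K1-like diurnal
      + 0.10 * t_hr / 240.0                          # slow subtidal trend
      + 0.05 * np.random.randn(len(idx)))            # noise
ts = pd.Series(wl, index=idx)

sub, diurnal, semidiurnal, noise = separate_species(ts)
# Up to end effects of the filters, the parts sum back to the original:
# ts is approximately sub + diurnal + semidiurnal + noise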
def plot_metrics_to_figure(fig, tss, title=None, window_inst=None,
                           window_avg=None, labels=None,
                           max_shift=hours(2),
                           period=minutes(int(12.24 * 60)),
                           label_loc=1, legend_size=12):
    """ Plot a metrics plot

        Returns
        -------
        matplotlib.figure.Figure
    """
    grids = gen_metrics_grid()
    axes = dict(zip(grids.keys(), map(fig.add_subplot, grids.values())))
    if labels is None:
        labels = [ts.props.get('label') for ts in tss]
    plot_inst_and_avg(axes, tss, window_inst, window_avg, labels,
                      label_loc, legend_size)
    if title is not None:
        axes['inst'].set_title(title)
    if window_avg is not None:
        tss_clipped = [safe_window(ts, window_avg) for ts in tss]
    else:
        tss_clipped = tss
    lags = calculate_lag_of_tss(tss_clipped, max_shift, period)
    metrics, tss_scatter = calculate_metrics(tss_clipped, lags)
    if tss_scatter is not None:
        ax_scatter = axes['scatter']
        plot_scatter(ax_scatter, tss_scatter)
    unit = tss[0].props.get('unit') if tss[0] is not None else None
    str_metrics = gen_metrics_string(metrics, labels[1:], unit)
    write_metrics_string(axes['inst'], str_metrics)
    return fig
def plot_metrics_to_figure(fig, tss, title=None, window_inst=None,
                           window_avg=None, labels=None,
                           max_shift=hours(4),
                           period=minutes(int(12.24 * 60)),
                           label_loc=1, legend_size=12):
    """ Plot a metrics plot

        Returns
        -------
        matplotlib.figure.Figure
    """
    grids = gen_metrics_grid()
    axes = dict(zip(grids.keys(), map(fig.add_subplot, grids.values())))
    plot_inst_and_avg(axes, tss, window_inst, window_avg, labels,
                      label_loc, legend_size)
    if title is not None:
        axes['inst'].set_title(title)
    if window_avg is not None:
        tss_clipped = [safe_window(ts, window_avg) for ts in tss]
    else:
        tss_clipped = tss
    lags = calculate_lag_of_tss(tss_clipped, max_shift, minutes(1))
    metrics, tss_scatter = calculate_metrics(tss_clipped, lags)
    unit = tss[1].unit  # Get the unit from the simulation
    if tss_scatter is not None:
        if tss_scatter[0] is not None:
            tss_scatter[0].unit = unit
        tss_scatter[1].unit = unit
        ax_scatter = axes['scatter']
        plot_scatter(ax_scatter, tss_scatter)
    str_metrics = gen_metrics_string(metrics, labels[1:], unit)
    write_metrics_string(axes['inst'], str_metrics)
    return fig
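
# A hedged usage sketch of plot_metrics_to_figure. The series ts_obs and
# ts_sim, the station name, the output file name and the (start, end)
# window tuples are all assumptions; tss is expected to be
# [observed, simulated] so that labels[1:] and tss[1].unit line up with
# the simulation.
import matplotlib.pyplot as plt
import pandas as pd

fig = plt.figure(figsize=(12, 7.5))
fig = plot_metrics_to_figure(
    fig, [ts_obs, ts_sim],
    title="Stage at Station X",
    window_inst=(pd.Timestamp("2020-01-10"), pd.Timestamp("2020-01-14")),
    window_avg=(pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-31")),
    labels=["Observed", "Simulated"])
fig.savefig("metrics_station_x.png", dpi=300)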
def read(self, fpath, start=None, end=None, force_regular=True, selector=None):
    """ Read a text file with the given pattern and parsers.
        Parsers and a pattern must be defined and set in the child class.

        Parameters
        ----------
        fpath: str
            file to read
        start: datetime.datetime, optional
            datetime to start reading from. If None, read from the start of the file
        end: datetime.datetime, optional
            datetime to finish reading at. If None, read until the end of the file
        force_regular: boolean, optional
            If True, a regular time series is returned

        Returns
        -------
        vtools.data.timeseries.TimeSeries
            time series read from the file
    """
    # The selector (if it exists) could probably be precalculated or at
    # least recorded. Almost always this amounts to picking variables out
    # of a list of column names and recording indexes, but here we don't
    # ask any questions about what "selector" is.
    n_headerlines, metadata = self.process_header(fpath, selector)
    # The metadata from process_header is discarded and rebuilt from the
    # header regexes, if any are defined.
    metadata = dict()
    if self._header_regexs is not None:
        metadata = self.read_metadata_from_header(fpath)
    print("Here we are working on %s" % fpath)
    with open(fpath, 'r') as f_in:
        times = list()
        values = list()
        # Fast-forward past the header
        if n_headerlines > 0:
            for i in range(n_headerlines):
                f_in.readline()
        # Process lines starting from the current file pointer
        for i, line in enumerate(f_in):
            if self.is_comment(line):
                continue
            timestamp, vals = self.parse_record(line)
            if start and timestamp < start:
                continue
            if end and timestamp > end:
                break
            times.append(timestamp)
            values.append(vals)
    if len(times) < 1:
        return None
    # Here we assume that it is more effective to retrieve too much in the
    # reading stage and then subset with numpy fancy indexing. But you can
    # override this function.
    arr = numpy.array(values)
    arr = self.cull_using_selector(arr)
    ts = vts.its(times, arr)
    if force_regular:
        interval = vt_infer_interval(times[:11], fraction=0.5,
                                     standard=[vtt.minutes(6), vtt.minutes(10),
                                               vtt.minutes(15), vtt.hours(1)])
        if not interval:
            for t in times[:10]:
                print(t.strftime("%Y-%m-%d %H:%M:%S"))
            raise ValueError(
                "Interval could not be inferred from first time steps in %s"
                % fpath)
        import warnings  # todo: this really should be an option
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            ts = vts.its2rts(ts, interval)
    if start is not None:
        if start < ts.start:
            ts = vts.extrapolate_ts(ts, start=start)
        else:
            ts = ts.window(start=start)
    if end is not None:
        if end > ts.end:
            ts = vts.extrapolate_ts(ts, end=end)
        else:
            ts = ts.window(end=end)
    for k, v in metadata.items():
        ts.props[k] = v
    return ts
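
# A sketch of the contract a child class must satisfy for read() above.
# The base class name (TextReaderBase) and the CSV layout are invented;
# only process_header, is_comment, parse_record and cull_using_selector
# mirror the calls actually made by read().
import datetime

class SimpleCsvReader(TextReaderBase):      # hypothetical base class
    _header_regexs = None                   # no metadata in the header

    def process_header(self, fpath, selector):
        return 1, {}                        # one header line to skip

    def is_comment(self, line):
        return line.startswith("#") or not line.strip()

    def parse_record(self, line):
        stamp, value = line.strip().split(",")
        return (datetime.datetime.strptime(stamp, "%Y-%m-%d %H:%M"),
                float(value))

    def cull_using_selector(self, arr):
        return arr                          # keep every column

reader = SimpleCsvReader()
ts = reader.read("stage.csv", start=datetime.datetime(2020, 1, 1))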