import pytest

# lag_samples, calculate_lag, and the minutes() interval helper are assumed to
# be imported from the package under test; they are not defined in this snippet.
def test_lag_cross_correlation():
    base, lagged = lag_samples(7)
    max_lag = minutes(300)
    res = minutes(1)

    lag_calc = calculate_lag(base,
                             lagged,
                             max_lag,
                             res,
                             interpolate_method="linear")

    max_lag = minutes(100)
    res = minutes(1)

    with pytest.raises(ValueError):
        lag_calc = calculate_lag(base,
                                 lagged,
                                 max_lag,
                                 res,
                                 interpolate_method="linear")

    base, lagged = lag_samples(20)
    max_lag = minutes(300)
    with pytest.raises(ValueError):
        lag_calc = calculate_lag(base,
                                 lagged,
                                 max_lag,
                                 res,
                                 interpolate_method="linear")
Example #2
def separate_species(ts, noise_thresh_min=40):
    """Separate a time series into subtidal, diurnal, semidiurnal and noise components.

        Input:
             ts: time series to be decomposed into species, assumed to be
             sampled at six-minute intervals. The filters used have long
             lengths, so avoid missing data and allow four extra days'
             worth of data at each end.

        Output:
              four regular time series, representing the subtidal, diurnal,
              semidiurnal and noise components
    """
    
    # the first filter eliminates noise
    ts_denoise = cosine_lanczos(ts, cutoff_period=minutes(noise_thresh_min))
    ts_noise = ts - ts_denoise   # the residual, i.e. the part that IS noise
    
    # The filter length assumes 6-minute data. The resulting filter is 90 hours
    # long, which is MUCH longer than the default because this filter has to be
    # really sharp.
    assert ts.index.freq == minutes(6)
    # 14.5 hours = 870 minutes
    ts_diurnal_and_low = cosine_lanczos(ts_denoise, cutoff_period=minutes(870),
                                        filter_len=900)
    ts_semidiurnal_and_high = ts_denoise - ts_diurnal_and_low

    # The resulting filter is again 90 hours long, which is still a bit longer
    # than the default. Again, we want this filter to be pretty sharp.
    ts_sub_tide = cosine_lanczos(ts_denoise, cutoff_period=hours(40),
                                 filter_len=900)
    ts_diurnal = ts_diurnal_and_low - ts_sub_tide
    return ts_sub_tide, ts_diurnal, ts_semidiurnal_and_high, ts_noise
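
A hypothetical usage sketch with synthetic six-minute data; the constituent periods and amplitudes are made up for illustration, and separate_species is assumed to accept a pandas Series with a regular DatetimeIndex.

import numpy as np
import pandas as pd

# 30 days of synthetic water levels at 6-minute spacing: one semidiurnal and
# one diurnal constituent plus white noise.
idx = pd.date_range("2020-01-01", periods=30 * 240, freq="6min")
t_hr = np.arange(len(idx)) * 0.1
wl = (0.8 * np.sin(2 * np.pi * t_hr / 12.42)
      + 0.3 * np.sin(2 * np.pi * t_hr / 23.93)
      + 0.05 * np.random.randn(len(idx)))
ts = pd.Series(wl, index=idx)
sub, diurnal, semidiurnal, noise = separate_species(ts)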
def plot_metrics_to_figure(fig, tss,
                           title=None, window_inst=None, window_avg=None,
                           labels=None,
                           max_shift=hours(2),
                           period=minutes(int(12.24 * 60)),
                           label_loc=1,
                           legend_size=12):
    """ Plot a metrics plot

        Returns
        -------
        matplotlib.figure.Figure
    """
    grids = gen_metrics_grid()
    axes = dict(zip(grids.keys(), map(fig.add_subplot,
                                      grids.values())))
    if labels is None:
        labels = [ts.props.get('label') for ts in tss]
    plot_inst_and_avg(axes, tss, window_inst, window_avg, labels, label_loc, legend_size)
    if title is not None:
        axes['inst'].set_title(title)
    if window_avg is not None:
        tss_clipped = [safe_window(ts, window_avg) for ts in tss]
    else:
        tss_clipped = tss
    lags = calculate_lag_of_tss(tss_clipped, max_shift, period)
    metrics, tss_scatter = calculate_metrics(tss_clipped, lags)
    if tss_scatter is not None:
        ax_scatter = axes['scatter']
        plot_scatter(ax_scatter, tss_scatter)
    unit = tss[0].props.get('unit') if tss[0] is not None else None
    str_metrics = gen_metrics_string(metrics, labels[1:], unit)
    write_metrics_string(axes['inst'], str_metrics)
    return fig
Example #4
def plot_metrics_to_figure(fig,
                           tss,
                           title=None,
                           window_inst=None,
                           window_avg=None,
                           labels=None,
                           max_shift=hours(4),
                           period=minutes(int(12.24 * 60)),
                           label_loc=1,
                           legend_size=12):
    """ Plot a metrics plot

        Returns
        -------
        matplotlib.figure.Figure
    """

    grids = gen_metrics_grid()
    axes = dict(zip(grids.keys(), map(fig.add_subplot, grids.values())))

    plot_inst_and_avg(axes, tss, window_inst, window_avg, labels, label_loc,
                      legend_size)
    if title is not None:
        axes['inst'].set_title(title)
    if window_avg is not None:
        tss_clipped = [safe_window(ts, window_avg) for ts in tss]
    else:
        tss_clipped = tss

    lags = calculate_lag_of_tss(tss_clipped, max_shift, minutes(1))
    metrics, tss_scatter = calculate_metrics(tss_clipped, lags)
    unit = tss[1].unit  # Get from the simulation

    if tss_scatter is not None:
        if tss_scatter[0] is not None:
            tss_scatter[0].unit = unit
        tss_scatter[1].unit = unit
        ax_scatter = axes['scatter']
        plot_scatter(ax_scatter, tss_scatter)

    str_metrics = gen_metrics_string(metrics, labels[1:], unit)
    write_metrics_string(axes['inst'], str_metrics)
    return fig
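
Hypothetical driver code for the function above, assuming matplotlib and a pair of previously loaded, aligned time series (observation first, then simulation, as the unit handling expects).

import matplotlib.pyplot as plt

# ts_obs and ts_sim are placeholders for previously loaded time series.
fig = plt.figure(figsize=(12, 7.5))
plot_metrics_to_figure(fig, [ts_obs, ts_sim],
                       title="Station X water level",
                       labels=["Observed", "Simulated"])
fig.savefig("metrics_station_x.png", dpi=150)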
Example #5
    def read(self,
             fpath,
             start=None,
             end=None,
             force_regular=True,
             selector=None):
        """ Read a text file with the given pattern and parsers.
            Parsers and a pattern must be defined and set in the child class.

            Parameters
            ----------
            fpath: str
                file to read
            start: datetime.datetime, optional
                datetime to start reading in.
                If None, read from the start of the file
            end: datetime.datetime, optional
                datetime to finish reading in.
                If None, read till the end of the file
            force_regular: boolean, optional
                If True, convert the result to a regular time series

            Returns
            -------
            vtools.data.timeseries.TimeSeries
                time series from the file
        """
        # The selector (if it exists) can probably be precalculated, or at least
        # recorded. Almost always this amounts to picking variables out of a list
        # of column names and recording indexes, but here we don't ask any
        # questions about what "selector" is.
        n_headerlines, _ = self.process_header(fpath, selector)
        metadata = dict()
        if self._header_regexs is not None:
            metadata = self.read_metadata_from_header(fpath)
        print("Here we are working on %s" % fpath)
        with open(fpath, 'r') as f_in:
            times = list()
            values = list()
            # fast forward past header
            if n_headerlines > 0:
                for i in range(n_headerlines):
                    f_in.readline()
            # process lines starting from current file pointer
            for i, line in enumerate(f_in):
                if self.is_comment(line): continue
                timestamp, vals = self.parse_record(line)
                if start and timestamp < start:
                    continue
                if end and timestamp > end:
                    break
                times.append(timestamp)
                values.append(vals)

        if len(times) < 1:
            return None

        arr = numpy.array(values)

        # Here I assume that it is more effective to retrieve too much in the
        # reading stage and then cull with numpy fancy indexing. But you can
        # override this function.
        arr = self.cull_using_selector(arr)

        ts = vts.its(times, arr)
        if force_regular:
            interval = vt_infer_interval(times[:11],
                                         fraction=0.5,
                                         standard=[
                                             vtt.minutes(6),
                                             vtt.minutes(10),
                                             vtt.minutes(15),
                                             vtt.hours(1)
                                         ])
            if not interval:
                for t in times[:10]:
                    print(t.strftime("%Y-%m-%d %H:%M:%S"))
                raise ValueError(
                    "Interval could not be inferred from first time steps in %s"
                    % fpath)
            import warnings
            # todo: this really should be an option
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                ts = vts.its2rts(ts, interval)
            if start is not None:
                if start < ts.start:
                    ts = vts.extrapolate_ts(ts, start=start)
                else:
                    ts = ts.window(start=start)
            if end is not None:
                if end > ts.end:
                    ts = vts.extrapolate_ts(ts, end=end)
                else:
                    ts = ts.window(end=end)
        for k, v in metadata.items():
            ts.props[k] = v
        return ts
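
A hypothetical call site, assuming a concrete subclass that defines the pattern and parsers; the class and file names here are illustrative only.

import datetime

reader = MyStationFileReader()  # hypothetical subclass of the reader above
ts = reader.read("station_flow.txt",
                 start=datetime.datetime(2019, 1, 1),
                 end=datetime.datetime(2019, 2, 1),
                 force_regular=True)
if ts is not None:
    print(ts.props)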