Example #1
0
    def commit(self):
        """Seasonally decompose the selected variables and send the result.

        With no data or no selection, the input is forwarded unchanged.
        If the decomposition raises ``ValueError``, its message is shown
        through ``Error.seasonal_decompose_fail`` and ``None`` is sent.
        Otherwise the decomposition columns are appended to the original
        attributes and the combined table is sent.
        """
        self.Error.seasonal_decompose_fail.clear()
        table = self.data
        if not (table and self.selected):
            self.Outputs.time_series.send(table)
            return

        # FIXME: might not pass selected interpolation method
        subset_domain = Domain(self.selected, source=table.domain)
        subset = Timeseries.from_table(subset_domain, table)

        decomposed = None
        with self.progressBar(len(self.selected)) as progress:

            def step(*_):
                # Ignore whatever the callback is invoked with
                return progress.advance()

            try:
                decomposed = seasonal_decompose(
                    subset,
                    self.DECOMPOSITION_MODELS[self.decomposition],
                    self.n_periods,
                    callback=step)
            except ValueError as ex:
                self.Error.seasonal_decompose_fail(str(ex))

        output = None
        if decomposed is not None:
            combined_domain = Domain(
                table.domain.attributes + decomposed.domain.attributes,
                table.domain.class_vars,
                table.domain.metas)
            output = Timeseries.from_numpy(
                combined_domain,
                X=hstack((table.X, decomposed.X)),
                Y=table.Y,
                metas=table.metas)
            output.time_variable = table.time_variable
        self.Outputs.time_series.send(output)
Example #2
0
    def _as_table(self, values, what):
        """Wrap model output in a Timeseries table.

        Used for residuals() and fittedvalues() methods. Each value column
        becomes a continuous variable named ``'<name> (<what>)'``, where the
        names come from ``self._table_var_names`` (or column indices when
        that is empty).
        """
        from Orange.data import Domain, ContinuousVariable

        n_vars = values.shape[1] if values.ndim == 2 else 1
        if n_vars == 1:
            values = np.atleast_2d(values).T

        columns = []
        time_var = None
        # If 1d, time var likely not already present, so lets add it if possible
        if n_vars == 1 and self._table_timevar:
            n_rows = values.shape[0]
            time_column = self._table_timevals[-n_rows:]
            values = np.column_stack((time_column, values))
            time_var = self._table_timevar
            columns.append(time_var)

        names = self._table_var_names or range(n_vars)
        # zip against range(n_vars) so at most n_vars names are consumed
        for _, name in zip(range(n_vars), names):
            fitted_var = ContinuousVariable('{} ({})'.format(name, what))
            columns.append(fitted_var)

            # Make the fitted time variable time variable
            if self._table_timevar and self._table_timevar.name == name:
                time_var = fitted_var

        table = Timeseries.from_numpy(Domain(columns), values)
        table.time_variable = time_var
        table.name = (self._table_name or '') + '({} {})'.format(self, what)
        return table
Example #3
0
    def _as_table(self, values, what):
        """Build a Timeseries table from fitted values or residuals.

        Used for residuals() and fittedvalues() methods. ``what`` is the
        label put in parentheses after each variable name.
        """
        from Orange.data import Domain, ContinuousVariable

        if values.ndim == 2:
            n_vars = values.shape[1]
        else:
            n_vars = 1
        single = n_vars == 1
        if single:
            values = np.atleast_2d(values).T

        attrs, tvar = [], None
        # If 1d, time var likely not already present, so lets add it if possible
        if single and self._table_timevar:
            tail = self._table_timevals[-values.shape[0]:]
            values = np.column_stack((tail, values))
            tvar = self._table_timevar
            attrs.append(tvar)

        var_names = self._table_var_names or range(n_vars)
        for _, var_name in zip(range(n_vars), var_names):
            label = '{} ({})'.format(var_name, what)
            attrs.append(ContinuousVariable(label))

            # Make the fitted time variable time variable
            if self._table_timevar and self._table_timevar.name == var_name:
                tvar = attrs[-1]

        result = Timeseries.from_numpy(Domain(attrs), values)
        result.time_variable = tvar
        result.name = (self._table_name or '') + '({} {})'.format(self, what)
        return result
Example #4
0
    def commit(self):
        """Append a transformed column for every selected variable and send.

        Depending on ``chosen_operation`` each new column holds differences,
        quotients, or percentage changes of the series against itself
        shifted by ``shift_period``; a difference with shift 1 uses
        ``np.diff`` of order ``diff_order`` instead. The leading values that
        cannot be computed are set to NaN. Sends ``None`` when there is no
        data or no selection.
        """
        data = self.data
        if not data or not len(self.selected):
            self.Outputs.time_series.send(None)
            return

        invert = self.invert_direction
        shift = self.shift_period
        order = self.diff_order
        op = self.chosen_operation

        # These do not depend on the variable, so compute them once
        plain_diff = op == self.Operation.DIFF and shift == 1
        details = f'order={order}' if plain_diff else f'shift={shift}'
        op_tag = op[:4].lower()

        new_columns, new_attrs = [], []
        for var in self.selected:
            series = np.ravel(data[:, var])
            if invert:
                series = series[::-1]

            result = np.empty(len(series))
            if plain_diff:
                n_lead = order
                result[n_lead:] = np.diff(series, order)
            elif op == self.Operation.DIFF:
                n_lead = shift
                result[n_lead:] = series[shift:] - series[:-shift]
            else:
                n_lead = shift
                result[n_lead:] = np.divide(series[shift:], series[:-shift])
                if op == self.Operation.PERC:
                    result = (result - 1) * 100
            # Leading entries have no counterpart to compare against
            result[:n_lead] = np.nan

            if invert:
                result = result[::-1]
            new_columns.append(result)

            template = f'{var} ({op_tag}; {details})'
            new_attrs.append(
                ContinuousVariable(available_name(data.domain, template)))

        domain = Domain(data.domain.attributes + tuple(new_attrs),
                        data.domain.class_vars,
                        data.domain.metas)
        values = np.column_stack((data.X, np.column_stack(new_columns)))
        ts = Timeseries.from_numpy(domain, values, data.Y, data.metas)
        ts.time_variable = data.time_variable
        self.Outputs.time_series.send(ts)
Example #5
0
    def commit(self):
        """Aggregate rows that fall into the same time interval and send.

        Rows are sorted by time, grouped by the interval start computed with
        ``self.AGG_TIME[self.agg_interval]``, and every ``(variable,
        function)`` pair in ``self.model`` is applied to each group. The
        output table has the time variable first, followed by the aggregated
        attributes; class variables and metas keep their roles. Sends
        ``None`` when there is no data.
        """
        data = self.data
        if not data:
            self.Outputs.time_series.send(None)
            return

        # Group-by expects data sorted
        sorted_indices = np.argsort(data.time_values)
        if not np.all(sorted_indices == np.arange(len(data))):
            data = Timeseries.from_data_table(
                Table.from_table_rows(data, sorted_indices))

        domain = data.domain
        attrs, cvars, metas = [], [], []
        for attr, _ in self.model:
            if attr in domain.attributes:
                attrs.append(attr)
            elif attr in domain.class_vars:
                cvars.append(attr)
            else:
                metas.append(attr)

        aggregate_time = self.AGG_TIME[self.agg_interval]

        def time_key(i):
            # Timestamp of the start of the interval that row i belongs to
            return timestamp(
                aggregate_time(
                    fromtimestamp(data.time_values[i],
                                  tz=data.time_variable.timezone)))

        times = []
        X, Y, M = [], [], []
        for key_time, indices in groupby(np.arange(len(data)), key=time_key):
            times.append(key_time)
            subset = data[list(indices)]

            xs, ys, ms = [], [], []
            for attr, func in self.model:
                # Pull the column out as a meta so any variable type works
                values = Table.from_table(
                    Domain([], [], [attr], source=domain), subset).metas
                out = (xs if attr in domain.attributes else
                       ys if attr in domain.class_vars else ms)
                out.append(func(values))

            X.append(xs)
            Y.append(ys)
            M.append(ms)

        # np.vstack replaces np.row_stack, which is deprecated since
        # NumPy 2.0 (it was a plain alias of vstack).
        ts = Timeseries.from_numpy(
            Domain([data.time_variable] + attrs, cvars, metas),
            np.column_stack((times, np.vstack(X))), np.array(Y),
            np.array(np.vstack(M), dtype=object))
        self.Outputs.time_series.send(ts)
Example #6
0
 def _predict_as_table(self, prediction, confidence):
     """Wrap a forecast in a Timeseries table.

     For every predicted variable three continuous variables are created:
     the forecast itself plus the low and high confidence-interval bounds.
     The domain lists all forecasts first, then all lows, then all highs.
     ``confidence`` is formatted with ``{:d}`` and so must be an integer.
     """
     from Orange.data import Domain, ContinuousVariable

     has_var_axis = len(prediction.shape) > 2
     n_vars = prediction.shape[2] if has_var_axis else 1
     names = self._table_var_names or range(n_vars)

     means, lows, highs = [], [], []
     # zip against range(n_vars) so at most n_vars names are consumed
     for _, name in zip(range(n_vars), names):
         mean = ContinuousVariable('{} (forecast)'.format(name))
         low = ContinuousVariable('{} ({:d}%CI low)'.format(name, confidence))
         high = ContinuousVariable('{} ({:d}%CI high)'.format(name, confidence))
         # Link each forecast variable to its interval bounds
         low.ci_percent = high.ci_percent = confidence
         mean.ci_attrs = (low, high)
         means.append(mean)
         lows.append(low)
         highs.append(high)

     table = Timeseries.from_numpy(Domain(means + lows + highs),
                                   np.column_stack(prediction))
     table.name = (self._table_name or '') + '({} forecast)'.format(self)
     return table
Example #7
0
 def _predict_as_table(self, prediction, confidence):
     """Turn a prediction array into a Timeseries of forecasts and bounds.

     Creates ``'<name> (forecast)'``, ``'<name> (<confidence>%CI low)'``
     and ``'<name> (<confidence>%CI high)'`` variables for each predicted
     series and orders the domain as forecasts, lows, highs.
     """
     from Orange.data import Domain, ContinuousVariable

     if len(prediction.shape) > 2:
         n_vars = prediction.shape[2]
     else:
         n_vars = 1

     forecast_vars, low_vars, high_vars = [], [], []
     var_names = self._table_var_names or range(n_vars)
     for _, var_name in zip(range(n_vars), var_names):
         forecast_var = ContinuousVariable('{} (forecast)'.format(var_name))
         low_var = ContinuousVariable(
             '{} ({:d}%CI low)'.format(var_name, confidence))
         high_var = ContinuousVariable(
             '{} ({:d}%CI high)'.format(var_name, confidence))
         # Remember the interval width and tie the bounds to the forecast
         low_var.ci_percent = high_var.ci_percent = confidence
         forecast_var.ci_attrs = (low_var, high_var)
         forecast_vars.append(forecast_var)
         low_vars.append(low_var)
         high_vars.append(high_var)

     domain = Domain(forecast_vars + low_vars + high_vars)
     values = np.column_stack(prediction)
     result = Timeseries.from_numpy(domain, values)
     result.name = (self._table_name or '') + '({} forecast)'.format(self)
     return result
Example #8
0
def moving_transform(data, spec, fixed_wlen=0):
    """
    Return data transformed according to spec.

    Parameters
    ----------
    data : Timeseries
        A table with features to transform.
    spec : list of lists
        A list of lists [feature:Variable, window_length:int, function:callable].
        May be empty, in which case no columns are added.
    fixed_wlen : int
        If not 0, then window_length in spec is disregarded and this length
        is used. Also the windows don't shift by one but instead align
        themselves side by side.

    Returns
    -------
    transformed : Timeseries
        A table of original data and its transformations.
    """
    from itertools import chain
    from Orange.data import ContinuousVariable, Domain
    from orangecontrib.timeseries import Timeseries
    from orangecontrib.timeseries.widgets.utils import available_name
    from orangecontrib.timeseries.agg_funcs import Cumulative_sum, Cumulative_product

    X = []
    attrs = []

    for var, wlen, func in spec:
        col = np.ravel(data[:, var])

        if fixed_wlen:
            wlen = fixed_wlen

        if func in (Cumulative_sum, Cumulative_product):
            # Cumulative functions return one value per input element, so
            # the per-window results are concatenated back together.
            out = list(
                chain.from_iterable(
                    func(col[i:i + wlen]) for i in range(0, len(col), wlen)))
        else:
            # Windows are taken from the end of the series so that the most
            # recent values always fall into a full window.
            col = col[::-1]
            step = wlen if fixed_wlen else 1
            out = [func(col[i:i + wlen]) for i in range(0, len(col), step)]
            out = out[::-1]

        X.append(out)

        template = '{} ({}; {})'.format(
            var.name, wlen,
            func.__name__.lower().replace('_', ' '))
        name = available_name(data.domain, template)
        attrs.append(ContinuousVariable(name))

    dataX, dataY, dataM = data.X, data.Y, data.metas
    if fixed_wlen:
        # Keep only the last row of every window. With an empty spec there
        # is no transformed column to take the row count from; slicing with
        # None then keeps every window end instead of raising IndexError.
        n = len(X[0]) if X else None
        dataX = dataX[::-1][::fixed_wlen][:n][::-1]
        dataY = dataY[::-1][::fixed_wlen][:n][::-1]
        dataM = dataM[::-1][::fixed_wlen][:n][::-1]

    ts = Timeseries.from_numpy(
        Domain(data.domain.attributes + tuple(attrs), data.domain.class_vars,
               data.domain.metas),
        np.column_stack((dataX, np.column_stack(X))) if X else dataX, dataY,
        dataM)
    ts.time_variable = data.time_variable
    return ts
Example #9
0
def seasonal_decompose(data,
                       model='multiplicative',
                       period=12,
                       *,
                       callback=None):
    """
    Return table of decomposition components of original features and
    original features seasonally adjusted.

    Parameters
    ----------
    data : Timeseries
        A table of features to decompose/adjust.
    model : str {'additive', 'multiplicative'}
        A decomposition model. See:
        https://en.wikipedia.org/wiki/Decomposition_of_time_series
    period : int
        The period length of season.
    callback : callable
        Optional callback to call (with no parameters) after each iteration.

    Returns
    -------
    table : Timeseries
        Table with columns: original series seasonally adjusted, original
        series' seasonal components, trend components, and residual components.
    """
    from operator import sub, truediv
    from Orange.data import Domain, ContinuousVariable
    from orangecontrib.timeseries import Timeseries
    from orangecontrib.timeseries.widgets.utils import available_name
    import statsmodels.api as sm

    def _interp_trend(trend):
        # The trend has NaNs at both ends; fill them by extending a line
        # fitted to (up to) the d nearest non-NaN values on each side.
        # ``val == val`` is False only for NaN.
        first = next(i for i, val in enumerate(trend) if val == val)
        last = trend.size - 1 - next(
            i for i, val in enumerate(trend[::-1]) if val == val)
        d = 3
        first_last = min(first + d, last)
        last_first = max(first, last - d)

        k, n = np.linalg.lstsq(
            np.column_stack(
                (np.arange(first, first_last), np.ones(first_last - first))),
            trend[first:first_last])[0]
        trend[:first] = np.arange(0, first) * k + n

        k, n = np.linalg.lstsq(
            np.column_stack((np.arange(last_first,
                                       last), np.ones(last - last_first))),
            trend[last_first:last])[0]
        trend[last + 1:] = np.arange(last + 1, trend.size) * k + n
        return trend

    def _decompose(series):
        # Newer statsmodels names the season length ``period``; older
        # releases only accept ``freq`` and reject ``period`` with TypeError.
        try:
            return sm.tsa.seasonal_decompose(series,
                                             model=model,
                                             period=period)
        except TypeError:
            return sm.tsa.seasonal_decompose(series,
                                             model=model,
                                             freq=period)

    attrs = []
    X = []
    # Removing a component is subtraction for the additive model and
    # division for the multiplicative one.
    recomposition = sub if model == 'additive' else truediv
    interp_data = data.interp()
    for var in data.domain.variables:
        decomposed = _decompose(np.ravel(interp_data[:, var]))
        adjusted = recomposition(decomposed.observed, decomposed.seasonal)

        season = decomposed.seasonal
        trend = _interp_trend(decomposed.trend)
        resid = recomposition(adjusted, trend)

        # Re-apply nans
        isnan = np.isnan(data[:, var]).ravel()
        adjusted[isnan] = np.nan
        trend[isnan] = np.nan
        resid[isnan] = np.nan

        attrs.extend(
            ContinuousVariable(
                available_name(data.domain, var.name +
                               ' ({})'.format(transform)))
            for transform in ('season. adj.', 'seasonal', 'trend', 'residual'))
        X.extend((adjusted, season, trend, resid))

        if callback:
            callback()

    ts = Timeseries.from_numpy(Domain(attrs), np.column_stack(X))
    return ts
Example #10
0
def interpolate_timeseries(data, method='linear', multivariate=False):
    """Return a new Timeseries (Table) with nan values interpolated.

    Discrete columns are filled with the nearest known value when
    ``method`` is 'nearest' and with the most frequent value otherwise.
    Continuous columns are interpolated along the time axis; NaNs before
    the first or after the last known value take that nearest known value.

    Parameters
    ----------
    data : Orange.data.Table
        A table to interpolate.
    method : str {'linear', 'cubic', 'nearest', 'mean'}
        The interpolation method to use.
    multivariate : bool
        Whether to perform multivariate (2d) interpolation first.
        Univariate interpolation of same method is always performed as a
        final step.

    Returns
    -------
    series : Timeseries
        A table with nans in original replaced with interpolated values.
    """
    from scipy.interpolate import griddata, interp1d
    from Orange.data import Domain
    from orangecontrib.timeseries import Timeseries

    attrs = data.domain.attributes
    cvars = data.domain.class_vars
    metas = data.domain.metas
    X = data.X.copy()
    Y = np.column_stack((data.Y, )).copy()  # make 2d
    M = data.metas.copy()

    # Interpolate discrete columns to mode/nearest value
    _x = Timeseries.from_data_table(data).time_values.astype(float)
    for A, variables in ((X, attrs), (Y, cvars)):
        for i, var in enumerate(variables):
            if not var.is_discrete:
                continue
            vals = A[:, i]
            isnan = np.isnan(vals)
            if not isnan.any():
                continue
            if method == 'nearest':
                nonnan = ~isnan
                x, vals = _x[nonnan], vals[nonnan]
                # f is evaluated on the whole time axis, so leading or
                # trailing NaNs lie outside [x[0], x[-1]]. Without
                # bounds_error=False that raises ValueError; fill those
                # positions with the first/last known value instead.
                f = interp1d(x,
                             vals,
                             kind='nearest',
                             copy=False,
                             assume_sorted=True,
                             bounds_error=False,
                             fill_value=(vals[0], vals[-1]))
                A[isnan, i] = f(_x)[isnan]
                continue
            A[isnan, i] = np.argmax(np.bincount(vals[~isnan].astype(int)))

    # Interpolate data
    if multivariate and method != 'mean':
        for A, variables in ((X, attrs), (Y, cvars)):
            is_continuous = [var.is_continuous for var in variables]
            if sum(is_continuous) < 3 or A.shape[0] < 3:
                # griddata() doesn't work with 1d data
                continue

            # Only multivariate continuous features
            Acont = A[:, is_continuous]
            isnan = np.isnan(Acont)
            if not isnan.any():
                continue
            nonnan = ~isnan
            # Interpolate over the (row, column) index grid of known cells
            vals = griddata(nonnan.nonzero(),
                            Acont[nonnan],
                            isnan.nonzero(),
                            method=method)
            Acont[isnan] = vals
            A[:, is_continuous] = Acont

    # Do the 1d interpolation anyway in case 2d left some nans
    for A in (X, Y):
        for i, col in enumerate(A.T):
            isnan = np.isnan(col)
            # there is no need to interpolate if there are no nans
            # there needs to be at least two numbers
            if not isnan.any() or np.count_nonzero(~isnan) < 2:
                continue

            # Mean interpolation
            if method == 'mean':
                A[isnan, i] = np.nanmean(col)
                continue

            nonnan = ~isnan
            f = interp1d(_x[nonnan],
                         col[nonnan],
                         kind=method,
                         copy=False,
                         assume_sorted=True,
                         bounds_error=False)
            A[isnan, i] = f(_x[isnan])

            # nearest-interpolate any nans at vals start and end
            # TODO: replace nearest with linear/OLS?
            valid = (~np.isnan(col)).nonzero()[0]
            first, last = valid[0], valid[-1]
            col[:first] = col[first]
            col[last:] = col[last]

    ts = Timeseries.from_numpy(Domain(attrs, cvars, metas), X, Y, M)
    return ts
        gui.checkBox(box, self, 'use_exog',
                     'Use exogenous (independent) variables (ARMAX)',
                     callback=self.apply)

    def forecast(self, model):
        """Return the model's forecast as a table.

        Returns ``None`` when exogenous variables are requested but no
        exogenous data is available. Otherwise asks ``model`` to predict
        ``forecast_steps`` steps, with ``alpha`` derived from the
        confidence-interval percentage.
        """
        exog_missing = self.use_exog and self.exog_data is None
        if exog_missing:
            return None

        # Confidence interval in percent -> significance level
        alpha = 1 - self.forecast_confint / 100
        return model.predict(self.forecast_steps,
                             exog=self.exog_data,
                             alpha=alpha,
                             as_table=True)

    def create_learner(self):
        """Build an ARIMA learner from the widget's p, d, q and exog settings."""
        order = (self.p, self.d, self.q)
        return ARIMA(order, self.use_exog)


if __name__ == "__main__":
    # Manual smoke test: open the widget on the air-passengers series.
    from AnyQt.QtWidgets import QApplication
    from Orange.data import Domain

    app = QApplication([])
    widget = OWARIMAModel()

    source = Timeseries.from_file('airpassengers')
    # Turn the last attribute into the class variable
    *features, target = source.domain.attributes
    demo_domain = Domain(tuple(features), target)
    demo_data = Timeseries.from_numpy(demo_domain,
                                      source.X[:, :-1],
                                      source.X[:, -1])
    widget.set_data(demo_data)

    widget.show()
    app.exec()
Example #12
0
        gui.checkBox(box, self, 'use_exog',
                     'Use exogenous (independent) variables (ARMAX)',
                     callback=self.apply)

    def forecast(self, model):
        """Ask ``model`` for a forecast table, or return ``None``.

        ``None`` is returned when exogenous variables are enabled but
        ``exog_data`` has not been provided.
        """
        if self.use_exog and self.exog_data is None:
            return None

        predict_kwargs = {
            'exog': self.exog_data,
            # Confidence interval in percent -> significance level
            'alpha': 1 - self.forecast_confint / 100,
            'as_table': True,
        }
        return model.predict(self.forecast_steps, **predict_kwargs)

    def create_learner(self):
        """Return an ARIMA learner configured with (p, d, q) and use_exog."""
        p, d, q = self.p, self.d, self.q
        return ARIMA((p, d, q), self.use_exog)


if __name__ == "__main__":
    # Manual smoke test: show the widget with the air-passengers series.
    from AnyQt.QtWidgets import QApplication
    from Orange.data import Domain

    app = QApplication([])
    widget = OWARIMAModel()

    source = Timeseries('airpassengers')
    # Use the last attribute as the class variable
    all_attrs = source.domain.attributes
    demo_domain = Domain(all_attrs[:-1], all_attrs[-1])
    features, target = source.X[:, :-1], source.X[:, -1]
    widget.set_data(Timeseries.from_numpy(demo_domain, features, target))

    widget.show()
    app.exec()