def commit(self):
    """Seasonally decompose the selected variables and send the result.

    With no data or no selection the input is forwarded unchanged. If the
    decomposition raises ``ValueError`` the message is shown through
    ``Error.seasonal_decompose_fail`` and ``None`` is sent. Otherwise the
    output is the input table with the decomposition columns appended to
    its attributes.
    """
    self.Error.seasonal_decompose_fail.clear()
    table = self.data
    if not (table and self.selected):
        self.Outputs.time_series.send(table)
        return

    subset_domain = Domain(self.selected, source=table.domain)
    # FIXME: might not pass selected interpolation method
    subset = Timeseries.from_table(subset_domain, table)

    decomposed = None
    with self.progressBar(len(self.selected)) as progress:

        def advance(*_):
            # One tick per processed variable.
            return progress.advance()

        try:
            decomposed = seasonal_decompose(
                subset,
                self.DECOMPOSITION_MODELS[self.decomposition],
                self.n_periods,
                callback=advance)
        except ValueError as ex:
            self.Error.seasonal_decompose_fail(str(ex))

    if decomposed is None:
        self.Outputs.time_series.send(None)
        return

    merged_domain = Domain(
        table.domain.attributes + decomposed.domain.attributes,
        table.domain.class_vars,
        table.domain.metas)
    ts = Timeseries.from_numpy(
        merged_domain,
        X=hstack((table.X, decomposed.X)),
        Y=table.Y,
        metas=table.metas)
    ts.time_variable = table.time_variable
    self.Outputs.time_series.send(ts)
def _as_table(self, values, what):
    """Used for residuals() and fittedvalues() methods.

    Wrap ``values`` in a Timeseries whose columns are named
    ``"<name> (<what>)"``.

    Parameters
    ----------
    values : array-like
        Either 1d or 2d (one column per variable); anything that is not
        2d is treated as a single variable.
    what : str
        Label placed in parentheses after each variable name.

    Returns
    -------
    table : Timeseries
        For a single variable, when ``self._table_timevar`` is set, the
        trailing ``len(values)`` entries of ``self._table_timevals`` are
        prepended as the time column.
    """
    from Orange.data import Domain, ContinuousVariable

    attrs = []
    n_vars = values.shape[1] if values.ndim == 2 else 1
    if n_vars == 1:
        # Force one column. Unlike ``np.atleast_2d(values).T`` this keeps
        # an (n, 1) input as a column instead of flipping it to 1 x n.
        values = np.asarray(values).reshape(-1, 1)

    tvar = None
    # If 1d, time var likely not already present, so lets add it if possible
    if n_vars == 1 and self._table_timevar:
        values = np.column_stack(
            (self._table_timevals[-values.shape[0]:], values))
        tvar = self._table_timevar
        attrs.append(tvar)

    names = self._table_var_names or range(n_vars)
    # zip against range(n_vars) caps the names at the number of columns.
    for _, name in zip(range(n_vars), names):
        attrs.append(ContinuousVariable('{} ({})'.format(name, what)))
        # Make the fitted time variable time variable
        if self._table_timevar and self._table_timevar.name == name:
            tvar = attrs[-1]

    table = Timeseries.from_numpy(Domain(attrs), values)
    table.time_variable = tvar
    table.name = (self._table_name or '') + '({} {})'.format(self, what)
    return table
def _as_table(self, values, what):
    """Used for residuals() and fittedvalues() methods.

    Build a Timeseries from ``values`` with one continuous column per
    variable, each named ``"<name> (<what>)"``.

    Parameters
    ----------
    values : array-like
        1d, or 2d with one column per variable. Non-2d input counts as a
        single variable.
    what : str
        Suffix label used in the generated column names.

    Returns
    -------
    table : Timeseries
        When there is a single variable and ``self._table_timevar`` is
        set, a time column taken from the tail of
        ``self._table_timevals`` is placed first.
    """
    from Orange.data import Domain, ContinuousVariable

    n_vars = values.shape[1] if values.ndim == 2 else 1
    if n_vars == 1:
        # reshape, not ``atleast_2d(...).T``: an (n, 1) input must stay a
        # column rather than being transposed into a single row.
        values = np.asarray(values).reshape(-1, 1)

    attrs = []
    tvar = None
    # If 1d, time var likely not already present, so lets add it if possible
    if n_vars == 1 and self._table_timevar:
        n_rows = values.shape[0]
        values = np.column_stack((self._table_timevals[-n_rows:], values))
        tvar = self._table_timevar
        attrs.append(tvar)

    column_names = self._table_var_names or range(n_vars)
    # Pairing with range(n_vars) limits the names to the column count.
    for _, name in zip(range(n_vars), column_names):
        attrs.append(ContinuousVariable('{} ({})'.format(name, what)))
        # Make the fitted time variable time variable
        if self._table_timevar and self._table_timevar.name == name:
            tvar = attrs[-1]

    table = Timeseries.from_numpy(Domain(attrs), values)
    table.time_variable = tvar
    table.name = (self._table_name or '') + '({} {})'.format(self, what)
    return table
def commit(self):
    """Compute the chosen difference/quotient columns and send the result.

    Sends ``None`` when there is no data or nothing is selected. Otherwise
    one new continuous column per selected variable is appended to the
    input attributes. The first ``order`` (plain differencing) or
    ``shift`` (all other cases) entries of each new column are NaN.
    """
    table = self.data
    if not table or not len(self.selected):
        self.Outputs.time_series.send(None)
        return

    invert = self.invert_direction
    shift = self.shift_period
    order = self.diff_order
    op = self.chosen_operation

    # Plain n-th order differencing is only used with a shift of one.
    plain_diff = op == self.Operation.DIFF and shift == 1
    details = f'order={order}' if plain_diff else f'shift={shift}'

    columns = []
    new_vars = []
    for var in self.selected:
        series = np.ravel(table[:, var])
        if invert:
            series = series[::-1]

        result = np.empty(len(series))
        if plain_diff:
            result[order:] = np.diff(series, order)
            result[:order] = np.nan
        elif op == self.Operation.DIFF:
            result[shift:] = series[shift:] - series[:-shift]
            result[:shift] = np.nan
        else:
            result[shift:] = np.divide(series[shift:], series[:-shift])
            if op == self.Operation.PERC:
                result = (result - 1) * 100
            result[:shift] = np.nan

        if invert:
            result = result[::-1]
        columns.append(result)

        template = f'{var} ({op[:4].lower()}; {details})'
        new_vars.append(
            ContinuousVariable(available_name(table.domain, template)))

    out_domain = Domain(table.domain.attributes + tuple(new_vars),
                        table.domain.class_vars,
                        table.domain.metas)
    ts = Timeseries.from_numpy(
        out_domain,
        np.column_stack((table.X, np.column_stack(columns))),
        table.Y,
        table.metas)
    ts.time_variable = table.time_variable
    self.Outputs.time_series.send(ts)
def commit(self):
    """Aggregate the input by the selected time interval and send it.

    Sends ``None`` when there is no data. Otherwise rows are sorted by
    time if needed, grouped by the key produced by
    ``self.AGG_TIME[self.agg_interval]``, and every ``(variable, func)``
    pair in ``self.model`` is reduced over each group. The output has the
    time variable as its first attribute followed by the aggregated
    attributes, class variables and metas.
    """
    data = self.data
    if not data:
        self.Outputs.time_series.send(None)
        return

    # Group-by expects data sorted
    sorted_indices = np.argsort(data.time_values)
    if not np.all(sorted_indices == np.arange(len(data))):
        data = Timeseries.from_data_table(
            Table.from_table_rows(data, sorted_indices))

    # Split the modelled variables by their role in the input domain.
    attrs, cvars, metas = [], [], []
    for attr, _ in self.model:
        if attr in data.domain.attributes:
            attrs.append(attr)
        elif attr in data.domain.class_vars:
            cvars.append(attr)
        else:
            metas.append(attr)

    aggregate_time = self.AGG_TIME[self.agg_interval]

    def time_key(i):
        # Truncate row i's timestamp to the start of its interval.
        return timestamp(
            aggregate_time(
                fromtimestamp(data.time_values[i],
                              tz=data.time_variable.timezone)))

    times = []
    X, Y, M = [], [], []
    for key_time, indices in groupby(np.arange(len(data)), key=time_key):
        times.append(key_time)
        subset = data[list(indices)]

        xs, ys, ms = [], [], []
        for attr, func in self.model:
            # Read the column via metas regardless of the variable's role.
            values = Table.from_table(
                Domain([], [], [attr], source=data.domain), subset).metas

            out = (xs if attr in data.domain.attributes else
                   ys if attr in data.domain.class_vars else
                   ms)
            out.append(func(values))

        X.append(xs)
        Y.append(ys)
        M.append(ms)

    # np.vstack replaces np.row_stack, which NumPy 2.0 deprecates.
    ts = Timeseries.from_numpy(
        Domain([data.time_variable] + attrs, cvars, metas),
        np.column_stack((times, np.vstack(X))),
        np.array(Y),
        np.array(np.vstack(M), dtype=object))
    self.Outputs.time_series.send(ts)
def _predict_as_table(self, prediction, confidence):
    """Wrap a forecast in a Timeseries.

    For each variable three continuous columns are created: the forecast
    and the lower and upper confidence bounds. The domain lists all
    forecast columns first, then all lower bounds, then all upper bounds.
    Bound variables carry ``ci_percent``; each forecast variable points
    to its bounds through ``ci_attrs``.
    """
    from Orange.data import Domain, ContinuousVariable

    shape = prediction.shape
    n_vars = shape[2] if len(shape) > 2 else 1
    names = self._table_var_names or range(n_vars)

    forecast_vars, lower_vars, upper_vars = [], [], []
    # Pairing with range(n_vars) caps the names at n_vars entries.
    for _, name in zip(range(n_vars), names):
        forecast_var = ContinuousVariable('{} (forecast)'.format(name))
        lower = ContinuousVariable(
            '{} ({:d}%CI low)'.format(name, confidence))
        upper = ContinuousVariable(
            '{} ({:d}%CI high)'.format(name, confidence))
        lower.ci_percent = confidence
        upper.ci_percent = confidence
        forecast_var.ci_attrs = (lower, upper)
        forecast_vars.append(forecast_var)
        lower_vars.append(lower)
        upper_vars.append(upper)

    table = Timeseries.from_numpy(
        Domain(forecast_vars + lower_vars + upper_vars),
        np.column_stack(prediction))
    table.name = (self._table_name or '') + '({} forecast)'.format(self)
    return table
def _predict_as_table(self, prediction, confidence):
    """Return the forecast as a Timeseries with confidence-bound columns.

    Columns are ordered as all forecasts, then all lower bounds, then all
    upper bounds. ``confidence`` is formatted with ``{:d}`` into the bound
    names and stored on the bound variables as ``ci_percent``.
    """
    from Orange.data import Domain, ContinuousVariable

    if len(prediction.shape) > 2:
        n_vars = prediction.shape[2]
    else:
        n_vars = 1
    var_names = self._table_var_names or range(n_vars)

    triples = []
    # range(n_vars) in the zip limits how many names are consumed.
    for _, name in zip(range(n_vars), var_names):
        mean_var = ContinuousVariable('{} (forecast)'.format(name))
        low_var = ContinuousVariable(
            '{} ({:d}%CI low)'.format(name, confidence))
        high_var = ContinuousVariable(
            '{} ({:d}%CI high)'.format(name, confidence))
        low_var.ci_percent = confidence
        high_var.ci_percent = confidence
        mean_var.ci_attrs = (low_var, high_var)
        triples.append((mean_var, low_var, high_var))

    # Regroup: every mean first, then every low, then every high.
    ordered = [triple[k] for k in range(3) for triple in triples]
    domain = Domain(ordered)
    X = np.column_stack(prediction)
    table = Timeseries.from_numpy(domain, X)
    table.name = (self._table_name or '') + '({} forecast)'.format(self)
    return table
def moving_transform(data, spec, fixed_wlen=0):
    """
    Return data transformed according to spec.

    Parameters
    ----------
    data : Timeseries
        A table with features to transform.
    spec : list of lists
        A list of lists [feature:Variable, window_length:int, function:callable].
        May be empty, in which case no columns are added.
    fixed_wlen : int
        If not 0, then window_length in spec is disregarded and this length
        is used. Also the windows don't shift by one but instead align
        themselves side by side.

    Returns
    -------
    transformed : Timeseries
        A table of original data and its transformations.
    """
    from itertools import chain
    from Orange.data import ContinuousVariable, Domain
    from orangecontrib.timeseries import Timeseries
    from orangecontrib.timeseries.widgets.utils import available_name
    from orangecontrib.timeseries.agg_funcs import Cumulative_sum, Cumulative_product

    X = []
    attrs = []
    # Sliding windows advance one row at a time; fixed windows tile the series.
    step = fixed_wlen or 1

    for var, wlen, func in spec:
        col = np.ravel(data[:, var])

        if fixed_wlen:
            wlen = fixed_wlen

        if func in (Cumulative_sum, Cumulative_product):
            # Cumulative functions yield one value per row in each chunk.
            out = list(chain.from_iterable(
                func(col[i:i + wlen])
                for i in range(0, len(col), wlen)))
        else:
            # In reverse cause lazy brain. Also prefer informative ends, not beginnings as much
            col = col[::-1]
            out = [func(col[i:i + wlen])
                   for i in range(0, len(col), step)]
            out = out[::-1]

        X.append(out)

        template = '{} ({}; {})'.format(
            var.name, wlen, func.__name__.lower().replace('_', ' '))
        name = available_name(data.domain, template)
        attrs.append(ContinuousVariable(name))

    dataX, dataY, dataM = data.X, data.Y, data.metas
    if fixed_wlen:
        # Keep one original row per window, aligned to the end of the data.
        # With an empty spec there is no column to take a length from, so
        # n is None and the [:n] slice keeps everything (previously this
        # raised IndexError on X[0]).
        n = len(X[0]) if X else None
        dataX = dataX[::-1][::fixed_wlen][:n][::-1]
        dataY = dataY[::-1][::fixed_wlen][:n][::-1]
        dataM = dataM[::-1][::fixed_wlen][:n][::-1]

    ts = Timeseries.from_numpy(
        Domain(data.domain.attributes + tuple(attrs),
               data.domain.class_vars,
               data.domain.metas),
        np.column_stack((dataX, np.column_stack(X))) if X else dataX,
        dataY, dataM)
    ts.time_variable = data.time_variable
    return ts
def seasonal_decompose(data, model='multiplicative', period=12, *, callback=None):
    """
    Return table of decomposition components of original features and
    original features seasonally adjusted.

    Parameters
    ----------
    data : Timeseries
        A table of features to decompose/adjust.
    model : str {'additive', 'multiplicative'}
        A decomposition model.
        See: https://en.wikipedia.org/wiki/Decomposition_of_time_series
    period : int
        The period length of season.
    callback : callable
        Optional callback to call (with no parameters) after each iteration.

    Returns
    -------
    table : Timeseries
        Table with columns: original series seasonally adjusted,
        original series' seasonal components, trend components, and
        residual components.
    """
    from operator import sub, truediv
    from Orange.data import Domain, ContinuousVariable
    from orangecontrib.timeseries import Timeseries
    from orangecontrib.timeseries.widgets.utils import available_name
    import statsmodels.api as sm

    def _fit_line(series, start, stop):
        # Least-squares slope and intercept over series[start:stop].
        design = np.column_stack(
            (np.arange(start, stop), np.ones(stop - start)))
        return np.linalg.lstsq(design, series[start:stop])[0]

    def _interp_trend(trend):
        # Fill the NaN head and tail of the trend by extending a line
        # fitted to (up to) three neighbouring valid points.
        first = next(i for i, val in enumerate(trend) if val == val)
        last = trend.size - 1 - next(
            i for i, val in enumerate(trend[::-1]) if val == val)
        d = 3
        first_last = min(first + d, last)
        last_first = max(first, last - d)

        slope, intercept = _fit_line(trend, first, first_last)
        trend[:first] = np.arange(0, first) * slope + intercept

        slope, intercept = _fit_line(trend, last_first, last)
        trend[last + 1:] = (
            np.arange(last + 1, trend.size) * slope + intercept)
        return trend

    # Remove a component by subtraction (additive) or division otherwise.
    recomposition = sub if model == 'additive' else truediv
    suffixes = ('season. adj.', 'seasonal', 'trend', 'residual')

    attrs = []
    X = []
    interp_data = data.interp()

    for var in data.domain.variables:
        # NOTE(review): newer statsmodels releases appear to call this
        # keyword ``period`` instead of ``freq`` -- confirm against the
        # version this project pins.
        decomposed = sm.tsa.seasonal_decompose(
            np.ravel(interp_data[:, var]), model=model, freq=period)
        adjusted = recomposition(decomposed.observed, decomposed.seasonal)
        season = decomposed.seasonal
        trend = _interp_trend(decomposed.trend)
        resid = recomposition(adjusted, trend)

        # Re-apply nans
        isnan = np.isnan(data[:, var]).ravel()
        for component in (adjusted, trend, resid):
            component[isnan] = np.nan

        attrs.extend([
            ContinuousVariable(
                available_name(data.domain,
                               var.name + ' ({})'.format(transform)))
            for transform in suffixes])
        X.extend((adjusted, season, trend, resid))

        if callback:
            callback()

    return Timeseries.from_numpy(Domain(attrs), np.column_stack(X))
def interpolate_timeseries(data, method='linear', multivariate=False):
    """Return a new Timeseries (Table) with nan values interpolated.

    Parameters
    ----------
    data : Orange.data.Table
        A table to interpolate.
    method : str {'linear', 'cubic', 'nearest', 'mean'}
        The interpolation method to use.
    multivariate : bool
        Whether to perform multivariate (2d) interpolation first.
        Univariate interpolation of same method is always performed as a
        final step.

    Returns
    -------
    series : Timeseries
        A table with nans in original replaced with interpolated values.
        Discrete columns with no known value, and other columns with
        fewer than two known values, are left untouched. Metas are
        copied without interpolation.
    """
    from scipy.interpolate import griddata, interp1d
    from Orange.data import Domain
    from orangecontrib.timeseries import Timeseries

    attrs = data.domain.attributes
    cvars = data.domain.class_vars
    metas = data.domain.metas
    X = data.X.copy()
    Y = np.column_stack((data.Y, )).copy()  # make 2d
    M = data.metas.copy()

    # Interpolate discrete columns to mode/nearest value
    _x = Timeseries.from_data_table(data).time_values.astype(float)
    for A, variables in ((X, attrs), (Y, cvars)):
        for i, var in enumerate(variables):
            if not var.is_discrete:
                continue
            vals = A[:, i]
            isnan = np.isnan(vals)
            # Nothing to fill, or nothing to fill from.
            if not isnan.any() or isnan.all():
                continue
            known = vals[~isnan]
            if method == 'nearest':
                # Missing values before the first / after the last known
                # point lie outside the interpolation range. Fill them
                # with the edge value instead of letting interp1d raise.
                f = interp1d(_x[~isnan], known, kind='nearest',
                             copy=False, assume_sorted=True,
                             bounds_error=False,
                             fill_value=(known[0], known[-1]))
                A[isnan, i] = f(_x)[isnan]
                continue
            # Any other method: fill with the most frequent value.
            A[isnan, i] = np.argmax(np.bincount(known.astype(int)))

    # Interpolate data
    if multivariate and method != 'mean':
        for A, variables in ((X, attrs), (Y, cvars)):
            is_continuous = [var.is_continuous for var in variables]
            if sum(is_continuous) < 3 or A.shape[0] < 3:
                # griddata() doesn't work with 1d data
                continue

            # Only multivariate continuous features
            Acont = A[:, is_continuous]
            isnan = np.isnan(Acont)
            if not isnan.any():
                continue
            nonnan = ~isnan
            vals = griddata(nonnan.nonzero(), Acont[nonnan],
                            isnan.nonzero(), method=method)
            Acont[isnan] = vals
            A[:, is_continuous] = Acont

    # Do the 1d interpolation anyway in case 2d left some nans
    for A in (X, Y):
        # Each ``col`` is a view into A, so writes to it update A in place.
        for i, col in enumerate(A.T):
            isnan = np.isnan(col)
            nonnan = ~isnan
            # there is no need to interpolate if there are no nans
            # there needs to be at least two numbers
            if not isnan.any() or nonnan.sum() < 2:
                continue

            # Mean interpolation
            if method == 'mean':
                A[isnan, i] = np.nanmean(col)
                continue

            f = interp1d(_x[nonnan], col[nonnan], kind=method,
                         copy=False, assume_sorted=True,
                         bounds_error=False)
            A[isnan, i] = f(_x[isnan])

            # nearest-interpolate any nans at vals start and end
            # TODO: replace nearest with linear/OLS?
            valid = (~np.isnan(col)).nonzero()[0]
            first, last = valid[0], valid[-1]
            col[:first] = col[first]
            col[last:] = col[last]

    ts = Timeseries.from_numpy(Domain(attrs, cvars, metas), X, Y, M)
    return ts
# Tail of a layout routine whose beginning lies outside this excerpt.
gui.checkBox(box, self, 'use_exog',
             'Use exogenous (independent) variables (ARMAX)',
             callback=self.apply)


def forecast(self, model):
    """Return the model's forecast as a table.

    Returns ``None`` when exogenous variables are enabled but no
    exogenous data is available.
    """
    exog_missing = self.use_exog and self.exog_data is None
    if exog_missing:
        return None
    # forecast_confint is a percentage; predict() takes the complement.
    significance = 1 - self.forecast_confint / 100
    return model.predict(self.forecast_steps,
                         exog=self.exog_data,
                         alpha=significance,
                         as_table=True)


def create_learner(self):
    """Build an ARIMA learner from the current (p, d, q) settings."""
    order = (self.p, self.d, self.q)
    return ARIMA(order, self.use_exog)


if __name__ == "__main__":
    from AnyQt.QtWidgets import QApplication
    from Orange.data import Domain

    # Manual demo: use the last attribute as the class variable.
    a = QApplication([])
    ow = OWARIMAModel()

    data = Timeseries.from_file('airpassengers')
    features = data.domain.attributes[:-1]
    target = data.domain.attributes[-1]
    domain = Domain(features, target)
    data = Timeseries.from_numpy(domain, data.X[:, :-1], data.X[:, -1])
    ow.set_data(data)

    ow.show()
    a.exec()
# End of a layout routine that starts before this excerpt.
gui.checkBox(
    box, self, 'use_exog',
    'Use exogenous (independent) variables (ARMAX)',
    callback=self.apply)


def forecast(self, model):
    """Forecast ``self.forecast_steps`` steps ahead and return a table.

    Nothing is forecast (``None`` is returned) if exogenous variables are
    requested while ``self.exog_data`` is ``None``.
    """
    if self.use_exog and self.exog_data is None:
        return None
    predict_kwargs = dict(
        exog=self.exog_data,
        # Confidence interval in percent, converted to a significance level.
        alpha=1 - self.forecast_confint / 100,
        as_table=True)
    return model.predict(self.forecast_steps, **predict_kwargs)


def create_learner(self):
    """Return an ARIMA learner configured with the (p, d, q) order."""
    arima_order = (self.p, self.d, self.q)
    return ARIMA(arima_order, self.use_exog)


if __name__ == "__main__":
    from AnyQt.QtWidgets import QApplication
    from Orange.data import Domain

    # Manual demo: move the last attribute into the class-variable slot.
    a = QApplication([])
    ow = OWARIMAModel()

    data = Timeseries('airpassengers')
    all_attrs = data.domain.attributes
    domain = Domain(all_attrs[:-1], all_attrs[-1])
    data = Timeseries.from_numpy(domain, data.X[:, :-1], data.X[:, -1])
    ow.set_data(data)

    ow.show()
    a.exec()