def commit(self):
    """Seasonally decompose the selected variables and send the result.

    The decomposition error is cleared first. When there is no data or
    nothing is selected, the input data is forwarded unchanged. When
    ``seasonal_decompose`` raises ``ValueError``, its message is shown
    through ``self.Error.seasonal_decompose_fail`` and ``None`` is sent.
    Otherwise the output is the input data concatenated with the
    decomposition result, keeping the input's time variable.
    """
    self.Error.seasonal_decompose_fail.clear()
    data = self.data
    if not (data and self.selected):
        # Nothing to decompose: pass the input straight through.
        self.Outputs.time_series.send(data)
        return

    subset_domain = Domain(self.selected, source=data.domain)
    selected_subset = Timeseries(subset_domain, data)

    # FIXME: might not pass selected interpolation method
    with self.progressBar(len(self.selected)) as progress:

        def advance(*_):
            # Callback arguments are ignored; each call advances the bar.
            return progress.advance()

        try:
            decomposed = seasonal_decompose(
                selected_subset,
                self.DECOMPOSITION_MODELS[self.decomposition],
                self.n_periods,
                callback=advance)
        except ValueError as ex:
            self.Error.seasonal_decompose_fail(str(ex))
            decomposed = None

    output = None
    if decomposed is not None:
        output = Timeseries(Timeseries.concatenate((data, decomposed)))
        output.time_variable = data.time_variable
    self.Outputs.time_series.send(output)
def commit(self):
    """Append a transformed column for every selected variable and send it.

    Sends ``None`` when there is no data or nothing is selected.

    For each selected variable the column is (optionally reversed, when
    ``invert_direction`` is set) transformed according to
    ``chosen_operation``:

    * ``Operation.DIFF`` with a shift of 1: ``np.diff`` of order
      ``diff_order``; the first ``diff_order`` entries are NaN.
    * ``Operation.DIFF`` with any other shift: ``col[t] - col[t - shift]``.
    * any other operation: the ratio ``col[t] / col[t - shift]``, turned
      into ``(ratio - 1) * 100`` for ``Operation.PERC``.

    In the shifted cases the first ``shift_period`` entries are NaN. A
    reversed column is reversed back before it is stored. New variables
    are named through ``available_name`` and appended to the attributes
    of the input domain.
    """
    data = self.data
    if not data or not len(self.selected):
        self.Outputs.time_series.send(None)
        return

    reverse = self.invert_direction
    lag = self.shift_period
    degree = self.diff_order
    operation = self.chosen_operation

    new_columns = []
    new_vars = []
    for var in self.selected:
        series = np.ravel(data[:, var])
        if reverse:
            series = series[::-1]

        is_diff = operation == self.Operation.DIFF
        result = np.empty(len(series))
        if is_diff and lag == 1:
            result[degree:] = np.diff(series, degree)
            result[:degree] = np.nan
            details = f'order={degree}'
        else:
            if is_diff:
                result[lag:] = series[lag:] - series[:-lag]
            else:
                result[lag:] = np.divide(series[lag:], series[:-lag])
                if operation == self.Operation.PERC:
                    result = (result - 1) * 100
            # The leading entries have no earlier value to compare against.
            result[:lag] = np.nan
            details = f'shift={lag}'

        if reverse:
            result = result[::-1]
        new_columns.append(result)

        template = f'{var} ({operation[:4].lower()}; {details})'
        new_vars.append(
            ContinuousVariable(available_name(data.domain, template)))

    domain = Domain(data.domain.attributes + tuple(new_vars),
                    data.domain.class_vars,
                    data.domain.metas)
    features = np.column_stack((data.X, np.column_stack(new_columns)))
    ts = Timeseries(domain, features, data.Y, data.metas)
    ts.time_variable = data.time_variable
    self.Outputs.time_series.send(ts)
def finance_data(symbol, since=None, until=None, granularity='d'):
    """Fetch Yahoo Finance data for stock or index `symbol` within the
    period after `since` and before `until` (both inclusive).

    The table is downloaded from ``chart.finance.yahoo.com`` with
    ``Timeseries.from_url``, its rows are reversed, the 'Adj Close'
    column becomes the class variable and 'Date' the time variable.

    Parameters
    ----------
    symbol: str
        A stock or index symbol, as supported by Yahoo Finance.
    since: date
        A start date (default: 1900-01-01).
    until: date
        An end date (default: today).
    granularity: 'd' or 'w' or 'm' or 'v'
        What data to get: daily, weekly, monthly, or dividends.

    Returns
    -------
    data : Timeseries

    Raises
    ------
    ValueError
        If the downloaded table has no 'Adj Close' attribute
        (raised by ``list.remove``).
    """
    start = date(1900, 1, 1) if since is None else since
    end = date.today() if until is None else until

    url_template = (
        'http://chart.finance.yahoo.com/table.csv?'
        's={SYMBOL}&d={TO_MONTH}&e={TO_DAY}&f={TO_YEAR}&'
        'g={GRANULARITY}&a={FROM_MONTH}&b={FROM_DAY}&c={FROM_YEAR}&ignore=.csv'
    )
    # Months are sent zero-based (month - 1); days and years as they are.
    query = dict(
        SYMBOL=symbol,
        GRANULARITY=granularity,
        TO_MONTH=end.month - 1,
        TO_DAY=end.day,
        TO_YEAR=end.year,
        FROM_MONTH=start.month - 1,
        FROM_DAY=start.day,
        FROM_YEAR=start.year,
    )
    url = url_template.format(**query)

    # Reverse the row order (presumably the service lists newest rows
    # first -- confirm against the endpoint).
    table = Timeseries.from_url(url)[::-1]

    # Make Adjusted Close a class variable
    feature_names = [var.name for var in table.domain.attributes]
    feature_names.remove('Adj Close')
    target = table.domain['Adj Close']
    domain = Domain(feature_names, [target], None, source=table.domain)
    result = Timeseries(domain, table)

    result.name = symbol
    result.time_variable = result.domain['Date']
    return result
def finance_data(symbol, since=None, until=None, granularity='d'):
    """Fetch Yahoo Finance data for stock or index `symbol` within the
    period after `since` and before `until` (both inclusive).

    Builds a ``chart.finance.yahoo.com`` CSV request, loads it with
    ``Timeseries.from_url``, reverses the rows, and returns a Timeseries
    whose class variable is 'Adj Close' and whose time variable is 'Date'.

    Parameters
    ----------
    symbol: str
        A stock or index symbol, as supported by Yahoo Finance.
    since: date
        A start date (default: 1900-01-01).
    until: date
        An end date (default: today).
    granularity: 'd' or 'w' or 'm' or 'v'
        What data to get: daily, weekly, monthly, or dividends.

    Returns
    -------
    data : Timeseries

    Raises
    ------
    ValueError
        If the downloaded table has no 'Adj Close' attribute
        (raised by ``list.remove``).
    """
    if since is None:
        since = date(1900, 1, 1)
    if until is None:
        until = date.today()

    base = 'http://chart.finance.yahoo.com/table.csv?'
    params = ('s={SYMBOL}&d={TO_MONTH}&e={TO_DAY}&f={TO_YEAR}&'
              'g={GRANULARITY}&a={FROM_MONTH}&b={FROM_DAY}&c={FROM_YEAR}'
              '&ignore=.csv')
    # The request carries months zero-based, hence the ``- 1``.
    url = (base + params).format(
        SYMBOL=symbol,
        GRANULARITY=granularity,
        FROM_YEAR=since.year,
        FROM_MONTH=since.month - 1,
        FROM_DAY=since.day,
        TO_YEAR=until.year,
        TO_MONTH=until.month - 1,
        TO_DAY=until.day,
    )

    # Rows are taken in reverse of the order they were downloaded in.
    downloaded = Timeseries.from_url(url)[::-1]

    # Make Adjusted Close a class variable
    names = [attr.name for attr in downloaded.domain.attributes]
    names.remove('Adj Close')
    class_vars = [downloaded.domain['Adj Close']]
    series = Timeseries(
        Domain(names, class_vars, None, source=downloaded.domain),
        downloaded)

    series.name = symbol
    series.time_variable = series.domain['Date']
    return series
def commit(self):
    """Seasonally decompose the selected variables and send the result.

    When there is no data or nothing is selected, the input data is
    forwarded unchanged on ``Output.TIMESERIES``. Otherwise the selected
    variables are decomposed with ``seasonal_decompose`` (advancing the
    progress bar from its callback) and the input data concatenated with
    the decomposition result is sent, keeping the input's time variable.
    """
    data = self.data
    if not (data and self.selected):
        self.send(Output.TIMESERIES, data)
        return

    subset_domain = Domain(self.selected, source=data.domain)
    selected_subset = Timeseries(subset_domain, data)

    # FIXME: might not pass selected interpolation method
    with self.progressBar(len(self.selected)) as progress:

        def advance(*_):
            # Callback arguments are ignored; each call advances the bar.
            return progress.advance()

        decomposed = seasonal_decompose(
            selected_subset,
            self.DECOMPOSITION_MODELS[self.decomposition],
            self.n_periods,
            callback=advance)

    combined = Timeseries(Timeseries.concatenate((data, decomposed)))
    combined.time_variable = data.time_variable
    self.send(Output.TIMESERIES, combined)
def commit(self):
    """Turn the input table into a Timeseries and send it.

    Errors are cleared first. ``None`` is sent when there is no data, or
    when the selected attribute is not in the domain and sequential mode
    is off.

    In sequential mode a new continuous attribute holding 1..len(data)
    is prepended to the attributes and used as the time variable. Its
    name is the first of '__seq__', '__seq__0', ..., '__seq__9' that is
    not already in the domain ('__seq__9' when all are taken).

    Otherwise the selected attribute is the time variable: if any of its
    values is NaN, ``Error.nan_times`` is raised and ``None`` is sent;
    else the rows are reordered by ``np.argsort`` of its values when they
    are not already in that order.
    """
    data = self.data
    self.Error.clear()
    if data is None or not (self.selected_attr in data.domain
                            or self.radio_sequential):
        self.Outputs.time_series.send(None)
        return

    domain = data.domain
    attributes, class_vars, meta_vars = (
        domain.attributes, domain.class_vars, domain.metas)
    features = data.X
    targets = np.column_stack((data.Y, ))  # make 2d
    meta_values = data.metas

    if self.radio_sequential:
        # Set sequence attribute
        candidates = ('__seq__' + str(suffix)
                      for suffix in chain(('', ), range(10)))
        seq_name = next(
            (name for name in candidates if name not in data.domain),
            '__seq__9')
        time_var = ContinuousVariable(seq_name)
        attributes = type(attributes)((time_var, )) + attributes
        sequence = np.arange(1, len(data) + 1)
        features = np.column_stack((sequence, features))
        data = Table(Domain(attributes, class_vars, meta_vars),
                     features, targets, meta_values)
    else:
        # Or make a sequence attribute one of the existing attributes
        # and sort all values according to it
        time_var = data.domain[self.selected_attr]
        time_only = Table.from_table(Domain([], [], [time_var]), source=data)
        values = time_only.metas.ravel()
        if np.isnan(values).any():
            self.Error.nan_times(time_var.name)
            self.Outputs.time_series.send(None)
            return
        ordered = np.argsort(values)
        already_sorted = np.array_equal(ordered, np.arange(len(ordered)))
        if not already_sorted:
            data = data[ordered]

    ts = Timeseries(data.domain, data)
    # TODO: ensure equidistant
    ts.time_variable = time_var
    self.Outputs.time_series.send(ts)
def commit(self):
    """Append a differenced column for every selected variable and send it.

    Sends ``None`` on ``Output.TIMESERIES`` when there is no data or
    nothing is selected.

    For each selected variable the column is (optionally reversed, when
    ``invert_direction`` is set) differenced: with a shift of 1,
    ``np.diff`` of order ``diff_order`` is used; otherwise the result is
    ``col[t + shift] - col[t]``. Results are stored from the start of
    the column and the trailing ``diff_order`` / ``shift_period`` entries
    are NaN. A reversed column is reversed back before it is stored.
    New variables are named through ``available_name`` and appended to
    the attributes of the input domain.
    """
    data = self.data
    if not data or not len(self.selected):
        self.send(Output.TIMESERIES, None)
        return

    reverse = self.invert_direction
    shift = self.shift_period
    order = self.diff_order

    # A shift of exactly 1 selects order-based differencing.
    by_order = shift == 1
    lag = order if by_order else shift
    label = 'order={}'.format(order) if by_order else 'shift={}'.format(shift)

    new_columns = []
    new_vars = []
    for var in self.selected:
        series = np.ravel(data[:, var])
        if reverse:
            series = series[::-1]

        result = np.empty(len(series))
        if by_order:
            result[:-lag] = np.diff(series, order)
        else:
            result[:-lag] = series[lag:] - series[:-lag]
        # The trailing entries have no later value to subtract from.
        result[-lag:] = np.nan

        if reverse:
            result = result[::-1]
        new_columns.append(result)

        template = '{} (diff; {})'.format(var, label)
        new_vars.append(
            ContinuousVariable(available_name(data.domain, template)))

    domain = Domain(data.domain.attributes + tuple(new_vars),
                    data.domain.class_vars,
                    data.domain.metas)
    features = np.column_stack((data.X, np.column_stack(new_columns)))
    ts = Timeseries(domain, features, data.Y, data.metas)
    ts.time_variable = data.time_variable
    self.send(Output.TIMESERIES, ts)
def commit(self):
    """Build a Timeseries from the input table and send it.

    All errors are cleared first. ``None`` is sent when there is no
    data, or when sequential mode is off and the selected attribute is
    not in the domain.

    Sequential mode prepends a new continuous attribute with the values
    1..len(data) and uses it as the time variable; it is named with the
    first free name among '__seq__', '__seq__0', ..., '__seq__9'
    ('__seq__9' when none is free).

    Otherwise the selected attribute becomes the time variable. NaN
    values in it trigger ``Error.nan_times`` and ``None`` is sent;
    otherwise rows are reordered by ``np.argsort`` of its values unless
    they already are in that order.
    """
    data = self.data
    self.Error.clear()

    unusable = data is None or (
        self.selected_attr not in data.domain
        and not self.radio_sequential)
    if unusable:
        self.Outputs.time_series.send(None)
        return

    old_attrs = data.domain.attributes
    old_class_vars = data.domain.class_vars
    old_metas = data.domain.metas
    x_part = data.X
    y_part = np.column_stack((data.Y,))  # make 2d
    m_part = data.metas

    if not self.radio_sequential:
        # Make one of the existing attributes the sequence attribute
        # and sort all values according to it
        time_var = data.domain[self.selected_attr]
        projection = Table.from_table(
            Domain([], [], [time_var]), source=data)
        times = projection.metas.ravel()
        if np.isnan(times).any():
            self.Error.nan_times(time_var.name)
            self.Outputs.time_series.send(None)
            return
        order = np.argsort(times)
        identity = np.arange(len(order))
        if (order != identity).any():
            data = data[order]
    else:
        # Set sequence attribute
        suffixes = chain(('',), range(10))
        free_names = (
            '__seq__' + str(suffix) for suffix in suffixes
            if '__seq__' + str(suffix) not in data.domain)
        seq_name = next(free_names, '__seq__9')
        time_var = ContinuousVariable(seq_name)
        new_attrs = type(old_attrs)((time_var,)) + old_attrs
        x_part = np.column_stack((np.arange(1, len(data) + 1), x_part))
        data = Table(Domain(new_attrs, old_class_vars, old_metas),
                     x_part, y_part, m_part)

    ts = Timeseries(data.domain, data)
    # TODO: ensure equidistant
    ts.time_variable = time_var
    self.Outputs.time_series.send(ts)
def commit(self):
    """Compute differences of the selected variables and send the result.

    ``None`` is sent on ``Output.TIMESERIES`` when there is no data or
    nothing is selected.

    Each selected column is reversed first when ``invert_direction`` is
    set (and reversed back afterwards). With ``shift_period == 1`` the
    difference is ``np.diff`` of order ``diff_order``; with any other
    shift it is ``col[t + shift] - col[t]``. Values fill the column from
    its start and the last ``diff_order`` / ``shift_period`` entries are
    NaN. The new columns are appended to the input's attributes under
    names obtained from ``available_name``.
    """
    data = self.data
    if not data or not len(self.selected):
        self.send(Output.TIMESERIES, None)
        return

    flip = self.invert_direction
    shift = self.shift_period
    order = self.diff_order

    if shift == 1:
        tail = order
        description = 'order={}'.format(order)
    else:
        tail = shift
        description = 'shift={}'.format(shift)

    diffs = []
    diff_vars = []
    for var in self.selected:
        values = np.ravel(data[:, var])
        if flip:
            values = values[::-1]

        diffed = np.empty(len(values))
        diffed[:-tail] = (np.diff(values, order) if shift == 1
                          else values[shift:] - values[:-shift])
        # Pad the end, where no difference can be computed, with NaN.
        diffed[-tail:] = np.nan

        diffs.append(diffed[::-1] if flip else diffed)

        name = available_name(
            data.domain, '{} (diff; {})'.format(var, description))
        diff_vars.append(ContinuousVariable(name))

    extended_domain = Domain(
        data.domain.attributes + tuple(diff_vars),
        data.domain.class_vars,
        data.domain.metas)
    extended_X = np.column_stack((data.X, np.column_stack(diffs)))
    ts = Timeseries(extended_domain, extended_X, data.Y, data.metas)
    ts.time_variable = data.time_variable
    self.send(Output.TIMESERIES, ts)
def finance_data(symbol, since=None, until=None, granularity='d'):
    """Fetch Yahoo Finance data for stock or index `symbol` within the
    period after `since` and before `until` (both inclusive).

    The data is read with ``web.DataReader(symbol, 'yahoo', ...)`` and
    converted with ``table_from_frame``; 'Adj Close' becomes the class
    variable and 'Date' the time variable.

    Parameters
    ----------
    symbol: str
        A stock or index symbol, as supported by Yahoo Finance.
    since: date
        A start date (default: 1900-01-01).
    until: date
        An end date (default: today).
    granularity: 'd' or 'w' or 'm' or 'v'
        What data to get: daily, weekly, monthly, or dividends.
        NOTE: this function accepts the argument but never reads it.

    Returns
    -------
    data : Timeseries

    Raises
    ------
    ValueError
        If the converted table has no 'Adj Close' attribute
        (raised by ``list.remove``).
    """
    start = date(1900, 1, 1) if since is None else since
    end = date.today() if until is None else until

    frame = web.DataReader(symbol, 'yahoo', start, end)
    table = Timeseries(table_from_frame(frame))

    # Make Adjusted Close a class variable
    feature_names = [var.name for var in table.domain.attributes]
    feature_names.remove('Adj Close')
    target = table.domain['Adj Close']
    domain = Domain(feature_names, [target], None, source=table.domain)
    result = Timeseries(domain, table)

    result.name = symbol
    result.time_variable = result.domain['Date']
    return result
def finance_data(symbol, since=None, until=None, granularity='d'):
    """Fetch Yahoo Finance data for stock or index `symbol` within the
    period after `since` and before `until` (both inclusive).

    Reads the 'yahoo' source through ``web.DataReader``, converts the
    frame with ``table_from_frame`` and returns a Timeseries whose class
    variable is 'Adj Close' and whose time variable is 'Date'.

    Parameters
    ----------
    symbol: str
        A stock or index symbol, as supported by Yahoo Finance.
    since: date
        A start date (default: 1900-01-01).
    until: date
        An end date (default: today).
    granularity: 'd' or 'w' or 'm' or 'v'
        What data to get: daily, weekly, monthly, or dividends.
        NOTE: this function accepts the argument but never reads it.

    Returns
    -------
    data : Timeseries

    Raises
    ------
    ValueError
        If the converted table has no 'Adj Close' attribute
        (raised by ``list.remove``).
    """
    if since is None:
        since = date(1900, 1, 1)
    if until is None:
        until = date.today()

    downloaded = Timeseries(
        table_from_frame(web.DataReader(symbol, 'yahoo', since, until)))

    # Make Adjusted Close a class variable
    names = [attr.name for attr in downloaded.domain.attributes]
    names.remove('Adj Close')
    class_vars = [downloaded.domain['Adj Close']]
    series = Timeseries(
        Domain(names, class_vars, None, source=downloaded.domain),
        downloaded)

    series.name = symbol
    series.time_variable = series.domain['Date']
    return series
def moving_transform(data, spec, fixed_wlen=0):
    """
    Return data transformed according to spec.

    Parameters
    ----------
    data : Timeseries
        A table with features to transform.
    spec : list of lists
        A list of lists [feature:Variable, window_length:int, function:callable].
        May be empty, in which case no columns are added.
    fixed_wlen : int
        If not 0, then window_length in spec is disregarded and this length
        is used. Also the windows don't shift by one but instead align
        themselves side by side, and only every `fixed_wlen`-th row of the
        original data (counting back from the last row) is kept.

    Returns
    -------
    transformed : Timeseries
        A table of original data and its transformations.
    """
    from itertools import chain
    from Orange.data import ContinuousVariable, Domain
    from orangecontrib.timeseries import Timeseries
    from orangecontrib.timeseries.widgets.utils import available_name
    from orangecontrib.timeseries.agg_funcs import Cumulative_sum, Cumulative_product

    X = []
    attrs = []

    for var, wlen, func in spec:
        col = np.ravel(data[:, var])

        if fixed_wlen:
            wlen = fixed_wlen

        if func in (Cumulative_sum, Cumulative_product):
            # Cumulative functions return one value per element, so the
            # per-window results are flattened into a single sequence.
            out = list(chain.from_iterable(
                func(col[i:i + wlen]) for i in range(0, len(col), wlen)))
        else:
            # In reverse cause lazy brain. Also prefer informative ends,
            # not beginnings as much
            col = col[::-1]
            step = wlen if fixed_wlen else 1
            out = [func(col[i:i + wlen]) for i in range(0, len(col), step)]
            out = out[::-1]

        X.append(out)

        template = '{} ({}; {})'.format(
            var.name, wlen, func.__name__.lower().replace('_', ' '))
        name = available_name(data.domain, template)
        attrs.append(ContinuousVariable(name))

    dataX, dataY, dataM = data.X, data.Y, data.metas
    if fixed_wlen:
        # The row count comes from the first transformed column. With an
        # empty spec there is none (X[0] used to raise IndexError), so keep
        # every window-aligned row: slicing with [:None] truncates nothing.
        n = len(X[0]) if X else None
        dataX = dataX[::-1][::fixed_wlen][:n][::-1]
        dataY = dataY[::-1][::fixed_wlen][:n][::-1]
        dataM = dataM[::-1][::fixed_wlen][:n][::-1]

    ts = Timeseries(
        Domain(data.domain.attributes + tuple(attrs),
               data.domain.class_vars,
               data.domain.metas),
        np.column_stack((dataX, np.column_stack(X))) if X else dataX,
        dataY, dataM)
    ts.time_variable = data.time_variable
    return ts
def test_create_time_variable(self):
    """Assigning ``time_variable`` must change the identity of ``attributes``.

    The ``id`` of the series' ``attributes`` is recorded before the
    assignment and compared with the ``id`` seen afterwards.
    """
    iris = Table("iris")
    series = Timeseries(iris)
    id_before = id(series.attributes)

    first_attribute = series.domain.attributes[0]
    series.time_variable = first_attribute

    id_after = id(series.attributes)
    self.assertNotEqual(id_before, id_after)
def moving_transform(data, spec, fixed_wlen=0):
    """
    Return data transformed according to spec.

    Parameters
    ----------
    data : Timeseries
        A table with features to transform.
    spec : list of lists
        A list of lists [feature:Variable, window_length:int, function:callable].
        May be empty, in which case no columns are added.
    fixed_wlen : int
        If not 0, then window_length in spec is disregarded and this length
        is used. Also the windows don't shift by one but instead align
        themselves side by side, and only every `fixed_wlen`-th row of the
        original data (counting back from the last row) is kept.

    Returns
    -------
    transformed : Timeseries
        A table of original data and its transformations.
    """
    from itertools import chain
    from Orange.data import ContinuousVariable, Domain
    from orangecontrib.timeseries import Timeseries
    from orangecontrib.timeseries.widgets.utils import available_name
    from orangecontrib.timeseries.agg_funcs import Cumulative_sum, Cumulative_product

    X = []
    attrs = []

    for var, wlen, func in spec:
        col = np.ravel(data[:, var])

        if fixed_wlen:
            wlen = fixed_wlen

        if func in (Cumulative_sum, Cumulative_product):
            # Cumulative functions yield one value per element; chain the
            # per-window results back into one flat sequence.
            out = list(
                chain.from_iterable(
                    func(col[i:i + wlen])
                    for i in range(0, len(col), wlen)))
        else:
            # In reverse cause lazy brain. Also prefer informative ends,
            # not beginnings as much
            col = col[::-1]
            step = wlen if fixed_wlen else 1
            out = [
                func(col[i:i + wlen])
                for i in range(0, len(col), step)
            ]
            out = out[::-1]

        X.append(out)

        template = '{} ({}; {})'.format(
            var.name, wlen, func.__name__.lower().replace('_', ' '))
        name = available_name(data.domain, template)
        attrs.append(ContinuousVariable(name))

    dataX, dataY, dataM = data.X, data.Y, data.metas
    if fixed_wlen:
        # An empty spec leaves X empty; indexing X[0] used to raise
        # IndexError here. n=None makes [:n] a no-op so all window-aligned
        # rows are kept.
        n = len(X[0]) if X else None
        dataX = dataX[::-1][::fixed_wlen][:n][::-1]
        dataY = dataY[::-1][::fixed_wlen][:n][::-1]
        dataM = dataM[::-1][::fixed_wlen][:n][::-1]

    ts = Timeseries(
        Domain(data.domain.attributes + tuple(attrs),
               data.domain.class_vars,
               data.domain.metas),
        np.column_stack((dataX, np.column_stack(X))) if X else dataX,
        dataY, dataM)
    ts.time_variable = data.time_variable
    return ts