def commit(self):
        """Run seasonal decomposition on the selected variables and send it.

        With no input data or no selected variables the input is forwarded
        unchanged.  A ``ValueError`` raised by ``seasonal_decompose`` is
        reported through ``Error.seasonal_decompose_fail`` and ``None`` is
        sent instead of a table.
        """
        self.Error.seasonal_decompose_fail.clear()
        table = self.data
        if not (table and self.selected):
            # Nothing to decompose: pass the input straight through.
            self.Outputs.time_series.send(table)
            return

        # FIXME: might not pass selected interpolation method
        subset = Timeseries(Domain(self.selected, source=table.domain), table)

        with self.progressBar(len(self.selected)) as progress:
            def advance(*_):
                # Whatever arguments the callback receives are ignored.
                return progress.advance()

            try:
                adjusted = seasonal_decompose(
                    subset,
                    self.DECOMPOSITION_MODELS[self.decomposition],
                    self.n_periods,
                    callback=advance)
            except ValueError as err:
                self.Error.seasonal_decompose_fail(str(err))
                adjusted = None

        result = None
        if adjusted is not None:
            # Append the decomposition output to the original table and keep
            # the original time variable.
            result = Timeseries(Timeseries.concatenate((table, adjusted)))
            result.time_variable = table.time_variable
        self.Outputs.time_series.send(result)
    def commit(self):
        """Append a transformed column for every selected variable and send it.

        Depending on ``chosen_operation`` the new column holds differences,
        quotients or percentage changes between values ``shift_period``
        steps apart.  Plain differencing with a shift of 1 uses
        ``np.diff`` of order ``diff_order`` instead.  Positions that have no
        counterpart are filled with NaN.  ``None`` is sent when there is no
        data or nothing is selected.
        """
        table = self.data
        if not table or not len(self.selected):
            self.Outputs.time_series.send(None)
            return

        reverse = self.invert_direction
        lag = self.shift_period
        degree = self.diff_order
        operation = self.chosen_operation

        # Only plain differencing with a unit shift honours the order.
        nth_order = operation == self.Operation.DIFF and lag == 1
        details = f'order={degree}' if nth_order else f'shift={lag}'

        columns = []
        new_vars = []
        for var in self.selected:
            series = np.ravel(table[:, var])
            if reverse:
                series = series[::-1]

            result = np.empty(len(series))
            if nth_order:
                result[degree:] = np.diff(series, degree)
                result[:degree] = np.nan
            elif operation == self.Operation.DIFF:
                result[lag:] = series[lag:] - series[:-lag]
                result[:lag] = np.nan
            else:
                result[lag:] = np.divide(series[lag:], series[:-lag])
                if operation == self.Operation.PERC:
                    result = (result - 1) * 100
                # Overwrite the uninitialised head after any rescaling.
                result[:lag] = np.nan

            if reverse:
                # Restore the original row order.
                result = result[::-1]
            columns.append(result)

            template = f'{var} ({operation[:4].lower()}; {details})'
            new_vars.append(
                ContinuousVariable(available_name(table.domain, template)))

        domain = table.domain
        extended_domain = Domain(domain.attributes + tuple(new_vars),
                                 domain.class_vars, domain.metas)
        ts = Timeseries(
            extended_domain,
            np.column_stack((table.X, np.column_stack(columns))),
            table.Y, table.metas)
        ts.time_variable = table.time_variable
        self.Outputs.time_series.send(ts)
Exemple #3
0
def finance_data(symbol, since=None, until=None, granularity='d'):
    """Fetch Yahoo Finance data for stock or index `symbol` within the period
    after `since` and before `until` (both inclusive).

    Parameters
    ----------
    symbol: str
        A stock or index symbol, as supported by Yahoo Finance.
    since: date
        A start date (default: 1900-01-01).
    until: date
        An end date (default: today).
    granularity: 'd' or 'w' or 'm' or 'v'
        What data to get: daily, weekly, monthly, or dividends.

    Returns
    -------
    data : Timeseries
        The downloaded table in reversed row order, named after `symbol`,
        with 'Date' as its time variable.  When the table has an
        'Adj Close' attribute it is made the class variable; otherwise the
        domain is left as downloaded.
    """
    if since is None:
        since = date(1900, 1, 1)
    if until is None:
        until = date.today()

    YAHOO_URL = (
        'http://chart.finance.yahoo.com/table.csv?'
        's={SYMBOL}&d={TO_MONTH}&e={TO_DAY}&f={TO_YEAR}&'
        'g={GRANULARITY}&a={FROM_MONTH}&b={FROM_DAY}&c={FROM_YEAR}&ignore=.csv'
    )
    # Months are sent as ``month - 1``.
    url = YAHOO_URL.format(SYMBOL=symbol,
                           GRANULARITY=granularity,
                           TO_MONTH=until.month - 1,
                           TO_DAY=until.day,
                           TO_YEAR=until.year,
                           FROM_MONTH=since.month - 1,
                           FROM_DAY=since.day,
                           FROM_YEAR=since.year)

    # Reverse the row order (presumably the service lists the newest rows
    # first -- TODO confirm).
    data = Timeseries.from_url(url)[::-1]

    # Make Adjusted Close a class variable.  Not every table has the column
    # (presumably e.g. the dividends table -- TODO confirm); an
    # unconditional remove() would raise ValueError then.
    attrs = [var.name for var in data.domain.attributes]
    if 'Adj Close' in attrs:
        attrs.remove('Adj Close')
        data = Timeseries(
            Domain(attrs, [data.domain['Adj Close']], None,
                   source=data.domain),
            data)

    data.name = symbol
    data.time_variable = data.domain['Date']
    return data
def finance_data(symbol,
                 since=None,
                 until=None,
                 granularity='d'):
    """Fetch Yahoo Finance data for stock or index `symbol` within the period
    after `since` and before `until` (both inclusive).

    Parameters
    ----------
    symbol: str
        A stock or index symbol, as supported by Yahoo Finance.
    since: date
        A start date (default: 1900-01-01).
    until: date
        An end date (default: today).
    granularity: 'd' or 'w' or 'm' or 'v'
        What data to get: daily, weekly, monthly, or dividends.

    Returns
    -------
    data : Timeseries
        The downloaded table in reversed row order, named after `symbol`,
        with 'Date' as its time variable.  'Adj Close' becomes the class
        variable when the table contains that attribute; tables without it
        keep their downloaded domain.
    """
    if since is None:
        since = date(1900, 1, 1)
    if until is None:
        until = date.today()

    YAHOO_URL = ('http://chart.finance.yahoo.com/table.csv?'
                 's={SYMBOL}&d={TO_MONTH}&e={TO_DAY}&f={TO_YEAR}&'
                 'g={GRANULARITY}&a={FROM_MONTH}&b={FROM_DAY}&c={FROM_YEAR}&ignore=.csv')
    # The query carries months as ``month - 1``.
    url = YAHOO_URL.format(SYMBOL=symbol,
                           GRANULARITY=granularity,
                           TO_MONTH=until.month - 1,
                           TO_DAY=until.day,
                           TO_YEAR=until.year,
                           FROM_MONTH=since.month - 1,
                           FROM_DAY=since.day,
                           FROM_YEAR=since.year)

    # Row order of the downloaded table is reversed before further use.
    data = Timeseries.from_url(url)[::-1]

    # Make Adjusted Close a class variable, but only if the column exists:
    # list.remove() raises ValueError for a missing name (presumably the
    # case for the dividends table -- TODO confirm).
    attrs = [var.name for var in data.domain.attributes]
    if 'Adj Close' in attrs:
        attrs.remove('Adj Close')
        data = Timeseries(Domain(attrs, [data.domain['Adj Close']], None, source=data.domain), data)

    data.name = symbol
    data.time_variable = data.domain['Date']
    return data
Exemple #5
0
    def commit(self):
        """Run seasonal decomposition on the selected variables and send it.

        With no input data or no selected variables the input is forwarded
        unchanged.  Otherwise the output of ``seasonal_decompose`` is
        concatenated with the input table and sent with the input's time
        variable.
        """
        table = self.data
        if not (table and self.selected):
            self.send(Output.TIMESERIES, table)
            return

        # FIXME: might not pass selected interpolation method
        subset_domain = Domain(self.selected, source=table.domain)
        subset = Timeseries(subset_domain, table)

        with self.progressBar(len(self.selected)) as progress:
            def advance(*_):
                # Whatever arguments the callback receives are ignored.
                return progress.advance()

            adjusted = seasonal_decompose(
                subset,
                self.DECOMPOSITION_MODELS[self.decomposition],
                self.n_periods,
                callback=advance)

        combined = Timeseries.concatenate((table, adjusted))
        result = Timeseries(combined)
        result.time_variable = table.time_variable
        self.send(Output.TIMESERIES, result)
Exemple #6
0
    def commit(self):
        """Build a Timeseries from the input table and send it.

        If ``radio_sequential`` is set, a new continuous attribute holding
        1..len(data) is prepended and used as the time variable.  Otherwise
        the rows are sorted by ``selected_attr``, which becomes the time
        variable; if that column contains NaN, ``Error.nan_times`` is raised
        and ``None`` is sent.  ``None`` is also sent when there is no data
        or the selected attribute is not in the domain.
        """
        data = self.data
        self.Error.clear()
        if data is None or (self.selected_attr not in data.domain
                            and not self.radio_sequential):
            self.Outputs.time_series.send(None)
            return

        if self.radio_sequential:
            # Set sequence attribute.  Try '__seq__', '__seq__0',
            # '__seq__1', ... until a name is free; the search is unbounded
            # so an already-used name is never picked.
            name = '__seq__'
            suffix = 0
            while name in data.domain:
                name = '__seq__' + str(suffix)
                suffix += 1
            time_var = ContinuousVariable(name)

            domain = data.domain
            attrs = domain.attributes.__class__((time_var, )) + domain.attributes
            X = np.column_stack((np.arange(1, len(data) + 1), data.X))
            Y = np.column_stack((data.Y, ))  # make 2d
            data = Table(Domain(attrs, domain.class_vars, domain.metas),
                         X, Y, data.metas)
        else:
            # Or make a sequence attribute one of the existing attributes
            # and sort all values according to it
            time_var = data.domain[self.selected_attr]
            values = Table.from_table(Domain([], [], [time_var]),
                                      source=data).metas.ravel()
            if np.isnan(values).any():
                self.Error.nan_times(time_var.name)
                self.Outputs.time_series.send(None)
                return
            ordered = np.argsort(values)
            # Re-index only when the rows are not already in order.
            if (ordered != np.arange(len(ordered))).any():
                data = data[ordered]

        ts = Timeseries(data.domain, data)
        # TODO: ensure equidistant
        ts.time_variable = time_var
        self.Outputs.time_series.send(ts)
    def commit(self):
        """Append a differenced column for every selected variable and send it.

        With a shift of 1 the column is ``np.diff`` of order ``diff_order``;
        otherwise it is the difference between values ``shift_period`` steps
        apart.  Positions with no result are set to NaN.  ``None`` is sent
        when there is no data or nothing is selected.
        """
        table = self.data
        if not table or not len(self.selected):
            self.send(Output.TIMESERIES, None)
            return

        reverse = self.invert_direction
        lag = self.shift_period
        degree = self.diff_order

        use_order = lag == 1
        # Number of trailing positions that receive no value.
        gap = degree if use_order else lag
        label = ('order={}'.format(degree) if use_order
                 else 'shift={}'.format(lag))

        columns, new_vars = [], []
        for var in self.selected:
            series = np.ravel(table[:, var])
            if reverse:
                series = series[::-1]

            result = np.empty(len(series))
            if use_order:
                result[:-gap] = np.diff(series, degree)
            else:
                result[:-gap] = series[lag:] - series[:-lag]
            result[-gap:] = np.nan

            if reverse:
                # Restore the original row order.
                result = result[::-1]
            columns.append(result)

            template = '{} (diff; {})'.format(var, label)
            new_vars.append(
                ContinuousVariable(available_name(table.domain, template)))

        domain = table.domain
        extended_domain = Domain(domain.attributes + tuple(new_vars),
                                 domain.class_vars,
                                 domain.metas)
        ts = Timeseries(extended_domain,
                        np.column_stack((table.X, np.column_stack(columns))),
                        table.Y, table.metas)
        ts.time_variable = table.time_variable
        self.send(Output.TIMESERIES, ts)
    def commit(self):
        """Turn the input table into a Timeseries and send it.

        With ``radio_sequential`` set, a fresh continuous attribute
        numbering the rows from 1 is prepended and becomes the time
        variable.  Otherwise ``selected_attr`` becomes the time variable and
        rows are sorted by it; NaN values in that column trigger
        ``Error.nan_times`` and ``None`` is sent.  ``None`` is also sent
        when there is no data or the selected attribute is missing from the
        domain.
        """
        data = self.data
        self.Error.clear()
        if data is None or (self.selected_attr not in data.domain and not self.radio_sequential):
            self.Outputs.time_series.send(None)
            return

        if self.radio_sequential:
            # Set sequence attribute.  Candidates are '__seq__', '__seq__0',
            # '__seq__1', ...; keep going until one is unused instead of
            # giving up after a fixed number of tries and reusing a taken
            # name.
            name = '__seq__'
            counter = 0
            while name in data.domain:
                name = '__seq__' + str(counter)
                counter += 1
            time_var = ContinuousVariable(name)

            old_domain = data.domain
            attrs = old_domain.attributes.__class__((time_var,)) + old_domain.attributes
            X = np.column_stack((np.arange(1, len(data) + 1), data.X))
            Y = np.column_stack((data.Y,))  # make 2d
            data = Table(Domain(attrs, old_domain.class_vars, old_domain.metas),
                         X, Y, data.metas)
        else:
            # Or make a sequence attribute one of the existing attributes
            # and sort all values according to it
            time_var = data.domain[self.selected_attr]
            values = Table.from_table(Domain([], [], [time_var]),
                                      source=data).metas.ravel()
            if np.isnan(values).any():
                self.Error.nan_times(time_var.name)
                self.Outputs.time_series.send(None)
                return
            ordered = np.argsort(values)
            # Skip re-indexing when the rows are already sorted.
            if (ordered != np.arange(len(ordered))).any():
                data = data[ordered]

        ts = Timeseries(data.domain, data)
        # TODO: ensure equidistant
        ts.time_variable = time_var
        self.Outputs.time_series.send(ts)
Exemple #9
0
    def commit(self):
        """Send the input extended by one differenced column per selection.

        A shift of 1 yields ``np.diff`` of order ``diff_order``; any other
        shift yields the difference between values ``shift_period`` steps
        apart.  Positions without a result hold NaN.  ``None`` is sent when
        there is no data or nothing is selected.
        """
        source = self.data
        if not source or not len(self.selected):
            self.send(Output.TIMESERIES, None)
            return

        flip = self.invert_direction
        shift = self.shift_period
        order = self.diff_order

        by_order = shift == 1
        if by_order:
            tail = order
            suffix = 'order={}'.format(order)
        else:
            tail = shift
            suffix = 'shift={}'.format(shift)

        derived = []
        variables = []
        for var in self.selected:
            values = np.ravel(source[:, var])
            values = values[::-1] if flip else values

            out = np.empty(len(values))
            if by_order:
                out[:-tail] = np.diff(values, order)
            else:
                out[:-tail] = values[shift:] - values[:-shift]
            # The last `tail` positions have no difference to hold.
            out[-tail:] = np.nan

            derived.append(out[::-1] if flip else out)

            name = available_name(source.domain,
                                  '{} (diff; {})'.format(var, suffix))
            variables.append(ContinuousVariable(name))

        new_domain = Domain(source.domain.attributes + tuple(variables),
                            source.domain.class_vars, source.domain.metas)
        stacked = np.column_stack((source.X, np.column_stack(derived)))
        ts = Timeseries(new_domain, stacked, source.Y, source.metas)
        ts.time_variable = source.time_variable
        self.send(Output.TIMESERIES, ts)
def finance_data(symbol,
                 since=None,
                 until=None,
                 granularity='d'):
    """Fetch Yahoo Finance data for stock or index `symbol` within the period
    from `since` to `until`.

    Parameters
    ----------
    symbol: str
        A stock or index symbol, passed to ``web.DataReader``.
    since: date
        A start date (default: 1900-01-01).
    until: date
        An end date (default: today).
    granularity: 'd' or 'w' or 'm' or 'v'
        Accepted for interface compatibility; this implementation does not
        read it.

    Returns
    -------
    data : Timeseries
        Table with 'Adj Close' as the class variable, 'Date' as the time
        variable, and `symbol` as its name.
    """
    since = date(1900, 1, 1) if since is None else since
    until = date.today() if until is None else until

    frame = web.DataReader(symbol, 'yahoo', since, until)
    data = Timeseries(table_from_frame(frame))

    # Make Adjusted Close a class variable
    feature_names = [var.name for var in data.domain.attributes]
    feature_names.remove('Adj Close')
    target = data.domain['Adj Close']
    new_domain = Domain(feature_names, [target], None, source=data.domain)
    data = Timeseries(new_domain, data)

    data.name = symbol
    data.time_variable = data.domain['Date']
    return data
def finance_data(symbol, since=None, until=None, granularity='d'):
    """Download Yahoo Finance data for `symbol` between `since` and `until`.

    Parameters
    ----------
    symbol: str
        A stock or index symbol, handed to ``web.DataReader``.
    since: date
        A start date (default: 1900-01-01).
    until: date
        An end date (default: today).
    granularity: 'd' or 'w' or 'm' or 'v'
        Kept in the signature but not used by this implementation.

    Returns
    -------
    data : Timeseries
        Table named `symbol`, with 'Adj Close' as the class variable and
        'Date' as the time variable.
    """
    if since is None:
        since = date(1900, 1, 1)
    if until is None:
        until = date.today()

    quotes = web.DataReader(symbol, 'yahoo', since, until)
    data = Timeseries(table_from_frame(quotes))

    # Make Adjusted Close a class variable
    names = [attribute.name for attribute in data.domain.attributes]
    names.remove('Adj Close')
    class_vars = [data.domain['Adj Close']]
    data = Timeseries(
        Domain(names, class_vars, None, source=data.domain), data)

    data.name = symbol
    data.time_variable = data.domain['Date']
    return data
Exemple #12
0
def moving_transform(data, spec, fixed_wlen=0):
    """
    Return data transformed according to spec.

    Parameters
    ----------
    data : Timeseries
        A table with features to transform.
    spec : list of lists
        A list of lists [feature:Variable, window_length:int, function:callable].
        May be empty, in which case no columns are added.
    fixed_wlen : int
        If not 0, then window_length in spec is disregarded and this length
        is used. Also the windows don't shift by one but instead align
        themselves side by side.

    Returns
    -------
    transformed : Timeseries
        A table of original data and its transformations.
    """
    from itertools import chain
    from Orange.data import ContinuousVariable, Domain
    from orangecontrib.timeseries import Timeseries
    from orangecontrib.timeseries.widgets.utils import available_name
    from orangecontrib.timeseries.agg_funcs import Cumulative_sum, Cumulative_product

    X = []
    attrs = []

    for var, wlen, func in spec:
        col = np.ravel(data[:, var])

        if fixed_wlen:
            wlen = fixed_wlen

        if func in (Cumulative_sum, Cumulative_product):
            # Cumulative functions are applied to consecutive,
            # non-overlapping chunks and their results concatenated.
            out = list(chain.from_iterable(func(col[i:i + wlen])
                                           for i in range(0, len(col), wlen)))
        else:
            # In reverse cause lazy brain. Also prefer informative ends, not beginnings as much
            col = col[::-1]
            step = wlen if fixed_wlen else 1
            out = [func(col[i:i + wlen])
                   for i in range(0, len(col), step)]
            out = out[::-1]

        X.append(out)

        template = '{} ({}; {})'.format(var.name, wlen, func.__name__.lower().replace('_', ' '))
        name = available_name(data.domain, template)
        attrs.append(ContinuousVariable(name))

    dataX, dataY, dataM = data.X, data.Y, data.metas
    if fixed_wlen:
        # Keep every fixed_wlen-th original row, counted from the end, so
        # rows line up with the side-by-side windows.  With an empty spec
        # there is no new column to take a length from, so no cap is
        # applied (X[0] would raise IndexError).
        # NOTE(review): for the cumulative functions the new column
        # presumably keeps one value per input row, which would not match
        # the subsampled rows -- confirm the intended behaviour.
        n = len(X[0]) if X else None
        dataX = dataX[::-1][::fixed_wlen][:n][::-1]
        dataY = dataY[::-1][::fixed_wlen][:n][::-1]
        dataM = dataM[::-1][::fixed_wlen][:n][::-1]

    ts = Timeseries(Domain(data.domain.attributes + tuple(attrs),
                           data.domain.class_vars,
                           data.domain.metas),
                    np.column_stack(
                        (dataX, np.column_stack(X))) if X else dataX,
                    dataY, dataM)
    ts.time_variable = data.time_variable
    return ts
 def test_create_time_variable(self):
     # Assigning a time variable is expected to leave the table with a
     # different `attributes` object than it had before.
     iris = Table("iris")
     ts = Timeseries(iris)
     attributes_id_before = id(ts.attributes)
     ts.time_variable = ts.domain.attributes[0]
     attributes_id_after = id(ts.attributes)
     self.assertNotEqual(attributes_id_before, attributes_id_after)
def moving_transform(data, spec, fixed_wlen=0):
    """
    Return data transformed according to spec.

    Parameters
    ----------
    data : Timeseries
        A table with features to transform.
    spec : list of lists
        A list of lists [feature:Variable, window_length:int, function:callable].
        An empty list adds no columns.
    fixed_wlen : int
        If not 0, then window_length in spec is disregarded and this length
        is used. Also the windows don't shift by one but instead align
        themselves side by side.

    Returns
    -------
    transformed : Timeseries
        A table of original data and its transformations.
    """
    from itertools import chain
    from Orange.data import ContinuousVariable, Domain
    from orangecontrib.timeseries import Timeseries
    from orangecontrib.timeseries.widgets.utils import available_name
    from orangecontrib.timeseries.agg_funcs import Cumulative_sum, Cumulative_product

    X = []
    attrs = []

    for var, wlen, func in spec:
        col = np.ravel(data[:, var])

        if fixed_wlen:
            wlen = fixed_wlen

        if func in (Cumulative_sum, Cumulative_product):
            # Applied chunk by chunk; the per-chunk results are flattened
            # into one sequence.
            out = list(
                chain.from_iterable(
                    func(col[i:i + wlen]) for i in range(0, len(col), wlen)))
        else:
            # In reverse cause lazy brain. Also prefer informative ends, not beginnings as much
            col = col[::-1]
            stride = wlen if fixed_wlen else 1
            out = [
                func(col[i:i + wlen])
                for i in range(0, len(col), stride)
            ]
            out = out[::-1]

        X.append(out)

        template = '{} ({}; {})'.format(
            var.name, wlen,
            func.__name__.lower().replace('_', ' '))
        name = available_name(data.domain, template)
        attrs.append(ContinuousVariable(name))

    dataX, dataY, dataM = data.X, data.Y, data.metas
    if fixed_wlen:
        # Subsample the original rows to every fixed_wlen-th one, counted
        # from the end.  An empty spec leaves X empty; indexing X[0] would
        # raise IndexError, so the length cap is dropped in that case.
        # NOTE(review): with a cumulative function the generated column
        # presumably has one value per input row and would not match the
        # subsampled rows -- verify the intended behaviour.
        n = len(X[0]) if X else None
        dataX = dataX[::-1][::fixed_wlen][:n][::-1]
        dataY = dataY[::-1][::fixed_wlen][:n][::-1]
        dataM = dataM[::-1][::fixed_wlen][:n][::-1]

    ts = Timeseries(
        Domain(data.domain.attributes + tuple(attrs), data.domain.class_vars,
               data.domain.metas),
        np.column_stack((dataX, np.column_stack(X))) if X else dataX, dataY,
        dataM)
    ts.time_variable = data.time_variable
    return ts