Beispiel #1
0
    def _init_data(self, data, copy, dtype, **kwargs):
        """
        Build the underlying manager for an N-dimensional container.

        The axis objects are looked up in ``kwargs`` under the names listed
        in ``self._AXIS_ORDERS`` (a missing name counts as ``None``) and the
        finished manager is handed to ``NDFrame.__init__``.

        Parameters
        ----------
        data : BlockManager, dict, ndarray, list or None
            Source data; ``None`` is treated as an empty dict.
        copy : bool
            Forwarded to ``_init_matrix`` for ndarray/list input and to
            ``NDFrame.__init__`` for BlockManager input.
        dtype : dtype or None
            Passed through ``self._validate_dtype`` when not ``None``.
        **kwargs
            Axis objects keyed by axis name.

        Raises
        ------
        PandasError
            If ``data`` is none of the supported types.
        """
        data = {} if data is None else data
        if dtype is not None:
            dtype = self._validate_dtype(dtype)

        passed_axes = [kwargs.get(axis_name) for axis_name in self._AXIS_ORDERS]
        axes = None

        if isinstance(data, BlockManager):
            mgr = data
            # Explicitly passed axes win; gaps are filled from the manager.
            if not all(ax is None for ax in passed_axes):
                axes = [
                    given if given is not None else existing
                    for given, existing in zip(passed_axes, data.axes)
                ]
        elif isinstance(data, dict):
            mgr = self._init_dict(data, passed_axes, dtype=dtype)
            # dtype went to the builder; NDFrame.__init__ gets neutral values.
            copy, dtype = False, None
        elif isinstance(data, (np.ndarray, list)):
            mgr = self._init_matrix(data, passed_axes, dtype=dtype, copy=copy)
            # copy/dtype went to the builder; do not apply them a second time.
            copy, dtype = False, None
        else:  # pragma: no cover
            raise PandasError('Panel constructor not properly called!')

        NDFrame.__init__(self, mgr, axes=axes, copy=copy, dtype=dtype)
Beispiel #2
0
    def _init_data(self, data, copy, dtype, **kwargs):
        """
        Create the manager backing this object and initialise the NDFrame.

        Axis objects are read from ``kwargs`` using the names in
        ``self._AXIS_ORDERS``; names that were not passed are ``None``.

        Parameters
        ----------
        data : BlockManager, dict, ndarray, list or None
            ``None`` is replaced by an empty dict.
        copy : bool
            Used for ndarray/list input (via ``_init_matrix``) and for
            BlockManager input (via ``NDFrame.__init__``).
        dtype : dtype or None
            Validated with ``self._validate_dtype`` when given.

        Raises
        ------
        PandasError
            When ``data`` has an unsupported type.
        """
        if data is None:
            data = {}
        if dtype is not None:
            dtype = self._validate_dtype(dtype)

        passed_axes = [kwargs.get(name) for name in self._AXIS_ORDERS]

        # Reject unsupported input up front so the branches below stay flat.
        if not isinstance(data, (BlockManager, dict, np.ndarray, list)):  # pragma: no cover
            raise PandasError('Panel constructor not properly called!')

        axes = None
        if isinstance(data, BlockManager):
            mgr = data
            if any(ax is not None for ax in passed_axes):
                # Take each passed axis, falling back to the manager's axis.
                axes = []
                for given, existing in zip(passed_axes, data.axes):
                    axes.append(existing if given is None else given)
        else:
            if isinstance(data, dict):
                mgr = self._init_dict(data, passed_axes, dtype=dtype)
            else:
                mgr = self._init_matrix(
                    data, passed_axes, dtype=dtype, copy=copy)
            # The builders already consumed dtype (and copy, for arrays).
            copy = False
            dtype = None

        NDFrame.__init__(self, mgr, axes=axes, copy=copy, dtype=dtype)
Beispiel #3
0
    def _set_new_item(self, name: str, value: np.ndarray) -> None:
        """Set a new column and avoid SettingWithCopyWarning by using
        pandas internal APIs.

        The call is forwarded unchanged to ``NDFrame._set_item``; nothing is
        returned.

        :param name: label under which ``value`` is stored
        :param value: data to store under ``name``

        see: https://github.com/pandas-dev/pandas/blob/v1.1.0/pandas/core/frame.py#L3114
        """  # noqa: E501

        # NOTE(review): relies on a private pandas API pinned to the v1.1.0
        # link above - confirm it still exists when upgrading pandas.
        NDFrame._set_item(self, name, value)
Beispiel #4
0
    def encode(self, obj: NDFrame, description: Optional[str], params: Optional[Dict]) -> FrameData:
        """
        Serialise ``obj`` to CSV and wrap the encoded bytes in a ``FrameData``.

        The CSV is written with ``QUOTE_ALL`` quoting and this encoder's
        ``index``, ``header`` and ``encoding`` settings.  The settings dict
        handed to ``FrameData`` records those options, a schema (the index
        dtype when the index is written, followed by ``self.schema(obj)``)
        and ``pandas_version``.

        :param obj: object serialised through its ``to_csv`` method
        :param description: passed through to ``FrameData`` unchanged
        :param params: passed through to ``FrameData`` unchanged
        :return: ``FrameData`` carrying the CSV bytes with media type
            ``text/csv``
        """
        csv_buffer = StringIO()
        obj.to_csv(csv_buffer, index=self.index, header=self.header, encoding=self.encoding, quoting=QUOTE_ALL)

        # The index dtype leads the schema only when the index is written.
        if self.index:
            index_schema = [str(obj.index.dtype)]
        else:
            index_schema = []

        content = BytesContent(csv_buffer.getvalue().encode(self.encoding))
        media_type = MediaType("text/csv", self.application())
        settings = {
            "header": self.header,
            "index": self.index,
            "schema": index_schema + self.schema(obj),
            "encoding": self.encoding,
            "version": pandas_version,
        }
        return FrameData(content, media_type, description, params, settings)
Beispiel #5
0
class TestNDFrame(unittest.TestCase):
    """Checks NDFrame construction, ``ndim``, ``astype`` and ``squeeze``."""

    _multiprocess_can_split_ = True

    def setUp(self):
        # Wrap the internal data of a generated test frame in a bare NDFrame.
        tdf = t.makeTimeDataFrame()
        self.ndf = NDFrame(tdf._data)

    def test_constructor(self):
        # with cast
        ndf = NDFrame(self.ndf._data, dtype=np.int64)
        # assertEqual replaces the removed ``assert_`` alias and reports
        # both dtypes on failure.
        self.assertEqual(ndf.values.dtype, np.int64)

    def test_ndim(self):
        self.assertEqual(self.ndf.ndim, 2)

    def test_astype(self):
        casted = self.ndf.astype(int)
        self.assertEqual(casted.values.dtype, np.int_)

        casted = self.ndf.astype(np.int32)
        self.assertEqual(casted.values.dtype, np.int32)

    def test_squeeze(self):
        # noop: nothing to squeeze, the object must come back unchanged
        for s in [
                t.makeFloatSeries(),
                t.makeStringSeries(),
                t.makeObjectSeries()
        ]:
            t.assert_series_equal(s.squeeze(), s)
        for df in [t.makeTimeDataFrame()]:
            t.assert_frame_equal(df.squeeze(), df)
        for p in [t.makePanel()]:
            t.assert_panel_equal(p.squeeze(), p)
        for p4d in [t.makePanel4D()]:
            t.assert_panel4d_equal(p4d.squeeze(), p4d)

        # squeezing: each length-1 axis is dropped
        df = t.makeTimeDataFrame().reindex(columns=['A'])
        t.assert_series_equal(df.squeeze(), df['A'])

        p = t.makePanel().reindex(items=['ItemA'])
        t.assert_frame_equal(p.squeeze(), p['ItemA'])

        p = t.makePanel().reindex(items=['ItemA'], minor_axis=['A'])
        t.assert_series_equal(p.squeeze(), p.ix['ItemA', :, 'A'])

        p4d = t.makePanel4D().reindex(labels=['label1'])
        t.assert_panel_equal(p4d.squeeze(), p4d['label1'])

        p4d = t.makePanel4D().reindex(labels=['label1'], items=['ItemA'])
        t.assert_frame_equal(p4d.squeeze(), p4d.ix['label1', 'ItemA'])
Beispiel #6
0
def tprint(df: NDFrame, head=0, to_latex=False):
    """
    Print ``df`` as a pipe-style table, optionally followed by LaTeX.

    :param df: Series or DataFrame to print; a Series is first wrapped in a
        DataFrame
    :param head: positive to print only the first ``head`` rows, negative to
        print only the last ``-head`` rows, 0 to print everything
    :param to_latex: when true, also print ``to_latex(bold_rows=True)``
    """
    table = pd.DataFrame(df) if isinstance(df, pd.Series) else df

    if head > 0:
        table = table.head(head)
    elif head < 0:
        table = table.tail(-head)

    rendered = tabulate(table, headers="keys", tablefmt="pipe", floatfmt=".3f")
    print(rendered + '\n')

    if to_latex:
        print(table.to_latex(bold_rows=True))
class TestNDFrame(unittest.TestCase):
    """Exercises NDFrame construction, ``ndim``, ``astype`` and ``squeeze``."""

    _multiprocess_can_split_ = True

    def setUp(self):
        # Fixture: a bare NDFrame built from a generated frame's ``_data``.
        tdf = t.makeTimeDataFrame()
        self.ndf = NDFrame(tdf._data)

    def test_constructor(self):
        # with cast
        ndf = NDFrame(self.ndf._data, dtype=np.int64)
        # ``assert_`` was removed from unittest; assertEqual also gives a
        # more useful failure message.
        self.assertEqual(ndf.values.dtype, np.int64)

    def test_ndim(self):
        self.assertEqual(self.ndf.ndim, 2)

    def test_astype(self):
        casted = self.ndf.astype(int)
        self.assertEqual(casted.values.dtype, np.int_)

        casted = self.ndf.astype(np.int32)
        self.assertEqual(casted.values.dtype, np.int32)

    def test_squeeze(self):
        # noop
        for s in [t.makeFloatSeries(), t.makeStringSeries(),
                  t.makeObjectSeries()]:
            t.assert_series_equal(s.squeeze(), s)
        for df in [t.makeTimeDataFrame()]:
            t.assert_frame_equal(df.squeeze(), df)
        for p in [t.makePanel()]:
            t.assert_panel_equal(p.squeeze(), p)
        for p4d in [t.makePanel4D()]:
            t.assert_panel4d_equal(p4d.squeeze(), p4d)

        # squeezing
        df = t.makeTimeDataFrame().reindex(columns=['A'])
        t.assert_series_equal(df.squeeze(), df['A'])

        p = t.makePanel().reindex(items=['ItemA'])
        t.assert_frame_equal(p.squeeze(), p['ItemA'])

        p = t.makePanel().reindex(items=['ItemA'], minor_axis=['A'])
        t.assert_series_equal(p.squeeze(), p.ix['ItemA', :, 'A'])

        p4d = t.makePanel4D().reindex(labels=['label1'])
        t.assert_panel_equal(p4d.squeeze(), p4d['label1'])

        p4d = t.makePanel4D().reindex(labels=['label1'], items=['ItemA'])
        t.assert_frame_equal(p4d.squeeze(), p4d.ix['label1', 'ItemA'])
Beispiel #8
0
    def __setitem__(self, key, value):
        """
        Store ``value`` under ``key`` as a single item of this 3-D object.

        Parameters
        ----------
        key : object
            Item label, forwarded to ``NDFrame._set_item``.
        value : DataFrame, ndarray or scalar
            A DataFrame is reindexed to ``major_axis`` / ``minor_axis``; an
            ndarray must have shape ``(N, K)`` (the last two entries of
            ``self.shape``); a scalar is broadcast to that shape.

        Raises
        ------
        AssertionError
            If an ndarray of the wrong shape is given.
        TypeError
            If ``value`` is of an unsupported type.
        """
        _, N, K = self.shape
        if isinstance(value, DataFrame):
            value = value.reindex(index=self.major_axis, columns=self.minor_axis)
            mat = value.values
        elif isinstance(value, np.ndarray):
            # Explicit raise (same exception type as the former ``assert``)
            # so the check is not stripped when running with -O.
            if value.shape != (N, K):
                raise AssertionError('Shape of values must be %s, not %s'
                                     % ((N, K), tuple(value.shape)))
            mat = np.asarray(value)
        elif np.isscalar(value):
            dtype = _infer_dtype(value)
            mat = np.empty((N, K), dtype=dtype)
            mat.fill(value)
        else:
            # Previously fell through to an UnboundLocalError on ``mat``.
            raise TypeError('Cannot set item of type: %s' % str(type(value)))

        # Add the leading length-1 item axis expected by ``_set_item``.
        mat = mat.reshape((1, N, K))
        NDFrame._set_item(self, key, mat)
Beispiel #9
0
    def __init__(self,
                 data=None,
                 items=None,
                 major_axis=None,
                 minor_axis=None,
                 copy=False,
                 dtype=None):
        """
        Represents wide format panel data, stored as 3-dimensional array

        Parameters
        ----------
        data : ndarray (items x major x minor), or dict of DataFrames
            A BlockManager or a list is accepted as well; ``None`` is
            treated as an empty dict.
        items : Index or array-like
            axis=0
        major_axis : Index or array-like
            axis=1
        minor_axis : Index or array-like
            axis=2
        dtype : dtype, default None
            Data type to force, otherwise infer
        copy : boolean, default False
            Forwarded to ``_init_matrix`` for ndarray/list input and to
            ``NDFrame.__init__`` for BlockManager input; not used for dicts.

        Raises
        ------
        PandasError
            If ``data`` is of an unsupported type.
        """
        data = {} if data is None else data

        passed_axes = [items, major_axis, minor_axis]
        axes = None

        if isinstance(data, BlockManager):
            mgr = data
            # Passed axes override the manager's; missing ones are kept.
            if not all(ax is None for ax in passed_axes):
                axes = [
                    given if given is not None else existing
                    for given, existing in zip(passed_axes, data.axes)
                ]
        elif isinstance(data, dict):
            mgr = self._init_dict(data, passed_axes, dtype=dtype)
            # dtype was consumed by the builder above.
            copy, dtype = False, None
        elif isinstance(data, (np.ndarray, list)):
            mgr = self._init_matrix(data, passed_axes, dtype=dtype, copy=copy)
            # copy and dtype were consumed by the builder above.
            copy, dtype = False, None
        else:  # pragma: no cover
            raise PandasError('Panel constructor not properly called!')

        NDFrame.__init__(self, mgr, axes=axes, copy=copy, dtype=dtype)
Beispiel #10
0
def to_json(
    path_or_buf,
    obj: NDFrame,
    orient: str | None = None,
    date_format: str = "epoch",
    double_precision: int = 10,
    force_ascii: bool = True,
    date_unit: str = "ms",
    default_handler: Callable[[Any], JSONSerializable] | None = None,
    lines: bool = False,
    compression: CompressionOptions = "infer",
    index: bool = True,
    indent: int = 0,
    storage_options: StorageOptions = None,
):
    """
    Serialise a Series or DataFrame to JSON.

    The text is produced by the writer class matching ``obj`` and
    ``orient``.  It is written to ``path_or_buf`` when that is not ``None``;
    otherwise it is returned.

    Returns
    -------
    str or None
        The JSON text when ``path_or_buf`` is ``None``, else ``None``.

    Raises
    ------
    ValueError
        If ``index`` is false and ``orient`` is neither "split" nor "table",
        or if ``lines`` is true and ``orient`` is not "records".
    NotImplementedError
        If ``obj`` is neither a Series nor a DataFrame.
    """
    if not (index or orient in ["split", "table"]):
        raise ValueError(
            "'index=False' is only valid when 'orient' is 'split' or 'table'")

    if lines and orient != "records":
        raise ValueError("'lines' keyword only valid when 'orient' is records")

    table_orient = orient == "table"
    if table_orient and isinstance(obj, Series):
        # The table writer needs a frame; unnamed series get "values".
        obj = obj.to_frame(name=obj.name or "values")

    writer: type[Writer]
    if isinstance(obj, DataFrame):
        writer = JSONTableWriter if table_orient else FrameWriter
    elif isinstance(obj, Series):
        writer = SeriesWriter
    else:
        raise NotImplementedError("'obj' should be a Series or a DataFrame")

    writer_options = dict(
        orient=orient,
        date_format=date_format,
        double_precision=double_precision,
        ensure_ascii=force_ascii,
        date_unit=date_unit,
        default_handler=default_handler,
        index=index,
        indent=indent,
    )
    s = writer(obj, **writer_options).write()

    if lines:
        s = convert_to_line_delimits(s)

    if path_or_buf is None:
        return s

    # apply compression and byte/text conversion
    with get_handle(path_or_buf,
                    "w",
                    compression=compression,
                    storage_options=storage_options) as handles:
        handles.handle.write(s)
Beispiel #11
0
    def __setitem__(self, key, value):
        """
        Assign ``value`` to the item ``key``.

        Parameters
        ----------
        key : object
            Item label, passed on to ``NDFrame._set_item``.
        value : DataFrame, ndarray or scalar
            DataFrames are reindexed to this object's ``major_axis`` and
            ``minor_axis``; ndarrays must already have shape ``(N, K)``;
            scalars are broadcast into a new ``(N, K)`` array.

        Raises
        ------
        AssertionError
            If an ndarray does not have shape ``(N, K)``.
        TypeError
            If ``value`` is none of the supported types.
        """
        _, N, K = self.shape
        if isinstance(value, DataFrame):
            value = value.reindex(index=self.major_axis,
                                  columns=self.minor_axis)
            mat = value.values
        elif isinstance(value, np.ndarray):
            # ``assert`` is dropped under -O; raise the same error type
            # explicitly and say which shapes clashed.
            if value.shape != (N, K):
                raise AssertionError('Shape of values must be %s, not %s'
                                     % ((N, K), tuple(value.shape)))
            mat = np.asarray(value)
        elif np.isscalar(value):
            dtype = _infer_dtype(value)
            mat = np.empty((N, K), dtype=dtype)
            mat.fill(value)
        else:
            # Without this branch ``mat`` was unbound below.
            raise TypeError('Cannot set item of type: %s' % str(type(value)))

        # Prepend the length-1 item axis before handing off.
        mat = mat.reshape((1, N, K))
        NDFrame._set_item(self, key, mat)
Beispiel #12
0
    def _get_sharpe_ratio(price_series: NDFrame,
                          rf_series: Series,
                          scale_to_annualise: bool):
        """
        Returns the Sharpe ratio based on a series of asset prices
        and risk-free asset prices. The calculation uses the arithmetic
        mean and the standard deviation of the period-on-period price
        relatives in excess of the risk-free price relatives.

        :param price_series - a pandas series or data frame of prices
        :param rf_series - a pandas series representing the total
        return/price series of the risk free rate
        :param scale_to_annualise - bool governing whether or not a
        scaling factor is applied to annualise the statistic
        :return the mean excess return, times the annualisation factor
        (or 1), divided by the standard deviation of the excess returns

        Notes
        -------
        https://en.wikipedia.org/wiki/Sharpe_ratio
        """
        # Price relatives p_t / p_(t-1); the first row has no predecessor
        # and is dropped.
        asset_relatives = (price_series / price_series.shift(1)).dropna()
        rf_relatives = (rf_series / rf_series.shift(1)).dropna()

        excess = asset_relatives.subtract(rf_relatives, axis=0)
        mean_excess = excess.mean()
        excess_vol = excess.std()

        if scale_to_annualise:
            factor = TSeriesHelper._get_annualisation_factor(price_series.index)
        else:
            factor = 1

        return mean_excess * factor / excess_vol
Beispiel #13
0
def panel4d_reindex(self, labs=None, labels=None, items=None, major_axis=None,
                    minor_axis=None, axis=None, **kwargs):
    """
    Reindex wrapper that resolves the axis-style arguments first.

    Hack for the ``reindex_axis`` deprecation: "labels" is used for two
    different things, so the positional labels argument is called ``labs``
    here.  The non-None axis keywords are resolved through
    ``validate_axis_style_args``, merged into ``kwargs`` and forwarded to
    ``NDFrame.reindex``.
    """
    args = () if labs is None else (labs,)

    # Only axis keywords that were actually supplied take part.
    candidates = (
        ('labels', labels),
        ('items', items),
        ('major_axis', major_axis),
        ('minor_axis', minor_axis),
        ('axis', axis),
    )
    kwargs_ = {name: given for name, given in candidates if given is not None}

    axes = validate_axis_style_args(self, args, kwargs_, 'labs', 'reindex')
    kwargs.update(axes)
    return NDFrame.reindex(self, **kwargs)
Beispiel #14
0
    def price_to_return(price_series: NDFrame):
        """
        Convert asset prices into period-on-period returns.

        :param price_series: pandas price series or dataframe
        of price series
        :return: ``p_t / p_(t-1) - 1`` with rows containing missing values
        (such as the first row) dropped
        :raises ValueError: if any price equals zero
        """
        contains_zero = (price_series == 0).any(axis=None).any()
        if contains_zero:
            raise ValueError("Cannot convert price series with zeroes to a return")

        # The first row has no predecessor, so its ratio is NaN and dropped.
        price_relatives = (price_series / price_series.shift(1)).dropna()
        return price_relatives - 1
Beispiel #15
0
    def __setitem__(self, key, value):
        """
        Store ``value`` under ``key`` as a single item of this 3-D object.

        Parameters
        ----------
        key : object
            Item label, forwarded to ``NDFrame._set_item``.
        value : DataFrame, ndarray or scalar
            A DataFrame is reindexed to ``major_axis`` / ``minor_axis``; an
            ndarray must have shape ``(N, K)``; a scalar is broadcast to
            that shape.

        Raises
        ------
        AssertionError
            If an ndarray of the wrong shape is given.
        TypeError
            If ``value`` is of an unsupported type.
        """
        _, N, K = self.shape
        if isinstance(value, DataFrame):
            value = value.reindex(index=self.major_axis, columns=self.minor_axis)
            mat = value.values
        elif isinstance(value, np.ndarray):
            if value.shape != (N, K):
                # The message used to reference the undefined name
                # ``values`` (NameError).  %s is used so arrays that are not
                # 2-D can be reported as well.
                raise AssertionError("Shape of values must be %s, not %s"
                                     % ((N, K), tuple(value.shape)))
            mat = np.asarray(value)
        elif np.isscalar(value):
            dtype = _infer_dtype(value)
            mat = np.empty((N, K), dtype=dtype)
            mat.fill(value)
        else:
            raise TypeError("Cannot set item of type: %s" % str(type(value)))

        # Add the leading length-1 item axis expected by ``_set_item``.
        mat = mat.reshape((1, N, K))
        NDFrame._set_item(self, key, mat)
Beispiel #16
0
def remove_empty_values(data_frame: NDFrame) -> NDFrame:
    """
    Return a copy of ``data_frame`` with missing values replaced by ``" "``.

    After filling, the result is checked again for nulls; if any remain a
    note is printed and an exception is raised.

    :param data_frame: pandas object to clean
    :return: the filled object
    :raises Exception: if nulls are still present after filling
    """
    filled = data_frame.fillna(" ")
    remaining_nulls = panda.isnull(filled).sum()

    if not remaining_nulls.any():
        return filled

    print("Has some empties values in the data frame")
    raise Exception("Problem in load data set, we need to remove them")
Beispiel #17
0
    def __setitem__(self, key, value):
        """
        Store ``value`` under ``key`` along the first axis.

        Parameters
        ----------
        key : object
            Item label, forwarded to ``NDFrame._set_item``.
        value : sliced container, ndarray or scalar
            An instance of ``self._constructor_sliced`` is reindexed to the
            axes after the first; an ndarray must have shape
            ``self.shape[1:]``; a scalar is broadcast to that shape.

        Raises
        ------
        AssertionError
            If an ndarray of the wrong shape is given.
        TypeError
            If ``value`` is of an unsupported type.
        """
        shape = tuple(self.shape)
        if isinstance(value, self._constructor_sliced):
            value = value.reindex(
                **self._construct_axes_dict_for_slice(self._AXIS_ORDERS[1:]))
            mat = value.values
        elif isinstance(value, np.ndarray):
            if value.shape != shape[1:]:
                # Same exception type as before, now saying what was
                # expected and what was received.
                raise AssertionError(
                    'shape of value must be %s, shape of given object was %s'
                    % (shape[1:], tuple(value.shape)))
            mat = np.asarray(value)
        elif np.isscalar(value):
            dtype, value = _infer_dtype_from_scalar(value)
            mat = np.empty(shape[1:], dtype=dtype)
            mat.fill(value)
        else:
            raise TypeError('Cannot set item of type: %s' % str(type(value)))

        # Prepend a length-1 axis for the item being set.
        mat = mat.reshape(tuple([1]) + shape[1:])
        NDFrame._set_item(self, key, mat)
Beispiel #18
0
    def __setitem__(self, key, value):
        """
        Assign ``value`` to the item ``key``.

        Parameters
        ----------
        key : object
            Item label, passed on to ``NDFrame._set_item``.
        value : sliced container, ndarray or scalar
            Instances of ``self._constructor_sliced`` are reindexed to every
            axis but the first; ndarrays must match ``self.shape[1:]``;
            scalars are broadcast into a new array of that shape.

        Raises
        ------
        AssertionError
            If an ndarray does not have shape ``self.shape[1:]``.
        TypeError
            If ``value`` is none of the supported types.
        """
        shape = tuple(self.shape)
        item_shape = shape[1:]
        if isinstance(value, self._constructor_sliced):
            value = value.reindex(
                **self._construct_axes_dict_for_slice(self._AXIS_ORDERS[1:]))
            mat = value.values
        elif isinstance(value, np.ndarray):
            if value.shape != item_shape:
                # Previously raised with no message at all, which made the
                # failure hard to diagnose; the exception type is unchanged.
                raise AssertionError(
                    'shape of value must be %s, shape of given object was %s'
                    % (item_shape, tuple(value.shape)))
            mat = np.asarray(value)
        elif np.isscalar(value):
            dtype, value = _infer_dtype_from_scalar(value)
            mat = np.empty(item_shape, dtype=dtype)
            mat.fill(value)
        else:
            raise TypeError('Cannot set item of type: %s' % str(type(value)))

        # Prepend a length-1 axis for the item being set.
        mat = mat.reshape(tuple([1]) + item_shape)
        NDFrame._set_item(self, key, mat)
Beispiel #19
0
    def __setitem__(self, key, value):
        """
        Store ``value`` under ``key`` along the first axis.

        ``value`` may be an instance of ``self._constructor_sliced``
        (reindexed to the axes after the first), an ndarray whose shape
        equals ``self.shape[1:]``, or a scalar that is broadcast to that
        shape.

        Raises
        ------
        ValueError
            If an ndarray of the wrong shape is given.
        TypeError
            If ``value`` is of an unsupported type.
        """
        item_shape = tuple(self.shape)[1:]

        if isinstance(value, self._constructor_sliced):
            slice_axes = self._construct_axes_dict_for_slice(self._AXIS_ORDERS[1:])
            mat = value.reindex(**slice_axes).values
        elif isinstance(value, np.ndarray):
            if value.shape != item_shape:
                message = "shape of value must be {0}, shape of given object was {1}"
                raise ValueError(message.format(item_shape, value.shape))
            mat = np.asarray(value)
        elif np.isscalar(value):
            dtype, value = _infer_dtype_from_scalar(value)
            mat = np.empty(item_shape, dtype=dtype)
            mat.fill(value)
        else:
            raise TypeError("Cannot set item of type: %s" % str(type(value)))

        # Prepend a length-1 axis for the item being set.
        NDFrame._set_item(self, key, mat.reshape((1,) + item_shape))
Beispiel #20
0
    def __init__(self, data=None, items=None, major_axis=None, minor_axis=None,
                 copy=False, dtype=None):
        """
        Represents wide format panel data, stored as 3-dimensional array

        Parameters
        ----------
        data : ndarray (items x major x minor), or dict of DataFrames
            A BlockManager or a list is accepted too; ``None`` means an
            empty dict.
        items : Index or array-like
            axis=0
        major_axis : Index or array-like
            axis=1
        minor_axis : Index or array-like
            axis=2
        dtype : dtype, default None
            Data type to force, otherwise infer
        copy : boolean, default False
            Passed to ``_init_matrix`` for ndarray/list input and to
            ``NDFrame.__init__`` for BlockManager input; unused for dicts.

        Raises
        ------
        PandasError
            If ``data`` has an unsupported type.
        """
        if data is None:
            data = {}

        passed_axes = [items, major_axis, minor_axis]

        # Reject unsupported input before doing any work.
        if not isinstance(data, (BlockManager, dict, np.ndarray, list)):  # pragma: no cover
            raise PandasError('Panel constructor not properly called!')

        axes = None
        if isinstance(data, BlockManager):
            mgr = data
            if any(ax is not None for ax in passed_axes):
                # Passed axes take priority over the manager's own.
                axes = []
                for given, existing in zip(passed_axes, data.axes):
                    axes.append(existing if given is None else given)
        else:
            if isinstance(data, dict):
                mgr = self._init_dict(data, passed_axes, dtype=dtype)
            else:
                mgr = self._init_matrix(data, passed_axes, dtype=dtype,
                                        copy=copy)
            # The builders already applied dtype (and copy, for arrays).
            copy = False
            dtype = None

        NDFrame.__init__(self, mgr, axes=axes, copy=copy, dtype=dtype)
Beispiel #21
0
    def __setitem__(self, key, value):
        """
        Assign ``value`` to the item ``key``.

        Accepted values are an instance of ``self._constructor_sliced``
        (reindexed to every axis but the first), an ndarray matching
        ``self.shape[1:]``, or a scalar broadcast into that shape.

        Raises
        ------
        ValueError
            If an ndarray does not have shape ``self.shape[1:]``.
        TypeError
            If ``value`` is none of the supported types.
        """
        full_shape = tuple(self.shape)
        trailing = full_shape[1:]

        if isinstance(value, self._constructor_sliced):
            reindex_kwargs = self._construct_axes_dict_for_slice(
                self._AXIS_ORDERS[1:])
            value = value.reindex(**reindex_kwargs)
            mat = value.values
        elif isinstance(value, np.ndarray):
            if not value.shape == trailing:
                # Dimensions are coerced to plain ints for the message.
                given_shape = tuple(map(int, value.shape))
                raise ValueError('shape of value must be {0}, shape of given '
                                 'object was {1}'.format(trailing, given_shape))
            mat = np.asarray(value)
        elif np.isscalar(value):
            dtype, value = _infer_dtype_from_scalar(value)
            mat = np.empty(trailing, dtype=dtype)
            mat.fill(value)
        else:
            raise TypeError('Cannot set item of type: %s' % str(type(value)))

        # Prepend a length-1 axis for the item being set.
        mat = mat.reshape((1,) + trailing)
        NDFrame._set_item(self, key, mat)
Beispiel #22
0
    def __setitem__(self, key, value):
        """
        Assign ``value`` to the item ``key``.

        Parameters
        ----------
        key : object
            Item label, passed on to ``NDFrame._set_item``.
        value : DataFrame, ndarray or scalar
            DataFrames are reindexed to this object's ``major_axis`` and
            ``minor_axis``; ndarrays must already have shape ``(N, K)``;
            scalars are broadcast into a new ``(N, K)`` array.

        Raises
        ------
        AssertionError
            If an ndarray does not have shape ``(N, K)``.
        TypeError
            If ``value`` is none of the supported types.
        """
        _, N, K = self.shape
        if isinstance(value, DataFrame):
            value = value.reindex(index=self.major_axis,
                                  columns=self.minor_axis)
            mat = value.values
        elif isinstance(value, np.ndarray):
            if value.shape != (N, K):
                # Fixed: the message referenced the undefined name
                # ``values`` and so raised NameError.  %s formatting also
                # copes with arrays that are not 2-D.
                raise AssertionError(
                    'Shape of values must be %s, not %s'
                    % ((N, K), tuple(value.shape)))
            mat = np.asarray(value)
        elif np.isscalar(value):
            dtype = _infer_dtype(value)
            mat = np.empty((N, K), dtype=dtype)
            mat.fill(value)
        else:
            raise TypeError('Cannot set item of type: %s' % str(type(value)))

        # Prepend the length-1 item axis before handing off.
        mat = mat.reshape((1, N, K))
        NDFrame._set_item(self, key, mat)
Beispiel #23
0
    def pop(self, item):
        """
        Return item slice from panel and delete from panel

        Thin wrapper that forwards to ``NDFrame.pop``.

        Parameters
        ----------
        item : object
            Must be contained in panel's items

        Returns
        -------
        y : DataFrame
            The value returned by ``NDFrame.pop`` for ``item``
            (presumably a DataFrame for a 3-D panel; not verifiable from
            this method alone).
        """
        return NDFrame.pop(self, item)
Beispiel #24
0
    def pop(self, item):
        """
        Return item slice from panel and delete from panel

        Delegates directly to ``NDFrame.pop``.

        Parameters
        ----------
        item : object
            Must be contained in panel's items

        Returns
        -------
        y : DataFrame
            Whatever ``NDFrame.pop`` returns for ``item`` (documented as a
            DataFrame; confirm against the NDFrame implementation).
        """
        return NDFrame.pop(self, item)
Beispiel #25
0
    def get_annualised_vol(price_series: NDFrame):
        """
        Returns the annualised volatility of returns based on a stream of
        asset prices.

        The standard deviation is taken of the NATURAL LOG of the
        period-on-period price relatives, and the result is multiplied by the
        factor that ``TSeriesHelper._get_annualisation_factor`` derives from
        the index of ``price_series``.

        :param price_series - a pandas series or dataframe of prices

        Notes
        --------
        https://en.wikipedia.org/wiki/Volatility_(finance)
        """
        price_relatives = price_series / price_series.shift(1)
        log_returns = np.log(price_relatives)
        annualising_scaling = TSeriesHelper._get_annualisation_factor(price_series.index)

        # np.std (not .std()) is kept deliberately: it is the call whose
        # defaults define the existing result.
        period_vol = np.std(log_returns)
        return period_vol * annualising_scaling
Beispiel #26
0
    def __setitem__(self, key, value):
        """
        Store ``value`` under ``key`` as a single item of this 3-D object.

        Parameters
        ----------
        key : object
            Item label, forwarded to ``NDFrame._set_item``.
        value : LongPanel, DataFrame, ndarray or scalar
            A LongPanel must hold exactly one item and is converted to that
            item's wide-format slice first.  A DataFrame is reindexed to
            ``major_axis`` / ``minor_axis``; an ndarray must have shape
            ``(N, K)``; a scalar is broadcast to that shape.

        Raises
        ------
        ValueError
            If a LongPanel with more or fewer than one item is given.
        AssertionError
            If an ndarray of the wrong shape is given.
        TypeError
            If ``value`` is of an unsupported type.
        """
        _, N, K = self.shape

        # XXX
        if isinstance(value, LongPanel):
            if len(value.items) != 1:
                raise ValueError('Input panel must have only one item!')

            value = value.to_wide()[value.items[0]]

        if isinstance(value, DataFrame):
            value = value.reindex(index=self.major_axis,
                                  columns=self.minor_axis)
            mat = value.values
        elif isinstance(value, np.ndarray):
            # Explicit raise (same exception type as the former ``assert``)
            # so the check is not stripped when running with -O.
            if value.shape != (N, K):
                raise AssertionError('Shape of values must be %s, not %s'
                                     % ((N, K), tuple(value.shape)))
            mat = np.asarray(value)
        elif np.isscalar(value):
            dtype = _infer_dtype(value)
            mat = np.empty((N, K), dtype=dtype)
            mat.fill(value)
        else:
            # Previously fell through to an UnboundLocalError on ``mat``.
            raise TypeError('Cannot set item of type: %s' % str(type(value)))

        # Add the leading length-1 item axis expected by ``_set_item``.
        mat = mat.reshape((1, N, K))
        NDFrame._set_item(self, key, mat)
Beispiel #27
0
class TestNDFrame(unittest.TestCase):
    """Checks NDFrame construction with a cast, ``ndim`` and ``astype``."""

    def setUp(self):
        # Wrap the internal data of a generated test frame in a bare NDFrame.
        tdf = t.makeTimeDataFrame()
        self.ndf = NDFrame(tdf._data)

    def test_constructor(self):
        # with cast
        ndf = NDFrame(self.ndf._data, dtype=np.int64)
        # assertEqual replaces the removed ``assert_`` alias.
        self.assertEqual(ndf.values.dtype, np.int64)

    def test_ndim(self):
        self.assertEqual(self.ndf.ndim, 2)

    def test_astype(self):
        casted = self.ndf.astype(int)
        # ``int`` maps to NumPy's default integer (np.int_), which is not
        # int64 on every platform.
        self.assertEqual(casted.values.dtype, np.int_)
class TestNDFrame(unittest.TestCase):
    """Exercises NDFrame construction with a cast, ``ndim`` and ``astype``."""

    def setUp(self):
        # Fixture: a bare NDFrame built from a generated frame's ``_data``.
        tdf = t.makeTimeDataFrame()
        self.ndf = NDFrame(tdf._data)

    def test_constructor(self):
        # with cast
        ndf = NDFrame(self.ndf._data, dtype=np.int64)
        # ``assert_`` was removed from unittest; assertEqual also reports
        # both dtypes on failure.
        self.assertEqual(ndf.values.dtype, np.int64)

    def test_ndim(self):
        self.assertEqual(self.ndf.ndim, 2)

    def test_astype(self):
        casted = self.ndf.astype(int)
        # Compare against np.int_ (what ``int`` resolves to) rather than a
        # hard-coded int64, which fails where the default integer is 32-bit.
        self.assertEqual(casted.values.dtype, np.int_)
Beispiel #29
0
    def mean(self,
             axis=None,
             skipna=None,
             level=None,
             numeric_only=None,
             **kwargs):
        """
        Compute the mean via ``NDFrame.mean`` and wrap it in a ``OneSeries``.

        :param axis: forwarded positionally to ``NDFrame.mean``
        :param skipna: forwarded positionally to ``NDFrame.mean``
        :param level: forwarded positionally to ``NDFrame.mean``
        :param numeric_only: forwarded positionally to ``NDFrame.mean``
        :param kwargs: forwarded to ``NDFrame.mean``
        :return: OneSeries
        """
        reduced = NDFrame.mean(
            self, axis, skipna, level, numeric_only, **kwargs)
        return OneSeries(reduced)
Beispiel #30
0
    def final_matching(self, gsp_results: NDFrame) -> DataFrame:
        """
        Relabel the columns of ``gsp_results`` and return it.

        First, series with more than one auto-correlation coefficient are
        ranked by ``std / mean`` of those coefficients (ascending) and paired
        in that order with the labels in ``self.always_on``.  The remaining
        labels are then assigned from the transposed ``self.pairing_table``:
        rows are visited in descending order of ``sum(row) / entropy(row)``
        and each takes the still-unclaimed column with the largest value.

        Side effects: sets ``self.past_consensus`` and overwrites
        ``gsp_results.columns`` in place.

        :param gsp_results: frame whose columns are relabelled
        :return: the same ``gsp_results`` object
        """
        n_columns = len(gsp_results.columns)
        final_columns = [''] * n_columns

        period_scores = []
        for series_idx, coeffs in enumerate(self.auto_coreffs):
            mean_coeff = np.average(coeffs)
            spread = np.std(coeffs)

            # STD of len 1 is 0
            # Remove things with only 1 coreff
            # If having trouble, could also weight by # of auto_coreffs
            if len(coeffs) <= 1:
                continue
            period_scores.append((series_idx, spread / mean_coeff))

        period_scores.sort(key=lambda entry: entry[1])

        # Always-on appliances are matched first, in ranking order.
        claimed = set()
        for (series_idx, _score), label in zip(period_scores, self.always_on):
            final_columns[series_idx] = label
            claimed.add(series_idx)

        # Match the remaining appliances
        unclaimed = set(range(n_columns)) - claimed
        pt = np.transpose(self.pairing_table)
        # True marks a column that can no longer be chosen.
        mask = [col not in unclaimed for col in range(len(pt[0]))]

        consensus = []
        for row_idx, row in enumerate(pt):
            strength = sum(row) / self.compute_entropy(row)
            consensus.append(
                (row_idx, row, strength, Matcher.get_ordering(row)))
        consensus.sort(key=lambda entry: -entry[2])
        self.past_consensus = consensus

        for label_idx, row, _strength, _ordering in consensus:
            choice = ma.masked_array(row, mask).argmax()
            mask[choice] = True
            final_columns[choice] = self.labels[label_idx]

        gsp_results.columns = final_columns
        return gsp_results
Beispiel #31
0
    def get_iterator(self, data: NDFrame, axis: int = 0):
        """
        Groupby iterator

        Walks ``self.bins`` / ``self.binlabels`` in step, slicing ``data``
        along ``axis`` from the previous bin edge to the current one.  Bins
        labelled ``NaT`` are skipped.  Whatever lies past the last edge is
        yielded under the last bin label.

        Returns
        -------
        Generator yielding sequence of (name, subsetted object)
        for each group
        """
        def _window(lo, hi):
            return data._slice(slice(lo, hi), axis=axis)

        total = len(data.axes[axis])

        lower = 0
        for upper, label in zip(self.bins, self.binlabels):
            if label is not NaT:
                yield label, _window(lower, upper)
            lower = upper

        # Trailing rows beyond the final edge go to the last label.
        if lower < total:
            yield self.binlabels[-1], _window(lower, None)
Beispiel #32
0
    def _get_downside_deviation(asset_returns: NDFrame,
                                threshold: int = 1):
        """
        Returns the downside deviation of the returns relative to the
        threshold: returns above the threshold contribute zero, and the
        result is the square root of the mean squared shortfall.

        :param asset_returns - a pandas series or data frame of returns
        in the form (1+r)
        :param threshold - the threshold below which returns should be
        included in the calculation.  Default is a zero return (ie a
        threshold of 1).
        :return the square root of (sum of squared shortfalls / number of
        rows)
        """
        shortfall = asset_returns.subtract(threshold, axis=0)
        # Keep only the negative part; everything else becomes 0.
        downside_only = shortfall.where(shortfall < 0, 0)

        squared = np.power(downside_only, 2)
        mean_square = np.sum(squared) / len(downside_only)
        return np.sqrt(mean_square)
Beispiel #33
0
    def _get_sortino_ratio(price_series: NDFrame,
                           benchmark_series: Series,
                           scale_to_annualise: bool):
        """
        Returns the Sortino ratio

        The mean of the price relatives in excess of the benchmark's is
        multiplied by the annualisation factor (or 1) and divided by the
        downside deviation from ``TSeriesHelper._get_downside_deviation``,
        which is called with the benchmark relatives as the threshold.

        :param price_series - a pandas series or data frame of prices
        :param benchmark_series - a pandas series representing the total
        return/price series of the chosen benchmark
        :param scale_to_annualise - bool governing whether or not a
        scaling factor is applied to annualise the statistic

        Notes
        ------
        https://en.wikipedia.org/wiki/Sortino_ratio
        """
        # Price relatives p_t / p_(t-1); the first row is NaN and dropped.
        asset_relatives = (price_series / price_series.shift(1)).dropna()
        benchmark_relatives = (benchmark_series / benchmark_series.shift(1)).dropna()

        mean_excess = asset_relatives.subtract(benchmark_relatives, axis=0).mean()
        downside = TSeriesHelper._get_downside_deviation(asset_relatives, benchmark_relatives)

        if scale_to_annualise:
            factor = TSeriesHelper._get_annualisation_factor(price_series.index)
        else:
            factor = 1

        return mean_excess * factor / downside
Beispiel #34
0
 def setUp(self):
     # Fixture: wrap the ``_data`` of a generated test frame in an NDFrame.
     self.ndf = NDFrame(t.makeTimeDataFrame()._data)
Beispiel #35
0
    def __init__(self, data=None, index=None, columns=None,
                 default_kind=None, default_fill_value=None,
                 dtype=None, copy=False):
        """
        Build the manager for ``data`` and initialise the NDFrame with it.

        Parameters
        ----------
        data : dict, ndarray, list, SparseDataFrame, SparseSeries,
               SparseArray, DataFrame, BlockManager or None
            Source data.  Sparse inputs also supply defaults for any of
            ``index``, ``columns``, ``default_fill_value`` and
            ``default_kind`` that were not passed.
        index, columns : optional
            Axis labels.
        default_kind : optional
            Falls back to ``'block'``.
        default_fill_value : optional
            Falls back to ``np.nan``.
        dtype : optional
            When given, the manager is cast (or built) with this dtype.
        copy : bool, default False
            Only forwarded to ``_init_mgr`` (SparseDataFrame and
            BlockManager input).

        Raises
        ------
        Exception
            If a SparseSeries/SparseArray is given without ``columns`` and
            has no ``name`` attribute to derive one from.
        TypeError
            If ``data`` is of an unsupported type.
        """

        # pick up the defaults from the Sparse structures
        if isinstance(data, SparseDataFrame):
            if index is None:
                index = data.index
            if columns is None:
                columns = data.columns
            if default_fill_value is None:
                default_fill_value = data.default_fill_value
            if default_kind is None:
                default_kind = data.default_kind
        elif isinstance(data, (SparseSeries, SparseArray)):
            if index is None:
                index = data.index
            if default_fill_value is None:
                default_fill_value = data.fill_value
            if columns is None and hasattr(data, 'name'):
                columns = [data.name]
            if columns is None:
                raise Exception("cannot pass a series w/o a name or columns")
            # From here on a single sparse column is handled as a dict.
            data = {columns[0]: data}

        if default_fill_value is None:
            default_fill_value = np.nan
        if default_kind is None:
            default_kind = 'block'

        self._default_kind = default_kind
        self._default_fill_value = default_fill_value

        if isinstance(data, dict):
            mgr = self._init_dict(data, index, columns)
            if dtype is not None:
                mgr = mgr.astype(dtype)
        elif isinstance(data, (np.ndarray, list)):
            mgr = self._init_matrix(data, index, columns)
            if dtype is not None:
                mgr = mgr.astype(dtype)
        elif isinstance(data, SparseDataFrame):
            mgr = self._init_mgr(
                data._data, dict(index=index, columns=columns), dtype=dtype, copy=copy)
        elif isinstance(data, DataFrame):
            mgr = self._init_dict(data, data.index, data.columns)
            if dtype is not None:
                mgr = mgr.astype(dtype)
        elif isinstance(data, BlockManager):
            mgr = self._init_mgr(
                data, axes=dict(index=index, columns=columns), dtype=dtype, copy=copy)
        elif data is None:
            data = {}

            if index is None:
                index = Index([])
            else:
                index = _ensure_index(index)

            if columns is None:
                columns = Index([])
            else:
                # One all-NaN sparse column per requested label.
                for c in columns:
                    data[c] = SparseArray(np.nan,
                                          index=index,
                                          kind=self._default_kind,
                                          fill_value=self._default_fill_value)
            mgr = dict_to_manager(data, columns, index)
            if dtype is not None:
                mgr = mgr.astype(dtype)
        else:
            # Previously an unsupported type fell through and failed with
            # an UnboundLocalError on ``mgr`` below.
            raise TypeError('Cannot construct from data of type: %s'
                            % str(type(data)))

        NDFrame.__init__(self, mgr)
Beispiel #36
0
def test_ndframe_indexing_raises(idxr, error, error_message):
    """Check that subscripting a bare 3-D ``NDFrame`` via ``idxr`` raises.

    ``idxr`` is called with the frame and the result is subscripted with
    ``0``; that must raise ``error`` with a message matching
    ``error_message``.
    """
    # GH 25567
    values = np.random.randint(5, size=(2, 2, 2))
    ndframe = NDFrame(values)
    with pytest.raises(error, match=error_message):
        idxr(ndframe)[0]
Beispiel #37
0
    def __init__(self,
                 data=None,
                 index=None,
                 columns=None,
                 default_kind=None,
                 default_fill_value=None,
                 dtype=None,
                 copy=False):
        """
        Build a manager from ``data`` and initialise the ``NDFrame`` base.

        Parameters
        ----------
        data : dict, ndarray, list, SparseDataFrame, SparseSeries,
            SparseArray, DataFrame, BlockManager or None
            Input to build the frame from. ``None`` builds an empty frame,
            or one all-NaN sparse column per entry of ``columns``.
        index, columns : optional
            Axis labels. When omitted they are taken from ``data`` if it is
            a sparse frame (both) or a sparse series/array (``index``, and
            ``columns`` from ``data.name`` when that attribute exists).
        default_kind : optional
            Stored as ``self._default_kind``. Falls back to the sparse
            frame's ``default_kind`` and finally to ``'block'``.
        default_fill_value : optional
            Stored as ``self._default_fill_value``. Falls back to the sparse
            input's fill value and finally to ``np.nan``.
        dtype : optional
            If given, the manager is cast to it (via ``astype`` or by
            forwarding it to ``_init_mgr``).
        copy : bool, default False
            Only forwarded to ``_init_mgr`` (SparseDataFrame / BlockManager
            input).

        Raises
        ------
        Exception
            If a sparse series/array without a ``name`` attribute is passed
            and ``columns`` is not given.
        TypeError
            If ``data`` is of a type none of the branches below handle.
        """

        # pick up the defaults from the Sparse structures
        if isinstance(data, SparseDataFrame):
            if index is None:
                index = data.index
            if columns is None:
                columns = data.columns
            if default_fill_value is None:
                default_fill_value = data.default_fill_value
            if default_kind is None:
                default_kind = data.default_kind
        elif isinstance(data, (SparseSeries, SparseArray)):
            if index is None:
                index = data.index
            if default_fill_value is None:
                default_fill_value = data.fill_value
            if columns is None and hasattr(data, 'name'):
                columns = [data.name]
            if columns is None:
                raise Exception("cannot pass a series w/o a name or columns")
            # from here on the single sparse column is handled as a dict
            data = {columns[0]: data}

        if default_fill_value is None:
            default_fill_value = np.nan
        if default_kind is None:
            default_kind = 'block'

        # must be set before the _init_* helpers / SparseArray calls below
        self._default_kind = default_kind
        self._default_fill_value = default_fill_value

        if isinstance(data, dict):
            mgr = self._init_dict(data, index, columns)
            if dtype is not None:
                mgr = mgr.astype(dtype)
        elif isinstance(data, (np.ndarray, list)):
            mgr = self._init_matrix(data, index, columns)
            if dtype is not None:
                mgr = mgr.astype(dtype)
        elif isinstance(data, SparseDataFrame):
            mgr = self._init_mgr(data._data,
                                 dict(index=index, columns=columns),
                                 dtype=dtype,
                                 copy=copy)
        elif isinstance(data, DataFrame):
            # note: the frame's own axes are used, not the passed ones
            mgr = self._init_dict(data, data.index, data.columns)
            if dtype is not None:
                mgr = mgr.astype(dtype)
        elif isinstance(data, BlockManager):
            mgr = self._init_mgr(data,
                                 axes=dict(index=index, columns=columns),
                                 dtype=dtype,
                                 copy=copy)
        elif data is None:
            data = {}

            if index is None:
                index = Index([])
            else:
                index = _ensure_index(index)

            if columns is None:
                columns = Index([])
            else:
                # one all-NaN sparse column per requested label
                for c in columns:
                    data[c] = SparseArray(np.nan,
                                          index=index,
                                          kind=self._default_kind,
                                          fill_value=self._default_fill_value)
            mgr = dict_to_manager(data, columns, index)
            if dtype is not None:
                mgr = mgr.astype(dtype)
        else:
            # Without this branch an unsupported ``data`` fell through every
            # case and failed below on an unbound ``mgr``; report the real
            # problem instead.
            msg = ('{cls} called with unknown type "{data_type}" '
                   'for data argument')
            raise TypeError(msg.format(cls=type(self).__name__,
                                       data_type=type(data).__name__))

        NDFrame.__init__(self, mgr)
Beispiel #38
0
 def setUp(self):
     """Store an ``NDFrame`` built from the ``_data`` of a time data frame."""
     time_frame = t.makeTimeDataFrame()
     frame_data = time_frame._data
     self.ndf = NDFrame(frame_data)
Beispiel #39
0
def _get_grouper(
    obj: NDFrame,
    key=None,
    axis=0,
    level=None,
    sort=True,
    observed=False,
    mutated=False,
    validate=True,
):
    """
    Create and return a BaseGrouper, which is an internal
    mapping of how to create the grouper indexers.
    This may be composed of multiple Grouping objects, indicating
    multiple groupers.

    Groupers are ultimately index mappings. They can originate as:
    index mappings, keys to columns, functions, or Groupers.

    Groupers enable local references to axis, level, sort, while
    the passed in axis, level, and sort are 'global'.

    This routine tries to figure out what the passed in references
    are and then creates a Grouping for each one, combined into
    a BaseGrouper.

    If observed & we have a categorical grouper, only show the observed
    values.

    If validate, then check for key/level overlaps.

    Parameters
    ----------
    obj : NDFrame
        Object being grouped.
    key : optional
        A single key, a list of keys, a tuple, a ``Grouper`` or a
        ``BaseGrouper``.
    axis : default 0
        Axis of ``obj`` to group along (passed to ``obj._get_axis``).
    level : optional
        Level (or list of levels) of the grouping axis to group by.
    sort : bool, default True
        Forwarded to each ``Grouping`` and to the ``BaseGrouper``.
    observed : bool, default False
        Forwarded to each ``Grouping``.
    mutated : bool, default False
        Forwarded to the ``BaseGrouper``.
    validate : bool, default True
        If True, call ``obj._check_label_or_level_ambiguity`` for keys that
        are found in ``obj``.

    Returns
    -------
    tuple
        ``(grouper, exclusions, obj)``. ``exclusions`` is a list, except
        when ``key`` is a ``Grouper`` with a non-None ``key`` attribute, in
        which case it is the set ``{key.key}``.

    Raises
    ------
    ValueError
        For an invalid ``level`` on a non-MultiIndex axis, for a categorical
        grouper whose length differs from the axis length, or when no group
        keys end up being passed for a non-empty ``obj``.
    KeyError
        If a label-like key is neither in ``obj`` nor a level reference.
    """
    group_axis = obj._get_axis(axis)

    # validate that the passed single level is compatible with the passed
    # axis of the object
    if level is not None:
        # TODO: These if-block and else-block are almost same.
        # MultiIndex instance check is removable, but it seems that there are
        # some processes only for non-MultiIndex in else-block,
        # eg. the axis-name check against `level`. We have to consider
        # carefully whether these are applicable for MultiIndex. Even if
        # these are applicable, we need to check if it makes no side effect
        # to subsequent processes on the outside of this condition.
        # (GH 17621)
        if isinstance(group_axis, MultiIndex):
            if is_list_like(level) and len(level) == 1:
                level = level[0]

            if key is None and is_scalar(level):
                # Get the level values from group_axis
                key = group_axis.get_level_values(level)
                level = None

        else:
            # allow level to be a length-one list-like object
            # (e.g., level=[0])
            # GH 13901
            if is_list_like(level):
                nlevels = len(level)
                if nlevels == 1:
                    level = level[0]
                elif nlevels == 0:
                    raise ValueError("No group keys passed!")
                else:
                    raise ValueError(
                        "multiple levels only valid with MultiIndex")

            if isinstance(level, str):
                # Compare against the axis actually being grouped on; using
                # ``obj.index`` here was wrong for ``axis=1``, where the
                # grouping axis is the columns.
                if group_axis.name != level:
                    raise ValueError(
                        "level name {} is not the name of the index".format(
                            level))
            elif level > 0 or level < -1:
                raise ValueError(
                    "level > 0 or level < -1 only valid with MultiIndex")

            # NOTE: `group_axis` and `group_axis.get_level_values(level)`
            # are same in this section.
            level = None
            key = group_axis

    # a passed-in Grouper, directly convert
    if isinstance(key, Grouper):
        binner, grouper, obj = key._get_grouper(obj, validate=False)
        if key.key is None:
            return grouper, [], obj
        else:
            return grouper, {key.key}, obj

    # already have a BaseGrouper, just return it
    elif isinstance(key, BaseGrouper):
        return key, [], obj

    # In the future, a tuple key will always mean an actual key,
    # not an iterable of keys. In the meantime, we attempt to provide
    # a warning. We can assume that the user wanted a list of keys when
    # the key is not in the index. We just have to be careful with
    # unhashable elements of `key`. Any unhashable elements implies that
    # they wanted a list of keys.
    # https://github.com/pandas-dev/pandas/issues/18314
    is_tuple = isinstance(key, tuple)
    all_hashable = is_tuple and is_hashable(key)

    if is_tuple:
        if (all_hashable and key not in obj
                and set(key).issubset(obj)) or not all_hashable:
            # column names ('a', 'b') -> ['a', 'b']
            # arrays like (a, b) -> [a, b]
            msg = ("Interpreting tuple 'by' as a list of keys, rather than "
                   "a single key. Use 'by=[...]' instead of 'by=(...)'. In "
                   "the future, a tuple will always mean a single key.")
            warnings.warn(msg, FutureWarning, stacklevel=5)
            key = list(key)

    if not isinstance(key, list):
        keys = [key]
        match_axis_length = False
    else:
        keys = key
        match_axis_length = len(keys) == len(group_axis)

    # what are we after, exactly?
    any_callable = any(callable(g) or isinstance(g, dict) for g in keys)
    any_groupers = any(isinstance(g, Grouper) for g in keys)
    any_arraylike = any(
        isinstance(g, (list, tuple, Series, Index, np.ndarray)) for g in keys)

    # is this an index replacement?
    if (not any_callable and not any_arraylike and not any_groupers
            and match_axis_length and level is None):
        if isinstance(obj, DataFrame):
            all_in_columns_index = all(g in obj.columns or g in obj.index.names
                                       for g in keys)
        elif isinstance(obj, Series):
            all_in_columns_index = all(g in obj.index.names for g in keys)

        if not all_in_columns_index:
            # the list of scalars is itself one array-like key
            keys = [com.asarray_tuplesafe(keys)]

    if isinstance(level, (tuple, list)):
        if key is None:
            keys = [None] * len(level)
        levels = level
    else:
        levels = [level] * len(keys)

    groupings = []
    exclusions = []

    # if the actual grouper should be obj[key]
    def is_in_axis(key):
        if not _is_label_like(key):
            items = obj._data.items
            try:
                items.get_loc(key)
            except (KeyError, TypeError):
                # TypeError shows up here if we pass e.g. Int64Index
                return False

        return True

    # if the grouper is obj[name]
    def is_in_obj(gpr):
        if not hasattr(gpr, "name"):
            return False
        try:
            return gpr is obj[gpr.name]
        except (KeyError, IndexError):
            return False

    for gpr, level in zip(keys, levels):

        if is_in_obj(gpr):  # df.groupby(df['name'])
            in_axis, name = True, gpr.name
            exclusions.append(name)

        elif is_in_axis(gpr):  # df.groupby('name')
            if gpr in obj:
                if validate:
                    obj._check_label_or_level_ambiguity(gpr, axis=axis)
                in_axis, name, gpr = True, gpr, obj[gpr]
                exclusions.append(name)
            elif obj._is_level_reference(gpr, axis=axis):
                in_axis, name, level, gpr = False, None, gpr, None
            else:
                raise KeyError(gpr)
        elif isinstance(gpr, Grouper) and gpr.key is not None:
            # Add key to exclusions
            exclusions.append(gpr.key)
            in_axis, name = False, None
        else:
            in_axis, name = False, None

        if is_categorical_dtype(gpr) and len(gpr) != obj.shape[axis]:
            raise ValueError(
                ("Length of grouper ({len_gpr}) and axis ({len_axis})"
                 " must be same length".format(len_gpr=len(gpr),
                                               len_axis=obj.shape[axis])))

        # create the Grouping
        # allow us to passing the actual Grouping as the gpr
        ping = (Grouping(
            group_axis,
            gpr,
            obj=obj,
            name=name,
            level=level,
            sort=sort,
            observed=observed,
            in_axis=in_axis,
        ) if not isinstance(gpr, Grouping) else gpr)

        groupings.append(ping)

    if len(groupings) == 0 and len(obj):
        raise ValueError("No group keys passed!")
    elif len(groupings) == 0:
        # empty object and no keys: fall back to a single empty Grouping
        groupings.append(
            Grouping(Index([], dtype="int"), np.array([], dtype=np.intp)))

    # create the internals grouper
    grouper = BaseGrouper(group_axis, groupings, sort=sort, mutated=mutated)
    return grouper, exclusions, obj
Beispiel #40
0
 def test_constructor(self):
     """Constructing an NDFrame with ``dtype=np.int64`` yields int64 values."""
     # with cast
     ndf = NDFrame(self.ndf._data, dtype=np.int64)
     # ``assert_`` is a deprecated unittest alias (removed in Python 3.12);
     # ``assertEqual`` also reports both dtypes on failure.
     self.assertEqual(ndf.values.dtype, np.int64)
Beispiel #41
0
    def _set_grouper(self, obj: NDFrame, sort: bool = False):
        """
        given an object and the specifications, setup the internal grouper
        for this particular specification

        Parameters
        ----------
        obj : Series or DataFrame
        sort : bool, default False
            whether the resulting grouper should be sorted
        """
        assert obj is not None

        if self.key is not None and self.level is not None:
            raise ValueError(
                "The Grouper cannot specify both a key and a level!")

        # Keep self.grouper value before overriding
        if self._grouper is None:
            # TODO: What are we assuming about subsequent calls?
            self._grouper = self._gpr_index
            self._indexer = self.indexer

        # the key must be a valid info item
        if self.key is not None:
            key = self.key
            # The 'on' is already defined
            if getattr(self._gpr_index, "name", None) == key and isinstance(
                    obj, Series):
                # Sometimes self._grouper will have been resorted while
                # obj has not. In this case there is a mismatch when we
                # call self._grouper.take(obj.index) so we need to undo the sorting
                # before we call _grouper.take.
                assert self._grouper is not None
                if self._indexer is not None:
                    reverse_indexer = self._indexer.argsort()
                    unsorted_ax = self._grouper.take(reverse_indexer)
                    ax = unsorted_ax.take(obj.index)
                else:
                    ax = self._grouper.take(obj.index)
            else:
                if key not in obj._info_axis:
                    raise KeyError(f"The grouper name {key} is not found")
                ax = Index(obj[key], name=key)

        else:
            ax = obj._get_axis(self.axis)
            if self.level is not None:
                level = self.level

                # if a level is given it must be a mi level or
                # equivalent to the axis name
                if isinstance(ax, MultiIndex):
                    level = ax._get_level_number(level)
                    ax = Index(ax._get_level_values(level),
                               name=ax.names[level])

                else:
                    if level not in (0, ax.name):
                        raise ValueError(f"The level {level} is not valid")

        # possibly sort
        if (self.sort or sort) and not ax.is_monotonic:
            # use stable sort to support first, last, nth
            # TODO: why does putting na_position="first" fix datetimelike cases?
            indexer = self.indexer = ax.array.argsort(kind="mergesort",
                                                      na_position="first")
            ax = ax.take(indexer)
            obj = obj.take(indexer, axis=self.axis)

        # error: Incompatible types in assignment (expression has type
        # "NDFrameT", variable has type "None")
        self.obj = obj  # type: ignore[assignment]
        self._gpr_index = ax
        return self._gpr_index