Exemple #1
0
    def __init__(
        self,
        X,
        y=None,
        device=None,
        length=None,
    ):
        """Store the data and determine how many samples it holds.

        Parameters
        ----------
        X
          Input data; kept as ``self.X``.

        y
          Target data or None; kept as ``self.y``.

        device
          Deprecated and ignored. Passing anything other than None
          emits a ``DeprecationWarning``.

        length
          When given, it is used as the length as-is and the lengths
          of ``X`` and ``y`` are neither computed nor compared.

        Raises
        ------
        ValueError
          If ``length`` is None, ``y`` is not None, and ``X`` and ``y``
          report different lengths.

        """
        # TODO: Remove warning in release 0.4
        if device is not None:
            message = (
                "device is no longer needed by Dataset and will be ignored.")
            warnings.warn(message, DeprecationWarning)

        self.X, self.y = X, y

        # Computed once here and stored on the instance.
        self.X_indexing = check_indexing(X)
        self.y_indexing = check_indexing(y)
        self.X_is_ndframe = is_pandas_ndframe(X)

        if length is None:
            # No explicit length: derive it from X and make sure y agrees.
            # pylint: disable=invalid-name
            n_X = get_len(X)
            if y is not None and get_len(y) != n_X:
                raise ValueError("X and y have inconsistent lengths.")
            length = n_X

        self._len = length
Exemple #2
0
    def __getitem__(self, i):
        """Return the transformed ``(X, y)`` selection for index ``i``.

        A pandas NDFrame ``X`` is first turned into a dict that maps
        each key to its values reshaped to a single column. ``y`` is
        passed to ``self.transform`` as None when no target is stored.

        """
        features = self.X
        targets = self.y

        if is_pandas_ndframe(features):
            # One (n, 1) array per key so the dict can be indexed below.
            features = {
                key: features[key].values.reshape(-1, 1) for key in features
            }

        Xi = multi_indexing(features, i)
        if targets is None:
            yi = None
        else:
            yi = multi_indexing(targets, i)
        return self.transform(Xi, yi)
Exemple #3
0
    def __getitem__(self, i):
        """Index the stored data with ``i`` and apply ``self.transform``.

        If ``self.X`` is a pandas NDFrame, it is converted to a dict of
        per-key column arrays (each reshaped to ``(-1, 1)``) before
        indexing. A missing target (``self.y is None``) is forwarded
        unchanged as None.

        """
        data = self.X
        target = self.y

        if is_pandas_ndframe(data):
            as_columns = {}
            for name in data:
                as_columns[name] = data[name].values.reshape(-1, 1)
            data = as_columns

        Xi = multi_indexing(data, i)
        yi = None if target is None else multi_indexing(target, i)
        return self.transform(Xi, yi)
Exemple #4
0
    def __init__(
        self,
        X,
        y=None,
        length=None,
    ):
        """Store the data and determine how many samples it holds.

        Parameters
        ----------
        X
          Input data; kept as ``self.X``.

        y
          Target data or None; kept as ``self.y``.

        length
          When given, it is used as the length as-is and the lengths
          of ``X`` and ``y`` are neither computed nor compared.

        Raises
        ------
        ValueError
          If ``length`` is None, ``y`` is not None, and ``X`` and ``y``
          report different lengths.

        """
        self.X, self.y = X, y

        # Computed once here and stored on the instance.
        self.X_indexing = check_indexing(X)
        self.y_indexing = check_indexing(y)
        self.X_is_ndframe = is_pandas_ndframe(X)

        if length is None:
            # No explicit length: derive it from X and make sure y agrees.
            # pylint: disable=invalid-name
            n_X = get_len(X)
            if y is not None and get_len(y) != n_X:
                raise ValueError("X and y have inconsistent lengths.")
            length = n_X

        self._len = length
Exemple #5
0
def to_tensor(
        X: Union[torch.Tensor, SliceDict],
        device: Union[torch.device, str],
        dtype: Optional[torch.dtype] = None) -> Union[torch.Tensor, SliceDict]:
    """
    Convert ``X`` to a tensor, recursing into containers.

    Differs slightly from skorch's to_tensor: (a) DataFrames simply get their `values` extracted, (b) an optional
    dtype conversion is supported.

    * dict: every value is converted and the result is returned as a ``SliceDict`` with the same keys.
    * list / tuple: every element is converted and the result is rebuilt with the same container type.
    * anything else: handed to ``_to_tensor_base`` (after taking ``.values`` of a pandas NDFrame) and, if ``dtype``
      is given, cast with ``Tensor.to``.
    """
    if isinstance(X, dict):
        converted = {}
        for key, value in X.items():
            converted[key] = to_tensor(value, device=device, dtype=dtype)
        return SliceDict(**converted)

    if isinstance(X, (list, tuple)):
        return type(X)(to_tensor(item, device=device, dtype=dtype) for item in X)

    # Leaf value: the dtype cast only applies here, the containers above recurse.
    raw = X.values if is_pandas_ndframe(X) else X
    result = _to_tensor_base(X=raw, device=device)
    return result if dtype is None else result.to(dtype)
Exemple #6
0
 def _is_regular(self, x):
     return (x is None) or isinstance(x, np.ndarray) or is_pandas_ndframe(x)
Exemple #7
0
 def _is_regular(self, x):
     return (x is None) or isinstance(x, np.ndarray) or is_pandas_ndframe(x)
Exemple #8
0
def multi_indexing(data, i):
    """Perform indexing on multiple data structures.

    Currently supported data types:

    * numpy arrays
    * torch tensors
    * pandas NDFrame
    * a dictionary of the former three
    * a list/tuple of the former three

    ``i`` can be an integer, a slice, or a numpy array of integer or
    boolean dtype. Integer arrays of any width (e.g. ``int32`` as well
    as ``int64``) are accepted and converted to a list of Python
    ints; boolean arrays are converted to the positions of their True
    entries.

    For a list or tuple of containers, the result is always a list.

    Raises
    ------
    IndexError
      If ``i`` is a numpy array whose dtype is neither integer nor
      boolean.

    Example
    -------
    >>> multi_indexing(np.asarray([1, 2, 3]), 0)
    1

    >>> multi_indexing(np.asarray([1, 2, 3]), np.s_[:2])
    array([1, 2])

    >>> multi_indexing(torch.arange(0, 4), np.s_[1:3])
     1
     2
    [torch.FloatTensor of size 2]

    >>> multi_indexing([[1, 2, 3], [4, 5, 6]], np.s_[:2])
    [[1, 2], [4, 5]]

    >>> multi_indexing({'a': [1, 2, 3], 'b': [4, 5, 6]}, np.s_[-2:])
    {'a': [2, 3], 'b': [5, 6]}

    >>> multi_indexing(pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}),
    ...                np.s_[-2:])
       a  b
    1  2  5
    2  3  6

    """
    if isinstance(i, np.ndarray):
        if i.dtype == bool:
            i = tuple(j.tolist() for j in i.nonzero())
        elif np.issubdtype(i.dtype, np.integer):
            # ``dtype == int`` would only match the platform's default
            # integer width and reject e.g. int32 index arrays.
            i = i.tolist()
        else:
            raise IndexError("arrays used as indices must be of integer "
                             "(or boolean) type")

    if isinstance(data, dict):
        # dictionary of containers
        return {k: v[i] for k, v in data.items()}
    if isinstance(data, (list, tuple)):
        # list or tuple of containers
        try:
            return [multi_indexing(x, i) for x in data]
        except TypeError:
            # The elements could not be indexed themselves, so fall
            # through and index ``data`` itself as a flat sequence.
            pass
    if is_pandas_ndframe(data):
        # pandas NDFrame
        return data.iloc[i]
    # torch tensor, numpy ndarray, list
    if isinstance(i, (int, slice)):
        return data[i]
    return safe_indexing(data, i)