Beispiel #1
0
        def load(self, header=True, convert_float=True, nb_index=None, index_col=None):
            """
            Load the content of this object as an LArray.

            Parameters
            ----------
            header : bool, optional
                If True, the converted values are handed to ``from_lists`` together with ``nb_index`` and
                ``index_col``. If False, they are wrapped directly in an LArray. Defaults to True.
            convert_float : bool, optional
                Forwarded to ``self._converted_value``. Defaults to True.
            nb_index : optional
                Forwarded to ``from_lists``. Only used when ``header`` is True.
            index_col : optional
                Forwarded to ``from_lists``. Only used when ``header`` is True.

            Returns
            -------
            LArray
            """
            # a falsy ndim yields an empty array without converting any value
            if not self.ndim:
                return LArray([])

            values = self._converted_value(convert_float=convert_float)
            if not header:
                return LArray(values)
            return from_lists(values, nb_index=nb_index, index_col=index_col)
Beispiel #2
0
        def array(self, data, row_labels=None, column_labels=None, names=None):
            """
            Build an LArray from a data range and optional label ranges.

            Parameters
            ----------
            data : str
                range for data
            row_labels : str, optional
                range for row labels
            column_labels : str, optional
                range for column labels
            names : list of str, optional
                Names paired, in order, with the row labels and the column labels to build the axes.
                When omitted, the labels are passed to LArray as-is.

            Returns
            -------
            LArray
            """
            # each label range, when given, is looked up on self and converted to a numpy array
            row_axis = None if row_labels is None else np.asarray(self[row_labels])
            col_axis = None if column_labels is None else np.asarray(self[column_labels])

            if names is None:
                axes = (row_axis, col_axis)
            else:
                axes = [Axis(axis_labels, axis_name)
                        for axis_labels, axis_name in zip((row_axis, col_axis), names)]

            # _converted_value is used implicitly via Range.__array__
            values = np.asarray(self[data])
            return LArray(values, axes)
Beispiel #3
0
    def wrapper(*args, **kwargs):
        # Broadcast the positional arguments, call the wrapped function on raw numpy data and wrap the result
        # back into an LArray when combined axes are available.
        #
        # TODO: normalize args/kwargs like in LIAM2 so that we can also broadcast if args are given via kwargs
        #       (eg out=)
        args, combined_axes = make_numpy_broadcastable(args)

        # Only raw numpy arrays are handed to the ufuncs, even though numpy is normally meant to handle LArray
        # arguments itself via __array_wrap__.
        #
        # The reason is np.clip (and possibly other ufuncs), which is roughly equivalent to
        #     np.maximum(np.minimum(np.asarray(la), high), low)
        # The np.asarray(la) loses the original labels, and numpy then tries to get them back from high, where
        # they are possibly incomplete if broadcasting happened.
        #
        # Concretely, "np.minimum(ndarray, LArray)" fails because numpy calls __array_wrap__(high, result) on the
        # argument with the highest __array_priority__, which cannot work if broadcasting was involved (high has
        # potentially less labels than result).
        raw_args = []
        for arg in args:
            raw_args.append(np.asarray(arg) if isinstance(arg, LArray) else arg)

        res_data = func(*raw_args, **kwargs)
        if not combined_axes:
            return res_data
        return LArray(res_data, combined_axes)
Beispiel #4
0
 def __eq__(self, other):
     """
     Compare the entries of this object with those of another one, key by key.

     The keys of ``self`` come first, followed by the keys found only in ``other``. Each pair of entries
     (fetched with ``get``) is compared using ``larray_nan_equal``.

     Returns
     -------
     LArray
         One result per key, along a single axis named 'name'.
     """
     known = set(self.keys())
     all_keys = list(self.keys())
     all_keys.extend(key for key in other.keys() if key not in known)

     res = []
     for key in all_keys:
         res.append(larray_nan_equal(self.get(key), other.get(key)))
     return LArray(res, [Axis(all_keys, 'name')])
Beispiel #5
0
def df_aslarray(df, sort_rows=False, sort_columns=False, raw=False, parse_header=True, **kwargs):
    """
    Converts a Pandas DataFrame into an LArray.

    Parameters
    ----------
    df : pandas.DataFrame
        Input dataframe.
    sort_rows : bool, optional
        Forwarded to ``cartesian_product_df``, which is only called when more than one axis name was found.
        Defaults to False.
    sort_columns : bool, optional
        Forwarded to ``cartesian_product_df`` (same condition as ``sort_rows``). Defaults to False.
    raw : bool, optional
        If True, the dataframe is considered to have been read without any index: the axes names are taken from
        the leading column names, up to and including the first column whose name is a string containing a
        backslash. If no column name contains a backslash, only the first column is used and it is dropped
        from the data. If False, the axes names are taken from ``df.index.names``. Defaults to False.
    parse_header : bool, optional
        If True, each column label is passed through ``parse``; otherwise the column labels are used as-is.
        Defaults to True.
    **kwargs
        Forwarded to ``cartesian_product_df``.

    Returns
    -------
    LArray
    """
    # the dataframe was read without index at all (ie 2D dataframe), irrespective of the actual data dimensionality
    if raw:
        columns = df.columns.values.tolist()
        try:
            # take the first column which contains '\'
            # pos_last = next(i for i, v in enumerate(columns) if '\\' in str(v))
            pos_last = next(i for i, v in enumerate(columns) if isinstance(v, basestring) and '\\' in v)
            onedim = False
        except StopIteration:
            # we assume first column will not contain data
            pos_last = 0
            onedim = True

        # names of all columns up to (and including) the one at pos_last
        axes_names = columns[:pos_last + 1]
        if onedim:
            # drop the first column (by position); its name was already captured in axes_names
            df = df.iloc[:, 1:]
        else:
            # This is required to handle int column names (otherwise we can simply use column positions in set_index).
            # This is NOT the same as df.columns[list(range(...))] !
            index_columns = [df.columns[i] for i in range(pos_last + 1)]
            # TODO: we should pass a flag to df_aslarray so that we can use inplace=True here
            # df.set_index(index_columns, inplace=True)
            df = df.set_index(index_columns)
    else:
        axes_names = [decode(name, 'utf8') for name in df.index.names]

    # handle 2 or more dimensions with the last axis name given using \
    if isinstance(axes_names[-1], basestring) and '\\' in axes_names[-1]:
        # split "before_last\last" into two separate (stripped) axis names
        last_axes = [name.strip() for name in axes_names[-1].split('\\')]
        axes_names = axes_names[:-1] + last_axes
    # handle 1D
    elif len(df) == 1 and axes_names == [None]:
        axes_names = [df.columns.name]
    # handle 2 or more dimensions with the last axis name given as the columns index name
    elif len(df) > 1:
        axes_names += [df.columns.name]

    if len(axes_names) > 1:
        df, axes_labels = cartesian_product_df(df, sort_rows=sort_rows, sort_columns=sort_columns, **kwargs)
    else:
        # single axis: its labels are the column labels appended below
        axes_labels = []

    # we could inline df_aslarray into the functions that use it, so that the
    # original (non-cartesian) df is freed from memory at this point, but it
    # would be much uglier and would not lower the peak memory usage which
    # happens during cartesian_product_df.reindex

    # Pandas treats column labels as column names (strings) so we need to convert them to values
    last_axis_labels = [parse(cell) for cell in df.columns.values] if parse_header else list(df.columns.values)
    axes_labels.append(last_axis_labels)
    # axis names are converted to str, except None which is kept as-is
    axes_names = [str(name) if name is not None else name
                  for name in axes_names]

    axes = [Axis(labels, name) for labels, name in zip(axes_labels, axes_names)]
    # reshape the 2D values to one dimension per axis
    data = df.values.reshape([len(axis) for axis in axes])
    return LArray(data, axes)
Beispiel #6
0
        def load(self,
                 header=True,
                 convert_float=True,
                 nb_index=None,
                 index_col=None,
                 fill_value=np.nan,
                 sort_rows=False,
                 sort_columns=False,
                 wide=True):
            """
            Load the content of this object as an LArray.

            Parameters
            ----------
            header : bool, optional
                If True, the converted values are handed to ``from_lists`` together with all the remaining options.
                If False, they are wrapped directly in an LArray. Defaults to True.
            convert_float : bool, optional
                Forwarded to ``self._converted_value``. Defaults to True.
            nb_index, index_col, fill_value, sort_rows, sort_columns, wide : optional
                Forwarded unchanged to ``from_lists``. Only used when ``header`` is True.

            Returns
            -------
            LArray
            """
            # a falsy ndim yields an empty array without converting any value
            if not self.ndim:
                return LArray([])

            values = self._converted_value(convert_float=convert_float)
            if not header:
                return LArray(values)

            options = dict(nb_index=nb_index,
                           index_col=index_col,
                           fill_value=fill_value,
                           sort_rows=sort_rows,
                           sort_columns=sort_columns,
                           wide=wide)
            return from_lists(values, **options)
Beispiel #7
0
def from_series(s, sort_rows=False):
    """
    Converts Pandas Series into 1D LArray.

    The axis name is the name of the series or, when the series has no name, the name of its index.
    A name which is not None is converted to str.

    Parameters
    ----------
    s : Pandas Series
        Input Pandas Series.
    sort_rows : bool, optional
        Whether or not to sort the series by its index (using ``sort_index``) before converting it.
        Defaults to False.

    Returns
    -------
    LArray
    """
    name = s.name
    if name is None:
        # fall back on the index name when the series itself is unnamed
        name = s.index.name
    if name is not None:
        name = str(name)

    series = s.sort_index() if sort_rows else s
    values = series.values
    axis = Axis(series.index.values, name)
    return LArray(values, axis)
Beispiel #8
0
    def array_equals(self, other):
        """Test, array by array, whether the current session matches another session.

        Each array of the two sessions is compared using :py:meth:`LArray.equals` with nan_equals=True.
        An entry of the current session which cannot be converted to an array is reported as not equal.

        Parameters
        ----------
        other : Session
            Session to compare with.

        Returns
        -------
        Boolean LArray

        See Also
        --------
        Session.equals

        Examples
        --------
        >>> s1 = Session([('arr1', ndtest(2)), ('arr2', ndtest((2, 2)))])
        >>> s2 = Session([('arr1', ndtest(2)), ('arr2', ndtest((2, 2)))])
        >>> s1.array_equals(s2)
        name  arr1  arr2
              True  True

        Different value(s)

        >>> s2.arr1['a1'] = 0
        >>> s1.array_equals(s2)
        name   arr1  arr2
              False  True

        Different label(s)

        >>> s2.arr2 = ndtest("b=b0,b1; a=a0,a1")
        >>> s1.array_equals(s2)
        name   arr1   arr2
              False  False

        Extra/missing array(s)

        >>> s2.arr3 = ndtest((3, 3))
        >>> s1.array_equals(s2)
        name   arr1   arr2   arr3
              False  False  False
        """
        def nan_equal(left, right):
            # anything which cannot be converted to an array is considered different
            try:
                left = aslarray(left)
            except Exception:
                return False
            else:
                return left.equals(right, nan_equals=True)

        # keys of the current session first, then the keys only present in the other session
        known = set(self.keys())
        all_keys = list(self.keys())
        all_keys.extend(key for key in other.keys() if key not in known)

        res = []
        for key in all_keys:
            res.append(nan_equal(self.get(key), other.get(key)))
        return LArray(res, [Axis(all_keys, 'name')])
Beispiel #9
0
 def __larray__(self):
     """Return the converted value of this object wrapped in an LArray."""
     converted = self._converted_value()
     return LArray(converted)
Beispiel #10
0
def from_frame(df,
               sort_rows=False,
               sort_columns=False,
               parse_header=False,
               unfold_last_axis_name=False,
               **kwargs):
    """
    Converts Pandas DataFrame into LArray.

    Parameters
    ----------
    df : pandas.DataFrame
        Input dataframe. By default, name and labels of the last axis are defined by the name and labels of the
        columns Index of the dataframe unless argument unfold_last_axis_name is set to True.
    sort_rows : bool, optional
        Whether or not to sort the rows alphabetically (sorting is more efficient than not sorting). Defaults to False.
    sort_columns : bool, optional
        Whether or not to sort the columns alphabetically (sorting is more efficient than not sorting).
        Defaults to False.
    parse_header : bool, optional
        Whether or not to parse columns labels. Pandas treats column labels as strings.
        If True, column labels are converted into int, float or boolean when possible. Defaults to False.
    unfold_last_axis_name : bool, optional
        Whether or not to extract the names of the last two axes by splitting the name of the last index column of the
        dataframe using ``\\``. When the name of the last index column does not contain ``\\``, the last axis
        is left unnamed. Defaults to False.
    **kwargs
        Forwarded to ``cartesian_product_df``.

    Returns
    -------
    LArray

    See Also
    --------
    LArray.to_frame

    Examples
    --------
    >>> df = ndtest((2, 2, 2)).to_frame()
    >>> df                                                                             # doctest: +NORMALIZE_WHITESPACE
    c      c0  c1
    a  b
    a0 b0   0   1
       b1   2   3
    a1 b0   4   5
       b1   6   7
    >>> from_frame(df)
     a  b\\c  c0  c1
    a0   b0   0   1
    a0   b1   2   3
    a1   b0   4   5
    a1   b1   6   7

    Names of the last two axes written as ``before_last_axis_name\\last_axis_name``

    >>> df = ndtest((2, 2, 2)).to_frame(fold_last_axis_name=True)
    >>> df                                                                             # doctest: +NORMALIZE_WHITESPACE
            c0  c1
    a  b\\c
    a0 b0    0   1
       b1    2   3
    a1 b0    4   5
       b1    6   7
    >>> from_frame(df, unfold_last_axis_name=True)
     a  b\\c  c0  c1
    a0   b0   0   1
    a0   b1   2   3
    a1   b0   4   5
    a1   b1   6   7
    """
    index_names = [decode(name, 'utf8') for name in df.index.names]

    if not unfold_last_axis_name:
        # the last axis is named after the columns index
        axes_names = index_names + [df.columns.name]
    else:
        # handle 2 or more dimensions with the last axis name given using \
        folded_name = index_names[-1]
        if isinstance(folded_name, basestring) and '\\' in folded_name:
            unfolded = [part.strip() for part in folded_name.split('\\')]
            axes_names = index_names[:-1] + unfolded
        else:
            axes_names = index_names + [None]

    df, axes_labels = cartesian_product_df(df, sort_rows=sort_rows, sort_columns=sort_columns, **kwargs)

    # Pandas treats column labels as column names (strings) so we need to convert them to values
    if parse_header:
        last_axis_labels = [parse(cell) for cell in df.columns.values]
    else:
        last_axis_labels = list(df.columns.values)
    axes_labels.append(last_axis_labels)

    # names are converted to str, except None which is kept as-is
    axes_names = [None if name is None else str(name) for name in axes_names]

    axes = []
    for labels, name in zip(axes_labels, axes_names):
        axes.append(Axis(labels, name))

    shape = [len(axis) for axis in axes]
    data = df.values.reshape(shape)
    return LArray(data, axes)