def load(self, header=True, convert_float=True, nb_index=None, index_col=None):
    """Build an LArray from the values held by this object.

    Parameters
    ----------
    header : bool, optional
        If True, the converted values are handed to ``from_lists`` (along with `nb_index` and `index_col`).
        If False, they are wrapped in an LArray as they are. Defaults to True.
    convert_float : bool, optional
        Forwarded to ``_converted_value``. Defaults to True.
    nb_index : optional
        Forwarded to ``from_lists``. Only used when `header` is True.
    index_col : optional
        Forwarded to ``from_lists``. Only used when `header` is True.

    Returns
    -------
    LArray
        An LArray built from an empty list when ``self.ndim`` is falsy.
    """
    # nothing to convert: bail out before touching the values
    if not self.ndim:
        return LArray([])

    values = self._converted_value(convert_float=convert_float)
    if not header:
        return LArray(values)
    return from_lists(values, nb_index=nb_index, index_col=index_col)
def array(self, data, row_labels=None, column_labels=None, names=None):
    """Build an LArray from a data range and optional label ranges.

    Parameters
    ----------
    data : str
        range for data
    row_labels : str, optional
        range for row labels
    column_labels : str, optional
        range for column labels
    names : list of str, optional
        Axis names, paired positionally with (row labels, column labels). When given, each pair is
        wrapped in an Axis; otherwise the bare label arrays are used as axes.

    Returns
    -------
    LArray
    """
    def labels_from(key):
        # None means that no label range was given for that axis
        return None if key is None else np.asarray(self[key])

    rows = labels_from(row_labels)
    columns = labels_from(column_labels)

    if names is None:
        axes = (rows, columns)
    else:
        axes = [Axis(labels, name) for labels, name in zip((rows, columns), names)]

    # _converted_value is used implicitly via Range.__array__
    values = np.asarray(self[data])
    return LArray(values, axes)
def wrapper(*args, **kwargs):
    # Calls func on plain numpy data and, when make_numpy_broadcastable reported combined axes,
    # wraps the result in an LArray using those axes.
    #
    # TODO: normalize args/kwargs like in LIAM2 so that we can also broadcast if args are given via kwargs
    #       (eg out=)
    args, combined_axes = make_numpy_broadcastable(args)

    # Only raw numpy arrays are passed to the ufunc, even though numpy is normally meant to handle LArray
    # arguments itself via __array_wrap__.
    # The reason is np.clip (and possibly other ufuncs), which is roughly equivalent to
    #     np.maximum(np.minimum(np.asarray(la), high), low)
    # The np.asarray(la) loses the original labels, and numpy then tries to get them back from high, where
    # they are possibly incomplete if broadcasting happened.
    # Concretely, "np.minimum(ndarray, LArray)" fails because it calls __array_wrap__(high, result), which
    # cannot work when broadcasting was involved (high has potentially fewer labels than result). numpy does
    # this because it calls __array_wrap__ on the argument with the highest __array_priority__.
    plain_args = tuple(np.asarray(arg) if isinstance(arg, LArray) else arg for arg in args)
    result = func(*plain_args, **kwargs)

    if not combined_axes:
        return result
    return LArray(result, combined_axes)
def __eq__(self, other):
    """Compare the items of this object with those of `other`, key by key.

    The keys of both objects are combined (own keys first, then the keys only present in `other`)
    and each pair of values is compared using ``larray_nan_equal``.

    Returns
    -------
    LArray
        One comparison result per key, along a single axis called 'name'.
    """
    names = list(self.keys())
    own_names = set(names)
    # append the keys which only exist in other, keeping their order
    names.extend(name for name in other.keys() if name not in own_names)

    flags = [larray_nan_equal(self.get(name), other.get(name)) for name in names]
    return LArray(flags, [Axis(names, 'name')])
def df_aslarray(df, sort_rows=False, sort_columns=False, raw=False, parse_header=True, **kwargs):
    """Convert a pandas DataFrame into an LArray.

    Parameters
    ----------
    df : pandas.DataFrame
        Input dataframe.
    sort_rows : bool, optional
        Forwarded to ``cartesian_product_df``. Defaults to False.
    sort_columns : bool, optional
        Forwarded to ``cartesian_product_df``. Defaults to False.
    raw : bool, optional
        True when the dataframe was read without any index (ie as a 2D dataframe), irrespective of the
        actual data dimensionality. The index columns are then deduced from the column names: every column
        up to and including the first one whose name is a string containing a backslash. When there is no
        such column, the first column is assumed not to contain data. Defaults to False.
    parse_header : bool, optional
        Whether or not to convert the column labels using ``parse``. Defaults to True.
    **kwargs
        Forwarded to ``cartesian_product_df``.

    Returns
    -------
    LArray
    """
    if raw:
        column_names = df.columns.values.tolist()
        # position of the first column whose name is a string containing '\' (None when there is none)
        last_index_pos = next((pos for pos, col_name in enumerate(column_names)
                               if isinstance(col_name, basestring) and '\\' in col_name),
                              None)
        if last_index_pos is None:
            # one dimension: we assume first column will not contain data
            axes_names = column_names[:1]
            df = df.iloc[:, 1:]
        else:
            axes_names = column_names[:last_index_pos + 1]
            # Picking the columns one by one is required to handle int column names (otherwise we could
            # simply use column positions in set_index).
            # This is NOT the same as df.columns[list(range(...))] !
            index_cols = [df.columns[pos] for pos in range(last_index_pos + 1)]
            # TODO: we should pass a flag to df_aslarray so that we can use set_index(..., inplace=True) here
            df = df.set_index(index_cols)
    else:
        axes_names = [decode(name, 'utf8') for name in df.index.names]

    last_name = axes_names[-1]
    if isinstance(last_name, basestring) and '\\' in last_name:
        # 2 or more dimensions with the names of the last two axes folded together as "before_last\last"
        unfolded = [part.strip() for part in last_name.split('\\')]
        axes_names = axes_names[:-1] + unfolded
    elif len(df) == 1 and axes_names == [None]:
        # 1D
        axes_names = [df.columns.name]
    elif len(df) > 1:
        # 2 or more dimensions with the last axis name given as the columns index name
        axes_names.append(df.columns.name)

    if len(axes_names) > 1:
        df, axes_labels = cartesian_product_df(df, sort_rows=sort_rows, sort_columns=sort_columns, **kwargs)
    else:
        axes_labels = []

    # We could inline df_aslarray into the functions that use it, so that the original (non-cartesian) df is
    # freed from memory at this point, but it would be much uglier and would not lower the peak memory usage
    # which happens during cartesian_product_df.reindex

    # Pandas treats column labels as column names (strings) so we need to convert them to values
    column_labels = df.columns.values
    if parse_header:
        last_axis_labels = [parse(label) for label in column_labels]
    else:
        last_axis_labels = list(column_labels)
    axes_labels.append(last_axis_labels)

    axes_names = [None if name is None else str(name) for name in axes_names]
    axes = [Axis(labels, name) for labels, name in zip(axes_labels, axes_names)]
    shape = [len(axis) for axis in axes]
    return LArray(df.values.reshape(shape), axes)
def load(self, header=True, convert_float=True, nb_index=None, index_col=None, fill_value=np.nan,
         sort_rows=False, sort_columns=False, wide=True):
    """Build an LArray from the values held by this object.

    Parameters
    ----------
    header : bool, optional
        If True, the converted values are handed to ``from_lists`` together with all the options below.
        If False, they are wrapped in an LArray as they are. Defaults to True.
    convert_float : bool, optional
        Forwarded to ``_converted_value``. Defaults to True.
    nb_index, index_col, fill_value, sort_rows, sort_columns, wide : optional
        Forwarded unchanged to ``from_lists``. Only used when `header` is True.

    Returns
    -------
    LArray
        An LArray built from an empty list when ``self.ndim`` is falsy.
    """
    # nothing to convert: bail out before touching the values
    if not self.ndim:
        return LArray([])

    values = self._converted_value(convert_float=convert_float)
    if not header:
        return LArray(values)

    options = dict(nb_index=nb_index, index_col=index_col, fill_value=fill_value,
                   sort_rows=sort_rows, sort_columns=sort_columns, wide=wide)
    return from_lists(values, **options)
def from_series(s, sort_rows=False):
    """
    Converts Pandas Series into 1D LArray.

    The axis is named after the series, or after its index when the series itself has no name.

    Parameters
    ----------
    s : Pandas Series
        Input Pandas Series.
    sort_rows : bool, optional
        Whether or not to sort the series by its index (using ``sort_index``) before the conversion.
        Defaults to False.

    Returns
    -------
    LArray
    """
    axis_name = s.name
    if axis_name is None:
        # unnamed series: fall back to the name of its index
        axis_name = s.index.name
    if axis_name is not None:
        axis_name = str(axis_name)

    series = s.sort_index() if sort_rows else s
    axis = Axis(series.index.values, axis_name)
    return LArray(series.values, axis)
def array_equals(self, other):
    """Test if arrays of the current session are equal to those of another session.

    Equivalent to apply :py:meth:`LArray.equals` with flag nan_equals=True to all arrays from two sessions.

    Parameters
    ----------
    other : Session
        Session to compare with.

    Returns
    -------
    Boolean LArray

    See Also
    --------
    Session.equals

    Examples
    --------
    >>> s1 = Session([('arr1', ndtest(2)), ('arr2', ndtest((2, 2)))])
    >>> s2 = Session([('arr1', ndtest(2)), ('arr2', ndtest((2, 2)))])
    >>> s1.array_equals(s2)
    name  arr1  arr2
          True  True

    Different value(s)

    >>> s2.arr1['a1'] = 0
    >>> s1.array_equals(s2)
    name   arr1  arr2
          False  True

    Different label(s)

    >>> s2.arr2 = ndtest("b=b0,b1; a=a0,a1")
    >>> s1.array_equals(s2)
    name   arr1   arr2
          False  False

    Extra/missing array(s)

    >>> s2.arr3 = ndtest((3, 3))
    >>> s1.array_equals(s2)
    name   arr1   arr2   arr3
          False  False  False
    """
    def nan_equal(left, right):
        # a value which cannot be converted to an LArray is never considered equal to anything
        try:
            left = aslarray(left)
        except Exception:
            return False
        else:
            return left.equals(right, nan_equals=True)

    names = list(self.keys())
    own_names = set(names)
    # append the names which only exist in the other session, keeping their order
    names.extend(name for name in other.keys() if name not in own_names)

    flags = [nan_equal(self.get(name), other.get(name)) for name in names]
    return LArray(flags, [Axis(names, 'name')])
def __larray__(self):
    """Return the converted values of this object (see ``_converted_value``) wrapped in an LArray."""
    values = self._converted_value()
    return LArray(values)
def from_frame(df, sort_rows=False, sort_columns=False, parse_header=False, unfold_last_axis_name=False, **kwargs):
    """
    Converts Pandas DataFrame into LArray.

    Parameters
    ----------
    df : pandas.DataFrame
        Input dataframe. By default, name and labels of the last axis are defined by the name and labels of the
        columns Index of the dataframe unless argument unfold_last_axis_name is set to True.
    sort_rows : bool, optional
        Whether or not to sort the rows alphabetically (sorting is more efficient than not sorting).
        Defaults to False.
    sort_columns : bool, optional
        Whether or not to sort the columns alphabetically (sorting is more efficient than not sorting).
        Defaults to False.
    parse_header : bool, optional
        Whether or not to parse columns labels. Pandas treats column labels as strings.
        If True, column labels are converted into int, float or boolean when possible. Defaults to False.
    unfold_last_axis_name : bool, optional
        Whether or not to extract the names of the last two axes by splitting the name of the last index column of
        the dataframe using ``\\``. If the name of the last index column does not contain ``\\``, the last axis
        gets no name. Defaults to False.
    **kwargs
        Forwarded to ``cartesian_product_df``.

    Returns
    -------
    LArray

    See Also
    --------
    LArray.to_frame

    Examples
    --------
    >>> df = ndtest((2, 2, 2)).to_frame()
    >>> df                                                                             # doctest: +NORMALIZE_WHITESPACE
    c      c0  c1
    a  b
    a0 b0   0   1
       b1   2   3
    a1 b0   4   5
       b1   6   7
    >>> from_frame(df)
     a  b\\c  c0  c1
    a0   b0   0   1
    a0   b1   2   3
    a1   b0   4   5
    a1   b1   6   7

    Names of the last two axes written as ``before_last_axis_name\\last_axis_name``

    >>> df = ndtest((2, 2, 2)).to_frame(fold_last_axis_name=True)
    >>> df                                                                             # doctest: +NORMALIZE_WHITESPACE
            c0  c1
    a  b\\c
    a0 b0    0   1
       b1    2   3
    a1 b0    4   5
       b1    6   7
    >>> from_frame(df, unfold_last_axis_name=True)
     a  b\\c  c0  c1
    a0   b0   0   1
    a0   b1   2   3
    a1   b0   4   5
    a1   b1   6   7
    """
    axes_names = [decode(name, 'utf8') for name in df.index.names]

    if not unfold_last_axis_name:
        # the last axis is named after the columns index
        axes_names.append(df.columns.name)
    else:
        # 2 or more dimensions with the names of the last two axes folded together as "before_last\last"
        folded_name = axes_names[-1]
        if isinstance(folded_name, basestring) and '\\' in folded_name:
            unfolded = [part.strip() for part in folded_name.split('\\')]
            axes_names = axes_names[:-1] + unfolded
        else:
            # nothing to unfold: the last axis stays anonymous
            axes_names.append(None)

    df, axes_labels = cartesian_product_df(df, sort_rows=sort_rows, sort_columns=sort_columns, **kwargs)

    # Pandas treats column labels as column names (strings) so we need to convert them to values
    column_labels = df.columns.values
    if parse_header:
        last_axis_labels = [parse(label) for label in column_labels]
    else:
        last_axis_labels = list(column_labels)
    axes_labels.append(last_axis_labels)

    axes_names = [None if name is None else str(name) for name in axes_names]
    axes = [Axis(labels, name) for labels, name in zip(axes_labels, axes_names)]
    shape = [len(axis) for axis in axes]
    return LArray(df.values.reshape(shape), axes)