def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame:
    """
    Cut the positional range ``slice_obj`` out of ``sdata`` along ``self.axis``.

    Fastpath equivalent to ``sdata.iloc[slice_obj]`` when ``self.axis == 0``
    and to ``sdata.iloc[:, slice_obj]`` otherwise. The result is finalized
    against ``sdata`` with ``method="groupby"`` before being returned.
    """
    # The manager is sliced with the flipped axis number (frame axis 0 is
    # manager axis 1 and vice versa).
    manager_axis = 1 - self.axis
    sliced_mgr = sdata._mgr.get_slice(slice_obj, axis=manager_axis)
    chopped = sdata._constructor(sliced_mgr)
    return chopped.__finalize__(sdata, method="groupby")
def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame:
    """
    Cut the positional range ``slice_obj`` out of ``sdata`` along ``self.axis``.

    Fastpath equivalent to ``sdata.iloc[slice_obj]`` when ``self.axis == 0``
    and to ``sdata.iloc[:, slice_obj]`` otherwise.
    """
    # The manager is sliced with the flipped axis number (frame axis 0 is
    # manager axis 1 and vice versa).
    manager_axis = 1 - self.axis
    sliced_mgr = sdata._mgr.get_slice(slice_obj, axis=manager_axis)
    # __finalize__ not called here, must be applied by caller if applicable
    return sdata._constructor(sliced_mgr)
def melt(
    frame: DataFrame,
    id_vars=None,
    value_vars=None,
    var_name=None,
    value_name="value",
    col_level=None,
) -> DataFrame:
    """
    Unpivot a DataFrame from wide to long format.

    Columns listed in ``id_vars`` are kept as identifier columns and repeated
    once per melted column. The remaining columns (or only ``value_vars``
    when given) are flattened column by column into a single value column,
    next to one label column per entry of ``var_name``.

    Parameters
    ----------
    frame : DataFrame
        Frame to unpivot. The function works on a column selection or a copy
        of it.
    id_vars : scalar or list-like, optional
        Column(s) to use as identifier variables. Must be a ``list`` when the
        columns are a MultiIndex.
    value_vars : scalar or list-like, optional
        Column(s) to unpivot. When omitted, every column that is not in
        ``id_vars`` is melted. Must be a ``list`` when the columns are a
        MultiIndex.
    var_name : str or list of str, optional
        Name(s) of the label column(s). Defaults to the column index name(s),
        falling back to ``"variable"`` (or ``"variable_<i>"`` per level when
        MultiIndex level names are not unique).
    value_name : scalar, default "value"
        Name of the column holding the melted values.
    col_level : int or str, optional
        If given, the columns are replaced by this level of the column index
        before melting.

    Returns
    -------
    DataFrame
        Long-format frame with columns ``id_vars + var_name + [value_name]``.

    Raises
    ------
    ValueError
        If the columns are a MultiIndex and ``id_vars`` or ``value_vars`` is
        list-like but not a ``list``.
    KeyError
        If a list-like ``id_vars`` or ``value_vars`` names labels that are not
        present in the columns.
    """
    # TODO: what about the existing index?
    # If multiindex, gather names of columns on all level for checking presence
    # of `id_vars` and `value_vars`
    if isinstance(frame.columns, ABCMultiIndex):
        cols = [x for c in frame.columns for x in c]
    else:
        cols = list(frame.columns)

    if id_vars is not None:
        if not is_list_like(id_vars):
            id_vars = [id_vars]
        elif isinstance(frame.columns, ABCMultiIndex) and not isinstance(
            id_vars, list
        ):
            raise ValueError(
                "id_vars must be a list of tuples when columns are a MultiIndex"
            )
        else:
            # Check that `id_vars` are in frame
            id_vars = list(id_vars)
            missing = Index(com.flatten(id_vars)).difference(cols)
            if not missing.empty:
                raise KeyError(
                    "The following 'id_vars' are not present"
                    " in the DataFrame: {missing}"
                    "".format(missing=list(missing))
                )
    else:
        id_vars = []

    if value_vars is not None:
        if not is_list_like(value_vars):
            value_vars = [value_vars]
        elif isinstance(frame.columns, ABCMultiIndex) and not isinstance(
            value_vars, list
        ):
            raise ValueError(
                "value_vars must be a list of tuples when columns are a MultiIndex"
            )
        else:
            value_vars = list(value_vars)
            # Check that `value_vars` are in frame
            missing = Index(com.flatten(value_vars)).difference(cols)
            if not missing.empty:
                raise KeyError(
                    "The following 'value_vars' are not present in"
                    " the DataFrame: {missing}"
                    "".format(missing=list(missing))
                )
        frame = frame.loc[:, id_vars + value_vars]
    else:
        frame = frame.copy()

    if col_level is not None:  # allow list or other?
        # frame is a copy
        frame.columns = frame.columns.get_level_values(col_level)

    if var_name is None:
        if isinstance(frame.columns, ABCMultiIndex):
            if len(frame.columns.names) == len(set(frame.columns.names)):
                var_name = frame.columns.names
            else:
                var_name = [
                    "variable_{i}".format(i=i)
                    for i in range(len(frame.columns.names))
                ]
        else:
            var_name = [
                frame.columns.name if frame.columns.name is not None else "variable"
            ]
    if isinstance(var_name, str):
        var_name = [var_name]

    # N rows; K melted columns (total columns minus the identifier columns).
    N, K = frame.shape
    K -= len(id_vars)

    mdata = {}
    for col in id_vars:
        id_data = frame.pop(col)
        if is_extension_array_dtype(id_data):
            if K > 0:
                id_data = concat([id_data] * K, ignore_index=True)
            else:
                # concat() raises on an empty list; with nothing to melt,
                # build the empty column directly, keeping name and dtype.
                id_data = type(id_data)(
                    [], name=id_data.name, dtype=id_data.dtype
                )
        else:
            id_data = np.tile(id_data.values, K)
        mdata[col] = id_data

    mcolumns = id_vars + var_name + [value_name]

    # Column-major flatten: all rows of the first melted column, then the
    # second, and so on, matching the tiling of the identifier columns.
    mdata[value_name] = frame.values.ravel("F")
    for i, col in enumerate(var_name):
        # asanyarray will keep the columns as an Index
        mdata[col] = np.asanyarray(frame.columns._get_level_values(i)).repeat(N)

    return frame._constructor(mdata, columns=mcolumns)
def lreshape(data: DataFrame, groups, dropna: bool = True, label=None) -> DataFrame:
    """
    Reshape wide-format data to long by stacking groups of columns.

    Each entry of ``groups`` names a new column and the existing columns that
    are concatenated, in order, to form it. Columns of ``data`` that appear in
    no group are kept as identifier columns and repeated once per stacked
    column.

    Parameters
    ----------
    data : DataFrame
    groups : dict
        {new_name : list_of_columns}. An iterable of
        ``(new_name, list_of_columns)`` pairs is also accepted.
    dropna : boolean, default True
        Drop rows in which any of the new columns holds a missing value.
    label : optional
        Not used by the implementation.

    Returns
    -------
    reshaped : DataFrame
        Identifier columns followed by the new columns.

    Raises
    ------
    ValueError
        If the column lists in ``groups`` do not all have the same length.

    Examples
    --------
    >>> data = pd.DataFrame({'hr1': [514, 573], 'hr2': [545, 526],
    ...                      'team': ['Red Sox', 'Yankees'],
    ...                      'year1': [2007, 2007], 'year2': [2008, 2008]})
    >>> data
       hr1  hr2     team  year1  year2
    0  514  545  Red Sox   2007   2008
    1  573  526  Yankees   2007   2008

    >>> pd.lreshape(data, {'year': ['year1', 'year2'], 'hr': ['hr1', 'hr2']})
          team  year   hr
    0  Red Sox  2007  514
    1  Yankees  2007  573
    2  Red Sox  2008  545
    3  Yankees  2008  526
    """
    if isinstance(groups, dict):
        new_names = list(groups.keys())
        column_lists = list(groups.values())
    else:
        new_names, column_lists = zip(*groups)

    # Every column consumed by some group; whatever is left identifies a row.
    stacked_cols = list(set.union(*map(set, column_lists)))
    id_cols = list(data.columns.difference(stacked_cols))

    width = len(column_lists[0])
    if any(len(sources) != width for sources in column_lists):
        raise ValueError("All column lists must be same length")

    mdata = {
        target: concat_compat([data[source].values for source in sources])
        for target, sources in zip(new_names, column_lists)
    }
    pivot_cols = list(new_names)

    # Repeat the identifier columns once per stacked source column.
    mdata.update({name: np.tile(data[name].values, width) for name in id_cols})

    if dropna:
        keep = np.ones(len(mdata[pivot_cols[0]]), dtype=bool)
        for name in pivot_cols:
            keep &= notna(mdata[name])
        if not keep.all():
            mdata = {name: column[keep] for name, column in mdata.items()}

    return data._constructor(mdata, columns=id_cols + pivot_cols)