def __getitem__(self, key):
    """Return the item stored under ``key``.

    The key is first checked against the DataFrame's primary axis with
    ``is_valid_index``. A valid key is looked up directly via ``fetch_item``.
    Otherwise a warning is emitted (subject to ``self.warnings``), the data
    is re-formatted along the axis reported by ``is_valid_index``, and the
    re-formatted data is indexed with ``key``.

    Args:
        key (any): The index to look up

    Returns:
        (any): Whatever the underlying data holds for ``key``
    """
    valid, reason, wanted_axis = is_valid_index(key, self.axis)

    # Fast path: the key already suits the axis the data is stored in.
    if valid:
        return fetch_item(self.data, key)

    notice = "{} Pnguin auto-handles this, but this could take a second for larger datasets".format(
        reason
    )
    print_warning(notice, self.warnings)

    # Slow path: convert the data to the axis the key needs, then index it.
    converted = format_input(self.data, wanted_axis)
    return converted[key]
def head(self, n: int = 5):
    """Return the first n rows of this DataFrame as a new DataFrame

    Args:
        n (int, optional): How many leading rows to keep. 5 by default

    Returns:
        (pnguin.DataFrame): A new DataFrame built from the first n rows of
            self, using the same primary axis as self
    """
    # Slicing must happen on row-form data; convert only when the
    # DataFrame is not already stored by row.
    if self.axis == Axis.row:
        rows = self.data
    else:
        rows = format_input(self.data, Axis.row)

    return DataFrame(rows[0:n], self.axis)
def __init__(self, data, axis: Axis = Axis.col, warnings: bool = True):
    """Build a pnguin DataFrame from raw input data

    Args:
        data (any): The input data, given either as a list of dicts (rows)
            or as a dict of lists (columns)
        axis (Axis, optional): The primary axis the DataFrame operates on.
            Axis.col by default
        warnings (bool, optional): Whether pnguin should print warnings.
            True by default

    Returns:
        (pnguin.DataFrame): A new pnguin DataFrame
    """
    self.axis, self.warnings = axis, warnings
    # The input is passed through format_input for the chosen primary axis
    # before being stored.
    self.data = format_input(data, axis)
def apply(self, x: Callable, axis: Axis = Axis.row, inplace: bool = False):
    """Run a function over every row or every column of the DataFrame

    Args:
        x (callable): The function handed to apply_rows / apply_cols
        axis (Axis): The axis to operate along (Axis.row by default)
        inplace (bool): When True, overwrite this DataFrame's data and
            return None; when False, return a new DataFrame instead

    Returns:
        (pnguin.DataFrame or None)
    """
    # Pick the data layout and the matching helper for the requested axis.
    if axis == Axis.row:
        result = apply_rows(self._data_as_rows(), x)
    else:
        result = apply_cols(self._data_as_cols(), x)

    if not inplace:
        return DataFrame(result, self.axis)

    # Store the result back in this DataFrame's own primary-axis layout.
    self.data = format_input(result, self.axis)
    return None
def __setitem__(self, key, value):
    """Store ``value`` under ``key``.

    The key is checked against the DataFrame's primary axis with
    ``is_valid_index``. A valid key is written straight into ``self.data``
    via ``set_item``. Otherwise a warning is emitted (subject to
    ``self.warnings``), the data is obtained in the layout of the axis
    reported by ``is_valid_index``, the item is set there, and the result
    is re-formatted back to the primary axis and stored.

    Args:
        key (any): The index to assign to
        value (any): The value to store
    """
    valid, reason, wanted_axis = is_valid_index(key, self.axis)

    # Fast path: the key already suits the stored layout.
    if valid:
        set_item(self.data, key, value)
        return

    notice = "{} Pnguin auto-handles this, but this could take a second for larger datasets".format(
        reason
    )
    print_warning(notice, self.warnings)

    if wanted_axis == Axis.col:
        working = self._data_as_cols()
    else:
        working = self._data_as_rows()

    set_item(working, key, value)
    # Convert back so self.data stays in the DataFrame's primary-axis form.
    self.data = format_input(working, self.axis)
def dropna(self, exclude: list = None, inplace: bool = False):
    """Drop those rows with NaN values

    Args:
        exclude (list, optional): Column names that are exempt from the
            dropping criteria. Omitting it (or passing None) is treated as
            an empty list
        inplace (bool, optional): Perform this operation in-place, and don't
            return a new DataFrame. False by default

    Returns:
        (pnguin.DataFrame or None): None when inplace is True, otherwise a
            new DataFrame built from the remaining rows
    """
    # A literal [] default would be created once and shared by every call
    # (and handed to drop_nan_rows each time); build a fresh list instead.
    if exclude is None:
        exclude = []

    target = self._data_as_rows()
    rectified = drop_nan_rows(target, exclude)

    if inplace:
        # Store the result back in this DataFrame's primary-axis layout.
        self.data = format_input(rectified, self.axis)
        return None

    return DataFrame(rectified, self.axis)
def _data_as_cols(self):
    """Return the DataFrame's data in column layout.

    When the primary axis is already Axis.col the stored object itself is
    returned (not a copy); otherwise the data is passed through
    format_input for Axis.col.
    """
    if self.axis == Axis.col:
        return self.data
    return format_input(self.data, Axis.col)
def _data_as_rows(self):
    """Return the DataFrame's data in row layout.

    When the primary axis is already Axis.row the stored object itself is
    returned (not a copy); otherwise the data is passed through
    format_input for Axis.row.
    """
    if self.axis == Axis.row:
        return self.data
    return format_input(self.data, Axis.row)