def drop_rows_with_any_null_values(data: dd = None) -> dd:
    """
    Discard every row that holds a null value in at least one column.

    :param data: dask dataframe to filter
    :return: dataframe without the null-containing rows
    """
    # "any" is dropna's default mode; it is spelled out to match the name.
    without_nulls = data.dropna(how="any")
    return without_nulls
def impute_nulls(self, data: dataframe):
    """
    Fill missing values, then drop any row that still contains a null.

    Columns listed in ``self.cols["CONTINUOUS"]`` are filled with their
    column mean. Columns listed in ``self.cols["CATEGORICAL"]["STRING"]``
    and ``self.cols["CATEGORICAL"]["NUMERIC"]`` are filled with the first
    row of their computed modes.

    :param data: dataframe to impute; its columns are reassigned on it
    :return: the imputed dataframe with remaining null rows removed
    """
    # Impute continuous columns by their mean
    continuous = self.cols["CONTINUOUS"]
    column_means = data[continuous].mean(axis=0, skipna=True).compute(
        num_workers=self.workers)
    data[continuous] = data[continuous].fillna(column_means, axis=0)

    # Impute categorical columns by their mode
    categorical = self.cols["CATEGORICAL"]
    mode_cols: list = categorical["STRING"] + categorical["NUMERIC"]
    modes = data[mode_cols].mode(dropna=True).compute(
        num_workers=self.workers)
    for name in mode_cols:
        # When a column has several modes, the first row is the one used.
        most_common = modes[name].iloc[0]
        data[name] = data[name].fillna(most_common, axis=0)

    # Rows that still hold a null after both fills are removed.
    return data.dropna(how="any")
def remove_papers_with_null_cols(dask_df: dd, cols: List[str]) -> dd:
    """
    Drop rows whose values are null in every one of the given columns.

    A row is kept as long as at least one column in ``cols`` is non-null;
    columns outside ``cols`` are not considered.

    :param dask_df: dask dataframe to filter (presumably one row per paper)
    :param cols: column names to check for nulls
    :return: dataframe without the rows that are null across all ``cols``
    """
    return dask_df.dropna(subset=cols, how="all")