def plot_2d_line(df: pd.DataFrame, x: str, y: str, color: str = None, criteria: str = None) -> go.Figure:
    """
    Draw a line chart of column ``y`` against column ``x``.

    :param df: Data set
    :param x: name of the column used for the X axis
    :param y: name of the column used for the Y axis
    :param color: optional name of the grouping column; when it is not a
        column of the data a warning is logged and no grouping is applied
    :param criteria: optional condition handed to ``filter_records`` to
        select rows before plotting
    :return: the figure built by ``px.line``, or ``None`` when ``x`` or
        ``y`` is not a column of the (filtered) data
    """
    frame = df if criteria is None else filter_records(df, criteria)

    # Both axis columns are mandatory: report the first missing one and stop.
    axis_checks = (
        (x, 'x not found in columns'),
        (y, 'y not found in columns'),
    )
    for axis_column, message in axis_checks:
        if axis_column not in frame.columns:
            logger.log_error(message)
            return None

    # The grouping column is optional, so an unknown name only downgrades
    # the plot to an ungrouped one.
    group_by = color
    if group_by is not None and group_by not in frame.columns:
        logger.log_warn('color column not found')
        group_by = None

    return px.line(frame, x=x, y=y, color=group_by)
def drop_na(df: pd.DataFrame, columns='all') -> pd.DataFrame:
    """
    Return a new data set without the rows that contain missing values.

    :param df: Data Set (left unmodified)
    :param columns: iterable of column names to inspect, or the string
        "all" (default) to inspect every column
    :return: new Data Set with the offending rows removed
    """
    # NOTE(review): drop_na is defined a second time further down in this
    # file; at import time the later definition replaces this one.

    # Compare by value, never by identity: ``columns is 'all'`` only works
    # when the interpreter happens to intern both strings. The isinstance
    # guard keeps array-like arguments (e.g. a pandas Index) from turning
    # the comparison into an element-wise one.
    if isinstance(columns, str) and columns == 'all':
        requested = list(df.columns)
    else:
        requested = list(columns)

    present = [name for name in requested if name in df.columns]
    # ``!=`` rather than ``is not``: identity of ints is only reliable for
    # CPython's small-int cache, so wide frames produced a bogus warning.
    if len(present) != len(requested):
        logger.log_warn('some columns not found in dataFrame')

    # dropna already returns a new frame, so no explicit copy is required.
    return df.dropna(subset=present)
def fill_na_values(df: pd.DataFrame, columns='all', value=0) -> pd.DataFrame:
    """
    Return a new data set in which missing values are replaced by ``value``.

    :param df: Data Set (left unmodified)
    :param columns: iterable of column names to fill, or the string "all"
        (default) to fill every column
    :param value: replacement value, default: 0
    :return: new Data Set
    """
    # NOTE(review): fill_na_values is defined a second time further down in
    # this file; at import time the later definition replaces this one.

    # Compare by value, never by identity: ``columns is 'all'`` only works
    # when the interpreter happens to intern both strings. The isinstance
    # guard keeps array-like arguments (e.g. a pandas Index) from turning
    # the comparison into an element-wise one.
    if isinstance(columns, str) and columns == 'all':
        # fillna returns a new frame, so the input stays untouched.
        return df.fillna(value)

    requested = list(columns)
    present = [name for name in requested if name in df.columns]
    # ``!=`` rather than ``is not``: identity of ints is only reliable for
    # CPython's small-int cache, so wide frames produced a bogus warning.
    if len(present) != len(requested):
        logger.log_warn('some columns not found in dataFrame')

    df_copy = df.copy()
    if present:
        # Only touch the requested columns; everything else keeps its NaNs.
        df_copy[present] = df_copy[present].fillna(value)
    return df_copy
def drop_na(df: pd.DataFrame, columns='all') -> pd.DataFrame:
    """
    Return a new data set without the rows that contain missing values.

    :param df: Data Set (left unmodified)
    :param columns: iterable of column names to inspect, or the string
        "all" (default) to inspect every column
    :return: new Data Set with the offending rows removed
    """
    # NOTE(review): an earlier definition of drop_na also exists in this
    # file; being the later one, this definition is the one that stays bound.

    # Value comparison instead of ``is``: string identity depends on
    # interning and is not guaranteed. Checking the type first avoids an
    # element-wise comparison when an array-like (e.g. a pandas Index) is
    # passed.
    wants_all = isinstance(columns, str) and columns == 'all'
    requested = list(df.columns) if wants_all else list(columns)

    known = set(df.columns)
    present = [name for name in requested if name in known]
    # Lengths are compared with ``!=``; ``is not`` on ints misfires outside
    # CPython's small-int cache and logged a spurious warning on wide frames.
    if len(present) != len(requested):
        logger.log_warn('some columns not found in dataFrame')

    # dropna hands back a fresh frame, which makes a prior copy unnecessary.
    return df.dropna(subset=present)
def plot_2d(df: pd.DataFrame, x: str, y: str, color: str = None, trendline: bool = False, criteria: str = None) -> go.Figure:
    """
    Draw a scatter plot of column ``y`` against column ``x``.

    :param df: Data set
    :param x: name of the column used for the X axis
    :param y: name of the column used for the Y axis
    :param color: optional name of the grouping column; when it is not a
        column of the data a warning is logged and no grouping is applied
    :param trendline: when truthy an 'ols' trendline is requested from
        ``px.scatter``; otherwise no trendline is requested
    :param criteria: optional condition handed to ``filter_records`` to
        select rows before plotting
    :return: the figure built by ``px.scatter``, or ``None`` when ``x`` or
        ``y`` is not a column of the (filtered) data
    """
    if criteria is not None:
        df = filter_records(df, criteria)

    if x not in df.columns:
        logger.log_error('x not found in columns')
        return None
    if y not in df.columns:
        logger.log_error('y not found in columns')
        return None

    # The grouping column is optional: fall back to an ungrouped plot.
    if color is not None and color not in df.columns:
        logger.log_warn('color column not found')
        color = None

    # px.scatter expects a trendline *name* or None. Forwarding the boolean
    # False (the default here) is outside that contract, so translate the
    # flag explicitly instead of passing it through.
    trendline_kind = 'ols' if trendline else None
    return px.scatter(df, x=x, y=y, color=color, trendline=trendline_kind)
def fill_na_values(df: pd.DataFrame, columns='all', value=0) -> pd.DataFrame:
    """
    Return a new data set in which missing values are replaced by ``value``.

    :param df: Data Set (left unmodified)
    :param columns: iterable of column names to fill, or the string "all"
        (default) to fill every column
    :param value: replacement value, default: 0
    :return: new Data Set
    """
    # NOTE(review): an earlier definition of fill_na_values also exists in
    # this file; being the later one, this definition is the one that stays
    # bound.

    # Value comparison instead of ``is``: string identity depends on
    # interning and is not guaranteed. Checking the type first avoids an
    # element-wise comparison when an array-like (e.g. a pandas Index) is
    # passed.
    if isinstance(columns, str) and columns == 'all':
        # fillna produces a new frame; the caller's data is not modified.
        return df.fillna(value)

    requested = list(columns)
    known = set(df.columns)
    present = [name for name in requested if name in known]
    # Lengths are compared with ``!=``; ``is not`` on ints misfires outside
    # CPython's small-int cache and logged a spurious warning on wide frames.
    if len(present) != len(requested):
        logger.log_warn('some columns not found in dataFrame')

    df_copy = df.copy()
    if present:
        # Restrict the fill to the requested columns that actually exist.
        df_copy[present] = df_copy[present].fillna(value)
    return df_copy