def f():
    """Keep the rows of ``data`` whose ``column`` satisfies a comparison.

    ``data``, ``column``, ``value``, ``method`` and ``data_name`` are not
    parameters: they are free variables resolved from a surrounding scope
    that is not visible in this block.

    ``method`` names the comparison applied as ``data[column] <op> value``.
    A banner reporting how many rows were kept is printed to stdout.

    Returns:
        The filtered rows when ``data_name`` is None, otherwise
        ``Dataset(data_name, "dataset", <filtered rows>)``.

    Raises:
        ValueError: if ``method`` is not one of the supported comparison
            names (previously this surfaced as an UnboundLocalError).
    """
    import operator  # local import keeps this block self-contained

    comparisons = {
        "less than": operator.lt,
        "less than or equal to": operator.le,
        "equal to": operator.eq,
        "not equal to": operator.ne,
        "greater than or equal to": operator.ge,
        "greater than": operator.gt,
    }
    compare = comparisons.get(method)
    if compare is None:
        raise ValueError(
            f"Unknown row filter method {method!r}; "
            f"expected one of {sorted(comparisons)}"
        )

    # Boolean mask over the rows; every column is retained.
    result = data.loc[compare(data[column], value), :]

    kept, total = len(result), len(data)
    # An empty input would otherwise divide by zero in the summary line.
    fraction = kept / total if total else 0.0

    print("=" * 80)
    print(
        f"Running Rowfilter: Keep rows where {repr(column)} is {method} {repr(value)}"
    )
    print("-" * 80)
    print(f"Kept {kept:,} of {total:,} rows ({fraction:.2%})")
    print("=" * 80)

    if data_name is None:
        return result
    return Dataset(data_name, "dataset", result)
def f():
    """Apply ``clarite.modify.colfilter_percent_zero`` to ``data``.

    ``data``, ``filter_percent``, ``skip``, ``only`` and ``data_name`` are
    free variables resolved from a surrounding scope not visible here.

    Returns:
        The call's result when ``data_name`` is None, otherwise
        ``Dataset(data_name, "dataset", <result>)``.
    """
    filtered = clarite.modify.colfilter_percent_zero(
        data, filter_percent, skip, only
    )
    # Guard clause: an unnamed result is handed back unwrapped.
    if data_name is None:
        return filtered
    return Dataset(data_name, "dataset", filtered)
def f():
    """Run ``clarite.analyze.add_corrected_pvalues`` on a deep copy of ``data``.

    ``data`` and ``data_name`` are free variables resolved from a
    surrounding scope not visible here.

    Returns:
        The modified copy when ``data_name`` is None, otherwise
        ``Dataset(data_name, "ewas_result", <copy>)``.
    """
    # add_corrected_pvalues works in-place, so it is given a deep copy and
    # the original ``data`` is left untouched.
    corrected = data.copy(deep=True)
    clarite.analyze.add_corrected_pvalues(corrected)
    if data_name is None:
        return corrected
    return Dataset(data_name, "ewas_result", corrected)
def f():
    """Apply ``clarite.modify.transform`` to ``data``.

    ``data``, ``method``, ``skip``, ``only`` and ``data_name`` are free
    variables resolved from a surrounding scope not visible here;
    ``method`` is forwarded as ``transform_method``.

    Returns:
        The call's result when ``data_name`` is None, otherwise
        ``Dataset(data_name, "dataset", <result>)``.
    """
    transformed = clarite.modify.transform(
        data=data,
        transform_method=method,
        skip=skip,
        only=only,
    )
    if data_name is None:
        return transformed
    return Dataset(data_name, "dataset", transformed)
def f():
    """Apply ``clarite.modify.recode_values`` to ``data``.

    ``data``, ``replacement_dict``, ``skip``, ``only`` and ``data_name`` are
    free variables resolved from a surrounding scope not visible here.

    Returns:
        The call's result when ``data_name`` is None, otherwise
        ``Dataset(data_name, "dataset", <result>)``.
    """
    recoded = clarite.modify.recode_values(
        data=data,
        replacement_dict=replacement_dict,
        skip=skip,
        only=only,
    )
    if data_name is None:
        return recoded
    return Dataset(data_name, "dataset", recoded)
def f():
    """Apply ``clarite.modify.remove_outliers`` to ``data``.

    ``data``, ``method``, ``cutoff``, ``skip``, ``only`` and ``data_name``
    are free variables resolved from a surrounding scope not visible here.

    Returns:
        The call's result when ``data_name`` is None, otherwise
        ``Dataset(data_name, "dataset", <result>)``.
    """
    cleaned = clarite.modify.remove_outliers(
        data=data,
        method=method,
        cutoff=cutoff,
        skip=skip,
        only=only,
    )
    if data_name is None:
        return cleaned
    return Dataset(data_name, "dataset", cleaned)
def f():
    """Build a two-column table from ``clarite.describe.get_types(data)``.

    ``data`` and ``data_name`` are free variables resolved from a
    surrounding scope not visible here.

    Returns:
        ``Dataset(data_name, "datatypes", <table>)`` where the table is the
        ``get_types`` result after ``reset_index()``, with its columns
        renamed to ``"variable"`` and ``"type"``.
    """
    types = clarite.describe.get_types(data)
    # reset_index() moves the index into a regular column so both columns
    # can be given explicit names below.
    table = types.reset_index()
    table.columns = ["variable", "type"]
    return Dataset(data_name, "datatypes", table)
def f():
    """Wrap ``clarite.describe.freq_table(data)`` in a ``"freqtable"`` Dataset.

    ``data`` and ``data_name`` are free variables resolved from a
    surrounding scope not visible here.

    Returns:
        ``Dataset(data_name, "freqtable", <frequency table>)``.
    """
    frequencies = clarite.describe.freq_table(data)
    return Dataset(data_name, "freqtable", frequencies)
def f():
    """Wrap ``clarite.describe.correlations`` in a ``"correlations"`` Dataset.

    ``data``, ``threshold`` and ``data_name`` are free variables resolved
    from a surrounding scope not visible here; ``data`` and ``threshold``
    are passed positionally, in that order.

    Returns:
        ``Dataset(data_name, "correlations", <result>)``.
    """
    correlated = clarite.describe.correlations(data, threshold)
    return Dataset(data_name, "correlations", correlated)
def f():
    """Run ``clarite.analyze.ewas`` and wrap the output as an ``"ewas_result"``.

    ``kwargs`` and ``data_name`` are free variables resolved from a
    surrounding scope not visible here; ``kwargs`` is unpacked as the
    keyword arguments of the call.

    Returns:
        ``Dataset(data_name, "ewas_result", <result>)``.
    """
    ewas_output = clarite.analyze.ewas(**kwargs)
    return Dataset(data_name, "ewas_result", ewas_output)
def f():
    """Apply ``clarite.modify.colfilter_min_n`` to ``data``.

    ``data``, ``n``, ``skip``, ``only`` and ``data_name`` are free variables
    resolved from a surrounding scope not visible here; the first four are
    passed positionally, in that order.

    Returns:
        The call's result when ``data_name`` is None, otherwise
        ``Dataset(data_name, "dataset", <result>)``.
    """
    filtered = clarite.modify.colfilter_min_n(data, n, skip, only)
    if data_name is None:
        return filtered
    return Dataset(data_name, "dataset", filtered)
def f():
    """Wrap ``clarite.modify.merge_observations(top, bottom)`` in a Dataset.

    ``top``, ``bottom`` and ``data_name`` are free variables resolved from a
    surrounding scope not visible here.

    Returns:
        ``Dataset(data_name, "dataset", <merged result>)``.
    """
    merged = clarite.modify.merge_observations(top, bottom)
    return Dataset(data_name, "dataset", merged)
def f():
    """Wrap ``clarite.modify.merge_variables(left, right, how)`` in a Dataset.

    ``left``, ``right``, ``how`` and ``data_name`` are free variables
    resolved from a surrounding scope not visible here.

    Returns:
        ``Dataset(data_name, "dataset", <merged result>)``.
    """
    merged = clarite.modify.merge_variables(left, right, how)
    return Dataset(data_name, "dataset", merged)
def f():
    """Wrap ``clarite.describe.percent_na(data)`` in a ``"percentna"`` Dataset.

    ``data`` and ``data_name`` are free variables resolved from a
    surrounding scope not visible here.

    Returns:
        ``Dataset(data_name, "percentna", <result>)``.
    """
    na_summary = clarite.describe.percent_na(data)
    return Dataset(data_name, "percentna", na_summary)
def f():
    """Wrap ``clarite.describe.skewness(data, dropna)`` in a ``"skewness"`` Dataset.

    ``data``, ``dropna`` and ``data_name`` are free variables resolved from
    a surrounding scope not visible here.

    Returns:
        ``Dataset(data_name, "skewness", <result>)``.
    """
    skew_summary = clarite.describe.skewness(data, dropna)
    return Dataset(data_name, "skewness", skew_summary)
def f():
    """Apply ``clarite.modify.rowfilter_incomplete_obs`` to ``data``.

    ``data``, ``skip``, ``only`` and ``data_name`` are free variables
    resolved from a surrounding scope not visible here; the first three are
    passed positionally, in that order.

    Returns:
        The call's result when ``data_name`` is None, otherwise
        ``Dataset(data_name, "dataset", <result>)``.
    """
    complete_rows = clarite.modify.rowfilter_incomplete_obs(data, skip, only)
    if data_name is None:
        return complete_rows
    return Dataset(data_name, "dataset", complete_rows)
def f():
    """Load ``filename`` with the clarite loader selected by ``kind``.

    ``kind``, ``filename``, ``index_col`` and ``data_name`` are not
    parameters: they are free variables resolved from a surrounding scope
    that is not visible in this block.

    ``"CSV"`` dispatches to ``clarite.load.from_csv`` and ``"TSV"`` to
    ``clarite.load.from_tsv``; both receive ``(filename, index_col)``.

    Returns:
        ``Dataset(data_name, "dataset", <loaded data>)``.

    Raises:
        ValueError: if ``kind`` is neither ``"CSV"`` nor ``"TSV"``
            (previously this surfaced as an UnboundLocalError on ``df``).
    """
    if kind == "CSV":
        df = clarite.load.from_csv(filename, index_col)
    elif kind == "TSV":
        df = clarite.load.from_tsv(filename, index_col)
    else:
        # Fail with a clear message instead of leaving ``df`` unbound.
        raise ValueError(
            f"Unsupported file kind {kind!r}; expected 'CSV' or 'TSV'"
        )
    return Dataset(data_name, "dataset", df)