def build(method_name):
    """Return the replicate-aggregation step selected by ``method_name``.

    Args:
        method_name: One of ``'sum'``, ``'mean'`` or ``'pick'``.

    Returns:
        A ``NamedFunctor`` built from a display label and a
        ``(state, mode)`` callable that forwards ``state`` to the matching
        aggregation helper (``mode`` is ignored). ``None`` is returned when
        ``method_name`` matches none of the known names.
    """
    def _sum(state, mode):
        return aggregate_sum(state)

    def _mean(state, mode):
        return aggregate_mean(state)

    def _pick(state, mode):
        return aggregate_pick_high_count(state)

    choices = (
        ('sum', "Sum Replicates", _sum),
        ('mean', "Average Replicates", _mean),
        ('pick', "Pick High Count Replicate", _pick),
    )
    for key, label, step in choices:
        if method_name == key:
            return NamedFunctor(label, step)
    # Unknown method names fall through without raising.
    return None
def build(meta_col_name: str, one_set: set, pair_strategy="paired_concat"):
    """Return the target-building step for ``meta_col_name``.

    Args:
        meta_col_name: Metadata column name; also used in the step label.
        one_set: Forwarded unchanged to the selected target helper.
        pair_strategy: One of ``"paired_concat"``, ``"paired_subtract"``,
            ``"unpaired"`` or ``"paired_subtract_sex_balanced"``.

    Returns:
        A ``NamedFunctor`` whose callable takes ``(state, mode)`` and
        forwards ``state``, ``meta_col_name`` and ``one_set`` to the helper
        for the chosen strategy. ``None`` is returned for an unrecognised
        ``pair_strategy``.
    """
    def _concat(state, mode):
        return matched_pair_concat(state, meta_col_name, one_set)

    def _subtract(state, mode):
        return matched_pair_subtract(state, meta_col_name, one_set)

    def _unpaired(state, mode):
        return _target(state, meta_col_name, one_set)

    def _subtract_sex_balanced(state, mode):
        return matched_pair_subtract_sex_balanced(
            state, meta_col_name, one_set)

    # (strategy key, label suffix, step); an empty suffix means the label
    # is just "Target: <column>".
    strategies = (
        ("paired_concat", "(HouseholdConcat)", _concat),
        ("paired_subtract", "(HouseholdSubtract)", _subtract),
        ("unpaired", "", _unpaired),
        ("paired_subtract_sex_balanced", "", _subtract_sex_balanced),
    )
    for key, suffix, step in strategies:
        if pair_strategy == key:
            return NamedFunctor("Target: " + meta_col_name + suffix, step)
    return None
def build_zebra(cov_thresh, cov_file):
    """Return a step that keeps only columns whose genome passes coverage.

    The CSV at ``cov_file`` is read once, when this factory is called. It
    must provide ``coverage_ratio`` and ``genome_id`` columns; rows with
    ``coverage_ratio`` strictly greater than ``cov_thresh`` define the
    passing genome ids.

    Args:
        cov_thresh: Exclusive lower bound on ``coverage_ratio``.
        cov_file: Path (or anything ``pd.read_csv`` accepts) of the
            coverage table.

    Returns:
        A ``NamedFunctor`` labelled ``"Zebra Filter:<cov_thresh>"``.
    """
    coverage = pd.read_csv(cov_file)
    passing_ids = coverage[coverage.coverage_ratio > cov_thresh][
        'genome_id'].to_list()

    def _keep_passing_columns(state, mode):
        # Restrict the table to columns that are also passing genome ids.
        kept_columns = state.df.columns.intersection(passing_ids)
        state.df = state.df[kept_columns]
        return state.update_df(state.df)

    label = "Zebra Filter:" + str(cov_thresh)
    return NamedFunctor(label, _keep_passing_columns)
def build(method_name, target_count=10000):
    """Return the normalization step selected by ``method_name``.

    Supported names and what the resulting step does with ``state``:

    * ``'none'``         -- returns ``state`` untouched.
    * ``'rarefy'``       -- ``rarefy_wrapper(state, target_count)``.
    * ``'divide_total'`` -- ``divide_total(state, target_count)``.
    * ``'CLR'``          -- ``clr_wrapper(state)``.

    ``'ILR'``, ``'ALR'`` and ``'AST'`` are recognised but not implemented.

    Args:
        method_name: Name of the normalization method.
        target_count: Forwarded to the ``'rarefy'`` and ``'divide_total'``
            helpers; unused by the other methods.

    Returns:
        A ``NamedFunctor`` wrapping a ``(state, mode)`` callable, or
        ``None`` when ``method_name`` is not one of the names above.

    Raises:
        NotImplementedError: For ``'ILR'``, ``'ALR'`` and ``'AST'``.
    """
    if method_name == 'none':
        return NamedFunctor(
            "No Normalization "
            "(WARN: IGNORES COMPOSITIONALITY)",
            lambda state, mode: state)
    if method_name == 'rarefy':
        return NamedFunctor(
            "Rarefy",
            lambda state, mode: rarefy_wrapper(state, target_count))
    if method_name == 'divide_total':
        return NamedFunctor(
            "Truncate Ray To Simplex",
            lambda state, mode: divide_total(state, target_count))
    # ``NotImplemented`` is a comparison sentinel, not an exception class:
    # calling it raises TypeError. NotImplementedError is the correct signal.
    if method_name == 'ILR':
        # Existing functions for this in skbio
        raise NotImplementedError("ILR normalization is not implemented")
    if method_name == 'CLR':
        return NamedFunctor("CLR Transform",
                            lambda state, mode: clr_wrapper(state))
    if method_name == 'ALR':
        # Existing functions for this in skbio
        raise NotImplementedError("ALR normalization is not implemented")
    if method_name == 'AST':
        raise NotImplementedError("AST normalization is not implemented")
    return None
def sum_columns():
    """Return the step labelled "Sum All Columns".

    The step takes ``(state, mode)`` and hands ``state`` to
    ``_sum_columns``; ``mode`` is ignored.
    """
    def _step(state, mode):
        return _sum_columns(state)

    return NamedFunctor("Sum All Columns", _step)
def build_meta_encoder(col_name, encoder):
    """Return a step that copies an encoded metadata column into the data.

    Args:
        col_name: Column read from ``state.meta_df`` and written to
            ``state.df`` under the same name.
        encoder: Callable applied to each value of the metadata column
            via ``.apply``.

    Returns:
        A ``NamedFunctor`` labelled ``"Include <col_name>"``.
    """
    label = "Include " + col_name

    def _encode_column(state, mode):
        encoded = state.meta_df[col_name].apply(encoder)
        state.df[col_name] = encoded
        return state.update_df(state.df)

    return NamedFunctor(label, _encode_column)
def build_feature_set_transform(transformer):
    """Return a step that applies ``transformer`` to the pipeline state.

    Args:
        transformer: Object exposing a ``name`` attribute (used as the
            step label, read when this factory is called). It is passed
            unchanged to ``_apply_feature_transform``.

    Returns:
        A ``NamedFunctor`` labelled with ``transformer.name``.
    """
    label = transformer.name

    def _transform(state, mode):
        return _apply_feature_transform(state, transformer)

    return NamedFunctor(label, _transform)
def build_prefix_filter(bad_prefixes):
    """Return the step labelled "Filter Samples By ID Prefix".

    Args:
        bad_prefixes: Forwarded unchanged to
            ``_filter_out_sample_id_prefix`` together with the state.

    Returns:
        A ``NamedFunctor`` whose ``(state, mode)`` callable ignores
        ``mode``.
    """
    def _drop_prefixed(state, mode):
        return _filter_out_sample_id_prefix(state, bad_prefixes)

    return NamedFunctor("Filter Samples By ID Prefix", _drop_prefixed)
def build_whitelist_metadata_value(metadata_column, metadata_values):
    """Return a step that subsets samples by a metadata column.

    Args:
        metadata_column: Column name; concatenated into the step label and
            forwarded to ``_filter_by_metadata``.
        metadata_values: Values forwarded to ``_filter_by_metadata``; their
            ``str()`` form is included in the step label.

    Returns:
        A ``NamedFunctor`` labelled
        ``"Subset Samples by <column> for <values>"``.
    """
    label = ("Subset Samples by " + metadata_column
             + " for " + str(metadata_values))

    def _subset(state, mode):
        return _filter_by_metadata(state, metadata_column, metadata_values)

    return NamedFunctor(label, _subset)
def build_filter_out_empty_samples():
    """Return the step labelled "Filter Empty Samples".

    The step takes ``(state, mode)`` and hands ``state`` to
    ``_filter_zero_sum``; ``mode`` is ignored.
    """
    def _drop_empty(state, mode):
        return _filter_zero_sum(state)

    return NamedFunctor("Filter Empty Samples", _drop_empty)
def build(num_training_samples):
    """Return a step that downsamples via ``_downsample``.

    Args:
        num_training_samples: Forwarded to ``_downsample``; its ``str()``
            form is appended to the step label.

    Returns:
        A ``NamedFunctor`` labelled ``"Downsample Train Set: <n>"``. Unlike
        most sibling steps, its callable forwards both ``state`` and
        ``mode`` to the helper.
    """
    label = "Downsample Train Set: " + str(num_training_samples)

    def _step(state, mode):
        return _downsample(state, mode, num_training_samples)

    return NamedFunctor(label, _step)
def build_exact_filter(bad_sample_ids):
    """Return the step labelled "Remove Bad Samples".

    Args:
        bad_sample_ids: Forwarded unchanged to ``_filter_out_sample_ids``
            together with the state.

    Returns:
        A ``NamedFunctor`` whose ``(state, mode)`` callable ignores
        ``mode``.
    """
    def _drop_listed(state, mode):
        return _filter_out_sample_ids(state, bad_sample_ids)

    return NamedFunctor("Remove Bad Samples", _drop_listed)
def plot_correlation_matrix():
    """Return the step labelled "Plot Correlation Heatmap".

    The step takes ``(state, mode)`` and hands ``state`` to
    ``_plot_correlation_matrix_deluxe``; ``mode`` is ignored.
    """
    def _plot(state, mode):
        return _plot_correlation_matrix_deluxe(state)

    return NamedFunctor("Plot Correlation Heatmap", _plot)
def plot_scatter():
    """Return the step labelled "Plot Scatter".

    ``_plot_scatter`` is handed to ``NamedFunctor`` directly rather than
    through an adapter, so it receives whatever arguments the functor is
    invoked with.
    """
    label = "Plot Scatter"
    step = _plot_scatter
    return NamedFunctor(label, step)
def build_column_filter(chosen_columns):
    """Return a step that restricts the data to ``chosen_columns``.

    Args:
        chosen_columns: Forwarded unchanged to
            ``_restrict_columns_compositional``; its ``str()`` form is
            appended to the step label.

    Returns:
        A ``NamedFunctor`` labelled
        ``"Restrict to columns (compositional): <chosen_columns>"``.
    """
    label = "Restrict to columns (compositional): " + str(chosen_columns)

    def _restrict(state, mode):
        return _restrict_columns_compositional(state, chosen_columns)

    return NamedFunctor(label, _restrict)
def build_shared_filter():
    """Return the step labelled "Filter Samples To Shared IDs".

    The step takes ``(state, mode)`` and hands ``state`` to
    ``_filter_by_shared_ids``; ``mode`` is ignored.
    """
    def _keep_shared(state, mode):
        return _filter_by_shared_ids(state)

    return NamedFunctor("Filter Samples To Shared IDs", _keep_shared)
def build():
    """Return the step labelled "Fix Sample IDs".

    The step takes ``(state, mode)`` and hands ``state`` to
    ``fix_sample_ids``; ``mode`` is ignored.
    """
    def _fix(state, mode):
        return fix_sample_ids(state)

    return NamedFunctor("Fix Sample IDs", _fix)