def _run_univariate_tests(self, X, y, control='N2', n_jobs=-1):

        stats, pvals, _ = univariate_tests(
            X, y, control=control, test=self.test,
            comparison_type=self.comparison_type,
            multitest_correction=self.multitest_method,
            n_jobs=n_jobs)

        effects = get_effect_sizes(
            X, y, control=control, test=self.test,
            comparison_type=self.comparison_type)

        test_res = pd.DataFrame(pvals.min(axis=1), columns=['p-value'])

        # In most cases, pvals and effects have the same shape: group-by-group
        # comparisons give group-by-group effect sizes, and multi-class
        # comparisons give a single effect size.
        # For the Kruskal-Wallis case, however, we cannot get a single effect
        # size for the test, so we get group-by-group effect sizes instead and
        # keep the max. In this case pvals has only one column, but effects has
        # more than one column.
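        # (e.g. with three groups including the control, pvals would have shape
        # (n_features, 1) while effects would have shape (n_features, 2))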
        if pvals.shape==effects.shape:
            test_res['effect_size'] = effects.values[pvals.isin(pvals.min(axis=1)).values]
        else:
            test_res['effect_size'] = effects.max(axis=1)

        self.test_results = test_res

        return
Example #2
def single_feature_window_mutant_worm_stats(metadata,
                                            features,
                                            save_dir,
                                            window=2,
                                            feature='motion_mode_paused_fraction',
                                            pvalue_threshold=0.05,
                                            fdr_method='fdr_by'):
    """ T-tests comparing BW vs fepD for each mutant worm """
    
    # 7 worm strains:       N2 vs 'cat-2', 'eat-4', 'osm-5', 'pdfr-1', 'tax-2', 'unc-25'
    # 2 bacteria strains:   BW vs fepD
    # 1 feature:            'motion_mode_paused_fraction'
    # 1 window:             2 (corresponding to 30 minutes on food, just after first BL stimulus)

    # focus on just one window = 30min just after BL (window=2)
    window_metadata = metadata[metadata['window']==window]

    # statistics: perform t-tests comparing fepD vs BW for each worm strain
    worm_strain_list = list(window_metadata['worm_strain'].unique())

    ttest_list = []
    for worm in worm_strain_list:
        worm_window_meta = window_metadata[window_metadata['worm_strain']==worm]
        worm_window_feat = features[[feature]].reindex(worm_window_meta.index)
        
        stats, pvals, reject = univariate_tests(X=worm_window_feat,
                                                y=worm_window_meta['bacteria_strain'],
                                                control='BW',
                                                test='t-test',
                                                comparison_type='binary_each_group',
                                                multitest_correction=fdr_method,
                                                alpha=pvalue_threshold,
                                                n_permutation_test=None)

        # get effect sizes
        effect_sizes = get_effect_sizes(X=worm_window_feat, 
                                        y=worm_window_meta['bacteria_strain'],
                                        control='BW',
                                        effect_type=None,
                                        linked_test='t-test')
        
        # compile t-test results
        stats.columns = ['stats_' + str(c) for c in stats.columns]
        pvals.columns = ['pvals_' + str(c) for c in pvals.columns]
        reject.columns = ['reject_' + str(c) for c in reject.columns]
        effect_sizes.columns = ['effect_size_' + str(c) for c in effect_sizes.columns]
        ttest_df = pd.concat([stats, effect_sizes, pvals, reject], axis=1)
    
        # record the worm strain as the index instead of the feature
        ttest_df = ttest_df.rename(index={feature:worm})
        ttest_list.append(ttest_df)

    ttest_path = Path(save_dir) / 'pairwise_ttests' /\
        'ttest_mutant_worm_fepD_vs_BW_window_{}_results.csv'.format(window)
    ttest_path.parent.mkdir(exist_ok=True, parents=True)
    ttest_results = pd.concat(ttest_list, axis=0)
    ttest_results.to_csv(ttest_path, header=True, index=True)
        
    return
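
# A hypothetical usage sketch (not from the source; the save path is an assumption):
#   single_feature_window_mutant_worm_stats(metadata, features,
#                                           save_dir='/path/to/save_dir',
#                                           window=2,
#                                           feature='motion_mode_paused_fraction',
#                                           fdr_method='fdr_by')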
Example #3
def dead_keio_stats(features, metadata, args):
    """ Perform statistical analyses on dead Keio experiment results:
        - t-tests for each feature comparing each strain vs control for paired antioxidant treatment conditions
        - t-tests for each feature comparing each strain antioxidant treatment to negative control (no antioxidant)
        
        Inputs
        ------
        features, metadata : pd.DataFrame
            Clean feature summaries and accompanying metadata
        
        args : Object
            Python object with the following attributes:
            - drop_size_features : bool
            - norm_features_only : bool
            - percentile_to_use : str
            - remove_outliers : bool
            - control_dict : dict
            - n_top_feats : int
            - tierpsy_top_feats_dir (if n_top_feats) : str
            - test : str
            - f_test : bool
            - pval_threshold : float
            - fdr_method : str
            - n_sig_features : int           
    """

    print("\nInvestigating variation in worm behaviour on dead vs alive hit Keio strains")  

    # assert there will be no errors due to case-sensitivity
    assert len(metadata[STRAIN_COLNAME].unique()) == len(metadata[STRAIN_COLNAME].str.upper().unique())
    assert all(type(b) == np.bool_ for b in metadata[TREATMENT_COLNAME].unique())
    
    # Load Tierpsy feature set + subset (columns) for selected features only
    features = select_feat_set(features, 'tierpsy_{}'.format(args.n_top_feats), append_bluelight=True)
    features = features[[f for f in features.columns if 'path_curvature' not in f]]
    
    assert not features.isna().any().any()
    #n_feats = features.shape[1]
    
    strain_list = list(metadata[STRAIN_COLNAME].unique())
    assert CONTROL_STRAIN in strain_list

    # print mean sample size
    sample_size = df_summary_stats(metadata, columns=[STRAIN_COLNAME, TREATMENT_COLNAME])
    print("Mean sample size of %s: %d" % (STRAIN_COLNAME, int(sample_size['n_samples'].mean())))
    
    # construct save paths (args.save_dir / topfeats? etc)
    save_dir = get_save_dir(args)
    stats_dir =  save_dir / "Stats" / args.fdr_method 
    
    ##### ANOVA #####

    # make path to save ANOVA results
    test_path = stats_dir / 'ANOVA_results.csv'
    test_path.parent.mkdir(exist_ok=True, parents=True)

    # ANOVA across strains for significant feature differences
    if len(metadata[STRAIN_COLNAME].unique()) > 2:   
        stats, pvals, reject = univariate_tests(X=features, 
                                                y=metadata[STRAIN_COLNAME], 
                                                test='ANOVA',
                                                control=CONTROL_STRAIN,
                                                comparison_type='multiclass',
                                                multitest_correction=None, # uncorrected
                                                alpha=args.pval_threshold,
                                                n_permutation_test=None) # 'all'
    
        # get effect sizes
        effect_sizes = get_effect_sizes(X=features, 
                                        y=metadata[STRAIN_COLNAME], 
                                        control=CONTROL_STRAIN,
                                        effect_type=None,
                                        linked_test='ANOVA')
    
        # correct for multiple comparisons
        reject_corrected, pvals_corrected = _multitest_correct(pvals, 
                                                               multitest_method=args.fdr_method,
                                                               fdr=args.pval_threshold)
                                    
        # compile + save results (corrected)
        test_results = pd.concat([stats, effect_sizes, pvals_corrected, reject_corrected], axis=1)
        test_results.columns = ['stats','effect_size','pvals','reject']     
        test_results['significance'] = sig_asterix(test_results['pvals'])
        test_results = test_results.sort_values(by=['pvals'], ascending=True) # rank pvals
        test_results.to_csv(test_path, header=True, index=True)
        
        nsig = test_results['reject'].sum()
        print("%d features (%.f%%) signficantly different among '%s'" % (nsig, 
              len(test_results.index)/nsig, STRAIN_COLNAME))

    
    ##### t-tests #####

    # t-test to use (assumed definition, matching the convention used later in these examples)
    t_test = 't-test' if args.test == 'ANOVA' else 'Mann-Whitney'  # aka. Wilcoxon rank-sum

    for strain in strain_list:                                   
        strain_meta = metadata[metadata[STRAIN_COLNAME]==strain]
        strain_feat = features.reindex(strain_meta.index)
                     
        ### t-tests for each feature comparing live vs dead behaviour
    
        ttest_path_uncorrected = stats_dir / '{}_uncorrected.csv'.format((t_test + '_' + strain))
        ttest_path = stats_dir / '{}_results.csv'.format((t_test + '_' + strain))  
        ttest_path.parent.mkdir(exist_ok=True, parents=True)

        # perform t-tests (without correction for multiple testing)
        stats_t, pvals_t, reject_t = univariate_tests(X=strain_feat, 
                                                      y=strain_meta[TREATMENT_COLNAME], 
                                                      control=CONTROL_TREATMENT, 
                                                      test=t_test,
                                                      comparison_type='binary_each_group',
                                                      multitest_correction=None, 
                                                      alpha=0.05)
        # get effect sizes for comparisons
        effect_sizes_t =  get_effect_sizes(X=strain_feat, 
                                           y=strain_meta[TREATMENT_COLNAME], 
                                           control=CONTROL_TREATMENT,
                                           effect_type=None,
                                           linked_test=t_test)
        
        # compile + save t-test results (uncorrected)
        stats_t.columns = ['stats_' + str(c) for c in stats_t.columns]
        pvals_t.columns = ['pvals_' + str(c) for c in pvals_t.columns]
        reject_t.columns = ['reject_' + str(c) for c in reject_t.columns]
        effect_sizes_t.columns = ['effect_size_' + str(c) for c in effect_sizes_t.columns]
        ttest_uncorrected = pd.concat([stats_t, effect_sizes_t, pvals_t, reject_t], axis=1)
        ttest_uncorrected.to_csv(ttest_path_uncorrected, header=True, index=True)
        
        # correct for multiple comparisons
        pvals_t.columns = [c.split("_")[-1] for c in pvals_t.columns]
        reject_t, pvals_t = _multitest_correct(pvals_t, 
                                               multitest_method=args.fdr_method,
                                               fdr=args.pval_threshold)
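        # A minimal sketch of the correction this wraps (an assumption: _multitest_correct is
        # taken to apply statsmodels' multipletests column-wise; shown here for one column):
        #   from statsmodels.stats.multitest import multipletests
        #   reject_col, pvals_corr, _, _ = multipletests(pvals_t.iloc[:, 0].values,
        #                                                alpha=args.pval_threshold,
        #                                                method=args.fdr_method)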

        # compile + save t-test results (corrected)
        pvals_t.columns = ['pvals_' + str(c) for c in pvals_t.columns]
        reject_t.columns = ['reject_' + str(c) for c in reject_t.columns]
        ttest_corrected = pd.concat([stats_t, effect_sizes_t, pvals_t, reject_t], axis=1)
        ttest_corrected.to_csv(ttest_path, header=True, index=True)

        # record t-test significant features (not ordered)
        fset_ttest = pvals_t[np.asmatrix(reject_t)].index.unique().to_list()
        #assert set(fset_ttest) == set(pvals_t.index[(pvals_t < args.pval_threshold).sum(axis=1) > 0])
        print("%d significant features for %s on any %s vs %s (%s, %s, P<%.2f)" % (len(fset_ttest),
              strain, TREATMENT_COLNAME, CONTROL_TREATMENT, t_test, args.fdr_method, args.pval_threshold))

        if len(fset_ttest) > 0:
            ttest_sigfeats_path = stats_dir / '{}_sigfeats.txt'.format((t_test + '_' + strain))
            write_list_to_file(fset_ttest, ttest_sigfeats_path)

    ##### for LIVE bacteria: compare each strain with control #####
    
    live_metadata = metadata[metadata['dead']==False]
    live_features = features.reindex(live_metadata.index)    

    ttest_path_uncorrected = stats_dir / '{}_live_uncorrected.csv'.format(t_test)
    ttest_path = stats_dir / '{}_live_results.csv'.format(t_test)
    ttest_path.parent.mkdir(exist_ok=True, parents=True)
    
    # perform t-tests (without correction for multiple testing)   
    stats_t, pvals_t, reject_t = univariate_tests(X=live_features, 
                                                  y=live_metadata[STRAIN_COLNAME], 
                                                  control=CONTROL_STRAIN, 
                                                  test=t_test,
                                                  comparison_type='binary_each_group',
                                                  multitest_correction=None, 
                                                  alpha=0.05)
    
    # get effect sizes for comparisons
    effect_sizes_t =  get_effect_sizes(X=live_features, 
                                       y=live_metadata[STRAIN_COLNAME], 
                                       control=CONTROL_STRAIN,
                                       effect_type=None,
                                       linked_test=t_test)
    
    # compile + save t-test results (uncorrected)
    stats_t.columns = ['stats_' + str(c) for c in stats_t.columns]
    pvals_t.columns = ['pvals_' + str(c) for c in pvals_t.columns]
    reject_t.columns = ['reject_' + str(c) for c in reject_t.columns]
    effect_sizes_t.columns = ['effect_size_' + str(c) for c in effect_sizes_t.columns]
    ttest_uncorrected = pd.concat([stats_t, effect_sizes_t, pvals_t, reject_t], axis=1)
    ttest_uncorrected.to_csv(ttest_path_uncorrected, header=True, index=True)
    
    # correct for multiple comparisons
    pvals_t.columns = [c.split("_")[-1] for c in pvals_t.columns]
    reject_t, pvals_t = _multitest_correct(pvals_t, 
                                           multitest_method=args.fdr_method,
                                           fdr=args.pval_threshold)

    # compile + save t-test results (corrected)
    pvals_t.columns = ['pvals_' + str(c) for c in pvals_t.columns]
    reject_t.columns = ['reject_' + str(c) for c in reject_t.columns]
    ttest_corrected = pd.concat([stats_t, effect_sizes_t, pvals_t, reject_t], axis=1)
    ttest_corrected.to_csv(ttest_path, header=True, index=True)

    # record t-test significant features (not ordered)
    fset_ttest = pvals_t[np.asmatrix(reject_t)].index.unique().to_list()
    #assert set(fset_ttest) == set(pvals_t.index[(pvals_t < args.pval_threshold).sum(axis=1) > 0])
    print("LIVE BACTERIA: %d significant features for any %s vs %s (%s, %s, P<%.2f)" %\
          (len(fset_ttest), STRAIN_COLNAME, CONTROL_STRAIN, t_test, args.fdr_method, 
           args.pval_threshold))

    if len(fset_ttest) > 0:
        ttest_sigfeats_path = stats_dir / '{}_live_sigfeats.txt'.format(t_test)
        write_list_to_file(fset_ttest, ttest_sigfeats_path)

    ##### for DEAD bacteria: compare each strain with control #####
    
    dead_metadata = metadata[metadata['dead']==True]
    dead_features = features.reindex(dead_metadata.index)    

    ttest_path_uncorrected = stats_dir / '{}_dead_uncorrected.csv'.format(t_test)
    ttest_path = stats_dir / '{}_dead_results.csv'.format(t_test)
    ttest_path.parent.mkdir(exist_ok=True, parents=True)
    
    # perform t-tests (without correction for multiple testing)   
    stats_t, pvals_t, reject_t = univariate_tests(X=dead_features, 
                                                  y=dead_metadata[STRAIN_COLNAME], 
                                                  control=CONTROL_STRAIN, 
                                                  test=t_test,
                                                  comparison_type='binary_each_group',
                                                  multitest_correction=None, 
                                                  alpha=0.05)
    
    # get effect sizes for comparisons
    effect_sizes_t =  get_effect_sizes(X=dead_features, 
                                       y=dead_metadata[STRAIN_COLNAME], 
                                       control=CONTROL_STRAIN,
                                       effect_type=None,
                                       linked_test=t_test)
    
    # compile + save t-test results (uncorrected)
    stats_t.columns = ['stats_' + str(c) for c in stats_t.columns]
    pvals_t.columns = ['pvals_' + str(c) for c in pvals_t.columns]
    reject_t.columns = ['reject_' + str(c) for c in reject_t.columns]
    effect_sizes_t.columns = ['effect_size_' + str(c) for c in effect_sizes_t.columns]
    ttest_uncorrected = pd.concat([stats_t, effect_sizes_t, pvals_t, reject_t], axis=1)
    ttest_uncorrected.to_csv(ttest_path_uncorrected, header=True, index=True)
    
    # correct for multiple comparisons
    pvals_t.columns = [c.split("_")[-1] for c in pvals_t.columns]
    reject_t, pvals_t = _multitest_correct(pvals_t, 
                                           multitest_method=args.fdr_method,
                                           fdr=args.pval_threshold)

    # compile + save t-test results (corrected)
    pvals_t.columns = ['pvals_' + str(c) for c in pvals_t.columns]
    reject_t.columns = ['reject_' + str(c) for c in reject_t.columns]
    ttest_corrected = pd.concat([stats_t, effect_sizes_t, pvals_t, reject_t], axis=1)
    ttest_corrected.to_csv(ttest_path, header=True, index=True)

    # record t-test significant features (not ordered)
    fset_ttest = pvals_t[np.asmatrix(reject_t)].index.unique().to_list()
    #assert set(fset_ttest) == set(pvals_t.index[(pvals_t < args.pval_threshold).sum(axis=1) > 0])
    print("DEAD BACTERIA: %d significant features for any %s vs %s (%s, %s, P<%.2f)" %\
          (len(fset_ttest), STRAIN_COLNAME, CONTROL_STRAIN, t_test, args.fdr_method, 
           args.pval_threshold))

    if len(fset_ttest) > 0:
        ttest_sigfeats_path = stats_dir / '{}_dead_sigfeats.txt'.format(t_test)
        write_list_to_file(fset_ttest, ttest_sigfeats_path)  
Example #4
    ### statistics
    # ANOVA to test for variation among strains
    if len(metadata_df[args.strain_colname].unique()) > 2:
        stats, pvals, reject = univariate_tests(
            X=features_df,
            y=metadata_df[args.strain_colname],
            control=args.control,
            test='ANOVA',
            comparison_type='multiclass',
            multitest_correction='fdr_by',
            alpha=0.05)
        # get effect sizes
        effect_sizes = get_effect_sizes(X=features_df,
                                        y=metadata_df[args.strain_colname],
                                        control=args.control,
                                        effect_type=None,
                                        linked_test='ANOVA')
        # compile + save results
        test_results = pd.concat([stats, effect_sizes, pvals, reject], axis=1)
        test_results.columns = ['stats', 'effect_size', 'pvals', 'reject']
        test_results = test_results.sort_values(by=['pvals'],
                                                ascending=True)  # rank pvals
        anova_save_path = save_dir / 'stats' / 'ANOVA_results.csv'
        anova_save_path.parent.mkdir(exist_ok=True, parents=True)
        test_results.to_csv(anova_save_path, header=True, index=True)

    # t-tests between each strain vs control (remaining arguments below are assumed,
    # following the same binary_each_group pattern used elsewhere in these examples)
    stats_t, pvals_t, reject_t = univariate_tests(
        X=features_df,
        y=metadata_df[args.strain_colname],
        control=args.control,
        test='t-test',
        comparison_type='binary_each_group',
        multitest_correction='fdr_by',
        alpha=0.05)

Example #5
    # perform ANOVA (correct for multiple comparisons) - is there variation in egg count across strains?
    stats, pvals, reject = univariate_tests(X=eggs[['number_eggs_1hr']],
                                            y=eggs['gene_name'],
                                            test='ANOVA',
                                            control=CONTROL_STRAIN,
                                            comparison_type='multiclass',
                                            multitest_correction='fdr_by',
                                            alpha=0.05,
                                            n_permutation_test=None)  # 'all'

    # get effect sizes
    effect_sizes = get_effect_sizes(X=eggs[['number_eggs_1hr']],
                                    y=eggs['gene_name'],
                                    control=CONTROL_STRAIN,
                                    effect_type=None,
                                    linked_test='ANOVA')

    # compile
    test_results = pd.concat([stats, effect_sizes, pvals, reject], axis=1)
    test_results.columns = ['stats', 'effect_size', 'pvals', 'reject']
    test_results = test_results.sort_values(by=['pvals'],
                                            ascending=True)  # rank pvals

    # save results
    anova_path = Path(SAVE_DIR) / 'ANOVA_egg_laying_variation_on_food.csv'
    test_results.to_csv(anova_path, header=True, index=True)

    # TODO: Chi-square tests should be performed here, not t-tests!
    # t-tests: is egg count different on any food vs control?
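    # A hedged sketch of the chi-square approach the TODO above suggests (an assumption,
    # not the original code): cross-tabulate a binary "laid eggs" outcome against food.
    #   from scipy.stats import chi2_contingency
    #   contingency = pd.crosstab(eggs['gene_name'], eggs['number_eggs_1hr'] > 0)
    #   chi2, p, dof, expected = chi2_contingency(contingency)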
Example #6
    # 1. perform chi-sq tests to see if number of eggs laid is significantly different from control

    # perform ANOVA - is there variation in egg laying across antioxidants? (pooled strain data)
    stats, pvals, reject = univariate_tests(X=eggs[['number_eggs_24hrs']],
                                            y=eggs['antioxidant'],
                                            test='ANOVA',
                                            control=CONTROL_ANTIOXIDANT,
                                            comparison_type='multiclass',
                                            multitest_correction='fdr_by',
                                            alpha=0.05,
                                            n_permutation_test=None)  # 'all'

    # get effect sizes
    effect_sizes = get_effect_sizes(X=eggs[['number_eggs_24hrs']],
                                    y=eggs['antioxidant'],
                                    control=CONTROL_ANTIOXIDANT,
                                    effect_type=None,
                                    linked_test='ANOVA')

    # compile
    test_results = pd.concat([stats, effect_sizes, pvals, reject], axis=1)
    test_results.columns = ['stats', 'effect_size', 'pvals', 'reject']
    test_results = test_results.sort_values(by=['pvals'],
                                            ascending=True)  # rank pvals

    # save results
    anova_path = Path(SAVE_DIR) / 'ANOVA_egg_laying_variation_across_antioxidants.csv'
    test_results.to_csv(anova_path, header=True, index=True)

    # TODO: Chi-square tests should be performed here, not t-tests!
    # Is there any difference in egg laying between strains, after accounting for antioxidant treatment?
Example #7
def acute_rescue_stats(features, 
                       metadata, 
                       save_dir, 
                       control_strain, 
                       control_antioxidant, 
                       control_window,
                       fdr_method='fdr_by',
                       pval_threshold=0.05):
    """ Pairwise t-tests for each window comparing worm 'motion mode paused fraction' on 
        Keio mutants vs BW control 
        
        # One could fit a multiple linear regression model to account for strain*antioxidant in a 
        # single model: Y (motion_mode) = b0 + b1*X1 (strain) + b2*X2 (antiox) + e (error)
        # But this is a different type of question: we care about difference in means between 
        # fepD vs BW (albeit under different antioxidant treatments), and not about modelling their 
        # relationship, therefore individual t-tests (multiple-test-corrected) should suffice
        
        1. For each treatment condition, t-tests comparing fepD vs BW for motion_mode
        
        2. For fepD and BW separately, f-tests for equal variance among antioxidant treatment groups,
        then ANOVA tests for significant differences between antioxidants, then individual t-tests
        comparing each treatment to control
        
        Inputs
        ------
        features, metadata : pandas.DataFrame
            Clean feature summaries and accompanying metadata
        
        save_dir : str
            Directory to save statistics results
        
        control_strain, control_antioxidant, control_window
            Control levels of the 'gene_name', 'antioxidant' and 'window' variables
            (statistics are performed separately for each window; p-values are adjusted 
            for multiple test correction)
        
        fdr_method : str
            Multiple test correction method
        
        pval_threshold : float
            Significance threshold for corrected p-values
    """

    stats_dir = Path(save_dir) / "Stats" / fdr_method
    stats_dir.mkdir(parents=True, exist_ok=True)

    strain_list = [control_strain] + [s for s in set(metadata['gene_name'].unique()) if s != control_strain]  
    antiox_list = [control_antioxidant] + [a for a in set(metadata['antioxidant'].unique()) if 
                                           a != control_antioxidant]
    window_list = [control_window] + [w for w in set(metadata['window'].unique()) if w != control_window]

    # categorical variables to investigate: 'gene_name', 'antioxidant' and 'window'
    print("\nInvestigating difference in fraction of worms paused between hit strain and control " +
          "(for each window), in the presence/absence of antioxidants:\n")    

    # print mean sample size
    sample_size = df_summary_stats(metadata, columns=['gene_name', 'antioxidant', 'window'])
    print("Mean sample size of strain/antioxidant for each window: %d" %\
          (int(sample_size['n_samples'].mean())))
      
    # For each strain separately...
    for strain in strain_list:
        strain_meta = metadata[metadata['gene_name']==strain]
        strain_feat = features.reindex(strain_meta.index)

        # 1. Is there any variation in fraction paused wrt antioxidant treatment?
        #    - ANOVA on pooled window data, then pairwise t-tests for each antioxidant
        
        print("Performing ANOVA on pooled window data for significant variation in fraction " +
              "of worms paused among different antioxidant treatments for %s..." % strain)
        
        # perform ANOVA (correct for multiple comparisons)             
        stats, pvals, reject = univariate_tests(X=strain_feat[[FEATURE]], 
                                                y=strain_meta['antioxidant'], 
                                                test='ANOVA',
                                                control=control_antioxidant,
                                                comparison_type='multiclass',
                                                multitest_correction=fdr_method,
                                                alpha=pval_threshold,
                                                n_permutation_test=None) # 'all'
    
        # get effect sizes
        effect_sizes = get_effect_sizes(X=strain_feat[[FEATURE]], 
                                        y=strain_meta['antioxidant'],
                                        control=control_antioxidant,
                                        effect_type=None,
                                        linked_test='ANOVA')
    
        # compile
        test_results = pd.concat([stats, effect_sizes, pvals, reject], axis=1)
        test_results.columns = ['stats','effect_size','pvals','reject']     
        test_results['significance'] = sig_asterix(test_results['pvals'])
        test_results = test_results.sort_values(by=['pvals'], ascending=True) # rank pvals
        
        # save results
        anova_path = Path(stats_dir) / 'ANOVA_{}_variation_across_antioxidants.csv'.format(strain)
        test_results.to_csv(anova_path, header=True, index=True)
              
        print("Performing t-tests comparing each antioxidant treatment to None (pooled window data)")
        
        stats_t, pvals_t, reject_t = univariate_tests(X=strain_feat[[FEATURE]],
                                                      y=strain_meta['antioxidant'],
                                                      test='t-test',
                                                      control=control_antioxidant,
                                                      comparison_type='binary_each_group',
                                                      multitest_correction=fdr_method,
                                                      alpha=pval_threshold)
        effect_sizes_t =  get_effect_sizes(X=strain_feat[[FEATURE]], 
                                           y=strain_meta['antioxidant'], 
                                           control=control_antioxidant,
                                           effect_type=None,
                                           linked_test='t-test')
            
        # compile + save t-test results
        stats_t.columns = ['stats_' + str(c) for c in stats_t.columns]
        pvals_t.columns = ['pvals_' + str(c) for c in pvals_t.columns]
        reject_t.columns = ['reject_' + str(c) for c in reject_t.columns]
        effect_sizes_t.columns = ['effect_size_' + str(c) for c in effect_sizes_t.columns]
        ttest_results = pd.concat([stats_t, effect_sizes_t, pvals_t, reject_t], axis=1)
        ttest_save_path = stats_dir / 't-test_{}_antioxidant_results.csv'.format(strain)
        ttest_save_path.parent.mkdir(exist_ok=True, parents=True)
        ttest_results.to_csv(ttest_save_path, header=True, index=True)
    
        # 2. Is there any variation in fraction paused wrt window (time) across the videos?
        #    - ANOVA on pooled antioxidant data, then pairwise for each window
        
        print("Performing ANOVA on pooled antioxidant data for significant variation in fraction " +
              "of worms paused across (bluelight) window summaries for %s..." % strain)
        
        # perform ANOVA (correct for multiple comparisons)
        stats, pvals, reject = univariate_tests(X=strain_feat[[FEATURE]],
                                                y=strain_meta['window'],
                                                test='ANOVA',
                                                control=control_window,
                                                comparison_type='multiclass',
                                                multitest_correction=fdr_method,
                                                alpha=pval_threshold,
                                                n_permutation_test=None)
        
        # get effect sizes
        effect_sizes = get_effect_sizes(X=strain_feat[[FEATURE]],
                                        y=strain_meta['window'],
                                        control=control_window,
                                        effect_type=None,
                                        linked_test='ANOVA')

        # compile
        test_results = pd.concat([stats, effect_sizes, pvals, reject], axis=1)
        test_results.columns = ['stats','effect_size','pvals','reject']     
        test_results['significance'] = sig_asterix(test_results['pvals'])
        test_results = test_results.sort_values(by=['pvals'], ascending=True) # rank pvals
        
        # save results
        anova_path = Path(stats_dir) / 'ANOVA_{}_variation_across_windows.csv'.format(strain)
        test_results.to_csv(anova_path, header=True, index=True)

        print("Performing t-tests comparing each window with the first (pooled antioxidant data)")
        
        stats_t, pvals_t, reject_t = univariate_tests(X=strain_feat[[FEATURE]],
                                                      y=strain_meta['window'],
                                                      test='t-test',
                                                      control=control_window,
                                                      comparison_type='binary_each_group',
                                                      multitest_correction=fdr_method,
                                                      alpha=pval_threshold)
        effect_sizes_t =  get_effect_sizes(X=strain_feat[[FEATURE]], 
                                           y=strain_meta['window'], 
                                           control=control_window,
                                           effect_type=None,
                                           linked_test='t-test')
            
        # compile + save t-test results
        stats_t.columns = ['stats_' + str(c) for c in stats_t.columns]
        pvals_t.columns = ['pvals_' + str(c) for c in pvals_t.columns]
        reject_t.columns = ['reject_' + str(c) for c in reject_t.columns]
        effect_sizes_t.columns = ['effect_size_' + str(c) for c in effect_sizes_t.columns]
        ttest_results = pd.concat([stats_t, effect_sizes_t, pvals_t, reject_t], axis=1)
        ttest_save_path = stats_dir / 't-test_{}_window_results.csv'.format(strain)
        ttest_save_path.parent.mkdir(exist_ok=True, parents=True)
        ttest_results.to_csv(ttest_save_path, header=True, index=True)   
         
    # Pairwise t-tests - is there a difference between strain vs control?

    control_meta = metadata[metadata['gene_name']==control_strain]
    control_feat = features.reindex(control_meta.index)
    control_df = control_meta.join(control_feat[[FEATURE]])

    for strain in strain_list[1:]: # skip control_strain at first index position
        strain_meta = metadata[metadata['gene_name']==strain]
        strain_feat = features.reindex(strain_meta.index)
        strain_df = strain_meta.join(strain_feat[[FEATURE]])

        # 3. Is there a difference between strain vs control at any window?
        
        print("\nPairwise t-tests for each window (pooled antioxidants) comparing fraction of " + 
              "worms paused on %s vs control:" % strain)

        stats, pvals, reject = pairwise_ttest(control_df, 
                                              strain_df, 
                                              feature_list=[FEATURE], 
                                              group_by='window', 
                                              fdr_method=fdr_method,
                                              fdr=0.05)
 
        # compile table of results
        stats.columns = ['stats_' + str(c) for c in stats.columns]
        pvals.columns = ['pvals_' + str(c) for c in pvals.columns]
        reject.columns = ['reject_' + str(c) for c in reject.columns]
        test_results = pd.concat([stats, pvals, reject], axis=1)
        
        # save results
        ttest_strain_path = stats_dir / 'pairwise_ttests' / 'window' /\
                            '{}_window_results.csv'.format(strain)
        ttest_strain_path.parent.mkdir(parents=True, exist_ok=True)
        test_results.to_csv(ttest_strain_path, header=True, index=True)
                             
        # for each antioxidant treatment condition...
        for antiox in antiox_list:
            print("Pairwise t-tests for each window comparing fraction of " + 
                  "worms paused on %s vs control with '%s'" % (strain, antiox))

            antiox_control_df = control_df[control_df['antioxidant']==antiox]
            antiox_strain_df = strain_df[strain_df['antioxidant']==antiox]
            
            stats, pvals, reject = pairwise_ttest(antiox_control_df,
                                                  antiox_strain_df,
                                                  feature_list=[FEATURE],
                                                  group_by='window',
                                                  fdr_method=fdr_method,
                                                  fdr=0.05)
        
            # compile table of results
            stats.columns = ['stats_' + str(c) for c in stats.columns]
            pvals.columns = ['pvals_' + str(c) for c in pvals.columns]
            reject.columns = ['reject_' + str(c) for c in reject.columns]
            test_results = pd.concat([stats, pvals, reject], axis=1)
            
            # save results
            ttest_strain_path = stats_dir / 'pairwise_ttests' / 'window' /\
                                '{0}_{1}_window_results.csv'.format(strain, antiox)
            ttest_strain_path.parent.mkdir(parents=True, exist_ok=True)
            test_results.to_csv(ttest_strain_path, header=True, index=True)

        # 4. Is there a difference between strain vs control for any antioxidant?

        print("\nPairwise t-tests for each antioxidant (pooled windows) comparing fraction of " + 
              "worms paused on %s vs control:" % strain)

        stats, pvals, reject = pairwise_ttest(control_df, 
                                              strain_df, 
                                              feature_list=[FEATURE], 
                                              group_by='antioxidant', 
                                              fdr_method=fdr_method,
                                              fdr=0.05)
 
        # compile table of results
        stats.columns = ['stats_' + str(c) for c in stats.columns]
        pvals.columns = ['pvals_' + str(c) for c in pvals.columns]
        reject.columns = ['reject_' + str(c) for c in reject.columns]
        test_results = pd.concat([stats, pvals, reject], axis=1)
        
        # save results
        ttest_strain_path = stats_dir / 'pairwise_ttests' / 'antioxidant' /\
                            '{}_antioxidant_results.csv'.format(strain)
        ttest_strain_path.parent.mkdir(parents=True, exist_ok=True)
        test_results.to_csv(ttest_strain_path, header=True, index=True)
                             
        # For each window...
        for window in window_list:
            print("Pairwise t-tests for each antioxidant comparing fraction of " + 
                  "worms paused on %s vs control at window %d" % (strain, window))

            window_control_df = control_df[control_df['window']==window]
            window_strain_df = strain_df[strain_df['window']==window]
            
            stats, pvals, reject = pairwise_ttest(window_control_df,
                                                  window_strain_df,
                                                  feature_list=[FEATURE],
                                                  group_by='antioxidant',
                                                  fdr_method=fdr_method,
                                                  fdr=0.05)
        
            # compile table of results
            stats.columns = ['stats_' + str(c) for c in stats.columns]
            pvals.columns = ['pvals_' + str(c) for c in pvals.columns]
            reject.columns = ['reject_' + str(c) for c in reject.columns]
            test_results = pd.concat([stats, pvals, reject], axis=1)
            
            # save results
            ttest_strain_path = stats_dir / 'pairwise_ttests' / 'antioxidant' /\
                                '{0}_window{1}_antioxidant_results.csv'.format(strain, window)
            ttest_strain_path.parent.mkdir(parents=True, exist_ok=True)
            test_results.to_csv(ttest_strain_path, header=True, index=True)
               
    return
def antioxidant_stats(features, metadata, args):
    """ Perform statistical analyses on Keio antioxidant rescue experiment results:
        - ANOVA tests for significant feature variation between strains (for each antioxidant treatment in turn)
        - ANOVA tests for significant feature variation in antioxidant treatment (for each strain in turn)
        - t-tests for each feature comparing each strain vs control for paired antioxidant treatment conditions
        - t-tests for each feature comparing each strain antioxidant treatment to negative control (no antioxidant)
        
        Inputs
        ------
        features, metadata : pd.DataFrame
            Clean feature summaries and accompanying metadata
        
        args : Object
            Python object with the following attributes:
            - drop_size_features : bool
            - norm_features_only : bool
            - percentile_to_use : str
            - remove_outliers : bool
            - control_dict : dict
            - n_top_feats : int
            - tierpsy_top_feats_dir (if n_top_feats) : str
            - test : str
            - f_test : bool
            - pval_threshold : float
            - fdr_method : str
            - n_sig_features : int           
    """

    # categorical variables to investigate: 'gene_name' and 'antioxidant'
    print(
        "\nInvestigating variation in worm behaviour on hit strains treated with different antioxidants"
    )

    # assert there will be no errors due to case-sensitivity
    assert len(metadata[STRAIN_COLNAME].unique()) == len(
        metadata[STRAIN_COLNAME].str.upper().unique())
    assert len(metadata[TREATMENT_COLNAME].unique()) == len(
        metadata[TREATMENT_COLNAME].str.upper().unique())

    assert not features.isna().any().any()

    strain_list = list(metadata[STRAIN_COLNAME].unique())
    antioxidant_list = list(metadata[TREATMENT_COLNAME].unique())
    assert CONTROL_STRAIN in strain_list and CONTROL_TREATMENT in antioxidant_list

    # print mean sample size
    sample_size = df_summary_stats(metadata,
                                   columns=[STRAIN_COLNAME, TREATMENT_COLNAME])
    print("Mean sample size of %s: %d" %
          (STRAIN_COLNAME, int(sample_size['n_samples'].mean())))

    # construct save paths (args.save_dir / topfeats? etc)
    save_dir = get_save_dir(args)
    stats_dir = save_dir / "Stats" / args.fdr_method

    ### For each antioxidant treatment in turn...

    for antiox in antioxidant_list:
        print("\n%s" % antiox)
        meta_antiox = metadata[metadata[TREATMENT_COLNAME] == antiox]
        feat_antiox = features.reindex(meta_antiox.index)

        ### ANOVA tests for significant variation between strains

        # make path to save ANOVA results
        test_path_uncorrected = stats_dir / '{}_uncorrected.csv'.format(
            (args.test + '_' + antiox))
        test_path = stats_dir / '{}_results.csv'.format(
            (args.test + '_' + antiox))
        test_path.parent.mkdir(exist_ok=True, parents=True)

        if len(meta_antiox[STRAIN_COLNAME].unique()) > 2:
            # perform ANOVA + record results before & after correcting for multiple comparisons
            stats, pvals, reject = univariate_tests(
                X=feat_antiox,
                y=meta_antiox[STRAIN_COLNAME],
                test=args.test,
                control=CONTROL_STRAIN,
                comparison_type='multiclass',
                multitest_correction=None,  # uncorrected
                alpha=args.pval_threshold,
                n_permutation_test=None)  # 'all'

            # get effect sizes
            effect_sizes = get_effect_sizes(X=feat_antiox,
                                            y=meta_antiox[STRAIN_COLNAME],
                                            control=CONTROL_STRAIN,
                                            effect_type=None,
                                            linked_test=args.test)

            # compile + save results (uncorrected)
            test_results = pd.concat([stats, effect_sizes, pvals, reject],
                                     axis=1)
            test_results.columns = ['stats', 'effect_size', 'pvals', 'reject']
            test_results['significance'] = sig_asterix(test_results['pvals'])
            test_results = test_results.sort_values(
                by=['pvals'], ascending=True)  # rank pvals
            test_results.to_csv(test_path_uncorrected,
                                header=True,
                                index=True)

            # correct for multiple comparisons
            reject_corrected, pvals_corrected = _multitest_correct(
                pvals,
                multitest_method=args.fdr_method,
                fdr=args.pval_threshold)

            # compile + save results (corrected)
            test_results = pd.concat(
                [stats, effect_sizes, pvals_corrected, reject_corrected],
                axis=1)
            test_results.columns = ['stats', 'effect_size', 'pvals', 'reject']
            test_results['significance'] = sig_asterix(test_results['pvals'])
            test_results = test_results.sort_values(
                by=['pvals'], ascending=True)  # rank pvals
            test_results.to_csv(test_path, header=True, index=True)

            print(
                "%s differences in '%s' across strains on %s (%s, P<%.2f, %s)"
                % (("SIGNIFICANT" if reject_corrected.loc[FEATURE, args.test]
                    else "No significant"), FEATURE, antiox, args.test,
                   args.pval_threshold, args.fdr_method))
        else:
            print("\nWARNING: Not enough %s groups for %s (n=%d)" %\
                  (STRAIN_COLNAME, args.test, len(strain_list)))

        ### t-tests comparing each strain vs control for each antioxidant treatment conditions

        if len(meta_antiox[STRAIN_COLNAME].unique()) == 2 or (
                len(meta_antiox[STRAIN_COLNAME].unique()) > 2
                and reject_corrected.loc[FEATURE, args.test]):

            # t-test to use
            t_test = 't-test' if args.test == 'ANOVA' else 'Mann-Whitney'  # aka. Wilcoxon rank-sum
            ttest_path_uncorrected = stats_dir / '{}_uncorrected.csv'.format(
                (t_test + '_' + antiox))
            ttest_path = stats_dir / '{}_results.csv'.format(
                (t_test + '_' + antiox))
            ttest_path.parent.mkdir(exist_ok=True, parents=True)

            # perform t-tests (without correction for multiple testing)
            stats_t, pvals_t, reject_t = univariate_tests(
                X=feat_antiox,
                y=meta_antiox[STRAIN_COLNAME],
                control=CONTROL_STRAIN,
                test=t_test,
                comparison_type='binary_each_group',
                multitest_correction=None,
                alpha=0.05)
            # get effect sizes for comparisons
            effect_sizes_t = get_effect_sizes(X=feat_antiox,
                                              y=meta_antiox[STRAIN_COLNAME],
                                              control=CONTROL_STRAIN,
                                              effect_type=None,
                                              linked_test=t_test)

            # compile + save t-test results (uncorrected)
            stats_t.columns = ['stats_' + str(c) for c in stats_t.columns]
            pvals_t.columns = ['pvals_' + str(c) for c in pvals_t.columns]
            reject_t.columns = ['reject_' + str(c) for c in reject_t.columns]
            effect_sizes_t.columns = [
                'effect_size_' + str(c) for c in effect_sizes_t.columns
            ]
            ttest_uncorrected = pd.concat(
                [stats_t, effect_sizes_t, pvals_t, reject_t], axis=1)
            ttest_uncorrected.to_csv(ttest_path_uncorrected,
                                     header=True,
                                     index=True)

            # correct for multiple comparisons
            pvals_t.columns = [c.split("_")[-1] for c in pvals_t.columns]
            reject_t, pvals_t = _multitest_correct(
                pvals_t,
                multitest_method=args.fdr_method,
                fdr=args.pval_threshold)

            # compile + save t-test results (corrected)
            pvals_t.columns = ['pvals_' + str(c) for c in pvals_t.columns]
            reject_t.columns = ['reject_' + str(c) for c in reject_t.columns]
            ttest_corrected = pd.concat(
                [stats_t, effect_sizes_t, pvals_t, reject_t], axis=1)
            ttest_corrected.to_csv(ttest_path, header=True, index=True)

            nsig = reject_t.loc[FEATURE].sum()
            print("%d %ss differ from %s in '%s' on %s (%s, P<%.2f, %s)" %
                  (nsig, STRAIN_COLNAME, CONTROL_STRAIN, FEATURE, antiox,
                   t_test, args.pval_threshold, args.fdr_method))

    ### For each strain in turn...

    for strain in strain_list:
        print("\n%s" % strain)
        meta_strain = metadata[metadata[STRAIN_COLNAME] == strain]
        feat_strain = features.reindex(meta_strain.index)

        ### ANOVA tests for significant feature variation in antioxidant treatment

        # make path to save ANOVA results
        test_path_uncorrected = stats_dir / '{}_uncorrected.csv'.format(
            (args.test + '_' + strain))
        test_path = stats_dir / '{}_results.csv'.format(
            (args.test + '_' + strain))
        test_path.parent.mkdir(exist_ok=True, parents=True)

        if len(meta_strain[TREATMENT_COLNAME].unique()) > 2:
            # perform ANOVA + record results before & after correcting for multiple comparisons
            stats, pvals, reject = univariate_tests(
                X=feat_strain,
                y=meta_strain[TREATMENT_COLNAME],
                test=args.test,
                control=CONTROL_TREATMENT,
                comparison_type='multiclass',
                multitest_correction=None,  # uncorrected
                alpha=args.pval_threshold,
                n_permutation_test=None)  # 'all'

            # get effect sizes
            effect_sizes = get_effect_sizes(X=feat_strain,
                                            y=meta_strain[TREATMENT_COLNAME],
                                            control=CONTROL_TREATMENT,
                                            effect_type=None,
                                            linked_test=args.test)

            # compile + save results (uncorrected)
            test_results = pd.concat([stats, effect_sizes, pvals, reject],
                                     axis=1)
            test_results.columns = ['stats', 'effect_size', 'pvals', 'reject']
            test_results['significance'] = sig_asterix(test_results['pvals'])
            test_results = test_results.sort_values(
                by=['pvals'], ascending=True)  # rank pvals
            test_results.to_csv(test_path_uncorrected,
                                header=True,
                                index=True)

            # correct for multiple comparisons
            reject_corrected, pvals_corrected = _multitest_correct(
                pvals,
                multitest_method=args.fdr_method,
                fdr=args.pval_threshold)

            # compile + save results (corrected)
            test_results = pd.concat(
                [stats, effect_sizes, pvals_corrected, reject_corrected],
                axis=1)
            test_results.columns = ['stats', 'effect_size', 'pvals', 'reject']
            test_results['significance'] = sig_asterix(test_results['pvals'])
            test_results = test_results.sort_values(
                by=['pvals'], ascending=True)  # rank pvals
            test_results.to_csv(test_path, header=True, index=True)

            print("%s differences in '%s' across %ss for %s (%s, P<%.2f, %s)" %
                  (("SIGNIFICANT" if reject_corrected.loc[FEATURE, args.test]
                    else "No"), FEATURE, TREATMENT_COLNAME, strain, args.test,
                   args.pval_threshold, args.fdr_method))
        else:
            print("\nWARNING: Not enough %s groups for %s (n=%d)" %\
                  (TREATMENT_COLNAME, args.test, len(antioxidant_list)))

        ### t-tests comparing each antioxidant treatment to no antioxidant for each strain

        if len(meta_strain[TREATMENT_COLNAME].unique()) == 2 or (
                len(meta_strain[TREATMENT_COLNAME].unique()) > 2
                and reject_corrected.loc[FEATURE, args.test]):
            # t-test to use
            t_test = 't-test' if args.test == 'ANOVA' else 'Mann-Whitney'  # aka. Wilcoxon rank-sum
            ttest_path_uncorrected = stats_dir / '{}_uncorrected.csv'.format(
                (t_test + '_' + strain))
            ttest_path = stats_dir / '{}_results.csv'.format(
                (t_test + '_' + strain))
            ttest_path.parent.mkdir(exist_ok=True, parents=True)

            # perform t-tests (without correction for multiple testing)
            stats_t, pvals_t, reject_t = univariate_tests(
                X=feat_strain,
                y=meta_strain[TREATMENT_COLNAME],
                control=CONTROL_TREATMENT,
                test=t_test,
                comparison_type='binary_each_group',
                multitest_correction=None,
                alpha=0.05)
            # get effect sizes for comparisons
            effect_sizes_t = get_effect_sizes(X=feat_strain,
                                              y=meta_strain[TREATMENT_COLNAME],
                                              control=CONTROL_TREATMENT,
                                              effect_type=None,
                                              linked_test=t_test)

            # compile + save t-test results (uncorrected)
            stats_t.columns = ['stats_' + str(c) for c in stats_t.columns]
            pvals_t.columns = ['pvals_' + str(c) for c in pvals_t.columns]
            reject_t.columns = ['reject_' + str(c) for c in reject_t.columns]
            effect_sizes_t.columns = [
                'effect_size_' + str(c) for c in effect_sizes_t.columns
            ]
            ttest_uncorrected = pd.concat(
                [stats_t, effect_sizes_t, pvals_t, reject_t], axis=1)
            ttest_uncorrected.to_csv(ttest_path_uncorrected,
                                     header=True,
                                     index=True)

            # correct for multiple comparisons
            pvals_t.columns = [c.split("_")[-1] for c in pvals_t.columns]
            reject_t, pvals_t = _multitest_correct(
                pvals_t,
                multitest_method=args.fdr_method,
                fdr=args.pval_threshold)

            # compile + save t-test results (corrected)
            pvals_t.columns = ['pvals_' + str(c) for c in pvals_t.columns]
            reject_t.columns = ['reject_' + str(c) for c in reject_t.columns]
            ttest_corrected = pd.concat(
                [stats_t, effect_sizes_t, pvals_t, reject_t], axis=1)
            ttest_corrected.to_csv(ttest_path, header=True, index=True)

            nsig = reject_t.loc[FEATURE].sum()
            print("%d %ss differ from %s in '%s' for %s (%s, P<%.2f, %s)" %
                  (nsig, TREATMENT_COLNAME, CONTROL_TREATMENT, FEATURE, strain,
                   t_test, args.pval_threshold, args.fdr_method))

    ### Pairwise t-tests comparing strain vs control behaviour on each antioxidant
    print("\nPerforming pairwise t-tests:")
    # subset for control data
    control_strain_meta = metadata[metadata[STRAIN_COLNAME] == CONTROL_STRAIN]
    control_strain_feat = features.reindex(control_strain_meta.index)
    control_df = control_strain_meta.join(control_strain_feat)

    for strain in strain_list:
        if strain == CONTROL_STRAIN:
            continue

        # subset for strain data
        strain_meta = metadata[metadata[STRAIN_COLNAME] == strain]
        strain_feat = features.reindex(strain_meta.index)
        strain_df = strain_meta.join(strain_feat)

        # perform pairwise t-tests comparing strain with control for each antioxidant treatment
        stats, pvals, reject = pairwise_ttest(control_df,
                                              strain_df,
                                              feature_list=[FEATURE],
                                              group_by=TREATMENT_COLNAME,
                                              fdr_method=args.fdr_method,
                                              fdr=args.pval_threshold)

        # compile table of results
        stats.columns = ['stats_' + str(c) for c in stats.columns]
        pvals.columns = ['pvals_' + str(c) for c in pvals.columns]
        reject.columns = ['reject_' + str(c) for c in reject.columns]
        test_results = pd.concat([stats, pvals, reject], axis=1)

        # save results
        ttest_strain_path = stats_dir / 'pairwise_ttests' / '{}_results.csv'.format(
            strain + "_vs_" + CONTROL_STRAIN)
        ttest_strain_path.parent.mkdir(parents=True, exist_ok=True)
        test_results.to_csv(ttest_strain_path, header=True, index=True)

        for antiox in antioxidant_list:
            print("%s difference in '%s' between %s vs %s on %s (paired t-test, P=%.3f, %s)" %\
                  (("SIGNIFICANT" if reject.loc[FEATURE, 'reject_{}'.format(antiox)] else "No"),
                  FEATURE, strain, CONTROL_STRAIN, antiox, pvals.loc[FEATURE, 'pvals_{}'.format(antiox)],
                  args.fdr_method))
def keio_stats(features, metadata, args):
    """ Perform statistical analyses on Keio screen results:
        - ANOVA tests for significant between strain variation among all strains for each feature
        - t-tests for significant differences between each strain and control for each feature
        - k-significant feature selection for agreement with ANOVA significant feature set
        
        Inputs
        ------
        features, metadata : pd.DataFrame
            Clean feature summaries and accompanying metadata
        
        args : Object
            Python object with the following attributes:
            - drop_size_features : bool
            - norm_features_only : bool
            - percentile_to_use : str
            - remove_outliers : bool
            - omit_strains : list
            - grouping_variable : str
            - control_dict : dict
            - collapse_control : bool
            - n_top_feats : int
            - tierpsy_top_feats_dir (if n_top_feats) : str
            - align_bluelight (if n_top_feats) : bool
            - test : str
            - lmm_random_effect (if test='LMM') : str
            - f_test : bool
            - pval_threshold : float
            - fdr_method : str
            - n_sig_features : int           
    """

    # categorical variable to investigate, eg.'gene_name'
    grouping_var = args.grouping_variable
    print("\nInvestigating '%s' variation" % grouping_var)    

    # assert there will be no errors due to case-sensitivity
    assert len(metadata[grouping_var].unique()) == len(metadata[grouping_var].str.upper().unique())
    
    # Subset results (rows) to omit selected strains
    if args.omit_strains is not None:
        features, metadata = subset_results(features, metadata, grouping_var, args.omit_strains)

    # Load Tierpsy Top feature set + subset (columns) for top feats only
    if args.n_top_feats is not None:
        top_feats_path = Path(args.tierpsy_top_feats_dir) / "tierpsy_{}.csv".format(str(args.n_top_feats))
        topfeats = load_topfeats(top_feats_path, add_bluelight=args.align_bluelight, 
                                 remove_path_curvature=True, header=None)
        
        # Drop features that are not in results
        top_feats_list = [feat for feat in list(topfeats) if feat in features.columns]
        features = features[top_feats_list]
    
    assert not features.isna().any().any()
    
    strain_list = list(metadata[grouping_var].unique())
    control = args.control_dict[grouping_var] # control strain to use
    assert control in strain_list
    
    if args.collapse_control:
        print("Collapsing control data (mean of each day)")
        features, metadata = average_plate_control_data(features, 
                                                        metadata, 
                                                        control=control, 
                                                        grouping_var=grouping_var, 
                                                        plate_var='imaging_plate_id')

    _ = df_summary_stats(metadata) # summary df # TODO: plot from?

    # Record mean sample size per group
    mean_sample_size = int(np.round(metadata.join(features).groupby([grouping_var], 
                                                                    as_index=False).size().mean()))
    print("Mean sample size: %d" % mean_sample_size)

    # construct save paths (args.save_dir / topfeats? etc)
    save_dir = get_save_dir(args)
    stats_dir =  save_dir / grouping_var / "Stats" / args.fdr_method
    plot_dir = save_dir / grouping_var / "Plots" / args.fdr_method              

#%% F-test for equal variances

    # Compare the variance of each group with the control group (and correct for multiple comparisons)
    # Sample size matters here: unequal variances are not a problem for a t-test when sample sizes 
    # are equal, so with balanced groups homogeneity of variances can be ignored. With unbalanced 
    # groups, test for equal variances first before applying a t-test
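    # A minimal per-feature sketch of the comparison this step wraps (assuming scipy's
    # Levene test; levene_f_test is expected to add the multiple-comparison correction):
    #     from scipy.stats import levene
    #     groups = [features.loc[metadata[grouping_var]==g, ft] for g in strain_list]
    #     stat, p = levene(*groups)  # small p suggests unequal variances for feature 'ft'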
    if args.f_test:
        levene_stats_path = stats_dir / 'levene_results.csv'
        levene_stats = levene_f_test(features, metadata, grouping_var, 
                                      p_value_threshold=args.pval_threshold, 
                                      multitest_method=args.fdr_method,
                                      saveto=levene_stats_path,
                                      del_if_exists=False)
        # if p < 0.05 then variances are not equal, and sample size matters
        prop_eqvar = (levene_stats['pval'] > args.pval_threshold).sum() / len(levene_stats['pval'])
        print("Percentage equal variance %.1f%%" % (prop_eqvar * 100))
          
#%% ANOVA / Kruskal-Wallis tests for significantly different features across groups

    test_path_uncorrected = stats_dir / '{}_results_uncorrected.csv'.format(args.test)
    test_path = stats_dir / '{}_results.csv'.format(args.test)
    
    if not (test_path.exists() and test_path_uncorrected.exists()):
        test_path.parent.mkdir(exist_ok=True, parents=True)
    
        if (args.test == "ANOVA" or args.test == "Kruskal"):
            if len(strain_list) > 2:   
                # perform ANOVA + record results before & after correcting for multiple comparisons               
                stats, pvals, reject = univariate_tests(X=features, 
                                                        y=metadata[grouping_var], 
                                                        control=control, 
                                                        test=args.test,
                                                        comparison_type='multiclass',
                                                        multitest_correction=None, # uncorrected
                                                        alpha=args.pval_threshold,
                                                        n_permutation_test='all')

                # get effect sizes
                effect_sizes = get_effect_sizes(X=features, 
                                                y=metadata[grouping_var],
                                                control=control,
                                                effect_type=None,
                                                linked_test=args.test)

                # compile + save results (uncorrected)
                test_results = pd.concat([stats, effect_sizes, pvals, reject], axis=1)
                test_results.columns = ['stats','effect_size','pvals','reject']     
                test_results['significance'] = sig_asterix(test_results['pvals'])
                test_results = test_results.sort_values(by=['pvals'], ascending=True) # rank pvals
                test_results.to_csv(test_path_uncorrected, header=True, index=True)

                # correct for multiple comparisons
                reject_corrected, pvals_corrected = _multitest_correct(pvals, 
                                                                       multitest_method=args.fdr_method,
                                                                       fdr=args.pval_threshold)
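                # (_multitest_correct is assumed to behave like statsmodels'
                # multipletests, e.g.:
                #     from statsmodels.stats.multitest import multipletests
                #     reject, pvals_corr, _, _ = multipletests(pvals.values.ravel(),
                #                                              alpha=args.pval_threshold,
                #                                              method=args.fdr_method)
                # with the corrected p-values reshaped back into the input dataframe)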
                                            
                # compile + save results (corrected)
                test_results = pd.concat([stats, effect_sizes, pvals_corrected, reject_corrected], axis=1)
                test_results.columns = ['stats','effect_size','pvals','reject']     
                test_results['significance'] = sig_asterix(test_results['pvals'])
                test_results = test_results.sort_values(by=['pvals'], ascending=True) # rank pvals
                test_results.to_csv(test_path, header=True, index=True)
        
                # use reject mask to find significant feature set
                fset = pvals_corrected.loc[reject_corrected[args.test]].sort_values(by=args.test, ascending=True).index.to_list()
                #assert set(fset) == set(anova_corrected['pvals'].index[np.where(anova_corrected['pvals'] < 
                #args.pval_threshold)[0]])

                if len(fset) > 0:
                    print("%d significant features found by %s for '%s' (P<%.2f, %s)" % (len(fset), 
                          args.test, grouping_var, args.pval_threshold, args.fdr_method))
                    anova_sigfeats_path = stats_dir / '{}_sigfeats.txt'.format(args.test)
                    write_list_to_file(fset, anova_sigfeats_path)
            else:
                fset = []
                print("\nWARNING: Not enough groups for %s for '%s' (n=%d groups)" %\
                      (args.test, grouping_var, len(strain_list)))
                    
#%% Linear Mixed Models (LMMs), accounting for day-to-day variation
        # NB: Ideally report:  parameter | beta | lower-95 | upper-95 | random effect (SD)
        elif args.test == 'LMM':
            with warnings.catch_warnings():
                # Filter warnings as parameter is often on the boundary
                warnings.filterwarnings("ignore")
                #warnings.simplefilter("ignore", ConvergenceWarning)
                (signif_effect, low_effect,  error, mask, pvals
                 ) = compounds_with_low_effect_univariate(feat=features, 
                                                          drug_name=metadata[grouping_var], 
                                                          drug_dose=None, 
                                                          random_effect=metadata[args.lmm_random_effect], 
                                                          control=control, 
                                                          test=args.test, 
                                                          comparison_type='multiclass',
                                                          multitest_method=args.fdr_method)
            assert len(error) == 0

            # save pvals
            pvals.to_csv(test_path_uncorrected, header=True, index=True)

            # save significant features -- if any strain significant for any feature
            fset = pvals.columns[(pvals < args.pval_threshold).any()].to_list()
            if len(fset) > 0:
                lmm_sigfeats_path = stats_dir / '{}_sigfeats.txt'.format(args.test)
                write_list_to_file(fset, lmm_sigfeats_path)

            # save significant effect strains
            if len(signif_effect) > 0:
                print(("%d significant features found (%d significant %ss vs %s control, "\
                      % (len(fset), len(signif_effect), grouping_var.replace('_',' '), 
                          control) if len(signif_effect) > 0 else\
                      "No significant differences found between %s "\
                      % grouping_var.replace('_',' '))
                      + "after accounting for %s variation, %s, P<%.2f, %s)"\
                      % (args.lmm_random_effect.split('_yyyymmdd')[0], args.test, 
                          args.pval_threshold, args.fdr_method))
                signif_effect_path = stats_dir / '{}_signif_effect_strains.txt'.format(args.test)
                write_list_to_file(signif_effect, signif_effect_path)
        
        else:
            raise IOError("Test '{}' not recognised".format(args.test))

    else:
        # statistics results already exist on disk: reload the corrected results and
        # recover the significant feature set needed by the steps below
        test_results = pd.read_csv(test_path, index_col=0)
        pvals = test_results.sort_values(by='pvals', ascending=True)['pvals']
        fset = pvals[pvals < args.pval_threshold].index.to_list()

#%% t-tests / Mann-Whitney tests
    
    # t-test to use        
    t_test = 't-test' if args.test == 'ANOVA' else 'Mann-Whitney' # aka. Wilcoxon rank-sum      
    ttest_path_uncorrected = stats_dir / '{}_results_uncorrected.csv'.format(t_test)
    ttest_path = stats_dir / '{}_results.csv'.format(t_test)               

    if not (ttest_path_uncorrected.exists() and ttest_path.exists()):    
        ttest_path.parent.mkdir(exist_ok=True, parents=True)

        if len(fset) > 0 or len(strain_list) == 2:
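            # only follow up with pairwise comparisons if the omnibus test found
            # significant features, or if there are just two groups to compare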
            # perform t-tests (without correction for multiple testing)
            stats_t, pvals_t, reject_t = univariate_tests(X=features, 
                                                          y=metadata[grouping_var], 
                                                          control=control, 
                                                          test=t_test,
                                                          comparison_type='binary_each_group',
                                                          multitest_correction=None, 
                                                          alpha=0.05)
            # get effect sizes for comparisons
            effect_sizes_t =  get_effect_sizes(X=features, 
                                               y=metadata[grouping_var], 
                                               control=control,
                                               effect_type=None,
                                               linked_test=t_test)
            
            # compile + save t-test results (uncorrected)
            stats_t.columns = ['stats_' + str(c) for c in stats_t.columns]
            pvals_t.columns = ['pvals_' + str(c) for c in pvals_t.columns]
            reject_t.columns = ['reject_' + str(c) for c in reject_t.columns]
            effect_sizes_t.columns = ['effect_size_' + str(c) for c in effect_sizes_t.columns]
            ttest_uncorrected = pd.concat([stats_t, effect_sizes_t, pvals_t, reject_t], axis=1)
            ttest_uncorrected.to_csv(ttest_path_uncorrected, header=True, index=True)
            
            # correct for multiple comparisons
            pvals_t.columns = [c.split("_")[-1] for c in pvals_t.columns]
            reject_t, pvals_t = _multitest_correct(pvals_t, 
                                                   multitest_method=args.fdr_method,
                                                   fdr=args.pval_threshold)

            # compile + save t-test results (corrected)
            pvals_t.columns = ['pvals_' + str(c) for c in pvals_t.columns]
            reject_t.columns = ['reject_' + str(c) for c in reject_t.columns]
            ttest_corrected = pd.concat([stats_t, effect_sizes_t, pvals_t, reject_t], axis=1)
            ttest_corrected.to_csv(ttest_path, header=True, index=True)

            # record t-test significant features (not ordered)
            fset_ttest = pvals_t.index[reject_t.values.any(axis=1)].unique().to_list()
            #assert set(fset_ttest) == set(pvals_t.index[(pvals_t < args.pval_threshold).sum(axis=1) > 0])
            print("%d significant features found for any %s vs %s (%s, P<%.2f)" %\
                  (len(fset_ttest), grouping_var, control, t_test, args.pval_threshold))

            if len(fset_ttest) > 0:
                ttest_sigfeats_path = stats_dir / '{}_sigfeats.txt'.format(t_test)
                write_list_to_file(fset_ttest, ttest_sigfeats_path)
                                 
#%% K significant features
    
    ksig_uncorrected_path = stats_dir / 'k_significant_features_uncorrected.csv'
    ksig_corrected_path = stats_dir / 'k_significant_features.csv'
    if not (ksig_uncorrected_path.exists() and ksig_corrected_path.exists()):
        ksig_corrected_path.parent.mkdir(exist_ok=True, parents=True)      
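        # select the same number of features (k) as the ANOVA/Kruskal significant set,
        # so that the two feature sets can be checked for agreement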
        fset_ksig, (scores, pvalues_ksig), support = k_significant_feat(feat=features, 
                                                                        y_class=metadata[grouping_var], 
                                                                        k=len(fset),
                                                                        score_func='f_classif', 
                                                                        scale=None, 
                                                                        feat_names=None, 
                                                                        plot=False, 
                                                                        k_to_plot=None, 
                                                                        close_after_plotting=True,
                                                                        saveto=None, #k_sigfeat_dir
                                                                        figsize=None, 
                                                                        title=None, 
                                                                        xlabel=None)
        # compile + save k-significant features (uncorrected) 
        ksig_table = pd.concat([pd.Series(scores), pd.Series(pvalues_ksig)], axis=1)
        ksig_table.columns = ['scores','pvals']
        ksig_table.index = fset_ksig
        ksig_table.to_csv(ksig_uncorrected_path, header=True, index=True)   
        
        # Correct for multiple comparisons
        _, ksig_table['pvals'] = _multitest_correct(ksig_table['pvals'], 
                                                    multitest_method=args.fdr_method,
                                                    fdr=args.pval_threshold)
        
        # save k-significant features (corrected)
        ksig_table.to_csv(ksig_corrected_path, header=True, index=True)   

#%% mRMR feature selection: Minimum Redundancy, Maximum Relevance #####
    
    mrmr_dir = plot_dir / 'mrmr'
    mrmr_dir.mkdir(exist_ok=True, parents=True)
    mrmr_results_path = mrmr_dir / "mrmr_results.csv"

    if not mrmr_results_path.exists():
        estimator = Pipeline([('scaler', StandardScaler()), ('estimator', LogisticRegression())])
        y = metadata[grouping_var].values
        (mrmr_feat_set, 
         mrmr_scores, 
         mrmr_support) = mRMR_feature_selection(features, y_class=y, k=10,
                                                redundancy_func='pearson_corr',
                                                relevance_func='kruskal',
                                                n_bins=10, mrmr_criterion='MID',
                                                plot=True, k_to_plot=5, 
                                                close_after_plotting=True,
                                                saveto=mrmr_dir, figsize=None)
        # save results                                        
        mrmr_table = pd.concat([pd.Series(mrmr_feat_set), pd.Series(mrmr_scores)], axis=1)
        mrmr_table.columns = ['feature','score']
        mrmr_table.to_csv(mrmr_results_path, header=True, index=False)
        
        n_cv = 5
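        # evaluate the mRMR-selected feature set: cross-validate a scaled logistic
        # regression classifier using only those features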
        cv_scores_mrmr = cross_val_score(estimator, features[mrmr_feat_set], y, cv=n_cv)
        cv_scores_mrmr = pd.DataFrame(cv_scores_mrmr, columns=['cv_score'])
        cv_scores_mrmr.to_csv(mrmr_dir / "cv_scores.csv", header=True, index=False)        
        print('MRMR CV Score: %f (n=%d)' % (cv_scores_mrmr['cv_score'].mean(), n_cv))
    else:
        # load mrmr results
        mrmr_table = pd.read_csv(mrmr_results_path)
        
    mrmr_feat_set = mrmr_table['feature'].to_list()
    print("\nTop %d features found by MRMR:" % len(mrmr_feat_set))
    for feat in mrmr_feat_set:
        print(feat)
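
# A usage sketch (not part of the original screen pipeline): one plausible way to
# assemble the 'args' object described in the keio_stats docstring. All attribute
# values below are assumptions for illustration; get_save_dir(args) may expect
# further attributes not shown here.
def _example_keio_stats_call(features, metadata):
    from types import SimpleNamespace
    args = SimpleNamespace(drop_size_features=False,
                           norm_features_only=True,
                           percentile_to_use=None,
                           remove_outliers=False,
                           omit_strains=None,
                           grouping_variable='gene_name',
                           control_dict={'gene_name': 'wild_type'},
                           collapse_control=False,
                           n_top_feats=None,
                           test='ANOVA',
                           f_test=True,
                           pval_threshold=0.05,
                           fdr_method='fdr_by',
                           n_sig_features=None)
    keio_stats(features, metadata, args)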
Exemple #10
0
def single_feature_window_stats(metadata,
                                features,
                                group_by,
                                control,
                                save_dir,
                                windows=None,
                                feat='motion_mode_paused_fraction',
                                pvalue_threshold=0.05,
                                fdr_method='fdr_by'):
    """ Pairwise t-tests for each window comparing a feature of worm behaviour on mutant strains 
        vs control 
        
        Parameters
        ----------
        metadata : pandas.DataFrame
        
        features : pandas.DataFrame
            Dataframe of compiled window summaries
            
        group_by : str
            Column name of variable containing control and other groups to compare, eg. 'gene_name'
            
        control : str
            Name of control group in 'group_by' column in metadata
            
        save_dir : str
            Path to directory to save results files
            
        windows : list
            List of window numbers at which to compare strains (corrected for multiple testing)
            
        feat : str
            Feature to test
        
        pvalue_threshold : float
            P-value significance threshold
            
        fdr_method : str
            Multiple testing correction method to use
    """

    import pandas as pd
    from pathlib import Path
    from statistical_testing.stats_helper import pairwise_ttest
    from statistical_testing.perform_keio_stats import df_summary_stats
    from visualisation.plotting_helper import sig_asterix
    from write_data.write import write_list_to_file
    from tierpsytools.analysis.statistical_tests import univariate_tests, get_effect_sizes

    # categorical variables to investigate: 'gene_name' and 'window'
    print(
        "\nInvestigating variation in fraction of worms paused between hit strains and control "
        + "(for each window)")

    # check there will be no errors due to case-sensitivity
    assert len(metadata[group_by].unique()) == len(
        metadata[group_by].str.upper().unique())

    # subset for list of windows
    if windows is None:
        windows = sorted(metadata['window'].unique())
    else:
        assert all(w in sorted(metadata['window'].unique()) for w in windows)
        metadata = metadata[metadata['window'].isin(windows)]
        features = features[[feat]].reindex(metadata.index)

    # print mean sample size
    sample_size = df_summary_stats(metadata, columns=[group_by, 'window'])
    print("Mean sample size of %s/window: %d" %
          (group_by, int(sample_size['n_samples'].mean())))

    control_meta = metadata[metadata[group_by] == control]
    control_feat = features.reindex(control_meta.index)
    control_df = control_meta.join(control_feat)

    n = len(metadata[group_by].unique())
    strain_list = [s for s in metadata[group_by].unique() if s != control]
    fset = []

    if n > 2:

        # Perform ANOVA - is there variation among strains at each window?
        anova_path = Path(
            save_dir) / 'ANOVA' / 'ANOVA_{}window_results.csv'.format(
                len(windows))
        anova_path.parent.mkdir(parents=True, exist_ok=True)

        stats, pvals, reject = univariate_tests(
            X=features,
            y=metadata[group_by],
            control=control,
            test='ANOVA',
            comparison_type='multiclass',
            multitest_correction=fdr_method,
            alpha=pvalue_threshold,
            n_permutation_test=None)

        # get effect sizes
        effect_sizes = get_effect_sizes(X=features,
                                        y=metadata[group_by],
                                        control=control,
                                        effect_type=None,
                                        linked_test='ANOVA')

        # compile + save results
        test_results = pd.concat([stats, effect_sizes, pvals, reject], axis=1)
        test_results.columns = ['stats', 'effect_size', 'pvals', 'reject']
        test_results['significance'] = sig_asterix(test_results['pvals'])
        test_results = test_results.sort_values(
            by=['pvals'], ascending=True)  # rank by p-value
        test_results.to_csv(anova_path, header=True, index=True)

        # use reject mask to find significant feature set
        fset = pvals.loc[reject['ANOVA']].sort_values(
            by='ANOVA', ascending=True).index.to_list()

        if len(fset) > 0:
            print("%d significant features found by ANOVA for '%s' (P<%.2f, %s)" %\
                  (len(fset), group_by, pvalue_threshold, fdr_method))
            anova_sigfeats_path = anova_path.parent / 'ANOVA_sigfeats.txt'
            write_list_to_file(fset, anova_sigfeats_path)

    if n == 2 or len(fset) > 0:

        # pairwise t-tests
        for strain in strain_list:
            print(
                "\nPairwise t-tests for each window comparing fraction of worms paused "
                + "on %s vs control" % strain)

            ttest_strain_path = Path(save_dir) / 'pairwise_ttests' /\
                '{}_window_results.csv'.format(strain)
            ttest_strain_path.parent.mkdir(parents=True, exist_ok=True)

            strain_meta = metadata[metadata[group_by] == strain]
            strain_feat = features.reindex(strain_meta.index)
            strain_df = strain_meta.join(strain_feat[[feat]])

            stats, pvals, reject = pairwise_ttest(control_df,
                                                  strain_df,
                                                  feature_list=[feat],
                                                  group_by='window',
                                                  fdr_method=fdr_method,
                                                  fdr=pvalue_threshold)

            # compile table of results
            stats.columns = ['stats_' + str(c) for c in stats.columns]
            pvals.columns = ['pvals_' + str(c) for c in pvals.columns]
            reject.columns = ['reject_' + str(c) for c in reject.columns]
            test_results = pd.concat([stats, pvals, reject], axis=1)

            # save results
            test_results.to_csv(ttest_strain_path, header=True, index=True)

            for window in windows:
                print("%s difference in '%s' between %s vs %s in window %s (t-test, P=%.3f, %s)" %\
                      (("SIGNIFICANT" if reject.loc[feat, 'reject_{}'.format(window)] else "No"),
                      feat, strain, control, window, pvals.loc[feat, 'pvals_{}'.format(window)],
                      fdr_method))

    return
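
# A usage sketch (not part of the original analysis); the group/control names, save
# path and window list below are assumptions for illustration only.
def _example_window_stats_call(metadata, features):
    single_feature_window_stats(metadata, features,
                                group_by='gene_name',
                                control='wild_type',
                                save_dir='/path/to/results/window_stats',
                                windows=[2],
                                feat='motion_mode_paused_fraction',
                                pvalue_threshold=0.05,
                                fdr_method='fdr_by')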
def compounds_univariate_effect_sizes(feat,
                                      drug_name,
                                      drug_dose=None,
                                      control='DMSO',
                                      test='ANOVA',
                                      comparison_type='multiclass',
                                      ignore_names=['NoCompound'],
                                      n_jobs=-1):
    """
    Estimates the effect size of drugs at different doses compared to control.

    Parameters
    ----------
    feat : dataframe
        feature dataframe, shape=(n_samples, n_features)
    drug_name : array-like, shape=(n_samples)
        defines the type of drug in each sample
    drug_dose : array-like or None, shape=(n_samples)
        defines the drug dose in each sample.
        If None, it is assumed that each drug was tested at a single dose.
    control : str
        the name of the control samples in the drug_name array
    test : str, options: ['ANOVA', 't-test', 'Kruskal_Wallis', 'Wilkoxon_Rank_Sum']
        The type of statistical comparison that will define the type of effect size.
    comparison_type : str, options: ['multiclass', 'binary_each_dose']
        defines the groups seen in the statistical test.
        - If 'multiclass', then one comparison is made considering the controls
        and each drug dose as separate groups.
        - If 'binary_each_dose', then separate tests are performed for each
        dose in every feature. Each test compares one dose to the controls.
        If any of the tests reaches the significance threshold (after
        correction for multiple comparisons), then the feature is
        considered significant.
        * When only one dose was tested per compound, then this parameter can take any value.
    ignore_names : list or None, optional
        list of names from the drug_name array to ignore in the comparisons
        (in addition to the control)
    n_jobs: int
        Number of jobs for parallel processing.

    Return
    ------
    effect : dataframe
        a dataframe with all the effect sizes per drug
    """
    from tierpsytools.analysis.statistical_tests import get_effect_sizes

    if drug_dose is None:
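        # no doses were provided: code every treated sample as dose 1 and the control
        # samples as dose 0, so a single binary comparison is made per drug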
        drug_dose = np.ones(drug_name.shape)
        drug_dose[drug_name == control] = 0

    # get the dose entry for the control
    control_dose = np.unique(drug_dose[drug_name == control])
    if control_dose.shape[0] > 1:
        raise ValueError('The dose assigned to the control data is not ' +
                         'unique.')
    control_dose = control_dose[0]

    # Ignore the control and any names defined by user
    if ignore_names is None:
        ignore_names = []
    ignore_names.append(control)

    # Get the list of drug names to test
    drug_names = np.array(
        [drug for drug in np.unique(drug_name) if drug not in ignore_names])

    # Initialize list to store effect_sizes
    effect = {}

    # Loop over drugs to test
    for idrug, drug in enumerate(drug_names):
        print('Getting effect sizes for compound {}...'.format(drug))

        # get mask for the samples of the drug and the control
        mask = np.isin(drug_name, [drug, control])

        # Run all the tests
        _effect = get_effect_sizes(feat[mask],
                                   drug_dose[mask],
                                   control=control_dose,
                                   test=test,
                                   comparison_type=comparison_type)

        # Transpose df to have doses as index
        _effect = _effect.T
        _effect.index.rename('drug_dose', inplace=True)

        effect[drug] = _effect.reset_index(drop=False)

    # concatenate individual dataframes
    effect = pd.concat(
        [x.assign(drug_name=drug) for drug, x in effect.items()],
        axis=0).reset_index(drop=True)
    # bring drug name and drug dose to the front
    effect = effect[
        ['drug_name', 'drug_dose'] + \
        effect.columns.difference(['drug_name', 'drug_dose']).to_list()
        ]

    return effect
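
# A usage sketch (not part of the original screening code); the metadata column names
# used below are assumptions for illustration only.
def _example_effect_sizes_call(features, metadata):
    effect = compounds_univariate_effect_sizes(feat=features,
                                               drug_name=metadata['drug_type'].values,
                                               drug_dose=metadata['drug_dose'].values,
                                               control='DMSO',
                                               test='ANOVA',
                                               comparison_type='multiclass')
    return effect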
def control_variation(feat, 
                      meta, 
                      args,
                      variables=['date_yyyymmdd','instrument_name','imaging_run_number'],
                      n_sig_features=None):
    """ Analyse variation in control data with respect to each categorical variable in 'variables'
        
        Inputs
        ------
        feat, meta : pd.DataFrame
            Matching features summaries and metadata for control data
        
        args : Object 
            Python object with the following attributes:
            - remove_outliers : bool
            - grouping_variable : str
            - control_dict : dict
            - test : str
            - pval_threshold : float
            - fdr_method : str
            - n_sig_features : int
            - n_top_feats : int
            - drop_size_features : bool
            - norm_features_only : bool
            - percentile_to_use : str
            
        variables : list
            List of categorical random variables to analyse variation in control data
    """
    
    assert set(feat.index) == set(meta.index)
            
    save_dir = get_save_dir(args) / "control"

    # Stats test to use
    assert args.test in ['ANOVA','Kruskal','LMM']
    t_test = 't-test' if args.test == 'ANOVA' else 'Mann-Whitney' # aka. Wilcoxon rank-sums
    
    for grouping_var in tqdm(variables):
        
        # convert grouping variable column to factor (categorical)
        meta[grouping_var] = meta[grouping_var].astype(str)
                
        # get control group for eg. date_yyyymmdd
        control_group = str(args.control_dict[grouping_var])
        print("\nInvestigating variation in '%s' (control: '%s')" % (grouping_var, control_group))

        # Record mean sample size per group
        mean_sample_size = int(np.round(meta.groupby([grouping_var]).size().mean()))
        print("Mean sample size: %d" % mean_sample_size)
        
        group_list = list(meta[grouping_var].unique())
        stats_dir =  save_dir / "Stats" / grouping_var
        plot_dir = save_dir / "Plots" / grouping_var

        ##### STATISTICS #####
                      
        stats_path = stats_dir / '{}_results.csv'.format(args.test) # LMM/ANOVA/Kruskal  
        ttest_path = stats_dir / '{}_results.csv'.format(t_test)
    
        if not (stats_path.exists() and ttest_path.exists()):
            stats_path.parent.mkdir(exist_ok=True, parents=True)
            ttest_path.parent.mkdir(exist_ok=True, parents=True)
        
            fset = [] # ensure 'fset' is defined even if the ANOVA/Kruskal step below is skipped (eg. args.test == 'LMM')

            ### ANOVA / Kruskal-Wallis tests for significantly different features across groups
            if (args.test == "ANOVA" or args.test == "Kruskal"):
                if len(group_list) > 2:                    
                    stats, pvals, reject = univariate_tests(X=feat, 
                                                            y=meta[grouping_var], 
                                                            control=control_group, 
                                                            test=args.test,
                                                            comparison_type='multiclass',
                                                            multitest_correction=args.fdr_method, 
                                                            alpha=0.05)
                    # get effect sizes
                    effect_sizes = get_effect_sizes(X=feat, 
                                                    y=meta[grouping_var],
                                                    control=control_group,
                                                    effect_type=None,
                                                    linked_test=args.test)
                                                
                    anova_table = pd.concat([stats, effect_sizes, pvals, reject], axis=1)
                    anova_table.columns = ['stats','effect_size','pvals','reject']     

                    anova_table['significance'] = sig_asterix(anova_table['pvals'])
        
                    # Sort pvals + record significant features
                    anova_table = anova_table.sort_values(by=['pvals'], ascending=True)
                    fset = list(anova_table['pvals'].index[np.where(anova_table['pvals'] < 
                                                                    args.pval_threshold)[0]])
                    
                    # Save statistics results + significant feature set to file
                    anova_table.to_csv(stats_path, header=True, index=True)
            
                    if len(fset) > 0:
                        anova_sigfeats_path = Path(str(stats_path).replace('_results.csv', '_sigfeats.txt'))
                        write_list_to_file(fset, anova_sigfeats_path)
                        print("\n%d significant features found by %s for '%s' (P<%.2f, %s)" %\
                              (len(fset), args.test, grouping_var, args.pval_threshold, args.fdr_method))
                else:
                    fset = []
                    print("\nWARNING: Not enough groups for %s for '%s' (n=%d groups)" %\
                          (args.test, grouping_var, len(group_list)))
            
            ### t-tests / Mann-Whitney tests
            if len(fset) > 0 or len(group_list) == 2:
                stats_t, pvals_t, reject_t = univariate_tests(X=feat, 
                                                              y=meta[grouping_var], 
                                                              control=control_group, 
                                                              test=t_test,
                                                              comparison_type='binary_each_group',
                                                              multitest_correction=args.fdr_method, 
                                                              alpha=0.05)
                effect_sizes_t =  get_effect_sizes(X=feat, y=meta[grouping_var], 
                                                   control=control_group,
                                                   effect_type=None,
                                                   linked_test=t_test)
                
                stats_t.columns = ['stats_' + str(c) for c in stats_t.columns]
                pvals_t.columns = ['pvals_' + str(c) for c in pvals_t.columns]
                reject_t.columns = ['reject_' + str(c) for c in reject_t.columns]
                effect_sizes_t.columns = ['effect_size_' + str(c) for c in effect_sizes_t.columns]
                
                ttest_table = pd.concat([stats_t, effect_sizes_t, pvals_t, reject_t], axis=1)

                # Record t-test significant feature set (NOT ORDERED)
                fset_ttest = list(pvals_t.index[(pvals_t < args.pval_threshold).sum(axis=1) > 0])
                
                # Save t-test results to file
                ttest_table.to_csv(ttest_path, header=True, index=True) # Save test results to CSV

                if len(fset_ttest) > 0:
                    ttest_sigfeats_path = Path(str(ttest_path).replace('_results.csv', '_sigfeats.txt'))
                    write_list_to_file(fset_ttest, ttest_sigfeats_path)
                    print("%d signficant features found for any %s vs %s (%s, P<%.2f)" %\
                          (len(fset_ttest), grouping_var, control_group, t_test, args.pval_threshold))
                
                # Barplot of number of significantly different features for each strain   
                barplot_sigfeats(test_pvalues_df=pvals_t, 
                                 saveDir=plot_dir,
                                 p_value_threshold=args.pval_threshold,
                                 test_name=t_test)
                                 
        ### Load statistics results
        
        # Read ANOVA results and record significant features
        print("\nLoading statistics results")
        if len(group_list) > 2:
            anova_table = pd.read_csv(stats_path, index_col=0)
            pvals = anova_table.sort_values(by='pvals', ascending=True)['pvals']
            fset = pvals[pvals < args.pval_threshold].index.to_list()
            print("%d significant features found by %s (P<%.2f)" % (len(fset), args.test, 
                                                                    args.pval_threshold))
        
        # Read t-test results and record significant features (NOT ORDERED)
        ttest_table = pd.read_csv(ttest_path, index_col=0)
        pvals_t = ttest_table[[c for c in ttest_table if "pvals_" in c]]             
        fset_ttest = pvals_t[(pvals_t < args.pval_threshold).sum(axis=1) > 0].index.to_list()
        print("%d significant features found by %s (P<%.2f)" % (len(fset_ttest), t_test, args.pval_threshold))
            
        # Use t-test significant feature set if comparing just 2 strains
        if len(group_list) == 2:
            fset = fset_ttest
                       
        if not n_sig_features:
            if args.n_sig_features is not None:
                n_sig_features = args.n_sig_features 
            else:
                n_sig_features = len(fset)
                                   
        ##### Plotting #####
        
        superplot_dir = plot_dir / 'superplots' 

        if len(fset) > 1:        
            for feature in tqdm(fset[:n_sig_features]):                
                # plot variation in variable with respect to 'date_yyyymmdd'
                superplot(feat, meta, feature, 
                          x1=grouping_var, 
                          x2=None if grouping_var == 'date_yyyymmdd' else 'date_yyyymmdd',
                          saveDir=superplot_dir,
                          pvals=pvals_t if grouping_var == 'date_yyyymmdd' else None,
                          pval_threshold=args.pval_threshold,
                          show_points=True, 
                          plot_means=True,
                          dodge=True)
                # plot variation in variable with respect to 'instrument_name'
                superplot(feat, meta, feature, 
                          x1=grouping_var, 
                          x2=None if grouping_var == 'instrument_name' else 'instrument_name',
                          saveDir=superplot_dir,
                          pvals=pvals_t if grouping_var == 'instrument_name' else None,
                          pval_threshold=args.pval_threshold,
                          show_points=True, 
                          plot_means=True,
                          dodge=True)
                # plot variation in variable with respect to 'imaging_run_number'
                superplot(feat, meta, feature, 
                          x1=grouping_var, 
                          x2=None if grouping_var == 'imaging_run_number' else 'imaging_run_number',
                          saveDir=superplot_dir,
                          pvals=pvals_t if grouping_var == 'imaging_run_number' else None,
                          pval_threshold=args.pval_threshold,
                          show_points=True, 
                          plot_means=True,
                          dodge=True)
                            
            # # Boxplots of significant features by ANOVA/LMM (all groups)
            # boxplots_grouped(feat_meta_df=meta.join(feat), 
            #                  group_by=grouping_var,
            #                  control_group=str(control_group),
            #                  test_pvalues_df=pvals_t.T, # ranked by test pvalue significance
            #                  feature_set=fset,
            #                  saveDir=(plot_dir / 'grouped_boxplots'),
            #                  max_feats2plt=args.n_sig_features, 
            #                  max_groups_plot_cap=None,
            #                  p_value_threshold=args.pval_threshold,
            #                  drop_insignificant=False,
            #                  sns_colour_palette="tab10",
            #                  figsize=[6, (len(group_list)/3 if len(group_list)>10 else 12)])
                    
            # Individual boxplots of significant features by pairwise t-test (each group vs control)
            # boxplots_sigfeats(feat_meta_df=meta.join(feat), 
            #                   test_pvalues_df=pvals_t, 
            #                   group_by=grouping_var, 
            #                   control_strain=control_group, 
            #                   feature_set=fset, #['speed_norm_50th_bluelight'],
            #                   saveDir=plot_dir / 'paired_boxplots',
            #                   max_feats2plt=args.n_sig_features,
            #                   p_value_threshold=args.pval_threshold,
            #                   drop_insignificant=True,
            #                   verbose=False)
                
            # from tierpsytools.analysis.significant_features import plot_feature_boxplots
            # plot_feature_boxplots(feat_to_plot=fset,
            #                       y_class=grouping_var,
            #                       scores=pvalues.rank(axis=1),
            #                       feat=feat,
            #                       pvalues=np.asarray(pvalues).flatten(),
            #                       saveto=None,
            #                       close_after_plotting=False)
        
        ##### Hierarchical Clustering Analysis #####
        print("\nPerforming hierarchical clustering analysis...")

        assert not feat.isna().sum(axis=1).any()
        assert not (feat.std(axis=1) == 0).any()
        
        # Z-normalise data
        featZ = feat.apply(zscore, axis=0)
        #featZ = (feat-feat.mean())/feat.std() # minus mean, divide by std
        
        #from tierpsytools.preprocessing.scaling_class import scalingClass
        #scaler = scalingClass(scaling='standardize')
        #featZ = scaler.fit_transform(feat)

        # NOT NEEDED?
        # # Drop features with NaN values after normalising
        # n_cols = len(featZ.columns)
        # featZ.dropna(axis=1, inplace=True)
        # n_dropped = n_cols - len(featZ.columns)
        # if n_dropped > 0:
        #     print("Dropped %d features after normalisation (NaN)" % n_dropped)
    
        ### Control clustermap
        
        # control data is clustered and feature order is stored and applied to full data
        if len(group_list) > 1 and len(group_list) < 50 and grouping_var != 'date_yyyymmdd':
            control_clustermap_path = plot_dir / 'heatmaps' / (grouping_var + '_date_clustermap.pdf')
            cg = plot_clustermap(featZ, meta,
                                 group_by=([grouping_var] if grouping_var == 'date_yyyymmdd' 
                                           else [grouping_var, 'date_yyyymmdd']),
                                 col_linkage=None,
                                 method=METHOD,#[linkage, complete, average, weighted, centroid]
                                 metric=METRIC,
                                 figsize=[15,8],
                                 sub_adj={'bottom':0.02,'left':0.02,'top':1,'right':0.85},
                                 label_size=12,
                                 show_xlabels=False,
                                 saveto=control_clustermap_path)
    
            #col_linkage = cg.dendrogram_col.calculated_linkage
            clustered_features = np.array(featZ.columns)[cg.dendrogram_col.reordered_ind]
        else:
            clustered_features = None
                    
        ## Save z-normalised values
        # z_stats = featZ.join(meta[grouping_var]).groupby(by=grouping_var).mean().T
        # z_stats.columns = ['z-mean_' + v for v in z_stats.columns.to_list()]
        # z_stats.to_csv(z_stats_path, header=True, index=None)
        
        # Clustermap of full data       
        full_clustermap_path = plot_dir / 'heatmaps' / (grouping_var + '_clustermap.pdf')
        fg = plot_clustermap(featZ, meta, 
                             group_by=grouping_var,
                             col_linkage=None,
                             method=METHOD,
                             metric=METRIC,
                             figsize=[15,8],
                             sub_adj={'bottom':0.02,'left':0.02,'top':1,'right':0.9},
                             label_size=12,
                             saveto=full_clustermap_path)
        
        # If no control clustering (due to no day variation) then use clustered features for all 
        # strains to order barcode heatmaps
        if clustered_features is None:
            clustered_features = np.array(featZ.columns)[fg.dendrogram_col.reordered_ind]
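        # p-values used to annotate the barcode heatmaps below: ANOVA p-values when
        # there are more than two groups, otherwise the pairwise t-test p-values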
        
        if len(group_list) > 2:
            pvals_heatmap = anova_table.loc[clustered_features, 'pvals']
        elif len(group_list) == 2:
            pvals_heatmap = pvals_t.loc[clustered_features, pvals_t.columns[0]]
        pvals_heatmap.name = 'P < {}'.format(args.pval_threshold)
    
        assert all(f in featZ.columns for f in pvals_heatmap.index)
                
        # Plot barcode heatmap (grouping by date)
        if len(group_list) > 1 and len(group_list) < 50 and grouping_var != 'date_yyyymmdd':
            heatmap_date_path = plot_dir / 'heatmaps' / (grouping_var + '_date_heatmap.pdf')
            plot_barcode_heatmap(featZ=featZ[clustered_features], 
                                 meta=meta, 
                                 group_by=[grouping_var, 'date_yyyymmdd'],
                                 pvalues_series=pvals_heatmap,
                                 p_value_threshold=args.pval_threshold,
                                 selected_feats=fset if len(fset) > 0 else None,
                                 saveto=heatmap_date_path,
                                 figsize=[20,7],
                                 sns_colour_palette="Pastel1")
        
        # Plot group-mean heatmap (averaged across days)
        heatmap_path = plot_dir / 'heatmaps' / (grouping_var + '_heatmap.pdf')
        plot_barcode_heatmap(featZ=featZ[clustered_features], 
                             meta=meta, 
                             group_by=[grouping_var], 
                             pvalues_series=pvals_heatmap,
                             p_value_threshold=args.pval_threshold,
                             selected_feats=fset if len(fset) > 0 else None,
                             saveto=heatmap_path,
                             figsize=[20, (int(len(group_list) / 4) if len(group_list) > 10 else 6)],
                             sns_colour_palette="Pastel1")        
                        
        ##### Principal Components Analysis #####
        print("Performing principal components analysis")
    
        if args.remove_outliers:
            outlier_path = plot_dir / 'mahalanobis_outliers.pdf'
            feat, inds = remove_outliers_pca(df=feat, 
                                             features_to_analyse=None, 
                                             saveto=outlier_path)
            meta = meta.reindex(feat.index) # reindex metadata
            featZ = feat.apply(zscore, axis=0) # re-normalise data

            # Drop features with NaN values after normalising
            n_cols = len(featZ.columns)
            featZ.dropna(axis=1, inplace=True)
            n_dropped = n_cols - len(featZ.columns)
            if n_dropped > 0:
                print("Dropped %d features after normalisation (NaN)" % n_dropped)

        #from tierpsytools.analysis.decomposition import plot_pca
        pca_dir = plot_dir / 'PCA'
        _ = plot_pca(featZ, meta, 
                     group_by=grouping_var, 
                     control=control_group,
                     var_subset=None, 
                     saveDir=pca_dir,
                     PCs_to_keep=10,
                     n_feats2print=10,
                     sns_colour_palette="plasma",
                     n_dims=2,
                     label_size=15,
                     figsize=[9,8],
                     sub_adj={'bottom':0.13,'left':0.12,'top':0.98,'right':0.98},
                     # legend_loc='upper right',
                     # n_colours=20,
                     hypercolor=False)