Python ANOVA Examples, gdsctools.ANOVA Python Examples

Example #1

0

Show file

File: test_volcano.py Project: howard-lightfoot/gdsctools

def test_volcano_plot():
    """Smoke-test VolcanoANOVA: constructors, selector, plots and helpers."""
    # Keep only the first ten feature columns, then rebuild the analysis
    # on top of that reduced feature set.
    full = ANOVA(ic50_test)
    first_ten = full.features.df.columns[0:10]
    full.features.df = full.features.df[first_ten]
    reduced = ANOVA(ic50_test, genomic_features=full.features.df)

    outcome = reduced.anova_all()

    # try both constructor inputs: the dataframe and the results object
    volcano = VolcanoANOVA(outcome.df)
    volcano = VolcanoANOVA(outcome)

    # the selector method
    volcano.df = volcano.selector(volcano.df)

    # turn the savefig setting off before plotting
    volcano.settings.savefig = False

    # some of the plotting
    volcano.volcano_plot_all_drugs()
    volcano.volcano_plot_all_features()
    volcano.volcano_plot_all()

    # private conversion helpers: scalar and list inputs
    volcano._get_fdr_from_pvalue_interp(1e-10)
    volcano._get_pvalue_from_fdr(50)
    volcano._get_pvalue_from_fdr([50, 60])

Example #2

0

Show file

File: test_boxplots.py Project: howard-lightfoot/gdsctools

def test_get_boxplot_data():
    """Check what BoxPlots._get_boxplot_data returns in 'msi' mode.

    The second item must hold the four MSI labels and the first two
    entries of the third item must be close to the reference values.
    """
    analysis = ANOVA(ic50_test)
    odof = analysis._get_one_drug_one_feature_data(
        'Drug_1047_IC50', 'TP53_mut')
    boxplots = BoxPlots(odof)

    data = boxplots._get_boxplot_data(mode='msi')

    expected_labels = [
        '***MSI-stable neg',
        '***MSI-stable pos',
        '**MSI-unstable neg',
        '**MSI-unstable pos',
    ]
    assert data[1] == expected_labels

    expected = [2.0108071495663922e-47, 0.0012564798887037905]
    first_two = [data[2][position] for position in (0, 1)]
    assert_list_almost_equal(first_two, expected)

Example #3

0

Show file

File: test_boxplots.py Project: shukwong/gdsctools

def test_get_boxplot_data():
    """Check what BoxPlots._get_boxplot_data returns in 'msi' mode.

    The drug is addressed by its integer identifier (1047). The second
    item of the result must hold the four MSI labels and the first two
    entries of the third item must be close to the reference values.
    """
    analysis = ANOVA(ic50_test)
    odof = analysis._get_one_drug_one_feature_data(1047, 'TP53_mut')
    boxplots = BoxPlots(odof)

    data = boxplots._get_boxplot_data(mode='msi')

    expected_labels = [
        '***MSI-stable neg',
        '***MSI-stable pos',
        '**MSI-unstable neg',
        '**MSI-unstable pos',
    ]
    assert data[1] == expected_labels

    expected = [2.0108071495663922e-47, 0.0012564798887037905]
    first_two = [data[2][position] for position in (0, 1)]
    assert_list_almost_equal(first_two, expected)

Example #4

0

Show file

def test_IC50Cluster():
    """Check IC50Cluster on the ``test_v18_clustering.tsv`` data set.

    Verifies the expected drug columns, the number of drugs and rows, and
    the number of feasible tests reported by ANOVA diagnostics.
    """
    dataset = gdsctools_data("test_v18_clustering.tsv")
    ic50 = IC50Cluster(dataset)

    # In this data set, a drug is reported 3 times (1211) and should appear
    # as follows:
    assert 1211 in ic50.df.columns
    assert 11211 in ic50.df.columns
    assert 21211 in ic50.df.columns

    assert len(ic50.drugIds) == 860
    assert len(ic50.df) == 50

    an = ANOVA(ic50, dataset)
    # The comparison has to be asserted: as a bare expression it would be
    # evaluated and thrown away, so the check could never fail.
    assert an.diagnostics()['feasible_tests'] == 65026

Example #5

0

Show file

File: test_gdsc.py Project: CancerRxGene/gdsctools

def test_IC50Cluster():
    """Check IC50Cluster on the ``test_v18_clustering.tsv`` data set.

    Verifies the expected drug columns, the number of drugs and rows, and
    the number of feasible tests reported by ANOVA diagnostics.
    """
    dataset = gdsctools_data("test_v18_clustering.tsv")
    ic50 = IC50Cluster(dataset)

    # In this data set, a drug is reported 3 times (1211) and should appear
    # as follows:
    for drug_id in (1211, 11211, 21211):
        assert drug_id in ic50.df.columns

    assert len(ic50.drugIds) == 860
    assert len(ic50.df) == 50

    an = ANOVA(ic50, dataset)
    # The comparison has to be asserted: as a bare expression it would be
    # evaluated and thrown away, so the check could never fail.
    assert an.diagnostics()['feasible_tests'] == 65026

Example #6

0

Show file

File: pipelines.py Project: howard-lightfoot/gdsctools

def anova_pipeline(args=None):
    """This function is used by the standalone application called
    **gdsctools_anova**

    Type::

        gdsctools_anova --help

    to get some help.

    :param args: command-line arguments laid out like ``sys.argv`` (the
        first item is skipped when parsing). When None, a copy of
        ``sys.argv`` is used. A sequence with a single item is parsed as
        if ``--help`` had been given; the caller's sequence is not
        modified.
    :return: None
    :raises SystemExit: (exit code 1) when ``options.drug`` is set but is
        not a column of the IC50 data.
    """
    msg = "Welcome to GDSCTools standalone"
    print_color(msg, purple, underline=True)

    # Keep the argument args as None by default to
    # allow testing e.g., in nosetests
    if args is None:
        args = sys.argv[:]
    elif len(args) == 1:
        # Build a new list instead of using +=, which would extend the
        # caller's list in place.
        args = list(args) + ['--help']

    user_options = ANOVAOptions(prog="gdsctools_anova")
    try:
        options = user_options.parse_args(args[1:])
    except SystemExit:
        # the parser exited on its own; nothing left to do
        return

    # -----------------------------------------------------------------
    # ---------------------------------------- options without analysis
    # -----------------------------------------------------------------

    if options.version is True:
        print("This is version %s of gdsctools_anova" % gdsctools.version)
        return

    if options.testing is True:
        print('Testing mode:')
        from gdsctools import ANOVA, ic50_test
        an = ANOVA(ic50_test)
        df = an.anova_one_drug_one_feature('Drug_1047_IC50', 'TP53_mut')

        assert df.loc[1,'N_FEATURE_pos'] == 554, \
            "N_feature_pos must be equal to 554"
        print(df.T)
        print(darkgreen("\nGDSCTools seems to be installed properly"))
        return

    if options.save_settings:
        from gdsctools import ANOVA, ic50_test
        an = ANOVA(ic50_test)
        an.settings.to_json(options.save_settings)
        print('Save a default parameter set in %s' % options.save_settings)
        return

    if options.license is True:
        print(gdsctools.license)
        return

    if options.summary is True:
        from gdsctools import anova
        an = anova.ANOVA(options.input_ic50, options.input_features)
        print(an)
        return

    if options.print_tissues is True:
        from gdsctools import anova
        an = anova.ANOVA(options.input_ic50, options.input_features)

        tissues = an.tissue_factor
        try:
            tissues = tissues.sort_values('Tissue Factor').unique()
        except Exception:
            # Fall back to sort() when sort_values() is missing or fails
            # (presumably older pandas -- TODO confirm). Catching
            # Exception rather than everything lets KeyboardInterrupt
            # and SystemExit propagate.
            tissues = tissues.sort(inplace=False).unique()
        for name in tissues:
            print(name)
        return

    if options.print_drugs is True:
        from gdsctools import anova
        gdsc = anova.ANOVA(options.input_ic50, options.input_features)
        import textwrap
        # str() each identifier so that non-string ids can be joined too
        drug_names = " , ".join(str(drug) for drug in gdsc.drugIds)
        print("\n".join(textwrap.wrap(drug_names)))
        return

    if options.print_features is True:
        from gdsctools import anova
        gdsc = anova.ANOVA(options.input_ic50, options.input_features)
        import textwrap
        feature_names = " , ".join(str(name) for name in gdsc.feature_names)
        print("\n".join(textwrap.wrap(feature_names)))
        return

    # -----------------------------------------------------------------
    # --------------------------------------------------- real analysis
    # -----------------------------------------------------------------
    # dispatcher to the functions according to the user parameters

    from gdsctools import ANOVA, ANOVAReport
    anova = ANOVA(options.input_ic50, options.input_features,
            options.input_drug,
            low_memory=not options.fast)
    anova = _set_settings(anova, options)

    if options.drug and options.drug not in anova.ic50.df.columns:
        print(red("Invalid Drug. Try --print-drug-names"))
        sys.exit(1)

    if options.drug is not None and options.feature is not None:
        print_color("ODOF mode", purple)
        anova_one_drug_one_feature(anova, options)
    elif options.drug is not None:
        print_color("ODAF mode", purple)
        anova_one_drug(anova, options)
    else: # analyse everything
        if options.feature is None:
            print_color("ADAF mode", purple)
        else:
            print_color("ADOF mode", purple)
        anova_all(anova, options)

    if options.onweb is False and options.no_html is False:
        msg = "\nNote that a directory {} was created and files saved into it"
        print(purple(msg.format(options.directory)))

    return

Example #7

0

Show file

File: plot_volcano.py Project: CancerRxGene/gdsctools

"""
Analyse all associations (drug/feature)
=========================================

Volcano plot (all associations)
"""



#####################################################
#
from gdsctools import ANOVA, ic50_test
# Build the analysis from the ic50_test data shipped with gdsctools
gdsc = ANOVA(ic50_test)
# Analyse all drug/feature associations
results = gdsc.anova_all()
# Draw the volcano plot of all associations
results.volcano()

Example #8

0

Show file

File: test_anova_all_brca.py Project: saezrodriguez/gdsctools

def test_anova_brca():
    """Regression test: breast-cancer ANOVA run and its report summary.

    Compares the column sums of the results and the report diagnostics
    with reference values, then removes the 'breast' directory.
    """
    import shutil

    all_tissues = ANOVA(gdsctools_data('IC50_v17.csv.gz'))
    all_tissues.set_cancer_type('breast')

    brca = ANOVA(all_tissues.ic50, gdsctools_data('GF_BRCA_v17.csv.gz'))
    results = brca.anova_all()

    # Sum every column, drop the listed columns and replace NaN by zero.
    dropped_columns = ['DRUG_TARGET', 'DRUG_NAME', 'DRUG_ID', 'FEATURE']
    summed = results.df.sum().drop(dropped_columns).fillna(0).to_dict()

    reference_sums = {
        'ANOVA_FEATURE_FDR': 1133416.7761055394,
        'ANOVA_FEATURE_pval': 5824.8201538614458,
        'FEATURE_IC50_T_pval': 5824.8201538614449,
        'FEATURE_IC50_effect_size': 4408.511449781573,
        'FEATURE_delta_MEAN_IC50': 261.11373729866705,
        'FEATURE_neg_Glass_delta': 4487.7401723134735,
        'FEATURE_neg_IC50_sd': 14701.868130868914,
        'FEATURE_neg_logIC50_MEAN': 28701.510736736222,
        'FEATURE_pos_Glass_delta': 6536.8938399490198,
        'FEATURE_pos_IC50_sd': 13362.588398939894,
        'FEATURE_pos_logIC50_MEAN': 28962.624474034845,
        'ANOVA_MSI_pval': 0.0,
        'N_FEATURE_neg': 439196.0,
        'N_FEATURE_pos': 92140.0,
        'ANOVA_TISSUE_pval': 0.0,
        'ASSOC_ID': 68509365.0,
        'index': 68497660.0,
    }

    # ANOVA_MEDIA_pval is the only column left out of the comparison.
    for column, observed in summed.items():
        if column != 'ANOVA_MEDIA_pval':
            assert_almost_equal(observed, reference_sums[column])

    # test part of the report (summary section)
    report = ANOVAReport(brca, results)
    diagnostics = report.diagnostics().to_dict()

    # Rows are keyed 0..16, so enumerate() rebuilds the integer keys.
    reference_text = dict(enumerate([
        'Type of analysis',
        'Total number of possible drug/feature associations',
        'Total number of ANOVA tests performed',
        'Percentage of tests performed',
        '',
        'Total number of tested drugs',
        'Total number of genomic features used',
        'Total number of screened cell lines',
        'MicroSatellite instability included as factor',
        '',
        'Total number of significant associations',
        ' - sensitive',
        ' - resistant',
        'p-value significance threshold',
        'FDR significance threshold',
        'Range of significant p-values',
        'Range of significant % FDRs',
    ]))
    reference_value = dict(enumerate([
        'breast',
        13780,
        11705,
        84.94,
        '',
        265,
        52,
        51,
        False,
        '',
        27,
        17,
        10,
        0.001,
        25,
        '[2.098e-09, 0.0004356]',
        '[0.002456 18.89]',
    ]))

    assert diagnostics == {'text': reference_text, 'value': reference_value}

    shutil.rmtree('breast')

Example #9

0

Show file

"""
Analyse all associations (drug/feature)
=========================================

Volcano plot (all associations)
"""

#####################################################
#
from gdsctools import ANOVA, ic50_test
# Build the analysis from the ic50_test data shipped with gdsctools
gdsc = ANOVA(ic50_test)
# Analyse all drug/feature associations
results = gdsc.anova_all()
# Draw the volcano plot of all associations
results.volcano()

Example #10

0

Show file

File: test_anova_all_brca.py Project: shukwong/gdsctools

def test_anova_brca():
    """Regression test: breast-cancer ANOVA run and its report summary.

    Compares the column sums of the results and the report diagnostics
    with reference values.
    """
    all_tissues = ANOVA(gdsctools_data('IC50_v17.csv.gz'))
    all_tissues.set_cancer_type('breast')

    brca = ANOVA(all_tissues.ic50, gdsctools_data('GF_BRCA_v17.csv.gz'))
    results = brca.anova_all()

    # Sum every column, drop the listed columns and replace NaN by zero.
    dropped_columns = ['DRUG_TARGET', 'DRUG_NAME', 'DRUG_ID', 'FEATURE']
    summed = results.df.sum().drop(dropped_columns).fillna(0).to_dict()

    reference_sums = {
        'ANOVA_FEATURE_FDR': 1133416.7761055394,
        'ANOVA_FEATURE_pval': 5824.8201538614458,
        'FEATURE_IC50_T_pval': 5824.8201538614449,
        'FEATURE_IC50_effect_size': 4408.511449781573,
        'FEATURE_delta_MEAN_IC50': 261.11373729866705,
        'FEATURE_neg_Glass_delta': 4487.7401723134735,
        'FEATURE_neg_IC50_sd': 14701.868130868914,
        'FEATURE_neg_logIC50_MEAN': 28701.510736736222,
        'FEATURE_pos_Glass_delta': 6536.8938399490198,
        'FEATURE_pos_IC50_sd': 13362.588398939894,
        'FEATURE_pos_logIC50_MEAN': 28962.624474034845,
        'ANOVA_MSI_pval': 0.0,
        'N_FEATURE_neg': 439196.0,
        'N_FEATURE_pos': 92140.0,
        'ANOVA_TISSUE_pval': 0.0,
        'ASSOC_ID': 68509365.0,
        'index': 68497660.0,
    }

    # ANOVA_MEDIA_pval is the only column left out of the comparison.
    for column, observed in summed.items():
        if column != 'ANOVA_MEDIA_pval':
            assert_almost_equal(observed, reference_sums[column])

    # test part of the report (summary section)
    report = ANOVAReport(brca, results)
    diagnostics = report.diagnostics().to_dict()

    # Rows are keyed 0..16, so enumerate() rebuilds the integer keys.
    reference_text = dict(enumerate([
        'Type of analysis',
        'Total number of possible drug/feature associations',
        'Total number of ANOVA tests performed',
        'Percentage of tests performed',
        '',
        'Total number of tested drugs',
        'Total number of genomic features used',
        'Total number of screened cell lines',
        'MicroSatellite instability included as factor',
        '',
        'Total number of significant associations',
        ' - sensitive',
        ' - resistant',
        'p-value significance threshold',
        'FDR significance threshold',
        'Range of significant p-values',
        'Range of significant % FDRs',
    ]))
    reference_value = dict(enumerate([
        'breast',
        13780,
        11705,
        84.94,
        '',
        265,
        52,
        51,
        False,
        '',
        27,
        17,
        10,
        0.001,
        25,
        '[2.098e-09, 0.0004356]',
        '[0.002456 18.89]',
    ]))

    assert diagnostics == {'text': reference_text, 'value': reference_value}

Example #11

0

Show file

File: plot_association.py Project: CancerRxGene/gdsctools

"""
Association between a Drug and a Feature
=========================================

Boxplot association
"""



#####################################################
#
from gdsctools import ANOVA, ic50_test
# Build the analysis from the ic50_test data shipped with gdsctools
gdsc = ANOVA(ic50_test)
# Select the 'breast' cancer type
gdsc.set_cancer_type('breast')
# Association between drug 1047 and feature TP53_mut; show=True
# presumably displays the boxplot -- TODO confirm
df = gdsc.anova_one_drug_one_feature(1047, 'TP53_mut', show=True)

Example #12

0

Show file

File: pipelines.py Project: howard-lightfoot/gdsctools

def anova_pipeline(args=None):
    """This function is used by the standalone application called
    **gdsctools_anova**

    Type::

        gdsctools_anova --help

    to get some help.

    :param args: command-line arguments laid out like ``sys.argv`` (the
        first item is skipped when parsing). When None, a copy of
        ``sys.argv`` is used. A sequence with a single item is parsed as
        if ``--help`` had been given; the caller's sequence is not
        modified.
    :return: None
    :raises SystemExit: (exit code 1) when ``options.drug`` is set but is
        not a column of the IC50 data.
    """
    msg = "Welcome to GDSCTools standalone"
    print_color(msg, purple, underline=True)

    # Keep the argument args as None by default to
    # allow testing e.g., in nosetests
    if args is None:
        args = sys.argv[:]
    elif len(args) == 1:
        # Build a new list instead of using +=, which would extend the
        # caller's list in place.
        args = list(args) + ['--help']

    user_options = ANOVAOptions(prog="gdsctools_anova")
    try:
        options = user_options.parse_args(args[1:])
    except SystemExit:
        # the parser exited on its own; nothing left to do
        return

    # -----------------------------------------------------------------
    # ---------------------------------------- options without analysis
    # -----------------------------------------------------------------

    if options.version is True:
        print("This is version %s of gdsctools_anova" % gdsctools.version)
        return

    if options.testing is True:
        print('Testing mode:')
        from gdsctools import ANOVA, ic50_test
        an = ANOVA(ic50_test)
        df = an.anova_one_drug_one_feature('Drug_1047_IC50', 'TP53_mut')

        assert df.loc[1,'N_FEATURE_pos'] == 554, \
            "N_feature_pos must be equal to 554"
        print(df.T)
        print(darkgreen("\nGDSCTools seems to be installed properly"))
        return

    if options.save_settings:
        from gdsctools import ANOVA, ic50_test
        an = ANOVA(ic50_test)
        an.settings.to_json(options.save_settings)
        print('Save a default parameter set in %s' % options.save_settings)
        return

    if options.license is True:
        print(gdsctools.license)
        return

    if options.summary is True:
        from gdsctools import anova
        an = anova.ANOVA(options.input_ic50, options.input_features)
        print(an)
        return

    if options.print_tissues is True:
        from gdsctools import anova
        an = anova.ANOVA(options.input_ic50, options.input_features)

        tissues = an.tissue_factor
        try:
            tissues = tissues.sort_values('Tissue Factor').unique()
        except Exception:
            # Fall back to sort() when sort_values() is missing or fails
            # (presumably older pandas -- TODO confirm). Catching
            # Exception rather than everything lets KeyboardInterrupt
            # and SystemExit propagate.
            tissues = tissues.sort(inplace=False).unique()
        for name in tissues:
            print(name)
        return

    if options.print_drugs is True:
        from gdsctools import anova
        gdsc = anova.ANOVA(options.input_ic50, options.input_features)
        import textwrap
        # str() each identifier so that non-string ids can be joined too
        drug_names = " , ".join(str(drug) for drug in gdsc.drugIds)
        print("\n".join(textwrap.wrap(drug_names)))
        return

    if options.print_features is True:
        from gdsctools import anova
        gdsc = anova.ANOVA(options.input_ic50, options.input_features)
        import textwrap
        feature_names = " , ".join(str(name) for name in gdsc.feature_names)
        print("\n".join(textwrap.wrap(feature_names)))
        return

    # -----------------------------------------------------------------
    # --------------------------------------------------- real analysis
    # -----------------------------------------------------------------
    # dispatcher to the functions according to the user parameters

    from gdsctools import ANOVA, ANOVAReport
    anova = ANOVA(options.input_ic50,
                  options.input_features,
                  options.input_drug,
                  low_memory=not options.fast)
    anova = _set_settings(anova, options)

    if options.drug and options.drug not in anova.ic50.df.columns:
        print(red("Invalid Drug. Try --print-drug-names"))
        sys.exit(1)

    if options.drug is not None and options.feature is not None:
        print_color("ODOF mode", purple)
        anova_one_drug_one_feature(anova, options)
    elif options.drug is not None:
        print_color("ODAF mode", purple)
        anova_one_drug(anova, options)
    else:  # analyse everything
        if options.feature is None:
            print_color("ADAF mode", purple)
        else:
            print_color("ADOF mode", purple)
        anova_all(anova, options)

    if options.onweb is False and options.no_html is False:
        msg = "\nNote that a directory {} was created and files saved into it"
        print(purple(msg.format(options.directory)))

    return

Example #13

0

Show file

"""
Association between a Drug and a Feature
=========================================

Boxplot association
"""

#####################################################
#
from gdsctools import ANOVA, ic50_test
# Build the analysis from the ic50_test data shipped with gdsctools
gdsc = ANOVA(ic50_test)
# Select the 'breast' cancer type
gdsc.set_cancer_type('breast')
# Association between drug 1047 and feature TP53_mut; show=True
# presumably displays the boxplot -- TODO confirm
df = gdsc.anova_one_drug_one_feature(1047, 'TP53_mut', show=True)