Esempio n. 1
0
    def mpld3_to_html(self):
        """Render ``self.current_fig`` as an interactive mpld3 HTML snippet.

        A figure must have been plotted beforehand: the legend of the
        current pylab axes is adjusted before the conversion.

        :return: the HTML/JavaScript produced by mpld3 wrapped in a
            ``<div class="jsimage">`` element. If the legend tweak or the
            conversion fails, the div is returned empty.
        """
        from gdsctools import gdsctools_data
        # These are full local paths, so the generated HTML cannot be
        # moved to another directory. To be fixed.
        js_path1 = gdsctools_data('d3.v3.min.js', where='javascript')
        js_path2 = gdsctools_data('mpld3.v0.2.js', where='javascript')
        try:
            # mpld3 is great but there are a couple of issues
            # 1 - legend zorder is not used so dots may be below the legend,
            #     hence the legend frame is made partly transparent
            #     (framealpha=0.8)
            # 2 - a % character escaped as \% is fine for matplotlib but not
            #     once parsed by mpld3, so the backslash is removed
            axl = pylab.legend(loc='best', framealpha=0.8, borderpad=1)
            axl.set_zorder(10)  # in case there is a circle behind the legend.

            for label in axl.get_texts():
                # two trailing spaces are appended, presumably as padding
                # in the rendered legend -- TODO confirm
                label.set_text(label.get_text().replace("\\%", "%") + "  ")

            import mpld3
            htmljs = mpld3.fig_to_html(self.current_fig,
                                       d3_url=js_path1,
                                       mpld3_url=js_path2)
        except Exception:
            # Best effort: fall back to an empty div, but let
            # KeyboardInterrupt / SystemExit propagate.
            htmljs = ""
        return """<div class="jsimage"> """ + htmljs + "</div>"
Esempio n. 2
0
    def mpld3_to_html(self):
        """Convert ``self.current_fig`` into an mpld3 HTML fragment.

        A plotting method must have been called beforehand, since the
        legend of the current pylab axes is modified prior to conversion.

        :return: a ``<div class="jsimage">`` element containing the mpld3
            HTML/JavaScript, or an empty div if the legend adjustment or
            the conversion raised an exception.
        """
        from gdsctools import gdsctools_data
        # The full paths are embedded, therefore the HTML cannot
        # be moved to another directory. To be fixed.
        js_path1 = gdsctools_data('d3.v3.min.js', where='javascript')
        js_path2 = gdsctools_data('mpld3.v0.2.js', where='javascript')
        try:
            # Two mpld3 limitations are worked around here:
            # 1 - the legend zorder is ignored, so dots may end up below the
            #     legend; the frame is therefore semi-transparent
            #     (framealpha=0.8)
            # 2 - "\%" is understood by matplotlib but not by mpld3, so the
            #     backslash is stripped from the legend labels
            axl = pylab.legend(loc='best', framealpha=0.8, borderpad=1)
            axl.set_zorder(10)  # in case there is a circle behind the legend.

            for legend_text in axl.get_texts():
                cleaned = legend_text.get_text().replace("\\%", "%")
                # trailing spaces presumably pad the label -- TODO confirm
                legend_text.set_text(cleaned + "  ")

            import mpld3
            htmljs = mpld3.fig_to_html(self.current_fig,
                                       d3_url=js_path1,
                                       mpld3_url=js_path2)
        except Exception:
            # Deliberate best effort: any failure yields an empty div.
            # Unlike a bare except, Ctrl-C and SystemExit are not swallowed.
            htmljs = ""
        return """<div class="jsimage"> """ + htmljs + "</div>"
Esempio n. 3
0
def test_regression_elastic_net():
    """Smoke-test the GDSCElasticNet workflow on the v5 data (drug 1047)."""
    ic50_data = IC50(gdsctools_data("IC50_v5.csv.gz"))
    features = GenomicFeatures(gdsctools_data("genomic_features_v5.csv.gz"))
    gd = GDSCElasticNet(ic50_data, features, verbose=True)
    print(gd)
    drugid = 1047

    # the alpha reported by the cross validation is used to build the model
    cv_results = gd.runCV(drugid, n_folds=5)
    best_alpha = cv_results.alpha
    cv_results.coefficients
    best_model = gd.get_model(alpha=best_alpha)

    # plotting, with and without an explicit model
    gd.plot_weight(drugid, best_model)
    for model in (best_model, None):
        gd.plot_importance(drugid, model=model)

    # manual fit and alpha tuning
    gd.fit(drugid, alpha=best_alpha)
    gd.tune_alpha(drugid, alpha_range=(-3.5, -1))

    gd.check_randomness(drugid, N=10)

    # both boxplot orientations
    for vertical in (False, True):
        gd.boxplot(drugid, model=best_model, n=10, bx_vert=vertical)

    # default call, then the non-stacked variant
    for options in ({}, {"stacked": False}):
        gd.dendogram_coefficients(**options)
Esempio n. 4
0
def test_regression_elastic_net():
    """Run the main GDSCElasticNet entry points once on drug 1047 (v5 data)."""
    drugid = 1047
    ic50_data = IC50(gdsctools_data("IC50_v5.csv.gz"))
    features = GenomicFeatures(gdsctools_data("genomic_features_v5.csv.gz"))
    gd = GDSCElasticNet(ic50_data, features, verbose=True)
    print(gd)

    # cross validation provides the alpha used for the reference model
    cv_results = gd.runCV(drugid, kfolds=5)
    best_alpha = cv_results.alpha
    cv_results.coefficients
    best_model = gd.get_model(alpha=best_alpha)

    # plots
    gd.plot_weight(drugid, best_model)
    for model in (best_model, None):
        gd.plot_importance(drugid, model=model)

    # manual fit, alpha scan and randomness check
    gd.fit(drugid, alpha=best_alpha)
    gd.tune_alpha(drugid, alpha_range=(-3.5, -1))
    gd.check_randomness(drugid, N=10)

    # horizontal then vertical boxplots
    for vertical in (False, True):
        gd.boxplot(drugid, model=best_model, n=10, bx_vert=vertical)

    # default dendrogram, then the non-stacked one
    for options in ({}, {"stacked": False}):
        gd.dendogram_coefficients(**options)
def test_analysis():
    """Call ``regression.main`` on the v5 data with a temporary output directory."""
    features_file = gdsctools_data("genomic_features_v5.csv.gz")
    ic50_file = gdsctools_data("IC50_v5.csv.gz")

    import tempfile
    output_dir = tempfile.mkdtemp()

    # mkdtemp has already created the directory, hence --force
    # (presumably required by regression.main for existing directories)
    argv = [prog, '-F', features_file, '-I', ic50_file,
            "-O", output_dir, "--force"]
    regression.main(argv)
Esempio n. 6
0
def test_readers_tabs():
    """Check that IC50 rejects a ``.csv`` file whose content is tab-separated."""
    # If the file name ends in csv but its content is tsv, the reader is
    # expected to raise.
    try:
        IC50(gdsctools_data("test_IC50_tabs.csv"))
    except Exception:
        # expected outcome: the mislabelled file was refused
        return
    # The failure is raised outside the try block so that it cannot be
    # swallowed by the except clause.
    raise AssertionError(
        "IC50 accepted a tab-separated file with a .csv extension")
Esempio n. 7
0
def test_readers_tabs():
    """Reading a tab-separated file named ``*.csv`` with IC50 must fail."""
    # If the file name ends in csv but its content is tsv, this may be an
    # issue, so the reader is expected to raise.
    try:
        IC50(gdsctools_data("test_IC50_tabs.csv"))
    except Exception:
        pass  # expected: the reader refused the file
    else:
        # Reached only when no exception was raised; failing here (rather
        # than inside the try) keeps the failure from being caught above.
        raise AssertionError(
            "IC50 accepted a tab-separated file with a .csv extension")
Esempio n. 8
0
    def _init_report(self):
        """Run the parent initialisation, then copy the logos if missing.

        The two logo files are copied into ``<self.directory>/images``
        unless a file with the same name is already there.
        """
        super(ReportMain, self)._init_report()

        images_dir = os.sep.join([self.directory, 'images'])
        for logo in ('EBI_logo.png', 'sanger-logo.png'):
            destination = images_dir + os.sep + logo
            if not os.path.isfile(destination):
                source = gdsctools_data("images" + os.sep + logo)
                shutil.copy(source, destination)
Esempio n. 9
0
    def _init_report(self):
        """Initialise the report through the parent class and add the logos.

        Each logo is copied to the ``images`` sub-directory of
        ``self.directory`` only when it is not already present.
        """
        super(ReportMain, self)._init_report()

        logos = ("EBI_logo.png", "sanger-logo.png")
        targets = [os.sep.join([self.directory, "images", name])
                   for name in logos]
        for name, target in zip(logos, targets):
            if os.path.isfile(target):
                continue
            shutil.copy(gdsctools_data("images" + os.sep + name), target)
Esempio n. 10
0
def test_gdsc():
    """Run the GDSC analysis on the v17 test data inside a temporary directory."""
    import tempfile
    workdir = tempfile.mkdtemp()

    # the glob pattern is built from the directory holding GF_BRCA_v17.csv.gz
    gf_directory = os.path.split(gdsctools_data("GF_BRCA_v17.csv.gz"))[0]
    gf_pattern = gf_directory + '/GF_*_v17.csv.gz'
    ic50_file = gdsctools_data('IC50_v17.csv.gz')
    decoder_file = gdsctools_data("test_drug_decode2.csv")

    gg = GDSC(ic50_file, decoder_file, gf_pattern)
    # redirect both output trees to the temporary directory
    gg.company_directory = workdir + os.sep + "company_packages"
    gg.tissue_directory = workdir + os.sep + "tissue_packages"

    gg.analyse()
    assert gg.companies == ['COMPANY_A', 'COMPANY_B']

    gg.create_data_packages_for_companies()
    gg.create_summary_pages()
Esempio n. 11
0
    def _init_report(self):
        """Create the report directory tree and copy the static resources.

        Resets ``self.sections`` and ``self.section_names``, creates
        ``self.directory`` and the sub-directories listed in
        ``self._to_create`` when they are missing, then copies the CSS,
        JavaScript and logo files obtained from ``gdsctools_data`` into the
        ``css``, ``js`` and ``images`` sub-directories, skipping files that
        are already present.
        """
        self.sections = []
        self.section_names = []

        try:
            if not os.path.isdir(self.directory):
                os.mkdir(self.directory)
                # announced only once the directory really exists
                print("Created directory {}".format(self.directory))

            # sub-directories requested through self._to_create
            for this in self._to_create:
                try:
                    os.mkdir(self.directory + os.sep + this)
                except OSError:
                    pass  # most likely already created
        except Exception:
            # Best effort, as before: a failure here is ignored and will
            # surface when the resources are copied below.
            pass

        # (sub-directory, file names, prefix handed to gdsctools_data)
        resources = [
            ('css', ['gdsc.css', 'github-gist.css'], ''),
            ('js', ['sorttable.js', 'highlight.pack.js'], ''),
            ('images', ['EBI_logo.png', 'sanger-logo.png'],
             "images" + os.sep),
        ]
        for subdir, filenames, prefix in resources:
            for filename in filenames:
                target = os.sep.join([self.directory, subdir, filename])
                if not os.path.isfile(target):
                    shutil.copy(gdsctools_data(prefix + filename), target)
Esempio n. 12
0
    def _init_report(self):
        """Prepare the report directory and its static files.

        ``self.sections`` and ``self.section_names`` are reset. The report
        directory and every sub-directory named in ``self._to_create`` are
        created when missing. The CSS, JavaScript and logo files returned
        by ``gdsctools_data`` are then copied into ``css``, ``js`` and
        ``images`` unless the target file already exists.
        """
        self.sections = []
        self.section_names = []

        try:
            if not os.path.isdir(self.directory):
                os.mkdir(self.directory)
                # printed after mkdir so the message is never a false claim
                print("Created directory {}".format(self.directory))

            # sub-directories requested through self._to_create
            for this in self._to_create:
                try:
                    os.mkdir(self.directory + os.sep + this)
                except OSError:
                    pass  # most likely already created
        except Exception:
            # Kept as a best effort: problems show up at copy time below.
            pass

        # (sub-directory, file names, prefix handed to gdsctools_data)
        static_files = (
            ('css', ('gdsc.css', 'github-gist.css'), ''),
            ('js', ('sorttable.js', 'highlight.pack.js'), ''),
            ('images', ('EBI_logo.png', 'sanger-logo.png'),
             "images" + os.sep),
        )
        for subdir, names, prefix in static_files:
            for name in names:
                target = os.sep.join([self.directory, subdir, name])
                if os.path.isfile(target):
                    continue
                shutil.copy(gdsctools_data(prefix + name), target)
Esempio n. 13
0
def test_IC50Cluster():
    """Check IC50Cluster parsing of the v18 clustering data set."""
    dataset = gdsctools_data("test_v18_clustering.tsv")
    ic50 = IC50Cluster(dataset)

    # In this data set, a drug is reported 3 times (1211) and should appear
    # as follows:
    assert 1211 in ic50.df.columns
    assert 11211 in ic50.df.columns
    assert 21211 in ic50.df.columns

    assert len(ic50.drugIds) == 860
    assert len(ic50.df) == 50

    an = ANOVA(ic50, dataset)
    # the comparison must be asserted, otherwise its result is discarded
    assert an.diagnostics()['feasible_tests'] == 65026
Esempio n. 14
0
def test_IC50Cluster():
    """Verify the columns and sizes read by IC50Cluster from the v18 test file."""
    dataset = gdsctools_data("test_v18_clustering.tsv")
    ic50 = IC50Cluster(dataset)

    # In this data set, a drug is reported 3 times (1211) and should appear
    # as follows:
    assert 1211 in ic50.df.columns
    assert 11211 in ic50.df.columns
    assert 21211 in ic50.df.columns

    assert len(ic50.drugIds) == 860
    assert len(ic50.df) == 50

    an = ANOVA(ic50, dataset)
    # without ``assert`` this comparison would be a no-op expression
    assert an.diagnostics()['feasible_tests'] == 65026
Esempio n. 15
0
def test_drugs():
    """Check DrugDecode reading, equality, info and addition."""
    r1 = DrugDecode(testing.drug_test_csv)
    r1.drugIds
    r2 = DrugDecode(testing.drug_test_tsv)
    r2.drugIds
    assert r1 == r2

    # r1.get_info() this example fails because all webrelease are NAN
    assert len(r1) == 11

    dd = DrugDecode(gdsctools_data("test_drug_decode_comp.csv"))
    assert dd.companies == ["ME"]
    assert dd.is_public(5) == 'Y'
    dd.check()
    assert dd.get_info()['N_prop'] == 1
    # test repr and print; a bare ``dd`` statement would not call __repr__
    print(dd)
    repr(dd)
    # test __add__
    assert dd + dd == dd
    assert len(dd.get_public_and_one_company("ME")) == 10
Esempio n. 16
0
def test_drugs():
    """Exercise DrugDecode: csv/tsv readers, company info, repr and ``+``."""
    r1 = DrugDecode(testing.drug_test_csv)
    r1.drugIds
    r2 = DrugDecode(testing.drug_test_tsv)
    r2.drugIds
    assert r1 == r2

    # r1.get_info() this example fails because all webrelease are NAN
    assert len(r1) == 11

    dd = DrugDecode(gdsctools_data("test_drug_decode_comp.csv"))
    assert dd.companies == ["ME"]
    assert dd.is_public(5) == 'Y'
    dd.check()
    assert dd.get_info()['N_prop'] == 1
    # test repr and print; repr() is called explicitly because a bare
    # name expression inside a function does not trigger __repr__
    print(dd)
    repr(dd)
    # test __add__
    assert dd + dd == dd
    assert len(dd.get_public_and_one_company("ME")) == 10
Esempio n. 17
0
from gdsctools.omnibem import OmniBEMBuilder
from gdsctools import gdsctools_data

# Values returned by gdsctools_data for the two OmniBEM test files
# (presumably file paths; they are handed to OmniBEMBuilder and
# filter_by_gene_list in the test below).
omnibem_data = gdsctools_data("test_omnibem_genomic_alterations.csv.gz")
omnibem_genes = gdsctools_data("test_omnibem_genes.txt")


def test_omnibem():
    """Exercise OmniBEMBuilder filters, plots and the MoBEM export."""
    ob = OmniBEMBuilder(omnibem_data)
    assert len(ob) == 56943
    ob.filter_by_gene_list(omnibem_genes)
    mobem = ob.get_mobem()
    assert mobem[mobem.columns[3:]].sum().sum() == 54061

    # smoke-test the plotting and summary helpers
    ob.plot_number_alteration_by_tissue()
    ob.plot_alterations_per_cellline()
    ob.get_significant_genes()

    # filter by cosmic id
    ob = OmniBEMBuilder(omnibem_data)
    ob.filter_by_cosmic_list([910916])
    mobem = ob.get_mobem()
    assert mobem.shape == (1, 105)
    # positional access to the single row: DataFrame.ix was removed in
    # pandas 1.0, iloc is the positional replacement
    assert mobem.iloc[0, 3:].sum() == 102

    ob = OmniBEMBuilder(omnibem_data)
    ob.filter_by_type_list(["Methylation"])
    mobem = ob.get_mobem()
    assert mobem[mobem.columns[3:]].sum().sum() == 12964
Esempio n. 18
0
def test_set_cancer_type():
    """Restricting the v17 IC50 data to breast must give the reference total."""
    ic50_file = gdsctools_data("IC50_v17.csv.gz")
    anova = ANOVA(ic50_file)
    anova.set_cancer_type("breast")

    # grand total of the remaining IC50 values
    observed = anova.ic50.df.sum().sum()
    expected = 27721.255627472943
    assert_list_almost_equal([observed], [expected])
Esempio n. 19
0
def test_lasso():
    """Run the GDSCLasso cross validation on drug 1047 and read its alpha."""
    ic50_data = IC50(gdsctools_data("IC50_v5.csv.gz"))
    features = GenomicFeatures(gdsctools_data("genomic_features_v5.csv.gz"))
    lasso = GDSCLasso(ic50_data, features, verbose=True)

    cv_results = lasso.runCV(1047)
    cv_results.alpha
Esempio n. 20
0
    def __init__(self, filename='index.html', directory='report',
                 overwrite=True, verbose=True,
                template_filename='index.html', mode=None):
        """.. rubric:: Constructor

        :param filename: default to **index.html**
        :param directory: defaults to **report**
        :param overwrite: default to True (not used by this constructor)
        :param verbose: default to True (not used by this constructor)
        :param template_filename: name of the jinja2 template, looked up in
            the directory returned by ``gdsctools_data('templates')``
        :param str mode: if none, report have a structure that contains the
            following directories: OUTPUT, INPUT, images, css, js, code.
            Otherwise, if mode is set to 'summary', only the following
            directories are created: images, css, js
        :raises ValueError: if *mode* is neither None nor 'summary'

        """
        #: name of the analysis added in the title
        self.analysis = 'anova'
        self.pkgname = 'gdsctools'

        from gdsctools import version
        #: version added in the sub title
        self.version = version

        self._directory = directory
        self._filename = filename

        # This contains the sections and their names when
        # method add_section is used
        self.sections = []
        self.section_names = []

        #: flag to add dependencies
        self.add_dependencies = False

        self.title = 'ANOVA analysis summary'
        self.analysis_type = "PANCAN"

        # For jinja2 inheritance, we need to use the environment
        # to indicate where are the parents' templates
        template_directory = gdsctools_data('templates')

        self.env = Environment()
        self.env.loader = FileSystemLoader(template_directory)

        # use template provided inside gdsctools
        self.template = self.env.get_template(template_filename)

        self.jinja = {
                'time_now': self.get_time_now(),
                "analysis": self.analysis,
                "version": self.version,
                "title": self.title,
                "analysis_domain": self.analysis_type,
                'dependencies': self.get_table_dependencies().to_html(),
                }

        if mode is None:
            self._to_create = ['OUTPUT', 'INPUT', 'images', 'css',
                    'js', 'code']
        elif mode == 'summary':
            self._to_create = ['images', 'css', 'js',]
        else:
            # Without this branch self._to_create would stay undefined and
            # the problem would only surface later, far from its cause.
            raise ValueError(
                "mode must be None or 'summary', got {!r}".format(mode))

        self._init_report()
Esempio n. 21
0
def test_lasso():
    """Cross-validate a GDSCLasso model for drug 1047 on the v5 data."""
    drugid = 1047
    ic50_file = gdsctools_data("IC50_v5.csv.gz")
    features_file = gdsctools_data("genomic_features_v5.csv.gz")

    model = GDSCLasso(IC50(ic50_file), GenomicFeatures(features_file),
                      verbose=True)
    # the alpha attribute of the result is accessed to check it exists
    outcome = model.runCV(drugid)
    outcome.alpha
Esempio n. 22
0
def _gsf(filename):
    """Shortcut returning ``gdsctools.gdsctools_data(filename)``."""
    # imported lazily, inside the function, as in the original helper
    import gdsctools
    return gdsctools.gdsctools_data(filename)
Esempio n. 23
0
def main(args=None):
    """This function is used by **gdsctools_regression**

    Type::

        gdsctools_regression --help

    to get some help.

    :param args: command line as a list whose first item is the program
        name. Defaults to a copy of ``sys.argv``. When a list with a single
        item is given, ``--help`` is appended to a copy of it; the
        caller's list is left untouched.

    The function creates the output directory, copies ``regression.rules``
    into it, writes ``config.yaml`` and a ``README`` file, and prints the
    instructions to run the pipeline. It returns None.
    """
    welcome = "Welcome to GDSCTools standalone (lasso, ridge, elastic net)"
    print_color(welcome, purple, underline=True)

    # Keep the argument args as None by default to
    # allow testing e.g., in nosetests
    if args is None:
        args = sys.argv[:]
    elif len(args) == 1:
        # build a new list rather than extending the caller's one in place
        args = list(args) + ['--help']

    user_options = RegressionOptions(prog="gdsctools_regression")
    try:
        options = user_options.parse_args(args[1:])
    except SystemExit:
        return

    # -----------------------------------------------------------------

    if options.version is True:
        print("This is version %s of gdsctools_regression" % gdsctools.version)
        return

    if options.license is True:
        print(gdsctools.license)
        return

    # -------------------------------------------- real analysis --------
    try:
        os.mkdir(options.output_directory)
    except OSError:
        # os.mkdir failed, typically because the directory already exists
        if options.force is False:
            print_color(("directory already exists, erase it or choose a different "
               " output directory with --output-directory. Use --force to force"
                " your choice"), "red")
            return
        # with --force, keep going and reuse the directory

    # Copy the regression pipeline
    filename = gdsctools.gdsctools_data("regression.rules", '../pipelines')
    shutil.copy(filename, options.output_directory)

    # create the config
    params = {"method": options.method,
              "kfold": options.kfold,
              "ic50": os.path.realpath(options.input_ic50),
              "features": os.path.realpath(options.input_features)}

    config_template = """
# Analysis
regression:
  method: %(method)s       # lasso, elasticnet or ridge
  kfold: %(kfold)s        # Used to automatically estimate best alpha parameter
  randomness: 50
  boxplot_n: 5

# Input data sets
input:
  ic50: %(ic50)s
  genomic_features: %(features)s
    """

    with open(options.output_directory + os.sep + "config.yaml", "w") as fh:
        fh.write(config_template % params)

    print("File config.yaml and regression.rules created in ./%s" %
        options.output_directory)

    print("First go to the directory where analysis will be performed\n\n")
    print("    cd %s\n" % options.output_directory)
    msg = """You have two choices now. Either you are on a laptop, or you are on
a cluster.

1. LOCAL COMPUTER:
------------------

    snakemake -s regression.rules -p

where -p means 'print statements'

2. CLUSTERS:
------------

On a SLURM cluster, you can make use of the many cores available by typing for
instance:

    srun --qos normal snakemake -s regression.rules -j 40 --cluster "sbatch --qos normal"

For more information about snakemake commands, type

    snakemake --help

"""
    print(msg)
    # the same instructions are stored next to the pipeline files
    with open(options.output_directory + os.sep + "README", "w") as fh:
        fh.write(msg)
Esempio n. 24
0
from gdsctools.omnibem import OmniBEMBuilder
from gdsctools import gdsctools_data

# Test inputs obtained from gdsctools_data (presumably file paths): the
# genomic alterations given to OmniBEMBuilder and the gene list given to
# filter_by_gene_list in the test below.
omnibem_data = gdsctools_data("test_omnibem_genomic_alterations.csv.gz")
omnibem_genes = gdsctools_data("test_omnibem_genes.txt")


def test_omnibem():
    """Check OmniBEMBuilder gene, cosmic and type filters on the test data."""
    ob = OmniBEMBuilder(omnibem_data)
    ob.filter_by_gene_list(omnibem_genes)
    mobem = ob.get_mobem()
    assert mobem[mobem.columns[3:]].sum().sum() == 54061

    # smoke-test the plotting and summary helpers
    ob.plot_number_alteration_by_tissue()
    ob.plot_alterations_per_cellline()
    ob.get_significant_genes()

    # filter by cosmic id
    ob = OmniBEMBuilder(omnibem_data)
    ob.filter_by_cosmic_list([910916])
    mobem = ob.get_mobem()
    assert mobem.shape == (1, 105)
    # iloc gives positional access to the only row; DataFrame.ix no longer
    # exists in pandas >= 1.0
    assert mobem.iloc[0, 3:].sum() == 102

    ob = OmniBEMBuilder(omnibem_data)
    ob.filter_by_type_list(["Methylation"])
    mobem = ob.get_mobem()
    assert mobem[mobem.columns[3:]].sum().sum() == 12964
Esempio n. 25
0
    def create_summary_pages(self, main_directory='ALL'):
        """Build the summary page listing every analysis in *main_directory*.

        The ``css``, ``js`` and ``images`` sub-directories are created and
        filled with the static files returned by ``gdsctools_data``. Every
        other sub-directory is treated as one analysis: its
        ``OUTPUT/drugs_summary.csv``, ``OUTPUT/results.csv`` and
        ``INPUT/DRUG_DECODE.csv`` files are read to fill one row of the
        summary. An analysis whose ``drugs_summary.csv`` cannot be read
        gets a row of ``None`` values.

        :param main_directory: directory scanned for analyses and used as
            the report directory (default ``'ALL'``)
        :return: the summary as a pandas DataFrame, one row per analysis
        """
        from gdsctools import gdsctools_data
        from gdsctools.report import HTMLTable

        # (sub-directory, file names, prefix handed to gdsctools_data)
        resources = [
            ('images', ['EBI_logo.png', 'sanger-logo.png'],
             "images" + os.sep),
            ('css', ['gdsc.css', 'github-gist.css'], ''),
            ('js', ['highlight.pack.js'], ''),
        ]
        for subdir, _, _ in resources:
            self.mkdir(main_directory + os.sep + subdir)
        for subdir, filenames, prefix in resources:
            for filename in filenames:
                target = os.sep.join([main_directory, subdir, filename])
                if not os.path.isfile(target):
                    shutil.copy(gdsctools_data(prefix + filename), target)

        # resource directories are not analyses and must all be skipped,
        # including 'js' which was created just above
        resource_dirs = [subdir for subdir, _, _ in resources]

        directories = glob.glob(main_directory + os.sep + '*')
        directories = [x for x in directories if os.path.isdir(x)]

        summary = []
        for directory in sorted(directories):

            # last path component, valid for any depth of main_directory
            tcga = os.path.basename(directory)
            if tcga in resource_dirs:
                continue

            # number of hits
            path = directory + os.sep + 'OUTPUT' + os.sep
            try:
                hits = pd.read_csv(path + 'drugs_summary.csv', sep=',')
            except Exception:
                # no readable summary for this analysis: placeholder row
                summary.append([tcga] + [None] * 5)
                continue
            total_hits = hits.total.sum()

            drug_involved = get_drug_id(hits['Unnamed: 0'].unique())

            # NOTE(review): drug_ids is not used below; left in place since
            # constructing ANOVAResults reads results.csv -- confirm before
            # removing.
            results = ANOVAResults(path + 'results.csv')
            if len(results) > 0:
                drug_ids = get_drug_id(results.df.DRUG_ID.unique())
            else:
                drug_ids = []

            path = directory + os.sep + 'INPUT' + os.sep
            drug_decode = DrugDecode(path + 'DRUG_DECODE.csv')
            info = drug_decode.get_info()

            # label-based lookup (DataFrame.ix was removed in pandas 1.0);
            # assumes drug_decode.df is indexed by drug identifier -- TODO
            # confirm
            webrelease = drug_decode.df.loc[drug_involved].WEBRELEASE
            drug_inv_public = sum(webrelease == 'Y')
            drug_inv_prop = sum(webrelease != 'Y')

            summary.append([
                tcga, total_hits, drug_inv_prop, info['N_prop'],
                drug_inv_public, info['N_public']
            ])

        columns = [
            'Analysis name', 'Number of hits',
            'Number of involved proprietary compounds', 'out of',
            'Number of involved public', 'out of'
        ]
        # passing the columns to the constructor also copes with an empty
        # summary, where assigning df.columns afterwards would fail
        df = pd.DataFrame(summary, columns=columns)

        # FIXME include css and images of logo
        # FIXME save in the proper directory
        self.html_page = ReportMAIN(directory=main_directory,
                                    filename='index.html',
                                    template_filename='datapack_summary.html')

        # Let us use our HTMLTable to add the HTML references
        self.html_table = HTMLTable(df)
        self.html_table.add_href('Analysis name',
                                 newtab=True,
                                 url=None,
                                 suffix='/index.html')

        self.html_page.jinja['data_table'] = self.html_table.to_html()
        self.html_page.write()

        return df
Esempio n. 26
0
def test_anova_brca():
    """Regression test of the ANOVA analysis restricted to breast (v17 data).

    The column sums of the full ANOVA results and the diagnostics table of
    the report are compared with reference values.
    """
    breast = ANOVA(gdsctools_data('IC50_v17.csv.gz'))
    breast.set_cancer_type('breast')

    an = ANOVA(breast.ic50, gdsctools_data('GF_BRCA_v17.csv.gz'))
    dfori = an.anova_all()

    # column sums, without the non-numeric columns
    sums = dfori.df.sum()
    sums = sums.drop(['DRUG_TARGET', 'DRUG_NAME', 'DRUG_ID', 'FEATURE'])
    observed = sums.fillna(0).to_dict()

    expected_sums = {
        'ANOVA_FEATURE_FDR': 1133416.7761055394,
        'ANOVA_FEATURE_pval': 5824.8201538614458,
        'FEATURE_IC50_T_pval': 5824.8201538614449,
        'FEATURE_IC50_effect_size': 4408.511449781573,
        'FEATURE_delta_MEAN_IC50': 261.11373729866705,
        'FEATURE_neg_Glass_delta': 4487.7401723134735,
        'FEATURE_neg_IC50_sd': 14701.868130868914,
        'FEATURE_neg_logIC50_MEAN': 28701.510736736222,
        'FEATURE_pos_Glass_delta': 6536.8938399490198,
        'FEATURE_pos_IC50_sd': 13362.588398939894,
        'FEATURE_pos_logIC50_MEAN': 28962.624474034845,
        'ANOVA_MSI_pval': 0.0,
        'N_FEATURE_neg': 439196.0,
        'N_FEATURE_pos': 92140.0,
        'ANOVA_TISSUE_pval': 0.0,
        'ASSOC_ID': 68509365.0,
        'index': 68497660.0,
    }

    skipped = ['ANOVA_MEDIA_pval']
    for column, value in observed.items():
        if column not in skipped:
            assert_almost_equal(value, expected_sums[column])

    # test part of the report (summary section)
    report = ANOVAReport(an, dfori)
    diagnostics = report.diagnostics().to_dict()

    # rows of the diagnostics table, in order; enumerate() rebuilds the
    # integer keys 0..16 of the reference dictionaries
    expected_text = [
        'Type of analysis',
        'Total number of possible drug/feature associations',
        'Total number of ANOVA tests performed',
        'Percentage of tests performed',
        '',
        'Total number of tested drugs',
        'Total number of genomic features used',
        'Total number of screened cell lines',
        'MicroSatellite instability included as factor',
        '',
        'Total number of significant associations',
        ' - sensitive',
        ' - resistant',
        'p-value significance threshold',
        'FDR significance threshold',
        'Range of significant p-values',
        'Range of significant % FDRs',
    ]
    expected_value = [
        'breast',
        13780,
        11705,
        84.94,
        '',
        265,
        52,
        51,
        False,
        '',
        27,
        17,
        10,
        0.001,
        25,
        '[2.098e-09, 0.0004356]',
        '[0.002456 18.89]',
    ]
    expected_diagnostics = {
        'text': dict(enumerate(expected_text)),
        'value': dict(enumerate(expected_value)),
    }

    assert diagnostics == expected_diagnostics
Esempio n. 27
0
def test_set_cancer_type():
    """Check the IC50 grand total once the cancer type is set to breast."""
    reference_total = 27721.255627472943

    analysis = ANOVA(gdsctools_data("IC50_v17.csv.gz"))
    analysis.set_cancer_type("breast")

    # sum over columns, then over the resulting series
    grand_total = analysis.ic50.df.sum().sum()
    assert_list_almost_equal([grand_total], [reference_total])
Esempio n. 28
0
def _gsf(filename):
    """Return the result of ``gdsctools_data`` for *filename*."""
    # function-level import, as in the original helper
    from gdsctools import gdsctools_data as locate
    located = locate(filename)
    return located
Esempio n. 29
0
    def create_summary_pages(self, main_directory='ALL'):
        """Create the HTML summary of all analyses found in *main_directory*.

        Static files (CSS, JavaScript, logos) returned by
        ``gdsctools_data`` are copied into the ``css``, ``js`` and
        ``images`` sub-directories. Each remaining sub-directory is
        summarised in one row built from its ``OUTPUT/drugs_summary.csv``,
        ``OUTPUT/results.csv`` and ``INPUT/DRUG_DECODE.csv`` files. When
        ``drugs_summary.csv`` cannot be read, the row only holds the
        analysis name followed by ``None`` values.

        :param main_directory: directory scanned for analyses and used as
            the report directory (default ``'ALL'``)
        :return: the summary as a pandas DataFrame, one row per analysis
        """
        from gdsctools import gdsctools_data
        from gdsctools.report import HTMLTable

        # sub-directory -> (file names, prefix handed to gdsctools_data)
        static_files = {
            'images': (['EBI_logo.png', 'sanger-logo.png'],
                       "images" + os.sep),
            'css': (['gdsc.css', 'github-gist.css'], ''),
            'js': (['highlight.pack.js'], ''),
        }
        for subdir in ('images', 'css', 'js'):
            self.mkdir(main_directory + os.sep + subdir)
        for subdir in ('css', 'js', 'images'):
            filenames, prefix = static_files[subdir]
            for filename in filenames:
                target = os.sep.join([main_directory, subdir, filename])
                if not os.path.isfile(target):
                    shutil.copy(gdsctools_data(prefix + filename), target)

        directories = glob.glob(main_directory + os.sep + '*')
        directories = [x for x in directories if os.path.isdir(x)]

        summary = []
        for directory in sorted(directories):

            # last path component, valid for any depth of main_directory
            tcga = os.path.basename(directory)
            # static directories (including 'js', created above) are not
            # analyses
            if tcga in static_files:
                continue

            # number of hits
            path = directory + os.sep + 'OUTPUT' + os.sep
            try:
                hits = pd.read_csv(path + 'drugs_summary.csv', sep=',')
            except Exception:
                # no readable summary for this analysis: placeholder row
                summary.append([tcga] + [None] * 5)
                continue
            total_hits = hits.total.sum()

            drug_involved = get_drug_id(hits['Unnamed: 0'].unique())

            # NOTE(review): drug_ids is not used below; left in place since
            # constructing ANOVAResults reads results.csv -- confirm before
            # removing.
            results = ANOVAResults(path + 'results.csv')
            if len(results) > 0:
                drug_ids = get_drug_id(results.df.DRUG_ID.unique())
            else:
                drug_ids = []

            path = directory + os.sep + 'INPUT' + os.sep
            drug_decode = DrugDecode(path + 'DRUG_DECODE.csv')
            info = drug_decode.get_info()

            # label-based lookup (DataFrame.ix was removed in pandas 1.0);
            # assumes drug_decode.df is indexed by drug identifier -- TODO
            # confirm
            webrelease = drug_decode.df.loc[drug_involved].WEBRELEASE
            drug_inv_public = sum(webrelease == 'Y')
            drug_inv_prop = sum(webrelease != 'Y')

            summary.append([tcga, total_hits,
                            drug_inv_prop, info['N_prop'],
                            drug_inv_public, info['N_public']])

        # the columns are given to the constructor so that an empty
        # summary still yields a valid (empty) table
        df = pd.DataFrame(summary, columns=[
            'Analysis name', 'Number of hits',
            'Number of involved proprietary compounds', 'out of',
            'Number of involved public', 'out of'])

        # FIXME include css and images of logo
        # FIXME save in the proper directory
        self.html_page = ReportMAIN(directory=main_directory,
                                    filename='index.html',
                                    template_filename='datapack_summary.html')

        # Let us use our HTMLTable to add the HTML references
        self.html_table = HTMLTable(df)
        self.html_table.add_href('Analysis name', newtab=True, url=None,
                                 suffix='/index.html')

        self.html_page.jinja['data_table'] = self.html_table.to_html()
        self.html_page.write()

        return df
def test_anova_brca():
    """Regression test: ANOVA on ``IC50_v17.csv.gz`` with BRCA features.

    Two things are checked against stored reference values:

    1. the column-wise sums of the table returned by ``ANOVA.anova_all``
       (after ``set_cancer_type('breast')``), compared with
       ``assert_almost_equal``;
    2. the table returned by ``ANOVAReport.diagnostics`` (summary section
       of the report), compared for strict equality.

    The ``breast`` directory is removed when the test ends. The removal is
    done in a ``finally`` clause so that a failing assertion does not leave
    the directory behind, and with ``ignore_errors=True`` so that the
    clean-up itself can never hide the real failure (e.g. when the
    directory was not created yet).
    """
    import shutil

    try:
        an1 = ANOVA(gdsctools_data('IC50_v17.csv.gz'))
        an1.set_cancer_type('breast')

        # Second analysis built from the IC50 of the first one plus the
        # BRCA genomic features file.
        an = ANOVA(an1.ic50, gdsctools_data('GF_BRCA_v17.csv.gz'))
        dfori = an.anova_all()

        # Reduce the results to one number per column; the non-summable
        # descriptive columns are dropped and NA sums are replaced by 0.
        df = dfori.df.sum()
        df = df.drop(['DRUG_TARGET', 'DRUG_NAME', 'DRUG_ID', 'FEATURE'])
        df = df.fillna(0)
        totest = df.to_dict()

        exact = {'ANOVA_FEATURE_FDR': 1133416.7761055394,
            'ANOVA_FEATURE_pval': 5824.8201538614458,
            'FEATURE_IC50_T_pval': 5824.8201538614449,
            'FEATURE_IC50_effect_size': 4408.511449781573,
            'FEATURE_delta_MEAN_IC50': 261.11373729866705,
            'FEATURE_neg_Glass_delta': 4487.7401723134735,
            'FEATURE_neg_IC50_sd': 14701.868130868914,
            'FEATURE_neg_logIC50_MEAN': 28701.510736736222,
            'FEATURE_pos_Glass_delta': 6536.8938399490198,
            'FEATURE_pos_IC50_sd': 13362.588398939894,
            'FEATURE_pos_logIC50_MEAN': 28962.624474034845,
            'ANOVA_MSI_pval': 0.0,
            'N_FEATURE_neg': 439196.0,
            'N_FEATURE_pos': 92140.0,
            'ANOVA_TISSUE_pval': 0.0,
            'ASSOC_ID': 68509365.0,
            'index': 68497660.0}

        # Columns that have no reference value in ``exact``.
        not_checked = ('ANOVA_MEDIA_pval',)
        for k, v in totest.items():
            if k in not_checked:
                continue
            assert_almost_equal(v, exact[k])

        # test part of the report (summary section)
        r = ANOVAReport(an, dfori)
        totest = r.diagnostics().to_dict()

        exact = {'text': {0: 'Type of analysis',
      1: 'Total number of possible drug/feature associations',
      2: 'Total number of ANOVA tests performed',
      3: 'Percentage of tests performed',
      4: '',
      5: 'Total number of tested drugs',
      6: 'Total number of genomic features used',
      7: 'Total number of screened cell lines',
      8: 'MicroSatellite instability included as factor',
      9: '',
      10: 'Total number of significant associations',
      11: ' - sensitive',
      12: ' - resistant',
      13: 'p-value significance threshold',
      14: 'FDR significance threshold',
      15: 'Range of significant p-values',
      16: 'Range of significant % FDRs'},
     'value': {0: 'breast',
      1: 13780,
      2: 11705,
      3: 84.94,
      4: '',
      5: 265,
      6: 52,
      7: 51,
      8: False,
      9: '',
      10: 27,
      11: 17,
      12: 10,
      13: 0.001,
      14: 25,
      15: '[2.098e-09, 0.0004356]',
      16: '[0.002456 18.89]'}}

        assert totest == exact
    finally:
        # Always clean up the directory named after the cancer type
        # (presumably produced while building the report -- the creating
        # call is not visible here, hence ignore_errors).
        shutil.rmtree('breast', ignore_errors=True)
Esempio n. 31
0
    def __init__(self, filename='index.html', directory='report',
                 overwrite=True, verbose=True,
                 template_filename='index.html', mode=None):
        """.. rubric:: Constructor

        Set the report metadata, load the jinja2 template and initialise
        the report through :meth:`_init_report`.

        :param filename: name of the HTML file (stored in ``_filename``);
            defaults to **index.html**
        :param directory: name of the report directory (stored in
            ``_directory``); defaults to **report**
        :param overwrite: defaults to True (not read inside this
            constructor)
        :param verbose: defaults to True (not read inside this constructor)
        :param template_filename: name of the template fetched from the
            ``templates`` location returned by ``gdsctools_data``;
            defaults to **index.html**
        :param str mode: if None, the list of sub-directories to create is
            OUTPUT, INPUT, images, css, js, code. If set to 'summary',
            the list is reduced to images, css, js. For any other value
            the list (``_to_create``) is not set here.

        """
        from gdsctools import version

        #: name of the analysis added in the title
        self.analysis = 'anova'
        self.pkgname = 'gdsctools'
        #: version added in the sub title
        self.version = version

        self._directory, self._filename = directory, filename

        # Filled with the sections and their names by add_section
        self.sections, self.section_names = [], []

        #: flag to add dependencies
        self.add_dependencies = False

        self.title = 'ANOVA analysis summary'
        self.analysis_type = "PANCAN"

        # jinja2 template inheritance needs an environment whose loader
        # knows where the parent templates live (inside gdsctools).
        self.env = Environment(
            loader=FileSystemLoader(gdsctools_data('templates')))
        self.template = self.env.get_template(template_filename)

        # Variables made available to the template
        context = {}
        context['time_now'] = self.get_time_now()
        context["analysis"] = self.analysis
        context["version"] = self.version
        context["title"] = self.title
        context["analysis_domain"] = self.analysis_type
        context['dependencies'] = self.get_table_dependencies().to_html()
        self.jinja = context

        # Sub-directories to be created, depending on the mode
        assets = ['images', 'css', 'js']
        if mode is None:
            self._to_create = ['OUTPUT', 'INPUT'] + assets + ['code']
        elif mode == 'summary':
            self._to_create = assets

        self._init_report()