Esempio n. 1
0
def test_coverage_module(tmpdir):
    """Instantiate ``CoverageModule`` from a coverage CSV and a GenBank file.

    ``tmpdir`` (presumably the pytest fixture) supplies a scratch directory
    that is registered as ``config.output_dir`` before the module is built.
    """
    coverage_csv = sequana_data("JB409847.cov.csv")
    genbank = sequana_data("JB409847.gbk")
    genome_cov = bedtools.GenomeCov(coverage_csv, genbank)

    # Set the global report settings before building the module.
    config.output_dir = str(tmpdir.mkdir('test_coverage_module'))
    config.sample_name = "JB409847"

    CoverageModule(genome_cov)
Esempio n. 2
0
    def __init__(self, data):
        """.. rubric:: constructor

        Wrap ``data`` into ``self.bed``, build the sub-reports with
        :meth:`create_reports`, then write ``sequana_coverage.html``.

        :param data: it can be a csv filename created by sequana_coverage or a
            :class:`bedtools.GenomeCov` object.
        :raises FileNotFoundError: if :class:`bedtools.GenomeCov` reports that
            the file named by ``data`` does not exist.
        :raises TypeError: if :meth:`create_reports` fails with a
            ``TypeError``.
        """
        super().__init__()
        try:
            self.bed = bedtools.GenomeCov(data)
        except FileNotFoundError as err:
            msg = ("The csv file is not present. Please, check if your"
                   " file is present.")
            # Chain the original error so the offending path stays visible
            # in the traceback.
            raise FileNotFoundError(msg) from err
        except TypeError:
            # ``data`` could not be used as a filename: keep it as is and
            # let ``create_reports`` below decide whether it is usable.
            self.bed = data
        try:
            html_list = self.create_reports()
        except TypeError as err:
            msg = ("Data must be either a csv file or a :class:`GenomeCov` "
                   "instance where zscore is computed.")
            raise TypeError(msg) from err
        self.title = "Coverage Report of {0}".format(config.sample_name)
        self.intro = ("<p>Report the coverage of your sample {0} to check the "
                      "quality of your mapping and to highlight regions of "
                      "interest (under and over covered).</p>".format(
                          config.sample_name))
        self.create_report_content(html_list)
        self.create_html("sequana_coverage.html")
Esempio n. 3
0
def test_binining():
    """Run the chromosome analysis with ``binning`` and ``cnv_delta`` set."""
    bed_path = sequana_data('JB409847.bed')
    genbank_path = sequana_data('JB409847.gbk')

    # The original author's note: a chunksize of 7000 tests an "odd number".
    genome_cov = bedtools.GenomeCov(bed_path, genbank_path, chunksize=7000)

    first_chrom = genome_cov.chr_list[0]
    first_chrom.run(501, k=2, circular=True, binning=2, cnv_delta=100)
Esempio n. 4
0
def test_ChromosomeCovMultiChunk():
    """Run a chunked analysis and query the summary and ROIs of its result."""
    bed_path = sequana_data('JB409847.bed')
    genbank_path = sequana_data('JB409847.gbk')

    # The original author's note: a chunksize of 7000 tests an "odd number".
    genome_cov = bedtools.GenomeCov(bed_path, genbank_path, chunksize=7000)

    # ``run`` returns an object exposing both accessors called below.
    results = genome_cov.chr_list[0].run(501, k=2, circular=True)
    results.get_summary()
    results.get_rois()
Esempio n. 5
0
def test_chromosome():
    """Exercise ``run``, ``_coverage_scaling`` and ``plot_rois`` on a chromosome.

    The first chromosome of the ``JB409847`` data set is analysed with and
    without an explicit ``chunksize``. The test then checks that
    ``_coverage_scaling`` and ``plot_rois`` fail when called before ``run``.
    """
    filename = sequana_data('JB409847.bed')
    genbank = sequana_data('JB409847.gbk')

    # using chunksize of 7000, we test odd number
    bed = bedtools.GenomeCov(filename, genbank, chunksize=7000)
    chrom = bed.chr_list[0]
    chrom.run(501, k=2, circular=True)
    print(chrom)

    # using chunksize of 7000, we test even number
    # NOTE(review): same chunksize as the "odd" case above; a different
    # value may have been intended -- confirm.
    bed = bedtools.GenomeCov(filename, genbank, chunksize=7000)
    chrom = bed.chr_list[0]
    chrom.run(501, k=2, circular=True)

    # no chunksize
    bed = bedtools.GenomeCov(filename, genbank)
    chrom = bed.chr_list[0]
    chrom.run(501, k=2, circular=True)
    print(chrom)

    # no chunksize, and ``run`` deliberately not called yet
    bed = bedtools.GenomeCov(filename, genbank)
    chrom = bed.chr_list[0]

    # Scaling before ``run`` is expected to raise KeyError. Any other
    # exception propagates and fails the test with its own traceback.
    try:
        chrom._coverage_scaling()
    except KeyError:
        pass
    else:
        raise AssertionError(
            "_coverage_scaling() should raise KeyError before run()")

    # zscore not computed yet, so error.  The failure is raised from the
    # ``else`` clause so that it cannot be swallowed by the handler.
    try:
        chrom.plot_rois(3000, 8000)
    except Exception:
        pass
    else:
        raise AssertionError("plot_rois() should fail before run()")

    chrom.run(4001)
    chrom.plot_rois(3000, 8000)
Esempio n. 6
0
def test_coverage_module(tmpdir):
    """Instantiate ``CoverageModule`` from an analysed ``GenomeCov`` object.

    GC content is computed and the first chromosome is analysed before the
    object is handed to the module.  ``tmpdir`` (presumably the pytest
    fixture) supplies the directory registered as ``config.output_dir``.
    """
    genome_cov = bedtools.GenomeCov(sequana_data("JB409847.bed"))
    genome_cov.compute_gc_content(sequana_data("JB409847.fasta"))
    genome_cov.chr_list[0].run(4001)

    # Set the global report settings before building the module.
    config.output_dir = str(tmpdir.mkdir('test_coverage_module'))
    config.sample_name = "JB409847"

    CoverageModule(genome_cov)
Esempio n. 7
0
def test_canvasjs_linegraph():
    """Build a CanvasJS line graph with a coverage and a GC-content series.

    The data frame of the first chromosome is exported as CSV text and fed
    to ``CanvasJSLineGraph``; options, title, legend, axes and per-series
    settings are then applied before the graph is created.
    """
    genome_cov = bedtools.GenomeCov(sequana_data("JB409847.bed"))
    genome_cov.compute_gc_content(sequana_data("JB409847.fasta"))

    first_chrom = genome_cov.chr_list[0]
    first_chrom.run(4001)

    df = genome_cov[0].df
    csv_text = df.to_csv(columns=['pos', 'cov', 'gc'], index=False,
                         float_format='%.3g')

    # Graph object
    graph = CanvasJSLineGraph(csv_text, 'cov', 'pos', ['cov', 'gc'])

    # Global options
    options = {
        'zoomEnabled': 'true',
        'zoomType': 'x',
        'exportEnabled': 'true',
    }
    graph.set_options(options)

    # Title
    graph.set_title("Genome Coverage")

    # Legend
    legend = {
        'verticalAlign': 'bottom',
        'horizontalAlign': 'center',
        'cursor': 'pointer',
    }
    graph.set_legend(legend, hide_on_click=True)

    # Axes: x spans the whole data frame, y2 carries the GC ratio
    x_axis = {
        'title': "Position (bp)",
        'labelAngle': 30,
        'minimum': 0,
        'maximum': len(df),
    }
    graph.set_axis_x(x_axis)
    graph.set_axis_y({'title': "Coverage (Count)"})
    gc_axis = {
        'title': "GC content (ratio)",
        'minimum': 0,
        'maximum': 1,
        'lineColor': '#FFC425',
        'titleFontColor': '#FFC425',
        'labelFontColor': '#FFC425',
    }
    graph.set_axis_y2(gc_axis)

    # Series settings
    coverage_series = {
        'type': 'line',
        'name': "Coverage",
        'showInLegend': 'true',
        'color': '#5BC0DE',
        'lineColor': '#5BC0DE',
    }
    graph.set_data(index=0, data_dict=coverage_series)
    gc_series = {
        'type': 'line',
        'axisYType': 'secondary',
        'name': "GC content",
        'showInLegend': 'true',
        'color': '#FFC425',
        'lineColor': '#FFC425',
    }
    graph.set_data(index=1, data_dict=gc_series)

    # Render
    graph.create_canvasjs()
Esempio n. 8
0
def test_datatables():
    """Render the ROI table of a precomputed coverage CSV as a DataTable."""
    coverage_csv = sequana_data("JB409847.cov.csv")
    genbank = sequana_data("JB409847.gbk")
    genome_cov = bedtools.GenomeCov(coverage_csv, genbank)

    rois = genome_cov[0].get_roi()
    # Extra column used as the link target for the 'start' column.
    rois.df['link'] = 'test'

    js_function = DataTableFunction(rois.df, 'roi')
    js_function.set_links_to_column('link', 'start')
    options = {
        'scrollX': 'true',
        'pageLength': 15,
        'scrollCollapse': 'true',
        'dom': 'Bfrtip',
        'buttons': ['copy', 'csv'],
    }
    js_function.datatable_options = options

    table = DataTable(rois.df, 'rois', js_function)
    table.create_datatable(float_format='%.3g')
Esempio n. 9
0
def test_gc_content():
    """Compute GC content and per-chromosome statistics, plots and exports."""
    bed_path = sequana_data('JB409847.bed')
    fasta_path = sequana_data('JB409847.fasta')

    genome_cov = bedtools.GenomeCov(bed_path)
    genome_cov.compute_gc_content(fasta_path)
    genome_cov.get_stats()

    chrom = genome_cov[0]
    chrom.moving_average(4001, circular=True)
    chrom.running_median(4001, circular=True)
    chrom.compute_zscore()

    chrom.get_evenness()
    chrom.get_cv()
    # Centralness of this data set is expected to lie strictly in (0.84, 0.85).
    assert 0.84 < chrom.get_centralness() and chrom.get_centralness() < 0.85

    with TempFile(suffix=".png") as image:
        chrom.plot_gc_vs_coverage(filename=image.name)

    with TempFile() as table:
        chrom.to_csv(table.name)

    chrom.get_max_gc_correlation(fasta_path)
Esempio n. 10
0
def test_datatables():
    """Render the ROI table of a freshly analysed chromosome as a DataTable."""
    bed_path = sequana_data("JB409847.bed")
    genbank = sequana_data("JB409847.gbk")
    genome_cov = bedtools.GenomeCov(bed_path, genbank)
    genome_cov.compute_gc_content(sequana_data("JB409847.fasta"))

    chrom = genome_cov.chr_list[0]
    chrom.run(4001)

    rois = chrom.get_rois()
    # Extra column used as the link target for the 'start' column.
    rois.df['link'] = 'test'

    js_function = DataTableFunction(rois.df, 'roi')
    js_function.set_links_to_column('link', 'start')
    options = {
        'scrollX': 'true',
        'pageLength': 15,
        'scrollCollapse': 'true',
        'dom': 'Bfrtip',
        'buttons': ['copy', 'csv'],
    }
    js_function.datatable_options = options

    table = DataTable(rois.df, 'rois', js_function)
    table.create_datatable(float_format='%.3g')
Esempio n. 11
0
def test_genomecov():
    """Exercise construction, comparison, setters and plots of ``GenomeCov``.

    Invalid inputs and invalid attribute assignments are expected to raise.
    Those checks go through ``expect_error`` so that a call which silently
    succeeds actually fails the test.
    """
    def expect_error(action, description):
        """Call ``action`` and fail the test unless it raises an exception."""
        try:
            action()
        except Exception:
            return
        # Raised outside the ``try`` so no handler can swallow it.
        raise AssertionError(description + " should have raised an error")

    filename = sequana_data('JB409847.bed')
    genbank = sequana_data('JB409847.gbk')

    expect_error(lambda: bedtools.GenomeCov("dummy.csv"),
                 "a missing input file")
    expect_error(lambda: bedtools.GenomeCov(filename, "dummy.gbk"),
                 "a missing genbank file")

    # now let us read the good data sets
    bed = bedtools.GenomeCov(filename, genbank)
    bed.compute_coverage(4001)

    bed = bedtools.GenomeCov(filename, genbank)
    bed2 = bedtools.GenomeCov(filename, genbank)
    # NOTE(review): this compares ``bed`` with itself; ``bed == bed2`` may
    # have been intended -- confirm before changing.
    assert bed == bed

    # test equality for same chromosome but different data
    bed2.chr_list[0].df["cov"] += 100
    assert bed != bed2
    # data restored, but the chromosome lists now differ
    bed2.chr_list[0].df["cov"] -= 100
    bed2.chr_list.append("dummy")
    assert bed != bed2

    # setter must be bool
    expect_error(lambda: setattr(bed, "circular", 1),
                 "assigning a non-bool to circular")

    # cant use setter
    expect_error(lambda: setattr(bed, "feature_dict", {}),
                 "assigning to feature_dict")

    assert len(bed) == 1
    # a getter for the first chromosome
    bed[0]

    # setter available but not sure this is useful
    bed.window_size = 4001
    bed.hist()

    # This requires to call other method before
    for chrom in bed:
        chrom.moving_average(n=501)
        chrom.running_median(n=501, circular=True)
        chrom.running_median(n=501, circular=False)

        chrom.compute_zscore()
        chrom.get_roi()
        with TempFile(suffix='.png') as fh:
            chrom.plot_coverage(filename=fh.name)
        with TempFile(suffix='.png') as fh:
            chrom.plot_hist_zscore(filename=fh.name)
        with TempFile(suffix='.png') as fh:
            chrom.plot_hist_normalized_coverage(filename=fh.name)

        len(chrom)
        print(chrom)
        chrom.get_size()
        chrom.get_mean_cov()
        chrom.get_var_coef()

    # Export to CSV and read the exported file back.
    with TempFile(suffix='.csv') as fh:
        bed.to_csv(fh.name)
        bedtools.GenomeCov(fh.name, genbank)

    # plotting
    first_chrom = bed.chr_list[0]
    first_chrom.plot_hist_coverage()
    first_chrom.plot_hist_coverage(logx=False, logy=True)
    first_chrom.plot_hist_coverage(logx=True, logy=False)
    with TempFile(suffix=".png") as fh:
        first_chrom.plot_hist_coverage(logx=False, logy=False,
                                       filename=fh.name)
Esempio n. 12
0
def test_genomecov():
    """Exercise construction, setters, analysis and plots of ``GenomeCov``.

    Invalid inputs (missing files, thresholds the constructor is expected to
    reject) and invalid attribute assignments are expected to raise.  Those
    checks go through ``expect_error`` so that a call which silently
    succeeds actually fails the test.
    """
    def expect_error(action, description):
        """Call ``action`` and fail the test unless it raises an exception."""
        try:
            action()
        except Exception:
            return
        # Raised outside the ``try`` so no handler can swallow it.
        raise AssertionError(description + " should have raised an error")

    filename = sequana_data('JB409847.bed')

    # wrong file
    expect_error(lambda: bedtools.GenomeCov("dummy.csv"),
                 "a missing input file")

    # wrong threshold
    expect_error(lambda: bedtools.GenomeCov(filename, high_threshold=2),
                 "high_threshold=2")

    # wrong threshold
    expect_error(lambda: bedtools.GenomeCov(filename, low_threshold=-2),
                 "low_threshold=-2")

    # wrong genbank
    expect_error(lambda: bedtools.GenomeCov(filename, "dummy.gbk"),
                 "a missing genbank file")

    # now let us read the good data sets by chunks
    bed = bedtools.GenomeCov(filename,
                             sequana_data('JB409847.gbk'),
                             chunksize=5000)
    for c in bed.chr_list:
        c.run(1001, k=2)

    # setter must be bool
    expect_error(lambda: setattr(bed, "circular", 1),
                 "assigning a non-bool to circular")

    # cant use setter
    expect_error(lambda: setattr(bed, "feature_dict", {}),
                 "assigning to feature_dict")

    assert len(bed) == 1
    # a getter for the first chromosome
    bed[0]

    # setter available but not sure this is useful
    bed.window_size = 4000
    bed.window_size = 4001
    bed.hist()

    # This requires to call other method before
    for chrom in bed:
        chrom.moving_average(n=501)
        chrom.running_median(n=501, circular=True)
        chrom.running_median(n=501, circular=False)

        chrom.compute_zscore()
        chrom.get_rois()
        with TempFile(suffix='.png') as fh:
            chrom.plot_coverage(filename=fh.name)
        with TempFile(suffix='.png') as fh:
            chrom.plot_hist_zscore(filename=fh.name)
        with TempFile(suffix='.png') as fh:
            chrom.plot_hist_normalized_coverage(filename=fh.name)

        len(chrom)
        print(chrom)
        chrom.get_size()
        # Attribute accesses only: they must not raise.
        chrom.DOC
        chrom.CV

    with TempFile(suffix='.csv') as fh:
        bed.gc_window_size = 100
        bed.to_csv(fh.name)

    # plotting
    first_chrom = bed.chr_list[0]
    first_chrom.plot_hist_coverage()
    first_chrom.plot_hist_coverage(logx=False, logy=True)
    first_chrom.plot_hist_coverage(logx=True, logy=False)
    with TempFile(suffix=".png") as fh:
        first_chrom.plot_hist_coverage(logx=False,
                                       logy=False,
                                       filename=fh.name)