Ejemplo n.º 1
0
def test_plotgenometracks():
    """Smoke-test ``plotGenomeTrack`` with a list of tracks and a single track.

    Two coverage datasets are loaded from the packaged sample bigwig file
    and plotted for chr1:16000-18000.  Nothing is asserted about the
    returned objects; the test passes when both calls complete without
    raising.
    """
    sample_regions = pkg_resources.resource_filename(
        'janggu', 'resources/sample.bed')
    sample_bigwig = pkg_resources.resource_filename(
        'janggu', 'resources/sample.bw')

    # Both datasets are created with identical settings and the same name.
    track_settings = dict(bigwigfiles=sample_bigwig,
                          roi=sample_regions,
                          binsize=200,
                          stepsize=200,
                          resolution=50)
    first_track = Cover.create_from_bigwig('coverage2', **track_settings)
    second_track = Cover.create_from_bigwig('coverage2', **track_settings)

    # First with a list of tracks, then with a single dataset.
    figure = plotGenomeTrack([first_track, second_track],
                             'chr1', 16000, 18000)
    figure = plotGenomeTrack(first_track, 'chr1', 16000, 18000)
Ejemplo n.º 2
0
def test_bigwig_store_whole_genome_option():
    """Check that ``store_whole_genome`` does not change the loaded coverage.

    The sample bigwig is loaded twice over the same regions, once with
    ``store_whole_genome=True`` and once with ``False``.  Both datasets
    must have 100 entries of shape (100, 200, 1, 1), hold equal values
    and sum to 1044.0.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    regions_path = os.path.join(resources, "sample.bed")
    bigwig_path = os.path.join(resources, "sample.bw")

    shared_options = dict(bigwigfiles=bigwig_path,
                          regions=regions_path,
                          binsize=200,
                          stepsize=200,
                          storage='ndarray')
    whole_genome = Cover.create_from_bigwig(
        'test', store_whole_genome=True, **shared_options)
    regions_only = Cover.create_from_bigwig(
        'test2', store_whole_genome=False, **shared_options)

    # Same number of regions and same array layout in both modes.
    assert len(whole_genome) == 100
    assert len(regions_only) == len(whole_genome)
    assert whole_genome.shape == (100, 200, 1, 1)
    assert whole_genome.shape == regions_only.shape

    # Same content in both modes.
    np.testing.assert_equal(whole_genome[:], regions_only[:])
    assert whole_genome[:].sum() == 1044.0
Ejemplo n.º 3
0
def test_channel_last_first():
    """Compare the array layouts produced by ``channel_last`` True and False.

    With ``channel_last=True`` the dataset shape is (100, 200, 1, 1); with
    ``channel_last=False`` it is (100, 1, 200, 1).  Transposing the first
    sample of the latter with axes (0, 2, 3, 1) must reproduce the first
    sample of the former.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    regions_path = os.path.join(resources, "sample.bed")
    bigwig_path = os.path.join(resources, "sample.bw")

    def load(channel_last):
        # The two datasets differ only in the channel_last flag.
        return Cover.create_from_bigwig('test',
                                        bigwigfiles=bigwig_path,
                                        resolution=1,
                                        binsize=200,
                                        regions=regions_path,
                                        store_whole_genome=True,
                                        channel_last=channel_last,
                                        storage='ndarray')

    channels_last = load(True)
    assert channels_last.shape == (100, 200, 1, 1)
    assert channels_last[0].shape == (1, 200, 1, 1)

    channels_first = load(False)
    assert channels_first.shape == (100, 1, 200, 1)
    assert channels_first[0].shape == (1, 1, 200, 1)

    realigned = np.transpose(channels_first[0], (0, 2, 3, 1))
    np.testing.assert_equal(channels_last[0], realigned)
Ejemplo n.º 4
0
def test_load_cover_bigwig_resolutionNone(tmpdir):
    """Compare ``resolution=None`` (sum-collapsed) with ``resolution=1``.

    For each storage backend the sample bigwig is loaded at resolution 1
    and again with ``resolution=None`` and ``collapser='sum'``.  The
    collapsed dataset must have shape (100, 1, 1, 1), and summing either
    dataset over axis 1 must give the same values.

    Sets the ``JANGGU_OUTPUT`` environment variable to the pytest
    temporary directory.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    bigwig_path = os.path.join(resources, "sample.bw")
    regions_path = os.path.join(resources, "sample.bed")

    for backend in ('ndarray', 'hdf5', 'sparse'):
        print(backend)
        common = dict(bigwigfiles=bigwig_path,
                      roi=regions_path,
                      binsize=200,
                      stepsize=200,
                      storage=backend,
                      cache=True)

        # The resolution-1 reference is created first, as in the original.
        per_base = Cover.create_from_bigwig("cov", resolution=1, **common)

        collapsed = Cover.create_from_bigwig("cov",
                                             resolution=None,
                                             datatags=['None'],
                                             collapser='sum',
                                             **common)

        np.testing.assert_equal(len(collapsed), 100)
        np.testing.assert_equal(collapsed.shape, (100, 1, 1, 1))

        np.testing.assert_equal(per_base[:].sum(axis=1),
                                collapsed[:].sum(axis=1))
Ejemplo n.º 5
0
def test_load_cover_bigwig_default(tmpdir):
    """Load the sample bigwig with an explicit ``genomesize`` per backend.

    Chromosome lengths are read from ``sample.chrom.sizes`` into a dict
    and passed as ``genomesize``.  For every storage backend the dataset
    must contain 100 regions of shape (100, 200, 1, 1), with region 4
    summing to 36 and region 52 summing to 72.

    Sets the ``JANGGU_OUTPUT`` environment variable to the pytest
    temporary directory.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    resources = pkg_resources.resource_filename('janggu', 'resources/')

    bigwig_path = os.path.join(resources, "sample.bw")
    sizes_path = os.path.join(resources, 'sample.chrom.sizes')

    # Tab-separated table of chromosome name and length, indexed by name.
    size_table = pandas.read_csv(sizes_path,
                                 sep='\t',
                                 names=['chr', 'length'],
                                 index_col='chr')
    chrom_lengths = size_table.to_dict()['length']

    regions_path = os.path.join(resources, "sample.bed")

    for backend in ('ndarray', 'hdf5', 'sparse'):
        print(backend)
        cover = Cover.create_from_bigwig("cov",
                                         bigwigfiles=bigwig_path,
                                         regions=regions_path,
                                         binsize=200,
                                         stepsize=200,
                                         genomesize=chrom_lengths,
                                         storage=backend,
                                         cache=True)

        np.testing.assert_equal(len(cover), 100)
        np.testing.assert_equal(cover.shape, (100, 200, 1, 1))

        # Expected signal totals: 36 in region 4, twice that in region 52.
        np.testing.assert_allclose(cover[4].sum(), 36.)
        np.testing.assert_allclose(cover[52].sum(), 2 * 36.)
Ejemplo n.º 6
0
def get_data(params):
    """Build the training inputs and labels selected by ``params``.

    Labels are always created from the RNA BAM file, log-transformed,
    z-scored and wrapped in ``ReduceDim`` with a mean aggregator.  The
    inputs depend on ``params['inputs']``:

    * ``'dna_only'`` / ``'epi_dna'``: a ``Bioseq`` dataset built from the
      reference genome.
    * ``'epi_only'`` / ``'epi_dna'``: a DNase coverage dataset (BAM) and an
      H3K4me3 coverage dataset (bigwig), each normalised and reduced in
      the same way as the labels.

    Parameters
    ----------
    params : mapping
        Keys read here: ``'traincell'``, ``'trainrep'``, ``'cageflank'``
        and ``'inputs'``, plus ``'dnaflank'`` and ``'order'`` for the DNA
        input and ``'dnaseflank'`` for the epigenetic inputs.

    Returns
    -------
    tuple
        ``(train_input, train_labels)`` where ``train_input`` is a list
        of datasets and ``train_labels`` is the reduced label dataset.

    Raises
    ------
    ValueError
        If ``params['inputs']`` selects none of the known inputs.
    """
    label_zscore = ZScore()
    label_cover = Cover.create_from_bam(
        'geneexpr',
        bamfiles=RNA.format(params['traincell'], params['trainrep']),
        roi=ROI_INPUT_TRAIN,
        flank=params['cageflank'],
        conditions=['GeneExpr'],
        resolution=None,
        store_whole_genome=False,
        storage='ndarray',
        normalizer=[LogTransform(), label_zscore],
        stranded=False,
        cache=True)
    train_labels = ReduceDim(label_cover, aggregator="mean")

    train_input = []

    if params['inputs'] in ('dna_only', 'epi_dna'):
        flank_dna = params['dnaflank']
        kmer_order = params['order']
        dna_dataset = Bioseq.create_from_refgenome(
            'dna',
            refgenome=REFGENOME,
            roi=ROI_INPUT_TRAIN,
            flank=flank_dna,
            order=kmer_order,
            cache=True,
            store_whole_genome=False)
        train_input.append(dna_dataset)

    if params['inputs'] in ('epi_only', 'epi_dna'):
        # Each track is given its own ZScore normaliser instance.
        dnase_zscore = ZScore()
        dnase_cover = Cover.create_from_bam(
            'dnase',
            bamfiles=DNASE.format(params['traincell']),
            roi=ROI_INPUT_TRAIN,
            flank=params['dnaseflank'],
            resolution=None,
            store_whole_genome=False,
            normalizer=[LogTransform(), dnase_zscore],
            cache=True)
        train_input.append(ReduceDim(dnase_cover, aggregator="mean"))

        h3k4_zscore = ZScore()
        h3k4_cover = Cover.create_from_bigwig(
            'h3k4',
            bigwigfiles=[H3K4me3.format(params['traincell'])],
            roi=ROI_INPUT_TRAIN,
            flank=params['dnaseflank'],
            store_whole_genome=False,
            normalizer=[LogTransform(), h3k4_zscore],
            cache=True)
        train_input.append(ReduceDim(h3k4_cover, aggregator="mean"))

    if not train_input:
        raise ValueError('no input')
    return train_input, train_labels
Ejemplo n.º 7
0
def test_load_cover_bigwig_resolution1(tmpdir):
    """Check the resolution-1 coverage profiles of regions 4 and 52.

    For every storage backend the dataset must contain 100 regions of
    shape (100, 200, 1, 1).  Region 4 must sum to 36 and region 52 to 72,
    and both must match the expected 200-value profiles exactly.

    Sets the ``JANGGU_OUTPUT`` environment variable to the pytest
    temporary directory.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    bigwig_path = os.path.join(resources, "sample.bw")
    regions_path = os.path.join(resources, "sample.bed")

    # Expected profile of region 4: a single run of ones.
    expected_region4 = np.asarray(
          [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
           1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
           1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
           0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

    # Expected profile of region 52: two separate runs of ones.
    expected_region52 = np.asarray(
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1.,
         1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
         1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
         1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
         1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

    for backend in ('ndarray', 'hdf5', 'sparse'):
        print(backend)
        cover = Cover.create_from_bigwig(
            "cov",
            bigwigfiles=bigwig_path,
            roi=regions_path,
            binsize=200, stepsize=200,
            resolution=1,
            storage=backend, cache=True)

        np.testing.assert_equal(len(cover), 100)
        np.testing.assert_equal(cover.shape, (100, 200, 1, 1))

        # Region 4: total signal of 36 and the exact per-position profile.
        np.testing.assert_allclose(cover[4].sum(), 36)
        np.testing.assert_equal(cover[4][0, :, 0, 0], expected_region4)

        # Region 52: twice the signal, in two separate runs.
        np.testing.assert_allclose(cover[52].sum(), 2*36)
        np.testing.assert_equal(cover[52][0, :, 0, 0], expected_region52)
Ejemplo n.º 8
0
def test_bigwig_inferred_binsize():
    """Load coverage without passing ``binsize`` or ``stepsize``.

    Using ``positive.bed`` as regions and resolution 1, the resulting
    dataset must contain 25 entries of shape (25, 200, 1, 1).
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    regions_path = os.path.join(resources, "positive.bed")
    bigwig_path = os.path.join(resources, "sample.bw")

    # No binsize/stepsize is supplied in this call.
    inferred = Cover.create_from_bigwig('test',
                                        bigwigfiles=bigwig_path,
                                        resolution=1,
                                        regions=regions_path,
                                        storage='ndarray')

    assert len(inferred) == 25
    assert inferred.shape == (25, 200, 1, 1)
Ejemplo n.º 9
0
def test_cover_export_bigwig(tmpdir):
    """Round-trip a coverage dataset through ``export_to_bigwig``.

    For resolutions 1 and 50, with and without ``store_whole_genome``,
    the dataset is exported into the temporary directory and reloaded
    from the written file.  The original and reloaded datasets must have
    the same shape and both sum to ``1044.0 / resolution``.
    """
    out_dir = tmpdir.strpath
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    regions_path = os.path.join(resources, "sample.bed")
    bigwig_path = os.path.join(resources, "sample.bw")

    for resolution in [1, 50]:
        for whole_genome in [True, False]:
            print('resolution=', resolution)
            print('store_whole_genome', whole_genome)

            # Settings shared by the initial load and the reload.
            load_options = dict(resolution=resolution,
                                binsize=200,
                                roi=regions_path,
                                store_whole_genome=whole_genome,
                                storage='ndarray')

            original = Cover.create_from_bigwig(
                'test', bigwigfiles=bigwig_path, **load_options)

            original.export_to_bigwig(output_dir=out_dir)

            # Path of the exported file, built from name and first condition.
            exported_path = '{path}/{name}.{sample}.bigwig'.format(
                path=out_dir,
                name=original.name,
                sample=original.conditions[0])
            reloaded = Cover.create_from_bigwig(
                'test', bigwigfiles=exported_path, **load_options)

            assert original.shape == (100, 200 // resolution, 1, 1)
            assert original.shape == reloaded.shape

            # The key itself is not used further; the lookup is kept so
            # the handle is still accessed as before.
            _first_key = list(original.garray.handle.keys())[0]

            expected_total = 1044.0 / resolution
            np.testing.assert_allclose(original[:].sum(), expected_total)
            np.testing.assert_allclose(reloaded[:].sum(), expected_total)
Ejemplo n.º 10
0
def test_bigwig_genomic_interval_access_part_genome():
    """Check interval-based access when only the regions are stored.

    With ``store_whole_genome=False`` and resolutions 1 and 50, every
    entry ``cover[i]``, repeated ``cover.garray.resolution`` times along
    axis 1, must equal the data returned when indexing with the matching
    interval object or with a ``(chrom, start, end, strand)`` tuple.

    For ``shift == 1`` the interval is moved by one resolution step and
    the overlapping part of the shifted query is compared with the
    overlapping part of ``cover[i]``, taking the strand into account.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    regions_path = os.path.join(resources, "sample.bed")
    bigwig_path = os.path.join(resources, "sample.bw")

    whole_genome = False
    for resolution in [1, 50]:
        for shift in [0, 1]:
            cover = Cover.create_from_bigwig(
                'test',
                bigwigfiles=bigwig_path,
                roi=regions_path,
                flank=0,
                storage='ndarray',
                store_whole_genome=whole_genome,
                resolution=resolution)

            for idx in range(len(cover)):
                print('storage :',whole_genome,'/ resolution :',resolution,'/ shift :',shift)
                print(idx, cover.gindexer[idx])

                # Index access vs. access through the interval object.
                by_index = np.repeat(cover[idx],
                                     cover.garray.resolution,
                                     axis=1)
                np.testing.assert_equal(by_index, cover[cover.gindexer[idx]])

                chrom = cover.gindexer[idx].chrom
                start = cover.gindexer[idx].start
                end = cover.gindexer[idx].end
                strand = cover.gindexer[idx].strand

                # Index access vs. access through explicit coordinates.
                by_index = np.repeat(cover[idx],
                                     cover.garray.resolution,
                                     axis=1)
                np.testing.assert_equal(by_index,
                                        cover[chrom, start, end, strand])

                if shift == 0:
                    continue

                # Move the query window by `shift` resolution steps.
                offset = shift * resolution
                start += offset
                end += offset

                if strand != '-':
                    gicov = cover[chrom, start, end, strand][:, :(-offset), :, :]
                    reference = cover[idx][:, shift:, :, :]
                else:
                    gicov = cover[chrom, start, end, strand][:, offset:, :, :]
                    reference = cover[idx][:, :-shift, :, :]

                # Keep the first value of each resolution-sized group.
                grouped = gicov.reshape(
                    (1, gicov.shape[1]//resolution, resolution, 1, 1))
                np.testing.assert_equal(reference, grouped[:, :, 0, :, :])
Ejemplo n.º 11
0
def test_bigwig_genomic_interval_access():
    """Check that index-based and interval-based access agree.

    For both ``store_whole_genome`` settings and resolutions 1 and 50,
    ``cover[i]`` must equal the data returned when indexing with the
    matching interval object or with a ``(chrom, start, end, strand)``
    tuple.

    For ``shift == 1`` the interval is moved by one resolution step and
    the overlapping parts of ``cover[i]`` and the shifted query are
    compared, taking the strand into account.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    regions_path = os.path.join(resources, "sample.bed")
    bigwig_path = os.path.join(resources, "sample.bw")

    for whole_genome in [True, False]:
        for resolution in [1, 50]:
            for shift in [0, 1]:
                cover = Cover.create_from_bigwig(
                    'test',
                    bigwigfiles=bigwig_path,
                    regions=regions_path,
                    flank=0,
                    storage='ndarray',
                    store_whole_genome=whole_genome,
                    resolution=resolution)

                for idx in range(len(cover)):
                    print('storage :', whole_genome,
                          '/ resolution :', resolution,
                          '/ shift :', shift)
                    print(idx, cover.gindexer[idx])

                    # Index access vs. access through the interval object.
                    np.testing.assert_equal(cover[idx],
                                            cover[cover.gindexer[idx]])

                    chrom = cover.gindexer[idx].chrom
                    start = cover.gindexer[idx].start
                    end = cover.gindexer[idx].end
                    strand = cover.gindexer[idx].strand

                    # Index access vs. access through explicit coordinates.
                    np.testing.assert_equal(cover[idx],
                                            cover[chrom, start, end, strand])

                    if shift == 0:
                        continue

                    # Move the query window by `shift` resolution steps.
                    start += shift * resolution
                    end += shift * resolution

                    if strand != '-':
                        reference = cover[idx][:, shift:, :, :]
                        shifted = cover[chrom, start, end,
                                        strand][:, :-shift, :, :]
                    else:
                        reference = cover[idx][:, :-shift, :, :]
                        shifted = cover[chrom, start, end,
                                        strand][:, shift:, :, :]
                    np.testing.assert_equal(reference, shifted)
Ejemplo n.º 12
0
# Script fragment: collect the high/low groups, fetch their sequences,
# build janggu datasets for them and score every motif against the
# sequences.
# NOTE(review): `input`, `high_hbf`, `low_hbf`, `refgenome`, `flank`,
# `bw_file`, `meme_file` and the helper functions are defined outside this
# fragment. `input` is also the name of a Python builtin; confirm it is
# rebound before this point.
data, high, low = get_high_low_data(input, high_hbf, low_hbf)
roi_A, roi = get_roi(high + low)
seq = roi2fasta(roi_A, refgenome, flank)
# Index-oriented frame built from `seq`; its column 0 is stored on `data`
# as the 'seq' column.
test = pd.DataFrame.from_dict(seq, orient='index')
data['seq'] = test[0]

# 1. Use janggu to build the DNA dataset (presumably one-hot encoded)
#    and the coverage track from the bigwig file.
dna_A = Bioseq.create_from_refgenome(name='dna',
                                     refgenome=refgenome,
                                     roi=roi_A,
                                     flank=flank)
Tn5 = Cover.create_from_bigwig('bigwig_coverage',
                               bigwigfiles=bw_file,
                               roi=roi,
                               binsize=1,
                               stepsize=1,
                               flank=flank)

# 2. Reshape both datasets to one row per element of `high + low`.
#    Assumes each region spans 2 * flank + 1 positions and that the DNA
#    dataset has 4 channels -- TODO confirm against the janggu output shape.
dna_A = np.reshape(dna_A, (len(high + low), flank * 2 + 1, 4))
bw_values = np.reshape(Tn5, (len(high + low), flank * 2 + 1))

# 3. Read the motifs from the MEME file. Each entry is indexed with [0] and
#    [1] below; presumably these are the PWMs for the two strands -- verify
#    against `read_motif`.
motifs = read_motif(meme_file)

# 4. Scan every motif against the DNA array in parallel, using all
#    available cores (n_jobs=-1); one result per motif.
score_list_A = Parallel(n_jobs=-1)(
    delayed(DNA_motif_scan)(dna_A, motifs[m][0], motifs[m][1]) for m in motifs)

Ejemplo n.º 13
0
import matplotlib.pyplot as plt
from pkg_resources import resource_filename

from janggu.data import Cover
from janggu.data import plotGenomeTrack

# Example: plot two coverage datasets of the packaged sample data as genome
# tracks and save the figure to 'coverage.png'.
roi = resource_filename('janggu', 'resources/sample.bed')
bw_file = resource_filename('janggu', 'resources/sample.bw')

# Region and binning settings shared by both datasets.
track_settings = dict(roi=roi, binsize=200, stepsize=200, resolution=50)

# First dataset: the same bigwig file loaded twice, as two conditions.
cover = Cover.create_from_bigwig(
    'coverage1',
    bigwigfiles=[bw_file] * 2,
    conditions=['rep1', 'rep2'],
    **track_settings)

# Second dataset: the bigwig file loaded once.
cover2 = Cover.create_from_bigwig(
    'coverage2',
    bigwigfiles=bw_file,
    **track_settings)

a = plotGenomeTrack([cover, cover2], 'chr1', 16000, 18000)

a.savefig('coverage.png')
# Interactive display is left disabled here; the figure is only saved.
Ejemplo n.º 14
0
def test_cover_from_bigwig_sanity():
    """Sanity checks for ``Cover.create_from_bigwig``.

    Covers three things:

    * With binsize 200 and stepsize 50 the index holds 394 windows; the
      storage handle holds 394 entries when only the regions are stored
      and 2 entries when the whole genome is stored.
    * A dataset created without regions can be queried by coordinates,
      while accessing ``shape`` or indexing by integer raises.
    * A series of invalid argument combinations must raise.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    regions_path = os.path.join(resources, "sample.bed")
    bigwig_path = os.path.join(resources, "sample.bw")

    # Overlapping windows: binsize 200 moved in steps of 50.
    windowed = dict(bigwigfiles=bigwig_path,
                    regions=regions_path,
                    binsize=200,
                    stepsize=50,
                    resolution=50,
                    flank=0,
                    storage='ndarray')

    cover = Cover.create_from_bigwig('test', **windowed)
    cover[0]
    assert len(cover.gindexer) == 394
    assert len(cover.garray.handle) == 394

    cover = Cover.create_from_bigwig('test',
                                     store_whole_genome=True,
                                     **windowed)
    cover[0]
    assert len(cover.gindexer) == 394
    assert len(cover.garray.handle) == 2

    # No regions supplied: only coordinate-based access is expected to work.
    cov2 = Cover.create_from_bigwig('test',
                                    bigwigfiles=bigwig_path,
                                    resolution=7,
                                    storage='ndarray',
                                    store_whole_genome=True)

    assert len(cov2.garray.handle) == 2
    assert cov2['chr1', 100, 200].shape == (1, 100 // 7 + 1, 1, 1)

    with pytest.raises(Exception):
        cov2.shape
    with pytest.raises(Exception):
        cov2[0]

    # Each entry is (dataset name, extra keyword arguments) and must raise.
    invalid_calls = [
        # name must be a string
        (1.2, dict(binsize=1, stepsize=1)),
        # negative flank
        ('test', dict(binsize=1, stepsize=1, flank=-1)),
        # negative stepsize
        ('test', dict(binsize=1, stepsize=-1, flank=0)),
        # negative binsize
        ('test', dict(binsize=-1, stepsize=1, flank=0)),
        # resolution (300) exceeds both binsize (200) and stepsize (50)
        ('test', dict(binsize=200, stepsize=50, resolution=300, flank=0)),
    ]
    for dataset_name, extra in invalid_calls:
        with pytest.raises(Exception):
            Cover.create_from_bigwig(dataset_name,
                                     bigwigfiles=bigwig_path,
                                     regions=regions_path,
                                     storage='ndarray',
                                     **extra)