Esempio n. 1
0
def plot_maf_depth(variations, data_dir, min_depth=DEF_MIN_DEPTH,
                   chunk_size=SNPS_PER_CHUNK):
    """Write one depth-based MAF distribution plot (PNG) per sample.

    The plots are stored as ``<sample>.png`` inside ``<data_dir>/maf_depth``;
    the directory is created when it does not exist yet.

    Args:
        variations: Object handed to calc_maf_depth_distribs_per_sample. Its
            ``samples`` attribute provides the names used for the files and
            titles; when it is None the row indexes of the distributions are
            used instead.
        data_dir: Directory under which the ``maf_depth`` directory is placed.
        min_depth: Forwarded to calc_maf_depth_distribs_per_sample.
        chunk_size: Forwarded to calc_maf_depth_distribs_per_sample.
    """
    # Honour the caller's chunk_size; it used to be silently replaced by the
    # module default.
    maf_dp_distribs, bins = calc_maf_depth_distribs_per_sample(
        variations, min_depth=min_depth, n_bins=100, chunk_size=chunk_size)
    if maf_dp_distribs is None:
        # No distributions were calculated, so there is nothing to plot.
        return

    maf_dp_dir = os.path.join(data_dir, 'maf_depth')
    # exist_ok avoids the race between checking for and creating the dir.
    os.makedirs(maf_dp_dir, exist_ok=True)

    samples = variations.samples
    if samples is None:
        samples = range(maf_dp_distribs.shape[0])

    title_tmpl = 'Depth based Maximum allele frequency (MAF) distribution {}'
    for sample, distrib in zip(samples, maf_dp_distribs):
        fpath = os.path.join(maf_dp_dir, '{}.png'.format(sample))
        mpl_params = {'set_xlabel': {'args': ['MAF (depth)'], 'kwargs': {}},
                      'set_ylabel': {'args': ['SNPs number'], 'kwargs': {}},
                      'set_title': {'args': [title_tmpl.format(sample)],
                                    'kwargs': {}},
                      'set_yscale': {'args': ['log'], 'kwargs': {}}}
        # PNG is binary data, so the file is opened in binary mode, and the
        # context manager closes it as soon as the figure has been written.
        with open(fpath, 'wb') as fhand:
            plot_distrib(distrib, bins, fhand=fhand, figsize=(10, 10),
                         mpl_params=mpl_params, n_ticks=10)
Esempio n. 2
0
    def test_calc_maf_depth_distribs_per_sample(self):
        """Exercise calc_maf_depth_distribs_per_sample with three inputs.

        The inputs are empty arrays, a single hand-built SNP and the
        ``ril.hdf5`` test file.
        """
        # Empty AO/RO arrays: both returned values have to be None.
        empty_vars = VariationsArrays()
        empty_vars['/calls/AO'] = numpy.array([])
        empty_vars['/calls/RO'] = numpy.array([])
        result = calc_maf_depth_distribs_per_sample(empty_vars,
                                                    chunk_size=None)
        distribs, bins = result
        assert distribs is None
        assert bins is None

        # One SNP, three samples, four bins.
        alt_depths = numpy.array([[[0, 0], [0, 0], [15, -1]]])
        ref_depths = numpy.array([[10, 5, 15]])
        snp_vars = VariationsArrays()
        snp_vars['/calls/AO'] = alt_depths
        snp_vars['/calls/RO'] = ref_depths
        snp_vars.samples = list(range(3))
        result = calc_maf_depth_distribs_per_sample(snp_vars, n_bins=4,
                                                    min_depth=6,
                                                    chunk_size=None)
        distribs = result[0]
        expected = [[0, 0, 0, 1], [0, 0, 0, 0], [0, 0, 1, 0]]
        assert numpy.all(distribs == expected)

        # The result must be the same with chunk_size=None and with the
        # default chunk_size.
        h5_path = join(TEST_DATA_DIR, 'ril.hdf5')
        hdf5 = VariationsH5(h5_path, mode='r')
        no_chunk_res = calc_maf_depth_distribs_per_sample(hdf5, min_depth=6,
                                                          chunk_size=None)
        default_res = calc_maf_depth_distribs_per_sample(hdf5, min_depth=6)
        distribs1 = no_chunk_res[0]
        distribs2 = default_res[0]
        assert numpy.all(distribs1 == distribs2)
    def test_calc_maf_depth_distribs_per_sample(self):
        """Test the per sample depth based MAF distributions calculation."""
        # NOTE(review): a method with this same name and body is defined just
        # before this one in this listing; if both belong to the same class
        # only the later definition is kept -- confirm and remove one.

        # Case 1: no calls at all, nothing can be calculated.
        no_calls = VariationsArrays()
        for field in ('/calls/AO', '/calls/RO'):
            no_calls[field] = numpy.array([])
        distribs, bins = calc_maf_depth_distribs_per_sample(no_calls,
                                                            chunk_size=None)
        assert distribs is None
        assert bins is None

        # Case 2: a hand-made SNP checked against known bin counts.
        one_snp = VariationsArrays()
        one_snp['/calls/AO'] = numpy.array([[[0, 0], [0, 0], [15, -1]]])
        one_snp['/calls/RO'] = numpy.array([[10, 5, 15]])
        one_snp.samples = list(range(3))
        calc_kwargs = {'n_bins': 4, 'min_depth': 6, 'chunk_size': None}
        distribs, _ = calc_maf_depth_distribs_per_sample(one_snp,
                                                         **calc_kwargs)
        expected = [[0, 0, 0, 1],
                    [0, 0, 0, 0],
                    [0, 0, 1, 0]]
        assert numpy.all(distribs == expected)

        # Case 3: chunk_size=None and the default chunk_size have to agree.
        hdf5 = VariationsH5(join(TEST_DATA_DIR, 'ril.hdf5'), mode='r')
        distribs1, _ = calc_maf_depth_distribs_per_sample(hdf5,
                                                          min_depth=6,
                                                          chunk_size=None)
        distribs2, _ = calc_maf_depth_distribs_per_sample(hdf5,
                                                          min_depth=6)
        assert numpy.all(distribs1 == distribs2)
Esempio n. 4
0
def plot_maf_depth(variations,
                   data_dir,
                   min_depth=DEF_MIN_DEPTH,
                   chunk_size=SNPS_PER_CHUNK):
    """Plot the depth-based MAF distribution of every sample to a PNG file.

    Each sample gets its own ``<sample>.png`` inside ``<data_dir>/maf_depth``,
    a directory that is created on demand.

    Args:
        variations: Object handed to calc_maf_depth_distribs_per_sample. Its
            ``samples`` attribute provides the names used for the files and
            titles; when it is None the row indexes of the distributions are
            used instead.
        data_dir: Directory under which the ``maf_depth`` directory is placed.
        min_depth: Forwarded to calc_maf_depth_distribs_per_sample.
        chunk_size: Forwarded to calc_maf_depth_distribs_per_sample.
    """
    # The chunk_size argument is forwarded as given; previously the module
    # default was always used regardless of what the caller asked for.
    maf_dp_distribs, bins = calc_maf_depth_distribs_per_sample(
        variations, min_depth=min_depth, n_bins=100, chunk_size=chunk_size)
    if maf_dp_distribs is None:
        # Nothing was calculated, so there are no plots to write.
        return

    maf_dp_dir = os.path.join(data_dir, 'maf_depth')
    # Creating with exist_ok removes the check-then-create race.
    os.makedirs(maf_dp_dir, exist_ok=True)

    samples = variations.samples
    if samples is None:
        samples = range(maf_dp_distribs.shape[0])

    for sample, distrib in zip(samples, maf_dp_distribs):
        fpath = os.path.join(maf_dp_dir, '{}.png'.format(sample))
        title = 'Depth based Maximum allele frequency (MAF) distribution {}'
        title = title.format(sample)
        mpl_params = {
            'set_xlabel': {
                'args': ['MAF (depth)'],
                'kwargs': {}
            },
            'set_ylabel': {
                'args': ['SNPs number'],
                'kwargs': {}
            },
            'set_title': {
                'args': [title],
                'kwargs': {}
            },
            'set_yscale': {
                'args': ['log'],
                'kwargs': {}
            }
        }
        # A PNG is binary, hence 'wb'; the with block guarantees the handle
        # is closed once the plot has been written.
        with open(fpath, 'wb') as fhand:
            plot_distrib(distrib,
                         bins,
                         fhand=fhand,
                         figsize=(10, 10),
                         mpl_params=mpl_params,
                         n_ticks=10)