Ejemplo n.º 1
0
def plot_allele_obs_distrib_2D(variations, data_dir, max_allele_counts,
                               chunk_size=SNPS_PER_CHUNK):
    """Plot 2D allele observation histograms per genotype class to a PNG.

    One row of two panels is drawn for each call mask (heterozygous,
    alt homozygous, ref homozygous): on the left the log10 of the allele
    counts 2D histogram, on the right the genotype quality (GQ) 2D
    histogram computed with the same bins. The figure is written to
    ``allele_obs_distrib_per_gt.png`` inside ``data_dir``.

    Args:
        variations: object passed unchanged to hist2d_allele_observations
            and hist2d_gq_allele_observations.
        data_dir: directory in which the PNG file is created.
        max_allele_counts: used both as the number of bins and as the
            upper limit of the range on both axes.
        chunk_size: forwarded to both histogram functions.
    """
    masks = [call_is_het, call_is_hom_alt, call_is_hom_ref]
    names = ['Heterozygous', 'Alt Homozygous', 'Ref Homozygous']

    def _axes_params(title):
        # Every panel shares the axis labels; only the title differs.
        return {'set_xlabel': {'args': ['Alt allele counts'], 'kwargs': {}},
                'set_ylabel': {'args': ['Ref allele counts'], 'kwargs': {}},
                'set_title': {'args': [title], 'kwargs': {}}}

    fig = Figure(figsize=(22, 25))
    canvas = FigureCanvas(fig)
    gs = gridspec.GridSpec(3, 2)
    fpath = join(data_dir, 'allele_obs_distrib_per_gt.png')

    counts_range = [[0, max_allele_counts], [0, max_allele_counts]]

    for i, (mask_func, name) in enumerate(zip(masks, names)):
        # Left panel: how many calls fall in each (alt, ref) counts bin.
        hist2d = hist2d_allele_observations(variations,
                                            n_bins=max_allele_counts,
                                            range_=counts_range,
                                            mask_func=mask_func,
                                            chunk_size=chunk_size)
        counts_distrib2d, xbins, ybins = hist2d

        axes = fig.add_subplot(gs[i, 0])
        title = 'Allele counts distribution 2D {}'.format(name)
        # NOTE(review): bins with zero counts become -inf under log10;
        # presumably plot_hist2d copes with that -- confirm.
        plot_hist2d(numpy.log10(counts_distrib2d), xbins, ybins, axes=axes,
                    mpl_params=_axes_params(title),
                    colorbar_label='log10(counts)', fig=fig)

        # Right panel: GQ histogram; the counts histogram computed above is
        # handed over through hist_counts.
        hist2d = hist2d_gq_allele_observations(variations,
                                               n_bins=max_allele_counts,
                                               range_=counts_range,
                                               mask_func=mask_func,
                                               chunk_size=chunk_size,
                                               hist_counts=counts_distrib2d)
        gq_distrib2d, xbins, ybins = hist2d

        axes = fig.add_subplot(gs[i, 1])
        title = 'Allele counts GQ distribution 2D {}'.format(name)
        plot_hist2d(gq_distrib2d, xbins, ybins, axes=axes, fig=fig,
                    mpl_params=_axes_params(title),
                    colorbar_label='Genotype Quality (GQ)')

    # PNG data is binary, so the file must be opened in 'wb' mode. Opening
    # it only once the figure is ready avoids leaving an empty file behind
    # if a histogram fails, and the with block guarantees it is closed.
    with open(fpath, 'wb') as fhand:
        canvas.print_figure(fhand)
Ejemplo n.º 2
0
    def test_calc_allele_obs_gq_distrib_2D(self):
        """Check the GQ 2D histogram on an in-memory dict and on ril.hdf5.

        A hand-made variations dict is binned with chunk_size=None and two
        corner cells of the result are checked. Then the ril.hdf5 test file
        is binned once without an explicit chunk_size and once with
        chunk_size=10; both runs must give the same bin edges and the same
        histogram.
        """
        alt_obs = numpy.array([[[0, 0], [5, 0], [-1, -1], [0, -1], [0, 0],
                                [0, 10], [20, 0], [25, 0], [20, 20],
                                [0, 0]]])
        ref_obs = numpy.array([[0, 5, 15, 7, 10, 0, 0, 25, 20, 10]])
        quals = numpy.array([[40, 30, 35, 30, 0, 40, 30, 35, 30, 0]])
        genotypes = numpy.array([[[0, 0], [1, 0], [-1, -1], [0, -1], [0, 0],
                                  [0, 10], [1, 0], [0, 0], [0, 0],
                                  [1, 0]]])
        in_memory = {'/calls/AO': alt_obs,
                     '/calls/RO': ref_obs,
                     '/calls/GQ': quals,
                     '/calls/GT': genotypes}

        gq_hist, _, _ = hist2d_gq_allele_observations(in_memory,
                                                      chunk_size=None)
        # First and last cells of the histogram.
        assert gq_hist[0, 0] == 40
        assert gq_hist[-1, -1] == 35

        # The chunk size must not change the result.
        test_fpath = join(TEST_DATA_DIR, 'ril.hdf5')
        hdf5 = VariationsH5(test_fpath, mode='r')
        whole = hist2d_gq_allele_observations(hdf5)
        chunked = hist2d_gq_allele_observations(hdf5, chunk_size=10)
        gq_hist, xbins, ybins = whole
        gq_hist2, xbins2, ybins2 = chunked
        assert numpy.allclose(xbins, xbins2)
        assert numpy.allclose(ybins, ybins2)
        assert numpy.all(gq_hist == gq_hist2)
    def test_calc_allele_obs_gq_distrib_2D(self):
        """Test hist2d_gq_allele_observations with a dict and an HDF5 file.

        The first half bins a small in-memory variations dict with
        chunk_size=None and checks the first and last histogram cells. The
        second half bins ril.hdf5 with and without chunk_size=10 and
        requires identical bin edges and histogram values.
        """
        # NOTE(review): a method with this same name and body appears
        # immediately before this one in the listing; if both live in the
        # same class only the later definition is kept -- confirm.
        variations = {}
        variations['/calls/AO'] = numpy.array(
            [[[0, 0], [5, 0], [-1, -1], [0, -1], [0, 0],
              [0, 10], [20, 0], [25, 0], [20, 20], [0, 0]]])
        variations['/calls/RO'] = numpy.array(
            [[0, 5, 15, 7, 10, 0, 0, 25, 20, 10]])
        variations['/calls/GQ'] = numpy.array(
            [[40, 30, 35, 30, 0, 40, 30, 35, 30, 0]])
        variations['/calls/GT'] = numpy.array(
            [[[0, 0], [1, 0], [-1, -1], [0, -1], [0, 0],
              [0, 10], [1, 0], [0, 0], [0, 0], [1, 0]]])

        dict_hist, _, _ = hist2d_gq_allele_observations(variations,
                                                        chunk_size=None)
        assert dict_hist[0, 0] == 40
        assert dict_hist[-1, -1] == 35

        h5_vars = VariationsH5(join(TEST_DATA_DIR, 'ril.hdf5'), mode='r')
        ref_hist, ref_xbins, ref_ybins = hist2d_gq_allele_observations(
            h5_vars)
        chunk_hist, chunk_xbins, chunk_ybins = hist2d_gq_allele_observations(
            h5_vars, chunk_size=10)
        # Bin edges are compared with a tolerance, values exactly.
        assert numpy.allclose(ref_xbins, chunk_xbins)
        assert numpy.allclose(ref_ybins, chunk_ybins)
        assert numpy.all(ref_hist == chunk_hist)
Ejemplo n.º 4
0
def plot_allele_obs_distrib_2D(variations,
                               data_dir,
                               max_allele_counts,
                               chunk_size=SNPS_PER_CHUNK):
    """Write a PNG with 2D allele observation histograms per genotype class.

    For each call mask (heterozygous, alt homozygous, ref homozygous) two
    panels are added to a 3x2 grid: the log10 of the allele counts 2D
    histogram and the genotype quality (GQ) 2D histogram. The result is
    saved as ``allele_obs_distrib_per_gt.png`` inside ``data_dir``.

    Args:
        variations: object passed unchanged to hist2d_allele_observations
            and hist2d_gq_allele_observations.
        data_dir: directory in which the PNG file is created.
        max_allele_counts: number of bins and upper range limit used for
            both axes.
        chunk_size: forwarded to both histogram functions.
    """
    masks = [call_is_het, call_is_hom_alt, call_is_hom_ref]
    names = ['Heterozygous', 'Alt Homozygous', 'Ref Homozygous']

    fig = Figure(figsize=(22, 25))
    canvas = FigureCanvas(fig)
    gs = gridspec.GridSpec(3, 2)
    fpath = join(data_dir, 'allele_obs_distrib_per_gt.png')

    counts_range = [[0, max_allele_counts], [0, max_allele_counts]]

    def _mpl_params(title):
        # Axis labels are the same in every panel; only the title changes.
        return {
            'set_xlabel': {
                'args': ['Alt allele counts'],
                'kwargs': {}
            },
            'set_ylabel': {
                'args': ['Ref allele counts'],
                'kwargs': {}
            },
            'set_title': {
                'args': [title],
                'kwargs': {}
            }
        }

    for i, (mask_func, name) in enumerate(zip(masks, names)):
        # Left column: counts per (alt, ref) bin for this genotype class.
        hist2d = hist2d_allele_observations(variations,
                                            n_bins=max_allele_counts,
                                            range_=counts_range,
                                            mask_func=mask_func,
                                            chunk_size=chunk_size)
        counts_distrib2d, xbins, ybins = hist2d

        axes = fig.add_subplot(gs[i, 0])
        title = 'Allele counts distribution 2D {}'.format(name)
        # NOTE(review): log10 of empty bins is -inf; presumably plot_hist2d
        # handles it -- confirm.
        plot_hist2d(numpy.log10(counts_distrib2d),
                    xbins,
                    ybins,
                    axes=axes,
                    mpl_params=_mpl_params(title),
                    colorbar_label='log10(counts)',
                    fig=fig)

        # Right column: GQ histogram; the counts computed above are passed
        # along through hist_counts.
        hist2d = hist2d_gq_allele_observations(variations,
                                               n_bins=max_allele_counts,
                                               range_=counts_range,
                                               mask_func=mask_func,
                                               chunk_size=chunk_size,
                                               hist_counts=counts_distrib2d)
        gq_distrib2d, xbins, ybins = hist2d

        axes = fig.add_subplot(gs[i, 1])
        title = 'Allele counts GQ distribution 2D {}'.format(name)
        plot_hist2d(gq_distrib2d,
                    xbins,
                    ybins,
                    axes=axes,
                    fig=fig,
                    mpl_params=_mpl_params(title),
                    colorbar_label='Genotype Quality (GQ)')

    # A PNG is binary data: open in 'wb' rather than text mode, and use a
    # with block so the handle is closed even if rendering fails. Opening
    # the file only now avoids leaving an empty file when a histogram
    # computation raises.
    with open(fpath, 'wb') as fhand:
        canvas.print_figure(fhand)