Ejemplo n.º 1
0
 def test_lambda_gc(self):
     """Check ``hl.lambda_gc`` against approximate reference values.

     Builds a range table with two derived columns -- the running row
     count divided by the table size, and that same fraction raised to
     the power 1.5 -- and compares the lambda GC of each column with an
     expected value to one decimal place.
     """
     n_rows = 5000000
     base = hl.utils.range_table(n_rows)
     linear_frac = hl.scan.count() / n_rows
     powered_frac = (hl.scan.count() / n_rows)**1.5
     table = base.annotate(x=linear_frac, x2=powered_frac)

     observed_linear = hl.lambda_gc(table.x)
     observed_powered = hl.lambda_gc(table.x2)

     # The results are approximate, so only one decimal place is compared.
     self.assertAlmostEqual(observed_linear, 1, places=1)
     self.assertAlmostEqual(observed_powered, 1.89, places=1)
Ejemplo n.º 2
0
def qqplot(pvals, title: str = None, figsize: tuple = (10, 10)):
    """Draw a QQ plot of p-values and report their lambda GC.

    Args:
        pvals: Expression holding the p-values. Its source must be either
            a ``Table`` or an object offering ``select_rows(...).rows()``
            (presumably a Hail MatrixTable -- confirm against callers).
        title: Plot title; ``'QQ Plot'`` is used when it is empty or None.
        figsize: Figure size passed to ``plt.figure``.

    Returns:
        A ``(fig, lambda_gc)`` tuple: the figure (already closed in pyplot,
        so it is not displayed implicitly) and ``hl.lambda_gc`` of the
        p-values rounded to three decimals.
    """
    source = pvals._indices.source
    if isinstance(source, Table):
        ht = source.select(p_value=pvals)
    else:
        ht = source.select_rows(p_value=pvals).rows()

    # Re-key on the p-value itself so that the running row count used for
    # expected_p below acts as a rank (relies on Hail ordering rows by key).
    ht = ht.key_by().select('p_value').key_by('p_value').persist()
    lambda_gc = hl.lambda_gc(ht['p_value'])
    n = ht.count()
    ht = ht.annotate(
        observed_p=-hl.log10(ht['p_value']),
        expected_p=-hl.log10((hl.scan.count() + 1) / n),
    ).persist()

    p_val_pd = ht.to_pandas()
    # Cap observed values at 10. Use clip() rather than writing through
    # Series.values: with pandas Copy-on-Write that array is read-only and
    # an in-place write raises instead of updating the frame.
    p_val_pd['observed_p'] = p_val_pd['observed_p'].clip(upper=10)

    expected_max = p_val_pd['expected_p'].max()
    observed_max = p_val_pd['observed_p'].max()
    mini = min(expected_max, observed_max)
    maxi = max(expected_max, observed_max)

    plot_title = str(title) if title else 'QQ Plot'

    # Draw through explicit figure/axes handles instead of pyplot's implicit
    # "current figure" so unrelated open figures cannot be affected.
    fig = plt.figure(figsize=figsize)
    ax = fig.add_subplot(111)
    ax.scatter(p_val_pd['expected_p'], p_val_pd['observed_p'], c='black', s=0.5)
    # Reference diagonal y = x, drawn up to the smaller of the two maxima.
    ax.plot((0, mini), (0, mini), 'red')
    ax.set_xlim([0, maxi + 0.5])
    ax.set_ylim([0, maxi + 0.5])
    ax.set_title(plot_title, fontsize=20)
    ax.set_ylabel('Observed -log10(' + r'$p$' + ')', fontsize=15)
    ax.set_xlabel('Expected -log10(' + r'$p$' + ')', fontsize=15)
    # Close this specific figure; the caller still receives the object.
    plt.close(fig)

    return fig, round(lambda_gc, 3)