def test_tdt(self): pedigree = hl.Pedigree.read(resource('tdt.fam')) tdt_tab = (hl.transmission_disequilibrium_test( hl.split_multi_hts(hl.import_vcf(resource('tdt.vcf'), min_partitions=4)), pedigree)) truth = hl.import_table( resource('tdt_results.tsv'), types={'POSITION': hl.tint32, 'T': hl.tint32, 'U': hl.tint32, 'Chi2': hl.tfloat64, 'Pval': hl.tfloat64}) truth = (truth .transmute(locus=hl.locus(truth.CHROM, truth.POSITION), alleles=[truth.REF, truth.ALT]) .key_by('locus', 'alleles')) if tdt_tab.count() != truth.count(): self.fail('Result has {} rows but should have {} rows'.format(tdt_tab.count(), truth.count())) bad = (tdt_tab.filter(hl.is_nan(tdt_tab.p_value), keep=False) .join(truth.filter(hl.is_nan(truth.Pval), keep=False), how='outer')) bad.describe() bad = bad.filter(~( (bad.t == bad.T) & (bad.u == bad.U) & (hl.abs(bad.chi_sq - bad.Chi2) < 0.001) & (hl.abs(bad.p_value - bad.Pval) < 0.001))) if bad.count() != 0: bad.order_by(hl.asc(bad.v)).show() self.fail('Found rows in violation of the predicate (see show output)')
def test_tdt(self): pedigree = hl.Pedigree.read(resource('tdt.fam')) tdt_tab = (hl.transmission_disequilibrium_test( hl.split_multi_hts(hl.import_vcf(resource('tdt.vcf'), min_partitions=4)), pedigree)) truth = hl.import_table( resource('tdt_results.tsv'), types={'POSITION': hl.tint32, 'T': hl.tint32, 'U': hl.tint32, 'Chi2': hl.tfloat64, 'Pval': hl.tfloat64}) truth = (truth .transmute(locus=hl.locus(truth.CHROM, truth.POSITION), alleles=[truth.REF, truth.ALT]) .key_by('locus', 'alleles')) if tdt_tab.count() != truth.count(): self.fail('Result has {} rows but should have {} rows'.format(tdt_tab.count(), truth.count())) bad = (tdt_tab.filter(hl.is_nan(tdt_tab.p_value), keep=False) .join(truth.filter(hl.is_nan(truth.Pval), keep=False), how='outer')) bad = bad.filter(~( (bad.t == bad.T) & (bad.u == bad.U) & (hl.abs(bad.chi2 - bad.Chi2) < 0.001) & (hl.abs(bad.p_value - bad.Pval) < 0.001))) if bad.count() != 0: bad.order_by(hl.asc(bad.v)).show() self.fail('Found rows in violation of the predicate (see show output)')
def family_stats(mt: hl.MatrixTable, ped: hl.Pedigree, group_name: str) -> Tuple[hl.expr.StructExpression, hl.Table]: tdt_table = hl.transmission_disequilibrium_test(mt, ped) _, _, per_sample, per_variant = hl.mendel_errors(mt.GT, ped) family_stats_struct = hl.struct(mendel=per_variant[mt.row_key], tdt=tdt_table[mt.row_key], unrelated_qc_callstats=hl.agg.filter(mt.unrelated_sample, hl.agg.call_stats(mt.GT, mt.alleles)), meta={'group': group_name}) return family_stats_struct, per_sample