def _is_dnm(
    proband_gt: hl.expr.CallExpression,
    father_gt: hl.expr.CallExpression,
    mother_gt: hl.expr.CallExpression,
    locus: hl.expr.LocusExpression,
    proband_is_female: Optional[hl.expr.BooleanExpression],
) -> hl.expr.BooleanExpression:
    """
    Build an expression telling whether a trio genotype combination is a DNM.

    With a proband sex expression:

    - In autosomes / PAR, or in X non-PAR when the proband is female, the result is
      "proband is het and both parents are hom-ref".
    - Everywhere else the result is missing for a female proband and
      "proband is hom-var and father is hom-ref" otherwise.

    Without a proband sex expression (`proband_is_female is None`), a warning is logged
    and only autosomal loci are evaluated; the result is missing at any other locus.

    :param proband_gt: Proband genotype
    :param father_gt: Father genotype
    :param mother_gt: Mother genotype
    :param locus: Locus of the variant
    :param proband_is_female: Whether the proband is female, or None if sex is unavailable
    :return: Boolean expression indicating whether the genotype combination is a DNM
    """
    if proband_is_female is not None:
        # Regions where the proband carries two copies and both parents are informative.
        two_copy_region = locus.in_autosome_or_par() | (
            proband_is_female & locus.in_x_nonpar()
        )
        het_child_ref_parents = (
            proband_gt.is_het() & father_gt.is_hom_ref() & mother_gt.is_hom_ref()
        )
        # NOTE(review): this branch only consults the father's genotype, including
        # on X non-PAR for a non-female proband -- confirm this is intended.
        single_copy_dnm = hl.or_missing(
            ~proband_is_female,
            proband_gt.is_hom_var() & father_gt.is_hom_ref(),
        )
        return hl.cond(two_copy_region, het_child_ref_parents, single_copy_dnm)

    logger.warning(
        "Since no proband sex expression was given to generate_trio_stats_expr, "
        "only DNMs in autosomes will be counted."
    )
    autosomal = locus.in_autosome()
    het_child_ref_parents = (
        proband_gt.is_het() & father_gt.is_hom_ref() & mother_gt.is_hom_ref()
    )
    return hl.or_missing(autosomal, het_child_ref_parents)
def get_adj_expr(
    gt_expr: hl.expr.CallExpression,
    gq_expr: Union[hl.expr.Int32Expression, hl.expr.Int64Expression],
    dp_expr: Union[hl.expr.Int32Expression, hl.expr.Int64Expression],
    ad_expr: hl.expr.ArrayNumericExpression,
    adj_gq: int = 20,
    adj_dp: int = 10,
    adj_ab: float = 0.2,
    haploid_adj_dp: int = 10,
) -> hl.expr.BooleanExpression:
    """
    Gets adj genotype annotation.
    Defaults correspond to gnomAD values.

    A genotype is adj when all three filters below hold:

    - GQ is at least `adj_gq`.
    - DP is at least `haploid_adj_dp` for haploid calls, `adj_dp` otherwise.
    - Allele balance (AD / DP) is at least `adj_ab` for the second allele of a het-ref
      call, or for both alleles of a het non-ref call. Non-het calls always pass.

    :param gt_expr: Genotype call
    :param gq_expr: Genotype quality
    :param dp_expr: Depth
    :param ad_expr: Per-allele depths, indexed by allele number
    :param adj_gq: Minimum GQ
    :param adj_dp: Minimum DP for non-haploid calls
    :param adj_ab: Minimum allele balance for het calls
    :param haploid_adj_dp: Minimum DP for haploid calls
    :return: Boolean expression that is True for adj genotypes
    """

    def _balanced(allele_position: int) -> hl.expr.BooleanExpression:
        # Fraction of the depth supporting the allele at this position of the call.
        return ad_expr[gt_expr[allele_position]] / dp_expr >= adj_ab

    gq_ok = gq_expr >= adj_gq
    dp_ok = hl.cond(
        gt_expr.is_haploid(),
        dp_expr >= haploid_adj_dp,
        dp_expr >= adj_dp,
    )
    ab_ok = (
        hl.case()
        .when(~gt_expr.is_het(), True)
        .when(gt_expr.is_het_ref(), _balanced(1))
        .default(_balanced(0) & _balanced(1))
    )
    return gq_ok & dp_ok & ab_ok
def adjusted_sex_ploidy_expr(
    locus_expr: hl.expr.LocusExpression,
    gt_expr: hl.expr.CallExpression,
    karyotype_expr: hl.expr.StringExpression,
    xy_karyotype_str: str = "XY",
    xx_karyotype_str: str = "XX",
) -> hl.expr.CallExpression:
    """
    Creates an entry expression that adjusts genotypes on sex chromosomes by karyotype.

    Rules, applied in order (a missing condition is treated as False):

    1. Female karyotype on Y (PAR or non-PAR): genotype set to missing.
    2. Male karyotype on non-PAR X/Y with a het genotype: genotype set to missing.
    3. Male karyotype on non-PAR X/Y otherwise: unphased haploid call built from the
       first allele of the genotype.
    4. Anything else: genotype returned unchanged.

    :param locus_expr: Locus
    :param gt_expr: Genotype
    :param karyotype_expr: Karyotype
    :param xy_karyotype_str: Male sex karyotype representation
    :param xx_karyotype_str: Female sex karyotype representation
    :return: Genotype adjusted for sex ploidy
    """
    is_xy = karyotype_expr == xy_karyotype_str
    is_xx = karyotype_expr == xx_karyotype_str

    on_x_nonpar = locus_expr.in_x_nonpar()
    on_y_par = locus_expr.in_y_par()
    on_y_nonpar = locus_expr.in_y_nonpar()

    adjusted = hl.case(missing_false=True)
    adjusted = adjusted.when(
        is_xx & (on_y_par | on_y_nonpar),
        hl.null(hl.tcall),
    )
    adjusted = adjusted.when(
        is_xy & (on_x_nonpar | on_y_nonpar) & gt_expr.is_het(),
        hl.null(hl.tcall),
    )
    adjusted = adjusted.when(
        is_xy & (on_x_nonpar | on_y_nonpar),
        hl.call(gt_expr[0], phased=False),
    )
    return adjusted.default(gt_expr)
def age_hists_expr(
    adj_expr: hl.expr.BooleanExpression,
    gt_expr: hl.expr.CallExpression,
    age_expr: hl.expr.NumericExpression,
    lowest_boundary: int = 30,
    highest_boundary: int = 80,
    n_bins: int = 10,
) -> hl.expr.StructExpression:
    """
    Builds age histograms of adj het carriers and adj hom-var carriers.

    Both histograms are computed with `hl.agg.hist` over `age_expr`, using the same
    boundaries and number of bins, and differ only in the genotype filter applied.

    :param adj_expr: Entry expression containing whether a genotype is high quality (adj) or not
    :param gt_expr: Entry expression containing the genotype
    :param age_expr: Col expression containing the sample's age
    :param lowest_boundary: Lowest bin boundary (any younger sample will be binned in n_smaller)
    :param highest_boundary: Highest bin boundary (any older sample will be binned in n_larger)
    :param n_bins: Total number of bins
    :return: A struct with `age_hist_het` and `age_hist_hom`
    """

    def _adj_age_hist(genotype_matches: hl.expr.BooleanExpression):
        # Histogram of ages restricted to adj genotypes of the requested kind.
        return hl.agg.filter(
            adj_expr & genotype_matches,
            hl.agg.hist(age_expr, lowest_boundary, highest_boundary, n_bins),
        )

    het_hist = _adj_age_hist(gt_expr.is_het())
    hom_hist = _adj_age_hist(gt_expr.is_hom_var())
    return hl.struct(age_hist_het=het_hist, age_hist_hom=hom_hist)
def call_to_one_hot_alleles_array(
    call: hl.expr.CallExpression,
    alleles: hl.expr.ArrayExpression,
) -> hl.expr.ArrayExpression:
    """
    Get the one-hot-encoded allele vectors of a genotype call as an ordered array.

    Each vector is obtained by wrapping a single allele of `call` in its own call and
    one-hot-encoding it against `alleles`. A het call yields two vectors (first allele,
    then second allele); any other call yields a single vector for its first allele.

    :param CallExpression call: genotype
    :param ArrayExpression alleles: Alleles at the site
    :return: Array of one-hot-encoded alleles
    :rtype: ArrayExpression
    """

    def _one_hot(position: int):
        # One-hot vector for the allele at the given position of the call.
        return hl.call(call[position]).one_hot_alleles(alleles)

    is_het = call.is_het()
    both_alleles = hl.array([_one_hot(0), _one_hot(1)])
    first_allele_only = hl.array([_one_hot(0)])
    return hl.cond(is_het, both_alleles, first_allele_only)
# NOTE(review): a function with this same name and signature is defined earlier in
# this file; this later definition shadows it -- confirm whether one can be removed.
def call_to_one_hot_alleles_array(
    call: hl.expr.CallExpression,
    alleles: hl.expr.ArrayExpression,
) -> hl.expr.ArrayExpression:
    """
    Get the one-hot-encoded allele vectors of a genotype call as an ordered array.

    Each vector is obtained by wrapping a single allele of `call` in its own call and
    one-hot-encoding it against `alleles`. A het call yields two vectors (first allele,
    then second allele); any other call yields a single vector for its first allele.

    :param CallExpression call: genotype
    :param ArrayExpression alleles: Alleles at the site
    :return: Array of one-hot-encoded alleles
    :rtype: ArrayExpression
    """

    def _one_hot(position: int):
        # One-hot vector for the allele at the given position of the call.
        return hl.call(call[position]).one_hot_alleles(alleles)

    is_het = call.is_het()
    both_alleles = hl.array([_one_hot(0), _one_hot(1)])
    first_allele_only = hl.array([_one_hot(0)])
    return hl.cond(is_het, both_alleles, first_allele_only)