Variants to filter on. keep: :obj:`bool` Whether to keep (default), or filter out the variants from `variants_table`. Returns ------- :class:`.VariantDataset`. """ if keep: variant_data = vds.variant_data.semi_join_rows(variants_table) else: variant_data = vds.variant_data.anti_join_rows(variants_table) return VariantDataset(vds.reference_data, variant_data) @typecheck(vds=VariantDataset, intervals=expr_array(expr_interval(expr_any)), keep=bool) def filter_intervals(vds: 'VariantDataset', intervals: 'ArrayExpression', *, keep: bool = False) -> 'VariantDataset': """Filter intervals in a :class:`.VariantDataset` (only on variant data for now). Parameters ---------- vds : :class:`.VariantDataset` Dataset in VariantDataset representation. intervals : :class:`.ArrayExpression` of type :class:`.tinterval` Intervals to filter on. keep : :obj:`bool` Whether to keep, or filter out (default) rows that fall within any interval in `intervals`. Returns
if s_ != s: mapping.append((s, s_)) uniques.add(s_) new_ids.append(s_) if mapping: info(f'Renamed {len(mapping)} duplicate {plural("sample ID", len(mapping))}. Mangled IDs as follows:' + ''.join(f'\n "{pre}" => "{post}"' for pre, post in mapping)) else: info('No duplicate sample IDs found.') return dataset.annotate_cols(**{name: hl.literal(new_ids)[hl.int(hl.scan.count())]}) @typecheck(ds=oneof(Table, MatrixTable), intervals=expr_array(expr_interval(expr_any)), keep=bool) def filter_intervals(ds, intervals, keep=True) -> Union[Table, MatrixTable]: """Filter rows with a list of intervals. Examples -------- Filter to loci falling within one interval: >>> ds_result = hl.filter_intervals(dataset, [hl.parse_locus_interval('17:38449840-38530994')]) Remove all loci within list of intervals: >>> intervals = [hl.parse_locus_interval(x) for x in ['1:50M-75M', '2:START-400000', '3-22']] >>> ds_result = hl.filter_intervals(dataset, intervals, keep=False)
vd = vd.annotate_entries(LA=vd.LA.map(lambda la: new_la_index(la))) vd = vd.key_rows_by('locus') vd = vd.annotate_rows( alleles=vd.__kept_indices.keys().map(lambda i: vd.alleles[i])) vd = vd._key_rows_by_assert_sorted('locus', 'alleles') vd = vd.drop('__allele_counts', '__kept_indices', '__old_to_new_LA') return VariantDataset(reference_data, vd) variant_data = variant_data.filter_rows(hl.agg.count() > 0) return VariantDataset(reference_data, variant_data) @typecheck(vds=VariantDataset, calling_intervals=oneof(Table, expr_array(expr_interval(expr_locus()))), normalization_contig=str) def impute_sex_chromosome_ploidy(vds: VariantDataset, calling_intervals, normalization_contig: str) -> hl.Table: """Impute sex chromosome ploidy from depth of reference data within calling intervals. Returns a :class:`.Table` with sample ID keys, with the following fields: - ``autosomal_mean_dp`` (*float64*): Mean depth on calling intervals on normalization contig. - ``x_mean_dp`` (*float64*): Mean depth on calling intervals on X chromosome. - ``x_ploidy`` (*float64*): Estimated ploidy on X chromosome. Equal to ``2 * x_mean_dp / autosomal_mean_dp``. - ``y_mean_dp`` (*float64*): Mean depth on calling intervals on chromosome. - ``y_ploidy`` (*float64*): Estimated ploidy on Y chromosome. Equal to ``2 * y_mean_db / autosomal_mean_dp``. Parameters ----------