def _import_clinvar(**kwargs) -> hl.Table:
    """Import the ClinVar sites VCF, drop malformed rows, and add VEP annotations.

    :param kwargs: Keyword arguments forwarded unchanged to ``import_sites_vcf``.
    :return: The result of ``vep_or_lookup_vep`` (called with
        ``reference="GRCh38"``) on the filtered ClinVar sites.
    """
    sites_ht = import_sites_vcf(**kwargs)

    # The ClinVar VCF contains a problematic entry whose alleles array has only
    # a single element; keep only rows that have more than one allele.
    has_multiple_alleles = hl.len(sites_ht.alleles) > 1
    filtered_ht = sites_ht.filter(has_multiple_alleles)

    return vep_or_lookup_vep(filtered_ht, reference="GRCh38")
def _import_dbsnp(**kwargs) -> hl.Table:
    """Import the dbSNP sites VCF and collapse it to one row per (locus, alleles).

    Multi-allelic rows are split with ``hl.split_multi``, then rows sharing the
    same locus and alleles are grouped and their ``rsid`` values are gathered
    into a set.

    :param kwargs: Keyword arguments forwarded unchanged to ``import_sites_vcf``.
    :return: The grouped result, with a single aggregated field ``rsid``
        holding the set of rsids collected for each (locus, alleles) group.
    """
    sites_ht = import_sites_vcf(**kwargs)

    # Note: permit_shuffle is set because the dbsnp vcf has duplicate loci
    # (turned into a set) so might be out of order.
    split_ht = hl.split_multi(sites_ht, permit_shuffle=True)

    by_variant = split_ht.group_by(split_ht.locus, split_ht.alleles)
    return by_variant.aggregate(rsid=hl.agg.collect_as_set(split_ht.rsid))