Example #1
0
def _import_clinvar(**kwargs) -> hl.Table:
    """Import the ClinVar sites VCF and run it through ``vep_or_lookup_vep``.

    :param kwargs: Keyword arguments forwarded unchanged to ``import_sites_vcf``.
    :return: Result of ``vep_or_lookup_vep`` (called with ``reference="GRCh38"``)
        on the imported rows that have more than one entry in ``alleles``.
    """
    sites = import_sites_vcf(**kwargs)

    # The ClinVar VCF contains a problematic row whose alleles array holds a
    # single entry; keep only rows with more than one allele to get around it.
    has_multiple_alleles = hl.len(sites.alleles) > 1
    usable_sites = sites.filter(has_multiple_alleles)

    return vep_or_lookup_vep(usable_sites, reference="GRCh38")
Example #2
0
def _import_dbsnp(**kwargs) -> hl.Table:
    """Import the dbSNP sites VCF, split multi-allelics, and collect rsids per variant.

    :param kwargs: Keyword arguments forwarded unchanged to ``import_sites_vcf``.
    :return: Table grouped by ``locus`` and ``alleles`` with a single ``rsid``
        field holding the set of rsids collected for each group.
    """
    sites = import_sites_vcf(**kwargs)

    # Note: permit_shuffle is set because the dbsnp vcf has duplicate loci
    # (turned into a set) so might be out of order
    split_sites = hl.split_multi(sites, permit_shuffle=True)

    by_variant = split_sites.group_by(split_sites.locus, split_sites.alleles)
    return by_variant.aggregate(rsid=hl.agg.collect_as_set(split_sites.rsid))