def load_variants(
    adapter,
    vcf_obj,
    case_obj,
    skip_case_id=False,
    gq_treshold=None,
    max_window=3000,
    variant_type="snv",
    genome_build=None,
):
    """Build every variant in a VCF and insert it through the adapter.

    Args:
        adapter (loqusdb.plugins.Adapter): initialized plugin
        vcf_obj: iterable of variants, wrapped in a click progress bar
        case_obj (Case): dict with case information; "nr_variants" or
            "nr_sv_variants" is read for the progress bar length and
            "case_id" for the variant-level case id
        skip_case_id (bool): when true, None is passed as case id to
            build_variant instead of the id of the case
        gq_treshold (int): forwarded to build_variant
        max_window (int): forwarded to adapter.add_structural_variant
        variant_type (str): 'sv' or 'snv'
        genome_build: forwarded to build_variant

    Returns:
        nr_inserted (int): for 'sv' the number of structural variants passed
            to the adapter, for 'snv' whatever adapter.add_variants returns,
            and 0 for any other variant type.
    """
    count_key = "nr_variants" if variant_type == "snv" else "nr_sv_variants"
    expected_total = case_obj[count_key]

    # The case id is always looked up, and only afterwards discarded on request
    case_id = case_obj["case_id"]
    if skip_case_id:
        case_id = None

    nr_inserted = 0
    with click.progressbar(vcf_obj, label="Inserting variants", length=expected_total) as bar:
        # Lazy on purpose: variants are built one at a time while being inserted
        built_variants = (
            build_variant(record, case_obj, case_id, gq_treshold, genome_build=genome_build)
            for record in bar
        )

        if variant_type == "sv":
            # build_variant may return a falsy value; those are skipped
            for structural_variant in filter(None, built_variants):
                adapter.add_structural_variant(
                    variant=structural_variant, max_window=max_window
                )
                nr_inserted += 1
        elif variant_type == "snv":
            nr_inserted = adapter.add_variants(built_variants)

    LOG.info("Inserted %s variants of type %s", nr_inserted, variant_type)
    return nr_inserted
def test_format_indel(del_variant, case_obj):
    """A deletion is built with the expected coordinates, type and counts."""
    ## GIVEN a SV deletion
    family_id = case_obj["case_id"]

    ## WHEN parsing the variant
    parsed = build_variant(variant=del_variant, case_obj=case_obj, case_id=family_id)

    ## THEN assert the sv is parsed correct
    assert parsed
    expected_fields = {
        "variant_id": "_".join(
            [del_variant.CHROM, str(del_variant.POS), del_variant.REF, del_variant.ALT[0]]
        ),
        "chrom": del_variant.CHROM,
        "end_chrom": del_variant.CHROM,
        "pos": del_variant.POS,
        "end": del_variant.INFO["END"],
        "sv_len": abs(del_variant.INFO["SVLEN"]),
        "ref": del_variant.REF,
        "alt": del_variant.ALT[0],
        "sv_type": "DEL",
        "case_id": family_id,
        "homozygote": 0,
        "hemizygote": 0,
    }
    for field, expected in expected_fields.items():
        assert parsed[field] == expected
def variant_obj(request, het_variant, ind_positions, individuals):
    """Return the het_variant fixture built for the case id "test" without a GQ threshold."""
    build_arguments = {
        "variant": het_variant,
        "individuals": individuals,
        "ind_positions": ind_positions,
        "case_id": "test",
        "gq_treshold": None,
    }
    return build_variant(**build_arguments)
def test_format_variant_no_gq(variant_no_gq, case_obj):
    """Building a variant that lacks GQ with a GQ threshold yields None."""
    ## GIVEN a variant without GQ
    family_id = case_obj["case_id"]

    ## WHEN parsing the variant using a GQ treshold
    result = build_variant(
        variant=variant_no_gq,
        case_obj=case_obj,
        case_id=family_id,
        gq_treshold=20,
    )

    ## THEN assert that None is returned since requirements are not fulfilled
    assert result is None
def test_format_variant_chr_prefix(variant_chr, case_obj):
    """The built variant carries the chromosome name without its 'chr' prefix."""
    ## GIVEN a variant with 'chr' prefix in chromosome name
    raw_chrom = variant_chr.CHROM
    assert raw_chrom.startswith("chr")
    family_id = case_obj["case_id"]

    ## WHEN parsing the variant using a GQ treshold
    parsed = build_variant(
        variant=variant_chr,
        case_obj=case_obj,
        case_id=family_id,
        gq_treshold=20,
    )

    ## THEN assert that the 'chr' part has been stripped away
    stripped_chrom = variant_chr.CHROM[3:]
    assert parsed["chrom"] == stripped_chrom
def test_format_variant_no_family_id(het_variant, case_obj):
    """A variant built with case_id=None carries no case id and no hom/hemi counts."""
    ## GIVEN a parsed variant
    variant = het_variant

    ## WHEN parsing the variant telling that 'case_id' is None
    formated_variant = build_variant(variant=variant, case_obj=case_obj, case_id=None)

    ## THEN assert that case_id was not added
    # Identity comparison: None is a singleton and must not be compared with ==
    assert formated_variant.get("case_id") is None
    assert formated_variant["homozygote"] == 0
    assert formated_variant["hemizygote"] == 0
def test_format_hemizygote_variant(hem_variant, case_obj):
    """A hemizygous variant is counted once as hemizygote and never as homozygote."""
    ## GIVEN a parsed hemizygous variant
    family_id = case_obj["case_id"]

    ## WHEN parsing the variant
    parsed = build_variant(variant=hem_variant, case_obj=case_obj, case_id=family_id)

    ## THEN assert that hemizygote count is 1
    hom_count = parsed["homozygote"]
    hem_count = parsed["hemizygote"]
    assert hom_count == 0
    assert hem_count == 1
def test_format_homozygote_variant(hom_variant, case_obj):
    """A homozygous variant is counted once as homozygote and never as hemizygote."""
    ## GIVEN a parsed hom variant
    family_id = case_obj['case_id']

    ## WHEN parsing the variant
    parsed = build_variant(variant=hom_variant, case_obj=case_obj, case_id=family_id)

    ## THEN assert that the variant has hom count
    hom_count = parsed['homozygote']
    hem_count = parsed['hemizygote']
    assert hom_count == 1
    assert hem_count == 0
def test_get_insertion(small_insert_variant, mongo_adapter, case_obj):
    """A stored small insertion can be read back from the structural_variant collection."""
    adapter = mongo_adapter
    ## GIVEN a mongo adapter with a small insertion
    variant = small_insert_variant
    case_id = case_obj["case_id"]
    formated_variant = build_variant(variant=variant, case_obj=case_obj, case_id=case_id)

    adapter.add_case(case_obj)
    adapter.add_structural_variant(formated_variant)

    ## WHEN fetching all structural variants
    stored_variants = list(adapter.db.structural_variant.find())

    ## THEN assert that something was stored
    # Without this check the loop below passes vacuously on an empty result
    assert stored_variants
    for variant_obj in stored_variants:
        assert variant_obj
def test_get_translocation(translocation_variant, mongo_adapter, case_obj):
    """A stored translocation can be read back from the structural_variant collection."""
    adapter = mongo_adapter
    ## GIVEN a mongo adapter with a translocation
    variant = translocation_variant
    case_id = case_obj['case_id']
    formated_variant = build_variant(variant=variant, case_obj=case_obj, case_id=case_id)

    adapter.add_case(case_obj)
    adapter.add_structural_variant(formated_variant)

    ## WHEN fetching all structural variants
    stored_variants = list(adapter.db.structural_variant.find())

    ## THEN assert that something was stored
    # Without this check the loop below passes vacuously on an empty result
    assert stored_variants
    for variant_obj in stored_variants:
        assert variant_obj
def test_format_variant_no_call(variant_no_call, case_obj):
    """A variant whose genotypes are all no_call/hom_ref is built as None."""
    ## GIVEN a parsed variant with no call in all individuals
    family_id = case_obj["case_id"]
    uninformative_calls = ["no_call", "hom_ref"]
    assert all(
        GENOTYPE_MAP[call] in uninformative_calls for call in variant_no_call.gt_types
    )

    ## WHEN parsing the variant
    result = build_variant(variant=variant_no_call, case_obj=case_obj, case_id=family_id)

    ## THEN assert that the result is None
    assert result is None
def delete_structural_variants(adapter, vcf_obj, case_obj, case_id=None):
    """Delete structural variants for a case in the database

    Every variant in vcf_obj is built with build_variant and, when the result
    is truthy, passed to adapter.delete_structural_variant. Progress is logged
    each time the chromosome of the built variant changes.

    Args:
        adapter(loqusdb.plugins.Adapter)
        vcf_obj(iterable): variants to build and delete
        case_obj(Case): dict with case information
        case_id(str): falls back to case_obj["case_id"] when falsy

    Returns:
        nr_deleted (int): Number of deleted variants
    """
    case_id = case_id or case_obj["case_id"]
    nr_deleted = 0
    chrom_time = datetime.now()
    current_chrom = None

    for variant in vcf_obj:
        formated_variant = build_variant(
            variant=variant,
            case_obj=case_obj,
            case_id=case_id,
        )

        # build_variant may return a falsy value; nothing to delete then
        if not formated_variant:
            continue

        new_chrom = formated_variant.get("chrom")
        adapter.delete_structural_variant(formated_variant)
        nr_deleted += 1

        if not current_chrom:
            LOG.info("Start deleting chromosome {}".format(new_chrom))
            current_chrom = new_chrom
            chrom_time = datetime.now()
            continue

        if new_chrom != current_chrom:
            LOG.info("Chromosome {0} done".format(current_chrom))
            LOG.info("Time to delete chromosome {0}: {1}".format(
                current_chrom, datetime.now() - chrom_time))
            LOG.info("Start deleting chromosome {0}".format(new_chrom))
            current_chrom = new_chrom
            # Restart the timer, otherwise the time reported for every later
            # chromosome would be measured from the start of the first one
            chrom_time = datetime.now()

    return nr_deleted
def test_load_translocation(translocation_variant, case_obj, mongo_adapter):
    """A loaded translocation is stored with the case id as its only family."""
    ## GIVEN a mongo adapter with a case
    family_id = case_obj["case_id"]
    mongo_adapter.add_case(case_obj)

    ## WHEN loading a translocation
    built = build_variant(
        variant=translocation_variant, case_obj=case_obj, case_id=family_id
    )
    mongo_adapter.add_structural_variant(built)

    ## THEN assert the object returned is correct
    stored_cluster = mongo_adapter.db.structural_variant.find_one()
    assert stored_cluster["families"] == [family_id]
def test_load_insertion(small_insert_variant, mongo_adapter, case_obj):
    """A loaded small insertion is stored with the case id as its only family."""
    ## GIVEN a mongo adapter with a case
    family_id = case_obj['case_id']
    mongo_adapter.add_case(case_obj)

    ## WHEN loading a small insertion
    built = build_variant(
        variant=small_insert_variant, case_obj=case_obj, case_id=family_id
    )
    mongo_adapter.add_structural_variant(built)

    ## THEN assert the object returned is correct
    stored_cluster = mongo_adapter.db.structural_variant.find_one()
    assert stored_cluster['families'] == [family_id]
def test_format_translocation(translocation_variant, case_obj):
    """A translocation is built as a BND ending on chromosome 11 with infinite length."""
    ## GIVEN a translocation variant
    family_id = case_obj["case_id"]

    ## WHEN parsing the variant
    parsed = build_variant(
        variant=translocation_variant, case_obj=case_obj, case_id=family_id
    )

    ## THEN assert the sv is parsed correct
    expected_fields = {
        "chrom": translocation_variant.CHROM,
        "end_chrom": "11",
        "pos": translocation_variant.POS,
        "end": 119123896,
        "sv_len": float("inf"),
        "ref": translocation_variant.REF,
        "alt": translocation_variant.ALT[0],
        "sv_type": "BND",
    }
    for field, expected in expected_fields.items():
        assert parsed[field] == expected
def test_format_dup_tandem(duptandem_variant, case_obj):
    """A tandem duplication is built as a DUP with END and SVLEN taken from INFO."""
    ## GIVEN a tandem duplication variant
    family_id = case_obj["case_id"]

    ## WHEN parsing the variant
    parsed = build_variant(
        variant=duptandem_variant, case_obj=case_obj, case_id=family_id
    )

    ## THEN assert the sv is parsed correct
    expected_fields = {
        "chrom": duptandem_variant.CHROM,
        "end_chrom": duptandem_variant.CHROM,
        "pos": duptandem_variant.POS,
        "end": duptandem_variant.INFO["END"],
        "sv_len": abs(duptandem_variant.INFO["SVLEN"]),
        "ref": duptandem_variant.REF,
        "alt": duptandem_variant.ALT[0],
        "sv_type": "DUP",
    }
    for field, expected in expected_fields.items():
        assert parsed[field] == expected
def test_remove_one_SV(self, mongo_adapter, del_variant, case_obj):
    """Deleting the only stored SV removes both its SV and identity documents."""
    # GIVEN a database populated with one SV
    db = mongo_adapter.db
    formated_variant = build_variant(del_variant, case_obj=case_obj, case_id=case_obj["case_id"])
    mongo_adapter.add_structural_variant(formated_variant)

    mongo_SV = db.structural_variant.find_one()
    mongo_identity = db.identity.find_one()
    assert mongo_SV is not None
    assert mongo_identity is not None

    # WHEN deleting SV
    mongo_adapter.delete_structural_variant(formated_variant)

    # THEN there should be no remaining SVs in the database
    mongo_SV = db.structural_variant.find_one()
    # Query the same 'identity' collection that was checked before the delete;
    # a misspelled collection name is always empty and would hide a failure
    mongo_identity = db.identity.find_one()
    assert mongo_SV is None
    assert mongo_identity is None
def test_load_same_insertion_twice(small_insert_variant, mongo_adapter, case_obj):
    """Loading one insertion for two case ids stores both ids as families."""
    ## GIVEN a mongo adapter with a case
    family_id = case_obj["case_id"]
    mongo_adapter.add_case(case_obj)

    ## WHEN loading a small insertion twice, the second time for case "2"
    built = build_variant(
        variant=small_insert_variant, case_obj=case_obj, case_id=family_id
    )
    mongo_adapter.add_structural_variant(built)
    built["case_id"] = "2"
    mongo_adapter.add_structural_variant(built)

    ## THEN assert the object returned is correct
    stored_cluster = mongo_adapter.db.structural_variant.find_one()
    expected_families = {family_id, "2"}
    assert set(stored_cluster["families"]) == expected_families
def test_load_same_translocation_twice(translocation_variant, case_obj, mongo_adapter):
    """Loading one translocation for two case ids stores both ids as families."""
    ## GIVEN a mongo adapter with a case
    family_id = case_obj['case_id']
    mongo_adapter.add_case(case_obj)

    ## WHEN loading a translocation twice, the second time for case '2'
    built = build_variant(
        variant=translocation_variant, case_obj=case_obj, case_id=family_id
    )
    mongo_adapter.add_structural_variant(built)
    built['case_id'] = '2'
    mongo_adapter.add_structural_variant(built)

    ## THEN assert the object returned is correct
    stored_cluster = mongo_adapter.db.structural_variant.find_one()
    expected_families = {family_id, '2'}
    assert set(stored_cluster['families']) == expected_families
def test_format_variant(het_variant, case_obj):
    """A heterozygous variant is built with the expected id, coordinates and counts."""
    ## GIVEN a parsed variant
    family_id = case_obj['case_id']

    ## WHEN parsing the variant
    parsed = build_variant(variant=het_variant, case_obj=case_obj, case_id=family_id)
    id_parts = [het_variant.CHROM, str(het_variant.POS), het_variant.REF, het_variant.ALT[0]]
    expected_id = '_'.join(id_parts)

    ## THEN assert it was built in a correct way
    assert parsed
    expected_fields = {
        'variant_id': expected_id,
        'chrom': het_variant.CHROM,
        'pos': het_variant.POS,
        'ref': het_variant.REF,
        'alt': het_variant.ALT[0],
        'case_id': family_id,
        'homozygote': 0,
    }
    for field, expected in expected_fields.items():
        assert parsed[field] == expected
def test_format_variant(het_variant, case_obj):
    """A heterozygous variant is built with the expected id, coordinates and counts."""
    ## GIVEN a parsed variant
    family_id = case_obj["case_id"]

    ## WHEN parsing the variant
    parsed = build_variant(variant=het_variant, case_obj=case_obj, case_id=family_id)
    id_parts = [het_variant.CHROM, str(het_variant.POS), het_variant.REF, het_variant.ALT[0]]
    expected_id = "_".join(id_parts)

    ## THEN assert it was built in a correct way
    assert parsed
    expected_fields = {
        "variant_id": expected_id,
        "chrom": het_variant.CHROM,
        "pos": het_variant.POS,
        "ref": het_variant.REF,
        "alt": het_variant.ALT[0],
        "case_id": family_id,
        "homozygote": 0,
    }
    for field, expected in expected_fields.items():
        assert parsed[field] == expected
def test_remove_one_of_two_SV(self, mongo_adapter, duptandem_variant, case_obj):
    """Deleting one of two clustered SVs keeps the cluster with the other SV's position."""
    # GIVEN a database populated with one SV
    database = mongo_adapter.db
    first_sv = build_variant(
        duptandem_variant, case_obj=case_obj, case_id=case_obj["case_id"]
    )
    mongo_adapter.add_structural_variant(first_sv)

    # Add second of same variant, changing the start and end position slightly
    second_sv = copy.deepcopy(first_sv)
    second_sv["pos"] += 2
    second_sv["end"] -= 1
    second_sv["case_id"] = "case_2"
    mongo_adapter.add_structural_variant(second_sv)

    # This should correspond to one structural variant document
    sv_documents = list(database.structural_variant.find())
    assert len(sv_documents) == 1
    combined_pos = first_sv["pos"] + second_sv["pos"]
    assert sv_documents[0]["pos_sum"] == combined_pos

    # And two identity documents
    identity_documents = list(database.identity.find())
    assert len(identity_documents) == 2

    # WHEN deleting the variant from the first case
    mongo_adapter.delete_structural_variant(first_sv)

    # THEN the SV document should have the pos_sum equal to the pos of the
    # SV from the second case
    sv_documents = list(database.structural_variant.find())
    assert len(sv_documents) == 1
    assert sv_documents[0]["pos_sum"] == second_sv["pos"]

    # And one identity document
    identity_documents = list(database.identity.find())
    assert len(identity_documents) == 1
def test_build_het_variant(het_variant, case_obj):
    """A heterozygous variant keeps its chromosome and has no hom/hemi counts."""
    ## WHEN building the variant without an explicit case id
    built = build_variant(variant=het_variant, case_obj=case_obj)

    ## THEN assert chromosome and zygosity counts
    assert built["chrom"] == het_variant.CHROM
    for count_field in ("homozygote", "hemizygote"):
        assert built[count_field] == 0
def delete_variants(adapter, vcf_obj, case_obj, case_id=None, genome_build=None):
    """Delete variants for a case in the database

    Every variant in vcf_obj is built with build_variant; truthy results are
    collected and handed to adapter.delete_variants in batches of 10000, with
    a final call for any remainder. Progress is logged each time the
    chromosome of the built variant changes.

    Args:
        adapter(loqusdb.plugins.Adapter)
        vcf_obj(iterable): variants to build and delete
        case_obj(Case): dict with case information
        case_id(str): falls back to case_obj["case_id"] when falsy
        genome_build: forwarded to build_variant

    Returns:
        nr_deleted (int): Number of variants that were built and queued for
            deletion
    """
    case_id = case_id or case_obj["case_id"]
    nr_deleted = 0
    chrom_time = datetime.now()
    current_chrom = None
    variant_list = []

    for variant in vcf_obj:
        formated_variant = build_variant(
            variant=variant,
            case_obj=case_obj,
            case_id=case_id,
            genome_build=genome_build,
        )

        # build_variant may return a falsy value; nothing to delete then
        if not formated_variant:
            continue

        variant_list.append(formated_variant)
        new_chrom = formated_variant.get("chrom")

        # When there are enough variants in the variant list
        # they are passed to delete_variants
        if len(variant_list) == 10000:
            adapter.delete_variants(variant_list)
            variant_list.clear()
        nr_deleted += 1

        if not current_chrom:
            LOG.info("Start deleting chromosome {}".format(new_chrom))
            current_chrom = new_chrom
            chrom_time = datetime.now()
            continue

        if new_chrom != current_chrom:
            LOG.info("Chromosome {0} done".format(current_chrom))
            LOG.info("Time to delete chromosome {0}: {1}".format(
                current_chrom, datetime.now() - chrom_time))
            LOG.info("Start deleting chromosome {0}".format(new_chrom))
            current_chrom = new_chrom
            # Restart the timer, otherwise the time reported for every later
            # chromosome would be measured from the start of the first one
            chrom_time = datetime.now()

    # Flush the variants that did not fill a complete batch
    if variant_list:
        adapter.delete_variants(variant_list)
        variant_list.clear()

    return nr_deleted