def test_load_variants(real_mongo_adapter, het_variant, case_obj):
    """Load one heterozygous variant and check what is stored for it."""
    mongo_adapter = real_mongo_adapter
    ## GIVEN an adapter and a vcf with one heterozygote variant
    db = mongo_adapter.db
    vcf = [het_variant]
    # The variant collection has to be empty before anything is loaded
    assert db.variant.find_one() is None

    ## WHEN loading the variant into the database
    load_variants(
        adapter=mongo_adapter,
        vcf_obj=vcf,
        case_obj=case_obj,
    )
    mongo_variant = db.variant.find_one()

    ## THEN assert that the variant is loaded correctly
    assert mongo_variant["families"] == [case_obj["case_id"]]
    assert mongo_variant["observations"] == 1
    assert mongo_variant["homozygote"] == 0
    assert mongo_variant["hemizygote"] == 0
def test_load_same_variant_different_case(real_mongo_adapter, het_variant, case_obj):
    """Load the same variant under two case ids and check both are recorded."""
    adapter = real_mongo_adapter
    ## GIVEN an adapter and a vcf holding a single variant
    database = adapter.db
    variants = [het_variant]

    ## WHEN loading the variant for the case as provided
    load_variants(adapter=adapter, vcf_obj=variants, case_obj=case_obj)

    ## AND loading it again after giving the same case object another id
    first_case_id = case_obj["case_id"]
    second_case_id = "2"
    case_obj["case_id"] = second_case_id
    load_variants(adapter=adapter, vcf_obj=variants, case_obj=case_obj)

    ## THEN the variant is observed twice, once for each case id
    stored_variant = database.variant.find_one()
    assert stored_variant["observations"] == 2
    assert stored_variant["families"] == [first_case_id, second_case_id]
def test_load_variants_skip_case_id(real_mongo_adapter, het_variant, case_obj):
    """Load a variant with skip_case_id=True and check no families are stored."""
    mongo_adapter = real_mongo_adapter
    ## GIVEN an adapter and a vcf with one heterozygote variant
    db = mongo_adapter.db
    vcf = [het_variant]

    ## WHEN loading the variant into the database without case information
    load_variants(
        adapter=mongo_adapter,
        vcf_obj=vcf,
        case_obj=case_obj,
        skip_case_id=True,
    )
    mongo_variant = db.variant.find_one()

    ## THEN assert that the variant has no families registered
    assert mongo_variant.get("families") is None
def test_load_two_variants(real_mongo_adapter, het_variant, case_obj):
    """Load the same heterozygous variant twice and check it is counted twice."""
    adapter = real_mongo_adapter
    ## GIVEN an adapter and a vcf with two heterozygote variants
    database = adapter.db
    # The very same variant object appears twice in the vcf
    variants = [het_variant, het_variant]

    ## WHEN loading the variants into the database
    load_variants(adapter=adapter, vcf_obj=variants, case_obj=case_obj)

    ## THEN assert that both occurrences were counted as observations
    stored_variant = database.variant.find_one()
    assert stored_variant["observations"] == 2
def test_load_homozygote(real_mongo_adapter, hom_variant, case_obj):
    """Load one homozygous variant and check what is stored for it."""
    mongo_adapter = real_mongo_adapter
    ## GIVEN an adapter and a vcf with one homozygote variant
    db = mongo_adapter.db
    vcf = [hom_variant]
    # The variant collection has to be empty before anything is loaded
    assert db.variant.find_one() is None

    ## WHEN loading the variant into the database
    load_variants(
        adapter=mongo_adapter,
        vcf_obj=vcf,
        case_obj=case_obj,
    )
    mongo_variant = db.variant.find_one()

    ## THEN assert that the variant is loaded correctly
    assert mongo_variant['families'] == [case_obj['case_id']]
    assert mongo_variant['observations'] == 1
    assert mongo_variant['homozygote'] == 1
    assert mongo_variant['hemizygote'] == 0
def test_load_case_variants(real_mongo_adapter, case_obj):
    """Load all variants from the case vcf and check how many were stored."""
    mongo_adapter = real_mongo_adapter
    db = mongo_adapter.db
    ## GIVEN a mongo adapter and the snv variant file of the case
    vcf_obj = VCF(case_obj["vcf_path"])

    ## WHEN loading the variants
    load_variants(
        adapter=mongo_adapter,
        vcf_obj=vcf_obj,
        case_obj=case_obj,
    )
    # Count the documents that ended up in the variant collection
    nr_loaded = sum(1 for _ in db.variant.find())

    ## THEN assert that the correct number of variants was loaded
    assert nr_loaded > 0
    assert nr_loaded == case_obj["nr_variants"]
def test_load_sv_case_variants(mongo_adapter, sv_case_obj):
    """Load structural variants and check they land in the sv collection only."""
    db = mongo_adapter.db
    ## GIVEN a mongo adapter and the sv variant file of the case
    vcf_obj = VCF(sv_case_obj["vcf_sv_path"])

    ## WHEN loading the variants as structural variants
    load_variants(
        adapter=mongo_adapter,
        vcf_obj=vcf_obj,
        case_obj=sv_case_obj,
        variant_type="sv",
    )
    # Count the documents in the sv collection and in the snv collection
    nr_loaded_svs = sum(1 for _ in db.structural_variant.find())
    nr_loaded_snvs = sum(1 for _ in db.variant.find())

    ## THEN assert that the correct number of variants was loaded
    assert nr_loaded_svs > 0
    # Nothing may have been written to the snv collection
    assert nr_loaded_snvs == 0
    assert nr_loaded_svs == sv_case_obj["nr_sv_variants"]
def update_database(
    adapter,
    variant_file=None,
    sv_file=None,
    family_file=None,
    family_type="ped",
    skip_case_id=False,
    gq_treshold=None,
    case_id=None,
    max_window=3000,
):
    """Update an existing case in the database and load its variants.

    Args:
        adapter: Connection to database
        variant_file(str): Path to variant file
        sv_file(str): Path to sv variant file
        family_file(str): Path to family file
        family_type(str): Format of family file
        skip_case_id(bool): If no case information should be added to variants
        gq_treshold(int): If only quality variants should be considered.
            A falsy value (None or 0) disables the GQ header check.
        case_id(str): If different case id than the one in family file
            should be used
        max_window(int): Specify the max size for sv windows

    Returns:
        nr_inserted(int): Sum of the values returned by load_variants for
            the files that were loaded

    Raises:
        SyntaxError: If gq_treshold is set and a vcf does not contain GQ
        CaseError: If the case does not exist in the database
    """
    vcf_files = []

    nr_variants = None
    vcf_individuals = None
    if variant_file:
        vcf_info = check_vcf(variant_file)
        nr_variants = vcf_info["nr_variants"]
        vcf_files.append(variant_file)
        # Get the individuals that are present in the vcf file
        vcf_individuals = vcf_info["individuals"]

    nr_sv_variants = None
    sv_individuals = None
    if sv_file:
        vcf_info = check_vcf(sv_file, "sv")
        nr_sv_variants = vcf_info["nr_variants"]
        vcf_files.append(sv_file)
        sv_individuals = vcf_info["individuals"]

    # If a gq treshold is used the variants need to have GQ
    for _vcf_file in vcf_files:
        # Get a cyvcf2.VCF object
        vcf = get_vcf(_vcf_file)
        if gq_treshold and not vcf.contains("GQ"):
            LOG.warning(
                "Set gq-treshold to 0 or add info to vcf {0}".format(_vcf_file)
            )
            raise SyntaxError("GQ is not defined in vcf header")

    # Get a ped_parser.Family object from family file
    family = None
    family_id = None
    if family_file:
        with open(family_file, "r") as family_lines:
            family = get_case(family_lines=family_lines, family_type=family_type)
            family_id = family.family_id

    # An explicit case_id takes precedence over the id from the family file.
    # NOTE(review): nothing here enforces that one of them is set; presumably
    # build_case handles a missing id -- confirm.
    case_id = case_id or family_id

    # Convert information to a loqusdb Case object
    case_obj = build_case(
        case=family,
        case_id=case_id,
        vcf_path=variant_file,
        vcf_individuals=vcf_individuals,
        nr_variants=nr_variants,
        vcf_sv_path=sv_file,
        sv_individuals=sv_individuals,
        nr_sv_variants=nr_sv_variants,
    )

    # Only cases that are already in the database can be updated
    existing_case = adapter.case(case_obj)
    if not existing_case:
        raise CaseError(
            "Case {} does not exist in database".format(case_obj["case_id"])
        )

    # Update the existing case in database
    case_obj = load_case(
        adapter=adapter,
        case_obj=case_obj,
        update=True,
    )

    nr_inserted = 0
    # If the case was successfully updated we can store the variants.
    # Each case key that may hold a vcf path is paired with its variant type.
    for file_type, variant_type in (("vcf_path", "snv"), ("vcf_sv_path", "sv")):
        if case_obj.get(file_type) is None:
            continue

        vcf_obj = get_vcf(case_obj[file_type])
        try:
            nr_inserted += load_variants(
                adapter=adapter,
                vcf_obj=vcf_obj,
                case_obj=case_obj,
                skip_case_id=skip_case_id,
                gq_treshold=gq_treshold,
                max_window=max_window,
                variant_type=variant_type,
            )
        except Exception as err:
            # If something went wrong do a rollback
            LOG.warning(err)
            delete(
                adapter=adapter,
                case_obj=case_obj,
                update=True,
                existing_case=existing_case,
            )
            # Re-raise the active exception with its original traceback
            raise

    return nr_inserted