def test_profile_match(real_mongo_adapter, profile_vcf_path, profile_list, case_obj):
    """Check that ``profile_match`` raises ``ProfileError`` for a loaded profile."""
    adapter = real_mongo_adapter

    ## GIVEN a database holding the profile variants and a case whose
    ## profiles are profile_list
    load_profile_variants(adapter, profile_vcf_path)
    load_case(adapter, case_obj)

    ## WHEN matching the same profile, as it would be read from a vcf
    query_profiles = {"test_individual": profile_list}

    ## THEN assert that an error is raised
    with pytest.raises(ProfileError):
        profile_match(adapter, query_profiles)
def test_check_duplicates(real_mongo_adapter, profile_vcf_path, profile_list, case_obj):
    """Check how ``check_duplicates`` responds to different ``hard_threshold`` values."""
    adapter = real_mongo_adapter

    ## GIVEN a database holding the profile variants and a case whose
    ## profiles are profile_list
    load_profile_variants(adapter, profile_vcf_path)
    load_case(adapter, case_obj)

    ## WHEN searching with a profile identical to the one in the database
    identical_profiles = {"test_individual": profile_list}
    hit = check_duplicates(adapter, identical_profiles, hard_threshold=0.95)

    ## THEN the sample in the database is returned
    assert hit["profile"] == profile_list

    ## WHEN the last genotype is changed
    altered_genotypes = profile_list[:-1] + ["NN"]
    altered_profiles = {"test_individual": altered_genotypes}
    hit = check_duplicates(adapter, altered_profiles, hard_threshold=0.80)

    ## THEN no match should be found
    assert hit is None

    ## WHEN the threshold is lowered
    hit = check_duplicates(adapter, altered_profiles, hard_threshold=0.75)

    ## THEN the match should be found again
    assert hit["profile"] == profile_list
def test_load_complete_case(mongo_adapter, complete_case_obj):
    """Load a case with both snvs and svs and verify the stored document."""
    ## GIVEN a case that includes both svs and snvs
    database = mongo_adapter.db

    ## WHEN loading the case
    load_case(mongo_adapter, complete_case_obj)

    ## THEN assert that all info is added
    stored_case = database.case.find_one()

    # Both individual lists should hold three entries
    for individuals_key in ("individuals", "sv_individuals"):
        assert len(stored_case[individuals_key]) == 3

    # Variant counts should be registered for snvs and svs
    assert stored_case["nr_variants"] > 0
    assert stored_case["nr_sv_variants"] > 0
def update_database(
    adapter,
    variant_file=None,
    sv_file=None,
    family_file=None,
    family_type="ped",
    skip_case_id=False,
    gq_treshold=None,
    case_id=None,
    max_window=3000,
):
    """Update a case in the database

    The case must already exist in the database. The case document is
    updated first and the variants from the given file(s) are loaded
    afterwards. If loading variants fails, ``delete`` is called with the
    previously existing case to roll back before the error is re-raised.

    Args:
          adapter: Connection to database
          variant_file(str): Path to variant file
          sv_file(str): Path to sv variant file
          family_file(str): Path to family file
          family_type(str): Format of family file
          skip_case_id(bool): If no case information should be added to variants
          gq_treshold(int): If only quality variants should be considered
          case_id(str): If different case id than the one in family file should be used
          max_window(int): Specify the max size for sv windows

    Returns:
          nr_inserted(int): Sum of the values returned by ``load_variants``

    Raises:
          SyntaxError: If a gq treshold is given but GQ is missing in a vcf header
          CaseError: If the case does not exist in the database
    """
    vcf_files = []

    nr_variants = None
    vcf_individuals = None
    if variant_file:
        vcf_info = check_vcf(variant_file)
        nr_variants = vcf_info["nr_variants"]
        vcf_files.append(variant_file)
        # Get the individuals that are present in vcf file
        vcf_individuals = vcf_info["individuals"]

    nr_sv_variants = None
    sv_individuals = None
    if sv_file:
        vcf_info = check_vcf(sv_file, "sv")
        nr_sv_variants = vcf_info["nr_variants"]
        vcf_files.append(sv_file)
        sv_individuals = vcf_info["individuals"]

    # If a gq treshold is used the variants need to have GQ. The files are
    # only opened when a treshold is actually set; without one there is
    # nothing to check in the headers.
    if gq_treshold:
        for _vcf_file in vcf_files:
            # Get a cyvcf2.VCF object
            vcf = get_vcf(_vcf_file)
            if not vcf.contains("GQ"):
                LOG.warning(
                    "Set gq-treshold to 0 or add info to vcf {0}".format(_vcf_file)
                )
                raise SyntaxError("GQ is not defined in vcf header")

    # Get a ped_parser.Family object from family file
    family = None
    family_id = None
    if family_file:
        with open(family_file, "r") as family_lines:
            family = get_case(family_lines=family_lines, family_type=family_type)
            family_id = family.family_id

    # There has to be a case_id or a family at this stage.
    # An explicitly given case_id takes precedence over the family id.
    case_id = case_id or family_id

    # Convert information to a loqusdb Case object
    case_obj = build_case(
        case=family,
        case_id=case_id,
        vcf_path=variant_file,
        vcf_individuals=vcf_individuals,
        nr_variants=nr_variants,
        vcf_sv_path=sv_file,
        sv_individuals=sv_individuals,
        nr_sv_variants=nr_sv_variants,
    )

    # Keep the stored case so it can be handed to delete() on rollback
    existing_case = adapter.case(case_obj)
    if not existing_case:
        raise CaseError(
            "Case {} does not exist in database".format(case_obj["case_id"])
        )

    # Update the existing case in database
    case_obj = load_case(
        adapter=adapter,
        case_obj=case_obj,
        update=True,
    )

    nr_inserted = 0
    # If case was successfully updated we can store the variants
    for file_type, variant_type in (("vcf_path", "snv"), ("vcf_sv_path", "sv")):
        if case_obj.get(file_type) is None:
            continue

        vcf_obj = get_vcf(case_obj[file_type])
        try:
            nr_inserted += load_variants(
                adapter=adapter,
                vcf_obj=vcf_obj,
                case_obj=case_obj,
                skip_case_id=skip_case_id,
                gq_treshold=gq_treshold,
                max_window=max_window,
                variant_type=variant_type,
            )
        except Exception as err:
            # If something went wrong do a rollback
            LOG.warning(err)
            delete(
                adapter=adapter,
                case_obj=case_obj,
                update=True,
                existing_case=existing_case,
            )
            # Re-raise the original error untouched
            raise

    return nr_inserted