def test_profile_match(real_mongo_adapter, profile_vcf_path, profile_list, case_obj):
    """profile_match should raise ProfileError for a profile already in the database."""
    # Populate the database with the profile variants and a case;
    # the case is expected to carry the profile given by ``profile_list``.
    load_profile_variants(real_mongo_adapter, profile_vcf_path)
    load_case(real_mongo_adapter, case_obj)

    # Hand-built profiles mapping that reuses the very same profile
    incoming_profiles = {"test_individual": profile_list}

    # Matching against the stored case must be reported as an error
    with pytest.raises(ProfileError):
        profile_match(real_mongo_adapter, incoming_profiles)
def test_get_profiles(real_mongo_adapter, profile_vcf_path, zipped_vcf_path):
    """get_profiles should return one profile per VCF individual, all of equal length."""
    # Profile variants have to be loaded before profiles are extracted
    load_profile_variants(real_mongo_adapter, profile_vcf_path)

    vcf_info = check_vcf(zipped_vcf_path)
    profiles = get_profiles(real_mongo_adapter, zipped_vcf_path)

    # Every individual of the VCF shows up, in the same order
    assert list(profiles.keys()) == vcf_info["individuals"]

    # All profiles share one length, i.e. at most one distinct length exists
    distinct_lengths = {len(profiles[individual]) for individual in profiles.keys()}
    assert len(distinct_lengths) <= 1
def load_profile(ctx, load, variant_file, update, stats, profile_threshold, check_vcf):
    """
    Command for profiling of samples. User may upload variants used in profiling
    from a vcf, update the profiles for all samples, and get some stats
    from the profiles in the database.

    Profiling is used to monitor duplicates in the database. The profile is
    based on the variants in the 'profile_variant' collection, assessing
    the genotypes for each sample at the position of these variants.
    """
    # NOTE: the docstring wording is left untouched on purpose; it presumably
    # doubles as the command's help text.
    #
    # Options, handled independently and in this order:
    #   check_vcf         - VCF whose profiles are compared with the database
    #   load              - load the variants used for profiling
    #   variant_file      - overrides the genome-build default VCF when loading
    #   update            - recompute the profiles stored in the database
    #   stats             - print a table of profile statistics
    #   profile_threshold - threshold handed to the duplicate check and the stats
    adapter = ctx.obj['adapter']

    LOG.info("Running loqusdb profile")

    if check_vcf:
        LOG.info(f"Check if profile in {check_vcf} has match in database")
        profiles = get_profiles(adapter, check_vcf)
        duplicate = check_duplicates(adapter, profiles, profile_threshold)

        if duplicate is None:
            LOG.info("No duplicates found in the database")
        else:
            # Report the duplicate as JSON on stdout
            click.echo(json.dumps(duplicate))

    if load:
        # The build-specific default is always looked up, even when it ends
        # up being replaced by an explicitly given variant file.
        default_vcf_path = MAF_PATH[ctx.obj['genome_build']]
        vcf_path = default_vcf_path if variant_file is None else variant_file
        LOG.info(f"Loads variants in {vcf_path} to be used in profiling")
        load_profile_variants(adapter, vcf_path)

    if update:
        LOG.info("Updates profiles in database")
        update_profiles(adapter)

    if stats:
        LOG.info("Prints profile stats")
        distance_dict = profile_stats(adapter, threshold=profile_threshold)
        stats_table = table_from_dict(distance_dict)
        click.echo(stats_table)
def test_check_duplicates(real_mongo_adapter, profile_vcf_path, profile_list, case_obj):
    """check_duplicates should find the stored profile only above the given threshold."""
    # Database setup: profile variants plus a case expected to carry ``profile_list``
    load_profile_variants(real_mongo_adapter, profile_vcf_path)
    load_case(real_mongo_adapter, case_obj)

    # An identical profile is a match even with a strict threshold
    identical = {"test_individual": profile_list}
    hit = check_duplicates(real_mongo_adapter, identical, hard_threshold=0.95)
    assert hit["profile"] == profile_list

    # Same profile, but with the last genotype replaced
    altered = {"test_individual": profile_list[:-1] + ["NN"]}

    # With threshold 0.80 the altered profile is no longer a match
    hit = check_duplicates(real_mongo_adapter, altered, hard_threshold=0.80)
    assert hit is None

    # Relaxing the threshold to 0.75 makes the stored profile match again
    hit = check_duplicates(real_mongo_adapter, altered, hard_threshold=0.75)
    assert hit["profile"] == profile_list