Example #1
0
def test_profile_match(real_mongo_adapter, profile_vcf_path, profile_list,
                       case_obj):
    """Check that ``profile_match`` raises ``ProfileError`` for a known profile.

    The profile variants and a case are loaded into the database first, then
    ``profile_match`` is called with a profile built from ``profile_list``.
    """
    # Load profile variants
    load_profile_variants(real_mongo_adapter, profile_vcf_path)

    # Load case having profiles profile_list
    load_case(real_mongo_adapter, case_obj)

    # Build the profiles dictionary by hand (nothing is read from a vcf here)
    profiles = {"test_individual": profile_list}

    # Only the exception type matters, so the exception info is not bound
    with pytest.raises(ProfileError):
        profile_match(real_mongo_adapter, profiles)
Example #2
0
def test_get_profiles(real_mongo_adapter, profile_vcf_path, zipped_vcf_path):
    """Check that ``get_profiles`` yields one equally long profile per individual."""
    # The profile variants are loaded before any profile is requested
    load_profile_variants(real_mongo_adapter, profile_vcf_path)

    vcf_summary = check_vcf(zipped_vcf_path)
    profiles = get_profiles(real_mongo_adapter, zipped_vcf_path)

    # The profile keys must equal the individuals reported for the vcf
    individuals = list(profiles.keys())
    assert individuals == vcf_summary["individuals"]

    # The first profile sets the reference length; every profile must match it
    reference_length = None
    for name in individuals:
        current_length = len(profiles[name])
        if reference_length is None:
            reference_length = current_length
        assert current_length == reference_length
Example #3
0
def load_profile(ctx, load, variant_file, update, stats, profile_threshold,
                 check_vcf):
    """
        Command for profiling of samples. User may upload variants used in profiling
        from a vcf, update the profiles for all samples, and get some stats
        from the profiles in the database.

        Profiling is used to monitor duplicates in the database. The profile is
        based on the variants in the 'profile_variant' collection, assessing
        the genotypes for each sample at the position of these variants.
    """
    # NOTE(review): the docstring above is left untouched on purpose; if this
    # function is registered as a click command it doubles as the --help text.
    #
    # The four steps below are independent: each one runs when its own flag is
    # truthy, in the fixed order check_vcf -> load -> update -> stats.

    adapter = ctx.obj['adapter']

    LOG.info("Running loqusdb profile")

    if check_vcf:
        # check_vcf holds the vcf to compare against the stored profiles
        LOG.info(f"Check if profile in {check_vcf} has match in database")
        profiles = get_profiles(adapter, check_vcf)
        duplicate = check_duplicates(adapter, profiles, profile_threshold)

        if duplicate is not None:
            # Emit the match as JSON on stdout
            click.echo(json.dumps(duplicate))
        else:
            LOG.info("No duplicates found in the database")

    if load:
        # An explicit variant file wins. The genome-build default is looked up
        # only when needed, so a build without a MAF_PATH entry cannot raise
        # KeyError when the caller supplied a file.
        if variant_file is not None:
            vcf_path = variant_file
        else:
            vcf_path = MAF_PATH[ctx.obj['genome_build']]
        LOG.info(f"Loads variants in {vcf_path} to be used in profiling")
        load_profile_variants(adapter, vcf_path)

    if update:
        LOG.info("Updates profiles in database")
        update_profiles(adapter)

    if stats:
        LOG.info("Prints profile stats")
        distance_dict = profile_stats(adapter, threshold=profile_threshold)
        click.echo(table_from_dict(distance_dict))
Example #4
0
def test_check_duplicates(real_mongo_adapter, profile_vcf_path, profile_list,
                          case_obj):
    """Exercise ``check_duplicates`` with an identical and an altered profile.

    An unchanged ``profile_list`` must match at a 0.95 threshold. With the
    last genotype replaced by "NN" there must be no match at 0.80, and a
    match again at 0.75.
    """
    # Database setup: profile variants first, then the case
    load_profile_variants(real_mongo_adapter, profile_vcf_path)
    load_case(real_mongo_adapter, case_obj)

    # Identical profile: the returned match carries the same profile
    identical = {"test_individual": profile_list}
    hit = check_duplicates(real_mongo_adapter, identical, hard_threshold=0.95)
    assert hit["profile"] == profile_list

    # Swap the last genotype for "NN"; at 0.80 nothing should match
    altered_profile = profile_list[:-1] + ["NN"]
    altered = {"test_individual": altered_profile}
    hit = check_duplicates(real_mongo_adapter, altered, hard_threshold=0.80)
    assert hit is None

    # Same altered profile at the lower 0.75 threshold: a match is expected
    hit = check_duplicates(real_mongo_adapter, altered, hard_threshold=0.75)
    assert hit["profile"] == profile_list