예제 #1
0
def test_profile_match(real_mongo_adapter, profile_vcf_path, profile_list,
                       case_obj):
    """Check that ``profile_match`` raises ``ProfileError`` for these profiles.

    The database is first populated with the profile variants and the case,
    then the same ``profile_list`` is submitted under a new sample name.
    """
    # Populate the database: profile variants first, then the case
    # (the case fixture presumably carries profile_list -- verify in conftest)
    load_profile_variants(real_mongo_adapter, profile_vcf_path)
    load_case(real_mongo_adapter, case_obj)

    # Profiles keyed by sample name, as handed to profile_match
    query_profiles = {"test_individual": profile_list}

    # The matching step is expected to fail with a ProfileError
    with pytest.raises(ProfileError):
        profile_match(real_mongo_adapter, query_profiles)
예제 #2
0
def test_check_duplicates(real_mongo_adapter, profile_vcf_path, profile_list,
                          case_obj):
    """Exercise ``check_duplicates`` at three different hard thresholds.

    An identical profile must be reported at 0.95; a profile whose last
    genotype is replaced by "NN" must not be reported at 0.80 but must be
    reported again at 0.75.
    """
    sample_name = "test_individual"

    # Populate the database with profile variants and the case
    load_profile_variants(real_mongo_adapter, profile_vcf_path)
    load_case(real_mongo_adapter, case_obj)

    # Identical profile: expected to match the stored sample
    identical = {sample_name: profile_list}
    exact_hit = check_duplicates(
        real_mongo_adapter, identical, hard_threshold=0.95)
    assert exact_hit["profile"] == profile_list

    # Swap the last genotype for "NN": no match expected at 0.80
    altered = {sample_name: profile_list[:-1] + ["NN"]}
    strict_hit = check_duplicates(
        real_mongo_adapter, altered, hard_threshold=0.80)
    assert strict_hit is None

    # Same altered profile with a lower threshold: match expected again
    relaxed_hit = check_duplicates(
        real_mongo_adapter, altered, hard_threshold=0.75)
    assert relaxed_hit["profile"] == profile_list
예제 #3
0
def test_load_complete_case(mongo_adapter, complete_case_obj):
    """Load a case with both SNV and SV data and inspect the stored document."""
    ## GIVEN a case that includes both svs and snvs
    database = mongo_adapter.db

    ## WHEN loading the case
    load_case(mongo_adapter, complete_case_obj)

    ## THEN the stored case holds individuals and variant counts for both types
    stored = database.case.find_one()

    for individuals_key in ('individuals', 'sv_individuals'):
        assert len(stored[individuals_key]) == 3

    for count_key in ('nr_variants', 'nr_sv_variants'):
        assert stored[count_key] > 0
    
예제 #4
0
def update_database(
    adapter,
    variant_file=None,
    sv_file=None,
    family_file=None,
    family_type="ped",
    skip_case_id=False,
    gq_treshold=None,
    case_id=None,
    max_window=3000,
):
    """Refresh a case that is already stored and load its variants.

    The supplied VCF file(s) are checked, a case object is built from them
    (and from the family file, when given), the matching case in the
    database is updated and the variants of each file are loaded. If loading
    fails, ``delete`` is called with the previously stored case to roll the
    update back and the error is re-raised.

    Args:
          adapter: Connection to database
          variant_file(str): Path to variant file
          sv_file(str): Path to sv variant file
          family_file(str): Path to family file
          family_type(str): Format of family file
          skip_case_id(bool): If no case information should be added to variants
          gq_treshold(int): If only quality variants should be considered
          case_id(str): Case id to use instead of the one in the family file
          max_window(int): Specify the max size for sv windows

    Returns:
          nr_inserted(int): Sum of the values returned by ``load_variants``

    Raises:
          SyntaxError: If a gq treshold is given and a vcf does not contain GQ
          CaseError: If the case is not found in the database
    """
    snv_count = None
    snv_individuals = None
    sv_count = None
    sv_individuals_found = None
    files_to_check = []

    # Collect summary information for the snv file
    if variant_file:
        snv_info = check_vcf(variant_file)
        snv_count = snv_info["nr_variants"]
        # Not used further: the loading loop below sets the variant type
        # for each file on its own.
        _reported_type = snv_info["variant_type"]
        # Individuals that are present in the vcf file
        snv_individuals = snv_info["individuals"]
        files_to_check.append(variant_file)

    # Collect summary information for the sv file
    if sv_file:
        sv_info = check_vcf(sv_file, "sv")
        sv_count = sv_info["nr_variants"]
        sv_individuals_found = sv_info["individuals"]
        files_to_check.append(sv_file)

    # Every file is opened; when a gq treshold is used it must contain GQ
    for checked_path in files_to_check:
        vcf_handle = get_vcf(checked_path)
        if gq_treshold and not vcf_handle.contains("GQ"):
            LOG.warning(
                "Set gq-treshold to 0 or add info to vcf {0}".format(
                    checked_path))
            raise SyntaxError("GQ is not defined in vcf header")

    # Parse the family file, if one was given
    family = None
    family_id = None
    if family_file:
        with open(family_file, "r") as family_handle:
            family = get_case(family_lines=family_handle,
                              family_type=family_type)
            family_id = family.family_id

    # An explicit case_id takes precedence over the id from the family file
    if not case_id:
        case_id = family_id

    # Gather everything into a case object
    case_obj = build_case(
        case=family,
        case_id=case_id,
        vcf_path=variant_file,
        vcf_individuals=snv_individuals,
        nr_variants=snv_count,
        vcf_sv_path=sv_file,
        sv_individuals=sv_individuals_found,
        nr_sv_variants=sv_count,
    )

    # Only cases that are already present can be updated
    previous_case = adapter.case(case_obj)
    if not previous_case:
        raise CaseError("Case {} does not exist in database".format(
            case_obj["case_id"]))

    # Update the stored case
    case_obj = load_case(
        adapter=adapter,
        case_obj=case_obj,
        update=True,
    )

    # Load the variants, snv file first and sv file second
    nr_inserted = 0
    for path_key, kind in (("vcf_path", "snv"), ("vcf_sv_path", "sv")):
        if case_obj.get(path_key) is None:
            continue

        vcf_obj = get_vcf(case_obj[path_key])
        try:
            nr_inserted += load_variants(
                adapter=adapter,
                vcf_obj=vcf_obj,
                case_obj=case_obj,
                skip_case_id=skip_case_id,
                gq_treshold=gq_treshold,
                max_window=max_window,
                variant_type=kind,
            )
        except Exception as exc:
            # Something went wrong: roll back using the previous case
            LOG.warning(exc)
            delete(
                adapter=adapter,
                case_obj=case_obj,
                update=True,
                existing_case=previous_case,
            )
            raise exc
    return nr_inserted