def test_load_variants(real_mongo_adapter, het_variant, case_obj):
    """Load one heterozygote variant and check the stored variant document."""
    mongo_adapter = real_mongo_adapter
    ## GIVEN an adapter and a vcf with one heterozygote variant
    db = mongo_adapter.db
    vcf = [het_variant]

    # The variant collection has to be empty before anything is loaded
    assert db.variant.find_one() is None

    ## WHEN loading the variant into the database
    load_variants(
        adapter=mongo_adapter,
        vcf_obj=vcf,
        case_obj=case_obj,
    )

    mongo_variant = db.variant.find_one()

    ## THEN assert that the variant is loaded correct
    assert mongo_variant["families"] == [case_obj["case_id"]]
    assert mongo_variant["observations"] == 1
    assert mongo_variant["homozygote"] == 0
    assert mongo_variant["hemizygote"] == 0
def test_load_same_variant_different_case(real_mongo_adapter, het_variant,
                                          case_obj):
    """Load the same variant under two case ids and check both are recorded."""
    ## GIVEN an adapter and a vcf
    mongo_adapter = real_mongo_adapter
    db = mongo_adapter.db
    vcf = [het_variant]

    ## WHEN loading the variant into the database for the original case
    load_variants(adapter=mongo_adapter, vcf_obj=vcf, case_obj=case_obj)

    # Reuse the same case object under a second id and load the variant again
    first_case_id = case_obj["case_id"]
    second_case_id = "2"
    case_obj["case_id"] = second_case_id
    load_variants(adapter=mongo_adapter, vcf_obj=vcf, case_obj=case_obj)

    ## THEN assert that the stored variant was observed in both cases
    stored_variant = db.variant.find_one()
    assert stored_variant["observations"] == 2
    assert stored_variant["families"] == [first_case_id, second_case_id]
def test_load_variants_skip_case_id(real_mongo_adapter, het_variant, case_obj):
    """Load a variant with skip_case_id=True and check no families are stored."""
    mongo_adapter = real_mongo_adapter
    ## GIVEN an adapter and a vcf with one heterozygote variant
    db = mongo_adapter.db
    vcf = [het_variant]

    ## WHEN loading the variant into the database without case information
    load_variants(
        adapter=mongo_adapter,
        vcf_obj=vcf,
        case_obj=case_obj,
        skip_case_id=True,
    )

    mongo_variant = db.variant.find_one()

    ## THEN assert that the variant has no families entry
    assert mongo_variant.get("families") is None
def test_load_two_variants(real_mongo_adapter, het_variant, case_obj):
    """Load the same heterozygote variant twice and check it is counted twice."""
    ## GIVEN an adapter and a vcf with two heterozygote variants
    mongo_adapter = real_mongo_adapter
    db = mongo_adapter.db
    vcf = [het_variant, het_variant]

    ## WHEN loading the variants into the database
    load_variants(adapter=mongo_adapter, vcf_obj=vcf, case_obj=case_obj)

    ## THEN assert that the stored variant has two observations
    stored_variant = db.variant.find_one()
    assert stored_variant["observations"] == 2
# Example #5
0
def test_load_homozygote(real_mongo_adapter, hom_variant, case_obj):
    """Load one homozygote variant and check the stored variant document."""
    mongo_adapter = real_mongo_adapter
    ## GIVEN an adapter and a vcf with one homozygote variant
    db = mongo_adapter.db
    vcf = [hom_variant]

    # The variant collection has to be empty before anything is loaded
    assert db.variant.find_one() is None

    ## WHEN loading the variant into the database
    load_variants(
        adapter=mongo_adapter,
        vcf_obj=vcf,
        case_obj=case_obj,
    )
    mongo_variant = db.variant.find_one()

    ## THEN assert that the variant is loaded correct
    assert mongo_variant['families'] == [case_obj['case_id']]
    assert mongo_variant['observations'] == 1
    assert mongo_variant['homozygote'] == 1
    assert mongo_variant['hemizygote'] == 0
def test_load_case_variants(real_mongo_adapter, case_obj):
    """Load the case vcf and check that every variant ends up in the database."""
    mongo_adapter = real_mongo_adapter

    db = mongo_adapter.db
    ## GIVEN a mongo adapter and the variant file of the case
    vcf_obj = VCF(case_obj["vcf_path"])

    ## WHEN loading the variants
    load_variants(
        adapter=mongo_adapter,
        vcf_obj=vcf_obj,
        case_obj=case_obj,
    )

    # Count the documents by walking the cursor
    nr_loaded = sum(1 for _ in db.variant.find())

    ## THEN assert that the correct number of variants was loaded
    assert nr_loaded > 0
    assert nr_loaded == case_obj["nr_variants"]
def test_load_sv_case_variants(mongo_adapter, sv_case_obj):
    """Load the sv vcf of a case and check only structural variants are stored."""
    db = mongo_adapter.db
    ## GIVEN a mongo adapter and the sv variant file of the case
    vcf_obj = VCF(sv_case_obj["vcf_sv_path"])

    ## WHEN loading the variants as structural variants
    load_variants(
        adapter=mongo_adapter,
        vcf_obj=vcf_obj,
        case_obj=sv_case_obj,
        variant_type="sv",
    )

    # Count the documents in each collection by walking the cursors
    nr_loaded_svs = sum(1 for _ in db.structural_variant.find())
    nr_loaded_snvs = sum(1 for _ in db.variant.find())

    ## THEN assert that the correct number of variants was loaded
    assert nr_loaded_svs > 0
    assert nr_loaded_snvs == 0
    assert nr_loaded_svs == sv_case_obj["nr_sv_variants"]
# Example #8
0
def update_database(
    adapter,
    variant_file=None,
    sv_file=None,
    family_file=None,
    family_type="ped",
    skip_case_id=False,
    gq_treshold=None,
    case_id=None,
    max_window=3000,
):
    """Update a case in the database

    The case has to exist in the database already. The case information is
    updated first and then the variants of each given file are loaded. If
    loading the variants fails, ``delete`` is called to roll back and the
    error is re-raised.

    Args:
          adapter: Connection to database
          variant_file(str): Path to variant file
          sv_file(str): Path to sv variant file
          family_file(str): Path to family file
          family_type(str): Format of family file
          skip_case_id(bool): If no case information should be added to variants
          gq_treshold(int): If only quality variants should be considered
          case_id(str): If different case id than the one in family file should be used
          max_window(int): Specify the max size for sv windows

    Returns:
          nr_inserted(int): Sum of the values returned by load_variants

    Raises:
          SyntaxError: If gq_treshold is set and a vcf does not contain GQ
          CaseError: If the case does not exist in the database
    """
    vcf_files = []
    nr_variants = None
    vcf_individuals = None
    if variant_file:
        vcf_info = check_vcf(variant_file)
        nr_variants = vcf_info["nr_variants"]
        vcf_files.append(variant_file)
        # Get the individuals that are present in vcf file
        vcf_individuals = vcf_info["individuals"]

    nr_sv_variants = None
    sv_individuals = None
    if sv_file:
        vcf_info = check_vcf(sv_file, "sv")
        nr_sv_variants = vcf_info["nr_variants"]
        vcf_files.append(sv_file)
        sv_individuals = vcf_info["individuals"]

    # If a gq treshold is used the variants needs to have GQ.
    # Without a treshold there is nothing to verify, so no file is opened.
    if gq_treshold:
        for _vcf_file in vcf_files:
            # Get a cyvcf2.VCF object
            vcf = get_vcf(_vcf_file)
            if not vcf.contains("GQ"):
                LOG.warning(
                    "Set gq-treshold to 0 or add info to vcf %s", _vcf_file)
                raise SyntaxError("GQ is not defined in vcf header")

    # Get a ped_parser.Family object from family file
    family = None
    family_id = None
    if family_file:
        with open(family_file, "r") as family_lines:
            family = get_case(family_lines=family_lines,
                              family_type=family_type)
            family_id = family.family_id

    # There has to be a case_id or a family at this stage.
    case_id = case_id or family_id

    # Convert information to a loqusdb Case object
    case_obj = build_case(
        case=family,
        case_id=case_id,
        vcf_path=variant_file,
        vcf_individuals=vcf_individuals,
        nr_variants=nr_variants,
        vcf_sv_path=sv_file,
        sv_individuals=sv_individuals,
        nr_sv_variants=nr_sv_variants,
    )

    # Keep the stored case around, it is handed to delete() on rollback
    existing_case = adapter.case(case_obj)
    if not existing_case:
        raise CaseError("Case {} does not exist in database".format(
            case_obj["case_id"]))

    # Update the existing case in database
    case_obj = load_case(
        adapter=adapter,
        case_obj=case_obj,
        update=True,
    )

    nr_inserted = 0
    # If case was successfully updated we can store the variants
    for file_type, variant_type in (("vcf_path", "snv"), ("vcf_sv_path", "sv")):
        if case_obj.get(file_type) is None:
            continue

        vcf_obj = get_vcf(case_obj[file_type])
        try:
            nr_inserted += load_variants(
                adapter=adapter,
                vcf_obj=vcf_obj,
                case_obj=case_obj,
                skip_case_id=skip_case_id,
                gq_treshold=gq_treshold,
                max_window=max_window,
                variant_type=variant_type,
            )
        except Exception as err:
            # If something went wrong do a rollback
            LOG.warning(err)
            delete(
                adapter=adapter,
                case_obj=case_obj,
                update=True,
                existing_case=existing_case,
            )
            # Bare raise re-raises the active exception with its traceback
            raise
    return nr_inserted