Example #1
0
def read_variant(variant_id: str, db: MongoAdapter = Depends(database)):
    """Look up one variant by its ``_id`` and attach a ``total`` field.

    ``total`` is set to ``db.nr_cases(snv_cases=True, sv_cases=False)``.

    Raises:
        HTTPException: 404 when ``db.get_variant`` returns a falsy result.
    """
    found = db.get_variant({"_id": variant_id})
    if found:
        found["total"] = db.nr_cases(snv_cases=True, sv_cases=False)
        return found

    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail=f"Variant {variant_id} not found",
    )
Example #2
0
def insert_sv_variants(adapter: MongoAdapter, case_obj: Case) -> None:
    """Insert every record of the case's SV VCF into the database, one by one.

    Each record read from ``case_obj.vcf_sv_path`` is turned into a ``Variant``
    flagged ``is_sv=True`` (``homozygote`` and ``hemizygote`` are always stored
    as 0 here) and handed straight to ``adapter.add_structural_variant`` using
    ``settings.load_sv_window`` as the window.
    """
    sv_records = VCF(case_obj.vcf_sv_path, threads=settings.cyvcf_threads)

    for record in sv_records:
        record_id = get_variant_id(variant=record)
        ref_allele, alt_allele = record.REF, record.ALT[0]  # first ALT allele only
        coords = get_coords(record)

        sv_document = Variant(
            variant_id=record_id,
            chrom=coords["chrom"],
            pos=coords["pos"],
            end=coords["end"],
            ref=ref_allele,
            alt=alt_allele,
            end_chrom=coords["end_chrom"],
            sv_type=coords["sv_type"],
            sv_len=coords["sv_length"],
            case_id=case_obj.case_id,
            homozygote=0,
            hemizygote=0,
            is_sv=True,
            id_column=record.ID,
        )
        adapter.add_structural_variant(
            variant=sv_document,
            max_window=settings.load_sv_window,
        )
Example #3
0
def read_case(case_id: str, db: MongoAdapter = Depends(database)):
    """Fetch the case stored under ``case_id`` and return it as JSON.

    The stored document is validated through ``Case`` before being encoded.

    Raises:
        HTTPException: 404 when no case document is found.
    """
    case_document = db.case({"case_id": case_id})
    if case_document:
        payload = jsonable_encoder(Case(**case_document))
        return JSONResponse(payload, status_code=status.HTTP_200_OK)

    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail=f"Case {case_id} not found",
    )
Example #4
0
def read_cases(db: MongoAdapter = Depends(database)):
    """Report ``db.nr_cases`` for SNV cases and for SV cases.

    Returns:
        A dict with the keys ``nr_cases_snvs`` and ``nr_cases_svs``.
    """
    return {
        "nr_cases_snvs": db.nr_cases(snv_cases=True, sv_cases=False),
        "nr_cases_svs": db.nr_cases(snv_cases=False, sv_cases=True),
    }
Example #5
0
def insert_snv_variants(adapter: MongoAdapter, case_obj: Case) -> None:
    """Collect SNV documents from the case's VCF and insert them in one call.

    For every record in ``case_obj.vcf_path`` the individuals of the case are
    inspected. An individual is skipped when its genotype quality is below
    ``settings.load_gq_threshold`` or when its genotype is neither ``het`` nor
    ``hom_alt``. Every individual that passes both filters contributes one
    ``Variant`` to the batch passed to ``adapter.add_variants``.
    """
    carrier_calls = ("het", "hom_alt")
    sex_chromosomes = ("X", "Y")
    documents = []

    for record in VCF(case_obj.vcf_path, threads=settings.cyvcf_threads):
        record_id = get_variant_id(variant=record)
        ref_allele, alt_allele = record.REF, record.ALT[0]  # first ALT allele only

        coords = get_coords(record)
        chrom, pos = coords["chrom"], coords["pos"]

        # Reset per record; once set, a flag stays set for the remaining
        # individuals of the same record.
        seen_homozygote = seen_hemizygote = 0

        for individual in case_obj.individuals:
            sample_index = individual["ind_index"]
            if int(record.gt_quals[sample_index]) < settings.load_gq_threshold:
                continue

            call = GENOTYPE_MAP[record.gt_types[sample_index]]
            if call not in carrier_calls:
                continue

            if call == "hom_alt":
                seen_homozygote = 1

            # sex == 1 presumably encodes male -- TODO confirm against the
            # Individual model.
            sex_chrom_candidate = chrom in sex_chromosomes and individual["sex"] == 1
            if sex_chrom_candidate and not check_par(
                chrom, pos, genome_build=settings.genome_build
            ):
                seen_hemizygote = 1

            # NOTE(review): the document is built and appended inside the
            # per-individual loop, so one record yields one document per
            # carrying individual. Confirm this is intended rather than one
            # document per record.
            documents.append(
                Variant(
                    variant_id=record_id,
                    chrom=chrom,
                    pos=pos,
                    end=coords["end"],
                    ref=ref_allele,
                    alt=alt_allele,
                    end_chrom=coords["end_chrom"],
                    sv_type=coords["sv_type"],
                    sv_len=coords["sv_length"],
                    case_id=case_obj.case_id,
                    homozygote=seen_homozygote,
                    hemizygote=seen_hemizygote,
                    is_sv=False,
                    id_column=record.ID,
                )
            )

    adapter.add_variants(variants=documents)
Example #6
0
def build_case_object(
    adapter: MongoAdapter,
    case_id: str,
    profile_path: Union[Path, str],
    vcf_path: Union[Path, str],
    vcf_sv_path: Union[Path, str] = None,
) -> Case:
    """Validate the input files, store a new case and return it as read back.

    Steps, in order:
      1. Read the per-sample profiles from ``profile_path``.
      2. Run the duplicate-profile check against the database.
      3. Run the GQ-field and variant-type checks on the SNV VCF.
      4. Build one ``Individual`` per sample; ``ind_index`` is the sample's
         position in the profiles mapping.
      5. Build the ``Case``; the SV fields are filled in only when
         ``vcf_sv_path`` is truthy.
      6. Insert the case (without its ``id`` field) and re-read it by
         ``case_id``.

    Returns:
        A ``Case`` built from the document fetched back from the database.
    """
    profiles: dict = get_profiles(adapter=adapter, vcf_file=profile_path)

    # Validation helpers; any failure is expected to surface as an exception
    # raised by the helper itself.
    check_profile_duplicates(adapter=adapter, profiles=profiles)
    check_vcf_gq_field(vcf_path=vcf_path)
    check_snv_variant_types(vcf_path=vcf_path)

    individuals_by_sample = {}
    for position, (sample_id, sample_profile) in enumerate(profiles.items()):
        individuals_by_sample[sample_id] = Individual(
            ind_id=sample_id,
            case_id=case_id,
            ind_index=position,
            profile=sample_profile,
        )
    individual_list: List = list(individuals_by_sample.values())

    new_case = Case(
        case_id=case_id,
        profile_path=profile_path,
        vcf_path=vcf_path,
        vcf_sv_path=vcf_sv_path,
        nr_sv_variants=0,
        nr_variants=get_vcf_variant_count(vcf_path=vcf_path),
        individuals=individual_list,
        inds=individuals_by_sample,
        id=case_id,
    )

    if vcf_sv_path:
        new_case.nr_sv_variants = get_vcf_variant_count(vcf_path=vcf_sv_path)
        new_case.sv_individuals = individual_list
        new_case.sv_inds = individuals_by_sample

    adapter.add_case(new_case.dict(by_alias=True, exclude={"id"}))

    stored_document = adapter.case({"case_id": case_id})
    return Case(**stored_document)
Example #7
0
def get_profiles(adapter: MongoAdapter, vcf_file: str) -> Dict[str, str]:
    """
    Build a genotype profile for every sample in a VCF file.

    For each entry yielded by ``adapter.profile_variants()`` the VCF is queried
    at ``chrom:pos-(pos + 1)``. The first record whose variant id equals the
    profile variant's ``_id`` supplies the genotypes; when no record matches,
    every sample is recorded as homozygous reference for that position.

    Genotypes are written as the two alleles joined together: alt+alt for
    ``hom_alt``, ref+alt for ``het`` and ref+ref for anything else.

    Returns a dictionary :
    {SAMPLE_ID : [genotype1, genotype2, ...]}
    (each value is a list of strings, although the annotation says ``str``).

    """

    vcf = VCF(vcf_file, threads=settings.cyvcf_threads)
    samples = vcf.samples
    profiles = {sample: [] for sample in samples}

    for site in adapter.profile_variants():
        ref, alt = site["ref"], site["alt"]
        start = site["pos"]
        stop = start + 1
        contig = site["chrom"]
        region = f"{contig}:{start}-{stop}"

        hom_ref_call = f"{ref}{ref}"
        calls_by_genotype = {"hom_alt": f"{alt}{alt}", "het": f"{ref}{alt}"}

        for record in vcf(region):
            # Only a record with the exact same variant id counts as a match
            if get_variant_id(record) != site["_id"]:
                continue

            for sample_index, sample in enumerate(samples):
                genotype = GENOTYPE_MAP[record.gt_types[sample_index]]
                profiles[sample].append(calls_by_genotype.get(genotype, hom_ref_call))

            # The first matching record settles this position
            break
        else:
            # Nothing in the region matched: treat all samples as hom ref
            for sample in samples:
                profiles[sample].append(hom_ref_call)

    return profiles
Example #8
0
def database(uri: str = None, db_name: str = None) -> MongoAdapter:
    """Create a ``MongoAdapter``, falling back to ``settings`` for missing values.

    A falsy ``uri`` or ``db_name`` is replaced by ``settings.uri`` or
    ``settings.db_name`` respectively.

    Raises:
        HTTPException: 503 when ``get_client`` raises ``DB_Error``.
    """
    resolved_uri = uri if uri else settings.uri
    resolved_name = db_name if db_name else settings.db_name

    try:
        mongo_client = get_client(uri=resolved_uri)
    except DB_Error:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Could not connect to database",
        )

    return MongoAdapter(mongo_client, db_name=resolved_name)
Example #9
0
def cli(ctx, database, username, password, authdb, port, host, uri, verbose,
        config, test, genome_build):
    """loqusdb: manage a local variant count database."""
    # The docstring above is left untouched: for a command-line entry point it
    # is presumably rendered as the help text (TODO confirm against the
    # decorators, which are not visible here).
    #
    # Precedence implemented below:
    #   * uri, host, port, username, password, db_name: a truthy value from
    #     the config file wins over the command-line value.
    #   * genome_build: command-line value, then config value, then GRCH37.
    #   * test: forces the URI to "mongomock://" regardless of the above.
    #
    # On success ctx.obj holds: db, adapter, version, genome_build and either
    # uri or port + host.
    loglevel = "DEBUG" if verbose else "INFO"
    coloredlogs.install(level=loglevel)
    LOG.info("Running loqusdb version %s", __version__)

    configs = {}
    if config:
        try:
            # safe_load returns None for an empty document; fall back to an
            # empty mapping so the .get() lookups below keep working.
            configs = yaml.safe_load(config) or {}
        except yaml.YAMLError as err:
            LOG.warning(err)
            ctx.abort()

    uri = configs.get("uri") or uri
    if test:
        uri = "mongomock://"
    try:
        client = get_client(
            host=configs.get("host") or host,
            port=configs.get("port") or port,
            username=configs.get("username") or username,
            password=configs.get("password") or password,
            authdb=authdb or database or "loqusdb",
            uri=uri,
        )
    except DB_Error as err:
        LOG.warning(err)
        ctx.abort()

    database = configs.get("db_name") or database

    if not database:
        database = "loqusdb"
        if uri:
            uri_info = uri_parser.parse_uri(uri)
            # A URI without a database component yields None here; keep the
            # "loqusdb" default instead of overwriting it with None.
            database = uri_info.get("database") or database

    adapter = MongoAdapter(client, db_name=database)

    genome_build = genome_build or configs.get("genome_build") or GRCH37

    ctx.obj = {}
    ctx.obj["db"] = database
    if uri:
        ctx.obj["uri"] = uri
    else:
        ctx.obj["port"] = port
        ctx.obj["host"] = host
    ctx.obj["adapter"] = adapter
    ctx.obj["version"] = __version__
    ctx.obj["genome_build"] = genome_build
Example #10
0
def read_sv(
        chrom: str,
        pos: int,
        end: int,
        sv_type: str,
        db: MongoAdapter = Depends(database),
        end_chrom: str = None,
):
    """Look up a structural variant by position and type and attach ``total``.

    When ``end_chrom`` is falsy, ``chrom`` is used as the end chromosome.
    ``total`` is set to ``db.nr_cases(snv_cases=False, sv_cases=True)``.

    Raises:
        HTTPException: 404 when the lookup returns a falsy result.
    """
    query = {
        "chrom": chrom,
        "end_chrom": end_chrom or chrom,
        "sv_type": sv_type,
        "pos": pos,
        "end": end,
    }
    hit = db.get_structural_variant(query)
    if hit:
        hit["total"] = db.nr_cases(snv_cases=False, sv_cases=True)
        return hit

    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail="Variant not found",
    )
Example #11
0
def delete_case(case_id: str, db: MongoAdapter = Depends(database)):
    """Remove the case stored under ``case_id``.

    Returns a 404 JSON response when the case is not found and a 200 JSON
    response after ``delete`` completes.

    Raises:
        HTTPException: 400 when anything raised during deletion; the message
            warns that the case may be partially deleted.
    """
    case_document = db.case({"case_id": case_id})
    if not case_document:
        return JSONResponse(
            f"Case {case_id} does not exist",
            status_code=status.HTTP_404_NOT_FOUND,
        )

    try:
        delete(
            adapter=db,
            case_obj=case_document,
            genome_build=settings.genome_build,
        )
        return JSONResponse(
            f"Case {case_id} had been deleted",
            status_code=status.HTTP_200_OK,
        )
    except Exception as error:
        failure = f"Error {error.__class__.__name__}: {error}; Case may be partially deleted"
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=failure,
        )
Example #12
0
def check_profile_duplicates(adapter: MongoAdapter, profiles: dict) -> None:
    """Reject an upload whose sample profiles match a profile already stored.

    Every uploaded profile is compared with the profile of every individual of
    every case returned by ``adapter.cases()``. Cases without individuals and
    individuals without a profile are skipped.

    Raises:
        ProfileDuplicationError: on the first comparison whose similarity is
            at least ``settings.load_hard_threshold``.
    """
    for stored_case in adapter.cases():
        stored_individuals = stored_case.get("individuals")
        if stored_individuals is None:
            continue

        for stored_individual in stored_individuals:
            stored_profile = stored_individual.get("profile")
            if not stored_profile:
                continue

            for sample_id, uploaded_profile in profiles.items():
                score = compare_profiles(uploaded_profile, stored_profile)
                if score >= settings.load_hard_threshold:
                    raise ProfileDuplicationError(
                        f"Profile of sample {sample_id} "
                        f"matches existing profile {stored_individual.get('ind_id')}"
                    )
Example #13
0
def load_case(
        case_id: str,
        snv_file: str,
        profile_file: str,
        sv_file: Optional[str] = None,
        db: MongoAdapter = Depends(database),
):
    """Load a new case and its variants from files on disk.

    Returns a 409 JSON response when ``case_id`` is already present, otherwise
    a 200 JSON response carrying the encoded case once it has been built and
    its variants inserted.

    Raises:
        HTTPException: 406 when the SNV file, the profile file or a supplied
            SV file does not exist; 400 when building the case or inserting
            its variants raises.
    """
    if db.case({"case_id": case_id}):
        return JSONResponse(
            f"Case {case_id} already exists",
            status_code=status.HTTP_409_CONFLICT,
        )

    # Checked in the order: SV file (only when given), SNV file, profile file
    paths_to_check = [snv_file, profile_file]
    if sv_file:
        paths_to_check.insert(0, sv_file)
    if any(not Path(candidate).exists() for candidate in paths_to_check):
        raise HTTPException(
            detail="Input file path does not exist",
            status_code=status.HTTP_406_NOT_ACCEPTABLE,
        )

    try:
        case_object: Case = build_case_object(
            adapter=db,
            case_id=case_id,
            profile_path=profile_file,
            vcf_path=snv_file,
            vcf_sv_path=sv_file,
        )
        insert_case_variants(adapter=db, case_obj=case_object)
        encoded_case = jsonable_encoder(case_object)
        return JSONResponse(encoded_case, status_code=status.HTTP_200_OK)
    except LoqusdbAPIError as api_error:
        LOG.error(api_error)
        raise HTTPException(
            detail=f"Exception {api_error.__class__.__name__}: {api_error.message}",
            status_code=status.HTTP_400_BAD_REQUEST,
        )
    except Exception as error:
        LOG.error(error)
        raise HTTPException(
            detail=f"Exception {error.__class__.__name__} {error}",
            status_code=status.HTTP_400_BAD_REQUEST,
        )
Example #14
0
def real_mongo_adapter(request, real_mongo_client):
    """Build a ``MongoAdapter`` on ``real_mongo_client`` using ``REAL_DATABASE``."""
    return MongoAdapter(real_mongo_client, REAL_DATABASE)
Example #15
0
def mongo_adapter(request, mongo_client):
    """Build a ``MongoAdapter`` on ``mongo_client`` using ``TEST_DATABASE``."""
    return MongoAdapter(mongo_client, TEST_DATABASE)
Example #16
0
def test_connect(mongo_client):
    """An adapter built with a client and a name exposes that name as ``db_name``."""
    expected_name = "test"
    connected = MongoAdapter(mongo_client, expected_name)

    assert connected.db_name == expected_name
def test_init_app(mongo_client):
    """``init_app`` sets ``db_name`` from the app's ``MONGO_DBNAME`` config entry."""
    flask_app = MockFlaskApp()
    unconnected = MongoAdapter()

    unconnected.init_app(flask_app)

    assert unconnected.db_name == flask_app.config["MONGO_DBNAME"]