def read_variant(variant_id: str, db: MongoAdapter = Depends(database)):
    """Return the variant document whose ``_id`` equals ``variant_id``.

    The returned document gets an extra ``total`` key set to
    ``db.nr_cases(snv_cases=True, sv_cases=False)``.

    Raises:
        HTTPException: 404 when the lookup returns nothing.
    """
    document = db.get_variant({"_id": variant_id})
    if document:
        # Attach the case count so clients can derive a frequency.
        document["total"] = db.nr_cases(snv_cases=True, sv_cases=False)
        return document
    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail=f"Variant {variant_id} not found",
    )
def insert_sv_variants(adapter: MongoAdapter, case_obj: Case) -> None:
    """Stream the structural variants of a case into the database.

    Each record read from ``case_obj.vcf_sv_path`` is converted into a
    ``Variant`` flagged with ``is_sv=True`` and handed to
    ``adapter.add_structural_variant`` immediately, so no list of
    documents is accumulated. Homozygote and hemizygote counts are
    always stored as 0 for structural variants.
    """
    sv_records = VCF(case_obj.vcf_sv_path, threads=settings.cyvcf_threads)
    for record in sv_records:
        identifier = get_variant_id(variant=record)
        ref_allele = record.REF
        alt_allele = record.ALT[0]  # only the first ALT allele is used
        coords = get_coords(record)

        sv_document = Variant(
            variant_id=identifier,
            chrom=coords["chrom"],
            pos=coords["pos"],
            end=coords["end"],
            ref=ref_allele,
            alt=alt_allele,
            end_chrom=coords["end_chrom"],
            sv_type=coords["sv_type"],
            sv_len=coords["sv_length"],
            case_id=case_obj.case_id,
            homozygote=0,
            hemizygote=0,
            is_sv=True,
            id_column=record.ID,
        )
        adapter.add_structural_variant(
            variant=sv_document,
            max_window=settings.load_sv_window,
        )
def read_case(case_id: str, db: MongoAdapter = Depends(database)):
    """Look up one case by its ``case_id`` and return it as JSON.

    The stored document is validated through the ``Case`` model before
    it is encoded.

    Raises:
        HTTPException: 404 when no case with that id is stored.
    """
    stored_case = db.case({"case_id": case_id})
    if stored_case:
        payload = jsonable_encoder(Case(**stored_case))
        return JSONResponse(payload, status_code=status.HTTP_200_OK)
    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail=f"Case {case_id} not found",
    )
def read_cases(db: MongoAdapter = Depends(database)):
    """Return how many cases are loaded with SNVs and how many with SVs.

    The result is a dict with the keys ``nr_cases_snvs`` and
    ``nr_cases_svs``, each filled from ``db.nr_cases``.
    """
    return {
        "nr_cases_snvs": db.nr_cases(snv_cases=True, sv_cases=False),
        "nr_cases_svs": db.nr_cases(snv_cases=False, sv_cases=True),
    }
def insert_snv_variants(adapter: MongoAdapter, case_obj: Case) -> None:
    """Create one ``Variant`` per SNV record and insert them in one batch.

    For every record in ``case_obj.vcf_path`` the individuals of the
    case are inspected:

    * calls whose genotype quality is below
      ``settings.load_gq_threshold`` are ignored;
    * only "het" and "hom_alt" calls are considered carriers;
    * ``homozygote`` becomes 1 if any carrier is "hom_alt";
    * ``hemizygote`` becomes 1 if any carrier has ``sex == 1`` and the
      variant lies on X or Y at a position for which ``check_par``
      returns a falsy value.

    All documents are collected and passed to ``adapter.add_variants``
    once, after the whole VCF has been read.
    """
    documents = []
    for record in VCF(case_obj.vcf_path, threads=settings.cyvcf_threads):
        identifier = get_variant_id(variant=record)
        ref_allele = record.REF
        alt_allele = record.ALT[0]  # only the first ALT allele is used
        coords = get_coords(record)
        chrom = coords["chrom"]
        pos = coords["pos"]

        homozygote_seen = 0
        hemizygote_seen = 0
        for individual in case_obj.individuals:
            sample_index = individual["ind_index"]

            # Skip low-confidence calls.
            if int(record.gt_quals[sample_index]) < settings.load_gq_threshold:
                continue

            call = GENOTYPE_MAP[record.gt_types[sample_index]]
            if call not in ["het", "hom_alt"]:
                continue

            if call == "hom_alt":
                homozygote_seen = 1

            # ``and`` keeps the original short-circuit order: the sex
            # lookup and check_par only run for X/Y variants.
            on_sex_chrom_with_sex_1 = chrom in ["X", "Y"] and individual["sex"] == 1
            if on_sex_chrom_with_sex_1 and not check_par(
                chrom, pos, genome_build=settings.genome_build
            ):
                hemizygote_seen = 1

        documents.append(
            Variant(
                variant_id=identifier,
                chrom=chrom,
                pos=pos,
                end=coords["end"],
                ref=ref_allele,
                alt=alt_allele,
                end_chrom=coords["end_chrom"],
                sv_type=coords["sv_type"],
                sv_len=coords["sv_length"],
                case_id=case_obj.case_id,
                homozygote=homozygote_seen,
                hemizygote=hemizygote_seen,
                is_sv=False,
                id_column=record.ID,
            )
        )
    adapter.add_variants(variants=documents)
def build_case_object(
    adapter: MongoAdapter,
    case_id: str,
    profile_path: Union[Path, str],
    vcf_path: Union[Path, str],
    vcf_sv_path: Union[Path, str] = None,
) -> Case:
    """Validate the input files, store a new case and return it.

    Steps, in order:

    1. read the sample profiles from ``profile_path``;
    2. run the duplicate-profile check against the database;
    3. run the GQ-field and variant-type checks on the SNV VCF;
    4. build one ``Individual`` per sample, indexed by its position in
       the profile mapping;
    5. insert the case document and read it back from the database.

    The returned ``Case`` is built from the document fetched after the
    insert, not from the in-memory object.
    """
    # Parse MAF profiles from the profile file
    profiles: dict = get_profiles(adapter=adapter, vcf_file=profile_path)
    # Check whether any profile duplicates one already in the database
    check_profile_duplicates(adapter=adapter, profiles=profiles)
    # Check that the SNV file has a GQ field
    check_vcf_gq_field(vcf_path=vcf_path)
    # Check that the SNV file does not contain SV variants
    check_snv_variant_types(vcf_path=vcf_path)

    individuals = {}
    for sample_index, (sample, profile) in enumerate(profiles.items()):
        individuals[sample] = Individual(
            ind_id=sample,
            case_id=case_id,
            ind_index=sample_index,
            profile=profile,
        )
    individuals_list: List = list(individuals.values())

    case_object = Case(
        case_id=case_id,
        profile_path=profile_path,
        vcf_path=vcf_path,
        vcf_sv_path=vcf_sv_path,
        nr_sv_variants=0,
        nr_variants=get_vcf_variant_count(vcf_path=vcf_path),
        individuals=individuals_list,
        inds=individuals,
        id=case_id,
    )

    # SV-specific fields are only filled when an SV file was supplied.
    if vcf_sv_path:
        case_object.nr_sv_variants = get_vcf_variant_count(vcf_path=vcf_sv_path)
        case_object.sv_individuals = individuals_list
        case_object.sv_inds = individuals

    adapter.add_case(case_object.dict(by_alias=True, exclude={"id"}))
    stored_case = adapter.case({"case_id": case_id})
    return Case(**stored_case)
def get_profiles(adapter: MongoAdapter, vcf_file: str) -> Dict[str, str]:
    """Build a genotype profile for every sample in ``vcf_file``.

    For each variant returned by ``adapter.profile_variants()`` the VCF
    is queried at ``chrom:pos-(pos + 1)``. The first record whose
    variant id equals the profile variant's ``_id`` decides the
    genotype string appended for each sample:

    * "hom_alt" -> ``alt + alt``
    * "het"     -> ``ref + alt``
    * otherwise -> ``ref + ref``

    If no record matches, every sample gets ``ref + ref`` for that
    variant.

    Returns a dictionary ``{SAMPLE_ID: [gt1, gt2, ...]}`` with one
    entry per profile variant in each list.
    """
    vcf = VCF(vcf_file, threads=settings.cyvcf_threads)
    individuals = vcf.samples
    profiles = {individual: [] for individual in individuals}

    for profile_variant in adapter.profile_variants():
        ref = profile_variant["ref"]
        alt = profile_variant["alt"]
        pos = profile_variant["pos"]
        end = pos + 1
        chrom = profile_variant["chrom"]
        region = f"{chrom}:{pos}-{end}"

        for record in vcf(region):
            # Only a record with the same chrom_pos_ref_alt id counts.
            if get_variant_id(record) != profile_variant["_id"]:
                continue

            for sample_index, individual in enumerate(individuals):
                genotype = GENOTYPE_MAP[record.gt_types[sample_index]]
                gt_str = (
                    f"{alt}{alt}"
                    if genotype == "hom_alt"
                    else f"{ref}{alt}"
                    if genotype == "het"
                    else f"{ref}{ref}"
                )
                profiles[individual].append(gt_str)
            # The first matching record settles this profile variant.
            break
        else:
            # No matching call in the region: treat every sample as hom ref.
            for individual in individuals:
                profiles[individual].append(f"{ref}{ref}")

    return profiles
def database(uri: str = None, db_name: str = None) -> MongoAdapter:
    """Return a ``MongoAdapter`` connected to the configured database.

    ``uri`` and ``db_name`` fall back to ``settings.uri`` and
    ``settings.db_name`` when they are not given (or are empty).

    Raises:
        HTTPException: 503 when ``get_client`` raises ``DB_Error``.
    """
    # NOTE(review): this function is used as a FastAPI dependency; its
    # simple-typed parameters may be exposed as request query parameters
    # -- confirm that is intended.
    if not uri:
        uri = settings.uri
    if not db_name:
        db_name = settings.db_name

    try:
        client = get_client(uri=uri)
    except DB_Error:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Could not connect to database",
        )
    return MongoAdapter(client, db_name=db_name)
def cli(ctx, database, username, password, authdb, port, host, uri, verbose, config, test, genome_build):
    """loqusdb: manage a local variant count database."""
    # Logging: DEBUG when --verbose is given, INFO otherwise.
    loglevel = "DEBUG" if verbose else "INFO"
    coloredlogs.install(level=loglevel)
    LOG.info("Running loqusdb version %s", __version__)

    # Optional YAML config; its values take precedence over most CLI options.
    configs = {}
    if config:
        try:
            # safe_load returns None for an empty document; keep a dict so
            # the .get() lookups below cannot fail.
            configs = yaml.safe_load(config) or {}
        except yaml.YAMLError as err:
            LOG.warning(err)
            ctx.abort()
        if not isinstance(configs, dict):
            # A config holding a bare list or scalar has no keys to read.
            LOG.warning("Config file must contain a mapping of settings")
            ctx.abort()

    uri = configs.get("uri") or uri
    if test:
        # Test mode always talks to an in-memory mongomock instance.
        uri = "mongomock://"

    try:
        client = get_client(
            host=configs.get("host") or host,
            port=configs.get("port") or port,
            username=configs.get("username") or username,
            password=configs.get("password") or password,
            authdb=authdb or database or "loqusdb",
            uri=uri,
        )
    except DB_Error as err:
        LOG.warning(err)
        ctx.abort()

    # Database name: config value, then CLI option, then the URI's
    # database, then the default "loqusdb".
    database = configs.get("db_name") or database
    if not database:
        database = "loqusdb"
        if uri:
            uri_info = uri_parser.parse_uri(uri)
            # A URI without a database component yields None here; keep
            # the default instead of handing None to the adapter.
            database = uri_info.get("database") or database

    adapter = MongoAdapter(client, db_name=database)
    genome_build = genome_build or configs.get("genome_build") or GRCH37

    # Shared state for the sub-commands.
    ctx.obj = {}
    ctx.obj["db"] = database
    if uri:
        ctx.obj["uri"] = uri
    else:
        ctx.obj["port"] = port
        ctx.obj["host"] = host
    ctx.obj["adapter"] = adapter
    ctx.obj["version"] = __version__
    ctx.obj["genome_build"] = genome_build
def read_sv(
    chrom: str,
    pos: int,
    end: int,
    sv_type: str,
    db: MongoAdapter = Depends(database),
    end_chrom: str = None,
):
    """Return the structural variant matching the given coordinates.

    ``end_chrom`` defaults to ``chrom`` when it is not supplied. The
    returned document gets an extra ``total`` key set to
    ``db.nr_cases(snv_cases=False, sv_cases=True)``.

    Raises:
        HTTPException: 404 when the lookup returns nothing.
    """
    query = {
        "chrom": chrom,
        "end_chrom": end_chrom or chrom,
        "sv_type": sv_type,
        "pos": pos,
        "end": end,
    }
    match = db.get_structural_variant(query)
    if match:
        match["total"] = db.nr_cases(snv_cases=False, sv_cases=True)
        return match
    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail="Variant not found",
    )
def delete_case(case_id: str, db: MongoAdapter = Depends(database)):
    """Remove the case identified by ``case_id`` from the database.

    Responds with 404 when the case is unknown and 200 after a
    successful delete.

    Raises:
        HTTPException: 400 when the delete raises; the detail warns
            that the case may be partially deleted.
    """
    existing_case = db.case({"case_id": case_id})
    if existing_case:
        try:
            delete(
                adapter=db,
                case_obj=existing_case,
                genome_build=settings.genome_build,
            )
            return JSONResponse(
                f"Case {case_id} had been deleted",
                status_code=status.HTTP_200_OK,
            )
        except Exception as e:
            # Any failure is reported to the client rather than propagated.
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"Error {e.__class__.__name__}: {e}; Case may be partially deleted",
            )
    return JSONResponse(
        f"Case {case_id} does not exist",
        status_code=status.HTTP_404_NOT_FOUND,
    )
def check_profile_duplicates(adapter: MongoAdapter, profiles: dict) -> None:
    """Reject an upload whose sample profile matches a stored one.

    Every profile in ``profiles`` is compared with the profile of every
    individual of every case returned by ``adapter.cases()``. Cases
    without individuals and individuals without a profile are skipped.

    Raises:
        ProfileDuplicationError: when a similarity reaches
            ``settings.load_hard_threshold``.
    """
    for existing_case in adapter.cases():
        stored_individuals = existing_case.get("individuals")
        if stored_individuals is None:
            continue

        for individual in stored_individuals:
            stored_profile = individual.get("profile")
            if not stored_profile:
                continue

            for sample, profile in profiles.items():
                if compare_profiles(profile, stored_profile) >= settings.load_hard_threshold:
                    raise ProfileDuplicationError(
                        f"Profile of sample {sample} "
                        f"matches existing profile {individual.get('ind_id')}"
                    )
def load_case(
    case_id: str,
    snv_file: str,
    profile_file: str,
    sv_file: Optional[str] = None,
    db: MongoAdapter = Depends(database),
):
    """Load a new case and its variants into loqusdb.

    Responds with 409 when ``case_id`` already exists and with the
    encoded case (200) after a successful load.

    Raises:
        HTTPException: 406 when an input path does not exist; 400 when
            building the case or inserting its variants fails.
    """
    if db.case({"case_id": case_id}):
        return JSONResponse(
            f"Case {case_id} already exists",
            status_code=status.HTTP_409_CONFLICT,
        )

    # The SV file is optional and only checked when one was given.
    paths_to_check = [sv_file] if sv_file else []
    paths_to_check += [snv_file, profile_file]
    if any(not Path(candidate).exists() for candidate in paths_to_check):
        raise HTTPException(
            detail="Input file path does not exist",
            status_code=status.HTTP_406_NOT_ACCEPTABLE,
        )

    try:
        case_object: Case = build_case_object(
            case_id=case_id,
            vcf_path=snv_file,
            vcf_sv_path=sv_file,
            profile_path=profile_file,
            adapter=db,
        )
        insert_case_variants(adapter=db, case_obj=case_object)
        encoded_case = jsonable_encoder(case_object)
        return JSONResponse(encoded_case, status_code=status.HTTP_200_OK)
    except LoqusdbAPIError as e:
        LOG.error(e)
        raise HTTPException(
            detail=f"Exception {e.__class__.__name__}: {e.message}",
            status_code=status.HTTP_400_BAD_REQUEST,
        )
    except Exception as e:
        LOG.error(e)
        raise HTTPException(
            detail=f"Exception {e.__class__.__name__} {e}",
            status_code=status.HTTP_400_BAD_REQUEST,
        )
def real_mongo_adapter(request, real_mongo_client):
    """Provide a ``MongoAdapter`` built on ``real_mongo_client`` and ``REAL_DATABASE``."""
    return MongoAdapter(real_mongo_client, REAL_DATABASE)
def mongo_adapter(request, mongo_client):
    """Provide a ``MongoAdapter`` built on ``mongo_client`` and ``TEST_DATABASE``."""
    return MongoAdapter(mongo_client, TEST_DATABASE)
def test_connect(mongo_client):
    """An adapter created with a database name exposes it as ``db_name``."""
    expected_name = "test"
    connected_adapter = MongoAdapter(mongo_client, expected_name)
    assert connected_adapter.db_name == expected_name
def test_init_app(mongo_client):
    """``init_app`` takes the database name from the app's ``MONGO_DBNAME`` config."""
    flask_app = MockFlaskApp()
    unbound_adapter = MongoAdapter()
    unbound_adapter.init_app(flask_app)
    assert unbound_adapter.db_name == flask_app.config["MONGO_DBNAME"]