def test_query_gene_function_biosample(db: Session):
    """Biosamples can be filtered by the id of an associated gene function.

    Ten gene functions are attached to ``sample1`` through a metagenome
    annotation; ``sample2`` gets none.
    """
    annotated_sample = fakes.BiosampleFactory(id="sample1")
    fakes.BiosampleFactory(id="sample2")

    functions = []
    for index in range(10):
        functions.append(fakes.MGAGeneFunction(function__id=f"function{index}"))
    fakes.MetagenomeAnnotationFactory(
        gene_functions=functions,
        omics_processing__biosample=annotated_sample,
    )
    db.commit()

    # Populate both aggregation tables and commit before running the queries.
    models.MGAGeneFunctionAggregation.populate(db)
    models.MetaPGeneFunctionAggregation.populate(db)
    db.commit()

    def matching_ids(function_id):
        # Ids of the biosamples returned for a single gene_function.id condition.
        schema = query.BiosampleQuerySchema(
            conditions=[{
                "table": "gene_function",
                "field": "id",
                "value": function_id,
            }],
        )
        return {row.id for row in schema.execute(db)}

    assert matching_ids("function1") == {"sample1"}
    assert matching_ids("invalid") == set()
def test_faceted_filtered_query(db: Session):
    """Facet counts are computed over the biosamples matching the conditions."""
    fixtures = (
        ("sample1", {"key1": "value1", "key2": "value2"}),
        ("sample2", {"key1": "value1", "key2": "value3"}),
        ("sample3", {"key1": "value4", "key2": "value2"}),
    )
    for sample_id, annotations in fixtures:
        fakes.BiosampleFactory(id=sample_id, annotations=annotations)
    db.commit()

    # Restrict the query to sample2 so each facet reports only its values.
    only_sample2 = {
        "table": "biosample",
        "field": "id",
        "op": "==",
        "value": "sample2",
    }
    schema = query.BiosampleQuerySchema(conditions=[only_sample2])

    assert schema.facet(db, "key1") == {"value1": 1}
    assert schema.facet(db, "key2") == {"value3": 1}
def test_date_range_bins_year(db: Session, biosamples):
    """Binning ``collection_date`` by year gives two bin edges and one count of 6."""
    schema = query.BiosampleQuerySchema()
    edges, counts = schema.binned_facet(
        db,
        "collection_date",
        resolution=DateBinResolution.year,
    )

    assert len(edges) == 2
    assert counts == [6]
def test_query_pi(db: Session):
    """Studies and biosamples can be faceted/filtered by principal investigator name."""
    john_study = fakes.StudyFactory(id="study1", principal_investigator__name="John Doe")
    jane_study = fakes.StudyFactory(id="study2", principal_investigator__name="Jane Doe")
    fakes.BiosampleFactory(id="sample1", study=john_study)
    fakes.BiosampleFactory(id="sample2", study=jane_study)
    db.commit()

    def john_doe_condition():
        # A fresh condition dict per schema so the two queries share no state.
        return {
            "table": "study",
            "field": "principal_investigator_name",
            "value": "John Doe",
        }

    unfiltered = query.StudyQuerySchema()
    expected_counts = {
        "John Doe": 1,
        "Jane Doe": 1,
    }
    assert unfiltered.facet(db, "principal_investigator_name") == expected_counts

    study_schema = query.StudyQuerySchema(conditions=[john_doe_condition()])
    assert ["study1"] == [row.id for row in study_schema.execute(db)]

    # The same study-level condition applied through the biosample schema.
    sample_schema = query.BiosampleQuerySchema(conditions=[john_doe_condition()])
    assert ["sample1"] == [row.id for row in sample_schema.execute(db)]
def test_date_range_bins_month(db: Session, biosamples):
    """Binning ``collection_date`` by month gives three bin edges and counts [4, 2]."""
    schema = query.BiosampleQuerySchema()
    edges, counts = schema.binned_facet(
        db,
        "collection_date",
        resolution=DateBinResolution.month,
    )

    assert len(edges) == 3
    # The first edge must not be later than 2020-01-01.
    assert edges[0] <= datetime(2020, 1, 1)  # type: ignore
    assert counts == [4, 2]
def binned_facet_biosample(
    db: Session,
    attribute: str,
    conditions: List[query.ConditionSchema],
    **kwargs,
) -> query.BinnedFacetResponse:
    """Compute a binned facet of ``attribute`` over the matching biosamples.

    A ``BiosampleQuerySchema`` is built from ``conditions``; any extra keyword
    arguments are forwarded unchanged to its ``binned_facet`` method. The
    resulting bins and facet counts are wrapped in a ``BinnedFacetResponse``.
    """
    schema = query.BiosampleQuerySchema(conditions=conditions)
    edges, counts = schema.binned_facet(db, attribute, **kwargs)
    return query.BinnedFacetResponse(bins=edges, facets=counts)
def test_range_bins(db: Session, biosamples):
    """Explicit minimum/maximum with three bins over ``depth``."""
    schema = query.BiosampleQuerySchema()
    edges, counts = schema.binned_facet(
        db,
        "depth",
        minimum=0,
        maximum=30,
        num_bins=3,
    )

    assert edges == [0, 10, 20, 30]
    assert counts == [3, 2, 1]
def test_filtered_bins(db: Session, biosamples):
    """Bins are computed over the filtered result set (``depth < 20``)."""
    shallow_only = {
        "table": "biosample",
        "field": "depth",
        "op": "<",
        "value": 20,
    }
    schema = query.BiosampleQuerySchema(conditions=[shallow_only])

    edges, counts = schema.binned_facet(db, "depth", num_bins=2)

    assert edges == [1, 6.5, 12]
    assert counts == [3, 2]
def test_numeric_query(db: Session, condition, expected):
    """Query biosamples by a condition on numeric annotation values.

    ``condition`` and ``expected`` are supplied by the caller (presumably a
    parametrization that is not visible here). The ``table`` key is added on a
    copy so the caller's dict object is never mutated.
    """
    # Copy instead of assigning into the passed-in dict: parameter objects are
    # shared as-is between invocations.
    condition = {**condition, "table": "biosample"}

    fakes.BiosampleFactory(id="sample1", annotations={"key1": 1, "key2": 2})
    fakes.BiosampleFactory(id="sample2", annotations={"key1": 1, "key2": 3})
    # Ten extra biosamples created with factory defaults.
    for _ in range(10):
        fakes.BiosampleFactory()
    db.commit()

    q = query.BiosampleQuerySchema(conditions=[condition])
    assert {s.id for s in q.execute(db)} == expected
def test_query_envo(db: Session, condition, expected):
    """Query biosamples by one of their ENVO term fields.

    Three biosamples are created, each linked to a different term through a
    different field (local scale, broad scale, medium). ``condition`` and
    ``expected`` are supplied by the caller (presumably a parametrization that
    is not visible here); exactly one sample id is expected back.
    """
    # Copy instead of assigning into the passed-in dict: parameter objects are
    # shared as-is between invocations.
    condition = {**condition, "table": "biosample"}

    env_local = fakes.EnvoTermFactory(label="local1")
    env_broad = fakes.EnvoTermFactory(label="broad1")
    env_medium = fakes.EnvoTermFactory(label="medium1")
    fakes.BiosampleFactory(id="sample1", env_local_scale=env_local)
    fakes.BiosampleFactory(id="sample2", env_broad_scale=env_broad)
    fakes.BiosampleFactory(id="sample3", env_medium=env_medium)
    db.commit()

    q = query.BiosampleQuerySchema(conditions=[condition])
    assert [s.id for s in q.execute(db).all()] == [expected]
def test_facet_envo(db: Session):
    """Faceting on ``env_local_scale`` counts biosamples per term label."""
    first_term = fakes.EnvoTermFactory(label="local1")
    second_term = fakes.EnvoTermFactory(label="local2")
    assignments = (
        ("sample1", first_term),
        ("sample2", second_term),
        ("sample3", second_term),
    )
    for sample_id, term in assignments:
        fakes.BiosampleFactory(id=sample_id, env_local_scale=term)
    db.commit()

    schema = query.BiosampleQuerySchema(conditions=[])
    facets = schema.facet(db, "env_local_scale")

    assert facets == {
        "local1": 1,
        "local2": 2,
    }
def test_latitude_query(db: Session, op, value, expected):
    """Filter biosamples on ``latitude`` with the supplied operator and value."""
    latitudes = (("sample1", 0), ("sample2", 10), ("sample3", -10))
    for sample_id, latitude in latitudes:
        fakes.BiosampleFactory(id=sample_id, latitude=latitude)
    db.commit()

    latitude_condition = {
        "table": "biosample",
        "field": "latitude",
        "op": op,
        "value": value,
    }
    schema = query.BiosampleQuerySchema(conditions=[latitude_condition])
    found_ids = {row.id for row in schema.execute(db)}

    assert found_ids == expected
def test_envo_ancestor_query(db: Session):
    """Querying by an ancestor term also returns samples tagged with its descendant.

    ``local2`` is registered as an ancestor of ``local1``: filtering on
    ``local1`` yields only sample1, while filtering on ``local2`` yields all
    three samples.
    """
    descendant = fakes.EnvoTermFactory(label="local1")
    ancestor = fakes.EnvoTermFactory(label="local2")
    fakes.EnvoAncestorFactory(term=descendant, ancestor=ancestor)
    fakes.BiosampleFactory(id="sample1", env_local_scale=descendant)
    fakes.BiosampleFactory(id="sample2", env_local_scale=ancestor)
    fakes.BiosampleFactory(id="sample3", env_local_scale=ancestor)
    db.commit()

    def sample_ids_for(label):
        # Ids of biosamples matching an env_local_scale condition on ``label``.
        schema = query.BiosampleQuerySchema(conditions=[{
            "table": "biosample",
            "field": "env_local_scale",
            "value": label,
        }])
        return {row.id for row in schema.execute(db)}

    assert sample_ids_for("local1") == {"sample1"}
    assert sample_ids_for("local2") == {"sample1", "sample2", "sample3"}
def test_between_query_column(db: Session):
    """The ``between`` operator works on the ``depth`` and ``add_date`` fields.

    ``date0`` .. ``date3`` are defined outside this function.
    """
    rows = (
        ("sample0", 0, date0),
        ("sample1", 1, date1),
        ("sample2", 10, date2),
        ("sample3", 100, date3),
    )
    for sample_id, depth, added in rows:
        fakes.BiosampleFactory(id=sample_id, depth=depth, add_date=added)
    db.commit()

    def ids_between(field, bounds):
        # Ids of biosamples whose ``field`` satisfies the ``between`` condition.
        schema = query.BiosampleQuerySchema(conditions=[{
            "table": "biosample",
            "field": field,
            "op": "between",
            "value": bounds,
        }])
        return {row.id for row in schema.execute(db)}

    assert ids_between("depth", [0.5, 10]) == {"sample1", "sample2"}
    assert ids_between("add_date", [date0, date2]) == {"sample0", "sample1", "sample2"}
def test_between_query_annotations(db: Session):
    """The ``between`` operator works on numeric and string annotation values."""
    annotation_rows = (
        ("sample0", {"number": 0, "string": "a"}),
        ("sample1", {"number": 1, "string": "c"}),
        ("sample2", {"number": 10, "string": "e"}),
        ("sample3", {"number": 100, "string": "t"}),
    )
    for sample_id, annotations in annotation_rows:
        fakes.BiosampleFactory(id=sample_id, annotations=annotations)
    db.commit()

    def ids_between(field, bounds):
        # Ids of biosamples whose ``field`` satisfies the ``between`` condition.
        schema = query.BiosampleQuerySchema(conditions=[{
            "table": "biosample",
            "field": field,
            "op": "between",
            "value": bounds,
        }])
        return {row.id for row in schema.execute(db)}

    assert ids_between("number", [0.5, 10]) == {"sample1", "sample2"}
    assert ids_between("string", ["b", "e"]) == {"sample1", "sample2"}
def test_envo_ancestor_facet(db: Session):
    """Facet counts on ``env_local_scale`` include ancestor terms.

    ``local2`` is registered as an ancestor of ``local1``, so the sample
    tagged ``local1`` is also counted under ``local2``.
    """
    term1 = fakes.EnvoTermFactory(label="local1")
    term2 = fakes.EnvoTermFactory(label="local2")
    term3 = fakes.EnvoTermFactory(label="local3")
    fakes.EnvoAncestorFactory(term=term1, ancestor=term2)
    tagged = (
        ("sample1", term1),
        ("sample2", term2),
        ("sample3", term2),
        ("sample4", term3),
    )
    for sample_id, term in tagged:
        fakes.BiosampleFactory(id=sample_id, env_local_scale=term)
    db.commit()

    def facet_with(conditions):
        # env_local_scale facet for a schema built from ``conditions``.
        return query.BiosampleQuerySchema(conditions=conditions).facet(db, "env_local_scale")

    def scale_equals(label):
        # Condition dict selecting biosamples by env_local_scale label.
        return {
            "table": "biosample",
            "field": "env_local_scale",
            "value": label,
        }

    assert facet_with([]) == {
        "local1": 1,
        "local2": 3,
        "local3": 1,
    }
    assert facet_with([scale_equals("local1")]) == {
        "local1": 1,
        "local2": 1,
    }
    assert facet_with([scale_equals("local2")]) == {
        "local1": 1,
        "local2": 3,
    }
def test_query_multiomics(db: Session, value: int, result: bool):
    """Filter biosamples with the ``has`` operator on the ``multiomics`` field.

    One biosample gets a Metabolomics and a Metatranscriptome omics
    processing record; ``result`` states whether the query for ``value``
    should return anything.
    """
    sample = fakes.BiosampleFactory()
    for omics_type in ("Metabolomics", "Metatranscriptome"):
        fakes.OmicsProcessingFactory(
            annotations={"omics_type": omics_type},
            biosample=sample,
        )
    db.commit()

    # Populate the multiomics data and commit before querying.
    models.Biosample.populate_multiomics(db)
    db.commit()

    multiomics_condition = {
        "table": "biosample",
        "field": "multiomics",
        "op": "has",
        "value": value,
    }
    schema = query.BiosampleQuerySchema(conditions=[multiomics_condition])
    matches = list(schema.execute(db))

    assert bool(matches) is result
def test_string_query(db: Session, condition, expected):
    """Query biosamples by a condition on string annotation values.

    ``condition`` and ``expected`` are supplied by the caller (presumably a
    parametrization that is not visible here). Besides the returned ids, the
    schema's ``count`` must agree with the number of rows returned.
    """
    # Copy instead of assigning into the passed-in dict: parameter objects are
    # shared as-is between invocations.
    condition = {**condition, "table": "biosample"}

    fakes.BiosampleFactory(id="sample1", annotations={"key1": "value1", "key2": "value2"})
    fakes.BiosampleFactory(id="sample2", annotations={"key1": "value1", "key2": "value3"})
    # Ten extra biosamples created with factory defaults.
    for _ in range(10):
        fakes.BiosampleFactory()
    db.commit()

    q = query.BiosampleQuerySchema(conditions=[condition])
    results = {s.id for s in q.execute(db)}
    assert q.count(db) == len(results)
    assert results == expected
def test_faceted_query(db: Session):
    """With no conditions, facets count annotation values across all biosamples."""
    sample_annotations = {
        "sample1": {"key1": "value1", "key2": "value2"},
        "sample2": {"key1": "value1", "key2": "value3"},
        "sample3": {"key1": "value4", "key2": "value2"},
    }
    # Dicts preserve insertion order, so the samples are created 1, 2, 3.
    for sample_id, annotations in sample_annotations.items():
        fakes.BiosampleFactory(id=sample_id, annotations=annotations)
    db.commit()

    schema = query.BiosampleQuerySchema(conditions=[])
    key1_counts = schema.facet(db, "key1")
    assert key1_counts == {"value1": 2, "value4": 1}
    key2_counts = schema.facet(db, "key2")
    assert key2_counts == {"value2": 2, "value3": 1}
def test_grouped_query(db: Session):
    """Several equality conditions, two of them on the same field.

    The expected result (sample1 and sample2) is consistent with conditions
    on the same field being OR-ed and different fields being AND-ed; the
    grouping itself is implemented outside this test.
    """
    fakes.BiosampleFactory(id="sample1", annotations={"key1": "value1", "key2": "value2"})
    fakes.BiosampleFactory(id="sample2", annotations={"key1": "value1", "key2": "value3"})
    fakes.BiosampleFactory(id="sample3", annotations={"key1": "value4", "key2": "value2"})
    db.commit()

    def equals(field, value):
        # Equality condition on a biosample field.
        return {
            "table": "biosample",
            "field": field,
            "value": value,
            "op": "==",
        }

    schema = query.BiosampleQuerySchema(
        conditions=[
            equals("key2", "value2"),
            equals("key1", "value1"),
            equals("key2", "value3"),
        ],
    )
    found_ids = {row.id for row in schema.execute(db)}

    assert found_ids == {"sample1", "sample2"}
def test_basic_query(db: Session, table):
    """An unconditioned query on ``table`` returns the instance created for it.

    One factory instance and one empty query schema are created for every
    supported table; only the pair selected by ``table`` is checked.
    """
    cases: Dict[str, Tuple[fakes.AnnotatedFactory, query.BaseQuerySchema]] = {
        "study": (
            fakes.StudyFactory(),
            query.StudyQuerySchema(),
        ),
        "omics_processing": (
            fakes.OmicsProcessingFactory(),
            query.OmicsProcessingQuerySchema(),
        ),
        "biosample": (
            fakes.BiosampleFactory(),
            query.BiosampleQuerySchema(),
        ),
        "reads_qc": (
            fakes.ReadsQCFactory(),
            query.ReadsQCQuerySchema(),
        ),
        "metagenome_assembly": (
            fakes.MetagenomeAssemblyFactory(),
            query.MetagenomeAssemblyQuerySchema(),
        ),
        "metagenome_annotation": (
            fakes.MetagenomeAnnotationFactory(),
            query.MetagenomeAnnotationQuerySchema(),
        ),
        "metaproteomic_analysis": (
            fakes.MetaproteomicAnalysisFactory(),
            query.MetaproteomicAnalysisQuerySchema(),
        ),
    }
    db.commit()

    selected = cases[table]
    rows = selected[1].execute(db)
    returned_ids = {row.id for row in rows.all()}

    assert selected[0].id in returned_ids
# Handler that forwards the biosample query and the database session to
# crud.get_environmental_sankey and returns its result unchanged.
# NOTE: the parameter named ``query`` shadows the outer ``query`` name inside
# the body; the default value is built from the outer name once, at
# definition time.
async def get_environmental_sankey(
    query: query.BiosampleQuerySchema = query.BiosampleQuerySchema(),
    db: Session = Depends(get_db),
):
    sankey = crud.get_environmental_sankey(db, query)
    return sankey
# Handler that forwards the biosample query and the database session to
# crud.get_environmental_geospatial and returns its result unchanged.
# NOTE: the parameter named ``query`` shadows the outer ``query`` name inside
# the body; the default value is built from the outer name once, at
# definition time.
async def get_environmental_geospatial(
    query: query.BiosampleQuerySchema = query.BiosampleQuerySchema(),
    db: Session = Depends(get_db),
):
    geospatial = crud.get_environmental_geospatial(db, query)
    return geospatial
def search_biosample(db: Session, conditions: List[query.ConditionSchema]) -> Query:
    """Return the result of executing a biosample query built from ``conditions``.

    The value returned is whatever ``BiosampleQuerySchema.execute`` produces
    for the given session.
    """
    schema = query.BiosampleQuerySchema(conditions=conditions)
    return schema.execute(db)
def test_range_bins_default_min_max(db: Session, biosamples):
    """Without explicit minimum/maximum, three ``depth`` bins span 1 to 22."""
    schema = query.BiosampleQuerySchema()
    edges, counts = schema.binned_facet(db, "depth", num_bins=3)

    assert edges == [1, 8, 15, 22]
    assert counts == [3, 2, 1]
def test_faceted_query_with_no_results(db: Session):
    """Faceting when this test has created no biosamples returns an empty mapping."""
    schema = query.BiosampleQuerySchema(conditions=[])
    facets = schema.facet(db, "key1")

    assert facets == {}
def facet_biosample(
    db: Session,
    attribute: str,
    conditions: List[query.ConditionSchema],
    **kwargs,
) -> query.FacetResponse:
    """Compute the facet of ``attribute`` over the matching biosamples.

    A ``BiosampleQuerySchema`` is built from ``conditions`` and its ``facet``
    result is wrapped in a ``FacetResponse``. Extra keyword arguments are
    accepted but are not used by this function.
    """
    schema = query.BiosampleQuerySchema(conditions=conditions)
    counts = schema.facet(db, attribute)
    return query.FacetResponse(facets=counts)