def test_query_pi(db: Session):
    """Facet and filter by principal investigator name.

    Creates two studies with different investigator names and one biosample
    per study, then checks the study facet counts, a filtered study query,
    and a biosample query that filters through the ``study`` table.
    """
    john_study = fakes.StudyFactory(
        id="study1", principal_investigator__name="John Doe"
    )
    jane_study = fakes.StudyFactory(
        id="study2", principal_investigator__name="Jane Doe"
    )
    fakes.BiosampleFactory(id="sample1", study=john_study)
    fakes.BiosampleFactory(id="sample2", study=jane_study)
    db.commit()

    pi_facets = query.StudyQuerySchema().facet(db, "principal_investigator_name")
    assert pi_facets == {"John Doe": 1, "Jane Doe": 1}

    def john_doe_condition():
        # Fresh dict per schema so the two queries never share one object.
        return {
            "table": "study",
            "field": "principal_investigator_name",
            "value": "John Doe",
        }

    study_query = query.StudyQuerySchema(conditions=[john_doe_condition()])
    study_ids = [row.id for row in study_query.execute(db)]
    assert ["study1"] == study_ids

    sample_query = query.BiosampleQuerySchema(conditions=[john_doe_condition()])
    sample_ids = [row.id for row in sample_query.execute(db)]
    assert ["sample1"] == sample_ids
def test_faceted_filtered_query(db: Session):
    """Facet counts reflect only the rows selected by the query conditions.

    Three annotated biosamples are stored; the query is restricted to
    ``sample2`` and each facet must report just that sample's values.
    """
    annotations_by_id = {
        "sample1": {"key1": "value1", "key2": "value2"},
        "sample2": {"key1": "value1", "key2": "value3"},
        "sample3": {"key1": "value4", "key2": "value2"},
    }
    for sample_id, annotations in annotations_by_id.items():
        fakes.BiosampleFactory(id=sample_id, annotations=annotations)
    db.commit()

    only_sample2 = query.BiosampleQuerySchema(
        conditions=[
            {
                "table": "biosample",
                "field": "id",
                "op": "==",
                "value": "sample2",
            }
        ]
    )

    expected_counts = (("key1", {"value1": 1}), ("key2", {"value3": 1}))
    for attribute, counts in expected_counts:
        assert only_sample2.facet(db, attribute) == counts
def test_query_gene_function_biosample(db: Session):
    """Filter biosamples by gene function id.

    ``sample1`` gets a metagenome annotation carrying ten gene functions
    (``function0`` .. ``function9``); ``sample2`` gets none.  Both gene
    function aggregations are populated before querying.
    """
    annotated_sample = fakes.BiosampleFactory(id="sample1")
    fakes.BiosampleFactory(id="sample2")

    functions = []
    for index in range(10):
        functions.append(fakes.MGAGeneFunction(function__id=f"function{index}"))
    fakes.MetagenomeAnnotationFactory(
        gene_functions=functions,
        omics_processing__biosample=annotated_sample,
    )
    db.commit()

    models.MGAGeneFunctionAggregation.populate(db)
    models.MetaPGeneFunctionAggregation.populate(db)
    db.commit()

    def sample_ids_for(function_id):
        # Run a biosample query filtered on a single gene function id.
        schema = query.BiosampleQuerySchema(
            conditions=[
                {
                    "table": "gene_function",
                    "field": "id",
                    "value": function_id,
                }
            ],
        )
        return {row.id for row in schema.execute(db)}

    assert sample_ids_for("function1") == {"sample1"}
    assert sample_ids_for("invalid") == set()
def test_distinct_results(db: Session):
    """A study with several biosamples is returned exactly once."""
    parent_study = fakes.StudyFactory(id="study1")
    for sample_id in ("sample1", "sample2", "sample3", "sample4"):
        fakes.BiosampleFactory(id=sample_id, study=parent_study)
    db.commit()

    unfiltered = query.StudyQuerySchema(conditions=[])
    rows = unfiltered.execute(db).all()
    assert len(rows) == 1
def test_numeric_query(db: Session, condition, expected):
    """Check that a condition over numeric annotations selects ``expected``.

    Args:
        db: Session used to create and query the fake biosamples.
        condition: Condition mapping supplied by the caller; its ``table``
            entry is set to ``"biosample"`` here.
        expected: Set of biosample ids the query must return.
    """
    # Build a new dict instead of assigning into the caller's one: the
    # argument is supplied from outside (presumably via parametrization,
    # which is not visible here) and must not be mutated between runs.
    condition = {**condition, "table": "biosample"}

    fakes.BiosampleFactory(id="sample1", annotations={"key1": 1, "key2": 2})
    fakes.BiosampleFactory(id="sample2", annotations={"key1": 1, "key2": 3})
    # Extra samples with factory-default values act as background rows.
    for _ in range(10):
        fakes.BiosampleFactory()
    db.commit()

    q = query.BiosampleQuerySchema(conditions=[condition])
    assert {s.id for s in q.execute(db)} == expected
def test_facet_foreign_table(db: Session):
    """Faceting a study query on ``env_local_scale`` or ``sample_id`` is empty.

    Three biosamples with environment terms are stored, but the facets are
    requested through ``StudyQuerySchema`` and must both come back as ``{}``.
    """
    first_term = fakes.EnvoTermFactory(label="local1")
    second_term = fakes.EnvoTermFactory(label="local2")
    sample_terms = (
        ("sample1", first_term),
        ("sample2", second_term),
        ("sample3", second_term),
    )
    for sample_id, term in sample_terms:
        fakes.BiosampleFactory(id=sample_id, env_local_scale=term)
    db.commit()

    study_query = query.StudyQuerySchema(conditions=[])
    for attribute in ("env_local_scale", "sample_id"):
        assert study_query.facet(db, attribute) == {}
def test_query_envo(db: Session, condition, expected):
    """Check that an environment-term condition selects a single biosample.

    Args:
        db: Session used to create and query the fake biosamples.
        condition: Condition mapping supplied by the caller; its ``table``
            entry is set to ``"biosample"`` here.
        expected: Id of the one biosample the query must return.
    """
    # Copy rather than mutate: the dict is owned by the caller (presumably a
    # parametrization entry, not visible here) and should stay untouched.
    condition = {**condition, "table": "biosample"}

    env_local = fakes.EnvoTermFactory(label="local1")
    env_broad = fakes.EnvoTermFactory(label="broad1")
    env_medium = fakes.EnvoTermFactory(label="medium1")
    # Each sample carries exactly one of the three environment attributes.
    fakes.BiosampleFactory(id="sample1", env_local_scale=env_local)
    fakes.BiosampleFactory(id="sample2", env_broad_scale=env_broad)
    fakes.BiosampleFactory(id="sample3", env_medium=env_medium)
    db.commit()

    q = query.BiosampleQuerySchema(conditions=[condition])
    assert [s.id for s in q.execute(db).all()] == [expected]
def test_api_query(db: Session, client: TestClient, condition, expected):
    """Check the biosample search endpoint against one condition.

    Args:
        db: Session used to create the fake biosamples.
        client: Test client used to POST to ``/api/biosample/search``.
        condition: Condition mapping supplied by the caller; its ``table``
            entry is set to ``"biosample"`` here.
        expected: Set of biosample ids expected in the response results.
    """
    # Copy rather than mutate: the dict is owned by the caller (presumably a
    # parametrization entry, not visible here) and should stay untouched.
    condition = {**condition, "table": "biosample"}

    fakes.BiosampleFactory(
        id="sample1", annotations={"key1": "value1", "key2": "value2"}
    )
    fakes.BiosampleFactory(
        id="sample2", annotations={"key1": "value1", "key2": "value3"}
    )
    # Extra samples with factory-default values act as background rows.
    for _ in range(10):
        fakes.BiosampleFactory()
    db.commit()

    resp = client.post("/api/biosample/search", json={"conditions": [condition]})
    assert_status(resp)
    results = resp.json()["results"]
    assert {s["id"] for s in results} == expected
def test_facet_envo(db: Session):
    """Faceting biosamples on ``env_local_scale`` counts samples per label."""
    term_one = fakes.EnvoTermFactory(label="local1")
    term_two = fakes.EnvoTermFactory(label="local2")
    assignments = (
        ("sample1", term_one),
        ("sample2", term_two),
        ("sample3", term_two),
    )
    for sample_id, term in assignments:
        fakes.BiosampleFactory(id=sample_id, env_local_scale=term)
    db.commit()

    unfiltered = query.BiosampleQuerySchema(conditions=[])
    label_counts = unfiltered.facet(db, "env_local_scale")
    assert label_counts == {"local1": 1, "local2": 2}
def test_api_faceting(db: Session, client: TestClient):
    """The biosample facet endpoint returns per-value counts for annotations.

    Posts to ``/api/biosample/facet`` with no conditions, once per annotation
    key, and compares the ``facets`` entry of each response.
    """
    sample_annotations = (
        ("sample1", {"key1": "value1", "key2": "value2"}),
        ("sample2", {"key1": "value1", "key2": "value3"}),
        ("sample3", {"key1": "value4", "key2": "value2"}),
    )
    for sample_id, annotations in sample_annotations:
        fakes.BiosampleFactory(id=sample_id, annotations=annotations)
    db.commit()

    expected_facets = (
        ("key1", {"value1": 2, "value4": 1}),
        ("key2", {"value2": 2, "value3": 1}),
    )
    for attribute, counts in expected_facets:
        payload = {"conditions": [], "attribute": attribute}
        resp = client.post("/api/biosample/facet", json=payload)
        assert_status(resp)
        assert resp.json()["facets"] == counts
def test_latitude_query(db: Session, op, value, expected):
    """Compare biosample latitudes against ``value`` using operator ``op``.

    Three biosamples at latitudes 0, 10 and -10 are stored; ``expected`` is
    the set of ids the resulting query must return.
    """
    latitudes = {"sample1": 0, "sample2": 10, "sample3": -10}
    for sample_id, latitude in latitudes.items():
        fakes.BiosampleFactory(id=sample_id, latitude=latitude)
    db.commit()

    latitude_condition = {
        "table": "biosample",
        "field": "latitude",
        "op": op,
        "value": value,
    }
    schema = query.BiosampleQuerySchema(conditions=[latitude_condition])
    matched_ids = {row.id for row in schema.execute(db)}
    assert matched_ids == expected
def test_query_gene_function_mga_metap(db: Session):
    """Filter metagenome annotations and metaproteomic analyses by function.

    ``mga1`` carries ten gene functions; ``metap1`` has one peptide whose
    best protein is ``function2`` and which is linked to ``function1``.
    After populating both aggregations, each schema must find its record
    for ``function1`` and nothing for an unknown function id.
    """
    annotated_sample = fakes.BiosampleFactory(id="sample1")
    fakes.BiosampleFactory(id="sample2")

    functions = []
    for index in range(10):
        functions.append(fakes.MGAGeneFunction(function__id=f"function{index}"))
    fakes.MetagenomeAnnotationFactory(
        id="mga1",
        gene_functions=functions,
        omics_processing__biosample=annotated_sample,
    )

    analysis = fakes.MetaproteomicAnalysisFactory(id="metap1")
    peptide = fakes.MetaproteomicPeptideFactory(
        metaproteomic_analysis=analysis,
        best_protein_object=functions[2],
    )
    fakes.PeptideMGAGeneFunctionFactory(
        mga_gene_function=functions[1],
        metaproteomic_peptide=peptide,
    )
    db.commit()

    models.MGAGeneFunctionAggregation.populate(db)
    models.MetaPGeneFunctionAggregation.populate(db)
    db.commit()

    # (schema class, id expected for "function1"); query order matches the
    # sequence annotation-hit, annotation-miss, analysis-hit, analysis-miss.
    schema_cases = (
        (query.MetagenomeAnnotationQuerySchema, "mga1"),
        (query.MetaproteomicAnalysisQuerySchema, "metap1"),
    )
    for schema_cls, hit_id in schema_cases:
        for function_id, expected_ids in (("function1", {hit_id}), ("invalid", set())):
            schema = schema_cls(
                conditions=[
                    {
                        "table": "gene_function",
                        "field": "id",
                        "value": function_id,
                    }
                ],
            )
            assert {row.id for row in schema.execute(db)} == expected_ids
def test_get_environmental_aggregation(db: Session, client: TestClient):
    """Smoke-test the sankey and geospatial environment endpoints.

    Each endpoint must respond successfully to a POST without a body, and
    must return an empty list when filtered on the study id
    ``"not a study"``.
    """
    for _ in range(10):
        fakes.BiosampleFactory()
    # Commit the fake rows before hitting the API, as the sibling tests do,
    # so the endpoints run against persisted data.
    db.commit()

    unmatched_study = {
        "conditions": [
            {
                "table": "study",
                "field": "id",
                "value": "not a study",
            }
        ]
    }
    for url in ("/api/environment/sankey", "/api/environment/geospatial"):
        assert_status(client.post(url))

        resp = client.post(url, json=unmatched_study)
        assert_status(resp)
        assert resp.json() == []
def test_all_results(db: Session, client: TestClient):
    """A limit larger than the row count returns every biosample.

    Ten biosamples are stored and searched with ``limit=20``; the result
    list, the ``count`` field and the ``Resource-Count`` header must all
    report ten.
    """
    for _ in range(10):
        fakes.BiosampleFactory()
    db.commit()

    resp = client.post("/api/biosample/search?limit=20")
    # Check the status first (as the other API tests do) so a failed request
    # is reported as such instead of surfacing as a confusing KeyError.
    assert_status(resp)

    body = resp.json()
    assert len(body["results"]) == 10
    assert body["count"] == 10
    assert int(resp.headers["Resource-Count"]) == 10
def test_api_summary(db: Session, client: TestClient):
    """The summary and stats endpoints respond successfully on fake data."""
    # TODO: This would be better queried against the real data
    factories = (
        fakes.BiosampleFactory,
        fakes.MetagenomeAnnotationFactory,
        fakes.MetagenomeAssemblyFactory,
        fakes.MetaproteomicAnalysisFactory,
        fakes.DataObjectFactory,
    )
    # Ten rounds, each creating one record per factory in the listed order.
    for _ in range(10):
        for make_record in factories:
            make_record()
    db.commit()

    for url in ("/api/summary", "/api/stats"):
        assert_status(client.get(url))
def test_string_query(db: Session, condition, expected):
    """Check that a condition over string annotations selects ``expected``.

    Also verifies that the schema's ``count`` agrees with the number of ids
    actually returned by ``execute``.

    Args:
        db: Session used to create and query the fake biosamples.
        condition: Condition mapping supplied by the caller; its ``table``
            entry is set to ``"biosample"`` here.
        expected: Set of biosample ids the query must return.
    """
    # Copy rather than mutate: the dict is owned by the caller (presumably a
    # parametrization entry, not visible here) and should stay untouched.
    condition = {**condition, "table": "biosample"}

    fakes.BiosampleFactory(
        id="sample1", annotations={"key1": "value1", "key2": "value2"}
    )
    fakes.BiosampleFactory(
        id="sample2", annotations={"key1": "value1", "key2": "value3"}
    )
    # Extra samples with factory-default values act as background rows.
    for _ in range(10):
        fakes.BiosampleFactory()
    db.commit()

    q = query.BiosampleQuerySchema(conditions=[condition])
    results = {s.id for s in q.execute(db)}
    assert q.count(db) == len(results)
    assert results == expected
def test_faceted_query(db: Session):
    """An unfiltered biosample query facets over every stored annotation."""
    for sample_id, first, second in (
        ("sample1", "value1", "value2"),
        ("sample2", "value1", "value3"),
        ("sample3", "value4", "value2"),
    ):
        fakes.BiosampleFactory(
            id=sample_id,
            annotations={"key1": first, "key2": second},
        )
    db.commit()

    unfiltered = query.BiosampleQuerySchema(conditions=[])

    key1_counts = unfiltered.facet(db, "key1")
    assert key1_counts == {"value1": 2, "value4": 1}

    key2_counts = unfiltered.facet(db, "key2")
    assert key2_counts == {"value2": 2, "value3": 1}
def biosamples(db: Session):
    """Create six biosamples with fixed depths and collection dates.

    The rows are committed before returning.  Presumably registered as a
    pytest fixture; the decorator is not visible here.
    """
    depth_and_date = (
        (1, datetime(2020, 1, 1)),
        (2, datetime(2020, 1, 2)),
        (3, datetime(2020, 1, 8)),
        (11, datetime(2020, 1, 3)),
        (12, datetime(2020, 2, 9)),
        (22, datetime(2020, 2, 10)),
    )
    for sample_depth, collected_on in depth_and_date:
        fakes.BiosampleFactory(depth=sample_depth, collection_date=collected_on)
    db.commit()
def test_grouped_query(db: Session):
    """Combine several equality conditions, two of them on the same field.

    The expected result (``sample1`` and ``sample2``) is consistent with
    conditions on the same field being alternatives while conditions on
    different fields must all hold.
    """
    stored_annotations = {
        "sample1": {"key1": "value1", "key2": "value2"},
        "sample2": {"key1": "value1", "key2": "value3"},
        "sample3": {"key1": "value4", "key2": "value2"},
    }
    for sample_id, annotations in stored_annotations.items():
        fakes.BiosampleFactory(id=sample_id, annotations=annotations)
    db.commit()

    def equals(field, value):
        # Equality condition on a biosample field.
        return {"table": "biosample", "field": field, "value": value, "op": "=="}

    schema = query.BiosampleQuerySchema(
        conditions=[
            equals("key2", "value2"),
            equals("key1", "value1"),
            equals("key2", "value3"),
        ],
    )
    matched_ids = {row.id for row in schema.execute(db)}
    assert matched_ids == {"sample1", "sample2"}
def test_between_query_column(db: Session):
    """``between`` works on the ``depth`` and ``add_date`` biosample fields.

    Uses the module-level ``date0`` .. ``date3`` values as the samples'
    ``add_date``.  The second assertion shows both bounds are inclusive.
    """
    sample_rows = (
        ("sample0", 0, date0),
        ("sample1", 1, date1),
        ("sample2", 10, date2),
        ("sample3", 100, date3),
    )
    for sample_id, depth, added in sample_rows:
        fakes.BiosampleFactory(id=sample_id, depth=depth, add_date=added)
    db.commit()

    def ids_between(field, bounds):
        # Run a biosample query with a single ``between`` condition.
        schema = query.BiosampleQuerySchema(
            conditions=[
                {
                    "table": "biosample",
                    "field": field,
                    "op": "between",
                    "value": bounds,
                }
            ]
        )
        return {row.id for row in schema.execute(db)}

    assert ids_between("depth", [0.5, 10]) == {"sample1", "sample2"}
    assert ids_between("add_date", [date0, date2]) == {
        "sample0",
        "sample1",
        "sample2",
    }
def test_envo_ancestor_query(db: Session):
    """Querying an ancestor term also returns samples tagged with the child.

    ``local2`` is registered as an ancestor of ``local1``: filtering on
    ``local1`` yields only ``sample1``, while filtering on ``local2`` yields
    all three samples.
    """
    child_term = fakes.EnvoTermFactory(label="local1")
    parent_term = fakes.EnvoTermFactory(label="local2")
    fakes.EnvoAncestorFactory(term=child_term, ancestor=parent_term)

    for sample_id, term in (
        ("sample1", child_term),
        ("sample2", parent_term),
        ("sample3", parent_term),
    ):
        fakes.BiosampleFactory(id=sample_id, env_local_scale=term)
    db.commit()

    expectations = (
        ("local1", {"sample1"}),
        ("local2", {"sample1", "sample2", "sample3"}),
    )
    for label, expected_ids in expectations:
        schema = query.BiosampleQuerySchema(
            conditions=[
                {
                    "table": "biosample",
                    "field": "env_local_scale",
                    "value": label,
                }
            ]
        )
        assert {row.id for row in schema.execute(db)} == expected_ids
def test_between_query_annotations(db: Session):
    """``between`` works on numeric and string annotation values.

    The second assertion shows the upper bound is inclusive (``"e"``
    matches ``sample2``).
    """
    annotation_rows = (
        ("sample0", 0, "a"),
        ("sample1", 1, "c"),
        ("sample2", 10, "e"),
        ("sample3", 100, "t"),
    )
    for sample_id, number, string in annotation_rows:
        fakes.BiosampleFactory(
            id=sample_id,
            annotations={"number": number, "string": string},
        )
    db.commit()

    ranges = (
        ("number", [0.5, 10]),
        ("string", ["b", "e"]),
    )
    for field, bounds in ranges:
        schema = query.BiosampleQuerySchema(
            conditions=[
                {
                    "table": "biosample",
                    "field": field,
                    "op": "between",
                    "value": bounds,
                }
            ]
        )
        matched_ids = {row.id for row in schema.execute(db)}
        assert matched_ids == {"sample1", "sample2"}
def test_last_page(db: Session, client: TestClient):
    """The final page of a paginated search has no ``next`` link.

    With ten biosamples, ``limit=9&offset=9`` must return one result while
    still reporting a total of ten, and the parsed links must contain
    ``first`` (offset 0) and ``last`` (offset 9) but not ``next``.
    """
    for _ in range(10):
        fakes.BiosampleFactory()
    db.commit()

    resp = client.post("/api/biosample/search?limit=9&offset=9")
    # Check the status first (as the other API tests do) so a failed request
    # is reported as such instead of surfacing as a confusing KeyError.
    assert_status(resp)

    body = resp.json()
    assert len(body["results"]) == 1
    assert body["count"] == 10
    assert int(resp.headers["Resource-Count"]) == 10

    links = parse_links(resp)
    assert "offset=0" in links["first"]
    assert "next" not in links
    assert "offset=9" in links["last"]
def test_envo_ancestor_facet(db: Session):
    """Facet counts on ``env_local_scale`` include ancestor terms.

    ``local2`` is registered as an ancestor of ``local1``, so the sample
    tagged ``local1`` is also counted under ``local2``.  The facet is
    checked unfiltered and when filtered on each of the two related terms.
    """
    child_term = fakes.EnvoTermFactory(label="local1")
    parent_term = fakes.EnvoTermFactory(label="local2")
    unrelated_term = fakes.EnvoTermFactory(label="local3")
    fakes.EnvoAncestorFactory(term=child_term, ancestor=parent_term)

    for sample_id, term in (
        ("sample1", child_term),
        ("sample2", parent_term),
        ("sample3", parent_term),
        ("sample4", unrelated_term),
    ):
        fakes.BiosampleFactory(id=sample_id, env_local_scale=term)
    db.commit()

    def label_condition(label):
        # Condition restricting biosamples to one environment label.
        return {
            "table": "biosample",
            "field": "env_local_scale",
            "value": label,
        }

    # (conditions, expected facet) in the order the checks are performed.
    facet_cases = (
        ([], {"local1": 1, "local2": 3, "local3": 1}),
        ([label_condition("local1")], {"local1": 1, "local2": 1}),
        ([label_condition("local2")], {"local1": 1, "local2": 3}),
    )
    for conditions, expected_facet in facet_cases:
        schema = query.BiosampleQuerySchema(conditions=conditions)
        assert schema.facet(db, "env_local_scale") == expected_facet
def test_query_multiomics(db: Session, value: int, result: bool):
    """Query biosamples with the ``has`` operator on ``multiomics``.

    One biosample is given a Metabolomics and a Metatranscriptome omics
    processing record, and ``Biosample.populate_multiomics`` is run.
    ``result`` states whether filtering on ``value`` should return any row.
    """
    sample = fakes.BiosampleFactory()
    for omics_type in ("Metabolomics", "Metatranscriptome"):
        fakes.OmicsProcessingFactory(
            annotations={"omics_type": omics_type},
            biosample=sample,
        )
    db.commit()

    models.Biosample.populate_multiomics(db)
    db.commit()

    multiomics_condition = {
        "table": "biosample",
        "field": "multiomics",
        "op": "has",
        "value": value,
    }
    schema = query.BiosampleQuerySchema(conditions=[multiomics_condition])
    matches = list(schema.execute(db))
    assert bool(matches) is result
def test_basic_query(db: Session, table):
    """An unfiltered query on ``table`` returns the record created for it.

    One fake record is created for every supported table (regardless of
    which ``table`` is under test), paired with a default-constructed query
    schema for that table.
    """
    cases: Dict[str, Tuple[fakes.AnnotatedFactory, query.BaseQuerySchema]] = {
        "study": (
            fakes.StudyFactory(),
            query.StudyQuerySchema(),
        ),
        "omics_processing": (
            fakes.OmicsProcessingFactory(),
            query.OmicsProcessingQuerySchema(),
        ),
        "biosample": (
            fakes.BiosampleFactory(),
            query.BiosampleQuerySchema(),
        ),
        "reads_qc": (
            fakes.ReadsQCFactory(),
            query.ReadsQCQuerySchema(),
        ),
        "metagenome_assembly": (
            fakes.MetagenomeAssemblyFactory(),
            query.MetagenomeAssemblyQuerySchema(),
        ),
        "metagenome_annotation": (
            fakes.MetagenomeAnnotationFactory(),
            query.MetagenomeAnnotationQuerySchema(),
        ),
        "metaproteomic_analysis": (
            fakes.MetaproteomicAnalysisFactory(),
            query.MetaproteomicAnalysisQuerySchema(),
        ),
    }
    db.commit()

    record, schema = cases[table]
    rows = schema.execute(db)
    returned_ids = {row.id for row in rows.all()}
    assert record.id in returned_ids
def gold_tree_biosamples(db):
    """Create one biosample per combination of five two-valued levels.

    Each of the 32 samples gets an id made of its five 0/1 indices joined
    by underscores, and the matching ``ecosystem``, ``ecosystem_category``,
    ``ecosystem_type``, ``ecosystem_subtype`` and ``specific_ecosystem``
    values.  The rows are committed and the list of samples is yielded.
    Presumably registered as a pytest fixture; the decorator is not visible
    here.
    """
    samples = [
        fakes.BiosampleFactory(
            id="_".join(map(str, levels)),
            ecosystem=f"ecosystem_{levels[0]}",
            ecosystem_category=f"category_{levels[1]}",
            ecosystem_type=f"type_{levels[2]}",
            ecosystem_subtype=f"subtype_{levels[3]}",
            specific_ecosystem=f"specific_{levels[4]}",
        )
        # repeat=5 enumerates the same tuples, in the same order, as five
        # explicit range(2) arguments.
        for levels in product(range(2), repeat=5)
    ]
    db.commit()
    yield samples