def get_documents_by_method(solr_api_url, auth_object, q="*:*", method="BET"):
    """Fetch study documents whose ``E.method_s`` field matches *method*.

    Args:
        solr_api_url: URL passed unchanged to ``client_solr.post``.
        auth_object: Authentication object passed unchanged to
            ``client_solr.post``.
        q: Text filter handed to ``StudyDocuments.getQuery``.
        method: Value inserted into the ``E.method_s`` study filter as
            ``"(<method>)"``.

    Returns:
        A ``pandas.DataFrame`` built from the parsed documents, or ``None``
        when the response status is not 200 or the response body could not
        be parsed.
    """
    docs_query = client_solr.StudyDocuments()
    docs_query.settings['endpointfilter'] = None
    docs_query.settings['query_guidance'] = None
    docs_query.settings['query_organism'] = None
    docs_query.setStudyFilter({
        "topcategory_s": "*",
        "endpointcategory_s": "*",
        "E.method_s": "({})".format(method),
    })
    docs_query.settings['fields'] = "*"
    query = docs_query.getQuery(
        textfilter=q, facets=None, fq=None, rows=10000,
        _params=True, _conditions=True, _composition=True,
    )

    r = client_solr.post(solr_api_url, query=query, auth=auth_object)
    if r.status_code != 200:
        logger.info(r.status_code)
        return None

    try:
        rows = docs_query.parse(r.json()['response']['docs'], process=None)
        return pd.DataFrame(rows)
    except Exception:
        # Best effort: show the raw body so a malformed response can be
        # diagnosed. ``Exception`` (rather than a bare ``except``) lets
        # KeyboardInterrupt and SystemExit propagate.
        print(r.text)
        return None
def get_docs(query):
    """Run a text-filtered study-document query and return the parsed hits.

    The Solr endpoint and credentials come from the module-level
    ``solr_api_url`` and ``auth_object``. The generated query is printed
    before it is sent.

    Args:
        query: Text filter handed to ``StudyDocuments.getQuery``.

    Returns:
        A ``pandas.DataFrame`` of the parsed documents, or ``None`` (after
        printing the status code) when the response status is not 200.
    """
    study_docs = client_solr.StudyDocuments()
    for setting in ('endpointfilter', 'query_guidance', 'query_organism'):
        study_docs.settings[setting] = None
    study_docs.settings['fields'] = "*"

    solr_query = study_docs.getQuery(
        textfilter=query,
        facets=None,
        fq=None,
        rows=10000,
        _params=True,
        _conditions=True,
        _composition=False,
    )
    print(solr_query)

    response = client_solr.post(solr_api_url, query=solr_query, auth=auth_object)
    if response.status_code != 200:
        print(response.status_code)
        return None

    hits = response.json()['response']['docs']
    return pd.DataFrame(study_docs.parse(hits, process=None))
def dose_response_overview(query):
    """Plot dose-response data for each method/endpoint facet of *query*.

    A facet summary is requested for *query* (``"*:*"`` when *query* is
    the empty string). For every summary row, the study documents that have
    a concentration value for that method and endpoint are fetched, then
    split by cell type, result unit and concentration unit. Each non-empty
    subset is passed to ``plot_dose_response`` together with a PNG path
    under ``<folder_output>/data``.

    Relies on module-level ``solr_api_url``, ``auth_object``,
    ``annotation_folder``, ``folder_output``, ``log_query``, ``beautify``
    and ``plot_dose_response``.

    Args:
        query: Solr query string used for the facet summary.

    Returns:
        None. Results are printed and written out by ``plot_dose_response``.
    """
    tag_method = "E.method_s"
    tag_endpoint = "effectendpoint_s"
    tag_conc_unit = '_CONDITION_concentration_UNIT_s'
    tag_conc = '_CONDITION_concentration_d'
    tag_time_unit = '_CONDITION_exposure_time_UNIT_s'
    tag_time = '_CONDITION_exposure_time_d'
    tag_cell = "E.cell_type_s"

    facets = client_solr.Facets()
    facets.set_annotation_folder(annotation_folder)
    summary = facets.summary(
        solr_api_url, auth_object,
        query="*:*" if query == "" else query,
        fields=[
            "topcategory_s", "endpointcategory_s", tag_method, tag_endpoint,
            "effectendpoint_type_s", tag_conc, tag_conc_unit,
            tag_time, tag_time_unit,
        ],
        log_query=log_query, log_result=beautify,
    )

    for _, row in summary.iterrows():
        method = row[tag_method]
        endpoint = row[tag_endpoint]
        solr_query = {
            'q': "{}:{} {}:{} {}:[* TO *] ".format(
                tag_method, method, tag_endpoint, endpoint, tag_conc),
            'fq': 'type_s:study',
            'wt': 'json',
            'rows': 10000,
        }

        response = client_solr.post(solr_api_url, query=solr_query, auth=auth_object)
        try:
            if response.status_code != 200:
                # Same handling as the other query helpers in this file:
                # report the status and move on to the next facet row.
                print(response.status_code)
                continue
            docs = pd.DataFrame(response.json()['response']['docs'])
        finally:
            # Release the connection on every path, including the skips.
            response.close()

        if tag_conc not in docs.columns:
            continue

        docs["time"] = ""
        if tag_time in docs.columns:
            docs["time"] = docs[tag_time]
            # The unit is only appended when a time value exists.
            # NOTE(review): if the exposure-time column is numeric this
            # string concatenation raises TypeError — confirm the dtype
            # returned by the service.
            if tag_time_unit in docs.columns:
                docs["time"] = docs["time"] + " " + row[tag_time_unit]

        for cell in docs[tag_cell].unique():
            cell_docs = docs.loc[docs[tag_cell] == cell]
            for unit in cell_docs["unit_s"].unique():
                # Subset the per-cell frame (not the full frame) so every
                # plot contains a single cell type.
                unit_docs = cell_docs.loc[cell_docs["unit_s"] == unit]
                for cu in unit_docs[tag_conc_unit].unique():
                    subset = unit_docs.loc[unit_docs[tag_conc_unit] == cu]
                    if subset.empty:
                        continue
                    print(method, endpoint, cell, unit, cu)
                    # Format only the file name so braces in folder_output
                    # cannot interfere with the placeholders.
                    file_name = "{}_{}_{}_{}_{}.png".format(
                        method, endpoint, unit.replace("/", "_"),
                        cell, cu.replace("/", "_"))
                    imgfile = os.path.join(folder_output, "data", file_name)
                    plot_dose_response(
                        subset, method, endpoint, unit, cu,
                        facet_row="time", facet_col="publicname_s",
                        color="reference_owner_s", imgfile=imgfile,
                    )
facets = client_solr.Facets() facets.set_annotation_folder(annotation_folder) # FAIR - Findable ## Data model import pandas as pd import pandas as pd docs_query = "{} +publicname_s:({})".format(query, "NRCWE-006") print(docs_query) _query = {'q': docs_query, 'fq': 'type_s:study', 'wt': 'json', 'rows': 10000} r = client_solr.post(solr_api_url, query=_query, auth=auth_object) tmp = pd.DataFrame(r.json()['response']['docs']) print(tmp.shape) print(tmp.columns) tmp.head() print(tmp["owner_name_s"].unique()) doc_uuids = tmp["document_uuid_s"].unique() print(tmp["E.method_s"].unique()) #for doc_uuid in doc_uuids: # doc = tmp.loc[tmp["document_uuid_s"]==doc_uuid] cols = [ "publicname_s", "document_uuid_s", "effectendpoint_s", "loValue_d", "E.method_s", "unit_s" ] fig = px.treemap(