def getqid():
    """Resolve the submitted reference identifier and render the result page.

    Reads ``button`` and ``refid`` from ``request.form``.  When ``button`` is
    ``"PMID"`` the identifier is looked up through Europe PMC, when it is
    ``"DOI"`` through Crossref; in both cases
    ``PublicationHelper.get_or_create`` is called with the ``login`` object
    from the enclosing scope.

    :return: the rendered ``resolve.html`` template; ``wdid`` is whatever
        ``get_or_create`` returned, or ``None`` for an unrecognised button.
    """
    button = request.form.get("button")
    ref_id = request.form.get("refid")

    # Debug output.  The original printed the misspelled key "buttom",
    # which never exists in the form.
    print(button)
    pprint.pprint(request.form)

    # Default so an unexpected/missing button value no longer raises
    # UnboundLocalError at the render_template call.
    qid = None
    if button == "PMID":
        qid = wdi_helpers.PublicationHelper(
            ref_id, id_type="pmid",
            source="europepmc").get_or_create(login)
    elif button == "DOI":
        qid = wdi_helpers.PublicationHelper(
            ref_id, id_type="doi",
            source="crossref").get_or_create(login)
    # NOTE(review): other callers unpack get_or_create() as a 3-tuple
    # (qid, _, success); the template receives the raw return value here,
    # unchanged from the original -- confirm that is what resolve.html expects.
    return render_template("resolve.html", wdid=qid)
def create_reference(source_str, evidence_level, login):
    """
    Build the reference block for a statement.

    The reference consists of:
        curator: Cancer Biomarkers database
        retrieved: today's date (UTC)
        stated in: one entry per recognised source
        (no reference URL)

    :param source_str: ``;``-separated source tokens; tokens starting with
        ``PMID:`` are resolved to publication items, other tokens are looked
        up in ``source_map``
    :param evidence_level: not used by this function
    :param login: passed to ``PublicationHelper.get_or_create``
    :return: list of reference statements
    """
    curated_by = wdi_core.WDItemID(ITEMS['Cancer Biomarkers database'],
                                   PROPS['curator'],
                                   is_reference=True)
    retrieved_on = wdi_core.WDTime(strftime("+%Y-%m-%dT00:00:00Z", gmtime()),
                                   prop_nr=PROPS['retrieved'],
                                   is_reference=True)
    reference = [curated_by, retrieved_on]

    for token in source_str.split(";"):
        if not token.startswith("PMID:"):
            # Non-PubMed sources must be known in advance.
            if token in source_map:
                reference.append(
                    wdi_core.WDItemID(source_map[token],
                                      PROPS['stated in'],
                                      is_reference=True))
            else:
                print("unknown source: {}".format(token))
            continue

        helper = wdi_helpers.PublicationHelper(token.replace("PMID:", ""),
                                               id_type="pmid",
                                               source="europepmc")
        article_qid, _, ok = helper.get_or_create(login)
        # Articles that could not be found or created are silently left out.
        if ok:
            reference.append(
                wdi_core.WDItemID(article_qid,
                                  PROPS['stated in'],
                                  is_reference=True))
    return reference
def create_articles(pmids: Set[str],
                    login: object,
                    write: bool = True) -> Dict[str, str]:
    """
    Make (or look up) an article item for every given PubMed ID.

    :param pmids: PubMed IDs; a leading ``PMID:`` is removed before lookup,
        but the returned map is keyed by the value exactly as given
    :param login: wdi_core login instance
    :param write: when False nothing is looked up or created and every pmid
        is mapped to the placeholder ``'Q1'``
    :return: map pmid -> wdid; pmids whose lookup failed or raised are absent
    """
    wdid_by_pmid = {}
    for raw_pmid in pmids:
        helper = wdi_helpers.PublicationHelper(raw_pmid.replace("PMID:", ""),
                                               id_type="pmid",
                                               source="europepmc")
        if not write:
            wdid_by_pmid[raw_pmid] = 'Q1'
            continue

        try:
            article_wdid, _, ok = helper.get_or_create(login)
            if ok:
                wdid_by_pmid[raw_pmid] = article_wdid
        except Exception as err:
            # Best effort: report the failure and move on to the next pmid.
            print("Error creating article pmid: {}, error: {}".format(
                raw_pmid, err))
    return wdid_by_pmid
def run(self):
    """Write gene <-> disease "genetic association" statements to Wikidata.

    Steps:

    1. Collect the PubMed IDs of all records in ``self.gwas_catalog.data``,
       look them up with ``get_values("P698", ...)`` and get-or-create the
       ones that are missing; the result is stored in ``self.pmid_qid_map``.
    2. For every record, resolve the disease item (``self.doid_wdid_map``)
       and the gene item (``self.gene_wdid_map``) and build the statements
       with ``self.process_relationship``.  Records that cannot be resolved
       or processed are logged and skipped.
    3. Group the records per gene and per disease so each Wikidata item is
       written once, then write every item through ``self.write_item``.

    Failures while building or writing an item are logged through
    ``wdi_core.WDItemEngine.log`` and do not stop the run.
    """
    wd_genes = defaultdict(list)
    wd_diseases = defaultdict(list)
    gdrs = list(self.gwas_catalog.data)

    print("Get or create references")
    pmids = {x.pmid for x in gdrs}
    print("Need {} pmids".format(len(pmids)))
    self.pmid_qid_map = get_values("P698", pmids)
    print("Found {} pmids".format(len(self.pmid_qid_map)))
    for pmid in pmids - set(self.pmid_qid_map):
        qid, _, success = wdi_helpers.PublicationHelper(
            pmid.replace("PMID:", ""), id_type="pmid",
            source="europepmc").get_or_create(self.login)
        if success:
            self.pmid_qid_map[pmid] = qid

    print("Building relationships & references")
    for gdr in tqdm(gdrs):
        try:
            # Retrieve Wikidata ID for this disease phenotype
            doid_wdid = self.doid_wdid_map["DOID:{}".format(gdr.doid)]
        except KeyError as e:
            msg = "Missing DOID Disease WD Item; skipping {}".format(
                gdr.doid)
            print(msg)
            wdi_core.WDItemEngine.log(
                "ERROR",
                wdi_helpers.format_msg(gdr.doid,
                                       PROPS['Disease Ontology ID'], None,
                                       msg, type(e)))
            continue

        try:
            # Retrieve Wikidata ID for this gene
            gene_wdid = self.gene_wdid_map[gdr.ncbi]
        except KeyError as e:
            msg = "Missing NCBI Gene WD Item; skipping {}".format(gdr.ncbi)
            print(msg)
            wdi_core.WDItemEngine.log(
                "ERROR",
                wdi_helpers.format_msg(gdr.ncbi, PROPS['Entrez Gene ID'],
                                       None, msg, type(e)))
            continue

        try:
            items = self.process_relationship(gene_wdid, doid_wdid, gdr)
        except Exception as e:
            print(e)
            wdi_core.WDItemEngine.log(
                "ERROR",
                wdi_helpers.format_msg(gdr.ncbi, PROPS['Entrez Gene ID'],
                                       None, str(e), type(e)))
            continue

        gdr.gene_wditem = items['gene_item']
        gdr.disease_wditem = items['disease_item']

        # Aggregating data to reduce wikidata updates
        wd_genes[gene_wdid].append(gdr)
        wd_diseases[doid_wdid].append(gdr)

    print("Begin creating Wikidata Gene items with new relationships")
    # Create Wikidata items for genes
    for wdid, gene_gdrs in tqdm(wd_genes.items()):
        # Every record in the group belongs to the same gene, so the first
        # one identifies it.  The original error handler used the stale
        # ``gdr`` left over from the loop above and logged the wrong gene.
        record_id = gene_gdrs[0].ncbi
        # Attach updated disease information to gene
        try:
            gene_wd_item = wdi_core.WDItemEngine(
                wd_item_id=wdid,
                data=[rec.disease_wditem for rec in gene_gdrs],
                domain="genes",
                append_value=[PROPS["genetic association"]],
                fast_run=self.fast_run,
                fast_run_base_filter=self.fast_run_base_gene_filter,
                fast_run_use_refs=True,
                ref_handler=update_retrieved_if_new,
                global_ref_mode="CUSTOM",
                core_props=core_props)
            wd_item = {
                'item': gene_wd_item,
                'record_id': record_id,
                'record_prop': PROPS['Entrez Gene ID']
            }
            self.write_item(wd_item)
        except Exception as e:
            msg = "Problem Creating Gene WDItem; skipping {}".format(
                record_id)
            wdi_core.WDItemEngine.log(
                "ERROR",
                wdi_helpers.format_msg(record_id, PROPS['Entrez Gene ID'],
                                       wdid, msg, type(e)))

    print("Begin creating Wikidata Disease items with new relationships")
    for wdid, disease_gdrs in tqdm(wd_diseases.items()):
        # Same reasoning as for genes: take the ID from this group, not
        # from a leftover loop variable.
        doid = disease_gdrs[0].doid
        # Attach updated gene information to disease
        try:
            disease_wd_item = wdi_core.WDItemEngine(
                wd_item_id=wdid,
                data=[rec.gene_wditem for rec in disease_gdrs],
                domain="diseases",
                append_value=[PROPS["genetic association"]],
                fast_run=self.fast_run,
                fast_run_base_filter=self.fast_run_base_disease_filter,
                fast_run_use_refs=True,
                ref_handler=update_retrieved_if_new,
                global_ref_mode="CUSTOM",
                core_props=core_props)
            wd_item = {
                'item': disease_wd_item,
                'record_id': "DOID:{}".format(doid),
                'record_prop': PROPS['Disease Ontology ID']
            }
            self.write_item(wd_item)
        except Exception as e:
            msg = "Problem Creating Disease WDItem; skipping {}".format(
                doid)
            wdi_core.WDItemEngine.log(
                "ERROR",
                wdi_helpers.format_msg(doid, PROPS['Disease Ontology ID'],
                                       wdid, msg, type(e)))
import sys

from src.local import WDUSER, WDPASS
from wikidataintegrator import wdi_core, wdi_login, wdi_helpers
from tqdm import tqdm
from src.utils import *

# Script: try to create a Wikidata item for every DOI listed in
# <folder>/references_absent_in_wikidata.txt and rewrite that file so it only
# keeps the references that still could not be added.

# Validate the argument before indexing: the original indexed sys.argv[1]
# first (IndexError when missing) and relied on `assert`, which is stripped
# under `python -O`.
if len(sys.argv) < 2 or not sys.argv[1]:
    sys.exit("You need to pass the folder for errors used by prepare_files.py")
folder_for_errors = sys.argv[1]

doi_file = folder_for_errors + "/references_absent_in_wikidata.txt"

with open(doi_file, "r") as f:
    references = f.readlines()

references_still_absent = []
login_instance = wdi_login.WDLogin(user=WDUSER, pwd=WDPASS)

for reference in tqdm(references):
    doi = reference.replace("DOI=", "").strip()
    if not doi:
        # Blank lines would otherwise be submitted as an empty DOI.
        continue

    tqdm.write(f"Adding article with DOI = {doi}")
    print(doi)
    wdi_publication_helper = wdi_helpers.PublicationHelper(ext_id=doi,
                                                           id_type="doi",
                                                           source="crossref")
    wdi_result = wdi_publication_helper.get_or_create(login_instance)
    print(wdi_result)

    # First element of the result is the item id; None means nothing was
    # found or created.
    if wdi_result[0] is None:
        print("Not added to Wikidata.")
        references_still_absent.append(reference.strip())

write_list(doi_file, references_still_absent)
def run_one(pathway_id, retrieved, fast_run, write, login, temp):
    """Assemble the Wikidata statements for one WikiPathways pathway and write them.

    For every row the pathway query returns, the function collects: taxon
    (P703), instance of (P31), title (P1476), WikiPathways ID (P2410), exact
    match (P2888), cited publications (P2860), author name strings (P2093),
    author items (P50), medical conditions (P1050) and main subjects (P921),
    on top of whatever ``get_PathwayElements`` put into ``prep``.  The
    statements are handed to ``wdi_core.WDItemEngine`` and written through
    ``try_write``.

    :param pathway_id: WikiPathways identifier used in the SPARQL queries
        and as the record id
    :param retrieved: passed to ``create_reference``
    :param fast_run: passed to ``WDItemEngine``
    :param write: when falsy, publications are resolved with ``None`` instead
        of ``login``; also passed to ``try_write``
    :param login: login object for publication creation and ``try_write``
    :param temp: object with a ``query(sparql_text)`` method returning rows
        indexable by position (presumably an rdflib graph -- confirm)
    :return: the result of ``panic(...)`` when a cited publication cannot be
        resolved, otherwise ``None``
    """
    print(pathway_id)
    pathway_reference = create_reference(pathway_id, retrieved)

    def fresh_refs():
        # Every statement gets its own copy of the reference.
        return [copy.deepcopy(pathway_reference)]

    prep = dict()
    prep = get_PathwayElements(pathway=pathway_id,
                               datatype="Metabolite",
                               temp=temp,
                               prep=prep)
    prep = get_PathwayElements(pathway=pathway_id,
                               datatype="GeneProduct",
                               temp=temp,
                               prep=prep)

    # P703 = found in taxon, Q15978631 = "Homo sapiens"
    prep["P703"] = [
        wdi_core.WDItemID(value="Q15978631",
                          prop_nr='P703',
                          references=fresh_refs())
    ]

    # The organism literal was garbled to "H**o sapiens" in the original,
    # which can never match; restored to "Homo sapiens".
    pathway_query = (
        ' PREFIX wp: <http://vocabularies.wikipathways.org/wp#>'
        ' PREFIX gpml: <http://vocabularies.wikipathways.org/gpml#>'
        ' PREFIX dcterms: <http://purl.org/dc/terms/>'
        ' SELECT DISTINCT ?pathway ?pwId ?pwLabel ?description'
        ' WHERE { VALUES ?pwId {"' + pathway_id + '"^^xsd:string}'
        ' ?pathway a wp:Pathway ;'
        ' dc:title ?pwLabel ;'
        ' dcterms:description ?description ;'
        ' dcterms:identifier ?pwId ;'
        ' <http://vocabularies.wikipathways.org/wp#isAbout> ?details ;'
        ' wp:organismName "Homo sapiens"^^xsd:string . }')
    qres3 = temp.query(pathway_query)

    # Purely numeric identifiers are PubMed IDs; anything else is sent to
    # Crossref as a DOI.  Compiled once instead of once per row.
    numeric_id = re.compile('^[0-9]+$')
    wikidata_prefixes = ("https://www.wikidata.org/wiki/",
                         "http://www.wikidata.org/entity/")

    # NOTE(review): the per-pathway work below is assumed to run once per
    # result row (normally exactly one row) -- confirm against the upstream
    # layout of this function.
    found_pathway = False
    for row in qres3:
        found_pathway = True
        pathway_iri = str(row[0])
        pw_id = str(row[1])
        pw_label = str(row[2])
        description = str(row[3])

        ## clean up descriptions
        description = re.sub(r'https?:\/\/.*[\s\r\n]', '', description)
        description = description.replace('\n', ' ').replace(
            '\r', ' ').replace('\'\'\'', '').replace('\'\'', '').replace(
                '[', '').replace(']', '')
        description = description.replace(
            'Proteins on this pathway have targeted assays available via the Portal',
            '')
        # Truncate long descriptions; fall back to a generic one when too
        # little text is left.
        description = (description[:246] +
                       '...') if len(description) > 246 else description
        description = 'biological pathway in human' if len(
            description) < 20 else description

        # P31 = instance of
        prep["P31"] = [
            wdi_core.WDItemID(value="Q4915012",
                              prop_nr="P31",
                              references=fresh_refs())
        ]

        prep["P1476"] = [
            wdi_core.WDMonolingualText(value=pw_label,
                                       prop_nr="P1476",
                                       references=fresh_refs())
        ]

        # P2410 = WikiPathways ID
        prep["P2410"] = [
            wdi_core.WDString(pathway_id,
                              prop_nr='P2410',
                              references=fresh_refs())
        ]

        # P2888 = exact match
        prep["P2888"] = [
            wdi_core.WDUrl("http://identifiers.org/wikipathways/" + pw_id,
                           prop_nr='P2888',
                           references=fresh_refs())
        ]

        # --- cited publications (P2860) ---
        publication_query = (
            ' PREFIX wp: <http://vocabularies.wikipathways.org/wp#>'
            ' PREFIX dcterms: <http://purl.org/dc/terms/>'
            ' select ?pubmed WHERE { ?pubmed a wp:PublicationReference ;'
            ' dcterms:isPartOf <' + pathway_iri + '> .} ')
        qres4 = temp.query(publication_query)
        for pubmed_result in qres4:
            pmid = str(pubmed_result[0]).replace(
                "http://identifiers.org/pubmed/", "")
            print(pmid)
            if numeric_id.match(pmid):
                helper = wdi_helpers.PublicationHelper(
                    pmid.replace("PMID:", ""),
                    id_type="pmid",
                    source="europepmc")
            else:
                helper = wdi_helpers.PublicationHelper(pmid,
                                                       id_type="doi",
                                                       source="crossref")
            pmid_qid, _, _ = helper.get_or_create(login if write else None)
            if pmid_qid is None:
                return panic(pathway_id, "not found: {}".format(pmid),
                             "pmid")
            print(pmid_qid)
            prep.setdefault("P2860", []).append(
                wdi_core.WDItemID(value=str(pmid_qid),
                                  prop_nr='P2860',
                                  references=fresh_refs()))

        # --- authors (P2093 name strings, P50 items) ---
        author_query = (
            ' PREFIX wp: <http://vocabularies.wikipathways.org/wp#>'
            ' SELECT ?author ?authorName ?authorHomepage ?authorQIRI'
            ' WHERE { <http://identifiers.org/wikipathways/' + pathway_id +
            '> dc:creator ?author .'
            ' ?author a foaf:Person ;'
            ' foaf:name ?authorName ;'
            ' foaf:homepage ?authorHomepage .'
            ' OPTIONAL { ?author owl:sameAs ?authorQIRI . } } ')
        author_query_res = temp.query(author_query)
        prep["P2093"] = []
        prep["P50"] = []
        for author_row in author_query_res:
            author_name = str(author_row[1])
            print("author_name")
            print(author_name)
            author_homepage = str(author_row[2])
            print("author_homepage")
            print(author_homepage)

            # P2093 = author name string
            author_url_qualifier = wdi_core.WDString(value=author_homepage,
                                                     prop_nr="P2699",
                                                     is_qualifier=True)
            prep["P2093"].append(
                wdi_core.WDString(
                    author_name,
                    prop_nr='P2093',
                    qualifiers=[copy.deepcopy(author_url_qualifier)],
                    references=fresh_refs()))

            # authorQIRI comes from an OPTIONAL clause and may be absent
            if author_row[3] is None:
                continue
            print("author_name")
            print(author_name)
            author_qiri = str(author_row[3])

            # Strip whichever Wikidata prefix is present.  Reset per author
            # so an unrecognised IRI can no longer reuse the previous
            # author's QID (or raise NameError on the first author).
            author_qid = None
            for prefix in wikidata_prefixes:
                if prefix in author_qiri:
                    author_qid = author_qiri.replace(prefix, "")
            if author_qid is None:
                print("unrecognised author IRI, skipping P50: {}".format(
                    author_qiri))
                continue
            print("author_qid")
            print(author_qid)

            # P50 = author
            prep["P50"].append(
                wdi_core.WDItemID(author_qid,
                                  prop_nr='P50',
                                  references=fresh_refs()))

        # --- disease ontology tags (P1050) ---
        disease_ontology_query = (
            ' PREFIX wp: <http://vocabularies.wikipathways.org/wp#>'
            ' PREFIX dcterms: <http://purl.org/dc/terms/>'
            ' SELECT ?diseaseOntologyTerm'
            ' WHERE { ?pathwayRDF wp:diseaseOntologyTag ?diseaseOntologyTerm ;'
            ' foaf:page ?pathway ;'
            ' dcterms:identifier "' + pathway_id + '"^^xsd:string . } ')
        disease_ontology_query_res = temp.query(disease_ontology_query)
        prep["P1050"] = []
        for term_row in disease_ontology_query_res:
            disease_ontology_iri = str(term_row[0])
            doid = disease_ontology_iri.replace(
                "http://purl.obolibrary.org/obo/DOID_", "DOID:")
            print("doid")
            print(doid)

            # P1050 = medical condition
            if doid_qid.get(doid) is not None:  # skip if qid is missing
                prep["P1050"].append(
                    wdi_core.WDItemID(doid_qid[doid],
                                      prop_nr='P1050',
                                      references=fresh_refs()))

        # --- pathway ontology tags (P921) ---
        pw_ontology_query = (
            ' PREFIX wp: <http://vocabularies.wikipathways.org/wp#>'
            ' PREFIX dcterms: <http://purl.org/dc/terms/>'
            ' SELECT ?pwOntologyTerm'
            ' WHERE { ?pathwayRDF wp:pathwayOntologyTag ?pwOntologyTerm ;'
            ' foaf:page ?pathway ;'
            ' dcterms:identifier "' + pathway_id + '"^^xsd:string . } ')
        pw_ontology_query_res = temp.query(pw_ontology_query)
        prep["P921"] = []
        for term_row in pw_ontology_query_res:
            pw_ontology_iri = str(term_row[0])
            poid = pw_ontology_iri.replace(
                "http://purl.obolibrary.org/obo/PW_", "PW:")
            print("poid")
            print(poid)

            # P921 = main subject
            if poid_qid.get(poid) is not None:  # skip if qid is missing
                prep["P921"].append(
                    wdi_core.WDItemID(poid_qid[poid],
                                      prop_nr='P921',
                                      references=fresh_refs()))

        # TODO: Propose Cell Type Ontology ID as new property, add release item, associate terms with WD items.
        # cell_type_ontology_query = """
        #    PREFIX wp: <http://vocabularies.wikipathways.org/wp#>
        #    PREFIX dcterms: <http://purl.org/dc/terms/>
        #    SELECT ?cellTypeOntologyTerm
        #    WHERE {
        #      ?pathwayRDF wp:cellTypeOntologyTag ?cellTypeOntologyTerm ;
        #        foaf:page ?pathway ;
        #        dcterms:identifier \"""" + pathway_id + """\"^^xsd:string .
        #    }
        #    """
        # cell_type_ontology_query_res = temp.query(cell_type_ontology_query)
        # prep["P927"] = []
        # for row in cell_type_ontology_query_res:
        #     cell_type_ontology_iri = str(row[0])
        #     ctoid = cell_type_ontology_iri.replace("http://purl.obolibrary.org/obo/CL_", "CL:")
        #     print("ctoid")
        #     print(ctoid)
        #     # P927 = anatomical location
        #     prep["P927"].append(wdi_core.WDItem(qid[ctoid], prop_nr='P927', references=[copy.deepcopy(pathway_reference)]))

        # --- flatten and write ---
        data2add = []
        for key in prep:
            for statement in prep[key]:
                data2add.append(statement)
                print(statement.prop_nr, statement.value)

        wdPage = wdi_core.WDItemEngine(
            data=data2add,
            fast_run=fast_run,
            fast_run_base_filter=fast_run_base_filter,
            fast_run_use_refs=True,
            ref_handler=update_retrieved_if_new_multiple_refs,
            core_props=core_props)

        wdPage.set_label(pw_label, lang="en")
        wdPage.set_description(description, lang="en")

        try_write(wdPage,
                  record_id=pathway_id,
                  record_prop=PROPS['Wikipathways ID'],
                  edit_summary="Updated a Wikipathways pathway",
                  login=login,
                  write=write)

    if not found_pathway:
        # Make the "nothing matched" case visible instead of failing later
        # on an undefined label/description.
        print("no pathway found for {}; nothing written".format(pathway_id))
def make_statements_from_evidence(variant_id, evidence_item, login, write):
    """Turn one evidence item into a list of Wikidata statements.

    Every early exit reports the problem through ``panic`` (where the
    original did) and returns an empty list.

    :param variant_id: identifier passed to ``panic`` for reporting
    :param evidence_item: dict read for the keys ``evidence_level``,
        ``rating``, ``disease``/``doid``, ``drugs``,
        ``drug_interaction_type``, ``source``/``pubmed_id``, ``id``,
        ``evidence_type``, ``clinical_significance`` and
        ``evidence_direction``
    :param login: login object used when ``write`` is truthy
    :param write: when falsy, ``None`` is passed instead of ``login`` to the
        get-or-create helpers
    :return: list of ``wdi_core.WDItemID`` statements (possibly empty)
    """
    statements = []

    ## determination method and rating qualifiers
    method_qualifier = wdi_core.WDItemID(
        value=EVIDENCE_LEVEL[str(evidence_item["evidence_level"])],
        prop_nr=PROPS['determination method'],
        is_qualifier=True)
    rating_qualifier = wdi_core.WDItemID(
        value=TRUST_RATING[str(evidence_item["rating"])],
        prop_nr=PROPS['rating'],
        is_qualifier=True)
    qualifiers = [method_qualifier, rating_qualifier]

    ## Disease
    if not evidence_item["disease"]["doid"]:
        panic(variant_id, "", "no disease")
        return []
    disease_key = "DOID:" + evidence_item["disease"]["doid"]
    if disease_key not in DO_QID_MAP:
        panic(variant_id, disease_key, "disease")
        return []
    disease_qid = DO_QID_MAP[disease_key]

    ## Drugs
    drug_qids = []
    for drug in evidence_item["drugs"]:
        label = drug['name'].lower()
        if label not in DRUGLABEL_QID_MAP:
            panic(variant_id, label, "drug")
            return []
        drug_qids.append(DRUGLABEL_QID_MAP[label])

    interaction = evidence_item['drug_interaction_type']
    if interaction == "Combination":
        # make this a drug therapy combination item instead!!
        combo_login = login if write else None
        drug_qids = [DrugCombo(drug_qids).get_or_create(combo_login)]
    elif interaction and interaction != "Substitutes":
        # "Substitutes" simply yields one statement per drug (see "Drug
        # Interaction Type" in https://civicdb.org/help/evidence/overview).
        # "Sequential" cannot be expressed in wikidata for now, and any
        # other non-empty value is unknown: both are reported and dropped.
        panic(variant_id, "drug_interaction_type: {}".format(interaction),
              "drug")
        return []

    ## Reference
    pubmed_id = evidence_item["source"]["pubmed_id"]
    publication = wdi_helpers.PublicationHelper(pubmed_id.replace("PMID:", ""),
                                                id_type="pmid",
                                                source="europepmc")
    pmid_qid, _, _ = publication.get_or_create(login if write else None)
    if not pmid_qid:
        panic(variant_id, "not found: {}".format(pubmed_id), "pmid")
        return []

    stated_in = wdi_core.WDItemID(value=pmid_qid,
                                  prop_nr=PROPS['stated in'],
                                  is_reference=True)
    # `retrieved` is not a parameter; it comes from the enclosing scope.
    retrieved_ref = wdi_core.WDTime(retrieved.strftime("+%Y-%m-%dT00:00:00Z"),
                                    prop_nr=PROPS['retrieved'],
                                    is_reference=True)
    evidence_url = "https://civic.genome.wustl.edu/links/evidence/" + str(
        evidence_item['id'])
    reference_url = wdi_core.WDUrl(evidence_url,
                                   prop_nr=PROPS['reference URL'],
                                   is_reference=True)
    curator = wdi_core.WDItemID(value=ITEMS['CIViC database'],
                                prop_nr=PROPS['curator'],
                                is_reference=True)
    evidence_reference = [curator, retrieved_ref, reference_url, stated_in]

    ## "disputed by" qualifier that can be added onto the qualifiers
    disputed_by = wdi_core.WDItemID(value=pmid_qid,
                                    prop_nr=PROPS['statement disputed by'],
                                    is_qualifier=True)

    evidence_type = evidence_item["evidence_type"]

    if evidence_type == "Predictive":
        # positive/negative therapeutic predictor: one statement per drug
        significance = evidence_item["clinical_significance"]
        if significance == "Sensitivity/Response":
            predictor = PROPS['positive therapeutic predictor']
        elif significance == "Resistance":
            predictor = PROPS['negative therapeutic predictor']
        else:
            return []
        qualifiers.append(
            wdi_core.WDItemID(disease_qid,
                              PROPS['medical condition treated'],
                              is_qualifier=True))
        if evidence_item["evidence_direction"] == "Does Not Support":
            qualifiers.append(disputed_by)
        statements.extend(
            wdi_core.WDItemID(drug_qid,
                              predictor,
                              references=[evidence_reference],
                              qualifiers=qualifiers)
            for drug_qid in drug_qids)

    elif evidence_type == "Diagnostic":
        # positive/negative diagnostic predictor: value is the disease
        significance = evidence_item["clinical_significance"]
        if significance == "Positive":
            predictor = PROPS['positive diagnostic predictor']
        elif significance == "Negative":
            predictor = PROPS['negative diagnostic predictor']
        else:
            return []
        if evidence_item["evidence_direction"] == "Does Not Support":
            qualifiers.append(disputed_by)
        statements.append(
            wdi_core.WDItemID(disease_qid,
                              predictor,
                              references=[evidence_reference],
                              qualifiers=qualifiers))

    elif evidence_type == "Prognostic":
        # positive/negative prognostic predictor: value is the disease
        significance = evidence_item["clinical_significance"]
        if significance in {"Better Outcome", "Good Outcome"}:
            predictor = PROPS['positive prognostic predictor']
        elif significance == "Poor Outcome":
            predictor = PROPS['negative prognostic predictor']
        else:
            return []
        if evidence_item["evidence_direction"] == "Does Not Support":
            qualifiers.append(disputed_by)
        statements.append(
            wdi_core.WDItemID(disease_qid,
                              predictor,
                              references=[evidence_reference],
                              qualifiers=qualifiers))

    return statements
def run_one(pathway_id, retrieved, fast_run, write, login, temp):
    """Assemble the Wikidata statements for one WikiPathways pathway and write them.

    For every row the pathway query returns, the function collects: taxon
    (P703), instance of (P31), WikiPathways ID (P2410), exact match (P2888)
    and cited publications (P2860), on top of whatever
    ``get_PathwayElements`` put into ``prep``.  The statements are handed to
    ``wdi_core.WDItemEngine`` and written through ``try_write``.

    :param pathway_id: WikiPathways identifier used in the SPARQL query and
        as the record id
    :param retrieved: passed to ``create_reference``
    :param fast_run: passed to ``WDItemEngine``
    :param write: when falsy, publications are resolved with ``None`` instead
        of ``login``; also passed to ``try_write``
    :param login: login object for publication creation and ``try_write``
    :param temp: object with a ``query(sparql_text)`` method whose rows are
        indexable by position and expose ``pwLabel`` (presumably an rdflib
        graph -- confirm)
    :return: the result of ``panic(...)`` when a cited publication cannot be
        resolved, otherwise ``None``
    """
    print(pathway_id)
    pathway_reference = create_reference(pathway_id, retrieved)

    def fresh_refs():
        # Every statement gets its own copy of the reference.
        return [copy.deepcopy(pathway_reference)]

    prep = dict()
    prep = get_PathwayElements(pathway=pathway_id,
                               datatype="Metabolite",
                               temp=temp,
                               prep=prep)
    prep = get_PathwayElements(pathway=pathway_id,
                               datatype="GeneProduct",
                               temp=temp,
                               prep=prep)

    # P703 = found in taxon, Q15978631 = "Homo sapiens"
    prep["P703"] = [
        wdi_core.WDItemID(value="Q15978631",
                          prop_nr='P703',
                          references=fresh_refs())
    ]

    # The organism literal was garbled to "H**o sapiens" in the original,
    # which can never match; restored to "Homo sapiens".
    query = (
        ' PREFIX wp: <http://vocabularies.wikipathways.org/wp#>'
        ' PREFIX gpml: <http://vocabularies.wikipathways.org/gpml#>'
        ' PREFIX dcterms: <http://purl.org/dc/terms/>'
        ' SELECT DISTINCT ?pathway ?pwId ?pwLabel'
        ' WHERE { VALUES ?pwId {"' + pathway_id + '"^^xsd:string}'
        ' ?pathway a wp:Pathway ;'
        ' dc:title ?pwLabel ;'
        ' dcterms:identifier ?pwId ;'
        ' <http://vocabularies.wikipathways.org/wp#isAbout> ?details ;'
        ' wp:organismName "Homo sapiens"^^xsd:string . }')
    # print(query)
    qres3 = temp.query(query)

    # NOTE(review): the per-pathway work below is assumed to run once per
    # result row (normally exactly one row) -- confirm against the upstream
    # layout of this function.
    found_pathway = False
    for row in qres3:
        found_pathway = True
        print(row[1])
        print(str(row[2]))

        # P31 = instance of
        prep["P31"] = [
            wdi_core.WDItemID(value="Q4915012",
                              prop_nr="P31",
                              references=fresh_refs())
        ]

        # P2410 = WikiPathways ID
        prep["P2410"] = [
            wdi_core.WDString(pathway_id,
                              prop_nr='P2410',
                              references=fresh_refs())
        ]

        # P2888 = exact match
        prep["P2888"] = [
            wdi_core.WDUrl("http://identifiers.org/wikipathways/" +
                           str(row[1]),
                           prop_nr='P2888',
                           references=fresh_refs())
        ]

        # --- cited publications (P2860) ---
        query = (
            ' PREFIX wp: <http://vocabularies.wikipathways.org/wp#>'
            ' PREFIX dcterms: <http://purl.org/dc/terms/>'
            ' select ?pubmed WHERE { ?pubmed a wp:PublicationReference ;'
            ' dcterms:isPartOf <' + str(row[0]) + '> .} ')
        qres4 = temp.query(query)
        print(query)
        for pubmed_result in qres4:
            pprint.pprint(pubmed_result)
            pmid = str(pubmed_result[0]).replace(
                "http://identifiers.org/pubmed/", "")
            print(pmid)
            pmid_qid, _, _ = wdi_helpers.PublicationHelper(
                pmid.replace("PMID:", ""),
                id_type="pmid",
                source="europepmc").get_or_create(login if write else None)
            if pmid_qid is None:
                return panic(pathway_id, "not found: {}".format(pmid),
                             "pmid")
            print(pmid_qid)
            prep.setdefault("P2860", []).append(
                wdi_core.WDItemID(value=str(pmid_qid),
                                  prop_nr='P2860',
                                  references=fresh_refs()))

        # --- flatten and write ---
        data2add = []
        for key in prep:
            for statement in prep[key]:
                data2add.append(statement)
                print(statement.prop_nr, statement.value)
        pprint.pprint(data2add)

        wdPage = wdi_core.WDItemEngine(
            data=data2add,
            domain="pathways",
            fast_run=fast_run,
            item_name=row.pwLabel,
            fast_run_base_filter=fast_run_base_filter,
            fast_run_use_refs=True,
            ref_handler=update_retrieved_if_new_multiple_refs,
            core_props=core_props)

        wdPage.set_label(str(row[2]), lang="en")
        wdPage.set_description("biological pathway in human", lang="en")

        try_write(wdPage,
                  record_id=pathway_id,
                  record_prop=PROPS['Wikipathways ID'],
                  edit_summary="Updated a Wikipathways pathway",
                  login=login,
                  write=write)

    if not found_pathway:
        # Make the "nothing matched" case visible instead of failing later
        # on an undefined row.
        print("no pathway found for {}; nothing written".format(pathway_id))