def append_obo_exact_matches(cell_line_object, data_to_add_to_wikidata):
    """Append "exact match" (P2888) OBO PURL statements for a cell line.

    For every identifier listed under the "CLO", "BTO", "EFO" and "BCGO"
    keys of ``cell_line_object.cell_line_dump``, one ``WDUrl`` statement
    pointing at ``http://purl.obolibrary.org/obo/<identifier>`` is appended.

    :param cell_line_object: object exposing ``references_in_wdi_format`` and
        ``cell_line_dump`` (a mapping holding the four ontology keys)
    :param data_to_add_to_wikidata: list of statements, extended in place
    :return: the same ``data_to_add_to_wikidata`` list
    :raises KeyError: if one of the four ontology keys is missing from the dump
    """
    reference = cell_line_object.references_in_wdi_format
    cell_line_dump = cell_line_object.cell_line_dump

    # All four ontologies share one property. The BTO/EFO/BCGO branches used
    # to pass "2888" without the "P" prefix, unlike the CLO branch; the
    # property id is now spelled "P2888" everywhere.
    for ontology in ("CLO", "BTO", "EFO", "BCGO"):
        # Direct indexing on purpose: a missing key still raises KeyError,
        # and a falsy value (None, empty list) is skipped as before.
        for obo_id in cell_line_dump[ontology] or ():
            data_to_add_to_wikidata.append(
                wdi_core.WDUrl(value="http://purl.obolibrary.org/obo/" + obo_id,
                               prop_nr="P2888",
                               references=reference))
    return data_to_add_to_wikidata
def create_reference(unii: str, url=None):
    """Build the reference (list of reference snaks) for an Inxight: Drugs record.

    The reference contains, in order:
      * stated in: the 'Inxight: Drugs Database' item
      * retrieved: today's date (UTC, day precision)
      * reference URL: the drugs.ncats.io page for ``unii`` (only if ``unii`` is truthy)
      * reference URL: one per entry of ``url`` (only if ``url`` is truthy)

    :param unii: UNII identifier used to build the drugs.ncats.io URL
    :param url: optional iterable of additional reference URLs
    :return: list of wdi_core reference objects
    """
    retrieved_on = strftime("+%Y-%m-%dT00:00:00Z", gmtime())
    ref = [
        wdi_core.WDItemID(ITEMS['Inxight: Drugs Database'],
                          PROPS['stated in'],
                          is_reference=True),
        wdi_core.WDTime(retrieved_on,
                        prop_nr=PROPS['retrieved'],
                        is_reference=True),
    ]

    if unii:
        ref.append(
            wdi_core.WDUrl("https://drugs.ncats.io/drug/{}".format(unii),
                           PROPS['reference URL'],
                           is_reference=True))

    for extra_url in url or ():
        # Best effort: an extra URL that cannot be turned into a WDUrl is
        # reported on stdout and skipped instead of aborting the reference.
        try:
            ref.append(
                wdi_core.WDUrl(extra_url,
                               PROPS['reference URL'],
                               is_reference=True))
        except Exception as e:
            print(e)
            print(extra_url)
    return ref
def create_references(self, gdr):
    """Build the reference snaks for one gene-disease relationship record.

    :param gdr: record exposing ``phenocarta_url``, ``link`` and ``pmid``
    :return: list with, in order: reference URL (phenocarta), reference URL
        (``gdr.link``), stated in Q22330995, stated in the item mapped from
        ``gdr.pmid`` via ``self.pmid_qid_map``, and today's retrieved date
    """
    reference_url = PROPS['reference URL']
    stated_in = PROPS['stated in']
    return [
        # Reference URL for phenocarta
        wdi_core.WDUrl(value=gdr.phenocarta_url,
                       prop_nr=reference_url,
                       is_reference=True),
        # Reference URL for genome.gov
        wdi_core.WDUrl(value=gdr.link,
                       prop_nr=reference_url,
                       is_reference=True),
        # Stated in Phenocarta
        wdi_core.WDItemID(value='Q22330995',
                          prop_nr=stated_in,
                          is_reference=True),
        # Stated in PubMed
        wdi_core.WDItemID(value=self.pmid_qid_map[gdr.pmid],
                          prop_nr=stated_in,
                          is_reference=True),
        # Date retrieved
        wdi_core.WDTime(strftime("+%Y-%m-%dT00:00:00Z", gmtime()),
                        prop_nr=PROPS['retrieved'],
                        is_reference=True),
    ]
def update_all_settlements(config: Configuration):
    """Write the population (P1082) of every settlement in the latest processed file.

    Logs in with ``config.credentials_path``, locates the latest processed
    file through ``find_latest_processed_file_info`` and, for every CSV row,
    calls ``update_item`` with a quantity statement built from the row's
    ``permanent_population`` column. Rows whose update fails are reported on
    stdout and summarised once the loop has finished.

    :param config: configuration exposing ``credentials_path``,
        ``matched_tables_path`` and ``data``
    """
    login = login_with_credentials(config.credentials_path)
    ref_time, ref_url, path = find_latest_processed_file_info(
        config.matched_tables_path, config.data)
    ref = wdi_core.WDUrl(prop_nr="P854", value=ref_url, is_reference=True)
    qualifiers = create_qualifiers(ref_time)

    failed_qids = []
    data = pd.read_csv(path)
    for _, row in data.iterrows():
        settlement_qid: str = row['settlement']
        population: str = row['permanent_population']
        prop = wdi_core.WDQuantity(prop_nr='P1082',
                                   value=population,
                                   qualifiers=qualifiers,
                                   references=[[ref]])
        try:
            update_item(login, settlement_qid, [prop])
        except Exception:
            # Deliberately Exception, not BaseException: one failed item must
            # not stop the batch, but Ctrl-C / SystemExit must still end it.
            failed_qids.append(settlement_qid)
            print("An error occurred for item : " + settlement_qid)

    if failed_qids:
        print("Summarizing failures for specific IDs")
        for failed_qid in failed_qids:
            print("Error for : " + failed_qid)
def create_main_statements(self):
    """Populate ``self.s_main`` with the main statements for this term.

    Creates the reference first when ``self.reference`` is falsy, then adds:
    one item statement per relationship whose predicate and object are both
    known to ``self.do_graph``, an exact-match statement for ``self.id``, an
    "instance of" Q12136 statement (skipped for DOID:4), and an exact-match
    statement for the identifiers.org URI carrying its own MIRIAM reference.
    """
    if not self.reference:
        self.create_reference()
    self.s_main = []
    for relationship in self.relationships:
        # relationship[0] is looked up in edge_prop, relationship[1] in purl_wdid
        if relationship[0] not in self.do_graph.edge_prop:
            # Unknown relationship types are skipped silently (logging disabled):
            # s = "unknown relationship: {}".format(relationship[0])
            # msg = wdi_helpers.format_msg(self.doid, 'P699', None, s, msg_type="unknown relationship")
            # wdi_core.WDItemEngine.log("WARNING", msg)
            continue
        if relationship[1] not in self.do_graph.purl_wdid:
            # Unknown target: log a warning and skip this relationship
            s = "unknown obj: {}".format(relationship[1])
            msg = wdi_helpers.format_msg(self.doid, 'P699', None, s, msg_type="unknown obj")
            wdi_core.WDItemEngine.log("WARNING", msg)
            continue
        self.s_main.append(wdi_core.WDItemID(self.do_graph.purl_wdid[relationship[1]], self.do_graph.edge_prop[relationship[0]], references=[self.reference]))
    # add http://purl.obolibrary.org/obo/, exact match
    # NOTE(review): presumably added once per term, after the loop - confirm nesting
    self.s_main.append(wdi_core.WDString(self.id, PROPS['exact match'], references=[self.reference]))
    if self.doid != "DOID:4":
        # instance of disease
        self.s_main.append(wdi_core.WDItemID('Q12136', PROPS['instance of'], references=[self.reference]))
    # The identifiers.org exact match uses a dedicated reference (stated in
    # Q16335166 plus the MIRIAM collection URL) instead of self.reference.
    # NOTE(review): presumably applies to every term, including DOID:4 - confirm nesting
    miriam_ref = [wdi_core.WDItemID(value="Q16335166", prop_nr='P248', is_reference=True), wdi_core.WDUrl("http://www.ebi.ac.uk/miriam/main/collections/MIR:00000233", 'P854', is_reference=True)]
    self.s_main.append(wdi_core.WDString("http://identifiers.org/doid/{}".format(self.doid), PROPS['exact match'], references=[miriam_ref]))
def create_reference(omim, pmid, login=None):
    """Build the reference (list of reference snaks) for a MitoDB statement.

    The reference contains, in order:
      * curator: the 'MitoDB' item
      * retrieved: today's date (UTC, day precision)
      * stated in: the publication item for ``pmid`` (only when
        ``PublicationHelper.get_or_create`` reports success)
      * reference URL: the MitoDB symptoms page for ``omim``

    :param omim: OMIM id inserted into the MitoDB URL
    :param pmid: PubMed id of the supporting publication
    :param login: login object passed to ``PublicationHelper.get_or_create``
    :return: list of wdi_core reference objects
    """
    retrieved_on = strftime("+%Y-%m-%dT00:00:00Z", gmtime())
    ref = [
        wdi_core.WDItemID(ITEMS['MitoDB'], PROPS['curator'], is_reference=True),
        wdi_core.WDTime(retrieved_on, prop_nr=PROPS['retrieved'], is_reference=True),
    ]

    publication = PublicationHelper(ext_id=pmid, id_type='pmid', source="europepmc")
    pmid_qid, _, success = publication.get_or_create(login)
    # Strict identity check: only a literal True counts as success.
    if success is True:
        ref.append(
            wdi_core.WDItemID(pmid_qid, PROPS['stated in'], is_reference=True))

    mitodb_url = "http://mitodb.com/symptoms.php?oid={}&symptoms=Show".format(omim)
    ref.append(
        wdi_core.WDUrl(mitodb_url, PROPS['reference URL'], is_reference=True))
    return ref
def task_processor(self, task, n):
    """Write the "cites" (P2860) statements of one task to its item.

    :param task: indexable record: ``task[0]`` numeric id of the citing item,
        ``task[1]`` value appended (URL-quoted) to ``self.url_pattern``,
        ``task[2]`` retrieval time, ``task[3]`` iterable of numeric ids of the
        cited items
    :param n: index into ``self.integrator`` selecting the 'core' module and
        'login' used for the write
    """
    source_url = self.url_pattern + urllib.parse.quote_plus(str(task[1]))
    # One shared reference block: stated in / reference URL / retrieved.
    ref = [[
        wdi_core.WDItemID(value=self.source, prop_nr='P248', is_reference=True),
        wdi_core.WDUrl(value=source_url, prop_nr='P854', is_reference=True),
        wdi_core.WDTime(task[2], prop_nr='P813', is_reference=True),
    ]]

    data = [
        wdi_core.WDItemID(value='Q' + str(cited_item),
                          prop_nr='P2860',
                          references=ref)
        for cited_item in task[3]
    ]

    worker = self.integrator[n]
    itemengine = worker['core'].WDItemEngine(
        wd_item_id='Q' + str(task[0]),
        data=data,
        append_value=self.append_value,
        good_refs=self.good_refs,
        keep_good_ref_statements=True)
    print(itemengine.write(worker['login'], edit_summary=self.edit_summary))
def create_ref_statement(emea_id, url):
    """Build a reference made of an external id (P3637), a URL and a date.

    :param emea_id: identifier stored as the P3637 reference snak
    :param url: reference URL (P854)
    :return: list ``[P3637 external id, P854 URL, P813 retrieved]``
    """
    retrieved_on = strftime("+%Y-%m-%dT00:00:00Z", gmtime())
    return [
        wdi_core.WDExternalID(emea_id, 'P3637', is_reference=True),
        wdi_core.WDUrl(url, prop_nr='P854', is_reference=True),
        wdi_core.WDTime(retrieved_on, prop_nr='P813', is_reference=True),
    ]
def create_reference(spl_url, source_type):
    """Build the reference for a statement sourced from ``spl_url``.

    :param spl_url: reference URL (P854)
    :param source_type: string stored as the P958 reference snak
    :return: list ``[stated in Q73670648, retrieved (now), P2960 fixed date
        2015-09-29, reference URL, P958 string]``
    """
    wd_day_format = "+%Y-%m-%dT00:00:00Z"
    retrieved_stamp = datetime.now().strftime(wd_day_format)
    # The archive date is a fixed calendar day, parsed from its US-style spelling.
    archived_stamp = datetime.strptime('9/29/2015', '%m/%d/%Y').strftime(wd_day_format)
    return [
        wdi_core.WDItemID(value="Q73670648", prop_nr="P248", is_reference=True),
        wdi_core.WDTime(retrieved_stamp, prop_nr="P813", is_reference=True),
        wdi_core.WDTime(archived_stamp, prop_nr="P2960", is_reference=True),
        wdi_core.WDUrl(value=spl_url, prop_nr="P854", is_reference=True),
        wdi_core.WDString(value=source_type, prop_nr="P958", is_reference=True),
    ]
def add_entity(self, property_list, result):
    """
    Fill ``property_list`` with the statements for a Reactome entity.

    Nothing is added when ``result['entitytype']`` is not one of
    COMP, DS, CS or OS.

    :param property_list: dict of property id -> list of statements, filled in place
    :param result: the data from Reactome (reads 'entitytype' and 'cportal')
    :return:
    """
    entity_type = result['entitytype']
    type_to_qid = (
        ('COMP', 'Q420927'),
        ('DS', 'Q47461827'),
        ('CS', 'Q47461807'),
        ('OS', 'Q49980450'),
    )
    wditem_value = next(
        (qid for code, qid in type_to_qid if entity_type == code), None)
    if wditem_value is None:
        return

    # P31 = instance of; carries an extra Complex Portal reference when one
    # could be created for a non-empty 'cportal' value
    cportal = result['cportal']
    cpref = self.create_complex_portal_reference(cportal) if cportal != '' else []
    instance_refs = [copy.deepcopy(self.reference)]
    if cpref:
        instance_refs.append(cpref)
    property_list["P31"] = [
        wdi_core.WDItemID(value=wditem_value,
                          prop_nr="P31",
                          references=instance_refs)
    ]

    # P2888 = exact match
    property_list["P2888"] = [
        wdi_core.WDUrl(self.match_url,
                       prop_nr='P2888',
                       references=[copy.deepcopy(self.reference)])
    ]

    # P703 = found in taxon
    property_list["P703"] = [
        wdi_core.WDItemID(value=self.species,
                          prop_nr='P703',
                          references=[copy.deepcopy(self.reference)])
    ]

    # P3937 = Reactome ID (no reference attached)
    property_list["P3937"] = [
        wdi_core.WDString(value=self.reactome_id, prop_nr='P3937')
    ]

    self.add_entity_parts(property_list, result)
def create_reference(genbank_id):
    """Build the GenBank reference for ``genbank_id``.

    :param genbank_id: identifier used as the ``term`` of the NCBI genome URL
    :return: list ``[stated in GenBank, retrieved (today, UTC), reference URL]``
    """
    genome_url = "https://www.ncbi.nlm.nih.gov/genome/?term={}".format(genbank_id)
    return [
        wdi_core.WDItemID(ITEMS['GenBank'], PROPS['stated in'], is_reference=True),
        wdi_core.WDTime(strftime("+%Y-%m-%dT00:00:00Z", gmtime()),
                        PROPS['retrieved'],
                        is_reference=True),
        wdi_core.WDUrl(genome_url, PROPS['reference URL'], is_reference=True),
    ]
def add_extra_statements(self):
    """Append the optional URL (P2699) and DOI (P356) statements.

    Each statement is appended to ``self.statements`` only when the
    corresponding attribute (``self.url`` / ``self.doi``) is truthy.
    """
    if self.url:
        # The URL statement is qualified with P642 -> Q7126717.
        url_statement = wdi_core.WDUrl(
            self.url,
            'P2699',
            qualifiers=[
                wdi_core.WDItemID('Q7126717', 'P642', is_qualifier=True)
            ])
        self.statements.append(url_statement)

    if self.doi:
        doi_statement = wdi_core.WDExternalID(self.doi, 'P356')
        self.statements.append(doi_statement)
def _add_related_link_to_entity(self, entity: wdi_core.WDItemEngine, uri: str):
    """
    Attach the item's original URI to the entity as a "related link" statement.

    :param entity: wikibase item
    :param uri: item's URI
    :return: None; the entity is updated in place with the related link prop
        passed in ``append_value``
    """
    link_prop = self._related_link_prop
    link_statement = wdi_core.WDUrl(value=uri, prop_nr=link_prop)
    entity.update([link_statement], append_value=[link_prop])
def create_item(label, description, equiv_classes, login):
    """Create and write a new item carrying one URL statement per equivalent class.

    :param label: label (and ``item_name``) of the new item
    :param description: description of the new item
    :param equiv_classes: iterable of URLs, each stored under the property
        returned by ``get_quiv_class_pid()``
    :param login: login object used for the write
    :return: the written item engine
    """
    # Register the equivalent-class property in CORE_PROPS before building the item.
    CORE_PROPS.add(get_quiv_class_pid())

    statements = []
    for equiv_class in equiv_classes:
        statements.append(wdi_core.WDUrl(equiv_class, get_quiv_class_pid()))

    new_item = localItemEngine(item_name=label, domain="foo", data=statements)
    new_item.set_label(label)
    new_item.set_description(description)
    new_item.write(login)
    return new_item
def create_reference(ghr_url):
    """Build the reference for a statement sourced from ``ghr_url``.

    :param ghr_url: reference URL (P854)
    :return: list ``[stated in Q62606821, retrieved (now), reference URL]``
    """
    retrieved_stamp = datetime.now().strftime("+%Y-%m-%dT00:00:00Z")
    return [
        wdi_core.WDItemID(value="Q62606821", prop_nr="P248", is_reference=True),
        wdi_core.WDTime(retrieved_stamp, prop_nr="P813", is_reference=True),
        wdi_core.WDUrl(value=ghr_url, prop_nr="P854", is_reference=True),
    ]
def create_reference():
    """Build the reference for the row currently selected by the globals.

    Reads the module-level ``df`` and ``index``: the reference URL comes from
    ``df.loc[index, 'Report Reference URL']``.

    :return: list ``[stated in Q64403342, retrieved (now), reference URL]``
    """
    stated_in = wdi_core.WDItemID(value="Q64403342", prop_nr="P248", is_reference=True)
    retrieved_stamp = datetime.now().strftime("+%Y-%m-%dT00:00:00Z")
    retrieved = wdi_core.WDTime(retrieved_stamp, prop_nr="P813", is_reference=True)
    report_url = df.loc[index, 'Report Reference URL']
    return [
        stated_in,
        retrieved,
        wdi_core.WDUrl(report_url, prop_nr="P854", is_reference=True),
    ]
def create_reference():
    """Build the fixed Schools Portal reference.

    :return: list ``[stated in Schools Portal, retrieved (today, UTC),
        reference URL of the open-data catalogue]``
    """
    catalogue_url = "http://dati.istruzione.it/opendata/opendata/catalogo/elements1/?area=Scuole"
    return [
        wdi_core.WDItemID(ITEMS['Schools Portal'],
                          PROPS['stated in'],
                          is_reference=True),
        wdi_core.WDTime(strftime("+%Y-%m-%dT00:00:00Z", gmtime()),
                        PROPS['retrieved'],
                        is_reference=True),
        wdi_core.WDUrl(catalogue_url,
                       PROPS['reference URL'],
                       is_reference=True),
    ]
def create_property(label, description, property_datatype, equiv_props, login):
    """Create and write a new property carrying one URL statement per equivalent property.

    :param label: label (and ``item_name``) of the new property
    :param description: description of the new property
    :param property_datatype: datatype passed through to ``write``
    :param equiv_props: iterable of URLs, each stored under the property
        returned by ``get_quiv_prop_pid()``
    :param login: login object used for the write
    :return: the written item engine
    """
    # Register the equivalent-property property in CORE_PROPS before building the entity.
    CORE_PROPS.add(get_quiv_prop_pid())

    statements = []
    for equiv_prop in equiv_props:
        statements.append(wdi_core.WDUrl(equiv_prop, get_quiv_prop_pid()))

    new_property = localItemEngine(item_name=label, domain="foo", data=statements)
    new_property.set_label(label)
    new_property.set_description(description)
    new_property.write(login,
                       entity_type="property",
                       property_datatype=property_datatype)
    return new_property
def create_statements(self):
    """Build all statements for this node.

    Side effects: ``self.id_pid`` is added to ``self.pids`` and to
    ``self.graph.CORE_IDS``.

    :return: list starting with the primary external-id statement and the
        exact-match (P2888) URL statement, followed by whatever
        ``self.create_xref_statements()`` yields
    """
    ref = self.create_ref_statement()
    self.pids.add(self.id_pid)
    # make sure this ID is unique in wikidata
    self.graph.CORE_IDS.update({self.id_pid})

    # this node's primary id
    primary_id = wdi_core.WDExternalID(self.id_value, self.id_pid, references=[ref])
    # the exact match statement
    exact_match = wdi_core.WDUrl(self.id_uri,
                                 self.helper.get_pid('P2888'),
                                 references=[ref])

    statements = [primary_id, exact_match]
    statements.extend(self.create_xref_statements())
    return statements
def create_reference(variant_id, retrieved):
    """Build the CIViC reference for a variant.

    :param variant_id: variant id appended to the CIViC variants link
    :param retrieved: object with ``strftime`` giving the retrieval date
    :return: list ``[stated in CIViC database, retrieved, reference URL]``
    """
    variant_url = "https://civic.genome.wustl.edu/links/variants/" + str(variant_id)
    return [
        wdi_core.WDItemID(value=ITEMS['CIViC database'],
                          prop_nr=PROPS['stated in'],
                          is_reference=True),
        wdi_core.WDTime(retrieved.strftime("+%Y-%m-%dT00:00:00Z"),
                        prop_nr=PROPS['retrieved'],
                        is_reference=True),
        wdi_core.WDUrl(variant_url,
                       prop_nr=PROPS['reference URL'],
                       is_reference=True),
    ]
def create_statement_ref(self, rows):
    """
    Build one reference (list of reference snaks) per row of ``rows``.

    The supporting text is wrapped into chunks of at most 400 characters,
    one string snak per chunk. Each "|"-separated reference URI becomes URL
    snaks: a PubMed URL is split via ``self.split_pubmed_url``, any other
    URL is cropped to 400 characters.

    :param rows: DataFrame-like object whose rows expose
        ``reference_supporting_text`` and ``reference_uri``
    :return: list of references, one per row
    """
    url_pid = self.uri_pid['http://www.wikidata.org/entity/P854']
    text_pid = self.uri_pid['http://reference_supporting_text']
    pubmed_prefix = "https://www.ncbi.nlm.nih.gov/pubmed/"

    refs = []
    for _, row in rows.iterrows():
        # textwrap.wrap splits lines on spaces only
        chunks = textwrap.wrap(row.reference_supporting_text,
                               400,
                               break_long_words=False)
        ref = []
        for chunk in chunks:
            ref.append(wdi_core.WDString(chunk, text_pid, is_reference=True))

        if row.reference_uri:
            for raw_uri in row.reference_uri.split("|"):
                uri = self.handle_special_ref_url(raw_uri)
                if uri.startswith(pubmed_prefix):
                    urls = self.split_pubmed_url(uri)
                else:
                    urls = [uri[:400]]
                ref.extend([
                    wdi_core.WDUrl(one_url, url_pid, is_reference=True)
                    for one_url in urls
                ])
        refs.append(ref)
    return refs
def update_item(login, settlement_qid, population):
    """Write a population (P1082) statement for one settlement, then pause.

    The statement is qualified with point in time 2020-06-15 (P585) and
    determination method Q90878157 (P459), and referenced with the fixed
    grao.bg table URL (P854). The function sleeps 15 seconds after writing.

    :param login: login object used for the write
    :param settlement_qid: id of the item to update
    :param population: value of the population quantity
    """
    source = wdi_core.WDUrl(
        prop_nr="P854",
        value="https://www.grao.bg/tna/t41nm-15-06-2020_2.txt",
        is_reference=True)
    method = wdi_core.WDItemID(value='Q90878157',
                               prop_nr="P459",
                               is_qualifier=True)
    as_of = wdi_core.WDTime(time='+2020-06-15T00:00:00Z',
                            prop_nr='P585',
                            is_qualifier=True)

    population_statement = wdi_core.WDQuantity(prop_nr='P1082',
                                               value=population,
                                               qualifiers=[as_of, method],
                                               references=[[source]])
    item = wdi_core.WDItemEngine(wd_item_id=settlement_qid,
                                 data=[population_statement])
    item.write(login, False)
    time.sleep(15)
def generate_refsnak(source, url, date):
    """
    Build a single reference block (stated in / reference URL / retrieved).

    @param source: Wikidata ID string referring to the data source
    @param url: string of the API URL
    @param date: string of the date of access, format '+YYYY-MM-DDT00:00:00Z'
        (precision is only to the day)
    @return one-level nested list of WD statement objects as required by
        WikidataIntegrator
    """
    stated_in = wdi_core.WDItemID(is_reference=True, value=source, prop_nr='P248')
    reference_url = wdi_core.WDUrl(is_reference=True, value=url, prop_nr='P854')
    retrieved = wdi_core.WDTime(date, is_reference=True, prop_nr='P813')
    reference_block = [stated_in, reference_url, retrieved]
    return [reference_block]
def create_property(self, label, description, property_datatype, uri, dbxref):
    """Create a property for ``uri`` unless one is already registered.

    :param label: label (and ``item_name``) of the new property
    :param description: description of the new property
    :param property_datatype: datatype passed through to ``write``
    :param uri: URI stored as the equivalent-property statement and used as
        the key in ``self.uri_pid``
    :param dbxref: string stored under ``self.dbxref_pid``
    :return: None
    """
    # Already known: report it and do nothing.
    if uri in self.uri_pid:
        print("property already exists: {} {}".format(
            self.uri_pid[uri], uri))
        return None
    s = [wdi_core.WDUrl(uri, self.get_equiv_prop_pid())]
    s.append(wdi_core.WDString(dbxref, self.dbxref_pid))
    # NOTE(review): core_props reads self.equiv_prop_pid directly while the
    # statement above goes through self.get_equiv_prop_pid() - presumably the
    # getter populates the attribute; confirm.
    item = self.item_engine(item_name=label, domain="foo", data=s, core_props=[self.equiv_prop_pid])
    item.set_label(label)
    item.set_description(description)
    # Only touch the wikibase when writing is enabled.
    if self.write:
        item.write(self.login, entity_type="property", property_datatype=property_datatype)
    # NOTE(review): presumably recorded even when self.write is false - confirm nesting
    self.uri_pid[uri] = item.wd_item_id
def test_new_item_creation(self):
    """Check that a JSON representation can be built for new items.

    One statement of each listed wdi_core datatype is tried on its own and
    then all together, each time with a populated and with an empty
    ``core_props`` set.
    """
    data = [
        wdi_core.WDString(value='test', prop_nr='P1'),
        wdi_core.WDString(value='test1', prop_nr='P2'),
        wdi_core.WDMath("xxx", prop_nr="P3"),
        wdi_core.WDExternalID("xxx", prop_nr="P4"),
        wdi_core.WDItemID("Q123", prop_nr="P5"),
        # NOTE(review): the time value is the strftime pattern itself, not a
        # formatted timestamp - confirm this is intended
        wdi_core.WDTime('+%Y-%m-%dT%H:%M:%SZ', "P6"),
        wdi_core.WDUrl("http://www.google.com", "P7"),
        wdi_core.WDMonolingualText("xxx", prop_nr="P8"),
        wdi_core.WDQuantity(5, prop_nr="P9"),
        wdi_core.WDQuantity(5, upper_bound=9, lower_bound=2, prop_nr="P10"),
        wdi_core.WDCommonsMedia("xxx", prop_nr="P11"),
        wdi_core.WDGlobeCoordinate(1.2345, 1.2345, 12, prop_nr="P12"),
        wdi_core.WDGeoShape("xxx", prop_nr="P13"),
        wdi_core.WDProperty("P123", "P14")
    ]
    # P0..P19 covers every property used in `data`
    core_props = set(["P{}".format(x) for x in range(20)])
    # Each datatype on its own, with and without core props
    for d in data:
        item = wdi_core.WDItemEngine(item_name='dae', domain="szadf", data=[d], core_props=core_props)
        assert item.get_wd_json_representation()
        item = wdi_core.WDItemEngine(item_name='dae', domain="szadf", data=[d], core_props=set())
        assert item.get_wd_json_representation()
    # All datatypes together, with and without core props
    item = wdi_core.WDItemEngine(item_name='dae', domain="szadf", data=data, core_props=core_props)
    assert item.get_wd_json_representation()
    item = wdi_core.WDItemEngine(item_name='dae', domain="szadf", data=data, core_props=set())
    assert item.get_wd_json_representation()
def create_reference(item_id: str, database_target: str = ITEMS['Bgee'], url_prefix: str = GENE_PAGE_PREFIX) -> list:
    """Create the Wikidata reference for a database entry (Bgee by default).

    :param item_id: the id appended to ``url_prefix`` to form the referenced
        URL. For example, Bgee gene pages use Ensembl ids, as in
        https://bgee.org/?page=gene&gene_id=ENSG00000216588.
    :param database_target: the Wikidata item representing the referenced
        database.
    :param url_prefix: URL prefix such that ``url_prefix + item_id`` is the
        URL to reference.
    :return: list ``[stated in, retrieved (today, UTC), reference URL]``
    """
    retrieved_on = strftime("+%Y-%m-%dT00:00:00Z", gmtime())
    return [
        wdi_core.WDItemID(database_target,
                          PROPS['stated in'],
                          is_reference=True),
        wdi_core.WDTime(retrieved_on,
                        PROPS['retrieved'],
                        is_reference=True),
        wdi_core.WDUrl(url_prefix + item_id,
                       PROPS['reference URL'],
                       is_reference=True),
    ]
def add_modprot(self, property_list, result):
    """
    Fill ``property_list`` with the statements for a modified protein.

    :param property_list: dict of property id -> list of statements, filled in place
    :param result: the data from Reactome (reads ``result['protein']['value']``)
    :return:
    """
    # P279 = subclass of, resolved from the UniProt id
    subclass_lookup = acquire_wikidata_links.WDGetData(
        'uniprotid', 'P279', self.wikidata_sparql)
    subclass_lookup.add_term(result['protein']['value'], property_list,
                             self.reference)
    # Remember every protein that could not be resolved, without duplicates
    for missing in subclass_lookup.get_missing_terms():
        known_proteins = global_variables.used_wd_ids['proteins']
        if missing not in known_proteins:
            known_proteins.append(missing)

    # P703 = found in taxon
    property_list["P703"] = [
        wdi_core.WDItemID(value=self.species,
                          prop_nr='P703',
                          references=[copy.deepcopy(self.reference)])
    ]

    # P2888 = exact match, only when a PRO id is known for this Reactome id
    pro = global_variables.get_pro_for_id(self.reactome_id)
    if pro != '':
        obo_url = 'http://purl.obolibrary.org/obo/{0}'.format(
            pro.replace(':', '_'))
        property_list["P2888"] = [
            wdi_core.WDUrl(obo_url,
                           prop_nr='P2888',
                           references=[copy.deepcopy(self.reference)])
        ]

    # P3937 = Reactome ID (no reference attached)
    property_list["P3937"] = [
        wdi_core.WDString(value=self.reactome_id, prop_nr='P3937')
    ]

    AddModProt.add_modprot_parts(self, property_list, result)
def create_equiv_property_property(login):
    """Create the "equivalent property" property and link it to its OWL URI.

    Counterpart of https://www.wikidata.org/wiki/Property:P1628. The property
    is written first, then reloaded and given a statement (using itself as
    the property) pointing at owl#equivalentProperty. The function sleeps 30
    seconds before returning.

    :param login: login object used for both writes
    :return: id of the newly created property
    """
    prop_label = "equivalent property"
    new_prop = localItemEngine(item_name=prop_label, domain="foo")
    new_prop.set_label(prop_label)
    new_prop.set_description(
        "equivalent property in other ontologies (use in statements on properties, use property URI)"
    )
    new_prop.write(login, entity_type="property", property_datatype="url")
    equiv_prop_pid = new_prop.wd_item_id

    # add equiv prop statement to equiv prop
    reloaded = localItemEngine(wd_item_id=equiv_prop_pid)
    del reloaded.wd_json_representation['sitelinks']
    owl_statement = wdi_core.WDUrl(
        "http://www.w3.org/2002/07/owl#equivalentProperty", equiv_prop_pid)
    reloaded.update(data=[owl_statement])
    reloaded.write(login)

    # so the updater updates blazegraph
    time.sleep(30)
    return equiv_prop_pid
def add_reaction(self, property_list, result):
    """
    Fill ``property_list`` with the statements for a Reactome reaction.

    :param property_list: dict of property id -> list of statements, filled in place
    :param result: the data from Reactome
    :return:
    """

    def fresh_reference():
        # Every statement gets its own deep copy of the shared reference.
        return [copy.deepcopy(self.reference)]

    # P31 = instance of biological process
    property_list["P31"] = [
        wdi_core.WDItemID(value="Q2996394",
                          prop_nr="P31",
                          references=fresh_reference())
    ]
    # P2888 = exact match
    property_list["P2888"] = [
        wdi_core.WDUrl(self.match_url,
                       prop_nr='P2888',
                       references=fresh_reference())
    ]
    # P703 = found in taxon
    property_list["P703"] = [
        wdi_core.WDItemID(value=self.species,
                          prop_nr='P703',
                          references=fresh_reference())
    ]
    # P3937 = Reactome ID (no reference attached)
    property_list["P3937"] = [
        wdi_core.WDString(value=self.reactome_id, prop_nr='P3937')
    ]

    AddEntry.add_go_term(self, property_list, result)
    AddEntry.add_citations(self, property_list, result)
    AddEntry.add_part_of(self, property_list, result)
    for part_kind in ('inputs', 'outputs', 'mods'):
        AddEntity.add_entity_parts(self, property_list, result, part_kind)
def make_statements(self):
    """Build the statements describing this edition and store them in ``self.statements``.

    Always adds instance of (P31), edition or translation of (P629) and
    edition number (P393). The archive URL (P1065) and publication date
    (P577) are added only when the corresponding attribute is truthy.
    Property and item ids are resolved through ``self.helper``.
    """
    helper = self.helper
    statements = [
        # instance of edition
        wdi_core.WDItemID(helper.get_qid('Q3331189'), helper.get_pid("P31")),
        # edition or translation of
        wdi_core.WDItemID(self.edition_of_qid, helper.get_pid("P629")),
        # edition number
        wdi_core.WDString(self.edition, helper.get_pid("P393")),
    ]

    if self.archive_url:
        statements.append(
            wdi_core.WDUrl(self.archive_url, helper.get_pid('P1065')))

    if self.pub_date:
        statements.append(
            wdi_core.WDTime(self.pub_date,
                            helper.get_pid('P577'),
                            precision=self.date_precision))

    self.statements = statements