def test_basedatatype_if_exists(self):
    """Check how a statement's if_exists flag drives claim merging in ItemEngine.

    Builds four identical P31=Q1234 statements that differ only in their
    if_exists strategy, applies them to live item Q2, and inspects the
    resulting JSON claims for P31.
    """
    instance_of_append = wbi_datatype.ItemID(prop_nr='P31', value='Q1234', if_exists='APPEND')
    instance_of_forceappend = wbi_datatype.ItemID(prop_nr='P31', value='Q1234', if_exists='FORCE_APPEND')
    instance_of_replace = wbi_datatype.ItemID(prop_nr='P31', value='Q1234', if_exists='REPLACE')
    instance_of_keep = wbi_datatype.ItemID(prop_nr='P31', value='Q1234', if_exists='KEEP')

    # APPEND: passing the same value twice must still yield a single Q1234
    # claim next to the pre-existing P31 claims.
    item = wbi_core.ItemEngine(
        item_id="Q2", data=[instance_of_append, instance_of_append])
    claims = [
        x['mainsnak']['datavalue']['value']['id']
        for x in item.get_json_representation()['claims']['P31']
    ]
    assert len(claims) > 1 and 'Q1234' in claims and claims.count(
        'Q1234') == 1

    # FORCE_APPEND: duplicates are kept, so Q1234 appears twice.
    item = wbi_core.ItemEngine(
        item_id="Q2", data=[instance_of_forceappend, instance_of_forceappend])
    claims = [
        x['mainsnak']['datavalue']['value']['id']
        for x in item.get_json_representation()['claims']['P31']
    ]
    assert len(claims) > 1 and 'Q1234' in claims and claims.count(
        'Q1234') == 2

    # REPLACE: the existing P31 claims are marked 'remove' and only Q1234
    # survives. The count of 3 removed claims assumes Q2 currently carries
    # exactly three P31 statements — TODO confirm against live data.
    item = wbi_core.ItemEngine(item_id="Q2", data=[instance_of_replace], debug=True)
    claims = [
        x['mainsnak']['datavalue']['value']['id']
        for x in item.get_json_representation()['claims']['P31'] if 'remove' not in x
    ]
    removed_claims = [
        True for x in item.get_json_representation()['claims']['P31']
        if 'remove' in x
    ]
    assert len(claims) == 1 and 'Q1234' in claims and len(
        removed_claims) == 3 and True in removed_claims

    # KEEP: the new value is dropped entirely; the original claims remain.
    item = wbi_core.ItemEngine(item_id="Q2", data=[instance_of_keep], debug=True)
    claims = [
        x['mainsnak']['datavalue']['value']['id']
        for x in item.get_json_representation()['claims']['P31']
    ]
    assert len(claims) == 3 and 'Q1234' not in claims
def test_label(self):
    """Exercise label/alias getters and setters against live item Q2 (Earth)."""
    item = wbi_core.ItemEngine(item_id="Q2")
    assert item.get_label('en') == "Earth"
    assert "globe" in item.get_aliases()
    assert item.get_label("es") == "Tierra"

    # Setting the same label is a no-op; overwriting works; KEEP does not
    # clobber an existing value.
    item.set_label("Earth")
    item.set_label("lorem")
    item.set_label("lorem ipsum", lang='en', if_exists='KEEP')
    assert item.json_representation['labels']['en'] == {'language': 'en', 'value': 'lorem'}
    assert item.json_representation['labels']['fr'] == {'language': 'fr', 'value': 'Terre'}

    item.set_aliases(["fake alias"], if_exists='APPEND')
    assert {'language': 'en', 'value': 'fake alias'} in item.json_representation['aliases']['en']

    # Passing label=None flags the label for removal; an unknown language
    # key must not raise.
    item.set_label(label=None, lang='fr')
    item.set_label(label=None, lang='non-exist-key')
    assert 'remove' in item.json_representation['labels']['fr']

    # 'ak' (Akan) is presumably empty on Q2 — these calls only check that
    # reading/writing a fresh language does not throw.
    item.get_label("ak")
    item.get_description("ak")
    item.get_aliases("ak")
    item.set_label("label", lang='ak')
    item.set_description("d", lang='ak')
    item.set_aliases(["a"], lang='ak', if_exists='APPEND')
    assert item.get_aliases('ak') == ['a']
    # A bare string alias defaults to APPEND behavior.
    item.set_aliases("b", lang='ak')
    assert item.get_aliases('ak') == ['a', 'b']
    item.set_aliases("b", lang='ak', if_exists='REPLACE')
    assert item.get_aliases('ak') == ['b']
    item.set_aliases(["c"], lang='ak', if_exists='REPLACE')
    assert item.get_aliases('ak') == ['c']
def test_description(self):
    """Exercise get/set_description and the KEEP/REPLACE semantics on Q2."""
    earth = wbi_core.ItemEngine(item_id="Q2")
    english_description = earth.get_description('en')
    assert len(english_description) > 3
    assert "planet" in earth.get_description()

    # Re-set the existing description, overwrite it, then verify that
    # KEEP refuses to clobber the freshly written value.
    earth.set_description(english_description)
    earth.set_description("lorem")
    earth.set_description("lorem ipsum", lang='en', if_exists='KEEP')
    assert earth.json_representation['descriptions']['en'] == {
        'language': 'en',
        'value': 'lorem'
    }

    # Once the description has been emptied, KEEP may fill it in.
    earth.set_description("")
    earth.set_description("lorem ipsum", lang='en', if_exists='KEEP')
    assert earth.json_representation['descriptions']['en'] == {
        'language': 'en',
        'value': 'lorem ipsum'
    }

    # REPLACE always wins; KEEP never overwrites a non-empty value.
    earth.set_description("lorem", lang='fr', if_exists='KEEP')
    earth.set_description("lorem ipsum", lang='fr', if_exists='REPLACE')
    earth.set_description("lorem", lang='en', if_exists='KEEP')
    for code in ('en', 'fr'):
        assert earth.json_representation['descriptions'][code] == {
            'language': code,
            'value': 'lorem ipsum'
        }
def test_fastrun_label(self):
    # tests fastrun label, description and aliases, and label in another language
    data = [wbi_core.ExternalID('/m/02j71', 'P646')]
    fast_run_base_filter = {'P361': 'Q18589965'}
    item = wbi_core.ItemEngine(item_id="Q2", data=data, fast_run=True,
                               fast_run_base_filter=fast_run_base_filter)
    # Grab the fast-run container that the engine registered; assumes this is
    # the first entry in the class-level store — TODO confirm no other test
    # populated fast_run_store earlier in the run.
    frc = wbi_core.ItemEngine.fast_run_store[0]
    frc.debug = True

    assert item.get_label('en') == "Earth"
    descr = item.get_description('en')
    assert len(descr) > 3
    aliases = item.get_aliases()
    assert "Terra" in aliases

    # Language data must be retrievable through the fast-run container too.
    assert list(
        item.fast_run_container.get_language_data("Q2", 'en', 'label'))[0] == "Earth"
    # check_language_data returns truthy when the supplied data differs from
    # what is stored (i.e. a write would be needed).
    assert item.fast_run_container.check_language_data(
        "Q2", ['not the Earth'], 'en', 'label')
    assert "Terra" in item.get_aliases()
    assert "planet" in item.get_description()
    assert item.get_label("es") == "Tierra"

    item.set_description(descr)
    item.set_description("fghjkl")
    assert item.json_representation['descriptions']['en'] == {
        'language': 'en',
        'value': 'fghjkl'
    }
    item.set_label("Earth")
    item.set_label("xfgfdsg")
    assert item.json_representation['labels']['en'] == {
        'language': 'en',
        'value': 'xfgfdsg'
    }
    item.set_aliases(["fake alias"], if_exists='APPEND')
    assert {
        'language': 'en',
        'value': 'fake alias'
    } in item.json_representation['aliases']['en']

    # something thats empty (for now.., can change, so this just makes sure no exception is thrown)
    frc.check_language_data("Q2", ['Ewiase'], 'ak', 'label')
    frc.check_language_data("Q2", ['not Ewiase'], 'ak', 'label')
    frc.check_language_data("Q2", [''], 'ak', 'description')
    frc.check_language_data("Q2", [], 'ak', 'aliases')
    frc.check_language_data("Q2", ['sdf', 'sdd'], 'ak', 'aliases')

    # Reading and writing an (empty) language must not raise in fast-run mode.
    item.get_label("ak")
    item.get_description("ak")
    item.get_aliases("ak")
    item.set_label("label", lang="ak")
    item.set_description("d", lang="ak")
    item.set_aliases(["a"], lang="ak", if_exists='APPEND')
def test_sitelinks():
    """Sitelinks: absent links read as missing, newly set ones round-trip."""
    statements = [wbi_core.ItemID(value='Q12136', prop_nr='P31')]
    entity = wbi_core.ItemEngine(item_id='Q622901', data=statements)

    # Reading a sitelink that is not present must not create it.
    entity.get_sitelink("enwiki")
    assert "enwiki" not in entity.json_representation['sitelinks']

    # After setting it, both the getter and the raw JSON must expose it.
    entity.set_sitelink("enwiki", "something")
    assert entity.get_sitelink("enwiki")['title'] == "something"
    assert "enwiki" in entity.json_representation['sitelinks']
def test_item_engine(self):
    """Constructor smoke tests: valid argument shapes succeed, invalid ones raise."""
    make = wbi_core.ItemEngine

    # Accepted shapes for `data`: absent, None, one datatype, list of datatypes.
    make(debug=True)
    make(data=None, debug=True)
    make(data=wbi_datatype.String(value='test', prop_nr='P1'), debug=True)
    make(data=[wbi_datatype.String(value='test', prop_nr='P1')], debug=True)

    # Rejected configurations.
    with self.assertRaises(TypeError):
        make(data='test', debug=True)
    with self.assertRaises(ValueError):
        make(fast_run_case_insensitive=True, debug=True)
    with self.assertRaises(TypeError):
        make(ref_handler='test', debug=True)
    with self.assertRaises(ValueError):
        make(global_ref_mode='CUSTOM', debug=True)

    # Fast-run construction against a live item must succeed.
    make(item_id='Q2', fast_run=True, debug=True)
def test_nositelinks():
    """Same sitelink round-trip, on an item expected to carry no sitelinks."""
    # this item doesn't and probably wont ever have any sitelinks (but who knows?? maybe one day..)
    statements = [wbi_core.ItemID(value='Q5', prop_nr='P31')]
    entity = wbi_core.ItemEngine(item_id='Q27869338', data=statements)

    # Reading a missing sitelink must not create it.
    entity.get_sitelink("enwiki")
    assert "enwiki" not in entity.json_representation['sitelinks']

    # Setting one must be reflected by both the getter and the raw JSON.
    entity.set_sitelink("enwiki", "something")
    assert entity.get_sitelink("enwiki")['title'] == "something"
    assert "enwiki" in entity.json_representation['sitelinks']
def import_entity(username, password, data, label="", item_id=""):
    """Write `data` to entity `item_id` (or create a new one) and return its id.

    Logs in with the given credentials; when `label` is non-empty it is set
    as the English label (ENGLISH is a module-level constant) before writing.
    """
    session = wbi_login.Login(user=username, pwd=password)
    item = wbi_core.ItemEngine(data=data, item_id=item_id)
    if label:
        item.set_label(label, ENGLISH)
    return item.write(session)
def test_live_item(self):
    """Test an item against Wikidata: Q423111 must carry a mass (P2067) statement."""
    item = wbi_core.ItemEngine(item_id='Q423111')
    # next(...) instead of [...].pop(): no throwaway list, and a missing
    # statement yields None instead of an opaque IndexError.
    mass_statement = next(
        (x for x in item.statements if x.get_prop_nr() == 'P2067'), None)
    # The original used a bare `raise` outside any except clause, which
    # produces "RuntimeError: No active exception to re-raise" instead of a
    # meaningful test failure; assert with a message instead. The check also
    # now runs before we dereference the statement.
    assert mass_statement is not None, "No P2067 (mass) statement found on Q423111"
    pprint.pprint(mass_statement.get_json_representation())
def test_search_only(self):
    """search_only mode must still expose labels, descriptions and aliases."""
    earth = wbi_core.ItemEngine(item_id="Q2", search_only=True)
    assert earth.get_label('en') == "Earth"
    description = earth.get_description('en')
    assert len(description) > 3
    assert "globe" in earth.get_aliases()
    assert "planet" in earth.get_description()
    assert earth.get_label("es") == "Tierra"
def add_usage_example(
    document_id=None,
    sentence=None,
    lid=None,
    form_id=None,
    sense_id=None,
    word=None,
):
    """Add a usage-example statement (P5831) to lexeme `lid` via wbi.

    Qualifiers link the example to `form_id` (P5830) and `sense_id` (P6072);
    the reference block records the Riksdagen document `document_id` and the
    fetch date. Relies on module-level `debug` and `login_instance` —
    presumably set up by the caller; verify before reuse. `word` is accepted
    but unused here.
    """
    # Use WikibaseIntegrator aka wbi to upload the changes
    link_to_form = wbi_core.Form(prop_nr="P5830", value=form_id, is_qualifier=True)
    link_to_sense = wbi_core.Sense(prop_nr="P6072", value=sense_id, is_qualifier=True)
    reference = [
        wbi_core.ItemID(
            prop_nr="P248",  # Stated in Riksdagen open data portal
            value="Q21592569",
            is_reference=True),
        wbi_core.ExternalID(
            prop_nr="P8433",  # Riksdagen Document ID
            value=document_id,
            is_reference=True),
        wbi_core.Time(
            prop_nr="P813",  # Fetched today
            # Midnight UTC of today, in Wikibase "+Y-m-dTH:M:SZ" form.
            time=datetime.datetime.utcnow().replace(
                tzinfo=datetime.timezone.utc).replace(
                    hour=0,
                    minute=0,
                    second=0,
                ).strftime("+%Y-%m-%dT%H:%M:%SZ"),
            is_reference=True,
        )
    ]
    # The usage example itself, as Swedish monolingual text.
    claim = wbi_core.MonolingualText(
        sentence,
        "P5831",
        language="sv",
        qualifiers=[link_to_form, link_to_sense],
        references=[reference],
    )
    # print(claim)
    if debug:
        print(claim.get_json_representation())
    item = wbi_core.ItemEngine(data=[claim], item_id=lid)
    if debug:
        print(item.get_json_representation())
    result = item.write(
        login_instance,
        edit_summary="Added usage example with [[Wikidata:rikslex]]")
    return result
def test_label(self):
    """Exercise label, description and alias accessors against live item Q2."""
    item = wbi_core.ItemEngine(item_id="Q2")
    assert item.get_label('en') == "Earth"
    descr = item.get_description('en')
    assert len(descr) > 3
    assert "Terra" in item.get_aliases()
    assert "planet" in item.get_description()
    assert item.get_label("es") == "Tierra"

    # set_description on already existing description: KEEP must not clobber
    # the freshly written value.
    item.set_description(descr)
    item.set_description("fghjkl")
    item.set_description("fghjkltest", lang='en', if_exists='KEEP')
    assert item.json_representation['descriptions']['en'] == {'language': 'en', 'value': 'fghjkl'}

    # set_description on empty description: KEEP may fill it in.
    item.set_description("")
    item.set_description("zaehjgreytret", lang='en', if_exists='KEEP')
    assert item.json_representation['descriptions']['en'] == {'language': 'en', 'value': 'zaehjgreytret'}

    # Same KEEP semantics for labels.
    item.set_label("Earth")
    item.set_label("xfgfdsg")
    item.set_label("xfgfdsgtest", lang='en', if_exists='KEEP')
    assert item.json_representation['labels']['en'] == {'language': 'en', 'value': 'xfgfdsg'}
    assert item.json_representation['labels']['fr'] == {'language': 'fr', 'value': 'Terre'}

    item.set_aliases(["fake alias"], if_exists='APPEND')
    assert {'language': 'en', 'value': 'fake alias'} in item.json_representation['aliases']['en']

    # label=None marks the label for removal; unknown language keys must not raise.
    item.set_label(label=None, lang='fr')
    item.set_label(label=None, lang='non-exist-key')
    assert 'remove' in item.json_representation['labels']['fr']

    # 'ak' (Akan) is presumably empty on Q2 — exercise a fresh language.
    item.get_label("ak")
    item.get_description("ak")
    item.get_aliases("ak")
    item.set_label("label", lang='ak')
    item.set_description("d", lang='ak')
    item.set_aliases(["a"], lang='ak', if_exists='APPEND')
    assert item.get_aliases('ak') == ['a']
    # A bare string alias defaults to append behavior.
    item.set_aliases("b", lang='ak')
    assert item.get_aliases('ak') == ['a', 'b']
    item.set_aliases("b", lang='ak', if_exists='REPLACE')
    assert item.get_aliases('ak') == ['b']
    item.set_aliases(["c"], lang='ak', if_exists='REPLACE')
    assert item.get_aliases('ak') == ['c']
def test_new_item_creation(self):
    """Build new items from one statement of every supported datatype.

    Each datatype is fed to ItemEngine both as a bare object and wrapped in
    a list, with and without core_props, and the JSON representation must be
    produced without error each time.
    """
    data = [
        # String property accepts 'P1', '1' and bare int prop numbers.
        wbi_datatype.String(value='test1', prop_nr='P1'),
        wbi_datatype.String(value='test2', prop_nr='1'),
        wbi_datatype.String(value='test3', prop_nr=1),
        wbi_datatype.Math("xxx", prop_nr="P2"),
        wbi_datatype.ExternalID("xxx", prop_nr="P3"),
        # ItemID likewise accepts 'Q123', '123' and 123.
        wbi_datatype.ItemID("Q123", prop_nr="P4"),
        wbi_datatype.ItemID("123", prop_nr="P4"),
        wbi_datatype.ItemID(123, prop_nr="P4"),
        # Time values: negative year, unsigned year, and full '+' form.
        wbi_datatype.Time(time='-0458-00-00T00:00:00Z', before=1, after=2, precision=3, timezone=4, prop_nr="P5"),
        wbi_datatype.Time(time='458-00-00T00:00:00Z', before=1, after=2, precision=3, timezone=4, prop_nr="P5"),
        wbi_datatype.Time(time='+2021-01-01T15:15:15Z', before=1, after=2, precision=3, timezone=4, prop_nr="P5"),
        # A spread of URL schemes that must all validate.
        wbi_datatype.Url("http://www.wikidata.org", prop_nr="P6"),
        wbi_datatype.Url("https://www.wikidata.org", prop_nr="P6"),
        wbi_datatype.Url("ftp://example.com", prop_nr="P6"),
        wbi_datatype.Url("ssh://user@server/project.git", prop_nr="P6"),
        wbi_datatype.Url("svn+ssh://user@server:8888/path", prop_nr="P6"),
        wbi_datatype.MonolingualText(text="xxx", language="fr", prop_nr="P7"),
        wbi_datatype.Quantity(quantity=-5.04, prop_nr="P8"),
        wbi_datatype.Quantity(quantity=5.06, upper_bound=9.99, lower_bound=-2.22, unit="Q11573", prop_nr="P8"),
        wbi_datatype.CommonsMedia("xxx", prop_nr="P9"),
        wbi_datatype.GlobeCoordinate(latitude=1.2345, longitude=-1.2345, precision=12, prop_nr="P10"),
        wbi_datatype.GeoShape("Data:xxx.map", prop_nr="P11"),
        wbi_datatype.Property("P123", prop_nr="P12"),
        wbi_datatype.Property("123", prop_nr="P12"),
        wbi_datatype.Property(123, prop_nr="P12"),
        wbi_datatype.TabularData("Data:Taipei+Population.tab", prop_nr="P13"),
        # NOTE(review): "\r" in this literal is a carriage-return escape, not
        # backslash+r — presumably intended as LilyPond "\relative"; a raw
        # string would be needed for that. Confirm before changing.
        wbi_datatype.MusicalNotation("\relative c' { c d e f | g2 g | a4 a a a | g1 |}", prop_nr="P14"),
        wbi_datatype.Lexeme("L123", prop_nr="P15"),
        wbi_datatype.Lexeme("123", prop_nr="P15"),
        wbi_datatype.Lexeme(123, prop_nr="P15"),
        wbi_datatype.Form("L123-F123", prop_nr="P16"),
        wbi_datatype.Sense("L123-S123", prop_nr="P17"),
        wbi_datatype.EDTF("2004-06-~01/2004-06-~20", prop_nr="P18")
    ]
    core_props = {f"P{x}" for x in range(20)}

    # Every datatype, in all four data/core_props combinations.
    for d in data:
        item = wbi_core.ItemEngine(new_item=True, data=[d], core_props=core_props)
        assert item.get_json_representation()
        item = wbi_core.ItemEngine(new_item=True, data=d, core_props=core_props)
        assert item.get_json_representation()
        item = wbi_core.ItemEngine(new_item=True, data=[d], core_props=set())
        assert item.get_json_representation()
        item = wbi_core.ItemEngine(new_item=True, data=d, core_props=set())
        assert item.get_json_representation()

    # The whole list at once, with and without core_props.
    item = wbi_core.ItemEngine(new_item=True, data=data, core_props=core_props)
    assert item.get_json_representation()
    item = wbi_core.ItemEngine(new_item=True, data=data, core_props=set())
    assert item.get_json_representation()
def add_source_in_db(source_name, stable_url, username=None, password=None):
    """Create a GTFS source entity in the Wikibase and register it in the catalog.

    Creates the source entity (instance-of, stable URL, catalog reference),
    derives an archives ID from `source_name` and the new entity id, writes it
    back to the entity, then appends the entity to the catalog-of-sources item.
    Credentials default to the USERNAME/PASSWORD environment variables.

    Returns a (source_entity_id, source_archives_id) tuple. Re-raises the
    ValueError from wbi_core.Url when `stable_url` is invalid.
    """
    # Load Wikibase Integrator config with the environment
    wbi_config["MEDIAWIKI_API_URL"] = os.environ[API_URL]
    wbi_config["SPARQL_ENDPOINT_URL"] = os.environ[SPARQL_BIGDATA_URL]
    wbi_config["WIKIBASE_URL"] = SVC_URL
    if not username:
        username = os.environ[USERNAME]
    if not password:
        password = os.environ[PASSWORD]
    stable_url_prop = os.environ[STABLE_URL_PROP]
    catalog_prop = os.environ[CATALOG_PROP]
    login_instance = wbi_login.Login(
        user=username,
        pwd=password,
    )
    source_instance_of = wbi_core.ItemID(
        prop_nr=os.environ[INSTANCE_PROP],
        value=os.environ[GTFS_SCHEDULE_SOURCE_CODE]
    )
    # The URL is validated at construction time; surface the failing URL in
    # the log before re-raising.
    try:
        source_stable_url = wbi_core.Url(value=stable_url, prop_nr=stable_url_prop)
    except ValueError as ve:
        print(f"url {stable_url} for source name {source_name} raised {ve}")
        raise ve
    source_catalog_ref = wbi_core.ItemID(
        prop_nr=catalog_prop,
        value=os.environ[GTFS_CATALOG_OF_SOURCES_CODE],  # fix this
    )
    # Loaded up-front so the catalog can be updated after the source is written.
    source_catalog_entity = wbi_core.ItemEngine(
        item_id=os.environ[GTFS_CATALOG_OF_SOURCES_CODE]
    )
    # Create the source entity
    source_data = [source_instance_of, source_stable_url, source_catalog_ref]
    source_entity = wbi_core.ItemEngine(data=source_data, core_props={stable_url_prop})
    source_entity.set_label(f"{source_name}")
    source_entity_id = source_entity.write(login=login_instance)
    # Create the Archives ID using the name and the entity code of the source
    # e.g. "some-operator-gtfs-q123": first 15 chars of the cleaned name,
    # non-alphabetical runs replaced by '-', suffixed with the entity id.
    archives_id_prefix = source_name.replace("'s GTFS Schedule source", "")
    archives_id_prefix = archives_id_prefix[:15]
    archives_id_prefix = re.sub(
        NON_ALPHABETICAL_CHAR_REGEX, "-", archives_id_prefix
    ).lower()
    archives_id_suffix = source_entity_id.lower()
    source_archives_id = f"{archives_id_prefix}-gtfs-{archives_id_suffix}"
    source_archives_id_data = wbi_core.String(
        prop_nr=os.environ[ARCHIVES_ID_PROP],
        value=source_archives_id,
    )
    # Second write: attach the archives ID to the freshly created entity.
    source_data_updated = [source_archives_id_data]
    source_entity_updated = wbi_core.ItemEngine(item_id=source_entity_id)
    source_entity_updated.update(source_data_updated)
    source_entity_updated.write(login=login_instance)
    # Update the catalog of sources
    # NOTE(review): APPEND here is presumably a module-level constant holding
    # the 'APPEND' if_exists mode — confirm it is defined/imported.
    source_entity_prop = wbi_core.ItemID(
        value=source_entity_id,
        prop_nr=os.environ[SOURCE_ENTITY_PROP],
        if_exists=APPEND
    )
    catalog_data = [source_entity_prop]
    source_catalog_entity.update(catalog_data)
    source_catalog_entity.write(login_instance)
    return source_entity_id, source_archives_id
def add_usage_example(
        document_id=None,
        sentence=None,
        lid=None,
        form_id=None,
        sense_id=None,
        word=None,
        publication_date=None,
        language_style=None,
        type_of_reference=None,
        source=None,
        line=None,
):
    """Upload a usage-example statement (P5831) to lexeme `lid` in one wbi edit.

    Qualifiers come from `form_id`, `sense_id`, `language_style` and
    `type_of_reference`; the reference block is built from the corpus named
    by `source` ("riksdagen" or "europarl"). Returns the result of
    ItemEngine.write(), or False when the Riksdagen publication date is
    missing. Calls exit(1) on invalid style/type arguments (original
    behavior, kept).
    """
    # Use WikibaseIntegrator aka wbi to upload the changes in one edit
    link_to_form = wbi_core.Form(prop_nr="P5830",
                                 value=form_id,
                                 is_qualifier=True)
    link_to_sense = wbi_core.Sense(prop_nr="P6072",
                                   value=sense_id,
                                   is_qualifier=True)
    # Map the language style onto its Wikidata item (elif instead of the
    # original nested else/if — same behavior).
    if language_style == "formal":
        style = "Q104597585"
    elif language_style == "informal":
        style = "Q901711"
    else:
        print(f"Error. Language style {language_style} " +
              "not one of (formal,informal)")
        exit(1)
    logging.debug("Generating qualifier language_style " +
                  f"with {style}")
    language_style_qualifier = wbi_core.ItemID(prop_nr="P6191",
                                               value=style,
                                               is_qualifier=True)
    # oral or written
    if type_of_reference == "written":
        medium = "Q47461344"
    elif type_of_reference == "oral":
        medium = "Q52946"
    else:
        print(f"Error. Type of reference {type_of_reference} " +
              "not one of (written,oral)")
        exit(1)
    logging.debug("Generating qualifier type of reference " +
                  f"with {medium}")
    type_of_reference_qualifier = wbi_core.ItemID(prop_nr="P3865",
                                                  value=medium,
                                                  is_qualifier=True)
    # Reference block depends on the source corpus; stays None until a known
    # source matches.
    reference = None
    if source == "riksdagen":
        if publication_date is not None:
            publication_date = datetime.fromisoformat(publication_date)
        else:
            # BUG FIX: the original literal lacked the f-prefix, so the text
            # "{document_id}" was printed instead of the actual id.
            print(f"Publication date of document {document_id} " +
                  "is missing. We have no fallback for that at the moment. " +
                  "Abort adding usage example.")
            return False
        stated_in = wbi_core.ItemID(prop_nr="P248",
                                    value="Q21592569",
                                    is_reference=True)
        document_id = wbi_core.ExternalID(
            prop_nr="P8433",  # Riksdagen Document ID
            value=document_id,
            is_reference=True)
        reference = [
            stated_in,
            document_id,
            wbi_core.Time(
                prop_nr="P813",  # Fetched today
                time=datetime.utcnow().replace(tzinfo=timezone.utc).replace(
                    hour=0,
                    minute=0,
                    second=0,
                ).strftime("+%Y-%m-%dT%H:%M:%SZ"),
                is_reference=True,
            ),
            wbi_core.Time(
                prop_nr="P577",  # Publication date
                time=publication_date.strftime("+%Y-%m-%dT00:00:00Z"),
                is_reference=True,
            ),
            type_of_reference_qualifier,
        ]
    if source == "europarl":
        stated_in = wbi_core.ItemID(prop_nr="P248",
                                    value="Q5412081",
                                    is_reference=True)
        reference = [
            stated_in,
            wbi_core.Time(
                prop_nr="P813",  # Fetched today
                time=datetime.utcnow().replace(tzinfo=timezone.utc).replace(
                    hour=0,
                    minute=0,
                    second=0,
                ).strftime("+%Y-%m-%dT%H:%M:%SZ"),
                is_reference=True,
            ),
            wbi_core.Time(
                prop_nr="P577",  # Publication date
                time="+2012-05-12T00:00:00Z",
                is_reference=True,
            ),
            wbi_core.Url(
                prop_nr="P854",  # reference url
                value="http://www.statmt.org/europarl/v7/sv-en.tgz",
                is_reference=True,
            ),
            # filename in archive
            wbi_core.String(
                (f"europarl-v7.{config.language_code}" +
                 f"-en.{config.language_code}"),
                "P7793",
                is_reference=True,
            ),
            # line number
            wbi_core.String(
                str(line),
                "P7421",
                is_reference=True,
            ),
            type_of_reference_qualifier,
        ]
    # ROBUSTNESS FIX: an unknown source previously fell through to a NameError
    # on `reference`; fail with an explicit message instead.
    if reference is None:
        raise ValueError(f"Unknown source {source!r}, cannot build a reference")
    # This is the usage example statement
    claim = wbi_core.MonolingualText(
        sentence,
        "P5831",
        language=config.language_code,
        # Add qualifiers
        qualifiers=[
            link_to_form,
            link_to_sense,
            language_style_qualifier,
        ],
        # Add reference
        references=[reference],
    )
    if config.debug_json:
        logging.debug(f"claim:{claim.get_json_representation()}")
    item = wbi_core.ItemEngine(
        data=[claim],
        append_value=["P5831"],
        item_id=lid,
    )
    # if config.debug_json:
    #     print(item.get_json_representation())
    if config.login_instance is None:
        # Authenticate with WikibaseIntegrator
        print("Logging in with Wikibase Integrator")
        config.login_instance = wbi_login.Login(user=config.username,
                                                pwd=config.password)
    result = item.write(
        config.login_instance,
        edit_summary="Added usage example with [[Wikidata:LexUse]]")
    if config.debug_json:
        logging.debug(f"result from WBI:{result}")
    return result
from wikibaseintegrator import wbi_login, wbi_core
import logging
import os

logging.basicConfig(level=logging.INFO)

# SECURITY FIX: the original committed a literal password to source control.
# Read credentials from the environment instead.
login_instance = wbi_login.Login(user=os.environ['WIKIDATA_USER'],
                                 pwd=os.environ['WIKIDATA_PASSWORD'])

my_first_wikidata_item = wbi_core.ItemEngine(item_id='Q1')

# to check successful installation and retrieval of the data, you can print the json representation of the item
print(my_first_wikidata_item.get_json_representation())

# BUG FIX: ItemEngine expects `data` to be a list of wbi datatype objects;
# the original passed a plain {prop: value} dict, which is rejected with a
# TypeError. Build the statement as a Url datatype instead.
result = wbi_core.ItemEngine(
    item_id='Q1',
    data=[wbi_core.Url(value='http://www.wikidata.org/entity/Q65216433',
                       prop_nr='P3')])
class TestWbiCore(unittest.TestCase):
    """Integration tests for wbi_core.ItemEngine against live item Q2 (Earth)."""

    # Shared read-only fixture; fetched once at class-definition time
    # (network I/O happens on import of this module).
    common_item = wbi_core.ItemEngine(item_id="Q2")

    def test_item_engine(self):
        """Constructor smoke tests: valid argument shapes succeed, invalid ones raise."""
        wbi_core.ItemEngine(debug=True)
        wbi_core.ItemEngine(data=None, debug=True)
        wbi_core.ItemEngine(data=wbi_datatype.String(value='test', prop_nr='P1'), debug=True)
        wbi_core.ItemEngine(data=[wbi_datatype.String(value='test', prop_nr='P1')], debug=True)
        with self.assertRaises(TypeError):
            wbi_core.ItemEngine(data='test', debug=True)
        with self.assertRaises(ValueError):
            wbi_core.ItemEngine(fast_run_case_insensitive=True, debug=True)
        with self.assertRaises(TypeError):
            wbi_core.ItemEngine(ref_handler='test', debug=True)
        with self.assertRaises(ValueError):
            wbi_core.ItemEngine(global_ref_mode='CUSTOM', debug=True)
        wbi_core.ItemEngine(item_id='Q2', fast_run=True, debug=True)

    def test_search_only(self):
        """search_only mode must still expose labels, descriptions and aliases."""
        item = wbi_core.ItemEngine(item_id="Q2", search_only=True)
        assert item.get_label('en') == "Earth"
        descr = item.get_description('en')
        assert len(descr) > 3
        assert "globe" in item.get_aliases()
        assert "planet" in item.get_description()
        assert item.get_label("es") == "Tierra"

    def test_basedatatype_if_exists(self):
        """Check the APPEND / FORCE_APPEND / REPLACE / KEEP claim-merge strategies."""
        instance_of_append = wbi_datatype.ItemID(prop_nr='P31', value='Q1234', if_exists='APPEND')
        instance_of_forceappend = wbi_datatype.ItemID(prop_nr='P31', value='Q1234', if_exists='FORCE_APPEND')
        instance_of_replace = wbi_datatype.ItemID(prop_nr='P31', value='Q1234', if_exists='REPLACE')
        instance_of_keep = wbi_datatype.ItemID(prop_nr='P31', value='Q1234', if_exists='KEEP')

        # APPEND deduplicates: Q1234 appears once despite being passed twice.
        item = wbi_core.ItemEngine(item_id="Q2", data=[instance_of_append, instance_of_append])
        claims = [x['mainsnak']['datavalue']['value']['id'] for x in item.get_json_representation()['claims']['P31']]
        assert len(claims) > 1 and 'Q1234' in claims and claims.count('Q1234') == 1

        # FORCE_APPEND keeps duplicates: Q1234 appears twice.
        item = wbi_core.ItemEngine(item_id="Q2", data=[instance_of_forceappend, instance_of_forceappend])
        claims = [x['mainsnak']['datavalue']['value']['id'] for x in item.get_json_representation()['claims']['P31']]
        assert len(claims) > 1 and 'Q1234' in claims and claims.count('Q1234') == 2

        # REPLACE marks existing P31 claims 'remove'; the count of 3 assumes
        # Q2 currently has exactly three P31 statements — TODO confirm.
        item = wbi_core.ItemEngine(item_id="Q2", data=[instance_of_replace], debug=True)
        claims = [x['mainsnak']['datavalue']['value']['id'] for x in item.get_json_representation()['claims']['P31'] if 'remove' not in x]
        removed_claims = [True for x in item.get_json_representation()['claims']['P31'] if 'remove' in x]
        assert len(claims) == 1 and 'Q1234' in claims and len(removed_claims) == 3 and True in removed_claims

        # KEEP drops the new value entirely.
        item = wbi_core.ItemEngine(item_id="Q2", data=[instance_of_keep], debug=True)
        claims = [x['mainsnak']['datavalue']['value']['id'] for x in item.get_json_representation()['claims']['P31']]
        assert len(claims) == 3 and 'Q1234' not in claims

    def test_description(self):
        """Exercise get/set_description and KEEP/REPLACE semantics."""
        item = wbi_core.ItemEngine(item_id="Q2")
        descr = item.get_description('en')
        assert len(descr) > 3
        assert "planet" in item.get_description()

        # set_description on already existing description
        item.set_description(descr)
        item.set_description("lorem")
        item.set_description("lorem ipsum", lang='en', if_exists='KEEP')
        assert item.json_representation['descriptions']['en'] == {'language': 'en', 'value': 'lorem'}

        # set_description on empty description: KEEP may fill it in
        item.set_description("")
        item.set_description("lorem ipsum", lang='en', if_exists='KEEP')
        assert item.json_representation['descriptions']['en'] == {'language': 'en', 'value': 'lorem ipsum'}

        item.set_description("lorem", lang='fr', if_exists='KEEP')
        item.set_description("lorem ipsum", lang='fr', if_exists='REPLACE')
        item.set_description("lorem", lang='en', if_exists='KEEP')
        assert item.json_representation['descriptions']['en'] == {'language': 'en', 'value': 'lorem ipsum'}
        assert item.json_representation['descriptions']['fr'] == {'language': 'fr', 'value': 'lorem ipsum'}

    def test_label(self):
        """Exercise label and alias accessors, including removal and fresh languages."""
        item = wbi_core.ItemEngine(item_id="Q2")
        assert item.get_label('en') == "Earth"
        assert "globe" in item.get_aliases()
        assert item.get_label("es") == "Tierra"
        item.set_label("Earth")
        item.set_label("lorem")
        item.set_label("lorem ipsum", lang='en', if_exists='KEEP')
        assert item.json_representation['labels']['en'] == {'language': 'en', 'value': 'lorem'}
        assert item.json_representation['labels']['fr'] == {'language': 'fr', 'value': 'Terre'}
        item.set_aliases(["fake alias"], if_exists='APPEND')
        assert {'language': 'en', 'value': 'fake alias'} in item.json_representation['aliases']['en']
        # label=None marks the label for removal; unknown language keys must not raise.
        item.set_label(label=None, lang='fr')
        item.set_label(label=None, lang='non-exist-key')
        assert 'remove' in item.json_representation['labels']['fr']
        # 'ak' (Akan) is presumably empty on Q2 — exercise a fresh language.
        item.get_label("ak")
        item.get_description("ak")
        item.get_aliases("ak")
        item.set_label("label", lang='ak')
        item.set_description("d", lang='ak')
        item.set_aliases(["a"], lang='ak', if_exists='APPEND')
        assert item.get_aliases('ak') == ['a']
        item.set_aliases("b", lang='ak')
        assert item.get_aliases('ak') == ['a', 'b']
        item.set_aliases("b", lang='ak', if_exists='REPLACE')
        assert item.get_aliases('ak') == ['b']
        item.set_aliases(["c"], lang='ak', if_exists='REPLACE')
        assert item.get_aliases('ak') == ['c']

    def test_wd_search(self):
        """Entity search must return at least one hit for a known drug name."""
        t = wbi_functions.search_entities('rivaroxaban')
        print('Number of results: ', len(t))
        # NOTE(review): assertIsNot checks identity, not equality — presumably
        # assertNotEqual was intended; works here only via small-int interning.
        self.assertIsNot(len(t), 0)

    def test_item_generator(self):
        """generate_item_instances must yield (qid, item) pairs for the requested ids."""
        items = ['Q408883', 'P715', 'Q18046452']
        item_instances = wbi_functions.generate_item_instances(items=items)
        for qid, item in item_instances:
            self.assertIn(qid, items)

    def test_new_item_creation(self):
        """Build new items from one statement of every supported datatype."""
        data = [
            # String property accepts 'P1', '1' and bare int prop numbers.
            wbi_datatype.String(value='test1', prop_nr='P1'),
            wbi_datatype.String(value='test2', prop_nr='1'),
            wbi_datatype.String(value='test3', prop_nr=1),
            wbi_datatype.Math("xxx", prop_nr="P2"),
            wbi_datatype.ExternalID("xxx", prop_nr="P3"),
            wbi_datatype.ItemID("Q123", prop_nr="P4"),
            wbi_datatype.ItemID("123", prop_nr="P4"),
            wbi_datatype.ItemID(123, prop_nr="P4"),
            # Time values: negative year, unsigned year, and full '+' form.
            wbi_datatype.Time(time='-0458-00-00T00:00:00Z', before=1, after=2, precision=3, timezone=4, prop_nr="P5"),
            wbi_datatype.Time(time='458-00-00T00:00:00Z', before=1, after=2, precision=3, timezone=4, prop_nr="P5"),
            wbi_datatype.Time(time='+2021-01-01T15:15:15Z', before=1, after=2, precision=3, timezone=4, prop_nr="P5"),
            # A spread of URL schemes that must all validate.
            wbi_datatype.Url("http://www.wikidata.org", prop_nr="P6"),
            wbi_datatype.Url("https://www.wikidata.org", prop_nr="P6"),
            wbi_datatype.Url("ftp://example.com", prop_nr="P6"),
            wbi_datatype.Url("ssh://user@server/project.git", prop_nr="P6"),
            wbi_datatype.Url("svn+ssh://user@server:8888/path", prop_nr="P6"),
            wbi_datatype.MonolingualText(text="xxx", language="fr", prop_nr="P7"),
            wbi_datatype.Quantity(quantity=-5.04, prop_nr="P8"),
            wbi_datatype.Quantity(quantity=5.06, upper_bound=9.99, lower_bound=-2.22, unit="Q11573", prop_nr="P8"),
            wbi_datatype.CommonsMedia("xxx", prop_nr="P9"),
            wbi_datatype.GlobeCoordinate(latitude=1.2345, longitude=-1.2345, precision=12, prop_nr="P10"),
            wbi_datatype.GeoShape("Data:xxx.map", prop_nr="P11"),
            wbi_datatype.Property("P123", prop_nr="P12"),
            wbi_datatype.Property("123", prop_nr="P12"),
            wbi_datatype.Property(123, prop_nr="P12"),
            wbi_datatype.TabularData("Data:Taipei+Population.tab", prop_nr="P13"),
            # NOTE(review): "\r" here is a carriage-return escape, presumably
            # meant as LilyPond "\relative" — confirm (raw string needed).
            wbi_datatype.MusicalNotation("\relative c' { c d e f | g2 g | a4 a a a | g1 |}", prop_nr="P14"),
            wbi_datatype.Lexeme("L123", prop_nr="P15"),
            wbi_datatype.Lexeme("123", prop_nr="P15"),
            wbi_datatype.Lexeme(123, prop_nr="P15"),
            wbi_datatype.Form("L123-F123", prop_nr="P16"),
            wbi_datatype.Sense("L123-S123", prop_nr="P17"),
            wbi_datatype.EDTF("2004-06-~01/2004-06-~20", prop_nr="P18")
        ]
        core_props = {f"P{x}" for x in range(20)}

        # Every datatype, bare and list-wrapped, with and without core_props.
        for d in data:
            item = wbi_core.ItemEngine(new_item=True, data=[d], core_props=core_props)
            assert item.get_json_representation()
            item = wbi_core.ItemEngine(new_item=True, data=d, core_props=core_props)
            assert item.get_json_representation()
            item = wbi_core.ItemEngine(new_item=True, data=[d], core_props=set())
            assert item.get_json_representation()
            item = wbi_core.ItemEngine(new_item=True, data=d, core_props=set())
            assert item.get_json_representation()

        # The whole list at once.
        item = wbi_core.ItemEngine(new_item=True, data=data, core_props=core_props)
        assert item.get_json_representation()
        item = wbi_core.ItemEngine(new_item=True, data=data, core_props=set())
        assert item.get_json_representation()

    def test_get_property_list(self):
        """Q2 must expose a non-empty property list."""
        self.assertTrue(len(self.common_item.get_property_list()))

    def test_count_references(self):
        """The mass statement (P2067) must carry at least one reference."""
        self.assertTrue(len(self.common_item.count_references(prop_id='P2067')))

    def test_get_reference_properties(self):
        """Reference properties for P2067 must be non-empty."""
        self.assertTrue(len(self.common_item.get_reference_properties(prop_id='P2067')))

    def test_get_qualifier_properties(self):
        """Qualifier properties for P2067 must be non-empty."""
        print(self.common_item.get_qualifier_properties(prop_id='P170'))
        self.assertTrue(len(self.common_item.get_qualifier_properties(prop_id='P2067')))
lemma = lexeme lid = lexeme_data[0] print(f"Uploading id to {lid}: {lemma}") # TODO if numbered # - fetch lexeme using wbi # - present to user # - ask user which if one matches print(f"Adding {saob_id} to {lid}") saob_statement = wbi_core.ExternalID( prop_nr="P8478", value=saob_id, ) described_by_source = wbi_core.ItemID(prop_nr="P1343", value="Q1935308") item = wbi_core.ItemEngine( data=[saob_statement, described_by_source], #append_value="P8478", item_id=lid) result = item.write( login_instance, edit_summary= "Added SAOB identifier with [[Wikidata:Tools/LexSAOB]]" ) #if config.debug_json: #logging.debug(f"result from WBI:{result}") print(f"{wd_prefix}{lid}") exit(0) else: print("Categories did not match :/ - skipping") else: print(f"{lexeme} not found in SAOB wordlist")
def add_usage_example(
    document_id=None,
    sentence=None,
    lid=None,
    form_id=None,
    sense_id=None,
    word=None,
    publication_date=None,
    language_style=None,
    type_of_reference=None,
    source=None,
    line=None,
):
    """Upload a usage-example statement (P5831) to lexeme `lid` in one wbi edit.

    Qualifiers come from `form_id`, `sense_id`, `language_style` and
    `type_of_reference`; the reference block is built from the corpus named
    by `source` ("riksdagen", "europarl" or "ksamsok"). Returns the result
    of ItemEngine.write(), False when the Riksdagen publication date is
    missing, or None on invalid style/type arguments. Calls exit(1) when no
    reference could be built (original behavior, kept).
    """
    # Use WikibaseIntegrator aka wbi to upload the changes in one edit
    link_to_form = wbi_core.Form(
        prop_nr="P5830",
        value=form_id,
        is_qualifier=True
    )
    link_to_sense = wbi_core.Sense(
        prop_nr="P6072",
        value=sense_id,
        is_qualifier=True
    )
    # Map the language style onto its Wikidata item (elif instead of the
    # original nested else/if — same behavior).
    if language_style == "formal":
        style = "Q104597585"
    elif language_style == "informal":
        style = "Q901711"
    else:
        print(_(
            "Error. Language style {} ".format(language_style) +
            "not one of (formal,informal). Please report a bug at " +
            "https://github.com/egils-consulting/LexUtils/issues"
        ))
        return
    logging.debug("Generating qualifier language_style " +
                  f"with {style}")
    language_style_qualifier = wbi_core.ItemID(
        prop_nr="P6191",
        value=style,
        is_qualifier=True
    )
    # oral or written
    if type_of_reference == "written":
        medium = "Q47461344"
    elif type_of_reference == "oral":
        medium = "Q52946"
    else:
        print(_(
            "Error. Type of reference {} ".format(type_of_reference) +
            "not one of (written,oral). Please report a bug at " +
            "https://github.com/egils-consulting/LexUtils/issues"
        ))
        return
    logging.debug(_(
        "Generating qualifier type of reference " +
        "with {}".format(medium)
    ))
    type_of_reference_qualifier = wbi_core.ItemID(
        prop_nr="P3865",
        value=medium,
        is_qualifier=True
    )
    # ROBUSTNESS FIX: initialize so the `reference is None` guard below works;
    # previously an unknown source raised NameError before reaching the guard.
    reference = None
    if source == "riksdagen":
        if publication_date is not None:
            publication_date = datetime.fromisoformat(publication_date)
        else:
            # BUG FIX: `.format(document_id)` was applied only to the last
            # string literal (which has no placeholder) because + binds looser
            # than the method call, so "{}" was printed literally. Format the
            # whole concatenated message instead.
            print(_(
                ("Publication date of document {} " +
                 "is missing. We have no fallback for that at the moment. " +
                 "Abort adding usage example.").format(document_id)
            ))
            return False
        stated_in = wbi_core.ItemID(
            prop_nr="P248",
            value="Q21592569",
            is_reference=True
        )
        document_id = wbi_core.ExternalID(
            prop_nr="P8433",  # Riksdagen Document ID
            value=document_id,
            is_reference=True
        )
        reference = [
            stated_in,
            document_id,
            wbi_core.Time(
                prop_nr="P813",  # Fetched today
                time=datetime.utcnow().replace(
                    tzinfo=timezone.utc
                ).replace(
                    hour=0,
                    minute=0,
                    second=0,
                ).strftime("+%Y-%m-%dT%H:%M:%SZ"),
                is_reference=True,
            ),
            wbi_core.Time(
                prop_nr="P577",  # Publication date
                time=publication_date.strftime("+%Y-%m-%dT00:00:00Z"),
                is_reference=True,
            ),
            type_of_reference_qualifier,
        ]
    if source == "europarl":
        stated_in = wbi_core.ItemID(
            prop_nr="P248",
            value="Q5412081",
            is_reference=True
        )
        reference = [
            stated_in,
            wbi_core.Time(
                prop_nr="P813",  # Fetched today
                time=datetime.utcnow().replace(
                    tzinfo=timezone.utc
                ).replace(
                    hour=0,
                    minute=0,
                    second=0,
                ).strftime("+%Y-%m-%dT%H:%M:%SZ"),
                is_reference=True,
            ),
            wbi_core.Time(
                prop_nr="P577",  # Publication date
                time="+2012-05-12T00:00:00Z",
                is_reference=True,
            ),
            wbi_core.Url(
                prop_nr="P854",  # reference url
                value="http://www.statmt.org/europarl/v7/sv-en.tgz",
                is_reference=True,
            ),
            # filename in archive
            wbi_core.String(
                (f"europarl-v7.{config.language_code}" +
                 f"-en.{config.language_code}"),
                "P7793",
                is_reference=True,
            ),
            # line number
            wbi_core.String(
                str(line),
                "P7421",
                is_reference=True,
            ),
            type_of_reference_qualifier,
        ]
    if source == "ksamsok":
        # No date is provided unfortunately, so we set it to unknown value
        stated_in = wbi_core.ItemID(
            prop_nr="P248",
            value="Q7654799",
            is_reference=True
        )
        document_id = wbi_core.ExternalID(
            # K-Samsök URI
            prop_nr="P1260",
            value=document_id,
            is_reference=True
        )
        reference = [
            stated_in,
            document_id,
            wbi_core.Time(
                prop_nr="P813",  # Fetched today
                time=datetime.utcnow().replace(
                    tzinfo=timezone.utc
                ).replace(
                    hour=0,
                    minute=0,
                    second=0,
                ).strftime("+%Y-%m-%dT%H:%M:%SZ"),
                is_reference=True,
            ),
            wbi_core.Time(
                # We don't know the value of the publication dates unfortunately
                prop_nr="P577",  # Publication date
                time="",
                snak_type="somevalue",
                is_reference=True,
            ),
            type_of_reference_qualifier,
        ]
    if reference is None:
        # NOTE: uses `logger` while the rest of the function uses `logging` —
        # presumably a module-level logger; confirm it is defined.
        logger.error(_(
            "No reference defined, cannot add usage example"
        ))
        exit(1)
    # This is the usage example statement
    claim = wbi_core.MonolingualText(
        sentence,
        "P5831",
        language=config.language_code,
        # Add qualifiers
        qualifiers=[
            link_to_form,
            link_to_sense,
            language_style_qualifier,
        ],
        # Add reference
        references=[reference],
    )
    if config.debug_json:
        logging.debug(f"claim:{claim.get_json_representation()}")
    item = wbi_core.ItemEngine(
        data=[claim],
        append_value=["P5831"],
        item_id=lid,
    )
    # if config.debug_json:
    #     print(item.get_json_representation())
    if config.login_instance is None:
        # Authenticate with WikibaseIntegrator
        print("Logging in with Wikibase Integrator")
        config.login_instance = wbi_login.Login(
            user=config.username,
            pwd=config.password
        )
    result = item.write(
        config.login_instance,
        edit_summary=(
            _(
                "Added usage example " +
                "with [[Wikidata:LexUtils]] v{}".format(config.version)
            )
        )
    )
    if config.debug_json:
        logging.debug(f"result from WBI:{result}")
    # TODO add handling of result from WBI and return True == Success or False
    return result
def create_dataset_entity_for_gtfs_metadata(gtfs_representation, api_url, username=None, password=None):
    """Create a dataset entity for a new dataset version on the Database.

    :param gtfs_representation: The representation of the GTFS dataset to process.
    :param api_url: API url, either PRODUCTION_API_URL or STAGING_API_URL.
    :param username: Database login user; falls back to the USERNAME env var.
    :param password: Database login password; falls back to the PASSWORD env var.
    :return: The representation of the GTFS dataset post-execution.
    :raises MissingCorePropsException: when the SHA-1 hash and Archives URL
        core properties could not both be built from the metadata.
    :raises EntityAlreadyExistsException: when a dataset entity matching the
        core properties already exists.
    """
    validate_api_url(api_url)
    validate_gtfs_representation(gtfs_representation)
    metadata = gtfs_representation.metadata

    ###########################
    # 1. Process the core props
    ###########################

    # Begin with the core properties data
    # to verify if the dataset entity already exists
    core_props_data = []

    # SHA-1 hash property
    if is_valid_instance(metadata.sha1_hash, str):
        core_props_data.append(
            wbi_core.String(value=metadata.sha1_hash,
                            prop_nr=os.environ[SHA1_HASH_PROP]))

    # Archives URL, from the stable URL property
    if is_valid_instance(metadata.stable_urls, dict):
        archives_url = metadata.stable_urls.get(ARCHIVES_URL)
        try:
            core_props_data.append(
                wbi_core.Url(
                    value=archives_url,
                    prop_nr=os.environ[STABLE_URL_PROP],
                    rank=PREFERRED,
                ))
        except ValueError as ve:
            print(
                f"url {archives_url} for source {metadata.source_entity_code} caused {ve}"
            )
            # Bare raise keeps the original traceback intact
            raise

    # If the 2 core props values were NOT added in core_props_data,
    # then it is not possible to verify if the dataset entity already exists
    if len(core_props_data) != 2:
        raise MissingCorePropsException(core_props_data)

    # An existing dataset entity is considered the same as the one processed
    # if and only if 2 core props values are matching: the SHA-1 hash and the
    # Archives URL, so the core properties threshold is 100%
    core_props_threshold = 1.0
    try:
        dataset_entity = wbi_core.ItemEngine(
            data=core_props_data,
            core_props={
                os.environ[STABLE_URL_PROP],
                os.environ[SHA1_HASH_PROP],
            },
            core_prop_match_thresh=core_props_threshold,
        )
    except ManualInterventionReqException:
        # f-prefix removed: the message contains no placeholders
        print(
            "ManualInterventionReqException : a core property value exists "
            "for multiple dataset entities."
        )
        raise
    except CorePropIntegrityException:
        print(
            "CorePropIntegrityException: a dataset entity exists with 1 of "
            "the 2 core props values."
        )
        raise
    except Exception as e:
        print(f"metadata : {metadata} raised {e}")
        raise

    # If the dataset entity retrieved already has an item_id (entity id)
    # value, then we do nothing because the dataset already exists
    if dataset_entity.item_id != "":
        raise EntityAlreadyExistsException(dataset_entity.item_id)

    #################################################
    # 2. Add the other properties to the dataset data
    #################################################

    dataset_data = []

    # Add the core_props_data to the dataset_data
    dataset_data += core_props_data

    # Delete the archives_url from the metadata.stable_urls
    # since it was part of the core_props_data
    del metadata.stable_urls[ARCHIVES_URL]

    # Stable urls property
    if is_valid_instance(metadata.stable_urls, dict):
        for url in metadata.stable_urls.values():
            try:
                dataset_data.append(
                    wbi_core.Url(value=url,
                                 prop_nr=os.environ[STABLE_URL_PROP],
                                 rank=NORMAL))
            except ValueError as ve:
                print(
                    f"url {url} for source {metadata.source_entity_code} caused {ve}"
                )
                raise

    # Instance property
    dataset_data.append(
        wbi_core.ItemID(
            value=os.environ[GTFS_SCHEDULE_DATA_FORMAT],
            prop_nr=os.environ[INSTANCE_PROP],
        ))

    # Source entity property
    dataset_data.append(
        wbi_core.ItemID(value=metadata.source_entity_code,
                        prop_nr=os.environ[SOURCE_ENTITY_PROP]))

    # Main timezone property
    if is_valid_instance(metadata.main_timezone, str):
        dataset_data.append(
            wbi_core.String(
                value=metadata.main_timezone,
                prop_nr=os.environ[TIMEZONE_PROP],
                rank=PREFERRED,
            ))

    # Other timezones property.
    # Loop variable renamed from `timezone` so it no longer shadows the
    # stdlib datetime.timezone name commonly imported at module level.
    if is_valid_instance(metadata.other_timezones, list):
        for other_timezone in metadata.other_timezones:
            dataset_data.append(
                wbi_core.String(value=other_timezone,
                                prop_nr=os.environ[TIMEZONE_PROP],
                                rank=NORMAL))

    # Country code property
    if is_valid_instance(metadata.country_codes, list):
        for country_code in metadata.country_codes:
            dataset_data.append(
                wbi_core.String(
                    value=country_code,
                    prop_nr=os.environ[COUNTRY_CODE_PROP],
                    rank=NORMAL,
                ))

    # Main language code property
    if is_valid_instance(metadata.main_language_code, str):
        dataset_data.append(
            wbi_core.String(
                value=metadata.main_language_code,
                prop_nr=os.environ[MAIN_LANGUAGE_CODE_PROP],
                rank=PREFERRED,
            ))

    # Start service date property
    if is_valid_instance(metadata.start_service_date, str):
        dataset_data.append(
            wbi_core.String(
                value=metadata.start_service_date,
                prop_nr=os.environ[START_SERVICE_DATE_PROP],
            ))

    # End service date property
    if is_valid_instance(metadata.end_service_date, str):
        dataset_data.append(
            wbi_core.String(
                value=metadata.end_service_date,
                prop_nr=os.environ[END_SERVICE_DATE_PROP],
            ))

    # Start timestamp property
    if is_valid_instance(metadata.start_timestamp, str):
        dataset_data.append(
            wbi_core.String(value=metadata.start_timestamp,
                            prop_nr=os.environ[START_TIMESTAMP_PROP]))

    # End timestamp property
    if is_valid_instance(metadata.end_timestamp, str):
        dataset_data.append(
            wbi_core.String(value=metadata.end_timestamp,
                            prop_nr=os.environ[END_TIMESTAMP_PROP]))

    # Bounding box property
    if is_valid_instance(metadata.bounding_box, dict):
        for order_key, corner_value in metadata.bounding_box.items():
            dataset_data.append(
                create_geographical_property(order_key, corner_value,
                                             os.environ[BOUNDING_BOX_PROP]))

    # Bounding octagon property
    if is_valid_instance(metadata.bounding_octagon, dict):
        for order_key, corner_value in metadata.bounding_octagon.items():
            dataset_data.append(
                create_geographical_property(
                    order_key, corner_value,
                    os.environ[BOUNDING_OCTAGON_PROP]))

    # Stop counts
    if is_valid_instance(metadata.stops_count_by_type, dict):
        # Number of stops property
        stops_count = metadata.stops_count_by_type.get(STOP_KEY, None)
        if stops_count is not None:
            dataset_data.append(
                wbi_core.Quantity(
                    quantity=stops_count,
                    prop_nr=os.environ[NUM_OF_STOPS_PROP],
                ))

        # Number of stations property
        stations_count = metadata.stops_count_by_type.get(STATION_KEY, None)
        if stations_count is not None:
            dataset_data.append(
                wbi_core.Quantity(
                    quantity=stations_count,
                    prop_nr=os.environ[NUM_OF_STATIONS_PROP],
                ))

        # Number of entrances property
        entrances_count = metadata.stops_count_by_type.get(ENTRANCE_KEY, None)
        if entrances_count is not None:
            dataset_data.append(
                wbi_core.Quantity(
                    quantity=entrances_count,
                    prop_nr=os.environ[NUM_OF_ENTRANCES_PROP],
                ))

    if is_valid_instance(metadata.agencies_count, int):
        # Number of agencies property
        dataset_data.append(
            wbi_core.Quantity(
                quantity=metadata.agencies_count,
                prop_nr=os.environ[NUM_OF_AGENCIES_PROP],
            ))

    # Number of routes property, one quantity per route type qualifier
    if is_valid_instance(metadata.routes_count_by_type, dict):
        for route_key, route_value in metadata.routes_count_by_type.items():
            route_qualifier = [
                wbi_core.ItemID(
                    value=route_key,
                    prop_nr=os.environ[ROUTE_TYPE_PROP],
                    is_qualifier=True,
                )
            ]
            dataset_data.append(
                wbi_core.Quantity(
                    quantity=route_value,
                    prop_nr=os.environ[NUM_OF_ROUTES_PROP],
                    qualifiers=route_qualifier,
                ))

    # Download date
    if is_valid_instance(metadata.download_date, str):
        dataset_data.append(
            wbi_core.String(
                value=metadata.download_date,
                prop_nr=os.environ[DOWNLOAD_DATE_PROP],
            ))

    # Dataset version entity label
    version_name_label = metadata.dataset_version_name

    if not username:
        username = os.environ[USERNAME]
    if not password:
        password = os.environ[PASSWORD]
    login_instance = wbi_login.Login(user=username, pwd=password)

    #################################################
    # 3. Create the dataset entity on the database
    #################################################

    # Create the Dataset WITHOUT using the core_props.
    # For some reasons, when using the core_props with all the data
    # the WikibaseIntegrator library retrieves entities
    # that are not sharing data with the actual dataset entity,
    # which makes the process crash
    dataset_entity = wbi_core.ItemEngine(data=dataset_data)

    # Set the label (name)
    dataset_entity.set_label(version_name_label, ENGLISH)

    # Create the dataset entity on the database
    dataset_entity_id = dataset_entity.write(login_instance)
    metadata.dataset_version_entity_code = dataset_entity_id

    # Create the source data with the dataset entity code and property
    version_prop = wbi_core.ItemID(
        value=metadata.dataset_version_entity_code,
        prop_nr=os.environ[DATASET_PROP],
        if_exists=APPEND,
    )
    source_data = [version_prop]

    # Update the source entity.
    # Try a maximum of 20 times in case there are edit conflicts
    try_count = 20
    has_succeeded = False
    while not has_succeeded and try_count > 0:
        source_entity = wbi_core.ItemEngine(
            item_id=metadata.source_entity_code)
        source_entity.update(source_data)
        try:
            source_entity.write(login_instance)
        except MWApiError:
            print(
                f"Failed to update: {source_entity.item_id} with data: {source_data} raised MWApiError. "
                f"{try_count} attempts left.")
            try_count -= 1
            # If the attempts have not succeeded, fail loudly
            if try_count == 0:
                print(
                    f"source_entity: {source_entity.get_json_representation()} with data: "
                    f"{source_data} raised MWApiError.")
                raise
            # Wait 20 seconds before the next attempt so the database
            # updates, preventing other edit conflicts
            # and not overloading the database with requests
            time.sleep(20)
        else:
            has_succeeded = True
    metadata.source_entity_code = source_entity.item_id

    return gtfs_representation
def add_usage_example(
        document_id=None,
        sentence=None,
        lid=None,
        form_id=None,
        sense_id=None,
        word=None,
        publication_date=None,
):
    """Upload a usage example to lexeme *lid* in a single edit.

    :param document_id: Riksdagen document id, used in the reference (P8433).
    :param sentence: the example sentence to record as P5831.
    :param lid: lexeme id the claim is attached to.
    :param form_id: form linked via qualifier P5830.
    :param sense_id: sense linked via qualifier P6072.
    :param word: unused here; kept for interface compatibility.
    :param publication_date: ISO-format date string; required.
    :return: the result of the WikibaseIntegrator write on success,
        or False when publication_date is missing.
    """
    # Use WikibaseIntegrator aka wbi to upload the changes in one edit
    if publication_date is not None:
        publication_date = datetime.fromisoformat(publication_date)
    else:
        # BUGFIX: the f-prefix was missing, so the literal text
        # "{document_id}" was printed instead of the actual id.
        print(f"Publication date of document {document_id} "
              "is missing. We have no fallback for that. "
              "Abort adding usage example.")
        return False
    link_to_form = wbi_core.Form(prop_nr="P5830",
                                 value=form_id,
                                 is_qualifier=True)
    link_to_sense = wbi_core.Sense(prop_nr="P6072",
                                   value=sense_id,
                                   is_qualifier=True)
    reference = [
        wbi_core.ItemID(
            prop_nr="P248",  # Stated in Riksdagen open data portal
            value="Q21592569",
            is_reference=True),
        wbi_core.ExternalID(
            prop_nr="P8433",  # Riksdagen Document ID
            value=document_id,
            is_reference=True),
        wbi_core.Time(
            prop_nr="P813",  # Fetched today
            time=datetime.utcnow().replace(tzinfo=timezone.utc).replace(
                hour=0,
                minute=0,
                second=0,
            ).strftime("+%Y-%m-%dT%H:%M:%SZ"),
            is_reference=True,
        ),
        wbi_core.Time(
            prop_nr="P577",  # Publication date
            time=publication_date.strftime("+%Y-%m-%dT00:00:00Z"),
            is_reference=True,
        )
    ]
    claim = wbi_core.MonolingualText(
        sentence,
        "P5831",
        language="sv",
        qualifiers=[link_to_form, link_to_sense],
        references=[reference],
    )
    # print(claim)
    if debug_json:
        print(claim.get_json_representation())
    item = wbi_core.ItemEngine(data=[claim], item_id=lid)
    if debug_json:
        print(item.get_json_representation())
    result = item.write(
        login_instance,
        edit_summary="Added usage example with [[Wikidata:LexUse]]")
    if debug_json:
        print(f"Result from WBI: {result}")
    return result