def create_season(series_id: str, label: str, descr: str, ordinal: int, dry: bool):
    """Creates a season item on WikiData

    Arguments
    ---------
    series_id: str
        The Wiki ID of the series ItemPage
    label: str
        The label to use for this season. Typically "<Series Name>, season x",
        where x is the ordinal
    descr: str
        The description to use for this season. Typically
        "season x of <Series Name>", where x is the ordinal
    ordinal: int
        The ordinal of this season, within the series
    dry: bool
        Whether or not this function should run in dry-run mode. In dry-run
        mode, no real changes are made to WikiData; they are only logged to
        stdout.

    Returns
    -------
    season_id: str
        The Wiki ID of the season that was created
    """
    dry_str = "[DRY-RUN] " if dry else ""
    repoutil = RepoUtils(Site().data_repository())

    season = None
    print(f"{dry_str}Creating season with\n\tlabel='{label}'\n\tdescription='{descr}'")
    if not dry:
        season = repoutil.new_item(labels={"en": label}, descriptions={"en": descr})
        print(f"Created a new Item: {season.getID()}")

    print(f"{dry_str}Setting {wp.INSTANCE_OF}={wp.TELEVISION_SERIES_SEASON}")
    if not dry:
        instance_claim = repoutil.new_claim(wp.INSTANCE_OF.pid)
        instance_claim.setTarget(ItemPage(repoutil.repo, wp.TELEVISION_SERIES_SEASON))
        season.addClaim(instance_claim, summary=f"Setting {wp.INSTANCE_OF.pid}")

    print(
        f"{dry_str}Setting {wp.PART_OF_THE_SERIES}={series_id}, "
        f"with {wp.SERIES_ORDINAL.pid}={ordinal}"
    )
    if not dry:
        series_claim = repoutil.new_claim(wp.PART_OF_THE_SERIES.pid)
        series_claim.setTarget(ItemPage(repoutil.repo, series_id))
        season_ordinal = repoutil.new_claim(wp.SERIES_ORDINAL.pid)
        season_ordinal.setTarget(str(ordinal))
        series_claim.addQualifier(season_ordinal)
        season.addClaim(series_claim, summary=f"Setting {wp.PART_OF_THE_SERIES.pid}")

    return season.getID() if season is not None else "Q-1"
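A minimal invocation sketch for the function above, assuming it runs in the same module; the series QID is a placeholder. With dry=True nothing is written to Wikidata, so the sentinel "Q-1" comes back.

# Hypothetical invocation; "Q1079" is a placeholder series QID.
season_id = create_season(
    series_id="Q1079",
    label="Example Series, season 1",
    descr="season 1 of Example Series",
    ordinal=1,
    dry=True,
)
print(season_id)  # "Q-1": no item is created in dry-run mode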
def setUpClass(cls):
    """Set up test class."""
    super(TestIsbnWikibaseBot, cls).setUpClass()

    # Check if the unit test item page and the property both exist
    item_ns = cls.get_repo().item_namespace
    for page in cls.get_site().search('IsbnWikibaseBotUnitTest', total=1,
                                      namespaces=item_ns):
        cls.test_page_qid = page.title()
        item_page = ItemPage(cls.get_repo(), page.title())
        for pid, claims in item_page.get()['claims'].items():
            for claim in claims:
                prop_page = pywikibot.PropertyPage(cls.get_repo(),
                                                   claim.getID())
                prop_page.get()
                if ('ISBN-10' in prop_page.labels.values() and
                        claim.getTarget() == '097522980x'):
                    return
        raise unittest.SkipTest(
            u'%s: "ISBN-10" property was not found in '
            u'"IsbnWikibaseBotUnitTest" item page' % cls.__name__)
    raise unittest.SkipTest(
        u'%s: "IsbnWikibaseBotUnitTest" item page was not found'
        % cls.__name__)
def create_seasons(series_id, number_of_seasons, quickstatements=False, dry=False):
    series_title = ItemPage(Site().data_repository(), series_id)
    series_title.get(force=True)
    series_label = series_title.labels['en']

    for i in range(1, number_of_seasons + 1):
        label = f"{series_label}, season {i}"
        descr = f"season {i} of {series_label}"
        if quickstatements:
            create_season_quickstatements(series_id, label, descr, i)
        else:
            create_season(series_id, label, descr, i, dry)
def normalize_repo_url(
    item: ItemPage,
    url_normalized: str,
    url_raw: str,
    q_value: str,
    edit_group_hash: str,
):
    """Canonicalize the GitHub URL, using the format
    https://github.com/[owner]/[repo]

    Note: This apparently only works with a bot account
    """
    if url_raw == url_normalized:
        return

    logger.info("Normalizing {} to {}".format(url_raw, url_normalized))

    source_p = Properties.source_code_repository
    urls = item.claims.get(source_p, [])
    if not urls:
        # Nothing to normalize if the property is missing entirely
        return
    if len(urls) == 2:
        if urls[0].getTarget() == url_normalized and urls[1].getTarget() == url_raw:
            logger.info("The old and the new url are already set, removing the old")
            item.removeClaims(urls[1], summary=get_sumary(edit_group_hash))
            return
        if urls[0].getTarget() == url_raw and urls[1].getTarget() == url_normalized:
            logger.info("The old and the new url are already set, removing the old")
            item.removeClaims(urls[0], summary=get_sumary(edit_group_hash))
            return
    if len(urls) > 1:
        logger.info(
            "Multiple source code repositories for {} not supported".format(q_value))
        return

    if urls[0].getTarget() != url_raw:
        logger.error(
            f"The url on the object ({urls[0].getTarget()}) doesn't match the url "
            f"from the sparql query ({url_raw}) for {q_value}")
        return

    # "Editing" in this case actually means removing the old value and
    # adding the new one
    claim = Claim(Settings.wikidata_repo, source_p)
    claim.setTarget(url_normalized)
    claim.setSnakType("value")
    item.addClaim(claim, summary=get_sumary(edit_group_hash))
    item.removeClaims(urls[0], summary=get_sumary(edit_group_hash))
    # Add git as protocol
    git = ItemPage(Settings.wikidata_repo, "Q186055")
    get_or_create_qualifiers(claim, Properties.protocol, git, edit_group_hash)
def _clone(src: str, dest: str, props: Iterable[wp.WikidataProperty]):
    """Copy all specified properties from the src ID to the dest ID"""
    repoutil = RepoUtils(Site().data_repository())

    if not src.startswith("Q"):
        raise ValueError(f"Expected item ID of the format 'Q####', found {src}")
    if not dest.startswith("Q"):
        raise ValueError(f"Expected item ID of the format 'Q####', found {dest}")

    src_item = ItemPage(repoutil.repo, src)
    dest_item = ItemPage(repoutil.repo, dest)

    success, failures = repoutil.copy(src_item, dest_item, props)
    print(f"Success: {success}, Failures: {failures}")
def get_or_create_claim(item: ItemPage, p_value: str, value: Any,
                        edit_group_hash: str) -> Tuple[Claim, bool]:
    """Gets or creates a claim with `value` under the property `p_value` on `item`"""
    all_claims = item.claims.get(p_value, [])

    for claim in all_claims:
        if claim.target_equals(value):
            return claim, False

    claim = Claim(Settings.wikidata_repo, p_value)
    claim.setTarget(value)
    item.addClaim(claim, summary=get_sumary(edit_group_hash))

    return claim, True
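A usage sketch for get_or_create_claim, assuming the surrounding module's Settings and get_sumary helpers are configured; the property ID, version string, and edit-group hash below are placeholders, and "Q4115189" is the Wikidata sandbox item.

# Hypothetical usage: ensure a software version (P348) claim exists on an item.
from pywikibot import ItemPage

item = ItemPage(Settings.wikidata_repo, "Q4115189")
item.get()
claim, created = get_or_create_claim(item, "P348", "1.2.0", "0123abcd")
print("created new claim" if created else "claim already existed")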
def get_typed_item(self, item_id: str) -> api.BaseType:
    item_page = ItemPage(self.repo, item_id)
    item_page.get()

    if INSTANCE_OF.pid not in item_page.claims:
        raise ValueError(f"{item_id} has no 'instance of' property")

    claims = item_page.claims[INSTANCE_OF.pid]
    instance_ids = {claim.getTarget().id for claim in claims}

    if TELEVISION_SERIES_EPISODE in instance_ids:
        return Episode(item_page, self.repo)
    if TELEVISION_SERIES_SEASON in instance_ids:
        return Season(item_page, self.repo)
    if TELEVISION_SERIES in instance_ids or ANIMATED_SERIES in instance_ids:
        return Series(item_page, self.repo)

    raise ValueError(f"Unsupported item with instance QIDs {instance_ids}")
def setUpClass(cls):
    """Set up test class."""
    super(TestIsbnWikibaseBot, cls).setUpClass()

    # Check if the unit test item page and the property both exist
    item_ns = cls.get_repo().item_namespace
    for page in cls.get_site().search("IsbnWikibaseBotUnitTest", step=1,
                                      total=1, namespaces=item_ns):
        cls.test_page_qid = page.title()
        item_page = ItemPage(cls.get_repo(), page.title())
        for pid, claims in item_page.get()["claims"].items():
            for claim in claims:
                prop_page = pywikibot.PropertyPage(cls.get_repo(),
                                                   claim.getID())
                prop_page.get()
                if ("ISBN-10" in prop_page.labels.values()
                        and claim.getTarget() == "097522980x"):
                    return
        raise unittest.SkipTest(
            '%s: "ISBN-10" property was not found in '
            '"IsbnWikibaseBotUnitTest" item page' % cls.__name__
        )
    raise unittest.SkipTest(
        '%s: "IsbnWikibaseBotUnitTest" item page was not found' % cls.__name__
    )
def from_id(cls, item_id: str, repo=None):
    """Create an instance of the class from the item ID (QID)

    Note: This does not check if the QID is the same type as the wrapper
    class. It is recommended that the user:

    1. uses Factory to instantiate this class, OR
    2. ensures that the item_id is in fact the same type as the class
    """
    repo = Site().data_repository() if repo is None else repo
    return cls(ItemPage(repo, item_id), repo)
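Because from_id performs no type check, the caller has to know the QID's type up front. A sketch under that assumption, using a placeholder QID and assuming Episode is one of this module's wrapper subclasses:

# Hypothetical usage; "Q21664088" is a placeholder QID the caller already
# knows to be an episode item. No validation happens here.
episode = Episode.from_id("Q21664088")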
def new_item(self, labels, descriptions) -> ItemPage:
    item = ItemPage(self.repo)
    if labels:
        item.editLabels(labels, summary="Setting label")
    if descriptions:
        item.editDescriptions(descriptions, summary="Setting description")
    return item
def create_seasons(series_id, number_of_seasons, quickstatements=False, dry=False):
    """Creates multiple season items on WikiData

    Arguments
    ---------
    series_id: str
        The Wiki ID of the series ItemPage
    number_of_seasons: int
        The number of seasons to create for this series
    quickstatements: bool
        If True, simply print out a list of quickstatements.
        If False, create the items on WikiData directly.
    dry: bool
        Whether or not this function should run in dry-run mode. In dry-run
        mode, no real changes are made to WikiData; they are only logged to
        stdout.

    Returns
    -------
    season_ids: List[str]
        The Wiki IDs of the seasons that were created
    """
    series_title = ItemPage(Site().data_repository(), series_id)
    series_title.get(force=True)
    series_label = series_title.labels['en']

    season_ids = []
    for i in tqdm(range(1, number_of_seasons + 1)):
        label = f"{series_label}, season {i}"
        descr = f"season {i} of {series_label}"
        if quickstatements:
            create_season_quickstatements(series_id, label, descr, i)
        else:
            season_id = create_season(series_id, label, descr, i, dry)
            season_ids.append(season_id)
    return season_ids
def copy(
    self,
    src_item: ItemPage,
    dest_item: ItemPage,
    props: Iterable[wp.WikidataProperty],
):
    """Copy properties from the source item to the destination item

    Returns a tuple of (successes, failures)
    """
    src_item.get()
    dest_item.get()

    failures = 0
    successes = 0

    for prop in props:
        if prop.pid not in src_item.claims:
            print(f"{prop} not found in {src_item.title()}")
            failures += 1
            continue

        src_claims = src_item.claims[prop.pid]
        if len(src_claims) > 1:
            copy_multiple = click.confirm(
                f"There are {len(src_claims)} values for {prop}. "
                "Are you sure you want to copy all of them?"
            )
            # copy_multiple = False
            if not copy_multiple:
                print(
                    f"Cannot copy {prop} from {format(src_item)} to "
                    f"{format(dest_item)}. Only scalar properties can be copied"
                )
                failures += 1
                continue

        if prop.pid in dest_item.claims:
            print(f"{prop} already has a value in {format(dest_item)}")
            failures += 1
            continue

        targets = [claim.getTarget() for claim in src_claims]
        for target in targets:
            if hasattr(target, "get"):
                target.get()
            target_str = printable_target_value(target)
            print(
                f"Copying {prop}={target_str} from {format(src_item)} "
                f"to {format(dest_item)}"
            )
            new_claim = Claim(self.repo, prop.pid)
            new_claim.setTarget(target)
            dest_item.addClaim(
                new_claim, summary=f"Setting {prop.pid} ({prop.name})"
            )
            successes += 1

    return (successes, failures)
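A sketch of driving copy directly, assuming the RepoUtils class above and the module's wp.TITLE property wrapper; both QIDs are placeholders (the two Wikidata sandbox items).

# Hypothetical usage: copy the title property between two sandbox items.
from pywikibot import ItemPage, Site

repoutil = RepoUtils(Site().data_repository())
src = ItemPage(repoutil.repo, "Q4115189")    # placeholder source QID
dest = ItemPage(repoutil.repo, "Q13406268")  # placeholder destination QID
successes, failures = repoutil.copy(src, dest, [wp.TITLE])
print(f"{successes} copied, {failures} failed")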
def copy_delayed(
    src_item: ItemPage, dest_item: ItemPage, props: Iterable[wp.WikidataProperty]
) -> Iterable[api.Fix]:
    repo = Site().data_repository()
    src_item.get()
    dest_item.get()

    claims = []
    for prop in props:
        src_claims = src_item.claims.get(prop.pid, [])
        if len(src_claims) > 1:
            print(
                f"Cannot copy {prop} from {format(src_item)} to {format(dest_item)}. "
                "Only scalar properties can be copied"
            )
            continue

        if prop.pid in dest_item.claims:
            print(f"{prop} already has a value in {format(dest_item)}")
            continue

        targets = [claim.getTarget() for claim in src_claims]
        for target in targets:
            # Not all claim targets are pages; only fetch those that can be
            if hasattr(target, "get"):
                target.get()
            target_str = printable_target_value(target)
            print(
                f"Creating claim to copy {prop}={target_str} "
                f"from {format(src_item)} to {format(dest_item)}"
            )
            new_claim = Claim(repo, prop.pid)
            new_claim.setTarget(target)
            summary = f"Setting {prop.pid} ({prop.name})"
            claims.append(api.ClaimFix(new_claim, summary, dest_item))
    return claims
def check_tv_show(tvshow_id=None, child_type="all", autofix=False,
                  accumulate=False, interactive=False, filter=""):
    """Check constraints for season/episodes of this TV show

    Arguments
    ---------
    tvshow_id: str
        The Wiki ID of the television series, in the format Q######.
    child_type: str
        One of "episode", "season", "series", or "all"
    autofix: bool
        Whether or not to attempt auto-fixing constraint failures
    accumulate: bool
        Whether or not to accumulate all fixes before applying them to WikiData
    interactive: bool
        Whether or not to prompt for confirmation before making edits
    filter: str
        A comma-separated list of properties in the format P###. Only edits
        for these properties will be applied.
    """
    if child_type == "episode":
        instance_types = [wp.TELEVISION_SERIES_EPISODE]
    elif child_type == "season":
        instance_types = [wp.TELEVISION_SERIES_SEASON]
    elif child_type == "series":
        instance_types = [wp.TELEVISION_SERIES]
    elif child_type == "all":
        instance_types = [
            wp.TELEVISION_SERIES,
            wp.TELEVISION_SERIES_SEASON,
            wp.TELEVISION_SERIES_EPISODE,
        ]
    else:
        raise ValueError(f"Unexpected child_type: {child_type}")

    for instance_of_type in instance_types:
        key_val_pairs = {
            wp.PART_OF_THE_SERIES.pid: tvshow_id,
            wp.INSTANCE_OF.pid: instance_of_type,
        }
        query = generate_sparql_query(key_val_pairs)
        gen = WikidataSPARQLPageGenerator(query)
        if instance_of_type == wp.TELEVISION_SERIES:
            gen = [ItemPage(Site().data_repository(), tvshow_id)]
        bot = getbot(gen, autofix=autofix, accumulate=accumulate,
                     always=(not interactive), property_filter=filter)
        bot.run()
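An invocation sketch with a placeholder show QID, assuming the same module; interactive=True keeps the bot prompting before each edit, and the filter restricts fixes to P179 (part of the series).

# Hypothetical invocation; "Q1079" is a placeholder television-series QID.
check_tv_show(
    tvshow_id="Q1079",
    child_type="episode",
    autofix=True,
    accumulate=False,
    interactive=True,
    filter="P179",
)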
def _handle_birth_and_death(self, event, author_dict):
    if author_dict[event] == '' or self.match_property.search(author_dict[event]):
        self.logger.debug(f"No valid entry in {event} for "
                          f"[[{author_dict['title']}]] ... Fallback to wikidata")
        try:
            item = ItemPage(self.repo, author_dict["wikidata"])
            if event == "birth":
                property_label = "P569"
            else:
                property_label = "P570"
            claim = item.text["claims"][property_label][0]
            date_from_data = claim.getTarget()
            if date_from_data.precision < 7:
                self.logger.error(
                    f"Precision is too low for [[{author_dict['title']}]]")
            elif date_from_data.precision < 8:
                date_from_data = int(
                    ceil(float(date_from_data.year) / 100.0) * 100)
                if date_from_data < 1000:
                    date_from_data = str(date_from_data)[0:1] + ". Jh."
                else:
                    date_from_data = str(date_from_data)[0:2] + ". Jh."
            elif date_from_data.precision < 10:
                date_from_data = str(date_from_data.year)
            elif date_from_data.precision < 11:
                date_from_data = self.number_to_month[date_from_data.month] + " " + \
                    str(date_from_data.year)
            else:
                date_from_data = f"{date_from_data.day}. " \
                                 f"{self.number_to_month[date_from_data.month]} " \
                                 f"{date_from_data.year}"
            if re.search("-", date_from_data):
                date_from_data = date_from_data.replace("-", "") + " v. Chr."
            self.logger.debug(f"Found {date_from_data} @ wikidata for {event}")
            return date_from_data  # 4,6
        except Exception:
            self.logger.debug("Wasn't able to get any data from wikidata")
            return ''  # 4,6
    else:
        return author_dict[event]  # 4,6
def main(dry=False):
    dry_str = ""
    if dry:
        print("Running in dry-run mode, will not implement any changes")
        dry_str = "[DRY-RUN MODE] "
    repo = Site().data_repository()
    for episode_id, title, series_label in episodes_with_titles_and_missing_labels():
        print(f"{dry_str}Fixing {series_label}:{title} ({episode_id})")
        if not dry:
            episode_item = ItemPage(repo, episode_id)
            episode_item.get()
            episode_item.editLabels({"en": title})
def main(dry=False):
    dry_str = ""
    if dry:
        print("Running in dry-run mode, will not implement any changes")
        dry_str = "[DRY-RUN MODE] "
    repo = Site().data_repository()
    for movie_id, title in movies_with_missing_labels_with_title():
        print(
            f"{dry_str}Setting label='{title}' for {movie_id} "
            f"( https://www.wikidata.org/wiki/{movie_id} )"
        )
        if not dry:
            movie_item = ItemPage(repo, movie_id)
            movie_item.get()
            movie_item.editLabels({"en": title})
def main(dry=False):
    dry_str = ""
    if dry:
        print("Running in dry-run mode, will not implement any changes")
        dry_str = "[DRY-RUN MODE] "
    repo = Site().data_repository()
    for movie_id, movie_label in movies_with_missing_titles():
        print(
            f"{dry_str}Setting title='{movie_label}' for {movie_id} "
            f"( https://www.wikidata.org/wiki/{movie_id} )"
        )
        if not dry:
            movie_item = ItemPage(repo, movie_id)
            movie_item.get()
            claim = Claim(repo, wp.TITLE.pid)
            claim.setTarget(WbMonolingualText(movie_label, "en"))
            movie_item.addClaim(claim)
def check_tv_show(tvshow_id=None, child_type="episode", autofix=False,
                  accumulate=False, always=False, filter=""):
    """Check constraints for season/episodes of this TV show

    TVSHOW_ID is the ID of the television series, in the format Q######.
    """
    if child_type == "episode":
        instance_types = [wp.TELEVISION_SERIES_EPISODE]
    elif child_type == "season":
        instance_types = [wp.TELEVISION_SERIES_SEASON]
    elif child_type == "series":
        instance_types = [wp.TELEVISION_SERIES]
    elif child_type == "all":
        instance_types = [
            wp.TELEVISION_SERIES,
            wp.TELEVISION_SERIES_SEASON,
            wp.TELEVISION_SERIES_EPISODE,
        ]
    else:
        raise ValueError(f"Unexpected child_type: {child_type}")

    for instance_of_type in instance_types:
        key_val_pairs = {
            wp.PART_OF_THE_SERIES.pid: tvshow_id,
            wp.INSTANCE_OF.pid: instance_of_type,
        }
        query = generate_sparql_query(key_val_pairs)
        gen = WikidataSPARQLPageGenerator(query)
        if instance_of_type == wp.TELEVISION_SERIES:
            gen = [ItemPage(Site().data_repository(), tvshow_id)]
        bot = getbot(gen, autofix=autofix, accumulate=accumulate,
                     always=always, property_filter=filter)
        bot.run()
def main(dry=False):
    dry_str = ""
    if dry:
        print("Running in dry-run mode, will not implement any changes")
        dry_str = "[DRY-RUN MODE] "
    repo = Site().data_repository()
    seen = set()
    for board_game_id, bgg_id in board_games_with_missing_labels():
        if board_game_id in seen:
            continue
        seen.add(board_game_id)
        board_game_name = utils.bgg_title(bgg_id)
        if board_game_name is None:
            print(f"Unable to fetch name for {board_game_id}.")
            continue
        wiki_url = f"https://www.wikidata.org/wiki/{board_game_id}"
        print(
            f"{dry_str}Setting label='{board_game_name}' for "
            f"{board_game_id} ( {wiki_url} )"
        )
        if not dry:
            bg_item = ItemPage(repo, board_game_id)
            bg_item.get()
            bg_item.editLabels({"en": board_game_name})
def main(dry=False):
    dry_str = ""
    if dry:
        print("Running in dry-run mode, will not implement any changes")
        dry_str = "[DRY-RUN MODE] "
    repo = Site().data_repository()
    for item_link, item_id, title in items_with_missing_labels_with_title():
        print(
            f"{dry_str} ( {str(item_link).ljust(40, ' ')} ) "
            f"Fixing {str(item_id).ljust(9, ' ')}: {title}"
        )
        if dry:
            continue

        # Labels have a character limit, so skip titles long enough to
        # trigger an API error
        if len(title) >= 250:
            continue

        item = ItemPage(repo, item_id)
        item.get()
        try:
            item.editLabels({"en": title})
        except (APIError, OtherPageSaveError) as e:
            print(f"An error occurred while adding label for {item_id}: {e}")
def parts(self):
    """An iterable of (ordinal, Episode) that are parts of this season"""
    for ordinal, episode_id, _ in sorted(Q.episodes(self.qid)):
        yield ordinal, Episode(ItemPage(self.repo, episode_id))
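A sketch of consuming the generator above, assuming `season` is an instance of the Season wrapper built around a placeholder QID; episodes come back sorted by ordinal.

# Hypothetical usage: list the episodes of a season in series order.
for ordinal, episode in season.parts():
    print(ordinal, episode)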
def test_extract_literal_properties_freestanding():
    """
    :return:
    """
    claim = Claim.fromJSON(
        DataSite("wikidata", "wikidata"),
        {u'type': u'statement',
         u'references': [
             {u'snaks': {
                 u'P248': [{u'datatype': u'wikibase-item',
                            u'datavalue': {u'type': u'wikibase-entityid',
                                           u'value': {u'entity-type': u'item',
                                                      u'numeric-id': 5375741}},
                            u'property': u'P248', u'snaktype': u'value'}]},
              u'hash': u'355b56329b78db22be549dec34f2570ca61ca056',
              u'snaks-order': [u'P248']},
             {u'snaks': {
                 u'P1476': [{u'datatype': u'monolingualtext',
                             u'datavalue': {u'type': u'monolingualtext',
                                            u'value': {u'text': u'Obituary: Douglas Adams',
                                                       u'language': u'en'}},
                             u'property': u'P1476', u'snaktype': u'value'}],
                 u'P407': [{u'datatype': u'wikibase-item',
                            u'datavalue': {u'type': u'wikibase-entityid',
                                           u'value': {u'entity-type': u'item',
                                                      u'numeric-id': 1860}},
                            u'property': u'P407', u'snaktype': u'value'}],
                 u'P813': [{u'datatype': u'time',
                            u'datavalue': {u'type': u'time',
                                           u'value': {u'after': 0, u'precision': 11,
                                                      u'time': u'+00000002013-12-07T00:00:00Z',
                                                      u'timezone': 0,
                                                      u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                                      u'before': 0}},
                            u'property': u'P813', u'snaktype': u'value'}],
                 u'P1433': [{u'datatype': u'wikibase-item',
                             u'datavalue': {u'type': u'wikibase-entityid',
                                            u'value': {u'entity-type': u'item',
                                                       u'numeric-id': 11148}},
                             u'property': u'P1433', u'snaktype': u'value'}],
                 u'P854': [{u'datatype': u'url',
                            u'datavalue': {u'type': u'string',
                                           u'value': u'http://www.theguardian.com/news/2001/may/15/guardianobituaries.books'},
                            u'property': u'P854', u'snaktype': u'value'}],
                 u'P577': [{u'datatype': u'time',
                            u'datavalue': {u'type': u'time',
                                           u'value': {u'after': 0, u'precision': 11,
                                                      u'time': u'+00000002001-05-15T00:00:00Z',
                                                      u'timezone': 0,
                                                      u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                                      u'before': 0}},
                            u'property': u'P577', u'snaktype': u'value'}],
                 u'P50': [{u'datatype': u'wikibase-item',
                           u'datavalue': {u'type': u'wikibase-entityid',
                                          u'value': {u'entity-type': u'item',
                                                     u'numeric-id': 18145749}},
                           u'property': u'P50', u'snaktype': u'value'}]},
              u'hash': u'3f4d26cf841e20630c969afc0e48e5e3ef0c5a49',
              u'snaks-order': [u'P854', u'P577', u'P813', u'P1433', u'P50',
                               u'P1476', u'P407']},
             {u'snaks': {
                 u'P123': [{u'datatype': u'wikibase-item',
                            u'datavalue': {u'type': u'wikibase-entityid',
                                           u'value': {u'entity-type': u'item',
                                                      u'numeric-id': 192621}},
                            u'property': u'P123', u'snaktype': u'value'}],
                 u'P1476': [{u'datatype': u'monolingualtext',
                             u'datavalue': {u'type': u'monolingualtext',
                                            u'value': {u'text': u"Hitch Hiker's Guide author Douglas Adams dies aged 49",
                                                       u'language': u'en'}},
                             u'property': u'P1476', u'snaktype': u'value'}],
                 u'P407': [{u'datatype': u'wikibase-item',
                            u'datavalue': {u'type': u'wikibase-entityid',
                                           u'value': {u'entity-type': u'item',
                                                      u'numeric-id': 1860}},
                            u'property': u'P407', u'snaktype': u'value'}],
                 u'P813': [{u'datatype': u'time',
                            u'datavalue': {u'type': u'time',
                                           u'value': {u'after': 0, u'precision': 11,
                                                      u'time': u'+00000002015-01-03T00:00:00Z',
                                                      u'timezone': 0,
                                                      u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                                      u'before': 0}},
                            u'property': u'P813', u'snaktype': u'value'}],
                 u'P854': [{u'datatype': u'url',
                            u'datavalue': {u'type': u'string',
                                           u'value': u'http://www.telegraph.co.uk/news/uknews/1330072/Hitch-Hikers-Guide-author-Douglas-Adams-dies-aged-49.html'},
                            u'property': u'P854', u'snaktype': u'value'}],
                 u'P577': [{u'datatype': u'time',
                            u'datavalue': {u'type': u'time',
                                           u'value': {u'after': 0, u'precision': 11,
                                                      u'time': u'+00000002001-05-13T00:00:00Z',
                                                      u'timezone': 0,
                                                      u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                                      u'before': 0}},
                            u'property': u'P577', u'snaktype': u'value'}]},
              u'hash': u'51a934797fd7f7d3ee91d4d541356d4c5974075b',
              u'snaks-order': [u'P1476', u'P577', u'P123', u'P407', u'P854',
                               u'P813']},
             {u'snaks': {
                 u'P248': [{u'datatype': u'wikibase-item',
                            u'datavalue': {u'type': u'wikibase-entityid',
                                           u'value': {u'entity-type': u'item',
                                                      u'numeric-id': 36578}},
                            u'property': u'P248', u'snaktype': u'value'}],
                 u'P813': [{u'datatype': u'time',
                            u'datavalue': {u'type': u'time',
                                           u'value': {u'after': 0, u'precision': 11,
                                                      u'time': u'+00000002015-07-07T00:00:00Z',
                                                      u'timezone': 0,
                                                      u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                                      u'before': 0}},
                            u'property': u'P813', u'snaktype': u'value'}],
                 u'P227': [{u'datatype': u'external-id',
                            u'datavalue': {u'type': u'string',
                                           u'value': u'119033364'},
                            u'property': u'P227', u'snaktype': u'value'}]},
              u'hash': u'a02f3a77ddd343e6b88be25696b055f5131c3d64',
              u'snaks-order': [u'P248', u'P227', u'P813']}],
         u'mainsnak': {u'datatype': u'wikibase-item',
                       u'datavalue': {u'type': u'wikibase-entityid',
                                      u'value': {u'entity-type': u'item',
                                                 u'numeric-id': 350}},
                       u'property': u'P19', u'snaktype': u'value'},
         u'id': u'q42$3D284234-52BC-4DA3-83A3-7C39F84BA518',
         u'rank': u'normal'})

    # target_id = 'Q{}'.format(claim['mainsnak']['datavalue']['value']['numeric-id'])
    target = claim.target
    # target = pywikibot.ItemPage.from_entity_uri(site=DataSite('wikidata', 'wikidata'), uri=target_id)
    result = ParseItemPage.extract_literal_properties(entity=target,
                                                      languages=['en'],
                                                      literals=['labels'])
    print(result)
    assert result['labels']['en'] == 'Cambridge'

    entity_id = 'Q350'
    target = ItemPage.from_entity_uri(
        site=DataSite('wikidata', 'wikidata'),
        uri='http://www.wikidata.org/entity' + '/' + entity_id)
    print(target)
    result = ParseItemPage.extract_literal_properties(entity=target,
                                                      languages=['en'],
                                                      literals=['labels'])
    print(result)
    assert result['labels']['en'] == 'Cambridge'
def create_episode(series_id, season_id, title, series_ordinal, season_ordinal, dry):
    """Creates an episode item on WikiData

    Arguments
    ---------
    series_id: str
        The Wiki ID of the series ItemPage
    season_id: str
        The Wiki ID of the season ItemPage
    title: str
        The title of this episode. This is used to set the label.
    series_ordinal: int
        The ordinal of this episode, within the series
    season_ordinal: int
        The ordinal of this episode, within the season
    dry: bool
        Whether or not this function should run in dry-run mode. In dry-run
        mode, no real changes are made to WikiData; they are only logged to
        stdout.

    Returns
    -------
    episode_id: str
        The Wiki ID of the episode item
    """
    dry_str = "[DRY-RUN] " if dry else ""
    print(f"{dry_str}Creating episode with label='{title}'")
    episode = None
    if not dry:
        repoutil = RepoUtils(Site().data_repository())
        season = ItemPage(repoutil.repo, season_id)
        season.get()

        # Check if season has part_of_the_series set to series_id
        if wp.PART_OF_THE_SERIES.pid not in season.claims:
            raise ValueError(
                f"The season {season_id} does not have a PART_OF_THE_SERIES "
                f"({wp.PART_OF_THE_SERIES.pid}) property. Check the input "
                "series and season IDs for correctness."
            )
        actual_series_id = str(
            season.claims[wp.PART_OF_THE_SERIES.pid][0].getTarget().getID())
        if actual_series_id != series_id:
            raise ValueError(
                f"The season {season_id} has PART_OF_THE_SERIES={actual_series_id} "
                f"but expected={series_id}. Check the input series and season IDs "
                "for correctness."
            )

        episode = ItemPage(repoutil.repo)
        episode.editLabels({"en": title}, summary="Setting label")
        print(f"Created a new Item: {episode.getID()}")

    print(f"{dry_str}Setting {wp.INSTANCE_OF}={wp.TELEVISION_SERIES_EPISODE}")
    if not dry:
        instance_claim = repoutil.new_claim(wp.INSTANCE_OF.pid)
        instance_claim.setTarget(ItemPage(repoutil.repo, wp.TELEVISION_SERIES_EPISODE))
        episode.addClaim(instance_claim, summary=f"Setting {wp.INSTANCE_OF.pid}")

    print(
        f"{dry_str}Setting {wp.PART_OF_THE_SERIES}={series_id}, "
        f"with {wp.SERIES_ORDINAL}={series_ordinal}"
    )
    if not dry:
        series_claim = repoutil.new_claim(wp.PART_OF_THE_SERIES.pid)
        series_claim.setTarget(ItemPage(repoutil.repo, series_id))
        series_ordinal_claim = repoutil.new_claim(wp.SERIES_ORDINAL.pid)
        # SERIES_ORDINAL is a string-valued property, so stringify the ordinal
        series_ordinal_claim.setTarget(str(series_ordinal))
        series_claim.addQualifier(series_ordinal_claim)
        episode.addClaim(series_claim, summary=f"Setting {wp.PART_OF_THE_SERIES.pid}")

    print(
        f"{dry_str}Setting {wp.SEASON}={season_id}, "
        f"with {wp.SERIES_ORDINAL}={season_ordinal}"
    )
    if not dry:
        season_claim = repoutil.new_claim(wp.SEASON.pid)
        season_claim.setTarget(ItemPage(repoutil.repo, season_id))
        season_ordinal_claim = repoutil.new_claim(wp.SERIES_ORDINAL.pid)
        season_ordinal_claim.setTarget(str(season_ordinal))
        season_claim.addQualifier(season_ordinal_claim)
        episode.addClaim(season_claim, summary=f"Setting {wp.SEASON.pid}")

    return episode.getID() if episode is not None else "Q-1"
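A dry-run sketch for create_episode with placeholder QIDs; since the season consistency check only runs when dry=False, this call just logs the intended edits and returns the sentinel "Q-1".

# Hypothetical invocation; "Q1079" and "Q4115189" are placeholder QIDs.
episode_id = create_episode(
    series_id="Q1079",
    season_id="Q4115189",
    title="Pilot",
    series_ordinal=1,
    season_ordinal=1,
    dry=True,
)
print(episode_id)  # "Q-1": nothing is written in dry-run mode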
def test_extract_literal_properties_freestanding(self):
    """
    :return:
    """
    try:
        claim = Claim.fromJSON(
            DataSite("wikidata", "wikidata"),
            {u'type': u'statement',
             u'references': [
                 {u'snaks': {
                     u'P248': [{u'datatype': u'wikibase-item',
                                u'datavalue': {u'type': u'wikibase-entityid',
                                               u'value': {u'entity-type': u'item',
                                                          u'numeric-id': 5375741}},
                                u'property': u'P248', u'snaktype': u'value'}]},
                  u'hash': u'355b56329b78db22be549dec34f2570ca61ca056',
                  u'snaks-order': [u'P248']},
                 {u'snaks': {
                     u'P1476': [{u'datatype': u'monolingualtext',
                                 u'datavalue': {u'type': u'monolingualtext',
                                                u'value': {u'text': u'Obituary: Douglas Adams',
                                                           u'language': u'en'}},
                                 u'property': u'P1476', u'snaktype': u'value'}],
                     u'P407': [{u'datatype': u'wikibase-item',
                                u'datavalue': {u'type': u'wikibase-entityid',
                                               u'value': {u'entity-type': u'item',
                                                          u'numeric-id': 1860}},
                                u'property': u'P407', u'snaktype': u'value'}],
                     u'P813': [{u'datatype': u'time',
                                u'datavalue': {u'type': u'time',
                                               u'value': {u'after': 0, u'precision': 11,
                                                          u'time': u'+00000002013-12-07T00:00:00Z',
                                                          u'timezone': 0,
                                                          u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                                          u'before': 0}},
                                u'property': u'P813', u'snaktype': u'value'}],
                     u'P1433': [{u'datatype': u'wikibase-item',
                                 u'datavalue': {u'type': u'wikibase-entityid',
                                                u'value': {u'entity-type': u'item',
                                                           u'numeric-id': 11148}},
                                 u'property': u'P1433', u'snaktype': u'value'}],
                     u'P854': [{u'datatype': u'url',
                                u'datavalue': {u'type': u'string',
                                               u'value': u'http://www.theguardian.com/news/2001/may/15/guardianobituaries.books'},
                                u'property': u'P854', u'snaktype': u'value'}],
                     u'P577': [{u'datatype': u'time',
                                u'datavalue': {u'type': u'time',
                                               u'value': {u'after': 0, u'precision': 11,
                                                          u'time': u'+00000002001-05-15T00:00:00Z',
                                                          u'timezone': 0,
                                                          u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                                          u'before': 0}},
                                u'property': u'P577', u'snaktype': u'value'}],
                     u'P50': [{u'datatype': u'wikibase-item',
                               u'datavalue': {u'type': u'wikibase-entityid',
                                              u'value': {u'entity-type': u'item',
                                                         u'numeric-id': 18145749}},
                               u'property': u'P50', u'snaktype': u'value'}]},
                  u'hash': u'3f4d26cf841e20630c969afc0e48e5e3ef0c5a49',
                  u'snaks-order': [u'P854', u'P577', u'P813', u'P1433', u'P50',
                                   u'P1476', u'P407']},
                 {u'snaks': {
                     u'P123': [{u'datatype': u'wikibase-item',
                                u'datavalue': {u'type': u'wikibase-entityid',
                                               u'value': {u'entity-type': u'item',
                                                          u'numeric-id': 192621}},
                                u'property': u'P123', u'snaktype': u'value'}],
                     u'P1476': [{u'datatype': u'monolingualtext',
                                 u'datavalue': {u'type': u'monolingualtext',
                                                u'value': {u'text': u"Hitch Hiker's Guide author Douglas Adams dies aged 49",
                                                           u'language': u'en'}},
                                 u'property': u'P1476', u'snaktype': u'value'}],
                     u'P407': [{u'datatype': u'wikibase-item',
                                u'datavalue': {u'type': u'wikibase-entityid',
                                               u'value': {u'entity-type': u'item',
                                                          u'numeric-id': 1860}},
                                u'property': u'P407', u'snaktype': u'value'}],
                     u'P813': [{u'datatype': u'time',
                                u'datavalue': {u'type': u'time',
                                               u'value': {u'after': 0, u'precision': 11,
                                                          u'time': u'+00000002015-01-03T00:00:00Z',
                                                          u'timezone': 0,
                                                          u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                                          u'before': 0}},
                                u'property': u'P813', u'snaktype': u'value'}],
                     u'P854': [{u'datatype': u'url',
                                u'datavalue': {u'type': u'string',
                                               u'value': u'http://www.telegraph.co.uk/news/uknews/1330072/Hitch-Hikers-Guide-author-Douglas-Adams-dies-aged-49.html'},
                                u'property': u'P854', u'snaktype': u'value'}],
                     u'P577': [{u'datatype': u'time',
                                u'datavalue': {u'type': u'time',
                                               u'value': {u'after': 0, u'precision': 11,
                                                          u'time': u'+00000002001-05-13T00:00:00Z',
                                                          u'timezone': 0,
                                                          u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                                          u'before': 0}},
                                u'property': u'P577', u'snaktype': u'value'}]},
                  u'hash': u'51a934797fd7f7d3ee91d4d541356d4c5974075b',
                  u'snaks-order': [u'P1476', u'P577', u'P123', u'P407', u'P854',
                                   u'P813']},
                 {u'snaks': {
                     u'P248': [{u'datatype': u'wikibase-item',
                                u'datavalue': {u'type': u'wikibase-entityid',
                                               u'value': {u'entity-type': u'item',
                                                          u'numeric-id': 36578}},
                                u'property': u'P248', u'snaktype': u'value'}],
                     u'P813': [{u'datatype': u'time',
                                u'datavalue': {u'type': u'time',
                                               u'value': {u'after': 0, u'precision': 11,
                                                          u'time': u'+00000002015-07-07T00:00:00Z',
                                                          u'timezone': 0,
                                                          u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                                          u'before': 0}},
                                u'property': u'P813', u'snaktype': u'value'}],
                     u'P227': [{u'datatype': u'external-id',
                                u'datavalue': {u'type': u'string',
                                               u'value': u'119033364'},
                                u'property': u'P227', u'snaktype': u'value'}]},
                  u'hash': u'a02f3a77ddd343e6b88be25696b055f5131c3d64',
                  u'snaks-order': [u'P248', u'P227', u'P813']}],
             u'mainsnak': {u'datatype': u'wikibase-item',
                           u'datavalue': {u'type': u'wikibase-entityid',
                                          u'value': {u'entity-type': u'item',
                                                     u'numeric-id': 350}},
                           u'property': u'P19', u'snaktype': u'value'},
             u'id': u'q42$3D284234-52BC-4DA3-83A3-7C39F84BA518',
             u'rank': u'normal'})

        # target_id = 'Q{}'.format(claim['mainsnak']['datavalue']['value']['numeric-id'])
        target = claim.target
        # target = pywikibot.ItemPage.from_entity_uri(site=DataSite('wikidata', 'wikidata'), uri=target_id)
        result = ParseItemPage.extract_literal_properties(entity=target,
                                                          languages=['en'],
                                                          literals=['labels'])
        print(result)
        assert result['labels']['en'] == 'Cambridge'

        entity_id = 'Q350'
        target = ItemPage.from_entity_uri(
            site=DataSite('wikidata', 'wikidata'),
            uri='http://www.wikidata.org/entity' + '/' + entity_id)
        print(target)
        result = ParseItemPage.extract_literal_properties(entity=target,
                                                          languages=['en'],
                                                          literals=['labels'])
        print(result)
        assert result['labels']['en'] == 'Cambridge'
    except pywikibot.exceptions.MaxlagTimeoutError:
        warnings.warn('External API unreachable')
def update_wikidata(project: Project, edit_group_hash: str):
    """Update wikidata entry with data from github"""
    # Wikidata boilerplate
    wikidata = Settings.wikidata_repo
    q_value = project.project.replace("http://www.wikidata.org/entity/", "")
    item = ItemPage(wikidata, title=q_value)
    item.get()

    url_raw = project.repo
    url_normalized = str(normalize_url(url_raw))
    if Settings.normalize_repo_url:
        normalize_repo_url(item, url_normalized, url_raw, q_value, edit_group_hash)

    set_website(item, project, url_normalized, edit_group_hash)
    set_license(item, project, url_normalized, edit_group_hash)

    # Add all stable releases
    stable_releases = project.stable_release
    stable_releases.sort(
        key=lambda x: LooseVersion(re.sub(r"[^0-9.]", "", x.version)))

    if len(stable_releases) == 0:
        logger.info("No stable releases")
        return

    versions = [i.version for i in stable_releases]
    if len(versions) != len(set(versions)):
        duplicates = [
            release for release in stable_releases
            if versions.count(release.version) > 1
        ]
        logger.warning("There are duplicate releases in {}: {}".format(
            q_value, duplicates))
        return

    latest_version: Optional[str] = stable_releases[-1].version

    existing_versions = item.claims.get(Properties.software_version, [])
    github_version_names = [i.version for i in stable_releases]

    for i in existing_versions:
        if i.getRank() == "preferred" and i.getTarget() not in github_version_names:
            logger.warning(
                "There's a preferred rank for {} for a version which is not in "
                "the github page: {}".format(q_value, i.getTarget()))
            latest_version = None

    if len(stable_releases) > 100:
        logger.warning("Limiting {} to 100 of {} stable releases".format(
            q_value, len(stable_releases)))
        stable_releases = stable_releases[-100:]
    else:
        logger.info("There are {} stable releases".format(len(stable_releases)))

    for release in stable_releases:
        claim, created = get_or_create_claim(item, Properties.software_version,
                                             release.version, edit_group_hash)
        if created:
            logger.info("Added '{}'".format(release.version))

        # Assumption: A preexisting publication date is more reliable than
        # the one from github
        date_p = Properties.publication_date
        if date_p not in claim.qualifiers:
            get_or_create_qualifiers(claim, date_p, release.date, edit_group_hash)

        title = "Release %s" % release.version
        get_or_create_sources(claim, release.page, project.retrieved,
                              edit_group_hash, title, release.date)

        # Give the latest release the preferred rank
        # and work around a bug in pywikibot
        try:
            set_claim_rank(claim, latest_version, release, edit_group_hash)
        except AssertionError:
            logger.warning(
                f"Using the fallback for setting the preferred rank of {q_value}")
            item.get(force=True)
            claim, created = get_or_create_claim(item, Properties.software_version,
                                                 release.version, edit_group_hash)
            assert not created
            set_claim_rank(claim, latest_version, release, edit_group_hash)
def create_season(series_id, label, descr, ordinal, dry):
    dry_str = "[DRY-RUN] " if dry else ""
    repoutil = RepoUtils(Site().data_repository())

    print(f"{dry_str}Creating season with\n\tlabel='{label}'\n\tdescription='{descr}'")
    if not dry:
        season = ItemPage(repoutil.repo)
        season.editLabels({"en": label}, summary="Setting label")
        season.editDescriptions({"en": descr}, summary="Setting description")
        print(f"Created a new Item: {season.getID()}")

    print(f"{dry_str}Setting {wp.INSTANCE_OF}={wp.TELEVISION_SERIES_SEASON}")
    if not dry:
        instance_claim = repoutil.new_claim(wp.INSTANCE_OF.pid)
        instance_claim.setTarget(ItemPage(repoutil.repo, wp.TELEVISION_SERIES_SEASON))
        season.addClaim(instance_claim, summary=f"Setting {wp.INSTANCE_OF.pid}")

    print(
        f"{dry_str}Setting {wp.PART_OF_THE_SERIES}={series_id}, "
        f"with {wp.SERIES_ORDINAL.pid}={ordinal}"
    )
    if not dry:
        series_claim = repoutil.new_claim(wp.PART_OF_THE_SERIES.pid)
        series_claim.setTarget(ItemPage(repoutil.repo, series_id))
        season_ordinal = repoutil.new_claim(wp.SERIES_ORDINAL.pid)
        season_ordinal.setTarget(str(ordinal))
        series_claim.addQualifier(season_ordinal)
        season.addClaim(series_claim, summary=f"Setting {wp.PART_OF_THE_SERIES.pid}")
import daty
from pprint import pprint
from pywikibot import Site, ItemPage

site = Site('wikidata', 'wikidata')
repo = site.data_repository()
item_page = ItemPage(repo, 'Q4115189')
data = item_page.get()
target_page = ItemPage(repo, 'Q17')

for P in data['claims']:
    for c in data['claims'][P]:
        if hasattr(c, 'qualifiers'):
            #print(c.qualifiers)
            if 'P710' in c.qualifiers:
                for q in c.qualifiers['P710']:
                    print(q.hash)
                    q.setTarget(target_page)
                    print(q.hash)
                    repo.editQualifier(c, q)
                    #q.changeTarget(target_page)
                    #print(q)

#del item_page
#print(data)
#data['descriptions']['it'] = "LE MIE MANI SONO INCREDIBILI"
#for p in data['claims']:
def create_episode(series_id, season_id, title, ordinal, dry):
    dry_str = "[DRY-RUN] " if dry else ""
    print(f"{dry_str}Creating episode with label='{title}'")
    if not dry:
        repoutil = RepoUtils(Site().data_repository())
        episode = ItemPage(repoutil.repo)
        episode.editLabels({"en": title}, summary="Setting label")
        print(f"Created a new Item: {episode.getID()}")

    print(f"{dry_str}Setting {wp.INSTANCE_OF}={wp.TELEVISION_SERIES_EPISODE}")
    if not dry:
        instance_claim = repoutil.new_claim(wp.INSTANCE_OF.pid)
        instance_claim.setTarget(ItemPage(repoutil.repo, wp.TELEVISION_SERIES_EPISODE))
        episode.addClaim(instance_claim, summary=f"Setting {wp.INSTANCE_OF.pid}")

    print(f"{dry_str}Setting {wp.PART_OF_THE_SERIES}={series_id}")
    if not dry:
        series_claim = repoutil.new_claim(wp.PART_OF_THE_SERIES.pid)
        series_claim.setTarget(ItemPage(repoutil.repo, series_id))
        episode.addClaim(series_claim, summary=f"Setting {wp.PART_OF_THE_SERIES.pid}")

    print(
        f"{dry_str}Setting {wp.SEASON}={season_id}, "
        f"with {wp.SERIES_ORDINAL}={ordinal}"
    )
    if not dry:
        season_claim = repoutil.new_claim(wp.SEASON.pid)
        season_claim.setTarget(ItemPage(repoutil.repo, season_id))
        season_ordinal = repoutil.new_claim(wp.SERIES_ORDINAL.pid)
        # SERIES_ORDINAL is a string-valued property, so stringify the ordinal
        season_ordinal.setTarget(str(ordinal))
        season_claim.addQualifier(season_ordinal)
        episode.addClaim(season_claim, summary=f"Setting {wp.SEASON.pid}")