def copy(
    self,
    src_item: ItemPage,
    dest_item: ItemPage,
    props: Iterable[wp.WikidataProperty],
):
    """Copy the given properties from ``src_item`` onto ``dest_item``.

    A property is skipped, and counted as a failure, when:

    * the source item has no claim for it,
    * the source item has several values and the operator declines the
      interactive confirmation, or
    * the destination item already has a claim for it.

    Every claim that is written to ``dest_item`` counts as one success.

    Returns a tuple of (successes, failures)
    """
    src_item.get()
    dest_item.get()

    copied = 0
    skipped = 0

    for prop in props:
        pid = prop.pid

        if pid not in src_item.claims:
            print(f"{prop} not found in {src_item.title()}")
            skipped += 1
            continue

        src_claims = src_item.claims[pid]

        # Only ask the operator when there is more than one value to copy.
        if len(src_claims) > 1 and not click.confirm(
            f"There are {len(src_claims)} values for {prop}. Are you sure you want to copy all of them?"
        ):
            print(
                f"Cannot copy {prop} from {format(src_item)} to {format(dest_item)}. Only scalar properties can be copied"
            )
            skipped += 1
            continue

        if pid in dest_item.claims:
            print(f"{prop} already has a value in {format(dest_item)}")
            skipped += 1
            continue

        for target in [claim.getTarget() for claim in src_claims]:
            # Targets that expose get() are loaded before being printed;
            # other target values are used as they are.
            if hasattr(target, "get"):
                target.get()

            target_str = printable_target_value(target)
            print(
                f"Copying {prop}={target_str} from {format(src_item)} to {format(dest_item)}"
            )

            new_claim = Claim(self.repo, pid)
            new_claim.setTarget(target)
            dest_item.addClaim(
                new_claim, summary=f"Setting {prop.pid} ({prop.name})"
            )
            copied += 1

    return (copied, skipped)
def main(dry=False):
    """Set the English label of each queried episode to its title.

    The episodes come from ``episodes_with_titles_and_missing_labels()``.
    When ``dry`` is true, the planned edits are printed but nothing is
    written to the repository.
    """
    if dry:
        print("Running in dry-run mode, will not implement any changes")
    dry_str = "[DRY-RUN MODE] " if dry else ""

    repo = Site().data_repository()

    for episode_id, title, series_label in episodes_with_titles_and_missing_labels():
        print(f"{dry_str}Fixing {series_label}:{title} ({episode_id})")
        if dry:
            continue

        episode_item = ItemPage(repo, episode_id)
        episode_item.get()
        episode_item.editLabels({"en": title})
def create_seasons(series_id, number_of_seasons, quickstatements=False, dry=False):
    """Create seasons 1..``number_of_seasons`` for the series ``series_id``.

    The English label of the series item is used to build each season's
    label and description. With ``quickstatements`` set, each season is
    handed to ``create_season_quickstatements``; otherwise it is handed to
    ``create_season`` together with the ``dry`` flag.
    """
    series_item = ItemPage(Site().data_repository(), series_id)
    # force=True bypasses any cached copy of the item.
    series_item.get(force=True)
    series_label = series_item.labels['en']

    for ordinal in range(1, number_of_seasons + 1):
        label = f"{series_label}, season {ordinal}"
        descr = f"season {ordinal} of {series_label}"
        if not quickstatements:
            create_season(series_id, label, descr, ordinal, dry)
        else:
            create_season_quickstatements(series_id, label, descr, ordinal)
def main(dry=False):
    """Set the English label of each queried movie to its title.

    The (movie id, title) pairs come from
    ``movies_with_missing_labels_with_title()``. When ``dry`` is true, only
    the log line is printed and no edit is made.
    """
    dry_str = "[DRY-RUN MODE] " if dry else ""
    if dry:
        print("Running in dry-run mode, will not implement any changes")

    repo = Site().data_repository()

    for movie_id, title in movies_with_missing_labels_with_title():
        print(
            f"{dry_str}Setting label='{title}' for {movie_id} ( https://www.wikidata.org/wiki/{movie_id} )"
        )
        if dry:
            continue

        movie_item = ItemPage(repo, movie_id)
        movie_item.get()
        movie_item.editLabels({"en": title})
def setUpClass(cls):
    """Set up test class.

    The class is skipped unless the search finds the
    'IsbnWikibaseBotUnitTest' item page and that item carries a claim whose
    property is labelled 'ISBN-10' and whose target is '097522980x'.
    The title of the page found is stored on ``cls.test_page_qid``.
    """
    super(TestIsbnWikibaseBot, cls).setUpClass()

    # Check if the unit test item page and the property both exist
    item_ns = cls.get_repo().item_namespace
    found_pages = cls.get_site().search(
        'IsbnWikibaseBotUnitTest', total=1, namespaces=item_ns)

    for page in found_pages:
        cls.test_page_qid = page.title()
        item_page = ItemPage(cls.get_repo(), page.title())

        for claims in item_page.get()['claims'].values():
            for claim in claims:
                prop_page = pywikibot.PropertyPage(cls.get_repo(),
                                                   claim.getID())
                prop_page.get()
                if 'ISBN-10' not in prop_page.labels.values():
                    continue
                if claim.getTarget() == '097522980x':
                    # Fixture is complete; the tests can run.
                    return

        # The item page exists but lacks the expected ISBN-10 claim.
        raise unittest.SkipTest(u'%s: "ISBN-10" property was not found in '
                                u'"IsbnWikibaseBotUnitTest" item page'
                                % cls.__name__)

    # The search returned no page at all.
    raise unittest.SkipTest(
        u'%s: "IsbnWikibaseBotUnitTest" item page was not found'
        % cls.__name__)
def main(dry=False):
    """Add a title claim to each queried movie, using its label as the value.

    The (movie id, label) pairs come from ``movies_with_missing_titles()``.
    The label is stored as an English monolingual text on the ``wp.TITLE``
    property. When ``dry`` is true, only the log line is printed.
    """
    if dry:
        print("Running in dry-run mode, will not implement any changes")
        dry_str = "[DRY-RUN MODE] "
    else:
        dry_str = ""

    repo = Site().data_repository()

    for movie_id, movie_label in movies_with_missing_titles():
        print(
            f"{dry_str}Setting title='{movie_label}' for {movie_id} ( https://www.wikidata.org/wiki/{movie_id} )"
        )
        if dry:
            continue

        movie_item = ItemPage(repo, movie_id)
        movie_item.get()

        title_claim = Claim(repo, wp.TITLE.pid)
        title_claim.setTarget(WbMonolingualText(movie_label, "en"))
        movie_item.addClaim(title_claim)
def get_typed_item(self, item_id: str) -> api.BaseType:
    """Load ``item_id`` and wrap it in the class matching its 'instance of'.

    Returns an ``Episode``, ``Season`` or ``Series`` built from the loaded
    item page and ``self.repo``. Episode is checked first, then season,
    then series (television series or animated series).

    Raises:
        ValueError: if the item has no 'instance of' claim, or if none of
            its 'instance of' targets is a supported type.
    """
    item_page = ItemPage(self.repo, item_id)
    item_page.get()

    if INSTANCE_OF.pid not in item_page.claims:
        raise ValueError(f"{item_id} has no 'instance of' property")

    # getTarget() returns None when a claim has no concrete value
    # ("unknown value" / "no value"). Skip those claims so they lead to
    # the ValueError below instead of an AttributeError on ``.id``.
    instance_ids = set()
    for claim in item_page.claims[INSTANCE_OF.pid]:
        target = claim.getTarget()
        if target is not None:
            instance_ids.add(target.id)

    if TELEVISION_SERIES_EPISODE in instance_ids:
        return Episode(item_page, self.repo)
    if TELEVISION_SERIES_SEASON in instance_ids:
        return Season(item_page, self.repo)
    if TELEVISION_SERIES in instance_ids or ANIMATED_SERIES in instance_ids:
        return Series(item_page, self.repo)

    raise ValueError(f"Unsupported item with instance QIDs {instance_ids}")
def create_seasons(series_id, number_of_seasons, quickstatements=False, dry=False):
    """Creates multiple season items on WikiData

    Seasons are numbered from 1 to ``number_of_seasons``. Labels and
    descriptions are derived from the English label of the series item.

    Arguments
    ---------
    series_id: str
        The Wiki ID of the series ItemPage
    number_of_seasons: int
        The number of seasons to create for this series
    quickstatements: bool
        if True, each season is passed to ``create_season_quickstatements``
        and nothing is added to the returned list.
        if False, each season is passed to ``create_season``
    dry: bool
        Dry-run flag, forwarded unchanged to ``create_season``.

    Returns
    -------
    season_ids: List[str]
        The values returned by ``create_season``, in season order.
        Empty when ``quickstatements`` is True.
    """
    series_item = ItemPage(Site().data_repository(), series_id)
    # force=True bypasses any cached copy of the item.
    series_item.get(force=True)
    series_label = series_item.labels['en']

    season_ids = []
    for ordinal in tqdm(range(1, number_of_seasons + 1)):
        label = f"{series_label}, season {ordinal}"
        descr = f"season {ordinal} of {series_label}"
        if not quickstatements:
            season_ids.append(
                create_season(series_id, label, descr, ordinal, dry))
        else:
            create_season_quickstatements(series_id, label, descr, ordinal)

    return season_ids
def copy_delayed(
    src_item: ItemPage, dest_item: ItemPage, props: Iterable[wp.WikidataProperty]
) -> Iterable[api.Fix]:
    """Build, but do not apply, the fixes that copy ``props`` between items.

    For every property that has exactly one value on ``src_item`` and no
    value on ``dest_item``, a new claim carrying the same target is created
    and wrapped in an ``api.ClaimFix`` aimed at ``dest_item``. Nothing is
    written to the repository here.

    Properties are skipped, with a message where noted, when:

    * the source item has more than one value (message printed),
    * the destination item already has a value (message printed), or
    * the source item has no value (no message, no fix).

    Returns:
        A list of ``api.ClaimFix`` objects, in the order of ``props``.
    """
    repo = Site().data_repository()

    src_item.get()
    dest_item.get()

    claims = []

    for prop in props:
        src_claims = src_item.claims.get(prop.pid, [])

        if len(src_claims) > 1:
            print(
                f"Cannot copy {prop} from {format(src_item)} to {format(dest_item)}. Only scalar properties can be copied"
            )
            continue

        if prop.pid in dest_item.claims:
            print(f"{prop} already has a value in {format(dest_item)}")
            continue

        targets = [claim.getTarget() for claim in src_claims]

        for target in targets:
            # Not every target has a get() method (plain strings and None
            # do not), so only call it when present. This matches the
            # guard used by the immediate ``copy`` variant.
            if hasattr(target, "get"):
                target.get()

            target_str = printable_target_value(target)

            print(
                f"Creating claim to copy {prop}={target_str} from {format(src_item)} to {format(dest_item)}"
            )

            new_claim = Claim(repo, prop.pid)
            new_claim.setTarget(target)
            summary = f"Setting {prop.pid} ({prop.name})"
            claims.append(api.ClaimFix(new_claim, summary, dest_item))

    return claims
def main(dry=False):
    """Set the English label of each queried item to its title.

    The (link, id, title) triples come from
    ``items_with_missing_labels_with_title()``. Titles of 250 characters or
    more are skipped. API and save errors are printed and the loop moves on
    to the next item. When ``dry`` is true, only the log line is printed.
    """
    dry_str = ""
    if dry:
        print("Running in dry-run mode, will not implement any changes")
        dry_str = "[DRY-RUN MODE] "

    repo = Site().data_repository()

    for item_link, item_id, title in items_with_missing_labels_with_title():
        print(
            f"{dry_str} ( {str(item_link).ljust(40, ' ')} ) Fixing {str(item_id).ljust(9, ' ')}: {title}"
        )

        # Labels have a character limit, so ignore if trying to add it will
        # result in an error. The length is not inspected in dry-run mode.
        if dry or len(title) >= 250:
            continue

        item = ItemPage(repo, item_id)
        item.get()
        try:
            item.editLabels({"en": title})
        except (APIError, OtherPageSaveError) as e:
            print(f"An error occurred while adding label for {item_id}: {e}")
def main(dry=False):
    """Set the English label of each queried board game to its fetched name.

    The (item id, BGG id) pairs come from
    ``board_games_with_missing_labels()``. Each item id is handled once,
    even if it appears in several pairs. The name is looked up with
    ``utils.bgg_title``; items for which it returns None are reported and
    skipped. When ``dry`` is true, only the log line is printed.
    """
    if dry:
        print("Running in dry-run mode, will not implement any changes")
    dry_str = "[DRY-RUN MODE] " if dry else ""

    repo = Site().data_repository()
    handled = set()

    for board_game_id, bgg_id in board_games_with_missing_labels():
        # Only the first pair for a given item id is processed.
        if board_game_id in handled:
            continue
        handled.add(board_game_id)

        board_game_name = utils.bgg_title(bgg_id)
        if board_game_name is None:
            print(f"Unable to fetch name for {board_game_id}.")
            continue

        wiki_url = f"https://www.wikidata.org/wiki/{board_game_id}"
        print(
            f"{dry_str}Setting label='{board_game_name}' for {board_game_id} ( {wiki_url} )"
        )
        if dry:
            continue

        bg_item = ItemPage(repo, board_game_id)
        bg_item.get()
        bg_item.editLabels({"en": board_game_name})
def setUpClass(cls):
    """Set up test class.

    The class is skipped unless the search finds the
    "IsbnWikibaseBotUnitTest" item page and that item carries a claim whose
    property is labelled "ISBN-10" and whose target is "097522980x".
    The title of the page found is stored on ``cls.test_page_qid``.

    Raises:
        unittest.SkipTest: if the item page or the expected claim is
            missing.
    """
    super(TestIsbnWikibaseBot, cls).setUpClass()

    # Check if the unit test item page and the property both exist
    item_ns = cls.get_repo().item_namespace
    # ``total=1`` already limits the search to one result. The former
    # ``step`` keyword is omitted because it is redundant here and is
    # obsolete in the pywikibot search API.
    for page in cls.get_site().search("IsbnWikibaseBotUnitTest", total=1, namespaces=item_ns):
        cls.test_page_qid = page.title()
        item_page = ItemPage(cls.get_repo(), page.title())
        for claims in item_page.get()["claims"].values():
            for claim in claims:
                prop_page = pywikibot.PropertyPage(cls.get_repo(), claim.getID())
                prop_page.get()
                if "ISBN-10" in prop_page.labels.values() and claim.getTarget() == "097522980x":
                    # Fixture is complete; the tests can run.
                    return
        # The item page exists but lacks the expected ISBN-10 claim.
        raise unittest.SkipTest(
            '%s: "ISBN-10" property was not found in '
            '"IsbnWikibaseBotUnitTest" item page' % cls.__name__
        )
    # The search returned no page at all.
    raise unittest.SkipTest('%s: "IsbnWikibaseBotUnitTest" item page was not found' % cls.__name__)
import daty
from pprint import pprint
from pywikibot import Site, ItemPage

# Script: on item Q4115189, point every P710 qualifier at item Q17 and
# submit each change through the repository's editQualifier call.
site = Site('wikidata', 'wikidata')
repo = site.data_repository()

item_page = ItemPage(repo, 'Q4115189')
data = item_page.get()
target_page = ItemPage(repo, 'Q17')

for claims in data['claims'].values():
    for claim in claims:
        if not hasattr(claim, 'qualifiers'):
            continue
        if 'P710' not in claim.qualifiers:
            continue
        for qualifier in claim.qualifiers['P710']:
            # The hash is printed before and after retargeting so the two
            # values can be compared in the output.
            print(qualifier.hash)
            qualifier.setTarget(target_page)
            print(qualifier.hash)
            repo.editQualifier(claim, qualifier)
def update_wikidata(project: Project, edit_group_hash: str):
    """Update wikidata entry with data from github.

    Steps, in order:

    1. Load the item named by ``project.project``.
    2. Optionally normalize the repository URL, then set website and
       license through the corresponding helpers.
    3. Sort ``project.stable_release`` in place by numeric version. Stop if
       there are no releases or if two releases share a version string.
    4. For each of the (at most 100, most recent) releases, make sure a
       software-version claim exists with a publication-date qualifier and
       a source, then set its rank.

    The latest release is passed to ``set_claim_rank`` as the version to
    prefer. It is withheld (``None``) when the item already has a
    preferred-rank version that is not among the github releases.
    """
    # Wikidata boilerplate
    q_value = project.project.replace("http://www.wikidata.org/entity/", "")
    item = ItemPage(Settings.wikidata_repo, title=q_value)
    item.get()

    url_raw = project.repo
    url_normalized = str(normalize_url(url_raw))
    if Settings.normalize_repo_url:
        normalize_repo_url(item, url_normalized, url_raw, q_value, edit_group_hash)

    set_website(item, project, url_normalized, edit_group_hash)
    set_license(item, project, url_normalized, edit_group_hash)

    def version_key(release):
        # Strip everything except digits and dots before comparing.
        return LooseVersion(re.sub(r"[^0-9.]", "", release.version))

    # Add all stable releases
    stable_releases = project.stable_release
    stable_releases.sort(key=version_key)

    if not stable_releases:
        logger.info("No stable releases")
        return

    versions = [release.version for release in stable_releases]
    if len(set(versions)) != len(versions):
        duplicates = [
            release for release in stable_releases
            if versions.count(release.version) > 1
        ]
        logger.warning("There are duplicate releases in {}: {}".format(
            q_value, duplicates))
        return

    latest_version: Optional[str] = stable_releases[-1].version

    existing_versions = item.claims.get(Properties.software_version, [])
    for existing in existing_versions:
        if existing.getRank() != "preferred":
            continue
        if existing.getTarget() in versions:
            continue
        logger.warning(
            "There's a preferred rank for {} for a version which is not in the github page: {}"
            .format(q_value, existing.getTarget()))
        latest_version = None

    release_count = len(stable_releases)
    if release_count > 100:
        logger.warning("Limiting {} to 100 of {} stable releases".format(
            q_value, release_count))
        # The list is sorted ascending, so the tail holds the newest ones.
        stable_releases = stable_releases[-100:]
    else:
        logger.info("There are {} stable releases".format(release_count))

    date_p = Properties.publication_date

    for release in stable_releases:
        claim, created = get_or_create_claim(item,
                                             Properties.software_version,
                                             release.version,
                                             edit_group_hash)
        if created:
            logger.info("Added '{}'".format(release.version))

        # Assumption: A preexisting publication date is more reliable than the one from github
        if date_p not in claim.qualifiers:
            get_or_create_qualifiers(claim, date_p, release.date,
                                     edit_group_hash)

        title = "Release %s" % release.version
        get_or_create_sources(claim, release.page, project.retrieved,
                              edit_group_hash, title, release.date)

        # Give the latest release the preferred rank
        # And work around a bug in pywikibot
        try:
            set_claim_rank(claim, latest_version, release, edit_group_hash)
        except AssertionError:
            logger.warning(
                f"Using the fallback for setting the preferred rank of {q_value}"
            )

            # Reload the item and fetch the claim again before retrying.
            item.get(force=True)
            claim, created = get_or_create_claim(item,
                                                 Properties.software_version,
                                                 release.version,
                                                 edit_group_hash)
            assert not created
            set_claim_rank(claim, latest_version, release, edit_group_hash)
def create_episode(series_id, season_id, title, series_ordinal, season_ordinal, dry):
    """Creates an episode item on WikiData

    Before creating anything, the season item is checked: its first
    PART_OF_THE_SERIES claim must point at ``series_id``. The new episode
    then gets an English label, an INSTANCE_OF claim, a PART_OF_THE_SERIES
    claim and a SEASON claim. The last two each carry a SERIES_ORDINAL
    qualifier.

    Arguments
    ---------
    series_id: str
        The Wiki ID of the series ItemPage
    season_id: str
        The Wiki ID of the season ItemPage
    title: str
        The title of this episode. This is used to set the label.
    series_ordinal: int
        The ordinal of this episode, within the series
    season_ordinal: int
        The ordinal of this episode, within the season
    dry: bool
        Whether or not this function should run in dry-run mode.
        In dry-run mode, no real changes are made to WikiData, they are
        only logged to stdout.

    Returns
    -------
    episode_id: str
        The Wiki ID of the episode item, or "Q-1" in dry-run mode

    Raises
    ------
    ValueError
        If the season has no PART_OF_THE_SERIES claim, or if that claim
        points at a different series than ``series_id``.
    """
    live = not dry
    dry_str = "" if live else "[DRY-RUN] "

    print(f"{dry_str}Creating episode with label='{title}'")
    episode = None
    if live:
        repoutil = RepoUtils(Site().data_repository())

        season = ItemPage(repoutil.repo, season_id)
        season.get()

        # Check if season has part_of_the_series set to series_id
        if wp.PART_OF_THE_SERIES.pid not in season.claims:
            raise ValueError(
                f"The season {season_id} does not have a PART_OF_THE_SERIES ({wp.PART_OF_THE_SERIES.pid} property). Check the input series and season IDs for correctness."
            )
        first_series_claim = season.claims[wp.PART_OF_THE_SERIES.pid][0]
        actual_series_id = str(first_series_claim.getTarget().getID())
        if actual_series_id != series_id:
            raise ValueError(
                f"The season {season_id} has PART_OF_THE_SERIES={actual_series_id} but expected={series_id}. Check the input series and season IDs for correctness."
            )

        # Setting the label on a fresh ItemPage is what creates the item.
        episode = ItemPage(repoutil.repo)
        episode.editLabels({"en": title}, summary="Setting label")
        print(f"Created a new Item: {episode.getID()}")

    def add_ordered_link(prop, target_id, ordinal):
        # Add a claim prop -> target_id qualified with SERIES_ORDINAL=ordinal.
        link_claim = repoutil.new_claim(prop.pid)
        link_claim.setTarget(ItemPage(repoutil.repo, target_id))
        ordinal_claim = repoutil.new_claim(wp.SERIES_ORDINAL.pid)
        ordinal_claim.setTarget(ordinal)
        link_claim.addQualifier(ordinal_claim)
        episode.addClaim(link_claim, summary=f"Setting {prop.pid}")

    print(f"{dry_str}Setting {wp.INSTANCE_OF}={wp.TELEVISION_SERIES_EPISODE}")
    if live:
        instance_claim = repoutil.new_claim(wp.INSTANCE_OF.pid)
        instance_claim.setTarget(
            ItemPage(repoutil.repo, wp.TELEVISION_SERIES_EPISODE))
        episode.addClaim(instance_claim,
                         summary=f"Setting {wp.INSTANCE_OF.pid}")

    print(
        f"{dry_str}Setting {wp.PART_OF_THE_SERIES}={series_id}, with {wp.SERIES_ORDINAL}={series_ordinal}"
    )
    if live:
        add_ordered_link(wp.PART_OF_THE_SERIES, series_id, series_ordinal)

    print(
        f"{dry_str}Setting {wp.SEASON}={season_id}, with {wp.SERIES_ORDINAL}={season_ordinal}"
    )
    if live:
        add_ordered_link(wp.SEASON, season_id, season_ordinal)

    # "Q-1" is the placeholder ID returned when nothing was created.
    return "Q-1" if episode is None else episode.getID()