def compare_links(user: str, sitedata: SiteData, sig: str) -> Union[bool, Set[str]]:
    """Compare links in a sig to data in sitedata.

    Returns True as soon as a link to the user's own user page, user talk
    page, or Special:Contributions page is found; otherwise returns the set
    of error tags collected while scanning (possibly empty).
    """
    wikitext = mwph.parse(sig)
    user = datasources.normal_name(user)
    errors = set()
    for link in wikitext.ifilter_wikilinks():
        # Drop any "#section" anchor: only the page title matters here.
        title = str(link.title).partition("#")[0]
        # Extract namespace and page.
        # Interwiki prefixes are left in the namespace
        if ":" in user:
            # Colons in usernames break the partitioning
            if title.endswith(f":{user}"):
                # Title is "<ns>:<user>"; peel the username off the end.
                ns = title.replace(f":{user}", "")
                sep = ":"
                page = title.replace(f"{ns}:", "")
            elif title.endswith(f"/{user}"):
                # Subpage form "<ns>:<base>/<user>": partition without the
                # suffix, then reattach it to the page part.
                raw = title.replace(f"/{user}", "")
                ns, sep, page = raw.rpartition(":")
                page += f"/{user}"
            else:
                continue  # pragma: no cover
        else:
            ns, sep, page = title.rpartition(":")
        # normalize namespace and strip whitespace from both
        ns, page = datasources.normal_name(ns.strip()), page.strip()
        # remove leading colon from namespace
        if ns.startswith(":"):
            ns = ns[1:]
        # Check if linking to user or user talk
        if not sep:
            # No namespace separator at all: cannot be a user-space link.
            continue
        elif ":" in ns:
            # A colon remaining in the namespace means an interwiki prefix.
            errors.add("interwiki-user-link")
        elif ns in sitedata.user or ns in sitedata.user_talk:
            # Check that it's the right user or user_talk
            if datasources.normal_name(page) == user:
                return True
            else:
                errors.add("link-username-mismatch")
                continue
        elif ns in sitedata.special:
            # Could be a contribs page, check
            # split page and normalize names
            specialpage, slash, target = page.partition("/")
            specialpage = datasources.normal_name(specialpage.strip())
            target = datasources.normal_name(target.strip())
            if specialpage in sitedata.contribs:
                # It's contribs
                if target == user:
                    # The right one
                    return True
                else:
                    errors.add("link-username-mismatch")
                    continue  # pragma: no cover
            else:
                continue  # pragma: no cover
    else:
        # The loop body never breaks, so this runs once every link has been
        # examined without finding a match for the user.
        return errors
def check_impersonation(sig: str, user: str, sitedata: SiteData) -> Optional[SigError]:
    """Check for links whose display text names a different, existing user.

    A link that points at *user*'s own pages but displays another existing
    username suggests impersonation.  Scans the signature's wikilinks until
    one displays the correct username; returns ``SigError.LINK_NAME`` if a
    mismatched link naming an existing user was seen, otherwise ``None``.
    """
    wikitext = mwph.parse(sig)
    problem = False
    for link in wikitext.ifilter_wikilinks():
        if not link.text:
            # No display text, so the link shows its real target.
            continue
        # link.text is a Wikicode node; convert to str before handing it to
        # normal_name — Wikicode.replace() has node-replacement semantics,
        # not str semantics, so string helpers need a plain string.
        text = datasources.normal_name(str(link.text))
        # compare_links() declares its signature argument as str, so pass the
        # link's own wikitext rather than the node object.
        if compare_links(user, sitedata, str(link)) is True:
            if text == datasources.normal_name(user):
                # one link matches, that's good enough
                break
            elif datasources.check_user_exists(text, sitedata):
                problem = True
    if problem:
        return SigError.LINK_NAME
    else:
        return None
def check_images(sig: str, sitedata: SiteData) -> Optional[SigError]:
    """Check for displayed images in a signature.

    Returns ``SigError.IMAGES`` if any wikilink targets a file namespace
    without a leading colon (i.e. would render the image), else ``None``.
    """
    wikitext = mwph.parse(sig)
    for link in wikitext.ifilter_wikilinks():
        # Convert the Wikicode title node to str, matching compare_links();
        # plain-string handling avoids Wikicode's node-level method overrides.
        title = str(link.title)
        # if it starts with :, it's not a displayed image
        if title.startswith(":"):
            continue
        # Can't interwiki transclude an image, so the extra safety
        # in check_links isn't required
        ns, sep, page = title.partition(":")
        if not sep:
            # No namespace prefix at all, so not a file link.
            continue
        if datasources.normal_name(ns) in sitedata.file:
            return SigError.IMAGES
    return None
def get_site_data(hostname: str) -> SiteData:
    """Get metadata about a site from the API.

    Queries the MediaWiki action API on ``hostname`` for namespace names and
    aliases, special-page aliases, magic words, and general site info, and
    folds them into a :class:`SiteData` record.
    """
    url = f"https://{hostname}/w/api.php"
    params = dict(
        action="query",
        meta="siteinfo",
        siprop="|".join([
            "namespaces",
            "namespacealiases",
            "specialpagealiases",
            "magicwords",
            "general",
        ]),
        formatversion="2",
        format="json",
    )
    res_json = backoff_retry("get", url, params=params, output="json")

    # Collect every known name for each namespace, keyed by namespace id
    # as a string ("2" = User, "3" = User talk, "6" = File, "-1" = Special).
    namespaces: Dict[str, Set[str]] = {}
    for namespace, nsdata in res_json["query"]["namespaces"].items():
        namespaces.setdefault(namespace, set()).update([
            datasources.normal_name(nsdata.get("canonical", "")),
            datasources.normal_name(nsdata.get("name", "")),
        ])
    for nsdata in res_json["query"]["namespacealiases"]:
        namespaces.setdefault(str(nsdata["id"]), set()).add(
            datasources.normal_name(nsdata.get("alias", "")))

    specialpages = {
        item["realname"]: item["aliases"]
        for item in res_json["query"]["specialpagealiases"]
    }
    magicwords = {
        item["name"]: item["aliases"]
        for item in res_json["query"]["magicwords"]
    }
    general = res_json["query"]["general"]

    contribs = {
        datasources.normal_name(name) for name in specialpages["Contributions"]
    }
    # Accepted spellings of the subst: magic word — look the aliases up once
    # (was evaluated three times), then add lowercase and Capitalized
    # variants of each alias.
    subst_words = magicwords.get("subst", ["SUBST"])
    subst = list(
        itertools.chain(
            subst_words,
            [word.lower() for word in subst_words],
            # word[:1] (not word[0]) so an empty alias cannot raise IndexError
            [word[:1] + word[1:].lower() for word in subst_words],
        ))
    sitedata = SiteData(
        # Discard the empty string that an empty canonical/name field adds.
        user=namespaces["2"] - {""},
        user_talk=namespaces["3"] - {""},
        file=namespaces["6"] - {""},
        special=namespaces["-1"] - {""},
        contribs=contribs,
        subst=subst,
        dbname=general["wikiid"],
        hostname=hostname,
    )
    return sitedata