Example 1
import mwparserfromhell as mwph
from typing import Set, Union

# datasources and SiteData are project-local helpers assumed by these examples.
def compare_links(user: str, sitedata: SiteData, sig: str) -> Union[bool, Set[str]]:
    """Compare links in a sig to data in sitedata"""
    wikitext = mwph.parse(sig)
    user = datasources.normal_name(user)
    errors = set()
    for link in wikitext.ifilter_wikilinks():
        title = str(link.title).partition("#")[0]
        # Extract namespace and page.
        # Interwiki prefixes are left in the namespace
        if ":" in user:
            # Colons in usernames break the partitioning
            if title.endswith(f":{user}"):
                ns = title.replace(f":{user}", "")
                sep = ":"
                page = title.replace(f"{ns}:", "")
            elif title.endswith(f"/{user}"):
                raw = title.replace(f"/{user}", "")
                ns, sep, page = raw.rpartition(":")
                page += f"/{user}"
            else:
                continue  # pragma: no cover
        else:
            ns, sep, page = title.rpartition(":")
        # normalize namespace and strip whitespace from both
        ns, page = datasources.normal_name(ns.strip()), page.strip()

        # remove leading colon from namespace
        if ns.startswith(":"):
            ns = ns[1:]

        # Check if linking to user or user talk
        if not sep:
            continue
        elif ":" in ns:
            errors.add("interwiki-user-link")
        elif ns in sitedata.user or ns in sitedata.user_talk:
            # Check that it's the right user or user_talk
            if datasources.normal_name(page) == user:
                return True
            else:
                errors.add("link-username-mismatch")
                continue
        elif ns in sitedata.special:
            # Could be a contribs page, check
            # split page and normalize names
            specialpage, slash, target = page.partition("/")
            specialpage = datasources.normal_name(specialpage.strip())
            target = datasources.normal_name(target.strip())
            if specialpage in sitedata.contribs:
                # It's contribs
                if target == user:
                    # The right one
                    return True
                else:
                    errors.add("link-username-mismatch")
                    continue  # pragma: no cover
            else:
                continue  # pragma: no cover
    return errors
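A quick usage sketch (hypothetical username and signature; sitedata comes from get_site_data in Example 4):

# Hypothetical usage, not part of the original project:
sitedata = get_site_data("en.wikipedia.org")
compare_links("Example", sitedata, "[[User:Example|Example]]")
# -> True: the link targets the user's own page
compare_links("Example", sitedata, "[[User:Somebody|Example]]")
# -> {"link-username-mismatch"}: the link targets a different user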
Example 2
import mwparserfromhell as mwph
from typing import Optional

# SiteData, SigError, datasources, and compare_links are project-local (see Example 1).
def check_impersonation(sig: str, user: str, sitedata: SiteData) -> Optional[SigError]:
    """Check whether any link's display text shows a different, existing username."""
    wikitext = mwph.parse(sig)
    problem = False
    for link in wikitext.ifilter_wikilinks():
        if not link.text:
            continue
        text = datasources.normal_name(str(link.text))
        if compare_links(user, sitedata, str(link)) is True:
            if text == datasources.normal_name(user):
                # one link matches, that's good enough
                break
            elif datasources.check_user_exists(text, sitedata):
                problem = True

    if problem:
        return SigError.LINK_NAME
    else:
        return None
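A behavior sketch with hypothetical usernames (datasources.check_user_exists is a project-local lookup):

# Hypothetical usage, not part of the original project:
check_impersonation("[[User:Alice|Bob]]", "Alice", sitedata)
# -> SigError.LINK_NAME if "Bob" is an existing user (display text impersonates them)
check_impersonation("[[User:Alice|Alice]]", "Alice", sitedata)
# -> None: the displayed text matches the linked user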
Example 3
import mwparserfromhell as mwph
from typing import Optional

# SiteData, SigError, and datasources are project-local (see Examples 1 and 2).
def check_images(sig: str, sitedata: SiteData) -> Optional[SigError]:
    """Check for displayed images in a signature"""
    wikitext = mwph.parse(sig)
    for link in wikitext.ifilter_wikilinks():
        title = str(link.title)
        # if it starts with :, it's not a displayed image
        if title.startswith(":"):
            continue
        # Can't interwiki transclude an image, so the extra safety
        # in check_links isn't required
        ns, sep, page = title.partition(":")
        if not sep:
            continue
        if datasources.normal_name(ns) in sitedata.file:
            return SigError.IMAGES
    return None
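A behavior sketch with a hypothetical file name:

# Hypothetical usage, not part of the original project:
check_images("[[File:Smiley.png|20px]]", sitedata)  # -> SigError.IMAGES (displayed image)
check_images("[[:File:Smiley.png]]", sitedata)      # -> None (leading colon makes it a plain link)
check_images("[[User:Example]]", sitedata)          # -> None (not a file namespace)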
Example 4
import itertools
from typing import Dict, Set

# backoff_retry, datasources, and SiteData are project-local helpers.
def get_site_data(hostname: str) -> SiteData:
    """Get metadata about a site from the API"""
    url = f"https://{hostname}/w/api.php"
    data = dict(
        action="query",
        meta="siteinfo",
        siprop="|".join([
            "namespaces",
            "namespacealiases",
            "specialpagealiases",
            "magicwords",
            "general",
        ]),
        formatversion="2",
        format="json",
    )
    res_json = backoff_retry("get", url, params=data, output="json")

    namespaces: Dict[str, Set[str]] = {}
    all_namespaces = res_json["query"]["namespaces"]
    namespace_aliases = res_json["query"]["namespacealiases"]
    # Map each namespace ID to its canonical and local names
    for namespace, nsdata in all_namespaces.items():
        namespaces.setdefault(namespace, set()).update([
            datasources.normal_name(nsdata.get("canonical", "")),
            datasources.normal_name(nsdata.get("name", "")),
        ])

    # Fold in any additional namespace aliases
    for nsdata in namespace_aliases:
        namespaces.setdefault(str(nsdata["id"]), set()).add(
            datasources.normal_name(nsdata.get("alias", "")))

    specialpages = {
        item["realname"]: item["aliases"]
        for item in res_json["query"]["specialpagealiases"]
    }
    magicwords = {
        item["name"]: item["aliases"]
        for item in res_json["query"]["magicwords"]
    }
    general = res_json["query"]["general"]

    contribs = {
        datasources.normal_name(name)
        for name in specialpages["Contributions"]
    }

    # Collect subst: magic-word aliases in original, lower-, and Capital-case forms
    subst = list(
        itertools.chain(
            magicwords.get("subst", ["SUBST"]),
            [item.lower() for item in magicwords.get("subst", ["SUBST"])],
            [
                item[0] + item[1:].lower()
                for item in magicwords.get("subst", ["SUBST"])
            ],
        ))

    sitedata = SiteData(
        # "- {''}" drops empty placeholders left by missing canonical names
        user=namespaces["2"] - {""},
        user_talk=namespaces["3"] - {""},
        file=namespaces["6"] - {""},
        special=namespaces["-1"] - {""},
        contribs=contribs,
        subst=subst,
        dbname=general["wikiid"],
        hostname=hostname,
    )
    return sitedata
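All four examples assume a SiteData container holding the fields built above; a minimal sketch consistent with this constructor call (the real project may define it differently) could be:

from dataclasses import dataclass
from typing import List, Set

@dataclass
class SiteData:
    user: Set[str]       # aliases of the User: namespace (ID 2)
    user_talk: Set[str]  # aliases of the User talk: namespace (ID 3)
    file: Set[str]       # aliases of the File: namespace (ID 6)
    special: Set[str]    # aliases of the Special: namespace (ID -1)
    contribs: Set[str]   # localized names of Special:Contributions
    subst: List[str]     # casing variants of the subst: magic word
    dbname: str          # e.g. "enwiki", from the API's "wikiid"
    hostname: str        # e.g. "en.wikipedia.org"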