Example #1
0
def _last_revision_before_bot(xml, bot="Fluggbot"):
    """Return the newest non-bot revision id older than the bot's newest edit.

    ``xml`` is the object returned by ``wiki.sendQuery``; it is walked with
    ``.iter()`` and every element tagged ``rev`` is inspected for its
    ``revid`` and ``user`` attributes.

    Returns ``None`` when ``bot`` has no revision in ``xml``, and ``0`` when
    the bot has edited but no older revision by another user is present.
    """
    revisions = []
    for el in xml.iter():
        if el.tag != "rev":
            continue
        # .get(): a <rev> without a "user" attribute is treated as a non-bot
        # revision instead of aborting the whole run with a KeyError.
        revisions.append((int(el.attrib["revid"]), el.attrib.get("user")))

    bot_ids = [rev_id for rev_id, user in revisions if user == bot]
    if not bot_ids:
        return None
    newest_bot_id = max(bot_ids)

    return max(
        (rev_id for rev_id, user in revisions
         if user != bot and rev_id < newest_bot_id),
        default=0,
    )


def _collect_pre_bot_revisions():
    """Map each page title to the id (as ``str``) of its last pre-bot revision.

    Pages on which Fluggbot never edited are left out of the mapping.
    """
    revs = {}
    for page in wiki.allPages():
        xml = wiki.sendQuery(
            "redirects",
            "prop=revisions",
            "titles="+urllib.parse.quote(page),
            "rvdir=older",
            "rvlimit=max")

        rev_id = _last_revision_before_bot(xml)
        if rev_id is None:
            continue
        # NOTE(review): rev_id is 0 when the bot's edit has no predecessor;
        # the content query below is then sent with rvstartid=0 - confirm
        # that this is intended.
        revs[page] = str(rev_id)
    return revs


def _collect_ld_host_links(revs):
    """Fetch each revision in ``revs`` and return its LD-host links.

    ``revs`` maps page titles to revision-id strings. The returned list holds
    every link produced by ``extractLDHostLinks`` exactly once, in order of
    first appearance.
    """
    links = []
    seen = set()  # O(1) duplicate check; ``links`` keeps the discovery order
    for title, rev_id in revs.items():
        # NOTE(review): unlike the revision query, the title is passed
        # unquoted here - verify how wiki.buildQuery encodes it.
        qry = wiki.buildQuery((
            "titles="+title,
            "prop=revisions",
            "rvstartid="+rev_id,
            "rvprop=content",
            "rvlimit=1",
            "redirects"))
        print(qry)
        try:
            r = wiki.opener.open(wiki._url+"api.php"+qry)
        except Exception:
            # Best effort: skip pages whose request fails, but let
            # KeyboardInterrupt / SystemExit through (a bare except did not).
            print("Überspringe "+title)
            continue

        text = [line.decode('utf-8') for line in r.readlines()]
        for link in extractLDHostLinks(text):
            if link not in seen:
                seen.add(link)
                links.append(link)
    return links


def main():
    """Write the LD-host links found in pre-Fluggbot page revisions to a file.

    Usage: ``SCRIPT FILENAME``. For every page from ``wiki.allPages()`` the
    last revision before Fluggbot's newest edit is looked up, its content is
    fetched, and the links returned by ``extractLDHostLinks`` are written to
    FILENAME, one per line and without duplicates.

    Exits with status 1 when FILENAME is missing or names the script itself.
    """
    if len(sys.argv) < 2:
        print("missing argument: "+sys.argv[0]+" FILENAME")
        sys.exit(1)
    if sys.argv[0] == sys.argv[1]:
        # Abort here: opening the path in "w" mode would truncate the script.
        print("nope")
        sys.exit(1)
    out_path = sys.argv[1]

    # The return value is not needed; the later requests go through
    # wiki.opener (presumably set up by login() - verify).
    wiki.login()

    # Open the output file before crawling so that an unwritable path fails
    # fast, and let the context manager close it on every exit path.
    # The content is decoded as UTF-8, so it is written back as UTF-8 rather
    # than in the platform default encoding.
    with open(out_path, "w", encoding="utf-8") as out:
        revs = _collect_pre_bot_revisions()
        links = _collect_ld_host_links(revs)

        print(str(len(links))+" Links gefunden, schreibe in "+out_path)
        out.writelines(link + "\n" for link in links)
Example #2
0
def main():
    """Log in to the wiki and call ``doIt`` on every page from ``allPages``.

    The object returned by ``wiki.login`` is handed both to ``wiki.allPages``
    (together with ``resume="speed"``) and to each ``doIt`` call.
    """
    session = wiki.login(wiki.username, wiki.password)
    pages = wiki.allPages(session, resume="speed")

    for page in pages:
        doIt(page, session)