Beispiel #1
0
    def gen_inserts_from_logevent(self, logevent):
        """
        Generate the insert statements for a single log event.

        :param dict logevent:
            a log event entry from the API. The keys ``title``, ``ns``,
            ``logid``, ``type``, ``action``, ``timestamp``, ``userid``,
            ``user``, ``logpage``, ``comment`` and ``params`` are read
            unconditionally; ``tags`` and the ``actionhidden``,
            ``commenthidden``, ``userhidden`` and ``suppressed`` markers are
            optional.
        :returns:
            a generator of pairs consisting of an entry of ``self.sql`` and
            the dict of values for it: first the ``logging`` row, then one
            ``tagged_logevent`` row per tag of the log event.
        :raises KeyError: if one of the unconditionally read keys is missing
        """
        title = self.db.Title(logevent["title"])

        # Fold the visibility markers of the entry into the log_deleted bitfield.
        log_deleted = 0
        if "actionhidden" in logevent:
            log_deleted |= mwconst.DELETED_ACTION
        if "commenthidden" in logevent:
            log_deleted |= mwconst.DELETED_COMMENT
        if "userhidden" in logevent:
            log_deleted |= mwconst.DELETED_USER
        if "suppressed" in logevent:
            log_deleted |= mwconst.DELETED_RESTRICTED

        # Do not use title.dbtitle:
        #   - Interwiki prefix has to be included due to old log entries from
        #     times when the current interwiki prefixes were not in place.
        #   - Section anchor has to be included due to old log entries,
        #     apparently MediaWiki allowed ``#`` in user names at some point.
        log_title = title.format(iwprefix=True,
                                 namespace=False,
                                 sectionname=True)
        # Hack for the introduction of a new namespace (if the namespace numbers
        # don't match, use logevent["title"] verbatim).
        if logevent["ns"] == 0 and title.namespacenumber != 0:
            log_title = logevent["title"]
        # it's not an interwiki prefix -> capitalize first letter
        # (slicing instead of indexing, so an empty title yields an empty
        # string instead of raising IndexError)
        log_title = log_title[:1].upper() + log_title[1:]

        db_entry = {
            "log_id": logevent["logid"],
            "log_type": logevent["type"],
            "log_action": logevent["action"],
            "log_timestamp": logevent["timestamp"],
            # This assumes that anonymous users can't create log events, so all "0" from the API are from deleted users
            "log_user": value_or_none(logevent["userid"]),
            "log_user_text": logevent["user"],
            "log_namespace": logevent["ns"],
            "log_title": log_title,
            # 'logpage' can be different from 'pageid', e.g. if the page was deleted
            # in an old MediaWiki that did not preserve pageid and then restored
            "log_page": value_or_none(logevent["logpage"]),
            "log_comment": logevent["comment"],
            "log_params": logevent["params"],
            "log_deleted": log_deleted,
        }
        yield self.sql["insert", "logging"], db_entry

        # One association row per tag attached to the log event.
        for tag_name in logevent.get("tags", []):
            tag_entry = {
                "b_log_id": logevent["logid"],
                "b_tag_name": tag_name,
            }
            yield self.sql["insert", "tagged_logevent"], tag_entry
Beispiel #2
0
    def gen_revisions(self, page):
        """
        Generate the insert statements for every revision of ``page``.

        :param dict page:
            page data holding a ``revisions`` list; ``pageid`` is optional
        :returns:
            a generator of pairs consisting of an entry of ``self.sql`` and
            the dict of values for it. For each revision, in this order: the
            items produced by ``self.gen_text`` (only when
            ``self.with_content`` is ``True``), the ``revision`` row and one
            ``tagged_revision`` row per tag of the revision.
        """
        for revision in page["revisions"]:
            revision_row = dict(
                rev_id=revision["revid"],
                rev_page=value_or_none(page.get("pageid")),
                rev_comment=revision["comment"],
                rev_user=revision["userid"],
                rev_user_text=revision["user"],
                rev_timestamp=revision["timestamp"],
                rev_minor_edit="minor" in revision,
                # rev_deleted is not part of this row, it is set separately
                # with an update query
                rev_len=revision["size"],
                rev_parent_id=revision.get("parentid"),
                rev_sha1=revision["sha1"],
                # always available
                rev_content_model=revision["contentmodel"],
                # available iff content is available
                rev_content_format=revision.get("contentformat"),
            )

            if self.with_content is True:
                # A fresh text id links the revision row to whatever
                # gen_text yields; those items are emitted before the row.
                new_text_id = next(self.text_id_gen)
                revision_row["rev_text_id"] = new_text_id
                yield from self.gen_text(revision, new_text_id)

            yield self.sql["insert", "revision"], revision_row

            for tag in revision.get("tags", []):
                tag_row = dict(b_rev_id=revision["revid"], b_tag_name=tag)
                yield self.sql["insert", "tagged_revision"], tag_row
Beispiel #3
0
    def gen_revisions(self, page):
        """
        Yield the values to insert for each revision listed in ``page``.

        :param dict page:
            must contain the ``revisions`` list; ``pageid`` may be absent
        :returns:
            a generator of ``(self.sql[...], values)`` pairs. Per revision
            it yields the output of ``self.gen_text`` first (only when
            ``self.with_content`` is ``True``), then the ``revision`` row,
            then a ``tagged_revision`` row for each of the revision's tags.
        """
        for rev_data in page["revisions"]:
            # NOTE: rev_deleted is intentionally left out of the values, it
            # is set separately with an update query.
            values = {
                "rev_id": rev_data["revid"],
                "rev_page": value_or_none(page.get("pageid")),
                "rev_comment": rev_data["comment"],
                "rev_user": rev_data["userid"],
                "rev_user_text": rev_data["user"],
                "rev_timestamp": rev_data["timestamp"],
                "rev_minor_edit": "minor" in rev_data,
                "rev_len": rev_data["size"],
                "rev_parent_id": rev_data.get("parentid"),
                "rev_sha1": rev_data["sha1"],
                # the content model is always available
                "rev_content_model": rev_data["contentmodel"],
                # the content format is available iff content is available
                "rev_content_format": rev_data.get("contentformat"),
            }

            if self.with_content is True:
                # reserve the next text id and remember it in the revision row
                values["rev_text_id"] = text_id = next(self.text_id_gen)
                yield from self.gen_text(rev_data, text_id)

            yield self.sql["insert", "revision"], values

            for name in rev_data.get("tags", []):
                tagged = {"b_rev_id": rev_data["revid"], "b_tag_name": name}
                yield self.sql["insert", "tagged_revision"], tagged
Beispiel #4
0
    def gen_inserts_from_logevent(self, logevent):
        """
        Generate the insert statements for a single log event.

        :param dict logevent:
            a log event entry from the API. The keys ``title``, ``ns``,
            ``logid``, ``type``, ``action``, ``timestamp``, ``userid``,
            ``user``, ``logpage``, ``comment`` and ``params`` are read
            unconditionally; ``tags`` and the ``actionhidden``,
            ``commenthidden``, ``userhidden`` and ``suppressed`` markers are
            optional.
        :returns:
            a generator of pairs consisting of an entry of ``self.sql`` and
            the dict of values for it: first the ``logging`` row, then one
            ``tagged_logevent`` row per tag of the log event.
        :raises KeyError: if one of the unconditionally read keys is missing
        """
        title = self.db.Title(logevent["title"])

        # Fold the visibility markers of the entry into the log_deleted bitfield.
        log_deleted = 0
        if "actionhidden" in logevent:
            log_deleted |= mwconst.DELETED_ACTION
        if "commenthidden" in logevent:
            log_deleted |= mwconst.DELETED_COMMENT
        if "userhidden" in logevent:
            log_deleted |= mwconst.DELETED_USER
        if "suppressed" in logevent:
            log_deleted |= mwconst.DELETED_RESTRICTED

        # Do not use title.dbtitle:
        #   - Interwiki prefix has to be included due to old log entries from
        #     times when the current interwiki prefixes were not in place.
        #   - Section anchor has to be included due to old log entries,
        #     apparently MediaWiki allowed ``#`` in user names at some point.
        log_title = title.format(iwprefix=True, namespace=False, sectionname=True)
        # it's not an interwiki prefix -> capitalize first letter
        # (slicing instead of indexing, so an empty title yields an empty
        # string instead of raising IndexError)
        log_title = log_title[:1].upper() + log_title[1:]

        db_entry = {
            "log_id": logevent["logid"],
            "log_type": logevent["type"],
            "log_action": logevent["action"],
            "log_timestamp": logevent["timestamp"],
            # This assumes that anonymous users can't create log events, so all "0" from the API are from deleted users
            "log_user": value_or_none(logevent["userid"]),
            "log_user_text": logevent["user"],
            "log_namespace": logevent["ns"],
            "log_title": log_title,
            # 'logpage' can be different from 'pageid', e.g. if the page was deleted
            # in an old MediaWiki that did not preserve pageid and then restored
            "log_page": value_or_none(logevent["logpage"]),
            "log_comment": logevent["comment"],
            "log_params": logevent["params"],
            "log_deleted": log_deleted,
        }
        yield self.sql["insert", "logging"], db_entry

        # One association row per tag attached to the log event.
        for tag_name in logevent.get("tags", []):
            tag_entry = {
                "b_log_id": logevent["logid"],
                "b_tag_name": tag_name,
            }
            yield self.sql["insert", "tagged_logevent"], tag_entry
Beispiel #5
0
 def gen_insert(self):
     """
     Generate the insert statements for all tags in ``self.api.site.tags``.

     Tags from MW extension appear on first use, without any log event,
     so they are fetched the same way as namespaces.

     :returns:
         a generator yielding, for each tag, the pair of
         ``self.sql["insert", "tag"]`` and the dict of values for it
     """
     for tag_info in self.api.site.tags:
         row = dict(
             tag_name=tag_info["name"],
             tag_displayname=tag_info["displayname"],
             tag_description=value_or_none(tag_info["description"]),
             # "defined" and "active" are flags: only their presence matters
             tag_defined="defined" in tag_info,
             tag_active="active" in tag_info,
             tag_source=tag_info["source"],
         )
         yield self.sql["insert", "tag"], row
Beispiel #6
0
 def gen_insert(self):
     """
     Yield one ``tag`` insert per entry of ``self.api.site.tags``.

     Tags from MW extension appear on first use, without any log event,
     which is why they are fetched the same way as namespaces.

     :returns:
         a generator of ``(self.sql["insert", "tag"], values)`` pairs
     """
     for site_tag in self.api.site.tags:
         values = {}
         values["tag_name"] = site_tag["name"]
         values["tag_displayname"] = site_tag["displayname"]
         values["tag_description"] = value_or_none(site_tag["description"])
         # presence of the key is what counts for the two boolean columns
         values["tag_defined"] = "defined" in site_tag
         values["tag_active"] = "active" in site_tag
         values["tag_source"] = site_tag["source"]
         yield self.sql["insert", "tag"], values
Beispiel #7
0
    def gen_inserts_from_rc(self, rc):
        """
        Generate the insert and update statements for one recent change.

        :param dict rc:
            a recent change entry. The keys ``title``, ``rcid``,
            ``timestamp``, ``ns``, ``pageid``, ``revid``, ``old_revid``,
            ``type``, ``oldlen`` and ``newlen`` are read unconditionally.
            ``user`` and ``comment`` are required too, unless the
            ``userhidden`` or ``commenthidden`` marker is present: then a
            missing value is replaced with an empty string. Note that the
            replacement is stored in ``rc`` itself.
        :returns:
            a generator of pairs consisting of an entry of ``self.sql`` and
            the dict of values for it: the ``recentchanges`` row, one
            ``tagged_recentchange`` row per tag and, for ``delete`` log
            entries with the ``revision`` or ``event`` action, one
            ``rc_deleted`` update per id listed in the log parameters.
        """
        title = self.db.Title(rc["title"])

        # (marker in the entry, corresponding bit of rc_deleted)
        visibility_flags = (
            ("sha1hidden", mwconst.DELETED_TEXT),
            ("actionhidden", mwconst.DELETED_ACTION),
            ("commenthidden", mwconst.DELETED_COMMENT),
            ("userhidden", mwconst.DELETED_USER),
            ("suppressed", mwconst.DELETED_RESTRICTED),
        )
        rc_deleted = 0
        for marker, bit in visibility_flags:
            if marker in rc:
                rc_deleted |= bit

        # FIXME: either this or make the column nullable or require the "viewsuppressed" right for syncing
        for marker, field in (("commenthidden", "comment"), ("userhidden", "user")):
            if marker in rc:
                rc.setdefault(field, "")

        rc_row = dict(
            rc_id=rc["rcid"],
            rc_timestamp=rc["timestamp"],
            # may be hidden due to rc_deleted
            rc_user=rc.get("userid"),
            # may be hidden due to rc_deleted
            rc_user_text=rc["user"],
            rc_namespace=rc["ns"],
            rc_title=title.dbtitle(rc["ns"]),
            # may be hidden due to rc_deleted
            rc_comment=rc["comment"],
            rc_minor="minor" in rc,
            rc_bot="bot" in rc,
            rc_new="new" in rc,
            rc_cur_id=value_or_none(rc["pageid"]),
            rc_this_oldid=value_or_none(rc["revid"]),
            rc_last_oldid=value_or_none(rc["old_revid"]),
            rc_type=rc["type"],
            rc_patrolled="patrolled" in rc,
            rc_old_len=rc["oldlen"],
            rc_new_len=rc["newlen"],
            rc_deleted=rc_deleted,
            rc_logid=rc.get("logid"),
            rc_log_type=rc.get("logtype"),
            rc_log_action=rc.get("logaction"),
            rc_params=rc.get("logparams"),
        )
        yield self.sql["insert", "recentchanges"], rc_row

        for tag in rc.get("tags", []):
            tag_row = dict(b_rc_id=rc["rcid"], b_tag_name=tag)
            yield self.sql["insert", "tagged_recentchange"], tag_row

        # Check log events and update rc_deleted of the past changes,
        # including the DELETED_TEXT value (which is a MW incompatibility).
        if rc.get("logtype") == "delete":
            logaction = rc.get("logaction")
            if logaction == "revision":
                query_name, id_column = "rc_deleted-b_revid", "b_rev_id"
            elif logaction == "event":
                query_name, id_column = "rc_deleted-b_logid", "b_log_id"
            else:
                # other delete actions do not change the visibility
                return
            for target_id in rc["logparams"]["ids"]:
                yield self.sql["update", query_name], {
                    id_column: target_id,
                    "rc_deleted": rc["logparams"]["new"]["bitmask"],
                }
Beispiel #8
0
    def gen_inserts_from_rc(self, rc):
        """
        Yield the database changes that follow from one recent change.

        :param dict rc:
            a recent change entry. ``title``, ``rcid``, ``timestamp``,
            ``ns``, ``pageid``, ``revid``, ``old_revid``, ``type``,
            ``oldlen`` and ``newlen`` must be present. ``comment`` and
            ``user`` may be missing only when ``commenthidden`` or
            ``userhidden`` is set, in which case an empty string is stored
            in ``rc`` (the dict is modified in place).
        :returns:
            a generator of ``(self.sql[...], values)`` pairs: the
            ``recentchanges`` insert, a ``tagged_recentchange`` insert for
            each tag, and ``rc_deleted`` updates for the ids listed in the
            log parameters of ``delete`` log entries with the ``revision``
            or ``event`` action.
        """
        title = self.db.Title(rc["title"])

        # Build the rc_deleted bitfield from the visibility markers.
        rc_deleted = 0
        rc_deleted |= mwconst.DELETED_TEXT if "sha1hidden" in rc else 0
        rc_deleted |= mwconst.DELETED_ACTION if "actionhidden" in rc else 0
        rc_deleted |= mwconst.DELETED_COMMENT if "commenthidden" in rc else 0
        rc_deleted |= mwconst.DELETED_USER if "userhidden" in rc else 0
        rc_deleted |= mwconst.DELETED_RESTRICTED if "suppressed" in rc else 0

        # Hidden values may be missing from the entry, fall back to "".
        # FIXME: either this or make the column nullable or require the "viewsuppressed" right for syncing
        if "commenthidden" in rc:
            rc.setdefault("comment", "")
        if "userhidden" in rc:
            rc.setdefault("user", "")

        values = {
            "rc_id": rc["rcid"],
            "rc_timestamp": rc["timestamp"],
            # the next two may be hidden due to rc_deleted
            "rc_user": rc.get("userid"),
            "rc_user_text": rc["user"],
            "rc_namespace": rc["ns"],
            "rc_title": title.dbtitle(rc["ns"]),
            # may be hidden due to rc_deleted
            "rc_comment": rc["comment"],
            "rc_minor": "minor" in rc,
            "rc_bot": "bot" in rc,
            "rc_new": "new" in rc,
            "rc_cur_id": value_or_none(rc["pageid"]),
            "rc_this_oldid": value_or_none(rc["revid"]),
            "rc_last_oldid": value_or_none(rc["old_revid"]),
            "rc_type": rc["type"],
            "rc_patrolled": "patrolled" in rc,
            "rc_old_len": rc["oldlen"],
            "rc_new_len": rc["newlen"],
            "rc_deleted": rc_deleted,
            "rc_logid": rc.get("logid"),
            "rc_log_type": rc.get("logtype"),
            "rc_log_action": rc.get("logaction"),
            "rc_params": rc.get("logparams"),
        }
        yield self.sql["insert", "recentchanges"], values

        for name in rc.get("tags", []):
            tagged = {"b_rc_id": rc["rcid"], "b_tag_name": name}
            yield self.sql["insert", "tagged_recentchange"], tagged

        # Check log events and update rc_deleted of the past changes,
        # including the DELETED_TEXT value (which is a MW incompatibility).
        if rc.get("logtype") != "delete":
            return

        action = rc.get("logaction")
        if action == "revision":
            for revid in rc["logparams"]["ids"]:
                yield (
                    self.sql["update", "rc_deleted-b_revid"],
                    dict(b_rev_id=revid, rc_deleted=rc["logparams"]["new"]["bitmask"]),
                )
        elif action == "event":
            for logid in rc["logparams"]["ids"]:
                yield (
                    self.sql["update", "rc_deleted-b_logid"],
                    dict(b_log_id=logid, rc_deleted=rc["logparams"]["new"]["bitmask"]),
                )