def gen_inserts_from_logevent(self, logevent):
    """Yield the statements that store one log event together with its tags.

    The first item yielded is the ``logging`` insert; it is followed by one
    ``tagged_logevent`` insert for every entry of ``logevent["tags"]``
    (the key is optional).

    :param logevent:
        mapping describing a single log event.  The keys ``title``, ``ns``,
        ``logid``, ``type``, ``action``, ``timestamp``, ``userid``, ``user``,
        ``logpage``, ``comment`` and ``params`` are read unconditionally;
        ``tags``, ``actionhidden``, ``commenthidden``, ``userhidden`` and
        ``suppressed`` are optional.
    :returns:
        generator of ``(statement, parameters)`` tuples, where the statement
        is looked up in ``self.sql`` and the parameters are a dict.
    """
    title = self.db.Title(logevent["title"])

    # Translate the visibility markers present in the entry into a bitmask.
    log_deleted = 0
    if "actionhidden" in logevent:
        log_deleted |= mwconst.DELETED_ACTION
    if "commenthidden" in logevent:
        log_deleted |= mwconst.DELETED_COMMENT
    if "userhidden" in logevent:
        log_deleted |= mwconst.DELETED_USER
    if "suppressed" in logevent:
        log_deleted |= mwconst.DELETED_RESTRICTED

    # Do not use title.dbtitle:
    # - Interwiki prefix has to be included due to old log entries from
    #   times when the current interwiki prefixes were not in place.
    # - Section anchor has to be included due to old log entries,
    #   apparently MediaWiki allowed ``#`` in user names at some point.
    log_title = title.format(iwprefix=True, namespace=False, sectionname=True)
    # Hack for the introduction of a new namespace (if the namespace numbers
    # don't match, use logevent["title"] verbatim).
    if logevent["ns"] == 0 and title.namespacenumber != 0:
        log_title = logevent["title"]
    # it's not an interwiki prefix -> capitalize first letter
    # (slicing instead of indexing with [0] leaves an empty title unchanged
    # rather than raising IndexError)
    log_title = log_title[:1].upper() + log_title[1:]

    db_entry = {
        "log_id": logevent["logid"],
        "log_type": logevent["type"],
        "log_action": logevent["action"],
        "log_timestamp": logevent["timestamp"],
        # This assumes that anonymous users can't create log events, so all "0" from the API are from deleted users
        "log_user": value_or_none(logevent["userid"]),
        "log_user_text": logevent["user"],
        "log_namespace": logevent["ns"],
        "log_title": log_title,
        # 'logpage' can be different from 'pageid', e.g. if the page was deleted
        # in an old MediaWiki that did not preserve pageid and then restored
        "log_page": value_or_none(logevent["logpage"]),
        "log_comment": logevent["comment"],
        "log_params": logevent["params"],
        "log_deleted": log_deleted,
    }
    yield self.sql["insert", "logging"], db_entry

    for tag_name in logevent.get("tags", []):
        db_entry = {
            "b_log_id": logevent["logid"],
            "b_tag_name": tag_name,
        }
        yield self.sql["insert", "tagged_logevent"], db_entry
def gen_revisions(self, page):
    """Yield the statements that store every revision listed in ``page``.

    For each entry of ``page["revisions"]`` the generator yields, in order:
    whatever ``self.gen_text`` yields for the revision (only when
    ``self.with_content is True``), the ``revision`` insert, and one
    ``tagged_revision`` insert per entry of the revision's optional ``tags``.

    :param page:
        mapping with a ``revisions`` list and an optional ``pageid``.
    :returns:
        generator of ``(statement, parameters)`` tuples, where the statement
        is looked up in ``self.sql`` (plus the items produced by
        ``self.gen_text``).
    """
    for revision in page["revisions"]:
        revid = revision["revid"]
        row = {
            "rev_id": revid,
            "rev_page": value_or_none(page.get("pageid")),
            "rev_comment": revision["comment"],
            "rev_user": revision["userid"],
            "rev_user_text": revision["user"],
            "rev_timestamp": revision["timestamp"],
            "rev_minor_edit": "minor" in revision,
            # rev_deleted is not part of this insert; it is set separately
            # with an update query
            "rev_len": revision["size"],
            "rev_parent_id": revision.get("parentid"),
            "rev_sha1": revision["sha1"],
            # the content model is always available
            "rev_content_model": revision["contentmodel"],
            # the content format is available iff the content is available
            "rev_content_format": revision.get("contentformat"),
        }

        # Identity comparison: only the literal ``True`` enables text
        # storage; other truthy values do not.
        if self.with_content is True:
            new_text_id = next(self.text_id_gen)
            row["rev_text_id"] = new_text_id
            yield from self.gen_text(revision, new_text_id)

        yield self.sql["insert", "revision"], row

        for tag in revision.get("tags", []):
            tag_row = {"b_rev_id": revid, "b_tag_name": tag}
            yield self.sql["insert", "tagged_revision"], tag_row
def gen_inserts_from_logevent(self, logevent):
    """Yield the statements that store one log event together with its tags.

    The ``logging`` insert is yielded first, followed by one
    ``tagged_logevent`` insert for each entry of the optional
    ``logevent["tags"]`` list.

    :param logevent:
        mapping describing a single log event.  The keys ``title``,
        ``logid``, ``type``, ``action``, ``timestamp``, ``userid``, ``user``,
        ``ns``, ``logpage``, ``comment`` and ``params`` are read
        unconditionally; ``tags``, ``actionhidden``, ``commenthidden``,
        ``userhidden`` and ``suppressed`` are optional.
    :returns:
        generator of ``(statement, parameters)`` tuples, where the statement
        is looked up in ``self.sql`` and the parameters are a dict.
    """
    title = self.db.Title(logevent["title"])

    # Translate the visibility markers present in the entry into a bitmask.
    log_deleted = 0
    if "actionhidden" in logevent:
        log_deleted |= mwconst.DELETED_ACTION
    if "commenthidden" in logevent:
        log_deleted |= mwconst.DELETED_COMMENT
    if "userhidden" in logevent:
        log_deleted |= mwconst.DELETED_USER
    if "suppressed" in logevent:
        log_deleted |= mwconst.DELETED_RESTRICTED

    # Do not use title.dbtitle:
    # - Interwiki prefix has to be included due to old log entries from
    #   times when the current interwiki prefixes were not in place.
    # - Section anchor has to be included due to old log entries,
    #   apparently MediaWiki allowed ``#`` in user names at some point.
    log_title = title.format(iwprefix=True, namespace=False, sectionname=True)
    # it's not an interwiki prefix -> capitalize first letter
    # (an empty title is passed through unchanged: the slice ``[:1]`` yields
    # an empty string where the index ``[0]`` would raise IndexError)
    log_title = log_title[:1].upper() + log_title[1:]

    db_entry = {
        "log_id": logevent["logid"],
        "log_type": logevent["type"],
        "log_action": logevent["action"],
        "log_timestamp": logevent["timestamp"],
        # This assumes that anonymous users can't create log events, so all "0" from the API are from deleted users
        "log_user": value_or_none(logevent["userid"]),
        "log_user_text": logevent["user"],
        "log_namespace": logevent["ns"],
        "log_title": log_title,
        # 'logpage' can be different from 'pageid', e.g. if the page was deleted
        # in an old MediaWiki that did not preserve pageid and then restored
        "log_page": value_or_none(logevent["logpage"]),
        "log_comment": logevent["comment"],
        "log_params": logevent["params"],
        "log_deleted": log_deleted,
    }
    yield self.sql["insert", "logging"], db_entry

    for tag_name in logevent.get("tags", []):
        db_entry = {
            "b_log_id": logevent["logid"],
            "b_tag_name": tag_name,
        }
        yield self.sql["insert", "tagged_logevent"], db_entry
def gen_insert(self):
    """Yield one ``tag`` insert for every tag reported by ``self.api.site.tags``.

    :returns:
        generator of ``(statement, parameters)`` tuples, where the statement
        is looked up in ``self.sql`` and the parameters are a dict.
    """

    def as_row(info):
        # Map one tag description onto the parameters of the insert.
        return {
            "tag_name": info["name"],
            "tag_displayname": info["displayname"],
            "tag_description": value_or_none(info["description"]),
            # "defined" and "active" are flags signalled by mere presence
            "tag_defined": "defined" in info,
            "tag_active": "active" in info,
            "tag_source": info["source"],
        }

    # Tags from MW extension appear on first use, without any log event,
    # so we fetch them the same way as namespaces.
    for info in self.api.site.tags:
        row = as_row(info)
        yield self.sql["insert", "tag"], row
def gen_inserts_from_rc(self, rc):
    """Yield the statements that store one recent-changes entry.

    Yields, in order: the ``recentchanges`` insert, one
    ``tagged_recentchange`` insert per entry of the optional ``rc["tags"]``
    and, for ``delete`` log entries whose action is ``revision`` or
    ``event``, one ``rc_deleted`` update per id in ``rc["logparams"]["ids"]``.

    Note that ``rc`` is modified in place: when ``commenthidden`` or
    ``userhidden`` is present, a missing ``comment``/``user`` key is filled
    with an empty string.

    :param rc: mapping describing a single recent change.
    :returns:
        generator of ``(statement, parameters)`` tuples, where the statement
        is looked up in ``self.sql`` and the parameters are a dict.
    """
    title = self.db.Title(rc["title"])

    # Translate the visibility markers present in the entry into a bitmask.
    visibility_flags = (
        ("sha1hidden", mwconst.DELETED_TEXT),
        ("actionhidden", mwconst.DELETED_ACTION),
        ("commenthidden", mwconst.DELETED_COMMENT),
        ("userhidden", mwconst.DELETED_USER),
        ("suppressed", mwconst.DELETED_RESTRICTED),
    )
    rc_deleted = 0
    for marker, bit in visibility_flags:
        if marker in rc:
            rc_deleted |= bit

    # FIXME: either this or make the column nullable or require the "viewsuppressed" right for syncing
    for marker, field in (("commenthidden", "comment"), ("userhidden", "user")):
        if marker in rc:
            rc.setdefault(field, "")

    rcid = rc["rcid"]
    row = {
        "rc_id": rcid,
        "rc_timestamp": rc["timestamp"],
        "rc_user": rc.get("userid"),  # may be hidden due to rc_deleted
        "rc_user_text": rc["user"],  # may be hidden due to rc_deleted
        "rc_namespace": rc["ns"],
        "rc_title": title.dbtitle(rc["ns"]),
        "rc_comment": rc["comment"],  # may be hidden due to rc_deleted
        "rc_minor": "minor" in rc,
        "rc_bot": "bot" in rc,
        "rc_new": "new" in rc,
        "rc_cur_id": value_or_none(rc["pageid"]),
        "rc_this_oldid": value_or_none(rc["revid"]),
        "rc_last_oldid": value_or_none(rc["old_revid"]),
        "rc_type": rc["type"],
        "rc_patrolled": "patrolled" in rc,
        "rc_old_len": rc["oldlen"],
        "rc_new_len": rc["newlen"],
        "rc_deleted": rc_deleted,
        "rc_logid": rc.get("logid"),
        "rc_log_type": rc.get("logtype"),
        "rc_log_action": rc.get("logaction"),
        "rc_params": rc.get("logparams"),
    }
    yield self.sql["insert", "recentchanges"], row

    for tag in rc.get("tags", []):
        tag_row = {"b_rc_id": rcid, "b_tag_name": tag}
        yield self.sql["insert", "tagged_recentchange"], tag_row

    # Check log events and update rc_deleted of the past changes,
    # including the DELETED_TEXT value (which is a MW incompatibility).
    # Maps the log action to (update statement name, id parameter name).
    update_targets = {
        "revision": ("rc_deleted-b_revid", "b_rev_id"),
        "event": ("rc_deleted-b_logid", "b_log_id"),
    }
    if rc.get("logtype") == "delete":
        target = update_targets.get(rc.get("logaction"))
        if target is not None:
            statement_name, id_param = target
            for ident in rc["logparams"]["ids"]:
                yield self.sql["update", statement_name], {
                    id_param: ident,
                    "rc_deleted": rc["logparams"]["new"]["bitmask"],
                }
def gen_inserts_from_rc(self, rc):
    """Yield the statements that store one recent-changes entry.

    The ``recentchanges`` insert comes first, then one
    ``tagged_recentchange`` insert per entry of the optional ``rc["tags"]``.
    If the entry is a ``delete`` log entry with action ``revision`` or
    ``event``, one ``rc_deleted`` update is yielded for every id in
    ``rc["logparams"]["ids"]``.

    Side effect: ``rc`` itself is modified.  A missing ``comment`` (or
    ``user``) key is set to an empty string when ``commenthidden`` (or
    ``userhidden``) is present.

    :param rc: mapping describing a single recent change.
    :returns:
        generator of ``(statement, parameters)`` tuples, where the statement
        is looked up in ``self.sql`` and the parameters are a dict.
    """
    parsed_title = self.db.Title(rc["title"])

    # Accumulate the visibility bitmask from the markers present in the entry.
    deleted_mask = 0
    if "sha1hidden" in rc:
        deleted_mask |= mwconst.DELETED_TEXT
    if "actionhidden" in rc:
        deleted_mask |= mwconst.DELETED_ACTION
    if "commenthidden" in rc:
        deleted_mask |= mwconst.DELETED_COMMENT
        # FIXME: either this or make the column nullable or require the "viewsuppressed" right for syncing
        rc.setdefault("comment", "")
    if "userhidden" in rc:
        deleted_mask |= mwconst.DELETED_USER
        # FIXME: either this or make the column nullable or require the "viewsuppressed" right for syncing
        rc.setdefault("user", "")
    if "suppressed" in rc:
        deleted_mask |= mwconst.DELETED_RESTRICTED

    change = {
        "rc_id": rc["rcid"],
        "rc_timestamp": rc["timestamp"],
        # user id, user name and comment may be hidden due to rc_deleted
        "rc_user": rc.get("userid"),
        "rc_user_text": rc["user"],
        "rc_namespace": rc["ns"],
        "rc_title": parsed_title.dbtitle(rc["ns"]),
        "rc_comment": rc["comment"],
        "rc_minor": "minor" in rc,
        "rc_bot": "bot" in rc,
        "rc_new": "new" in rc,
        "rc_cur_id": value_or_none(rc["pageid"]),
        "rc_this_oldid": value_or_none(rc["revid"]),
        "rc_last_oldid": value_or_none(rc["old_revid"]),
        "rc_type": rc["type"],
        "rc_patrolled": "patrolled" in rc,
        "rc_old_len": rc["oldlen"],
        "rc_new_len": rc["newlen"],
        "rc_deleted": deleted_mask,
        "rc_logid": rc.get("logid"),
        "rc_log_type": rc.get("logtype"),
        "rc_log_action": rc.get("logaction"),
        "rc_params": rc.get("logparams"),
    }
    yield self.sql["insert", "recentchanges"], change

    for name in rc.get("tags", []):
        tagged = {
            "b_rc_id": rc["rcid"],
            "b_tag_name": name,
        }
        yield self.sql["insert", "tagged_recentchange"], tagged

    # Only "delete" log entries carry visibility changes of past entries.
    if rc.get("logtype") != "delete":
        return

    # Check log events and update rc_deleted of the past changes,
    # including the DELETED_TEXT value (which is a MW incompatibility).
    if rc.get("logaction") == "revision":
        for rev_id in rc["logparams"]["ids"]:
            yield self.sql["update", "rc_deleted-b_revid"], {
                "b_rev_id": rev_id,
                "rc_deleted": rc["logparams"]["new"]["bitmask"],
            }
    elif rc.get("logaction") == "event":
        for log_id in rc["logparams"]["ids"]:
            yield self.sql["update", "rc_deleted-b_logid"], {
                "b_log_id": log_id,
                "rc_deleted": rc["logparams"]["new"]["bitmask"],
            }