Ejemplo n.º 1
0
    def process_contributor(self, contributor):
        """Work out the sender of the current revision from its contributor element.

        The sender is stored in ``self._sender`` and is resolved as follows:

        * username tag present: its text, passed through
          ``mwlib.normalize_pagename``;
        * normalizing the username raises ``AttributeError``: the text of the
          id tag is used instead;
        * no username tag: the text of the ip tag is used.

        Whenever no sender can be resolved the revision is flagged through
        ``self._skip_revision``.  A missing or empty ip tag additionally
        increments ``self.counter_deleted``.

        :param contributor: object exposing ``find(tag)`` (presumably an
            ElementTree-style element -- confirm against the caller), or
            ``None`` when the revision carries no contributor.
        """
        if self._skip_revision:
            return

        if contributor is None:
            self._skip_revision = True
            # Stop here: every lookup below would fail on None.
            return

        sender_tag = contributor.find(self.tag["username"])
        if sender_tag is None:
            try:
                self._sender = contributor.find(self.tag["ip"]).text
            except AttributeError:
                ## user deleted
                self._skip_revision = True
                self.counter_deleted += 1
            else:
                if self._sender is None:
                    self._skip_revision = True
                    self.counter_deleted += 1
            return

        try:
            self._sender = mwlib.normalize_pagename(sender_tag.text)
        except AttributeError:
            ## if username is defined but empty, look for id tag
            try:
                self._sender = contributor.find(self.tag["id"]).text
            except (KeyError, AttributeError):
                # KeyError: no "id" entry in self.tag.
                # AttributeError: find() returned None (no id element).
                self._skip_revision = True
    def process_contributor(self, contributor):
        """Set ``self._sender`` from the contributor's username (or id) tag.

        The revision is flagged through ``self._skip_revision`` when there is
        no contributor or when it has no username tag.  When normalizing the
        username raises ``AttributeError`` the text of the id tag is used as
        the sender instead.

        :param contributor: object exposing ``find(tag)`` (presumably an
            ElementTree-style element -- confirm against the caller), or
            ``None``.
        """
        if self._skip_revision:
            return

        if contributor is None:
            logging.warning('contributor is None')
            self._skip_revision = True
            # Stop here: contributor.find() below would fail on None.
            return

        sender_tag = contributor.find(self.tag['username'])
        if sender_tag is None:
            self._skip_revision = True
            return

        try:
            self._sender = mwlib.normalize_pagename(sender_tag.text)
        except AttributeError:
            ## if username is defined but empty, look for id tag
            self._sender = contributor.find(self.tag['id']).text
Ejemplo n.º 3
0
    def process_text(self, elem):
        """Feed the text of a revision into the signature finder and cache.

        Nothing is done when the element has no text or when the text is a
        hard or soft redirect according to ``mwlib``.  Otherwise the result of
        ``self.sig_finder.find(text)`` is added to ``self.ecache`` under the
        normalized ``self.user``, ``self.count`` is incremented, and the
        counter is printed every 500 processed texts.

        :param elem: object whose ``text`` attribute holds the revision text.
        :raises AssertionError: if ``self.user`` is falsy.
        """
        assert self.user, "User still not defined"

        text = elem.text
        if not (text and self.user):
            return

        if mwlib.isHardRedirect(text) or mwlib.isSoftRedirect(text):
            return

        talks = self.sig_finder.find(text)

        self.ecache.add(mwlib.normalize_pagename(self.user), talks)
        self.count += 1
        if not self.count % 500:
            # Function-call form: same output on Python 2, valid on Python 3.
            print(self.count)
Ejemplo n.º 4
0
    def process_contributor(self, contributor):
        """Resolve the revision's sender into ``self._sender``.

        A missing contributor (logged as a warning) or a contributor without a
        username tag flags the revision through ``self._skip_revision``.  If
        ``mwlib.normalize_pagename`` raises ``AttributeError`` on the username
        text, the id tag's text becomes the sender.

        :param contributor: object exposing ``find(tag)`` (presumably an
            ElementTree-style element -- confirm against the caller), or
            ``None``.
        """
        if self._skip_revision:
            return

        if contributor is None:
            logging.warning('contributor is None')
            self._skip_revision = True
            # Bail out: without this return the next line dereferences None.
            return

        sender_tag = contributor.find(self.tag['username'])
        if sender_tag is None:
            self._skip_revision = True
        else:
            try:
                self._sender = mwlib.normalize_pagename(sender_tag.text)
            except AttributeError:
                ## if username is defined but empty, look for id tag
                self._sender = contributor.find(self.tag['id']).text
Ejemplo n.º 5
0
    def process_title(self, elem):
        """Decide from the page title whether the page should be processed.

        The title is split on ``':'``.  When the first piece is one of
        ``self.user_talk_names`` the second piece, normalized with
        ``mwlib.normalize_pagename``, becomes ``self._receiver``; otherwise
        the page is flagged through ``self._skip``.  A title containing
        ``'/'`` is counted in ``self.count_archive`` and flagged as well.

        :param elem: object whose ``text`` attribute holds the page title.
        """
        if self._skip_revision:
            return

        page_title = elem.text
        parts = page_title.split(':')

        is_user_talk = len(parts) > 1 and parts[0] in self.user_talk_names
        if not is_user_talk:
            self._skip = True
            return

        self._receiver = mwlib.normalize_pagename(parts[1])

        # Titles with a slash are counted as archives and skipped.
        if '/' in page_title:
            self.count_archive += 1
            self._skip = True
    def process_title(self, elem):
        """Pick the receiver out of a page title, or flag the page to be skipped.

        Pages whose title has no ``':'`` or whose prefix is not listed in
        ``self.user_talk_names`` set ``self._skip``.  For the others the piece
        after the prefix is normalized into ``self._receiver``; if the title
        also contains ``'/'`` the page is counted in ``self.count_archive``
        and ``self._skip`` is set.

        :param elem: object whose ``text`` attribute holds the page title.
        """
        if self._skip_revision:
            return

        title = elem.text
        pieces = title.split(':')

        if len(pieces) <= 1 or pieces[0] not in self.user_talk_names:
            self._skip = True
            return

        self._receiver = mwlib.normalize_pagename(pieces[1])

        # find() returns -1 when there is no slash in the title.
        has_slash = title.find('/') != -1
        if has_slash:
            self.count_archive += 1
            self._skip = True
Ejemplo n.º 7
0
    def process_text(self, elem):
        """Record the signatures found in a revision text for the current user.

        Empty texts and hard/soft redirects (as reported by ``mwlib``) are
        ignored.  Otherwise the result of ``self.sig_finder.find`` is added to
        ``self.ecache`` under the normalized ``self.user``.  If that step
        raises ``AttributeError`` the page is flagged through ``self._skip``
        and nothing is counted.  On success ``self.count`` is incremented and
        logged every 500 texts.

        :param elem: object whose ``text`` attribute holds the revision text.
        :raises AssertionError: if ``self.user`` is falsy.
        """
        assert self.user, "User still not defined"

        body = elem.text
        if not body or not self.user:
            return
        if mwlib.isHardRedirect(body) or mwlib.isSoftRedirect(body):
            return

        try:
            found = self.sig_finder.find(body)
            pagename = mwlib.normalize_pagename(self.user)
            self.ecache.add(pagename, found)
        except AttributeError:
            # Checks if self.user is a valid pagename
            self._skip = True
            return

        self.count += 1
        if self.count % 500 == 0:
            logging.info("Counter: %d", self.count)
Ejemplo n.º 8
0
    def process_title(self, elem):
        """Reset per-page attributes and pick the receiver from the page title.

        ``self.delattr`` is always called first with the names of the
        attributes to drop (presumably state left over from the previous
        page -- confirm against ``delattr``).  Then, unless the revision is
        already being skipped, the title is split on ``":"``: a prefix listed
        in ``self.user_talk_names`` yields ``self._receiver``; anything else
        sets ``self._skip``.  Titles containing ``"/"`` are counted in
        ``self.count_archive`` and skipped too.

        :param elem: object whose ``text`` attribute holds the page title.
        """
        stale_attrs = ("_counter", "_type", "_title", "_skip", "_date", "_receiver")
        self.delattr(stale_attrs)
        if self._skip_revision:
            return

        page_title = elem.text
        parts = page_title.split(":")

        if len(parts) <= 1 or smart_str(parts[0]) not in self.user_talk_names:
            self._skip = True
            return

        self._receiver = mwlib.normalize_pagename(parts[1])

        # A slash in the title is counted as an archive page.
        if "/" in page_title:
            self.count_archive += 1
            self._skip = True
Ejemplo n.º 9
0
    def process_title(self, elem):
        """Drop per-page attributes, then derive the receiver from the title.

        ``self.delattr`` is called unconditionally with the attribute names to
        remove (presumably leftovers from the previous page -- confirm against
        ``delattr``).  If the revision is not already skipped, the title is
        split on ``':'``; when its prefix is in ``self.user_talk_names`` the
        next piece is normalized into ``self._receiver``, otherwise
        ``self._skip`` is set.  A title containing ``'/'`` increments
        ``self.count_archive`` and sets ``self._skip``.

        :param elem: object whose ``text`` attribute holds the page title.
        """
        stale_attrs = ("_counter", "_type", "_title", "_skip", "_date",
                       "_receiver", "_time", "_id", "_username", "_ip")
        self.delattr(stale_attrs)
        if self._skip_revision:
            return

        title = elem.text
        pieces = title.split(':')
        prefix_is_user_talk = (len(pieces) > 1
                               and smart_str(pieces[0]) in self.user_talk_names)
        if not prefix_is_user_talk:
            self._skip = True
            return

        self._receiver = mwlib.normalize_pagename(pieces[1])

        # find() returns -1 when the title has no slash.
        if title.find('/') != -1:
            self.count_archive += 1
            self._skip = True