Ejemplo n.º 1
0
    def _export_(self):
        """Fetch one page of the topic and add a ``Post`` child per message.

        The page ``/t<topic_id>p<page>-a`` is requested through
        ``self.session`` and every ``tr.post`` row of the response is parsed
        for its id, author, body, title and date.  A row that carries no
        ``name`` anchor cannot be identified, so it is skipped with a warning
        instead of aborting the export of the whole page.
        """
        self.logger.debug("Récupération des messages du sujet %d (page %d)", self.topic.topic_id, self.page)

        response = self.session.get("/t{}p{}-a".format(self.topic.topic_id, self.page))
        document = PyQuery(response.text)

        # Profile links look like "/u<id>"; the captured number indexes self.users.
        pattern = re.compile(r"/u(\d+)")

        for element in document.find("tr.post"):
            e = PyQuery(element)

            # The anchor's "name" attribute holds the post id.  attr() yields
            # None when the anchor is absent, and int(None) would raise.
            name = e("td span.name a").attr("name")
            if name is None:
                self.logger.warning(
                    "Message %s (sujet %d) irrécupérable, informations manquantes. !",
                    e.attr("class"), self.topic.topic_id)
                continue

            post_id = int(name)

            self.logger.info("Récupération du message %d (sujet %d)", post_id, self.topic.topic_id)

            # No (matching) profile link means the author is not a known user.
            match = pattern.fullmatch(clean_url(e("td span.name strong a").eq(0).attr("href") or ""))
            if match:
                poster = self.users[int(match.group(1))]
            else:
                poster = AnonymousUser()

            post = e("td div.postbody div").eq(0).html()
            if not post:
                self.logger.warning("Le message  %d (sujet %d) semble être vide", post_id, self.topic.topic_id)
                post = ""

            # Query the details span once; both the title and the date are
            # read from its child nodes.
            details = e("table td span.postdetails").contents()

            # Drop the 7-character "Sujet :" label before the title and any
            # trailing whitespace.
            title = details[1][7:].rstrip()

            # Date and time of the post
            timestamp = parse_date(details[3])

            self.add_child(Post(post_id, post, title, timestamp, poster))
Ejemplo n.º 2
0
    def _export_(self):
        """Read the ``/statistics`` page and register the child nodes.

        The rows of ``table.forumline`` are collected into a label -> value
        mapping, from which the counters, dates, site name and site
        description are stored on ``self``.  The Smilies, users (OCR-based or
        not, depending on ``self.config["use_ocr"]``), Groups and Forums
        nodes are then added as children, in that order.
        """
        self.logger.info('Récupération des statistiques')
        page = PyQuery(self.session.get("/statistics").text)

        # Each row may hold two label/value pairs: spans (0, 1) and (2, 3).
        stats = {}
        for row in page.find("table.forumline tr"):
            spans = PyQuery(row)("td span")
            for label_index in (0, 2):
                stats[spans.eq(label_index).text()] = spans.eq(label_index + 1).text()

        self.total_posts = int(stats["Messages"])
        self.total_topics = int(stats["Nombre de sujets ouvert dans le forum"])
        self.total_users = int(stats["Nombre d'utilisateurs"])

        self.startdate = parse_date(stats["Ouverture du forum"])
        self.record_online_date = parse_date(stats["Date du record de connexions"])
        self.record_online_users = int(stats["Nombre record d'utilisateurs connectés en même temps"])

        # Site name is the first "maintitle" block; the description is the
        # first "span.gen" sibling of those blocks.
        main_title = page("div.maintitle")
        self.site_name = main_title.eq(0).text().strip(" \n")
        self.site_desc = main_title.siblings("span.gen").eq(0).text().strip(" \n")

        self.logger.debug('Messages : %d', self.total_posts)
        self.logger.debug('Sujets : %d', self.total_topics)
        self.logger.debug('Membres : %d', self.total_users)

        # Children are added in a fixed order: smilies, users, groups, forums.
        self.add_child(Smilies())
        # With OCR enabled, the OCR-based users node is used instead
        # (per the original comment, to recover the users' emails).
        self.add_child(OcrUsers() if self.config["use_ocr"] else Users())
        self.add_child(Groups())
        self.add_child(Forums())
Ejemplo n.º 3
0
    def _export_(self):
        """Scrape the forum statistics page, then queue the child exporters.

        Values are read from the ``table.forumline`` rows of ``/statistics``
        and stored as attributes of ``self``; afterwards the Smilies, users,
        Groups and Forums nodes are attached with ``add_child``.
        """
        self.logger.info('Récupération des statistiques')
        html = self.session.get("/statistics").text
        document = PyQuery(html)

        # Build the label -> value table.  A row contributes the pair made of
        # its 1st/2nd spans and the pair made of its 3rd/4th spans.
        stats = {}
        for tr in document.find("table.forumline tr"):
            cells = PyQuery(tr)("td span")
            first_label, first_value, second_label, second_value = (
                cells.eq(position).text() for position in range(4)
            )
            stats[first_label] = first_value
            stats[second_label] = second_value

        # Counters
        self.total_posts = int(stats["Messages"])
        self.total_topics = int(stats["Nombre de sujets ouvert dans le forum"])
        self.total_users = int(stats["Nombre d'utilisateurs"])

        # Dates and connection record
        self.startdate = parse_date(stats["Ouverture du forum"])
        self.record_online_date = parse_date(stats["Date du record de connexions"])
        record_users = stats["Nombre record d'utilisateurs connectés en même temps"]
        self.record_online_users = int(record_users)

        # Site identity
        titles = document("div.maintitle")
        self.site_name = titles.eq(0).text().strip(" \n")
        description = titles.siblings("span.gen").eq(0)
        self.site_desc = description.text().strip(" \n")

        for label, count in (
            ('Messages : %d', self.total_posts),
            ('Sujets : %d', self.total_topics),
            ('Membres : %d', self.total_users),
        ):
            self.logger.debug(label, count)

        # Attach the child nodes one after the other.
        self.add_child(Smilies())

        # The OCR variant is selected by configuration (per the original
        # comment, it is used to get the users' emails).
        if self.config["use_ocr"]:
            users_node = OcrUsers()
        else:
            users_node = Users()
        self.add_child(users_node)

        self.add_child(Groups())
        self.add_child(Forums())
Ejemplo n.º 4
0
    def _export_(self):
        """Fetch one page of the topic and add a ``Post`` child per message.

        The page ``/t<topic_id>p<page>-<topic_slug>`` is requested through
        ``self.session`` and every ``tr.post`` row of the response is parsed
        for its id, author, body, title and date.  Rows without a ``name``
        anchor are skipped with a warning.
        """
        self.logger.debug('Récupération des messages du sujet %d (page %d)',
                          self.topic.topic_id, self.page)

        # Build the path once so the logged URL is exactly the requested one,
        # and log it before the request so it is visible even if the request
        # fails.
        relative_url = "t{}p{}-{}".format(
            self.topic.topic_id, self.page, self.topic.topic_slug)
        self.logger.debug("url : %s", relative_url)

        response = self.session.get("/" + relative_url)
        document = PyQuery(response.text)

        # Profile links look like "/u<id>"; the captured number indexes self.users.
        pattern = re.compile(r"/u(\d+)")

        for element in document.find('tr.post'):
            e = PyQuery(element)

            # The anchor's "name" attribute holds the post id.
            name = e("td span.name a").attr("name")

            if name is None:
                self.logger.warning(
                    'Message %s (sujet %d) irrécupérable, informations manquantes. !',
                    e.attr('class'), self.topic.topic_id)
                continue

            post_id = int(name)

            self.logger.info('Récupération du message %d (sujet %d)', post_id,
                             self.topic.topic_id)

            # No (matching) profile link means the author is not a known user.
            match = pattern.fullmatch(
                clean_url(e("td span.name strong a").eq(0).attr("href") or ""))
            if match:
                poster = self.users[int(match.group(1))]
            else:
                poster = AnonymousUser()

            post = e("td div.postbody div").eq(0).html()
            if not post:
                self.logger.warning(
                    'Le message  %d (sujet %d) semble être vide', post_id,
                    self.topic.topic_id)
                post = ""

            # Query the details span once; both the title and the date are
            # read from its child nodes.
            details = e("table td span.postdetails").contents()

            # Drop the 7-character "Sujet :" label before the title and any
            # trailing whitespace.
            title = details[1][7:].rstrip()

            # Date and time of the post
            timestamp = parse_date(details[3])

            self.add_child(Post(post_id, post, title, timestamp, poster))