Example #1
0
 def get_body(self):
     """Return the serialized article body after stripping page chrome.

     The element from ``get_element_body()`` is handed to ``try_drop_tree``
     once per unwanted selector (social buttons, related-articles aside,
     share counter), its URLs are passed through ``clean_relativ_urls``
     with the presseurop.eu base, and the result is serialized with
     ``self.parser.tostring``.
     """
     # NOTE(review): judging by its name, try_drop_tree presumably tolerates
     # a selector that matches nothing -- confirm against the helper.
     unwanted_selectors = (
         "li.button-social",
         "aside.articlerelated",
         "div.sharecount",
     )
     body = self.get_element_body()
     for selector in unwanted_selectors:
         try_drop_tree(self.parser, body, selector)
     clean_relativ_urls(body, "http://presseurop.eu")
     return self.parser.tostring(body)
Example #2
0
    def get_body(self):
        """Return the serialized article body with boilerplate removed.

        Steps, in order: remove the ``p.auteur`` and ``h4`` selectors, then
        pass the tag/related/title selectors to the ``try_`` variant of the
        remover, drop ``script`` via ``try_drop_tree``, run the URLs through
        ``clean_relativ_urls`` with the ecrans.fr base, and serialize with
        ``self.parser.tostring``.
        """
        body = self.get_element_body()

        # NOTE(review): the non-"try" helper presumably fails when a selector
        # is missing, while the "try" variants do not -- confirm.
        mandatory = ["p.auteur", "h4"]
        remove_from_selector_list(self.parser, body, mandatory)

        # Built after the first removal so the title selector attribute is
        # read at the same point as before.
        optional = ["p.tag", "div.alire", self.element_title_selector, "h4"]
        try_remove_from_selector_list(self.parser, body, optional)

        try_drop_tree(self.parser, body, "script")
        clean_relativ_urls(body, "http://ecrans.fr")
        return self.parser.tostring(body)
Example #3
0
    def get_body(self):
        """Build the cleaned, serialized body of the article.

        The body element is stripped of the author paragraph and ``h4``
        headings, then of the tag paragraph, the "alire" block, the title
        element and any remaining ``h4`` through the ``try_`` remover.
        ``script`` is passed to ``try_drop_tree``, URLs go through
        ``clean_relativ_urls``, and the element is serialized by
        ``self.parser``.
        """
        base_url = "http://ecrans.fr"
        content = self.get_element_body()

        remove_from_selector_list(self.parser, content, ["p.auteur", "h4"])

        # NOTE(review): "h4" appears in both lists; presumably the second
        # pass is a tolerant sweep for anything left over -- confirm.
        leftovers = [
            "p.tag",
            "div.alire",
            self.element_title_selector,
            "h4",
        ]
        try_remove_from_selector_list(self.parser, content, leftovers)

        try_drop_tree(self.parser, content, "script")
        clean_relativ_urls(content, base_url)

        serialized = self.parser.tostring(content)
        return serialized
Example #4
0
    def get_body(self):
        """Return the serialized ``div.sectbody`` element of the document.

        The first ``div`` whose class attribute is exactly ``sectbody`` is
        looked up below the document root, ``div.anchor`` is passed to
        ``try_drop_tree``, URLs go through ``clean_relativ_urls`` with the
        taz.de base, and the element is serialized with
        ``self.parser.tostring``.
        """
        root = self.document.getroot()
        # NOTE(review): if this is an lxml/ElementTree tree, find() yields
        # None when nothing matches and nothing here guards against that --
        # confirm the helpers cope or that the div is always present.
        section_body = root.find('.//div[@class="sectbody"]')

        try_drop_tree(self.parser, section_body, "div.anchor")
        clean_relativ_urls(section_body, "http://taz.de")
        return self.parser.tostring(section_body)