Ejemplo n.º 1
0
    def extract_urls(self, text):
        """Return the set of URLs found in text nodes of *text* that are
        not inside an ``<a>`` tag.

        Each matching text node is handed to ``TextBlockParser`` for the
        actual URL extraction; results are unioned into one set.
        """
        parser = TextBlockParser()
        found = set()

        for node in BeautifulSoup(text).findAll(text=re.compile(URL_RE)):
            # Skip URLs that are already wrapped in an anchor tag.
            if self.inside_a(node):
                continue
            found |= parser.extract_urls(unicode(node))

        return found
Ejemplo n.º 2
0
    def extract_urls(self, text):
        """Return a list of URLs found in text nodes of *text* that are not
        inside an ``<a>`` tag, deduplicated and in first-seen order.
        """
        parser = TextBlockParser()
        soup = BeautifulSoup(text)
        seen = set()  # membership check for O(1) dedup
        ordered = []  # preserves first-seen order for the return value

        for node in soup.findAll(text=re.compile(URL_RE)):
            # Skip URLs already wrapped in an anchor tag.
            if self.inside_a(node):
                continue
            for url in parser.extract_urls(unicode(node)):
                if url not in seen:
                    seen.add(url)
                    ordered.append(url)

        return ordered
Ejemplo n.º 3
0
    def parse_data(self, text, maxwidth, maxheight, template_dir, context,
                   urlize_all_links):
        """Replace URL-bearing text nodes in *text* with rendered embeds.

        Parses *text* as HTML, finds text nodes matching URL_RE that are
        not inside an ``<a>`` tag, renders each via TextBlockParser.parse,
        and splices the rendered HTML back into the tree in place of the
        original text node.  Returns the resulting document as unicode.

        Standalone URLs use the caller-supplied *template_dir*; URLs mixed
        into surrounding text are rendered with the 'inline' template set.
        """
        block_parser = TextBlockParser()
        # Remember the caller's template dir: the loop below reassigns
        # template_dir per node, so we need the original to restore it.
        original_template_dir = template_dir

        soup = BeautifulSoup(text)

        for user_url in soup.findAll(text=re.compile(URL_RE)):
            if not self.inside_a(user_url):
                if self.is_standalone(user_url):
                    template_dir = original_template_dir
                else:
                    # URL is embedded in surrounding text, so use the
                    # inline template variants instead.
                    template_dir = 'inline'

                replacement = block_parser.parse(str(user_url), maxwidth,
                                                 maxheight, template_dir,
                                                 context, urlize_all_links)
                # Re-parse the rendered HTML so it is inserted as markup,
                # not as an escaped text node.
                user_url.replaceWith(BeautifulSoup(replacement))

        return unicode(soup)
Ejemplo n.º 4
0
 def setUp(self):
     """Create a fresh TextBlockParser for each test, then run the
     base-class setup."""
     self.parser = TextBlockParser()
     super(TextBlockParserTestCase, self).setUp()