Example #1
0
class TextBlockParserTestCase(BaseOEmbedTestCase):
    """Exercise TextBlockParser's parse() and extract_urls() behaviour."""

    def setUp(self):
        self.parser = TextBlockParser()
        super(TextBlockParserTestCase, self).setUp()

    def test_basic_handling(self):
        # A bare URL on its own is replaced by its embed markup.
        result = self.parser.parse(self.category_url)
        self.assertEqual(result, self.category_embed)

    def test_inline_link_handling(self):
        # A URL embedded in surrounding text is replaced in place.
        source = 'Testing %s' % self.category_url
        expected = 'Testing %s' % self.category_embed
        self.assertEqual(self.parser.parse(source), expected)

    def test_block_handling(self):
        # Multiple occurrences across lines are each replaced.
        source = 'Testing %(url)s\n%(url)s' % {'url': self.category_url}
        expected = 'Testing %(embed)s\n%(embed)s' % {'embed': self.category_embed}
        self.assertEqual(self.parser.parse(source), expected)

    def test_urlization(self):
        text = 'Testing http://www.google.com'

        # Non-embeddable URLs pass through untouched unless urlization is on.
        self.assertEqual(self.parser.parse(text, urlize_all_links=False), text)
        self.assertEqual(
            self.parser.parse(text, urlize_all_links=True),
            'Testing <a href="http://www.google.com">http://www.google.com</a>')

    def test_extraction(self):
        found = self.parser.extract_urls('Testing %s wha?' % self.category_url)
        self.assertEqual(found, set([self.category_url]))
Example #2
0
 def extract_urls(self, text):
     """Collect URLs appearing in *text* that are not already inside an <a> tag.

     Returns a set of URL strings.
     """
     parser = TextBlockParser()
     found = set()

     # Visit every text node that contains something URL-shaped.
     for node in BeautifulSoup(text).findAll(text=re.compile(URL_RE)):
         # Skip text already wrapped in an anchor tag.
         if self.inside_a(node):
             continue
         found |= parser.extract_urls(unicode(node))

     return found
Example #3
0
    def extract_urls(self, text):
        """Return the set of URLs in *text* that are not already wrapped in an <a> tag."""
        block_parser = TextBlockParser()
        markup = BeautifulSoup(text)
        pattern = re.compile(URL_RE)

        collected = set()
        for fragment in markup.findAll(text=pattern):
            # Anchored text is already linked; only harvest bare URLs.
            if not self.inside_a(fragment):
                collected.update(block_parser.extract_urls(unicode(fragment)))

        return collected
Example #4
0
class TextBlockParserTestCase(BaseOEmbedTestCase):
    """Tests for TextBlockParser: parsing, urlization, and ordered URL extraction."""

    def setUp(self):
        self.parser = TextBlockParser()
        super(TextBlockParserTestCase, self).setUp()

    def test_basic_handling(self):
        # A bare URL on its own is replaced by its embed markup.
        result = self.parser.parse(self.category_url)
        self.assertEqual(result, self.category_embed)

    def test_inline_link_handling(self):
        # A URL embedded in surrounding text is replaced in place.
        source = 'Testing %s' % self.category_url
        expected = 'Testing %s' % self.category_embed
        self.assertEqual(self.parser.parse(source), expected)

    def test_block_handling(self):
        # Multiple occurrences across lines are each replaced.
        source = 'Testing %(url)s\n%(url)s' % {'url': self.category_url}
        expected = 'Testing %(embed)s\n%(embed)s' % {'embed': self.category_embed}
        self.assertEqual(self.parser.parse(source), expected)

    def test_urlization(self):
        text = 'Testing http://www.google.com'

        # Non-embeddable URLs pass through untouched unless urlization is on.
        self.assertEqual(self.parser.parse(text, urlize_all_links=False), text)
        self.assertEqual(
            self.parser.parse(text, urlize_all_links=True),
            'Testing <a href="http://www.google.com">http://www.google.com</a>')

    def test_extraction(self):
        # extract_urls here yields a list rather than a set.
        found = self.parser.extract_urls('Testing %s wha?' % self.category_url)
        self.assertEqual(found, [self.category_url])

    def test_extraction_ordering(self):
        # First-seen order is preserved and repeats are dropped.
        found = self.parser.extract_urls('''
            %s %s %s
            %s
        ''' % (self.category_url, self.blog_url, self.category_url, self.rich_url))

        self.assertEqual(found, [
            self.category_url,
            self.blog_url,
            self.rich_url,
        ])
Example #5
0
    def extract_urls(self, text):
        """Return URLs found in *text* (outside <a> tags) in first-seen order, de-duplicated."""
        block_parser = TextBlockParser()
        soup = BeautifulSoup(text)
        seen = set()       # membership check for de-duplication
        ordered = []       # preserves first-appearance order

        for fragment in soup.findAll(text=re.compile(URL_RE)):
            if self.inside_a(fragment):
                # Already linked; skip.
                continue
            for url in block_parser.extract_urls(unicode(fragment)):
                if url not in seen:
                    seen.add(url)
                    ordered.append(url)

        return ordered
    def extract_urls(self, text):
        """De-duplicated, order-preserving list of URLs in *text* not inside an <a> tag."""
        inner = TextBlockParser()
        pattern = re.compile(URL_RE)
        result = []
        seen = set()

        for node in BeautifulSoup(text).findAll(text=pattern):
            if self.inside_a(node):
                continue
            # Keep only URLs we haven't recorded yet, in encounter order.
            for candidate in inner.extract_urls(unicode(node)):
                if candidate not in seen:
                    seen.add(candidate)
                    result.append(candidate)

        return result