Esempio n. 1
0
 def parse(self, log: ProcessLogger, text, text_unit_id, _text_unit_lang, **kwargs) -> ParseResults:
     """
     Collect the URLs mentioned in ``text`` and report how often each occurs.

     :param log: logger supplied by the caller; not used by this parser.
     :param text: text to scan for URLs.
     :param text_unit_id: identifier stored on every produced ``UrlUsage``.
     :param _text_unit_lang: accepted for interface compatibility; unused.
     :param kwargs: extra keyword arguments; ignored.
     :return: ``ParseResults`` holding one ``UrlUsage`` per distinct URL
         (with its occurrence count), or ``None`` when no URL is found.
     """
     # Function-scope import keeps this block self-contained.
     from collections import Counter

     # Single pass over the annotations: tally every URL once instead of
     # calling list.count() per distinct URL (which was quadratic).
     url_counts = Counter(u.url for u in urls.get_url_annotations(text))
     if not url_counts:
         # Nothing detected - keep the original "no result" contract (None).
         return None

     return ParseResults({UrlUsage: [UrlUsage(text_unit_id=text_unit_id,
                                              source_url=url,
                                              count=count) for url, count in url_counts.items()]})
Esempio n. 2
0
    def test_ratios(self):
        """Check that a single URL is extracted both as a plain value and as an annotation."""
        text = "I've been banned on www.google.com :("

        # Plain extraction: exactly one URL, returned as the matched string.
        ds = list(get_urls(text))
        self.assertEqual(1, len(ds))
        self.assertEqual('www.google.com', ds[0])

        # Annotation extraction: exactly one annotation carrying locale and URL.
        ants = list(get_url_annotations(text))
        # Assert on the annotations list itself (previously re-checked `ds`).
        self.assertEqual(1, len(ants))
        self.assertEqual('en', ants[0].locale)
        self.assertEqual('www.google.com', ants[0].url)