def test_add_empty_to_nonempty():
    """Check that ``+=`` with a fresh store leaves the word count unchanged."""
    expected = ['alpha', 'bravo', 'charlie']

    store = WordStore()
    for entry in expected:
        store.add(entry)

    # Right-hand operand is a store nothing has been added to.
    untouched = WordStore()
    store += untouched

    stored = list(store.iter_words())
    assert len(stored) == len(expected)
def test_add():
    """Check that words added one at a time show up in ``iter_words``."""
    store = WordStore()

    # After the first insertion only that word is reported.
    store.add('albatross')
    after_first = list(store.iter_words())
    assert len(after_first) == 1
    assert 'albatross' in after_first

    # After the second insertion both words are reported.
    store.add('ganet')
    after_second = list(store.iter_words())
    assert len(after_second) == 2
    assert 'albatross' in after_second
    assert 'ganet' in after_second
def test_add_duplicate():
    """Check that ``+=`` of two stores sharing a word keeps one copy of it.

    ``'center'`` is added to both stores, so after the merge the number of
    words must equal the number of distinct words across both input lists.
    """
    gold = ['left', 'center', 'right']
    more_gold = ['surround', 'center']

    words = WordStore()
    for word in gold:
        words.add(word)

    more_words = WordStore()
    for word in more_gold:
        # Fill the second store, not ``words``: otherwise ``more_words`` stays
        # empty and the ``+=`` below never merges the overlapping entry.
        more_words.add(word)

    words += more_words

    assert len(list(words.iter_words())) == len(frozenset(gold + more_gold))
    for word in words.iter_words():
        assert word in gold or word in more_gold
def test_add_nonempty_to_nonempty():
    """Check that ``+=`` of two populated, disjoint stores keeps every word.

    The two input lists share no words, so the merged store must report
    ``len(gold) + len(more_gold)`` words, each drawn from one of the lists.
    """
    gold = ['left', 'center', 'right']
    more_gold = ['surround']

    words = WordStore()
    for word in gold:
        words.add(word)

    more_words = WordStore()
    for word in more_gold:
        # Fill the second store, not ``words``: otherwise the right-hand
        # operand of ``+=`` is empty and the merge is never exercised.
        more_words.add(word)

    words += more_words

    assert len(list(words.iter_words())) == len(gold) + len(more_gold)
    for word in words.iter_words():
        assert word in gold or word in more_gold
def scrape_html(html: str) -> WordStore:
    """Scrape HTML of its text.

    Args:
        html: HTML markup to parse.

    Returns:
        Store holding every whitespace-separated word found in the strings
        of the document's body. When the document has no body, nothing is
        added to the store and an info message is logged.
    """
    words = WordStore()
    soup = BeautifulSoup(html, 'html.parser')

    body = soup.body
    if body is None:
        # Check for the missing <body> directly instead of catching
        # AttributeError around the whole loop, which would also hide
        # unrelated AttributeErrors raised while splitting or adding words
        # and mislabel them as a missing body.
        _logger.info('HTML has no body.')
        return words

    for s in body.strings:
        for word in s.split():
            words.add(word)
    return words
def build_store(words):
    """Create a new ``WordStore`` and add every item of ``words`` to it.

    Args:
        words: Iterable whose items are passed one by one to ``WordStore.add``.

    Returns:
        The newly created store.
    """
    populated = WordStore()
    for entry in words:
        populated.add(entry)
    return populated