def test_pseudocorpus_no_common_terms(self):
    """Check the pseudocorpus built from a vocabulary without common terms.

    Each vocabulary entry containing the "_" delimiter is expected to
    appear once per possible two-part split; entries without the delimiter
    ("gold", "effective") are expected to contribute nothing.
    """
    vocabulary = [
        "prime_minister",
        "gold",
        "chief_technical_officer",
        "effective",
    ]
    expected_docs = [
        ["prime", "minister"],
        ["chief", "technical_officer"],
        ["chief_technical", "officer"],
    ]

    # Materialise the iterable so it can be compared against a plain list.
    produced_docs = list(pseudocorpus(vocabulary, "_"))

    self.assertEqual(produced_docs, expected_docs)
def test_pseudocorpus_no_common_terms(self):
    """Verify pseudocorpus output when no common terms are supplied.

    The asserted result holds one document for every way of cutting a
    delimited vocabulary entry into two components, and nothing for the
    single-word entries.
    """
    delimiter = "_"
    words = ["prime_minister", "gold", "chief_technical_officer", "effective"]

    documents = list(pseudocorpus(words, delimiter))

    # "chief_technical_officer" has two delimiters, hence two expected splits.
    wanted = [
        ["prime", "minister"],
        ["chief", "technical_officer"],
        ["chief_technical", "officer"],
    ]
    self.assertEqual(documents, wanted)
def test_pseudocorpus_with_common_terms(self):
    """Check the pseudocorpus built when common terms are supplied.

    With "in", "the" and "of" passed as common terms, the asserted
    documents keep those terms as standalone tokens between the two
    components of each split; entries without the delimiter ("gold",
    "effective") are expected to contribute nothing.
    """
    vocabulary = [
        "hall_of_fame",
        "gold",
        "chief_of_political_bureau",
        "effective",
        "beware_of_the_dog_in_the_yard",
    ]
    connectors = frozenset(["in", "the", "of"])
    expected_docs = [
        ["hall", "of", "fame"],
        ["chief", "of", "political_bureau"],
        ["chief_of_political", "bureau"],
        ["beware", "of", "the", "dog_in_the_yard"],
        ["beware_of_the_dog", "in", "the", "yard"],
    ]

    # Materialise the iterable so it can be compared against a plain list.
    produced_docs = list(
        pseudocorpus(vocabulary, "_", common_terms=connectors)
    )

    self.assertEqual(produced_docs, expected_docs)
def test_pseudocorpus_with_common_terms(self):
    """Verify pseudocorpus output when a set of common terms is given.

    The asserted result shows the common terms ("in", "the", "of") emitted
    as separate tokens at the split point, with the remaining parts of each
    vocabulary entry left joined by the delimiter.
    """
    delimiter = "_"
    words = [
        "hall_of_fame",
        "gold",
        "chief_of_political_bureau",
        "effective",
        "beware_of_the_dog_in_the_yard",
    ]
    stop_terms = frozenset(["in", "the", "of"])

    documents = list(pseudocorpus(words, delimiter, common_terms=stop_terms))

    # Single-word entries ("gold", "effective") do not appear in the result.
    wanted = [
        ["hall", "of", "fame"],
        ["chief", "of", "political_bureau"],
        ["chief_of_political", "bureau"],
        ["beware", "of", "the", "dog_in_the_yard"],
        ["beware_of_the_dog", "in", "the", "yard"],
    ]
    self.assertEqual(documents, wanted)