def test_words_in_text(self):
        # Verify that words_in_text returns a collection of eight items that
        # contains every word expected from the sample sentences.
        sample_input = """你叫什麼名字?我叫鄧小平。我高興認識你!"""
        words = words_in_text(sample_input)

        expected_words = ('你', '叫', '什麼', '名字', '我', '鄧小平', '高興', '認識')

        # Order is not guaranteed, so check the size and then membership
        # instead of comparing against an ordered sequence.
        self.assertEqual(len(words), len(expected_words))
        for expected in expected_words:
            # assertIn reports the missing word and the actual collection on
            # failure, unlike assertTrue(x in y) which only says "False".
            self.assertIn(expected, words)
def scan_new_words(input_text, known_words=None):
    """Scan an input text and return the set of words that are not yet known.

    Args:
        input_text: Text to scan; it is passed unchanged to words_in_text().
        known_words: Container of already-known words (anything supporting
            ``in``). When omitted or None, the known words are obtained from
            get_known_words(), which per the original documentation reads
            known_words.txt.

    Returns:
        A set of the words found in ``input_text`` that are absent from
        ``known_words``.
    """
    # Resolve the default at call time. A call in the signature is evaluated
    # only once, when the function is defined, which would force the lookup at
    # import time and freeze its result for every later call.
    if known_words is None:
        known_words = get_known_words()
    return {word for word in words_in_text(input_text) if word not in known_words}