コード例 #1
0
 def setUpClass(cls):
     """Load the shared wikitext fixtures once and store them on the class.

     The fixture file is read with ``wp.json_load`` and the result is bound
     to ``cls.examples``.  According to the original author's notes it is a
     dictionary of ``word: text`` where each ``text`` is itself a
     dictionary holding the expected output of every parser step:

     text['text'] -- full wikitext
     text['lang'] -- only the language section
     text['pron'] -- only the pronunciation section of the language section
     text['ipa'] -- only the extracted ipa

     i.e. ``{'word': {'text': ..., 'lang': ...}, 'word2': {...}}``.
     """
     # NOTE(review): unittest invokes setUpClass on the class itself, so it
     # is expected to be decorated with @classmethod; no decorator is
     # visible in this excerpt -- confirm it exists in the real file.
     # The path is relative, so it resolves against the current working
     # directory of the test run.
     fixture_path = 'test/wikitext_examples.json'
     cls.examples = wp.json_load(fixture_path)
コード例 #2
0
    def test_extract_ipa_meets_threshold(self):
        """Check that IPA extraction over the pronunciation fixture is productive.

        Every pronunciation section in ``test/pron.json`` is run through both
        ``extract_pronunciation`` (keeping its ``'ipa'`` entry when present)
        and ``extract_ipa_lenient``.  The test then asserts minimum counts:
        more than 45000 sections loaded, more than 32000 words with a result
        from each extractor, and more than 500 words where the lenient
        extractor found something the regular one missed or returned a
        longer result than the regular one.
        """
        sections = wp.json_load('test/pron.json')
        self.assertGreater(len(sections), 45000)

        # word -> extracted IPA, one mapping per extraction strategy
        # (the original author describes the values as lists of ipa)
        strict_ipa = {}
        lenient_ipa = {}
        for word, section in sections.items():
            # A fresh Wikitext is built for each extractor, exactly as before,
            # so neither call can observe state left behind by the other.
            parsed = wp.Wikitext(section).extract_pronunciation()
            if 'ipa' in parsed:
                strict_ipa[word] = parsed['ipa']

            lenient_hits = wp.Wikitext(section).extract_ipa_lenient()
            if lenient_hits:
                lenient_ipa[word] = lenient_hits

        # Raw sections of the words where the lenient extractor did better:
        # either the regular extractor found nothing, or found fewer entries.
        lenient_gain = {}
        for word, lenient_hits in lenient_ipa.items():
            if word not in strict_ipa or len(lenient_hits) > len(strict_ipa[word]):
                lenient_gain[word] = sections[word]

        self.assertGreater(len(lenient_ipa), 32000)
        self.assertGreater(len(strict_ipa), 32000)
        self.assertGreater(len(lenient_gain), 500)