def test_remove_stop_chars(self):
    """Feeding the cleaner's own stop characters through removal yields ''."""
    cleaner = sc.StringCleanup(self.config)
    stripped = cleaner.remove_stop_chars(cleaner.stop_chars)
    self.assertEqual(stripped, '')
def test_simplify_separators(self):
    """Check simplify_separators on a sample with repeated and edge hyphens.

    The sample has doubled hyphens inside the string and runs of hyphens at
    both ends; the expected result keeps a single hyphen between parts and
    none at the edges.
    """
    messy = '--a--b-cde-fg-h---'
    expected = 'a-b-cde-fg-h'
    cleaner = sc.StringCleanup(self.config)
    self.assertEqual(cleaner.simplify_separators(messy), expected)
# Load the YAML configuration file named on the command line.
with open(args.conf, encoding='utf-8') as f:
    config = yaml.safe_load(f)

# The entries of 'ipa_hyphenation_dict' serve as the set of supported
# languages (presumably a mapping keyed by language code -- confirm against
# the config schema).
available_languages = config['ipa_hyphenation_dict']
if args.language not in available_languages:
    # NOTE(review): `parser` is presumably an argparse.ArgumentParser, whose
    # error() prints the message and exits -- confirm.
    # Fix: the separator used to be ' ,' which rendered as "en ,de ,fr".
    parser.error(f"Language \"{args.language}\" not available."
                 f"\nAvailable languages: {', '.join(available_languages)} ")

# Replace each per-language section with the entry for the chosen language,
# so downstream code sees a config specific to that language.
config['simplifying'] = config['simplifying'][args.language]
config['hyphenation_dict'] = config['hyphenation_dict'][args.language]
config['ipa_hyphenation_dict'] = config['ipa_hyphenation_dict'][args.language]
config['language'] = args.language

# Read the pronunciation file and clean every pronunciation of every entry.
cleaner = sc.StringCleanup(config)
raw_data = cleaner.read_pronunciation_file_as_dict(args.input_file)
# Disabled alternative that also simplified separators after cleanup:
# clean_data = {k: [cleaner.simplify_separators(cleaner.cleanup(x)) for x in v] for k, v in raw_data.items()}
clean_data = {
    key: [cleaner.cleanup(item) for item in values]
    for key, values in raw_data.items()
}

# Hand the cleaned pronunciations to the request object and run its steps.
request = pr.PhonetoqueRequest(config)
request.pronunciations = clean_data
request.prepare_data()
# Disabled step:
# request.post_all_words()
request.get_all_syllables()
print("got all syllables")
request.get_max_syllables()