Ejemplo n.º 1
0
 def test_remove_stop_chars(self):
     # Feeding the cleaner's own stop_chars through remove_stop_chars
     # is expected to leave nothing behind.
     cleaner = sc.StringCleanup(self.config)
     stripped = cleaner.remove_stop_chars(cleaner.stop_chars)
     self.assertEqual(stripped, '')
Ejemplo n.º 2
0
 def test_simplify_separators(self):
     # For this sample the expected result has every run of '-' reduced
     # to a single '-' and no '-' at either end.
     raw = '--a--b-cde-fg-h---'
     expected = 'a-b-cde-fg-h'
     cleaner = sc.StringCleanup(self.config)
     self.assertEqual(cleaner.simplify_separators(raw), expected)
Ejemplo n.º 3
0
    # Load the YAML configuration file named by args.conf.
    with open(args.conf, encoding='utf-8') as f:
        config = yaml.safe_load(f)
    # The entries of 'ipa_hyphenation_dict' decide which languages are accepted.
    available_languages = config['ipa_hyphenation_dict']

    if args.language not in available_languages:
        # Join with ', ' so the list reads "a, b, c" in the error message.
        parser.error(f"Language \"{args.language}\" not available."
                     f"\nAvailable languages: {', '.join(available_languages)} ")

    # Replace each per-language section with the entry for the chosen language.
    # NOTE(review): only 'ipa_hyphenation_dict' is checked above; 'simplifying'
    # and 'hyphenation_dict' are assumed to contain the same language -- confirm.
    config['simplifying'] = config['simplifying'][args.language]
    config['hyphenation_dict'] = config['hyphenation_dict'][args.language]
    config['ipa_hyphenation_dict'] = config['ipa_hyphenation_dict'][args.language]
    config['language'] = args.language

    # Read the input file and run every value of every entry through cleanup().
    cleaner = sc.StringCleanup(config)
    raw_data = cleaner.read_pronunciation_file_as_dict(args.input_file)
    #clean_data = {k: [cleaner.simplify_separators(cleaner.cleanup(x)) for x in v] for k, v in raw_data.items()}
    clean_data = {k: [cleaner.cleanup(x) for x in v] for k, v in raw_data.items()}

    # Hand the cleaned data to the request object and let it prepare it.
    request = pr.PhonetoqueRequest(config)
    request.pronunciations = clean_data
    request.prepare_data()

    #request.post_all_words()

    request.get_all_syllables()
    print("got all syllables")
    request.get_max_syllables()