def isValidSymbol(form, field):
    """Validate that the field holds IPA or the accepted "-" alternative.

    Input that is only valid IPA once lower-cased is also accepted; in that
    case ``field.data`` is overwritten with the lower-cased text.

    Raises:
        ValidationError: the value is not valid IPA, is not "-", and is
            still not valid IPA after lower-casing.
    """
    value = field.data

    # Already acceptable as typed: leave the field untouched.
    if is_valid_ipa(value) or value == "-":
        return

    lowered = value.lower()
    if not is_valid_ipa(lowered):
        raise ValidationError("Not valid IPA")

    # Only the lower-cased form is valid, so store that form.
    field.data = lowered
def makePairList(form, field): print("\nform.makePairList:") # Generates fields to fill out based on chosen pairs if not form.pairs.data: emptyFiedList(form.pairSounds) raise ValidationError("Choose what words to pair with") else: # There are pairs to do stuff with word1 = Word.query.get(int(form.word1.data)) # If user has clicked "Add sounds", refresh list from pairs if form.definePairs.data: # This must be "define pairs" # Make new list from chosen pairs repopulateFieldList(form.pairSounds, form.pairs, word1) else: # User clicked "Submit Pairs/add sounds" # This is for "add sounds" (first btn) if form.pairSounds.data: for word in form.pairSounds: if word.sound1.data == "" or word.sound2.data == "": repopulateFieldList(form.pairSounds, form.pairs, word1) raise ValidationError("No empty sound fields allowed") if (not is_valid_ipa(word.sound1.data) and (word.sound1.data != "-")) or ( not is_valid_ipa(word.sound2.data) and (word.sound2.data != "-")): repopulateFieldList(form.pairSounds, form.pairs, word1) raise ValidationError("Not valid IPA") for word in form.pairSounds: # get word2 from db with word id in hidden field and pair them up word2 = Word.query.get(int(word.word2_id.data)) print( "incoming data says that word *{}: {}* and word *{}: {}*" .format(word1.word, word.sound1.data, word2.word, word.sound2.data)) addedPairs = word1.pair(word2, word.sound1.data, word.sound2.data) if addedPairs: for pair in addedPairs: print("Added pair:" + pair.textify()) else: raise ValidationError("Sounds cannot be null") # raise ValidationError("Need to define pair sound") return
def _check_ipa_phonemes(phone_to_examples: Dict[str, Set[str]], filepath: str) -> None:
    """Given the phonemes, checks whether they are represented in the IPA.

    This will catch problematic phonemes, according to the current IPA
    standard supported by `ipapy`. In addition, it is likely to complain
    about highly specific allophones, which are likely to be present in
    languages which have highly phonetic representation of their phoneme
    inventory.

    A phoneme is accepted when its NFD-normalized form passes
    `ipapy.is_valid_ipa` or when it is a member of `OTHER_VALID_IPA`.
    Warnings are logged only when `filepath` ends with "phonemic.tsv".

    For a current IPA chart, please see:
    https://www.internationalphoneticassociation.org/IPAcharts/IPA_chart_orig/IPA_charts_E.html

    Args:
        phone_to_examples: mapping whose keys are the phones to check.
        filepath: path of the file the phones came from.
    """
    bad_ipa_phonemes = frozenset(
        phone
        for phone in phone_to_examples
        if not (
            ipapy.is_valid_ipa(unicodedata.normalize("NFD", phone))
            or phone in OTHER_VALID_IPA
        )
    )
    if not bad_ipa_phonemes or not filepath.endswith("phonemic.tsv"):
        return

    logging.warning("Found %d invalid IPA phones:", len(bad_ipa_phonemes))
    for phoneme_id, phoneme in enumerate(bad_ipa_phonemes, start=1):
        bad_chars = [
            "[%d %04x %s %s]"
            % (
                i,
                ord(c),
                unicodedata.category(c),
                # Some code points have no Unicode name; without a default,
                # unicodedata.name() raises ValueError and would abort the
                # report.
                unicodedata.name(c, "<unnamed>"),
            )
            for i, c in enumerate(ipapy.invalid_ipa_characters(phoneme))
        ]
        logging.warning(
            "[%d] Non-IPA transcription: %s (%s)",
            phoneme_id,
            phoneme,
            " ".join(bad_chars),
        )
def __init__(self, word_unicode, pron_unicode=None, pron_ipastring=None):
    """Store a word together with its pronunciation.

    The pronunciation may be supplied as an already-built IPA string object
    (``pron_ipastring``), which takes precedence, or as raw Unicode text
    (``pron_unicode``) that is parsed here.

    :param word_unicode: the word, stored unchanged
    :param pron_unicode: raw pronunciation text; only used when
        ``pron_ipastring`` is None
    :param pron_ipastring: pre-built pronunciation object, or None
    """
    self.word_unicode = word_unicode

    if pron_ipastring is not None:
        # A ready-made IPA string was supplied, so it is flagged as valid
        # without calling is_valid_ipa.
        self.pron_ipastring = pron_ipastring
        self.pron_unicode = pron_ipastring.__unicode__()
        self.pron_unicode_is_valid = True
        return

    # Raw text: record whether it is valid, then parse it leniently
    # (ignore=True) so that building the IPA string does not reject it.
    self.pron_unicode = pron_unicode
    self.pron_unicode_is_valid = is_valid_ipa(self.pron_unicode)
    self.pron_ipastring = IPAString(
        unicode_string=self.pron_unicode,
        ignore=True,
        single_char_parsing=False,
    )
def __init__(self, ipa_chars=None, unicode_string=None, ignore=False, single_char_parsing=False):
    """Build the object from a list of IPA characters or from Unicode text.

    ``ipa_chars`` takes precedence when given. Otherwise ``unicode_string``
    is parsed, if given. With neither, ``ipa_chars`` stays an empty list.

    :param ipa_chars: ready-made IPA character objects, or None
    :param unicode_string: text to parse, or None
    :param bool ignore: if True, skip the validity check on ``unicode_string``
    :param bool single_char_parsing: forwarded to
        ``remove_invalid_ipa_characters``
    :raises ValueError: ``unicode_string`` is not a Unicode string, or it is
        not IPA valid while ``ignore`` is False
    """
    self.ipa_chars = []

    if ipa_chars is not None:
        self.ipa_chars = ipa_chars
        return
    if unicode_string is None:
        return

    if not is_unicode_string(unicode_string):
        raise ValueError("The given string is not a Unicode string.")
    # The validity check runs only when the caller has not asked to ignore
    # invalid characters.
    if not (ignore or is_valid_ipa(unicode_string)):
        raise ValueError("The given string contains characters not IPA valid. Use the 'ignore' option to ignore them.")

    pieces = remove_invalid_ipa_characters(
        unicode_string=unicode_string,
        return_invalid=False,
        single_char_parsing=single_char_parsing,
    )
    self.ipa_chars = [UNICODE_TO_IPA[piece] for piece in pieces]
def command_check(string, vargs):
    """
    Print whether the given string is IPA valid.

    When it is not, the invalid characters are printed as well.

    :param str string: the string to act upon
    :param dict vargs: the command line arguments
    """
    ok = is_valid_ipa(string)
    print(ok)
    if ok:
        return

    # Only the rejected characters are reported; the valid part is discarded.
    _, rejected = remove_invalid_ipa_characters(
        unicode_string=string,
        return_invalid=True,
    )
    print_invalid_chars(rejected, vargs)
def command_check(string, vargs):
    """
    Report the IPA validity of the given string on standard output.

    The validity flag is always printed. If the string is not IPA valid,
    its invalid characters are printed after it.

    :param str string: the string to act upon
    :param dict vargs: the command line arguments
    """
    string_is_ipa = is_valid_ipa(string)
    print(string_is_ipa)
    if string_is_ipa:
        # Nothing further to report for a valid string.
        return

    _kept, offending = remove_invalid_ipa_characters(
        unicode_string=string, return_invalid=True
    )
    print_invalid_chars(offending, vargs)
- a few example word-pronunciation pairs for this phone/phoneme """ import argparse import collections import logging import random import unicodedata from typing import Dict, List, Set import ipapy OTHER_VALID_IPA = frozenset( phone for phone in ipapy.UNICODE_TO_IPA.keys() if not ipapy.is_valid_ipa(unicodedata.normalize("NFD", phone))) def _count_phones(filepath: str) -> Dict[str, Set[str]]: """Count the phones in the given TSV file. phone_to_examples as Dict[str, Set[str]] is the most straightforward data structure for our purposes. It's not memory-efficient (with the same word-pron pair appearing in different phones' sets), but anything fancier doesn't seem worth the work. """ phone_to_examples = collections.defaultdict(set) with open(filepath, encoding="utf-8") as source: for line in source: line = line.strip() if not line:
idx = 0 for char in glyph: if idx == 0: item = '%04x' % ord(char) name = item idx += 1 else: item = "+" + '%04x' % ord(char) name = name + item names.append(name) # Todo:, Only keeps one of pair, handle both # s_glyph = IPAString(unicode_string=glyph) idx = 0 for glyph in glyphs: name = names[idx] if ft.validate_word(glyph) and is_valid_ipa(glyph): results[rows[1]]["phones"][rows[5]] = {"glyph": glyph} if not rows[5] == "NA": results[rows[1]]["phones"][ rows[5]]["allophones"] = prefilter(rows[7]) elif not is_valid_ipa(glyph) and not ft.validate_word(glyph): descrip = "both: " + glyph if descrip not in IPAErrors: IPAErrors.append(descrip) else: if not is_valid_ipa(glyph): descrip = "ipapy: " + glyph if descrip not in IPAErrors: IPAErrors.append(descrip) if not ft.validate_word(glyph): descrip = "panphon: " + glyph