コード例 #1
0
ファイル: forms.py プロジェクト: Hamleyburger/MinimalPairs
def isValidSymbol(form, field):
    """Validate that ``field.data`` is IPA or the accepted alternative "-".

    Input that is only valid IPA once lower-cased is accepted as well; in
    that case ``field.data`` is replaced by its lower-case form.

    Raises:
        ValidationError: if neither the input nor its lower-case version is
            valid IPA and the input is not "-".
    """
    symbol = field.data

    # Already fine as entered: nothing to normalise.
    if is_valid_ipa(symbol) or symbol == "-":
        return

    lowered = symbol.lower()
    if not is_valid_ipa(lowered):
        raise ValidationError("Not valid IPA")

    # Only the lower-case spelling is valid, so store that one.
    field.data = lowered
コード例 #2
0
ファイル: forms.py プロジェクト: Hamleyburger/MinimalPairs
def makePairList(form, field):
    """Validate the chosen pairs and their sounds, then pair the words.

    Behaviour depends on which button submitted the form:

    * no pairs chosen: the sound field list is emptied and validation fails;
    * ``form.definePairs`` set: the sound field list is rebuilt from the
      chosen pairs and nothing else happens;
    * otherwise: every entry of ``form.pairSounds`` is checked (no empty
      sounds, each sound is valid IPA or "-") and, only if all entries pass,
      ``word1.pair`` is called for each of them.

    ``field`` is not used by this function.

    Raises:
        ValidationError: when no pairs are chosen, when the sound list is
            empty, or when any sound is empty or not valid IPA.
    """
    print("\nform.makePairList:")

    if not form.pairs.data:
        # Nothing selected: clear the sound fields before rejecting.
        emptyFiedList(form.pairSounds)
        raise ValidationError("Choose what words to pair with")

    word1 = Word.query.get(int(form.word1.data))

    if form.definePairs.data:
        # "Define pairs" was clicked: refresh the sound fields from the pairs.
        repopulateFieldList(form.pairSounds, form.pairs, word1)
        return

    # From here on the user submitted the sounds themselves.
    if not form.pairSounds.data:
        raise ValidationError("Sounds cannot be null")

    # First pass: validate every entry before anything is paired.
    for entry in form.pairSounds:
        first_sound = entry.sound1.data
        second_sound = entry.sound2.data

        if first_sound == "" or second_sound == "":
            repopulateFieldList(form.pairSounds, form.pairs, word1)
            raise ValidationError("No empty sound fields allowed")

        both_sounds_ok = (
            (is_valid_ipa(first_sound) or first_sound == "-")
            and (is_valid_ipa(second_sound) or second_sound == "-")
        )
        if not both_sounds_ok:
            repopulateFieldList(form.pairSounds, form.pairs, word1)
            raise ValidationError("Not valid IPA")

    # Second pass: look up word2 by the id in the hidden field and pair up.
    for entry in form.pairSounds:
        word2 = Word.query.get(int(entry.word2_id.data))
        print(
            "incoming data says that word *{}: {}* and word *{}: {}*"
            .format(word1.word, entry.sound1.data, word2.word,
                    entry.sound2.data))
        new_pairs = word1.pair(word2, entry.sound1.data, entry.sound2.data)
        if not new_pairs:
            continue
        for pair in new_pairs:
            print("Added pair:" + pair.textify())
コード例 #3
0
ファイル: list_phones.py プロジェクト: blueCormorant/wikipron
def _check_ipa_phonemes(phone_to_examples: Dict[str, Set[str]], filepath: str):
    """Given the phonemes checks whether they are represented in the IPA.

    This will catch problematic phonemes, according to the current IPA standard
    supported by `ipapy`. In addition, it is likely to complain about highly
    specific allophones, which are likely to be present in languages which have
    highly phonetic representation of their phoneme inventory. For a current
    IPA chart, please see:

        https://www.internationalphoneticassociation.org/IPAcharts/IPA_chart_orig/IPA_charts_E.html

    Args:
        phone_to_examples: mapping whose keys are the phones to check.
        filepath: path of the file the phones came from; warnings are only
            emitted when it ends with "phonemic.tsv".

    Returns:
        None. Findings are reported through `logging.warning`.
    """
    # Nothing is ever reported for other files, so skip the scan entirely.
    if not filepath.endswith("phonemic.tsv"):
        return

    # A phone is acceptable if its NFD form is valid IPA or it is explicitly
    # whitelisted. Sorting keeps the log output stable between runs.
    bad_ipa_phonemes = sorted(
        phone for phone in phone_to_examples
        if not (ipapy.is_valid_ipa(unicodedata.normalize("NFD", phone))
                or phone in OTHER_VALID_IPA))
    if not bad_ipa_phonemes:
        return

    logging.warning("Found %d invalid IPA phones:", len(bad_ipa_phonemes))
    for phoneme_id, phoneme in enumerate(bad_ipa_phonemes, start=1):
        bad_chars = [
            "[%d %04x %s %s]" % (
                i,
                ord(c),
                unicodedata.category(c),
                # Some code points have no name; a default avoids ValueError
                # while producing a diagnostic message.
                unicodedata.name(c, "<unnamed>"),
            )
            for i, c in enumerate(ipapy.invalid_ipa_characters(phoneme))
        ]
        logging.warning(
            "[%d] Non-IPA transcription: %s (%s)",
            phoneme_id,
            phoneme,
            " ".join(bad_chars),
        )
コード例 #4
0
 def __init__(self, word_unicode, pron_unicode=None, pron_ipastring=None):
     """Store a word together with its pronunciation.

     The pronunciation comes either from ``pron_ipastring`` (used as-is and
     flagged as valid) or, when that is ``None``, from ``pron_unicode``,
     which is checked with ``is_valid_ipa`` and converted to an
     ``IPAString`` built with ``ignore=True``.

     :param word_unicode: the word itself
     :param pron_unicode: pronunciation as a Unicode string; only used when
                          ``pron_ipastring`` is ``None``
     :param pron_ipastring: pronunciation as an already built IPA string
                            object; takes precedence when given
     """
     self.word_unicode = word_unicode

     if pron_ipastring is not None:
         # A ready-made IPA string is taken on trust.
         self.pron_ipastring = pron_ipastring
         self.pron_unicode = pron_ipastring.__unicode__()
         self.pron_unicode_is_valid = True
         return

     # Only the raw string is available: record its validity, then parse it.
     self.pron_unicode = pron_unicode
     self.pron_unicode_is_valid = is_valid_ipa(self.pron_unicode)
     parsed = IPAString(
         unicode_string=self.pron_unicode,
         ignore=True,
         single_char_parsing=False
     )
     self.pron_ipastring = parsed
コード例 #5
0
ファイル: ipastring.py プロジェクト: agatamartyna/phonetics
 def __init__(self, ipa_chars=None, unicode_string=None, ignore=False, single_char_parsing=False):
     """Build the object from IPA characters or from a Unicode string.

     ``ipa_chars`` takes precedence over ``unicode_string``. When neither
     is given, ``self.ipa_chars`` stays an empty list.

     :param ipa_chars: IPA character objects to store directly
     :param unicode_string: string to convert; its substrings (as returned
                            by ``remove_invalid_ipa_characters``) are
                            looked up in ``UNICODE_TO_IPA``
     :param ignore: when true, the ``is_valid_ipa`` check is skipped
     :param single_char_parsing: forwarded to
                                 ``remove_invalid_ipa_characters``
     :raises ValueError: if ``unicode_string`` is not a Unicode string, or
                         if it is not valid IPA and ``ignore`` is false
     """
     self.ipa_chars = []

     if ipa_chars is not None:
         self.ipa_chars = ipa_chars
         return

     if unicode_string is None:
         return

     if not is_unicode_string(unicode_string):
         raise ValueError("The given string is not a Unicode string.")

     must_validate = not ignore
     if must_validate and not is_valid_ipa(unicode_string):
         raise ValueError("The given string contains characters not IPA valid. Use the 'ignore' option to ignore them.")

     pieces = remove_invalid_ipa_characters(
         unicode_string=unicode_string,
         return_invalid=False,
         single_char_parsing=single_char_parsing
     )
     self.ipa_chars = [UNICODE_TO_IPA[piece] for piece in pieces]
コード例 #6
0
ファイル: __main__.py プロジェクト: wgfi110/ipapy
def command_check(string, vargs):
    """
    Print whether the given string is IPA valid.

    When it is not, also print the characters that made it invalid.

    :param str string: the string to act upon
    :param dict vargs: the command line arguments,
                       passed on to ``print_invalid_chars``
    """
    verdict = is_valid_ipa(string)
    print(verdict)
    if verdict:
        return

    # Only the rejected characters are reported; the kept ones are unused.
    _kept, rejected = remove_invalid_ipa_characters(
        unicode_string=string, return_invalid=True)
    print_invalid_chars(rejected, vargs)
コード例 #7
0
ファイル: __main__.py プロジェクト: pettarin/ipapy
def command_check(string, vargs):
    """
    Report on the IPA validity of the given string.

    The validity flag is always printed. For an invalid string
    the offending characters are printed afterwards.

    :param str string: the string to act upon
    :param dict vargs: the command line arguments,
                       forwarded to ``print_invalid_chars``
    """
    string_is_ipa = is_valid_ipa(string)
    print(string_is_ipa)

    if string_is_ipa:
        # Nothing more to report for a valid string.
        return

    _valid_part, invalid_part = remove_invalid_ipa_characters(
        unicode_string=string,
        return_invalid=True
    )
    print_invalid_chars(invalid_part, vargs)
コード例 #8
0
ファイル: list_phones.py プロジェクト: blueCormorant/wikipron
- a few example word-pronunciation pairs for this phone/phoneme
"""

import argparse
import collections
import logging
import random
import unicodedata

from typing import Dict, List, Set

import ipapy

# Keys of ipapy's UNICODE_TO_IPA table that `ipapy.is_valid_ipa` rejects once
# they are NFD-normalized; collected so they can still be treated as valid.
OTHER_VALID_IPA = frozenset(
    symbol
    for symbol in ipapy.UNICODE_TO_IPA
    if not ipapy.is_valid_ipa(unicodedata.normalize("NFD", symbol))
)


def _count_phones(filepath: str) -> Dict[str, Set[str]]:
    """Count the phones in the given TSV file.

    phone_to_examples as Dict[str, Set[str]] is the most straightforward
    data structure for our purposes. It's not memory-efficient
    (with the same word-pron pair appearing in different phones' sets),
    but anything fancier doesn't seem worth the work.
    """
    phone_to_examples = collections.defaultdict(set)
    with open(filepath, encoding="utf-8") as source:
        for line in source:
            line = line.strip()
            if not line:
コード例 #9
0
         idx = 0
         for char in glyph:
             if idx == 0:
                 item = '%04x' % ord(char)
                 name = item
                 idx += 1
             else:
                 item = "+" + '%04x' % ord(char)
                 name = name + item
         names.append(name)
 # TODO: only one of the pair is kept; handle both
 # s_glyph = IPAString(unicode_string=glyph)
 idx = 0
 for glyph in glyphs:
     name = names[idx]
 if ft.validate_word(glyph) and is_valid_ipa(glyph):
     results[rows[1]]["phones"][rows[5]] = {"glyph": glyph}
     if not rows[5] == "NA":
         results[rows[1]]["phones"][
             rows[5]]["allophones"] = prefilter(rows[7])
 elif not is_valid_ipa(glyph) and not ft.validate_word(glyph):
     descrip = "both: " + glyph
     if descrip not in IPAErrors:
         IPAErrors.append(descrip)
 else:
     if not is_valid_ipa(glyph):
         descrip = "ipapy: " + glyph
         if descrip not in IPAErrors:
             IPAErrors.append(descrip)
     if not ft.validate_word(glyph):
         descrip = "panphon: " + glyph