def check(args):
    """ clpa check <STRING> """
    # NOTE(review): the docstring above looks like CLI usage text and may be
    # shown to users by the command dispatcher - keep it terse; confirm.
    positional = args.args
    if len(positional) != 1:
        raise ParserError('only one argument allowed')

    text = positional[0]
    report = check_string(text, load_whitelist())

    # First row: the input with its single spaces turned into tabs.
    print(text.replace(' ', '\t'))
    # Second row: the items returned by check_string, tab-separated.
    print('\t'.join(report))
def test_find_token(self):
    """Call ``find_token`` with one lookup table populated at a time.

    NOTE(review): the positional arguments after the token are presumed to
    be (whitelist, alias, explicit, patterns, delete) - confirm against the
    signature of ``pyclpa.util.find_token``.
    """
    from pyclpa.util import find_token, load_whitelist, load_alias

    whitelist = load_whitelist()
    pattern_table = load_alias('patterns.tsv')

    # With every table empty the result must be falsy.
    assert not find_token('t', {}, {}, {}, {}, [])

    # 't' is returned unchanged once the real whitelist is supplied.
    assert find_token('t', whitelist, {}, {}, {}, []) == 't'

    # A per-character mapping 'h' -> 'ʰ' in the third argument gives 'tʰ'.
    assert find_token('th', whitelist, {'h': 'ʰ'}, {}, {}, []) == 'tʰ'

    # A whole-token mapping 'th' -> 'x' in the fourth argument gives 'x'.
    assert find_token('th', whitelist, {}, {'th': 'x'}, {}, []) == 'x'

    # The same kind of mapping with target 'X' must raise ValueError.
    with self.assertRaises(ValueError):
        find_token('th', whitelist, {}, {'th': 'X'}, {}, [])

    # The table loaded from patterns.tsv alone also yields 'tʰ'.
    assert find_token('th', whitelist, {}, {}, pattern_table, []) == 'tʰ'

    # Listing 'h' in the final argument leaves just 't'.
    assert find_token('th', whitelist, {}, {}, {}, ['h']) == 't'
def __init__(self, whitelist=None, alias=None, delete=None, explicit=None,
             patterns=None, accents=None, rules=None):
    """Store the lookup tables on the instance, filling in defaults.

    Any argument that is falsy (``None``, but also an empty container or
    empty string) is replaced by its default:

    - ``whitelist``: result of ``load_whitelist()``
    - ``alias``: ``load_alias('alias.tsv')``
    - ``delete``: the characters U+0361, U+035C and U+0301
    - ``explicit``: ``load_alias('explicit.tsv')``
    - ``patterns``: ``load_alias('patterns.tsv')``
    - ``accents``: the string ``"ˈˌ'"``
    - ``rules``: an empty list
    """
    if not whitelist:
        whitelist = load_whitelist()
    self.whitelist = whitelist

    if not alias:
        alias = load_alias('alias.tsv')
    self.alias = alias

    if not delete:
        delete = ['\u0361', '\u035c', '\u0301']
    self.delete = delete

    if not explicit:
        explicit = load_alias('explicit.tsv')
    self.explicit = explicit

    if not patterns:
        patterns = load_alias('patterns.tsv')
    self.patterns = patterns

    if not accents:
        accents = "ˈˌ'"
    self.accents = accents

    if not rules:
        rules = []
    self.rules = rules
def __init__(self, whitelist=None, alias=None, delete=None, explicit=None,
             patterns=None, accents=None, rules=None, normalized=None):
    """Store the lookup tables on the instance, filling in defaults.

    Any argument that is falsy (``None``, but also an empty container or
    empty string) is replaced by its default:

    - ``whitelist``: result of ``load_whitelist()``
    - ``alias``: ``load_alias('alias.tsv')``
    - ``delete``: the characters U+0361, U+035C and U+0301
    - ``explicit``: ``load_alias('explicit.tsv')``
    - ``patterns``: ``load_alias('patterns.tsv')``
    - ``accents``: the string ``"ˈˌ'"``
    - ``rules``: an empty list
    - ``normalized``: ``load_normalized('normalized.tsv')``
    """
    if not whitelist:
        whitelist = load_whitelist()
    self.whitelist = whitelist

    if not alias:
        alias = load_alias('alias.tsv')
    self.alias = alias

    if not delete:
        delete = ['\u0361', '\u035c', '\u0301']
    self.delete = delete

    if not explicit:
        explicit = load_alias('explicit.tsv')
    self.explicit = explicit

    if not patterns:
        patterns = load_alias('patterns.tsv')
    self.patterns = patterns

    if not accents:
        accents = "ˈˌ'"
    self.accents = accents

    if not rules:
        rules = []
    self.rules = rules

    if not normalized:
        normalized = load_normalized('normalized.tsv')
    self.normalized = normalized
def test_check_string(self):
    """``check_string`` must put '?' at index 2 for the input 'm a tt i s'."""
    from pyclpa.util import check_string, load_whitelist

    sample = 'm a tt i s'
    report = check_string(sample, load_whitelist())

    # Index 2 is where 'tt' sits in the space-separated input; presumably
    # the result is aligned with the input tokens - confirm.
    assert report[2] == '?'
def test_load_whitelist(self):
    """The whitelist entry for 't' must carry the ID 'c118'."""
    from pyclpa.util import load_whitelist

    entry = load_whitelist()['t']
    assert entry['ID'] == 'c118'