예제 #1
0
def oppositions(consonants, feature, others_same=True):
    """
    Find pairs of consonants that are opposed by the value of a feature.

    Every unordered pair from `consonants` is parsed with parse_consonant.
    A pair is skipped when `feature` is missing from either parse or when
    both parses carry the same value for it.

    When `others_same` is true (default), a pair is kept only if all
    remaining keys of the first parse (except 'glyph') agree with the
    second parse: 'additional articulations' and 'pre-features' are
    compared as sorted sequences, everything else via `congruent`.
    When `others_same` is false, the other features are free to vary.

    Returns a dict mapping (c1, c2) to the pair of differing values
    (value in c1, value in c2).
    """
    opposed = {}
    for first, second in combinations(consonants, 2):
        first_parse = parse_consonant(first)
        second_parse = parse_consonant(second)

        # The feature must be defined for both members of the pair.
        if not (feature in first_parse and feature in second_parse):
            continue
        values_differ = first_parse[feature] != second_parse[feature]
        if not values_differ:
            continue

        rest_matches = True
        if others_same:
            for key in first_parse:
                # The feature under test and the glyph itself are
                # expected to differ, so they are not compared.
                if key in (feature, 'glyph'):
                    continue
                if key in {'additional articulations', 'pre-features'}:
                    # Order-insensitive comparison for these two keys.
                    mismatch = (sorted(first_parse[key])
                                != sorted(second_parse[key]))
                else:
                    mismatch = not congruent(first_parse[key],
                                             second_parse[key])
                if mismatch:
                    rest_matches = False
                    break

        if rest_matches:
            opposed[(first, second)] = (first_parse[feature],
                                        second_parse[feature])
    return opposed
예제 #2
0
def all_segments_parsable(inventory):
    """
    Return True if parse_consonant accepts every segment in `inventory`.

    Returns False as soon as parsing any segment raises an exception.
    An empty inventory yields True.
    """
    for segment in inventory:
        try:
            parse_consonant(segment)
        except Exception:
            # Deliberately not a bare `except:` so that KeyboardInterrupt
            # and SystemExit still propagate instead of being reported
            # as an unparsable segment.
            return False
    return True
예제 #3
0
def feature_difference(p1, p2):
    """
    Report the features on which two segments differ.

    Both segments are parsed with parse_consonant. For every key of the
    first parse other than 'glyph', the values of the two parses are
    compared; differing keys are returned as
    {key: {'p1': value_in_p1, 'p2': value_in_p2}}.
    """
    first = parse_consonant(p1)
    second = parse_consonant(p2)
    return {
        key: {'p1': first[key], 'p2': second[key]}
        for key in first
        if key != 'glyph' and first[key] != second[key]
    }
예제 #4
0
def get_html_for_consonants(cons_list):
    """
    Render the given consonants into a single string.

    Each consonant is parsed with parse_consonant; those raising
    ValueError are collected separately. The successful parses are passed
    to get_tables_consonants and the failures to dump_unparsed, both
    writing into one in-memory text stream whose contents are returned.
    """
    parses_by_segment = {}
    failed_segments = []
    for segment in cons_list:
        try:
            parse = parse_consonant(segment)
        except ValueError:
            failed_segments.append(segment)
        else:
            parses_by_segment[segment] = parse

    with StringIO() as buffer:
        get_tables_consonants(parses_by_segment, buffer)
        dump_unparsed(failed_segments, buffer)
        rendered = buffer.getvalue()
    return rendered
def enumerate_triples(data_frame: pd.DataFrame, manners: Tuple[str, str],
                      voice: str, direction: str) -> Dict[str, List[str]]:
    """
    Collect segment triples per language that no fourth segment completes.

    @manners must have exactly two elements
    @direction can be either 'direct' or 'inverse'
    When @direction is 'direct', we take triples with two elements having
    the first manner.
    When @direction is 'inverse', we take triples with two elements having
    the second manner.

    For every Glottocode in @data_frame the inventory is narrowed to
    segments with the given @voice and one of the two @manners. A triple
    is reported when it yields exactly one 'place' opposition and exactly
    one 'manner' opposition, and no other segment of the narrowed
    inventory extends it to a quadruple with two oppositions of each kind.

    Returns a mapping from '/a b c/' to the list of Glottocodes in which
    that triple was found. Raises ValueError for an unknown @direction.
    """
    if direction not in {'direct', 'inverse'}:
        raise ValueError(f'Wrong direction: {direction}')

    triples_by_language = defaultdict(list)
    for glottocode in data_frame.Glottocode.unique():
        inventory = ql.get_inventory(data_frame, glottocode)
        with_voice = ql.get_voices(inventory, [voice])
        candidates = ql.get_manners(with_voice, manners)
        first_manner, second_manner = manners

        for triple in combinations(candidates, 3):
            triple_manners = [parse_consonant(seg)['manner']
                              for seg in triple]
            if direction == 'direct':
                doubled = triple_manners.count(first_manner)
            else:
                # A segment matching the first manner is never counted
                # towards the second one.
                doubled = sum(
                    1 for manner in triple_manners
                    if manner != first_manner and manner == second_manner)
            if doubled != 2:
                continue

            if len(ql.oppositions(triple, 'place')) != 1:
                continue
            if len(ql.oppositions(triple, 'manner')) != 1:
                continue

            # Look for a fourth segment that completes the triple.
            for extra in candidates:
                if extra in triple:
                    continue
                quadruple = triple + (extra,)
                if (len(ql.oppositions(quadruple, 'place')) == 2
                        and len(ql.oppositions(quadruple, 'manner')) == 2):
                    break
            else:
                # No completing segment exists in this inventory.
                triples_by_language[f'/{" ".join(triple)}/'].append(
                    glottocode)
    return triples_by_language
예제 #6
0
def get_voices(inventory, voices):
    """
    Return the sorted segments of `inventory` whose parsed 'voice' value
    (None when the parse has no 'voice' key) is contained in `voices`.
    """
    matching = (
        segment for segment in inventory
        if parse_consonant(segment).get('voice', None) in voices
    )
    return sorted(matching)
예제 #7
0
def get_manners(inventory, manners):
    """
    Return the sorted segments of `inventory` whose parsed 'manner' value
    (None when the parse has no 'manner' key) is contained in `manners`.
    """
    matching = (
        segment for segment in inventory
        if parse_consonant(segment).get('manner', None) in manners
    )
    return sorted(matching)
예제 #8
0
def count_places(inventory):
    """
    Count the distinct 'place' values among the parsed segments of
    `inventory`; segments whose parse has no 'place' key are all counted
    under the single placeholder value 'na'.
    """
    distinct_places = {
        parse_consonant(segment).get('place', 'na') for segment in inventory
    }
    return len(distinct_places)
예제 #9
0
from collections import Counter
import pandas as pd
import query_lib as ql
from IPAParser_2_0 import parse_consonant

# Tally, per language, the voiced non-nasal bilabial stops in inventories
# that contain at least one such segment marked as implosive.
d = pd.read_csv('../csv/phoible_working_sample.csv', low_memory=False)
# Number of inventories, keyed by how many matching segments they have.
voiced_stops = Counter()
# Inventories with exactly one matching segment, keyed by that segment.
voiced_stops_single = Counter()
# Inventories with several matching segments, keyed by the sorted tuple
# of those segments.
voiced_stop_combs = Counter()
for gltc in d.Glottocode.unique():
    inv = ql.get_inventory(d, gltc)
    # tmp counts the matching segments; it always equals len(tmp_stops).
    tmp = 0
    tmp_stops = []
    has_implosives = False
    for p in inv:
        parse = parse_consonant(p)
        # Keep voiced stops that are non-nasal and bilabial.
        # NOTE(review): 'nasal' and 'place' are read without a default,
        # so a parse lacking either key raises KeyError here (only
        # reached for segments already matched as voiced stops).
        if parse.get('voice', '') == 'voiced' and \
                parse.get('manner', '') == 'stop' and \
                parse['nasal'] == False and \
                parse['place'] == 'bilabial':
            tmp_stops.append(p)
            tmp += 1
            if parse.get('implosive', False) == True:
                has_implosives = True
    # Inventories without a matching implosive are not counted at all.
    if not has_implosives:
        continue
    voiced_stops[tmp] += 1
    if tmp == 1:
        voiced_stops_single[tmp_stops[0]] += 1
    else:
        voiced_stop_combs[tuple(sorted(tmp_stops))] += 1
예제 #10
0
        if all_segments_parsable(inventory):
            parsable_sample.add(inv_id)
    print(f'Sample size: {len(parsable_sample)}')

    d = d.loc[ d.apply(lambda row: row.InventoryID in parsable_sample, axis=1) ]
    for gltc in d.Glottocode.unique():
        inv_id = list(d.loc[ d.Glottocode == gltc ].InventoryID)[0]
        inv  = get_inventory(d, gltc)
        if not voice_opp_in(inv, ['stop']):
            continue
        opps = voice_opp_in(inv, ['affricate'])
        if opps:
            fricatives = get_manners(inv, ['fricative'])
            affricates = get_manners(inv, ['affricate'])
            voiced_affricates = list(filter(
                lambda x: parse_consonant(x)['voice'] == 'voiced',
                affricates))
            voiceless_affricates = list(filter(
                lambda x: parse_consonant(x)['voice'] == 'voiceless',
                affricates))
            # Check for voiced affricates that have paired
            # voiceless affricates and voiceless fricatives
            # but do not have paired voiced fricatives.
            result = []
            for affr_vcd in voiced_affricates:
                if oppositions([affr_vcd] + fricatives, 'manner'):
                    continue
                # Find the corresponding voiceless affricate.
                opps_tmp = oppositions([affr_vcd] + voiceless_affricates, 'voice')
                if opps_tmp:
                    for _, affr_vcl in opps_tmp: