def fill_gaps(input_path, output_path, manner, voice, feature1='place', feature2='manner'):
    """Look for a fourth segment completing every gap triple and save the outcome.

    The keys of the JSON document at ``input_path`` are expected to look like
    ``'/a b c/'``: the first and last characters are dropped and the rest is
    split on whitespace into exactly three segments.  For each key the first
    acceptable candidate (or ``None`` when there is none) is stored, and the
    resulting mapping is written to ``output_path`` as JSON.

    NOTE: ``segments`` and ``different`` are not parameters; they are looked
    up in the enclosing module scope when the function runs.

    Args:
        input_path: Path of the JSON file holding the gap triples.
        output_path: Path of the JSON file to write the fillers to.
        manner: Passed (as a one-element list) to ``ql.get_manners`` to
            narrow the candidate pool.
        voice: Passed (as a one-element list) to ``ql.get_voices`` to narrow
            the candidate pool further.
        feature1: First feature name handed to ``ql.oppositions``.
        feature2: Second feature name handed to ``ql.oppositions``.
    """
    print(input_path)
    with open(input_path, 'r', encoding='utf-8') as source:
        gaps = json.load(source)

    candidates = ql.get_voices(ql.get_manners(segments, [manner]), [voice])

    def completes(quadruple):
        # Both opposition lists are always computed before comparing sizes.
        first = list(ql.oppositions(quadruple, feature1))
        second = list(ql.oppositions(quadruple, feature2))
        return len(first) == 2 and len(second) == 2

    no_match = object()  # sentinel: distinguishes "nothing found" from any real segment
    fillers = {}
    for key in gaps:
        a, b, c = key[1:-1].split()
        found = next(
            (
                candidate
                for candidate in candidates
                if different(a, b, c, candidate) and completes((a, b, c, candidate))
            ),
            no_match,
        )
        if found is no_match:
            print(f'{key} cannot be filled.')
            fillers[key] = None
        else:
            print(f'{key} -> {found}')
            fillers[key] = found

    with open(output_path, 'w', encoding='utf-8') as sink:
        json.dump(fillers, sink, indent=2, ensure_ascii=False)
def enumerate_triples_from_list(segments: List[str], manners: List[str]) -> List[str]:
    """Return the triples from ``segments`` that no fourth segment completes.

    ``segments`` is first narrowed with ``ql.get_manners(segments, manners)``.
    Every 3-combination of the narrowed list is kept as a candidate when
    ``ql.oppositions`` yields exactly one item for both ``'voice'`` and
    ``'place'``.  A candidate is dropped again if some other narrowed segment
    turns it into a quadruple for which ``ql.oppositions`` yields exactly two
    items for both features.

    Args:
        segments: Segments to search.
        manners: Manner values forwarded to ``ql.get_manners``.

    Returns:
        A list of strings of the form ``'/a b c/'``, one per unfilled triple,
        in the order produced by ``itertools.combinations``.
    """
    filtered = ql.get_manners(segments, manners)

    def opposition_count(group, feature):
        # list() keeps the count valid whether ql.oppositions hands back a
        # sized container or a lazy iterable; the original mixed both forms.
        return len(list(ql.oppositions(group, feature)))

    result = []
    for triple in combinations(filtered, 3):
        if not (opposition_count(triple, 'voice') == 1
                and opposition_count(triple, 'place') == 1):
            continue
        plug_found = any(
            opposition_count((*triple, d), 'voice') == 2
            and opposition_count((*triple, d), 'place') == 2
            for d in filtered
            if d not in triple
        )
        if not plug_found:
            result.append(f'/{" ".join(triple)}/')
    return result
def enumerate_triples(data_frame: pd.DataFrame, manners: Tuple[str, str], voice: str, direction: str) -> Dict[str, List[str]]:
    """Collect, per language, the place/manner triples that lack a fourth member.

    ``manners`` must contain exactly two elements.  ``direction`` selects
    which of them has to occur twice inside a triple:

    * ``'direct'``  -- two elements of the triple have the first manner;
    * ``'inverse'`` -- two elements of the triple have the second manner.

    Returns:
        A mapping from ``'/a b c/'`` to the glottocodes in which that triple
        was found without any completing fourth segment.

    Raises:
        ValueError: If ``direction`` is neither ``'direct'`` nor ``'inverse'``.
    """
    if direction not in {'direct', 'inverse'}:
        raise ValueError(f'Wrong direction: {direction}')

    def closes_square(quadruple):
        return (len(ql.oppositions(quadruple, 'place')) == 2
                and len(ql.oppositions(quadruple, 'manner')) == 2)

    gaps_by_triple = defaultdict(list)
    for glottocode in data_frame.Glottocode.unique():
        inventory = ql.get_inventory(data_frame, glottocode)
        same_voice = ql.get_voices(inventory, [voice])
        segments = ql.get_manners(same_voice, manners)
        manner1, manner2 = manners

        for triple in combinations(segments, 3):
            found_manners = [parse_consonant(el)['manner'] for el in triple]
            manner1_count = sum(m == manner1 for m in found_manners)
            # An element is only tested against the second manner when it did
            # not already match the first one.
            manner2_count = sum(
                m != manner1 and m == manner2 for m in found_manners
            )
            # direction is known to be one of the two valid values here.
            relevant_count = manner1_count if direction == 'direct' else manner2_count
            if relevant_count != 2:
                continue

            is_candidate = (len(ql.oppositions(triple, 'place')) == 1
                            and len(ql.oppositions(triple, 'manner')) == 1)
            if not is_candidate:
                continue

            plug_found = any(
                closes_square((*triple, other))
                for other in segments
                if other not in triple
            )
            if not plug_found:
                gaps_by_triple[f'/{" ".join(triple)}/'].append(glottocode)
    return gaps_by_triple
def check_gaps(input_path, feature1='place', feature2='manner'):
    """Validate a gap-fillers JSON file and exit with status 1 on a bad entry.

    The file at ``input_path`` maps keys of the form ``'/a b c/'`` to a filler
    segment or ``null``.  Entries whose filler is ``None`` are skipped.  For
    every other entry the triple and its filler form a quadruple, which is
    valid only if ``ql.oppositions`` yields exactly two items for BOTH
    ``feature1`` and ``feature2``.

    On the first invalid quadruple the quadruple and ``'Error!'`` are printed
    and the process exits via ``sys.exit(1)``.  If every entry is valid,
    ``'Passed.'`` is printed.

    Args:
        input_path: Path of the JSON file to check.
        feature1: First feature name handed to ``ql.oppositions``.
        feature2: Second feature name handed to ``ql.oppositions``.
    """
    with open(input_path, 'r', encoding='utf-8') as inp:
        gap_fillers = json.load(inp)

    for key, filler in gap_fillers.items():
        if filler is None:
            continue
        a, b, c = key[1:-1].split()
        quadruple = a, b, c, filler
        # The two-way test must be negated as a whole.  The previous
        # `not A and B` form parsed as `(not A) and B` and therefore missed
        # quadruples that failed on feature2 or on both features.
        is_valid = (len(ql.oppositions(quadruple, feature1)) == 2
                    and len(ql.oppositions(quadruple, feature2)) == 2)
        if not is_valid:
            print(quadruple)
            print('Error!')
            sys.exit(1)
    print('Passed.')
def enumerate_triples(data_frame: pd.DataFrame, manners: List[str]) -> Dict[str, List[str]]:
    """Collect, per language, the voice/place triples that lack a fourth member.

    For every distinct ``Glottocode`` in ``data_frame`` the ``Phoneme`` values
    of its rows are narrowed with ``ql.get_manners``.  A 3-combination of the
    narrowed segments is a candidate when ``ql.oppositions`` yields exactly
    one item for both ``'voice'`` and ``'place'``; it is reported unless some
    other narrowed segment extends it to a quadruple for which
    ``ql.oppositions`` yields exactly two items for both features.

    Returns:
        A mapping from ``'/a b c/'`` to the glottocodes in which that triple
        was found without any completing fourth segment.
    """
    def is_candidate(triple):
        return (len(list(ql.oppositions(triple, 'voice'))) == 1
                and len(list(ql.oppositions(triple, 'place'))) == 1)

    def closes_square(quadruple):
        return (len(ql.oppositions(quadruple, 'voice')) == 2
                and len(ql.oppositions(quadruple, 'place')) == 2)

    gaps_by_triple = defaultdict(list)
    for glottocode in data_frame.Glottocode.unique():
        language_rows = data_frame.loc[data_frame.Glottocode == glottocode]
        pool = ql.get_manners(list(language_rows.Phoneme), manners)

        for triple in combinations(pool, 3):
            if not is_candidate(triple):
                continue
            plug_found = any(
                closes_square((*triple, other))
                for other in pool
                if other not in triple
            )
            if not plug_found:
                gaps_by_triple[f'/{" ".join(triple)}/'].append(glottocode)
    return gaps_by_triple
return False if places_congruent(a, x) or places_congruent(b, x) or places_congruent( c, x): return False return True if __name__ == '__main__': with open('../json/affricate_gaps.json', 'r', encoding='utf-8') as inp: gaps = json.load(inp) d = pd.read_csv('../csv/phoible_working_sample.csv', low_memory=False) segments = list(d.Phoneme.unique()) reference_segments = ql.get_manners(segments, ['affricate']) result = {} for key in gaps: a, b, c = key[1:-1].split() for d in filter(lambda x: different(a, b, c, x), reference_segments): quadruple = a, b, c, d opps_list_voice = list(ql.oppositions(quadruple, 'voice')) opps_list_place = list(ql.oppositions(quadruple, 'place')) if len(opps_list_voice) == 2 and len(opps_list_place) == 2: print(f'{key} -> {d}') result[key] = d break else: print(f'{key} cannot be filled.') result[key] = None with open('../json/affricate_gaps_fillers.json', 'w', encoding='utf-8') as out: json.dump(result, out, indent=2, ensure_ascii=False)
import json
import sys

import query_lib as ql

# Sanity check for 'affricate_gaps_fillers.json': each non-null filler,
# together with the triple encoded in its key ('/a b c/'), must form a
# quadruple for which ql.oppositions yields exactly two items for BOTH
# 'voice' and 'place'.  The script exits with status 1 on the first
# quadruple that does not.
with open('affricate_gaps_fillers.json', 'r', encoding='utf-8') as inp:
    gap_fillers = json.load(inp)

for k, x in gap_fillers.items():
    if x is None:
        continue
    a, b, c = k[1:-1].split()
    quadruple = a, b, c, x
    print(quadruple)
    # Negate the whole conjunction: `not A and B` parsed as `(not A) and B`
    # and let quadruples failing on 'place' (or on both features) through.
    if not (len(ql.oppositions(quadruple, 'voice')) == 2
            and len(ql.oppositions(quadruple, 'place')) == 2):
        sys.exit(1)