def write_generated_mapping_to_file(config: dict, mapping: List[dict]) -> None:
    """Write a generated mapping to disk and register it in the generated config.

    The mapping is dumped as JSON to ``os.path.join(GEN_DIR, config['mapping'])``
    (an existing file is overwritten, with an info log). ``config`` is then
    stored in the ``mappings`` list of the YAML file at ``GEN_CONFIG``: the
    first entry with the same ``in_lang``/``out_lang`` pair is replaced in
    place; if there is none, ``config`` is appended. The YAML file is always
    rewritten afterwards.

    Args:
        config: Mapping configuration. Must contain the keys ``'mapping'``
            (file name of the JSON output), ``'in_lang'`` and ``'out_lang'``.
        mapping: JSON-serializable list of rule dicts to dump.

    Raises:
        KeyError: If ``config`` lacks one of the required keys.
        OSError: If either file cannot be opened.
    """
    # Read the existing generated config. Use the same encoding as the write
    # below so non-ASCII content round-trips regardless of the locale.
    with open(GEN_CONFIG, 'r', encoding='utf8') as f:
        data = yaml.safe_load(f) or {}

    # Write the mapping itself.
    map_output_path = os.path.join(GEN_DIR, config['mapping'])
    if os.path.exists(map_output_path):
        LOGGER.info(f"Overwriting file at {map_output_path}")
    with open(map_output_path, 'w', encoding='utf8') as f:
        json.dump(mapping, f, indent=4)

    # A missing or empty ``mappings`` key is treated as an empty list, so the
    # very first generated mapping is handled like any other new entry.
    entries = data.get('mappings') or []
    lang_pair = (config['in_lang'], config['out_lang'])
    for i, cfg in enumerate(entries):
        if (cfg['in_lang'], cfg['out_lang']) == lang_pair:
            # Replace the existing entry for this language pair.
            entries[i] = config
            break
    else:
        # No entry for this language pair yet: add it.
        entries.append(config)
    data['mappings'] = entries

    # Rewrite the config in every case, including the "first mapping" case.
    with open(GEN_CONFIG, 'w', encoding='utf8') as f:
        yaml.dump(data, f, Dumper=IndentDumper, default_flow_style=False)
# NOTE(review): a second definition of ``align_to_dummy_fallback`` appears
# later in this file and would shadow this one at import time -- confirm which
# version is intended to be live.
def align_to_dummy_fallback(mapping: Mapping, io: str = 'in', write_to_file: bool = False):
    """Build a fallback mapping from one side of ``mapping`` to a dummy inventory.

    If the ``{io}_lang`` of ``mapping`` is IPA (per ``is_ipa``), its inventory
    is aligned directly against the dummy inventory with
    ``align_inventories``. Otherwise each inventory item is passed through
    ``unidecode``, lower-cased and converted with the ``und`` -> ``und-ipa``
    converter, and the result is aligned against the dummy inventory; any
    item whose converted form has no alignment is mapped to ``'t'`` and a
    warning is logged.

    Args:
        mapping: Source mapping; ``mapping.kwargs`` must contain
            ``f'{io}_lang'``.
        io: Which side of ``mapping`` to use (``'in'`` by default).
        write_to_file: If true, pass the result to
            ``write_generated_mapping_to_file``.

    Returns:
        A ``(config, rules)`` tuple: the config produced by
        ``generate_config`` and the list of ``{"in": ..., "out": ...}`` rules.
    """
    dummy_inventory = ["ɑ", "i", "u", "t", "s", "n"]
    display_name = mapping.kwargs.get('language_name', 'No Language display name in Config')
    lang = mapping.kwargs[f'{io}_lang']
    config = generate_config(lang, 'dummy', display_name, display_name)
    default_char = 't'
    if is_ipa(lang):
        rules = align_inventories(mapping.inventory(io), dummy_inventory)
    else:
        und_g2p = make_g2p('und', 'und-ipa')
        rules = [
            {"in": unicode_escape(x), "out": und_g2p(unidecode(x).lower())}
            for x in mapping.inventory(io)
        ]
        dummy_list = align_inventories([rule['out'] for rule in rules], dummy_inventory)
        # Lookup from converted form to dummy symbol; alignments with an empty
        # 'in' side are skipped.
        dummy_dict = {x['in']: x['out'] for x in dummy_list if x['in']}
        for rule in rules:
            try:
                rule['out'] = dummy_dict[rule['out']]
            except KeyError:
                # Logger.warn is a deprecated alias (removed in Python 3.13).
                LOGGER.warning(f"We couldn't guess at what {rule['in']} means, so it's being replaced with '{default_char}' instead.")
                rule['out'] = default_char
    if write_to_file:
        write_generated_mapping_to_file(config, rules)
    return config, rules
def align_to_dummy_fallback(mapping: Mapping, io: str = 'in', write_to_file: bool = False, out_dir: str = '') -> Mapping:
    """Build a fallback ``Mapping`` from one side of ``mapping`` to ``'dummy'``.

    If the ``{io}_lang`` of ``mapping`` is IPA (per ``is_ipa``), its inventory
    is aligned directly against ``DUMMY_INVENTORY`` with
    ``align_inventories``. Otherwise each inventory item is passed through
    ``unidecode``, lower-cased and converted with the ``und`` -> ``und-ipa``
    converter, and the resulting strings are aligned against
    ``DUMMY_INVENTORY``; any item whose converted form has no alignment is
    mapped to ``'t'`` and a warning is logged.

    Args:
        mapping: Source mapping; ``mapping.kwargs`` must contain
            ``f'{io}_lang'``.
        io: Which side of ``mapping`` to use (``'in'`` by default).
        write_to_file: If true, write the new mapping's config and rules with
            ``config_to_file`` / ``mapping_to_file``.
        out_dir: Directory to write to. If empty, or if it is not an existing
            directory (a warning is logged in that case), the write methods
            are called without arguments so they use their default location.

    Returns:
        The newly constructed ``Mapping`` with ``out_lang`` set to ``'dummy'``.
    """
    lang = mapping.kwargs[f'{io}_lang']
    config = {'in_lang': lang, 'out_lang': 'dummy'}
    default_char = 't'
    if is_ipa(lang):
        rules = align_inventories(mapping.inventory(io), DUMMY_INVENTORY)
    else:
        und_g2p = make_g2p('und', 'und-ipa')
        rules = [
            {"in": unicode_escape(x), "out": und_g2p(unidecode(x).lower()).output_string}
            for x in mapping.inventory(io)
        ]
        dummy_list = align_inventories([rule['out'] for rule in rules], DUMMY_INVENTORY)
        # Lookup from converted form to dummy symbol; alignments with an empty
        # 'in' side are skipped.
        dummy_dict = {x['in']: x['out'] for x in dummy_list if x['in']}
        for rule in rules:
            try:
                rule['out'] = dummy_dict[rule['out']]
            except KeyError:
                # Logger.warn is a deprecated alias (removed in Python 3.13).
                LOGGER.warning(f"We couldn't guess at what {rule['in']} means, so it's being replaced with '{default_char}' instead.")
                rule['out'] = default_char
    config['mapping'] = rules
    dummy_mapping = Mapping(**config)
    if write_to_file:
        if out_dir and not os.path.isdir(out_dir):
            LOGGER.warning(f'{out_dir} is not a directory. Writing to default instead.')
            # Actually fall back to the default location, as the message says.
            out_dir = ''
        if out_dir:
            dummy_mapping.config_to_file(out_dir)
            dummy_mapping.mapping_to_file(out_dir)
        else:
            dummy_mapping.config_to_file()
            dummy_mapping.mapping_to_file()
    return dummy_mapping