Exemple #1
0
def write_generated_mapping_to_file(config: dict, mapping: List[dict]) -> None:
    """Write a generated mapping to disk and register it in the generated config.

    The mapping is dumped as JSON to ``GEN_DIR/<config['mapping']>`` (an
    existing file is overwritten). The YAML file at ``GEN_CONFIG`` is then
    updated so that its ``mappings`` list contains ``config``: an entry with
    the same ``in_lang``/``out_lang`` pair is replaced in place, otherwise
    ``config`` is appended. The YAML file is rewritten in every case,
    including when the list was previously empty or missing.

    Args:
        config: Configuration entry for the mapping. Must contain the keys
            ``'mapping'`` (output file name), ``'in_lang'`` and ``'out_lang'``.
        mapping: The mapping rules to serialize as JSON.
    """
    # Read the current generated config. Use the same encoding that is used
    # for writing below; an empty YAML document loads as None.
    with open(GEN_CONFIG, 'r', encoding='utf8') as f:
        data = yaml.safe_load(f) or {}

    # Write the mapping itself.
    map_output_path = os.path.join(GEN_DIR, config['mapping'])
    if os.path.exists(map_output_path):
        LOGGER.info(f"Overwriting file at {map_output_path}")
    with open(map_output_path, 'w', encoding='utf8') as f:
        json.dump(mapping, f, indent=4)

    # A missing or null ``mappings`` entry is treated as an empty list.
    mappings = data.get('mappings') or []
    for i, cfg in enumerate(mappings):
        if cfg['in_lang'] == config['in_lang'] and cfg['out_lang'] == config['out_lang']:
            # Replace the first entry for this language pair.
            mappings[i] = config
            break
    else:
        # No entry for this language pair yet.
        mappings.append(config)
    data['mappings'] = mappings

    # Rewrite the config once, whichever branch was taken above.
    with open(GEN_CONFIG, 'w', encoding='utf8') as f:
        yaml.dump(data, f, Dumper=IndentDumper, default_flow_style=False)
def align_to_dummy_fallback(mapping: Mapping, io: str = 'in', write_to_file: bool = False):
    """Build a fallback mapping from one side of ``mapping`` onto a small dummy inventory.

    If the selected language code is recognized by ``is_ipa``, the inventory
    is aligned to the dummy inventory directly with ``align_inventories``.
    Otherwise each character is passed through ``unidecode``, lower-cased and
    run through the transducer returned by ``make_g2p('und', 'und-ipa')``;
    the results are then aligned to the dummy inventory, and any character
    that cannot be resolved that way is mapped to ``'t'`` with a warning.

    NOTE(review): this file also contains a later definition with the same
    name and an extra ``out_dir`` parameter; if both live in one module, the
    later definition is the one that stays bound to the name.

    Args:
        mapping: Source mapping; ``mapping.kwargs`` and ``mapping.inventory``
            are read.
        io: Which side of ``mapping`` to use; selects ``mapping.kwargs[f'{io}_lang']``
            and ``mapping.inventory(io)``.
        write_to_file: When true, pass the result to
            ``write_generated_mapping_to_file``.

    Returns:
        A ``(config, rules)`` tuple: the config produced by ``generate_config``
        and the list of generated rules.
    """
    dummy_inventory = ["ɑ", "i", "u", "t", "s", "n"]
    display_name = mapping.kwargs.get('language_name', 'No Language display name in Config')
    lang = mapping.kwargs[f'{io}_lang']
    config = generate_config(lang, 'dummy', display_name, display_name)
    default_char = 't'
    if is_ipa(lang):
        rules = align_inventories(mapping.inventory(io), dummy_inventory)
    else:
        und_g2p = make_g2p('und', 'und-ipa')
        rules = [{"in": unicode_escape(x), "out": und_g2p(unidecode(x).lower())} for x in mapping.inventory(io)]
        dummy_list = align_inventories([rule['out'] for rule in rules], dummy_inventory)
        # Lookup from intermediate output to dummy symbol; entries with an
        # empty 'in' are skipped.
        dummy_dict = {x['in']: x['out'] for x in dummy_list if x['in']}

        for rule in rules:
            try:
                rule['out'] = dummy_dict[rule['out']]
            except KeyError:
                # ``warning`` rather than the deprecated ``warn`` alias.
                LOGGER.warning(f"We couldn't guess at what {rule['in']} means, so it's being replaced with '{default_char}' instead.")
                rule['out'] = default_char

    if write_to_file:
        write_generated_mapping_to_file(config, rules)
    return config, rules
def align_to_dummy_fallback(mapping: Mapping, io: str = 'in', write_to_file: bool = False, out_dir: str = ''):
    """Build a fallback ``Mapping`` from one side of ``mapping`` onto ``DUMMY_INVENTORY``.

    If the selected language code is recognized by ``is_ipa``, the inventory
    is aligned to ``DUMMY_INVENTORY`` directly with ``align_inventories``.
    Otherwise each character is passed through ``unidecode``, lower-cased and
    run through the transducer returned by ``make_g2p('und', 'und-ipa')``
    (its ``output_string`` is used); the results are then aligned to
    ``DUMMY_INVENTORY``, and any character that cannot be resolved that way
    is mapped to ``'t'`` with a warning.

    Args:
        mapping: Source mapping; ``mapping.kwargs`` and ``mapping.inventory``
            are read.
        io: Which side of ``mapping`` to use; selects ``mapping.kwargs[f'{io}_lang']``
            and ``mapping.inventory(io)``.
        write_to_file: When true, write the new mapping's config and rules
            via ``config_to_file`` / ``mapping_to_file``.
        out_dir: Directory to write to. If empty, or if it is not an existing
            directory, the default location of ``config_to_file`` /
            ``mapping_to_file`` is used (a warning is logged in the latter case).

    Returns:
        A new ``Mapping`` whose ``in_lang`` is the selected language and whose
        ``out_lang`` is ``'dummy'``.
    """
    lang = mapping.kwargs[f'{io}_lang']
    config = {'in_lang': lang, 'out_lang': 'dummy'}
    default_char = 't'
    if is_ipa(lang):
        rules = align_inventories(mapping.inventory(io), DUMMY_INVENTORY)
    else:
        und_g2p = make_g2p('und', 'und-ipa')
        rules = [{"in": unicode_escape(x), "out": und_g2p(unidecode(x).lower()).output_string} for x in mapping.inventory(io)]
        dummy_list = align_inventories([rule['out'] for rule in rules], DUMMY_INVENTORY)
        # Lookup from intermediate output to dummy symbol; entries with an
        # empty 'in' are skipped.
        dummy_dict = {x['in']: x['out'] for x in dummy_list if x['in']}

        for rule in rules:
            try:
                rule['out'] = dummy_dict[rule['out']]
            except KeyError:
                LOGGER.warning(f"We couldn't guess at what {rule['in']} means, so it's being replaced with '{default_char}' instead.")
                rule['out'] = default_char

    config['mapping'] = rules
    dummy_mapping = Mapping(**config)
    if write_to_file:
        if out_dir and os.path.isdir(out_dir):
            dummy_mapping.config_to_file(out_dir)
            dummy_mapping.mapping_to_file(out_dir)
        else:
            if out_dir:
                # Previously this only logged and then wrote nothing; now the
                # fallback announced in the message actually happens.
                LOGGER.warning(f'{out_dir} is not a directory. Writing to default instead.')
            dummy_mapping.config_to_file()
            dummy_mapping.mapping_to_file()
    return dummy_mapping