예제 #1
0
파일: utils.py 프로젝트: dhdaines/g2p
def validate(mapping):
    """Fill in default contexts and check each entry has 'in' and 'out'.

    Every entry of ``mapping`` that lacks a 'context_before' or
    'context_after' key gets that key set to the empty string; entries are
    modified in place.

    Returns the same ``mapping`` object that was passed in.

    Raises ``exceptions.MalformedMapping`` when an entry lacks 'in' or
    'out', or when a ``TypeError`` occurs while processing the entries
    (e.g. the data is not a list of dict-like entries).
    """
    optional_keys = ('context_before', 'context_after')
    try:
        for entry in mapping:
            for key in optional_keys:
                if key not in entry:
                    entry[key] = ''
        # Check 'in' across all entries first, and only then 'out'.
        lacks_in = any('in' not in entry for entry in mapping)
        if lacks_in or any('out' not in entry for entry in mapping):
            raise exceptions.MalformedMapping()
    except TypeError:
        # The JSON probably is not just a list (ie could be legacy
        # readalongs format) TODO: proper exception handling
        raise exceptions.MalformedMapping()
    return mapping
예제 #2
0
파일: utils.py 프로젝트: deltork/g2p
def validate(mapping, path):
    """Fill in default contexts and check each entry has "in" and "out".

    Entries of ``mapping`` missing "context_before" or "context_after" get
    that key set to "" (the entries are modified in place). ``path`` is only
    used to build the error messages.

    Returns the same ``mapping`` object that was passed in.

    Raises ``exceptions.MalformedMapping`` when an entry lacks "in" or
    "out", or (chained from the original error) when a ``TypeError`` occurs
    while processing the entries.
    """
    try:
        for entry in mapping:
            for optional_key in ("context_before", "context_after"):
                if optional_key not in entry:
                    entry[optional_key] = ""
        # Check "in" across all entries first, and only then "out".
        every_entry_has_in = all("in" in entry for entry in mapping)
        if not (every_entry_has_in and all("out" in entry for entry in mapping)):
            message = 'Missing "in" or "out" in an entry in {}.'.format(path)
            raise exceptions.MalformedMapping(message)
    except TypeError as err:
        # The JSON probably is not just a list (ie could be legacy readalongs format)
        # TODO: proper exception handling
        message = "Formatting error in mapping in {}.".format(path)
        raise exceptions.MalformedMapping(message) from err
    return mapping
예제 #3
0
파일: utils.py 프로젝트: deltork/g2p
def load_from_csv(language, delimiter=","):
    """Parse a mapping from a CSV file.

    Args:
        language: path of the CSV file to read (opened as UTF-8).
        delimiter: field delimiter handed to ``csv.reader``.

    Returns:
        A list with one dict per non-empty row, with the keys "in", "out",
        "context_before" and "context_after" taken from columns 1-4 in that
        order. Missing context columns default to "".

    Raises:
        exceptions.MalformedMapping: if a row has only one column, i.e.
            no "out" value.
    """
    # newline="" is what the csv module documentation asks for: it lets the
    # reader do its own newline handling, so line breaks embedded in quoted
    # fields are kept intact instead of being rewritten by the text layer.
    with open(language, encoding="utf8", newline="") as f:
        rows = list(csv.reader(f, delimiter=delimiter))

    mapping = []
    for entry in rows:
        if not entry:
            # Just ignore empty lines in the CSV file
            continue

        if len(entry) == 1:
            raise exceptions.MalformedMapping(
                'Entry {} in mapping {} has no "out" value.'.format(entry, language)
            )

        # csv.reader only yields strings here, so no type coercion is needed.
        mapping.append(
            {
                "in": entry[0],
                "out": entry[1],
                "context_before": entry[2] if len(entry) > 2 else "",
                "context_after": entry[3] if len(entry) > 3 else "",
            }
        )

    return mapping
예제 #4
0
파일: utils.py 프로젝트: deltork/g2p
def load_mapping_from_path(path_to_mapping_config, index=0):
    """Load one mapping configuration from a YAML mapping config file.

    If the config has a "mappings" key, the entry at ``index`` is selected.
    If ``index`` does not exist there, a warning is logged and processing
    continues with the top-level config. For a config without a "mappings"
    key, a non-zero ``index`` only triggers a warning.

    The selected mapping's "mapping" file is loaded with ``load_from_file``
    into "mapping_data" (resolved relative to the config file's directory),
    and, when present, its "abbreviations" file is loaded with
    ``load_abbreviations_from_file`` into "abbreviations_data".

    Args:
        path_to_mapping_config: path to a ``.yml``/``.yaml`` config file.
        index: which entry of "mappings" to load. Default is 0.

    Returns:
        The selected mapping dict, augmented with the loaded data.

    Raises:
        FileNotFoundError: if the path does not exist or does not have a
            yml/yaml suffix.
        exceptions.MalformedMapping: if the YAML document is not a mapping,
            if the "mapping" key is missing (and the type is not
            "unidecode"), or if a referenced data file cannot be loaded.
    """
    path = Path(path_to_mapping_config)
    # Only an existing .yml/.yaml file is accepted as a mapping config.
    if not (path.exists() and path.suffix.endswith(("yml", "yaml"))):
        raise FileNotFoundError(
            f"No .yml/.yaml mapping config found at {path_to_mapping_config}"
        )

    # safe load it
    with open(path, encoding="utf8") as f:
        mapping = yaml.safe_load(f)

    # An empty or non-mapping document cannot be a valid config; report it
    # as a malformed mapping rather than failing on the key lookups below.
    if not isinstance(mapping, dict):
        raise exceptions.MalformedMapping(
            "Formatting error in mapping config {}.".format(path)
        )

    bad_index_message = (
        "An index of %s was provided for the mapping %s but that index does not exist in the mapping. "
        "Please check your mapping."
    )

    # If more than one mapping in the mapping config
    if "mappings" in mapping:
        try:
            selected = mapping["mappings"][index]
        except (KeyError, IndexError):
            # "mappings" is normally a list, so a bad index raises
            # IndexError; KeyError is kept for dict-shaped "mappings".
            LOGGER.warning(bad_index_message, index, path_to_mapping_config)
        else:
            LOGGER.debug(
                'Loading mapping from %s between "%s" and "%s" at index %s',
                path_to_mapping_config,
                selected.get("in_lang", "und"),
                selected.get("out_lang", "und"),
                index,
            )
            mapping = selected
    # Log the warning if an Index other than 0 was provided for a mapping config with a single mapping.
    elif index != 0:
        LOGGER.warning(bad_index_message, index, path_to_mapping_config)

    # try to load the data from the mapping data file
    if "mapping" in mapping:
        try:
            mapping["mapping_data"] = load_from_file(
                os.path.join(path.parent, mapping["mapping"])
            )
        except (OSError, exceptions.IncorrectFileType) as e:
            raise exceptions.MalformedMapping(
                f"Cannot load mapping data file specified in {path}: {e}"
            ) from e
    elif mapping.get("type", "") == "unidecode":
        # This mapping is not implemented as a regular mapping, but as custom software
        pass
    else:
        # Is "mapping" key missing?
        raise exceptions.MalformedMapping(
            'Key "mapping:" missing from a mapping in {}.'.format(path)
        )

    # load any abbreviations
    if "abbreviations" in mapping:
        try:
            mapping["abbreviations_data"] = load_abbreviations_from_file(
                os.path.join(path.parent, mapping["abbreviations"])
            )
        except (OSError, exceptions.IncorrectFileType) as e:
            raise exceptions.MalformedMapping(
                f"Cannot load abbreviations data file specified in {path}: {e}"
            ) from e
    return mapping