def validate(mapping):
    """Fill in default context fields and check each entry has "in" and "out".

    Every entry of ``mapping`` that lacks a "context_before" or
    "context_after" key gets that key set to the empty string. The entries
    are modified in place.

    Args:
        mapping: An iterable of dict-like mapping entries.

    Returns:
        The same ``mapping`` object, after the defaults have been filled in.

    Raises:
        exceptions.MalformedMapping: If any entry lacks an "in" or "out" key,
            or if processing ``mapping`` raises a ``TypeError``.
    """
    try:
        for io in mapping:
            if "context_before" not in io:
                io["context_before"] = ""
            if "context_after" not in io:
                io["context_after"] = ""
        valid = all("in" in d for d in mapping) and all("out" in d for d in mapping)
        if not valid:
            raise exceptions.MalformedMapping(
                'Missing "in" or "out" in an entry in the mapping.'
            )
        return mapping
    except TypeError as e:
        # The JSON probably is not just a list (ie could be legacy readalongs format)
        # TODO: proper exception handling
        # Chain the TypeError so the underlying cause stays visible in tracebacks.
        raise exceptions.MalformedMapping("Formatting error in the mapping.") from e
def validate(mapping, path):
    """Check a loaded mapping and give its entries default context fields.

    Any entry without a "context_before" or "context_after" key has the
    missing key set to "". Entries are updated in place.

    Args:
        mapping: An iterable of dict-like mapping entries.
        path: Where the mapping came from; only used in error messages.

    Returns:
        The ``mapping`` object that was passed in.

    Raises:
        exceptions.MalformedMapping: If an entry has no "in" or no "out" key,
            or if a ``TypeError`` is raised while processing ``mapping``.
    """
    try:
        for entry in mapping:
            for context_key in ("context_before", "context_after"):
                if context_key not in entry:
                    entry[context_key] = ""
        # Check all the "in" keys first, then all the "out" keys.
        if any("in" not in entry for entry in mapping) or any(
            "out" not in entry for entry in mapping
        ):
            raise exceptions.MalformedMapping(
                'Missing "in" or "out" in an entry in {}.'.format(path)
            )
        return mapping
    except TypeError as err:
        # The JSON probably is not just a list (ie could be legacy readalongs format)
        # TODO: proper exception handling
        raise exceptions.MalformedMapping(
            "Formatting error in mapping in {}.".format(path)
        ) from err
def load_from_csv(language, delimiter=","):
    """Parse a mapping from a CSV file.

    Columns are read by position as "in", "out", "context_before" and
    "context_after". The two context columns are optional and default to the
    empty string. Empty rows are skipped.

    Args:
        language: Path of the CSV file to read; it is opened as UTF-8.
        delimiter: Field delimiter passed to ``csv.reader``.

    Returns:
        A list of dicts, one per non-empty row, each with the keys "in",
        "out", "context_before" and "context_after".

    Raises:
        exceptions.MalformedMapping: If a row has only one column, i.e. no
            "out" value.
    """
    mapping = []
    # newline="" hands newline handling to the csv module, as its documentation
    # requires; otherwise "\r\n" inside quoted fields is rewritten on read.
    with open(language, encoding="utf8", newline="") as f:
        for entry in csv.reader(f, delimiter=delimiter):
            if not entry:
                # Just ignore empty lines in the CSV file
                continue
            if len(entry) == 1:
                raise exceptions.MalformedMapping(
                    'Entry {} in mapping {} has no "out" value.'.format(entry, language)
                )
            # csv.reader yields every field as str under the default quoting,
            # so no numeric-to-string conversion is needed.
            mapping.append(
                {
                    "in": entry[0],
                    "out": entry[1],
                    "context_before": entry[2] if len(entry) > 2 else "",
                    "context_after": entry[3] if len(entry) > 3 else "",
                }
            )
    return mapping
def load_mapping_from_path(path_to_mapping_config, index=0):
    """Load a mapping configuration from a YAML file.

    If the config holds several mappings under a "mappings" key, the one at
    position ``index`` is selected. The default is 0.

    Args:
        path_to_mapping_config: Path to a ``.yml``/``.yaml`` mapping config.
        index: Position of the mapping to load when the config has a
            "mappings" list.

    Returns:
        The selected mapping dict. When it has a "mapping" key, the loaded
        data is stored under "mapping_data"; when it has an "abbreviations"
        key, the loaded data is stored under "abbreviations_data".

    Raises:
        FileNotFoundError: If the path does not exist or does not have a
            yml/yaml suffix.
        exceptions.MalformedMapping: If the YAML document is not a dict, if
            the mapping has no "mapping" key and is not of type "unidecode",
            or if a referenced data file cannot be loaded.
    """
    path = Path(path_to_mapping_config)
    # Only an existing yml/yaml file is accepted as a mapping config.
    if not (path.exists() and path.suffix.endswith(("yml", "yaml"))):
        raise FileNotFoundError(
            f"Mapping config {path} does not exist or is not a .yml/.yaml file."
        )
    # safe load it
    with open(path, encoding="utf8") as f:
        mapping = yaml.safe_load(f)
    # An empty or non-dict document would otherwise fail below with a bare
    # TypeError/AttributeError.
    if not isinstance(mapping, dict):
        raise exceptions.MalformedMapping(
            f"Mapping config {path} does not contain a YAML dictionary."
        )
    # If more than one mapping in the mapping config
    if "mappings" in mapping:
        try:
            selected = mapping["mappings"][index]
        except (IndexError, KeyError):
            # A list raises IndexError (not KeyError) for a bad index; catch
            # both so the warning is logged. The top-level config is then
            # checked as-is by the code below.
            LOGGER.warning(
                "An index of %s was provided for the mapping %s but that index does not exist in the mapping. "
                "Please check your mapping.",
                index,
                path_to_mapping_config,
            )
        else:
            LOGGER.debug(
                'Loading mapping from %s between "%s" and "%s" at index %s',
                path_to_mapping_config,
                selected.get("in_lang", "und"),
                selected.get("out_lang", "und"),
                index,
            )
            mapping = selected
    # Log the warning if an Index other than 0 was provided for a mapping config with a single mapping.
    elif index != 0:
        LOGGER.warning(
            "An index of %s was provided for the mapping %s but that index does not exist in the mapping. "
            "Please check your mapping.",
            index,
            path_to_mapping_config,
        )
    # try to load the data from the mapping data file
    if "mapping" in mapping:
        try:
            mapping["mapping_data"] = load_from_file(
                os.path.join(path.parent, mapping["mapping"])
            )
        except (OSError, exceptions.IncorrectFileType) as e:
            raise exceptions.MalformedMapping(
                f"Cannot load mapping data file specified in {path}: {e}"
            ) from e
    elif mapping.get("type", "") == "unidecode":
        # This mapping is not implemented as a regular mapping, but as custom software
        pass
    else:
        # Is "mapping" key missing?
        raise exceptions.MalformedMapping(
            'Key "mapping:" missing from a mapping in {}.'.format(path)
        )
    # load any abbreviations
    if "abbreviations" in mapping:
        try:
            mapping["abbreviations_data"] = load_abbreviations_from_file(
                os.path.join(path.parent, mapping["abbreviations"])
            )
        except (OSError, exceptions.IncorrectFileType) as e:
            raise exceptions.MalformedMapping(
                f"Cannot load abbreviations data file specified in {path}: {e}"
            ) from e
    return mapping