def merge(self, data, *, lang="en", key=None):
    """Merge a dictionary keyed by names in the given language into this data map.

    If key is given, each entry is stored whole under that subkey;
    otherwise the entry is merged in without overwriting existing fields.
    Returns self to support chaining.
    """
    # Validate first: every key in the incoming data must resolve to a
    # known name in the requested language.
    known_names = self.names(lang)
    missing = [name for name in data if name not in known_names]
    if missing:
        raise Exception(
            "Several invalid names found. Invalid entries are "
            + ','.join(missing))

    # Validation complete. Not every base entry needs to be covered;
    # partial merges are fine.
    for name, incoming in data.items():
        target = self.entry_of(lang, name)

        if key:
            target[key] = incoming
        elif hasattr(incoming, 'keys'):
            joindicts(target, incoming)
        else:
            # A key-less merge requires dict-shaped data on both sides.
            raise Exception(
                "Invalid data, the data map must be a dictionary for a keyless merge"
            )

    return self
def merge(self, data, *, key_join='name_en', key=None, key_join_fn=None):
    """Merges a dictionary keyed by the names in a language to this data map

    If a key is given, it will be added as a subfield under key,
    Otherwise it will be merged without overwrite.

    Key join is the field to merge on. If the field is id, it will
    automatically convert to int. If any other type conversion is required,
    supply a key join function.

    Returns self to support chaining.
    """
    # FIX: collections.Mapping was removed in Python 3.10; use the ABC from
    # collections.abc instead. Local import keeps the fix self-contained.
    from collections.abc import Mapping

    def convert_key(key_value):
        # Custom converter wins; otherwise 'id' joins coerce to int.
        if key_join_fn:
            key_value = key_join_fn(key_value)
        elif key_join == 'id':
            key_value = int(key_value)
        return key_value

    def extract_field(entry):
        value = entry[key_join]
        # Multi-language fields store a dict; drill into the join field.
        if isinstance(value, Mapping):
            value = value[key_join]
        return convert_key(value)

    # validation, make sure it links
    entry_map = {extract_field(e): e for e in self.values()}
    converted_keys = [convert_key(k) for k in data.keys()]
    unlinked = [k for k in converted_keys if k not in entry_map.keys()]
    if unlinked:
        # FIX: converted keys may be ints (key_join='id'); str() them so
        # building the error message cannot itself raise TypeError.
        raise Exception(
            "Several invalid names found in sub data map. Invalid entries are "
            + ','.join(str(k) for k in unlinked))

    # validation complete, it may not link to all base entries but thats ok
    for data_key, data_entry in data.items():
        base_entry = entry_map[convert_key(data_key)]

        if key:
            base_entry[key] = data_entry
        elif isinstance(data_entry, Mapping):
            if 'name' in data_entry:
                # The merge changes the entry's name; re-register so the
                # name index stays consistent.
                self._unregister_entry(base_entry)
                joindicts(base_entry, data_entry)
                self._register_entry(base_entry)
            else:
                joindicts(base_entry, data_entry)
        else:
            # If we get here, its a key-less merge with a non-dict
            # We cannot merge a dictionary with a non-dictionary
            raise Exception(
                "Invalid data, the data map must be a dictionary for a keyless merge"
            )

    return self
def load_split_data_map(self, parent_map: DataMap, data_directory, lang="en", validate=True):
    """Loads a data map by combining separate maps in a folder into one.
    Just like a normal data map, it is anchored to the translation map.
    """
    # TODO: WILL BE REFACTORED TO USE THE NEW MERGE-FLOW
    data_directory = self.get_data_path(data_directory)

    all_subdata = []
    for dir_entry in os.scandir(data_directory):
        if not dir_entry.is_file():
            continue
        if not dir_entry.name.lower().endswith('.json'):
            continue

        with open(dir_entry.path, encoding="utf-8") as f:
            subdata_json = json.load(f)

        # Check if the data is of the correct type (is a dict)
        if not hasattr(subdata_json, 'keys'):
            # FIX: report the path, not the DirEntry repr.
            raise Exception(
                f"Invalid data in {dir_entry.path}, the data map must be a dictionary"
            )

        all_subdata.append(subdata_json)

    # todo: validate key conflicts
    # todo: store origins of keys somehow
    data = joindicts({}, *all_subdata)

    # Set validation function depending on validation setting
    ensure_fn = ensure if validate else ensure_warn

    # Hold all keys yet to be joined. If any exist, it didn't join
    unlinked = validate_key_join(parent_map, data.keys(), join_lang=lang)
    ensure_fn(
        not unlinked,
        "Several invalid names found. Invalid entries are "
        + ','.join(unlinked))

    result = {}
    # FIX: renamed loop variable so the builtin `id` is not shadowed.
    for entry_id, entry in parent_map.items():
        name = entry.name(lang)
        if name not in data:
            continue
        result[entry_id] = joindicts({}, entry, data[name])

    return DataMap(result, languages=self.required_languages)
def add_json(self, data_file, *, key=None, join=None):
    """Loads a data map from a json file, adds it to the base map, and returns self.

    If a key is given, it will be added under key,
    Otherwise it will be merged without overwrite.
    """
    # FIX: collections.Mapping was removed in Python 3.10; use the ABC from
    # collections.abc instead. Local import keeps the fix self-contained.
    from collections.abc import Mapping

    if not join:
        raise ValueError('Join must have a value')

    data = self.reader.load_json(self._get_filename(data_file))

    # NOTE: removed an unused inner helper (derive_key) that duplicated the
    # str(e[join]) logic below without ever being called.

    # validation, make sure it links. Keys are compared as strings so json
    # string keys match int fields in the base map.
    entry_map = {str(e[join]): e for e in self.data_map.values()}
    converted_keys = [str(k) for k in data.keys()]
    unlinked = [k for k in converted_keys if k not in entry_map.keys()]
    if unlinked:
        raise Exception(
            "Several invalid names found in sub data map. Invalid entries are "
            + ','.join('None' if e is None else str(e) for e in unlinked))

    # validation complete, it may not link to all base entries but thats ok
    for data_key, data_entry in data.items():
        base_entry = entry_map[str(data_key)]

        if key:
            base_entry[key] = data_entry
        elif isinstance(data_entry, Mapping):
            joindicts(base_entry, data_entry)
        else:
            # If we get here, its a key-less merge with a non-dict
            # We cannot merge a dictionary with a non-dictionary
            raise Exception(
                "Invalid data, the data map must be a dictionary for a keyless merge"
            )

    return self
def merge_list(base, rows: typing.Iterable[dict], key=None, groups=None, many=False):
    """Routine to merge lists of dictionaries together using one or more keys.
    The keys used are determined by first sequential key of the first row.

    If the key is an id, it will join on that, but if it is a name,
    it will join on that and key_ex fields.
    """
    # FIX: the default was a mutable list ([]), shared across calls.
    # Normalize None to an empty list instead.
    if groups is None:
        groups = []

    def create_key_fields(data_map, column_name):
        # Derive which fields form the join key: 'id' for id joins,
        # name_<lang> plus the map's extra key fields for name joins.
        lang = derive_lang(column_name)
        key_fields = []
        if lang is None:
            key_fields.append('id')
        else:
            key_fields.append(f'name_{lang}')
            key_fields.extend(data_map.keys_ex)
        return key_fields

    def create_key_fn(key_fields):
        # Build a function mapping a row dict to its join-key tuple.
        # FIX: renamed the parameter so the builtin `dict` is not shadowed.
        def derive_key(row_dict):
            items = []
            for k in key_fields:
                # base_-prefixed fields take priority over unprefixed ones
                if f'base_{k}' in row_dict:
                    items.append(row_dict[f'base_{k}'])
                else:
                    items.append(row_dict[k])
            return tuple(str(i) for i in items)
        return derive_key

    if many and not key:
        raise ValueError('Key must have a value')

    if not rows:
        return

    # Create keying function
    first_column = next(iter(rows[0].keys()))
    key_fields = create_key_fields(base, first_column)
    derive_key = create_key_fn(key_fields)

    # group rows
    keyed_data = {}
    for row in rows:
        row_key = derive_key(row)

        # Delete key fields. Its possible for base_name_en AND name_en to be in the same row.
        # Therefore, prioritize deleting base_ versions first
        for k in key_fields:
            if f'base_{k}' in row:
                del row[f'base_{k}']
            elif k in row:
                del row[k]

        if groups:
            row = util.group_fields(row, groups=groups)

        entry = keyed_data.setdefault(row_key, [])
        entry.append(row)
        if not many and len(entry) > 1:
            raise ValueError(
                f"Key {row_key} has too many matching entries in sub data")

    # Re-key base entries by the same derived key so rows can be matched up
    base = {derive_key(e): e for e in base.values()}

    # Test the keys to see that sub's keys exist in base
    # (FIX: this was a stray bare-string statement, now a real comment)
    unlinked = [k for k in keyed_data.keys() if k not in base.keys()]
    if unlinked:
        raise Exception(
            "Several entries in sub data map cannot be joined. Their keys are "
            + ','.join('None' if e is None else str(e) for e in unlinked))

    for data_key, data_entries in keyed_data.items():
        base_entry = base[data_key]

        if key:
            # Under a key, store the list (many) or the single entry.
            base_entry[key] = data_entries if many else data_entries[0]
        elif isinstance(data_entries[0], abc.Mapping):
            util.joindicts(base_entry, data_entries[0])
        else:
            # We cannot merge a dictionary with a non-dictionary
            raise Exception(
                "Invalid data, the data map must be a dictionary for a keyless merge"
            )