Example #1
    def merge(self, data, *, lang="en", key=None):
        """Merges a dictionary keyed by the names in a language to this data map
        
        If a key is given, the new data is stored under that key;
        otherwise it is merged in without overwriting existing fields.

        Returns self to support chaining.
        """
        # Validation: make sure every key in the incoming data links to a base entry
        data_names = self.names(lang)
        unlinked = [name for name in data.keys() if name not in data_names]
        if unlinked:
            raise Exception(
                "Several invalid names found. Invalid entries are " +
                ','.join(unlinked))

        # Validation complete. The data may not link to all base entries, but that's ok.
        for data_key, data_entry in data.items():
            base_entry = self.entry_of(lang, data_key)

            if key:
                base_entry[key] = data_entry
            elif hasattr(data_entry, 'keys'):
                joindicts(base_entry, data_entry)
            else:
                # If we get here, it's a key-less merge with a non-dict
                # We cannot merge a dictionary with a non-dictionary
                raise Exception(
                    "Invalid data, the data map must be a dictionary for a keyless merge"
                )

        return self
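
A minimal usage sketch (the names below are hypothetical; it assumes a data map exposing names(lang) and entry_of(lang, name) as the method above does):

# Hypothetical usage: attach extra per-entry data under a 'hunting_notes' key.
descriptions = {
    "Great Jagras": {"description": "A pack leader found in the forest."},
}
monster_map.merge(descriptions, lang="en", key="hunting_notes")
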
Example #2
    def merge(self, data, *, key_join='name_en', key=None, key_join_fn=None):
        """Merges a dictionary keyed by the names in a language to this data map
        
        If a key is given, the new data is stored as a subfield under that key;
        otherwise it is merged in without overwriting existing fields.

        key_join is the field to merge on. If that field is 'id', incoming keys are
        automatically converted to int; for any other type conversion, supply key_join_fn.

        Returns self to support chaining.
        """
        def convert_key(key_value):
            if key_join_fn:
                key_value = key_join_fn(key_value)
            elif key_join == 'id':
                key_value = int(key_value)
            return key_value

        def extract_field(entry):
            value = entry[key_join]
            if isinstance(value, collections.abc.Mapping):
                value = value[key_join]
            return convert_key(value)

        # Validation: make sure every key in the sub data links to a base entry
        entry_map = {extract_field(e): e for e in self.values()}
        converted_keys = [convert_key(k) for k in data.keys()]
        unlinked = [
            key for key in converted_keys if key not in entry_map.keys()
        ]
        if unlinked:
            raise Exception(
                "Several invalid names found in sub data map. Invalid entries are "
                + ','.join(str(k) for k in unlinked))

        # Validation complete. The data may not link to all base entries, but that's ok.
        for data_key, data_entry in data.items():
            base_entry = entry_map[convert_key(data_key)]

            if key:
                base_entry[key] = data_entry

            elif isinstance(data_entry, collections.abc.Mapping):
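                # Assumption based on the _unregister/_register calls below: if the
                # merge can change the entry's name, the entry is re-registered so
                # internal name lookups stay in sync.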
                if 'name' in data_entry:
                    self._unregister_entry(base_entry)
                    joindicts(base_entry, data_entry)
                    self._register_entry(base_entry)
                else:
                    joindicts(base_entry, data_entry)

            else:
                # If we get here, it's a key-less merge with a non-dict
                # We cannot merge a dictionary with a non-dictionary
                raise Exception(
                    "Invalid data, the data map must be a dictionary for a keyless merge"
                )

        return self
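
A usage sketch under the same assumptions (item_map and its fields are hypothetical); joining on 'id' converts the incoming string keys to int automatically:

# Hypothetical usage: merge shop prices keyed by id under a 'shop' subfield.
prices = {"1": {"buy_price": 60}, "2": {"buy_price": 80}}
item_map.merge(prices, key_join='id', key='shop')
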
Example #3
    def load_split_data_map(self,
                            parent_map: DataMap,
                            data_directory,
                            lang="en",
                            validate=True):
        """Loads a data map by combining separate maps in a folder into one.
        Just like a normal data map, it is anchored to the translation map.
        """
        #TODO: WILL BE REFACTORED TO USE THE NEW MERGE-FLOW
        data_directory = self.get_data_path(data_directory)

        all_subdata = []
        for dir_entry in os.scandir(data_directory):
            if not dir_entry.is_file():
                continue
            if not dir_entry.name.lower().endswith('.json'):
                continue

            with open(dir_entry, encoding="utf-8") as f:
                subdata_json = json.load(f)

                # Check if the data is of the correct type (is a dict)
                if not hasattr(subdata_json, 'keys'):
                    raise Exception(
                        f"Invalid data in {dir_entry}, the data map must be a dictionary"
                    )

                all_subdata.append(subdata_json)

        # todo: validate key conflicts
        # todo: store origins of keys somehow
        data = joindicts({}, *all_subdata)

        # Set validation function depending on validation setting
        ensure_fn = ensure if validate else ensure_warn

        # Collect all keys that failed to join; if any exist, the data did not fully link
        unlinked = validate_key_join(parent_map, data.keys(), join_lang=lang)
        ensure_fn(
            not unlinked, "Several invalid names found. Invalid entries are " +
            ','.join(unlinked))

        result = {}
        for entry_id, entry in parent_map.items():
            name = entry.name(lang)
            if name not in data:
                continue
            result[entry_id] = joindicts({}, entry, data[name])

        return DataMap(result, languages=self.required_languages)
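
A usage sketch, assuming a reader object that exposes this method and a parent DataMap (all names below are hypothetical):

# Hypothetical usage: combine the JSON files in a folder into one map,
# anchored to the English names of the parent map.
monsters = reader.load_split_data_map(monster_base_map, "monsters/", lang="en")
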
Example #4
    def add_json(self, data_file, *, key=None, join=None):
        """
        Loads a data map from a json file, adds it to the base map, and returns self.

        join is required and names the base field used to link entries.
        If a key is given, the new data is stored under that key;
        otherwise it is merged in without overwriting existing fields.
        """

        if not join:
            raise ValueError('Join must have a value')

        data = self.reader.load_json(self._get_filename(data_file))

        # Validation: make sure every key in the sub data links to a base entry
        entry_map = {str(e[join]): e for e in self.data_map.values()}
        converted_keys = [str(k) for k in data.keys()]
        unlinked = [k for k in converted_keys if k not in entry_map.keys()]
        if unlinked:
            raise Exception(
                "Several invalid names found in sub data map. Invalid entries are "
                + ','.join('None' if e is None else str(e) for e in unlinked))

        # Validation complete. The data may not link to all base entries, but that's ok.
        for data_key, data_entry in data.items():
            base_entry = entry_map[str(data_key)]

            if key:
                base_entry[key] = data_entry

            elif isinstance(data_entry, collections.abc.Mapping):
                joindicts(base_entry, data_entry)

            else:
                # If we get here, it's a key-less merge with a non-dict
                # We cannot merge a dictionary with a non-dictionary
                raise Exception(
                    "Invalid data, the data map must be a dictionary for a keyless merge"
                )

        return self
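
A usage sketch (file name and field names are hypothetical); join is required and names the base field used to link rows:

# Hypothetical usage: attach craft data from a JSON file keyed by English name.
datamap.add_json("weapon_craft.json", key="craft", join="name_en")
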
Example #5
def merge_list(base,
               rows: typing.List[dict],
               key=None,
               groups=None,
               many=False):
    """Routine to merge lists of dictionaries together using one or more keys.
    The keys used are determined by first sequential key of the first row.
    If the key is an id, it will join on that, but if it is a name, it will join on that and key_ex fields.
    """
    def create_key_fields(data_map, column_name):
        lang = derive_lang(column_name)

        key_fields = []
        if lang is None:
            key_fields.append('id')
        else:
            key_fields.append(f'name_{lang}')
            key_fields.extend(data_map.keys_ex)

        return key_fields

    def create_key_fn(key_fields):
        def derive_key(row):
            items = []
            for k in key_fields:
                if f'base_{k}' in row:
                    items.append(row[f'base_{k}'])
                else:
                    items.append(row[k])
            return tuple(str(i) for i in items)

        return derive_key

    if many and not key:
        raise ValueError('Key must have a value')

    if not rows:
        return

    # Create keying function
    first_column = next(iter(rows[0].keys()))
    key_fields = create_key_fields(base, first_column)
    derive_key = create_key_fn(key_fields)

    # group rows
    keyed_data = {}
    for row in rows:
        row_key = derive_key(row)

        # Delete key fields. It's possible for base_name_en AND name_en to be in the same row,
        # so prioritize deleting the base_ versions first.
        for k in key_fields:
            if f'base_{k}' in row:
                del row[f'base_{k}']
            elif k in row:
                del row[k]

        if groups:
            row = util.group_fields(row, groups=groups)
        entry = keyed_data.setdefault(row_key, [])
        entry.append(row)
        if not many and len(entry) > 1:
            raise ValueError(
                f"Key {row_key} has too many matching entries in sub data")

    # Group base by derived key
    base = {derive_key(e): e for e in base.values()}

    # Test the keys to see that the sub data's keys exist in base
    unlinked = [k for k in keyed_data.keys() if k not in base.keys()]
    if unlinked:
        raise Exception(
            "Several entries in sub data map cannot be joined. Their keys are "
            + ','.join('None' if e is None else str(e) for e in unlinked))

    for data_key, data_entries in keyed_data.items():
        base_entry = base[data_key]
        if key:
            if many:
                base_entry[key] = data_entries
            else:
                base_entry[key] = data_entries[0]
        elif isinstance(data_entries[0], abc.Mapping):
            util.joindicts(base_entry, data_entries[0])
        else:
            # We cannot merge a dictionary with a non-dictionary
            raise Exception(
                "Invalid data, the data map must be a dictionary for a keyless merge"
            )
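
A usage sketch, assuming the first column of each row resolves to an English name join (all entry and field names below are hypothetical):

# Hypothetical usage: group several reward rows per monster under a 'rewards' key.
rows = [
    {'base_name_en': 'Great Jagras', 'item': 'Great Jagras Hide', 'stack': 1},
    {'base_name_en': 'Great Jagras', 'item': 'Great Jagras Scale', 'stack': 2},
]
merge_list(monster_map, rows, key='rewards', many=True)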