def _get_item_length(item, parents_ids=frozenset([])):
    """
    Count the operations held in a diff object.

    Written with the delta view output in mind, but it also works on the
    other dictionary-shaped view outputs.

    Counting rules, checked in this order:

    * mappings: sum of the lengths of their values; string keys that start
      with ``_`` and the ``deep_distance`` key are ignored;
    * numbers and strings: 1;
    * any other iterable: sum of the lengths of its elements;
    * classes: 1;
    * anything else: sum of the lengths of the keys of its ``__dict__``
      (0 when it has none).

    ``parents_ids`` holds the ids of the containers already being counted on
    the current branch, so self-referencing structures are not followed
    forever.
    """
    if isinstance(item, Mapping):
        total = 0
        for key, value in item.items():
            # Internal keys such as _numpy_paths must not add to the distance.
            if isinstance(key, strings) and (key.startswith('_') or key == 'deep_distance'):
                continue

            # Collapse the repetition report so that the number of times an
            # item showed up does not influence the distance.
            if key in {
                'iterable_items_added_at_indexes',
                'iterable_items_removed_at_indexes'
            }:
                deduped = dict_()
                for path_, indexes_to_items in value.items():
                    seen_ids = set()
                    unique_items = dict_()
                    for index, element in indexes_to_items.items():
                        element_id = id(element)
                        if element_id in seen_ids:
                            continue
                        seen_ids.add(element_id)
                        unique_items[index] = element
                    deduped[path_] = unique_items
                value = deduped

            value_id = id(value)
            if parents_ids and value_id in parents_ids:
                continue
            total += _get_item_length(value, add_to_frozen_set(parents_ids, value_id))
        return total

    if isinstance(item, numbers) or isinstance(item, strings):
        return 1

    if isinstance(item, Iterable):
        return sum(
            _get_item_length(element, add_to_frozen_set(parents_ids, id(element)))
            for element in item
            if not (parents_ids and id(element) in parents_ids)
        )

    if isinstance(item, type):
        # it is a class
        return 1

    if hasattr(item, '__dict__'):
        # NOTE(review): iterating __dict__ yields the attribute *names*, not
        # the attribute values - confirm that counting names is intended.
        return sum(
            _get_item_length(attr_name, add_to_frozen_set(parents_ids, id(attr_name)))
            for attr_name in item.__dict__
        )

    return 0
def __search_iterable(self, obj, item, parent="root", parents_ids=frozenset()):
    """
    Search iterables except dictionaries, sets and strings.

    Each element equal to ``item`` is reported under ``matched_values``;
    every other element is searched recursively unless its id is already in
    ``parents_ids``. String elements are lower-cased before the comparison
    when ``self.case_sensitive`` is false.
    """
    for index, element in enumerate(obj):
        element_path = "{}[{}]".format(parent, index)
        if self.__skip_this(element, parent=element_path):
            continue

        if not self.case_sensitive and isinstance(element, strings):
            comparable = element.lower()
        else:
            comparable = element

        if comparable == item:
            self.__report(report_key='matched_values', key=element_path, value=element)
            continue

        # No direct hit: descend, but never back into an object that is
        # already on the current branch.
        element_id = id(element)
        if parents_ids and element_id in parents_ids:
            continue
        self.__search(element, item, element_path,
                      add_to_frozen_set(parents_ids, element_id))
def _prep_iterable(self, obj, parent, parents_ids=EMPTY_FROZENSET):
    """
    Build the order-insensitive string representation of an iterable.

    Every item that is not skipped is hashed via ``self._hash`` and the
    occurrences of each hash are counted. The per-item representations are
    converted to strings, sorted and comma-joined, so the order of the items
    in ``obj`` does not affect the result.

    :param obj: the iterable to prepare.
    :param parent: path of ``obj``; the path of item ``i`` is ``parent[i]``.
    :param parents_ids: ids of objects already being processed; an item whose
        id is in this set is not hashed again.
    :return: ``KEY_TO_VAL_STR`` formatted with the type name of ``obj`` and
        the joined item representations.
    """
    counts = defaultdict(int)

    for index, item in enumerate(obj):
        # The same indexed path must be used for the skip check and for the
        # recursive hash call; otherwise everything nested below this item
        # is given a path that lacks the index.
        item_path = "{}[{}]".format(parent, index)
        if self._skip_this(item, parent=item_path):
            continue

        item_id = get_id(item)
        if parents_ids and item_id in parents_ids:
            continue

        parents_ids_added = add_to_frozen_set(parents_ids, item_id)
        hashed = self._hash(item, parent=item_path, parents_ids=parents_ids_added)
        # counting repetitions
        counts[hashed] += 1

    if self.ignore_repetition:
        parts = list(counts.keys())
    else:
        parts = ['{}|{}'.format(hashed, count) for hashed, count in counts.items()]

    # Items are converted to strings and sorted so the join is well defined
    # and independent of iteration order.
    joined = ','.join(sorted(map(str, parts)))
    return KEY_TO_VAL_STR.format(type(obj).__name__, joined)
def __search_iterable(self, obj, item, parent="root", parents_ids=frozenset({})):
    """
    Search iterables except dictionaries, sets and strings.

    Elements equal to ``item`` are reported under ``matched_values``; all
    other elements are searched recursively, except those whose id is
    already present in ``parents_ids``.
    """
    fold_case = not self.case_sensitive
    for position, member in enumerate(obj):
        member_path = "%s[%s]" % (parent, position)
        if self.__skip_this(member, parent=member_path):
            continue

        # Only strings are case-folded, and only for case-insensitive searches.
        candidate = member.lower() if fold_case and isinstance(member, strings) else member

        if candidate == item:
            self.__report(
                report_key='matched_values', key=member_path, value=member)
        else:
            member_id = id(member)
            already_on_branch = parents_ids and member_id in parents_ids
            if not already_on_branch:
                branch_ids = add_to_frozen_set(parents_ids, member_id)
                self.__search(member, item, member_path, branch_ids)
def _prep_dict(self, obj, parent, parents_ids=EMPTY_FROZENSET,
               print_as_attribute=False, original_type=None):
    """
    Build the string representation of a dictionary (or attribute mapping).

    Each key and each value is hashed via ``self._hash``; the resulting
    key/value pairs are sorted and joined with ``;`` so that the order of
    the entries in ``obj`` does not matter.

    :param obj: mapping to prepare.
    :param parent: path of ``obj``, used as the prefix of the entry paths.
    :param parents_ids: ids of objects already being processed; values whose
        id is in this set are left out.
    :param print_as_attribute: treat entries as object attributes. String
        keys are then not quoted in the path and the type label comes from
        the object's type instead of ``'dict'``.
    :param original_type: type to use for the label instead of ``type(obj)``.
    :return: ``"<type label>:{<joined pairs>}"``.
    """
    path_template = "%s{}".format(INDEX_VS_ATTRIBUTE[print_as_attribute])
    pairs = []

    for key, value in obj.items():
        if not print_as_attribute and isinstance(key, strings):
            key_repr = "'%s'" % key
        else:
            key_repr = key
        entry_path = path_template % (parent, key_repr)

        key_hash = self._hash(key, parent=entry_path, parents_ids=parents_ids)

        value_id = get_id(value)
        if parents_ids and value_id in parents_ids:
            continue
        if self._skip_this(value, parent=entry_path):
            continue

        value_hash = self._hash(
            value,
            parent=entry_path,
            parents_ids=add_to_frozen_set(parents_ids, value_id))
        pairs.append(KEY_TO_VAL_STR.format(key_hash, value_hash))

    joined = ';'.join(sorted(pairs))

    if not print_as_attribute:
        type_str = 'dict'
    else:
        type_ = original_type or type(obj)
        type_str = type_.__name__
        # Types that belong to an ignored type group are labelled with the
        # whole group, so every member of the group gets the same label.
        for type_group in self.ignore_type_in_groups:
            if self.type_check_func(type_, type_group):
                type_str = ','.join(member.__name__ for member in type_group)
                break

    return "%s:{%s}" % (type_str, joined)
def __search_dict(self, obj, item, parent, parents_ids=frozenset(), print_as_attribute=False):
    """
    Search dictionaries.

    The path of every key is matched against ``item`` and reported under
    ``matched_paths`` on a hit. Whether or not the path matched, the value
    is then searched recursively.
    """
    path_template = "%s.%s" if print_as_attribute else "%s[%s]"

    for key in OrderedSetPlus(obj.keys()):
        child = obj[key]
        child_id = id(child)
        if parents_ids and child_id in parents_ids:
            continue
        branch_ids = add_to_frozen_set(parents_ids, child_id)

        # String keys are quoted in index-style paths, never in attribute paths.
        if isinstance(key, strings) and not print_as_attribute:
            key_repr = "'%s'" % key
        else:
            key_repr = key
        child_path = path_template % (parent, key_repr)
        path_cased = child_path if self.case_sensitive else child_path.lower()

        needle = str(item)
        if self.match_string:
            text_hit = needle == path_cased
        else:
            text_hit = needle in path_cased

        if text_hit or (self.use_regexp and item.search(path_cased)):
            self.__report(
                report_key='matched_paths',
                key=child_path,
                value=child)

        self.__search(
            child,
            item,
            parent=child_path,
            parents_ids=branch_ids)
def __search_dict(self, obj, item, parent, parents_ids=frozenset({}), print_as_attribute=False):
    """
    Search dictionaries.

    The path of every key is compared against ``str(item)`` - exact match
    when ``self.match_string`` is set, substring match otherwise - and
    reported under ``matched_paths`` on a hit. Each value is then searched
    recursively, whether or not its path matched.

    :param obj: mapping to walk; must provide ``keys()`` and item access.
    :param item: what is being searched for.
    :param parent: path of ``obj``, used as the prefix of child paths.
    :param parents_ids: ids of the objects already on the current branch;
        values whose id is in this set are not visited again.
    :param print_as_attribute: build child paths as ``parent.key`` instead
        of ``parent[key]`` and do not quote string keys.
    """
    if print_as_attribute:
        parent_text = "%s.%s"
    else:
        parent_text = "%s[%s]"

    # Take a de-duplicated snapshot of the keys that keeps their original
    # order. A plain set would be visited in hash order, which differs
    # between interpreter runs for string keys and makes the order of
    # reports and recursion unstable.
    obj_keys = list(dict.fromkeys(obj.keys()))

    for item_key in obj_keys:
        if not print_as_attribute and isinstance(item_key, strings):
            item_key_str = "'%s'" % item_key
        else:
            item_key_str = item_key

        obj_child = obj[item_key]

        item_id = id(obj_child)
        if parents_ids and item_id in parents_ids:
            continue
        parents_ids_added = add_to_frozen_set(parents_ids, item_id)

        new_parent = parent_text % (parent, item_key_str)
        # Case-insensitive searches compare against the lower-cased path.
        new_parent_cased = new_parent if self.case_sensitive else new_parent.lower()

        str_item = str(item)
        if (self.match_string and str_item == new_parent_cased) or\
           (not self.match_string and str_item in new_parent_cased):
            self.__report(
                report_key='matched_paths',
                key=new_parent,
                value=obj_child)

        self.__search(
            obj_child,
            item,
            parent=new_parent,
            parents_ids=parents_ids_added)
def __diff_iterable(self, level, parents_ids=frozenset({})):
    """
    Difference of iterables.

    ``level.t1`` and ``level.t2`` are walked in lockstep. A position that
    exists on one side only is reported as a removed or added item;
    positions present on both sides are diffed one level deeper.
    """
    # Subscriptable and non-subscriptable iterables are both supported; the
    # kind decides which relationship class describes the children.
    if self.__iterables_subscriptable(level.t1, level.t2):
        relationship = SubscriptableIterableRelationship
    else:
        relationship = NonSubscriptableIterableRelationship

    pairs = zip_longest(level.t1, level.t2, fillvalue=ListItemRemovedOrAdded)
    for index, (old, new) in enumerate(pairs):
        if new is ListItemRemovedOrAdded:
            # item removed completely
            removed_level = level.branch_deeper(
                old,
                notpresent,
                child_relationship_class=relationship,
                child_relationship_param=index)
            self.__report_result('iterable_item_removed', removed_level)
            continue

        if old is ListItemRemovedOrAdded:
            # new item added
            added_level = level.branch_deeper(
                notpresent,
                new,
                child_relationship_class=relationship,
                child_relationship_param=index)
            self.__report_result('iterable_item_added', added_level)
            continue

        # Present on both sides: check whether the value has changed, unless
        # the old item is already on the current branch.
        old_id = id(old)
        if parents_ids and old_id in parents_ids:
            continue
        branch_ids = add_to_frozen_set(parents_ids, old_id)

        # Go one level deeper
        deeper_level = level.branch_deeper(
            old,
            new,
            child_relationship_class=relationship,
            child_relationship_param=index)
        self.__diff(deeper_level, branch_ids)
def __diff_iterable(self, level, parents_ids=frozenset({})):
    """
    Difference of iterables.

    Items of ``level.t1`` and ``level.t2`` are paired by position. Unpaired
    items are reported as ``iterable_item_removed`` or
    ``iterable_item_added``; paired items are compared recursively.
    """
    # Both subscriptable and non-subscriptable iterables are handled here;
    # pick the child relationship class that fits.
    subscriptable = self.__iterables_subscriptable(level.t1, level.t2)
    child_class = (SubscriptableIterableRelationship if subscriptable
                   else NonSubscriptableIterableRelationship)

    for position, (left, right) in enumerate(
            zip_longest(level.t1, level.t2, fillvalue=ListItemRemovedOrAdded)):
        # Every child level at this position shares these arguments.
        child_kwargs = {
            'child_relationship_class': child_class,
            'child_relationship_param': position,
        }

        if right is ListItemRemovedOrAdded:
            # item removed completely
            self.__report_result(
                'iterable_item_removed',
                level.branch_deeper(left, notpresent, **child_kwargs))
        elif left is ListItemRemovedOrAdded:
            # new item added
            self.__report_result(
                'iterable_item_added',
                level.branch_deeper(notpresent, right, **child_kwargs))
        else:
            # check if item value has changed
            left_id = id(left)
            if parents_ids and left_id in parents_ids:
                continue
            parents_ids_added = add_to_frozen_set(parents_ids, left_id)

            # Go one level deeper
            next_level = level.branch_deeper(left, right, **child_kwargs)
            self.__diff(next_level, parents_ids_added)
def __diff_dict(self, level, parents_ids=frozenset({}), print_as_attribute=False,
                override=False, override_t1=None, override_t2=None):
    """
    Difference of 2 dictionaries.

    Keys found only in the second mapping are reported as added, keys found
    only in the first as removed, and the values of shared keys are diffed
    one level deeper.

    With ``override`` set, ``override_t1`` / ``override_t2`` are compared in
    place of ``level.t1`` / ``level.t2``. With ``print_as_attribute`` set,
    results are reported as attribute changes rather than dictionary item
    changes.
    """
    if override:
        # For special cases such as custom objects and named tuples we get
        # preprocessed t1 and t2, but the chain (=level) must stay untouched.
        t1, t2 = override_t1, override_t2
    else:
        t1, t2 = level.t1, level.t2

    if print_as_attribute:
        item_added_key, item_removed_key = "attribute_added", "attribute_removed"
        rel_class = AttributeRelationship
    else:
        item_added_key, item_removed_key = "dictionary_item_added", "dictionary_item_removed"
        rel_class = DictRelationship

    t1_keys = set(t1.keys())
    t2_keys = set(t2.keys())
    if self.ignore_string_type_changes or self.ignore_numeric_type_changes:
        # Compare on cleaned keys and remember how to get back to the
        # original key of each side.
        t1_clean_to_keys = self.__get_clean_to_keys_mapping(keys=t1_keys, level=level)
        t2_clean_to_keys = self.__get_clean_to_keys_mapping(keys=t2_keys, level=level)
        t1_keys = set(t1_clean_to_keys.keys())
        t2_keys = set(t2_clean_to_keys.keys())
    else:
        t1_clean_to_keys = t2_clean_to_keys = None

    t_keys_intersect = t2_keys.intersection(t1_keys)
    t_keys_added = t2_keys - t_keys_intersect
    t_keys_removed = t1_keys - t_keys_intersect

    def original_key(clean_to_keys, clean_key):
        # Map a cleaned key back to the real key when a mapping is in use.
        return clean_to_keys[clean_key] if clean_to_keys else clean_key

    for clean_key in t_keys_added:
        key = original_key(t2_clean_to_keys, clean_key)
        change_level = level.branch_deeper(
            notpresent,
            t2[key],
            child_relationship_class=rel_class,
            child_relationship_param=key)
        self.__report_result(item_added_key, change_level)

    for clean_key in t_keys_removed:
        key = original_key(t1_clean_to_keys, clean_key)
        change_level = level.branch_deeper(
            t1[key],
            notpresent,
            child_relationship_class=rel_class,
            child_relationship_param=key)
        self.__report_result(item_removed_key, change_level)

    for key in t_keys_intersect:
        # key present in both dicts - need to compare values
        key1 = original_key(t1_clean_to_keys, key)
        key2 = original_key(t2_clean_to_keys, key)

        item_id = id(t1[key1])
        if parents_ids and item_id in parents_ids:
            continue
        parents_ids_added = add_to_frozen_set(parents_ids, item_id)

        # Go one level deeper
        next_level = level.branch_deeper(
            t1[key1],
            t2[key2],
            child_relationship_class=rel_class,
            child_relationship_param=key)
        self.__diff(next_level, parents_ids_added)