def _get_item_length(item, parents_ids=frozenset([])):
    """
    Get the number of operations in a diff object.
    It is designed mainly for the delta view output
    but can be used with other dictionary types of view outputs too.

    :param item: a diff report object — a Mapping, number, string, iterable,
        class, or arbitrary object; recursed into depth-first.
    :param parents_ids: frozenset of id()s already visited on this path,
        used to break reference cycles.
    :return: int count of leaf operations found under ``item``.
    """
    length = 0
    if isinstance(item, Mapping):
        for key, subitem in item.items():
            # dedupe the repetition report so the number of times items have shown up does not affect the distance.
            # Only the first occurrence of each value object (by id) per path is kept.
            if key in {
                'iterable_items_added_at_indexes',
                'iterable_items_removed_at_indexes'
            }:
                new_subitem = dict_()
                for path_, indexes_to_items in subitem.items():
                    used_value_ids = set()
                    new_indexes_to_items = dict_()
                    for k, v in indexes_to_items.items():
                        v_id = id(v)
                        if v_id not in used_value_ids:
                            used_value_ids.add(v_id)
                            new_indexes_to_items[k] = v
                    new_subitem[path_] = new_indexes_to_items
                subitem = new_subitem
            # internal keys such as _numpy_paths should not count towards the distance
            if isinstance(key, strings) and (key.startswith('_') or key == 'deep_distance'):
                continue
            item_id = id(subitem)
            if parents_ids and item_id in parents_ids:
                # already counted on this path; avoid infinite recursion on cycles
                continue
            parents_ids_added = add_to_frozen_set(parents_ids, item_id)
            length += _get_item_length(subitem, parents_ids_added)
    elif isinstance(item, numbers):
        # a scalar leaf counts as a single operation
        length = 1
    elif isinstance(item, strings):
        length = 1
    elif isinstance(item, Iterable):
        for subitem in item:
            item_id = id(subitem)
            if parents_ids and item_id in parents_ids:
                continue
            parents_ids_added = add_to_frozen_set(parents_ids, item_id)
            length += _get_item_length(subitem, parents_ids_added)
    elif isinstance(item, type):  # it is a class
        length = 1
    else:
        if hasattr(item, '__dict__'):
            # NOTE(review): iterating item.__dict__ yields attribute NAMES (strings),
            # so each attribute counts as 1 regardless of its value — presumably
            # intentional for distance purposes, but worth confirming.
            for subitem in item.__dict__:
                item_id = id(subitem)
                parents_ids_added = add_to_frozen_set(parents_ids, item_id)
                length += _get_item_length(subitem, parents_ids_added)
    return length
def _precalculate_distance_by_custom_compare_func(
        self, hashes_added, hashes_removed, t1_hashtable, t2_hashtable, _original_type):
    """
    Build a pairwise distance map between added and removed items using the
    user-supplied ``iterable_compare_func``.

    Pairs the compare function deems close get a near-zero distance
    (``math_epsilon`` when set, otherwise an arbitrary small 0.000001);
    all other comparable pairs get distance 1. Pairs the compare function
    cannot judge (it raises CannotCompare) are simply left out of the map.

    :return: dict mapping "addedhash--removedhash" keys to distances.
    """
    distances = dict_()
    for new_hash in hashes_added:
        new_item = t2_hashtable[new_hash].item
        for old_hash in hashes_removed:
            old_item = t1_hashtable[old_hash].item
            try:
                considered_close = self.iterable_compare_func(new_item, old_item)
            except CannotCompare:
                # the custom comparator declined to judge this pair; skip it
                continue
            # an arbitrary small distance if math_epsilon is not defined
            distance = (self.math_epsilon or 0.000001) if considered_close else 1
            distances["{}--{}".format(new_hash, old_hash)] = distance
    return distances
def __init__(self, capacity):
    """
    Initialize an empty LFU cache.

    :param capacity: maximum number of entries; must be a positive integer.
    :raises ValueError: when capacity is zero or negative.
    """
    if capacity <= 0:
        raise ValueError('Capacity of LFUCache needs to be positive.')  # pragma: no cover.
    self.cache = dict_()  # {key: cache_node}
    self.capacity = capacity
    self.freq_link_head = None  # head of the frequency linked list
    self.lock = Lock()  # guards cache mutations — presumably for multi-threaded callers
def __init__(self, tree_results=None, verbose_level=1):
    """
    Initialize the text-style result mapping with one empty container per
    report category, then optionally populate it from a tree result.

    :param tree_results: optional tree-view results to convert immediately.
    :param verbose_level: controls whether added/removed keys map to dicts
        (>= 2) or sets — see ``__set_or_dict``.
    """
    self.verbose_level = verbose_level
    # TODO: centralize keys
    initial_categories = {
        "type_changes": dict_(),
        "dictionary_item_added": self.__set_or_dict(),
        "dictionary_item_removed": self.__set_or_dict(),
        "values_changed": dict_(),
        "unprocessed": [],
        "iterable_item_added": dict_(),
        "iterable_item_removed": dict_(),
        "attribute_added": self.__set_or_dict(),
        "attribute_removed": self.__set_or_dict(),
        "set_item_removed": PrettyOrderedSet(),
        "set_item_added": PrettyOrderedSet(),
        "repetition_change": dict_(),
    }
    self.update(initial_categories)
    if tree_results:
        self._from_tree_results(tree_results)
def _get_objects_to_hashes_dict(self, extract_index=0):
    """
    A dictionary containing only the objects to hashes,
    or a dictionary of objects to the count of items that went to build them.
    extract_index=0 for hashes and extract_index=1 for counts.
    """
    result = dict_()
    for obj_key, entry in self.hashes.items():
        # reserved keys store a bare value; regular keys store a (hash, count) pair
        result[obj_key] = entry if obj_key in RESERVED_DICT_KEYS else entry[extract_index]
    return result
def _from_tree_repetition_change(self, tree):
    """
    Translate tree-view repetition_change reports into the delta view's
    'iterable_items_added_at_indexes' mapping: every new index of a repeated
    value becomes an added-item entry keyed by path.
    """
    if 'repetition_change' not in tree:
        return
    for change in tree['repetition_change']:
        path, _, _ = change.path(get_parent_too=True)
        repetition = RemapDict(change.additional['repetition'])
        repeated_value = change.t1
        # create the per-path index map on first use
        added_at_indexes = self['iterable_items_added_at_indexes'].setdefault(path, dict_())
        for new_index in repetition['new_indexes']:
            added_at_indexes[new_index] = repeated_value
def _from_tree_iterable_item_added_or_removed(self, tree, report_type, delta_report_key):
    """
    Copy iterable add/remove reports from the tree view into the delta view,
    keyed first by path and then by the item's index within its container.

    :param report_type: tree-view key to read (e.g. 'iterable_item_added').
    :param delta_report_key: delta-view key to write under.
    """
    if report_type not in tree:
        return
    for change in tree[report_type]:
        # Report t2 (the new one) whenever possible.
        # In cases where t2 doesn't exist (i.e. stuff removed), report t1.
        item = change.t1 if change.t2 is notpresent else change.t2
        path, param, _ = change.path(force=FORCE_DEFAULT, get_parent_too=True)
        # create the per-path index map on first use
        items_at_indexes = self[delta_report_key].setdefault(path, dict_())
        items_at_indexes[param] = item
def _precalculate_numpy_arrays_distance(
        self, hashes_added, hashes_removed, t1_hashtable, t2_hashtable, _original_type):
    """
    Vectorized pairwise distance between added and removed scalar items via numpy.

    Returns a dict mapping "addedhash--removedhash" to a distance, or None when
    the items are not 1D-compatible or no common numpy type can be inferred
    (the caller then falls back to another distance strategy — TODO confirm).
    """
    # We only want to deal with 1D arrays.
    if isinstance(t2_hashtable[hashes_added[0]].item, (np_ndarray, list)):
        return
    pre_calced_distances = dict_()
    added = [t2_hashtable[k].item for k in hashes_added]
    removed = [t1_hashtable[k].item for k in hashes_removed]
    if _original_type is None:
        # try to infer one homogeneous numpy dtype shared by both sides
        added_numpy_compatible_type = get_homogeneous_numpy_compatible_type_of_seq(added)
        removed_numpy_compatible_type = get_homogeneous_numpy_compatible_type_of_seq(removed)
        if added_numpy_compatible_type and added_numpy_compatible_type == removed_numpy_compatible_type:
            _original_type = added_numpy_compatible_type
    if _original_type is None:
        # no common numeric type — cannot vectorize
        return
    added = np_array_factory(added, dtype=_original_type)
    removed = np_array_factory(removed, dtype=_original_type)
    # all (added, removed) combinations; transposed so row 0 holds the added
    # values and row 1 the removed values for the distance function
    pairs = cartesian_product_numpy(added, removed)
    pairs_transposed = pairs.T
    distances = _get_numpy_array_distance(
        pairs_transposed[0], pairs_transposed[1],
        max_=self.cutoff_distance_for_pairs)
    # distances are laid out in the same nested-loop order the pairs were built in
    i = 0
    for added_hash in hashes_added:
        for removed_hash in hashes_removed:
            pre_calced_distances["{}--{}".format(
                added_hash, removed_hash)] = distances[i]
            i += 1
    return pre_calced_distances
def _do_pre_process(self):
    """
    Before applying iterable additions/removals, convert numpy arrays
    (recorded in ``self._numpy_paths``) into plain lists so items can be
    inserted/removed, and schedule the reverse list→ndarray conversion in
    ``self.post_process_paths_to_convert`` for after the delta is applied.
    """
    if not self._numpy_paths:
        return
    if 'iterable_item_added' not in self.diff and 'iterable_item_removed' not in self.diff:
        return
    preprocess_paths = dict_()
    for path, dtype_string in self._numpy_paths.items():
        preprocess_paths[path] = {'old_type': np_ndarray, 'new_type': list}
        try:
            resolved_type = numpy_dtype_string_to_type(dtype_string)
        except Exception as e:
            # invalid dtype string: report it and skip scheduling the back-conversion
            self._raise_or_log(NOT_VALID_NUMPY_TYPE.format(e))
            continue  # pragma: no cover. Due to cPython peephole optimizer, this line doesn't get covered. https://github.com/nedbat/coveragepy/issues/198
        self.post_process_paths_to_convert[path] = {'old_type': list, 'new_type': resolved_type}
    if preprocess_paths:
        self._do_values_or_type_changed(preprocess_paths, is_type_change=True)
def __set_or_dict(self):
    """Return the container type for added/removed reports: a dict when
    verbose_level >= 2 (so values can be shown), otherwise an OrderedSetPlus."""
    if self.verbose_level >= 2:
        return dict_()
    return OrderedSetPlus()
def _do_ignore_order(self):
    """
    Apply an ignore_order delta: rebuild each reported container by placing
    fixed (added) items at their recorded indexes and filling the remaining
    slots from the old items, minus the removed ones.

    Example diff this consumes:

    't1': [5, 1, 1, 1, 6],
    't2': [7, 1, 1, 1, 8],
    'iterable_items_added_at_indexes': {
        'root': {
            0: 7,
            4: 8
        }
    },
    'iterable_items_removed_at_indexes': {
        'root': {
            4: 6,
            0: 5
        }
    }
    """
    fixed_indexes = self.diff.get('iterable_items_added_at_indexes', dict_())
    remove_indexes = self.diff.get('iterable_items_removed_at_indexes', dict_())
    # every container path mentioned by either report must be rebuilt
    paths = set(fixed_indexes.keys()) | set(remove_indexes.keys())
    for path in paths:
        # In the case of ignore_order reports, we are pointing to the container object.
        # Thus we add a [0] to the elements so we can get the required objects and discard what we don't need.
        elem_and_details = self._get_elements_and_details(
            "{}[0]".format(path))
        if elem_and_details:
            _, parent, parent_to_obj_elem, parent_to_obj_action, obj, _, _ = elem_and_details
        else:
            continue  # pragma: no cover. Due to cPython peephole optimizer, this line doesn't get covered. https://github.com/nedbat/coveragepy/issues/198
        # copying both these dictionaries since we don't want to mutate them.
        fixed_indexes_per_path = fixed_indexes.get(path, dict_()).copy()
        remove_indexes_per_path = remove_indexes.get(path, dict_()).copy()
        fixed_indexes_values = AnySet(fixed_indexes_per_path.values())
        new_obj = []
        # Numpy's NdArray does not like the bool function.
        if isinstance(obj, np_ndarray):
            there_are_old_items = obj.size > 0
        else:
            there_are_old_items = bool(obj)
        # generator over the old items that survive removal
        old_item_gen = self._do_ignore_order_get_old(
            obj,
            remove_indexes_per_path,
            fixed_indexes_values,
            path_for_err_reporting=path)
        # fill new_obj slot by slot: a fixed item when its index comes up,
        # otherwise the next surviving old item
        while there_are_old_items or fixed_indexes_per_path:
            new_obj_index = len(new_obj)
            if new_obj_index in fixed_indexes_per_path:
                new_item = fixed_indexes_per_path.pop(new_obj_index)
                new_obj.append(new_item)
            elif there_are_old_items:
                try:
                    new_item = next(old_item_gen)
                except StopIteration:
                    there_are_old_items = False
                else:
                    new_obj.append(new_item)
            else:
                # pop a random item from the fixed_indexes_per_path dictionary
                # (its recorded index no longer fits — report and append anyway)
                self._raise_or_log(INDEXES_NOT_FOUND_WHEN_IGNORE_ORDER.format(fixed_indexes_per_path))
                new_item = fixed_indexes_per_path.pop(next(iter(fixed_indexes_per_path)))
                new_obj.append(new_item)
        if isinstance(obj, tuple):
            new_obj = tuple(new_obj)
        # Making sure that the object is re-instated inside the parent especially if it was immutable
        # and we had to turn it into a mutable one. In such cases the object has a new id.
        self._simple_set_elem_value(obj=parent, path_for_err_reporting=path,
                                    elem=parent_to_obj_elem, value=new_obj,
                                    action=parent_to_obj_action)
def reset(self):
    """Drop any scheduled list->numpy back-conversions from a previous delta application."""
    self.post_process_paths_to_convert = dict_()
def __init__(self,
             obj,
             *,
             hashes=None,
             exclude_types=None,
             exclude_paths=None,
             exclude_regex_paths=None,
             hasher=None,
             ignore_repetition=True,
             significant_digits=None,
             truncate_datetime=None,
             number_format_notation="f",
             apply_hash=True,
             ignore_type_in_groups=None,
             ignore_string_type_changes=False,
             ignore_numeric_type_changes=False,
             ignore_type_subclasses=False,
             ignore_string_case=False,
             exclude_obj_callback=None,
             number_to_string_func=None,
             ignore_private_variables=True,
             parent="root",
             **kwargs):
    """
    Hash ``obj`` (deeply) according to the given options and store the
    results in ``self.hashes``. Unknown keyword arguments raise ValueError.

    :param obj: the object to hash.
    :param hashes: an existing mapping or DeepHash instance to reuse/extend.
    :raises ValueError: when an unrecognized keyword argument is passed.
    """
    if kwargs:
        raise ValueError(
            ("The following parameter(s) are not valid: %s\n"
             "The valid parameters are obj, hashes, exclude_types, significant_digits, truncate_datetime,"
             "exclude_paths, exclude_regex_paths, hasher, ignore_repetition, "
             "number_format_notation, apply_hash, ignore_type_in_groups, ignore_string_type_changes, "
             "ignore_numeric_type_changes, ignore_type_subclasses, ignore_string_case "
             "number_to_string_func, ignore_private_variables, parent") % ', '.join(kwargs.keys()))
    # reuse caller-provided hash storage when possible so hashes accumulate
    if isinstance(hashes, MutableMapping):
        self.hashes = hashes
    elif isinstance(hashes, DeepHash):
        self.hashes = hashes.hashes
    else:
        self.hashes = dict_()
    exclude_types = set() if exclude_types is None else set(exclude_types)
    self.exclude_types_tuple = tuple(exclude_types)  # we need tuple for checking isinstance
    self.ignore_repetition = ignore_repetition
    self.exclude_paths = convert_item_or_items_into_set_else_none(exclude_paths)
    self.exclude_regex_paths = convert_item_or_items_into_compiled_regexes_else_none(exclude_regex_paths)
    self.hasher = default_hasher if hasher is None else hasher
    # collect items that could not be hashed under this reserved key
    self.hashes[UNPROCESSED_KEY] = []
    self.significant_digits = self.get_significant_digits(significant_digits, ignore_numeric_type_changes)
    self.truncate_datetime = get_truncate_datetime(truncate_datetime)
    self.number_format_notation = number_format_notation
    self.ignore_type_in_groups = self.get_ignore_types_in_groups(
        ignore_type_in_groups=ignore_type_in_groups,
        ignore_string_type_changes=ignore_string_type_changes,
        ignore_numeric_type_changes=ignore_numeric_type_changes,
        ignore_type_subclasses=ignore_type_subclasses)
    self.ignore_string_type_changes = ignore_string_type_changes
    self.ignore_numeric_type_changes = ignore_numeric_type_changes
    self.ignore_string_case = ignore_string_case
    self.exclude_obj_callback = exclude_obj_callback
    # makes the hash return constant size result if true
    # the only time it should be set to False is when
    # testing the individual hash functions for different types of objects.
    self.apply_hash = apply_hash
    self.type_check_func = type_is_subclass_of_type_group if ignore_type_subclasses else type_in_type_group
    self.number_to_string = number_to_string_func or number_to_string
    self.ignore_private_variables = ignore_private_variables
    # perform the actual hashing pass
    self._hash(obj, parent=parent, parents_ids=frozenset({get_id(obj)}))
    if self.hashes[UNPROCESSED_KEY]:
        logger.warning("Can not hash the following items: {}.".format(self.hashes[UNPROCESSED_KEY]))
    else:
        del self.hashes[UNPROCESSED_KEY]
def __init__(self, t1, t2, down=None, up=None, report_type=None, child_rel1=None, child_rel2=None, additional=None, verbose_level=1): """ :param child_rel1: Either: - An existing ChildRelationship object describing the "down" relationship for t1; or - A ChildRelationship subclass. In this case, we will create the ChildRelationship objects for both t1 and t2. Alternatives for child_rel1 and child_rel2 must be used consistently. :param child_rel2: Either: - An existing ChildRelationship object describing the "down" relationship for t2; or - The param argument for a ChildRelationship class we shall create. Alternatives for child_rel1 and child_rel2 must be used consistently. """ # The current-level object in the left hand tree self.t1 = t1 # The current-level object in the right hand tree self.t2 = t2 # Another DiffLevel object describing this change one level deeper down the object tree self.down = down # Another DiffLevel object describing this change one level further up the object tree self.up = up self.report_type = report_type # If this object is this change's deepest level, this contains a string describing the type of change. # Examples: "set_item_added", "values_changed" # Note: don't use {} as additional's default value - this would turn out to be always the same dict object self.additional = dict_() if additional is None else additional # For some types of changes we store some additional information. # This is a dict containing this information. # Currently, this is used for: # - values_changed: In case the changes data is a multi-line string, # we include a textual diff as additional['diff']. # - repetition_change: additional['repetition']: # e.g. {'old_repeat': 2, 'new_repeat': 1, 'old_indexes': [0, 2], 'new_indexes': [2]} # the user supplied ChildRelationship objects for t1 and t2 # A ChildRelationship object describing the relationship between t1 and it's child object, # where t1's child object equals down.t1. 
# If this relationship is representable as a string, str(self.t1_child_rel) returns a formatted param parsable python string, # e.g. "[2]", ".my_attribute" self.t1_child_rel = child_rel1 # Another ChildRelationship object describing the relationship between t2 and it's child object. self.t2_child_rel = child_rel2 # Will cache result of .path() per 'force' as key for performance self._path = dict_() self.verbose_level = verbose_level
def __init__(self, tree_results=None, ignore_order=None):
    """
    Initialize the delta-view result mapping with one empty dict per report
    category, then optionally populate it from a tree result.

    :param tree_results: optional tree-view results to convert immediately.
    :param ignore_order: whether the originating diff ignored ordering.
    """
    self.ignore_order = ignore_order
    category_keys = (
        "type_changes",
        "dictionary_item_added",
        "dictionary_item_removed",
        "values_changed",
        "iterable_item_added",
        "iterable_item_removed",
        "attribute_added",
        "attribute_removed",
        "set_item_removed",
        "set_item_added",
        "iterable_items_added_at_indexes",
        "iterable_items_removed_at_indexes",
    )
    # every category starts out as its own fresh, empty dict
    self.update({key: dict_() for key in category_keys})
    if tree_results:
        self._from_tree_results(tree_results)