Example #1
def _get_item_length(item, parents_ids=frozenset([])):
    """
    Get the number of operations in a diff object.
    It is designed mainly for the delta view output,
    but can be used with other dictionary-based view outputs too.
    """
    length = 0
    if isinstance(item, Mapping):
        for key, subitem in item.items():
            # dedupe the repetition report so the number of times items have shown up does not affect the distance.
            if key in {
                    'iterable_items_added_at_indexes',
                    'iterable_items_removed_at_indexes'
            }:
                new_subitem = dict_()
                for path_, indexes_to_items in subitem.items():
                    used_value_ids = set()
                    new_indexes_to_items = dict_()
                    for k, v in indexes_to_items.items():
                        v_id = id(v)
                        if v_id not in used_value_ids:
                            used_value_ids.add(v_id)
                            new_indexes_to_items[k] = v
                    new_subitem[path_] = new_indexes_to_items
                subitem = new_subitem

            # internal keys such as _numpy_paths should not count towards the distance
            if isinstance(key, strings) and (key.startswith('_')
                                             or key == 'deep_distance'):
                continue

            item_id = id(subitem)
            if parents_ids and item_id in parents_ids:
                continue
            parents_ids_added = add_to_frozen_set(parents_ids, item_id)
            length += _get_item_length(subitem, parents_ids_added)
    elif isinstance(item, numbers):
        length = 1
    elif isinstance(item, strings):
        length = 1
    elif isinstance(item, Iterable):
        for subitem in item:
            item_id = id(subitem)
            if parents_ids and item_id in parents_ids:
                continue
            parents_ids_added = add_to_frozen_set(parents_ids, item_id)
            length += _get_item_length(subitem, parents_ids_added)
    elif isinstance(item, type):  # it is a class
        length = 1
    else:
        if hasattr(item, '__dict__'):
            for subitem in item.__dict__:
                item_id = id(subitem)
                parents_ids_added = add_to_frozen_set(parents_ids, item_id)
                length += _get_item_length(subitem, parents_ids_added)
    return length
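
For illustration, here is a minimal standalone sketch (not part of the deepdiff API) of the same counting idea: recurse through a nested diff-like structure, count each leaf value as one operation, and skip internal keys that start with an underscore. The cycle detection and repetition dedupe above are omitted.

def count_leaves(item):
    # Count leaf values in a nested diff-like structure, mirroring the
    # recursion in _get_item_length (without cycle detection or dedupe).
    if isinstance(item, dict):
        return sum(count_leaves(v) for k, v in item.items()
                   if not (isinstance(k, str) and k.startswith('_')))
    if isinstance(item, (int, float, str, type)):
        return 1
    if hasattr(item, '__iter__'):
        return sum(count_leaves(v) for v in item)
    return 0

diff = {'values_changed': {'root[2]': {'new_value': 4, 'old_value': 3}}}
print(count_leaves(diff))  # 2: one leaf each for new_value and old_value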
Example #2
    def _precalculate_distance_by_custom_compare_func(self, hashes_added,
                                                      hashes_removed,
                                                      t1_hashtable,
                                                      t2_hashtable,
                                                      _original_type):

        pre_calced_distances = dict_()
        for added_hash in hashes_added:
            for removed_hash in hashes_removed:
                try:
                    is_close_distance = self.iterable_compare_func(
                        t2_hashtable[added_hash].item,
                        t1_hashtable[removed_hash].item)
                except CannotCompare:
                    pass
                else:
                    if is_close_distance:
                        # an arbitrarily small distance if math_epsilon is not defined
                        distance = self.math_epsilon or 0.000001
                    else:
                        distance = 1
                    pre_calced_distances["{}--{}".format(
                        added_hash, removed_hash)] = distance

        return pre_calced_distances
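
A hedged usage sketch of the compare function this method consumes: iterable_compare_func and CannotCompare are part of the public deepdiff API, though the exact callback signature can vary between versions.

from deepdiff import DeepDiff
from deepdiff.helper import CannotCompare

def compare_by_id(x, y, level=None):
    # Match list items by their 'id' field; raising CannotCompare tells
    # deepdiff to fall back to its default pairing strategy.
    try:
        return x['id'] == y['id']
    except (TypeError, KeyError):
        raise CannotCompare() from None

t1 = [{'id': 1, 'val': 10}, {'id': 2, 'val': 20}]
t2 = [{'id': 2, 'val': 30}, {'id': 1, 'val': 10}]
diff = DeepDiff(t1, t2, ignore_order=True, iterable_compare_func=compare_by_id)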
Example #3
    def __init__(self, capacity):
        self.cache = dict_()  # {key: cache_node}
        if capacity <= 0:
            raise ValueError('Capacity of LFUCache needs to be positive.'
                             )  # pragma: no cover.
        self.capacity = capacity
        self.freq_link_head = None
        self.lock = Lock()
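
A hedged sketch of the capacity check, assuming LFUCache is importable from deepdiff.lfucache as in current deepdiff releases:

from deepdiff.lfucache import LFUCache

cache = LFUCache(capacity=3)  # fine
try:
    LFUCache(0)               # non-positive capacity is rejected
except ValueError as err:
    print(err)  # Capacity of LFUCache needs to be positive.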
Example #4
    def __init__(self, tree_results=None, verbose_level=1):
        self.verbose_level = verbose_level
        # TODO: centralize keys
        self.update({
            "type_changes": dict_(),
            "dictionary_item_added": self.__set_or_dict(),
            "dictionary_item_removed": self.__set_or_dict(),
            "values_changed": dict_(),
            "unprocessed": [],
            "iterable_item_added": dict_(),
            "iterable_item_removed": dict_(),
            "attribute_added": self.__set_or_dict(),
            "attribute_removed": self.__set_or_dict(),
            "set_item_removed": PrettyOrderedSet(),
            "set_item_added": PrettyOrderedSet(),
            "repetition_change": dict_()
        })

        if tree_results:
            self._from_tree_results(tree_results)
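
A hedged sketch of the verbose_level switch above: at verbose_level >= 2 several report buckets become dicts so values can be attached to the reported paths (the exact printed form may vary by deepdiff version).

from deepdiff import DeepDiff

t1 = {'a': 1}
t2 = {'a': 1, 'b': 2}
print(DeepDiff(t1, t2, verbose_level=1))  # dictionary_item_added as a set of paths
print(DeepDiff(t1, t2, verbose_level=2))  # dictionary_item_added as {path: value}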
Example #5
    def _get_objects_to_hashes_dict(self, extract_index=0):
        """
        Return a dictionary mapping objects to their hashes (extract_index=0)
        or to the count of items that went into building them (extract_index=1).
        """
        result = dict_()
        for key, value in self.hashes.items():
            if key in RESERVED_DICT_KEYS:
                result[key] = value
            else:
                result[key] = value[extract_index]
        return result
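
This method backs the object-to-hash mapping that DeepHash exposes; a hedged sketch of the public lookup:

from deepdiff import DeepHash

obj = {'a': [1, 2]}
hashes = DeepHash(obj)
print(hashes[obj])  # the hash of the whole dict (the extract_index=0 view)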
Example #6
    def _from_tree_repetition_change(self, tree):
        if 'repetition_change' in tree:
            for change in tree['repetition_change']:
                path, _, _ = change.path(get_parent_too=True)
                repetition = RemapDict(change.additional['repetition'])
                value = change.t1
                try:
                    iterable_items_added_at_indexes = self[
                        'iterable_items_added_at_indexes'][path]
                except KeyError:
                    iterable_items_added_at_indexes = self[
                        'iterable_items_added_at_indexes'][path] = dict_()
                for index in repetition['new_indexes']:
                    iterable_items_added_at_indexes[index] = value
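
A hedged sketch of how a repetition_change report arises in the first place; report_repetition is a documented DeepDiff flag:

from deepdiff import DeepDiff

diff = DeepDiff([1, 1, 2], [1, 2, 2], ignore_order=True, report_repetition=True)
print(diff.get('repetition_change'))  # old/new repeat counts and indexes per item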
Example #7
    def _from_tree_iterable_item_added_or_removed(self, tree, report_type, delta_report_key):
        if report_type in tree:
            for change in tree[report_type]:  # report each change
                # determine change direction (added or removed)
                # Report t2 (the new one) whenever possible.
                # In cases where t2 doesn't exist (i.e. stuff removed), report t1.
                if change.t2 is not notpresent:
                    item = change.t2
                else:
                    item = change.t1

                # do the reporting
                path, param, _ = change.path(force=FORCE_DEFAULT, get_parent_too=True)
                try:
                    iterable_items_added_at_indexes = self[delta_report_key][path]
                except KeyError:
                    iterable_items_added_at_indexes = self[delta_report_key][path] = dict_()
                iterable_items_added_at_indexes[param] = item
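
These per-path, per-index buckets are what deepdiff's Delta objects replay; a hedged sketch of the round trip:

from deepdiff import DeepDiff, Delta

t1 = [1, 2, 3]
t2 = [1, 2, 3, 4]
delta = Delta(DeepDiff(t1, t2))
print(t1 + delta)  # [1, 2, 3, 4]: the delta re-applies the recorded addition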
Example #8
    def _precalculate_numpy_arrays_distance(self, hashes_added, hashes_removed,
                                            t1_hashtable, t2_hashtable,
                                            _original_type):

        # We only want to deal with 1D arrays: if the individual items are
        # themselves arrays or lists, the data is multi-dimensional, so bail out.
        if isinstance(t2_hashtable[hashes_added[0]].item, (np_ndarray, list)):
            return

        pre_calced_distances = dict_()
        added = [t2_hashtable[k].item for k in hashes_added]
        removed = [t1_hashtable[k].item for k in hashes_removed]

        if _original_type is None:
            added_numpy_compatible_type = get_homogeneous_numpy_compatible_type_of_seq(
                added)
            removed_numpy_compatible_type = get_homogeneous_numpy_compatible_type_of_seq(
                removed)
            if added_numpy_compatible_type and added_numpy_compatible_type == removed_numpy_compatible_type:
                _original_type = added_numpy_compatible_type
        if _original_type is None:
            return

        added = np_array_factory(added, dtype=_original_type)
        removed = np_array_factory(removed, dtype=_original_type)

        pairs = cartesian_product_numpy(added, removed)

        pairs_transposed = pairs.T

        distances = _get_numpy_array_distance(
            pairs_transposed[0],
            pairs_transposed[1],
            max_=self.cutoff_distance_for_pairs)

        i = 0
        for added_hash in hashes_added:
            for removed_hash in hashes_removed:
                pre_calced_distances["{}--{}".format(
                    added_hash, removed_hash)] = distances[i]
                i += 1
        return pre_calced_distances
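
cutoff_distance_for_pairs, passed above as max_, is a documented DeepDiff parameter; a hedged sketch of where it applies:

from deepdiff import DeepDiff

# With ignore_order, numeric items are paired by distance; pairs farther
# apart than cutoff_distance_for_pairs are reported as remove + add instead.
diff = DeepDiff([1.0, 20.0], [1.05, 80.0], ignore_order=True,
                cutoff_distance_for_pairs=0.3)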
Example #9
    def _do_pre_process(self):
        if self._numpy_paths and ('iterable_item_added' in self.diff
                                  or 'iterable_item_removed' in self.diff):
            preprocess_paths = dict_()
            for path, type_ in self._numpy_paths.items():
                preprocess_paths[path] = {
                    'old_type': np_ndarray,
                    'new_type': list
                }
                try:
                    type_ = numpy_dtype_string_to_type(type_)
                except Exception as e:
                    self._raise_or_log(NOT_VALID_NUMPY_TYPE.format(e))
                    continue  # pragma: no cover. Due to cPython peephole optimizer, this line doesn't get covered. https://github.com/nedbat/coveragepy/issues/198
                self.post_process_paths_to_convert[path] = {
                    'old_type': list,
                    'new_type': type_
                }
            if preprocess_paths:
                self._do_values_or_type_changed(preprocess_paths,
                                                is_type_change=True)
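
A hedged sketch of the numpy round trip this pre-processing enables, assuming Delta's documented numpy support:

import numpy as np
from deepdiff import DeepDiff, Delta

t1 = np.array([1, 2, 3], dtype=np.int8)
t2 = np.array([1, 2, 3, 4], dtype=np.int8)
delta = Delta(DeepDiff(t1, t2))
print(t1 + delta)  # expected: array([1, 2, 3, 4], dtype=int8)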
Example #10
    def __set_or_dict(self):
        return dict_() if self.verbose_level >= 2 else OrderedSetPlus()
Example #11
    def _do_ignore_order(self):
        """

            't1': [5, 1, 1, 1, 6],
            't2': [7, 1, 1, 1, 8],

            'iterable_items_added_at_indexes': {
                'root': {
                    0: 7,
                    4: 8
                }
            },
            'iterable_items_removed_at_indexes': {
                'root': {
                    4: 6,
                    0: 5
                }
            }

        """
        fixed_indexes = self.diff.get('iterable_items_added_at_indexes',
                                      dict_())
        remove_indexes = self.diff.get('iterable_items_removed_at_indexes',
                                       dict_())
        paths = set(fixed_indexes.keys()) | set(remove_indexes.keys())
        for path in paths:
            # In the case of ignore_order reports, we are pointing to the container object.
            # Thus we add a [0] to the elements so we can get the required objects and discard what we don't need.
            elem_and_details = self._get_elements_and_details(
                "{}[0]".format(path))
            if elem_and_details:
                _, parent, parent_to_obj_elem, parent_to_obj_action, obj, _, _ = elem_and_details
            else:
                continue  # pragma: no cover. Due to cPython peephole optimizer, this line doesn't get covered. https://github.com/nedbat/coveragepy/issues/198
            # copying both these dictionaries since we don't want to mutate them.
            fixed_indexes_per_path = fixed_indexes.get(path, dict_()).copy()
            remove_indexes_per_path = remove_indexes.get(path, dict_()).copy()
            fixed_indexes_values = AnySet(fixed_indexes_per_path.values())

            new_obj = []
            # Numpy's ndarray does not support truth testing via bool().
            if isinstance(obj, np_ndarray):
                there_are_old_items = obj.size > 0
            else:
                there_are_old_items = bool(obj)
            old_item_gen = self._do_ignore_order_get_old(
                obj,
                remove_indexes_per_path,
                fixed_indexes_values,
                path_for_err_reporting=path)
            while there_are_old_items or fixed_indexes_per_path:
                new_obj_index = len(new_obj)
                if new_obj_index in fixed_indexes_per_path:
                    new_item = fixed_indexes_per_path.pop(new_obj_index)
                    new_obj.append(new_item)
                elif there_are_old_items:
                    try:
                        new_item = next(old_item_gen)
                    except StopIteration:
                        there_are_old_items = False
                    else:
                        new_obj.append(new_item)
                else:
                    # pop an arbitrary remaining item from the fixed_indexes_per_path dictionary
                    self._raise_or_log(
                        INDEXES_NOT_FOUND_WHEN_IGNORE_ORDER.format(
                            fixed_indexes_per_path))
                    new_item = fixed_indexes_per_path.pop(
                        next(iter(fixed_indexes_per_path)))
                    new_obj.append(new_item)

            if isinstance(obj, tuple):
                new_obj = tuple(new_obj)
            # Making sure that the object is re-instated inside the parent especially if it was immutable
            # and we had to turn it into a mutable one. In such cases the object has a new id.
            self._simple_set_elem_value(obj=parent,
                                        path_for_err_reporting=path,
                                        elem=parent_to_obj_elem,
                                        value=new_obj,
                                        action=parent_to_obj_action)
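
A hedged end-to-end sketch matching the docstring's example; deepdiff requires report_repetition=True when a delta will be built from an ignore_order diff:

from deepdiff import DeepDiff, Delta

t1 = [5, 1, 1, 1, 6]
t2 = [7, 1, 1, 1, 8]
diff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True)
delta = Delta(diff)
print(t1 + delta)  # expected: [7, 1, 1, 1, 8]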
Example #12
    def reset(self):
        self.post_process_paths_to_convert = dict_()
Example #13
    def __init__(self,
                 obj,
                 *,
                 hashes=None,
                 exclude_types=None,
                 exclude_paths=None,
                 exclude_regex_paths=None,
                 hasher=None,
                 ignore_repetition=True,
                 significant_digits=None,
                 truncate_datetime=None,
                 number_format_notation="f",
                 apply_hash=True,
                 ignore_type_in_groups=None,
                 ignore_string_type_changes=False,
                 ignore_numeric_type_changes=False,
                 ignore_type_subclasses=False,
                 ignore_string_case=False,
                 exclude_obj_callback=None,
                 number_to_string_func=None,
                 ignore_private_variables=True,
                 parent="root",
                 **kwargs):
        if kwargs:
            raise ValueError(
                ("The following parameter(s) are not valid: %s\n"
                 "The valid parameters are obj, hashes, exclude_types, significant_digits, truncate_datetime,"
                 "exclude_paths, exclude_regex_paths, hasher, ignore_repetition, "
                 "number_format_notation, apply_hash, ignore_type_in_groups, ignore_string_type_changes, "
                 "ignore_numeric_type_changes, ignore_type_subclasses, ignore_string_case "
                 "number_to_string_func, ignore_private_variables, parent") % ', '.join(kwargs.keys()))
        if isinstance(hashes, MutableMapping):
            self.hashes = hashes
        elif isinstance(hashes, DeepHash):
            self.hashes = hashes.hashes
        else:
            self.hashes = dict_()
        exclude_types = set() if exclude_types is None else set(exclude_types)
        self.exclude_types_tuple = tuple(exclude_types)  # we need tuple for checking isinstance
        self.ignore_repetition = ignore_repetition
        self.exclude_paths = convert_item_or_items_into_set_else_none(exclude_paths)
        self.exclude_regex_paths = convert_item_or_items_into_compiled_regexes_else_none(exclude_regex_paths)
        self.hasher = default_hasher if hasher is None else hasher
        self.hashes[UNPROCESSED_KEY] = []

        self.significant_digits = self.get_significant_digits(significant_digits, ignore_numeric_type_changes)
        self.truncate_datetime = get_truncate_datetime(truncate_datetime)
        self.number_format_notation = number_format_notation
        self.ignore_type_in_groups = self.get_ignore_types_in_groups(
            ignore_type_in_groups=ignore_type_in_groups,
            ignore_string_type_changes=ignore_string_type_changes,
            ignore_numeric_type_changes=ignore_numeric_type_changes,
            ignore_type_subclasses=ignore_type_subclasses)
        self.ignore_string_type_changes = ignore_string_type_changes
        self.ignore_numeric_type_changes = ignore_numeric_type_changes
        self.ignore_string_case = ignore_string_case
        self.exclude_obj_callback = exclude_obj_callback
        # makes the hash return constant size result if true
        # the only time it should be set to False is when
        # testing the individual hash functions for different types of objects.
        self.apply_hash = apply_hash
        self.type_check_func = type_is_subclass_of_type_group if ignore_type_subclasses else type_in_type_group
        self.number_to_string = number_to_string_func or number_to_string
        self.ignore_private_variables = ignore_private_variables

        self._hash(obj, parent=parent, parents_ids=frozenset({get_id(obj)}))

        if self.hashes[UNPROCESSED_KEY]:
            logger.warning("Can not hash the following items: {}.".format(self.hashes[UNPROCESSED_KEY]))
        else:
            del self.hashes[UNPROCESSED_KEY]
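
A hedged sketch exercising two of the options above; both are documented DeepHash parameters:

from deepdiff import DeepHash

obj = {'Name': 'Alice', 'scores': (1, 2.0)}
hashes = DeepHash(obj, ignore_string_case=True, ignore_numeric_type_changes=True)
print(hashes[obj])  # one stable hash for the whole object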
Example #14
    def __init__(self,
                 t1,
                 t2,
                 down=None,
                 up=None,
                 report_type=None,
                 child_rel1=None,
                 child_rel2=None,
                 additional=None,
                 verbose_level=1):
        """
        :param child_rel1: Either:
                            - An existing ChildRelationship object describing the "down" relationship for t1; or
                            - A ChildRelationship subclass. In this case, we will create the ChildRelationship objects
                              for both t1 and t2.
                            Alternatives for child_rel1 and child_rel2 must be used consistently.
        :param child_rel2: Either:
                            - An existing ChildRelationship object describing the "down" relationship for t2; or
                            - The param argument for a ChildRelationship class we shall create.
                           Alternatives for child_rel1 and child_rel2 must be used consistently.
        """

        # The current-level object in the left hand tree
        self.t1 = t1

        # The current-level object in the right hand tree
        self.t2 = t2

        # Another DiffLevel object describing this change one level deeper down the object tree
        self.down = down

        # Another DiffLevel object describing this change one level further up the object tree
        self.up = up

        # If this object is this change's deepest level, this contains a string
        # describing the type of change, e.g. "set_item_added", "values_changed".
        self.report_type = report_type

        # For some types of changes we store additional information in this dict.
        # Currently, this is used for:
        # - values_changed: in case the changed data is a multi-line string,
        #                   we include a textual diff as additional['diff'].
        # - repetition_change: additional['repetition'],
        #                      e.g. {'old_repeat': 2, 'new_repeat': 1, 'old_indexes': [0, 2], 'new_indexes': [2]}
        # Note: don't use {} as additional's default value - it would be the same
        # shared dict object for every instance.
        self.additional = dict_() if additional is None else additional

        # The user-supplied ChildRelationship objects for t1 and t2:
        # a ChildRelationship object describing the relationship between t1 and its
        # child object, where t1's child object equals down.t1. If this relationship
        # is representable as a string, str(self.t1_child_rel) returns a formatted,
        # parsable python param string, e.g. "[2]", ".my_attribute".
        self.t1_child_rel = child_rel1

        # Another ChildRelationship object describing the relationship between t2 and its child object.
        self.t2_child_rel = child_rel2

        # Will cache result of .path() per 'force' as key for performance
        self._path = dict_()

        self.verbose_level = verbose_level
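
DiffLevel objects are what DeepDiff's tree view yields; a hedged sketch:

from deepdiff import DeepDiff

diff = DeepDiff({'a': 1}, {'a': 2}, view='tree')
level = next(iter(diff['values_changed']))  # a DiffLevel instance
print(level.t1, level.t2, level.path())    # 1 2 root['a']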
Example #15
    def __init__(self, tree_results=None, ignore_order=None):
        self.ignore_order = ignore_order

        self.update({
            "type_changes": dict_(),
            "dictionary_item_added": dict_(),
            "dictionary_item_removed": dict_(),
            "values_changed": dict_(),
            "iterable_item_added": dict_(),
            "iterable_item_removed": dict_(),
            "attribute_added": dict_(),
            "attribute_removed": dict_(),
            "set_item_removed": dict_(),
            "set_item_added": dict_(),
            "iterable_items_added_at_indexes": dict_(),
            "iterable_items_removed_at_indexes": dict_(),
        })

        if tree_results:
            self._from_tree_results(tree_results)
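
These buckets form the flat delta dictionary carried by Delta; a hedged peek at it (.diff is an internal attribute and may differ across versions):

from deepdiff import DeepDiff, Delta

delta = Delta(DeepDiff([1, 2], [1, 3]))
print(delta.diff)  # e.g. {'values_changed': {'root[1]': {'new_value': 3}}}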