class TestDictDiffer(TestCase):
    """Check the four key-set queries exposed by ``DictDiffer``."""

    def setUp(self):
        """Create a differ from two small dicts that differ in every way.

        The dict passed second shares 'a' unchanged, has a different value
        for 'b', has 'c' that the first lacks, and lacks the first's 'd'.
        """
        past = dict(a=1, b=1, c=0)
        current = dict(a=1, b=2, d=0)
        self.d = DictDiffer(current, past)

    def test_added(self):
        """Keys found only in the first dict are reported as added."""
        result = self.d.added()
        self.assertEqual(result, {'d'})

    def test_removed(self):
        """Keys found only in the second dict are reported as removed."""
        result = self.d.removed()
        self.assertEqual(result, {'c'})

    def test_changed(self):
        """Shared keys whose values differ are reported as changed."""
        result = self.d.changed()
        self.assertEqual(result, {'b'})

    def test_unchanged(self):
        """Shared keys with equal values are reported as unchanged."""
        result = self.d.unchanged()
        self.assertEqual(result, {'a'})
Example #2
0
    def checkDelta(self, letter):
        """Report on every entry of one MHL's delta list.

        For each entry of the selected delta (``self.deltaA`` or
        ``self.deltaB``) the opposite MHL is searched for a counterpart, first
        by each recorded hash and then by filename. When a counterpart is
        found, the two entries are compared attribute by attribute with
        ``DictDiffer`` and the differences are logged through ``logDetail``;
        otherwise the entry is logged as existing in this MHL only. Each entry
        is tallied in ``self.COUNT`` at most once.

        Note that a successful hash search rewrites ``identifier`` and
        ``identifierType`` on both the entry and its counterpart.

        Args:
            letter: ``'A'`` to walk ``self.deltaA`` against ``self.B``, or
                ``'B'`` to walk ``self.deltaB`` against ``self.A``.

        Raises:
            Exception: If ``letter`` is neither ``'A'`` nor ``'B'``.
        """
        if letter == 'A':
            delta = self.deltaA
            # Refer to the opposite MHL to access and perform searches on it
            oppositeMHL = self.B

            listLabel = '1st'
            listLabelOpposite = '2nd'
            listColor = LOG_COLOR_MHL_A
            listColorOpposite = LOG_COLOR_MHL_B
        elif letter == 'B':
            delta = self.deltaB
            oppositeMHL = self.A

            listLabel = '2nd'
            listLabelOpposite = '1st'
            listColor = LOG_COLOR_MHL_B
            listColorOpposite = LOG_COLOR_MHL_A
        else:
            raise Exception(
                "INTERNAL: Couldn't check deltas, none were specified. Specify one"
            )

        # Drop HashNonexistent placeholders, then work through the rest in
        # sorted order.
        deltaClean = sorted(
            h for h in delta if not isinstance(h, HashNonexistent))

        for entry in deltaClean:
            # Tri-state: None = nothing was searched, False = searched without
            # a match, True = counterpart found (held in hashPossible).
            foundHashPossible = None
            beenCounted = False  # Whether this entry has been tallied yet

            # Look for a counterpart through any recorded hash.
            # E.g., if XXHASH and MD5 are both present, MD5 may match.
            # NOTE(review): the original comment said the hash type already
            # used as the identifier should be skipped, but the code never
            # skipped it (the branch was a bare `pass`). Every recorded hash
            # is still searched here; confirm before turning it into a skip.
            for otherHashType, otherHashValue in entry.recordedHashes.items():
                hashPossible = oppositeMHL.findByOtherHash(
                    otherHashType, otherHashValue)
                if isinstance(hashPossible, HashNonexistent):
                    # No result for this hash, try the next one
                    foundHashPossible = False
                    continue

                # Found it. Because the match came through this hash, make it
                # the identifier on both sides so the comparison further down
                # is made on the same hash type.
                entry.identifier = otherHashValue
                entry.identifierType = otherHashType
                hashPossible.identifier = otherHashValue
                hashPossible.identifierType = otherHashType
                foundHashPossible = True
                break

            if foundHashPossible is False:
                # Searched but no matches by hash: fall back to the filename.
                hashPossible = oppositeMHL.findHashByAttribute(
                    'filename', entry.filename)
                foundHashPossible = not isinstance(hashPossible,
                                                   HashNonexistent)

            if foundHashPossible is not True:
                # No counterpart by hash or by name (or nothing to search by).
                self.COUNT['MISSING'] += 1
                logDetail(
                    '  ' +
                    color(entry.filename, listColor, attrs=LOG_COLOR_BOLD))
                logDetail('  This file only exists in',
                          color(listLabel + ' MHL', listColor) + '.')
                logDetail('      ' + 'Path:', entry.directory)
                logDetail('      ' + 'Size:', entry.sizeHuman)
                logDetail('      ' + 'Hash:', entry.identifier,
                          '({})'.format(entry.identifierType))
                continue

            # Compare the entry with its counterpart.
            diff = DictDiffer(entry.__dict__, hashPossible.__dict__)
            dAdded = diff.added()
            dRemoved = diff.removed()
            dUnchanged = diff.unchanged()
            dChanged = diff.changed()

            # First print a filename so everything fits underneath it.
            logDetail('  ' +
                      color(entry.filename, None, attrs=LOG_COLOR_BOLD))

            # Then classify the pair by its hashes.
            # NOTE(review): every branch below tallies the entry, so the
            # guarded MINOR / IMPOSSIBLE / PERFECT tallies further down can
            # currently never fire. Confirm that hash status is meant to take
            # priority over attribute differences.
            if entry.identifierType == hashPossible.identifierType:
                # Hash type is the same
                if entry.identifier == hashPossible.identifier:
                    # And so are the hashes, but check for a duplicate first
                    if entry.isDuplicate is True:
                        logDetail(
                            '      This file is a duplicate. Another file exists in this MHL with the same hash.'
                        )
                        if not beenCounted:
                            self.COUNT['DUPLICATE'] += 1
                            beenCounted = True
                        # Uses color() like every other call in this method
                        # (this one call previously used colored()).
                        logDetail(
                            '      Hash ({}):'.format(listLabel),
                            color(
                                entry.identifier +
                                ' ({})'.format(entry.identifierType),
                                listColor))
                    else:
                        if not beenCounted:
                            self.COUNT['PERFECT'] += 1
                            beenCounted = True
                        logDetail('      Hash: identical.')
                else:
                    # But the hashes are different. File has changed?
                    if not beenCounted:
                        self.COUNT['HASH_CHANGED'] += 1
                        beenCounted = True
                    logDetail(
                        color(
                            '      Hash: These hashes are different from each other. It is likely the files were different between the time the MHLs were generated.',
                            LOG_COLOR_WARNING))
            else:
                # Hash type is not the same. Unlikely to be comparable.
                if not beenCounted:
                    self.COUNT['HASH_TYPE_DIFFERENT'] += 1
                    beenCounted = True
                logDetail(
                    color(
                        "      Hash: These hashes are of different types. It's not possible to compare them.",
                        LOG_COLOR_INFORMATION))

            # Duplicates already had their hash printed above.
            if entry.isDuplicate is False:
                logDetail(
                    '      Hash ({}):'.format(listLabel),
                    color(
                        '{} ({})'.format(entry.identifier,
                                         entry.identifierType), listColor))
                logDetail(
                    '      Hash ({}):'.format(listLabelOpposite),
                    color(
                        '{} ({})'.format(hashPossible.identifier,
                                         hashPossible.identifierType),
                        listColorOpposite))

            if {'filename', 'directory', 'size'}.issubset(dUnchanged):
                # None of these attributes changed, so this is a perfect
                # match EVEN THOUGH a different preferred hash was used.
                if not beenCounted:
                    self.COUNT['PERFECT'] += 1
                    beenCounted = True
                continue

            # If the filename is the same, it was already printed at the top.
            if 'filename' in dChanged:
                if not beenCounted:
                    self.COUNT['MINOR'] += 1
                    beenCounted = True
                logDetail('      Filename: different (1st):',
                          color(entry.filename, LOG_COLOR_MHL_A))
                logDetail(
                    '                          (2nd):',
                    color(hashPossible.filename, LOG_COLOR_MHL_B))

            if 'directory' in dChanged:
                if not beenCounted:
                    self.COUNT['MINOR'] += 1
                    beenCounted = True
                logDetail('      Path: different (1st):',
                          color(entry.directory, LOG_COLOR_MHL_A))
                logDetail(
                    '                      (2nd):',
                    color(hashPossible.directory, LOG_COLOR_MHL_B))
            else:
                logDetail('      Path: identical:', entry.directory)

            if 'size' in dChanged:
                # First, check if the size is simply not specified.
                # This is not an anomaly if so.
                if entry.sizeDefined == False:
                    # The size can't be compared, so this is as good as it
                    # gets. Guarded like every other tally so an entry that
                    # was already counted is not counted a second time.
                    if not beenCounted:
                        self.COUNT['PERFECT'] += 1
                        beenCounted = True
                else:
                    # It is an anomaly if the size has changed while the hash
                    # has not. Report it as impossible, but print it anyway.
                    if not beenCounted:
                        self.COUNT['IMPOSSIBLE'] += 1
                        beenCounted = True
                    logDetail('      Size: different (1st):',
                              color(entry.sizeHuman, LOG_COLOR_MHL_A))
                    logDetail(
                        '                      (2nd):',
                        color(hashPossible.sizeHuman, LOG_COLOR_MHL_B))
            else:
                logDetail('      ' + 'Size: identical: ' +
                          hashPossible.sizeHuman)

            # Date changes are only shown and counted when the user asked for
            # them (LOG_SHOW_DATES is true).
            if 'lastmodificationdate' in dChanged and LOG_SHOW_DATES:
                if not beenCounted:
                    self.COUNT['MINOR'] += 1
                    beenCounted = True

                hModDate = showDate(entry.lastmodificationdate)
                hPModDate = showDate(hashPossible.lastmodificationdate)

                logDetail('      Modified date: different (1st):',
                          color(hModDate, LOG_COLOR_MHL_A))
                logDetail('                               (2nd):',
                          color(hPModDate, LOG_COLOR_MHL_B))

            # Briefly explain to the user what attributes were added/removed,
            # leaving date attributes out when dates are not being shown.
            if LOG_SHOW_DATES == False:
                dAddedFiltered = [
                    i for i in dAdded if i not in LIST_OF_DATE_ATTRIBUTES
                ]
                dRemovedFiltered = [
                    i for i in dRemoved if i not in LIST_OF_DATE_ATTRIBUTES
                ]
            else:
                dAddedFiltered = dAdded
                dRemovedFiltered = dRemoved

            if len(dAddedFiltered) > 0:
                dAddedString = ', '.join(str(i) for i in dAddedFiltered)
                logDetail('      These attributes exist in 1st only:',
                          color(dAddedString, LOG_COLOR_MHL_A))
            if len(dRemovedFiltered) > 0:
                dRemovedString = ', '.join(str(i) for i in dRemovedFiltered)
                logDetail('      These attributes exist in 2nd only:',
                          color(dRemovedString, LOG_COLOR_MHL_B))
Example #3
0
    def checkCommon(self):
        """Log and tally the differences within each pair in ``self.common``.

        Each item of ``self.common`` is a pair of entries; the first is
        reported as the 1st MHL's entry and the second as the 2nd's. The pair's
        attributes are compared with ``DictDiffer``. A pair whose filename,
        directory and size are all unchanged is tallied as ``PERFECT`` and
        nothing is logged for it; otherwise the differences are logged through
        ``logDetail`` and the pair is tallied in ``self.COUNT`` at most once.
        """
        for hashA, hashB in self.common:
            beenCounted = False  # Whether this pair has been tallied yet

            diff = DictDiffer(hashA.__dict__, hashB.__dict__)
            dAdded = diff.added()
            dRemoved = diff.removed()
            dChanged = diff.changed()
            dUnchanged = diff.unchanged()

            if {'filename', 'directory', 'size'}.issubset(dUnchanged):
                # None of these attributes changed, so this is a perfect
                # match. Count it and move on without logging anything.
                self.COUNT['PERFECT'] += 1
                continue

            if 'filename' in dChanged:
                if not beenCounted:
                    self.COUNT['MINOR'] += 1
                    beenCounted = True
                logDetail('  ' +
                          color(hashA.filename, 'green', attrs=LOG_COLOR_BOLD))
                logDetail('      Filename: different (1st):',
                          color(hashA.filename, LOG_COLOR_MHL_A))
                logDetail('                          (2nd):',
                          color(hashB.filename, LOG_COLOR_MHL_B))
            else:
                logDetail('  ' +
                          color(hashA.filename, None, attrs=LOG_COLOR_BOLD))

            if 'directory' in dChanged:
                if not beenCounted:
                    self.COUNT['MINOR'] += 1
                    beenCounted = True
                logDetail('      Path: different (1st):',
                          color(hashA.directory, LOG_COLOR_MHL_A))
                logDetail('                      (2nd):',
                          color(hashB.directory, LOG_COLOR_MHL_B))
            else:
                logDetail('      Path: identical: ' + hashA.directory)

            # Straight up print the hash, don't check it.
            # NOTE(review): this relies on pairs only being placed in
            # self.common when their hashes already match; that check is not
            # visible here, so confirm against the code that builds the list.
            logDetail('      Hash: identical: {} ({})'.format(
                hashA.identifier, hashA.identifierType))

            if 'size' in dChanged:
                # First, check if the size is simply not specified on either
                # side. That is not an anomaly. Guarded like every other
                # tally so a pair already counted is not counted twice.
                if hashA.sizeDefined == False or hashB.sizeDefined == False:
                    if not beenCounted:
                        self.COUNT['PERFECT'] += 1
                        beenCounted = True

                # It is an anomaly if the size has changed, but not the hash.
                # Report it as impossible, but also print it to the user.
                if not beenCounted:
                    self.COUNT['IMPOSSIBLE'] += 1
                    beenCounted = True
                logDetail('      Size: different (1st):',
                          color(hashA.sizeHuman, LOG_COLOR_MHL_A))
                logDetail('                      (2nd):',
                          color(hashB.sizeHuman, LOG_COLOR_MHL_B))
            else:
                logDetail('      ' + 'Size: identical: ' + hashA.sizeHuman)

            # Date changes are only shown and counted when the user asked for
            # them (LOG_SHOW_DATES is true).
            # NOTE(review): the dates are logged as stored, without any
            # formatting step; confirm that is the intended presentation.
            if 'lastmodificationdate' in dChanged and LOG_SHOW_DATES:
                if not beenCounted:
                    self.COUNT['MINOR'] += 1
                    beenCounted = True
                logDetail(
                    '      Modified date: different (1st):',
                    color(hashA.lastmodificationdate, LOG_COLOR_MHL_A))
                logDetail(
                    '                               (2nd):',
                    color(hashB.lastmodificationdate, LOG_COLOR_MHL_B))

            # Briefly explain to the user what attributes were added/removed,
            # leaving date attributes out when dates are not being shown.
            if LOG_SHOW_DATES == False:
                dAddedFiltered = [
                    i for i in dAdded if i not in LIST_OF_DATE_ATTRIBUTES
                ]
                dRemovedFiltered = [
                    i for i in dRemoved if i not in LIST_OF_DATE_ATTRIBUTES
                ]
            else:
                dAddedFiltered = dAdded
                dRemovedFiltered = dRemoved

            if len(dAddedFiltered) > 0:
                dAddedString = ', '.join(str(i) for i in dAddedFiltered)
                logDetail('      These attributes exist in 1st only:',
                          color(dAddedString, LOG_COLOR_MHL_A))
            if len(dRemovedFiltered) > 0:
                dRemovedString = ', '.join(str(i) for i in dRemovedFiltered)
                logDetail('      These attributes exist in 2nd only:',
                          color(dRemovedString, LOG_COLOR_MHL_B))
Example #4
0
class TestDictDiffer(TestCase):
    """Check the key-set queries and the summary helpers of ``DictDiffer``."""

    def setUp(self):
        """Create a differ from two small dicts that differ in every way.

        The dict passed second shares 'a' unchanged, has a different value
        for 'b', has 'c' that the first lacks, and lacks the first's 'd'.
        """
        past = dict(a=1, b=1, c=0)
        current = dict(a=1, b=2, d=0)
        self.d = DictDiffer(current, past)

    def test_added(self):
        """Keys found only in the first dict are reported as added."""
        result = self.d.added()
        self.assertEqual(result, {'d'})

    def test_removed(self):
        """Keys found only in the second dict are reported as removed."""
        result = self.d.removed()
        self.assertEqual(result, {'c'})

    def test_changed(self):
        """Shared keys whose values differ are reported as changed."""
        result = self.d.changed()
        self.assertEqual(result, {'b'})

    def test_unchanged(self):
        """Shared keys with equal values are reported as unchanged."""
        result = self.d.unchanged()
        self.assertEqual(result, {'a'})

    def test_changes(self):
        """The summary counts one added, one removed and one changed key."""
        expected = {'added': 1, 'removed': 1, 'changed': 1}
        self.assertEqual(self.d.changes(), expected)

    def test_changes_same(self):
        """The summary is all zeros when both dicts are equal."""
        past = dict(a=1, b=1, c=0)
        current = dict(a=1, b=1, c=0)
        tdf = DictDiffer(current, past)

        expected = {'added': 0, 'removed': 0, 'changed': 0}
        self.assertEqual(tdf.changes(), expected)

    def test_haschanges(self):
        """Differing dicts report that they have changes."""
        outcome = self.d.has_changes()
        self.assertEqual(outcome, True)

    def test_haschanges_no(self):
        """Equal dicts report that they have no changes."""
        past = dict(a=1, b=1, c=0)
        current = dict(a=1, b=1, c=0)
        tdf = DictDiffer(current, past)

        outcome = tdf.has_changes()
        self.assertEqual(outcome, False)

    def test_haschanges_empty(self):
        """Two empty dicts report that they have no changes."""
        tdf = DictDiffer(dict(), dict())
        outcome = tdf.has_changes()
        self.assertFalse(outcome)

    def test_nb_changes(self):
        """One changed value plus one removed key gives two changes."""
        past = dict(a=1, b=1, c=0)
        current = dict(a=1, b=2)
        tdf = DictDiffer(current, past)

        total = tdf.nb_changes()
        self.assertEqual(total, 2)

    def test_nb_changes_same(self):
        """Equal dicts give zero changes."""
        past = dict(a=1, b=1)
        current = dict(a=1, b=1)
        tdf = DictDiffer(current, past)

        total = tdf.nb_changes()
        self.assertEqual(total, 0)

    def test_nb_changes_full(self):
        """Dicts with no key in common give four changes for four keys."""
        past = dict(a=1, b=1)
        current = dict(d=1, f=1)
        tdf = DictDiffer(current, past)

        total = tdf.nb_changes()
        self.assertEqual(total, 4)

    def test_fulldiff(self):
        """The full diff lists added, changed and removed items with values."""
        past = dict(a=1, b=1, c=4)
        current = dict(d=1, f=1, c=5)
        tdf = DictDiffer(current, past)

        expected = {
            'added': [{'d': 1}, {'f': 1}],
            'changed': [{'key': 'c', 'old': 4, 'new': 5}],
            'removed': [{'a': 1}, {'b': 1}],
        }
        diff = tdf.fulldiff()

        self.assertEqual(diff, expected)