Beispiel #1
0
    def test_diff_profiles(self):
        """Check ``TextProfiler.diff`` on two single-row text samples.

        The comparison is run twice: first with both profilers constructed
        without options, then with the second profiler configured as not
        case sensitive. The expected character-level results are the same
        in both runs; only the expected word-level results differ.
        """
        left_text = "Hello my name is: Grant.!!!"
        right_text = "Bob and \"grant\", 'are' friends Grant Grant"

        # Expected layout of every entry: [only in first sample,
        # present in both samples, only in second sample].
        expected_vocab = [
            ['H', 'l', 'm', 'y', ':', '.', '!'],
            ['e', 'o', ' ', 'n', 'a', 'i', 's', 'G', 'r', 't'],
            ['B', 'b', 'd', '"', 'g', ',', "'", 'f'],
        ]
        # For shared characters the expected value is the count in the first
        # sample minus the count in the second, or 'unchanged' when equal.
        expected_vocab_count = [
            {'H': 1, 'l': 2, 'm': 2, 'y': 1, ':': 1, '.': 1, '!': 3},
            {
                'e': 'unchanged',
                'o': 'unchanged',
                'i': 'unchanged',
                's': 'unchanged',
                ' ': -2,
                'n': -3,
                'a': -3,
                'G': -1,
                'r': -4,
                't': -2,
            },
            {'B': 1, 'b': 1, 'd': 2, '"': 2, 'g': 1, ',': 1, "'": 2, 'f': 1},
        ]

        # Scenario 1: both profilers are built without options.
        profile_a = TextProfiler("Name")
        profile_a.update(pd.Series([left_text]))

        profile_b = TextProfiler("Name")
        profile_b.update(pd.Series([right_text]))

        case_sensitive_expectation = {
            'vocab': expected_vocab,
            'vocab_count': expected_vocab_count,
            'words': [
                ['Hello', 'name'],
                ['Grant'],
                ['Bob', 'grant', 'friends'],
            ],
            'word_count': [
                {'Hello': 1, 'name': 1},
                {'Grant': -1},
                {'Bob': 1, 'grant': 1, 'friends': 1},
            ],
        }
        self.assertDictEqual(case_sensitive_expectation,
                             profile_a.diff(profile_b))

        # Test when one profiler is not case sensitive
        profile_a = TextProfiler("Name")
        profile_a.update(pd.Series([left_text]))

        insensitive_options = TextProfilerOptions()
        insensitive_options.is_case_sensitive = False
        profile_b = TextProfiler("Name", options=insensitive_options)
        profile_b.update(pd.Series([right_text]))

        # Word-level expectations are lowercase in this scenario, so
        # 'grant' and 'Grant' are expected to be counted as one word.
        mixed_case_expectation = {
            'vocab': expected_vocab,
            'vocab_count': expected_vocab_count,
            'words': [['hello', 'name'], ['grant'], ['bob', 'friends']],
            'word_count': [
                {'hello': 1, 'name': 1},
                {'grant': -2},
                {'bob': 1, 'friends': 1},
            ],
        }
        self.assertDictEqual(mixed_case_expectation,
                             profile_a.diff(profile_b))
Beispiel #2
0
    def test_diff_profiles(self):
        """Verify the dictionary returned by ``TextProfiler.diff``.

        Two scenarios use the same pair of input strings: one where both
        profilers are created without options, and one where the second
        profiler has ``is_case_sensitive`` set to ``False``. The expected
        ``vocab`` and ``vocab_count`` entries are the same in both
        scenarios; the expected ``words`` and ``word_count`` entries differ.
        """
        greeting = "Hello my name is: Grant.!!!"
        friends_line = "Bob and \"grant\", 'are' friends Grant Grant"

        # Character-level expectations asserted in both scenarios. Each list
        # holds three parts: first sample only, both samples, second only.
        char_level = {
            "vocab": [
                ["H", "l", "m", "y", ":", ".", "!"],
                ["e", "o", " ", "n", "a", "i", "s", "G", "r", "t"],
                ["B", "b", "d", '"', "g", ",", "'", "f"],
            ],
            "vocab_count": [
                {"H": 1, "l": 2, "m": 2, "y": 1, ":": 1, ".": 1, "!": 3},
                {
                    "e": "unchanged",
                    "o": "unchanged",
                    "i": "unchanged",
                    "s": "unchanged",
                    " ": -2,
                    "n": -3,
                    "a": -3,
                    "G": -1,
                    "r": -4,
                    "t": -2,
                },
                {
                    "B": 1,
                    "b": 1,
                    "d": 2,
                    '"': 2,
                    "g": 1,
                    ",": 1,
                    "'": 2,
                    "f": 1,
                },
            ],
        }

        # Scenario 1: neither profiler receives options.
        base_profile = TextProfiler("Name")
        base_profile.update(pd.Series([greeting]))

        other_profile = TextProfiler("Name")
        other_profile.update(pd.Series([friends_line]))

        expected_default = {
            **char_level,
            "words": [
                ["Hello", "name"],
                ["Grant"],
                ["Bob", "grant", "friends"],
            ],
            "word_count": [
                {"Hello": 1, "name": 1},
                {"Grant": -1},
                {"Bob": 1, "grant": 1, "friends": 1},
            ],
        }
        self.assertDictEqual(expected_default,
                             base_profile.diff(other_profile))

        # Test when one profiler is not case sensitive
        base_profile = TextProfiler("Name")
        base_profile.update(pd.Series([greeting]))

        relaxed_options = TextProfilerOptions()
        relaxed_options.is_case_sensitive = False
        other_profile = TextProfiler("Name", options=relaxed_options)
        other_profile.update(pd.Series([friends_line]))

        # Word-level expectations are lowercase here, so "grant" covers
        # both capitalisations found in the second sample.
        expected_relaxed = {
            **char_level,
            "words": [["hello", "name"], ["grant"], ["bob", "friends"]],
            "word_count": [
                {"hello": 1, "name": 1},
                {"grant": -2},
                {"bob": 1, "friends": 1},
            ],
        }
        self.assertDictEqual(expected_relaxed,
                             base_profile.diff(other_profile))