예제 #1
0
    def test_options_case_sensitive(self):
        """Check word and vocab counts when ``is_case_sensitive`` is False.

        Only ``is_case_sensitive`` is changed on a fresh
        ``TextProfilerOptions``. The same text is profiled once as a single
        sample and once split across two samples; both inputs must produce
        the same expected counts.
        """
        opts = TextProfilerOptions()
        opts.is_case_sensitive = False

        # "test" and "Test" are expected under the single key "test".
        word_count = {"sentence": 1, "test": 2}
        # The expected vocab still lists "T" and "t" as separate characters.
        vocab_count = {
            "s": 5, " ": 5, "e": 5,
            "t": 4,
            "!": 3,
            "T": 2, "i": 2, "n": 2,
            "h": 1, ",": 1, "a": 1, "c": 1, ".": 1,
        }

        batches = (
            ["This is test, a Test sentence.!!!"],  # input with one sample
            ["This is test,", " a Test sentence.!!!"],  # input with two samples
        )
        for batch in batches:
            # A new profiler per input so counts never carry over.
            profiler = TextProfiler("Name", options=opts)
            profiler.update(pd.Series(batch))
            self.assertDictEqual(word_count, profiler.word_count)
            self.assertDictEqual(vocab_count, profiler.vocab_count)
예제 #2
0
    def test_options_case_sensitive(self):
        """Verify counts produced with ``is_case_sensitive`` set to False.

        All other options keep their default values. The expectations are
        identical whether the text arrives as one sample or as two.
        """
        options = TextProfilerOptions()
        options.is_case_sensitive = False

        def profiled(rows):
            # Build a fresh profiler for each input and feed it the rows.
            profiler = TextProfiler("Name", options=options)
            profiler.update(pd.Series(rows))
            return profiler

        words = {'sentence': 1, 'test': 2}
        chars = {
            's': 5, ' ': 5, 'e': 5,
            't': 4,
            '!': 3,
            'T': 2, 'i': 2, 'n': 2,
            'h': 1, ',': 1, 'a': 1, 'c': 1, '.': 1,
        }

        # input with one sample
        single = profiled(["This is test, a Test sentence.!!!"])
        self.assertDictEqual(words, single.word_count)
        self.assertDictEqual(chars, single.vocab_count)

        # input with two samples
        split = profiled(["This is test,", " a Test sentence.!!!"])
        self.assertDictEqual(words, split.word_count)
        self.assertDictEqual(chars, split.vocab_count)
예제 #3
0
    def test_report(self):
        """Check which keys ``TextProfiler.report`` returns in four scenarios.

        1. ``vocab`` and ``words`` disabled, ``remove_disabled_flag=True``:
           neither key is reported.
        2. No options, ``remove_disabled_flag=True``: both keys are reported.
        3. No options, default ``remove_disabled_flag``: both keys are
           reported.
        4. ``vocab`` enabled but ``words`` disabled,
           ``remove_disabled_flag=True``: only ``vocab`` is reported.
        """
        text = "This is test, a Test sentence.!!!"

        # Scenario 1: both features disabled, disabled entries removed.
        both_off = TextProfilerOptions()
        both_off.vocab.is_enabled = False
        both_off.words.is_enabled = False

        prof = TextProfiler("Name", both_off)
        prof.update(pd.Series([text]))

        keys = list(prof.report(remove_disabled_flag=True).keys())
        self.assertNotIn("vocab", keys)
        self.assertNotIn("words", keys)

        # Scenario 2: no options, remove_disabled_flag == True
        # (this profiler is never updated with data).
        prof = TextProfiler("Name")
        keys = list(prof.report(remove_disabled_flag=True).keys())
        self.assertIn("vocab", keys)
        self.assertIn("words", keys)

        # Scenario 3: no options, remove_disabled_flag left at its default.
        prof = TextProfiler("Name")
        keys = list(prof.report().keys())
        self.assertIn("vocab", keys)
        self.assertIn("words", keys)

        # Scenario 4: vocab enabled, words disabled, disabled entries removed.
        words_off = TextProfilerOptions()
        words_off.vocab.is_enabled = True
        words_off.words.is_enabled = False

        prof = TextProfiler("Name", words_off)
        prof.update(pd.Series([text]))

        keys = list(prof.report(remove_disabled_flag=True).keys())
        self.assertIn("vocab", keys)
        self.assertNotIn("words", keys)
예제 #4
0
    def test_options_most_common_words_count(self):
        """Check how ``top_k_words`` limits ``profile["word_count"]``.

        Scenarios, all on the same four-row sample with an empty stop-word
        list:

        * ``None``: every word is expected.
        * ``3`` and ``2``: only that many of the highest-count words are
          expected.
        * ``10`` (more than the number of distinct words): every word is
          expected.
        """
        rows = pd.Series(
            ["this is test,", " this is a test sentence", "this is", "this"])

        options = TextProfilerOptions()
        # set stop_words to empty list for easy inspection
        options.stop_words = []

        every_word = {'this': 4, 'is': 3, 'test': 2, 'a': 1, 'sentence': 1}
        scenarios = [
            (None, every_word),  # None value for number of common words
            (3, {'this': 4, 'is': 3, 'test': 2}),
            (2, {'this': 4, 'is': 3}),
            (10, every_word),  # greater than length of word_counts list
        ]

        for top_k, expected_word_count in scenarios:
            options.top_k_words = top_k
            # The profiler is rebuilt after each option change.
            profiler = TextProfiler("Name", options=options)
            profiler.update(rows)
            self.assertDictEqual(expected_word_count,
                                 profiler.profile["word_count"])
예제 #5
0
    def test_options_default(self):
        """Check word and vocab counts with an unmodified options object.

        The text is profiled as one sample and again split into two
        samples; the expected counts are the same for both. "Test" and
        "test" are expected as separate words.
        """
        opts = TextProfilerOptions()

        word_count = {'sentence': 1, 'Test': 1, 'test': 1}
        vocab_count = {
            's': 5, ' ': 5, 'e': 5,
            't': 4,
            '!': 3,
            'T': 2, 'i': 2, 'n': 2,
            'h': 1, ',': 1, 'a': 1, 'c': 1, '.': 1,
        }

        batches = (
            ["This is test, a Test sentence.!!!"],  # input with one sample
            ["This is test,", " a Test sentence.!!!"],  # input with two samples
        )
        for batch in batches:
            profiler = TextProfiler("Name", options=opts)
            profiler.update(pd.Series(batch))
            self.assertDictEqual(word_count, profiler.word_count)
            self.assertDictEqual(vocab_count, profiler.vocab_count)
예제 #6
0
    def test_options_default(self):
        """Verify the counts produced by default ``TextProfilerOptions``.

        One-sample and two-sample inputs carrying the same text must give
        identical word and vocab counts.
        """
        options = TextProfilerOptions()

        def profiled(rows):
            # Build a fresh profiler for each input and feed it the rows.
            profiler = TextProfiler("Name", options=options)
            profiler.update(pd.Series(rows))
            return profiler

        # "Test" and "test" are expected as distinct words.
        words = {"sentence": 1, "Test": 1, "test": 1}
        chars = {
            "s": 5, " ": 5, "e": 5,
            "t": 4,
            "!": 3,
            "T": 2, "i": 2, "n": 2,
            "h": 1, ",": 1, "a": 1, "c": 1, ".": 1,
        }

        # input with one sample
        single = profiled(["This is test, a Test sentence.!!!"])
        self.assertDictEqual(words, single.word_count)
        self.assertDictEqual(chars, single.vocab_count)

        # input with two samples
        split = profiled(["This is test,", " a Test sentence.!!!"])
        self.assertDictEqual(words, split.word_count)
        self.assertDictEqual(chars, split.vocab_count)
예제 #7
0
    def test_options_vocab_update(self):
        """Check that disabling ``vocab`` leaves ``vocab_count`` empty.

        Only ``vocab.is_enabled`` is changed from the defaults. Word counts
        are still expected, for both the one-sample and two-sample inputs.
        """
        opts = TextProfilerOptions()
        opts.vocab.is_enabled = False

        word_count = {'sentence': 1, 'Test': 1, 'test': 1}

        batches = (
            ["This is test, a Test sentence.!!!"],  # input with one sample
            ["This is test,", " a Test sentence.!!!"],  # input with two samples
        )
        for batch in batches:
            profiler = TextProfiler("Name", options=opts)
            profiler.update(pd.Series(batch))
            self.assertDictEqual(word_count, profiler.word_count)
            # With vocab disabled, no characters are expected.
            self.assertDictEqual({}, profiler.vocab_count)
예제 #8
0
    def test_options_most_common_chars_count(self):
        """Check how ``top_k_chars`` limits ``profile["vocab_count"]``.

        Scenarios, all on the same four-row sample:

        * ``None``: every character is expected.
        * ``3`` and ``2``: only that many of the highest-count characters
          are expected.
        * ``300`` (more than the number of distinct characters): every
          character is expected.
        """
        rows = pd.Series(
            ["this is test,", " this is a test sentence", "this is", "this"])

        options = TextProfilerOptions()

        every_char = {
            's': 10, 't': 9, ' ': 8, 'i': 7, 'e': 5,
            'h': 4, 'n': 2, ',': 1, 'a': 1, 'c': 1,
        }
        scenarios = [
            (None, every_char),  # None value for number of common chars
            (3, {'s': 10, 't': 9, ' ': 8}),
            (2, {'s': 10, 't': 9}),
            (300, every_char),  # greater than length of vocab_counts list
        ]

        for top_k, expected_vocab_count in scenarios:
            options.top_k_chars = top_k
            # The profiler is rebuilt after each option change.
            profiler = TextProfiler("Name", options=options)
            profiler.update(rows)
            self.assertDictEqual(expected_vocab_count,
                                 profiler.profile["vocab_count"])
예제 #9
0
    def test_options_stop_words(self):
        """Check word counts for two ``stop_words`` settings.

        Only ``stop_words`` is changed from the defaults. Each setting is
        tried on the text as a single sample and as two samples:

        * a list of stop words: the listed words ("is", "a", "sentence")
          are absent from the expected word count;
        * an empty list: every word is expected.

        The expected vocab count is the same in every case.
        """
        vocab_count = {
            's': 5, ' ': 5, 'e': 5,
            't': 4,
            '!': 3,
            'T': 2, 'i': 2, 'n': 2,
            'h': 1, ',': 1, 'a': 1, 'c': 1, '.': 1,
        }

        settings = [
            # with a list of stopwords
            (['hello', 'sentence', 'is', 'a'],
             {'This': 1, 'Test': 1, 'test': 1}),
            # with an empty list
            ([],
             {'This': 1, 'is': 1, 'test': 1, 'a': 1, 'Test': 1,
              'sentence': 1}),
        ]
        batches = (
            ["This is test, a Test sentence.!!!"],  # input with one sample
            ["This is test,", " a Test sentence.!!!"],  # input with two samples
        )

        for stop_words, word_count in settings:
            # Fresh options per setting, as each starts from the defaults.
            opts = TextProfilerOptions()
            opts.stop_words = stop_words

            for batch in batches:
                profiler = TextProfiler("Name", options=opts)
                profiler.update(pd.Series(batch))
                self.assertDictEqual(word_count, profiler.word_count)
                self.assertDictEqual(vocab_count, profiler.vocab_count)
예제 #10
0
    def test_diff_profiles(self):
        """Check ``TextProfiler.diff`` between two differently-fed profilers.

        Two scenarios are covered: both profilers with default options, and
        the second profiler built with ``is_case_sensitive`` False. The
        expected ``vocab`` and ``vocab_count`` entries are the same in both
        scenarios; only ``words`` and ``word_count`` differ.

        NOTE(review): the three elements of each expected list look like
        "only in the first profile", "shared" and "only in the second
        profile" -- confirm against ``TextProfiler.diff``.
        """
        left_text = "Hello my name is: Grant.!!!"
        right_text = "Bob and \"grant\", 'are' friends Grant Grant"

        # Character-level expectations shared by both scenarios.
        vocab = [
            ['H', 'l', 'm', 'y', ':', '.', '!'],
            ['e', 'o', ' ', 'n', 'a', 'i', 's', 'G', 'r', 't'],
            ['B', 'b', 'd', '"', 'g', ',', "'", 'f'],
        ]
        vocab_count = [
            {'!': 3, 'l': 2, 'm': 2, 'H': 1, 'y': 1, ':': 1, '.': 1},
            {
                ' ': -2,
                'e': 'unchanged',
                'n': -3,
                'a': -3,
                'o': 'unchanged',
                'i': 'unchanged',
                's': 'unchanged',
                'G': -1,
                'r': -4,
                't': -2,
            },
            {'d': 2, '"': 2, "'": 2, 'B': 1, 'b': 1, 'g': 1, ',': 1, 'f': 1},
        ]

        # Scenario 1: both profilers use default options.
        first = TextProfiler("Name")
        first.update(pd.Series([left_text]))

        second = TextProfiler("Name")
        second.update(pd.Series([right_text]))

        expected = {
            'vocab': vocab,
            'vocab_count': vocab_count,
            'words': [['Hello', 'name'], ['Grant'],
                      ['Bob', 'grant', 'friends']],
            'word_count': [
                {'Hello': 1, 'name': 1},
                {'Grant': -1},
                {'Bob': 1, 'grant': 1, 'friends': 1},
            ],
        }
        self.assertDictEqual(expected, first.diff(second))

        # Scenario 2: test when one profiler is not case sensitive.
        first = TextProfiler("Name")
        first.update(pd.Series([left_text]))

        insensitive = TextProfilerOptions()
        insensitive.is_case_sensitive = False
        second = TextProfiler("Name", options=insensitive)
        second.update(pd.Series([right_text]))

        # Word keys are expected in lower case for this scenario.
        expected = {
            'vocab': vocab,
            'vocab_count': vocab_count,
            'words': [['hello', 'name'], ['grant'], ['bob', 'friends']],
            'word_count': [
                {'hello': 1, 'name': 1},
                {'grant': -2},
                {'bob': 1, 'friends': 1},
            ],
        }
        self.assertDictEqual(expected, first.diff(second))
예제 #11
0
    def test_options_most_common_chars_count(self):
        """Verify that ``top_k_chars`` caps the reported ``vocab_count``.

        The same four-row sample is profiled with ``top_k_chars`` set to
        ``None``, ``3``, ``2`` and ``300``. ``None`` and ``300`` (larger
        than the number of distinct characters) are expected to report
        every character; ``3`` and ``2`` only the highest-count ones.
        """
        sample = pd.Series(
            ["this is test,", " this is a test sentence", "this is", "this"])
        full_vocab = {
            "s": 10, "t": 9, " ": 8, "i": 7, "e": 5,
            "h": 4, "n": 2, ",": 1, "a": 1, "c": 1,
        }
        expected_by_top_k = (
            (None, full_vocab),
            (3, {"s": 10, "t": 9, " ": 8}),
            (2, {"s": 10, "t": 9}),
            (300, full_vocab),
        )

        options = TextProfilerOptions()
        for limit, expected in expected_by_top_k:
            options.top_k_chars = limit
            # A fresh profiler is built after each change to the options.
            text_profile = TextProfiler("Name", options=options)
            text_profile.update(sample)
            profile = text_profile.profile
            self.assertDictEqual(expected, profile["vocab_count"])
예제 #12
0
    def test_options_stop_words(self):
        """Verify word counts with a stop-word list and with an empty list.

        Other options remain at their default values. For each
        ``stop_words`` setting the text is profiled as one sample and as
        two samples, and both must match the same expectations. The
        expected vocab count does not depend on the stop words.
        """

        def check(options, words, chars):
            # Profile the one-sample and two-sample inputs with `options`.
            inputs = (
                ["This is test, a Test sentence.!!!"],
                ["This is test,", " a Test sentence.!!!"],
            )
            for rows in inputs:
                text_profile = TextProfiler("Name", options=options)
                text_profile.update(pd.Series(rows))
                self.assertDictEqual(words, text_profile.word_count)
                self.assertDictEqual(chars, text_profile.vocab_count)

        chars = {
            "s": 5, " ": 5, "e": 5,
            "t": 4,
            "!": 3,
            "T": 2, "i": 2, "n": 2,
            "h": 1, ",": 1, "a": 1, "c": 1, ".": 1,
        }

        # with a list of stopwords: "is", "a" and "sentence" are not expected
        options = TextProfilerOptions()
        options.stop_words = ["hello", "sentence", "is", "a"]
        check(options, {"This": 1, "Test": 1, "test": 1}, chars)

        # with an empty list: every word is expected
        options = TextProfilerOptions()
        options.stop_words = []
        check(
            options,
            {
                "This": 1,
                "is": 1,
                "test": 1,
                "a": 1,
                "Test": 1,
                "sentence": 1,
            },
            chars,
        )
예제 #13
0
    def test_diff_profiles(self):
        """Verify the dict returned by ``TextProfiler.diff``.

        The first scenario diffs two profilers built with default options;
        the second rebuilds the second profiler with ``is_case_sensitive``
        False. The character-level expectations (``vocab`` and
        ``vocab_count``) are identical in both scenarios, while the
        word-level ones change.

        NOTE(review): each expected value is a three-element list that
        appears to hold first-only, shared and second-only entries --
        confirm against ``TextProfiler.diff``.
        """

        def profiled(text, options=None):
            # Mirror the original construction: options only when given.
            if options is None:
                profiler = TextProfiler("Name")
            else:
                profiler = TextProfiler("Name", options=options)
            profiler.update(pd.Series([text]))
            return profiler

        text_a = "Hello my name is: Grant.!!!"
        text_b = "Bob and \"grant\", 'are' friends Grant Grant"

        char_expectations = {
            "vocab": [
                ["H", "l", "m", "y", ":", ".", "!"],
                ["e", "o", " ", "n", "a", "i", "s", "G", "r", "t"],
                ["B", "b", "d", '"', "g", ",", "'", "f"],
            ],
            "vocab_count": [
                {"!": 3, "l": 2, "m": 2, "H": 1, "y": 1, ":": 1, ".": 1},
                {
                    " ": -2,
                    "e": "unchanged",
                    "n": -3,
                    "a": -3,
                    "o": "unchanged",
                    "i": "unchanged",
                    "s": "unchanged",
                    "G": -1,
                    "r": -4,
                    "t": -2,
                },
                {
                    "d": 2,
                    '"': 2,
                    "'": 2,
                    "B": 1,
                    "b": 1,
                    "g": 1,
                    ",": 1,
                    "f": 1,
                },
            ],
        }

        # Scenario 1: both profilers use default options.
        text_profile1 = profiled(text_a)
        text_profile2 = profiled(text_b)

        expected_diff = dict(char_expectations)
        expected_diff["words"] = [
            ["Hello", "name"],
            ["Grant"],
            ["Bob", "grant", "friends"],
        ]
        expected_diff["word_count"] = [
            {"Hello": 1, "name": 1},
            {"Grant": -1},
            {"Bob": 1, "grant": 1, "friends": 1},
        ]
        self.assertDictEqual(expected_diff, text_profile1.diff(text_profile2))

        # Scenario 2: test when one profiler is not case sensitive.
        text_profile1 = profiled(text_a)

        options = TextProfilerOptions()
        options.is_case_sensitive = False
        text_profile2 = profiled(text_b, options)

        # Word keys are expected in lower case for this scenario.
        expected_diff = dict(char_expectations)
        expected_diff["words"] = [
            ["hello", "name"],
            ["grant"],
            ["bob", "friends"],
        ]
        expected_diff["word_count"] = [
            {"hello": 1, "name": 1},
            {"grant": -2},
            {"bob": 1, "friends": 1},
        ]
        self.assertDictEqual(expected_diff, text_profile1.diff(text_profile2))