def test_diff_profiles(self):
    """Check ``TextProfiler.diff`` against hand-written expected dictionaries.

    Two scenarios are exercised on the same pair of input strings:

    1. both profilers are built with default options;
    2. the second profiler is built with ``is_case_sensitive = False``.

    In the expected literals every key maps to a three-element list; judging
    by the input strings the parts are laid out as
    ``[only in profile 1, shared, only in profile 2]``.
    """
    first_text = "Hello my name is: Grant.!!!"
    second_text = "Bob and \"grant\", 'are' friends Grant Grant"

    # The character-level part of the expected diff is the same in both
    # scenarios, so it is written once and merged into each expectation.
    char_level_expected = {
        "vocab": [
            ["H", "l", "m", "y", ":", ".", "!"],
            ["e", "o", " ", "n", "a", "i", "s", "G", "r", "t"],
            ["B", "b", "d", '"', "g", ",", "'", "f"],
        ],
        "vocab_count": [
            {"!": 3, "l": 2, "m": 2, "H": 1, "y": 1, ":": 1, ".": 1},
            {
                " ": -2,
                "e": "unchanged",
                "n": -3,
                "a": -3,
                "o": "unchanged",
                "i": "unchanged",
                "s": "unchanged",
                "G": -1,
                "r": -4,
                "t": -2,
            },
            {"d": 2, '"': 2, "'": 2, "B": 1, "b": 1, "g": 1, ",": 1, "f": 1},
        ],
    }

    # Scenario 1: both profilers use default options.
    left_profile = TextProfiler("Name")
    left_profile.update(pd.Series([first_text]))

    right_profile = TextProfiler("Name")
    right_profile.update(pd.Series([second_text]))

    expected_default = {
        **char_level_expected,
        "words": [["Hello", "name"], ["Grant"], ["Bob", "grant", "friends"]],
        "word_count": [
            {"Hello": 1, "name": 1},
            {"Grant": -1},
            {"Bob": 1, "grant": 1, "friends": 1},
        ],
    }
    self.assertDictEqual(expected_default, left_profile.diff(right_profile))

    # Scenario 2: one profiler is not case sensitive.
    left_profile = TextProfiler("Name")
    left_profile.update(pd.Series([first_text]))

    insensitive_options = TextProfilerOptions()
    insensitive_options.is_case_sensitive = False
    right_profile = TextProfiler("Name", options=insensitive_options)
    right_profile.update(pd.Series([second_text]))

    expected_mixed_case = {
        **char_level_expected,
        "words": [["hello", "name"], ["grant"], ["bob", "friends"]],
        "word_count": [
            {"hello": 1, "name": 1},
            {"grant": -2},
            {"bob": 1, "friends": 1},
        ],
    }
    self.assertDictEqual(expected_mixed_case, left_profile.diff(right_profile))
def test_diff_profiles(self):
    """Verify the dictionary returned by ``diff`` for two text profiles.

    The same two sample strings are profiled twice: first with both
    profilers on default options, then with the second profiler configured
    with ``is_case_sensitive = False``. Only the word-level expectations
    differ between the two runs.
    """
    sample_one = "Hello my name is: Grant.!!!"
    sample_two = "Bob and \"grant\", 'are' friends Grant Grant"

    def profile_of(text, **profiler_kwargs):
        # Build a profiler named "Name" and feed it a one-element Series.
        profiler = TextProfiler("Name", **profiler_kwargs)
        profiler.update(pd.Series([text]))
        return profiler

    def expected_with(words, word_count):
        # Vocabulary expectations are identical for both runs; only the
        # word-level entries are supplied by the caller.
        return {
            "vocab": [
                ["H", "l", "m", "y", ":", ".", "!"],
                ["e", "o", " ", "n", "a", "i", "s", "G", "r", "t"],
                ["B", "b", "d", '"', "g", ",", "'", "f"],
            ],
            "vocab_count": [
                {"!": 3, "l": 2, "m": 2, "H": 1, "y": 1, ":": 1, ".": 1},
                {
                    " ": -2,
                    "e": "unchanged",
                    "n": -3,
                    "a": -3,
                    "o": "unchanged",
                    "i": "unchanged",
                    "s": "unchanged",
                    "G": -1,
                    "r": -4,
                    "t": -2,
                },
                {"d": 2, '"': 2, "'": 2, "B": 1, "b": 1, "g": 1, ",": 1, "f": 1},
            ],
            "words": words,
            "word_count": word_count,
        }

    # Run 1: default options on both sides.
    profile_a = profile_of(sample_one)
    profile_b = profile_of(sample_two)
    self.assertDictEqual(
        expected_with(
            words=[["Hello", "name"], ["Grant"], ["Bob", "grant", "friends"]],
            word_count=[
                {"Hello": 1, "name": 1},
                {"Grant": -1},
                {"Bob": 1, "grant": 1, "friends": 1},
            ],
        ),
        profile_a.diff(profile_b),
    )

    # Run 2: test when one profiler is not case sensitive.
    profile_a = profile_of(sample_one)
    options = TextProfilerOptions()
    options.is_case_sensitive = False
    profile_b = profile_of(sample_two, options=options)
    self.assertDictEqual(
        expected_with(
            words=[["hello", "name"], ["grant"], ["bob", "friends"]],
            word_count=[
                {"hello": 1, "name": 1},
                {"grant": -2},
                {"bob": 1, "friends": 1},
            ],
        ),
        profile_a.diff(profile_b),
    )