Ejemplo n.º 1
0
    def test_options_most_common_chars_count(self):
        """Check the ``vocab_count`` reported for several ``top_k_chars`` values.

        The same four-string sample is profiled once per setting, each time
        with a new ``TextProfiler`` that shares a single options object.
        Expected results, in the order they are checked:

        * ``None`` -> every character count is reported.
        * ``3`` / ``2`` -> only that many of the highest counts are reported.
        * ``300`` (more than the number of distinct characters) -> every
          character count is reported.
        """
        texts = pd.Series(
            ["this is test,", " this is a test sentence", "this is", "this"])

        # Counts expected when the option does not truncate the result.
        all_counts = {
            's': 10, 't': 9, ' ': 8, 'i': 7, 'e': 5,
            'h': 4, 'n': 2, ',': 1, 'a': 1, 'c': 1,
        }

        # (top_k_chars setting, vocab_count the profile should report)
        cases = [
            (None, all_counts),
            (3, {'s': 10, 't': 9, ' ': 8}),
            (2, {'s': 10, 't': 9}),
            (300, all_counts),
        ]

        opts = TextProfilerOptions()
        for top_k, expected in cases:
            # The options object is mutated in place; a new profiler is
            # built for every setting.
            opts.top_k_chars = top_k
            profiler = TextProfiler("Name", options=opts)
            profiler.update(texts)
            self.assertDictEqual(expected, profiler.profile["vocab_count"])
Ejemplo n.º 2
0
    def test_options_most_common_chars_count(self):
        """Exercise ``top_k_chars`` and assert the resulting ``vocab_count``.

        One options object is reused and its ``top_k_chars`` attribute is
        changed between checks; every check profiles the same sample with a
        newly constructed ``TextProfiler``.
        """
        options = TextProfilerOptions()
        sample = pd.Series(
            ["this is test,", " this is a test sentence", "this is", "this"])

        def vocab_count_for(top_k):
            """Profile the sample with ``top_k_chars`` set to *top_k*."""
            options.top_k_chars = top_k
            text_profile = TextProfiler("Name", options=options)
            text_profile.update(sample)
            return text_profile.profile["vocab_count"]

        # Expected result whenever the option does not cut the list short.
        full_vocab = {
            "s": 10,
            "t": 9,
            " ": 8,
            "i": 7,
            "e": 5,
            "h": 4,
            "n": 2,
            ",": 1,
            "a": 1,
            "c": 1,
        }

        # None value for number of common chars
        self.assertDictEqual(full_vocab, vocab_count_for(None))

        # set number of common chars to 3
        self.assertDictEqual({"s": 10, "t": 9, " ": 8}, vocab_count_for(3))

        # change number of common chars
        self.assertDictEqual({"s": 10, "t": 9}, vocab_count_for(2))

        # number of common chars greater than the number of distinct chars
        self.assertDictEqual(full_vocab, vocab_count_for(300))