Example #1
0
    def test_load_csv(self):
        """Check that queries loaded from the csv file are all counted.

        Builds a word-level QueryAnalysis on `self.csv` (field 'noisy',
        header row 0), runs the text analysis and expects 300 queries.
        """

        load_options = {'token': 'word', 'field': 'noisy', 'header': 0}
        csv_analysis = QueryAnalysis(self.csv, **load_options)
        csv_analysis.analyze_text()

        expected_queries = 300
        self.assertEqual(expected_queries, csv_analysis.nqueries)
Example #2
0
    def test_bad_extension(self):
        """Test that a file with an unsupported extension is rejected.

        The path returned by ``io_utils.change_extension(self.jsonl, 'png')``
        is handed to QueryAnalysis, which must raise an exception whose
        message contains 'Unknown file extension'.
        """

        bad_path = io_utils.change_extension(self.jsonl, 'png')
        with self.assertRaises(Exception) as context:
            QueryAnalysis(bad_path, token='word', field='noisy')

        # assertIn shows both the expected text and the actual message when
        # it fails; assertTrue(... in ...) would only say "False is not true".
        self.assertIn('Unknown file extension', str(context.exception))
Example #3
0
    def test_words(self):
        """Check word-level analysis of the queries in `self.jsonl`.

        Runs the analysis with the 'get_words' cleaner, verifies that the
        query-length plot file exists afterwards, and expects 816 entries
        in the analysis data.
        """

        word_analysis = QueryAnalysis(self.jsonl, field='noisy', token='word')
        word_analysis.analyze_text(cleaner='get_words')

        # Plotting should leave a file at the temporary PNG path.
        png_path = self.temp_png
        word_analysis.plot_query_length(png_path)
        plot_written = os.path.exists(png_path)
        self.assertTrue(plot_written)

        word_analysis.info_tokens()
        expected_entries = 816
        self.assertEqual(expected_entries, len(word_analysis.data))
Example #4
0
    def test_chars(self):
        """Check character-level analysis of the queries in `self.jsonl`.

        Runs the analysis with the 'get_characters' cleaner, then verifies
        that the min-occurrences plot file exists, that the analysis data
        holds 51 entries equal to the expected per-character counts, and
        that saving the tokens leaves a file at the temporary JSON path.
        """

        # Expected count for each of the 51 characters, in the same order
        # as the reference listing (punctuation, digits, letters, accents).
        expected_counts = {
            "'": 20, ',': 1, '.': 2,
            '0': 7, '1': 6, '2': 6, '3': 2, '4': 2,
            '6': 1, '7': 2, '8': 1, '9': 1,
            '?': 2,
            'a': 570, 'b': 93, 'c': 288, 'd': 214, 'e': 1015, 'f': 87,
            'g': 143, 'h': 110, 'i': 493, 'j': 19, 'k': 20, 'l': 338,
            'm': 254, 'n': 501, 'o': 456, 'p': 220, 'q': 35, 'r': 551,
            's': 457, 't': 448, 'u': 338, 'v': 92, 'w': 6, 'x': 36,
            'y': 25, 'z': 5,
            'à': 10, 'â': 5, 'ç': 3, 'è': 10, 'é': 94, 'ê': 8,
            'ë': 1, 'î': 3, 'ï': 1, 'ô': 6, 'œ': 4, '’': 1,
        }

        char_analysis = QueryAnalysis(self.jsonl, field='noisy', token='char')
        char_analysis.analyze_text(cleaner='get_characters')

        # Plotting should leave a file at the temporary PNG path.
        mins_to_plot = [1, 2, 3]
        char_analysis.plot_minoccurrences(self.temp_png, mins=mins_to_plot)
        plot_written = os.path.exists(self.temp_png)
        self.assertTrue(plot_written)

        char_analysis.info_tokens()
        self.assertEqual(51, len(char_analysis.data))
        self.assertEqual(expected_counts, dict(char_analysis.data))

        # Saving should leave a file at the temporary JSON path.
        char_analysis.save_tokens(self.temp_json)
        tokens_saved = os.path.exists(self.temp_json)
        self.assertTrue(tokens_saved)
Example #5
0
    def test_load_text(self):
        """Check that queries loaded from the text file are all counted.

        Builds a word-level QueryAnalysis on `self.txt`, runs the text
        analysis and expects 300 queries.
        """

        txt_analysis = QueryAnalysis(self.txt, token='word')
        txt_analysis.analyze_text()

        expected_queries = 300
        self.assertEqual(expected_queries, txt_analysis.nqueries)
Example #6
0
    def test_load_jsonl(self):
        """Check that queries loaded from the jsonl file are all counted.

        Builds a word-level QueryAnalysis on the 'noisy' field of
        `self.jsonl`, runs the text analysis and expects 300 queries.
        """

        load_options = {'token': 'word', 'field': 'noisy'}
        jsonl_analysis = QueryAnalysis(self.jsonl, **load_options)
        jsonl_analysis.analyze_text()

        expected_queries = 300
        self.assertEqual(expected_queries, jsonl_analysis.nqueries)
Example #7
0
    def test_bad_token(self):
        """Test that an unsupported token type is rejected.

        Constructing QueryAnalysis with ``token='token'`` must raise an
        exception whose message contains 'Unknown token type'.
        """

        with self.assertRaises(Exception) as context:
            QueryAnalysis(self.jsonl, token='token', field='noisy')

        # assertIn shows both the expected text and the actual message when
        # it fails; assertTrue(... in ...) would only say "False is not true".
        self.assertIn('Unknown token type', str(context.exception))