def test_frequency_analysis_fast0(self):
        with timer() as t1:
            rows1 = self.query('''
            SELECT fn1.performance_reduce_characters_fast(w, c)
            FROM (
                SELECT fn1.performance_map_characters_fast0(text)
                FROM daten.wiki_names
                WHERE text IS NOT NULL
            )
            GROUP BY w
            ORDER BY c DESC, w ASC''')

        with timer() as t2:
            rows2 = self.query('''
            SELECT w, c 
            FROM daten.wiki_freq
            ORDER BY c DESC, w ASC''')

        data = [tuple(x) for x in rows1]
        reference = [tuple(x) for x in rows2]
        print "test_frequency_analysis_fast0 query:", t1.duration, t2.duration
        self.compare(reference, data)
    def test_frequency_analysis(self):
        if udf.opts.lang == 'r':
            raise SkipTest('this R implementation is too slow')
        with timer() as t1:
            rows1 = self.query('''
            SELECT fn1.performance_reduce_characters(w, c)
            FROM (
                SELECT fn1.performance_map_characters(text)
                FROM daten.wiki_names
            )
            GROUP BY w
            ORDER BY c DESC, w ASC''')

        with timer() as t2:
            rows2 = self.query('''
            SELECT w, c 
            FROM daten.wiki_freq
            ORDER BY c DESC, w ASC''')

        data = [tuple(x) for x in rows1]
        reference = [tuple(x) for x in rows2]
        print "test_frequency_analysis query:", t1.duration, t2.duration
        self.compare(reference, data)
    def test_word_unicode_count(self):
        '''DWA-13860 (lua)'''
        sql = '''
            SELECT performance_reduce_counts(w, c)
            FROM (
	            SELECT performance_map_unicode_words(c3_varchar100)
	            FROM test.enginetablebigunicode
            )
            GROUP BY w
            ORDER BY 1 DESC'''

        with timer() as t:
            self.query(sql)
        self.assertLessEqual(t.duration, 11)
    def test_word_count_fast77(self):
        sql = '''
        SELECT COUNT(*) FROM (
            SELECT performance_reduce_counts_fast77(w, c)
            FROM (
	            SELECT performance_map_words(varchar02)
	            FROM test.enginetablebig1
            )
            GROUP BY w
            ORDER BY 1 DESC)'''

        with timer() as t:
            ret = self.query(sql)
        print "test_word_count_fast77 query:", t.duration, repr(ret)
        self.assertLessEqual(t.duration, 160)