def test_big5(self):
        """Distill the Big5 fixture and verify its encoding, title and content.

        The expected title and content are taken from
        ``self.test_data[10:12]``; the encoding is expected to be reported
        as ``'big5 [META]'``.
        """
        # open() is the documented way to open files; file() is Python 2 only.
        self.fp = open(testdir + 'hk.yahoo_big5.qlog', 'rb')
        title, content = self.test_data[10:12]

        result = distillML.test_distill(self.fp, self.buf, self.meta)
        self.assertEqual(0, result)
        self.assertEqual(self.meta['encoding'], 'big5 [META]')
        self.assertEqual(self.meta['title'], title)

        # Membership test rather than ``find(...) > 0`` so that a match at
        # offset 0 still counts as found.
        s = self.buf.getvalue().decode('utf8')
        self.assertTrue(content in s)
    def test_euc_kr(self):
        """Distill the EUC-KR fixture and verify its encoding, title and content.

        The expected title and content are taken from
        ``self.test_data[16:18]``; the encoding is expected to be reported
        as ``'euc-kr [HTTP]'``.
        """
        # open() is the documented way to open files; file() is Python 2 only.
        self.fp = open(testdir + 'apache_euc-kr.qlog', 'rb')
        title, content = self.test_data[16:18]

        result = distillML.test_distill(self.fp, self.buf, self.meta)
        self.assertEqual(0, result)
        self.assertEqual(self.meta['encoding'], 'euc-kr [HTTP]')
        self.assertEqual(self.meta['title'], title)

        # Membership test rather than ``find(...) > 0`` so that a match at
        # offset 0 still counts as found.
        s = self.buf.getvalue().decode('utf8')
        self.assertTrue(content in s)
    def test_utf8(self):
        """Distill the UTF-8 fixture and verify its encoding, title and content.

        The expected title and content are taken from
        ``self.test_data[1:3]``; the encoding is expected to be reported
        as ``'utf-8 [META]'``.
        """
        # open() is the documented way to open files; file() is Python 2 only.
        self.fp = open(testdir + 'ah_ying_utf8.qlog', 'rb')
        title, content = self.test_data[1:3]

        result = distillML.test_distill(self.fp, self.buf, self.meta)
        self.assertEqual(0, result)
        self.assertEqual(self.meta['encoding'], 'utf-8 [META]')
        self.assertEqual(self.meta['title'], title)

        # Membership test rather than ``find(...) > 0`` so that a match at
        # offset 0 still counts as found.
        s = self.buf.getvalue().decode('utf8')
        self.assertTrue(content in s)
    def test_iso_8851_1(self):
        """Distill the ISO-8859-1 fixture and verify encoding, title and content.

        The expected title and content are taken from
        ``self.test_data[4:6]``; the encoding is expected to be reported
        as ``'iso-8859-1 [HTTP]'``.

        NOTE(review): the method name says "8851" while the fixture and the
        expected encoding say "8859"; the name is kept so test selection by
        name keeps working.
        """
        # open() is the documented way to open files; file() is Python 2 only.
        self.fp = open(testdir + 'apache_ISO-8859-1.qlog', 'rb')
        title, content = self.test_data[4:6]

        result = distillML.test_distill(self.fp, self.buf, self.meta)
        self.assertEqual(0, result)
        self.assertEqual(self.meta['encoding'], 'iso-8859-1 [HTTP]')
        self.assertEqual(self.meta['title'], title)

        # Membership test rather than ``find(...) > 0`` so that a match at
        # offset 0 still counts as found.
        s = self.buf.getvalue().decode('utf8')
        self.assertTrue(content in s)
    def test_euc_kr(self):
        """Distill the EUC-KR fixture and verify its encoding, title and content.

        The expected title and content are taken from
        ``self.test_data[16:18]``; the encoding is expected to be reported
        as ``"euc-kr [HTTP]"``.
        """
        # open() is the documented way to open files; file() is Python 2 only.
        self.fp = open(testpath / "apache_euc-kr.qlog", "rb")
        title, content = self.test_data[16:18]

        result = distillML.test_distill(self.fp, self.buf, self.meta)
        self.assertEqual(0, result)
        self.assertEqual(self.meta["encoding"], "euc-kr [HTTP]")
        self.assertEqual(self.meta["title"], title)

        # Membership test rather than ``find(...) > 0`` so that a match at
        # offset 0 still counts as found.
        s = self.buf.getvalue().decode("utf8")
        self.assertTrue(content in s)
    def test_big5(self):
        """Distill the Big5 fixture and verify its encoding, title and content.

        The expected title and content are taken from
        ``self.test_data[10:12]``; the encoding is expected to be reported
        as ``"big5 [META]"``.
        """
        # open() is the documented way to open files; file() is Python 2 only.
        self.fp = open(testpath / "hk.yahoo_big5.qlog", "rb")
        title, content = self.test_data[10:12]

        result = distillML.test_distill(self.fp, self.buf, self.meta)
        self.assertEqual(0, result)
        self.assertEqual(self.meta["encoding"], "big5 [META]")
        self.assertEqual(self.meta["title"], title)

        # Membership test rather than ``find(...) > 0`` so that a match at
        # offset 0 still counts as found.
        s = self.buf.getvalue().decode("utf8")
        self.assertTrue(content in s)
    def test_iso_8851_1(self):
        """Distill the ISO-8859-1 fixture and verify encoding, title and content.

        The expected title and content are taken from
        ``self.test_data[4:6]``; the encoding is expected to be reported
        as ``"iso-8859-1 [HTTP]"``.

        NOTE(review): the method name says "8851" while the fixture and the
        expected encoding say "8859"; the name is kept so test selection by
        name keeps working.
        """
        # open() is the documented way to open files; file() is Python 2 only.
        self.fp = open(testpath / "apache_ISO-8859-1.qlog", "rb")
        title, content = self.test_data[4:6]

        result = distillML.test_distill(self.fp, self.buf, self.meta)
        self.assertEqual(0, result)
        self.assertEqual(self.meta["encoding"], "iso-8859-1 [HTTP]")
        self.assertEqual(self.meta["title"], title)

        # Membership test rather than ``find(...) > 0`` so that a match at
        # offset 0 still counts as found.
        s = self.buf.getvalue().decode("utf8")
        self.assertTrue(content in s)
    def test_utf8(self):
        """Distill the UTF-8 fixture and verify its encoding, title and content.

        The expected title and content are taken from
        ``self.test_data[1:3]``; the encoding is expected to be reported
        as ``"utf-8 [META]"``.
        """
        # open() is the documented way to open files; file() is Python 2 only.
        self.fp = open(testpath / "ah_ying_utf8.qlog", "rb")
        title, content = self.test_data[1:3]

        result = distillML.test_distill(self.fp, self.buf, self.meta)
        self.assertEqual(0, result)
        self.assertEqual(self.meta["encoding"], "utf-8 [META]")
        self.assertEqual(self.meta["title"], title)

        # Membership test rather than ``find(...) > 0`` so that a match at
        # offset 0 still counts as found.
        s = self.buf.getvalue().decode("utf8")
        self.assertTrue(content in s)
    def test_bad_encoding(self):
        """Check that a fixture with an invalid encoding reports the default.

        Only ``self.meta['encoding']`` is asserted; the return value of
        ``distillML.test_distill`` is not checked by this test.
        """
        # open() is the documented way to open files; file() is Python 2 only.
        self.fp = open(testdir + 'ah_ying_bad.qlog', 'rb')

        distillML.test_distill(self.fp, self.buf, self.meta)
        self.assertEqual(self.meta['encoding'], 'iso-8859-1 [DEFAULT]')     # invalid encoding -> default
    def test_bad_encoding(self):
        """Check that a fixture with an invalid encoding reports the default.

        Only ``self.meta["encoding"]`` is asserted; the return value of
        ``distillML.test_distill`` is not checked by this test.
        """
        # open() is the documented way to open files; file() is Python 2 only.
        self.fp = open(testpath / "ah_ying_bad.qlog", "rb")

        distillML.test_distill(self.fp, self.buf, self.meta)
        self.assertEqual(self.meta["encoding"], "iso-8859-1 [DEFAULT]")  # invalid encoding -> default