Example #1
0
class CharsetConverterTest(unittest.TestCase):
    """Exercise CharsetConverter: detection, conversion and encoding listing."""

    def setUp(self):
        """Create the converter under test and locate the ``samples`` resource."""
        self.converter = CharsetConverter()
        self.samples = path(resource_filename(__package__, "samples"))

    def test_detection(self):
        """detect_encoding() reports the expected charset for both samples."""
        for charset in ("ISO-8859-1", "UTF-8"):
            sample = self.samples / ("sample1-%s.txt" % charset)
            self.assertEqual(charset,
                             self.converter.detect_encoding(sample).charset)

    def test_convertion_from_iso_to_utf(self):
        """Converting the ISO-8859-1 sample yields bytes equal to the UTF-8 one."""
        import os  # local import: only needed to close the mkstemp descriptor

        # Reference files and their checksums.
        iso = self.samples / "sample1-ISO-8859-1.txt"
        iso_checksum = self.__checksum(iso)
        utf = self.samples / "sample1-UTF-8.txt"
        utf_checksum = self.__checksum(utf)

        # mkstemp returns an already-open descriptor; close it right away so
        # it is not leaked (the file is only ever accessed by name below).
        fd, filename = tempfile.mkstemp(prefix="baudot")
        os.close(fd)
        tmp = path(filename)
        try:
            tmp_checksum = self.__checksum(tmp)
            # Sanity check before conversion: all three files differ.
            self.assertNotEqual(iso_checksum, utf_checksum)
            self.assertNotEqual(tmp_checksum, utf_checksum)
            self.assertNotEqual(iso_checksum, tmp_checksum)

            self.converter.convert_encoding(iso, tmp, "ISO-8859-1", "UTF-8")
            tmp_checksum = self.__checksum(tmp)

            # The output must now match the UTF-8 reference, not the input.
            self.assertNotEqual(iso_checksum, tmp_checksum)
            self.assertEqual(tmp_checksum, utf_checksum)
        finally:
            # Always clean up, even when an assertion above fails.
            if tmp.exists():
                tmp.remove()
        self.assertFalse(tmp.exists())

    def test_get_encodings(self):
        """get_encodings() includes a few encodings the tests rely on."""
        available = self.converter.get_encodings()
        for name in ("UTF-8", "ISO-8859-1", "windows-1251"):
            self.assertIn(name, available)

    def __checksum(self, file):
        """Return the hexadecimal MD5 digest of the contents of *file*.

        The file is read in binary mode in fixed-size blocks so that large
        files are never loaded into memory at once.
        """
        block_size = 0x10000
        digest = md5()
        with open(file, "rb") as fd:
            while True:
                chunk = fd.read(block_size)
                # An empty read means EOF; this holds for both str and bytes,
                # unlike comparing against a "" sentinel.
                if not chunk:
                    break
                digest.update(chunk)
        return digest.hexdigest()
class CharsetConverterTest(unittest.TestCase):
    """Exercise CharsetConverter: detection, conversion and encoding listing."""

    def setUp(self):
        """Create the converter under test and locate the ``samples`` resource."""
        self.converter = CharsetConverter()
        self.samples = path(resource_filename(__package__, "samples"))

    def test_detection(self):
        """detect_encoding() reports the expected charset for both samples."""
        for charset in ("ISO-8859-1", "UTF-8"):
            sample = self.samples / ("sample1-%s.txt" % charset)
            self.assertEqual(charset,
                             self.converter.detect_encoding(sample).charset)

    def test_convertion_from_iso_to_utf(self):
        """Converting the ISO-8859-1 sample yields bytes equal to the UTF-8 one."""
        import os  # local import: only needed to close the mkstemp descriptor

        # Reference files and their checksums.
        iso = self.samples / "sample1-ISO-8859-1.txt"
        iso_checksum = self.__checksum(iso)
        utf = self.samples / "sample1-UTF-8.txt"
        utf_checksum = self.__checksum(utf)

        # mkstemp returns an already-open descriptor; close it right away so
        # it is not leaked (the file is only ever accessed by name below).
        fd, filename = tempfile.mkstemp(prefix="baudot")
        os.close(fd)
        tmp = path(filename)
        try:
            tmp_checksum = self.__checksum(tmp)
            # Sanity check before conversion: all three files differ.
            self.assertNotEqual(iso_checksum, utf_checksum)
            self.assertNotEqual(tmp_checksum, utf_checksum)
            self.assertNotEqual(iso_checksum, tmp_checksum)

            self.converter.convert_encoding(iso, tmp, "ISO-8859-1", "UTF-8")
            tmp_checksum = self.__checksum(tmp)

            # The output must now match the UTF-8 reference, not the input.
            self.assertNotEqual(iso_checksum, tmp_checksum)
            self.assertEqual(tmp_checksum, utf_checksum)
        finally:
            # Always clean up, even when an assertion above fails.
            if tmp.exists():
                tmp.remove()
        self.assertFalse(tmp.exists())

    def test_get_encodings(self):
        """get_encodings() includes a few encodings the tests rely on."""
        available = self.converter.get_encodings()
        for name in ("UTF-8", "ISO-8859-1", "windows-1251"):
            self.assertIn(name, available)

    def __checksum(self, file):
        """Return the hexadecimal MD5 digest of the contents of *file*.

        The file is read in binary mode in fixed-size blocks so that large
        files are never loaded into memory at once.
        """
        block_size = 0x10000
        digest = md5()
        with open(file, "rb") as fd:
            while True:
                chunk = fd.read(block_size)
                # An empty read means EOF; this holds for both str and bytes,
                # unlike comparing against a "" sentinel.
                if not chunk:
                    break
                digest.update(chunk)
        return digest.hexdigest()
Example #3
0
 def setUp(self):
     """Build the converter under test and resolve the ``samples`` resource."""
     self.converter = CharsetConverter()
     samples_dir = resource_filename(__package__, "samples")
     self.samples = path(samples_dir)
 def setUp(self):
     """Build the converter under test and resolve the ``samples`` resource."""
     self.converter = CharsetConverter()
     samples_dir = resource_filename(__package__, "samples")
     self.samples = path(samples_dir)
Example #5
0
class FileManagerTest(unittest.TestCase):
    """Exercise FileManager: adding entries, duplicates and conversion."""

    def setUp(self):
        """Create a converter, an empty FileManager and locate ``samples``."""
        self.converter = CharsetConverter()
        self.fm = FileManager()
        self.samples = path(
            ResourceManager().resource_filename(__package__, "samples"))

    def _run(self, cmd):
        """Call ``start()`` then ``join()`` on *cmd* and return it."""
        cmd.start()
        cmd.join()
        return cmd

    def test_add_search(self):
        """Adding a folder registers it and makes its text files searchable."""
        dir1 = self.samples / "dir1"
        txt = dir1 / "sample1-ISO-8859-1.txt"
        image = dir1 / "locked.svg"
        empty = dir1 / "empty"

        self.assertEqual(0, len(self.fm))
        cmd = self._run(self.fm.add(dir1))
        self.assertEqual(1, len(self.fm))

        # Row for the folder itself.
        dir_row = cmd.search(dir1)
        self.assertIsNotNone(dir_row)
        self.assertEqual(6, len(dir1.listdir()))
        self.assertEqual(dir1, dir_row[0])
        self.assertEqual(gtk.STOCK_DIRECTORY, dir_row[1])
        self.assertEqual(dir1, dir_row[2])
        self.assertEqual("4 items", dir_row[3])
        self.assertEqual("Folder", dir_row[4])
        self.assertIsNone(dir_row[5])

        # Row for a text file inside the folder.
        txt_row = cmd.search(txt)
        self.assertIsNotNone(txt_row)
        self.assertEqual(txt, txt_row[0])
        self.assertEqual(gtk.STOCK_FILE, txt_row[1])
        self.assertEqual("sample1-ISO-8859-1.txt", txt_row[2])
        self.assertEqual("1.16 KB", txt_row[3])
        self.assertEqual("text/plain", txt_row[4])
        self.assertEqual("ISO-8859-1", txt_row[5])

        # These entries exist on disk but must not show up in the search.
        self.assertTrue(image.exists())
        self.assertIsNone(cmd.search(image))

        self.assertTrue(empty.exists())
        self.assertIsNone(cmd.search(empty))

        # Adding the empty entry explicitly still registers it.
        self._run(self.fm.add(empty))
        self.assertEqual(2, len(self.fm))

    def test_add_duplicate(self):
        """Adding the same folder twice aborts the second command."""
        dir1 = self.samples / "dir1"
        self.assertEqual(0, len(self.fm))
        self._run(self.fm.add(dir1))
        self.assertEqual(1, len(self.fm))

        mock = MockListener()
        cmd = self.fm.add(dir1)
        # The listener must be connected before the command is started.
        cmd.connect("command-aborted", mock.slot)
        self._run(cmd)
        self.assertTrue(mock.invoked)
        self.assertEqual(1, len(self.fm))

    def test_convert_copy(self):
        """Converting into a separate directory writes converted copies there."""
        orig = self.samples / "dir1"
        copy = path(tempfile.mkdtemp())
        try:
            self._run(self.fm.add(orig))
            self._run(self.fm.convert("ISO-8859-1", copy))
            converted = copy / "sample2-UTF-8.txt"
            self.assertTrue(converted.exists())
            self.assertEqual(
                "ISO-8859-1",
                self.converter.detect_encoding(converted).charset)
        finally:
            copy.rmtree()

    def test_convert_in_place(self):
        """Converting without a target directory rewrites the added files."""
        orig = self.samples / "dir1"
        tmp = path(tempfile.mkdtemp())
        copy = tmp / "copy"
        try:
            # Copy inside the try block so the temporary directory is removed
            # even when the copy itself fails.
            orig.copytree(copy)
            self._run(self.fm.add(copy))
            self._run(self.fm.convert("ISO-8859-1"))
            converted = copy / "sample2-UTF-8.txt"
            self.assertTrue(converted.exists())
            self.assertEqual(
                "ISO-8859-1",
                self.converter.detect_encoding(converted).charset)
        finally:
            tmp.rmtree()
        # Checked outside ``finally`` so it cannot mask an earlier failure.
        self.assertFalse(tmp.exists())
Example #6
0
class FileManagerTest(unittest.TestCase):
    """Exercise FileManager: adding entries, duplicates and conversion."""

    def setUp(self):
        """Create a converter, an empty FileManager and locate ``samples``."""
        self.converter = CharsetConverter()
        self.fm = FileManager()
        self.samples = path(
            ResourceManager().resource_filename(__package__, "samples"))

    def _run(self, cmd):
        """Call ``start()`` then ``join()`` on *cmd* and return it."""
        cmd.start()
        cmd.join()
        return cmd

    def test_add_search(self):
        """Adding a folder registers it and makes its text files searchable."""
        dir1 = self.samples / "dir1"
        txt = dir1 / "sample1-ISO-8859-1.txt"
        image = dir1 / "locked.svg"
        empty = dir1 / "empty"

        self.assertEqual(0, len(self.fm))
        cmd = self._run(self.fm.add(dir1))
        self.assertEqual(1, len(self.fm))

        # Row for the folder itself.
        dir_row = cmd.search(dir1)
        self.assertIsNotNone(dir_row)
        self.assertEqual(6, len(dir1.listdir()))
        self.assertEqual(dir1, dir_row[0])
        self.assertEqual(gtk.STOCK_DIRECTORY, dir_row[1])
        self.assertEqual(dir1, dir_row[2])
        self.assertEqual("4 items", dir_row[3])
        self.assertEqual("Folder", dir_row[4])
        self.assertIsNone(dir_row[5])

        # Row for a text file inside the folder.
        txt_row = cmd.search(txt)
        self.assertIsNotNone(txt_row)
        self.assertEqual(txt, txt_row[0])
        self.assertEqual(gtk.STOCK_FILE, txt_row[1])
        self.assertEqual("sample1-ISO-8859-1.txt", txt_row[2])
        self.assertEqual("1.16 KB", txt_row[3])
        self.assertEqual("text/plain", txt_row[4])
        self.assertEqual("ISO-8859-1", txt_row[5])

        # These entries exist on disk but must not show up in the search.
        self.assertTrue(image.exists())
        self.assertIsNone(cmd.search(image))

        self.assertTrue(empty.exists())
        self.assertIsNone(cmd.search(empty))

        # Adding the empty entry explicitly still registers it.
        self._run(self.fm.add(empty))
        self.assertEqual(2, len(self.fm))

    def test_add_duplicate(self):
        """Adding the same folder twice aborts the second command."""
        dir1 = self.samples / "dir1"
        self.assertEqual(0, len(self.fm))
        self._run(self.fm.add(dir1))
        self.assertEqual(1, len(self.fm))

        mock = MockListener()
        cmd = self.fm.add(dir1)
        # The listener must be connected before the command is started.
        cmd.connect("command-aborted", mock.slot)
        self._run(cmd)
        self.assertTrue(mock.invoked)
        self.assertEqual(1, len(self.fm))

    def test_convert_copy(self):
        """Converting into a separate directory writes converted copies there."""
        orig = self.samples / "dir1"
        copy = path(tempfile.mkdtemp())
        try:
            self._run(self.fm.add(orig))
            self._run(self.fm.convert("ISO-8859-1", copy))
            converted = copy / "sample2-UTF-8.txt"
            self.assertTrue(converted.exists())
            self.assertEqual(
                "ISO-8859-1",
                self.converter.detect_encoding(converted).charset)
        finally:
            copy.rmtree()

    def test_convert_in_place(self):
        """Converting without a target directory rewrites the added files."""
        orig = self.samples / "dir1"
        tmp = path(tempfile.mkdtemp())
        copy = tmp / "copy"
        try:
            # Copy inside the try block so the temporary directory is removed
            # even when the copy itself fails.
            orig.copytree(copy)
            self._run(self.fm.add(copy))
            self._run(self.fm.convert("ISO-8859-1"))
            converted = copy / "sample2-UTF-8.txt"
            self.assertTrue(converted.exists())
            self.assertEqual(
                "ISO-8859-1",
                self.converter.detect_encoding(converted).charset)
        finally:
            tmp.rmtree()
        # Checked outside ``finally`` so it cannot mask an earlier failure.
        self.assertFalse(tmp.exists())