Ejemplo n.º 1
0
 def test_write_to_ro_file(self):
     """Opening a missing file read-only and writing to it raises IOError.

     The target file must not exist before the attempt and must not have
     been created by it.
     """
     target = "dummy.lzma"
     assert not os.path.exists(target)
     with self.assertRaises(IOError):
         read_only = CompressedFile(target, mode="r")
         read_only.write("testing...")
     assert not os.path.exists(target)
Ejemplo n.º 2
0
 def test_write_to_ro_file(self):
     """Opening a missing file read-only and writing to it raises IOError.

     The target file must not exist before the attempt and must not have
     been created by it.
     """
     target = "dummy.lzma"
     assert not os.path.exists(target)
     with self.assertRaises(IOError):
         read_only = CompressedFile(target, mode="r")
         read_only.write("testing...")
     assert not os.path.exists(target)
Ejemplo n.º 3
0
 def compress_from(self, filetype, force_popen):
     """Compress the raw test file into a ``filetype`` archive and verify it.

     The archive is written to "from_test.<filetype>" in the test
     directory, checked with ``verify_contents`` and then deleted.
     """
     test_dir = self.get_test_dir()
     source_path = self.get_raw_test_file()
     target_path = os.path.join(test_dir, "from_test." + filetype)
     assert not os.path.exists(target_path)

     writer = CompressedFile(target_path, mode="w", force_popen=force_popen)
     writer.compress_from(source_path)
     writer.close()

     self.verify_contents(target_path, force_popen)
     os.remove(target_path)
Ejemplo n.º 4
0
 def test_compress_from_cleanup(self):
     """``compress_from(..., remove_original=True)`` deletes the source file.

     The raw test file must exist beforehand and be gone once the
     compressed file has been closed; the compressed output is removed
     at the end.
     """
     target_path = os.path.join(self.get_test_dir(), "cleanup_test.gz")
     source_path = self.get_raw_test_file()
     assert os.path.exists(source_path)

     writer = CompressedFile(target_path, mode="w")
     writer.compress_from(source_path, remove_original=True)
     writer.close()

     assert not os.path.exists(source_path)
     os.remove(target_path)
Ejemplo n.º 5
0
    def decompress_one_file(self, filetype, force_popen):
        """Read "test.txt.<filetype>" and compare it with the expected data.

        Every line yielded by the compressed file is stripped and collected.
        The reader's ``line_num`` must equal the number of expected lines,
        and each collected line must match the expected line at that index.
        """
        source_path = os.path.join(self.get_test_dir(), "test.txt." + filetype)
        reader = CompressedFile(source_path, force_popen=force_popen)
        stripped = [raw.strip() for raw in reader]

        expected = self.get_test_data()
        self.assertEqual(len(expected), reader.line_num)
        reader.close()
        for idx in range(reader.line_num):
            self.assertEqual(expected[idx], stripped[idx])
Ejemplo n.º 6
0
    def decompress_one_file(self, filetype, force_popen):
        """Read "test.txt.<filetype>" and compare it with the expected data.

        Every line yielded by the compressed file is stripped and collected.
        The reader's ``line_num`` must equal the number of expected lines,
        and each collected line must match the expected line at that index.
        """
        source_path = os.path.join(self.get_test_dir(),
                                   "test.txt." + filetype)
        reader = CompressedFile(source_path, force_popen=force_popen)
        stripped = [raw.strip() for raw in reader]

        expected = self.get_test_data()
        self.assertEqual(len(expected), reader.line_num)
        reader.close()
        for idx in range(reader.line_num):
            self.assertEqual(expected[idx], stripped[idx])
Ejemplo n.º 7
0
    def test_detect_compression_type(self):
        """Check extension-based detection for every supported type.

        For each supported type, a path ending in that type must be detected
        as such, an explicitly supplied compression type must be kept as
        given, and "auto" must resolve to the type named by the extension.
        """
        detector = CompressedFile("dummy.gz")
        for ctype in self.get_supported_compression_types():
            path = "/path/to/some.compressed.file." + ctype
            self.assertEqual(ctype, detector.detect_compression_type(path))

            # An explicit compression type is stored unchanged.
            explicit = CompressedFile(path, compression_type="foo")
            self.assertEqual("foo", explicit.compression_type)

            # "auto" falls back to the type implied by the extension.
            automatic = CompressedFile(path, compression_type="auto")
            self.assertEqual(ctype, automatic.compression_type)
Ejemplo n.º 8
0
    def test_detect_compression_type(self):
        """Check extension-based detection for every supported type.

        For each supported type, a path ending in that type must be detected
        as such, an explicitly supplied compression type must be kept as
        given, and "auto" must resolve to the type named by the extension.
        """
        detector = CompressedFile("dummy.gz")
        for ctype in self.get_supported_compression_types():
            path = "/path/to/some.compressed.file." + ctype
            self.assertEqual(ctype, detector.detect_compression_type(path))

            # An explicit compression type is stored unchanged.
            explicit = CompressedFile(path, compression_type="foo")
            self.assertEqual("foo", explicit.compression_type)

            # "auto" falls back to the type implied by the extension.
            automatic = CompressedFile(path, compression_type="auto")
            self.assertEqual(ctype, automatic.compression_type)
Ejemplo n.º 9
0
    def compress_one_file(self, filetype, force_popen):
        """Round-trip the test data through a compressed file of ``filetype``.

        The test data is written line by line to "write_test.<filetype>" in
        the test directory, read back, and compared with what was written.
        The file is removed once the comparison has succeeded.
        """
        base_dir = self.get_test_dir()
        write_test_file = os.path.join(base_dir, "write_test." + filetype)
        assert not os.path.exists(write_test_file)
        writer = CompressedFile(write_test_file, mode="w",
                                force_popen=force_popen)

        # Write the test data, one line per record.
        lines = self.get_test_data()
        for line in lines:
            writer.write(line + "\n")
        writer.close()

        # Read it back.  Always close the reader so that no handle (or
        # helper process) is still attached to the file when it is removed.
        reader = CompressedFile(write_test_file, mode="r",
                                force_popen=force_popen)
        try:
            after = [line.strip() for line in reader]
        finally:
            reader.close()

        # Make sure the round trip preserved every line.
        self.assertEqual(len(lines), len(after))
        for written, read_back in zip(lines, after):
            self.assertEqual(written, read_back)

        # All is well, remove the file.
        os.remove(write_test_file)
Ejemplo n.º 10
0
 def open_input_file(self, input_file):
     """Return a ``CompressedFile`` for ``input_file``.

     Non-remote inputs are opened by their own name.  Remote inputs are
     read from the "cache" directory under ``self.work_dir``, on the
     assumption that they have already been downloaded there.
     """
     name = input_file.name
     if not input_file.remote:
         return CompressedFile(name)
     cached_path = os.path.join(self.work_dir, "cache", name)
     return CompressedFile(cached_path)
Ejemplo n.º 11
0
 def test_read_from_wo_file(self):
     """Iterating over a file opened for writing must raise IOError."""
     with self.assertRaises(IOError):
         write_only = CompressedFile("dummy.lzma", mode="w")
         # Asking for a single line is enough to trigger the read attempt.
         for _ in write_only:
             break
Ejemplo n.º 12
0
    def light_versus_heavy(self, file_type, force_popen):
        """Compare output sizes at compression levels 1 and 9.

        The same 2000-line payload is written at each level to a temporary
        file in the test directory.  Both outputs must be smaller than the
        raw payload, and the level-1 output must be larger than the level-9
        output.
        """
        base_dir = self.get_test_dir()
        test_contents = "\n".join("Hello there {0}!".format(i) for i in range(2000))

        def compressed_size(level):
            # Write the payload at this level, measure the file, then delete it.
            path = os.path.join(base_dir, "clevel_test.{}.{}.{}".format(level, force_popen, file_type))
            assert not os.path.exists(path)
            writer = CompressedFile(path, mode="w", force_popen=force_popen, compression_level=level)
            writer.write(test_contents)
            writer.close()
            size = os.stat(path).st_size
            os.remove(path)
            return size

        light_compression_size = compressed_size(1)
        heavy_compression_size = compressed_size(9)
        raw_size = len(test_contents)

        common_msg = "size should be less than raw size for type {0} (popen={1})".format(file_type, force_popen)
        self.assertTrue(light_compression_size < raw_size, msg="Lightly Compressed " + common_msg)
        self.assertTrue(heavy_compression_size < raw_size, msg="Heavily Compressed " + common_msg)

        ordering_msg = (
            "Light compression ({0}) should be larger than heavy compression ({1}) for type {2} (popen={3})"
        ).format(light_compression_size, heavy_compression_size, file_type, force_popen)
        self.assertTrue(light_compression_size > heavy_compression_size, msg=ordering_msg)
Ejemplo n.º 13
0
    def verify_contents(self, filename, force_popen):
        """Assert that the compressed file ``filename`` holds the test data.

        The file is read line by line, each line is stripped, and the result
        is compared with ``self.get_test_data()`` both in length and
        element by element.
        """
        expected = self.get_test_data()

        # Read it back.  Close the reader even if iteration fails so that no
        # handle (or helper process) stays attached to the file.
        reader = CompressedFile(filename, mode="r", force_popen=force_popen)
        try:
            actual = [line.strip() for line in reader]
        finally:
            reader.close()

        # Make sure it looks ok.
        self.assertEqual(len(expected), len(actual))
        for want, got in zip(expected, actual):
            self.assertEqual(want, got)
    def handle(self, record):
        """Compress one log file and record the outcome in ``self.stats``.

        ``record`` is used as the path of the uncompressed file.  The path
        must contain ".log"; otherwise the record is counted as a
        "bad_filename" and skipped.  The file is renamed to a temporary
        "<compressed name>.compressing" path, compressed at level 1 into a
        uniquely named file, and the temporary file is removed once its size
        has been recorded.

        If compression raises, the error is logged and counted as a
        "compression_error"; the renamed temporary file is left in place.
        """
        filename = record
        # str.find returns -1 when ".log" is absent, which makes base_ends 3.
        base_ends = filename.find(".log") + 4
        if base_ends < 4:
            self.log("Bad filename encountered, skipping: " + filename)
            self.stats.increment(records_read=1, bad_records=1,
                    bad_record_type="bad_filename")
            return
        basename = filename[0:base_ends]
        # Get a unique name for the compressed file:
        comp_name = basename + "." + uuid.uuid4().hex + StorageLayout.COMPRESSED_SUFFIX
        comp_file = CompressedFile(comp_name, mode="w", open_now=True, compression_level=1)

        # Rename uncompressed file to a temp name
        tmp_name = comp_name + ".compressing"
        os.rename(filename, tmp_name)

        start = now()
        try:
            comp_file.compress_from(tmp_name, remove_original=False)
            comp_file.close()
        except Exception as e:
            self.stats.increment(records_read=1, bad_records=1,
                    bad_record_type="compression_error")
            self.log("Error compressing file {0}: {1}".format(filename, e))
            return
        raw_bytes = os.stat(tmp_name).st_size
        comp_bytes = os.stat(comp_name).st_size
        raw_mb = float(raw_bytes) / 1024.0 / 1024.0
        comp_mb = float(comp_bytes) / 1024.0 / 1024.0

        self.stats.increment(records_read=1, records_written=1,
                bytes_read=raw_bytes, bytes_written=comp_bytes)

        # Remove raw file
        os.remove(tmp_name)
        sec = timer.delta_sec(start, now())
        # A zero elapsed time would make the throughput division raise
        # ZeroDivisionError after the work has already succeeded; report a
        # rate of 0.0 when the duration is too small to measure.
        if sec > 0:
            read_rate = raw_mb / sec
            write_rate = comp_mb / sec
        else:
            read_rate = write_rate = 0.0
        self.log("Compressed %s as %s in %.2fs. Size before: %.2fMB, after:" \
                 " %.2fMB (r: %.2fMB/s, w: %.2fMB/s)" % (filename, comp_name,
                    sec, raw_mb, comp_mb, read_rate, write_rate))
Ejemplo n.º 15
0
    def compress_one_file(self, filetype, force_popen):
        """Round-trip the test data through a compressed file of ``filetype``.

        The test data is written line by line to "write_test.<filetype>" in
        the test directory, read back, and compared with what was written.
        The file is removed once the comparison has succeeded.
        """
        base_dir = self.get_test_dir()
        write_test_file = os.path.join(base_dir, "write_test." + filetype)
        assert not os.path.exists(write_test_file)
        writer = CompressedFile(write_test_file, mode="w",
                                force_popen=force_popen)

        # Write the test data, one line per record.
        lines = self.get_test_data()
        for line in lines:
            writer.write(line + "\n")
        writer.close()

        # Read it back.  Always close the reader so that no handle (or
        # helper process) is still attached to the file when it is removed.
        reader = CompressedFile(write_test_file, mode="r",
                                force_popen=force_popen)
        try:
            after = [line.strip() for line in reader]
        finally:
            reader.close()

        # Make sure the round trip preserved every line.
        self.assertEqual(len(lines), len(after))
        for written, read_back in zip(lines, after):
            self.assertEqual(written, read_back)

        # All is well, remove the file.
        os.remove(write_test_file)
Ejemplo n.º 16
0
 def compress_from(self, filetype, force_popen):
     """Compress the raw test file into a ``filetype`` archive and verify it.

     The archive is written to "from_test.<filetype>" in the test
     directory, checked with ``verify_contents`` and then deleted.
     """
     test_dir = self.get_test_dir()
     source_path = self.get_raw_test_file()
     target_path = os.path.join(test_dir, "from_test." + filetype)
     assert not os.path.exists(target_path)

     writer = CompressedFile(target_path, mode="w", force_popen=force_popen)
     writer.compress_from(source_path)
     writer.close()

     self.verify_contents(target_path, force_popen)
     os.remove(target_path)
Ejemplo n.º 17
0
 def test_compress_from_cleanup(self):
     """``compress_from(..., remove_original=True)`` deletes the source file.

     The raw test file must exist beforehand and be gone once the
     compressed file has been closed; the compressed output is removed
     at the end.
     """
     target_path = os.path.join(self.get_test_dir(), "cleanup_test.gz")
     source_path = self.get_raw_test_file()
     assert os.path.exists(source_path)

     writer = CompressedFile(target_path, mode="w")
     writer.compress_from(source_path, remove_original=True)
     writer.close()

     assert not os.path.exists(source_path)
     os.remove(target_path)
Ejemplo n.º 18
0
    def handle(self, record):
        """Compress one log file and record the outcome in ``self.stats``.

        ``record`` is used as the path of the uncompressed file.  The path
        must contain ".log"; otherwise the record is counted as a
        "bad_filename" and skipped.  The file is renamed to a temporary
        "<compressed name>.compressing" path, compressed at level 1 into a
        uniquely named file, and the temporary file is removed once its size
        has been recorded.

        If compression raises, the error is logged and counted as a
        "compression_error"; the renamed temporary file is left in place.
        """
        filename = record
        # str.find returns -1 when ".log" is absent, which makes base_ends 3.
        base_ends = filename.find(".log") + 4
        if base_ends < 4:
            self.log("Bad filename encountered, skipping: " + filename)
            self.stats.increment(records_read=1,
                                 bad_records=1,
                                 bad_record_type="bad_filename")
            return
        basename = filename[0:base_ends]
        # Get a unique name for the compressed file:
        comp_name = basename + "." + uuid.uuid4(
        ).hex + StorageLayout.COMPRESSED_SUFFIX
        comp_file = CompressedFile(comp_name,
                                   mode="w",
                                   open_now=True,
                                   compression_level=1)

        # Rename uncompressed file to a temp name
        tmp_name = comp_name + ".compressing"
        os.rename(filename, tmp_name)

        start = now()
        try:
            comp_file.compress_from(tmp_name, remove_original=False)
            comp_file.close()
        except Exception as e:
            self.stats.increment(records_read=1,
                                 bad_records=1,
                                 bad_record_type="compression_error")
            self.log("Error compressing file {0}: {1}".format(filename, e))
            return
        raw_bytes = os.stat(tmp_name).st_size
        comp_bytes = os.stat(comp_name).st_size
        raw_mb = float(raw_bytes) / 1024.0 / 1024.0
        comp_mb = float(comp_bytes) / 1024.0 / 1024.0

        self.stats.increment(records_read=1,
                             records_written=1,
                             bytes_read=raw_bytes,
                             bytes_written=comp_bytes)

        # Remove raw file
        os.remove(tmp_name)
        sec = timer.delta_sec(start, now())
        # A zero elapsed time would make the throughput division raise
        # ZeroDivisionError after the work has already succeeded; report a
        # rate of 0.0 when the duration is too small to measure.
        if sec > 0:
            read_rate = raw_mb / sec
            write_rate = comp_mb / sec
        else:
            read_rate = write_rate = 0.0
        self.log("Compressed %s as %s in %.2fs. Size before: %.2fMB, after:" \
                 " %.2fMB (r: %.2fMB/s, w: %.2fMB/s)" % (filename, comp_name,
                    sec, raw_mb, comp_mb, read_rate, write_rate))
Ejemplo n.º 19
0
 def test_no_extension(self):
     """Auto-detection on a name without an extension raises ValueError."""
     with self.assertRaises(ValueError):
         # There is no file extension to detect the type from.
         CompressedFile("dummy", compression_type="auto")
Ejemplo n.º 20
0
 def test_missing_executable(self):
     """``get_executable`` raises RuntimeError when the search path is empty.

     ``CompressedFile.SEARCH_PATH`` is a class attribute, so the value it
     had before the test is put back afterwards; otherwise the emptied
     search path would leak into every test that runs later.
     """
     saved_search_path = CompressedFile.SEARCH_PATH
     try:
         with self.assertRaises(RuntimeError):
             c = CompressedFile("dummy.lzma", open_now=False)
             CompressedFile.SEARCH_PATH = []
             c.get_executable()
     finally:
         CompressedFile.SEARCH_PATH = saved_search_path
Ejemplo n.º 21
0
    def light_versus_heavy(self, file_type, force_popen):
        """Compare output sizes at compression levels 1 and 9.

        The same 2000-line payload is written at each level to a temporary
        file in the test directory.  Both outputs must be smaller than the
        raw payload, and the level-1 output must be larger than the level-9
        output.
        """
        base_dir = self.get_test_dir()
        test_contents = "\n".join(
            "Hello there {0}!".format(i) for i in range(2000))

        def compressed_size(level):
            # Write the payload at this level, measure the file, delete it.
            path = os.path.join(
                base_dir,
                "clevel_test.{}.{}.{}".format(level, force_popen, file_type))
            assert not os.path.exists(path)
            writer = CompressedFile(path,
                                    mode="w",
                                    force_popen=force_popen,
                                    compression_level=level)
            writer.write(test_contents)
            writer.close()
            size = os.stat(path).st_size
            os.remove(path)
            return size

        light_compression_size = compressed_size(1)
        heavy_compression_size = compressed_size(9)
        raw_size = len(test_contents)

        common_msg = ("size should be less than raw size for type {0} "
                      "(popen={1})").format(file_type, force_popen)
        self.assertTrue(light_compression_size < raw_size,
                        msg="Lightly Compressed " + common_msg)
        self.assertTrue(heavy_compression_size < raw_size,
                        msg="Heavily Compressed " + common_msg)

        ordering_msg = ("Light compression ({0}) should be larger "
                        "than heavy compression ({1}) for type {2} "
                        "(popen={3})").format(light_compression_size,
                                              heavy_compression_size,
                                              file_type, force_popen)
        self.assertTrue(light_compression_size > heavy_compression_size,
                        msg=ordering_msg)
Ejemplo n.º 22
0
 def test_unknown_compression_type(self):
     """Opening immediately with an unrecognised type raises ValueError."""
     with self.assertRaises(ValueError):
         CompressedFile("dummy.gz", compression_type="foo", open_now=True)
Ejemplo n.º 23
0
    def test_open_now(self):
        """``open_now=True`` raises IOError here; ``open_now=False`` does not.

        With ``open_now=False`` the object is created without error and
        its ``line_num`` starts at 0.
        """
        with self.assertRaises(IOError):
            CompressedFile("dummy.gz", open_now=True)

        deferred = CompressedFile("dummy.gz", open_now=False)
        self.assertEqual(0, deferred.line_num)
Ejemplo n.º 24
0
 def test_open_bad_mode(self):
     """Opening immediately with an unsupported mode raises ValueError."""
     with self.assertRaises(ValueError):
         CompressedFile(
             "dummy.lzma", mode="bogus", open_now=True, force_popen=True
         )
Ejemplo n.º 25
0
 def test_missing_executable(self):
     """``get_executable`` raises RuntimeError when the search path is empty.

     ``CompressedFile.SEARCH_PATH`` is a class attribute, so the value it
     had before the test is put back afterwards; otherwise the emptied
     search path would leak into every test that runs later.
     """
     saved_search_path = CompressedFile.SEARCH_PATH
     try:
         with self.assertRaises(RuntimeError):
             c = CompressedFile("dummy.lzma", open_now=False)
             CompressedFile.SEARCH_PATH = []
             c.get_executable()
     finally:
         CompressedFile.SEARCH_PATH = saved_search_path
Ejemplo n.º 26
0
 def test_no_extension_manual(self):
     """An explicit compression type works without a file extension."""
     # No auto-detection is needed when the type is given by the caller.
     explicit = CompressedFile("dummy", compression_type="gz")
     self.assertEqual("gz", explicit.compression_type)