def test_compress_from_cleanup(self):
    """Check that compress_from(..., remove_original=True) deletes the raw file.

    The raw test file must exist beforehand and must be gone after the
    compressed file has been written and closed. The compressed output
    ("cleanup_test.gz" in the test directory) is removed before returning,
    whether the test passes or fails.
    """
    base_dir = self.get_test_dir()
    comp_test_file = os.path.join(base_dir, "cleanup_test.gz")
    raw_test_file = self.get_raw_test_file()
    assert os.path.exists(raw_test_file)
    try:
        c = CompressedFile(comp_test_file, mode="w")
        try:
            c.compress_from(raw_test_file, remove_original=True)
        finally:
            # Always release the handle, even when compression fails.
            c.close()
        assert not os.path.exists(raw_test_file)
        # Previously checked implicitly by the unconditional os.remove():
        # the compressed output must actually have been created.
        assert os.path.exists(comp_test_file)
    finally:
        # Do not leave the output behind when an assertion or the
        # compression itself fails.
        if os.path.exists(comp_test_file):
            os.remove(comp_test_file)
def compress_from(self, filetype, force_popen):
    """Compress the raw test file into "from_test.<filetype>" and verify it.

    Args:
        filetype: Extension appended to "from_test." to name the output file
            inside the test directory.
        force_popen: Forwarded to both CompressedFile and verify_contents.

    The output file must not exist on entry. It is removed before returning,
    on success and on failure alike, so that a failed run does not trip the
    precondition assertion of the next run.
    """
    base_dir = self.get_test_dir()
    raw_test_file = self.get_raw_test_file()
    comp_test_file = os.path.join(base_dir, "from_test." + filetype)
    assert not os.path.exists(comp_test_file)
    try:
        c = CompressedFile(comp_test_file, mode="w", force_popen=force_popen)
        try:
            c.compress_from(raw_test_file)
        finally:
            # Always release the handle, even when compression fails.
            c.close()
        # Previously checked implicitly by the unconditional os.remove():
        # the compressed output must actually have been created.
        assert os.path.exists(comp_test_file)
        self.verify_contents(comp_test_file, force_popen)
    finally:
        # A leftover output file would make every later run fail at the
        # "assert not os.path.exists" precondition above.
        if os.path.exists(comp_test_file):
            os.remove(comp_test_file)
def handle(self, record):
    """Compress one raw log file and record the outcome in self.stats.

    Args:
        record: Path of the file to compress. It must contain ".log";
            everything up to and including the first ".log" becomes the
            base name of the compressed file.

    The compressed file is named
    "<base>.<random hex>" + StorageLayout.COMPRESSED_SUFFIX. While it is being
    written, the raw file is renamed to "<compressed name>.compressing"; the
    renamed raw file is deleted once compression has succeeded.

    Returns None in every case. A bad filename or a compression error is
    logged and counted as a bad record instead of being raised.
    """
    filename = record
    marker = filename.find(".log")
    if marker < 0:
        self.log("Bad filename encountered, skipping: " + filename)
        self.stats.increment(records_read=1, bad_records=1,
                             bad_record_type="bad_filename")
        return
    basename = filename[:marker + len(".log")]

    # Get a unique name for the compressed file:
    comp_name = (basename + "." + uuid.uuid4().hex +
                 StorageLayout.COMPRESSED_SUFFIX)
    comp_file = CompressedFile(comp_name, mode="w", open_now=True,
                               compression_level=1)

    # Rename uncompressed file to a temp name
    tmp_name = comp_name + ".compressing"
    os.rename(filename, tmp_name)

    start = now()
    try:
        comp_file.compress_from(tmp_name, remove_original=False)
        comp_file.close()
    except Exception as e:
        # NOTE(review): on this path the raw file stays under its
        # ".compressing" name and is not renamed back - confirm intended.
        self.stats.increment(records_read=1, bad_records=1,
                             bad_record_type="compression_error")
        self.log("Error compressing file {0}: {1}".format(filename, e))
        return

    raw_bytes = os.stat(tmp_name).st_size
    comp_bytes = os.stat(comp_name).st_size
    raw_mb = float(raw_bytes) / 1024.0 / 1024.0
    comp_mb = float(comp_bytes) / 1024.0 / 1024.0
    self.stats.increment(records_read=1, records_written=1,
                         bytes_read=raw_bytes, bytes_written=comp_bytes)

    # Remove raw file
    os.remove(tmp_name)

    sec = timer.delta_sec(start, now())
    # The elapsed time can be zero for tiny files or a coarse clock. Report a
    # rate of 0.00 then, rather than raising ZeroDivisionError after the
    # record has already been processed successfully.
    if sec > 0:
        read_rate = raw_mb / sec
        write_rate = comp_mb / sec
    else:
        read_rate = 0.0
        write_rate = 0.0
    self.log("Compressed %s as %s in %.2fs. Size before: %.2fMB, after:"
             " %.2fMB (r: %.2fMB/s, w: %.2fMB/s)"
             % (filename, comp_name, sec, raw_mb, comp_mb,
                read_rate, write_rate))
def handle(self, record):
    """Compress one raw log file and record the outcome in self.stats.

    Args:
        record: Path of the file to compress. It must contain ".log";
            everything up to and including the first ".log" becomes the
            base name of the compressed file.

    The compressed file is named
    "<base>.<random hex>" + StorageLayout.COMPRESSED_SUFFIX. While it is being
    written, the raw file is renamed to "<compressed name>.compressing"; the
    renamed raw file is deleted once compression has succeeded.

    Returns None in every case. A bad filename or a compression error is
    logged and counted as a bad record instead of being raised.
    """
    filename = record
    marker = filename.find(".log")
    if marker < 0:
        self.log("Bad filename encountered, skipping: " + filename)
        self.stats.increment(records_read=1, bad_records=1,
                             bad_record_type="bad_filename")
        return
    basename = filename[:marker + len(".log")]

    # Get a unique name for the compressed file:
    comp_name = (basename + "." + uuid.uuid4().hex +
                 StorageLayout.COMPRESSED_SUFFIX)
    comp_file = CompressedFile(comp_name, mode="w", open_now=True,
                               compression_level=1)

    # Rename uncompressed file to a temp name
    tmp_name = comp_name + ".compressing"
    os.rename(filename, tmp_name)

    start = now()
    try:
        comp_file.compress_from(tmp_name, remove_original=False)
        comp_file.close()
    except Exception as e:
        # NOTE(review): on this path the raw file stays under its
        # ".compressing" name and is not renamed back - confirm intended.
        self.stats.increment(records_read=1, bad_records=1,
                             bad_record_type="compression_error")
        self.log("Error compressing file {0}: {1}".format(filename, e))
        return

    raw_bytes = os.stat(tmp_name).st_size
    comp_bytes = os.stat(comp_name).st_size
    raw_mb = float(raw_bytes) / 1024.0 / 1024.0
    comp_mb = float(comp_bytes) / 1024.0 / 1024.0
    self.stats.increment(records_read=1, records_written=1,
                         bytes_read=raw_bytes, bytes_written=comp_bytes)

    # Remove raw file
    os.remove(tmp_name)

    sec = timer.delta_sec(start, now())
    # The elapsed time can be zero for tiny files or a coarse clock. Report a
    # rate of 0.00 then, rather than raising ZeroDivisionError after the
    # record has already been processed successfully.
    if sec > 0:
        read_rate = raw_mb / sec
        write_rate = comp_mb / sec
    else:
        read_rate = 0.0
        write_rate = 0.0
    self.log("Compressed %s as %s in %.2fs. Size before: %.2fMB, after:"
             " %.2fMB (r: %.2fMB/s, w: %.2fMB/s)"
             % (filename, comp_name, sec, raw_mb, comp_mb,
                read_rate, write_rate))