def test_json_gz():
    """Round-trip a dict through gzip-compressed JSON save/load.

    BUG FIX: the original loaded the file but never checked the result,
    so a corrupting save/load pair would still pass.
    """
    data = {"a": [1, 2, 3]}
    path = save(data, "test.json", {"compression": "gzip"})
    # the chosen output path should still identify the json format
    assert "json" in path
    try:
        # loading must reproduce the original data, not merely succeed
        assert load(path) == data
    finally:
        os.remove(path)
def test_bytes_png():
    """Save/load the first PNG found in the home directory, if any exist."""
    candidates = just.ls("~/*.png")
    if not candidates:
        # nothing to exercise on this machine; the test is a no-op
        return
    first = candidates[0]
    path = save(first, first)
    try:
        loaded = load(path)
    finally:
        # always clean up the file produced by save()
        os.remove(path)
def test_bytes_gz():
    """Saving raw bytes with gzip compression should yield a gz-suffixed path."""
    payload = b"asdfasdf"
    path = save(payload, "test", {"compression": "gzip"}, allow_overwrite=True)
    assert "gz" in path
    try:
        loaded = load(path)
    finally:
        # remove the artifact even if load() raises
        os.remove(path)
def test_pyarrow_brotli_pandas():
    """Save a one-row DataFrame with explicit pyarrow/brotli kwargs, then load it."""
    frame = pd.DataFrame({"a": [1]})
    options = {"engine": "pyarrow", "compression": "brotli"}
    path = save(frame, "test", options)
    try:
        loaded = load(path)
    finally:
        # clean up the file written by save()
        os.remove(path)
def test_bytes():
    """Save raw bytes with default settings and load them back."""
    path = save(b"asdfasdf", "test", allow_overwrite=True)
    try:
        loaded = load(path)
    finally:
        # remove the artifact even if load() raises
        os.remove(path)
def test_json():
    """Round-trip a dict through JSON save/load.

    BUG FIX: the original loaded the file but never asserted anything,
    so the round-trip was not actually verified.
    """
    data = {"a": [1, 2, 3]}
    path = save(data, "test.json")
    try:
        # loading must reproduce exactly what was saved
        assert load(path) == data
    finally:
        os.remove(path)
def test_pandas():
    """Save and load a minimal single-column DataFrame with default settings."""
    path = save(pd.DataFrame({"a": [1]}), "test")
    try:
        result = load(path)
    finally:
        # clean up the file written by save()
        os.remove(path)
# NOTE(review): fragment of a larger benchmarking routine — `dir_path`,
# `old_total`, `new_total`, `jc`, `pdc` and `Fore` are defined outside this
# excerpt, and the loop body appears to continue past it. TODO confirm against
# the full function before editing logic here.
dir_path += "/"
# gather candidate files of both supported formats and shuffle the order
fnames = just.glob(dir_path + "*.json") + just.glob(dir_path + "*.csv")
random.shuffle(fnames)
for x in fnames:
    print(x)
    # pick the compressor matching the file type (jc = JSON, pdc = pandas/CSV)
    if x.endswith(".json"):
        shrynk = jc
        tp = "JSON"
    elif x.endswith(".csv"):
        shrynk = pdc
        tp = "CSV"
    old_size = os.path.getsize(x)
    old_total += old_size
    data = shrynk.load(x)
    # re-save under a stripped base name; shrynk chooses the compression
    new_file = shrynk.save(
        data, x.replace("csv_", "").replace(".csv", "").replace(".json", ""))
    # last path component after the final dot names the chosen compression
    comp = new_file.split(".")[-1]
    if comp == "None":
        # a literal "None" extension means no compression was applied
        new_size = old_size
    else:
        new_size = os.path.getsize(new_file)
    new_total += new_size
    just.remove(new_file)
    # percent of the original size saved by the re-compression
    improvement = 100 - int(new_size / old_size * 100)
    if improvement > 50:
        improvement = Fore.GREEN + "{}%".format(improvement) + Fore.RESET
    elif improvement > 20:
        improvement = Fore.YELLOW + "{}%".format(improvement) + Fore.RESET
def main():
    """Command-line entry point for shrynk: compress, decompress or benchmark a file.

    BUG FIXES versus the original:
    - docstring referred to `yagmail` (copy-paste from another project);
    - the `--predict` and `--save` help texts were copy-pasted from `--read`;
    - decompressing a parquet file raised ValueError, because the code always
      searched the filename for ".csv" even when the extension was ".parquet".
    """
    import argparse
    parser = argparse.ArgumentParser(
        description='Use the machine learning meta library shrynk to compress')
    subparsers = parser.add_subparsers(dest="command")

    compress = subparsers.add_parser('compress')
    compress.add_argument('file', help='file you want to compress')
    compress.add_argument('--size', '-s', default=3, type=int,
                          help='Size weight for model')
    compress.add_argument('--write', '-w', default=1, type=int,
                          help='Write-time weight for model')
    compress.add_argument('--read', '-r', default=1, type=int,
                          help='Read-time weight for model')

    decompress = subparsers.add_parser('decompress')
    decompress.add_argument('file', help='file you want to decompress')

    benchmark = subparsers.add_parser('benchmark')
    benchmark.add_argument('file', help='file you want to benchmark')
    benchmark.add_argument('--size', '-s', default=3, type=int,
                           help='Size weight for model')
    benchmark.add_argument('--write', '-w', default=1, type=int,
                           help='Write-time weight for model')
    benchmark.add_argument('--read', '-r', default=1, type=int,
                           help='Read-time weight for model')
    benchmark.add_argument('--predict', action="store_true",
                           help='Only predict the best compression (skip running benchmarks)')
    benchmark.add_argument('--save', action="store_true",
                           help='Run the benchmarks and print the raw results table')

    args = parser.parse_args()
    if args.command == "compress":
        data = load(args.file)
        # save() returns the path it wrote; echo it so the user sees the result
        print(save(data, args.file,
                   size=args.size, write=args.write, read=args.read))
    elif args.command == "decompress":
        data = load(args.file)
        if "json" in args.file:
            kwargs = {"compression": None}
            end = args.file.index(".json")
            destination = args.file[:end] + ".json"
        elif "csv" in args.file or "parquet" in args.file:
            kwargs = {"engine": "csv", "compression": None}
            # pick the extension that is actually present; the original always
            # used ".csv" and crashed with ValueError on parquet filenames
            ext = "csv" if ".csv" in args.file else "parquet"
            end = args.file.index("." + ext)
            # tabular data is always written back out as uncompressed csv
            destination = args.file[:end] + ".csv"
        else:
            kwargs = {"compression": None}
            # unknown format: strip the last extension component
            destination = ".".join(args.file.split(".")[:-1])
        save(data, destination, kwargs)
    elif args.command == "benchmark":
        if args.predict:
            data = load(args.file)
            print("Predicted:",
                  infer(data, size=args.size, write=args.write, read=args.read))
        if args.save:
            bench = run_benchmarks(args.file)
            bench = pd.DataFrame(
                bench, columns=["kwargs", "size", "write_time", "read_time"])
            return print(
                add_z_to_bench(bench, args.size, args.write, args.read))
        else:
            print(show_benchmark(args.file,
                                 size=args.size, write=args.write, read=args.read))