def accounts_csvs(num_files, n, k):
    """Write ``num_files`` CSV files of account entries into ``data_dir``.

    Files are named ``accounts.<i>.csv`` for ``i`` in ``range(num_files)``.
    When the highest-numbered file is already present the function returns
    immediately and generates nothing.

    Parameters
    ----------
    num_files
        Number of CSV files to write.
    n
        Forwarded as the first argument of ``account_entries`` for each file.
    k
        Forwarded to ``account_params``; the result is unpacked into every
        ``account_entries`` call.
    """
    from accounts import account_entries, account_params

    # The last file of the series doubles as the "already generated" marker.
    marker = os.path.join(data_dir, 'accounts.%d.csv' % (num_files - 1))
    if os.path.exists(marker):
        return

    print("Create CSV accounts for dataframe exercise")

    params = account_params(k)
    for index in range(num_files):
        frame = account_entries(n, *params)
        target = os.path.join(data_dir, 'accounts.%d.csv' % index)
        frame.to_csv(target, index=False)
def accounts_json(num_files, n, k):
    """Write ``num_files`` gzip-compressed JSON files into ``data_dir``.

    Files are named ``accounts.<ii>.json.gz`` (two-digit, zero-padded index).
    Each file holds the entries returned by ``json_entries``, one
    ``json.dumps`` document per entry, joined with ``os.linesep``.
    When the highest-numbered file is already present the function returns
    immediately and generates nothing.

    Parameters
    ----------
    num_files
        Number of archives to write.
    n
        Forwarded as the first argument of ``json_entries`` for each file.
    k
        Forwarded to ``account_params``; the result is unpacked into every
        ``json_entries`` call.
    """
    from accounts import account_params, json_entries
    import json
    import gzip

    name_template = 'accounts.%02d.json.gz'

    # The last file of the series doubles as the "already generated" marker.
    if os.path.exists(os.path.join(data_dir, name_template % (num_files - 1))):
        return

    print("Create JSON accounts for bag exercise")

    params = account_params(k)
    for index in range(num_files):
        records = json_entries(n, *params)
        target = os.path.join(data_dir, name_template % index)
        with gzip.open(target, 'wb') as out:
            payload = os.linesep.join(json.dumps(record) for record in records)
            out.write(payload.encode())
def accounts_json(small=False):
    """Generate gzip-compressed JSON account files in ``data_dir``.

    Writes 50 files named ``accounts.00.json.gz`` .. ``accounts.49.json.gz``.
    Each file holds the entries returned by ``json_entries``, one
    ``json.dumps`` document per entry, joined with ``os.linesep``.
    When the highest-numbered file is already present the function returns
    immediately without writing or printing anything.

    Parameters
    ----------
    small : bool, optional
        When true, use ``n=10000`` and ``k=250``; otherwise ``n=100000`` and
        ``k=500``. ``k`` is passed to ``account_params`` and ``n`` to
        ``json_entries``.
    """
    t0 = time.time()

    if small:
        num_files, n, k = 50, 10000, 250
    else:
        num_files, n, k = 50, 100000, 500

    # The last file of the series doubles as the "already generated" marker.
    fn = os.path.join(data_dir, 'accounts.%02d.json.gz' % (num_files - 1))
    if os.path.exists(fn):
        return

    args = account_params(k)
    for i in range(num_files):
        seq = json_entries(n, *args)
        fn = os.path.join(data_dir, 'accounts.%02d.json.gz' % i)
        with gzip.open(fn, 'wb') as f:
            f.write(os.linesep.join(map(json.dumps, seq)).encode())

    t1 = time.time()
    # This function produces the JSON data set, so the report says "JSON"
    # (the previous message was copied from the CSV generator).
    print("Created JSON accounts in {:0.2f}s".format(t1 - t0))
def accounts_csvs(small=False):
    """Generate CSV account files in ``data_dir``.

    Writes 3 files named ``accounts.0.csv`` .. ``accounts.2.csv``, each
    produced by calling ``to_csv(..., index=False)`` on the object returned
    by ``account_entries``. When the highest-numbered file is already present
    the function returns immediately without writing or printing anything.

    Parameters
    ----------
    small : bool, optional
        When true, use ``n=10000`` and ``k=100``; otherwise ``n=1000000`` and
        ``k=500``. ``k`` is passed to ``account_params`` and ``n`` to
        ``account_entries``.
    """
    t0 = time.time()

    if small:
        num_files, n, k = 3, 10000, 100
    else:
        num_files, n, k = 3, 1000000, 500

    # The last file of the series doubles as the "already generated" marker.
    fn = os.path.join(data_dir, 'accounts.%d.csv' % (num_files - 1))
    if os.path.exists(fn):
        return

    args = account_params(k)
    for i in range(num_files):
        df = account_entries(n, *args)
        df.to_csv(os.path.join(data_dir, 'accounts.%d.csv' % i), index=False)

    t1 = time.time()
    print("Created CSV accounts in {:0.2f}s".format(t1 - t0))