def numpy_to_arkouda(A: np.ndarray, tmp_dir: str = '') -> ak.pdarray:
    """
    Convert from numpy to arkouda using disk rather than sockets.

    The array is written to a randomly named HDF5 file under ``tmp_dir``
    (dataset name ``'arr'``), read back with ``ak.read_hdf`` and the file
    is then deleted.

    Parameters
    ----------
    A : np.ndarray
        Array to transfer. The dataset is created with shape
        ``(A.shape[0],)`` and dtype ``int64``, so values are stored as
        64-bit integers.
        NOTE(review): this looks like it only supports 1-D input -- confirm.
    tmp_dir : str
        Directory for the scratch file. An empty value means the current
        working directory.
        NOTE(review): the path is handed to ``ak.read_hdf``, so presumably
        it must also be readable by the Arkouda server -- confirm.

    Returns
    -------
    ak.pdarray
        The result of ``ak.read_hdf('arr', <scratch file>)``.
    """
    # Random 64-bit stem keeps concurrent conversions from colliding.
    rng = np.random.randint(2**64, dtype=np.uint64)
    tmp_dir = os.getcwd() if not tmp_dir else tmp_dir
    path = f'{tmp_dir}/{rng}.hdf5'
    try:
        # Close the file before asking Arkouda to read it.
        with h5py.File(path, 'w') as f:
            arr = f.create_dataset('arr', (A.shape[0], ), dtype='int64')
            arr[:] = A[:]
        B = ak.read_hdf('arr', path)
    finally:
        # Always drop the scratch file, even when writing or reading failed.
        try:
            os.remove(path)
        except FileNotFoundError:
            # The file was never created (e.g. h5py could not open it).
            pass
    return B
import glob
import os
import shutil
import sys

import arkouda as ak

# Round-trip check for Arkouda HDF5 I/O: read datasets from the files given
# on the command line, save them under /tmp, load them back, then clean up.

saveone = '/tmp/ak_save.hdf'
saveall = '/tmp/ak_save_all.hdf'

if len(sys.argv) < 4:
    print("Usage: {} <hostname> <port> <HDF5_filenames>".format(sys.argv[0]))
    # A usage error is a failure; report it through the exit status.
    sys.exit(1)

ak.connect(sys.argv[1], sys.argv[2])

onefile = sys.argv[3]
allfiles = sys.argv[3:]

try:
    print(ak.ls_hdf(onefile))

    # Single-array round trip: read -> save -> load -> compare.
    print(f"srcIP = ak.read_hdf('srcIP', {onefile})")
    srcIP = ak.read_hdf('srcIP', onefile)
    print(f"srcIP.save({saveone}, 'srcIP')")
    srcIP.save(saveone, 'srcIP')
    print(f"srcIP2 = ak.load({saveone}, 'srcIP')")
    srcIP2 = ak.load(saveone, 'srcIP')
    assert (srcIP == srcIP2).all()
    del srcIP
    del srcIP2

    # Multi-array round trip across every input file.
    print(f"df = ak.read_all(['srcPort', 'proto', 'packets'], {allfiles})")
    df = ak.read_all(['srcPort', 'proto', 'packets'], allfiles)
    print(f"ak.save_all(df, {saveall})")
    ak.save_all(df, saveall)
    print(f"newdf = ak.load_all({saveall})")
    newdf = ak.load_all(saveall)
    print(newdf)
finally:
    # Best-effort removal of everything matching /tmp/ak_save*, run even when
    # a step above failed. The wildcard is kept from the original cleanup;
    # presumably the saved names carry extra suffixes -- TODO confirm.
    for leftover in glob.glob('/tmp/ak_save*'):
        if os.path.isdir(leftover) and not os.path.islink(leftover):
            shutil.rmtree(leftover, ignore_errors=True)
        else:
            try:
                os.remove(leftover)
            except OSError:
                pass
# Connect to the Arkouda server, overriding the default port only when one
# was supplied on the command line.
if args.port is not None:
    ak.connect(port=args.port)
else:
    ak.connect()

print(ak.get_config())

if not args.hdffiles:
    print("usage: {} [--server server] [--port port] hdffiles ".format(sys.argv[0]))
    # Nothing to read: stop here instead of issuing reads with no files.
    sys.exit(1)

# fields in the files to read and create pdarrays in the dict
fields = ['srcIP', 'dstIP', 'srcPort', 'dstPort', 'start']

# read in the files, all data from hdffiles
# will be concatenated together in the fields/columns
nfDF = {field: ak.read_hdf(field, args.hdffiles) for field in fields}

# print out the pdarrays in the dict and their types
print(nfDF['start'],nfDF['start'].dtype)
print(nfDF['srcIP'],type(nfDF['srcIP'])) # Strings doesn't have a dtype?!?
print(nfDF['dstIP'],type(nfDF['dstIP'])) # Strings doesn't have a dtype?!?
print(nfDF['srcPort'],nfDF['srcPort'].dtype)
print(nfDF['dstPort'],nfDF['dstPort'].dtype)

print(nfDF)

# print out the symbols the server knows about
print(ak.info(ak.AllSymbols))

# print out how much memory is being used by the server
print("mem used: ", ak.get_mem_used())
parser.add_argument('filenames', nargs='+')
args = parser.parse_args()

# Pass along only the endpoint options that were actually supplied, so that
# ak.connect falls back to its own defaults for the rest.
connect_kwargs = {}
if args.server is not None:
    connect_kwargs['server'] = args.server
if args.port is not None:
    connect_kwargs['port'] = args.port
ak.connect(**connect_kwargs)

# Time one read of the requested dataset across all input files.
print("Reading files...")
start = time.time()
a = ak.read_hdf(args.dsetName, args.filenames)
end = time.time()
t = end - start
print(a)
# Throughput is estimated at 8 bytes per element.
print(f'{t:.2f} seconds ({8*a.size/t:.2e} bytes/sec)')

# Negative probes: each read is expected to fail with RuntimeError, whose
# message is printed. A probe that does not raise prints nothing further.
badfilename = args.filenames[0] + '-should-not-exist-5473219431'
probes = (
    ("Testing bad filename...", args.dsetName, args.filenames + [badfilename]),
    ("Testing bad dsetName...", args.dsetName+'-not-a-dset', args.filenames),
)
for banner, dset, files in probes:
    print(banner)
    try:
        ak.read_hdf(dset, files)
    except RuntimeError as e:
        print(e)