Ejemplo n.º 1
0
def numpy_to_arkouda(A: np.ndarray, tmp_dir: str = '') -> ak.pdarray:
    """Convert from numpy to arkouda using disk rather than sockets.

    ``A`` is written to a randomly named scratch file
    ``<tmp_dir>/<random uint64>.hdf5`` as an ``int64`` dataset called
    ``'arr'``, loaded back with ``ak.read_hdf`` and the scratch file is
    removed afterwards, whether or not the transfer succeeded.

    Parameters
    ----------
    A : np.ndarray
        Array to transfer. The dataset is created with shape
        ``(A.shape[0],)`` and dtype ``int64``, so ``A`` is expected to be
        one-dimensional and its values are stored as 64-bit integers.
    tmp_dir : str, optional
        Directory for the scratch file. An empty string (the default)
        selects the current working directory.
        NOTE(review): the arkouda server presumably has to be able to read
        this directory (shared filesystem) -- confirm for your deployment.

    Returns
    -------
    ak.pdarray
        Whatever ``ak.read_hdf('arr', <scratch file>)`` returns.
    """
    # Random 64-bit token keeps concurrent conversions from colliding on
    # the scratch file name.
    token = np.random.randint(2**64, dtype=np.uint64)
    scratch_dir = tmp_dir if tmp_dir else os.getcwd()
    scratch_path = os.path.join(scratch_dir, f'{token}.hdf5')

    try:
        with h5py.File(scratch_path, 'w') as f:
            arr = f.create_dataset('arr', (A.shape[0], ), dtype='int64')
            arr[:] = A[:]

        return ak.read_hdf('arr', scratch_path)
    finally:
        # Always clean up, even when the write or the server-side read
        # fails; otherwise every failed call leaves a file behind.
        try:
            os.remove(scratch_path)
        except FileNotFoundError:
            # The file was never created (e.g. h5py could not open it);
            # do not mask the original error with a cleanup error.
            pass
Ejemplo n.º 2
0
import arkouda as ak
import sys, os

# Round-trip check for arkouda HDF5 I/O: read columns from the HDF5 files
# given on the command line, save them, load them back and print the result.
#
# Scratch outputs written by the save calls below; both paths match the
# '/tmp/ak_save*' pattern that is removed during cleanup.
saveone = '/tmp/ak_save.hdf'
saveall = '/tmp/ak_save_all.hdf'

if len(sys.argv) < 4:
    print("Usage: {} <hostname> <port> <HDF5_filenames>".format(sys.argv[0]))
    # Exit non-zero so callers can tell the script was invoked incorrectly.
    sys.exit(1)
ak.connect(sys.argv[1], sys.argv[2])
onefile = sys.argv[3]      # first HDF5 file: used for the single-column test
allfiles = sys.argv[3:]    # every HDF5 file: used for the multi-column test

try:
    print(ak.ls_hdf(onefile))

    # Single dataset: read -> save -> load -> compare element-wise.
    print(f"srcIP = ak.read_hdf('srcIP', {onefile})")
    srcIP = ak.read_hdf('srcIP', onefile)
    print(f"srcIP.save({saveone}, 'srcIP')")
    srcIP.save(saveone, 'srcIP')
    print(f"srcIP2 = ak.load({saveone}, 'srcIP')")
    srcIP2 = ak.load(saveone, 'srcIP')
    assert (srcIP == srcIP2).all()
    # Drop the client-side references before reading the larger data set.
    del srcIP
    del srcIP2

    # Several datasets across all files: read_all -> save_all -> load_all.
    print(f"df = ak.read_all(['srcPort', 'proto', 'packets'], {allfiles})")
    df = ak.read_all(['srcPort', 'proto', 'packets'], allfiles)
    print(f"ak.save_all(df, {saveall})")
    ak.save_all(df, saveall)
    print(f"newdf = ak.load_all({saveall})")
    newdf = ak.load_all(saveall)
    print(newdf)
finally:
    # Remove the scratch files even if a read, a save or the round-trip
    # assertion above failed, so a failing run does not leave them in /tmp.
    os.system('rm -rf /tmp/ak_save*')
Ejemplo n.º 3
0
        if args.port is not None:
            ak.connect(port=args.port)
        else:
            ak.connect()

    print(ak.get_config())
            
    if len(args.hdffiles) == 0:
        print("usage: {} [--server server] [--port port] hdffiles ".format(sys.argv[0]))

    # fields in the files to read and create pdarrays in the dict
    fields = ['srcIP', 'dstIP', 'srcPort', 'dstPort', 'start']

    # read in the files, all data from hdffiles
    # will be concatenated together in the fields/columns
    nfDF = {field: ak.read_hdf(field, args.hdffiles) for field in fields}

    # print out the pdarrays in the dict and their types
    print(nfDF['start'],nfDF['start'].dtype)
    print(nfDF['srcIP'],type(nfDF['srcIP'])) # Strings doesn't have a dtype?!?
    print(nfDF['dstIP'],type(nfDF['dstIP'])) # Strings doesn't have a dtype?!?
    print(nfDF['srcPort'],nfDF['srcPort'].dtype)
    print(nfDF['dstPort'],nfDF['dstPort'].dtype)
    print(nfDF)

    # print out the symbols the server knows about
    print(ak.info(ak.AllSymbols))

    # print out how much memory is being used by the server
    print("mem used: ", ak.get_mem_used())
Ejemplo n.º 4
0
    parser.add_argument('filenames', nargs='+')

    args = parser.parse_args()
    if args.server is not None:
        if args.port is not None:
            ak.connect(server=args.server, port=args.port)
        else:
            ak.connect(server=args.server)
    else:
        if args.port is not None:
            ak.connect(port=args.port)
        else:
            ak.connect()
    print("Reading files...")
    start = time.time()
    a = ak.read_hdf(args.dsetName, args.filenames)
    end = time.time()
    t = end - start
    print(a)
    print(f'{t:.2f} seconds ({8*a.size/t:.2e} bytes/sec)')
    print("Testing bad filename...")
    badfilename = args.filenames[0] + '-should-not-exist-5473219431'
    try:
        ak.read_hdf(args.dsetName, args.filenames + [badfilename])
    except RuntimeError as e:
        print(e)
    print("Testing bad dsetName...")
    try:
        ak.read_hdf(args.dsetName+'-not-a-dset', args.filenames)
    except RuntimeError as e:
        print(e)