# Demo of assorted nvstrings operations on ``strs`` (created earlier in the
# script): each call's result is printed next to a label naming the call.

# --- numeric conversion and hashing ---
print(".stoi():", strs.stoi())
print(".stof():", strs.stof())
print(".hash():", strs.hash())

# --- padding and repetition ---
print(".pad(8):", strs.pad(8))
print(".zfill(7):", strs.zfill(7))
print(".repeat(2):", strs.repeat(2))

# --- stripping ---
print(".strip(e):", strs.strip("e"))

# --- slicing and replacement ---
print(".slice(2,4):", strs.slice(2, 4))
print(".slice_replace(2,4,z):", strs.slice_replace(2, 4, "z"))
print(".replace(e,é):", strs.replace("e", "é"))

# --- splitting ---
nstrs = strs.split("e")
print(".split(e):")
for part in nstrs:
    print(" ", part)
    # Very important: explicitly release every object returned by split().
    nvstrings.free(part)

nstrs = None

# Author's note: rebinding ``strs`` frees the strings object, which
# deallocates from rmm. This matters because rmm may be destroyed before the
# strings are.
strs = None

# Not necessary here:
# print(rmm.csv_log())
# rmm.finalize()
def test_free():
    """Smoke-test ``nvstrings.free``.

    Builds an nvstrings object from four one-character strings with
    ``nvstrings.to_device`` and passes it to ``nvstrings.free``. Nothing is
    asserted; the test passes as long as neither call raises.
    """
    # TODO: Check that GPU memory has been freed.
    device_strings = nvstrings.to_device(["a", "b", "c", "d"])
    nvstrings.free(device_strings)
# Benchmark: time nvstrings' split() against pandas' Series.str.split() on
# the "address" column of a CSV file and print the ratio of the two timings.
rmm.initialize()

df = pd.read_csv('/data/7584-rows.csv', sep=',')
values = df["address"].values

# Same data twice: once as an nvstrings object, once as a pandas Series.
dstrs = nvstrings.to_device(values.tolist())
hstrs = pd.Series(values.tolist())

print("precision = %0.9f seconds" % time.clock_getres(time.CLOCK_MONOTONIC_RAW))
print(dstrs.size(), "strings")

# Time the nvstrings split.
st = time.clock_gettime(time.CLOCK_MONOTONIC_RAW)
da = dstrs.split(' ')
et1 = time.clock_gettime(time.CLOCK_MONOTONIC_RAW) - st
# NOTE(review): "%05f" is a zero-padded minimum width of 5 with the default
# precision; "%0.5f" (as used for the speedup line) may have been intended.
print("nvstrings.split() = %05f" % et1)

# Time the pandas split on the same data.
st = time.clock_gettime(time.CLOCK_MONOTONIC_RAW)
ha = hstrs.str.split(' ')
et2 = time.clock_gettime(time.CLOCK_MONOTONIC_RAW) - st
print(" pandas.split() = %05f" % et2)

print("speedup = %0.5fx" % (et2 / et1))

# Drop the source strings, then explicitly release every object returned by
# the nvstrings split.
dstrs = None
for d in da:
    nvstrings.free(d)
def test_free():
    """Check that an nvstrings object can be handed to ``nvstrings.free``.

    The object is created from a four-element list of single-character
    strings. No assertion is made; the test only verifies that creating and
    freeing the object does not raise.
    """
    # TODO: Check that GPU memory has been freed.
    host_values = ['a', 'b', 'c', 'd']
    device_strings = nvstrings.to_device(host_values)
    nvstrings.free(device_strings)
# Demo of the regex-based nvstrings operations. ``strs`` at this point is
# whatever the earlier part of the script left it as; it is rebound to new
# sample data between sections.

print(".findall_record('[aA]'):")
rows = strs.findall_record('[aA]')
# NOTE(review): unlike the extract()/extract_record() results below, these
# rows are never passed to nvstrings.free() -- confirm that is intentional.
for row in rows:
    print(" ", row)

print("----------------------")
strs = nvstrings.to_device([
    'ALA-PEK Flight:HU7934',
    'HKT-PEK Flight:CA822',
    'FRA-PEK Flight:LA8769',
    'FRA-PEK Flight:LH7332',
    '',
    None,
    'Flight:ZZ',
])
print(strs)

# The labels are raw strings so that "\d" is not treated as an (invalid)
# escape sequence; the printed text is unchanged.
print(r".extract(r'Flight:([A-Z]+)(\d+)'):")
columns = strs.extract(r'Flight:([A-Z]+)(\d+)')
for col in columns:
    print(" ", col)
    nvstrings.free(col)

print(r".extract_record(r'Flight:([A-Z]+)(\d+)'):")
rows = strs.extract_record(r'Flight:([A-Z]+)(\d+)')
for row in rows:
    print(" ", row)
    nvstrings.free(row)

print("----------------------")
strs = nvstrings.to_device('word [[wikt:anarchism|anarchism]] is')
print(strs)
print(".replace('\\[\\[[a-z\\-]+:[^]]+\\]\\]','')",
      strs.replace('\\[\\[[a-z\\-]+:[^]]+\\]\\]', ''))

print("----------------------")
strs = nvstrings.to_device(["A543", "Z756", "", None])
print(strs)
# Demo of right-split and partition operations on ``strs`` (created earlier
# in the script), using "_" as the delimiter. Each section prints a label and
# then every item of the returned collection.

# --- rsplit / rsplit_record with a numeric second argument ---
print("strs.rsplit_record(_,3):")
nstrs = strs.rsplit_record("_", 3)
for item in nstrs:
    print(" ", item)

print("strs.rsplit(_,4):")
nstrs = strs.rsplit("_", 4)
for item in nstrs:
    print(" ", item)

print("strs.rsplit_record(_,4):")
nstrs = strs.rsplit_record("_", 4)
for item in nstrs:
    print(" ", item)

# --- partition: each returned item is explicitly freed after printing ---
print("strs.partition(_):")
nstrs = strs.partition("_")
for item in nstrs:
    print(" ", item)
    nvstrings.free(item)

# --- rpartition: same pattern as partition ---
print("strs.rpartition(_):")
rstrs = strs.rpartition("_")
for item in rstrs:
    print(" ", item)
    nvstrings.free(item)

# Drop the reference to the source strings.
strs = None
# Demo: read strings from a CSV file with nvstrings.from_csv(), split them on
# spaces, and print the second element of the split result.
import nvstrings

from librmm_cffi import librmm as rmm
from librmm_cffi import librmm_config as rmm_cfg

# The pool-allocator option is set before rmm.initialize() is called.
rmm_cfg.use_pool_allocator = True
rmm.initialize()

# NOTE(review): the second argument is presumably the CSV column index --
# confirm against the nvstrings.from_csv documentation.
strs = nvstrings.from_csv("../../data/7584-rows.csv", 1)
# print(strs)

cols = strs.split(" ", 2)
print(cols[1])
# print(cols[1].len())

# Drop the source strings, then explicitly release every split result.
strs = None
for col in cols:
    nvstrings.free(col)