Example #1
0
# Each entry pairs the label to print with the name of the ``strs`` method to
# call and its positional arguments. A method is only invoked when its turn
# comes, so calls and prints interleave one at a time.
demo_calls = (
    (".stoi():", "stoi", ()),
    (".stof():", "stof", ()),
    (".hash():", "hash", ()),
    # pad
    (".pad(8):", "pad", (8,)),
    (".zfill(7):", "zfill", (7,)),
    (".repeat(2):", "repeat", (2,)),
    # strip
    (".strip(e):", "strip", ('e',)),
    # slice
    (".slice(2,4):", "slice", (2, 4)),
    (".slice_replace(2,4,z):", "slice_replace", (2, 4, 'z')),
    (".replace(e,é):", "replace", ('e', 'é')),
)
for label, method_name, call_args in demo_calls:
    print(label, getattr(strs, method_name)(*call_args))

# split
pieces = strs.split("e")
print(".split(e):")
for piece in pieces:
    print(" ", piece)
    nvstrings.free(piece)  # very important

pieces = None
# Clearing the name frees the strings object, which deallocates from rmm.
# This is important because rmm may be destroyed before the strings are.
strs = None
# print(rmm.csv_log())
# rmm.finalize() is not necessary here
Example #2
0
def test_free():
    """Call ``nvstrings.free`` on the object returned by ``to_device``.

    Nothing is asserted; the test passes as long as neither call raises.
    """
    # TODO: Check that GPU memory has been freed.
    device_strings = nvstrings.to_device(list("abcd"))
    nvstrings.free(device_strings)
Example #3
0
# Benchmark: time nvstrings' split() against pandas' Series.str.split() on the
# "address" column of the same CSV file.
rmm.initialize()

df = pd.read_csv('/data/7584-rows.csv', sep=',')

# Convert the column to a Python list once and hand that same list to both
# libraries, instead of calling .tolist() separately for each of them.
addresses = df["address"].values.tolist()

dstrs = nvstrings.to_device(addresses)
hstrs = pd.Series(addresses)

# Report the resolution of the clock used for the measurements below.
print("precision = %0.9f seconds" %
      time.clock_getres(time.CLOCK_MONOTONIC_RAW))
print(str(dstrs.size()), "strings")

# Time the nvstrings split.
st = time.clock_gettime(time.CLOCK_MONOTONIC_RAW)
da = dstrs.split(' ')
et1 = (time.clock_gettime(time.CLOCK_MONOTONIC_RAW) - st)
print("nvstrings.split() = %05f" % et1)

# Time the pandas split.
st = time.clock_gettime(time.CLOCK_MONOTONIC_RAW)
ha = hstrs.str.split(' ')
et2 = (time.clock_gettime(time.CLOCK_MONOTONIC_RAW) - st)
print("   pandas.split() = %05f" % et2)
print("speedup = %0.5fx" % (et2 / et1))

# Drop the source strings, then explicitly free every object split() returned.
dstrs = None
for d in da:
    nvstrings.free(d)
Example #4
0
def test_free():
    """Pass the result of ``nvstrings.to_device`` straight to ``nvstrings.free``.

    No assertion is made; the calls only have to complete without raising.
    """
    # TODO: Check that GPU memory has been freed.
    nvstrings.free(nvstrings.to_device(['a', 'b', 'c', 'd']))
Example #5
0
print(".findall_record('[aA]'):")
rows = strs.findall_record('[aA]')
for row in rows:
    # NOTE(review): these rows are not passed to nvstrings.free(), unlike the
    # extract_record() rows further down - confirm whether that is intended.
    print(" ", row)

print("----------------------")
strs = nvstrings.to_device([
    'ALA-PEK Flight:HU7934', 'HKT-PEK Flight:CA822', 'FRA-PEK Flight:LA8769',
    'FRA-PEK Flight:LH7332', '', None, 'Flight:ZZ'
])
print(strs)
# The labels are ordinary (non-raw) strings, so the backslash is doubled:
# this prints a literal "\d" without relying on an invalid escape sequence.
print(".extract(r'Flight:([A-Z]+)(\\d+)'):")
columns = strs.extract(r'Flight:([A-Z]+)(\d+)')
for col in columns:
    print(" ", col)
    nvstrings.free(col)
print(".extract_record(r'Flight:([A-Z]+)(\\d+)'):")
rows = strs.extract_record(r'Flight:([A-Z]+)(\d+)')
for row in rows:
    print(" ", row)
    nvstrings.free(row)

print("----------------------")
# NOTE(review): to_device() receives a bare str here, while every other call
# in this snippet passes a list - confirm this is intended.
strs = nvstrings.to_device('word [[wikt:anarchism|anarchism]] is')
print(strs)
# Show the pattern next to the result of replacing its matches with ''.
print(".replace('\\[\\[[a-z\\-]+:[^]]+\\]\\]','')",
      strs.replace('\\[\\[[a-z\\-]+:[^]]+\\]\\]', ''))

print("----------------------")
strs = nvstrings.to_device(["A543", "Z756", "", None])
print(strs)
Example #6
0
# rsplit variants: print the heading, run the call, then list every entry of
# the result. Methods are looked up by name so that no bound method keeps
# ``strs`` referenced after the loop.
for heading, method_name, limit in (
    ("strs.rsplit_record(_,3):", "rsplit_record", 3),
    ("strs.rsplit(_,4):", "rsplit", 4),
    ("strs.rsplit_record(_,4):", "rsplit_record", 4),
):
    print(heading)
    pieces = getattr(strs, method_name)("_", limit)
    for piece in pieces:
        print(" ", piece)

# partition: each returned entry is printed and then explicitly freed.
print("strs.partition(_):")
pieces = strs.partition('_')
for piece in pieces:
    print(" ", piece)
    nvstrings.free(piece)

# rpartition: same treatment, kept under its own name.
print("strs.rpartition(_):")
rpieces = strs.rpartition('_')
for piece in rpieces:
    print(" ", piece)
    nvstrings.free(piece)

strs = None
Example #7
0
#
import nvstrings

#
from librmm_cffi import librmm as rmm
from librmm_cffi import librmm_config as rmm_cfg
# Turn on the pool allocator in the rmm config before initializing rmm.
rmm_cfg.use_pool_allocator = True
rmm.initialize()

# Build the strings object from the CSV file (second argument: 1).
strs = nvstrings.from_csv("../../data/7584-rows.csv", 1)
# print(strs)

# Split on spaces with a limit of 2 and show the entry at index 1.
columns = strs.split(" ", 2)
print(columns[1])
# print(columns[1].len())

# Drop the source strings, then explicitly free each object split() returned.
strs = None
for column in columns:
    nvstrings.free(column)