Beispiel #1
0
def add_matches_fpga_arrow(strings, regexes, platform_type, t_copy, t_fpga):
    """Run the regex matchers on an FPGA platform and return the matches.

    The record batch ``rb``, the row count ``num_rows`` and the name ``np``
    are not parameters; they are resolved from the enclosing module scope.

    Args:
        strings: Not used by this function.
        regexes: Not used by this function.
        platform_type: Forwarded to ``pf.Platform``.
        t_copy: Collector with ``append``; receives the seconds spent in
            ``context.enable()``.
        t_fpga: Collector with ``append``; receives the seconds spent
            between starting the core and ``wait_for_finish`` returning.

    Returns:
        The value returned by ``RegExCore.get_matches(np)``.
    """
    stopwatch = Timer()

    # Build the platform -> context -> core chain before initializing.
    fpga = pf.Platform(platform_type)
    ctx = pf.Context(fpga)
    regex_core = RegExCore(ctx)

    fpga.init()
    regex_core.reset()

    # Queueing happens outside the timed region; only enable() is measured.
    ctx.queue_record_batch(rb)
    stopwatch.start()
    ctx.enable()
    stopwatch.stop()
    t_copy.append(stopwatch.seconds())

    # Process rows 0 .. num_rows.
    regex_core.set_reg_exp_arguments(0, num_rows)

    # Time the run itself: start the matchers and wait for completion.
    # NOTE(review): the meaning of the argument 10 (presumably a polling
    # interval) is not visible here -- confirm against RegExCore.
    stopwatch.start()
    regex_core.start()
    regex_core.wait_for_finish(10)
    stopwatch.stop()
    t_fpga.append(stopwatch.seconds())

    # NOTE(review): `np` comes from module scope; confirm what it holds.
    return regex_core.get_matches(np)
Beispiel #2
0
def test_platform():
    """Exercise the basic ``pf.Platform`` calls on the "echo" platform.

    Covers init, name lookup, device malloc/free, MMIO write/read,
    host-to-device copies from three buffer types, one device-to-host
    copy, and termination. No return values are checked.

    Returns:
        True once every call has completed without raising.
    """
    echo = pf.Platform("echo")
    echo.init()

    print("Platform name: " + echo.get_name())

    # Allocate 1024 bytes and release them again right away.
    echo.device_free(echo.device_malloc(1024))

    # MMIO round trip on register 0; the value read back is not inspected.
    echo.write_mmio(0, 0)
    echo.read_mmio(0)

    # The same 7-byte payload as bytes, bytearray and a uint8 numpy array.
    payload = [1, 2, 3, 4, 5, 6, 7]
    size = 7
    host_buffers = (
        bytes(payload),
        bytearray(payload),
        np.array(payload, dtype=np.uint8),
    )

    # Copies go to device addresses 0, 7 and 14.
    for slot, host_buffer in enumerate(host_buffers):
        echo.copy_host_to_device(host_buffer, slot * size, size)

    # Read the first 7 bytes back; the result is not inspected.
    echo.copy_device_to_host(0, 7)

    echo.terminate()

    return True
Beispiel #3
0
def test_context():
    """Queue a RecordBatch and two loose arrays on an "echo" context.

    Builds a four-column RecordBatch (uint64, string, nullable uint64,
    list of uint32) plus two standalone uint32 arrays, queues all of them
    on a ``pf.Context``, enables the context and terminates the platform.
    Nothing is asserted; the function returns ``None``.
    """
    platform = pf.Platform("echo")
    platform.init()

    # Schema: only column "c" and the list items "e" are declared nullable.
    schema = pa.schema([
        pa.field("a", pa.uint64(), False),
        pa.field("b", pa.string(), False),
        pa.field("c", pa.uint64(), True),
        pa.field("d", pa.list_(pa.field("e", pa.uint32(), True)), False),
    ])

    # NOTE(review): per the pyarrow docs a True mask entry marks a null.
    c_mask = np.array([True, False, True, True])
    columns = [
        pa.array([1, 2, 3, 4], type=pa.uint64()),
        pa.array(["hello", "world", "fletcher", "arrow"], type=pa.string()),
        pa.array([5, 6, 7, 8], mask=c_mask, type=pa.uint64()),
        pa.array([[9, 10, 11, 12], [13, 14], [15, 16, 17], [18]],
                 type=pa.list_(pa.uint32())),
    ]

    # Two arrays that are queued on their own, outside the RecordBatch.
    loose_f = pa.array([19, 20, 21, 22], type=pa.uint32())
    loose_g = pa.array([23, 24, 25, 26], type=pa.uint32())

    batch = pa.RecordBatch.from_arrays(columns, schema)

    context = pf.Context(platform)
    context.queue_record_batch(batch)

    # One array is queued bare, the other with an explicit field.
    context.queue_array(loose_f)
    context.queue_array(loose_g, field=pa.field("g", pa.uint32(), False))

    # Enable the context after everything has been queued.
    context.enable()

    platform.terminate()
Beispiel #4
0
def test_platform():
    """Round-trip data through an "echo" ``pf.Platform`` and verify it.

    Allocates a device buffer, performs MMIO accesses, copies the same
    7-byte payload to the device from three host buffer types, reads all
    21 bytes back and asserts they match, then frees the buffer and
    terminates the platform.

    Returns:
        True if every call and the read-back assertion succeed.
    """
    # NOTE(review): the meaning of the second constructor argument (False)
    # is not visible here -- confirm against the pf.Platform signature.
    echo = pf.Platform("echo", False)
    echo.init()

    print("Platform name: " + echo.name())

    address = echo.device_malloc(1024)

    # MMIO on register 0: one write, then a read and a 64-bit read whose
    # results are not inspected.
    echo.write_mmio(0, 0)
    echo.read_mmio(0)
    echo.read_mmio_64(0)

    # The same payload as bytes, bytearray and a uint8 numpy array, stored
    # back to back starting at the allocated address.
    payload = [1, 2, 3, 4, 5, 6, 7]
    size = 7
    host_buffers = (
        bytes(payload),
        bytearray(payload),
        np.array(payload, dtype=np.uint8),
    )
    targets = (address, address + 7, address + 14)
    for host_buffer, target in zip(host_buffers, targets):
        echo.copy_host_to_device(host_buffer, target, size)

    # Read all three copies back in one go and compare.
    readback = echo.copy_device_to_host(address, 21)
    assert list(readback) == [1, 2, 3, 4, 5, 6, 7] * 3

    echo.device_free(address)
    echo.terminate()

    return True
Beispiel #5
0
        # Match Pandas series on CPU (marginal performance improvement most likely possible with Cython)
        t.start()
        m_pcpu.append(add_matches_cpu(strings_pandas, regexes))
        t.stop()
        t_pcpu.append(t.seconds())
        print(t.seconds())

        # Match Arrow array on CPU (significant performance improvement most likely possible with Cython)
        t.start()
        m_acpu.append(add_matches_cpu_arrow(rb.column(0), regexes))
        t.stop()
        t_acpu.append(t.seconds())
        print(t.seconds())

        # Match Arrow array on FPGA
        platform = pf.Platform(platform_type)
        context = pf.Context(platform)
        rc = RegExCore(context)

        # Initialize the platform
        platform.init()

        # Reset the UserCore
        rc.reset()

        # Prepare the column buffers
        t.start()
        context.queue_record_batch(rb)
        bytes_copied += context.get_queue_size()
        context.enable()
        t.stop()
Beispiel #6
0
import pyfletcher as pf
import numpy as np
import timeit
import sys
import argparse

if __name__ == "__main__":
    # The only command-line argument is the path to a RecordBatch file.
    parser = argparse.ArgumentParser()
    parser.add_argument("recordbatch_path")
    args = parser.parse_args()

    # Open the file and take its first RecordBatch.
    # NOTE(review): `pa` is not among the imports visible above this block;
    # confirm that pyarrow is imported as `pa` elsewhere in the file.
    reader = pa.RecordBatchFileReader(args.recordbatch_path)
    batch = reader.get_batch(0)

    # Create an interface to an auto-detected FPGA Platform and
    # initialize it.
    platform = pf.Platform()
    platform.init()

    # Create a Context for our data on the Platform, queue the RecordBatch
    # and enable the Context (potentially transferring the data to FPGA).
    context = pf.Context(platform)
    context.queue_record_batch(batch)
    context.enable()

    # Set up an interface to the Kernel, supplying the Context; then start
    # the kernel and wait for it to finish.
    kernel = pf.Kernel(context)
    kernel.start()
    kernel.poll_until_done()

    # Obtain the result as a uint32 and print it.
    result = kernel.get_return(np.dtype(np.uint32))
    print("Sum: " + str(result))
def arrow_kmeans_fpga(batch, centroids, iteration_limit, max_hw_dim,
                      max_hw_centroids, t_copy, t_fpga):
    """Run k-means on an FPGA UserCore and read the centroids back.

    ``platform_type`` is not a parameter; it is resolved from the
    enclosing module scope.

    Args:
        batch: Record batch queued on the context; its ``num_rows`` sets
            the end of the processed range.
        centroids: Indexable sequence of mutable, equally long sequences
            of integers. It is overwritten in place with the values read
            back from the platform.
        iteration_limit: Appended as the last UserCore argument.
        max_hw_dim: Number of dimension slots written per centroid.
        max_hw_centroids: Number of centroid slots written in total.
        t_copy: Collector with ``append``; receives the seconds spent in
            ``context.enable()``.
        t_fpga: Collector with ``append``; receives the seconds spent
            between starting the core and ``wait_for_finish`` returning.

    Returns:
        The same ``centroids`` object, after the in-place update.
    """
    stopwatch = Timer()

    fpga = pf.Platform(platform_type)
    ctx = pf.Context(fpga)
    core = pf.UserCore(ctx)

    fpga.init()
    core.reset()

    # Queueing happens outside the timed region; only enable() is measured.
    ctx.queue_record_batch(batch)
    stopwatch.start()
    ctx.enable()
    stopwatch.stop()
    t_copy.append(stopwatch.seconds())

    # Process the whole batch.
    core.set_range(0, batch.num_rows)

    # Each coordinate is passed as two 32-bit words, low word first.
    word_mask = 0xFFFFFFFF
    registers = []
    for centroid in centroids:
        for value in centroid:
            registers.extend((value & word_mask, (value >> 32) & word_mask))

        # Zero-fill the dimension slots this centroid does not use.
        for _ in range(max_hw_dim - len(centroid)):
            registers.extend((0, 0))

    # Centroid slots beyond those supplied: all dimensions zero except the
    # last one, which gets 0x80000000 as its first word.
    # NOTE(review): presumably a marker for "unused centroid" -- confirm
    # the word order against the hardware register map.
    for _ in range(max_hw_centroids - len(centroids)):
        for _ in range(max_hw_dim - 1):
            registers.extend((0, 0))
        registers.extend((0x80000000, 0))

    registers.append(iteration_limit)
    core.set_arguments(registers)

    # Time the run itself.
    # NOTE(review): the meaning of the argument 10 (presumably a polling
    # interval) is not visible here -- confirm against pf.UserCore.
    stopwatch.start()
    core.start()
    core.wait_for_finish(10)
    stopwatch.stop()
    t_fpga.append(stopwatch.seconds())

    # Read the centroids back: two registers per dimension, starting at
    # register 10, with max_hw_dim dimension slots per centroid.
    regs_per_dim = 2
    regs_offset = 10
    centroid_count = len(centroids)
    dims_per_centroid = len(centroids[0])

    for c in range(centroid_count):
        slot_base = c * max_hw_dim
        for d in range(dims_per_centroid):
            reg_num = (slot_base + d) * regs_per_dim + regs_offset
            centroids[c][d] = fpga.read_mmio_64(reg_num, type="int")

    return centroids