import os

from cffi import FFI
import veo

# VEO test script: hand a cffi-built struct to a VE function as an on-stack
# argument and collect the integer result.
print("\nVEO test:")
print("Pass a cffi built structure to the VE as argument on the stack.")
print("Sum the elements and multiply with a factor. Correct result is 30.")
print("\n")

# Start the VE process, load the test library from the current working
# directory and open a context to submit the call on.
p = veo.VeoProc(0)
lib = p.load_library(os.path.join(os.getcwd(), "libvetest6.so"))
c = p.open_context()

# Declare the C struct layout to cffi and build one instance holding 1, 2, 3.
ffi = FFI()
ffi.cdef(""" struct abc { int a, b, c; }; """)
abc = ffi.new("struct abc *")
abc.a = 1
abc.b = 2
abc.c = 3

# we'll pass the struct * as a void *
lib.multeach.args_type("void *", "int")
lib.multeach.ret_type("int")

# The struct's raw bytes (ffi.buffer) are wrapped in veo.OnStack; the second
# argument is 5, which matches the announced result (1 + 2 + 3) * 5 = 30.
req = lib.multeach(c, veo.OnStack(ffi.buffer(abc)), 5)
r = req.wait_result()
from __future__ import print_function import numpy as np import veo as v import vecblas as vb import time #Select a problem size M = 10000 K = 1000 N = 5000 total_flops = M*K*(2*N+3) # start VE process p = v.VeoProc(0) lib = p.lib["__static__"] # open a context (worker thread) ctx = p.open_context() # Generate input data as numpy arrays a_np = np.random.rand(M*K).astype(np.float32).reshape(M,K) b_np = np.random.rand(K*N).astype(np.float32).reshape(K,N) res_np = np.zeros((M,N)).astype(np.float32) exp_np = np.copy(res_np) # Copy memory to VE a_v = p.alloc_mem(a_np.nbytes) b_v = p.alloc_mem(b_np.nbytes) res_v = p.alloc_mem(res_np.nbytes) p.write_mem(a_v, a_np, a_np.nbytes) p.write_mem(b_v, b_np, b_np.nbytes)