def RunDPU(kernel, img, count):
    """Run repeated inference rounds of one image on the DPU.

    A DPU task is created from ``kernel``; each round loads ``img`` into the
    input node, runs the task and applies softmax to the FC output.  Rounds
    repeat while ``count`` is below 1000.  After every round ``count`` is
    advanced by ``threadnum`` while holding ``l`` (both are names defined
    outside this function).  The softmax result is not returned.

    Args:
        kernel: DPU kernel the task is created from.
        img: Image to be run.
        count: Starting value of the test-round counter.
    """
    # Create the DPU task from the DPU kernel.
    task = n2cube.dpuCreateTask(kernel, 0)
    try:
        while count < 1000:
            # Load the image into the DPU input node.
            dputils.dpuSetInputImage2(task, KERNEL_CONV_INPUT, img)

            # Run the model on the DPU.
            n2cube.dpuRunTask(task)

            # Size, channel count, address and scale of the FC output tensor.
            size = n2cube.dpuGetOutputTensorSize(task, KERNEL_FC_OUTPUT)
            channel = n2cube.dpuGetOutputTensorChannel(task, KERNEL_FC_OUTPUT)
            conf = n2cube.dpuGetOutputTensorAddress(task, KERNEL_FC_OUTPUT)
            outputScale = n2cube.dpuGetOutputTensorScale(task, KERNEL_FC_OUTPUT)

            # Run softmax on the FC result; no pre-allocated buffer is needed
            # because dpuRunSoftmax returns the result itself.
            n2cube.dpuRunSoftmax(conf, channel, size // channel, outputScale)

            # NOTE(review): assumes `l` is a lock object supporting the
            # context-manager protocol (e.g. threading.Lock) - confirm.
            with l:
                count = count + threadnum
    finally:
        # Destroy the DPU task and free its resources, even if a round fails.
        n2cube.dpuDestroyTask(task)
def predict_label(imfile):
    """Download an image from S3, classify it on the DPU and return its label.

    The object ``imfile`` is fetched from ``BUCKET`` into ``image_folder``,
    resized to ``IMG_DIMS`` x ``IMG_DIMS``, converted to float32 and divided
    by 255 before being fed to the DPU task.  Softmax is applied to the FC
    output and the highest-scoring index selects the label.

    Args:
        imfile: Object key in ``BUCKET``; also used as the file name inside
            ``image_folder``.

    Returns:
        The entry of ``slabels`` at the arg-max of the softmax output, with
        newline characters stripped from both ends.
    """
    task = n2cube.dpuCreateTask(kernel, 0)
    try:
        # Use one path for both the download target and the read, so the two
        # agree even when image_folder has no trailing path separator.
        img_path = os.path.join(image_folder, imfile)
        s3_client.download_file(BUCKET, imfile, img_path)

        img = cv2.imread(img_path)
        img = cv2.resize(img, (IMG_DIMS, IMG_DIMS))
        img = img.astype(np.float32) / 255.0

        # Set the input tensor from the preprocessed image.
        input_len = n2cube.dpuGetInputTensorSize(task, KERNEL_CONV_INPUT)
        n2cube.dpuSetInputTensorInHWCFP32(task, KERNEL_CONV_INPUT, img, input_len)

        # Run the model on the DPU.
        n2cube.dpuRunTask(task)

        # Size, channel count, address and scale of the FC output tensor.
        size = n2cube.dpuGetOutputTensorSize(task, KERNEL_FC_OUTPUT)
        channel = n2cube.dpuGetOutputTensorChannel(task, KERNEL_FC_OUTPUT)
        conf = n2cube.dpuGetOutputTensorAddress(task, KERNEL_FC_OUTPUT)
        outputScale = n2cube.dpuGetOutputTensorScale(task, KERNEL_FC_OUTPUT)

        # Run softmax on the FC result.
        softmax = n2cube.dpuRunSoftmax(conf, channel, size // channel, outputScale)
    finally:
        # Release the DPU task even when download, decoding or inference fails.
        n2cube.dpuDestroyTask(task)

    return slabels[np.argmax(softmax)].strip('\n')
def main():
    """Run the segmentation test set through the DPU and report IoU.

    When ``USE_DPU`` is set, the DPU driver is opened, ``KERNEL_CONV`` is
    loaded and a task is created.  Every test image returned by
    ``dpu_get_data`` is flattened, run on the DPU and passed through softmax;
    the per-image predictions are collected, reduced with arg-max over the
    last axis and handed to ``dpu_IoU`` together with the ground truth.
    The task, the kernel and the driver are released on exit, also when
    processing fails.
    """
    print("STARTING UNETv2 on DPU...")

    kernel = None
    task = None
    if USE_DPU:
        # Attach to the DPU driver.
        n2cube.dpuOpen()

    try:
        if USE_DPU:
            # Load the DPU kernel and create a task.
            kernel = n2cube.dpuLoadKernel(KERNEL_CONV)
            task = n2cube.dpuCreateTask(kernel, 0)

        # Load and preprocess images and load segmentation labels.
        assert os.path.isdir(IMG_TEST_DIR)
        x_test, y_test, _img_file, _seg_file = dpu_get_data(
            IMG_TEST_DIR, SEG_TEST_DIR, cfg.NUM_CLASSES, cfg.WIDTH, cfg.HEIGHT)

        y_pred = []
        # Process all images.
        for i, imageRun in enumerate(x_test):
            # Flatten the 3-D image into a 1-D buffer for the DPU input.
            imageRun = imageRun.reshape(
                (imageRun.shape[0] * imageRun.shape[1] * imageRun.shape[2]))
            input_len = len(imageRun)

            if USE_DPU:
                first = i == 0

                # Load the pre-processed image as DPU input.
                n2cube.dpuSetInputTensorInHWCFP32(task, CONV_INPUT_NODE,
                                                  imageRun, input_len)

                if first:
                    # Input tensor properties are only needed for this
                    # one-time report, so they are queried only once.
                    dpu_in = n2cube.dpuGetInputTensor(task, CONV_INPUT_NODE)
                    ti_scale = n2cube.dpuGetTensorScale(dpu_in)
                    ti_h = n2cube.dpuGetTensorHeight(dpu_in)
                    ti_w = n2cube.dpuGetTensorWidth(dpu_in)
                    ti_sz = n2cube.dpuGetTensorSize(dpu_in)
                    ti_ch = n2cube.dpuGetTensorChannel(dpu_in)
                    print(
                        "Input tensor=%3d ch=%3d H=%3d W=%3d Size=%6d scale=%4d"
                        % (i, ti_ch, ti_h, ti_w, ti_sz, ti_scale))

                # Run the DPU task.
                n2cube.dpuRunTask(task)

                # Address, channel count, size, width, height and scale of
                # the output tensor.
                dpu_out = n2cube.dpuGetOutputTensorAddress(task, CONV_OUTPUT_NODE)
                to_ch = n2cube.dpuGetOutputTensorChannel(task, CONV_OUTPUT_NODE)
                to_sz = n2cube.dpuGetOutputTensorSize(task, CONV_OUTPUT_NODE)
                to_w = n2cube.dpuGetOutputTensorWidth(task, CONV_OUTPUT_NODE)
                to_h = n2cube.dpuGetOutputTensorHeight(task, CONV_OUTPUT_NODE)
                to_scale = n2cube.dpuGetOutputTensorScale(task, CONV_OUTPUT_NODE)

                if first:
                    print("Output tensor=%3d ch=%3d H=%3d W=%3d Size=%6d" % (i, to_ch, 
to_h, to_w, to_sz))
                    print("Output tensor scaling factor", to_scale)

                softmax = n2cube.dpuRunSoftmax(dpu_out, to_ch,
                                               to_sz // to_ch, to_scale)

                prediction = softmax.reshape((to_h, to_w, to_ch))
                y_pred.append(prediction)
                if first:
                    print("prediction shape: ", prediction.shape)

        # Calculate intersection over union for each segmentation class.
        y_pred = np.asarray(y_pred)
        y_test = np.asarray(y_test)
        print("y_pred shape: ", y_pred.shape)
        print("y_test shape: ", y_test.shape)

        y_predi = np.argmax(y_pred, axis=3)
        y_testi = np.argmax(y_test, axis=3)
        print("shape of y_testi and y_predi ", y_testi.shape, y_predi.shape)
        dpu_IoU(y_testi, y_predi)

        # Print results.
        print("Processed", len(x_test), "images")
        print("FINISHED")
    finally:
        if USE_DPU:
            # Destroy the task and the kernel (whichever were created),
            # then detach from the driver.
            if task is not None:
                n2cube.dpuDestroyTask(task)
            if kernel is not None:
                n2cube.dpuDestroyKernel(kernel)
            n2cube.dpuClose()