def prediction(self, image):
    task = n2cube.dpuCreateTask(self.kernel, 0)
    image_data = Detector.pre_process(image, (416, 416))
    image_data = np.array(image_data, dtype=np.float32)
    input_len = n2cube.dpuGetInputTensorSize(task, CONV_INPUT_NODE)

    """Set input tensor"""
    n2cube.dpuSetInputTensorInHWCFP32(task, CONV_INPUT_NODE, image_data, input_len)

    """Run the model on the DPU"""
    n2cube.dpuRunTask(task)

    """Get the output tensors"""
    conv_sbbox_size = n2cube.dpuGetOutputTensorSize(task, CONV_OUTPUT_NODE1)
    conv_out1 = n2cube.dpuGetOutputTensorInHWCFP32(task, CONV_OUTPUT_NODE1, conv_sbbox_size)
    conv_out1 = np.reshape(conv_out1, (1, 13, 13, 75))

    conv_mbbox_size = n2cube.dpuGetOutputTensorSize(task, CONV_OUTPUT_NODE2)
    conv_out2 = n2cube.dpuGetOutputTensorInHWCFP32(task, CONV_OUTPUT_NODE2, conv_mbbox_size)
    conv_out2 = np.reshape(conv_out2, (1, 26, 26, 75))

    conv_lbbox_size = n2cube.dpuGetOutputTensorSize(task, CONV_OUTPUT_NODE3)
    conv_out3 = n2cube.dpuGetOutputTensorInHWCFP32(task, CONV_OUTPUT_NODE3, conv_lbbox_size)
    conv_out3 = np.reshape(conv_out3, (1, 52, 52, 75))

    return [conv_out1, conv_out2, conv_out3]
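The three returned tensors are the YOLOv3 output grids for one 416x416 input; the 75 channels decompose as 3 anchors x (4 box coordinates + 1 objectness score + 20 class scores), consistent with a 20-class (VOC-style) model. A minimal, hypothetical usage sketch — the `detector` instance and file name are assumptions, not part of the listing above:

import cv2

detector = Detector()              # hypothetical: wraps the loaded DPU kernel
image = cv2.imread('test.jpg')     # illustrative file name
out1, out2, out3 = detector.prediction(image)
# out1: (1, 13, 13, 75) - coarse grid, detects large objects
# out2: (1, 26, 26, 75) - medium grid
# out3: (1, 52, 52, 75) - fine grid, detects small objects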
def run_dpu_task(self, kernel, i, iter_cnt, listimage, result):
    count = 0
    task = n2cube.dpuCreateTask(kernel, 0)
    while count < iter_cnt:
        img_name = listimage[i].pop(0)
        path = os.path.join(image_folder, img_name)
        im = cv2.imread(path)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        h, w = im.shape[:2]

        # Center crop with padding, then resize to the network input size
        padded_center_crop_size = int((IMAGE_SIZE / (IMAGE_SIZE + CROP_PADDING)) * min(h, w))
        offset_height = ((h - padded_center_crop_size) + 1) // 2
        offset_width = ((w - padded_center_crop_size) + 1) // 2
        image_crop = im[offset_height:padded_center_crop_size + offset_height,
                        offset_width:padded_center_crop_size + offset_width, :]
        image = cv2.resize(image_crop, dsize=(IMAGE_SIZE, IMAGE_SIZE), interpolation=cv2.INTER_CUBIC)
        im = cv2.normalize(image, None, alpha=-1, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
        count += 1

        input_len = n2cube.dpuGetInputTensorSize(task, KERNEL_CONV_INPUT)
        n2cube.dpuSetInputTensorInHWCFP32(task, KERNEL_CONV_INPUT, im, input_len)
        n2cube.dpuRunTask(task)

        size = n2cube.dpuGetOutputTensorSize(task, KERNEL_FC_OUTPUT)
        channel = n2cube.dpuGetOutputTensorChannel(task, KERNEL_FC_OUTPUT)
        conf = n2cube.dpuGetOutputTensorAddress(task, KERNEL_FC_OUTPUT)
        outputScale = n2cube.dpuGetOutputTensorScale(task, KERNEL_FC_OUTPUT)
        softmax = n2cube.dpuRunSoftmax(conf, channel, size // channel, outputScale)

        # Top-5 predictions
        idx = np.argpartition(softmax, -5)[-5:]
        top_5 = idx[np.argsort(-softmax[idx])]
        for val in top_5:
            result[i].append("{} {}".format(img_name, val))
    n2cube.dpuDestroyTask(task)
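`run_dpu_task` is written to be driven by several workers sharing one DPU kernel, each creating its own task; how the threads are launched is not shown. A sketch under that assumption — `runner`, `all_images`, and the thread count are illustrative:

from threading import Thread

thread_cnt = 4                                       # illustrative worker count
iter_cnt = len(all_images) // thread_cnt
listimage = [all_images[i::thread_cnt] for i in range(thread_cnt)]  # split the work
result = [[] for _ in range(thread_cnt)]             # per-thread top-5 strings

threads = [Thread(target=runner.run_dpu_task,
                  args=(kernel, i, iter_cnt, listimage, result))
           for i in range(thread_cnt)]
for t in threads:
    t.start()
for t in threads:
    t.join()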
def predict_label(imfile):
    task = n2cube.dpuCreateTask(kernel, 0)

    # Use the S3 client to download the file
    s3_client.download_file(BUCKET, imfile, image_folder + imfile)
    img_obj = os.path.join(image_folder, imfile)
    # To get it from a local path instead:
    # img_file = os.path.join(image_folder, imfile)

    img = cv2.imread(img_obj)
    img = cv2.resize(img, (IMG_DIMS, IMG_DIMS))
    img = img.astype(np.float32)
    img = img / 255.0

    """Get input tensor"""
    tensor = n2cube.dpuGetInputTensor(task, KERNEL_CONV_INPUT)
    input_len = n2cube.dpuGetInputTensorSize(task, KERNEL_CONV_INPUT)

    """Set input tensor"""
    n2cube.dpuSetInputTensorInHWCFP32(task, KERNEL_CONV_INPUT, img, input_len)

    """Run the model on the DPU"""
    n2cube.dpuRunTask(task)

    """Get the output tensor size from the FC output"""
    size = n2cube.dpuGetOutputTensorSize(task, KERNEL_FC_OUTPUT)

    """Get the output tensor channel count from the FC output"""
    channel = n2cube.dpuGetOutputTensorChannel(task, KERNEL_FC_OUTPUT)

    """Get the FC result"""
    conf = n2cube.dpuGetOutputTensorAddress(task, KERNEL_FC_OUTPUT)

    """Get the output scale of the FC layer"""
    outputScale = n2cube.dpuGetOutputTensorScale(task, KERNEL_FC_OUTPUT)

    """Run softmax"""
    softmax = n2cube.dpuRunSoftmax(conf, channel, size // channel, outputScale)

    n2cube.dpuDestroyTask(task)
    return slabels[np.argmax(softmax)].strip('\n')
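`slabels` is referenced but never defined in this listing; presumably it is a class-label file read into a list. A hedged usage sketch (the file names are illustrative):

with open('labels.txt') as f:        # illustrative label file, one class per line
    slabels = f.readlines()

print(predict_label('cat.jpg'))      # downloads 'cat.jpg' from S3, runs the DPU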
# cv2.resizeWindow("Display", 1024, 512)
# for i in range(imagenumber):
#     print(f"i = {i+1}")
while True:
    ret, image = cap.read()
    # To process images from disk instead of the camera:
    # print(listimage[i])
    # path = image_folder + listimage[i]
    # image = cv2.imread(path)
    image_ho, image_wo, _ = image.shape
    image_size = image.shape[:2]
    image_data = pre_process(image, (416, 416))
    image_data = np.array(image_data, dtype=np.float32)
    input_len = n2cube.dpuGetInputTensorSize(task, CONV_INPUT_NODE)

    """Set input tensor"""
    n2cube.dpuSetInputTensorInHWCFP32(task, CONV_INPUT_NODE, image_data, input_len)

    """Run the model on the DPU"""
    n2cube.dpuRunTask(task)

    """Get the output tensors"""
    conv_sbbox_size = n2cube.dpuGetOutputTensorSize(task, CONV_OUTPUT_NODE1)
    conv_out1 = n2cube.dpuGetOutputTensorInHWCFP32(task, CONV_OUTPUT_NODE1, conv_sbbox_size)
    conv_out1 = np.reshape(conv_out1, (1, 13, 13, 75))

    conv_mbbox_size = n2cube.dpuGetOutputTensorSize(task, CONV_OUTPUT_NODE2)
    conv_out2 = n2cube.dpuGetOutputTensorInHWCFP32(task, CONV_OUTPUT_NODE2, conv_mbbox_size)
    conv_out2 = np.reshape(conv_out2, (1, 26, 26, 75))
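Both YOLO listings call `pre_process(.)`, which is not shown here. The sketch below is a guess at the usual letterbox preprocessing such code assumes (aspect-preserving resize onto a gray canvas, values scaled to [0, 1]); the batch dimension and padding value are assumptions:

import cv2
import numpy as np

def pre_process(image, model_image_size):
    # Letterbox: resize with preserved aspect ratio, pad the rest with gray
    ih, iw = image.shape[:2]
    h, w = model_image_size
    scale = min(w / iw, h / ih)
    nw, nh = int(iw * scale), int(ih * scale)
    resized = cv2.resize(image, (nw, nh), interpolation=cv2.INTER_LINEAR)
    canvas = np.full((h, w, 3), 128, dtype=np.uint8)
    dy, dx = (h - nh) // 2, (w - nw) // 2
    canvas[dy:dy + nh, dx:dx + nw, :] = resized
    image_data = canvas.astype(np.float32) / 255.0
    return np.expand_dims(image_data, 0)  # add batch dimension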
def main():
    # UI: DPU
    ui = UI()
    ui.update_boot_window('Initializing DPU...')

    from dnndk import n2cube
    from pynq_dpu import DpuOverlay

    # Set up the DPU IP
    overlay = DpuOverlay(str(fh.dir_dpu / fh.dpu_bit_file))
    overlay.load_model(str(fh.dir_dpu / fh.dpu_assembly_file))

    # Set up the Neural Network Runtime (N2Cube)
    kernel_name = fh.kernel_name
    kernel_conv_input = fh.kernel_conv_input
    kernel_fc_output = fh.kernel_fc_output

    n2cube.dpuOpen()
    kernel = n2cube.dpuLoadKernel(kernel_name)
    task = n2cube.dpuCreateTask(kernel, 0)

    input_tensor_size = n2cube.dpuGetInputTensorSize(task, kernel_conv_input)
    output_tensor_size = n2cube.dpuGetOutputTensorSize(task, kernel_fc_output)
    output_tensor_channel = n2cube.dpuGetOutputTensorChannel(task, kernel_fc_output)
    output_tensor_address = n2cube.dpuGetOutputTensorAddress(task, kernel_fc_output)
    output_tensor_scale = n2cube.dpuGetOutputTensorScale(task, kernel_fc_output)

    # UI: Camera
    ui.update_boot_window('Initializing Camera...')

    # libcamera
    libcamera = ctypes.CDLL(fh.dir_cam / fh.libcamera_file)

    # Getters
    libcamera.get_frame_ptr.restype = ctypes.POINTER(ctypes.c_ubyte)
    libcamera.get_frame_ptr.argtypes = [ctypes.c_uint]
    libcamera.get_throw_bgn_idx.restype = ctypes.c_uint
    libcamera.get_throw_bgn_idx.argtypes = None
    libcamera.get_throw_end_idx.restype = ctypes.c_uint
    libcamera.get_throw_end_idx.argtypes = None
    libcamera.get_throw_bgn.restype = ctypes.c_bool
    libcamera.get_throw_bgn.argtypes = None
    libcamera.get_throw_end.restype = ctypes.c_bool
    libcamera.get_throw_end.argtypes = None

    # Setters
    libcamera.set_frame_rate.restype = None
    libcamera.set_frame_rate.argtypes = [ctypes.c_double]
    libcamera.set_buff_size.restype = None
    libcamera.set_buff_size.argtypes = [ctypes.c_uint]
    libcamera.set_exposure_time.restype = None
    libcamera.set_exposure_time.argtypes = [ctypes.c_double]
    libcamera.set_camera_gain.restype = None
    libcamera.set_camera_gain.argtypes = [ctypes.c_double]
    libcamera.set_avg_diffs.restype = None
    libcamera.set_avg_diffs.argtypes = [ctypes.c_uint]
    libcamera.set_threshold_mult.restype = None
    libcamera.set_threshold_mult.argtypes = [ctypes.c_double]
    libcamera.set_frames_to_acquire.restype = None
    libcamera.set_frames_to_acquire.argtypes = [ctypes.c_uint]

    # Camera
    libcamera.initialize.restype = ctypes.c_int
    libcamera.initialize.argtypes = None
    libcamera.reset_global_variables.restype = None
    libcamera.reset_global_variables.argtypes = None
    libcamera.start_acquisition.restype = ctypes.c_int
    libcamera.start_acquisition.argtypes = None
    libcamera.terminate.restype = ctypes.c_int
    libcamera.terminate.argtypes = None

    # Set the global variables according to the module `fhnwtoys.settings`
    libcamera.set_frame_rate(fh.frame_rate)
    libcamera.set_buff_size(fh.buff_size)
    libcamera.set_exposure_time(fh.exposure_time)
    libcamera.set_camera_gain(fh.camera_gain)
    libcamera.set_avg_diffs(fh.avg_diffs)
    libcamera.set_threshold_mult(fh.threshold_mult)
    libcamera.set_frames_to_acquire(fh.frames_to_acquire)

    # Initialize Camera
    initialize = fh.ReturnCodes.NOT_INITIALIZED(*\label{lst:ln:camera_init1}*)
    initialization_tries = 0

    while initialize != fh.ReturnCodes.SUCCESS:
        if initialization_tries > 0:
            try:
                return_code = fh.ReturnCodes(initialize).name
            except ValueError:
                return_code = initialize
            ui.update_boot_window(f'Camera Error ({return_code}), try to replug the camera.')
        initialize = libcamera.initialize()
        initialization_tries += 1(*\label{lst:ln:camera_init2}*)

    # UI: Ready
    ui.update_boot_window('READY')

    # Set up the `frames` array
    frames = np.empty((fh.frames_to_consider,) + fh.bgr_shape, dtype=np.uint8)
    while True:
        # Reset the predictions
        predictions = np.zeros((fh.frames_to_consider, fh.num_objects), dtype=np.float32)(*\label{lst:ln:predictions_matrix}*)

        # Start acquisition (threaded)
        # todo: error handling ('Unexpected Error, system reboot required.')
        # start_acquisition = libcamera.start_acquisition()  # non-threaded approach
        t = Thread(target=libcamera.start_acquisition)(*\label{lst:ln:threading}*)  # threaded approach (process due to ctypes)
        t.start()

        # Wait until the throw has ended (the Ultra96-V2 is not powerful
        # enough to process the data during the acquisition)
        while not libcamera.get_throw_end():
            pass(*\label{lst:ln:polling}*)

        throw_bgn_idx = libcamera.get_throw_bgn_idx()
        throw_end_idx = libcamera.get_throw_end_idx()

        num_frames = throw_end_idx - throw_bgn_idx - 1  # Ignore the last two captured frames

        # Image processing (including inference)
        for idx, frame_id in enumerate(range(throw_bgn_idx, throw_end_idx - 1)):
            frame_ptr = libcamera.get_frame_ptr(frame_id)(*\label{lst:ln:image_preprocessing1}*)
            raw_frame = np.ctypeslib.as_array(frame_ptr, shape=fh.raw_shape)  # Raw Baumer BayerRG8 frame

            # Transform Baumer BayerRG8 to BGR8 (Baumer BayerRG = OpenCV BayerBG)
            frames[idx] = cv2.cvtColor(raw_frame, cv2.COLOR_BayerBG2BGR)  # Color space conversion

            # Image scaling using nearest-neighbor interpolation
            frame_resized = cv2.resize(frames[idx], fh.inf_dsize, interpolation=fh.Interpolation.NEAREST)
            frame_inference = frame_resized.astype(np.float32) / 255.0(*\label{lst:ln:image_preprocessing2}*)  # Normalization (float32 precision)

            # Inference
            n2cube.dpuSetInputTensorInHWCFP32(task, kernel_conv_input, frame_inference, input_tensor_size)
            n2cube.dpuRunTask(task)(*\label{lst:ln:image_classification}*)

            # Softmax function (normalized exponential function)
            # Confident predictions lead to all zeros and a NaN when run through `n2cube.dpuRunSoftmax(.)`
            # This section replaces the first occurrence of NaN in the `prediction` array with 1.0 and sets everything else to 0.0
            prediction = n2cube.dpuRunSoftmax(output_tensor_address, output_tensor_channel,
                                              output_tensor_size // output_tensor_channel,
                                              output_tensor_scale)(*\label{lst:ln:softmax1}*)
            nan = np.isnan(prediction)
            if nan.any():
                nan_idx = nan.argmax()  # returns the index of the first occurrence of NaN
                prediction = np.zeros((fh.num_objects,), dtype=np.float32)
                prediction[nan_idx] = 1.0(*\label{lst:ln:softmax2}*)
            predictions[idx] = prediction

            # Only consider `fh.frames_to_consider` frames
            if idx == fh.frames_to_consider - 1:  # (-1: idx starts at 0)
                break

        num_frames_considered = min(fh.frames_to_consider, num_frames)
        window = sine_squared_window(num_frames, num_frames_considered)  # weighting function
        weighted_prediction = np.matmul(window, predictions) / np.sum(window)(*\label{lst:ln:matrix_multiplication}*)  # computation of the weighted prediction

        # UI: Prepare data for the UI
        weighted_prediction_percent = weighted_prediction * 100
        weighted_prediction_sorted = np.sort(weighted_prediction_percent)[::-1]
        weighted_prediction_argsorted = np.argsort(weighted_prediction_percent)[::-1]

        # The index of the best guess (computed by weighting the `fh.frames_to_consider` frames)
        guess_idx = weighted_prediction_argsorted[0]

        relevant_pct_ui = np.asarray(weighted_prediction_percent >= 1.0).nonzero()[0]  # the prediction value must be at least 1.0%
        relevant_pct_ui_len = len(relevant_pct_ui)
        predictions_ui_len = min(4, relevant_pct_ui_len)  # show at most the Top 4

        predictions_ui = []  # the object names
        percentages_ui = np.empty((predictions_ui_len + 1,), dtype=np.float32)  # the percentages (+1: 'Others')
        for i, w in enumerate(weighted_prediction_argsorted[0:predictions_ui_len]):
            predictions_ui.append(fh.objects_ui[w])
            percentages_ui[i] = weighted_prediction_percent[w]

        # the object names
        predictions_ui.append('Others')
        # the percentages
        percentages_ui[-1] = np.sum(weighted_prediction_sorted[predictions_ui_len:])
        percentages_ui = lrm_round(percentages_ui)

        # the frame
        weighted_guesses = np.multiply(window, predictions[:, guess_idx])(*\label{lst:ln:frame_selection1}*)
        frame_ui_idx = weighted_guesses.argmax()
        frame_ui_resized = cv2.resize(frames[frame_ui_idx], fh.ui_dsize, interpolation=fh.Interpolation.NEAREST)
        _, frame_ui_png = cv2.imencode('.png', frame_ui_resized)
        frame_ui = frame_ui_png.tobytes()(*\label{lst:ln:frame_selection2}*)

        # UI: Show results
        if percentages_ui[-1] == 0.0:
            predictions_ui = predictions_ui[:-1]
            percentages_ui = percentages_ui[:-1]

        # UI: Inference
        ui.update_inference_window(predictions_ui, percentages_ui, frame_ui)

        # Wait until the camera thread (process due to ctypes) is terminated
        t.join()

        # Reset the global variables (has to be done manually to avoid race conditions)
        libcamera.reset_global_variables()

    # Under regular circumstances, this section should never be reached

    # Terminate Camera
    terminate = libcamera.terminate()

    # Clean up the DPU IP (destroy the task before its kernel)
    n2cube.dpuDestroyTask(task)
    n2cube.dpuDestroyKernel(kernel)
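The weighting function `sine_squared_window(.)` is not part of the listing. The sketch below is only a guess at its shape: a sin² lobe over the captured frames, zero-padded so that the `np.matmul(.)` above stays dimensionally consistent with the (frames_to_consider x num_objects) predictions matrix; the actual definition may differ.

import numpy as np

def sine_squared_window(num_frames, num_frames_considered):
    """Hypothetical sin^2 weighting: emphasizes the middle of the throw."""
    n = np.arange(num_frames_considered, dtype=np.float32)
    lobe = np.sin(np.pi * (n + 1) / (num_frames + 1)) ** 2
    # Assumption: pad with zeros to `fh.frames_to_consider` entries
    window = np.zeros((fh.frames_to_consider,), dtype=np.float32)
    window[:num_frames_considered] = lobe
    return window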
def main():
    # Set up the DPU IP
    overlay = DpuOverlay(str(fh.dir_dpu / fh.dpu_bit_file))
    overlay.load_model(str(fh.dir_dpu / fh.dpu_assembly_file))

    # Set up the Neural Network Runtime (N2Cube)
    kernel_name = fh.kernel_name
    kernel_conv_input = fh.kernel_conv_input
    kernel_fc_output = fh.kernel_fc_output

    n2cube.dpuOpen()
    kernel = n2cube.dpuLoadKernel(kernel_name)
    task = n2cube.dpuCreateTask(kernel, 0)

    input_tensor_size = n2cube.dpuGetInputTensorSize(task, kernel_conv_input)
    output_tensor_size = n2cube.dpuGetOutputTensorSize(task, kernel_fc_output)
    output_tensor_channel = n2cube.dpuGetOutputTensorChannel(task, kernel_fc_output)
    output_tensor_address = n2cube.dpuGetOutputTensorAddress(task, kernel_fc_output)
    output_tensor_scale = n2cube.dpuGetOutputTensorScale(task, kernel_fc_output)

    # libcamera
    libcamera = ctypes.CDLL(fh.dir_cam / fh.libcamera_file)

    libcamera.get_frame_ptr.restype = ctypes.POINTER(ctypes.c_ubyte)
    libcamera.get_throw_bgn_idx.restype = ctypes.c_uint
    libcamera.get_throw_end_idx.restype = ctypes.c_uint
    libcamera.get_throw_bgn.restype = ctypes.c_bool
    libcamera.get_throw_end.restype = ctypes.c_bool
    libcamera.set_frame_rate.restype = None
    libcamera.set_buff_size.restype = None
    libcamera.set_exposure_time.restype = None
    libcamera.set_camera_gain.restype = None
    libcamera.set_avg_diffs.restype = None
    libcamera.set_threshold_mult.restype = None
    libcamera.set_frames_to_acquire.restype = None
    libcamera.initialize.restype = ctypes.c_int
    libcamera.start_acquisition.restype = ctypes.c_int
    libcamera.terminate.restype = ctypes.c_int

    # Set up variables
    frames = np.empty((fh.frames_to_consider,) + fh.bgr_shape, dtype=np.uint8)

    # Initialize Camera
    initialize = libcamera.initialize()
    if initialize != fh.ReturnCodes.SUCCESS:
        try:
            return_code = fh.ReturnCodes(initialize).name
        except ValueError:
            return_code = initialize
        print(f'Initialization failed: {return_code}')
        sys.exit()
    else:
        print('================================= READY =================================')

    # Reset predictions
    predictions = np.zeros((fh.frames_to_consider, fh.num_objects), dtype=np.float32)

    # Start acquisition (threaded)
    t = Thread(target=libcamera.start_acquisition)
    t.start()

    # Wait until the throw has ended
    while not libcamera.get_throw_end():
        pass

    stages = [
        'Get raw bayer', 'Transform color', 'Resize', 'Normalize',
        'Run inference', 'Softmax', 'Weighting'
    ]
    meas_time = {s: get_dict() for s in stages}

    throw_bgn_idx = libcamera.get_throw_bgn_idx()
    throw_end_idx = libcamera.get_throw_end_idx()

    num_frames = throw_end_idx - throw_bgn_idx - 1  # Ignore the last two captured frames

    for idx, frame_id in enumerate(range(throw_bgn_idx, throw_end_idx - 1)):
        meas_time['Get raw bayer']['start'].append(datetime.now())
        frame_ptr = libcamera.get_frame_ptr(frame_id)
        raw_frame = np.ctypeslib.as_array(frame_ptr, shape=fh.raw_shape)
        meas_time['Get raw bayer']['end'].append(datetime.now())

        # Transform Baumer BayerRG8 to BGR8 (Baumer BayerRG ≙ OpenCV BayerBG)
        meas_time['Transform color']['start'].append(datetime.now())
        frames[idx] = cv2.cvtColor(raw_frame, cv2.COLOR_BayerBG2BGR)
        meas_time['Transform color']['end'].append(datetime.now())

        meas_time['Resize']['start'].append(datetime.now())
        frame_resized = cv2.resize(frames[idx], fh.inf_dsize, interpolation=fh.Interpolation.NEAREST)
        meas_time['Resize']['end'].append(datetime.now())

        meas_time['Normalize']['start'].append(datetime.now())
        frame_inference = frame_resized.astype(np.float32) / 255.0
        meas_time['Normalize']['end'].append(datetime.now())

        meas_time['Run inference']['start'].append(datetime.now())
        n2cube.dpuSetInputTensorInHWCFP32(task, kernel_conv_input, frame_inference, input_tensor_size)
        n2cube.dpuRunTask(task)
        meas_time['Run inference']['end'].append(datetime.now())

        # n2cube.dpuRunSoftmax(.) sometimes returns all zeros except one NaN
        # This section replaces the first occurrence of NaN in the prediction array with 1.0 and sets everything else to 0.0
        meas_time['Softmax']['start'].append(datetime.now())
        prediction = n2cube.dpuRunSoftmax(output_tensor_address, output_tensor_channel,
                                          output_tensor_size // output_tensor_channel,
                                          output_tensor_scale)
        nan = np.isnan(prediction)
        if nan.any():
            nan_idx = nan.argmax()  # the index of the first occurrence of NaN
            prediction = np.zeros((fh.num_objects,), dtype=np.float32)
            prediction[nan_idx] = 1.0
        predictions[idx] = prediction
        meas_time['Softmax']['end'].append(datetime.now())

        if idx == fh.frames_to_consider - 1:
            break

    meas_time['Weighting']['start'].append(datetime.now())
    num_frames_considered = min(fh.frames_to_consider, num_frames)
    window = sine_window(num_frames, num_frames_considered)  # weighting
    weighted_prediction = np.matmul(window, predictions) / np.sum(window)
    meas_time['Weighting']['end'].append(datetime.now())

    # Average the per-stage durations (in milliseconds)
    for k in meas_time:
        meas_time[k] = [(e - s).total_seconds() * 1000
                        for s, e in zip(meas_time[k]['start'], meas_time[k]['end'])]
        meas_time[k] = sum(meas_time[k]) / len(meas_time[k])

    # Create the output file
    mmax = max(len(s) for s in stages)

    output = f'Number of captured frames: {num_frames_considered}\n\n'
    for s in stages:
        output += f'{s}:{" " * (mmax - len(s))} {meas_time[s]:.3f} ms\n'
    output += f'\nSum:{" " * (mmax - len("Sum"))} {sum(meas_time.values()):.3f} ms\n'
    output += f'Frame rate:{" " * (mmax - len("Frame rate"))} {1000 / sum(meas_time.values()):.3f} fps\n'
    print(output)

    with open(fh.dir_verification / 'throughput.log', 'w') as f:
        f.write(output)

    # Wait until the camera thread (process due to ctypes) is terminated
    t.join()

    # Terminate Camera
    terminate = libcamera.terminate()

    # Clean up the DPU IP (destroy the task before its kernel)
    n2cube.dpuDestroyTask(task)
    n2cube.dpuDestroyKernel(kernel)
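`get_dict()` is also not shown in this listing; given how `meas_time` is used, it presumably just allocates a fresh pair of timestamp lists per stage, along these lines:

def get_dict():
    # One 'start' and one 'end' timestamp list per measured stage
    return {'start': [], 'end': []}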