def fpga_init():
    # Parse arguments
    parser = xdnn_io.default_parser_args()
    parser.add_argument('--deviceID',
                        type=int,
                        default=0,
                        help='FPGA device ID to use when multiple FPGAs are present')
    args = parser.parse_args()
    args = xdnn_io.make_dict_args(args)

    # Create manager
    if not xdnn.createManager():
        raise Exception("Failed to create manager")

    compilerJSONObj = xdnn.CompilerJsonParser(args['netcfg'])

    # Get input and output shapes
    input_shapes = list(map(lambda x: (x), compilerJSONObj.getInputs().itervalues()))
    output_shapes = list(map(lambda x: (x), compilerJSONObj.getOutputs().itervalues()))

    for in_idx in range(len(input_shapes)):
        input_shapes[in_idx][0] = args['batch_sz']
    for out_idx in range(len(output_shapes)):
        output_shapes[out_idx][0] = args['batch_sz']

    input_node_names = list(map(lambda x: str(x), compilerJSONObj.getInputs().iterkeys()))
    output_node_names = list(map(lambda x: str(x), compilerJSONObj.getOutputs().iterkeys()))

    num_inputs = len(input_shapes)
    num_outputs = len(output_shapes)

    # Create runtime
    ret, handles = xdnn.createHandle(args['xclbin'], "kernelSxdnn_0", [args["deviceID"]])
    if ret != 0:
        raise Exception("Failed to create handle, return value: {error}".format(error=ret))
    fpgaRT = xdnn.XDNNFPGAOp(handles, args)

    print("Batch size:", args['batch_sz'])
    print("Input shapes:", input_shapes)
    print("Input nodes:", input_node_names)
    print("Output shapes:", output_shapes)
    print("Output nodes:", output_node_names)

    # Pre-allocate one set of output buffers per stream
    output_buffers = []
    for _ in range(N_STREAMS):
        buffer = {name: np.empty(shape=shape, dtype=np.float32)
                  for name, shape in zip(output_node_names, output_shapes)}
        output_buffers.append(buffer)

    # fpgaRT.exec_async({input_node_names[0]: np.zeros(input_shapes[0])},
    #                   output_buffers[0], 0)
    # fpgaRT.get_result(0)

    (fcWeight, fcBias) = xdnn_io.loadFCWeightsBias(args)

    return fpgaRT, output_buffers,\
        {name: shape for name, shape in zip(input_node_names, input_shapes)},\
        fcWeight, fcBias
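# Minimal usage sketch (assumption, not part of the original source): drive a single
# inference on stream 0 with the objects returned by fpga_init(), mirroring the
# commented-out exec_async/get_result calls above. The helper name, the zero-filled
# input, and the choice of stream 0 are illustrative only.
def _example_single_inference():
    fpgaRT, output_buffers, input_shapes_by_name, fcWeight, fcBias = fpga_init()
    in_name, in_shape = list(input_shapes_by_name.items())[0]
    dummy_input = np.zeros(in_shape, dtype=np.float32)
    # Enqueue one batch on stream 0, then block until that stream finishes.
    fpgaRT.exec_async({in_name: dummy_input}, output_buffers[0], 0)
    fpgaRT.get_result(0)
    return output_buffers[0]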
def setup(self, bottom, top):
    _args = eval(self.param_str)
    #args = xdnn_io.make_dict_args(param_dict)
    if 'save' in _args and len(_args['save']) > 0:
        import os.path as osp
        import os
        if not osp.exists(_args['save']):
            os.makedirs(_args['save'])

    self.rt = compilerxdnnRT(_args['netcfg'], _args['weights'], _args['device'],
                             _args["input_names"], _args["output_names"],
                             _args['xdnnv3'], _args['save'])
    self._indictnames = _args["input_names"]
    self._outdictnames = _args["output_names"]
    self._parser = xdnn.CompilerJsonParser(_args["netcfg"])
def setup(self, bottom, top):
    self.param_dict = eval(self.param_str)  # Get args from prototxt
    self._args = xdnn_io.make_dict_args(self.param_dict)
    self._numPE = self._args["batch_sz"]  # Bryan hack to determine number of PEs in FPGA

    # Establish FPGA communication, load bitstream
    ret, handles = xdnn.createHandle(self._args["xclbin"], "kernelSxdnn_0")
    if ret != 0:
        raise Exception("Failed to open FPGA handle.")

    self._args["scaleB"] = 1
    self._args["PE"] = -1

    # Instantiate runtime interface object
    self.fpgaRT = xdnn.XDNNFPGAOp(handles, self._args)
    self._indictnames = self._args["input_names"]
    self._outdictnames = self._args["output_names"]
    self._parser = xdnn.CompilerJsonParser(self._args["netcfg"])
def __init__(self, params):
    self._args = xdnn_io.make_dict_args(params)
    self._numPE = self._args["batch_sz"]  # Bryan hack to determine number of PEs in FPGA

    # Establish FPGA communication, load bitstream
    ret, handles = xdnn.createHandle(self._args["xclbin"], "kernelSxdnn_0")
    if ret != 0:
        raise Exception("Failed to open FPGA handle.")

    self._args["scaleB"] = 1
    self._args["PE"] = -1
    self._streamIds = [0, 1, 2, 3, 4, 5, 6, 7]  # Allow 8 streams

    # Instantiate runtime interface object
    self.fpgaRT = xdnn.XDNNFPGAOp(handles, self._args)
    self._indictnames = self._args["input_names"]
    self._outdictnames = self._args["output_names"]
    self._parser = xdnn.CompilerJsonParser(self._args["netcfg"])
def run(self, imgList, fpgaOutput_list, fpgaOutputShape_list, shapeArr):
    if self.numProcessed == 0:
        self.startTime = timeit.default_timer()
        self.labels = xdnn_io.get_labels(self.args['labels'])
        self.zmqPub = None
        if self.args['zmqpub']:
            self.zmqPub = mp_classify.ZmqResultPublisher(self.args['deviceID'])
        self.goldenMap = None

    self.numProcessed += len(imgList)
    firstInputShape = xdnn.CompilerJsonParser(
        self.args['netcfg']).getInputs().itervalues().next()

    if ((self.args['yolo_model'] == 'standard_yolo_v3')
            or (self.args['yolo_model'] == 'tiny_yolo_v3')):
        num_output_layers = len(fpgaOutput_list)
        fpgaOutput = []
        for idx in range(num_output_layers):
            fpgaOutput.append(
                np.frombuffer(fpgaOutput_list[idx], dtype=np.float32).reshape(
                    tuple(fpgaOutputShape_list[idx])))

        bboxlist_for_images = det_postprocess(fpgaOutput, self.args, shapeArr)
        for i in range(min(self.args['batch_sz'], len(shapeArr))):
            print "image: ", imgList[i], " has num boxes detected : ", len(bboxlist_for_images[i])
    else:
        fpgaOutput = fpgaOutput_list[0]
        fpgaOutputShape = fpgaOutputShape_list[0]
        npout_view = np.frombuffer(fpgaOutput, dtype=np.float32)\
            .reshape(tuple(fpgaOutputShape))
        npout_view = npout_view.flatten()
        fpgaoutsz = fpgaOutputShape[1] * fpgaOutputShape[2] * fpgaOutputShape[3]
        bboxlist_for_images = []

        for i in range(min(self.args['batch_sz'], len(shapeArr))):
            startidx = i * fpgaoutsz
            softmaxout = npout_view[startidx:startidx + fpgaoutsz]

            # First activate the first two channels of each bbox subgroup (n)
            for b in range(self.args['bboxplanes']):
                for r in range(
                        self.args['batchstride'] * b,
                        self.args['batchstride'] * b + 2 * self.args['groups']):
                    softmaxout[r] = sigmoid(softmaxout[r])
                for r in range(
                        self.args['batchstride'] * b
                        + self.args['groups'] * self.args['coords'],
                        self.args['batchstride'] * b
                        + self.args['groups'] * self.args['coords'] + self.args['groups']):
                    softmaxout[r] = sigmoid(softmaxout[r])

            # Now softmax on all classification arrays in image
            for b in range(self.args['bboxplanes']):
                for g in range(self.args['groups']):
                    softmax(
                        self.args['beginoffset'] + b * self.args['batchstride']
                        + g * self.args['groupstride'], softmaxout, softmaxout,
                        self.args['outsz'], self.args['groups'])

            # NMS
            bboxes = nms.do_baseline_nms(
                softmaxout, shapeArr[i][1], shapeArr[i][0], firstInputShape[2],
                firstInputShape[3], self.args['out_w'], self.args['out_h'],
                self.args['bboxplanes'], self.args['outsz'],
                self.args['scorethresh'], self.args['iouthresh'])
            bboxlist_for_images.append(bboxes)
            print "image: ", imgList[i], " has num boxes detected : ", len(bboxes)

    if self.args['golden'] is None:
        return

    # Write darknet-style prediction files, one per image
    for i in range(min(self.args['batch_sz'], len(shapeArr))):
        filename = imgList[i]
        out_file_txt = ((filename.split("/")[-1]).split(".")[0])
        out_file_txt = self.args['detection_labels'] + "/" + out_file_txt + ".txt"
        out_line_list = []
        bboxes = bboxlist_for_images[i]
        for j in range(len(bboxes)):
            x, y, w, h = darknet_style_xywh(shapeArr[i][1], shapeArr[i][0],
                                            bboxes[j]["ll"]["x"], bboxes[j]["ll"]["y"],
                                            bboxes[j]['ur']['x'], bboxes[j]['ur']['y'])
            line_string = str(bboxes[j]["classid"])
            line_string = line_string + " " + str(round(bboxes[j]['prob'], 3))
            line_string = line_string + " " + str(x)
            line_string = line_string + " " + str(y)
            line_string = line_string + " " + str(w)
            line_string = line_string + " " + str(h)
            out_line_list.append(line_string + "\n")

        log.info("writing this into prediction file at %s" % (out_file_txt))
        with open(out_file_txt, "w") as the_file:
            for lines in out_line_list:
                the_file.write(lines)
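# The region-layer post-processing above calls sigmoid() and a strided softmax()
# helper that are defined elsewhere in the pipeline. The sketch below shows what
# those helpers are assumed to do; the names and call signatures are taken from the
# calls above, but the bodies are assumptions, not the original implementations.
import math

def sigmoid(x):
    # Standard logistic function applied to a single activation.
    return 1.0 / (1.0 + math.exp(-x))

def softmax(begin, inputs, outputs, size, stride):
    # Softmax over `size` class scores that start at index `begin` and are laid out
    # `stride` elements apart (channel-major, darknet-style layout).
    idxs = [begin + k * stride for k in range(size)]
    largest = max(inputs[i] for i in idxs)
    exps = [math.exp(inputs[i] - largest) for i in idxs]
    total = sum(exps)
    for i, e in zip(idxs, exps):
        outputs[i] = e / total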
    default=5,
    help='IOU threshold across 2 candidate detections')
parser.add_argument(
    '--detection_labels',
    help="directory path for detected label files in darknet style",
    default=None,
    type=str,
    metavar="FILE")
parser.add_argument('--prob_threshold',
                    type=float,
                    default=0.1,
                    help='threshold for calculation of f1 score')

args = parser.parse_args()
args = xdnn_io.make_dict_args(args)

compilerJSONObj = xdnn.CompilerJsonParser(args['netcfg'])
firstInputShape = compilerJSONObj.getInputs().itervalues().next()
firstOutputShape = compilerJSONObj.getOutputs().itervalues().next()

out_w = firstOutputShape[2]
out_h = firstOutputShape[3]

args['net_w'] = int(firstInputShape[2])
args['net_h'] = int(firstInputShape[3])
args['out_w'] = int(out_w)
args['out_h'] = int(out_h)
args['coords'] = 4
args['beginoffset'] = (args['coords'] + 1) * int(out_w * out_h)
args['groups'] = int(out_w * out_h)
args['batchstride'] = args['groups'] * (args['outsz'] + args['coords'] + 1)
args['groupstride'] = 1
args['classes'] = args['outsz']
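# Worked example of the layout constants above (hypothetical values, for illustration
# only): with a 13x13 output grid, outsz = 80 classes and coords = 4,
#   groups      = 13 * 13            = 169    (one group per grid cell)
#   beginoffset = (4 + 1) * 169      = 845    (skip the box-coordinate and objectness planes)
#   batchstride = 169 * (80 + 4 + 1) = 14365  (elements per bbox plane)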
    print('drawing boxes time: {0} seconds'.format(end_time - start_time))


if __name__ == '__main__':
    frame_q = mp.Queue()
    resize_q = mp.Queue()
    trans_q = mp.Queue()
    output_q = mp.Queue()
    face_q = mp.Queue()
    ready_fpga = mp.Queue()
    sharedInputArrs = []

    compilerJSONObj = xdnn.CompilerJsonParser('deploy.compiler.json')

    input_shapes = map(lambda x: tuple(x), compilerJSONObj.getInputs().itervalues())
    output_shapes = map(lambda x: tuple(x), compilerJSONObj.getOutputs().itervalues())

    input_sizes = map(lambda x: np.prod(x), input_shapes)
    output_sizes = map(lambda x: np.prod(x), output_shapes)

    print input_shapes
    print output_shapes

    # shared memory from video capture to preprocessing
    shared_frame_arrs = SharedMemoryQueue("frame", num_shared_slots, [(320, 320, 3)])
def fpga_init():
    global PORT
    global N_STREAMS

    # Parse arguments
    parser = xdnn_io.default_parser_args()
    parser.add_argument('--device-ids',
                        type=int,
                        default=[0],
                        nargs="+",
                        help='a list of device IDs for FPGA')
    parser.add_argument('--port',
                        type=int,
                        default=5000,
                        help='port to listen on')
    args = parser.parse_args()
    device_ids = args.device_ids
    PORT = args.port
    N_STREAMS *= len(device_ids)
    args = xdnn_io.make_dict_args(args)

    # Create manager
    if not xdnn.createManager():
        raise Exception("Failed to create manager")

    compilerJSONObj = xdnn.CompilerJsonParser(args['netcfg'])

    # Get input and output shapes
    input_shapes = list(
        map(lambda x: (x), compilerJSONObj.getInputs().itervalues()))
    output_shapes = list(
        map(lambda x: (x), compilerJSONObj.getOutputs().itervalues()))

    for in_idx in range(len(input_shapes)):
        input_shapes[in_idx][0] = args['batch_sz']
    for out_idx in range(len(output_shapes)):
        output_shapes[out_idx][0] = args['batch_sz']

    input_node_names = list(
        map(lambda x: str(x), compilerJSONObj.getInputs().iterkeys()))
    output_node_names = list(
        map(lambda x: str(x), compilerJSONObj.getOutputs().iterkeys()))

    num_inputs = len(input_shapes)
    num_outputs = len(output_shapes)

    # Create runtime
    ret, handles = xdnn.createHandle(args['xclbin'], "kernelSxdnn_0", device_ids)
    if ret != 0:
        raise Exception(
            "Failed to create handle, return value: {error}".format(error=ret))
    fpgaRT = xdnn.XDNNFPGAOp(handles, args)

    print("Batch size:", args['batch_sz'])
    print("Input shapes:", input_shapes)
    print("Input nodes:", input_node_names)
    print("Output shapes:", output_shapes)
    print("Output nodes:", output_node_names)
    print("Using model {path}".format(path=args["netcfg"]))
    print("Using FPGA devices:", device_ids)

    # Pre-allocate one set of output buffers per stream
    output_buffers = []
    for _ in range(N_STREAMS):
        buffer = {
            name: np.empty(shape=shape, dtype=np.float32)
            for name, shape in zip(output_node_names, output_shapes)
        }
        output_buffers.append(buffer)

    # fpgaRT.exec_async({input_node_names[0]: np.zeros(input_shapes[0])},
    #                   output_buffers[0], 0)
    # fpgaRT.get_result(0)

    (fcWeight, fcBias) = xdnn_io.loadFCWeightsBias(args)

    return fpgaRT, output_buffers, output_node_names[0],\
        {name: shape for name, shape in zip(input_node_names, input_shapes)},\
        fcWeight, fcBias, args['batch_sz']
def run(args=None):
    if not args:
        parser = xdnn_io.default_parser_args()
        parser.add_argument(
            '--numprepproc',
            type=int,
            default=1,
            help='number of parallel processes used to decode and quantize images')
        parser.add_argument('--numstream',
                            type=int,
                            default=16,
                            help='number of FPGA streams')
        parser.add_argument(
            '--deviceID',
            type=int,
            default=0,
            help='FPGA device ID to use when multiple FPGAs are present')
        parser.add_argument('--benchmarkmode',
                            type=int,
                            default=0,
                            help='bypass pre/post processing for benchmarking')
        args = parser.parse_args()
        args = xdnn_io.make_dict_args(args)

    if not xdnn.createManager():
        sys.exit(1)

    fpgaRT = None
    sharedInputArrs = []
    fpgaOutputs = []

    compilerJSONObj = xdnn.CompilerJsonParser(args['netcfg'])

    input_shapes = map(lambda x: (x), compilerJSONObj.getInputs().itervalues())
    output_shapes = map(lambda x: (x), compilerJSONObj.getOutputs().itervalues())

    #args['batch_sz'] = 1
    for out_idx in range(len(output_shapes)):
        output_shapes[out_idx][0] = args['batch_sz']

    input_sizes = map(lambda x: np.prod(x), input_shapes)
    output_sizes = map(lambda x: np.prod(x), output_shapes)

    num_shared_slots = args['numstream']

    # shared memory from preprocessing to fpga forward
    shared_trans_arrs = SharedMemoryQueue(
        "trans", num_shared_slots * (args['numprepproc'] * args['batch_sz']),
        input_shapes + [(4)])
    # shared memory from fpga forward to postprocessing
    shared_output_arrs = SharedMemoryQueue(
        "output", num_shared_slots, output_shapes + [(args['batch_sz'], 4)])

    img_paths = xdnn_io.getFilePaths(args['images'])

    p = mp.Pool(initializer=init_pre_process,
                initargs=(args, img_paths, input_shapes, shared_trans_arrs, ),
                processes=args['numprepproc'])

    xdnnProc = mp.Process(target=fpga_process,
                          args=(fpgaRT, args, len(img_paths), compilerJSONObj,
                                shared_trans_arrs, shared_output_arrs, ))

    postProc = mp.Process(target=post_process,
                          args=(args, img_paths, fpgaOutputs, output_shapes,
                                shared_output_arrs, ))
    xdnnProc.start()
    postProc.start()

    if args['perpetual']:
        while True:
            res = [p.map_async(run_pre_process, range(len(img_paths)))]
            for j in res:
                j.wait()
                del j
    else:
        p.map_async(run_pre_process, range(len(img_paths)))
        xdnnProc.join()
        postProc.join()
        p.close()
        p.join()