Ejemplo n.º 1
0
def fpga_init():
    """Parse the command line, open the FPGA runtime and allocate buffers.

    Reads the network description named by ``args['netcfg']``, overwrites the
    leading (batch) dimension of every input and output shape with
    ``args['batch_sz']``, opens an FPGA handle on ``args['deviceID']`` and
    pre-allocates one dict of float32 output arrays per stream
    (``N_STREAMS`` of them, a module-level value defined outside this block).

    Returns:
        A 5-tuple ``(fpgaRT, output_buffers, input_shape_by_name, fcWeight,
        fcBias)`` where ``output_buffers`` is a list of
        ``{output_node_name: np.ndarray}`` dicts and ``input_shape_by_name``
        maps each input node name to its batch-adjusted shape.

    Raises:
        Exception: if the xdnn manager or the FPGA handle cannot be created.
    """
    # Parse arguments
    parser = xdnn_io.default_parser_args()
    parser.add_argument('--deviceID', type=int, default=0,
                        help='FPGA no. -> FPGA ID to run in case multiple FPGAs')
    args = xdnn_io.make_dict_args(parser.parse_args())

    # Create manager
    if not xdnn.createManager():
        raise Exception("Failed to create manager")

    compilerJSONObj = xdnn.CompilerJsonParser(args['netcfg'])
    net_inputs = compilerJSONObj.getInputs()
    net_outputs = compilerJSONObj.getOutputs()

    # Get input and output shapes. ``.values()`` / ``.keys()`` work on both
    # Python 2 and Python 3, unlike the ``iter*`` variants.
    input_shapes = list(net_inputs.values())
    output_shapes = list(net_outputs.values())

    # The shape lists are patched in place (same objects the parser returned),
    # exactly as before: the first dimension becomes the batch size.
    for shape in input_shapes:
        shape[0] = args['batch_sz']
    for shape in output_shapes:
        shape[0] = args['batch_sz']

    input_node_names = [str(name) for name in net_inputs.keys()]
    output_node_names = [str(name) for name in net_outputs.keys()]

    # Create runtime
    ret, handles = xdnn.createHandle(args['xclbin'], "kernelSxdnn_0", [args["deviceID"]])
    if ret != 0:
        raise Exception("Failed to create handle, return value: {error}".format(error=ret))
    fpgaRT = xdnn.XDNNFPGAOp(handles, args)

    print("Batch size:", args['batch_sz'])
    print("Input shapes:", input_shapes)
    print("Input nodes:", input_node_names)
    print("Ouput shapes:", output_shapes)
    print("Ouput nodes:", output_node_names)

    # One set of output arrays per stream; contents are uninitialised
    # (np.empty) until something writes into them.
    output_buffers = [
        {name: np.empty(shape=shape, dtype=np.float32)
         for name, shape in zip(output_node_names, output_shapes)}
        for _ in range(N_STREAMS)
    ]

    (fcWeight, fcBias) = xdnn_io.loadFCWeightsBias(args)

    input_shape_by_name = dict(zip(input_node_names, input_shapes))
    return fpgaRT, output_buffers, input_shape_by_name, fcWeight, fcBias
Ejemplo n.º 2
0
    def setup(self, bottom, top):
        """Build the xdnn runtime for this layer from its ``param_str``.

        ``param_str`` is evaluated into a dict that must provide ``netcfg``,
        ``weights``, ``device``, ``input_names``, ``output_names`` and
        ``xdnnv3``. ``save`` is optional: when present and non-empty the
        directory is created if needed; when absent ``None`` is forwarded to
        ``compilerxdnnRT`` instead of failing with ``KeyError``.

        Args:
            bottom: unused here.
            top: unused here.
        """
        import os

        # NOTE(review): eval() executes arbitrary Python. This is only safe
        # while param_str comes from a trusted model definition; consider
        # ast.literal_eval if the string is always a plain literal.
        _args = eval(self.param_str)

        # 'save' was already treated as optional by the membership test, so
        # read it the same way everywhere.
        # NOTE(review): assumes compilerxdnnRT accepts None for "no save
        # directory" -- confirm against its signature.
        save_dir = _args.get('save')
        if save_dir:
            try:
                os.makedirs(save_dir)
            except OSError:
                # Already existing (possibly created concurrently) is fine;
                # anything else is a real error.
                if not os.path.isdir(save_dir):
                    raise

        self.rt = compilerxdnnRT(_args['netcfg'], _args['weights'],
                                 _args['device'], _args["input_names"],
                                 _args["output_names"], _args['xdnnv3'],
                                 save_dir)
        self._indictnames = _args["input_names"]
        self._outdictnames = _args["output_names"]
        self._parser = xdnn.CompilerJsonParser(_args["netcfg"])
Ejemplo n.º 3
0
    def setup(self, bottom, top):
        """Configure the layer from ``param_str`` and attach it to the FPGA.

        Evaluates ``param_str`` into ``self.param_dict``, normalises it with
        ``xdnn_io.make_dict_args``, opens an FPGA handle for the configured
        ``xclbin`` and creates the runtime op plus a compiler-JSON parser.

        Args:
            bottom: unused here.
            top: unused here.

        Raises:
            Exception: if ``xdnn.createHandle`` returns a non-zero status.
        """
        # Arguments come from the prototxt as a Python expression string.
        # NOTE(review): eval() runs arbitrary code; param_str must be trusted.
        self.param_dict = eval(self.param_str)
        layer_args = xdnn_io.make_dict_args(self.param_dict)
        self._args = layer_args

        # Bryan hack: the batch size is used as the number of PEs in the FPGA.
        self._numPE = layer_args["batch_sz"]

        # Establish FPGA communication (per the original note, this also
        # loads the bitstream).
        status, fpga_handles = xdnn.createHandle(layer_args["xclbin"],
                                                 "kernelSxdnn_0")
        if status != 0:
            raise Exception("Failed to open FPGA handle.")

        # Fixed runtime settings applied before the op is built.
        for option, value in (("scaleB", 1), ("PE", -1)):
            layer_args[option] = value

        # Runtime interface object, node-name lists and network description.
        self.fpgaRT = xdnn.XDNNFPGAOp(fpga_handles, layer_args)
        self._indictnames = layer_args["input_names"]
        self._outdictnames = layer_args["output_names"]
        self._parser = xdnn.CompilerJsonParser(layer_args["netcfg"])
Ejemplo n.º 4
0
    def __init__(self, params):
        """Open the FPGA and build the runtime objects described by ``params``.

        Args:
            params: raw arguments, normalised via ``xdnn_io.make_dict_args``.
                The result must provide ``batch_sz``, ``xclbin``,
                ``input_names``, ``output_names`` and ``netcfg``.

        Raises:
            Exception: if ``xdnn.createHandle`` returns a non-zero status.
        """
        run_args = xdnn_io.make_dict_args(params)
        self._args = run_args

        # Bryan hack: the batch size is used as the number of PEs in the FPGA.
        self._numPE = run_args["batch_sz"]

        # Establish FPGA communication (per the original note, this also
        # loads the bitstream).
        status, fpga_handles = xdnn.createHandle(run_args["xclbin"],
                                                 "kernelSxdnn_0")
        if status != 0:
            raise Exception("Failed to open FPGA handle.")

        # Fixed runtime settings applied before the op is built.
        for option, value in (("scaleB", 1), ("PE", -1)):
            run_args[option] = value

        # Allow 8 streams, identified 0..7.
        self._streamIds = list(range(8))

        # Runtime interface object, node-name lists and network description.
        self.fpgaRT = xdnn.XDNNFPGAOp(fpga_handles, run_args)
        self._indictnames = run_args["input_names"]
        self._outdictnames = run_args["output_names"]
        self._parser = xdnn.CompilerJsonParser(run_args["netcfg"])
Ejemplo n.º 5
0
    def run(self, imgList, fpgaOutput_list, fpgaOutputShape_list, shapeArr):
        """Post-process one batch of FPGA output into per-image bounding boxes.

        On the first call (``self.numProcessed == 0``) the start time, labels
        and optional ZMQ publisher are initialised. For every image in the
        batch the number of detected boxes is printed. Unless
        ``self.args['golden']`` is ``None``, one darknet-style text file per
        image is then written under ``self.args['detection_labels']``.

        Args:
            imgList: image paths for this batch; indexed in step with
                ``shapeArr``.
            fpgaOutput_list: raw output buffers, one per output layer, each
                readable by ``np.frombuffer`` as float32.
            fpgaOutputShape_list: shape of each buffer in ``fpgaOutput_list``.
            shapeArr: per-image shape entries; elements ``[0]`` and ``[1]``
                are forwarded to NMS and to ``darknet_style_xywh``.

        Returns:
            None.
        """
        cfg = self.args

        if self.numProcessed == 0:
            self.startTime = timeit.default_timer()
            self.labels = xdnn_io.get_labels(cfg['labels'])
            self.zmqPub = None
            if cfg['zmqpub']:
                self.zmqPub = mp_classify.ZmqResultPublisher(cfg['deviceID'])
            self.goldenMap = None

        self.numProcessed += len(imgList)

        # A batch may be shorter than batch_sz; only visit images that exist.
        num_images = min(cfg['batch_sz'], len(shapeArr))

        # The original referenced a bare ``args`` here, which is not defined
        # in this method; the instance's own arguments are what is meant.
        if cfg['yolo_model'] in ('standard_yolo_v3', 'tiny_yolo_v3'):
            fpgaOutput = [
                np.frombuffer(raw, dtype=np.float32).reshape(
                    tuple(fpgaOutputShape_list[idx]))
                for idx, raw in enumerate(fpgaOutput_list)
            ]
            bboxlist_for_images = det_postprocess(fpgaOutput, cfg, shapeArr)

            for i in range(num_images):
                # Single pre-formatted string so the output is identical under
                # the Python 2 print statement and the Python 3 function.
                print("image:  {}  has num boxes detected  :  {}".format(
                    imgList[i], len(bboxlist_for_images[i])))

        else:
            # Only this path needs the network input shape, so the compiler
            # JSON is parsed here instead of on every call.
            firstInputShape = next(iter(
                xdnn.CompilerJsonParser(cfg['netcfg']).getInputs().values()))

            fpgaOutputShape = fpgaOutputShape_list[0]
            # flatten() copies, so the in-place activations below never touch
            # the caller's buffer.
            npout_view = np.frombuffer(fpgaOutput_list[0], dtype=np.float32)\
                .reshape(tuple(fpgaOutputShape)).flatten()
            fpgaoutsz = (fpgaOutputShape[1] * fpgaOutputShape[2] *
                         fpgaOutputShape[3])

            bboxplanes = cfg['bboxplanes']
            batchstride = cfg['batchstride']
            groups = cfg['groups']
            coords = cfg['coords']

            bboxlist_for_images = []
            for i in range(num_images):
                startidx = i * fpgaoutsz
                softmaxout = npout_view[startidx:startidx + fpgaoutsz]

                # first activate first two channels of each bbox subgroup (n)
                for b in range(bboxplanes):
                    plane_start = batchstride * b
                    for r in range(plane_start, plane_start + 2 * groups):
                        softmaxout[r] = sigmoid(softmaxout[r])

                    # ... and the channel that follows the coordinates.
                    after_coords = plane_start + groups * coords
                    for r in range(after_coords, after_coords + groups):
                        softmaxout[r] = sigmoid(softmaxout[r])

                # Now softmax on all classification arrays in image
                for b in range(bboxplanes):
                    for g in range(groups):
                        softmax(
                            cfg['beginoffset'] + b * batchstride +
                            g * cfg['groupstride'], softmaxout, softmaxout,
                            cfg['outsz'], groups)

                # NMS
                bboxes = nms.do_baseline_nms(
                    softmaxout, shapeArr[i][1], shapeArr[i][0],
                    firstInputShape[2], firstInputShape[3], cfg['out_w'],
                    cfg['out_h'], bboxplanes, cfg['outsz'],
                    cfg['scorethresh'], cfg['iouthresh'])
                bboxlist_for_images.append(bboxes)
                print("image:  {}  has num boxes detected  :  {}".format(
                    imgList[i], len(bboxes)))

        if cfg['golden'] is None:
            return

        for i in range(num_images):
            # Output file is named after the image: basename up to its first
            # dot, with a ".txt" suffix.
            stem = imgList[i].split("/")[-1].split(".")[0]
            out_file_txt = cfg['detection_labels'] + "/" + stem + ".txt"

            out_line_list = []
            for box in bboxlist_for_images[i]:
                x, y, w, h = darknet_style_xywh(shapeArr[i][1], shapeArr[i][0],
                                                box["ll"]["x"], box["ll"]["y"],
                                                box['ur']['x'], box['ur']['y'])
                fields = [box["classid"], round(box['prob'], 3), x, y, w, h]
                out_line_list.append(
                    " ".join(str(field) for field in fields) + "\n")

            log.info("writing this into prediction file at %s", out_file_txt)
            with open(out_file_txt, "w") as the_file:
                the_file.writelines(out_line_list)
Ejemplo n.º 6
0
        default=5,
        help='thresohold on iouthresh across 2 candidate detections')
    parser.add_argument(
        '--detection_labels',
        help="direcotry path detected lable files in darknet style",
        default=None,
        type=str,
        metavar="FILE")
    parser.add_argument('--prob_threshold',
                        type=float,
                        default=0.1,
                        help='threshold for calculation of f1 score')

    args = parser.parse_args()
    args = xdnn_io.make_dict_args(args)
    compilerJSONObj = xdnn.CompilerJsonParser(args['netcfg'])
    firstInputShape = compilerJSONObj.getInputs().itervalues().next()
    firstOutputShape = compilerJSONObj.getOutputs().itervalues().next()
    out_w = firstOutputShape[2]
    out_h = firstOutputShape[3]

    args['net_w'] = int(firstInputShape[2])
    args['net_h'] = int(firstInputShape[3])
    args['out_w'] = int(out_w)
    args['out_h'] = int(out_h)
    args['coords'] = 4
    args['beginoffset'] = (args['coords'] + 1) * int(out_w * out_h)
    args['groups'] = int(out_w * out_h)
    args['batchstride'] = args['groups'] * (args['outsz'] + args['coords'] + 1)
    args['groupstride'] = 1
    args['classes'] = args['outsz']
Ejemplo n.º 7
0
    print('drawing boxes time: {0} seconds'.format(end_time - start_time))


if __name__ == '__main__':

    frame_q = mp.Queue()
    resize_q = mp.Queue()
    trans_q = mp.Queue()
    output_q = mp.Queue()
    face_q = mp.Queue()

    ready_fpga = mp.Queue()

    sharedInputArrs = []

    compilerJSONObj = xdnn.CompilerJsonParser('deploy.compiler.json')

    input_shapes = map(lambda x: tuple(x),
                       compilerJSONObj.getInputs().itervalues())
    output_shapes = map(lambda x: tuple(x),
                        compilerJSONObj.getOutputs().itervalues())

    input_sizes = map(lambda x: np.prod(x), input_shapes)
    output_sizes = map(lambda x: np.prod(x), output_shapes)

    print input_shapes
    print output_shapes

    # shared memory from video capture to preprocessing
    shared_frame_arrs = SharedMemoryQueue("frame", num_shared_slots,
                                          [(320, 320, 3)])
Ejemplo n.º 8
0
def fpga_init():
    """Parse the command line, open the FPGA runtime and allocate buffers.

    Side effects on module globals: ``PORT`` is set from ``--port`` and
    ``N_STREAMS`` is multiplied by the number of device IDs (on every call).

    Reads the network description named by ``args['netcfg']``, overwrites the
    leading (batch) dimension of every input and output shape with
    ``args['batch_sz']``, opens an FPGA handle on the requested devices and
    pre-allocates one dict of float32 output arrays per stream.

    Returns:
        A 7-tuple ``(fpgaRT, output_buffers, first_output_node_name,
        input_shape_by_name, fcWeight, fcBias, batch_sz)``.

    Raises:
        Exception: if the xdnn manager or the FPGA handle cannot be created.
    """
    global PORT
    global N_STREAMS
    # Parse arguments
    parser = xdnn_io.default_parser_args()
    parser.add_argument('--device-ids',
                        type=int,
                        default=[0],
                        nargs="+",
                        help='a list of device IDs for FPGA')
    parser.add_argument('--port',
                        type=int,
                        default=5000,
                        help='port to listen on')
    args = parser.parse_args()
    device_ids = args.device_ids
    PORT = args.port
    N_STREAMS *= len(device_ids)
    args = xdnn_io.make_dict_args(args)

    # Create manager
    if not xdnn.createManager():
        raise Exception("Failed to create manager")

    compilerJSONObj = xdnn.CompilerJsonParser(args['netcfg'])
    net_inputs = compilerJSONObj.getInputs()
    net_outputs = compilerJSONObj.getOutputs()

    # Get input and output shapes. ``.values()`` / ``.keys()`` work on both
    # Python 2 and Python 3, unlike the ``iter*`` variants.
    input_shapes = list(net_inputs.values())
    output_shapes = list(net_outputs.values())

    # The shape lists are patched in place (same objects the parser returned),
    # exactly as before: the first dimension becomes the batch size.
    for shape in input_shapes:
        shape[0] = args['batch_sz']
    for shape in output_shapes:
        shape[0] = args['batch_sz']

    input_node_names = [str(name) for name in net_inputs.keys()]
    output_node_names = [str(name) for name in net_outputs.keys()]

    # Create runtime
    ret, handles = xdnn.createHandle(args['xclbin'], "kernelSxdnn_0",
                                     device_ids)
    if ret != 0:
        raise Exception(
            "Failed to create handle, return value: {error}".format(error=ret))
    fpgaRT = xdnn.XDNNFPGAOp(handles, args)

    print("Batch size:", args['batch_sz'])
    print("Input shapes:", input_shapes)
    print("Input nodes:", input_node_names)
    print("Ouput shapes:", output_shapes)
    print("Ouput nodes:", output_node_names)
    print("Using model {path}".format(path=args["netcfg"]))
    print("Using FPGA device:", device_ids)

    # One set of output arrays per stream; contents are uninitialised
    # (np.empty) until something writes into them.
    output_buffers = [
        {
            name: np.empty(shape=shape, dtype=np.float32)
            for name, shape in zip(output_node_names, output_shapes)
        }
        for _ in range(N_STREAMS)
    ]

    (fcWeight, fcBias) = xdnn_io.loadFCWeightsBias(args)

    input_shape_by_name = dict(zip(input_node_names, input_shapes))
    return fpgaRT, output_buffers, output_node_names[0],\
        input_shape_by_name,\
        fcWeight, fcBias, args['batch_sz']
Ejemplo n.º 9
0
def run(args=None):
    """Run the pre-process / FPGA / post-process pipeline over a set of images.

    Starts a pool of pre-processing workers plus one FPGA process and one
    post-processing process, connected through two ``SharedMemoryQueue``
    instances, then waits for the FPGA and post-processing processes to end.

    Args:
        args: argument dict. When falsy, arguments are parsed from the command
            line (default xdnn options plus ``--numprepproc``, ``--numstream``,
            ``--deviceID`` and ``--benchmarkmode``).

    Exits the interpreter with status 1 if the xdnn manager cannot be created.
    """
    if not args:
        parser = xdnn_io.default_parser_args()
        parser.add_argument(
            '--numprepproc',
            type=int,
            default=1,
            help=
            'number of parallel processes used to decode and quantize images')
        parser.add_argument('--numstream',
                            type=int,
                            default=16,
                            help='number of FPGA streams')
        parser.add_argument(
            '--deviceID',
            type=int,
            default=0,
            help='FPGA no. -> FPGA ID to run in case multiple FPGAs')
        parser.add_argument('--benchmarkmode',
                            type=int,
                            default=0,
                            help='bypass pre/post processing for benchmarking')
        args = xdnn_io.make_dict_args(parser.parse_args())

    if not xdnn.createManager():
        sys.exit(1)
    # Placeholders handed to the child processes as-is.
    fpgaRT = None
    fpgaOutputs = []

    compilerJSONObj = xdnn.CompilerJsonParser(args['netcfg'])

    # Real lists (not Python 3 map objects) so that len(), indexing and list
    # concatenation below work on both Python 2 and Python 3.
    input_shapes = list(compilerJSONObj.getInputs().values())
    output_shapes = list(compilerJSONObj.getOutputs().values())

    # Patch the batch dimension in place: these are the same list objects the
    # parser returned, and the parser is handed to fpga_process below.
    for shape in output_shapes:
        shape[0] = args['batch_sz']

    num_shared_slots = args['numstream']

    # shared memory from preprocessing to fpga forward
    # (the extra trailing entry is the bare int 4, as in the original)
    shared_trans_arrs = SharedMemoryQueue(
        "trans", num_shared_slots * (args['numprepproc'] * args['batch_sz']),
        input_shapes + [4])
    # shared memory from fpga forward to postprocessing
    shared_output_arrs = SharedMemoryQueue(
        "output", num_shared_slots, output_shapes + [(args['batch_sz'], 4)])

    img_paths = xdnn_io.getFilePaths(args['images'])
    image_indices = range(len(img_paths))

    p = mp.Pool(initializer=init_pre_process,
                initargs=(
                    args,
                    img_paths,
                    input_shapes,
                    shared_trans_arrs,
                ),
                processes=args['numprepproc'])

    xdnnProc = mp.Process(target=fpga_process,
                          args=(
                              fpgaRT,
                              args,
                              len(img_paths),
                              compilerJSONObj,
                              shared_trans_arrs,
                              shared_output_arrs,
                          ))

    postProc = mp.Process(target=post_process,
                          args=(
                              args,
                              img_paths,
                              fpgaOutputs,
                              output_shapes,
                              shared_output_arrs,
                          ))
    xdnnProc.start()
    postProc.start()

    if args['perpetual']:
        # Feed the whole image list over and over, one full pass at a time.
        while True:
            p.map_async(run_pre_process, image_indices).wait()
    else:
        # Fire and forget: completion is observed through the FPGA and
        # post-processing processes joined below.
        # NOTE(review): exceptions raised in workers are never surfaced
        # because the async result is discarded.
        p.map_async(run_pre_process, image_indices)

    xdnnProc.join()
    postProc.join()

    p.close()
    p.join()