Esempio n. 1
0
    def loop(self):
        """Consume batches from the shared output buffers until none remain.

        Before entering the loop this sets ``self.yolo_postproc`` (for
        ``yolo_version`` 'v2' or 'v3'), ``self.biases``, ``self.labels`` and
        ``self.colors``.  Each iteration opens a read slot, decodes the image
        ids stored in the last array of that slot, and either just counts the
        images (``benchmarkmode``) or hands the remaining arrays to
        ``self.run``.  The slot is always released, even when processing
        raises.  ``self.finish()`` is called once ``openReadId`` returns None.
        """
        # NOTE: the entries appended here are the very same list objects held
        # by self.output_shapes, so the batch size is written into them in
        # place (behavior kept from the original implementation).
        fpgaOutputShapes = []
        for output_shape in self.output_shapes:
            output_shape[0] = self.args['batch_sz']
            fpgaOutputShapes.append(output_shape)

        # Any other yolo_version value leaves self.yolo_postproc untouched.
        if self.args['yolo_version'] == 'v2':
            self.yolo_postproc = yolo.yolov2_postproc
        elif self.args['yolo_version'] == 'v3':
            self.yolo_postproc = yolo.yolov3_postproc

        self.biases = bias_selector(self.args)
        self.labels = xdnn_io.get_labels(self.args['labels'])
        self.colors = generate_colors(len(self.labels))

        while True:
            read_slot = self._shared_output_arrs.openReadId()
            if read_slot is None:
                # No more slots to read: leave the loop and finish up.
                break

            # Release the slot on every exit path (normal, benchmark
            # `continue`, or an exception) so it is never leaked.
            try:
                read_slot_arrs = self._shared_output_arrs.accessNumpyBuffer(
                    read_slot)

                # The last array carries one metadata row per image:
                # column 0 is an index into self.img_paths (-1 ends the
                # valid rows) and columns 1..3 are forwarded to self.run
                # (presumably the original image shape - confirm upstream).
                meta_rows = read_slot_arrs[-1]
                imgList = []
                shape_list = []
                for image_num in range(meta_rows.shape[0]):
                    image_id = meta_rows[image_num][0]
                    if image_id == -1:
                        break
                    imgList.append(self.img_paths[int(image_id)])
                    shape_list.append(meta_rows[image_num][1:4])

                if self.args["benchmarkmode"]:
                    # Benchmark mode only counts images; no post-processing.
                    self.numProcessed += len(imgList)
                    continue

                self.run(imgList, read_slot_arrs[0:-1], fpgaOutputShapes,
                         shape_list)
            finally:
                self._shared_output_arrs.closeReadId(read_slot)

        self.finish()
Esempio n. 2
0
    def initialize(self, args):
        """Load the Caffe network, prepare output bookkeeping and start running.

        Resets the processed-image counter and start time, builds the Caffe
        net from ``args['deploymodel']`` / ``args['caffemodel']``, allocates
        ``self.netOut`` sized by ``args['batch_sz']`` and the trailing
        dimensions of the 'layer31-conv' blob, and records the network input
        height/width in ``self._args``.  It then reads the output shapes from
        the compiler JSON named by ``self._args['netcfg']``, overwrites their
        first dimension with the batch size, computes the cumulative offsets
        in ``self.buf_indices`` and finally calls ``self.run()``.
        """
        self.numProcessed = 0
        self.startTime = timeit.default_timer()

        self.net = caffe.Net(args['deploymodel'], args['caffemodel'],
                             caffe.TEST)
        batch_dims = (args['batch_sz'], )
        conv_tail_dims = self.net.blobs['layer31-conv'].data.shape[1:]
        self.netOut = np.empty(batch_dims + conv_tail_dims, dtype=np.float32)
        self.biases = bias_selector(args)

        # Network input height and width come from the 'data' blob.
        self._args['net_h'] = self.net.blobs['data'].data.shape[2]
        self._args['net_w'] = self.net.blobs['data'].data.shape[3]

        json_parser = xdnn.CompilerJsonParser(self._args['netcfg'])
        self.fpgaOutputShapes = list(itervalues(json_parser.getOutputs()))
        for shape in self.fpgaOutputShapes:
            # The shape lists are updated in place with the batch size.
            shape[0] = self._args['batch_sz']

        # indices for unpacking concatenated arrays to individual array.
        self.buf_indices = [0]
        offsets = self.buf_indices
        for shape in self.fpgaOutputShapes:
            next_offset = offsets[-1] + np.prod(shape)
            offsets.append(next_offset)

        print("Post is starting loop")
        self.run()
Esempio n. 3
0
    def run(rundir, chanIdx, q, args):
        """Worker loop: receive input batches, run them and post-process.

        Creates a publisher, a subscriber on the channel derived from
        ``chanIdx`` and a ``Runner`` for ``rundir``, then signals readiness
        by putting ``1`` on ``q``.  For every payload received it copies the
        buffer into the input blob, executes the runner, applies the YOLO
        post-processing selected by ``args['yolo_version']`` and optionally
        prints, saves and visualizes the detections.  A falsy payload ends
        the loop.  Exceptions raised while handling a payload are logged
        with their traceback and the loop continues.

        Args:
            rundir: directory passed to ``Runner``.
            chanIdx: channel index, converted with ``chanIdx2Str``.
            q: queue-like object; receives ``1`` once the worker is ready.
            args: dict of options; the keys read here are 'labels',
                'yolo_version', 'visualize', 'profile' and 'results_dir'.

        Raises:
            AssertionError: if ``args['yolo_version']`` is not 'v2' or 'v3'.
        """
        xspub = xstream.Publisher()
        xssub = xstream.Subscribe(chanIdx2Str(chanIdx))
        runner = Runner(rundir)
        inTensors = runner.get_input_tensors()
        outTensors = runner.get_output_tensors()

        q.put(1)  # ready for work

        fpgaBlobs = None
        labels = xdnn_io.get_labels(args['labels'])
        if args['yolo_version'] == 'v2':
            yolo_postproc = yolo.yolov2_postproc
        elif args['yolo_version'] == 'v3':
            yolo_postproc = yolo.yolov3_postproc
        else:
            # Raised explicitly (same type and message as the former assert)
            # so the check is not stripped when Python runs with -O.
            raise AssertionError("--yolo_version should be <v2|v3>")

        biases = bias_selector(args)
        if args['visualize']:
            # colors is only needed, and only defined, when visualizing.
            colors = generate_colors(len(labels))

        while True:
            try:
                payload = xssub.get()
                if not payload:
                    break
                (meta, buf) = payload

                if fpgaBlobs is None:
                    # allocate buffers once, sized by the first batch seen
                    fpgaBlobs = []
                    batchsz = meta['shape'][0]  # inTensors[0].dims[0]

                    for io in [inTensors, outTensors]:
                        blobs = []
                        for t in io:
                            shape = (batchsz, ) + tuple(
                                [t.dims[i] for i in range(t.ndims)][1:])
                            blobs.append(
                                np.empty((shape), dtype=np.float32, order='C'))
                        fpgaBlobs.append(blobs)

                fpgaInput = fpgaBlobs[0][0]
                assert (tuple(meta['shape']) == fpgaInput.shape)
                data = np.frombuffer(buf,
                                     dtype=np.float32).reshape(fpgaInput.shape)
                np.copyto(fpgaInput, data)

                jid = runner.execute_async(fpgaBlobs[0], fpgaBlobs[1])
                runner.wait(jid)

                boxes = yolo_postproc(fpgaBlobs[1],
                                      args,
                                      meta['image_shapes'],
                                      biases=biases)

                # Only the first len(meta['image_shapes']) entries of the
                # batch are reported (at most batchsz of them).
                num_valid = min(batchsz, len(meta['image_shapes']))

                if not args['profile']:
                    for i in range(num_valid):
                        print("Detected {} boxes in {}".format(
                            len(boxes[i]), meta['images'][i]),
                              flush=True)

                # Save the result
                if args['results_dir']:
                    for i in range(num_valid):
                        fname = meta['images'][i]
                        filename = os.path.splitext(os.path.basename(fname))[0]
                        out_file_txt = os.path.join(args['results_dir'],
                                                    filename + '.txt')
                        print("Saving {} boxes to {}".format(
                            len(boxes[i]), out_file_txt))
                        sys.stdout.flush()
                        saveDetectionDarknetStyle(out_file_txt, boxes[i],
                                                  meta['image_shapes'][i])

                        if args['visualize']:
                            out_file_png = os.path.join(
                                args['results_dir'], filename + '.png')
                            print("Saving result to {}".format(out_file_png))
                            sys.stdout.flush()
                            draw_boxes(fname, boxes[i], labels, colors,
                                       out_file_png)

                if meta['id'] % 1000 == 0:
                    print("Recvd query %d" % meta['id'])
                    sys.stdout.flush()

                # Drop the references to the received buffer before replying.
                del data
                del buf
                del payload

                xspub.send(meta['from'], "success")

            except Exception as e:
                # Best-effort worker: log (with traceback) and keep serving.
                logging.exception("Worker exception %s", e)
Esempio n. 4
0
def main():
    """Run batched YOLO detection over a set of images and report results.

    Parses the command-line options, loads the image paths, allocates the
    input/output blobs described by the ``Runner`` tensors and processes the
    images in chunks of ``batch_sz`` (taken from the first input tensor when
    ``--batch_sz`` is -1).  Per batch it prints the number of detections and,
    when ``results_dir`` is set, saves them (plus a rendered PNG when
    ``visualize`` is set).  Optionally prints average per-image latencies
    (``profile``) and computes mAP against ``golden``.

    Raises:
        ValueError: if ``yolo_version`` is neither 'v2' nor 'v3'.
    """
    parser = xdnn_io.default_parser_args()
    parser = yolo_parser_args(parser)
    args = parser.parse_args()
    args = xdnn_io.make_dict_args(args)

    # Setup the environment
    img_paths = xdnn_io.getFilePaths(args['images'])
    if (args['golden'] or args['visualize']):
        assert args['labels'], "Provide --labels to compute mAP."
        assert args[
            'results_dir'], "For accuracy measurements, provide --results_dir to save the detections."
        labels = xdnn_io.get_labels(args['labels'])
        colors = generate_colors(len(labels))

    if args['yolo_version'] == 'v2':
        yolo_postproc = yolo.yolov2_postproc
    elif args['yolo_version'] == 'v3':
        yolo_postproc = yolo.yolov3_postproc
    else:
        # Fail fast instead of hitting an unbound name after the first batch.
        raise ValueError("--yolo_version should be <v2|v3>")

    runner = Runner(args['vitis_rundir'])

    # Setup the blobs
    inTensors = runner.get_input_tensors()
    outTensors = runner.get_output_tensors()
    batch_sz = args['batch_sz']
    if batch_sz == -1:
        batch_sz = inTensors[0].dims[0]

    fpgaBlobs = []
    for io in [inTensors, outTensors]:
        blobs = []
        for t in io:
            shape = (batch_sz, ) + tuple([t.dims[i]
                                          for i in range(t.ndims)][1:])
            blobs.append(np.empty((shape), dtype=np.float32, order='C'))
        fpgaBlobs.append(blobs)
    fpgaInput = fpgaBlobs[0][0]

    # Setup the YOLO config
    net_h, net_w = fpgaInput.shape[-2:]
    args['net_h'] = net_h
    args['net_w'] = net_w
    biases = bias_selector(args)

    # Setup profiling env
    prep_time = 0
    exec_time = 0
    post_time = 0

    # Start the execution
    for batch_start in range(0, len(img_paths), batch_sz):
        pl = []
        img_shapes = []

        # Prep images
        t1 = timeit.default_timer()
        for j, p in enumerate(img_paths[batch_start:batch_start + batch_sz]):
            fpgaInput[j, ...], img_shape = xdnn_io.loadYoloImageBlobFromFile(
                p, net_h, net_w)
            pl.append(p)
            img_shapes.append(img_shape)
        t2 = timeit.default_timer()

        # Execute
        jid = runner.execute_async(fpgaBlobs[0], fpgaBlobs[1])
        runner.wait(jid)

        # Post Proc
        t3 = timeit.default_timer()
        boxes = yolo_postproc(fpgaBlobs[1], args, img_shapes, biases=biases)
        t4 = timeit.default_timer()

        prep_time += (t2 - t1)
        exec_time += (t3 - t2)
        post_time += (t4 - t3)

        # The last batch may be partial; only report the images loaded now.
        num_valid = min(batch_sz, len(img_shapes))

        for k in range(num_valid):
            print("Detected {} boxes in {}".format(len(boxes[k]), pl[k]))

        # Save the result
        if (args['results_dir']):
            for k in range(num_valid):
                filename = os.path.splitext(os.path.basename(pl[k]))[0]
                out_file_txt = os.path.join(args['results_dir'],
                                            filename + '.txt')
                print("Saving {} boxes to {}".format(len(boxes[k]),
                                                     out_file_txt))
                sys.stdout.flush()
                saveDetectionDarknetStyle(out_file_txt, boxes[k],
                                          img_shapes[k])
                if (args['visualize']):
                    out_file_png = os.path.join(args['results_dir'],
                                                filename + '.png')
                    print("Saving result to {}".format(out_file_png))
                    sys.stdout.flush()
                    draw_boxes(pl[k], boxes[k], labels, colors, out_file_png)

    # Profiling results
    if (args['profile']):
        # Avoid dividing by zero when no images were found.
        num_images = max(len(img_paths), 1)
        print("\nAverage Latency in ms:")
        print("  Image Prep: {0:3f}".format(prep_time * 1000.0 / num_images))
        print("  Exec: {0:3f}".format(exec_time * 1000.0 / num_images))
        print("  Post Proc: {0:3f}".format(post_time * 1000.0 / num_images))
        sys.stdout.flush()

    # mAP calculation
    if (args['golden']):
        print()
        print("Computing mAP score  : ")
        print("Class names are  : {} ".format(labels))
        calc_detector_mAP(args['results_dir'], args['golden'],
                          len(labels), labels, args['prob_threshold'],
                          args['mapiouthresh'], args['points'])
        sys.stdout.flush()
Esempio n. 5
0
def yolo_gpu_inference(backend_path, image_dir, deploy_model, weights,
                       out_labels, IOU_threshold, scorethresh, mean_value,
                       pxscale, transpose, channel_swap, yolo_model,
                       num_classes, args):
    """Run a Caffe YOLO model over every image, one image per forward pass.

    Only ``deploy_model``, ``weights`` and ``args`` are read by this
    function; the remaining parameters are accepted but not used here.
    The image list comes from ``args['images']``.  Caffe runs in CPU mode
    unless ``args['gpu']`` is not None, in which case that device is used.
    The network input height/width are stored into ``args['net_h']`` and
    ``args['net_w']``.  Detections are printed and, when
    ``args['results_dir']`` is set, saved as text (and as PNG when
    ``args['visualize']`` is set).

    Returns:
        The number of images found via ``args['images']``.
    """
    # Setup the environment
    images = xdnn_io.getFilePaths(args['images'])
    wants_labels = args['golden'] or args['visualize']
    if wants_labels:
        assert args['labels'], "Provide --labels to compute mAP."
        assert args[
            'results_dir'], "For accuracy measurements, provide --results_dir to save the detections."
        labels = xdnn_io.get_labels(args['labels'])
        colors = generate_colors(len(labels))

    # Select postproc and biases
    if args['yolo_version'] == 'v2':
        yolo_postproc = yolo.yolov2_postproc
    elif args['yolo_version'] == 'v3':
        yolo_postproc = yolo.yolov3_postproc
    biases = bias_selector(args)

    import caffe
    caffe.set_mode_cpu()
    print(args)
    gpu_id = args['gpu']
    if gpu_id is not None:
        caffe.set_mode_gpu()
        caffe.set_device(gpu_id)

    net = caffe.Net(deploy_model, weights, caffe.TEST)

    # The last two dimensions of the input blob give the network size.
    net_h, net_w = net.blobs['data'].data.shape[-2:]
    args['net_h'] = net_h
    args['net_w'] = net_w

    for idx, img_path in enumerate(images):
        seen = idx + 1
        if seen % 100 == 0:
            print(seen, "images processed")

        blob, img_shape = xdnn_io.loadYoloImageBlobFromFile(
            img_path, net_h, net_w)
        net.blobs['data'].data[...] = blob
        forward_out = net.forward()

        # Order the network outputs by the size of their last dimension.
        ordered_outputs = sorted(forward_out.values(),
                                 key=lambda blob_out: blob_out.shape[-1])
        batch_boxes = yolo_postproc(ordered_outputs, args, [img_shape],
                                    biases=biases)
        detections = batch_boxes[0]

        print("{}. Detected {} boxes in {}".format(idx, len(detections),
                                                   img_path))

        # Save the result
        if not args['results_dir']:
            continue

        stem = os.path.splitext(os.path.basename(img_path))[0]
        out_file_txt = os.path.join(args['results_dir'], stem + '.txt')
        print("Saving {} boxes to {}".format(len(detections), out_file_txt))
        sys.stdout.flush()
        saveDetectionDarknetStyle(out_file_txt, detections, img_shape)

        if args['visualize']:
            out_file_png = os.path.join(args['results_dir'], stem + '.png')
            print("Saving result to {}".format(out_file_png))
            sys.stdout.flush()
            draw_boxes(img_path, detections, labels, colors, out_file_png)

    return len(images)