Example #1
0
def InferImage(net, image, labels, num_classes=1000):
    """Classify a single image file with a Caffe ``net``.

    The image is loaded and preprocessed with a ``caffe.io.Transformer``
    configured from the shape of the net's ``'data'`` blob, one forward pass
    is run while holding the module-level ``board_avail`` context manager,
    and the classification is printed and returned via ``xdnn_io``.

    Args:
        net: Caffe network exposing ``blobs['data']`` and ``forward()``.
        image: Path of the image to classify.
        labels: Label source handed to ``xdnn_io.get_labels``.
        num_classes: Size of the second axis that identifies the
            classification output among the blobs returned by
            ``net.forward()``. Defaults to 1000, the value that was
            previously hard-coded.

    Returns:
        Whatever ``xdnn_io.getClassification`` returns for this image.

    Raises:
        ValueError: If no output blob has ``shape[1] == num_classes``.
            (Previously this surfaced as an ``UnboundLocalError``.)
    """
    import numpy as np
    import xdnn_io
    global board_avail

    transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
    transformer.set_transpose('data', (2, 0, 1))
    transformer.set_mean('data', np.array([104, 117, 123]))
    transformer.set_raw_scale('data', 255)
    transformer.set_channel_swap('data',
                                 (2, 1, 0))  # if using RGB instead of BGR
    img = caffe.io.load_image(image)
    net.blobs['data'].data[...] = transformer.preprocess('data', img)
    ptxtShape = net.blobs["data"].data.shape
    print("Running with shape of: ", ptxtShape)

    # NOTE(review): board_avail is presumably a lock/semaphore guarding the
    # accelerator; post-processing stays inside it because the output arrays
    # may be views into the net's buffers -- confirm before narrowing.
    with board_avail:
        out = net.forward()

        softmax = None
        for key in out:
            blob = out[key]
            try:
                # The last matching output wins, as before.
                if blob.shape[1] == num_classes:
                    softmax = blob
            except (AttributeError, IndexError, TypeError):
                # Output without a usable 2nd dimension: not a class vector.
                continue
        if softmax is None:
            raise ValueError(
                "no network output with %d classes found among %r" %
                (num_classes, list(out)))

        Labels = xdnn_io.get_labels(labels)
        xdnn_io.printClassification(softmax, [image], Labels)
        result = xdnn_io.getClassification(softmax, [image], Labels)
    return result
Example #2
0
def main(argv):
    """Run every network listed in ``jsoncfg`` on the FPGA and classify.

    For each network configuration the function creates an
    ``xdnn.XDNNFPGAOp``, loads up to ``batch_sz`` images, launches the
    network asynchronously on its own stream, waits for all streams, then
    runs the fully-connected layer and softmax through ``xdnn`` and prints
    the classification.

    Args:
        argv: Command-line arguments forwarded to
            ``xdnn_io.processCommandLine``.

    Exits with status 1 if the FPGA handle cannot be created. The handle is
    closed even if a later step raises.
    """
    args = xdnn_io.processCommandLine(argv)
    ret, handles = xdnn.createHandle(args['xclbin'], "kernelSxdnn_0")
    if ret != 0:
        sys.exit(1)

    try:
        labels = xdnn_io.get_labels(args['labels'])
        netconfs = args['jsoncfg']  # we do not use other args' keys

        # TODO dict of tuples instead?
        fpgaRT = {}
        fpgaOutputs = {}
        fcWeights = {}
        fcBiases = {}
        confNames = []

        for netconf_args in netconfs:
            confName = str(netconf_args['name'])
            confNames.append(confName)
            fpgaRT[confName] = xdnn.XDNNFPGAOp(handles, netconf_args)

            # Input shape = our batch size + the trailing dimensions of the
            # first input descriptor. next(iter(...)) works on Python 2 and 3.
            first_descriptor = next(
                iter(fpgaRT[confName].getInputDescriptors().values()))
            netconf_args['in_shape'] = (
                (netconf_args['batch_sz'],) + tuple(first_descriptor[1:]))

            (fcWeights[confName],
             fcBiases[confName]) = xdnn_io.loadFCWeightsBias(netconf_args)
            fpgaOutputs[confName] = np.empty(
                (netconf_args['batch_sz'], int(netconf_args['fpgaoutsz'])),
                dtype=np.float32, order='C')

        # Keep every input buffer referenced until the async runs complete.
        batchArrays = []
        for streamId, netconf_args in enumerate(netconfs):
            in_shape = netconf_args['in_shape']
            batch = np.empty(in_shape, dtype=np.float32, order='C')
            batchArrays.append(batch)

            img_paths = xdnn_io.getFilePaths(netconf_args['images'])
            # NOTE: if fewer than batch_sz images exist, the remaining rows
            # of the np.empty buffer stay uninitialised.
            for j, p in enumerate(img_paths[:netconf_args['batch_sz']]):
                batch[j, ...], _ = xdnn_io.loadImageBlobFromFile(
                    p,
                    netconf_args['img_raw_scale'],
                    netconf_args['img_mean'],
                    netconf_args['img_input_scale'],
                    in_shape[2],
                    in_shape[3])

            confName = str(netconf_args['name'])
            firstInputName = next(iter(fpgaRT[confName].getInputs()))
            firstOutputName = next(iter(fpgaRT[confName].getOutputs()))
            fpgaRT[confName].exec_async(
                {firstInputName: batch},
                {firstOutputName: fpgaOutputs[confName]},
                streamId)

        # Stream ids were assigned in configuration order above.
        for streamId, confName in enumerate(confNames):
            fpgaRT[confName].get_result(streamId)

        for netconf_args in netconfs:
            confName = str(netconf_args['name'])
            fcOut = np.empty(
                (netconf_args['batch_sz'], netconf_args['outsz']),
                dtype=np.float32, order='C')
            xdnn.computeFC(fcWeights[confName], fcBiases[confName],
                           fpgaOutputs[confName], fcOut)

            softmaxOut = xdnn.computeSoftmax(fcOut)
            xdnn_io.printClassification(softmaxOut, netconf_args['images'],
                                        labels)
    finally:
        xdnn.closeHandle()
Example #3
0
def main():
    """Classify all input images in batches on the FPGA and report accuracy.

    Images from ``args['images']`` are processed ``args['batch_sz']`` at a
    time: each batch is executed on the FPGA, passed through the
    fully-connected layer and softmax via ``xdnn``, and printed. When
    ``args['golden']`` is set, top-1 and top-5 hits are counted against the
    golden map and the average accuracy is printed at the end.

    Exits with status 1 if the FPGA handle cannot be created. The handle is
    closed even if a later step raises.
    """
    args = xdnn_io.processCommandLine()

    ret, handles = xdnn.createHandle(args['xclbin'], "kernelSxdnn_0")
    if ret != 0:
        sys.exit(1)

    batch_sz = args['batch_sz']
    golden = args['golden']
    top1Count = 0
    top5Count = 0

    try:
        fpgaRT = xdnn.XDNNFPGAOp(handles, args)
        fcWeight, fcBias = xdnn_io.loadFCWeightsBias(args)
        img_paths = xdnn_io.getFilePaths(args['images'])

        fpgaOutput = np.empty((batch_sz, args['fpgaoutsz']),
                              dtype=np.float32, order='C')
        fcOutput = np.empty((batch_sz, args['outsz']),
                            dtype=np.float32, order='C')
        # in_shape excludes the batch dimension here; it is prepended.
        batch_array = np.empty((batch_sz,) + args['in_shape'],
                               dtype=np.float32, order='C')

        labels = xdnn_io.get_labels(args['labels'])
        goldenMap = xdnn_io.getGoldenMap(golden) if golden else None

        # range() rather than xrange() so the loop runs on Python 2 and 3.
        for i in range(0, len(img_paths), batch_sz):
            batch_paths = list(img_paths[i:i + batch_sz])
            # NOTE: for a short final batch the trailing rows of batch_array
            # still hold images from the previous batch.
            for j, p in enumerate(batch_paths):
                batch_array[j, ...], _ = xdnn_io.loadImageBlobFromFile(
                    p, args['img_raw_scale'], args['img_mean'],
                    args['img_input_scale'], args['in_shape'][2],
                    args['in_shape'][1])

            fpgaRT.execute(batch_array, fpgaOutput)
            xdnn.computeFC(fcWeight, fcBias, fpgaOutput, batch_sz,
                           args['outsz'], args['fpgaoutsz'], fcOutput)
            softmaxOut = xdnn.computeSoftmax(fcOutput)
            xdnn_io.printClassification(softmaxOut, batch_paths, labels)

            if golden:
                for j, p in enumerate(batch_paths):
                    top1Count += xdnn_io.isTopK(softmaxOut[j], goldenMap, p,
                                                labels, 1)
                    top5Count += xdnn_io.isTopK(softmaxOut[j], goldenMap, p,
                                                labels, 5)
    finally:
        xdnn.closeHandle()

    num_images = len(img_paths)
    # Skip the summary when there are no images (avoids division by zero).
    if golden and num_images:
        # The format operator must sit inside print(...): applied to the
        # result of print() it fails on Python 3.
        print("\nAverage accuracy (n=%d) Top-1: %.1f%%, Top-5: %.1f%%\n" % (
            num_images,
            float(top1Count) / float(num_images) * 100.,
            float(top5Count) / float(num_images) * 100.))
Example #4
0
def main():
    """Run one network on the FPGA and print timed classification results.

    Steps: parse the command line, open the FPGA handle (exit status 1 on
    failure), load weights, prepare input/output buffers, execute the
    network, then run the fully-connected layer and softmax through
    ``xdnn``. The wall-clock time of each stage is printed in milliseconds.
    """
    args = xdnn_io.processCommandLine()
    if xdnn.createHandle(args['xclbin'], "kernelSxdnn_0",
                         args['xlnxlib']) != 0:
        sys.exit(1)

    weightsBlob, fcWeight, fcBias = xdnn_io.loadWeights(args)
    fpgaInputs, batch_sz = xdnn_io.prepareInput(args)
    fpgaOutput = xdnn_io.prepareOutput(args['fpgaoutsz'], batch_sz)

    # Single-iteration loop kept so the FPGA run can be repeated easily.
    for _ in range(1):
        t0 = timeit.default_timer()
        xdnn.execute(args['netcfg'], weightsBlob, fpgaInputs, fpgaOutput,
                     batch_sz,  # num batches
                     args['quantizecfg'], args['scaleB'], args['PE'])
        fpga_ms = (timeit.default_timer() - t0) * 1000
        print("\nAfter FPGA (%f ms)" % fpga_ms)

    t0 = timeit.default_timer()
    fcOut = xdnn.computeFC(fcWeight, fcBias, fpgaOutput, batch_sz,
                           args['outsz'], args['fpgaoutsz'], args['useblas'])
    fc_ms = (timeit.default_timer() - t0) * 1000
    print("\nAfter FC (%f ms)" % fc_ms)

    t0 = timeit.default_timer()
    softmaxOut = xdnn.computeSoftmax(fcOut, batch_sz)
    softmax_ms = (timeit.default_timer() - t0) * 1000
    print("\nAfter Softmax (%f ms)" % softmax_ms)

    xdnn_io.printClassification(softmaxOut, args)

    print("\nSuccess!\n")
    xdnn.closeHandle()
Example #5
0
def main():
    """Run every network in ``args['jsoncfg']`` asynchronously on the FPGA.

    For each configuration the weights, inputs and output buffers are
    prepared, all networks are launched with ``xdnn.exec_async``, their
    results are awaited with ``xdnn.get_result``, and finally the
    fully-connected layer and softmax are run through ``xdnn`` and the
    classification is printed. Stage timings are printed in milliseconds.

    Exits with status 1 if the FPGA handle cannot be created. The handle is
    closed even if a later step raises.
    """
    args = xdnn_io.processCommandLine()

    startTime = timeit.default_timer()
    ret = xdnn.createHandle(args['xclbin'], "kernelSxdnn_0", args['xlnxlib'])
    if ret != 0:
        sys.exit(1)
    elapsedTime = timeit.default_timer() - startTime
    print("\nAfter createHandle (%f ms):" % (elapsedTime * 1000))

    try:
        startTime = timeit.default_timer()

        # TODO dict of tuples instead?
        fpgaInputs = {}
        fpgaOutputs = {}
        weightsBlobs = {}
        fcWeights = {}
        fcBiases = {}
        batch_sizes = {}
        PEs = {}
        netFiles = {}
        confNames = []

        for netconf_args in args['jsoncfg']:
            confName = str(netconf_args['name'])
            confNames.append(confName)
            # 'PE' is a whitespace-separated string of integers.
            PE = [int(x) for x in netconf_args['PE'].split()]
            fpgaoutsz = int(netconf_args['fpgaoutsz'])

            PEs[confName] = PE
            (weightsBlobs[confName], fcWeights[confName],
             fcBiases[confName]) = xdnn_io.loadWeights(netconf_args)
            (fpgaInputs[confName],
             batch_sz) = xdnn_io.prepareInput(netconf_args, PE)
            batch_sizes[confName] = batch_sz
            fpgaOutputs[confName] = xdnn_io.prepareOutput(fpgaoutsz, batch_sz)
            netFiles[confName] = str(netconf_args['netcfg'])

        elapsedTime = timeit.default_timer() - startTime
        print("\nAfter init (%f ms):" % (elapsedTime * 1000))
        startTime = timeit.default_timer()

        for netconf_args in args['jsoncfg']:
            confName = str(netconf_args['name'])
            xdnn.exec_async(netFiles[confName], weightsBlobs[confName],
                            fpgaInputs[confName], fpgaOutputs[confName],
                            int(batch_sizes[confName]),
                            netconf_args['quantizecfg'],
                            netconf_args['scaleB'], PEs[confName])

        elapsedTime = timeit.default_timer() - startTime
        print("\nAfter Execonly (%f ms):" % (elapsedTime * 1000))
        startTime = timeit.default_timer()

        for confName in confNames:
            xdnn.get_result(PEs[confName])

        elapsedTime = timeit.default_timer() - startTime
        print("\nAfter wait (%f ms):" % (elapsedTime * 1000))

        for netconf_args in args['jsoncfg']:
            confName = str(netconf_args['name'])

            # Restart the timer per configuration; otherwise the FC timing
            # of later configurations would include the previous softmax
            # and classification printing.
            startTime = timeit.default_timer()
            fcOut = xdnn.computeFC(fcWeights[confName], fcBiases[confName],
                                   fpgaOutputs[confName],
                                   batch_sizes[confName],
                                   netconf_args['outsz'],
                                   netconf_args['fpgaoutsz'],
                                   netconf_args['useblas'])
            elapsedTime = timeit.default_timer() - startTime
            print("\nAfter FC (%f ms):" % (elapsedTime * 1000))

            startTime = timeit.default_timer()
            softmaxOut = xdnn.computeSoftmax(fcOut, batch_sizes[confName])
            elapsedTime = timeit.default_timer() - startTime
            print("\nAfter Softmax (%f ms):" % (elapsedTime * 1000))

            xdnn_io.printClassification(softmaxOut, netconf_args)

        print("\nSuccess!\n")
    finally:
        xdnn.closeHandle()
def _log_elapsed(fmt, start):
    """Print ``fmt`` filled with the milliseconds elapsed since ``start``.

    ``fmt`` must contain one ``%f`` placeholder; ``start`` is a reading of
    ``timeit.default_timer()``.
    """
    print(fmt % ((timeit.default_timer() - start) * 1000))


def _run_on_fpga(netCfg, exec_fmt, result_fmt):
    """Prepare inputs for ``netCfg``, launch it on the FPGA and await it.

    Stores ``fpgaInputs``, ``batch_sz`` and ``shapes`` (as returned by
    ``xdnn_io.prepareInput``) in ``netCfg`` and prints the time of each of
    the three steps; ``exec_fmt`` / ``result_fmt`` are the messages for the
    launch and wait steps.
    """
    net_args = netCfg['args']

    start = timeit.default_timer()
    (netCfg['fpgaInputs'], netCfg['batch_sz'],
     netCfg['shapes']) = xdnn_io.prepareInput(net_args, net_args['PE'])
    _log_elapsed("\nTime to transfer input image to FPGA (%f ms):", start)

    start = timeit.default_timer()
    xdnn.exec_async(net_args['netcfg'], netCfg['weightsBlobs'],
                    netCfg['fpgaInputs'], netCfg['fpgaOutputs'],
                    netCfg['batch_sz'], net_args['quantizecfg'],
                    net_args['scaleB'], net_args['PE'],
                    netCfg['streamId'])
    _log_elapsed(exec_fmt, start)

    start = timeit.default_timer()
    xdnn.get_result(net_args['PE'], netCfg['streamId'])
    _log_elapsed(result_fmt, start)


def _run_yolo_then_googlenet(jsoncfg):
    """Detect objects with YOLO, crop the first one, classify with GoogLeNet.

    ``jsoncfg`` is the list of per-network configuration dicts; it must
    contain entries named ``'yolo'`` and ``'googlenet'`` (``KeyError``
    otherwise). Returns early, after printing a notice, when YOLO yields no
    detections.
    """
    # ---- load weights / allocate outputs for every configured network ----
    netCfgs = {}
    startTime = timeit.default_timer()
    for streamId, netCfg_args in enumerate(jsoncfg):
        confName = str(netCfg_args['name'])
        netCfg_args['netcfg'] = './data/{}_{}.cmd'.format(
            netCfg_args['net'], netCfg_args['dsp'])
        weightsBlobs, fcWeights, fcBiases = xdnn_io.loadWeights(netCfg_args)
        batch_sz = 1
        netCfgs[confName] = {
            'streamId': streamId,
            'args': netCfg_args,
            'weightsBlobs': weightsBlobs,
            'fcWeights': fcWeights,
            'fcBiases': fcBiases,
            'batch_sz': batch_sz,
            'fpgaOutputs': xdnn_io.prepareOutput(netCfg_args["fpgaoutsz"],
                                                 batch_sz),
        }
    _log_elapsed("\nTime to init (%f ms):", startTime)

    # ---- run YOLO ----
    netCfg = netCfgs['yolo']
    yolo_args = netCfg['args']
    _run_on_fpga(netCfg,
                 "\nTime to execute Yolo on FPGA (%f ms):",
                 "\nTime to retrieve yolo outputs from FPGA (%f ms):")

    startTime = timeit.default_timer()
    # Output grid is the input size divided by 32; floor division keeps the
    # result an int on Python 3 as well (reshape needs integers).
    out_h = out_w = yolo_args['in_shape'][1] // 32
    anchor_boxes = 5
    objectness = 1
    coordinates = 4
    classes = 80
    out_c = objectness + coordinates + classes

    # Reshape the fpgaOutputs into a 4D volume
    yolo_outputs = netCfg['fpgaOutputs'].reshape(anchor_boxes, out_c, out_h,
                                                 out_w)

    # Apply sigmoid to 1st, 2nd, 4th channel for all anchor boxes
    yolo_outputs[:, 0:2, :, :] = sigmoid(
        yolo_outputs[:, 0:2, :, :])  # (X,Y) Predictions
    yolo_outputs[:, 4, :, :] = sigmoid(
        yolo_outputs[:, 4, :, :])  # Objectness / Box Confidence

    # Apply softmax on the class scores for each anchor box
    for box in range(anchor_boxes):
        yolo_outputs[box, 5:, :, :] = softmax(yolo_outputs[box, 5:, :, :])

    # Non-Max Suppression: drop detections scoring below 0.24; of two
    # predictions overlapping by more than 30%, drop the lower-scoring one.
    scorethresh = 0.24
    iouthresh = 0.3
    bboxes = nms.do_baseline_nms(yolo_outputs.flat, netCfg['shapes'][0][1],
                                 netCfg['shapes'][0][0],
                                 yolo_args['in_shape'][2],
                                 yolo_args['in_shape'][1], out_w, out_h,
                                 anchor_boxes, classes, scorethresh, iouthresh)

    with open(yolo_args['labels']) as f:
        names = [line.strip() for line in f]

    # Print the detections the model made
    for j, bbox in enumerate(bboxes):
        print("Obj %d: %s" % (j, names[bbox['classid']]))
        print("\t score = %f" % (bbox['prob']))
        print("\t (xlo,ylo) = (%d,%d)" % (bbox['ll']['x'], bbox['ll']['y']))
        print("\t (xhi,yhi) = (%d,%d)" % (bbox['ur']['x'], bbox['ur']['y']))

    _log_elapsed("\nTime to execute on CPU (%f ms):", startTime)

    if not bboxes:
        # Nothing to crop: indexing bboxes[0] below would raise IndexError.
        print("\nNo objects detected by YOLO; skipping googlenet.\n")
        return

    # ---- crop the chosen detection out of the original image ----
    startTime = timeit.default_timer()

    # NOTE: the image is used exactly as cv2.imread returns it; a BGR->RGB
    # conversion that used to sit here was disabled.
    img = cv2.imread(yolo_args['images'][0])

    # choose one of the bounding boxes
    obj_idx = 0
    chosen = bboxes[obj_idx]

    # specify a margin added to the selected bounding box
    margin = 10

    # Rows run from 'ur'.y to 'll'.y and columns from 'll'.x to 'ur'.x,
    # each widened by the margin and clamped to the image.
    H_slice = slice(max(0, chosen['ur']['y'] - margin),
                    min(img.shape[0], chosen['ll']['y'] + margin))
    W_slice = slice(max(0, chosen['ll']['x'] - margin),
                    min(img.shape[1], chosen['ur']['x'] + margin))
    img = img[H_slice, W_slice, :]

    print('pass obj {}: {} with size {} to googlenet'.format(
        obj_idx, names[chosen['classid']], img.shape))

    cv2.imwrite('cropped_yolo_output.jpg', img)

    # ---- run GOOGLENET ----
    netCfg = netCfgs['googlenet']
    # NOTE(review): the crop is passed in-memory and unresized; a manual
    # rescale/centre-crop step that used to live here was disabled, so
    # xdnn_io.prepareInput presumably fits it to in_shape -- confirm.
    netCfg['args']['images'] = [img]
    _log_elapsed("\nTime to prepare googlenet image on CPU (%f ms):",
                 startTime)

    _run_on_fpga(netCfg,
                 "\nTime to execute googlenet on FPGA (%f ms):",
                 "\nTime to retrieve googlenet outputs from FPGA (%f ms):")

    startTime = timeit.default_timer()
    fcOut = np.empty((netCfg['batch_sz'] * netCfg['args']['outsz']),
                     dtype=np.float32,
                     order='C')
    xdnn.computeFC(netCfg['fcWeights'], netCfg['fcBiases'],
                   netCfg['fpgaOutputs'], netCfg['batch_sz'],
                   netCfg['args']['outsz'], netCfg['args']['fpgaoutsz'], fcOut)
    _log_elapsed("\nTime to run FC layers on CPU (%f ms):", startTime)

    startTime = timeit.default_timer()
    softmaxOut = xdnn.computeSoftmax(fcOut, netCfg['batch_sz'])
    _log_elapsed("\nTime to run Softmax on CPU (%f ms):", startTime)

    xdnn_io.printClassification(softmaxOut, netCfg['args'])

    print("\nSuccess!\n")


def main(argv=None):
    """Run the YOLO -> GoogLeNet cascade on the FPGA.

    Parses ``argv`` with ``xdnn_io.processCommandLine``, opens the FPGA
    handle (exit status 1 on failure), then runs YOLO on the input image,
    crops the first detection and classifies the crop with GoogLeNet,
    printing per-stage timings. Only the ``'jsoncfg'`` entry of the parsed
    arguments is used after the handle is created. The handle is closed
    even if the pipeline raises.

    Args:
        argv: Optional argument list forwarded to the command-line parser.
    """
    args = xdnn_io.processCommandLine(argv)

    startTime = timeit.default_timer()
    ret = xdnn.createHandle(args['xclbin'], "kernelSxdnn_0", args['xlnxlib'])
    if ret != 0:
        sys.exit(1)
    _log_elapsed("\nTime to createHandle (%f ms):", startTime)

    try:
        # we do not need other args keys except 'jsoncfg'
        _run_yolo_then_googlenet(args['jsoncfg'])
    finally:
        xdnn.closeHandle()