Exemple #1
0
def main(argv):
    """Classify one batch of images on every network listed in the JSON config.

    For each entry of ``args['jsoncfg']`` an ``XDNNFPGAOp`` is created, one
    batch of images is loaded and submitted with ``exec_async`` on its own
    stream id, and after all results have been collected the fully-connected
    layer and softmax are computed and the classification is printed.

    Args:
        argv: command-line arguments forwarded to
            ``xdnn_io.processCommandLine``.

    Exits with status 1 when ``xdnn.createHandle`` reports a non-zero code.
    """
    args = xdnn_io.processCommandLine(argv)
    ret, handles = xdnn.createHandle(args['xclbin'], "kernelSxdnn_0")
    if ret != 0:
        sys.exit(1)

    # From here on the handle exists; close it even if a later step raises.
    try:
        labels = xdnn_io.get_labels(args['labels'])

        # Per-network state, keyed by the configuration name.
        fpgaRT = {}
        fpgaOutputs = {}
        fcWeights = {}
        fcBiases = {}
        confNames = []

        # Only the per-network configurations are needed below.
        netconfs = args['jsoncfg']
        for netconf_args in netconfs:
            confName = str(netconf_args['name'])
            confNames.append(confName)
            fpgaRT[confName] = xdnn.XDNNFPGAOp(handles, netconf_args)

            # next(iter(...)) works on Python 2 and 3, unlike .itervalues().next().
            firstInputDesc = next(
                iter(fpgaRT[confName].getInputDescriptors().values()))
            # Replace the leading entry of the descriptor with the batch size.
            netconf_args['in_shape'] = (
                (netconf_args['batch_sz'],) + tuple(firstInputDesc[1:]))

            (fcWeights[confName],
             fcBiases[confName]) = xdnn_io.loadFCWeightsBias(netconf_args)
            fpgaOutputs[confName] = np.empty(
                (netconf_args['batch_sz'], int(netconf_args['fpgaoutsz'])),
                dtype=np.float32, order='C')

        # Every input buffer stays referenced in this list until the function
        # returns, i.e. past the get_result() calls below.
        batchArrays = []
        for streamId, netconf_args in enumerate(netconfs):
            batch = np.empty(netconf_args['in_shape'],
                             dtype=np.float32, order='C')
            batchArrays.append(batch)

            img_paths = xdnn_io.getFilePaths(netconf_args['images'])
            # Only the first batch_sz images are used; if fewer are found the
            # remaining rows of `batch` keep whatever np.empty left there.
            for j, p in enumerate(img_paths[:netconf_args['batch_sz']]):
                batch[j, ...], _ = xdnn_io.loadImageBlobFromFile(
                    p,
                    netconf_args['img_raw_scale'],
                    netconf_args['img_mean'],
                    netconf_args['img_input_scale'],
                    netconf_args['in_shape'][2],
                    netconf_args['in_shape'][3])

            confName = str(netconf_args['name'])
            firstInputName = next(iter(fpgaRT[confName].getInputs().keys()))
            firstOutputName = next(iter(fpgaRT[confName].getOutputs().keys()))
            fpgaRT[confName].exec_async(
                {firstInputName: batch},
                {firstOutputName: fpgaOutputs[confName]},
                streamId)

        # Stream ids were assigned in configuration order above.
        for streamId, confName in enumerate(confNames):
            fpgaRT[confName].get_result(streamId)

        for netconf_args in netconfs:
            confName = str(netconf_args['name'])
            fcOut = np.empty(
                (netconf_args['batch_sz'], netconf_args['outsz']),
                dtype=np.float32, order='C')
            xdnn.computeFC(fcWeights[confName], fcBiases[confName],
                           fpgaOutputs[confName], fcOut)

            softmaxOut = xdnn.computeSoftmax(fcOut)
            # NOTE(review): this passes the raw 'images' argument rather than
            # the list of files actually loaded -- confirm that is intended.
            xdnn_io.printClassification(softmaxOut, netconf_args['images'],
                                        labels)
    finally:
        xdnn.closeHandle()
Exemple #2
0
def main():
    """Run the streaming classification pipeline described by the compiler JSON.

    Sets up shared-memory input/output buffers, a pool of image-preparation
    workers (``init_prepImage`` / ``run_prepImage``), one process running
    ``fpga_process_async`` and one running ``post_process``, then feeds every
    image index to the pool. With ``args['perpetual']`` set, the image list is
    resubmitted forever and the function never returns.

    Exits with status 1 when ``xdnn.createManager`` does not return ``True``.
    """
    parser = xdnn_io.default_parser_args()
    parser.add_argument('--numprepproc', type=int, default=1,
                        help='number of parallel processes used to decode and quantize images')
    parser.add_argument('--numstream', type=int, default=16,
                        help='number of FPGA streams')
    parser.add_argument('--deviceID', type=int, default=0,
                        help='FPGA no. -> FPGA ID to run in case multiple FPGAs')
    parser.add_argument('--benchmarkmode', type=int, default=0,
                        help='bypass pre/post processing for benchmarking')
    args = parser.parse_args()
    args = xdnn_io.make_dict_args(args)
    ret = xdnn.createManager()
    if ret != True:  # kept as an equality test to preserve the original check
        sys.exit(1)

    # Number of shared input buffers; also the capacity of the index queue
    # that hands them out.
    numInputBufs = 100

    sharedInputArrs = []
    fpgaOutputs = []
    compilerJSONObj = xdnn.CompilerJsonParser(args['netcfg'])
    qPrep = mp.Queue(maxsize=args['numprepproc'] * 10)
    qFpga = mp.Queue(maxsize=100)
    streamQ = mp.Queue(maxsize=args['numstream'])
    prepProcQ = mp.Queue(maxsize=numInputBufs)
    # next(iter(...)) works on Python 2 and 3, unlike .itervalues().next().
    firstOutputShape = next(iter(compilerJSONObj.getOutputs().values()))
    firstInputShape = next(iter(compilerJSONObj.getInputs().values()))

    # mp.Array only treats its second argument as a length when it is a real
    # Python integer; np.prod returns a NumPy scalar, so convert explicitly.
    outSize = int(args['batch_sz'] * np.prod(tuple(firstOutputShape[1:])))
    for i in range(args['numstream']):
        fpgaOutputs.append(mp.Array(ctypes.c_float, outSize))
        streamQ.put(i)

    # The input buffer size is the same for every buffer; compute it once.
    bufSize = int(np.prod(tuple(firstInputShape)))
    for i in range(numInputBufs):
        sharedInputArrs.append(mp.Array(ctypes.c_float, bufSize))
        prepProcQ.put(i)

    img_paths = xdnn_io.getFilePaths(args['images'])

    pool = mp.Pool(initializer=init_prepImage,
                   initargs=(args, qPrep, img_paths, sharedInputArrs,
                             prepProcQ, compilerJSONObj,),
                   processes=args['numprepproc'])

    xdnnProc = mp.Process(target=fpga_process_async,
                          args=(qPrep, qFpga, args, len(img_paths),
                                sharedInputArrs, prepProcQ, streamQ,
                                fpgaOutputs, compilerJSONObj,))
    xdnnProc.start()

    postProc = mp.Process(target=post_process,
                          args=(qFpga, args, img_paths, streamQ, fpgaOutputs,))
    postProc.start()

    if args['perpetual']:
        # Resubmit the whole image list forever, one pass at a time.
        while True:
            pool.map_async(run_prepImage, range(len(img_paths))).wait()
    else:
        # Fire and forget: completion is observed by joining the consumers.
        pool.map_async(run_prepImage, range(len(img_paths)))

    xdnnProc.join()
    postProc.join()

    pool.close()
    pool.join()
Exemple #3
0
def main():
    """Classify all images in batches and optionally report golden accuracy.

    Images found via ``args['images']`` are processed ``args['batch_sz']`` at a
    time: each batch is executed on the FPGA, passed through the
    fully-connected layer and softmax, and printed. When ``args['golden']`` is
    set, Top-1 / Top-5 hit counts are accumulated and an average accuracy line
    is printed at the end.

    Exits with status 1 when ``xdnn.createHandle`` reports a non-zero code.
    """
    args = xdnn_io.processCommandLine()

    ret, handles = xdnn.createHandle(args['xclbin'], "kernelSxdnn_0")
    if ret != 0:
        sys.exit(1)

    top1Count = 0
    top5Count = 0
    # From here on the handle exists; close it even if a later step raises.
    try:
        fpgaRT = xdnn.XDNNFPGAOp(handles, args)
        fcWeight, fcBias = xdnn_io.loadFCWeightsBias(args)
        img_paths = xdnn_io.getFilePaths(args['images'])
        batch_sz = args['batch_sz']

        # Buffers are allocated once and reused for every batch.
        fpgaOutput = np.empty((batch_sz, args['fpgaoutsz']),
                              dtype=np.float32, order='C')
        fcOutput = np.empty((batch_sz, args['outsz']),
                            dtype=np.float32, order='C')
        batch_array = np.empty(((batch_sz,) + args['in_shape']),
                               dtype=np.float32, order='C')
        labels = xdnn_io.get_labels(args['labels'])
        if args['golden']:
            goldenMap = xdnn_io.getGoldenMap(args['golden'])

        # range() instead of xrange() so the loop also runs on Python 3.
        for i in range(0, len(img_paths), batch_sz):
            batch_paths = img_paths[i:i + batch_sz]
            # NOTE(review): when the final batch is short, the trailing rows
            # of batch_array still hold the previous batch's images.
            for j, p in enumerate(batch_paths):
                batch_array[j, ...], _ = xdnn_io.loadImageBlobFromFile(
                    p, args['img_raw_scale'], args['img_mean'],
                    args['img_input_scale'], args['in_shape'][2],
                    args['in_shape'][1])

            fpgaRT.execute(batch_array, fpgaOutput)
            xdnn.computeFC(fcWeight, fcBias, fpgaOutput, batch_sz,
                           args['outsz'], args['fpgaoutsz'], fcOutput)
            softmaxOut = xdnn.computeSoftmax(fcOutput)
            xdnn_io.printClassification(softmaxOut, batch_paths, labels)
            if args['golden']:
                for j, p in enumerate(batch_paths):
                    top1Count += xdnn_io.isTopK(softmaxOut[j], goldenMap, p,
                                                labels, 1)
                    top5Count += xdnn_io.isTopK(softmaxOut[j], goldenMap, p,
                                                labels, 5)
    finally:
        xdnn.closeHandle()

    # Skip the summary when no images were found, to avoid dividing by zero.
    if args['golden'] and len(img_paths) > 0:
        numImages = len(img_paths)
        # Format inside the call: applying % to the result of print() fails
        # whenever print is a function.
        print("\nAverage accuracy (n=%d) Top-1: %.1f%%, Top-5: %.1f%%\n" % (
            numImages,
            float(top1Count) / float(numImages) * 100.,
            float(top5Count) / float(numImages) * 100.))
Exemple #4
0
def networkForward(netcfg, layername):
    """Execute the network in ``netcfg`` on the FPGA and return its run time.

    Command-line options are parsed with the default xdnn parser (plus a
    required ``--layerindex``); ``netcfg``, ``batch_sz`` = 1 and ``PE`` = 0
    then override the parsed values. Every image found via ``args['images']``
    is loaded and executed in turn, and the value returned is the average of
    ``fpgaRT.get_exec_time()`` over ``iterations`` runs of the LAST image.

    Args:
        netcfg: path to the network JSON file; stored in ``args['netcfg']``
            and parsed here.
        layername: not used by this function (it only appeared in a
            commented-out debug print).

    Returns:
        The averaged execution time reported by ``get_exec_time()``.

    Raises:
        ValueError: if no input images are found.

    Exits with status 1 when ``xdnn.createHandle`` reports a non-zero code.
    """
    parser = xdnn_io.default_parser_args()
    parser.add_argument('--layerindex', type=int, default=0,
                        help='Index value for layer in json', required=True)
    argvt = parser.parse_args()
    args = xdnn_io.make_dict_args(argvt)

    args['netcfg'] = netcfg
    # Hardcode these parameters, so we only have to look at performance of 1 PE
    args["batch_sz"] = 1
    args["PE"] = 0

    ret, handles = xdnn.createHandle(args['xclbin'], "kernelSxdnn_0")
    if ret != 0:
        sys.exit(1)

    # The cleanup used to sit after the return statement and never ran;
    # try/finally guarantees the handle is closed on every exit path.
    try:
        fpgaRT = xdnn.XDNNFPGAOp(handles, args)
        fpgaOutput = fpgaRT.getOutputs()
        fpgaInput = fpgaRT.getInputs()

        img_paths = xdnn_io.getFilePaths(args['images'])
        if len(img_paths) == 0:
            # Without any image nothing is executed and there is no time to
            # report (this previously surfaced as an unbound-local error).
            raise ValueError("networkForward: no input images found")

        firstInputDesc = tuple(fpgaRT.getInputDescriptors().values())[0]
        inShape = (args['batch_sz'],) + tuple(firstInputDesc[1:])
        firstInput = list(fpgaInput.values())[0]

        # Parse the network description once rather than once per batch.
        with open(args['netcfg']) as fp:
            data = json.load(fp)
        # Strip nodes that don't run in hardware.
        # NOTE(review): the filtered list is not consumed below; it is kept so
        # that a malformed netcfg still fails here as it did before.
        nodes = [x for x in data['network'] if x['xdnn_kv']]

        # How many iterations to run, and average across
        iterations = 1

        avetime = 0.0
        for i in range(0, len(img_paths), args['batch_sz']):
            # Load each image of the batch into its own slot (previously every
            # pass reloaded img_paths[0] into slot 0).
            for j, p in enumerate(img_paths[i:i + args['batch_sz']]):
                firstInput[j, ...], _ = xdnn_io.loadImageBlobFromFile(
                    p, args['img_raw_scale'], args['img_mean'],
                    args['img_input_scale'], inShape[2], inShape[3])

            # Accumulate the run time over `iterations` executions.
            total = 0.0
            for run in range(iterations):
                fpgaRT.execute(fpgaInput, fpgaOutput)
                total += fpgaRT.get_exec_time()

            # Average it
            avetime = total / iterations

        return avetime
    finally:
        xdnn.closeHandle()
def main():
    """Run the streaming classification pipeline using sizes from the args.

    Sets up shared-memory input/output buffers, a pool of image-preparation
    workers (``init_prepImage`` / ``run_prepImage``), one process running
    ``fpga_process_async`` and one running ``post_process``, then feeds every
    image index to the pool. With ``args['perpetual']`` set, the image list is
    resubmitted forever and the function never returns.

    Exits with status 1 when ``xdnn.createManager`` does not return ``True``.
    """
    parser = xdnn_io.default_parser_args()
    parser.add_argument(
        '--numprepproc',
        type=int,
        default=1,
        help='number of parallel processes used to decode and quantize images')
    parser.add_argument('--numstream',
                        type=int,
                        default=16,
                        help='number of FPGA streams')
    parser.add_argument(
        '--deviceID',
        type=int,
        default=0,
        help='FPGA no. -> FPGA ID to run in case multiple FPGAs')
    args = parser.parse_args()
    args = xdnn_io.make_dict_args(args)
    ret = xdnn.createManager(args['xlnxlib'])
    if ret != True:  # kept as an equality test to preserve the original check
        sys.exit(1)

    # Number of shared input buffers; also the capacity of the index queue
    # that hands them out.
    numInputBufs = 100

    sharedInputArrs = []
    fpgaOutputs = []

    qPrep = mp.Queue(maxsize=args['numprepproc'] * 10)
    qFpga = mp.Queue(maxsize=100)
    streamQ = mp.Queue(maxsize=args['numstream'])
    prepProcQ = mp.Queue(maxsize=numInputBufs)

    # mp.Array only treats its second argument as a length when it is a real
    # Python integer, so normalise both sizes with int().
    outSize = int(args['batch_sz'] * args['fpgaoutsz'])
    for i in range(args['numstream']):
        fpgaOutputs.append(mp.Array(ctypes.c_float, outSize))
        streamQ.put(i)

    # np.prod returns a NumPy scalar; the size is identical for every buffer,
    # so compute and convert it once.
    bufSize = int(np.prod(args['in_shape']))
    for i in range(numInputBufs):
        sharedInputArrs.append(mp.Array(ctypes.c_float, bufSize))
        prepProcQ.put(i)

    img_paths = xdnn_io.getFilePaths(args['images'])

    pool = mp.Pool(initializer=init_prepImage,
                   initargs=(
                       args,
                       qPrep,
                       img_paths,
                       sharedInputArrs,
                       prepProcQ,
                   ),
                   processes=args['numprepproc'])

    xdnnProc = mp.Process(target=fpga_process_async,
                          args=(
                              qPrep,
                              qFpga,
                              args,
                              len(img_paths),
                              sharedInputArrs,
                              prepProcQ,
                              streamQ,
                              fpgaOutputs,
                          ))
    xdnnProc.start()

    postProc = mp.Process(target=post_process,
                          args=(
                              qFpga,
                              args,
                              img_paths,
                              streamQ,
                              fpgaOutputs,
                          ))
    postProc.start()

    if args['perpetual']:
        # Resubmit the whole image list forever, one pass at a time.
        while True:
            pool.map_async(run_prepImage, range(len(img_paths))).wait()
    else:
        # Fire and forget: completion is observed by joining the consumers.
        pool.map_async(run_prepImage, range(len(img_paths)))

    xdnnProc.join()
    postProc.join()

    pool.close()
    pool.join()