from math import ceil
import time
from pynq import Xlnk
import numpy as np
import matplotlib.pyplot as plt
from pynq.lib import Pmod_ADC
from pynq.overlays.base import BaseOverlay

# Program the PL with the stock base overlay.
ol = BaseOverlay("base.bit")

# Contiguous-memory allocator; report current CMA statistics.
xlnk = Xlnk()
xlnk.cma_stats()

# Physically contiguous input buffer (100 x uint32).
py_buffer = xlnk.cma_array(shape=(100, ), dtype=np.uint32)
# Physically contiguous output buffer (100 x uint32).
out_buffer = xlnk.cma_array(shape=(100, ), dtype=np.uint32)

# ADC module attached to the PMODA connector.
adc = Pmod_ADC(ol.PMODA)

# delay = 0.00
# values = np.linspace(0, 2, 20)
samples = []
# Take 100 back-to-back ADC readings; only the most recent reading is
# retained in `sample` (the append below is intentionally disabled).
for count in range(1, 101):
    sample = adc.read()
    # time.sleep(0.1)
    # samples.append(sample[0])
class CnnFPGA(object):
    """HyperFace-style CNN whose convolution/pooling layers run on an FPGA.

    The feature-extraction layers (conv/max layers, built from the
    project-local ``ConvLayer``/``MaxLayer`` classes) execute on the
    programmable logic via physically contiguous CMA buffers, while the
    final fully connected ("soft") layers are evaluated in NumPy on the
    CPU using weights loaded from ``weights/<layer>/{W,b}.npy`` next to
    this file.

    NOTE(review): the original source reached this reviewer with all
    indentation collapsed; the statement nesting below is a faithful
    reconstruction but should be confirmed against the project history.
    """

    def __init__(self, batchsize=8):
        """Allocate CMA buffers, build the FPGA layer pipeline and load
        the CPU-side fully connected weights.

        :param batchsize: maximum number of images processed per call;
            fixes the number of CMA input buffers allocated up front.
        """
        logger.info('Define a HyperFace CNN model on FPGA')
        self.batchsize = batchsize
        # Hard upper bound: __call__ refuses batches larger than this,
        # because only this many CMA buffers are allocated below.
        self.batchsizeMax = batchsize
        self.xlnk = Xlnk()
        # Per-layer parallelism/tiling factors (mirrors the runFactor
        # arguments passed to each ConvLayer below).
        self.runFactor = {
            'conv_1': 1,
            'conv_1a': 8,
            'conv_2': 8,
            'conv_3': 16,
            'conv_3a': 8,
            'conv_4': 16,
            'conv_5': 16,
            'conv_all': 2
        }
        # cmaIn[b]: flattened 3x227x227 float32 input image buffer.
        # inConvAll[b]: flattened 768x6x6 float32 buffer feeding conv_all
        # (filled by concat()).
        self.cmaIn = []
        self.inConvAll = []
        b = 0
        while b < self.batchsize:
            self.cmaIn.append(
                self.xlnk.cma_array(shape=(3 * 227 * 227, ),
                                    dtype=np.float32))
            self.inConvAll.append(
                self.xlnk.cma_array(shape=(768 * 6 * 6, ),
                                    dtype=np.float32))
            b += 1
        # FPGA layer pipeline: ConvLayer(name, out_channels, out_spatial,
        # xlnk, runFactor, batchsize) — presumably (channels, H=W) —
        # TODO confirm against ConvLayer's definition.
        self.conv1 = ConvLayer('conv_1', 96, 55, self.xlnk,
                               runFactor=1, batchsize=self.batchsize)
        self.max1 = MaxLayer('max_1', 96, 27, self.xlnk,
                             batchsize=self.batchsize)
        self.conv1a = ConvLayer('conv_1a', 256, 6, self.xlnk,
                                runFactor=8, batchsize=self.batchsize)
        self.conv2 = ConvLayer('conv_2', 256, 27, self.xlnk,
                               runFactor=8, batchsize=self.batchsize)
        self.max2 = MaxLayer('max_2', 256, 13, self.xlnk,
                             batchsize=self.batchsize)
        self.conv3 = ConvLayer('conv_3', 384, 13, self.xlnk,
                               runFactor=16, batchsize=self.batchsize)
        self.conv3a = ConvLayer('conv_3a', 256, 6, self.xlnk,
                                runFactor=8, batchsize=self.batchsize)
        self.conv4 = ConvLayer('conv_4', 384, 13, self.xlnk,
                               runFactor=16, batchsize=self.batchsize)
        self.conv5 = ConvLayer('conv_5', 256, 13, self.xlnk,
                               runFactor=16, batchsize=self.batchsize)
        self.max5 = MaxLayer('max_5', 256, 6, self.xlnk,
                             batchsize=self.batchsize)
        self.convAll = ConvLayer('conv_all', 192, 6, self.xlnk,
                                 runFactor=2, batchsize=self.batchsize)
        # Fully connected heads evaluated on the CPU (see softLayer()).
        softLayer = [
            'fc_full', 'fc_detection1', 'fc_detection2', 'fc_gender1',
            'fc_gender2', 'fc_landmarks1', 'fc_landmarks2',
            'fc_visibility1', 'fc_visibility2', 'fc_pose1', 'fc_pose2'
        ]
        # weights[name] = (W, b) numpy arrays loaded from disk relative
        # to this module's directory.
        self.weights = {}
        for sl in softLayer:
            w = np.load(
                os.path.dirname(os.path.realpath(__file__)) +
                "/weights/" + sl + "/W.npy")
            b = np.load(
                os.path.dirname(os.path.realpath(__file__)) +
                "/weights/" + sl + "/b.npy")
            self.weights[sl] = (w, b)
        # Log CMA usage as "used / total [ percent ]".
        memStat = self.xlnk.cma_stats()
        logger.info("CMA Stat : " + str(memStat['CMA Memory Usage']) +
                    ' / ' +
                    str(memStat['CMA Memory Available'] +
                        memStat['CMA Memory Usage']) + " [ " +
                    str(int((memStat['CMA Memory Usage'] /
                             (memStat['CMA Memory Available'] +
                              memStat['CMA Memory Usage'])) * 100)) +
                    " % ] ")

    def __call__(self, img):
        """Run the full network on a batch of images.

        :param img: sequence of images; each element must ravel() to
            3*227*227 float32 values (assumed CHW layout — TODO confirm).
        :returns: list of per-detection result dicts from softLayer().
        :raises Exception: if len(img) exceeds the batch size given at
            construction time.
        """
        self.batchsize = len(img)
        logger.info('Start computation on FPGA with batch size ' +
                    str(self.batchsize))
        if self.batchsize > self.batchsizeMax:
            raise Exception('Batch size exceed the max threshold')
        # Copy each input image into its pre-allocated CMA buffer.
        b = 0
        while b < self.batchsize:
            np.copyto(self.cmaIn[b], img[b].ravel())
            b += 1
        # Three feature branches (conv1a / conv3a / max5) are fused by
        # concat() into inConvAll, then reduced by conv_all.
        outMax1 = self.max1(self.conv1(self.cmaIn))
        outConv1a = self.conv1a(outMax1)
        outConv3 = self.conv3(self.max2(self.conv2(outMax1)))
        outConv3a = self.conv3a(outConv3)
        outMax5 = self.max5(self.conv5(self.conv4(outConv3)))
        self.concat(outConv1a, outConv3a, outMax5)
        outConvAll = self.convAll(self.inConvAll)
        res = self.softLayer(outConvAll)
        logger.info('End computation')
        return res

    def concat(self, first, second, third):
        """Channel-concatenate three 256x6x6 feature maps into the
        768x6x6 conv_all input buffers (self.inConvAll), converting
        between the layer's spatial-major layout and channel-major
        layout via the transposes below.

        Each of first/second/third is a per-batch list of flat arrays of
        length 36*256 (6*6 spatial positions x 256 channels — layout
        inferred from the reshapes; TODO confirm against ConvLayer).
        """
        b = 0
        while b < self.batchsize:
            # (36, 256) -> (256, 36) -> flat: channel-major per branch.
            f = first[b].reshape((36, 256))
            f = f.transpose()
            f = f.reshape(36 * 256)
            s = second[b].reshape((36, 256))
            s = s.transpose()
            s = s.reshape(36 * 256)
            t = third[b].reshape((36, 256))
            t = t.transpose()
            t = t.reshape(36 * 256)
            # Stack to 768 channels, then back to spatial-major order
            # expected by conv_all's CMA input buffer.
            full = np.concatenate((f, s, t))
            full = full.reshape(768, 36).transpose().flatten()
            np.copyto(self.inConvAll[b], full)
            b += 1

    def softLayer(self, input):
        """Evaluate the fully connected heads on the CPU for each batch
        element and collect results for likely face regions.

        :param input: per-batch list of flat conv_all outputs, each
            reshapeable to (6, 6, 192).
        :returns: list of dicts (img/detection/landmark/visibility/
            pose/gender) — one per batch element whose detection head
            fires; elements below the threshold produce no entry.
        """
        res = []
        b = 0
        while b < self.batchsize:
            # HWC -> CHW flatten to match the fc_full weight layout.
            inFcFull = np.transpose(input[b].reshape(6, 6, 192),
                                    (2, 0, 1)).flatten()
            outFcFull = self.fcLayer(inFcFull, self.weights['fc_full'])
            # ReLU via elementwise mask.
            outFcFull = outFcFull * (outFcFull > 0)
            outFcDet2 = self.featDetect(outFcFull,
                                        self.weights['fc_detection1'],
                                        self.weights['fc_detection2'])
            (face_det, face_val) = self.softMax(outFcDet2)
            # NOTE(review): softMax returns (argmax index, P(class 1)),
            # so face_det is 0 or 1 and "> 0.25" effectively tests
            # argmax == 1 — confirm the threshold was meant for the
            # index rather than the probability face_val.
            if face_det > 0.25:
                logger.info('Possible region found on batch index ' +
                            str(b))
                outFcGen2 = self.featDetect(outFcFull,
                                            self.weights['fc_gender1'],
                                            self.weights['fc_gender2'])
                (gen_det, gen_val) = self.softMax(outFcGen2)
                outFcLan2 = self.featDetect(outFcFull,
                                            self.weights['fc_landmarks1'],
                                            self.weights['fc_landmarks2'])
                outFcVis2 = self.featDetect(outFcFull,
                                            self.weights['fc_visibility1'],
                                            self.weights['fc_visibility2'])
                outFcPos2 = self.featDetect(outFcFull,
                                            self.weights['fc_pose1'],
                                            self.weights['fc_pose2'])
                res.append({
                    'img': self.cmaIn[b],
                    'detection': face_val,
                    'landmark': outFcLan2,
                    'visibility': outFcVis2,
                    'pose': outFcPos2,
                    'gender': gen_val
                })
            # Placed at loop level so the loop terminates even when no
            # face is detected (reconstructed nesting — see class note).
            b += 1
        return res

    def fcLayer(self, x, y):
        """Fully connected layer: W @ x + b, with y = (W, b)."""
        (w, b) = y
        out = np.matmul(w, x)
        out = np.sum([b, out], axis=0)
        return out

    def featDetect(self, x, y1, y2):
        """Two stacked fc layers with a ReLU between them:
        fc(ReLU(fc(x, y1)), y2)."""
        out = self.fcLayer(x, y1)
        out = out * (out > 0)
        out = self.fcLayer(out, y2)
        return out

    def softMax(self, x):
        """Softmax over logits x.

        :returns: (argmax class index, softmax probability of class 1).
            Note the probability returned is always vals[1], regardless
            of which class won the argmax.
        """
        e_x = np.exp(x)
        vals = e_x / e_x.sum()
        val = vals[1]
        c = np.argmax(vals)
        return (c, val)