Example #1
0
# copy to device: I, F and E each go through fp.array with the same iwl
# (presumably the input, filter and error tensors -- confirm against caller)
devI, devF, devE = (fp.array(host, iwl) for host in (I, F, E))

# set output bit widths at approximately mean scaling
def scale(n, q):
    """Return the single-precision exponent field of 0x7fff**2 * n / q, minus 126.

    The value is rounded to a 32-bit float, reinterpreted as an unsigned
    32-bit integer and shifted right by 23 bits to drop the mantissa.  For a
    positive, normal value v this yields floor(log2(v)) + 1.  The sign bit is
    not masked off, so a negative value gives a result 256 larger; zero
    gives -126.
    """
    value = float(0x7fff**2 * n) / q
    # Round-trip through struct to read the raw bits of the float32 encoding.
    raw = struct.pack('f', value)
    bits, = struct.unpack('I', raw)
    return (bits >> 23) - 126

# Word-length settings for the O, B and U outputs, each obtained from scale()
# with a product of dimension constants and a divisor (2 for O, 4 for B and U).
iwlO, iwlB, iwlU = (
    scale(C*T*R*S, 2),
    scale(K*T*R*S, 4),
    scale(N*M*P*Q, 4),
)

# allocate output buffers: devO is left uninitialised (fp.empty), while devB
# and devU start zero-filled (fp.zeros)
devO = fp.empty(dimO, iwlO)
devB = fp.zeros(dimI, iwlB)
devU = fp.zeros(dimF, iwlU)

# keyword arguments shared by the three benchmarked convolution calls
args = {
    'padding': padding,
    'strides': strides,
    'upscale': upscale,
    'repeat': 100,
}

# perform convolutions
# The single-argument parenthesised print form emits the same text on
# Python 2 and is also valid syntax on Python 3.
print('Warming up')
# Short pass to spin up the clock.  It uses the same padding/strides/upscale
# as the benchmarked fprop call below, so the warm-up runs the configuration
# devO was set up for; only the repeat count is lower.
fp.fprop_conv(devI, devF, devO, padding=padding, strides=strides,
              upscale=upscale, repeat=10)

print('Starting')
# Benchmarked calls; `args` supplies padding, strides, upscale and repeat.
fp.fprop_conv(devI, devF, devO, **args)
fp.bprop_conv(devF, devE, devB, **args)
fp.update_conv(devI, devE, devU, **args)
print('Done')