# MLP forward pass with dropout regularization (Theano).
def feedForward(x, params, train):
    snrg = RandomStreams(seed=12345)
    x = layers.dropout(x, train, 0.8, snrg)

    l = 0
    current_params = params[l]
    c1 = layers.linOutermost(x, current_params)
    c1 = layers.slopedClipping(c1)
    c1 = layers.dropout(c1, train, 0.75, snrg)

    l += 1
    current_params = params[l]
    c2 = layers.linOutermost(c1, current_params)
    c2 = layers.slopedClipping(c2)
    c2 = layers.dropout(c2, train, 0.75, snrg)

    l += 1
    current_params = params[l]
    c3 = layers.linOutermost(c2, current_params)
    c3 = layers.slopedClipping(c3)
    c3 = layers.dropout(c3, train, 0.75, snrg)

    l += 1
    current_params = params[l]
    z = layers.linOutermost(c3, current_params)
    return z

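The `layers` helpers used throughout this section (`linOutermost`, `slopedClipping`, `dropout`) are defined elsewhere. A minimal sketch of plausible Theano definitions follows; the clipping bounds, the slope, and the dropout convention (here `p` is taken to be the keep probability) are assumptions, not the original implementations.

import theano
import theano.tensor as T
# RandomStreams as used above would come from Theano, e.g.:
# from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

def linOutermost(x, params):
    # affine layer: params = [W, b]
    return T.dot(x, params[0]) + params[1]

def slopedClipping(x, slope=1.0, upper=1.0):
    # clipped-ReLU-style activation: clip(slope * x, 0, upper)
    return T.clip(slope * x, 0.0, upper)

def dropout(x, train, p, srng):
    # inverted dropout; p is assumed to be the keep probability
    mask = srng.binomial(n=1, p=p, size=x.shape,
                         dtype=theano.config.floatX)
    return T.switch(T.neq(train, 0), x * mask / p, x)
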
# MLP forward pass with quantized activations (B bits) and
# quantized weights/biases (B + 2 bits).
def feedForward(x, params, B):
    x = layers.quantizeAct(x, B)

    l = 0
    current_params = params[l]
    current_params[0] = layers.quantizeWeight(current_params[0], B + 2)
    current_params[1] = layers.quantizeWeight(current_params[1], B + 2)
    c1 = layers.linOutermost(x, current_params)
    c1 = layers.slopedClipping(c1)
    c1 = layers.quantizeAct(c1, B)

    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeWeight(current_params[0], B + 2)
    current_params[1] = layers.quantizeWeight(current_params[1], B + 2)
    c2 = layers.linOutermost(c1, current_params)
    c2 = layers.slopedClipping(c2)
    c2 = layers.quantizeAct(c2, B)

    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeWeight(current_params[0], B + 2)
    current_params[1] = layers.quantizeWeight(current_params[1], B + 2)
    c3 = layers.linOutermost(c2, current_params)
    c3 = layers.slopedClipping(c3)
    c3 = layers.quantizeAct(c3, B)

    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeWeight(current_params[0], B + 2)
    current_params[1] = layers.quantizeWeight(current_params[1], B + 2)
    z = layers.linOutermost(c3, current_params)
    return z

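`quantizeAct` and `quantizeWeight` are also external. One plausible fixed-point rounding scheme consistent with how they are called (activations in [0, 1] after sloped clipping, weights in [-1, 1]) is sketched below; the ranges and the rounding mode are assumptions.

import theano.tensor as T

def quantizeAct(x, B):
    # hypothetical: round to B-bit unsigned fixed point, range [0, 1]
    scale = 2.0 ** (B - 1)
    return T.clip(T.round(x * scale) / scale, 0.0, 1.0)

def quantizeWeight(w, B):
    # hypothetical: round to B-bit signed fixed point, range [-1, 1]
    scale = 2.0 ** (B - 1)
    return T.clip(T.round(w * scale) / scale, -1.0, 1.0)
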
# CNN forward pass: six conv layers with sloped-clipping activations,
# two 2x2 poolings, then three fully connected layers.
def feedForward(x, params):
    l = 0
    current_params = params[l]
    c1 = conv2d(x, current_params[0]) + current_params[1].dimshuffle('x', 0, 'x', 'x')
    c1 = layers.slopedClipping(c1)

    l += 1
    current_params = params[l]
    c2 = conv2d(c1, current_params[0]) + current_params[1].dimshuffle('x', 0, 'x', 'x')
    c2 = layers.slopedClipping(c2)
    p3 = pool_2d(c2, ws=(2, 2), ignore_border=True)

    l += 1
    current_params = params[l]
    c4 = conv2d(p3, current_params[0]) + current_params[1].dimshuffle('x', 0, 'x', 'x')
    c4 = layers.slopedClipping(c4)

    l += 1
    current_params = params[l]
    c5 = conv2d(c4, current_params[0]) + current_params[1].dimshuffle('x', 0, 'x', 'x')
    c5 = layers.slopedClipping(c5)
    p6 = pool_2d(c5, ws=(2, 2), ignore_border=True)

    l += 1
    current_params = params[l]
    c7 = conv2d(p6, current_params[0]) + current_params[1].dimshuffle('x', 0, 'x', 'x')
    c7 = layers.slopedClipping(c7)

    l += 1
    current_params = params[l]
    c8 = conv2d(c7, current_params[0]) + current_params[1].dimshuffle('x', 0, 'x', 'x')
    c8 = layers.slopedClipping(c8)
    f9 = c8.flatten(2)

    l += 1
    current_params = params[l]
    h1 = T.dot(f9, current_params[0]) + current_params[1]
    h1 = layers.slopedClipping(h1)

    l += 1
    current_params = params[l]
    h2 = layers.linOutermost(h1, current_params)
    h2 = layers.slopedClipping(h2)

    l += 1
    current_params = params[l]
    z = layers.linOutermost(h2, current_params)
    return z

# Plain MLP forward pass (no dropout, no quantization).
def feedForward(x, params):
    l = 0
    current_params = params[l]
    c1 = layers.linOutermost(x, current_params)
    c1 = layers.slopedClipping(c1)

    l += 1
    current_params = params[l]
    c2 = layers.linOutermost(c1, current_params)
    c2 = layers.slopedClipping(c2)

    l += 1
    current_params = params[l]
    c3 = layers.linOutermost(c2, current_params)
    c3 = layers.slopedClipping(c3)

    l += 1
    current_params = params[l]
    z = layers.linOutermost(c3, current_params)
    return z

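For reference, a hypothetical way to compile this forward pass into a callable predictor; the layer sizes (784-512-512-512-10) and the initialization are illustrative, not taken from the source.

import numpy as np
import theano
import theano.tensor as T

# hypothetical usage: build shared weights and compile the forward pass
def make_shared_layer(n_in, n_out, rng=np.random):
    W = theano.shared(rng.uniform(-0.1, 0.1, (n_in, n_out))
                      .astype(theano.config.floatX))
    b = theano.shared(np.zeros(n_out, dtype=theano.config.floatX))
    return [W, b]

params = [make_shared_layer(a, b)
          for a, b in [(784, 512), (512, 512), (512, 512), (512, 10)]]
x = T.matrix('x')
predict = theano.function([x], feedForward(x, params).argmax(axis=1))
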
# CNN forward pass with batch normalization, run in inference mode
# (train flag fixed to 0); collects running-mean/variance updates.
def feedForward(x, params):
    bn_updates = []
    l = 0
    current_params = params[l]
    c1, newRM, newRV = layers.convBNAct(x, current_params, 0)
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))

    l += 1
    current_params = params[l]
    c2, newRM, newRV = layers.convBNAct(c1, current_params, 0)
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))
    p3 = pool_2d(c2, ws=(2, 2), ignore_border=True)

    l += 1
    current_params = params[l]
    c4, newRM, newRV = layers.convBNAct(p3, current_params, 0)
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))

    l += 1
    current_params = params[l]
    c5, newRM, newRV = layers.convBNAct(c4, current_params, 0)
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))
    p6 = pool_2d(c5, ws=(2, 2), ignore_border=True)

    l += 1
    current_params = params[l]
    c7, newRM, newRV = layers.convBNAct(p6, current_params, 0)
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))

    l += 1
    current_params = params[l]
    c8, newRM, newRV = layers.convBNAct(c7, current_params, 0)
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))
    f9 = c8.flatten(2)

    l += 1
    current_params = params[l]
    h1, newRM, newRV = layers.linBNAct(f9, current_params, 0)
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))

    l += 1
    current_params = params[l]
    h2, newRM, newRV = layers.linBNAct(h1, current_params, 0)
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))

    l += 1
    current_params = params[l]
    z = layers.linOutermost(h2, current_params)
    return z, bn_updates

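`convBNAct` and `linBNAct` return the activation plus new running statistics. Judging from indices 4 and 5 holding the running mean and variance (and from `current_params[:6]` in the last variant), each parameter list appears to be [W, b, gamma, beta, run_mean, run_var]. A minimal sketch under that assumption:

import theano.tensor as T
from theano.tensor.nnet import conv2d

def convBNAct(x, params, train, eps=1e-4, momentum=0.9):
    # params assumed to be [W, b, gamma, beta, run_mean, run_var];
    # eps and momentum are assumptions
    W, b, gamma, beta, run_mean, run_var = params[:6]
    s = conv2d(x, W) + b.dimshuffle('x', 0, 'x', 'x')
    batch_mean = s.mean(axis=(0, 2, 3))
    batch_var = s.var(axis=(0, 2, 3))
    # batch statistics in training, running statistics at inference
    mean = T.switch(T.neq(train, 0), batch_mean, run_mean)
    var = T.switch(T.neq(train, 0), batch_var, run_var)
    bc = lambda v: v.dimshuffle('x', 0, 'x', 'x')  # broadcast per channel
    out = bc(gamma) * (s - bc(mean)) / T.sqrt(bc(var) + eps) + bc(beta)
    new_rm = momentum * run_mean + (1 - momentum) * batch_mean
    new_rv = momentum * run_var + (1 - momentum) * batch_var
    return slopedClipping(out), new_rm, new_rv

`linBNAct` would be the analogous dense version, with `T.dot(x, W) + b` and statistics over `axis=0`.
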
# Noise-gain analysis for the MLP: for every layer, accumulate the
# squared gradient differences between the top class score z_fl and
# every other class score z_i, normalized by the squared score gap.
def feedForward(x, params):
    evalues = []
    activations = []
    weights = []
    biases = []
    activations.append(x)

    l = 0
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    w_flattened = w.flatten()
    current_params[0] = T.reshape(w_flattened, w_shape)
    c1 = layers.linOutermost(x, current_params)
    c1 = layers.slopedClipping(c1)
    activations.append(c1)
    weights.append(w_flattened)
    biases.append(current_params[1])

    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    w_flattened = w.flatten()
    current_params[0] = T.reshape(w_flattened, w_shape)
    c2 = layers.linOutermost(c1, current_params)
    c2 = layers.slopedClipping(c2)
    activations.append(c2)
    weights.append(w_flattened)
    biases.append(current_params[1])

    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    w_flattened = w.flatten()
    current_params[0] = T.reshape(w_flattened, w_shape)
    c3 = layers.linOutermost(c2, current_params)
    c3 = layers.slopedClipping(c3)
    activations.append(c3)
    weights.append(w_flattened)
    biases.append(current_params[1])

    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    w_flattened = w.flatten()
    current_params[0] = T.reshape(w_flattened, w_shape)
    z = layers.linOutermost(c3, current_params)  # z contains all the numerical outputs
    weights.append(w_flattened)
    biases.append(current_params[1])

    z_fl = z.max(axis=1)
    y_fl = z.argmax(axis=1)

    for l in range(4):
        activation = activations[l]
        E = 0.0
        # sum over the batch; gradient shape is batchSize x activation shape
        deriv_fl = T.grad(T.sum(z_fl), activation)
        for i in range(10):
            z_i = z.take(i, axis=1)
            deriv_i = T.grad(T.sum(z_i), activation)
            numerator = T.sqr(deriv_i - deriv_fl)  # batchSize x activation shape
            # guard against zero when z_i is the max itself; denum has
            # shape batchSize, so broadcast it across the activation axis
            denum = T.switch(T.eq(z_fl, z_i), 1 + 0.0 * z_i, T.sqr(z_i - z_fl))
            result = numerator / denum.dimshuffle(0, 'x')
            E = E + T.sum(result)
        evalues.append(E / 24.0)

        E = 0.0
        w = weights[l]
        b = biases[l]
        # jacobian of the batch of scores: shape is batchSize x parameter size
        deriv_fl_w = T.jacobian(z_fl, w)
        deriv_fl_b = T.jacobian(z_fl, b)
        for i in range(10):
            z_i = z.take(i, axis=1)
            deriv_i_w = T.jacobian(z_i, w)
            deriv_i_b = T.jacobian(z_i, b)
            numerator_w = T.sqr(deriv_i_w - deriv_fl_w)
            numerator_b = T.sqr(deriv_i_b - deriv_fl_b)
            denum = T.switch(T.eq(z_fl, z_i), 1 + 0.0 * z_i, T.sqr(z_i - z_fl))
            result_w = numerator_w / denum.dimshuffle(0, 'x')
            result_b = numerator_b / denum.dimshuffle(0, 'x')
            E = E + T.sum(result_w)
            E = E + T.sum(result_b)
        evalues.append(E / 24.0)

    return evalues

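The returned quantities are symbolic scalars; the division by 24.0 suggests averaging over a batch of 24 samples, though that is an inference from the code. A hypothetical way to evaluate them on one batch (input name and shapes are illustrative, with `params` built as in the earlier usage sketch):

import numpy as np
import theano
import theano.tensor as T

# hypothetical: compile the per-layer noise gains into one function
x = T.matrix('x')
evalue_fn = theano.function([x], feedForward(x, params))
x_batch = np.random.rand(24, 784).astype(theano.config.floatX)
gains = evalue_fn(x_batch)  # alternating activation/parameter gains per layer
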
# Quantized ResNet-style forward pass (inference): per-layer weight
# precisions come from BW, per-layer activation precisions from BA,
# each offset by a common B.
def feedForward(x, params, B, BA, BW):
    train = 0
    res0Params = params[0]
    current_params = res0Params[0]
    current_params[0] = layers.quantizeWeight(current_params[0], BW.take(0) + B)
    outAct, _, _ = layers.convBNAct(x, current_params, train)
    outAct = layers.quantizeAct(outAct, BA.take(1) + B)

    outAct = resBlock(outAct, params[1], train, BA.take(2) + B, BA.take(3) + B,
                      BW.take(1) + B, BW.take(2) + B)
    outAct = resBlock(outAct, params[2], train, BA.take(4) + B, BA.take(5) + B,
                      BW.take(3) + B, BW.take(4) + B)
    outAct = resBlock(outAct, params[3], train, BA.take(6) + B, BA.take(7) + B,
                      BW.take(5) + B, BW.take(6) + B)
    outAct = resBlockStride(outAct, params[4], train, BA.take(8) + B,
                            BA.take(9) + B, BW.take(7) + B, BW.take(8) + B,
                            BW.take(9) + B)
    outAct = resBlock(outAct, params[5], train, BA.take(10) + B, BA.take(11) + B,
                      BW.take(10) + B, BW.take(11) + B)
    outAct = resBlock(outAct, params[6], train, BA.take(12) + B, BA.take(13) + B,
                      BW.take(12) + B, BW.take(13) + B)
    outAct = resBlockStride(outAct, params[7], train, BA.take(14) + B,
                            BA.take(15) + B, BW.take(14) + B, BW.take(15) + B,
                            BW.take(16) + B)
    outAct = resBlock(outAct, params[8], train, BA.take(16) + B, BA.take(17) + B,
                      BW.take(17) + B, BW.take(18) + B)
    outAct = resBlock(outAct, params[9], train, BA.take(18) + B, BA.take(19) + B,
                      BW.take(19) + B, BW.take(20) + B)

    pooled = pool_2d(outAct, ws=(8, 8), ignore_border=True, mode='average_exc_pad')
    pooled = pooled.flatten(2)
    res10Params = params[10]
    current_params = res10Params[0]
    current_params[0] = layers.quantizeWeight(current_params[0], BW.take(21) + B)
    z = layers.linOutermost(pooled, current_params)
    return z

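`resBlock` and `resBlockStride` are defined elsewhere. A plausible shape for the non-strided, quantized variant used here, with the argument order inferred from the call sites above, is sketched below; the internals (two conv+BN stages, identity skip, shape-preserving padding inside `convBNAct`) are assumptions.

def resBlock(x, blockParams, train, BA1, BA2, BW1, BW2):
    # hypothetical: two conv+BN+activation stages with an identity skip;
    # assumes convBNAct pads so spatial shape is preserved
    p1, p2 = blockParams[0], blockParams[1]
    p1[0] = layers.quantizeWeight(p1[0], BW1)
    out, _, _ = layers.convBNAct(x, p1, train)
    out = layers.quantizeAct(out, BA1)
    p2[0] = layers.quantizeWeight(p2[0], BW2)
    out, _, _ = layers.convBNAct(out, p2, train)
    out = layers.quantizeAct(out + x, BA2)  # add skip before quantizing
    return out

The strided variant would carry a third weight precision (`BW3`) for the 1x1 projection on the skip path, matching its extra argument.
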
# CNN forward pass with batch normalization and dropout (training-aware).
def feedForward(x, params, train):
    snrg = RandomStreams(seed=12345)
    bn_updates = []
    l = 0
    current_params = params[l]
    c1, newRM, newRV = layers.convBNAct(x, current_params, train)
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))
    c1 = layers.dropout(c1, train, 0.8, snrg)

    l += 1
    current_params = params[l]
    c2, newRM, newRV = layers.convBNAct(c1, current_params, train)
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))
    p3 = pool_2d(c2, ws=(2, 2), ignore_border=True)

    l += 1
    current_params = params[l]
    c4, newRM, newRV = layers.convBNAct(p3, current_params, train)
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))
    c4 = layers.dropout(c4, train, 0.7, snrg)

    l += 1
    current_params = params[l]
    c5, newRM, newRV = layers.convBNAct(c4, current_params, train)
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))
    p6 = pool_2d(c5, ws=(2, 2), ignore_border=True)

    l += 1
    current_params = params[l]
    c7, newRM, newRV = layers.convBNAct(p6, current_params, train)
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))
    c7 = layers.dropout(c7, train, 0.7, snrg)

    l += 1
    current_params = params[l]
    c8, newRM, newRV = layers.convBNAct(c7, current_params, train)
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))
    # p9 = pool_2d(c8, ws=(2, 2), ignore_border=True)
    f9 = c8.flatten(2)

    l += 1
    current_params = params[l]
    h1, newRM, newRV = layers.linBNAct(f9, current_params, train)
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))
    h1 = layers.dropout(h1, train, 0.6, snrg)

    l += 1
    current_params = params[l]
    h2, newRM, newRV = layers.linBNAct(h1, current_params, train)
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))
    h2 = layers.dropout(h2, train, 0.6, snrg)

    l += 1
    current_params = params[l]
    z = layers.linOutermost(h2, current_params)
    return z, bn_updates

# Noise-gain analysis for the ResNet (inference mode): gather every
# activation and weight tensor, then accumulate the normalized squared
# gradient differences between the top class score and the others.
def feedForward(x, params):
    train = 0
    activations = []
    weights = []

    res0Params = params[0]
    res0Activations = []
    current_params = res0Params[0]
    outAct, _, _ = layers.convBNAct(x, current_params, train)
    res0Activations.append(outAct)
    activations.append(res0Activations[0])
    weights.append(current_params[0])

    outAct, resActivations = resBlock(outAct, params[1], train)
    weights.append(params[1][0][0])
    weights.append(params[1][1][0])
    activations.append(resActivations[0])
    activations.append(resActivations[1])

    outAct, resActivations = resBlock(outAct, params[2], train)
    weights.append(params[2][0][0])
    weights.append(params[2][1][0])
    activations.append(resActivations[0])
    activations.append(resActivations[1])

    outAct, resActivations = resBlock(outAct, params[3], train)
    weights.append(params[3][0][0])
    weights.append(params[3][1][0])
    activations.append(resActivations[0])
    activations.append(resActivations[1])

    outAct, resActivations = resBlockStride(outAct, params[4], train)
    weights.append(params[4][0][0])
    weights.append(params[4][1][0])
    weights.append(params[4][2][0])
    activations.append(resActivations[0])
    activations.append(resActivations[1])

    outAct, resActivations = resBlock(outAct, params[5], train)
    weights.append(params[5][0][0])
    weights.append(params[5][1][0])
    activations.append(resActivations[0])
    activations.append(resActivations[1])

    outAct, resActivations = resBlock(outAct, params[6], train)
    weights.append(params[6][0][0])
    weights.append(params[6][1][0])
    activations.append(resActivations[0])
    activations.append(resActivations[1])

    outAct, resActivations = resBlockStride(outAct, params[7], train)
    weights.append(params[7][0][0])
    weights.append(params[7][1][0])
    weights.append(params[7][2][0])
    activations.append(resActivations[0])
    activations.append(resActivations[1])

    outAct, resActivations = resBlock(outAct, params[8], train)
    weights.append(params[8][0][0])
    weights.append(params[8][1][0])
    activations.append(resActivations[0])
    activations.append(resActivations[1])

    outAct, resActivations = resBlock(outAct, params[9], train)
    weights.append(params[9][0][0])
    weights.append(params[9][1][0])
    activations.append(resActivations[0])
    activations.append(resActivations[1])

    pooled = pool_2d(outAct, ws=(8, 8), ignore_border=True, mode='average_exc_pad')
    pooled = pooled.flatten(2)
    res10Params = params[10]
    current_params = res10Params[0]
    z = layers.linOutermost(pooled, current_params)
    weights.append(current_params[0])

    z_fl = z.max(axis=1)
    y_fl = z.argmax(axis=1)

    evalues = []
    print('building activation terms')
    for activation in activations:
        E = 0.0
        deriv_fl = T.grad(T.sum(z_fl), activation)
        # runs over all 100 classes; this is time-consuming to build
        for i in range(100):
            z_i = z.take(i, axis=1)
            deriv_i = T.grad(T.sum(z_i), activation)
            numerator = T.sqr(deriv_i - deriv_fl)
            denum = T.switch(T.eq(z_fl, z_i), 1 + 0.0 * z_i, T.sqr(z_i - z_fl))
            numerator = numerator.flatten()
            result = numerator / denum.sum()
            E = E + T.sum(result)
        evalues.append(E / 24)

    print('building weight terms')
    for w in weights:
        E = 0.0
        deriv_fl_w = T.grad(z_fl.sum(), w)
        deriv_fl_w = deriv_fl_w.flatten()
        # only the first 10 of the 100 classes: running over all of them
        # is too time-consuming, so this is used as an approximation
        for i in range(10):
            z_i = z.take(i, axis=1)
            deriv_i_w = T.grad(z_i.sum(), w)  # grad, not jacobian: z_i.sum() is a scalar
            deriv_i_w = deriv_i_w.flatten()
            numerator_w = T.sqr(deriv_i_w - deriv_fl_w)
            denum = T.switch(T.eq(z_fl, z_i), 1 + 0.0 * z_i, T.sqr(z_i - z_fl))
            result_w = numerator_w / denum.sum()
            E = E + T.sum(result_w)
        evalues.append(E / 24)

    print('done')
    return evalues

# Noise-gain analysis for the CNN: the same normalized gradient-difference
# metric, computed for all nine layers' activations, weights, and biases.
def feedForward(x, params):
    evalues = []
    activations = []
    weights = []
    biases = []
    activations.append(x)

    l = 0
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    wf = w.flatten()
    new_W = T.reshape(wf, w_shape)
    c1 = conv2d(x, new_W) + current_params[1].dimshuffle('x', 0, 'x', 'x')
    c1 = layers.slopedClipping(c1)
    activations.append(c1)
    weights.append(wf)
    biases.append(current_params[1])

    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    wf = w.flatten()
    new_W = T.reshape(wf, w_shape)
    c2 = conv2d(c1, new_W) + current_params[1].dimshuffle('x', 0, 'x', 'x')
    c2 = layers.slopedClipping(c2)
    activations.append(c2)
    weights.append(wf)
    biases.append(current_params[1])
    p3 = pool_2d(c2, ws=(2, 2), ignore_border=True)

    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    wf = w.flatten()
    new_W = T.reshape(wf, w_shape)
    c4 = conv2d(p3, new_W) + current_params[1].dimshuffle('x', 0, 'x', 'x')
    c4 = layers.slopedClipping(c4)
    activations.append(c4)
    weights.append(wf)
    biases.append(current_params[1])

    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    wf = w.flatten()
    new_W = T.reshape(wf, w_shape)
    c5 = conv2d(c4, new_W) + current_params[1].dimshuffle('x', 0, 'x', 'x')
    c5 = layers.slopedClipping(c5)
    activations.append(c5)
    weights.append(wf)
    biases.append(current_params[1])
    p6 = pool_2d(c5, ws=(2, 2), ignore_border=True)

    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    wf = w.flatten()
    new_W = T.reshape(wf, w_shape)
    c7 = conv2d(p6, new_W) + current_params[1].dimshuffle('x', 0, 'x', 'x')
    c7 = layers.slopedClipping(c7)
    activations.append(c7)
    weights.append(wf)
    biases.append(current_params[1])

    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    wf = w.flatten()
    new_W = T.reshape(wf, w_shape)
    c8 = conv2d(c7, new_W) + current_params[1].dimshuffle('x', 0, 'x', 'x')
    c8 = layers.slopedClipping(c8)
    activations.append(c8)
    weights.append(wf)
    biases.append(current_params[1])
    f9 = c8.flatten(2)

    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    wf = w.flatten()
    new_W = T.reshape(wf, w_shape)
    h1 = T.dot(f9, new_W) + current_params[1]
    h1 = layers.slopedClipping(h1)
    activations.append(h1)
    weights.append(wf)
    biases.append(current_params[1])

    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    wf = w.flatten()
    new_W = T.reshape(wf, w_shape)
    h2 = layers.linOutermost(h1, [new_W, current_params[1]])
    h2 = layers.slopedClipping(h2)
    activations.append(h2)
    weights.append(wf)
    biases.append(current_params[1])

    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    wf = w.flatten()
    new_W = T.reshape(wf, w_shape)
    z = layers.linOutermost(h2, [new_W, current_params[1]])
    weights.append(wf)
    biases.append(current_params[1])

    z_fl = z.max(axis=1)
    y_fl = z.argmax(axis=1)

    for activation in activations:
        E = 0.0
        deriv_fl = T.grad(T.sum(z_fl), activation)
        for i in range(10):
            z_i = z.take(i, axis=1)
            deriv_i = T.grad(T.sum(z_i), activation)
            numerator = T.sqr(deriv_i - deriv_fl)
            denum = T.switch(T.eq(z_fl, z_i), 1 + 0.0 * z_i, T.sqr(z_i - z_fl))
            numerator = numerator.flatten(2)  # shape: batchSize x flattened activation
            result = numerator / denum.dimshuffle(0, 'x')
            E = E + T.sum(result)
        evalues.append(E / 24.0)

    for l in range(9):
        w = weights[l]
        b = biases[l]
        E = 0.0
        deriv_fl_w = T.jacobian(z_fl, w)
        deriv_fl_b = T.jacobian(z_fl, b)
        for i in range(10):
            z_i = z.take(i, axis=1)
            deriv_i_w = T.jacobian(z_i, w)
            deriv_i_b = T.jacobian(z_i, b)
            numerator_w = T.sqr(deriv_i_w - deriv_fl_w)
            numerator_b = T.sqr(deriv_i_b - deriv_fl_b)
            denum = T.switch(T.eq(z_fl, z_i), 1 + 0.0 * z_i, T.sqr(z_i - z_fl))
            result_w = numerator_w / denum.dimshuffle(0, 'x')
            result_b = numerator_b / denum.dimshuffle(0, 'x')
            E = E + T.sum(result_w)
            E = E + T.sum(result_b)
        evalues.append(E / 24.0)

    return evalues

# ResNet forward pass (training-aware): returns the output, the
# batch-norm updates, and every intermediate activation.
def feedForward(x, params, train):
    activations = []
    bn_updates = []

    res0Params = params[0]
    res0Activations = []
    current_params = res0Params[0]
    outAct, newRM, newRV = layers.convBNAct(x, current_params, train)
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))
    res0Activations.append(outAct)
    activations.append(res0Activations)

    outAct, resActivations, bn_updates = resBlock(outAct, params[1], train, bn_updates)
    activations.append(resActivations)
    outAct, resActivations, bn_updates = resBlock(outAct, params[2], train, bn_updates)
    activations.append(resActivations)
    outAct, resActivations, bn_updates = resBlock(outAct, params[3], train, bn_updates)
    activations.append(resActivations)
    outAct, resActivations, bn_updates = resBlockStride(outAct, params[4], train, bn_updates)
    activations.append(resActivations)
    outAct, resActivations, bn_updates = resBlock(outAct, params[5], train, bn_updates)
    activations.append(resActivations)
    outAct, resActivations, bn_updates = resBlock(outAct, params[6], train, bn_updates)
    activations.append(resActivations)
    outAct, resActivations, bn_updates = resBlockStride(outAct, params[7], train, bn_updates)
    activations.append(resActivations)
    outAct, resActivations, bn_updates = resBlock(outAct, params[8], train, bn_updates)
    activations.append(resActivations)
    outAct, resActivations, bn_updates = resBlock(outAct, params[9], train, bn_updates)
    activations.append(resActivations)

    pooled = pool_2d(outAct, ws=(8, 8), ignore_border=True, mode='average_exc_pad')
    pooled = pooled.flatten(2)
    res10Activations = []
    res10Params = params[10]
    current_params = res10Params[0]
    z = layers.linOutermost(pooled, current_params)
    res10Activations.append(z)
    activations.append(res10Activations)

    return z, bn_updates, activations

# Quantized CNN forward pass: weights are quantized against per-layer
# dynamic ranges (each scale/inverse-scale pair multiplies to 1), and
# activations are quantized to per-layer precisions from BA.
def feedForward(x, params, B, BA, BW):
    x = layers.quantizeAct(x, B + BA.take(0))

    l = 0
    current_params = params[l]
    current_params[0] = layers.quantizeNormalizedWeight(current_params[0], B + BW.take(l), 16.0, 0.0625)
    current_params[1] = layers.quantizeNormalizedWeight(current_params[1], B + BW.take(l), 16.0, 0.0625)
    c1 = conv2d(x, current_params[0]) + current_params[1].dimshuffle('x', 0, 'x', 'x')
    c1 = layers.quantizeAct(layers.slopedClipping(c1), B + BA.take(l + 1))

    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeNormalizedWeight(current_params[0], B + BW.take(l), 2.0, 0.5)
    current_params[1] = layers.quantizeNormalizedWeight(current_params[1], B + BW.take(l), 2.0, 0.5)
    c2 = conv2d(c1, current_params[0]) + current_params[1].dimshuffle('x', 0, 'x', 'x')
    c2 = layers.quantizeAct(layers.slopedClipping(c2), B + BA.take(l + 1))
    p3 = pool_2d(c2, ws=(2, 2), ignore_border=True)

    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeNormalizedWeight(current_params[0], B + BW.take(l), 4.0, 0.25)
    current_params[1] = layers.quantizeNormalizedWeight(current_params[1], B + BW.take(l), 4.0, 0.25)
    c4 = conv2d(p3, current_params[0]) + current_params[1].dimshuffle('x', 0, 'x', 'x')
    c4 = layers.quantizeAct(layers.slopedClipping(c4), B + BA.take(l + 1))

    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeNormalizedWeight(current_params[0], B + BW.take(l), 2.0, 0.5)
    current_params[1] = layers.quantizeNormalizedWeight(current_params[1], B + BW.take(l), 2.0, 0.5)
    c5 = conv2d(c4, current_params[0]) + current_params[1].dimshuffle('x', 0, 'x', 'x')
    c5 = layers.quantizeAct(layers.slopedClipping(c5), B + BA.take(l + 1))
    p6 = pool_2d(c5, ws=(2, 2), ignore_border=True)

    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeNormalizedWeight(current_params[0], B + BW.take(l), 2.0, 0.5)
    current_params[1] = layers.quantizeNormalizedWeight(current_params[1], B + BW.take(l), 2.0, 0.5)
    c7 = conv2d(p6, current_params[0]) + current_params[1].dimshuffle('x', 0, 'x', 'x')
    c7 = layers.quantizeAct(layers.slopedClipping(c7), B + BA.take(l + 1))

    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeNormalizedWeight(current_params[0], B + BW.take(l), 1.0, 1.0)
    current_params[1] = layers.quantizeNormalizedWeight(current_params[1], B + BW.take(l), 1.0, 1.0)
    c8 = conv2d(c7, current_params[0]) + current_params[1].dimshuffle('x', 0, 'x', 'x')
    c8 = layers.quantizeAct(layers.slopedClipping(c8), B + BA.take(l + 1))
    f9 = c8.flatten(2)

    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeNormalizedWeight(current_params[0], B + BW.take(l), 2.0, 0.5)
    current_params[1] = layers.quantizeNormalizedWeight(current_params[1], B + BW.take(l), 2.0, 0.5)
    h1 = T.dot(f9, current_params[0]) + current_params[1]
    h1 = layers.quantizeAct(layers.slopedClipping(h1), B + BA.take(l + 1))

    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeNormalizedWeight(current_params[0], B + BW.take(l), 1.0, 1.0)
    current_params[1] = layers.quantizeNormalizedWeight(current_params[1], B + BW.take(l), 1.0, 1.0)
    h2 = layers.linOutermost(h1, current_params)
    h2 = layers.quantizeAct(layers.slopedClipping(h2), B + BA.take(l + 1))

    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeNormalizedWeight(current_params[0], B + BW.take(l), 1.0, 1.0)
    current_params[1] = layers.quantizeNormalizedWeight(current_params[1], B + BW.take(l), 1.0, 1.0)
    z = layers.linOutermost(h2, current_params)
    return z

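`quantizeNormalizedWeight` is not shown. Given that each call passes a range and its reciprocal (16.0 and 0.0625, 2.0 and 0.5, and so on), one plausible reading is quantization of the weights relative to a per-layer dynamic range; the sketch below is that guess, not the original code.

import theano.tensor as T

def quantizeNormalizedWeight(w, B, wRange, invRange):
    # hypothetical: normalize by the layer's dynamic range, round to
    # B-bit signed fixed point, then restore the original scale
    scale = 2.0 ** (B - 1)
    wn = T.clip(w * invRange, -1.0, 1.0)  # normalize to [-1, 1]
    return wRange * T.round(wn * scale) / scale
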
# Quantized ResNet training graph: activations are quantized through a
# straight-through estimator (STEquant) with per-layer precisions BA,
# and per-layer gradient-quantization ops are inserted between blocks.
def feedForward(x, params, train):
    bn_updates = []
    BA = [8., 8., 7., 7., 6., 6., 6., 6., 6., 7.,
          7., 7., 7., 6., 6., 7., 6., 5., 4., 3.]

    res0Params = params[0]
    current_params = res0Params[0]
    outAct, newRM, newRV = layers.convBNAct(x, current_params[:6], train)
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))
    outAct = STEquant(outAct, BA[1])
    outAct = quantizeGrad.quantizeGradL1(outAct)

    outAct, bn_updates = resBlock(outAct, params[1], train, bn_updates, BA[2], 2)
    outAct = STEquant(outAct, BA[3])
    outAct = quantizeGrad.quantizeGradL3(outAct)

    outAct, bn_updates = resBlock(outAct, params[2], train, bn_updates, BA[4], 4)
    outAct = STEquant(outAct, BA[5])
    outAct = quantizeGrad.quantizeGradL5(outAct)

    outAct, bn_updates = resBlock(outAct, params[3], train, bn_updates, BA[6], 6)
    outAct = STEquant(outAct, BA[7])
    outAct = quantizeGrad.quantizeGradL7(outAct)

    outAct, bn_updates = resBlockStride(outAct, params[4], train, bn_updates, BA[8], 8)
    outAct = STEquant(outAct, BA[9])
    outAct = quantizeGrad.quantizeGradL9(outAct)

    outAct, bn_updates = resBlock(outAct, params[5], train, bn_updates, BA[10], 10)
    outAct = STEquant(outAct, BA[11])
    outAct = quantizeGrad.quantizeGradL11(outAct)

    outAct, bn_updates = resBlock(outAct, params[6], train, bn_updates, BA[12], 12)
    outAct = STEquant(outAct, BA[13])
    outAct = quantizeGrad.quantizeGradL13(outAct)

    outAct, bn_updates = resBlockStride(outAct, params[7], train, bn_updates, BA[14], 14)
    outAct = STEquant(outAct, BA[15])
    outAct = quantizeGrad.quantizeGradL15(outAct)

    outAct, bn_updates = resBlock(outAct, params[8], train, bn_updates, BA[16], 16)
    outAct = STEquant(outAct, BA[17])
    outAct = quantizeGrad.quantizeGradL17(outAct)

    outAct, bn_updates = resBlock(outAct, params[9], train, bn_updates, BA[18], 18)
    outAct = STEquant(outAct, BA[19])
    outAct = quantizeGrad.quantizeGradL19(outAct)

    pooled = pool_2d(outAct, ws=(8, 8), ignore_border=True, mode='average_exc_pad')
    pooled = pooled.flatten(2)
    res10Params = params[10]
    current_params = res10Params[0]
    z = layers.linOutermost(pooled, current_params[:2])
    z = quantizeGrad.quantizeGradL20(z)
    return z, bn_updates

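`STEquant` is presumably a straight-through quantizer: the forward pass rounds the activation, while the backward pass lets the gradient through unchanged. A minimal Theano sketch of that idea (the range and rounding details are assumptions):

import theano.gradient
import theano.tensor as T

def STEquant(x, B):
    # hypothetical straight-through quantizer: forward = B-bit rounding,
    # backward = identity (the rounding residual is cut out of the graph)
    scale = 2.0 ** (B - 1)
    xq = T.clip(T.round(x * scale) / scale, -1.0, 1.0)
    return x + theano.gradient.disconnected_grad(xq - x)

The per-layer `quantizeGrad.quantizeGradL*` ops would be the mirror image: identity in the forward pass, quantization applied to the gradient flowing backward.
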