def resBlock(preAct, resParams, train, bn_updates, BA, GAselect):
    """Residual block with straight-through activation quantization and a
    per-layer gradient-quantization hook selected by GAselect.

    Appends the BN running mean/var update pairs to bn_updates and returns
    (output activation, bn_updates).
    """
    rng = RandomStreams(12345)

    # Dispatch table replacing the original if/elif chain.  A GAselect value
    # not listed here leaves the activation untouched, exactly as the chain
    # (which had no else branch) did.
    gradQuantizers = {
        2: quantizeGrad.quantizeGradL2,
        4: quantizeGrad.quantizeGradL4,
        6: quantizeGrad.quantizeGradL6,
        10: quantizeGrad.quantizeGradL10,
        12: quantizeGrad.quantizeGradL12,
        16: quantizeGrad.quantizeGradL16,
        18: quantizeGrad.quantizeGradL18,
    }

    firstParams = resParams[0]
    hidden, runMean, runVar = layers.convBNAct(preAct, firstParams[:6], train)
    bn_updates.extend([(firstParams[4], runMean), (firstParams[5], runVar)])

    hidden = STEquant(hidden, BA)
    if GAselect in gradQuantizers:
        hidden = gradQuantizers[GAselect](hidden)
    hidden = layers.dropout(hidden, train, 0.8, rng)

    secondParams = resParams[1]
    residual, runMean, runVar = layers.convBN(hidden, secondParams[:6], train)
    bn_updates.extend([(secondParams[4], runMean), (secondParams[5], runVar)])

    blockOut = layers.slopedClipping(residual + preAct)
    return blockOut, bn_updates
def resBlock(preAct, resParams, train):
    """Plain two-conv residual block.

    Returns the block output together with the two intermediate activations;
    BN running-stat updates are discarded (the `_` returns).
    """
    firstAct, _, _ = layers.convBNAct(preAct, resParams[0], train)
    secondAct, _, _ = layers.convBN(firstAct, resParams[1], train)
    blockOut = layers.slopedClipping(secondAct + preAct)
    return blockOut, [firstAct, blockOut]
def resBlock(preAct, resParams, train, BA1, BA2, BW1, BW2):
    """Residual block with weight quantization (BW1/BW2 bits) and activation
    quantization (BA1/BA2 bits).

    NOTE(review): resParams[i][0] is overwritten with the quantized weight,
    i.e. the caller's parameter structure is mutated in place — the sibling
    quantized feedForward follows the same pattern, so this appears
    deliberate, but confirm.
    """
    firstParams = resParams[0]
    firstParams[0] = layers.quantizeWeight(firstParams[0], BW1)
    act, _, _ = layers.convBNAct(preAct, firstParams, train)
    act = layers.quantizeAct(act, BA1)
    # 0.8 scaling — presumably the inference-time counterpart of the 0.8
    # dropout used by the training-time variant of this block; confirm.
    act = act * 0.8

    secondParams = resParams[1]
    secondParams[0] = layers.quantizeWeight(secondParams[0], BW2)
    out, _, _ = layers.convBN(act, secondParams, train)
    out = layers.slopedClipping(out + preAct)
    return layers.quantizeAct(out, BA2)
def resBlock(preAct, resParams, train, bn_updates):
    """Residual block with dropout that records BN running-stat updates and
    collects the two intermediate activations.

    Returns (output, [first activation, output], bn_updates).
    """
    rng = RandomStreams(12345)

    p0 = resParams[0]
    firstAct, runMean, runVar = layers.convBNAct(preAct, p0, train)
    bn_updates.extend([(p0[4], runMean), (p0[5], runVar)])
    dropped = layers.dropout(firstAct, train, 0.8, rng)

    p1 = resParams[1]
    secondAct, runMean, runVar = layers.convBN(dropped, p1, train)
    bn_updates.extend([(p1[4], runMean), (p1[5], runVar)])

    blockOut = layers.slopedClipping(secondAct + preAct)
    return blockOut, [firstAct, blockOut], bn_updates
def feedForward(x, params):
    """Inference-mode forward pass (the train flag is hard-wired to 0)
    through the 6-conv / 2-pool / 3-linear stack.

    Returns (logits, bn_updates) where bn_updates pairs each BN running
    mean/var shared variable with its new value.
    """
    bn_updates = []

    def tracked(layer_fn, inp, p):
        # Apply a BN-wrapped layer in eval mode and record its running
        # mean/var updates (stored at p[4] and p[5]).
        out, runMean, runVar = layer_fn(inp, p, 0)
        bn_updates.append((p[4], runMean))
        bn_updates.append((p[5], runVar))
        return out

    c1 = tracked(layers.convBNAct, x, params[0])
    c2 = tracked(layers.convBNAct, c1, params[1])
    p3 = pool_2d(c2, ws=(2, 2), ignore_border=True)
    c4 = tracked(layers.convBNAct, p3, params[2])
    c5 = tracked(layers.convBNAct, c4, params[3])
    p6 = pool_2d(c5, ws=(2, 2), ignore_border=True)
    c7 = tracked(layers.convBNAct, p6, params[4])
    c8 = tracked(layers.convBNAct, c7, params[5])
    f9 = c8.flatten(2)
    h1 = tracked(layers.linBNAct, f9, params[6])
    h2 = tracked(layers.linBNAct, h1, params[7])
    z = layers.linOutermost(h2, params[8])
    return z, bn_updates
def feedForward(x, params, B, BA, BW):
    """Inference pass through the quantized ResNet.

    Per-layer weight bitwidths come from BW and activation bitwidths from BA,
    each shifted by B before use.

    NOTE(review): the first-layer and last-layer weight tensors are replaced
    in place with their quantized versions, mutating the caller's params
    structure — confirm this is intended.
    """
    train = 0

    stemParams = params[0][0]
    stemParams[0] = layers.quantizeWeight(stemParams[0], BW.take(0) + B)
    act, _, _ = layers.convBNAct(x, stemParams, train)
    act = layers.quantizeAct(act, BA.take(1) + B)

    # (block fn, params index, BA take-indices, BW take-indices); the stride
    # blocks consume three weight bitwidths instead of two.
    blockPlan = [
        (resBlock, 1, (2, 3), (1, 2)),
        (resBlock, 2, (4, 5), (3, 4)),
        (resBlock, 3, (6, 7), (5, 6)),
        (resBlockStride, 4, (8, 9), (7, 8, 9)),
        (resBlock, 5, (10, 11), (10, 11)),
        (resBlock, 6, (12, 13), (12, 13)),
        (resBlockStride, 7, (14, 15), (14, 15, 16)),
        (resBlock, 8, (16, 17), (17, 18)),
        (resBlock, 9, (18, 19), (19, 20)),
    ]
    for blockFn, pIdx, baIdx, bwIdx in blockPlan:
        # Activation bitwidth args first, then weight bitwidth args — this
        # matches the positional signature of resBlock/resBlockStride.
        bits = [BA.take(i) + B for i in baIdx] + [BW.take(i) + B for i in bwIdx]
        act = blockFn(act, params[pIdx], train, *bits)

    pooled = pool_2d(act, ws=(8, 8), ignore_border=True, mode='average_exc_pad')
    pooled = pooled.flatten(2)

    headParams = params[10][0]
    headParams[0] = layers.quantizeWeight(headParams[0], BW.take(21) + B)
    return layers.linOutermost(pooled, headParams)
def feedForward(x, params, train):
    """Forward pass with dropout through the 6-conv / 2-pool / 3-linear
    stack; `train` switches BN/dropout between training and inference mode.

    Returns (logits, bn_updates) where bn_updates pairs each BN running
    mean/var shared variable with its new value.

    Fix: `f9 = c8.flatten(2)` had been swallowed by the commented-out
    pooling line, leaving f9 undefined when the first linear layer consumes
    it; it is restored here, matching the eval-mode feedForward variant.
    """
    snrg = RandomStreams(seed=12345)
    bn_updates = []

    def tracked(layer_fn, inp, p):
        # Apply a BN-wrapped layer and record its running mean/var updates
        # (stored at p[4] and p[5]).
        out, runMean, runVar = layer_fn(inp, p, train)
        bn_updates.append((p[4], runMean))
        bn_updates.append((p[5], runVar))
        return out

    c1 = tracked(layers.convBNAct, x, params[0])
    c1 = layers.dropout(c1, train, 0.8, snrg)
    c2 = tracked(layers.convBNAct, c1, params[1])
    p3 = pool_2d(c2, ws=(2, 2), ignore_border=True)
    c4 = tracked(layers.convBNAct, p3, params[2])
    c4 = layers.dropout(c4, train, 0.7, snrg)
    c5 = tracked(layers.convBNAct, c4, params[3])
    p6 = pool_2d(c5, ws=(2, 2), ignore_border=True)
    c7 = tracked(layers.convBNAct, p6, params[4])
    c7 = layers.dropout(c7, train, 0.7, snrg)
    c8 = tracked(layers.convBNAct, c7, params[5])
    # p9 = pool_2d(c8, ws=(2, 2), ignore_border=True)  # kept disabled as in the original
    f9 = c8.flatten(2)  # restored: consumed by the first linear layer below
    h1 = tracked(layers.linBNAct, f9, params[6])
    h1 = layers.dropout(h1, train, 0.6, snrg)
    h2 = tracked(layers.linBNAct, h1, params[7])
    h2 = layers.dropout(h2, train, 0.6, snrg)
    z = layers.linOutermost(h2, params[8])
    return z, bn_updates
def feedForward(x, params):
    # Inference-only pass (train hard-wired to 0) that rebuilds the ResNet
    # graph while collecting every intermediate activation and every
    # conv/linear weight, then scores each with a symbolic gradient-based
    # sensitivity measure E; returns the list `evalues` (activations first,
    # then weights).
    train = 0
    activations = []
    weights = []
    res0Params = params[0]
    res0Activations = []
    current_params = res0Params[0]
    outAct, _, _ = layers.convBNAct(x, current_params, train)
    res0Activations.append(outAct)
    activations.append(res0Activations[0])
    weights.append(current_params[0])
    # Blocks 1-9: collect each block's two conv weights (three for the
    # stride blocks at params[4] and params[7]) and its two activations.
    outAct, resActivations = resBlock(outAct, params[1], train)
    weights.append(params[1][0][0])
    weights.append(params[1][1][0])
    activations.append(resActivations[0])
    activations.append(resActivations[1])
    outAct, resActivations = resBlock(outAct, params[2], train)
    weights.append(params[2][0][0])
    weights.append(params[2][1][0])
    activations.append(resActivations[0])
    activations.append(resActivations[1])
    outAct, resActivations = resBlock(outAct, params[3], train)
    weights.append(params[3][0][0])
    weights.append(params[3][1][0])
    activations.append(resActivations[0])
    activations.append(resActivations[1])
    outAct, resActivations = resBlockStride(outAct, params[4], train)
    weights.append(params[4][0][0])
    weights.append(params[4][1][0])
    weights.append(params[4][2][0])
    activations.append(resActivations[0])
    activations.append(resActivations[1])
    outAct, resActivations = resBlock(outAct, params[5], train)
    weights.append(params[5][0][0])
    weights.append(params[5][1][0])
    activations.append(resActivations[0])
    activations.append(resActivations[1])
    outAct, resActivations = resBlock(outAct, params[6], train)
    weights.append(params[6][0][0])
    weights.append(params[6][1][0])
    activations.append(resActivations[0])
    activations.append(resActivations[1])
    outAct, resActivations = resBlockStride(outAct, params[7], train)
    weights.append(params[7][0][0])
    weights.append(params[7][1][0])
    weights.append(params[7][2][0])
    activations.append(resActivations[0])
    activations.append(resActivations[1])
    outAct, resActivations = resBlock(outAct, params[8], train)
    weights.append(params[8][0][0])
    weights.append(params[8][1][0])
    activations.append(resActivations[0])
    activations.append(resActivations[1])
    outAct, resActivations = resBlock(outAct, params[9], train)
    weights.append(params[9][0][0])
    weights.append(params[9][1][0])
    activations.append(resActivations[0])
    activations.append(resActivations[1])
    pooled = pool_2d(outAct, ws=(8, 8), ignore_border=True,
                     mode='average_exc_pad')
    pooled = pooled.flatten(2)
    res10Params = params[10]
    current_params = res10Params[0]
    z = layers.linOutermost(pooled, current_params)
    weights.append(current_params[0])
    # z_fl: top-1 logit per sample; y_fl: its class index.
    z_fl = z.max(axis=1)
    # NOTE(review): y_fl is never used below — confirm it can be dropped.
    y_fl = z.argmax(axis=1)
    evalues = []
    print('got here')
    # Sensitivity of each activation: sum over classes of
    # ||d z_i/d a - d z_fl/d a||^2 / (z_i - z_fl)^2.
    for activation in activations:
        E = 0.0
        deriv_fl = T.grad(T.sum(z_fl), activation)
        for i in range(100):  # should run over 100 but is too time consuming, use this approximation
            z_i = z.take(i, axis=1)
            deriv_i = T.grad(T.sum(z_i), activation)
            numerator = T.sqr(deriv_i - deriv_fl)
            # Guard against division by zero when z_i IS the top logit.
            denum = T.switch(T.eq(z_fl, z_i), 1 + 0.0 * z_i,
                             T.sqr(z_i - z_fl))
            numerator = numerator.flatten()
            result = numerator / (denum.sum())
            E = E + T.sum(result)
        # NOTE(review): the /24 constant is undocumented — presumably the
        # batch size; confirm against the caller.
        evalues.append(E / 24)
    print('got here')
    # Same measure with respect to each weight tensor.
    for w in weights:
        E = 0.0
        deriv_fl_w = T.grad(z_fl.sum(), w)
        deriv_fl_w = deriv_fl_w.flatten()
        # NOTE(review): only 10 classes here vs 100 in the activation loop —
        # confirm the mismatch is intended.
        for i in range(10):
            z_i = z.take(i, axis=1)
            # NOTE(review): T.jacobian here vs T.grad on the same scalar sum
            # in the activation loop — confirm the asymmetry is intended.
            deriv_i_w = T.jacobian(z_i.sum(), w)
            deriv_i_w = deriv_i_w.flatten()
            numerator_w = T.sqr(deriv_i_w - deriv_fl_w)
            denum = T.switch(T.eq(z_fl, z_i), 1 + 0.0 * z_i,
                             T.sqr(z_i - z_fl))
            result_w = numerator_w / (denum.sum())
            E = E + T.sum(result_w)
        evalues.append(E / 24)
    print('got here')
    return evalues
def feedForward(x, params, train):
    """Forward pass through the ResNet that records BN running-stat updates
    and the per-block intermediate activations.

    Returns (logits, bn_updates, activations) where activations is a list of
    per-block activation lists.
    """
    activations = []
    bn_updates = []

    stemParams = params[0][0]
    act, runMean, runVar = layers.convBNAct(x, stemParams, train)
    bn_updates.append((stemParams[4], runMean))
    bn_updates.append((stemParams[5], runVar))
    activations.append([act])

    STRIDE_BLOCKS = (4, 7)  # param groups holding the downsampling blocks
    for idx in range(1, 10):
        blockFn = resBlockStride if idx in STRIDE_BLOCKS else resBlock
        act, blockActs, bn_updates = blockFn(act, params[idx], train,
                                             bn_updates)
        activations.append(blockActs)

    pooled = pool_2d(act, ws=(8, 8), ignore_border=True,
                     mode='average_exc_pad')
    pooled = pooled.flatten(2)

    z = layers.linOutermost(pooled, params[10][0])
    activations.append([z])
    return z, bn_updates, activations
def feedForward(x, params, train):
    """Forward pass with straight-through activation quantization (STEquant)
    and a per-layer gradient quantizer inserted after every block.

    BA holds the hard-coded per-layer activation bitwidths; returns
    (logits, bn_updates).
    """
    bn_updates = []
    # Per-layer activation bitwidths, indexed by layer number.
    BA = [8., 8., 7., 7., 6., 6., 6., 6., 6., 7.,
          7., 7., 7., 6., 6., 7., 6., 5., 4., 3.]

    stemParams = params[0][0]
    act, runMean, runVar = layers.convBNAct(x, stemParams[:6], train)
    bn_updates.append((stemParams[4], runMean))
    bn_updates.append((stemParams[5], runVar))
    act = STEquant(act, BA[1])
    act = quantizeGrad.quantizeGradL1(act)

    for idx in range(1, 10):
        layerNo = 2 * idx  # even layer number, also passed as GAselect
        blockFn = resBlockStride if idx in (4, 7) else resBlock
        act, bn_updates = blockFn(act, params[idx], train, bn_updates,
                                  BA[layerNo], layerNo)
        act = STEquant(act, BA[layerNo + 1])
        # Odd-numbered gradient quantizer between blocks, looked up by name
        # (quantizeGradL3 ... quantizeGradL19).
        act = getattr(quantizeGrad, 'quantizeGradL%d' % (layerNo + 1))(act)

    pooled = pool_2d(act, ws=(8, 8), ignore_border=True,
                     mode='average_exc_pad')
    pooled = pooled.flatten(2)

    headParams = params[10][0]
    z = layers.linOutermost(pooled, headParams[:2])
    return quantizeGrad.quantizeGradL20(z)