def feedForward(x, params, B):
    # quantized inference: B-bit activations, (B+2)-bit weights and biases
    x = layers.quantizeAct(x, B)
    l = 0
    current_params = params[l]
    current_params[0] = layers.quantizeWeight(current_params[0], B + 2)
    current_params[1] = layers.quantizeWeight(current_params[1], B + 2)
    c1 = layers.linOutermost(x, current_params)
    c1 = layers.slopedClipping(c1)
    c1 = layers.quantizeAct(c1, B)
    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeWeight(current_params[0], B + 2)
    current_params[1] = layers.quantizeWeight(current_params[1], B + 2)
    c2 = layers.linOutermost(c1, current_params)
    c2 = layers.slopedClipping(c2)
    c2 = layers.quantizeAct(c2, B)
    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeWeight(current_params[0], B + 2)
    current_params[1] = layers.quantizeWeight(current_params[1], B + 2)
    c3 = layers.linOutermost(c2, current_params)
    c3 = layers.slopedClipping(c3)
    c3 = layers.quantizeAct(c3, B)
    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeWeight(current_params[0], B + 2)
    current_params[1] = layers.quantizeWeight(current_params[1], B + 2)
    z = layers.linOutermost(c3, current_params)
    return z
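# layers.quantizeAct and layers.quantizeWeight are not shown in this section.
# The sketch below is a minimal uniform fixed-point quantizer, ASSUMING values
# live in (-1, 1) with step 2**(1 - B); it is not necessarily the repo's
# actual implementation.
import theano.tensor as T

def quantizeUniformSketch(x, B):
    scale = 2.0 ** (B - 1)  # number of positive quantization levels
    return T.round(x * scale) / scale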
def feedForward(x, params, train):
    snrg = RandomStreams(seed=12345)
    x = layers.dropout(x, train, 0.8, snrg)
    l = 0
    current_params = params[l]
    c1 = layers.linOutermost(x, current_params)
    c1 = layers.slopedClipping(c1)
    c1 = layers.dropout(c1, train, 0.75, snrg)
    l += 1
    current_params = params[l]
    c2 = layers.linOutermost(c1, current_params)
    c2 = layers.slopedClipping(c2)
    c2 = layers.dropout(c2, train, 0.75, snrg)
    l += 1
    current_params = params[l]
    c3 = layers.linOutermost(c2, current_params)
    c3 = layers.slopedClipping(c3)
    c3 = layers.dropout(c3, train, 0.75, snrg)
    l += 1
    current_params = params[l]
    z = layers.linOutermost(c3, current_params)
    return z
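# layers.dropout(x, train, p, srng) is assumed to keep units with probability
# p when train is nonzero and to scale by p at evaluation time (consistent
# with the explicit 0.8 scaling in the inference-time residual blocks further
# below). A minimal sketch under that assumption:
import theano.tensor as T

def dropoutSketch(x, train, p, srng):
    mask = srng.binomial(size=x.shape, n=1, p=p, dtype='float32')
    return T.switch(T.neq(train, 0), x * mask, x * p)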
def feedForward(x, params):
    # plain (unquantized) CNN forward pass
    l = 0
    current_params = params[l]
    c1 = conv2d(x, current_params[0]) + current_params[1].dimshuffle(
        'x', 0, 'x', 'x')
    c1 = layers.slopedClipping(c1)
    l += 1
    current_params = params[l]
    c2 = conv2d(c1, current_params[0]) + current_params[1].dimshuffle(
        'x', 0, 'x', 'x')
    c2 = layers.slopedClipping(c2)
    p3 = pool_2d(c2, ws=(2, 2), ignore_border=True)
    l += 1
    current_params = params[l]
    c4 = conv2d(p3, current_params[0]) + current_params[1].dimshuffle(
        'x', 0, 'x', 'x')
    c4 = layers.slopedClipping(c4)
    l += 1
    current_params = params[l]
    c5 = conv2d(c4, current_params[0]) + current_params[1].dimshuffle(
        'x', 0, 'x', 'x')
    c5 = layers.slopedClipping(c5)
    p6 = pool_2d(c5, ws=(2, 2), ignore_border=True)
    l += 1
    current_params = params[l]
    c7 = conv2d(p6, current_params[0]) + current_params[1].dimshuffle(
        'x', 0, 'x', 'x')
    c7 = layers.slopedClipping(c7)
    l += 1
    current_params = params[l]
    c8 = conv2d(c7, current_params[0]) + current_params[1].dimshuffle(
        'x', 0, 'x', 'x')
    c8 = layers.slopedClipping(c8)
    f9 = c8.flatten(2)
    l += 1
    current_params = params[l]
    h1 = T.dot(f9, current_params[0]) + current_params[1]
    h1 = layers.slopedClipping(h1)
    l += 1
    current_params = params[l]
    h2 = layers.linOutermost(h1, current_params)
    h2 = layers.slopedClipping(h2)
    l += 1
    current_params = params[l]
    z = layers.linOutermost(h2, current_params)
    return z
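# layers.slopedClipping is the nonlinearity used throughout this section. Its
# definition is not shown here; one plausible form is a saturating linear
# unit, where the slope m and the saturation level are pure assumptions:
import theano.tensor as T

def slopedClippingSketch(x, m=0.5):
    return T.clip(m * x, 0.0, 1.0)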
def resBlock(preAct, resParams, train, bn_updates, BA, GAselect):
    snrg = RandomStreams(12345)
    current_params = resParams[0]
    inAct, newRM, newRV = layers.convBNAct(preAct, current_params[:6], train)
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))
    inAct = STEquant(inAct, BA)
    # GAselect routes this activation through the matching
    # gradient-quantization op
    if GAselect == 2:
        inAct = quantizeGrad.quantizeGradL2(inAct)
    elif GAselect == 4:
        inAct = quantizeGrad.quantizeGradL4(inAct)
    elif GAselect == 6:
        inAct = quantizeGrad.quantizeGradL6(inAct)
    elif GAselect == 10:
        inAct = quantizeGrad.quantizeGradL10(inAct)
    elif GAselect == 12:
        inAct = quantizeGrad.quantizeGradL12(inAct)
    elif GAselect == 16:
        inAct = quantizeGrad.quantizeGradL16(inAct)
    elif GAselect == 18:
        inAct = quantizeGrad.quantizeGradL18(inAct)
    inAct = layers.dropout(inAct, train, 0.8, snrg)
    current_params = resParams[1]
    outAct, newRM, newRV = layers.convBN(inAct, current_params[:6], train)
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))
    outAct = layers.slopedClipping(outAct + preAct)
    return outAct, bn_updates
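# STEquant is assumed to be a straight-through estimator: quantize on the
# forward pass, identity gradient on the backward pass. A minimal sketch with
# the same (-1, 1), step-2**(1-B) assumption as the quantizer sketch above:
import theano.tensor as T
from theano.gradient import disconnected_grad

def STEquantSketch(x, B):
    scale = 2.0 ** (B - 1)
    xq = T.round(x * scale) / scale       # forward: quantized value
    return x + disconnected_grad(xq - x)  # backward: gradient of identity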
def resBlockStride(preAct, resParams, train, bn_updates, BA, GAselect):
    snrg = RandomStreams(12345)
    current_params = resParams[0]
    inAct, newRM, newRV = layers.convStrideBNAct(preAct, current_params[:6],
                                                 train)
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))
    inAct = STEquant(inAct, BA)
    if GAselect == 8:
        inAct = quantizeGrad.quantizeGradL8(inAct)
    elif GAselect == 14:
        inAct = quantizeGrad.quantizeGradL14(inAct)
    inAct = layers.dropout(inAct, train, 0.8, snrg)
    current_params = resParams[1]
    outAct, newRM, newRV = layers.convBN(inAct, current_params[:6], train)
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))
    current_params = resParams[2]
    shortCut, newRM, newRV = layers.convStrideBN(preAct, current_params[:6],
                                                 train)
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))
    outAct = layers.slopedClipping(outAct + shortCut)
    return outAct, bn_updates
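# The GAselect if/elif chains in the two blocks above could be table-driven;
# a sketch reusing only the quantizeGradL* ops already referenced here
# (assumes the local quantizeGrad module is importable):
import quantizeGrad

GRAD_QUANT_OPS = {
    2: quantizeGrad.quantizeGradL2, 4: quantizeGrad.quantizeGradL4,
    6: quantizeGrad.quantizeGradL6, 8: quantizeGrad.quantizeGradL8,
    10: quantizeGrad.quantizeGradL10, 12: quantizeGrad.quantizeGradL12,
    14: quantizeGrad.quantizeGradL14, 16: quantizeGrad.quantizeGradL16,
    18: quantizeGrad.quantizeGradL18,
}

def applyGradQuant(act, GAselect):
    op = GRAD_QUANT_OPS.get(GAselect)
    return op(act) if op is not None else act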
def resBlock(preAct, resParams, train):
    resActivations = []
    current_params = resParams[0]
    inAct, _, _ = layers.convBNAct(preAct, current_params, train)
    resActivations.append(inAct)
    current_params = resParams[1]
    outAct, _, _ = layers.convBN(inAct, current_params, train)
    outAct = layers.slopedClipping(outAct + preAct)
    resActivations.append(outAct)
    return outAct, resActivations
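# Hypothetical helper showing how this resBlock variant chains and how the
# per-block activations can be collected (netParams is an assumed list of
# per-block parameter lists):
def stackResBlocks(act, netParams, train):
    allActivations = []
    for blockParams in netParams:
        act, resActs = resBlock(act, blockParams, train)
        allActivations.extend(resActs)
    return act, allActivations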
def feedForward(x, params):
    l = 0
    current_params = params[l]
    c1 = layers.linOutermost(x, current_params)
    c1 = layers.slopedClipping(c1)
    l += 1
    current_params = params[l]
    c2 = layers.linOutermost(c1, current_params)
    c2 = layers.slopedClipping(c2)
    l += 1
    current_params = params[l]
    c3 = layers.linOutermost(c2, current_params)
    c3 = layers.slopedClipping(c3)
    l += 1
    current_params = params[l]
    z = layers.linOutermost(c3, current_params)
    return z
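# Hypothetical usage of the baseline MLP above: build [W, b] shared pairs for
# a 784-256-256-256-10 network (the layer sizes are assumptions) and compile
# a prediction function.
import numpy as np
import theano
import theano.tensor as T

def makeLayerParams(nIn, nOut, rng=np.random):
    W = theano.shared(rng.uniform(-0.05, 0.05, (nIn, nOut)).astype('float32'))
    b = theano.shared(np.zeros(nOut, dtype='float32'))
    return [W, b]

sizes = [784, 256, 256, 256, 10]
params = [makeLayerParams(nIn, nOut) for nIn, nOut in zip(sizes[:-1], sizes[1:])]
x = T.matrix('x')
predict = theano.function([x], T.argmax(feedForward(x, params), axis=1))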
def resBlock(preAct, resParams, train, BA1, BA2, BW1, BW2):
    current_params = resParams[0]
    current_params[0] = layers.quantizeWeight(current_params[0], BW1)
    inAct, _, _ = layers.convBNAct(preAct, current_params, train)
    inAct = layers.quantizeAct(inAct, BA1)
    # 0.8 matches the dropout keep probability used in the training-time blocks
    inAct = 0.8 * inAct
    current_params = resParams[1]
    current_params[0] = layers.quantizeWeight(current_params[0], BW2)
    outAct, _, _ = layers.convBN(inAct, current_params, train)
    outAct = layers.slopedClipping(outAct + preAct)
    outAct = layers.quantizeAct(outAct, BA2)
    return outAct
def resBlockStride(preAct, resParams, train, BA1, BA2, BW1, BW2, BW3):
    current_params = resParams[0]
    current_params[0] = layers.quantizeWeight(current_params[0], BW1)
    inAct, _, _ = layers.convStrideBNAct(preAct, current_params, train)
    inAct = 0.8 * inAct  # dropout keep-probability scaling, as in resBlock
    current_params = resParams[1]
    current_params[0] = layers.quantizeWeight(current_params[0], BW2)
    outAct, _, _ = layers.convBN(inAct, current_params, train)
    current_params = resParams[2]
    current_params[0] = layers.quantizeWeight(current_params[0], BW3)
    shortCut, _, _ = layers.convStrideBN(preAct, current_params, train)
    outAct = layers.slopedClipping(outAct + shortCut)
    return outAct
def resBlockStride(preAct, resParams, train):
    resActivations = []
    current_params = resParams[0]
    inAct, _, _ = layers.convStrideBNAct(preAct, current_params, train)
    resActivations.append(inAct)
    inAct = 0.8 * inAct
    current_params = resParams[1]
    outAct, _, _ = layers.convBN(inAct, current_params, train)
    current_params = resParams[2]
    shortCut, _, _ = layers.convStrideBN(preAct, current_params, train)
    outAct = layers.slopedClipping(outAct + shortCut)
    resActivations.append(outAct)
    return outAct, resActivations
def resBlock(preAct, resParams, train, bn_updates):
    snrg = RandomStreams(12345)
    resActivations = []
    current_params = resParams[0]
    inAct, newRM, newRV = layers.convBNAct(preAct, current_params, train)
    resActivations.append(inAct)
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))
    inAct = layers.dropout(inAct, train, 0.8, snrg)
    current_params = resParams[1]
    outAct, newRM, newRV = layers.convBN(inAct, current_params, train)
    bn_updates.append((current_params[4], newRM))
    bn_updates.append((current_params[5], newRV))
    outAct = layers.slopedClipping(outAct + preAct)
    resActivations.append(outAct)
    return outAct, resActivations, bn_updates
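# The (shared variable, new value) pairs accumulated in bn_updates are meant
# to ride along with the optimizer updates when compiling the training step,
# so the batch-norm running statistics advance each iteration. A hedged
# sketch (x, y, cost, and sgdUpdates are assumptions, not names from this
# repo):
import theano

def compileTrainStep(x, y, cost, sgdUpdates, bn_updates):
    return theano.function([x, y], cost, updates=sgdUpdates + bn_updates)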
def feedForward(x, params):
    evalues = []
    activations = []
    weights = []
    biases = []
    activations.append(x)
    l = 0
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    w_flattened = w.flatten()
    current_params[0] = T.reshape(w_flattened, w_shape)
    c1 = layers.linOutermost(x, current_params)
    c1 = layers.slopedClipping(c1)
    activations.append(c1)
    weights.append(w_flattened)
    biases.append(current_params[1])
    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    w_flattened = w.flatten()
    current_params[0] = T.reshape(w_flattened, w_shape)
    c2 = layers.linOutermost(c1, current_params)
    c2 = layers.slopedClipping(c2)
    activations.append(c2)
    weights.append(w_flattened)
    biases.append(current_params[1])
    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    w_flattened = w.flatten()
    current_params[0] = T.reshape(w_flattened, w_shape)
    c3 = layers.linOutermost(c2, current_params)
    c3 = layers.slopedClipping(c3)
    activations.append(c3)
    weights.append(w_flattened)
    biases.append(current_params[1])
    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    w_flattened = w.flatten()
    current_params[0] = T.reshape(w_flattened, w_shape)
    z = layers.linOutermost(c3, current_params)  # z contains all numerical outputs
    weights.append(w_flattened)
    biases.append(current_params[1])
    z_fl = z.max(axis=1)
    y_fl = z.argmax(axis=1)
    for l in range(4):
        activation = activations[l]
        E = 0.0
        # sum over the batch; the gradient has shape batchSize x activation shape
        deriv_fl = T.grad(T.sum(z_fl), activation)
        for i in range(10):
            z_i = z.take(i, axis=1)
            deriv_i = T.grad(T.sum(z_i), activation)
            numerator = T.sqr(deriv_i - deriv_fl)  # batchSize x activation shape
            # `1 + 0.0 * z_i` keeps the shape while avoiding division by zero
            # when z_i is the top score itself
            denum = T.switch(T.eq(z_fl, z_i), 1 + 0.0 * z_i,
                             T.sqr(z_i - z_fl))
            # denum has shape (batchSize,); add a broadcast axis
            result = numerator / (denum.dimshuffle(0, 'x'))
            E = E + T.sum(result)
        evalues.append(E / 24.0)
        E = 0.0
        w = weights[l]
        b = biases[l]
        # jacobian: shape is batchSize x parameter count
        deriv_fl_w = T.jacobian(z_fl, w)
        deriv_fl_b = T.jacobian(z_fl, b)
        for i in range(10):
            z_i = z.take(i, axis=1)
            deriv_i_w = T.jacobian(z_i, w)
            deriv_i_b = T.jacobian(z_i, b)
            numerator_w = T.sqr(deriv_i_w - deriv_fl_w)
            numerator_b = T.sqr(deriv_i_b - deriv_fl_b)
            denum = T.switch(T.eq(z_fl, z_i), 1 + 0.0 * z_i,
                             T.sqr(z_i - z_fl))
            result_w = numerator_w / (denum.dimshuffle(0, 'x'))
            result_b = numerator_b / (denum.dimshuffle(0, 'x'))
            E = E + T.sum(result_w)
            E = E + T.sum(result_b)
        evalues.append(E / 24.0)
    return evalues
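# The two passes above appear to compute per-layer quantization "noise
# gains": for each layer, E sums |dz_i/da - dz_fl/da|^2 / (z_i - z_fl)^2 over
# the non-maximal outputs z_i, and the division by 24.0 matches the
# Delta^2 / 24 constant that appears in mismatch-probability bounds for
# uniform quantization. Under that reading, the predicted mismatch
# probability between the quantized and floating-point networks is a weighted
# sum of the stored E values. A hypothetical helper; the step convention
# Delta = 2**(1 - B) is an assumption:
def mismatchBoundSketch(evalues_act, evalues_w, BA, BW):
    deltaA = 2.0 ** (1 - BA)  # activation quantization step
    deltaW = 2.0 ** (1 - BW)  # weight quantization step
    # the 1/24 factor is already folded into the stored E values
    return deltaA ** 2 * sum(evalues_act) + deltaW ** 2 * sum(evalues_w)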
def feedForward(x, params):
    evalues = []
    activations = []
    weights = []
    biases = []
    activations.append(x)
    l = 0
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    wf = w.flatten()
    new_W = T.reshape(wf, w_shape)
    c1 = conv2d(x, new_W) + current_params[1].dimshuffle('x', 0, 'x', 'x')
    c1 = layers.slopedClipping(c1)
    activations.append(c1)
    weights.append(wf)
    biases.append(current_params[1])
    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    wf = w.flatten()
    new_W = T.reshape(wf, w_shape)
    c2 = conv2d(c1, new_W) + current_params[1].dimshuffle('x', 0, 'x', 'x')
    c2 = layers.slopedClipping(c2)
    activations.append(c2)
    weights.append(wf)
    biases.append(current_params[1])
    p3 = pool_2d(c2, ws=(2, 2), ignore_border=True)
    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    wf = w.flatten()
    new_W = T.reshape(wf, w_shape)
    c4 = conv2d(p3, new_W) + current_params[1].dimshuffle('x', 0, 'x', 'x')
    c4 = layers.slopedClipping(c4)
    activations.append(c4)
    weights.append(wf)
    biases.append(current_params[1])
    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    wf = w.flatten()
    new_W = T.reshape(wf, w_shape)
    c5 = conv2d(c4, new_W) + current_params[1].dimshuffle('x', 0, 'x', 'x')
    c5 = layers.slopedClipping(c5)
    activations.append(c5)
    weights.append(wf)
    biases.append(current_params[1])
    p6 = pool_2d(c5, ws=(2, 2), ignore_border=True)
    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    wf = w.flatten()
    new_W = T.reshape(wf, w_shape)
    c7 = conv2d(p6, new_W) + current_params[1].dimshuffle('x', 0, 'x', 'x')
    c7 = layers.slopedClipping(c7)
    activations.append(c7)
    weights.append(wf)
    biases.append(current_params[1])
    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    wf = w.flatten()
    new_W = T.reshape(wf, w_shape)
    c8 = conv2d(c7, new_W) + current_params[1].dimshuffle('x', 0, 'x', 'x')
    c8 = layers.slopedClipping(c8)
    activations.append(c8)
    weights.append(wf)
    biases.append(current_params[1])
    f9 = c8.flatten(2)
    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    wf = w.flatten()
    new_W = T.reshape(wf, w_shape)
    h1 = T.dot(f9, new_W) + current_params[1]
    h1 = layers.slopedClipping(h1)
    activations.append(h1)
    weights.append(wf)
    biases.append(current_params[1])
    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    wf = w.flatten()
    new_W = T.reshape(wf, w_shape)
    h2 = layers.linOutermost(h1, [new_W, current_params[1]])
    h2 = layers.slopedClipping(h2)
    activations.append(h2)
    weights.append(wf)
    biases.append(current_params[1])
    l += 1
    current_params = params[l]
    w = current_params[0]
    w_shape = T.shape(w)
    wf = w.flatten()
    new_W = T.reshape(wf, w_shape)
    z = layers.linOutermost(h2, [new_W, current_params[1]])
    weights.append(wf)
    biases.append(current_params[1])
    z_fl = z.max(axis=1)
    y_fl = z.argmax(axis=1)
    for activation in activations:
        E = 0.0
        deriv_fl = T.grad(T.sum(z_fl), activation)
        for i in range(10):
            z_i = z.take(i, axis=1)
            deriv_i = T.grad(T.sum(z_i), activation)
            numerator = T.sqr(deriv_i - deriv_fl)
            denum = T.switch(T.eq(z_fl, z_i), 1 + 0.0 * z_i,
                             T.sqr(z_i - z_fl))
            # flatten to batchSize x (everything else) before broadcasting
            numerator = numerator.flatten(2)
            result = numerator / (denum.dimshuffle(0, 'x'))
            E = E + T.sum(result)
        evalues.append(E / 24.0)
    for l in range(9):
        w = weights[l]
        b = biases[l]
        E = 0.0
        deriv_fl_w = T.jacobian(z_fl, w)
        deriv_fl_b = T.jacobian(z_fl, b)
        for i in range(10):
            z_i = z.take(i, axis=1)
            deriv_i_w = T.jacobian(z_i, w)
            deriv_i_b = T.jacobian(z_i, b)
            numerator_w = T.sqr(deriv_i_w - deriv_fl_w)
            numerator_b = T.sqr(deriv_i_b - deriv_fl_b)
            denum = T.switch(T.eq(z_fl, z_i), 1 + 0.0 * z_i,
                             T.sqr(z_i - z_fl))
            result_w = numerator_w / (denum.dimshuffle(0, 'x'))
            result_b = numerator_b / (denum.dimshuffle(0, 'x'))
            E = E + T.sum(result_w)
            E = E + T.sum(result_b)
        evalues.append(E / 24.0)
    return evalues
def feedForward(x, params, B, BA, BW):
    # per-layer quantization: B is the base bit width; BA and BW hold
    # per-layer offsets for activations and weights; the (r, 1/r) argument
    # pairs set each layer's weight normalization range
    x = layers.quantizeAct(x, B + BA.take(0))
    l = 0
    current_params = params[l]
    current_params[0] = layers.quantizeNormalizedWeight(
        current_params[0], B + BW.take(l), 16.0, 0.0625)
    current_params[1] = layers.quantizeNormalizedWeight(
        current_params[1], B + BW.take(l), 16.0, 0.0625)
    c1 = conv2d(x, current_params[0]) + current_params[1].dimshuffle(
        'x', 0, 'x', 'x')
    c1 = layers.quantizeAct(layers.slopedClipping(c1), B + BA.take(l + 1))
    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeNormalizedWeight(
        current_params[0], B + BW.take(l), 2.0, 0.5)
    current_params[1] = layers.quantizeNormalizedWeight(
        current_params[1], B + BW.take(l), 2.0, 0.5)
    c2 = conv2d(c1, current_params[0]) + current_params[1].dimshuffle(
        'x', 0, 'x', 'x')
    c2 = layers.quantizeAct(layers.slopedClipping(c2), B + BA.take(l + 1))
    p3 = pool_2d(c2, ws=(2, 2), ignore_border=True)
    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeNormalizedWeight(
        current_params[0], B + BW.take(l), 4.0, 0.25)
    current_params[1] = layers.quantizeNormalizedWeight(
        current_params[1], B + BW.take(l), 4.0, 0.25)
    c4 = conv2d(p3, current_params[0]) + current_params[1].dimshuffle(
        'x', 0, 'x', 'x')
    c4 = layers.quantizeAct(layers.slopedClipping(c4), B + BA.take(l + 1))
    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeNormalizedWeight(
        current_params[0], B + BW.take(l), 2.0, 0.5)
    current_params[1] = layers.quantizeNormalizedWeight(
        current_params[1], B + BW.take(l), 2.0, 0.5)
    c5 = conv2d(c4, current_params[0]) + current_params[1].dimshuffle(
        'x', 0, 'x', 'x')
    c5 = layers.quantizeAct(layers.slopedClipping(c5), B + BA.take(l + 1))
    p6 = pool_2d(c5, ws=(2, 2), ignore_border=True)
    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeNormalizedWeight(
        current_params[0], B + BW.take(l), 2.0, 0.5)
    current_params[1] = layers.quantizeNormalizedWeight(
        current_params[1], B + BW.take(l), 2.0, 0.5)
    c7 = conv2d(p6, current_params[0]) + current_params[1].dimshuffle(
        'x', 0, 'x', 'x')
    c7 = layers.quantizeAct(layers.slopedClipping(c7), B + BA.take(l + 1))
    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeNormalizedWeight(
        current_params[0], B + BW.take(l), 1.0, 1.0)
    current_params[1] = layers.quantizeNormalizedWeight(
        current_params[1], B + BW.take(l), 1.0, 1.0)
    c8 = conv2d(c7, current_params[0]) + current_params[1].dimshuffle(
        'x', 0, 'x', 'x')
    c8 = layers.quantizeAct(layers.slopedClipping(c8), B + BA.take(l + 1))
    f9 = c8.flatten(2)
    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeNormalizedWeight(
        current_params[0], B + BW.take(l), 2.0, 0.5)
    current_params[1] = layers.quantizeNormalizedWeight(
        current_params[1], B + BW.take(l), 2.0, 0.5)
    h1 = T.dot(f9, current_params[0]) + current_params[1]
    h1 = layers.quantizeAct(layers.slopedClipping(h1), B + BA.take(l + 1))
    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeNormalizedWeight(
        current_params[0], B + BW.take(l), 1.0, 1.0)
    current_params[1] = layers.quantizeNormalizedWeight(
        current_params[1], B + BW.take(l), 1.0, 1.0)
    h2 = layers.linOutermost(h1, current_params)
    h2 = layers.quantizeAct(layers.slopedClipping(h2), B + BA.take(l + 1))
    l += 1
    current_params = params[l]
    current_params[0] = layers.quantizeNormalizedWeight(
        current_params[0], B + BW.take(l), 1.0, 1.0)
    current_params[1] = layers.quantizeNormalizedWeight(
        current_params[1], B + BW.take(l), 1.0, 1.0)
    z = layers.linOutermost(h2, current_params)
    return z
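# quantizeNormalizedWeight(w, B, r, inv_r) is always called with reciprocal
# pairs ((16.0, 0.0625), (4.0, 0.25), (2.0, 0.5), (1.0, 1.0)), suggesting the
# weights are normalized into (-1, 1) by 1/r, quantized to B bits, then
# rescaled by r. A sketch under that assumption (not the repo's actual code):
import theano.tensor as T

def quantizeNormalizedWeightSketch(w, B, r, inv_r):
    scale = 2.0 ** (B - 1)
    wn = w * inv_r                             # normalize into (-1, 1)
    return r * (T.round(wn * scale) / scale)   # quantize, then undo scaling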