def make_network(minibatch_size=128, debug=False):
    """Build a ResNet-style CIFAR classifier with an orthogonality penalty.

    The network is a 16/32/64-channel residual stack over 32x32 RGB patches,
    global-average-pooled and classified into 10 classes. A soft-orthogonality
    regularizer over the collected conv weights is added to the loss.

    :param minibatch_size: number of samples per minibatch.
    :param debug: if True, print every operator of the final loss graph.
    :return: assembled ``Network`` with ``loss_var`` set.
    """
    patch_size = 32
    images = DataProvider(
        "data",
        shape=(minibatch_size, 3, patch_size, patch_size),
        dtype=np.float32)
    label = DataProvider("label", shape=(minibatch_size, ), dtype=np.int32)

    # Stem: 3x3 conv + batch norm producing 16 channels.
    cur, stem_w = conv_bn(images, 3, 1, 1, 16, True)
    weights = [stem_w]

    blocks_per_stage = 3
    for channels in [16, 32, 64]:
        cur, stage_ws = res_block(cur, channels, blocks_per_stage)
        weights += stage_ws

    # Global average pooling: the final feature map is 8x8, so one 8x8
    # AVERAGE window collapses each channel to a single value.
    feature = Pooling2D(
        "pooling", cur, window=8, stride=8, padding=0, mode="AVERAGE")
    pred = Softmax(
        "pred",
        FullyConnected(
            "fc0", feature, output_dim=10, nonlinearity=Identity()))

    network = Network(outputs=[pred])
    network.loss_var = CrossEntropyLoss(pred, label)

    # Soft orthogonality regularizer: flatten each weight to (out, in),
    # normalize its columns, and penalize the Gram matrix's distance from
    # the identity.
    # NOTE(review): columns are divided by the *squared* L2 norm (no sqrt),
    # so diag(A) is 1/||col||^2 rather than 1 — confirm this is intended.
    lmd = 1
    for w in weights:
        w = w.reshape(w.partial_shape[0], -1).dimshuffle(1, 0)
        w = w / ((w**2).sum(axis=0)).dimshuffle('x', 0)
        A = O.MatMul(w.dimshuffle(1, 0), w)
        network.loss_var += lmd * (
            (A - np.identity(A.partial_shape[0]))**2).mean()

    if debug:
        visitor = NetworkVisitor(network.loss_var)
        for opr in visitor.all_oprs:
            print(opr)
            print(opr.partial_shape)
            print("input = ", opr.inputs)
            print("output = ", opr.outputs)
            print()
    return network
def make_network(minibatch_size=64):
    """Build a DenseNet-style CIFAR classifier with an orthogonality penalty.

    Three dense blocks (growth rate 12, depth (40-4)//3) each followed by a
    transition layer, global average pooling, then a 10-way softmax. A
    soft-orthogonality regularizer over the collected conv weights is added
    to the cross-entropy loss.

    :param minibatch_size: number of samples per minibatch.
    :return: assembled ``Network`` with ``loss_var`` set.
    """
    patch_size = 32
    images = DataProvider(
        "data", shape=(minibatch_size, 3, patch_size, patch_size))
    label = DataProvider("label", shape=(minibatch_size, ))

    # Stem: plain 3x3 conv (no BN, no ReLU) producing 16 channels.
    cur, stem_w = bn_relu_conv(images, 3, 1, 1, 16, False, False)
    weights = [stem_w]

    growth_rate, layers_per_block = 12, (40 - 4) // 3
    for stage in range(3):
        cur, block_ws = dense_block(cur, growth_rate, layers_per_block)
        weights += block_ws
        cur, trans_ws = transition(cur, stage)
        weights += trans_ws

    # Global average pooling implemented as a mean over both spatial axes.
    print(cur.partial_shape)
    feature = cur.mean(axis=2).mean(axis=2)
    pred = Softmax(
        "pred",
        FullyConnected("fc0", feature, output_dim=10,
                       nonlinearity=Identity()))

    network = Network(outputs=[pred])
    network.loss_var = CrossEntropyLoss(pred, label)

    # Soft orthogonality regularizer: flatten each weight to (out, in),
    # normalize its columns, and penalize the Gram matrix's distance from
    # the identity. Entries may be None (layers without a weight) and are
    # skipped.
    # NOTE(review): columns are divided by the *squared* L2 norm (no sqrt),
    # so diag(A) is 1/||col||^2 rather than 1 — confirm this is intended.
    lmd = 0.01
    for w in weights:
        if w is None:
            continue
        print(w.partial_shape)
        w = w.reshape(w.partial_shape[0], -1).dimshuffle(1, 0)
        w = w / ((w**2).sum(axis=0)).dimshuffle('x', 0)
        A = O.MatMul(w.dimshuffle(1, 0), w)
        network.loss_var += lmd * (
            (A - np.identity(A.partial_shape[0]))**2).sum()
    return network