def res_layer(inp, chl, stride = 1, proj = False):
    pre = inp
    inp = conv_bn(inp, 1, stride, 0, chl // 4, True)
    #inp = conv_bn(inp, 3, 1, 1, chl // 4, True)
    inp = den_layer(inp, chl // 4)
    inp = conv_bn(inp, 1, 1, 0, chl, False)
    if proj:
        pre = conv_bn(pre, 1, stride, 0, chl, False)
    name = inp.name
    #Global Average Pooling
    SE = inp.mean(axis = 3).mean(axis = 2)
    lay = FullyConnected(
        "fc0({})".format(name), SE,
        output_dim = chl,
        nonlinearity = ReLU()
    )
    #fc1
    lay = FullyConnected(
        "fc1({})".format(name), lay,
        output_dim = chl,
        nonlinearity = Sigmoid()
    )
    inp = inp * lay.dimshuffle(0, 1, 'x', 'x')
    inp = arith.ReLU(inp + pre)
    return inp
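# --- Illustrative sketch (not part of the model code) ----------------------
# res_layer above is a squeeze-and-excitation (SE) block: global average
# pooling, two fully connected layers, then a per-channel sigmoid gate.
# A minimal NumPy rendering of that recalibration; the weight arguments are
# hypothetical stand-ins for the learned fc0/fc1 parameters.
import numpy as np

def se_recalibrate_sketch(x, w0, b0, w1, b1):
    """x: (N, C, H, W); w0: (C, D), w1: (D, C), with matching biases."""
    squeeze = x.mean(axis=(2, 3))                      # global average pool -> (N, C)
    hidden = np.maximum(squeeze @ w0 + b0, 0)          # fc0 + ReLU
    gate = 1.0 / (1.0 + np.exp(-(hidden @ w1 + b1)))   # fc1 + Sigmoid -> (N, C)
    return x * gate[:, :, None, None]                  # per-channel rescaling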
def dense_block(inp, k, l):
    lay = inp
    for i in range(l):
        cur_lay = bn_relu_conv(lay, 3, 1, 1, k, True, True)
        name = cur_lay.name
        group = k // 4
        #G.P.
        SE = cur_lay.mean(axis=3).mean(axis=2)
        SE = FullyConnected("fc0({})".format(name), SE,
                            output_dim=(k // group)**2 * group,
                            nonlinearity=ReLU())
        SE = FullyConnected("fc1({})".format(name), SE,
                            output_dim=(k // group)**2 * group,
                            nonlinearity=Sigmoid())
        print(SE.name)
        SE = SE.reshape(cur_lay.shape[0] * group, k // group, k // group, 1, 1)
        preshape = cur_lay.shape
        cur_lay = cur_lay.reshape(1, cur_lay.shape[0] * cur_lay.shape[1],
                                  cur_lay.shape[2], cur_lay.shape[3])
        cur_lay = Conv2D("conv({})".format(name), cur_lay,
                         kernel_shape=1, stride=1, padding=0,
                         W=SE, nonlinearity=Identity())
        cur_lay = cur_lay.reshape(preshape)
        #cur_lay = cur_lay * SE.dimshuffle(0, 1, 'x', 'x')
        lay = Concat([lay, cur_lay], axis=1)
    return lay
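# --- Illustrative sketch (not part of the model code) ----------------------
# dense_block above generates a per-sample grouped 1x1 kernel from the SE
# branch, then applies it by folding the batch axis into the channel axis so
# that a single grouped Conv2D evaluates N different kernels at once. A NumPy
# sketch of the same computation (hypothetical helper, loop form for clarity):
import numpy as np

def per_sample_grouped_1x1_sketch(x, kernels, group):
    """x: (N, k, H, W); kernels: (N * group, k // group, k // group, 1, 1)."""
    n, k, h, w = x.shape
    cpg = k // group                                   # channels per group
    out = np.empty_like(x)
    for i in range(n):                                 # a different kernel per sample
        for g in range(group):
            ker = kernels[i * group + g, :, :, 0, 0]   # (cpg, cpg)
            blk = x[i, g * cpg:(g + 1) * cpg]          # (cpg, H, W)
            out[i, g * cpg:(g + 1) * cpg] = np.tensordot(ker, blk, axes=1)
    return out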
def make_network(minibatch_size=64):
    patch_size = 32
    inp = DataProvider("data", shape=(minibatch_size, 3, patch_size, patch_size))
    label = DataProvider("label", shape=(minibatch_size, ))

    k, l = 20, (40 - 4) // 3
    lay = bn_relu_conv(inp, 3, 1, 1, k, False, False)
    for i in range(3):
        lay = transition(dense_block(lay, k, l), i)

    #global average pooling
    print(lay.partial_shape)
    feature = lay.mean(axis=2).mean(axis=2)
    #feature = Pooling2D("glbpoling", lay, window = 8, stride = 8, mode = "AVERAGE")
    pred = Softmax(
        "pred",
        FullyConnected("fc0", feature, output_dim=10, nonlinearity=Identity()))

    network = Network(outputs=[pred])
    network.loss_var = CrossEntropyLoss(pred, label)

    info = CInfo()
    info.get_complexity(network.outputs).as_table().show()
    return network
def make_network(minibatch_size=64):
    patch_size = 32
    inp = DataProvider("data", shape=(minibatch_size, 3, patch_size, patch_size))
    label = DataProvider("label", shape=(minibatch_size, ))

    lay = bn_relu_conv(inp, 3, 1, 1, 16, False, False)
    k, l = 24, (100 - 4) // 3
    for i in range(3):
        lay = transition(dense_block(lay, k, l, False), i)

    feature = lay
    pred = Softmax(
        "pred",
        FullyConnected("fc0", feature, output_dim=10,
                       W=G(mean=0, std=(1 / feature.partial_shape[1])**0.5),
                       b=C(0),
                       nonlinearity=Identity()))

    network = Network(outputs=[pred])
    network.loss_var = CrossEntropyLoss(pred, label)
    return network
def make_network(minibatch_size=128):
    patch_size = 32
    inp = DataProvider("data", shape=(minibatch_size, 15, patch_size, patch_size))
    label = DataProvider("label", shape=(minibatch_size, ))

    #lay = bn_relu_conv(inp, 3, 1, 1, 16, False, False)
    lay, conv = conv_bn(inp, 3, 1, 1, 16, True)
    out = [conv]
    for chl in [32, 64, 128]:
        for i in range(10):
            lay, conv = conv_bn(lay, 3, 1, 1, chl, True)
            out.append(conv)
        if chl != 128:
            # distinct names so the resize and pooling operators do not collide
            lay = b_resize("resize{}".format(chl), lay)
            lay = Pooling2D("pooling{}".format(chl), lay, window=2, mode="MAX")

    #global average pooling
    print(lay.partial_shape)
    feature = lay.mean(axis=2).mean(axis=2)
    #feature = Pooling2D("glbpoling", lay, window = 8, stride = 8, mode = "AVERAGE")
    pred = Softmax(
        "pred",
        FullyConnected("fc0", feature, output_dim=10,
                       W=G(mean=0, std=(1 / feature.partial_shape[1])**0.5),
                       b=C(0),
                       nonlinearity=Identity()))

    network = Network(outputs=[pred] + out)
    network.loss_var = CrossEntropyLoss(pred, label)
    return network
def make_network(minibatch_size=64):
    patch_size = 32
    inp = DataProvider("data", shape=(minibatch_size, 3, patch_size, patch_size))
    label = DataProvider("label", shape=(minibatch_size, ))

    lay = bn_relu_conv(inp, 3, 1, 1, 16, False, False)
    k, l = 12, (40 - 4) // 3
    for i in range(3):
        lay = transition(dense_block(lay, k, l), i)

    #global average pooling
    print(lay.partial_shape)
    feature = lay.mean(axis=2).mean(axis=2)
    #feature = Pooling2D("glbpoling", lay, window = 8, stride = 8, mode = "AVERAGE")
    pred = Softmax(
        "pred",
        FullyConnected("fc0", feature, output_dim=10,
                       W=G(mean=0, std=(1 / feature.partial_shape[1])**0.5),
                       b=C(0),
                       nonlinearity=Identity()))

    network = Network(outputs=[pred])
    network.loss_var = CrossEntropyLoss(pred, label)
    return network
def make_network(minibatch_size = 128, debug = False):
    patch_size = 32
    inp = DataProvider("data", shape = (minibatch_size, 3, patch_size, patch_size),
                       dtype = np.float32)
    label = DataProvider("label", shape = (minibatch_size, ), dtype = np.int32)

    lay = conv_bn(inp, 3, 1, 1, 16, True)
    n = 18
    lis = [16, 32, 64]
    for i in lis:
        lay = res_block(lay, i, n)

    #global average pooling
    #feature = lay.mean(axis = 2).mean(axis = 2)
    feature = Pooling2D("pooling", lay, window = 8, stride = 8, padding = 0,
                        mode = "AVERAGE")
    pred = Softmax("pred", FullyConnected(
        "fc0", feature,
        output_dim = 10,
        nonlinearity = Identity()
    ))

    network = Network(outputs = [pred])
    network.loss_var = CrossEntropyLoss(pred, label)

    if debug:
        visitor = NetworkVisitor(network.loss_var)
        for i in visitor.all_oprs:
            print(i)
            print(i.partial_shape)
            print("input = ", i.inputs)
            print("output = ", i.outputs)
            print()
    return network
def make_network(minibatch_size = 128):
    patch_size = 32
    inp = DataProvider("data", shape = (minibatch_size, 3, patch_size, patch_size))
    label = DataProvider("label", shape = (minibatch_size, ))

    #lay = bn_relu_conv(inp, 3, 1, 1, 16, False, False)
    lay, conv = conv_bn(inp, 3, 1, 1, 16, True)
    out = [conv]
    for chl in [32 * 3, 64 * 3, 128 * 3]:
        for i in range(10):
            lay, conv1, conv2 = xcep_layer(lay, chl)
            out.append(conv1)
            out.append(conv2)
        if chl != 128 * 3:
            lay = Pooling2D("pooling{}".format(chl), lay, window = 2, mode = "MAX")

    #global average pooling
    print(lay.partial_shape)
    feature = lay.mean(axis = 2).mean(axis = 2)
    #feature = Pooling2D("glbpoling", lay, window = 8, stride = 8, mode = "AVERAGE")

    W = ortho_group.rvs(feature.partial_shape[1])
    W = W[:, :10]
    W = ConstProvider(W)
    b = ConstProvider(np.zeros((10, )))
    pred = Softmax("pred", FullyConnected(
        "fc0", feature,
        output_dim = 10,
        W = W,
        b = b,
        nonlinearity = Identity()
    ))

    network = Network(outputs = [pred] + out)
    network.loss_var = CrossEntropyLoss(pred, label)
    return network
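# --- Illustrative sketch (not part of the model code) ----------------------
# The classifier above is built from scipy's ortho_group: taking the first 10
# columns of a random orthogonal matrix gives exactly orthonormal class
# directions. The feature dimension 64 below is a hypothetical stand-in.
import numpy as np
from scipy.stats import ortho_group

W = ortho_group.rvs(64)[:, :10]
print(np.allclose(W.T @ W, np.eye(10)))  # True: the columns stay orthonormal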
def make_network(minibatch_size=128):
    patch_size = 32
    inp = DataProvider("data", shape=(minibatch_size, 3, patch_size, patch_size))
    label = DataProvider("label", shape=(minibatch_size, ))

    lay = conv_bn(inp, 3, 1, 1, 16, True)
    n = 3
    lis = [16, 32, 64]
    for i in lis:
        lay = res_block(lay, i, n)

    #global average pooling
    feature = lay.mean(axis=2).mean(axis=2)
    pred = Softmax(
        "pred",
        FullyConnected("fc0", feature, output_dim=10,
                       W=G(mean=0, std=(2 / 64)**0.5),
                       b=C(0),
                       nonlinearity=Identity()))

    network = Network(outputs=[pred])
    network.loss_var = CrossEntropyLoss(pred, label)
    return network
def dense_block(inp, k, l):
    lay = inp
    for i in range(l):
        cur_lay = bn_relu_conv(lay, 3, 1, 1, k, True, True)
        name = cur_lay.name
        #G.P.
        SE = cur_lay.mean(axis=3).mean(axis=2)
        SE = FullyConnected("fc0({})".format(name), SE, output_dim=k,
                            nonlinearity=ReLU())
        SE = FullyConnected("fc1({})".format(name), SE, output_dim=k,
                            nonlinearity=Sigmoid())
        cur_lay = cur_lay * SE.dimshuffle(0, 1, 'x', 'x')
        lay = Concat([lay, cur_lay], axis=1)
    return lay
def make_network(minibatch_size=128):
    patch_size = 32
    inp = DataProvider("data", shape=(minibatch_size, 3, patch_size, patch_size))
    label = DataProvider("label", shape=(minibatch_size, ))

    # Short-circuit: build a minimal IndexingRemap graph and return it.
    # Everything after this return is unreachable scaffolding kept from
    # earlier experiments.
    idxmap = np.zeros((128, 3, 32, 32, 4), dtype=np.int32)
    sample = IndexingRemap(inp, idxmap)
    network = Network(outputs=[sample])
    sample = FullyConnected("fc", sample, output_dim=1)
    network.loss_var = sample.sum()
    return network

    #lay = bn_relu_conv(inp, 3, 1, 1, 16, False, False)
    lay, conv = conv_bn(inp, 3, 1, 1, 32, True)
    out = [conv]
    """
    for chl in [32, 64, 128]:
        for i in range(10):
            lay, conv = conv_bn(lay, 3, 1, 1, chl, True)
            out.append(conv)
        if chl != 128:
            lay = dfpooling("pooling{}".format(chl), lay)
    """
    chl = 32
    for i in range(3):
        lay, conv = dfconv(lay, chl, True, i == 0)

    #global average pooling
    print(lay.partial_shape)
    feature = lay.mean(axis=2).mean(axis=2)
    #feature = Pooling2D("glbpoling", lay, window = 8, stride = 8, mode = "AVERAGE")
    pred = Softmax(
        "pred",
        FullyConnected("fc0", feature, output_dim=10,
                       W=G(mean=0, std=(1 / feature.partial_shape[1])**0.5),
                       b=C(0),
                       nonlinearity=Identity()))

    network = Network(outputs=[pred] + out)
    network.loss_var = CrossEntropyLoss(pred, label)
    return network
def make_network(minibatch_size=128, debug=False):
    patch_size = 32
    inp = DataProvider("data", shape=(minibatch_size, 3, patch_size, patch_size),
                       dtype=np.float32)
    label = DataProvider("label", shape=(minibatch_size, ), dtype=np.int32)

    lay, w = conv_bn(inp, 3, 1, 1, 16, True)
    lis_w = [w]
    n = 3
    lis = [16, 32, 64]
    for i in lis:
        lay, lis_new = res_block(lay, i, n)
        lis_w += lis_new

    #global average pooling
    #feature = lay.mean(axis = 2).mean(axis = 2)
    feature = Pooling2D("pooling", lay, window=8, stride=8, padding=0,
                        mode="AVERAGE")
    pred = Softmax(
        "pred",
        FullyConnected("fc0", feature, output_dim=10,
                       #W = G(mean = 0, std = (1 / 64)**0.5),
                       #b = C(0),
                       nonlinearity=Identity()))

    network = Network(outputs=[pred])
    network.loss_var = CrossEntropyLoss(pred, label)

    # soft orthogonality regularization over all collected conv weights
    lmd = 1
    for w in lis_w:
        # flatten each filter, then scale every column to unit L2 norm so the
        # Gram matrix has ones on its diagonal
        w = w.reshape(w.partial_shape[0], -1).dimshuffle(1, 0)
        w = w / ((w**2).sum(axis=0)**0.5).dimshuffle('x', 0)
        A = O.MatMul(w.dimshuffle(1, 0), w)
        network.loss_var += lmd * ((A - np.identity(A.partial_shape[0]))**2).mean()

    if debug:
        visitor = NetworkVisitor(network.loss_var)
        for i in visitor.all_oprs:
            print(i)
            print(i.partial_shape)
            print("input = ", i.inputs)
            print("output = ", i.outputs)
            print()
    return network
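# --- Illustrative sketch (not part of the model code) ----------------------
# The loop above adds a soft orthogonality penalty: each filter is flattened
# and normalized, and the Gram matrix of the filter bank is pushed toward the
# identity. The same quantity in NumPy (hypothetical helper):
import numpy as np

def soft_orthogonality_penalty_sketch(w):
    """w: (out_channels, in_channels, kh, kw) convolution weight."""
    flat = w.reshape(w.shape[0], -1)                  # one row per filter
    flat = flat / np.linalg.norm(flat, axis=1, keepdims=True)
    gram = flat @ flat.T                              # (out, out)
    return ((gram - np.eye(w.shape[0]))**2).mean()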
def res_layer(inp, chl):
    pre = inp
    inp = conv_bn(inp, 3, 1, 1, chl, True)
    inp = conv_bn(inp, 3, 1, 1, chl, False)
    name = inp.name
    #Global Average Pooling
    SE = inp.mean(axis=3).mean(axis=2)
    #fc0
    SE = FullyConnected("fc0({})".format(name), SE,
                        output_dim=SE.partial_shape[1],
                        nonlinearity=ReLU())
    #fc1
    SE = FullyConnected("fc1({})".format(name), SE,
                        output_dim=SE.partial_shape[1],
                        nonlinearity=Sigmoid())
    inp = inp * SE.dimshuffle(0, 1, 'x', 'x')
    inp = arith.ReLU(inp + pre)
    return inp
def res_layer(inp, chl, stride = 1, proj = False, se = None):
    pre = inp
    inp = conv_bn(inp, 1, stride, 0, chl // 4, True)
    inp = conv_bn(inp, 3, 1, 1, chl // 4, True)
    inp = conv_bn(inp, 1, 1, 0, chl, False)
    if proj:
        pre = conv_bn(pre, 1, stride, 0, chl, False)
    name = inp.name
    #Global Average Pooling
    SE = inp.mean(axis = 3).mean(axis = 2)
    #fc0
    SE = FullyConnected(
        "fc0({})".format(name), SE,
        output_dim = chl // 4,
        nonlinearity = ReLU()
    )
    #fc1
    if se is None:
        se = SE
    else:
        se = O.Concat([se, SE], axis = 1)
    SE = FullyConnected(
        "fc1({})".format(name), se,
        output_dim = chl,
        nonlinearity = Sigmoid()
    )
    se = FullyConnected(
        "fc({})".format(se.name), se,
        output_dim = chl // 4,
        nonlinearity = ReLU()
    )
    inp = inp * SE.dimshuffle(0, 1, 'x', 'x')
    inp = arith.ReLU(inp + pre)
    return inp, se
def res_layer(inp, chl, stride=1, proj=False):
    pre = inp
    inp = conv_bn(inp, 1, stride, 0, chl // 4, True)
    name = inp.name
    #Global Average Pooling
    SE = inp.mean(axis=3).mean(axis=2)
    width = 4
    lay = FullyConnected("fc0({})".format(name), SE,
                         output_dim=chl // 4,
                         nonlinearity=ReLU())
    #fc1
    lay = FullyConnected("fc1({})".format(name), lay,
                         output_dim=chl // 4 * width,
                         nonlinearity=Identity())
    lay = lay.reshape(inp.shape[0], chl // 4, width)
    lay = Softmax("softmax({})".format(name), lay, axis=2)
    for i in range(width):
        if i == 0:
            inp_lay = inp
        else:
            inp_lay = O.Concat([inp[:, width:, :, :], inp[:, :width, :, :]],
                               axis=1)
        inp_lay = inp_lay * lay[:, :, i].dimshuffle(0, 1, 'x', 'x')
        inp = O.ReLU(inp_lay)
    inp = conv_bn(inp, 3, 1, 1, chl // 4, True)
    inp = conv_bn(inp, 1, 1, 0, chl, False)
    if proj:
        pre = conv_bn(pre, 1, stride, 0, chl, False)
    inp = arith.ReLU(inp + pre)
    return inp
def res_layer(inp, chl):
    pre = inp
    inp = conv_bn(inp, 3, 1, 1, chl, True)
    inp = conv_bn(inp, 3, 1, 1, chl, False)
    name = inp.name
    #Global Average Pooling
    SE = inp.mean(axis=3).mean(axis=2)
    group = chl // 4
    ksize = 3
    #fc0
    SE = FullyConnected("fc0({})".format(name), SE,
                        output_dim=(chl // group)**2 * group * ksize,
                        nonlinearity=ReLU())
    #fc1
    SE = FullyConnected("fc1({})".format(name), SE,
                        output_dim=(chl // group)**2 * group * ksize**2,
                        nonlinearity=Sigmoid())
    SE = SE - 0.5
    SE = SE.reshape(inp.shape[0] * group, chl // group, chl // group,
                    ksize, ksize)
    #inp = inp * SE.dimshuffle(0, 1, 'x', 'x')
    inp = inp.reshape(1, inp.shape[0] * inp.shape[1], inp.shape[2], inp.shape[3])
    inp = Conv2D(
        "conv({})".format(name), inp,
        kernel_shape=ksize, stride=1, padding=1,
        #output_nr_channel = chl,
        W=SE,
        nonlinearity=Identity(),
        #group = group
    )
    inp = inp.reshape(pre.shape)
    inp = arith.ReLU(inp + pre)
    return inp
def get(args):
    img_size = 224
    num_inputs = 3
    data = DataProvider('data', shape=(args.batch_size, num_inputs,
                                       img_size, img_size))
    inp = data

    f = create_bn_relu("conv1", inp, ksize=7, stride=2, pad=3, num_outputs=64,
                       has_relu=True, conv_name_fun=None, args=args)
    f = Pooling2D("pool1", f, window=3, stride=2, padding=1, mode="MAX",
                  format=args.format)

    pre = [2, 3, 4, 5]
    stages = [3, 4, 6, 3]
    mid_outputs = [64, 128, 256, 512]
    enable_stride = [False, True, True, True]
    for p, s, o, es in zip(pre, stages, mid_outputs, enable_stride):
        for i in range(s):
            has_proj = i == 0
            stride = 1 if not es or i > 0 else 2
            prefix = "{}{}".format(p, chr(ord("a") + i))
            f = create_bottleneck(prefix, f, stride, o, o * 4, args, has_proj)
            print("{}\t{}".format(prefix, f.partial_shape))

    f = Pooling2D("pool5", f, window=7, stride=7, padding=0, mode="AVERAGE",
                  format=args.format)
    f = FullyConnected("fc1000", f, output_dim=1000,
                       nonlinearity=mgsk.opr.helper.elemwise_trans.Identity())
    f = Softmax("cls_softmax", f)
    f.init_weights()

    net = RawNetworkBuilder(inputs=[data], outputs=[f])
    return net
def make_network(minibatch_size=128, debug=False):
    patch_size = 32
    inp = DataProvider("data", shape=(minibatch_size, 3, patch_size, patch_size),
                       dtype=np.float32)
    label = DataProvider("label", shape=(minibatch_size, ), dtype=np.int32)

    lay = conv_bn(inp, 3, 1, 1, 16 * 4 * 2, True)
    n = 4 * 3
    group = 8
    lis = [16 * 4, 32 * 4, 64 * 4]
    for i in range(len(lis)):
        lay = res_block(lay, lis[i], i, n, group)

    #global average pooling
    #feature = lay.mean(axis = 2).mean(axis = 2)
    feature = Pooling2D("pooling", lay, window=8, stride=8, padding=0,
                        mode="AVERAGE")
    pred = Softmax(
        "pred",
        FullyConnected("fc0", feature, output_dim=10,
                       #W = G(mean = 0, std = (1 / 64)**0.5),
                       #b = C(0),
                       nonlinearity=Identity()))

    network = Network(outputs=[pred])
    network.loss_var = CrossEntropyLoss(pred, label)

    info = CInfo()
    info.get_complexity(network.outputs).as_table().show()
    """
    if debug:
        visitor = NetworkVisitor(network.loss_var)
        for i in visitor.all_oprs:
            print(i)
            print(i.partial_shape)
            print("input = ", i.inputs)
            print("output = ", i.outputs)
            print()
    """
    return network
def make_network(minibatch_size=128):
    patch_size = 32
    inp = DataProvider("data", shape=(minibatch_size, 3, patch_size, patch_size))
    label = DataProvider("label", shape=(minibatch_size, ))

    #lay = bn_relu_conv(inp, 3, 1, 1, 16, False, False)
    lay, conv = conv_bn(inp, 3, 1, 1, 16, True)
    out = [conv]
    for chl in [32, 64, 128]:
        for i in range(10):
            lay, conv = conv_bn(lay, 3, 1, 1, chl, True)
            out.append(conv)
        if chl != 128:
            lay = Pooling2D("pooling{}".format(chl), lay, window=2, mode="MAX")

    #global average pooling
    print(lay.partial_shape)
    feature = lay.mean(axis=2).mean(axis=2)
    #feature = Pooling2D("glbpoling", lay, window = 8, stride = 8, mode = "AVERAGE")
    pred = Softmax(
        "pred",
        FullyConnected("fc0", feature, output_dim=10,
                       W=G(mean=0, std=(1 / feature.partial_shape[1])**0.5),
                       b=C(0),
                       nonlinearity=Identity()))

    network = Network(outputs=[pred] + out)
    network.loss_var = CrossEntropyLoss(pred, label)

    #conv1 = out[0]
    #print(conv1.inputs[1].partial_shape)
    lmd = 0.01
    for conv_lay in out:
        w = conv_lay
        #w = w.reshape(w.partial_shape[0], -1).dimshuffle(1, 0)
        w = w.dimshuffle(1, 0, 2, 3)
        w = w.reshape(w.partial_shape[0], -1).dimshuffle(1, 0)
        # scale each column (one channel's flattened responses) to unit L2
        # norm so the Gram matrix has ones on its diagonal
        w = w / ((w**2).sum(axis=0)**0.5).dimshuffle('x', 0)
        A = MatMul(w.dimshuffle(1, 0), w)
        #print(A.partial_shape)
        network.loss_var += lmd * ((A - np.identity(A.partial_shape[0]))**2).sum()
    return network
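# --- Illustrative sketch (not part of the model code) ----------------------
# Unlike the weight-based penalty elsewhere in this file, the loop above
# decorrelates feature maps: each channel's responses are flattened and
# normalized, and their Gram matrix is driven toward the identity. NumPy
# equivalent (hypothetical helper):
import numpy as np

def feature_decorrelation_penalty_sketch(fmap):
    """fmap: (N, C, H, W) activations of one conv layer."""
    c = fmap.shape[1]
    flat = fmap.transpose(1, 0, 2, 3).reshape(c, -1)   # one row per channel
    flat = flat / np.linalg.norm(flat, axis=1, keepdims=True)
    gram = flat @ flat.T                               # (C, C)
    return ((gram - np.eye(c))**2).sum()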
def make_network(minibatch_size=128, debug=False):
    patch_size = 32
    inp = DataProvider("data", shape=(minibatch_size, 3, patch_size, patch_size),
                       dtype=np.float32)
    label = DataProvider("label", shape=(minibatch_size, ), dtype=np.int32)

    lay = conv_bn(inp, 3, 1, 1, 16, True)
    lis = [16, 32, 64]
    for i in range(len(lis)):
        #lay = res_block(lay, lis[i], i, n)
        for j in range(40):
            lay = conv_bn(lay, 3, 1, 1, lis[i], False)
        if i < len(lis) - 1:
            lay = conv_bn(lay, 2, 2, 0, lis[i + 1], True)

    #global average pooling
    feature = lay.mean(axis=2).mean(axis=2)
    pred = Softmax(
        "pred",
        FullyConnected("fc0", feature, output_dim=10,
                       #W = G(mean = 0, std = (1 / 64)**0.5),
                       #b = C(0),
                       nonlinearity=Identity()))

    network = Network(outputs=[pred])
    #info = CInfo()
    #info.get_complexity(network.outputs).as_table().show()
    network.loss_var = CrossEntropyLoss(pred, label)
    """
    if debug:
        visitor = NetworkVisitor(network.loss_var)
        for i in visitor.all_oprs:
            print(i)
            print(i.partial_shape)
            print("input = ", i.inputs)
            print("output = ", i.outputs)
            print()
    """
    return network
def make_network(minibatch_size=64):
    patch_size = 32
    inp = DataProvider("data", shape=(minibatch_size, 3, patch_size, patch_size))
    label = DataProvider("label", shape=(minibatch_size, ))

    lay, w = bn_relu_conv(inp, 3, 1, 1, 16, False, False)
    lis_w = [w]
    k, l = 12, (40 - 4) // 3
    for i in range(3):
        #lay = transition(dense_block(lay, k, l), i)
        lay, lis_new = dense_block(lay, k, l)
        lis_w += lis_new
        lay, lis_new = transition(lay, i)
        lis_w += lis_new

    #global average pooling
    print(lay.partial_shape)
    feature = lay.mean(axis=2).mean(axis=2)
    #feature = Pooling2D("glbpoling", lay, window = 8, stride = 8, mode = "AVERAGE")
    pred = Softmax(
        "pred",
        FullyConnected("fc0", feature, output_dim=10, nonlinearity=Identity()))

    network = Network(outputs=[pred])
    network.loss_var = CrossEntropyLoss(pred, label)

    # soft orthogonality regularization over all collected conv weights
    lmd = 0.01
    for w in lis_w:
        if w is None:
            continue
        print(w.partial_shape)
        # flatten each filter, then scale every column to unit L2 norm so the
        # Gram matrix has ones on its diagonal
        w = w.reshape(w.partial_shape[0], -1).dimshuffle(1, 0)
        w = w / ((w**2).sum(axis=0)**0.5).dimshuffle('x', 0)
        A = O.MatMul(w.dimshuffle(1, 0), w)
        network.loss_var += lmd * ((A - np.identity(A.partial_shape[0]))**2).sum()
    return network
def make_network():
    batch_size = 200
    img_size = 224
    data = DataProvider("data", shape=(batch_size, 3, img_size, img_size))
    label = DataProvider("label", shape=(batch_size, ))

    # VGG-16 trunk: (block, #convs, #channels)
    f = data
    for block, n_convs, chl in [(1, 2, 64), (2, 2, 128), (3, 3, 256),
                                (4, 3, 512), (5, 3, 512)]:
        for i in range(n_convs):
            f = create_conv_relu("conv{}_{}".format(block, i + 1), f,
                                 ksize=3, stride=1, pad=1, num_outputs=chl)
        f = CaffePooling2D("pool{}".format(block), f, window=2, stride=2,
                           padding=0, mode="MAX")

    f = FullyConnected("fc6", f, output_dim=4096,
                       nonlinearity=mgsk.opr.helper.elemwise_trans.Identity())
    f = ReLU(f)
    f = FullyConnected("fc7", f, output_dim=4096,
                       nonlinearity=mgsk.opr.helper.elemwise_trans.Identity())
    f = ReLU(f)
    f = FullyConnected("fc8", f, output_dim=1000,
                       nonlinearity=mgsk.opr.helper.elemwise_trans.Identity())
    f = Softmax("cls_softmax", f)

    net = RawNetworkBuilder(inputs=[data, label], outputs=[f],
                            loss=CrossEntropyLoss(f, label))
    return net
def make_network():
    batch_size = config.minibatch_size
    img_size = config.img_size
    data = DataProvider("data", shape=(batch_size, 3, img_size, img_size))
    label = DataProvider("label", shape=(batch_size, 8))

    f = create_bn_relu("conv1", data, ksize=3, stride=2, pad=1, num_outputs=24)
    f = Pooling2D("pool1", f, window=3, stride=2, padding=1, mode="MAX")

    pre = [2, 3, 4]
    stages = [4, 8, 4]
    mid_outputs = [32, 64, 128]
    enable_stride = [True, True, True]
    for p, s, o, es in zip(pre, stages, mid_outputs, enable_stride):
        for i in range(s):
            prefix = "{}{}".format(p, chr(ord("a") + i))
            stride = 1 if not es or i > 0 else 2
            has_proj = i == 0
            f = create_xception(prefix, f, stride, o, o * 4, has_proj)
            print("{}\t{}".format(prefix, f.partial_shape))

    # eight binary heads on the shared trunk: pool5_i -> fc3_i -> cls_softmax_i
    heads = []
    for idx in range(1, 9):
        g = Pooling2D("pool5_{}".format(idx), f, window=7, stride=7, padding=0,
                      mode="AVERAGE")
        g = FullyConnected("fc3_{}".format(idx), g, output_dim=2,
                           nonlinearity=mgsk.opr.helper.elemwise_trans.Identity())
        g = Softmax("cls_softmax_{}".format(idx), g)
        heads.append(g)

    losses = {}
    # cross-entropy loss: head 1 predicts pose; heads 2-8 are masked by label 1
    labels = [label[:, i] for i in range(8)]
    loss_xent_0 = O.cross_entropy(heads[0], labels[0], name='loss_pose')
    try:
        masked_losses = [
            O.cross_entropy_with_mask(heads[i], labels[i], labels[0])
            for i in range(1, 8)
        ]
    except Exception as err:
        print(err)
    loss_xent = loss_xent_0 + sum(masked_losses)
    losses['loss_xent'] = loss_xent

    # weight decay regularization loss
    loss_weight_decay = 0
    if config.weight_decay:
        weight_decay = config.weight_decay
        with GroupNode('weight_decay').context_reg():
            for opr in iter_dep_opr(loss_xent):
                if not isinstance(opr, ParamProvider) or opr.freezed:
                    continue
                param = opr
                name = param.name
                if not name.endswith('W'):
                    continue
                # logger.info('L2 regularization on `{}`'.format(name))
                loss_weight_decay += 0.5 * weight_decay * (param**2).sum()
    losses['loss_weight_decay'] = loss_weight_decay

    # total loss
    with GroupNode('loss').context_reg():
        loss = sum(losses.values())
    losses['loss'] = loss

    # for multi-GPU tasks, tell the GPUs to summarize the final loss
    O.utils.hint_loss_subgraph([loss_xent, loss_weight_decay], loss)

    net = RawNetworkBuilder(inputs=[data, label], outputs=heads, loss=loss)

    metrics1 = get_metrics(heads[0], labels[0])
    net.extra['extra_outputs'] = {
        'pred_{}'.format(i): heads[i] for i in range(8)
    }
    net.extra['extra_outputs']['label'] = label
    net.extra['extra_outputs'].update(metrics1)
    net.extra['extra_outputs'].update(losses)
    net.extra['extra_config'] = {
        'monitor_vars': list(losses.keys()) + list(metrics1.keys())
    }
    return net
def dfpooling(name, inp, window = 2, padding = 0, dx = [0, 1], dy = [0, 1]):
    #inp = ConstProvider([[[[1, 2], [3, 4]]]], dtype = np.float32)
    """
    Add a new conv & bn to ensure that the offset feature map has unit variance.
    """
    ker_shape = window
    stride = window
    offsetlay = Conv2D(
        name + "conv", inp,
        kernel_shape = 3, stride = 1, padding = 1,
        output_nr_channel = ker_shape**2,
        W = G(mean = 0, std = (1 / (3**2 * inp.partial_shape[1]))**0.5),
        nonlinearity = Identity()
    )
    #offsetlay = BN(name + "BN", offsetlay, eps = 1e-9)
    offsetx = Conv2D(
        name + "conv1x", offsetlay,
        kernel_shape = ker_shape, stride = stride, padding = padding,
        output_nr_channel = ker_shape**2,
        W = G(mean = 0, std = (1 / (ker_shape**2 * inp.partial_shape[2]))**0.5),
        nonlinearity = Identity()
    )
    offsety = Conv2D(
        name + "conv1y", offsetlay,
        kernel_shape = ker_shape, stride = stride, padding = padding,
        output_nr_channel = ker_shape**2,
        W = G(mean = 0, std = (1 / (ker_shape**2 * inp.partial_shape[3]))**0.5),
        nonlinearity = Identity()
    )
    offset = Concat([offsetx, offsety], axis = 1)  # note: unused; the FC below reads offsetx
    ndim = ker_shape**2 * offsetx.partial_shape[2] * offsetx.partial_shape[3] * 2
    offset = FullyConnected(
        name + "offset", offsetx,
        output_dim = ndim,
        W = G(mean = 0, std = (1 / ndim)**2),
        #W = C(0),
        b = C(0),
        nonlinearity = Identity()
    )
    offsetx = offset[:, :ndim // 2].reshape(offsetx.shape)
    offsety = offset[:, ndim // 2:].reshape(offsety.shape)
    """
    offsetx = FullyConnected(
        name + "offsetx", offsetx,
        output_dim = ndim,
        W = G(mean = 0, std = gamma / ndim),
        b = C(0),
        nonlinearity = Identity()
    )
    offsetx = offsetx.reshape(offsety.shape)
    offsety = FullyConnected(
        name + "offsety", offsety,
        output_dim = ndim,
        W = G(mean = 0, std = gamma / ndim),
        b = C(0),
        nonlinearity = Identity()
    )
    offsety = offsety.reshape(offsetx.shape)
    print(offsety.partial_shape)
    """
    #offsetx = ZeroGrad(offsetx)
    #offsety = ZeroGrad(offsety)

    def DeformReshape(inp, ker_shape):
        inp = inp.reshape(inp.shape[0], ker_shape, ker_shape,
                          inp.shape[2], inp.partial_shape[3])
        inp = inp.dimshuffle(0, 3, 1, 4, 2)
        inp = inp.reshape(inp.shape[0], inp.shape[1] * inp.shape[2],
                          inp.shape[3] * inp.shape[4])
        return inp

    outputs = []
    for sx in range(2):
        for sy in range(2):
            if sx == 0:
                ofx = Floor(offsetx)
                bilx = 1 - (offsetx - ofx)
            else:
                ofx = Ceil(offsetx)
                bilx = 1 - (ofx - offsetx)
            if sy == 0:
                ofy = Floor(offsety)
                bily = 1 - (offsety - ofy)
            else:
                ofy = Ceil(offsety)
                bily = 1 - (ofy - offsety)
            """
            No padding:
            padding1 = ConstProvider(np.zeros((inp.partial_shape[0], inp.partial_shape[1], 1, inp.partial_shape[3])))
            padding2 = ConstProvider(np.zeros((inp.partial_shape[0], inp.partial_shape[1], inp.partial_shape[2] + 2, 1)))
            arg_fea = Concat([padding1, inp, padding1], axis = 2)
            arg_fea = Concat([padding2, arg_fea, padding2], axis = 3)
            """
            arg_fea = inp
            #one_mat = ConstProvider(np.ones((inp.partial_shape[2], inp.partial_shape[3])), dtype = np.int32)
            one_mat = ConstProvider(1, dtype = np.int32).add_axis(0).broadcast(
                (ofx.shape[2], ofx.shape[3]))
            affx = (Cumsum(one_mat, axis = 0) - 1) * stride
            affy = (Cumsum(one_mat, axis = 1) - 1) * stride
            ofx = ofx + affx.dimshuffle('x', 'x', 0, 1)
            ofy = ofy + affy.dimshuffle('x', 'x', 0, 1)
            one_mat = ConstProvider(np.ones((ker_shape, ofx.partial_shape[2],
                                             ofx.partial_shape[3])))
            #ofx[:, :ker_shape, :, :] -= 1
            #ofx[:, ker_shape*2:, :, :] += 1
            ofx += Concat([one_mat * i for i in dx], axis = 0).dimshuffle('x', 0, 1, 2)
            #ofy[:, ::3, :, :] -= 1
            #ofy[:, 2::3, :, :] += 1
            one_mat = ones((1, ofx.partial_shape[2], ofx.partial_shape[3]))
            one_mat = Concat([one_mat * i for i in dy], axis = 0)
            one_mat = Concat([one_mat] * ker_shape, axis = 0)
            ofy += one_mat.dimshuffle('x', 0, 1, 2)
            # clamp sampling coordinates to the feature map
            ofx = Max(Min(ofx, arg_fea.partial_shape[2] - 1), 0)
            ofy = Max(Min(ofy, arg_fea.partial_shape[3] - 1), 0)

            ofx = DeformReshape(ofx, ker_shape)
            ofy = DeformReshape(ofy, ker_shape)
            bilx = DeformReshape(bilx, ker_shape)
            bily = DeformReshape(bily, ker_shape)

            # flatten (x, y) into a linear index and gather via IndexingRemap
            of = ofx * arg_fea.partial_shape[2] + ofy
            arg_fea = arg_fea.reshape(arg_fea.shape[0], arg_fea.shape[1], -1)
            of = of.reshape(ofx.shape[0], -1)
            of = of.dimshuffle(0, 'x', 1)
            #of = Concat([of] * arg_fea.partial_shape[1], axis = 1)
            of = of.broadcast((of.shape[0], arg_fea.shape[1], of.shape[2]))
            arx = Linspace(0, arg_fea.shape[0], arg_fea.shape[0], endpoint = False)
            arx = arx.add_axis(1).add_axis(2).broadcast(of.shape)
            ary = Linspace(0, arg_fea.shape[1], arg_fea.shape[1], endpoint = False)
            ary = ary.add_axis(0).add_axis(2).broadcast(of.shape)
            of = of.add_axis(3)
            arx = arx.add_axis(3)
            ary = ary.add_axis(3)
            idxmap = Astype(Concat([arx, ary, of], axis = 3), np.int32)
            """
            sample = []
            for i in range(arg_fea.partial_shape[0]):
                for j in range(arg_fea.partial_shape[1]):
                    sample.append(arg_fea[i][j].ai[of[i][j]].dimshuffle('x', 0))
            sample = Concat(sample, axis = 0)
            """
            sample = IndexingRemap(arg_fea, idxmap).reshape(
                inp.shape[0], inp.shape[1], bilx.shape[1], -1)
            bilx = bilx.dimshuffle(0, 'x', 1, 2).broadcast(sample.shape)
            bily = bily.dimshuffle(0, 'x', 1, 2).broadcast(sample.shape)
            sample *= bilx * bily
            outputs.append(sample)

    output = outputs[0]
    for i in outputs[1:]:
        output += i
    return Pooling2D(name, output, window = 2, mode = "AVERAGE")
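# --- Illustrative sketch (not part of the model code) ----------------------
# The four (sx, sy) branches in dfpooling implement standard bilinear
# sampling: floor/ceil neighbours weighted by 1 - distance. For one channel
# and one fractional coordinate, the sum over the branches reduces to the
# hypothetical helper below (like the original, the integer-coordinate case
# is degenerate because floor == ceil):
import numpy as np

def bilinear_sample_sketch(fea, ox, oy):
    """fea: (H, W); ox, oy: fractional sampling coordinates."""
    x0, y0 = int(np.floor(ox)), int(np.floor(oy))
    x1, y1 = int(np.ceil(ox)), int(np.ceil(oy))
    wx0, wx1 = 1 - (ox - x0), 1 - (x1 - ox)        # bilx for sx = 0, 1
    wy0, wy1 = 1 - (oy - y0), 1 - (y1 - oy)        # bily for sy = 0, 1
    return (fea[x0, y0] * wx0 * wy0 + fea[x0, y1] * wx0 * wy1 +
            fea[x1, y0] * wx1 * wy0 + fea[x1, y1] * wx1 * wy1)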
def make_network(minibatch_size=128, debug=False):
    patch_size = 32
    inp = DataProvider("data", shape=(minibatch_size, 3, patch_size, patch_size),
                       dtype=np.float32)
    label = DataProvider("label", shape=(minibatch_size, ), dtype=np.int32)

    lay = conv_bn(inp, 3, 1, 1, 16, True)
    lis = [16, 32, 64]
    for i in range(len(lis)):
        #lay = res_block(lay, lis[i], i, n)
        for j in range(10):
            lay = conv_bn(lay, 3, 1, 1, lis[i], True)
        if i < len(lis) - 1:
            lay = conv_bn(lay, 2, 2, 0, lis[i + 1], True)

    feature = lay
    #global average pooling
    #feature = lay.mean(axis = 2).mean(axis = 2)
    #feature = Pooling2D("pooling", lay, window = 8, stride = 8, padding = 0, mode = "AVERAGE")
    """
    lay = lay.reshape(lay.shape[0], lay.shape[1], lay.shape[2] * lay.shape[3])
    print(lay.partial_shape)
    a = O.ParamProvider(
        "a", np.random.randn(lay.partial_shape[2], 10) *
        (1 / lay.partial_shape[2])**0.5)
    a = a.dimshuffle('x', 0, 1)
    a = a.broadcast((lay.partial_shape[0], a.partial_shape[1], a.partial_shape[2]))
    print(a.partial_shape)
    b = O.ParamProvider(
        "b", np.random.randn(lay.partial_shape[2], 10) *
        (1 / lay.partial_shape[2])**0.5)
    b = b.dimshuffle('x', 0, 1)
    b = b.broadcast((lay.partial_shape[0], b.partial_shape[1], b.partial_shape[2]))
    print(b.partial_shape)
    fca = O.BatchedMatMul(lay, a)
    fcb = O.BatchedMatMul(lay, b)
    fc = O.BatchedMatMul(fca.dimshuffle(0, 2, 1), fcb) / 64
    outs = []
    for i in range(10):
        outs.append(fc[:, i, i].dimshuffle(0, 'x'))
    fc = O.Concat(outs, axis = 1)
    pred = Softmax("pred", fc)
    """
    pred = Softmax(
        "pred",
        FullyConnected("fc0", feature, output_dim=10,
                       #W = G(mean = 0, std = (1 / 64)**0.5),
                       #b = C(0),
                       nonlinearity=Identity()))

    network = Network(outputs=[pred])
    #info = CInfo()
    #info.get_complexity(network.outputs).as_table().show()
    network.loss_var = CrossEntropyLoss(pred, label)
    """
    if debug:
        visitor = NetworkVisitor(network.loss_var)
        for i in visitor.all_oprs:
            print(i)
            print(i.partial_shape)
            print("input = ", i.inputs)
            print("output = ", i.outputs)
            print()
    """
    return network