def conv_bn(inp, ker_shape, stride, padding, out_chl, isrelu, mode=None):
    """Build a Conv2D -> BN -> per-channel affine (-> ReLU) stack.

    For 1x1 kernels the weight is the leading ``in_channels`` columns of a
    random orthogonal ``out_chl x out_chl`` matrix wrapped in a
    ``ConstProvider``, with an all-zero ``ConstProvider`` bias. For any other
    kernel size the weight uses a zero-mean Gaussian initialiser with variance
    ``(1 + int(isrelu)) / (ker_shape**2 * in_channels)`` and a zero bias
    initialiser.

    Args:
        inp: input variable; ``inp.partial_shape[1]`` is used as the number
            of input channels.
        ker_shape: square kernel size.
        stride: convolution stride.
        padding: convolution padding.
        out_chl: number of output channels.
        isrelu: append a ReLU after the affine layer when true.
        mode: forwarded to ``Conv2D`` as ``group``.

    Returns:
        ``(activation, conv)`` - the final output and the raw convolution.

    Side effects:
        Increments the module-level ``idx`` counter (used in layer names) and
        prints the input shape, kernel size and output channel count.
    """
    global idx
    idx += 1
    print(inp.partial_shape, ker_shape, out_chl)

    in_chl = inp.partial_shape[1]
    if ker_shape != 1:
        fan_in = ker_shape**2 * in_chl
        weight = G(mean=0, std=((1 + int(isrelu)) / fan_in)**0.5)
        bias = C(0)
    else:
        basis = ortho_group.rvs(out_chl)[:, :in_chl]
        basis = basis.reshape(basis.shape[0], basis.shape[1], 1, 1)
        weight = ConstProvider(basis)
        bias = ConstProvider(np.zeros(out_chl))

    conv = Conv2D(
        "conv{}".format(idx), inp,
        kernel_shape=ker_shape,
        stride=stride,
        padding=padding,
        output_nr_channel=out_chl,
        group=mode,
        W=weight,
        b=bias,
        nonlinearity=Identity(),
    )
    normed = BN("bn{}".format(idx), conv, eps=1e-9)
    normed = ElementwiseAffine("bnaff{}".format(idx), normed,
                               shared_in_channels=False, k=C(1), b=C(0))
    activation = arith.ReLU(normed) if isrelu else normed
    return activation, conv
def relu_conv_bn(inp, ker_shape, stride, padding, out_chl, isrelu=True, isbn=True):
    """Apply (ReLU ->) Conv2D (-> BN -> per-channel affine) to ``inp``.

    Args:
        inp: input variable.
        ker_shape, stride, padding: convolution geometry.
        out_chl: number of output channels.
        isrelu: apply a ReLU to the input before the convolution.
        isbn: follow the convolution with ``BN`` and an ``ElementwiseAffine``
            initialised to scale 1 / shift 0.

    Returns:
        The output variable of the last layer that was added.

    Side effects:
        Increments the module-level ``idx`` counter used in layer names.
    """
    global idx
    idx += 1

    pre_act = arith.ReLU(inp) if isrelu else inp
    conv = Conv2D("conv{}".format(idx), pre_act,
                  kernel_shape=ker_shape,
                  stride=stride,
                  padding=padding,
                  output_nr_channel=out_chl,
                  nonlinearity=Identity())
    if not isbn:
        return conv

    normed = BN("bn{}".format(idx), conv, eps=1e-9)
    return ElementwiseAffine("bnaff{}".format(idx), normed,
                             shared_in_channels=False, k=C(1), b=C(0))
def skip(inp, isdown, chl):
    """Build the two-branch shortcut path for a block.

    ``isdown == -1`` returns ``inp`` untouched. Otherwise two 1x1 convolutions
    with ``chl // 2`` output channels each are concatenated along the channel
    axis and passed through BN plus a per-channel affine. When ``isdown`` is
    non-zero each branch is first sub-sampled with a window-1 / stride-2
    average pooling; the second branch pools ``inp`` shifted by one pixel in
    both spatial axes.

    Args:
        inp: input variable.
        isdown: ``-1`` for identity, ``0`` for no sub-sampling, any other
            value for stride-2 sub-sampling. It is also formatted into the
            names of the BN / affine layers.
        chl: total number of output channels.

    Returns:
        The shortcut variable.
    """
    global idx
    if isdown == -1:
        return inp

    subsample = isdown != 0
    half_chl = chl // 2

    # Branch order matters: relu_conv_bn bumps ``idx``, and the second
    # pooling layer's name is formatted with the value left by the first.
    first = inp
    if subsample:
        first = Pooling2D("pooling1_{}".format(idx), inp,
                          window=1, stride=2, mode="AVERAGE")
    first = relu_conv_bn(first, 1, 1, 0, half_chl, isrelu=False, isbn=False)

    second = inp
    if subsample:
        second = Pooling2D("pooling2_{}".format(idx), inp[:, :, 1:, 1:],
                           window=1, stride=2, mode="AVERAGE")
    second = relu_conv_bn(second, 1, 1, 0, half_chl, isrelu=False, isbn=False)

    merged = O.Concat([first, second], axis=1)
    merged = BN("bn_down_{}".format(isdown), merged, eps=1e-9)
    return ElementwiseAffine("bnaff_down_{}".format(isdown), merged,
                             shared_in_channels=False, k=C(1), b=C(0))
def bn_relu_conv(inp, ker_shape, stride, padding, out_chl, has_relu, has_bn, has_conv=True, group=None):
    """Pre-activation unit: (BN + affine ->) (ReLU ->) (Conv2D).

    Args:
        inp: input variable.
        ker_shape, stride, padding: convolution geometry.
        out_chl: number of convolution output channels.
        has_relu: insert a ReLU before the convolution.
        has_bn: insert ``BN`` and a per-channel ``ElementwiseAffine`` first.
        has_conv: when false, return right after the optional BN / ReLU.
        group: passed to ``Conv2D`` as ``group`` only when it is not ``None``.

    Returns:
        The output variable of the last layer that was added.

    Side effects:
        Increments the module-level ``idx`` counter used in layer names.
    """
    global idx
    idx += 1

    cur = inp
    if has_bn:
        cur = BN("bn{}".format(idx), cur, eps=1e-9)
        cur = ElementwiseAffine("bnaff{}".format(idx), cur,
                                shared_in_channels=False, k=C(1), b=C(0))
    if has_relu:
        cur = arith.ReLU(cur)
    if not has_conv:
        return cur

    # ``group`` is only forwarded when the caller supplied one, so Conv2D's
    # own default applies otherwise.
    optional = {} if group is None else {"group": group}
    return Conv2D("conv{}".format(idx), cur,
                  kernel_shape=ker_shape,
                  stride=stride,
                  padding=padding,
                  output_nr_channel=out_chl,
                  nonlinearity=Identity(),
                  **optional)
def res_layer(inp, chl, stride=1, proj=False):
    """Bottleneck residual unit: 1x1 -> 3x3 -> 1x1, scaled, plus shortcut.

    The residual branch runs three ``conv_bn`` calls (``chl // 4`` channels
    with ReLU twice, then ``chl`` channels without ReLU), followed by a
    per-channel affine initialised to scale 0.5 / shift 0. The shortcut is
    ``inp`` itself, or a strided 1x1 ``conv_bn`` projection when ``proj`` is
    true. The sum goes through a ReLU.

    NOTE(review): this code uses the result of ``conv_bn`` as a single
    variable (``.name``, ``+``), so it presumably pairs with a ``conv_bn``
    that returns one value rather than a tuple - confirm.

    Args:
        inp: input variable.
        chl: number of output channels.
        stride: stride of the first 1x1 convolution and of the projection.
        proj: project the shortcut with a 1x1 ``conv_bn``.

    Returns:
        The output variable of the unit.
    """
    shortcut = inp
    mid_chl = chl // 4

    branch = conv_bn(inp, 1, stride, 0, mid_chl, True)
    branch = conv_bn(branch, 3, 1, 1, mid_chl, True)
    branch = conv_bn(branch, 1, 1, 0, chl, False)
    branch = ElementwiseAffine("aff({})".format(branch.name), branch,
                               shared_in_channels=False, k=C(0.5), b=C(0))

    if proj:
        shortcut = conv_bn(shortcut, 1, stride, 0, chl, False)
    return arith.ReLU(branch + shortcut)
def make_network(minibatch_size=128):
    """Assemble a plain 31-convolution classifier over 15-channel inputs.

    The graph is one 16-channel ``conv_bn`` followed by three stages of ten
    3x3 ``conv_bn`` layers (32, 64 and 128 channels). The first two stages
    end with ``b_resize`` and a 2x2 max pooling. Spatial mean pooling, a
    10-way fully connected layer and a softmax produce the prediction.

    Args:
        minibatch_size: batch dimension of the ``data`` / ``label`` providers.

    Returns:
        A ``Network`` whose outputs are the prediction followed by every raw
        convolution output, with ``loss_var`` set to the cross-entropy between
        prediction and label.
    """
    patch_size = 32
    inp = DataProvider("data", shape=(minibatch_size, 15, patch_size, patch_size))
    label = DataProvider("label", shape=(minibatch_size, ))
    #lay = bn_relu_conv(inp, 3, 1, 1, 16, False, False)
    # conv_bn is used here in its two-value form: (activation, raw conv).
    lay, conv = conv_bn(inp, 3, 1, 1, 16, True)
    out = [conv]
    for chl in [32, 64, 128]:
        for i in range(10):
            lay, conv = conv_bn(lay, 3, 1, 1, chl, True)
            out.append(conv)
        if chl != 128:
            # NOTE(review): both layers below receive the same name
            # "pooling<chl>"; b_resize is defined elsewhere, so it is not
            # visible here whether that name is reused verbatim - confirm.
            # NOTE(review): the nesting of the Pooling2D call under this
            # `if` was reconstructed from a whitespace-collapsed source.
            lay = b_resize("pooling{}".format(chl), lay)
            lay = Pooling2D("pooling{}".format(chl), lay, window=2, mode="MAX")
    #global average pooling
    print(lay.partial_shape)
    # Mean over both spatial axes (axis 2 twice, since the first mean
    # removes one axis).
    feature = lay.mean(axis=2).mean(axis=2)
    #feature = Pooling2D("glbpoling", lay, window = 8, stride = 8, mode = "AVERAGE")
    pred = Softmax(
        "pred",
        FullyConnected("fc0", feature, output_dim=10,
                       W=G(mean=0, std=(1 / feature.partial_shape[1])**0.5),
                       b=C(0), nonlinearity=Identity()))
    # The raw convolution outputs are exposed as extra network outputs.
    network = Network(outputs=[pred] + out)
    network.loss_var = CrossEntropyLoss(pred, label)
    return network
def bn_relu_conv(inp, ker_shape, stride, padding, out_chl, has_relu, has_bn, has_conv=True):
    """Pre-activation unit: (BN + affine ->) (ReLU ->) (Conv2D).

    Args:
        inp: input variable.
        ker_shape, stride, padding: convolution geometry.
        out_chl: number of convolution output channels.
        has_relu: insert a ReLU before the convolution.
        has_bn: insert ``BN`` and a per-channel ``ElementwiseAffine`` first.
        has_conv: when false, return right after the optional BN / ReLU.

    Returns:
        The output variable of the last layer that was added. The convolution
        relies on ``Conv2D``'s default parameter initialisers.

    Side effects:
        Increments the module-level ``idx`` counter used in layer names.
    """
    global idx
    idx += 1

    cur = inp
    if has_bn:
        cur = BN("bn{}".format(idx), cur, eps=1e-9)
        cur = ElementwiseAffine("bnaff{}".format(idx), cur,
                                shared_in_channels=False, k=C(1), b=C(0))
    if has_relu:
        cur = arith.ReLU(cur)
    if not has_conv:
        return cur

    return Conv2D("conv{}".format(idx), cur,
                  kernel_shape=ker_shape,
                  stride=stride,
                  padding=padding,
                  output_nr_channel=out_chl,
                  nonlinearity=Identity())
def make_network(minibatch_size=128):
    """Assemble a residual classifier from three ``res_block`` stages.

    A 16-channel 3x3 ``conv_bn`` stem is followed by ``res_block`` stages of
    width 16, 32 and 64, each called with a depth argument of 3. The result
    is averaged over both spatial axes and fed to a 10-way fully connected
    layer and a softmax.

    NOTE(review): the stem uses the single-value form of ``conv_bn``;
    ``res_block`` is defined elsewhere.

    Args:
        minibatch_size: batch dimension of the ``data`` / ``label`` providers.

    Returns:
        A ``Network`` with the prediction as its only output and ``loss_var``
        set to the cross-entropy between prediction and label.
    """
    patch_size = 32
    inp = DataProvider("data", shape=(minibatch_size, 3, patch_size, patch_size))
    label = DataProvider("label", shape=(minibatch_size, ))

    lay = conv_bn(inp, 3, 1, 1, 16, True)
    units_per_stage = 3
    for width in (16, 32, 64):
        lay = res_block(lay, width, units_per_stage)

    # Global average pooling: mean over both spatial axes.
    feature = lay.mean(axis=2).mean(axis=2)
    logits = FullyConnected("fc0", feature, output_dim=10,
                            W=G(mean=0, std=(2 / 64)**0.5),
                            b=C(0), nonlinearity=Identity())
    pred = Softmax("pred", logits)

    network = Network(outputs=[pred])
    network.loss_var = CrossEntropyLoss(pred, label)
    return network
def make_network(minibatch_size=64):
    """Assemble a classifier from three ``dense_block`` / ``transition`` stages.

    A 16-channel 3x3 convolution (no BN, no ReLU) is followed by three rounds
    of ``transition(dense_block(lay, 12, (40 - 4) // 3), stage)``. The result
    is averaged over both spatial axes and fed to a 10-way fully connected
    layer and a softmax. ``dense_block`` and ``transition`` are defined
    elsewhere.

    Args:
        minibatch_size: batch dimension of the ``data`` / ``label`` providers.

    Returns:
        A ``Network`` with the prediction as its only output and ``loss_var``
        set to the cross-entropy between prediction and label.

    Side effects:
        Prints the shape of the final feature map.
    """
    patch_size = 32
    inp = DataProvider("data", shape=(minibatch_size, 3, patch_size, patch_size))
    label = DataProvider("label", shape=(minibatch_size, ))

    lay = bn_relu_conv(inp, 3, 1, 1, 16, False, False)
    growth = 12
    block_depth = (40 - 4) // 3
    for stage in range(3):
        block_out = dense_block(lay, growth, block_depth)
        lay = transition(block_out, stage)

    # Global average pooling: mean over both spatial axes.
    print(lay.partial_shape)
    feature = lay.mean(axis=2).mean(axis=2)
    fc_std = (1 / feature.partial_shape[1])**0.5
    logits = FullyConnected("fc0", feature, output_dim=10,
                            W=G(mean=0, std=fc_std),
                            b=C(0), nonlinearity=Identity())
    pred = Softmax("pred", logits)

    network = Network(outputs=[pred])
    network.loss_var = CrossEntropyLoss(pred, label)
    return network
def make_network(minibatch_size=64):
    """Assemble the larger ``dense_block`` / ``transition`` classifier.

    A 16-channel 3x3 convolution (no BN, no ReLU) is followed by three rounds
    of ``transition(dense_block(lay, 24, (100 - 4) // 3, False), stage)``.
    The output of the last transition is fed directly (no explicit spatial
    pooling here) to a 10-way fully connected layer and a softmax.
    ``dense_block`` and ``transition`` are defined elsewhere.

    Args:
        minibatch_size: batch dimension of the ``data`` / ``label`` providers.

    Returns:
        A ``Network`` with the prediction as its only output and ``loss_var``
        set to the cross-entropy between prediction and label.
    """
    patch_size = 32
    inp = DataProvider("data", shape=(minibatch_size, 3, patch_size, patch_size))
    label = DataProvider("label", shape=(minibatch_size, ))

    lay = bn_relu_conv(inp, 3, 1, 1, 16, False, False)
    growth = 24
    block_depth = (100 - 4) // 3
    for stage in range(3):
        block_out = dense_block(lay, growth, block_depth, False)
        lay = transition(block_out, stage)

    feature = lay
    fc_std = (1 / feature.partial_shape[1])**0.5
    logits = FullyConnected("fc0", feature, output_dim=10,
                            W=G(mean=0, std=fc_std),
                            b=C(0), nonlinearity=Identity())
    pred = Softmax("pred", logits)

    network = Network(outputs=[pred])
    network.loss_var = CrossEntropyLoss(pred, label)
    return network
def conv_bn(inp, ker_shape, stride, padding, out_chl, isrelu):
    """Build a Conv2D -> BN -> per-channel affine (-> ReLU) stack.

    The convolution relies on ``Conv2D``'s default parameter initialisers.

    Args:
        inp: input variable.
        ker_shape, stride, padding: convolution geometry.
        out_chl: number of output channels.
        isrelu: append a ReLU after the affine layer when true.

    Returns:
        ``(activation, conv)`` - the final output and the raw convolution.

    Side effects:
        Increments the module-level ``idx`` counter used in layer names.
    """
    global idx
    idx += 1

    conv = Conv2D("conv{}".format(idx), inp,
                  kernel_shape=ker_shape,
                  stride=stride,
                  padding=padding,
                  output_nr_channel=out_chl,
                  nonlinearity=Identity())
    normed = BN("bn{}".format(idx), conv, eps=1e-9)
    normed = ElementwiseAffine("bnaff{}".format(idx), normed,
                               shared_in_channels=False, k=C(1), b=C(0))
    activation = arith.ReLU(normed) if isrelu else normed
    return activation, conv
def deconv_bn_relu(name, inp, kernel_shape=None, stride=None, padding=None, output_nr_channel=None, isbnrelu=True):
    """Transposed convolution, optionally followed by BN, affine and ReLU.

    Args:
        name: name of the deconvolution layer; the BN and affine layers are
            named ``name + "bn"`` and ``name + "bnaff"``.
        inp: input variable.
        kernel_shape, stride, padding, output_nr_channel: forwarded unchanged
            to ``O.Deconv2DVanilla``.
        isbnrelu: when false, return the raw deconvolution output.

    Returns:
        The output variable of the last layer that was added.
    """
    deconv = O.Deconv2DVanilla(name, inp,
                               kernel_shape=kernel_shape,
                               stride=stride,
                               padding=padding,
                               output_nr_channel=output_nr_channel)
    if not isbnrelu:
        return deconv

    normed = BN(name + "bn", deconv, eps=1e-9)
    normed = ElementwiseAffine(name + "bnaff", normed,
                               shared_in_channels=False, k=C(1), b=C(0))
    return arith.ReLU(normed)
def conv_bn(inp, ker_shape, stride, padding, out_chl, isrelu):
    """Encoder Conv2D -> BN -> per-channel affine (-> ReLU) stack.

    The convolution weight uses a zero-mean Gaussian initialiser with
    variance ``(1 + int(isrelu)) / (ker_shape**2 * in_channels)``, where
    ``in_channels`` is ``inp.partial_shape[1]``.

    Args:
        inp: input variable.
        ker_shape, stride, padding: convolution geometry.
        out_chl: number of output channels.
        isrelu: append a ReLU after the affine layer when true.

    Returns:
        ``(activation, conv)`` - the final output and the raw convolution.

    Side effects:
        Increments the module-level ``idx`` counter used in layer names.
    """
    global idx
    idx += 1

    fan_in = ker_shape**2 * inp.partial_shape[1]
    conv = Conv2D("encoder_conv{}".format(idx), inp,
                  kernel_shape=ker_shape,
                  stride=stride,
                  padding=padding,
                  output_nr_channel=out_chl,
                  W=G(mean=0, std=((1 + int(isrelu)) / fan_in)**0.5),
                  nonlinearity=Identity())
    normed = BN("encoder_bn{}".format(idx), conv, eps=1e-9)
    normed = ElementwiseAffine("bnaff{}".format(idx), normed,
                               shared_in_channels=False, k=C(1), b=C(0))
    activation = arith.ReLU(normed) if isrelu else normed
    return activation, conv
def conv_norm(inp, ker_shape, stride, padding, out_chl, isrelu, eps=1e-9):
    """Conv2D followed by per-sample, per-channel normalisation and affine.

    Each ``(sample, channel)`` feature map is normalised with its own mean
    and standard deviation taken over the two spatial axes, then passed
    through a per-channel affine initialised to scale 1 / shift 0 and an
    optional ReLU.

    Args:
        inp: input variable.
        ker_shape, stride, padding: convolution geometry.
        out_chl: number of output channels.
        isrelu: append a ReLU after the affine layer when true.
        eps: added to the variance before the square root so that a
            spatially constant feature map does not cause a division by
            zero. The default matches the ``eps`` passed to ``BN`` elsewhere
            in this code.

    Returns:
        The output variable of the last layer that was added.

    Side effects:
        Increments the module-level ``idx`` counter used in layer names.
    """
    global idx
    idx += 1

    feat = Conv2D("conv{}".format(idx), inp,
                  kernel_shape=ker_shape,
                  stride=stride,
                  padding=padding,
                  output_nr_channel=out_chl,
                  nonlinearity=Identity())

    # Spatial statistics per (sample, channel); the centred tensor is built
    # once and reused for both the variance and the normalised output.
    mean = feat.mean(axis=3).mean(axis=2)
    centered = feat - mean.dimshuffle(0, 1, 'x', 'x')
    var = (centered**2).mean(axis=3).mean(axis=2)
    std = (var + eps)**0.5
    normed = centered / std.dimshuffle(0, 1, 'x', 'x')

    out = ElementwiseAffine("aff{}".format(idx), normed,
                            shared_in_channels=False, k=C(1), b=C(0))
    if isrelu:
        out = O.ReLU(out)
    return out
def bn_relu_conv(inp, ker_shape, stride, padding, out_chl, isrelu, isbn):
    """Pre-activation unit: (BN + affine ->) (ReLU ->) Conv2D.

    Args:
        inp: input variable.
        ker_shape, stride, padding: convolution geometry.
        out_chl: number of convolution output channels.
        isrelu: insert a ReLU before the convolution.
        isbn: insert ``BN`` and a per-channel ``ElementwiseAffine`` first.

    Returns:
        The convolution output. The convolution relies on ``Conv2D``'s
        default parameter initialisers.

    Side effects:
        Increments the module-level ``idx`` counter used in layer names.
    """
    global idx
    idx += 1

    cur = inp
    if isbn:
        cur = BN("bn{}".format(idx), cur, eps=1e-9)
        cur = ElementwiseAffine("bnaff{}".format(idx), cur,
                                shared_in_channels=False, k=C(1), b=C(0))
    if isrelu:
        cur = arith.ReLU(cur)

    return Conv2D("conv{}".format(idx), cur,
                  kernel_shape=ker_shape,
                  stride=stride,
                  padding=padding,
                  output_nr_channel=out_chl,
                  nonlinearity=Identity())
def conv_bn(inp, ker_shape, stride, padding, out_chl, isrelu, group=1, shift=0):
    """(Grouped / shifted-group) Conv2D -> BN -> per-channel affine (-> ReLU).

    Three convolution variants are supported:

    * ``group == 1``: one ordinary convolution.
    * ``group != 1`` and ``shift == 0``: one grouped convolution.
    * ``group != 1`` and ``shift != 0``: a chain of rounds with step sizes
      1, 2, 4, ... while the step is below ``group``. Each round sums a
      grouped convolution of the current tensor with a grouped convolution
      of the same tensor after rotating its channels by
      ``step * channels // group``. Only the truthiness of ``shift`` is used;
      the step always starts at 1.

    Args:
        inp: input variable.
        ker_shape, stride, padding: convolution geometry, applied to every
            convolution that is created.
        out_chl: number of output channels of every convolution.
        isrelu: append a ReLU after the affine layer when true.
        group: number of convolution groups.
        shift: non-zero selects the shifted-group chain.

    Returns:
        The output variable of the last layer that was added.

    Side effects:
        Increments the module-level ``idx`` counter used in layer names.
    """
    global idx
    idx += 1

    def make_conv(layer_name, src, grouped):
        # All variants share the geometry; only the grouped ones pass `group`.
        optional = {"group": group} if grouped else {}
        return Conv2D(layer_name, src,
                      kernel_shape=ker_shape,
                      stride=stride,
                      padding=padding,
                      output_nr_channel=out_chl,
                      nonlinearity=Identity(),
                      **optional)

    if group == 1:
        l1 = make_conv("conv{}".format(idx), inp, False)
    elif shift == 0:
        l1 = make_conv("conv{}".format(idx), inp, True)
    else:
        step = 1
        l1 = inp
        # `<` instead of `!=`: the step doubles every round, so an equality
        # test never becomes false when `group` is not a power of two and
        # the loop would build layers forever. For powers of two the two
        # conditions produce the same rounds.
        while step < group:
            direct = make_conv("conv{}_{}_1".format(idx, step), l1, True)
            inp_chl = l1.partial_shape[1]
            cut = step * inp_chl // group
            # Rotate the channel axis left by `cut` channels.
            rotated = O.Concat([l1[:, cut:, :, :], l1[:, :cut, :, :]], axis=1)
            shifted = make_conv("conv{}_{}_2".format(idx, step), rotated, True)
            l1 = direct + shifted
            step *= 2

    l2 = BN("bn{}".format(idx), l1, eps=1e-9)
    l2 = ElementwiseAffine("bnaff{}".format(idx), l2,
                           shared_in_channels=False, k=C(1), b=C(0))
    if isrelu:
        l2 = arith.ReLU(l2)
    return l2
def conv_bn(inp, ker_shape, stride, padding, out_chl, isrelu):
    """Half-frozen Conv2D -> BN -> per-channel affine (-> ReLU) stack.

    Two convolutions with ``out_chl // 2`` output channels each are applied
    to the same input and concatenated along the channel axis. The operators
    that own the second convolution's inputs 1 and 2 (bound to the local
    names ``W`` and ``b`` in the original code) are marked with
    ``set_freezed()``. Both weights use a zero-mean Gaussian initialiser with
    variance ``(1 + int(isrelu)) / (ker_shape**2 * in_channels)``.

    Args:
        inp: input variable.
        ker_shape, stride, padding: convolution geometry.
        out_chl: total number of output channels (split in two halves).
        isrelu: append a ReLU after the affine layer when true.

    Returns:
        ``(activation, conv)`` - the final output and the concatenated raw
        convolution output.

    Side effects:
        Increments the module-level ``idx`` counter used in layer names.
    """
    global idx
    idx += 1

    def half_conv(layer_name):
        fan_in = ker_shape**2 * inp.partial_shape[1]
        return Conv2D(layer_name, inp,
                      kernel_shape=ker_shape,
                      stride=stride,
                      padding=padding,
                      output_nr_channel=out_chl // 2,
                      W=G(mean=0, std=((1 + int(isrelu)) / fan_in)**0.5),
                      nonlinearity=Identity())

    trainable_half = half_conv("conv{}_0".format(idx))
    frozen_half = half_conv("conv{}_1".format(idx))

    # Freeze the operators behind inputs 1 and 2 of the second half.
    frozen_oprs = [frozen_half.inputs[slot].owner_opr for slot in (1, 2)]
    for opr in frozen_oprs:
        opr.set_freezed()

    conv = Concat([trainable_half, frozen_half], axis=1)
    normed = BN("bn{}".format(idx), conv, eps=1e-9)
    normed = ElementwiseAffine("bnaff{}".format(idx), normed,
                               shared_in_channels=False, k=C(1), b=C(0))
    activation = arith.ReLU(normed) if isrelu else normed
    return activation, conv
def make_network(minibatch_size=128):
    """Assemble a plain 31-convolution classifier with a Gram-matrix penalty.

    The graph is one 16-channel ``conv_bn`` followed by three stages of ten
    3x3 ``conv_bn`` layers (32, 64 and 128 channels), with a 2x2 max pooling
    after the first two stages. Spatial mean pooling, a 10-way fully
    connected layer and a softmax produce the prediction.

    On top of the cross-entropy loss, every raw convolution output adds
    ``0.01 * sum((A - I)**2)``, where ``A`` is the channel-by-channel Gram
    matrix of that output after each channel has been scaled to unit L2 norm.

    Args:
        minibatch_size: batch dimension of the ``data`` / ``label`` providers.

    Returns:
        A ``Network`` whose outputs are the prediction followed by every raw
        convolution output, with ``loss_var`` set to the combined loss.

    Side effects:
        Prints the shape of the final feature map.
    """
    patch_size = 32
    inp = DataProvider("data", shape=(minibatch_size, 3, patch_size, patch_size))
    label = DataProvider("label", shape=(minibatch_size, ))

    lay, conv = conv_bn(inp, 3, 1, 1, 16, True)
    out = [conv]
    for chl in [32, 64, 128]:
        for _ in range(10):
            lay, conv = conv_bn(lay, 3, 1, 1, chl, True)
            out.append(conv)
        if chl != 128:
            lay = Pooling2D("pooling{}".format(chl), lay, window=2, mode="MAX")

    # Global average pooling: mean over both spatial axes.
    print(lay.partial_shape)
    feature = lay.mean(axis=2).mean(axis=2)
    pred = Softmax(
        "pred",
        FullyConnected("fc0", feature, output_dim=10,
                       W=G(mean=0, std=(1 / feature.partial_shape[1])**0.5),
                       b=C(0), nonlinearity=Identity()))
    network = Network(outputs=[pred] + out)
    network.loss_var = CrossEntropyLoss(pred, label)

    lmd = 0.01
    for conv_lay in out:
        # Bring channels to the front, flatten everything else, then
        # transpose so that each column holds one channel.
        w = conv_lay.dimshuffle(1, 0, 2, 3)
        w = w.reshape(w.partial_shape[0], -1).dimshuffle(1, 0)
        # Divide each column by its L2 norm. Without the square root the
        # columns were divided by the squared norm, so the Gram matrix did
        # not have the unit diagonal that the comparison with the identity
        # below relies on.
        col_norm = ((w**2).sum(axis=0))**0.5
        w = w / col_norm.dimshuffle('x', 0)
        A = MatMul(w.dimshuffle(1, 0), w)
        network.loss_var += lmd * (
            (A - np.identity(A.partial_shape[0]))**2).sum()
    return network
def make_network(minibatch_size=128):
    """Build a minimal graph that exercises ``IndexingRemap``.

    The network remaps the ``data`` provider through an all-zero int32 index
    map, exposes the remapped tensor as its only output and uses the sum of
    a one-unit fully connected layer over it as the loss.

    The original function went on to define a classification network after
    its early ``return``; that code could never run and has been dropped.
    The index-map shape now follows ``minibatch_size`` and ``patch_size``
    instead of the literals ``128`` / ``32``, which is identical for the
    default arguments.

    Args:
        minibatch_size: batch dimension of the ``data`` / ``label`` providers
            and of the index map.

    Returns:
        A ``Network`` with the remapped tensor as output and ``loss_var`` set.
    """
    patch_size = 32
    inp = DataProvider("data", shape=(minibatch_size, 3, patch_size, patch_size))
    # Not connected to the loss; kept so that the same providers are created
    # as before.
    label = DataProvider("label", shape=(minibatch_size, ))

    # Input shape plus a trailing axis of length 4.
    # NOTE(review): presumably one index per input axis - confirm against
    # the IndexingRemap operator documentation.
    idxmap = np.zeros((minibatch_size, 3, patch_size, patch_size, 4),
                      dtype=np.int32)
    sample = IndexingRemap(inp, idxmap)
    network = Network(outputs=[sample])

    sample = FullyConnected("fc", sample, output_dim=1)
    network.loss_var = sample.sum()
    return network
def conv_wn(inp, ker_shape, stride, padding, out_chl, isrelu):
    """Conv2D whose output is divided by the per-filter weight norm.

    The convolution weight (taken from ``inputs[1]`` of the convolution
    output; its name must contain ``":W"``) is reduced to one L2 norm per
    output channel, and the convolution output is divided by that norm before
    a per-channel affine (scale 1 / shift 0) and an optional ReLU.

    Args:
        inp: input variable.
        ker_shape, stride, padding: convolution geometry.
        out_chl: number of output channels.
        isrelu: append a ReLU after the affine layer when true.

    Returns:
        ``(activation, normalised_conv, W)`` - the final output, the
        norm-divided convolution output, and the weight variable.

    Raises:
        AssertionError: if the variable at ``inputs[1]`` is not named like a
            weight.

    Side effects:
        Increments the module-level ``idx`` counter used in layer names.
    """
    global idx
    idx += 1

    conv = Conv2D("conv{}".format(idx), inp,
                  kernel_shape=ker_shape,
                  stride=stride,
                  padding=padding,
                  output_nr_channel=out_chl,
                  W=G(mean=0, std=0.05),
                  nonlinearity=Identity())
    W = conv.inputs[1]
    assert ":W" in W.name

    # One norm per output channel: reduce over every axis except axis 0.
    filter_norm = (W**2).sum(axis=3).sum(axis=2).sum(axis=1)**0.5
    scaled = conv / filter_norm.dimshuffle('x', 0, 'x', 'x')

    out = ElementwiseAffine("bnaff{}".format(idx), scaled,
                            shared_in_channels=False, k=C(1), b=C(0))
    if isrelu:
        out = arith.ReLU(out)
    return out, scaled, W
def dfpooling(name, inp, window=2, padding=0, dx=[0, 1], dy=[0, 1]):
    """Deformable average pooling with bilinearly sampled inputs.

    Two convolutions (zero-initialised weights, kernel and stride equal to
    ``window``) predict ``window**2`` row and column offsets per output
    location, scaled by ``0.1 * height`` and ``0.1 * width``. For each of the
    four floor / ceil corner combinations the function builds absolute
    sampling coordinates (offset + window origin + in-window displacement
    from ``dx`` / ``dy``), clamps them to the feature map, gathers the
    samples with ``IndexingRemap`` and weights them bilinearly. The four
    weighted maps are summed and average-pooled with the sampling window.

    Changes relative to the previous revision:

    * Bilinear weights were swapped (the floor sample received the fractional
      part and the ceil sample its complement). The floor sample now gets
      ``1 - frac`` and the ceil sample ``frac``; integer offsets still give
      weights 1 and 0.
    * The flattened sampling index used the height as row stride. It now
      uses the width, which only differs for non-square feature maps.
    * The final pooling used a literal window of 2. It now uses ``window``,
      which is identical for the default.

    Args:
        name: prefix for the offset convolutions and name of the pooling.
        inp: input variable, indexed as (batch, channel, row, column).
        window: pooling window, also the kernel size and stride of the
            offset convolutions.
        padding: padding of the offset convolutions.
        dx: in-window row displacements; ``window * len(dx)`` must equal
            ``window**2``. Only iterated, never mutated.
        dy: in-window column displacements; ``len(dy) * window`` must equal
            ``window**2``. Only iterated, never mutated.

    Returns:
        The pooled output variable.
    """
    ker_shape = window
    stride = window
    gamma = 0.1
    offsetx = gamma * inp.partial_shape[2] * Conv2D(
        name + "offsetx", inp,
        kernel_shape=ker_shape, stride=stride, padding=padding,
        output_nr_channel=ker_shape**2,
        W=C(0), nonlinearity=Identity())
    offsety = gamma * inp.partial_shape[3] * Conv2D(
        name + "offsety", inp,
        kernel_shape=ker_shape, stride=stride, padding=padding,
        output_nr_channel=ker_shape**2,
        W=C(0), nonlinearity=Identity())

    def DeformReshape(inp, ker_shape):
        # (N, k*k, H', W') -> (N, H'*k, W'*k): interleave the k x k in-window
        # positions with the output grid.
        inp = inp.reshape(inp.partial_shape[0], ker_shape, ker_shape,
                          inp.partial_shape[2], inp.partial_shape[3])
        inp = inp.dimshuffle(0, 3, 1, 4, 2)
        inp = inp.reshape(inp.partial_shape[0],
                          inp.partial_shape[1] * inp.partial_shape[2],
                          inp.partial_shape[3] * inp.partial_shape[4])
        return inp

    outputs = []
    for sx in range(2):
        for sy in range(2):
            # Corner selection and bilinear weight along each axis.
            if sx == 0:
                ofx = Floor(offsetx)
                bilx = 1 - (offsetx - ofx)
            else:
                ofx = Ceil(offsetx)
                bilx = offsetx - Floor(offsetx)
            if sy == 0:
                ofy = Floor(offsety)
                bily = 1 - (offsety - ofy)
            else:
                ofy = Ceil(offsety)
                bily = offsety - Floor(offsety)

            # No zero padding is added around the feature map; coordinates
            # are clamped instead.
            arg_fea = inp

            # Window origins: (row, column) index of every output location
            # times the stride.
            one_mat = ConstProvider(1, dtype=np.int32).add_axis(0).broadcast(
                (ofx.partial_shape[2], ofx.partial_shape[3]))
            affx = (Cumsum(one_mat, axis=0) - 1) * stride
            affy = (Cumsum(one_mat, axis=1) - 1) * stride
            ofx = ofx + affx.dimshuffle('x', 'x', 0, 1)
            ofy = ofy + affy.dimshuffle('x', 'x', 0, 1)

            # In-window displacement per offset channel: dx varies slowly
            # (each value repeated ker_shape times), dy varies fast.
            one_mat = ConstProvider(
                np.ones((ker_shape, ofx.partial_shape[2], ofx.partial_shape[3])))
            ofx += Concat([one_mat * i for i in dx], axis=0).dimshuffle('x', 0, 1, 2)
            one_mat = ones((1, ofx.partial_shape[2], ofx.partial_shape[3]))
            one_mat = Concat([one_mat * i for i in dy], axis=0)
            one_mat = Concat([one_mat] * ker_shape, axis=0)
            ofy += one_mat.dimshuffle('x', 0, 1, 2)

            # Clamp to the valid coordinate range.
            ofx = Max(Min(ofx, arg_fea.partial_shape[2] - 1), 0)
            ofy = Max(Min(ofy, arg_fea.partial_shape[3] - 1), 0)

            ofx = DeformReshape(ofx, ker_shape)
            ofy = DeformReshape(ofy, ker_shape)
            bilx = DeformReshape(bilx, ker_shape)
            bily = DeformReshape(bily, ker_shape)

            # Row-major flat index into the (rows * columns) axis created by
            # the reshape below: the row stride is the number of columns.
            of = ofx * arg_fea.partial_shape[3] + ofy
            arg_fea = arg_fea.reshape(arg_fea.partial_shape[0],
                                      arg_fea.partial_shape[1], -1)
            of = of.reshape(ofx.partial_shape[0], -1)
            of = of.dimshuffle(0, 'x', 1)
            of = of.broadcast((of.partial_shape[0],
                               arg_fea.partial_shape[1],
                               of.partial_shape[2]))

            # Batch and channel indices, broadcast to the shape of `of`.
            arx = Linspace(0, arg_fea.partial_shape[0],
                           arg_fea.partial_shape[0], endpoint=False)
            arx = arx.add_axis(1).add_axis(2).broadcast(of.shape)
            ary = Linspace(0, arg_fea.partial_shape[1],
                           arg_fea.partial_shape[1], endpoint=False)
            ary = ary.add_axis(0).add_axis(2).broadcast(of.shape)
            of = of.add_axis(3)
            arx = arx.add_axis(3)
            ary = ary.add_axis(3)
            idxmap = Astype(Concat([arx, ary, of], axis=3), np.int32)

            sample = IndexingRemap(arg_fea, idxmap).reshape(
                inp.partial_shape[0], inp.partial_shape[1],
                bilx.partial_shape[1], -1)
            bilx = bilx.dimshuffle(0, 'x', 1, 2).broadcast(sample.shape)
            bily = bily.dimshuffle(0, 'x', 1, 2).broadcast(sample.shape)
            sample *= bilx * bily
            outputs.append(sample)

    output = outputs[0]
    for i in outputs[1:]:
        output += i
    return Pooling2D(name, output, window=ker_shape, mode="AVERAGE")
from megskull.opr.all import DataProvider, Conv2D, Pooling2D, Exp, Log, Softmax, CrossEntropyLoss from megskull.opr.all import FullyConnected as FC from megskull.opr.helper.param_init import ConstantParamInitializer as C from megskull.opr.helper.param_init import AutoGaussianParamInitializer as G from megskull.opr.helper.elemwise_trans import Identity, ReLU from megskull.network import Network import numpy as np minibatch_size = 20 img_size = 28 input_mat = DataProvider(name = "input_mat", shape = (minibatch_size, 1, img_size, img_size)) conv1 = Conv2D("conv1", input_mat, kernel_shape = 3, output_nr_channel = 5, W = G(mean = 0.0001, std = (1 / (3 * 3))**0.5), b = C(0), padding = (1, 1), nonlinearity = ReLU()) conv2 = Conv2D("conv2", conv1, kernel_shape = 3, output_nr_channel = 5, W = G(mean = 0.0001, std = (1 / (5 * 3 * 3))**0.5), b = C(0), padding = (1, 1), nonlinearity = ReLU()) pooling1 = Pooling2D("pooling1", conv2, window = (2, 2), mode = "max") conv3 = Conv2D("conv3", pooling1, kernel_shape = 3, output_nr_channel = 10, W = G(mean = 0.0001, std = (1 / (5 * 3 * 3))**0.5), b = C(0), padding = (1, 1), nonlinearity = ReLU()) conv4 = Conv2D("conv4", conv3, kernel_shape = 3, output_nr_channel = 10,
def dfpooling(name, inp, window=2, padding=0, dx=[0, 1], dy=[0, 1]):
    """Deformable average pooling with offsets predicted by conv + FC layers.

    A 3x3 convolution (added, per the original note, to keep the feature
    scale at variance 1) feeds two strided convolutions, and a fully
    connected layer on the first of them produces the row and column offsets
    (first and second half of its output). For each of the four floor / ceil
    corner combinations the function builds absolute sampling coordinates
    (offset + window origin + in-window displacement from ``dx`` / ``dy``),
    clamps them to the feature map, gathers the samples with
    ``IndexingRemap`` and weights them. The four weighted maps are summed and
    average-pooled with the sampling window.

    Changes relative to the previous revision:

    * The flattened sampling index used the height as row stride. It now
      uses the width, which only differs for non-square feature maps.
    * The final pooling used a literal window of 2. It now uses ``window``,
      which is identical for the default.

    Args:
        name: prefix for the created layers and name of the final pooling.
        inp: input variable, indexed as (batch, channel, row, column).
        window: pooling window, also the kernel size and stride of the
            strided offset convolutions.
        padding: padding of the strided offset convolutions.
        dx: in-window row displacements; ``window * len(dx)`` must equal
            ``window**2``. Only iterated, never mutated.
        dy: in-window column displacements; ``len(dy) * window`` must equal
            ``window**2``. Only iterated, never mutated.

    Returns:
        The pooled output variable.
    """
    ker_shape = window
    stride = window
    offsetlay = Conv2D(
        name + "conv", inp,
        kernel_shape=3, stride=1, padding=1,
        output_nr_channel=ker_shape**2,
        W=G(mean=0, std=((1) / (3**2 * inp.partial_shape[1]))**0.5),
        nonlinearity=Identity())
    offsetx = Conv2D(
        name + "conv1x", offsetlay,
        kernel_shape=ker_shape, stride=stride, padding=padding,
        output_nr_channel=ker_shape**2,
        W=G(mean=0, std=(1 / (ker_shape**2 * inp.partial_shape[2]))**0.5),
        nonlinearity=Identity())
    offsety = Conv2D(
        name + "conv1y", offsetlay,
        kernel_shape=ker_shape, stride=stride, padding=padding,
        output_nr_channel=ker_shape**2,
        W=G(mean=0, std=(1 / (ker_shape**2 * inp.partial_shape[3]))**0.5),
        nonlinearity=Identity())

    # NOTE(review): this concatenation is overwritten two statements below
    # without being used, and the fully connected layer reads `offsetx`, not
    # `offset` - possibly the concatenation was meant to be its input.
    # Left unchanged; confirm the intent.
    offset = Concat([offsetx, offsety], axis=1)
    ndim = ker_shape**2 * offsetx.partial_shape[2] * offsetx.partial_shape[3] * 2
    offset = FullyConnected(
        name + "offset", offsetx,
        output_dim=ndim,
        W=G(mean=0, std=(1 / ndim)**2),
        b=C(0),
        nonlinearity=Identity())
    # First half of the FC output -> row offsets, second half -> columns.
    offsetx = offset[:, :ndim // 2].reshape(offsetx.shape)
    offsety = offset[:, ndim // 2:].reshape(offsety.shape)

    def DeformReshape(inp, ker_shape):
        # (N, k*k, H', W') -> (N, H'*k, W'*k): interleave the k x k in-window
        # positions with the output grid.
        inp = inp.reshape(inp.shape[0], ker_shape, ker_shape,
                          inp.shape[2], inp.partial_shape[3])
        inp = inp.dimshuffle(0, 3, 1, 4, 2)
        inp = inp.reshape(inp.shape[0],
                          inp.shape[1] * inp.shape[2],
                          inp.shape[3] * inp.shape[4])
        return inp

    outputs = []
    for sx in range(2):
        for sy in range(2):
            # Corner selection and weight (1 - distance to the corner).
            # NOTE(review): for an exactly integer offset both corners
            # coincide and both get weight 1 - confirm this is acceptable.
            if sx == 0:
                ofx = Floor(offsetx)
                bilx = 1 - (offsetx - ofx)
            else:
                ofx = Ceil(offsetx)
                bilx = 1 - (ofx - offsetx)
            if sy == 0:
                ofy = Floor(offsety)
                bily = 1 - (offsety - ofy)
            else:
                ofy = Ceil(offsety)
                bily = 1 - (ofy - offsety)

            # No zero padding is added around the feature map; coordinates
            # are clamped instead.
            arg_fea = inp

            # Window origins: (row, column) index of every output location
            # times the stride.
            one_mat = ConstProvider(1, dtype=np.int32).add_axis(0).broadcast(
                (ofx.shape[2], ofx.shape[3]))
            affx = (Cumsum(one_mat, axis=0) - 1) * stride
            affy = (Cumsum(one_mat, axis=1) - 1) * stride
            ofx = ofx + affx.dimshuffle('x', 'x', 0, 1)
            ofy = ofy + affy.dimshuffle('x', 'x', 0, 1)

            # In-window displacement per offset channel: dx varies slowly
            # (each value repeated ker_shape times), dy varies fast.
            one_mat = ConstProvider(
                np.ones((ker_shape, ofx.partial_shape[2], ofx.partial_shape[3])))
            ofx += Concat([one_mat * i for i in dx], axis=0).dimshuffle('x', 0, 1, 2)
            one_mat = ones((1, ofx.partial_shape[2], ofx.partial_shape[3]))
            one_mat = Concat([one_mat * i for i in dy], axis=0)
            one_mat = Concat([one_mat] * ker_shape, axis=0)
            ofy += one_mat.dimshuffle('x', 0, 1, 2)

            # Clamp to the valid coordinate range.
            ofx = Max(Min(ofx, arg_fea.partial_shape[2] - 1), 0)
            ofy = Max(Min(ofy, arg_fea.partial_shape[3] - 1), 0)

            ofx = DeformReshape(ofx, ker_shape)
            ofy = DeformReshape(ofy, ker_shape)
            bilx = DeformReshape(bilx, ker_shape)
            bily = DeformReshape(bily, ker_shape)

            # Row-major flat index into the (rows * columns) axis created by
            # the reshape below: the row stride is the number of columns.
            of = ofx * arg_fea.partial_shape[3] + ofy
            arg_fea = arg_fea.reshape(arg_fea.shape[0], arg_fea.shape[1], -1)
            of = of.reshape(ofx.shape[0], -1)
            of = of.dimshuffle(0, 'x', 1)
            of = of.broadcast((of.shape[0], arg_fea.shape[1], of.shape[2]))

            # Batch and channel indices, broadcast to the shape of `of`.
            arx = Linspace(0, arg_fea.shape[0], arg_fea.shape[0], endpoint=False)
            arx = arx.add_axis(1).add_axis(2).broadcast(of.shape)
            ary = Linspace(0, arg_fea.shape[1], arg_fea.shape[1], endpoint=False)
            ary = ary.add_axis(0).add_axis(2).broadcast(of.shape)
            of = of.add_axis(3)
            arx = arx.add_axis(3)
            ary = ary.add_axis(3)
            idxmap = Astype(Concat([arx, ary, of], axis=3), np.int32)

            sample = IndexingRemap(arg_fea, idxmap).reshape(
                inp.shape[0], inp.shape[1], bilx.shape[1], -1)
            bilx = bilx.dimshuffle(0, 'x', 1, 2).broadcast(sample.shape)
            bily = bily.dimshuffle(0, 'x', 1, 2).broadcast(sample.shape)
            sample *= bilx * bily
            outputs.append(sample)

    output = outputs[0]
    for i in outputs[1:]:
        output += i
    return Pooling2D(name, output, window=ker_shape, mode="AVERAGE")
def dfconv(inp, chl, isrelu, flag, ker_shape=3, stride=1, padding=1, dx=[-1, 0, 1], dy=[-1, 0, 1]):
    """Deformable-convolution experiment: sample, weight, then ``conv_bn``.

    Two zero-initialised convolutions predict ``ker_shape**2`` row / column
    offsets per location (scaled by ``0.1 * height`` / ``0.1 * width``). For
    each floor / ceil corner combination the offsets are clamped, rearranged
    into a (batch, rows * ker_shape, columns * ker_shape) layout, used to
    gather from the flattened input through ``IndexingRemap``, and weighted.
    The four weighted maps are summed and passed to
    ``conv_bn(output, ker_shape, 3, 0, chl, isrelu)``.

    NOTE(review): this function is in a debugging state - see the notes on
    the disabled coordinate block and on the zeroed ``idxmap`` below.

    Args:
        inp: input variable, indexed as (batch, channel, row, column).
        chl: output channels of the trailing ``conv_bn``.
        isrelu: forwarded to the trailing ``conv_bn``.
        flag: when true, the batch / channel index tensors are built and
            stored in ``dic``; when false they are read back from ``dic``.
        ker_shape, stride, padding: geometry of the offset convolutions.
        dx, dy: only referenced inside the disabled coordinate block.

    Returns:
        Whatever ``conv_bn`` returns for the sampled tensor.
    """
    # `idx` is only read here (idx + 1 in the layer names); it is not
    # incremented in this function.
    global idx
    #idx += 1
    gamma = 0.1
    offsetx = gamma * inp.partial_shape[2] * Conv2D(
        "conv{}_offsetx".format(idx + 1), inp,
        kernel_shape=ker_shape, stride=stride, padding=padding,
        output_nr_channel=ker_shape**2,
        W=C(0), nonlinearity=Identity())
    offsety = gamma * inp.partial_shape[3] * Conv2D(
        "conv{}_offsety".format(idx + 1), inp,
        kernel_shape=ker_shape, stride=stride, padding=padding,
        output_nr_channel=ker_shape**2,
        W=C(0), nonlinearity=Identity())
    outputs = []
    # One pass per (row corner, column corner) combination.
    for sx in range(2):
        for sy in range(2):
            # NOTE(review): the floor corner is weighted by the fractional
            # part (plus 1 when the offset is an integer) and the ceil corner
            # by (ceil - offset). Standard bilinear interpolation uses the
            # opposite assignment - confirm.
            if sx == 0:
                ofx = Floor(offsetx)
                bilx = offsetx - ofx + Equal(Floor(offsetx), Ceil(offsetx))
            else:
                ofx = Ceil(offsetx)
                bilx = ofx - offsetx
            if sy == 0:
                ofy = Floor(offsety)
                bily = offsety - ofy + Equal(Floor(offsety), Ceil(offsety))
            else:
                ofy = Ceil(offsety)
                bily = ofy - offsety
            # Disabled code kept as a string literal: zero padding of the
            # feature map.
            """
            No padding
            padding1 = ConstProvider(np.zeros((inp.partial_shape[0], inp.partial_shape[1], 1, inp.partial_shape[3])))
            padding2 = ConstProvider(np.zeros((inp.partial_shape[0], inp.partial_shape[1], inp.partial_shape[2] + 2, 1)))
            arg_fea = Concat([padding1, inp, padding1], axis = 2)
            arg_fea = Concat([padding2, arg_fea, padding2], axis = 3)
            """
            arg_fea = inp
            # Disabled code kept as a string literal: adds the grid origin
            # and the dx / dy displacements to the offsets, caching the
            # constants in `dic`.
            # NOTE(review): with this block disabled, ofx / ofy hold only the
            # rounded raw offsets (no grid position) - confirm this is
            # intended.
            """
            if flag:
                #one_mat = ConstProvider(np.ones((inp.partial_shape[2], inp.partial_shape[3])), dtype = np.int32)
                one_mat = ConstProvider(1, dtype = np.int32).add_axis(0).broadcast((ofx.partial_shape[2], ofx.partial_shape[3]))
                affx = (Cumsum(one_mat, axis = 0) - 1) * stride
                affy = (Cumsum(one_mat, axis = 1) - 1) * stride
                affx = affx.dimshuffle('x', 'x', 0, 1).broadcast(list(ofx.partial_shape))
                affy = affy.dimshuffle('x', 'x', 0, 1).broadcast(list(ofy.partial_shape))
                affx = ConstProvider(affx.eval())
                affy = ConstProvider(affy.eval())
                ofx = ofx + affx
                ofy = ofy + affy
                one_mat = ConstProvider(np.ones((ker_shape, ofx.partial_shape[2], ofx.partial_shape[3])))
                #ofx[:, :ker_shape, :, :] -= 1
                #ofx[:, ker_shape*2:, :, :] += 1
                affx1 = Concat([one_mat * i for i in dx], axis = 0).dimshuffle('x', 0, 1, 2).broadcast(list(ofx.partial_shape))
                affx1 = ConstProvider(affx1.eval())
                ofx += affx1
                #ofy[:, ::3, :, :] -= 1
                #ofy[:, 2::3, :, :] += 1
                one_mat = ones((1, ofx.partial_shape[2], ofx.partial_shape[3]))
                one_mat = Concat([one_mat * i for i in dy], axis = 0)
                one_mat = Concat([one_mat] * ker_shape, axis = 0)
                affy1 = one_mat.dimshuffle('x', 0, 1, 2).broadcast(list(ofy.partial_shape))
                affy1 = ConstProvider(affy1.eval())
                ofy += affy1
                dic["affx"] = affx
                dic["affx1"] = affx1
                dic["affy"] = affy
                dic["affy1"] = affy1
            else:
                ofx = ofx + dic["affx"] + dic["affx1"]
                ofy = ofy + dic["affy"] + dic["affy1"]
            """
            # Clamp coordinates to the valid row / column range.
            ofx = Max(Min(ofx, arg_fea.partial_shape[2] - 1), 0)
            ofy = Max(Min(ofy, arg_fea.partial_shape[3] - 1), 0)

            def DeformReshape(inp, ker_shape):
                # (N, k*k, H', W') -> (N, H'*k, W'*k): interleave the k x k
                # in-window positions with the output grid.
                inp = inp.reshape(inp.partial_shape[0], ker_shape, ker_shape,
                                  inp.partial_shape[2], inp.partial_shape[3])
                inp = inp.dimshuffle(0, 3, 1, 4, 2)
                inp = inp.reshape(inp.partial_shape[0],
                                  inp.partial_shape[1] * inp.partial_shape[2],
                                  inp.partial_shape[3] * inp.partial_shape[4])
                return inp

            ofx = DeformReshape(ofx, ker_shape)
            ofy = DeformReshape(ofy, ker_shape)
            bilx = DeformReshape(bilx, ker_shape)
            bily = DeformReshape(bily, ker_shape)
            # Flat index into the (rows * columns) axis created below.
            # NOTE(review): the row coordinate is multiplied by the height
            # (partial_shape[2]); a row-major flat index needs the width
            # (partial_shape[3]). The two agree only for square maps.
            of = ofx * arg_fea.partial_shape[2] + ofy
            arg_fea = arg_fea.reshape(arg_fea.partial_shape[0],
                                      arg_fea.partial_shape[1], -1)
            of = of.reshape(ofx.partial_shape[0], -1)
            of = of.dimshuffle(0, 'x', 1)
            #of = Concat([of] * arg_fea.partial_shape[1], axis = 1)
            of = of.broadcast((of.partial_shape[0],
                               arg_fea.partial_shape[1],
                               of.partial_shape[2]))
            # NOTE(review): `dic` is not defined in this block - presumably a
            # module-level dict shared between calls; confirm. A call with
            # flag=False relies on an earlier call with flag=True having
            # filled it.
            if flag:
                # Batch and channel indices, broadcast to the shape of `of`.
                arx = Linspace(0, arg_fea.partial_shape[0],
                               arg_fea.partial_shape[0], endpoint=False)
                arx = arx.add_axis(1).add_axis(2).broadcast(of.shape)
                ary = Linspace(0, arg_fea.partial_shape[1],
                               arg_fea.partial_shape[1], endpoint=False)
                ary = ary.add_axis(0).add_axis(2).broadcast(of.shape)
                arx = arx.add_axis(3)
                ary = ary.add_axis(3)
                dic["arx"] = arx
                dic["ary"] = ary
            else:
                arx = dic["arx"]
                ary = dic["ary"]
            of = of.add_axis(3)
            idxmap = Astype(Concat([arx, ary, of], axis=3), np.int32)
            # NOTE(review): the index map computed on the previous line is
            # replaced by an all-zero NumPy array of the same shape, so every
            # gathered sample comes from index (0, 0, 0). This looks like a
            # debugging override - confirm before relying on this layer.
            idxmap = np.zeros(list(idxmap.partial_shape), dtype=np.int32)
            # Disabled code kept as a string literal: per-element gather.
            """
            sample = []
            for i in range(arg_fea.partial_shape[0]):
                for j in range(arg_fea.partial_shape[1]):
                    sample.append(arg_fea[i][j].ai[of[i][j]].dimshuffle('x', 0))
            sample = Concat(sample, axis = 0)
            """
            sample = IndexingRemap(arg_fea, idxmap).reshape(
                inp.partial_shape[0], inp.partial_shape[1],
                bilx.partial_shape[1], -1)
            # Apply the per-position weights to every channel.
            bilx = bilx.dimshuffle(0, 'x', 1, 2).broadcast(sample.shape)
            bily = bily.dimshuffle(0, 'x', 1, 2).broadcast(sample.shape)
            sample *= bilx * bily
            outputs.append(sample)
    # Sum the four corner contributions.
    output = outputs[0]
    for i in outputs[1:]:
        output += i
    # The sampled map is ker_shape times larger per axis; the trailing
    # convolution is called with stride 3 and no padding.
    return conv_bn(output, ker_shape, 3, 0, chl, isrelu)
from megskull.opr.helper.param_init import ConstantParamInitializer as C from megskull.optimizer import NaiveSGD, OptimizableFunc from megskull.network import Network from My import MyWeightDecay from megskull.opr.helper.elemwise_trans import ReLU, Identity minibatch_size = 10 patch_size = 32 net_name = "test_wc" inp = O.DataProvider("a", shape=(minibatch_size, 3)) out = O.FullyConnected("fc", inp, output_dim=3, W=C(1), nonlinearity=Identity()) W = out.inputs[1] loss = O.ZeroGrad(out.sum()) network = Network(outputs=[loss]) network.loss_var = loss """ func = OptimizableFunc.make_from_loss_var(loss) NaiveSGD(1)(func) func.compile(loss) print(func()) print(np.array(a.eval(), dtype = np.float32)) loss.Mul_Wc(10)