Python CrossEntropyLoss Beispiele, megskull.opr.all.CrossEntropyLoss Python Beispiele

Beispiel #1

0

Datei anzeigen

def make_network(minibatch_size=64):
    """Build a dense-block classifier for 3x32x32 inputs and attach its loss.

    Creates a "data" provider of shape (minibatch_size, 3, 32, 32) and a
    "label" provider of shape (minibatch_size,), stacks three
    dense_block/transition stages on top of an initial bn_relu_conv, averages
    twice over axis 2 and feeds the result to a 10-way FullyConnected +
    Softmax head.

    Side effects: prints the partial shape of the last feature map and shows
    the complexity table produced by CInfo.

    Returns the Network whose only output is the softmax prediction and whose
    ``loss_var`` is CrossEntropyLoss(pred, label).
    """
    side = 32
    inp = DataProvider("data", shape=(minibatch_size, 3, side, side))
    label = DataProvider("label", shape=(minibatch_size, ))

    # presumably DenseNet's growth rate and layers-per-block; confirm against
    # dense_block's signature
    growth = 20
    depth = (40 - 4) // 3
    x = bn_relu_conv(inp, 3, 1, 1, growth, False, False)

    for stage in range(3):
        block = dense_block(x, growth, depth)
        x = transition(block, stage)

    # Global average pooling (per the original author's comment), written as
    # two successive means over axis 2.
    print(x.partial_shape)
    pooled = x.mean(axis=2)
    feature = pooled.mean(axis=2)

    logits = FullyConnected("fc0",
                            feature,
                            output_dim=10,
                            nonlinearity=Identity())
    pred = Softmax("pred", logits)

    network = Network(outputs=[pred])
    network.loss_var = CrossEntropyLoss(pred, label)

    complexity = CInfo().get_complexity(network.outputs)
    complexity.as_table().show()

    return network

Beispiel #2

0

Datei anzeigen

Datei: network.py Projekt: lyuyanyii/CIFAR

def make_network(minibatch_size=64):
    """Build a dense-block classifier whose head reads the last stage directly.

    Creates a "data" provider of shape (minibatch_size, 3, 32, 32) and a
    "label" provider of shape (minibatch_size,). After an initial
    bn_relu_conv with 16 output channels, three dense_block/transition stages
    are applied; the output of the last transition is passed unchanged to a
    10-way FullyConnected + Softmax head.

    The FullyConnected weights use G(mean=0, std=sqrt(1 / partial_shape[1]))
    of that feature and the bias uses C(0).

    Returns the Network with the prediction as its only output and
    CrossEntropyLoss(pred, label) as ``loss_var``.
    """
    side = 32
    inp = DataProvider("data", shape=(minibatch_size, 3, side, side))
    label = DataProvider("label", shape=(minibatch_size, ))

    x = bn_relu_conv(inp, 3, 1, 1, 16, False, False)

    # presumably DenseNet's growth rate and layers-per-block; confirm against
    # dense_block's signature
    growth = 24
    depth = (100 - 4) // 3
    for stage in range(3):
        block = dense_block(x, growth, depth, False)
        x = transition(block, stage)

    feature = x
    init_std = (1 / feature.partial_shape[1])**0.5
    logits = FullyConnected("fc0",
                            feature,
                            output_dim=10,
                            W=G(mean=0, std=init_std),
                            b=C(0),
                            nonlinearity=Identity())
    pred = Softmax("pred", logits)

    network = Network(outputs=[pred])
    network.loss_var = CrossEntropyLoss(pred, label)
    return network

Beispiel #3

0

Datei anzeigen

def make_network(minibatch_size=128):
    """Build a plain conv_bn stack on 15-channel 32x32 inputs.

    Creates a "data" provider of shape (minibatch_size, 15, 32, 32) and a
    "label" provider of shape (minibatch_size,). One conv_bn with 16 channels
    is followed by three groups of ten conv_bn layers (32, 64 and 128
    channels). After the 32- and 64-channel groups the feature map goes
    through b_resize and a MAX Pooling2D with window 2, both created under
    the same "pooling<channels>" name.

    Side effect: prints the partial shape of the last feature map.

    Returns the Network whose outputs are the softmax prediction followed by
    the second return value of every conv_bn call; ``loss_var`` is
    CrossEntropyLoss(pred, label).
    """
    side = 32
    inp = DataProvider("data", shape=(minibatch_size, 15, side, side))
    label = DataProvider("label", shape=(minibatch_size, ))

    x, conv = conv_bn(inp, 3, 1, 1, 16, True)
    convs = [conv]
    for width in [32, 64, 128]:
        for _ in range(10):
            x, conv = conv_bn(x, 3, 1, 1, width, True)
            convs.append(conv)
        if width == 128:
            # the last group is not followed by resize/pooling
            continue
        pool_name = "pooling{}".format(width)
        x = b_resize(pool_name, x)
        x = Pooling2D(pool_name, x, window=2, mode="MAX")

    # Global average pooling (per the original author's comment), written as
    # two successive means over axis 2.
    print(x.partial_shape)
    pooled = x.mean(axis=2)
    feature = pooled.mean(axis=2)

    init_std = (1 / feature.partial_shape[1])**0.5
    logits = FullyConnected("fc0",
                            feature,
                            output_dim=10,
                            W=G(mean=0, std=init_std),
                            b=C(0),
                            nonlinearity=Identity())
    pred = Softmax("pred", logits)

    network = Network(outputs=[pred] + convs)
    network.loss_var = CrossEntropyLoss(pred, label)
    return network

Beispiel #4

0

Datei anzeigen

def make_network(minibatch_size=64):
    """Build a dense-block classifier for 3x32x32 inputs and attach its loss.

    Creates a "data" provider of shape (minibatch_size, 3, 32, 32) and a
    "label" provider of shape (minibatch_size,). An initial bn_relu_conv with
    16 output channels is followed by three dense_block/transition stages,
    two successive means over axis 2, and a 10-way FullyConnected + Softmax
    head initialised with G(mean=0, std=sqrt(1 / feature.partial_shape[1]))
    weights and C(0) bias.

    Side effect: prints the partial shape of the last feature map.

    Returns the Network with the prediction as its only output and
    CrossEntropyLoss(pred, label) as ``loss_var``.
    """
    side = 32
    inp = DataProvider("data", shape=(minibatch_size, 3, side, side))
    label = DataProvider("label", shape=(minibatch_size, ))

    x = bn_relu_conv(inp, 3, 1, 1, 16, False, False)

    # presumably DenseNet's growth rate and layers-per-block; confirm against
    # dense_block's signature
    growth = 12
    depth = (40 - 4) // 3
    for stage in range(3):
        block = dense_block(x, growth, depth)
        x = transition(block, stage)

    # Global average pooling (per the original author's comment), written as
    # two successive means over axis 2.
    print(x.partial_shape)
    pooled = x.mean(axis=2)
    feature = pooled.mean(axis=2)

    init_std = (1 / feature.partial_shape[1])**0.5
    logits = FullyConnected("fc0",
                            feature,
                            output_dim=10,
                            W=G(mean=0, std=init_std),
                            b=C(0),
                            nonlinearity=Identity())
    pred = Softmax("pred", logits)

    network = Network(outputs=[pred])
    network.loss_var = CrossEntropyLoss(pred, label)
    return network

Beispiel #5

0

Datei anzeigen

Datei: network.py Projekt: lyuyanyii/CIFAR

def make_network(minibatch_size=128, debug=False):
    """Build a res_block classifier with an average-pooled head.

    Creates a float32 "data" provider of shape (minibatch_size, 3, 32, 32)
    and an int32 "label" provider of shape (minibatch_size,). One conv_bn
    with 16 channels is followed by res_block calls for 16, 32 and 64
    channels (each with n = 18), an AVERAGE Pooling2D with window 8 /
    stride 8 / padding 0, and a 10-way FullyConnected + Softmax head.

    When ``debug`` is true, every operator reachable from the loss (as
    enumerated by NetworkVisitor) is printed together with its partial
    shape, inputs and outputs.

    Returns the Network with the prediction as its only output and
    CrossEntropyLoss(pred, label) as ``loss_var``.
    """
    side = 32
    inp = DataProvider("data",
                       shape=(minibatch_size, 3, side, side),
                       dtype=np.float32)
    label = DataProvider("label", shape=(minibatch_size, ), dtype=np.int32)

    x = conv_bn(inp, 3, 1, 1, 16, True)

    n = 18
    for width in [16, 32, 64]:
        x = res_block(x, width, n)

    # Global average pooling (per the original author's comment), done here
    # with an explicit 8x8 AVERAGE pooling operator.
    feature = Pooling2D("pooling",
                        x,
                        window=8,
                        stride=8,
                        padding=0,
                        mode="AVERAGE")
    logits = FullyConnected("fc0",
                            feature,
                            output_dim=10,
                            nonlinearity=Identity())
    pred = Softmax("pred", logits)

    network = Network(outputs=[pred])
    network.loss_var = CrossEntropyLoss(pred, label)

    if debug:
        for opr in NetworkVisitor(network.loss_var).all_oprs:
            print(opr)
            print(opr.partial_shape)
            print("input = ", opr.inputs)
            print("output = ", opr.outputs)
            print()

    return network

Beispiel #6

0

Datei anzeigen

Datei: network.py Projekt: lyuyanyii/CIFAR

def make_network(minibatch_size=128, debug=False):
    """Build a res_block classifier with an attentional_active_pooling head.

    Creates a float32 "data" provider of shape (minibatch_size, 3, 32, 32)
    and an int32 "label" provider of shape (minibatch_size,). One conv_bn
    with 16 channels is followed by three res_block calls (64, 128 and 256
    channels, each given its stage index and n = 4);
    attentional_active_pooling(lay, 10) then produces the input to Softmax.

    ``debug`` is accepted but currently has no effect: the debug dump only
    survives as an inert string literal near the end of the function.

    Returns the Network with the prediction as its only output and
    CrossEntropyLoss(pred, label) as ``loss_var``.
    """
    side = 32
    inp = DataProvider("data",
                       shape=(minibatch_size, 3, side, side),
                       dtype=np.float32)
    label = DataProvider("label", shape=(minibatch_size, ), dtype=np.int32)

    x = conv_bn(inp, 3, 1, 1, 16, True)

    n = 4
    widths = [16 * 4, 32 * 4, 64 * 4]
    for stage, width in enumerate(widths):
        x = res_block(x, width, stage, n)

    logits = attentional_active_pooling(x, 10)
    pred = Softmax("pred", logits)

    network = Network(outputs=[pred])
    network.loss_var = CrossEntropyLoss(pred, label)
    # The string below is evaluated and discarded; it is the disabled debug
    # dump, kept verbatim.
    """
	if debug:
		visitor = NetworkVisitor(network.loss_var)
		for i in visitor.all_oprs:
			print(i)
			print(i.partial_shape)
			print("input = ", i.inputs)
			print("output = ", i.outputs)
			print()
	"""

    return network

Beispiel #7

0

Datei anzeigen

def make_network(minibatch_size=128):
    """Build an xcep_layer stack whose classifier starts from orthogonal weights.

    Creates a "data" provider of shape (minibatch_size, 3, 32, 32) and a
    "label" provider of shape (minibatch_size,). One conv_bn with 16 channels
    is followed by three groups of ten xcep_layer calls (96, 192 and 384
    channels); the first two groups are each followed by a MAX Pooling2D with
    window 2. After two successive means over axis 2, a 10-way
    FullyConnected + Softmax head is applied.

    The FullyConnected weight is the first 10 columns of a matrix drawn with
    ortho_group.rvs(feature.partial_shape[1]); the bias is a zero vector of
    length 10. Both are wrapped in ConstProvider.

    Side effect: prints the partial shape of the last feature map.

    Returns the Network whose outputs are the prediction followed by the
    first conv_bn's second return value and both extra return values of
    every xcep_layer call; ``loss_var`` is CrossEntropyLoss(pred, label).
    """
    side = 32
    inp = DataProvider("data", shape=(minibatch_size, 3, side, side))
    label = DataProvider("label", shape=(minibatch_size, ))

    x, conv = conv_bn(inp, 3, 1, 1, 16, True)
    convs = [conv]
    last_width = 128 * 3
    for width in [32 * 3, 64 * 3, last_width]:
        for _ in range(10):
            x, conv1, conv2 = xcep_layer(x, width)
            convs.append(conv1)
            convs.append(conv2)
        if width != last_width:
            x = Pooling2D("pooling{}".format(width), x, window=2, mode="MAX")

    # Global average pooling (per the original author's comment), written as
    # two successive means over axis 2.
    print(x.partial_shape)
    pooled = x.mean(axis=2)
    feature = pooled.mean(axis=2)

    # Keep only the first 10 columns of the random orthogonal matrix.
    ortho = ortho_group.rvs(feature.partial_shape[1])
    W = ConstProvider(ortho[:, :10])
    b = ConstProvider(np.zeros((10, )))
    logits = FullyConnected("fc0",
                            feature,
                            output_dim=10,
                            W=W,
                            b=b,
                            nonlinearity=Identity())
    pred = Softmax("pred", logits)

    network = Network(outputs=[pred] + convs)
    network.loss_var = CrossEntropyLoss(pred, label)
    return network

Beispiel #8

0

Datei anzeigen

Datei: network.py Projekt: lyuyanyii/CIFAR10

def make_network(minibatch_size=128):
    """Build a small res_block classifier for 3x32x32 inputs.

    Creates a "data" provider of shape (minibatch_size, 3, 32, 32) and a
    "label" provider of shape (minibatch_size,). One conv_bn with 16 channels
    is followed by res_block calls for 16, 32 and 64 channels (each with
    n = 3), two successive means over axis 2, and a 10-way FullyConnected +
    Softmax head initialised with G(mean=0, std=sqrt(2 / 64)) weights and
    C(0) bias.

    Returns the Network with the prediction as its only output and
    CrossEntropyLoss(pred, label) as ``loss_var``.
    """
    side = 32
    inp = DataProvider("data", shape=(minibatch_size, 3, side, side))
    label = DataProvider("label", shape=(minibatch_size, ))

    x = conv_bn(inp, 3, 1, 1, 16, True)

    n = 3
    for width in [16, 32, 64]:
        x = res_block(x, width, n)

    # Global average pooling (per the original author's comment), written as
    # two successive means over axis 2.
    pooled = x.mean(axis=2)
    feature = pooled.mean(axis=2)

    init_std = (2 / 64)**0.5
    logits = FullyConnected("fc0",
                            feature,
                            output_dim=10,
                            W=G(mean=0, std=init_std),
                            b=C(0),
                            nonlinearity=Identity())
    pred = Softmax("pred", logits)

    network = Network(outputs=[pred])
    network.loss_var = CrossEntropyLoss(pred, label)
    return network

Beispiel #9

0

Datei anzeigen

Datei: network.py Projekt: lyuyanyii/CIFAR

def make_network(minibatch_size=128, debug=False):
    """Build a res_block classifier with an extra penalty on collected weights.

    Creates a float32 "data" provider of shape (minibatch_size, 3, 32, 32)
    and an int32 "label" provider of shape (minibatch_size,). conv_bn and
    every res_block (16, 32, 64 channels, n = 3) return the feature map plus
    weight variable(s), which are gathered in one list. The head is an
    AVERAGE Pooling2D (window 8, stride 8, padding 0) followed by a 10-way
    FullyConnected + Softmax.

    ``loss_var`` starts as CrossEntropyLoss(pred, label). For each collected
    weight, the weight is flattened to 2-D, transposed and scaled, and the
    mean squared difference between its Gram matrix and the identity is
    added (factor lmd = 1).

    When ``debug`` is true, every operator reachable from the loss (as
    enumerated by NetworkVisitor) is printed together with its partial
    shape, inputs and outputs.

    Returns the Network with the prediction as its only output.
    """
    side = 32
    inp = DataProvider("data",
                       shape=(minibatch_size, 3, side, side),
                       dtype=np.float32)
    label = DataProvider("label", shape=(minibatch_size, ), dtype=np.int32)

    x, first_w = conv_bn(inp, 3, 1, 1, 16, True)
    weights = [first_w]

    n = 3
    for width in [16, 32, 64]:
        x, block_weights = res_block(x, width, n)
        weights.extend(block_weights)

    # Global average pooling (per the original author's comment), done here
    # with an explicit 8x8 AVERAGE pooling operator.
    feature = Pooling2D("pooling",
                        x,
                        window=8,
                        stride=8,
                        padding=0,
                        mode="AVERAGE")
    logits = FullyConnected("fc0",
                            feature,
                            output_dim=10,
                            nonlinearity=Identity())
    pred = Softmax("pred", logits)

    network = Network(outputs=[pred])
    network.loss_var = CrossEntropyLoss(pred, label)

    lmd = 1
    for weight in weights:
        flat = weight.reshape(weight.partial_shape[0], -1)
        cols = flat.dimshuffle(1, 0)
        # NOTE(review): the divisor is the per-column sum of squares, not its
        # square root -- confirm this scaling is intended.
        scaled = cols / ((cols**2).sum(axis=0)).dimshuffle('x', 0)
        gram = O.MatMul(scaled.dimshuffle(1, 0), scaled)
        eye = np.identity(gram.partial_shape[0])
        network.loss_var += lmd * ((gram - eye)**2).mean()

    if debug:
        for opr in NetworkVisitor(network.loss_var).all_oprs:
            print(opr)
            print(opr.partial_shape)
            print("input = ", opr.inputs)
            print("output = ", opr.outputs)
            print()

    return network

Beispiel #10

0

Datei anzeigen

def make_network(minibatch_size=128, debug=False):
    """Build a grouped res_block classifier and show its complexity table.

    Creates a float32 "data" provider of shape (minibatch_size, 3, 32, 32)
    and an int32 "label" provider of shape (minibatch_size,). One conv_bn
    with 16 * 4 * 2 channels is followed by three res_block calls (64, 128
    and 256 channels, each given its stage index, n = 12 and group = 8), an
    AVERAGE Pooling2D (window 8, stride 8, padding 0) and a 10-way
    FullyConnected + Softmax head.

    Side effect: shows the complexity table produced by CInfo.

    ``debug`` is accepted but currently has no effect: the debug dump only
    survives as an inert string literal near the end of the function.

    Returns the Network with the prediction as its only output and
    CrossEntropyLoss(pred, label) as ``loss_var``.
    """
    side = 32
    inp = DataProvider("data",
                       shape=(minibatch_size, 3, side, side),
                       dtype=np.float32)
    label = DataProvider("label", shape=(minibatch_size, ), dtype=np.int32)

    x = conv_bn(inp, 3, 1, 1, 16 * 4 * 2, True)

    n = 4 * 3
    group = 8
    widths = [16 * 4, 32 * 4, 64 * 4]
    for stage, width in enumerate(widths):
        x = res_block(x, width, stage, n, group)

    # Global average pooling (per the original author's comment), done here
    # with an explicit 8x8 AVERAGE pooling operator.
    feature = Pooling2D("pooling",
                        x,
                        window=8,
                        stride=8,
                        padding=0,
                        mode="AVERAGE")
    logits = FullyConnected("fc0",
                            feature,
                            output_dim=10,
                            nonlinearity=Identity())
    pred = Softmax("pred", logits)

    network = Network(outputs=[pred])
    network.loss_var = CrossEntropyLoss(pred, label)

    complexity = CInfo().get_complexity(network.outputs)
    complexity.as_table().show()
    # The string below is evaluated and discarded; it is the disabled debug
    # dump, kept verbatim.
    """
	if debug:
		visitor = NetworkVisitor(network.loss_var)
		for i in visitor.all_oprs:
			print(i)
			print(i.partial_shape)
			print("input = ", i.inputs)
			print("output = ", i.outputs)
			print()
	"""

    return network

Beispiel #11

0

Datei anzeigen

Datei: network.py Projekt: lyuyanyii/CIFAR

def make_network(minibatch_size=128):
    """Build a plain conv_bn stack with an extra penalty on collected tensors.

    Creates a "data" provider of shape (minibatch_size, 3, 32, 32) and a
    "label" provider of shape (minibatch_size,). One conv_bn with 16 channels
    is followed by three groups of ten conv_bn layers (32, 64 and 128
    channels); the first two groups are each followed by a MAX Pooling2D with
    window 2. After two successive means over axis 2, a 10-way
    FullyConnected + Softmax head is applied.

    ``loss_var`` starts as CrossEntropyLoss(pred, label). For every collected
    conv_bn second return value, its first two axes are swapped, it is
    flattened to 2-D, transposed and scaled, and the summed squared
    difference between its Gram matrix and the identity is added with
    factor lmd = 0.01.

    Side effect: prints the partial shape of the last feature map.

    Returns the Network whose outputs are the prediction followed by the
    collected conv_bn values.
    """
    side = 32
    inp = DataProvider("data", shape=(minibatch_size, 3, side, side))
    label = DataProvider("label", shape=(minibatch_size, ))

    x, conv = conv_bn(inp, 3, 1, 1, 16, True)
    convs = [conv]
    for width in [32, 64, 128]:
        for _ in range(10):
            x, conv = conv_bn(x, 3, 1, 1, width, True)
            convs.append(conv)
        if width != 128:
            x = Pooling2D("pooling{}".format(width), x, window=2, mode="MAX")

    # Global average pooling (per the original author's comment), written as
    # two successive means over axis 2.
    print(x.partial_shape)
    pooled = x.mean(axis=2)
    feature = pooled.mean(axis=2)

    init_std = (1 / feature.partial_shape[1])**0.5
    logits = FullyConnected("fc0",
                            feature,
                            output_dim=10,
                            W=G(mean=0, std=init_std),
                            b=C(0),
                            nonlinearity=Identity())
    pred = Softmax("pred", logits)

    network = Network(outputs=[pred] + convs)
    network.loss_var = CrossEntropyLoss(pred, label)

    lmd = 0.01
    for conv_var in convs:
        swapped = conv_var.dimshuffle(1, 0, 2, 3)
        flat = swapped.reshape(swapped.partial_shape[0], -1)
        cols = flat.dimshuffle(1, 0)
        # NOTE(review): the divisor is the per-column sum of squares, not its
        # square root -- confirm this scaling is intended.
        scaled = cols / ((cols**2).sum(axis=0)).dimshuffle('x', 0)
        gram = MatMul(scaled.dimshuffle(1, 0), scaled)
        eye = np.identity(gram.partial_shape[0])
        network.loss_var += lmd * ((gram - eye)**2).sum()

    return network

Beispiel #12

0

Datei anzeigen

Datei: network.py Projekt: lyuyanyii/CIFAR

def make_network(minibatch_size=128, debug=False):
    """Build a plain (non-residual) conv_bn stack with 40 layers per stage.

    Creates a float32 "data" provider of shape (minibatch_size, 3, 32, 32)
    and an int32 "label" provider of shape (minibatch_size,). After an
    initial conv_bn with 16 channels, each of the three stages (16, 32, 64
    channels) applies conv_bn(…, 3, 1, 1, width, False) forty times; every
    stage except the last is followed by conv_bn(…, 2, 2, 0, next_width,
    True). Two successive means over axis 2 feed a 10-way FullyConnected +
    Softmax head.

    ``debug`` is accepted but currently has no effect: the debug dump only
    survives as an inert string literal near the end of the function.

    Returns the Network with the prediction as its only output and
    CrossEntropyLoss(pred, label) as ``loss_var``.
    """
    side = 32
    inp = DataProvider("data",
                       shape=(minibatch_size, 3, side, side),
                       dtype=np.float32)
    label = DataProvider("label", shape=(minibatch_size, ), dtype=np.int32)

    x = conv_bn(inp, 3, 1, 1, 16, True)

    widths = [16, 32, 64]
    last_stage = len(widths) - 1
    for stage, width in enumerate(widths):
        for _ in range(40):
            x = conv_bn(x, 3, 1, 1, width, False)
        if stage != last_stage:
            # presumably a stride-2 downsampling conv into the next width;
            # confirm against conv_bn's argument order
            x = conv_bn(x, 2, 2, 0, widths[stage + 1], True)

    # Global average pooling (per the original author's comment), written as
    # two successive means over axis 2.
    pooled = x.mean(axis=2)
    feature = pooled.mean(axis=2)
    logits = FullyConnected("fc0",
                            feature,
                            output_dim=10,
                            nonlinearity=Identity())
    pred = Softmax("pred", logits)

    network = Network(outputs=[pred])
    network.loss_var = CrossEntropyLoss(pred, label)
    # The string below is evaluated and discarded; it is the disabled debug
    # dump, kept verbatim.
    """
	if debug:
		visitor = NetworkVisitor(network.loss_var)
		for i in visitor.all_oprs:
			print(i)
			print(i.partial_shape)
			print("input = ", i.inputs)
			print("output = ", i.outputs)
			print()
	"""

    return network

Beispiel #13

0

Datei anzeigen

def make_network(minibatch_size=128):
    """Build a minimal IndexingRemap test network.

    Creates a "data" provider of shape (minibatch_size, 3, 32, 32) and a
    "label" provider of shape (minibatch_size,), remaps the data through
    IndexingRemap with an all-zero int32 index map, and exposes the remapped
    tensor as the network's only output. ``loss_var`` is the sum of a
    1-output FullyConnected layer ("fc") applied to the remapped tensor.

    The original function continued after this point with a convolutional
    classifier, but that code sat behind an unconditional ``return`` and
    could never execute; it has been removed. Runtime behaviour is unchanged.

    Returns the Network described above.
    """
    patch_size = 32
    inp = DataProvider("data",
                       shape=(minibatch_size, 3, patch_size, patch_size))
    # Not used by the loss below; the provider is still created, exactly as
    # in the original code.
    label = DataProvider("label", shape=(minibatch_size, ))
    # NOTE(review): the index-map shape is hard-coded to (128, 3, 32, 32, 4)
    # and does not follow minibatch_size / patch_size -- confirm whether it
    # should track them.
    idxmap = np.zeros((128, 3, 32, 32, 4), dtype=np.int32)
    sample = IndexingRemap(inp, idxmap)
    network = Network(outputs=[sample])
    sample = FullyConnected("fc", sample, output_dim=1)
    network.loss_var = sample.sum()
    return network

Beispiel #14

0

Datei anzeigen

Datei: network.py Projekt: lyuyanyii/CIFAR

def make_network(minibatch_size=64):
    """Build a dense-block classifier with an extra penalty on collected weights.

    Creates a "data" provider of shape (minibatch_size, 3, 32, 32) and a
    "label" provider of shape (minibatch_size,). bn_relu_conv, dense_block
    and transition each return the feature map plus weight variable(s),
    which are gathered in one list across three dense_block/transition
    stages. After two successive means over axis 2, a 10-way
    FullyConnected + Softmax head is applied.

    ``loss_var`` starts as CrossEntropyLoss(pred, label). For each collected
    weight that is not None, the weight is flattened to 2-D, transposed and
    scaled, and the summed squared difference between its Gram matrix and
    the identity is added with factor lmd = 0.01.

    Side effects: prints the partial shape of the last feature map and of
    every non-None collected weight.

    Returns the Network with the prediction as its only output.
    """
    side = 32
    inp = DataProvider("data", shape=(minibatch_size, 3, side, side))
    label = DataProvider("label", shape=(minibatch_size, ))

    x, first_w = bn_relu_conv(inp, 3, 1, 1, 16, False, False)
    weights = [first_w]

    # presumably DenseNet's growth rate and layers-per-block; confirm against
    # dense_block's signature
    growth = 12
    depth = (40 - 4) // 3
    for stage in range(3):
        x, block_weights = dense_block(x, growth, depth)
        weights.extend(block_weights)
        x, trans_weights = transition(x, stage)
        weights.extend(trans_weights)

    # Global average pooling (per the original author's comment), written as
    # two successive means over axis 2.
    print(x.partial_shape)
    pooled = x.mean(axis=2)
    feature = pooled.mean(axis=2)
    logits = FullyConnected("fc0",
                            feature,
                            output_dim=10,
                            nonlinearity=Identity())
    pred = Softmax("pred", logits)

    network = Network(outputs=[pred])
    network.loss_var = CrossEntropyLoss(pred, label)

    lmd = 0.01
    for weight in weights:
        if weight is not None:
            print(weight.partial_shape)
            flat = weight.reshape(weight.partial_shape[0], -1)
            cols = flat.dimshuffle(1, 0)
            # NOTE(review): the divisor is the per-column sum of squares,
            # not its square root -- confirm this scaling is intended.
            scaled = cols / ((cols**2).sum(axis=0)).dimshuffle('x', 0)
            gram = O.MatMul(scaled.dimshuffle(1, 0), scaled)
            eye = np.identity(gram.partial_shape[0])
            network.loss_var += lmd * ((gram - eye)**2).sum()

    return network

Beispiel #15

0

Datei anzeigen

Datei: network.py Projekt: lyuyanyii/CIFAR

def make_network(minibatch_size=128, debug=False):
    """Build a conv_bn stack whose class scores come from two projections.

    Creates a float32 "data" provider of shape (minibatch_size, 3, 32, 32)
    and an int32 "label" provider of shape (minibatch_size,). After an
    initial conv_bn with 16 channels, each of the three stages (16, 32, 64
    channels) applies conv_bn ten times; every stage except the last is
    followed by conv_bn(…, 2, 2, 0, next_width, True).

    The last feature map is reshaped so that its two trailing axes are merged.
    Two random parameters "a" and "b" (drawn in that order with
    np.random.randn and scaled by sqrt(1 / merged_size)) are broadcast over
    the batch axis and multiplied with the reshaped map via BatchedMatMul.
    The two products are multiplied together, divided by 64, and the ten
    diagonal entries are concatenated into the Softmax input.

    Side effects: prints the partial shapes of the reshaped map and of both
    broadcast parameters; consumes numbers from NumPy's global RNG.

    ``debug`` is accepted but currently has no effect: the debug dump only
    survives as an inert string literal near the end of the function.

    Returns the Network with the prediction as its only output and
    CrossEntropyLoss(pred, label) as ``loss_var``.
    """
    side = 32
    inp = DataProvider("data",
                       shape=(minibatch_size, 3, side, side),
                       dtype=np.float32)
    label = DataProvider("label", shape=(minibatch_size, ), dtype=np.int32)

    lay = conv_bn(inp, 3, 1, 1, 16, True)

    widths = [16, 32, 64]
    last_stage = len(widths) - 1
    for stage, width in enumerate(widths):
        for _ in range(10):
            lay = conv_bn(lay, 3, 1, 1, width, True)
        if stage != last_stage:
            lay = conv_bn(lay, 2, 2, 0, widths[stage + 1], True)

    lay = lay.reshape(lay.shape[0], lay.shape[1], lay.shape[2] * lay.shape[3])
    print(lay.partial_shape)

    def batched_param(name):
        # Draw a (merged_size, 10) parameter, add a leading axis and
        # broadcast it to the batch size of `lay`; prints the final shape.
        init = (np.random.randn(lay.partial_shape[2], 10) *
                (1 / lay.partial_shape[2])**0.5)
        param = O.ParamProvider(name, init)
        param = param.dimshuffle('x', 0, 1)
        param = param.broadcast((lay.partial_shape[0],
                                 param.partial_shape[1],
                                 param.partial_shape[2]))
        print(param.partial_shape)
        return param

    # Order matters: "a" must be drawn before "b" to keep the RNG sequence.
    a = batched_param("a")
    b = batched_param("b")

    fca = O.BatchedMatMul(lay, a)
    fcb = O.BatchedMatMul(lay, b)
    fc = O.BatchedMatMul(fca.dimshuffle(0, 2, 1), fcb) / 64
    # Keep only the diagonal entries fc[:, i, i], one column per class index.
    outs = [fc[:, cls, cls].dimshuffle(0, 'x') for cls in range(10)]
    fc = O.Concat(outs, axis=1)
    pred = Softmax("pred", fc)
    # The string below is evaluated and discarded; it is the disabled
    # FullyConnected head, kept verbatim.
    """
	pred = Softmax("pred", FullyConnected(
		"fc0", feature, output_dim = 10,
		#W = G(mean = 0, std = (1 / 64)**0.5),
		#b = C(0),
		nonlinearity = Identity()
		))
	"""

    network = Network(outputs=[pred])
    network.loss_var = CrossEntropyLoss(pred, label)
    # The string below is evaluated and discarded; it is the disabled debug
    # dump, kept verbatim.
    """
	if debug:
		visitor = NetworkVisitor(network.loss_var)
		for i in visitor.all_oprs:
			print(i)
			print(i.partial_shape)
			print("input = ", i.inputs)
			print("output = ", i.outputs)
			print()
	"""

    return network

Beispiel #16

0

Datei anzeigen

Datei: mnist_network.py Projekt: lyuyanyii/CIFAR10

# Tail of a module-level network definition. `pooling1` and `minibatch_size`
# are defined before this excerpt.

# 3x3 convolution with 10 output channels, padding 1 and ReLU.
# presumably std = sqrt(1 / fan_in) with fan_in = 5 channels * 3 * 3;
# confirm against pooling1's channel count.
conv3 = Conv2D("conv3", pooling1, kernel_shape = 3, output_nr_channel = 10, 
			W = G(mean = 0.0001, std = (1 / (5 * 3 * 3))**0.5),
			b = C(0),
			padding = (1, 1),
			nonlinearity = ReLU())
# Second 3x3 convolution, again 10 output channels, padding 1 and ReLU;
# the std now uses 10 * 3 * 3, matching conv3's 10 output channels.
conv4 = Conv2D("conv4", conv3, kernel_shape = 3, output_nr_channel = 10,
			W = G(mean = 0.0001, std = (1 / (10 * 3 * 3))**0.5),
			b = C(0),
			padding = (1, 1),
			nonlinearity = ReLU())
# 2x2 pooling with mode "max".
pooling2 = Pooling2D("pooling2", conv4, window = (2, 2), mode = "max")

# Flatten to rows of 7 * 7 * 10 = 490 values.
# assumes pooling2 yields 10 channels of 7x7 per sample -- TODO confirm
feature = pooling2.reshape((-1, 7 * 7 * 10))
# Hidden fully connected layer: 490 -> 100 with ReLU; std = sqrt(1 / 490).
fc1 = FC("fc1", feature, output_dim = 100,
			W = G(mean = 0.0001, std = (1 / 490)**0.5),
			b = C(0),
			nonlinearity = ReLU())
# Output layer: 100 -> 10 with Identity nonlinearity; std = sqrt(1 / 100).
fc2 = FC("fc2", fc1, output_dim = 10,
			W = G(mean = 0, std = (1 / 100)**0.5),
			b = C(0),
			nonlinearity = Identity())
# Disabled hand-written softmax, superseded by the Softmax operator below.
#output_mat = Exp(fc2) / Exp(fc2).sum(axis = 1).dimshuffle(0, 'x')
pred = Softmax("pred", fc2)

# int32 label input, one entry per sample in the minibatch.
label = DataProvider(name = "label", shape = (minibatch_size, ), dtype = np.int32)
# Disabled hand-written cross entropy, superseded by CrossEntropyLoss below.
#loss = -Log(indexing_one_hot(output_mat, 1, label)).mean()
loss = CrossEntropyLoss(pred, label)

# Network is given the prediction and the loss positionally.
network = Network(pred, loss)

Beispiel #17

0

Datei anzeigen

Datei: vgg16.py Projekt: songzhaozhe/channel-pruning

def make_network():
    """Build the VGG16-style network with a cross-entropy loss.

    Creates a "data" provider of shape (200, 3, 224, 224) and a "label"
    provider of shape (200,). Five convolution stages follow, each a run of
    create_conv_relu layers (ksize 3, stride 1, pad 1) closed by a MAX
    CaffePooling2D with window 2, stride 2 and padding 0:

        stage 1: conv1_1, conv1_2            -> 64 outputs,  pool1
        stage 2: conv2_1, conv2_2            -> 128 outputs, pool2
        stage 3: conv3_1, conv3_2, conv3_3   -> 256 outputs, pool3
        stage 4: conv4_1, conv4_2, conv4_3   -> 512 outputs, pool4
        stage 5: conv5_1, conv5_2, conv5_3   -> 512 outputs, pool5

    Then come fc6 and fc7 (4096 outputs each, Identity nonlinearity followed
    by an explicit ReLU), fc8 (1000 outputs, Identity) and the "cls_softmax"
    Softmax.

    Returns the RawNetworkBuilder created with inputs [data, label], the
    softmax as its only output and CrossEntropyLoss(softmax, label) as loss.
    """
    batch_size = 200
    img_size = 224

    data = DataProvider("data", shape=(batch_size, 3, img_size, img_size))
    label = DataProvider("label", shape=(batch_size, ))

    # (conv layer names, number of outputs, name of the closing pool layer)
    conv_stages = (
        (("conv1_1", "conv1_2"), 64, "pool1"),
        (("conv2_1", "conv2_2"), 128, "pool2"),
        (("conv3_1", "conv3_2", "conv3_3"), 256, "pool3"),
        (("conv4_1", "conv4_2", "conv4_3"), 512, "pool4"),
        (("conv5_1", "conv5_2", "conv5_3"), 512, "pool5"),
    )
    f = data
    for conv_names, num_outputs, pool_name in conv_stages:
        for conv_name in conv_names:
            f = create_conv_relu(conv_name,
                                 f,
                                 ksize=3,
                                 stride=1,
                                 pad=1,
                                 num_outputs=num_outputs)
        f = CaffePooling2D(pool_name,
                           f,
                           window=2,
                           stride=2,
                           padding=0,
                           mode="MAX")

    # (layer name, output dimension, whether a ReLU follows the layer)
    fc_layers = (
        ("fc6", 4096, True),
        ("fc7", 4096, True),
        ("fc8", 1000, False),
    )
    for fc_name, output_dim, followed_by_relu in fc_layers:
        f = FullyConnected(
            fc_name,
            f,
            output_dim=output_dim,
            nonlinearity=mgsk.opr.helper.elemwise_trans.Identity())
        if followed_by_relu:
            f = ReLU(f)

    f = Softmax("cls_softmax", f)

    return RawNetworkBuilder(inputs=[data, label],
                             outputs=[f],
                             loss=CrossEntropyLoss(f, label))