def get_symbol(num_class, num_level=3, num_block=9, num_filter=16, bn_momentum=0.9, pool_kernel=(8, 8)): # data = mxc.Variable(name='data') data = 'data' # Simulate z-score normalization as that in # https://github.com/gcr/torch-residual-networks/blob/master/data/cifar-dataset.lua zscore = mxc.BatchNorm(name='zscore', data=data, fix_gamma=True, momentum=bn_momentum) conv = get_conv(name='conv0', data=zscore, num_filter=num_filter, kernel=(3, 3), stride=(1, 1), pad=(1, 1), with_relu=True, bn_momentum=bn_momentum) body = get_body(conv, num_level, num_block, num_filter, bn_momentum) pool = mxc.Pooling(data=body, name='pool', kernel=pool_kernel, pool_type='avg') # The flatten layer seems superfluous flat = mxc.Flatten(data=pool, name='flatten') fc = mxc.FullyConnected(data=flat, num_hidden=num_class, name='fc') return mxc.SoftmaxOutput(data=fc, name='softmax')
def resnet(units, num_stage, filter_list, num_class, data_type, bottle_neck=True, bn_mom=0.9, workspace=512, memonger=False): """Return ResNet symbol of cifar10 and imagenet Parameters ---------- units : list Number of units in each stage num_stage : int Number of stage filter_list : list Channel size of each stage num_class : int Ouput size of symbol dataset : str Dataset type, only cifar10 and imagenet supports workspace : int Workspace used in convolution operator """ num_unit = len(units) assert(num_unit == num_stage) # data = mxc.Variable(name='data') data = 'data' data = mxc.BatchNorm(data=data, fix_gamma=True, eps=2e-5, momentum=bn_mom, name='bn_data') if data_type == 'cifar10': body = mxc.Convolution(data=data, num_filter=filter_list[0], kernel=(3, 3), stride=(1,1), pad=(1, 1), no_bias=True, name="conv0", workspace=workspace) elif data_type == 'imagenet': body = mxc.Convolution(data=data, num_filter=filter_list[0], kernel=(7, 7), stride=(2,2), pad=(3, 3), no_bias=True, name="conv0", workspace=workspace) body = mxc.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn0') body = mxc.Activation(data=body, act_type='relu', name='relu0') body = mxc.Pooling(data=body, name="pool0", kernel=(3, 3), stride=(2,2), pad=(1,1), pool_type='max') else: raise ValueError("do not support {} yet".format(data_type)) for i in range(num_stage): body = residual_unit(body, filter_list[i+1], (1 if i==0 else 2, 1 if i==0 else 2), False, name='stage%d_unit%d' % (i + 1, 1), bottle_neck=bottle_neck, workspace=workspace, memonger=memonger) for j in range(units[i]-1): body = residual_unit(body, filter_list[i+1], (1,1), True, name='stage%d_unit%d' % (i + 1, j + 2), bottle_neck=bottle_neck, workspace=workspace, memonger=memonger) bn1 = mxc.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn1') relu1 = mxc.Activation(data=bn1, act_type='relu', name='relu1') # Although kernel is not used here when global_pool=True, we should put one # pool1 = mxc.Pooling(data=relu1, global_pool=True, kernel=(7, 7), pool_type='avg', name='pool1') pool1 = mxc.Pooling(data=relu1, kernel=(7, 7), pool_type='avg', name='pool1') flat = mxc.Flatten(data=pool1, name='flatten') fc1 = mxc.FullyConnected(data=flat, num_hidden=num_class, name='fc1')
def Inception7B(data, num_3x3, num_d3x3_red, num_d3x3_1, num_d3x3_2, pool, name): tower_3x3 = Conv(data, num_3x3, kernel=(3, 3), pad=(0, 0), stride=(2, 2), name=('%s_conv' % name)) tower_d3x3 = Conv(data, num_d3x3_red, name=('%s_tower' % name), suffix='_conv') tower_d3x3 = Conv(tower_d3x3, num_d3x3_1, kernel=(3, 3), pad=(1, 1), stride=(1, 1), name=('%s_tower' % name), suffix='_conv_1') tower_d3x3 = Conv(tower_d3x3, num_d3x3_2, kernel=(3, 3), pad=(0, 0), stride=(2, 2), name=('%s_tower' % name), suffix='_conv_2') pooling = mxc.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pad=(0,0), pool_type="max", name=('max_pool_%s_pool' % name)) concat = mxc.Concat(bottoms=[tower_3x3, tower_d3x3, pooling], name='ch_concat_%s_chconcat' % name) return concat
def InceptionFactoryB(data, num_3x3red, num_3x3, num_d3x3red, num_d3x3, name): # 3x3 reduce + 3x3 c3x3r = ConvFactory(data=data, num_filter=num_3x3red, kernel=(1, 1), name=('%s_3x3' % name), suffix='_reduce') c3x3 = ConvFactory(data=c3x3r, num_filter=num_3x3, kernel=(3, 3), pad=(1, 1), stride=(2, 2), name=('%s_3x3' % name)) # double 3x3 reduce + double 3x3 cd3x3r = ConvFactory(data=data, num_filter=num_d3x3red, kernel=(1, 1), name=('%s_double_3x3' % name), suffix='_reduce') cd3x3 = ConvFactory(data=cd3x3r, num_filter=num_d3x3, kernel=(3, 3), pad=(1, 1), stride=(1, 1), name=('%s_double_3x3_0' % name)) cd3x3 = ConvFactory(data=cd3x3, num_filter=num_d3x3, kernel=(3, 3), pad=(1, 1), stride=(2, 2), name=('%s_double_3x3_1' % name)) # pool + proj pooling = mxc.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pool_type="max", name=('max_pool_%s_pool' % name)) # concat concat = mxc.Concat(bottoms=[c3x3, cd3x3, pooling], name='ch_concat_%s_chconcat' % name) return concat
def Inception7D(data, num_3x3_red, num_3x3, num_d7_3x3_red, num_d7_1, num_d7_2, num_d7_3x3, pool, name): tower_3x3 = Conv(data=data, num_filter=num_3x3_red, name=('%s_tower' % name), suffix='_conv') tower_3x3 = Conv(data=tower_3x3, num_filter=num_3x3, kernel=(3, 3), pad=(0,0), stride=(2, 2), name=('%s_tower' % name), suffix='_conv_1') tower_d7_3x3 = Conv(data=data, num_filter=num_d7_3x3_red, name=('%s_tower_1' % name), suffix='_conv') tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=num_d7_1, kernel=(1, 7), pad=(0, 3), name=('%s_tower_1' % name), suffix='_conv_1') tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=num_d7_2, kernel=(7, 1), pad=(3, 0), name=('%s_tower_1' % name), suffix='_conv_2') tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=num_d7_3x3, kernel=(3, 3), stride=(2, 2), name=('%s_tower_1' % name), suffix='_conv_3') pooling = mxc.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pool_type=pool, name=('%s_pool_%s_pool' % (pool, name))) # concat concat = mxc.Concat(bottoms=[tower_3x3, tower_d7_3x3, pooling], name='ch_concat_%s_chconcat' % name) return concat
def InceptionFactoryA(data, num_1x1, num_3x3red, num_3x3, num_d3x3red, num_d3x3, pool, proj, name): # 1x1 c1x1 = ConvFactory(data=data, num_filter=num_1x1, kernel=(1, 1), name=('%s_1x1' % name)) # 3x3 reduce + 3x3 c3x3r = ConvFactory(data=data, num_filter=num_3x3red, kernel=(1, 1), name=('%s_3x3' % name), suffix='_reduce') c3x3 = ConvFactory(data=c3x3r, num_filter=num_3x3, kernel=(3, 3), pad=(1, 1), name=('%s_3x3' % name)) # double 3x3 reduce + double 3x3 cd3x3r = ConvFactory(data=data, num_filter=num_d3x3red, kernel=(1, 1), name=('%s_double_3x3' % name), suffix='_reduce') cd3x3 = ConvFactory(data=cd3x3r, num_filter=num_d3x3, kernel=(3, 3), pad=(1, 1), name=('%s_double_3x3_0' % name)) cd3x3 = ConvFactory(data=cd3x3, num_filter=num_d3x3, kernel=(3, 3), pad=(1, 1), name=('%s_double_3x3_1' % name)) # pool + proj pooling = mxc.Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, name=('%s_pool_%s_pool' % (pool, name))) cproj = ConvFactory(data=pooling, num_filter=proj, kernel=(1, 1), name=('%s_proj' % name)) # concat concat = mxc.Concat(bottoms=[c1x1, c3x3, cd3x3, cproj], name='ch_concat_%s_chconcat' % name) return concat
def Inception7A(data, num_1x1, num_3x3_red, num_3x3_1, num_3x3_2, num_5x5_red, num_5x5, pool, proj, name): tower_1x1 = Conv(data, num_1x1, name=('%s_conv' % name)) tower_5x5 = Conv(data, num_5x5_red, name=('%s_tower' % name), suffix='_conv') tower_5x5 = Conv(tower_5x5, num_5x5, kernel=(5, 5), pad=(2, 2), name=('%s_tower' % name), suffix='_conv_1') tower_3x3 = Conv(data, num_3x3_red, name=('%s_tower_1' % name), suffix='_conv') tower_3x3 = Conv(tower_3x3, num_3x3_1, kernel=(3, 3), pad=(1, 1), name=('%s_tower_1' % name), suffix='_conv_1') tower_3x3 = Conv(tower_3x3, num_3x3_2, kernel=(3, 3), pad=(1, 1), name=('%s_tower_1' % name), suffix='_conv_2') pooling = mxc.Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, name=('%s_pool_%s_pool' % (pool, name))) cproj = Conv(pooling, proj, name=('%s_tower_2' % name), suffix='_conv') concat = mxc.Concat(bottoms=[tower_1x1, tower_5x5, tower_3x3, cproj], name='ch_concat_%s_chconcat' % name) return concat
def Inception7E(data, num_1x1, num_d3_red, num_d3_1, num_d3_2, num_3x3_d3_red, num_3x3, num_3x3_d3_1, num_3x3_d3_2, pool, proj, name): tower_1x1 = Conv(data=data, num_filter=num_1x1, kernel=(1, 1), name=('%s_conv' % name)) tower_d3 = Conv(data=data, num_filter=num_d3_red, name=('%s_tower' % name), suffix='_conv') tower_d3_a = Conv(data=tower_d3, num_filter=num_d3_1, kernel=(1, 3), pad=(0, 1), name=('%s_tower' % name), suffix='_mixed_conv') tower_d3_b = Conv(data=tower_d3, num_filter=num_d3_2, kernel=(3, 1), pad=(1, 0), name=('%s_tower' % name), suffix='_mixed_conv_1') tower_3x3_d3 = Conv(data=data, num_filter=num_3x3_d3_red, name=('%s_tower_1' % name), suffix='_conv') tower_3x3_d3 = Conv(data=tower_3x3_d3, num_filter=num_3x3, kernel=(3, 3), pad=(1, 1), name=('%s_tower_1' % name), suffix='_conv_1') tower_3x3_d3_a = Conv(data=tower_3x3_d3, num_filter=num_3x3_d3_1, kernel=(1, 3), pad=(0, 1), name=('%s_tower_1' % name), suffix='_mixed_conv') tower_3x3_d3_b = Conv(data=tower_3x3_d3, num_filter=num_3x3_d3_2, kernel=(3, 1), pad=(1, 0), name=('%s_tower_1' % name), suffix='_mixed_conv_1') pooling = mxc.Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, name=('%s_pool_%s_pool' % (pool, name))) cproj = Conv(data=pooling, num_filter=proj, kernel=(1, 1), name=('%s_tower_2' % name), suffix='_conv') # concat concat = mxc.Concat(bottoms=[tower_1x1, tower_d3_a, tower_d3_b, tower_3x3_d3_a, tower_3x3_d3_b, cproj], name='ch_concat_%s_chconcat' % name) return concat
def Inception7C(data, num_1x1, num_d7_red, num_d7_1, num_d7_2, num_q7_red, num_q7_1, num_q7_2, num_q7_3, num_q7_4, pool, proj, name): tower_1x1 = Conv(data=data, num_filter=num_1x1, kernel=(1, 1), name=('%s_conv' % name)) tower_d7 = Conv(data=data, num_filter=num_d7_red, name=('%s_tower' % name), suffix='_conv') tower_d7 = Conv(data=tower_d7, num_filter=num_d7_1, kernel=(1, 7), pad=(0, 3), name=('%s_tower' % name), suffix='_conv_1') tower_d7 = Conv(data=tower_d7, num_filter=num_d7_2, kernel=(7, 1), pad=(3, 0), name=('%s_tower' % name), suffix='_conv_2') tower_q7 = Conv(data=data, num_filter=num_q7_red, name=('%s_tower_1' % name), suffix='_conv') tower_q7 = Conv(data=tower_q7, num_filter=num_q7_1, kernel=(7, 1), pad=(3, 0), name=('%s_tower_1' % name), suffix='_conv_1') tower_q7 = Conv(data=tower_q7, num_filter=num_q7_2, kernel=(1, 7), pad=(0, 3), name=('%s_tower_1' % name), suffix='_conv_2') tower_q7 = Conv(data=tower_q7, num_filter=num_q7_3, kernel=(7, 1), pad=(3, 0), name=('%s_tower_1' % name), suffix='_conv_3') tower_q7 = Conv(data=tower_q7, num_filter=num_q7_4, kernel=(1, 7), pad=(0, 3), name=('%s_tower_1' % name), suffix='_conv_4') pooling = mxc.Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, name=('%s_pool_%s_pool' % (pool, name))) cproj = Conv(data=pooling, num_filter=proj, kernel=(1, 1), name=('%s_tower_2' % name), suffix='_conv') # concat concat = mxc.Concat(bottoms=[tower_1x1, tower_d7, tower_q7, cproj], name='ch_concat_%s_chconcat' % name) return concat
cd3x3r = ConvFactory(data=data, num_filter=num_d3x3red, kernel=(1, 1), name=('%s_double_3x3' % name), suffix='_reduce') cd3x3 = ConvFactory(data=cd3x3r, num_filter=num_d3x3, kernel=(3, 3), pad=(1, 1), stride=(1, 1), name=('%s_double_3x3_0' % name)) cd3x3 = ConvFactory(data=cd3x3, num_filter=num_d3x3, kernel=(3, 3), pad=(1, 1), stride=(2, 2), name=('%s_double_3x3_1' % name)) # pool + proj pooling = mxc.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pool_type="max", name=('max_pool_%s_pool' % name)) # concat concat = mxc.Concat(bottoms=[c3x3, cd3x3, pooling], name='ch_concat_%s_chconcat' % name) return concat num_classes=1000 # data # data = mxc.Variable(name="data") data = 'data' # stage 1 conv1 = ConvFactory(data=data, num_filter=64, kernel=(7, 7), stride=(2, 2), pad=(3, 3), name='conv1') pool1 = mxc.Pooling(data=conv1, kernel=(3, 3), stride=(2, 2), name='pool1', pool_type='max') # stage 2 conv2red = ConvFactory(data=pool1, num_filter=64, kernel=(1, 1), stride=(1, 1), name='conv2red') conv2 = ConvFactory(data=conv2red, num_filter=192, kernel=(3, 3), stride=(1, 1), pad=(1, 1), name='conv2') pool2 = mxc.Pooling(data=conv2, kernel=(3, 3), stride=(2, 2), name='pool2', pool_type='max') # stage 2 in3a = InceptionFactoryA(pool2, 64, 64, 64, 64, 96, "avg", 32, '3a') in3b = InceptionFactoryA(in3a, 64, 64, 96, 64, 96, "avg", 64, '3b') in3c = InceptionFactoryB(in3b, 128, 160, 64, 96, '3c') # stage 3 in4a = InceptionFactoryA(in3c, 224, 64, 96, 96, 128, "avg", 128, '4a') in4b = InceptionFactoryA(in4a, 192, 96, 128, 96, 128, "avg", 128, '4b') in4c = InceptionFactoryA(in4b, 160, 128, 160, 128, 160, "avg", 128, '4c') in4d = InceptionFactoryA(in4c, 96, 128, 192, 160, 192, "avg", 128, '4d') in4e = InceptionFactoryB(in4d, 128, 192, 192, 256, '4e') # stage 4
pooling = mxc.Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, name=('%s_pool_%s_pool' % (pool, name))) cproj = Conv(data=pooling, num_filter=proj, kernel=(1, 1), name=('%s_tower_2' % name), suffix='_conv') # concat concat = mxc.Concat(bottoms=[tower_1x1, tower_d3_a, tower_d3_b, tower_3x3_d3_a, tower_3x3_d3_b, cproj], name='ch_concat_%s_chconcat' % name) return concat # In[49]: num_classes=1000 # data = mxc.Variable(name="data") data = 'data' # stage 1 conv = Conv(data, 32, kernel=(3, 3), stride=(2, 2), name="conv") conv_1 = Conv(conv, 32, kernel=(3, 3), name="conv_1") conv_2 = Conv(conv_1, 64, kernel=(3, 3), pad=(1, 1), name="conv_2") pool = mxc.Pooling(data=conv_2, kernel=(3, 3), stride=(2, 2), pool_type="max", name="pool") # stage 2 conv_3 = Conv(pool, 80, kernel=(1, 1), name="conv_3") conv_4 = Conv(conv_3, 192, kernel=(3, 3), name="conv_4") pool1 = mxc.Pooling(data=conv_4, kernel=(3, 3), stride=(2, 2), pool_type="max", name="pool1") # stage 3 in3a = Inception7A(pool1, 64, 64, 96, 96, 48, 64, "avg", 32, "mixed") in3b = Inception7A(in3a, 64, 64, 96, 96, 48, 64, "avg", 64, "mixed_1") in3c = Inception7A(in3b, 64, 64, 96, 96,