def capsnet(batch_size, n_class, num_routing, recon_loss_weight):
    """Create the CapsNet training symbol.

    The graph consists of a 9x9 convolution, the primary capsules, a routed
    capsule layer with one 16-dimensional capsule per class, and a
    three-layer fully connected decoder that reconstructs the flattened
    input from the capsule selected by the label.

    Parameters
    ----------
    batch_size : int
        Number of samples per batch. Used for the label shape, the one-hot
        reshape and the eager shape-inference calls.
    n_class : int
        Number of classes. Sets both the one-hot depth of the label and the
        number of output capsules, so the two always agree.
    num_routing : int
        Forwarded to ``CapsuleLayer`` as ``num_routing``.
    recon_loss_weight : float
        Weight of the reconstruction error in the loss; the margin loss is
        weighted by ``1 - recon_loss_weight``.

    Returns
    -------
    mx.sym.Group
        ``[capsule lengths (gradient blocked), loss,
        reconstruction error (gradient blocked)]``.
    """
    # data.shape = [batch_size, 1, 28, 28]
    data = mx.sym.Variable('data')
    input_shape = (1, 28, 28)

    # Conv2D layer
    # net.shape = [batch_size, 256, 20, 20]
    conv1 = mx.sym.Convolution(data=data,
                               num_filter=256,
                               kernel=(9, 9),
                               layout='NCHW',
                               name='conv1')
    conv1 = mx.sym.Activation(data=conv1, act_type='relu', name='conv1_act')

    # net.shape = [batch_size, 256, 6, 6]
    primarycaps = primary_caps(data=conv1,
                               dim_vector=8,
                               n_channels=32,
                               kernel=(9, 9),
                               strides=[2, 2],
                               name='primarycaps')
    # The return value is discarded; the call is kept so graph construction
    # behaves exactly as before.
    primarycaps.infer_shape(data=(batch_size, 1, 28, 28))

    # CapsuleLayer
    kernel_initializer = mx.init.Xavier(rnd_type='uniform', factor_type='avg', magnitude=3)
    bias_initializer = mx.init.Zero()
    # One output capsule per class: the label mask below multiplies a
    # (batch_size, 1, n_class) tensor with digitcaps, so the capsule count
    # must equal n_class (it used to be hard-coded to 10).
    # NOTE(review): assumes CapsuleLayer returns
    # (batch_size, num_capsule, dim_vector) -- confirm against its definition.
    digitcaps = CapsuleLayer(num_capsule=n_class,
                             dim_vector=16,
                             batch_size=batch_size,
                             kernel_initializer=kernel_initializer,
                             bias_initializer=bias_initializer,
                             num_routing=num_routing)(primarycaps)

    # out_caps : (batch_size, n_class) -- length of each capsule vector
    out_caps = mx.sym.sqrt(data=mx.sym.sum(mx.sym.square(digitcaps), 2))
    out_caps.infer_shape(data=(batch_size, 1, 28, 28))

    y = mx.sym.Variable('softmax_label', shape=(batch_size,))
    y_onehot = mx.sym.one_hot(y, n_class)
    # -4 splits the class axis into (n_class, 1): (batch_size, n_class, 1)
    y_reshaped = mx.sym.Reshape(data=y_onehot, shape=(batch_size, -4, n_class, -1))
    y_reshaped.infer_shape(softmax_label=(batch_size,))

    # inputs_masked : (batch_size, 16) -- the capsule of the labelled class
    inputs_masked = mx.sym.linalg_gemm2(y_reshaped, digitcaps, transpose_a=True)
    inputs_masked = mx.sym.Reshape(data=inputs_masked, shape=(-3, 0))

    # Decoder: 512 -> 1024 -> prod(input_shape), sigmoid on the last layer
    x_recon = mx.sym.FullyConnected(data=inputs_masked, num_hidden=512, name='x_recon')
    x_recon = mx.sym.Activation(data=x_recon, act_type='relu', name='x_recon_act')
    x_recon = mx.sym.FullyConnected(data=x_recon, num_hidden=1024, name='x_recon2')
    x_recon = mx.sym.Activation(data=x_recon, act_type='relu', name='x_recon_act2')
    x_recon = mx.sym.FullyConnected(data=x_recon, num_hidden=np.prod(input_shape), name='x_recon3')
    x_recon = mx.sym.Activation(data=x_recon, act_type='sigmoid', name='x_recon_act3')

    # Mean squared error between the reconstruction and the flattened input
    data_flatten = mx.sym.flatten(data=data)
    squared_error = mx.sym.square(x_recon - data_flatten)
    recon_error = mx.sym.mean(squared_error)
    # Gradient-blocked copy, exposed as an output only
    recon_error_stopped = mx.sym.BlockGrad(recon_error)

    # Operator creation order is kept as in the original expression so that
    # MXNet's auto-generated symbol names stay the same.
    margin_term = (1 - recon_loss_weight) * margin_loss(y_onehot, out_caps)
    recon_term = recon_loss_weight * recon_error
    loss = mx.symbol.MakeLoss(margin_term + recon_term)

    # Gradient-blocked capsule lengths, exposed as an output only
    out_caps_blocked = mx.sym.BlockGrad(out_caps)
    return mx.sym.Group([out_caps_blocked, loss, recon_error_stopped])
def capsnet(batch_size, n_class, num_routing, recon_loss_weight):
    """Build the symbolic CapsNet graph used for training.

    Layers, in order: a 9x9 convolution with ReLU, the primary capsules, a
    routed capsule layer producing one 16-dimensional capsule per class, and
    a fully connected decoder that reconstructs the flattened input from the
    capsule picked out by the label.

    Parameters
    ----------
    batch_size : int
        Batch size; fixes the label shape and is used in the one-hot reshape
        and the eager shape-inference calls.
    n_class : int
        Number of classes. Used as the one-hot depth and as the number of
        output capsules so that the label mask and the capsules line up.
    num_routing : int
        Forwarded to ``CapsuleLayer`` as ``num_routing``.
    recon_loss_weight : float
        Weight applied to the reconstruction error; the margin loss gets
        ``1 - recon_loss_weight``.

    Returns
    -------
    mx.sym.Group
        Three outputs: gradient-blocked capsule lengths, the loss, and the
        gradient-blocked reconstruction error.
    """
    # data.shape = [batch_size, 1, 28, 28]
    data_shape = (batch_size, 1, 28, 28)
    data = mx.sym.Variable('data')
    input_shape = (1, 28, 28)

    # Conv2D layer
    # net.shape = [batch_size, 256, 20, 20]
    conv1 = mx.sym.Convolution(
        data=data, num_filter=256, kernel=(9, 9), layout='NCHW', name='conv1')
    conv1 = mx.sym.Activation(data=conv1, act_type='relu', name='conv1_act')

    # net.shape = [batch_size, 256, 6, 6]
    primarycaps = primary_caps(
        data=conv1, dim_vector=8, n_channels=32, kernel=(9, 9), strides=[2, 2],
        name='primarycaps')
    # Result unused; the call is retained so construction behaves as before.
    primarycaps.infer_shape(data=data_shape)

    # CapsuleLayer
    kernel_initializer = mx.init.Xavier(rnd_type='uniform', factor_type='avg', magnitude=3)
    bias_initializer = mx.init.Zero()
    # The capsule count follows n_class instead of a hard-coded 10: the label
    # mask further down is (batch_size, 1, n_class) and is matrix-multiplied
    # with digitcaps, so the two sizes have to match.
    # NOTE(review): assumes CapsuleLayer yields
    # (batch_size, num_capsule, dim_vector) -- confirm against its definition.
    capsule_layer = CapsuleLayer(
        num_capsule=n_class,
        dim_vector=16,
        batch_size=batch_size,
        kernel_initializer=kernel_initializer,
        bias_initializer=bias_initializer,
        num_routing=num_routing)
    digitcaps = capsule_layer(primarycaps)

    # out_caps : (batch_size, n_class) -- Euclidean length of every capsule
    caps_squared = mx.sym.square(digitcaps)
    caps_sq_sum = mx.sym.sum(caps_squared, 2)
    out_caps = mx.sym.sqrt(data=caps_sq_sum)
    out_caps.infer_shape(data=data_shape)

    label_shape = (batch_size, )
    y = mx.sym.Variable('softmax_label', shape=label_shape)
    y_onehot = mx.sym.one_hot(y, n_class)
    # -4 splits the class axis in two, giving (batch_size, n_class, 1)
    y_reshaped = mx.sym.Reshape(data=y_onehot, shape=(batch_size, -4, n_class, -1))
    y_reshaped.infer_shape(softmax_label=label_shape)

    # inputs_masked : (batch_size, 16) -- capsule belonging to the true label
    inputs_masked = mx.sym.linalg_gemm2(y_reshaped, digitcaps, transpose_a=True)
    inputs_masked = mx.sym.Reshape(data=inputs_masked, shape=(-3, 0))

    # Decoder stack: (hidden units, activation, FC name, activation name).
    # Layer names are unchanged so existing parameter names stay valid.
    decoder_layers = (
        (512, 'relu', 'x_recon', 'x_recon_act'),
        (1024, 'relu', 'x_recon2', 'x_recon_act2'),
        (np.prod(input_shape), 'sigmoid', 'x_recon3', 'x_recon_act3'),
    )
    x_recon = inputs_masked
    for num_hidden, act_type, fc_name, act_name in decoder_layers:
        x_recon = mx.sym.FullyConnected(data=x_recon, num_hidden=num_hidden, name=fc_name)
        x_recon = mx.sym.Activation(data=x_recon, act_type=act_type, name=act_name)

    # Mean squared reconstruction error against the flattened input
    data_flatten = mx.sym.flatten(data=data)
    squared_error = mx.sym.square(x_recon - data_flatten)
    recon_error = mx.sym.mean(squared_error)
    # Reported as an output without contributing gradients
    recon_error_stopped = mx.sym.BlockGrad(recon_error)

    # Terms are created in the same order as the original single expression
    # so MXNet's auto-generated operator names are unaffected.
    weighted_margin = (1 - recon_loss_weight) * margin_loss(y_onehot, out_caps)
    weighted_recon = recon_loss_weight * recon_error
    loss = mx.symbol.MakeLoss(weighted_margin + weighted_recon)

    # Reported as an output without contributing gradients
    out_caps_blocked = mx.sym.BlockGrad(out_caps)
    return mx.sym.Group([out_caps_blocked, loss, recon_error_stopped])