def __init__(self, train_x, train_y, test_x, test_y, psgd_type,
             optimizer_type, server_codec, epoches, server_type='asgd',
             target_acc=None, learn_rate=0.05, codec=None):
    super().__init__(train_x, train_y, test_x, test_y, psgd_type,
                     optimizer_type, server_codec, epoches, server_type,
                     target_acc, learn_rate)

    # Build the network: two 5x5 conv blocks and two 3x3 conv blocks, each
    # pair followed by 2x2 max pooling, then three fully connected layers
    # ending in a 10-way softmax. Conv2dLayer, MaxPool, Reshape, FCLayer_v2,
    # get_activation and get_codec are this project's own layer/codec helpers.
    self.__nn = []
    self.__nn.append(Conv2dLayer([5, 5], 64, 'SAME', [1, 1]))
    self.__nn.append(Conv2dLayer([5, 5], 64, 'SAME', [1, 1]))
    self.__nn.append(MaxPool([2, 2]))
    self.__nn.append(Conv2dLayer([3, 3], 64, 'SAME', [1, 1]))
    self.__nn.append(Conv2dLayer([3, 3], 64, 'SAME', [1, 1]))
    self.__nn.append(MaxPool([2, 2]))
    self.__nn.append(Reshape([12288]))
    self.__nn.append(FCLayer_v2(1024, act=get_activation('tanh')))
    self.__nn.append(FCLayer_v2(784, act=get_activation('tanh')))
    self.__nn.append(FCLayer_v2(10, act=get_activation('softmax')))

    # One gradient codec per layer: default to 'plain' everywhere; if the
    # given list does not match the layer count, repeat its first entry
    # for every layer.
    if codec is None:
        codec = ['plain' for _ in self.__nn]
    elif len(codec) != len(self.__nn):
        codec = codec[:1] * len(self.__nn)
    self.__codec = [get_codec(c_str) for c_str in codec]
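# --- Usage sketch (not part of the original class) ---------------------------
# A minimal, self-contained illustration of the codec-normalization rule in
# __init__ above. `normalize_codec` is a hypothetical helper written only for
# this example (the class inlines the same logic), and 'ccdc' is an example
# codec string, not necessarily one the project defines.
def normalize_codec(codec, n_layers):
    """Return one codec name per layer, mirroring the rule in __init__."""
    if codec is None:
        # No codec given: every layer transfers gradients with 'plain'.
        return ['plain'] * n_layers
    if len(codec) != n_layers:
        # Length mismatch: broadcast the first entry to every layer.
        return codec[:1] * n_layers
    return codec

# A single entry is broadcast to all layers; None falls back to 'plain'.
assert normalize_codec(None, 10) == ['plain'] * 10
assert normalize_codec(['ccdc'], 10) == ['ccdc'] * 10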
import theano.tensor as T


def make_model(z, sample_size, dropword_p, n_classes, encdec_layers,
               charcnn_size, charcnn_layers, alpha):
    # Each strided convolution halves the sequence length, so the sample
    # size must be divisible by 2 ** encdec_layers.
    assert sample_size % (2 ** encdec_layers) == 0

    if encdec_layers == 2:
        # Encoder: two strided conv blocks, then a linear layer producing
        # the mean and log-variance for the latent sampler.
        encoder = [
            OneHot(n_classes),
            LayoutRNNToCNN(),
            Convolution1d(3, 128, n_classes, pad=1, stride=2, causal=False,
                          name="conv1"),
            BatchNormalization(128, name="bn1", collect=False),
            ReLU(),
            Convolution1d(3, 256, 128, pad=1, stride=2, causal=False,
                          name="conv2"),
            BatchNormalization(256, name="bn2", collect=False),
            ReLU(),
            Flatten(),
            Linear(sample_size // 4 * 256, z * 2, name="fc_encode"),
            Sampler(z),
        ]

        # Decoder: mirror the encoder with deconvolutions; the auxiliary
        # classifier branch stores per-position predictions for the aux loss.
        decoder_from_z = [
            Linear(z, sample_size // 4 * 256, name="fc_decode"),
            ReLU(),
            Reshape((-1, 256, sample_size // 4, 1)),
            Deconvolution1D(256, 128, 3, pad=1, stride=2, name="deconv2"),
            BatchNormalization(128, name="deconv_bn2", collect=False),
            ReLU(),
            Deconvolution1D(128, 200, 3, pad=1, stride=2, name="deconv1"),
            BatchNormalization(200, name="deconv_bn1", collect=False),
            ReLU(),
            LayoutCNNToRNN(),
            Parallel([[
                Linear(200, n_classes, name="aux_classifier"),
                SoftMax(),
                Store()
            ], []], shared_input=True),
            lambda x: x[1]
        ]
    elif encdec_layers == 3:
        encoder = [
            OneHot(n_classes),
            LayoutRNNToCNN(),
            Convolution1d(3, 128, n_classes, pad=1, stride=2, causal=False,
                          name="conv1"),
            BatchNormalization(128, name="bn1"),
            ReLU(),
            Convolution1d(3, 256, 128, pad=1, stride=2, causal=False,
                          name="conv2"),
            BatchNormalization(256, name="bn2"),
            ReLU(),
            Convolution1d(3, 512, 256, pad=1, stride=2, causal=False,
                          name="conv3"),
            BatchNormalization(512, name="bn3"),
            ReLU(),
            Flatten(),
            Linear(sample_size // 8 * 512, z * 2, name="fc_encode"),
            Sampler(z),
        ]

        decoder_from_z = [
            Linear(z, sample_size // 8 * 512, name="fc_decode"),
            ReLU(),
            Reshape((-1, 512, sample_size // 8, 1)),
            Deconvolution1D(512, 256, 3, pad=1, stride=2, name="deconv3"),
            BatchNormalization(256, name="deconv_bn3", collect=False),
            ReLU(),
            Deconvolution1D(256, 128, 3, pad=1, stride=2, name="deconv2"),
            BatchNormalization(128, name="deconv_bn2", collect=False),
            ReLU(),
            Deconvolution1D(128, 200, 3, pad=1, stride=2, name="deconv1"),
            BatchNormalization(200, name="deconv_bn1", collect=False),
            ReLU(),
            LayoutCNNToRNN(),
            Parallel([[
                Linear(200, n_classes, name="aux_classifier"),
                SoftMax(),
                Store()
            ], []], shared_input=True),
            lambda x: x[1]
        ]
    else:
        raise ValueError(
            "unsupported number of encdec layers %d" % encdec_layers)

    # Teacher-forcing input path: apply word dropout (replacing words with a
    # dummy token), prepend the start token, shift right by one step, and
    # one-hot encode with two extra ids for the start and dummy tokens.
    start_word = n_classes
    dummy_word = n_classes + 1
    decoder_from_words = [
        Dropword(dropword_p, dummy_word=dummy_word),
        lambda x: T.concatenate(
            [T.ones((1, x.shape[1]), dtype='int32') * start_word, x], axis=0),
        lambda x: x[:-1],
        OneHot(n_classes + 2),
    ]

    # Full model: concatenate the z-conditioned decoder features with the
    # dropped-out word inputs, resize with a 1x1 gated convolution, then run
    # a stack of highway char-CNN layers before the final classifier.
    layers = [
        Parallel([encoder, []], shared_input=True),
        Parallel([decoder_from_z, decoder_from_words], shared_input=False),
        lambda x: T.concatenate(x, axis=2),
        LayoutRNNToCNN(),
        Convolution1d(1, charcnn_size * 2, 200 + n_classes + 2,
                      name="decconvresize"),
        BatchNormalization(charcnn_size * 2, name="decbnresize"),
        Gated(),
    ]
    for i in range(charcnn_layers):
        layers.append(
            HighwayConvolution1d(3, charcnn_size, dilation=1,
                                 name="decconv%d" % i))
    layers.extend([
        LayoutCNNToRNN(),
        Linear(charcnn_size, n_classes, name="classifier"),
        SoftMax()
    ])

    # alpha weights the auxiliary reconstruction loss against the main one.
    model = LMReconstructionModel(layers, aux_loss=True, alpha=alpha)
    return model
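# --- Usage sketch (illustrative values, not from the repository) -------------
# Builds the 2-layer encoder/decoder variant. The argument values below are
# assumptions for demonstration only; sample_size must satisfy the assert at
# the top of make_model (128 is divisible by 2**2 and 2**3, so it works for
# both variants).
if __name__ == "__main__":
    model = make_model(z=32, sample_size=128, dropword_p=0.3, n_classes=101,
                       encdec_layers=2, charcnn_size=512, charcnn_layers=4,
                       alpha=0.2)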