def test_linear(self):
    # Exhaustively check output shapes for small layer, output, and batch sizes.
    for in_dim, out_dim, n_batch in product(range(1, 10), range(1, 10), range(1, 10)):
        linear = Linear(in_dim, out_dim)
        x = np.random.rand(n_batch, in_dim)
        y = linear.forward(x)
        # The layer keeps the batch dimension: output shape is (n_batch, out_dim).
        self.assertEqual(y.shape, (n_batch, out_dim))
def get_model(model_config):
    input_dim = model_config['input_dim']
    fc_layer_dims = model_config['fc_layer_dims']
    activation_f = get_activation_f(model_config['activation'])
    dropout_p = model_config['dropout_p']
    batchnorm = model_config['batchnorm']

    layers = list()
    for layer_dim in fc_layer_dims[:-1]:
        layers.append(Linear(input_dim, layer_dim))
        if batchnorm:
            layers.append(BatchNorm(layer_dim, gamma=batchnorm))
        if dropout_p:
            layers.append(DropOut(dropout_p))
        layers.append(activation_f())
        input_dim = layer_dim
    layers.append(Linear(input_dim, fc_layer_dims[-1]))
    layers.append(SoftMax())
    return Model(layers)
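# Minimal usage sketch for get_model. The values below are hypothetical; only the
# config keys read above are assumed to exist, and 'relu' is a guess at a name
# accepted by get_activation_f.
example_config = {
    'input_dim': 784,
    'fc_layer_dims': [256, 128, 10],   # two hidden layers, then the output layer
    'activation': 'relu',
    'dropout_p': 0.5,
    'batchnorm': False,
}
# model = get_model(example_config)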
def __init__(self, input_size, layer_size, batch_size=1, name="", p=0.0,
             weight_init=Uniform(), inner_activation=Sigmoid(), activation=Tanh(),
             persistent=False):
    self.activation = activation
    self.inner_activation = inner_activation
    self.layer_size = layer_size
    self.batch_size = batch_size
    self.persistent = persistent
    self.h = theano.shared(numpy.zeros((batch_size, layer_size), dtype=theano.config.floatX),
                           name=name + "_h_init")
    self.rz = Sequential([
        Linear(input_size + layer_size, layer_size * 2, weight_init=weight_init, name=name + "_r"),
        LayerNormalization(layer_size * 2, name=name + "_ln_r"),
        inner_activation
    ])
    self.g = Sequential([
        Linear(input_size + layer_size, layer_size, weight_init=weight_init, name=name + "_g"),
        LayerNormalization(layer_size, name=name + "_ln_g"),
        activation,
        Dropout(p)
    ])
    self.params = self.rz.params + self.g.params
def __init__(self, input_size, image_feature_size, layer_size, p=0.0, name="", steps=16):
    self.steps = steps
    self.lstm = LNLSTM(input_size, layer_size, name=name, p=p)
    self.init = Sequential([
        Linear(image_feature_size, 2 * layer_size, name=name + "_init"),
        Tanh()
    ])
    self.params = self.init.params + self.lstm.params
def __init__(self, input_size, layer_size, batch_size, p=0.0, name="", activation=T.tanh,
             weight_init=Uniform(), persistent=False):
    self.h = theano.shared(numpy.zeros((batch_size, layer_size), dtype=theano.config.floatX),
                           name=name + "_h_init")
    self.preact = Sequential([
        Linear(input_size + layer_size, layer_size, weight_init=weight_init, name=name + "_fc"),
        LayerNormalization(layer_size, name=name + "_ln"),
        activation,
        Dropout(p)
    ])
    self.params = self.preact.params
    self.activation = activation
    self.batch_size = batch_size
    self.layer_size = layer_size
    self.input_size = input_size
    self.persistent = persistent
def __init__(self, input_size, layer_size, batch_size=1, p=0.0, name="", activation=T.tanh,
             inner_activation=T.nnet.sigmoid, weight_init=Uniform(), persistent=False):
    self.h = theano.shared(numpy.zeros((batch_size, layer_size), dtype=theano.config.floatX),
                           name=name + "_h_init")
    self.c = theano.shared(numpy.zeros((batch_size, layer_size), dtype=theano.config.floatX),
                           name=name + "_c_init")
    self.preact = Sequential([
        Linear(input_size + layer_size, layer_size * 4, weight_init=weight_init, name=name + "_ifog"),
        LayerNormalization(layer_size * 4, name=name + "_ln")
    ])
    self.params = self.preact.params
    self.dropout = Dropout(p)
    self.updates = []
    self.activation = activation
    self.inner_activation = inner_activation
    self.batch_size = batch_size
    self.layer_size = layer_size
    self.persistent = persistent
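# Hedged construction sketch: the constructor above appears to be a layer-normalized
# LSTM cell (the "_ifog" Linear produces all four gate pre-activations at once, and
# h/c are the recurrent hidden and cell states). The class name LNLSTM and the sizes
# below are assumptions for illustration only.
# cell = LNLSTM(input_size=100, layer_size=500, batch_size=32, p=0.1, name="enc")
# cell.h and cell.c start as zero matrices of shape (batch_size, layer_size).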
def make_model(n_classes, charcnn_size, charcnn_layers):
    layers = [
        OneHot(n_classes + 1),
        LayoutRNNToCNN(),
        Convolution1d(1, charcnn_size * 2, n_classes + 1, name="decconvresize"),
        BatchNormalization(charcnn_size * 2, name="decbnresize"),
        Gated(),
    ]
    for i in range(charcnn_layers):
        layers.append(HighwayConvolution1d(3, charcnn_size, dilation=1, name="decconv%d" % i))
    layers.extend([
        LayoutCNNToRNN(),
        Linear(charcnn_size, n_classes, name="classifier"),
        SoftMax()
    ])
    model = LMModel(layers)
    return model
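# Hypothetical invocation of the char-CNN language model above; the vocabulary size
# and layer counts are illustrative values only.
# lm = make_model(n_classes=255, charcnn_size=512, charcnn_layers=4)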
def make_model(z, net, sample_size, p, n_classes):
    if net == "conv":
        assert sample_size % 4 == 0
        layers = [
            OneHot(n_classes),
            LayoutRNNToCNN(),
            Convolution1d(3, 128, n_classes, pad=1, stride=2, causal=False, name="conv1"),
            BatchNormalization(128, name="bn1"),
            ReLU(),
            Convolution1d(3, 256, 128, pad=1, stride=2, causal=False, name="conv2"),
            BatchNormalization(256, name="bn2"),
            ReLU(),
            Flatten(),
            # Integer division keeps the flattened size an int: the two stride-2
            # convolutions halve the sequence length twice.
            Linear(sample_size // 4 * 256, z * 2, name="fc_encode"),
            Sampler(z),
            Linear(z, sample_size // 4 * 256, name="fc_decode"),
            ReLU(),
            Reshape((-1, 256, sample_size // 4, 1)),
            Deconvolution1D(256, 128, 3, pad=1, stride=2, name="deconv2"),
            BatchNormalization(128, name="deconv_bn2"),
            ReLU(),
            Deconvolution1D(128, 200, 3, pad=1, stride=2, name="deconv1"),
            BatchNormalization(200, name="deconv_bn1"),
            ReLU(),
            LayoutCNNToRNN(),
            Linear(200, n_classes, name="classifier"),
            SoftMax()
        ]
    elif net == "rnn":
        start_word = n_classes
        dummy_word = n_classes + 1
        layers = [
            Parallel([
                [
                    OneHot(n_classes),
                    LNLSTM(n_classes, 500, name="enc"),
                    lambda x: x[-1],
                    Linear(500, z * 2, name="encoder_fc"),
                    Sampler(z),
                ],
                [
                    Dropword(p, dummy_word=dummy_word),
                    lambda x: T.concatenate([T.ones((1, x.shape[1]), dtype='int32') * start_word, x], axis=0),
                    lambda x: x[:-1],
                    OneHot(n_classes + 2),
                ]
            ]),
            ConditionalDecoderLNLSTM(n_classes + 2, z, 500, name="dec", steps=sample_size),
            Linear(500, n_classes, name="classifier"),
            SoftMax()
        ]
    else:
        raise Exception("unknown net %s" % net)

    model = LMReconstructionModel(layers, aux_loss=False)
    return model
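# Example calls (values are illustrative only). For the convolutional variant,
# sample_size must be divisible by 4 because of the two stride-2 convolutions.
# vae_conv = make_model(z=64, net="conv", sample_size=128, p=0.0, n_classes=255)
# vae_rnn = make_model(z=64, net="rnn", sample_size=128, p=0.3, n_classes=255)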
def make_model(z, sample_size, dropword_p, n_classes, lstm_size, alpha):
    encoder = [
        OneHot(n_classes),
        LayoutRNNToCNN(),
        Convolution1d(3, 128, n_classes, pad=1, stride=2, causal=False, name="conv1"),
        BatchNormalization(128, name="bn1"),
        ReLU(),
        Convolution1d(3, 256, 128, pad=1, stride=2, causal=False, name="conv2"),
        BatchNormalization(256, name="bn2"),
        ReLU(),
        Convolution1d(3, 512, 256, pad=1, stride=2, causal=False, name="conv3"),
        BatchNormalization(512, name="bn3"),
        ReLU(),
        Convolution1d(3, 512, 512, pad=1, stride=2, causal=False, name="conv4"),
        BatchNormalization(512, name="bn4"),
        ReLU(),
        Convolution1d(3, 512, 512, pad=1, stride=2, causal=False, name="conv5"),
        BatchNormalization(512, name="bn5"),
        ReLU(),
        Flatten(),
        Linear(sample_size // (2**5) * 512, z * 2, name="fc_encode"),
        Sampler(z),
    ]

    decoder_from_z = [
        Linear(z, sample_size // (2**5) * 512, name="fc_decode"),
        ReLU(),
        Reshape((-1, 512, sample_size // (2**5), 1)),
        Deconvolution1D(512, 512, 3, pad=1, stride=2, name="deconv5"),
        BatchNormalization(512, name="deconv_bn5"),
        ReLU(),
        Deconvolution1D(512, 512, 3, pad=1, stride=2, name="deconv4"),
        BatchNormalization(512, name="deconv_bn4"),
        ReLU(),
        Deconvolution1D(512, 256, 3, pad=1, stride=2, name="deconv3"),
        BatchNormalization(256, name="deconv_bn3"),
        ReLU(),
        Deconvolution1D(256, 128, 3, pad=1, stride=2, name="deconv2"),
        BatchNormalization(128, name="deconv_bn2"),
        ReLU(),
        Deconvolution1D(128, 200, 3, pad=1, stride=2, name="deconv1"),
        BatchNormalization(200, name="deconv_bn1"),
        ReLU(),
        LayoutCNNToRNN(),
        Parallel([
            [
                Linear(200, n_classes, name="aux_classifier"),
                SoftMax(),
                Store()
            ],
            []
        ], shared_input=True),
        lambda x: x[1]
    ]

    start_word = n_classes
    dummy_word = n_classes + 1
    decoder_from_words = [
        Dropword(dropword_p, dummy_word=dummy_word),
        lambda x: T.concatenate([T.ones((1, x.shape[1]), dtype='int32') * start_word, x], axis=0),
        lambda x: x[:-1],
        OneHot(n_classes + 2),
    ]

    layers = [
        Parallel([encoder, []], shared_input=True),
        Parallel([decoder_from_z, decoder_from_words], shared_input=False),
        lambda x: T.concatenate(x, axis=2),
        LNLSTM(200 + n_classes + 2, lstm_size, name="declstm"),
        Linear(lstm_size, n_classes, name="classifier"),
        SoftMax()
    ]

    model = LMReconstructionModel(layers, aux_loss=True, alpha=alpha)
    return model
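# Example call (illustrative values). sample_size should be divisible by 2**5 = 32,
# since the encoder halves the sequence length five times before Flatten().
# hybrid = make_model(z=64, sample_size=128, dropword_p=0.3, n_classes=255,
#                     lstm_size=1000, alpha=0.2)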
def make_model(z, sample_size, dropword_p, n_classes, encdec_layers, charcnn_size, charcnn_layers, alpha):
    assert sample_size % (2**encdec_layers) == 0

    if encdec_layers == 2:
        encoder = [
            OneHot(n_classes),
            LayoutRNNToCNN(),
            Convolution1d(3, 128, n_classes, pad=1, stride=2, causal=False, name="conv1"),
            BatchNormalization(128, name="bn1", collect=False),
            ReLU(),
            Convolution1d(3, 256, 128, pad=1, stride=2, causal=False, name="conv2"),
            BatchNormalization(256, name="bn2", collect=False),
            ReLU(),
            Flatten(),
            Linear(sample_size // 4 * 256, z * 2, name="fc_encode"),
            Sampler(z),
        ]

        decoder_from_z = [
            Linear(z, sample_size // 4 * 256, name="fc_decode"),
            ReLU(),
            Reshape((-1, 256, sample_size // 4, 1)),
            Deconvolution1D(256, 128, 3, pad=1, stride=2, name="deconv2"),
            BatchNormalization(128, name="deconv_bn2", collect=False),
            ReLU(),
            Deconvolution1D(128, 200, 3, pad=1, stride=2, name="deconv1"),
            BatchNormalization(200, name="deconv_bn1", collect=False),
            ReLU(),
            LayoutCNNToRNN(),
            Parallel([
                [
                    Linear(200, n_classes, name="aux_classifier"),
                    SoftMax(),
                    Store()
                ],
                []
            ], shared_input=True),
            lambda x: x[1]
        ]
    elif encdec_layers == 3:
        encoder = [
            OneHot(n_classes),
            LayoutRNNToCNN(),
            Convolution1d(3, 128, n_classes, pad=1, stride=2, causal=False, name="conv1"),
            BatchNormalization(128, name="bn1"),
            ReLU(),
            Convolution1d(3, 256, 128, pad=1, stride=2, causal=False, name="conv2"),
            BatchNormalization(256, name="bn2"),
            ReLU(),
            Convolution1d(3, 512, 256, pad=1, stride=2, causal=False, name="conv3"),
            BatchNormalization(512, name="bn3"),
            ReLU(),
            Flatten(),
            Linear(sample_size // 8 * 512, z * 2, name="fc_encode"),
            Sampler(z),
        ]

        decoder_from_z = [
            Linear(z, sample_size // 8 * 512, name="fc_decode"),
            ReLU(),
            Reshape((-1, 512, sample_size // 8, 1)),
            Deconvolution1D(512, 256, 3, pad=1, stride=2, name="deconv3"),
            BatchNormalization(256, name="deconv_bn3", collect=False),
            ReLU(),
            Deconvolution1D(256, 128, 3, pad=1, stride=2, name="deconv2"),
            BatchNormalization(128, name="deconv_bn2", collect=False),
            ReLU(),
            Deconvolution1D(128, 200, 3, pad=1, stride=2, name="deconv1"),
            BatchNormalization(200, name="deconv_bn1", collect=False),
            ReLU(),
            LayoutCNNToRNN(),
            Parallel([
                [
                    Linear(200, n_classes, name="aux_classifier"),
                    SoftMax(),
                    Store()
                ],
                []
            ], shared_input=True),
            lambda x: x[1]
        ]
    else:
        raise Exception("unsupported number of encdec layers %d" % encdec_layers)

    start_word = n_classes
    dummy_word = n_classes + 1
    decoder_from_words = [
        Dropword(dropword_p, dummy_word=dummy_word),
        lambda x: T.concatenate([T.ones((1, x.shape[1]), dtype='int32') * start_word, x], axis=0),
        lambda x: x[:-1],
        OneHot(n_classes + 2),
    ]

    layers = [
        Parallel([encoder, []], shared_input=True),
        Parallel([decoder_from_z, decoder_from_words], shared_input=False),
        lambda x: T.concatenate(x, axis=2),
        LayoutRNNToCNN(),
        Convolution1d(1, charcnn_size * 2, 200 + n_classes + 2, name="decconvresize"),
        BatchNormalization(charcnn_size * 2, name="decbnresize"),
        Gated(),
    ]
    for i in range(charcnn_layers):
        layers.append(HighwayConvolution1d(3, charcnn_size, dilation=1, name="decconv%d" % i))
    layers.extend([
        LayoutCNNToRNN(),
        Linear(charcnn_size, n_classes, name="classifier"),
        SoftMax()
    ])

    model = LMReconstructionModel(layers, aux_loss=True, alpha=alpha)
    return model
def __init__(self):
    super(NNModel, self).__init__()
    self.layers = [Linear(784, 100), Linear(100, 100), Linear(100, 10)]
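# Usage sketch under assumptions: NNModel stacks three fully connected layers,
# presumably for 28x28 (784-dim) flattened inputs and 10 output classes. How the
# layers are chained depends on the base class, which is not shown here.
# model = NNModel()
# x = np.random.rand(32, 784)   # a hypothetical batch of 32 flattened images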