Example 1
 def test_linear(self):
     for in_dim, out_dim, n_batch in product(range(1, 10), range(1, 10),
                                             range(1, 10)):
         linear = Linear(in_dim, out_dim)
         x = np.random.rand(n_batch, in_dim)
         y = linear.forward(x)
     self.assertEqual(y.shape, (n_batch, out_dim))
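For context, the test above assumes a fully connected layer whose forward pass maps a (n_batch, in_dim) input to a (n_batch, out_dim) output. A minimal sketch of such a layer is shown below; the Linear name and the forward() signature come from the test itself, while the weight initialization is an assumption.

import numpy as np

class Linear:
    # Minimal fully connected layer sketch: y = x @ W + b.
    def __init__(self, in_dim, out_dim):
        # Small random weights; the real initialization scheme is not shown here.
        self.W = np.random.randn(in_dim, out_dim) * 0.01
        self.b = np.zeros(out_dim)

    def forward(self, x):
        # x has shape (n_batch, in_dim); the result has shape (n_batch, out_dim).
        return x @ self.W + self.b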
Example 2
def get_model(model_config):
    input_dim = model_config['input_dim']
    fc_layer_dims = model_config['fc_layer_dims']
    activation_f = get_activation_f(model_config['activation'])
    dropout_p = model_config['dropout_p']
    batchnorm = model_config['batchnorm']

    layers = list()
    for layer_dim in fc_layer_dims[:-1]:
        layers.append(Linear(input_dim, layer_dim))
        if batchnorm:
            layers.append(BatchNorm(layer_dim, gamma=batchnorm))
        if dropout_p:
            layers.append(DropOut(dropout_p))
        layers.append(activation_f())
        input_dim = layer_dim
    layers.append(Linear(input_dim, fc_layer_dims[-1]))
    layers.append(SoftMax())
    return Model(layers)
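A hedged usage sketch for get_model(): the key names below are the ones the function reads, while the concrete values are placeholders rather than values taken from the original code.

model_config = {
    'input_dim': 784,
    'fc_layer_dims': [256, 128, 10],  # hidden layers plus the output layer
    'activation': 'relu',             # resolved through get_activation_f()
    'dropout_p': 0.5,                 # a falsy value skips the DropOut layers
    'batchnorm': True,                # a falsy value skips the BatchNorm layers
}
model = get_model(model_config)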
Example 3
    def __init__(self, input_size, layer_size, batch_size=1, name="", p=0.0, weight_init=Uniform(),
                 inner_activation=Sigmoid(), activation=Tanh(), persistent=False):
        self.activation = activation
        self.inner_activation = inner_activation
        self.layer_size = layer_size
        self.batch_size = batch_size
        self.persistent = persistent
        self.h = theano.shared(numpy.zeros((batch_size, layer_size), dtype=theano.config.floatX), name=name + "_h_init")

        self.rz = Sequential([
            Linear(input_size+layer_size, layer_size * 2, weight_init=weight_init, name=name+"_r"),
            LayerNormalization(layer_size * 2, name=name+"_ln_r"),
            inner_activation
        ])
        self.g = Sequential([
            Linear(input_size+layer_size, layer_size, weight_init=weight_init, name=name+"_g"),
            LayerNormalization(layer_size, name=name+"_ln_g"),
            activation,
            Dropout(p)
        ])

        self.params = self.rz.params + self.g.params
Example 4
 def __init__(self,
              input_size,
              image_feature_size,
              layer_size,
              p=0.0,
              name="",
              steps=16):
     self.steps = steps
     self.lstm = LNLSTM(input_size, layer_size, name=name, p=p)
     self.init = Sequential([
         Linear(image_feature_size, 2 * layer_size, name=name + "_init"),
         Tanh()
     ])
     self.params = self.init.params + self.lstm.params
Example 5
    def __init__(self, input_size, layer_size, batch_size, p=0.0,
                 name="", activation=T.tanh, weight_init=Uniform(), persistent=False):
        self.h = theano.shared(numpy.zeros((batch_size, layer_size), dtype=theano.config.floatX), name=name+"_h_init")

        self.preact = Sequential([
            Linear(input_size+layer_size, layer_size, weight_init=weight_init, name=name+"_fc"),
            LayerNormalization(layer_size, name=name+"_ln"),
            activation,
            Dropout(p)
        ])
        self.params = self.preact.params

        self.activation = activation
        self.batch_size = batch_size
        self.layer_size = layer_size
        self.input_size = input_size
        self.persistent = persistent
Example 6
    def __init__(self, input_size, layer_size, batch_size=1, p=0.0,
                 name="", activation=T.tanh, inner_activation=T.nnet.sigmoid, weight_init=Uniform(), persistent=False):

        self.h = theano.shared(numpy.zeros((batch_size, layer_size), dtype=theano.config.floatX), name=name+"_h_init")
        self.c = theano.shared(numpy.zeros((batch_size, layer_size), dtype=theano.config.floatX), name=name+"_c_init")

        self.params = []
        self.preact = Sequential([
            Linear(input_size+layer_size, layer_size * 4, weight_init=weight_init, name=name+"_ifog"),
            LayerNormalization(layer_size * 4, name=name + "_ln")
        ])
        self.params = self.preact.params

        self.dropout = Dropout(p)

        self.updates = []
        self.activation = activation
        self.inner_activation = inner_activation
        self.batch_size = batch_size
        self.layer_size = layer_size
        self.persistent = persistent
Example 7
def make_model(n_classes, charcnn_size, charcnn_layers):
    layers = [
        OneHot(n_classes + 1),
        LayoutRNNToCNN(),
        Convolution1d(1, charcnn_size * 2, n_classes + 1,
                      name="decconvresize"),
        BatchNormalization(charcnn_size * 2, name="decbnresize"),
        Gated(),
    ]
    for i in range(charcnn_layers):
        layers.append(
            HighwayConvolution1d(3,
                                 charcnn_size,
                                 dilation=1,
                                 name="decconv%d" % i))
    layers.extend([
        LayoutCNNToRNN(),
        Linear(charcnn_size, n_classes, name="classifier"),
        SoftMax()
    ])

    model = LMModel(layers)

    return model
Example 8
def make_model(z, net, sample_size, p, n_classes):
    if net == "conv":
        assert sample_size % 4 == 0
        layers = [
            OneHot(n_classes),
            LayoutRNNToCNN(),
            Convolution1d(3,
                          128,
                          n_classes,
                          pad=1,
                          stride=2,
                          causal=False,
                          name="conv1"),
            BatchNormalization(128, name="bn1"),
            ReLU(),
            Convolution1d(3,
                          256,
                          128,
                          pad=1,
                          stride=2,
                          causal=False,
                          name="conv2"),
            BatchNormalization(256, name="bn2"),
            ReLU(),
            Flatten(),
            Linear(sample_size // 4 * 256, z * 2, name="fc_encode"),
            Sampler(z),
            Linear(z, sample_size // 4 * 256, name="fc_decode"),
            ReLU(),
            Reshape((-1, 256, sample_size // 4, 1)),
            Deconvolution1D(256, 128, 3, pad=1, stride=2, name="deconv2"),
            BatchNormalization(128, name="deconv_bn2"),
            ReLU(),
            Deconvolution1D(128, 200, 3, pad=1, stride=2, name="deconv1"),
            BatchNormalization(200, name="deconv_bn1"),
            ReLU(),
            LayoutCNNToRNN(),
            Linear(200, n_classes, name="classifier"),
            SoftMax()
        ]
    elif net == "rnn":
        start_word = n_classes
        dummy_word = n_classes + 1
        layers = [
            Parallel(
                [[
                    OneHot(n_classes),
                    LNLSTM(n_classes, 500, name="enc"),
                    lambda x: x[-1],
                    Linear(500, z * 2, name="encoder_fc"),
                    Sampler(z),
                ],
                 [
                     Dropword(p, dummy_word=dummy_word),
                     lambda x: T.concatenate([
                         T.ones((1, x.shape[1]), dtype='int32') * start_word, x
                     ],
                                             axis=0),
                     lambda x: x[:-1],
                     OneHot(n_classes + 2),
                 ]]),
            ConditionalDecoderLNLSTM(n_classes + 2,
                                     z,
                                     500,
                                     name="dec",
                                     steps=sample_size),
            Linear(500, n_classes, name="classifier"),
            SoftMax()
        ]
    else:
        raise Exception("unknown net %s" % net)

    model = LMReconstructionModel(layers, aux_loss=False)

    return model
Example 9
def make_model(z, sample_size, dropword_p, n_classes, lstm_size, alpha):
    encoder = [
        OneHot(n_classes),
        LayoutRNNToCNN(),
        Convolution1d(3, 128, n_classes, pad=1, stride=2, causal=False, name="conv1"),
        BatchNormalization(128, name="bn1"),
        ReLU(),
        Convolution1d(3, 256, 128, pad=1, stride=2, causal=False, name="conv2"),
        BatchNormalization(256, name="bn2"),
        ReLU(),
        Convolution1d(3, 512, 256, pad=1, stride=2, causal=False, name="conv3"),
        BatchNormalization(512, name="bn3"),
        ReLU(),
        Convolution1d(3, 512, 512, pad=1, stride=2, causal=False, name="conv4"),
        BatchNormalization(512, name="bn4"),
        ReLU(),
        Convolution1d(3, 512, 512, pad=1, stride=2, causal=False, name="conv5"),
        BatchNormalization(512, name="bn5"),
        ReLU(),
        Flatten(),
        Linear(sample_size // (2**5) * 512, z * 2, name="fc_encode"),
        Sampler(z),
    ]
    decoder_from_z = [
        Linear(z, sample_size // (2**5) * 512, name="fc_decode"),
        ReLU(),
        Reshape((-1, 512, sample_size // (2**5), 1)),
        Deconvolution1D(512, 512, 3, pad=1, stride=2, name="deconv5"),
        BatchNormalization(512, name="deconv_bn5"),
        ReLU(),
        Deconvolution1D(512, 512, 3, pad=1, stride=2, name="deconv4"),
        BatchNormalization(512, name="deconv_bn4"),
        ReLU(),
        Deconvolution1D(512, 256, 3, pad=1, stride=2, name="deconv3"),
        BatchNormalization(256, name="deconv_bn3"),
        ReLU(),
        Deconvolution1D(256, 128, 3, pad=1, stride=2, name="deconv2"),
        BatchNormalization(128, name="deconv_bn2"),
        ReLU(),
        Deconvolution1D(128, 200, 3, pad=1, stride=2, name="deconv1"),
        BatchNormalization(200, name="deconv_bn1"),
        ReLU(),
        LayoutCNNToRNN(),
        Parallel([
            [
                Linear(200, n_classes, name="aux_classifier"),
                SoftMax(),
                Store()
            ],
            []
        ], shared_input=True),
        lambda x: x[1]
    ]

    start_word = n_classes
    dummy_word = n_classes + 1
    decoder_from_words = [
        Dropword(dropword_p, dummy_word=dummy_word),
        lambda x: T.concatenate([T.ones((1, x.shape[1]), dtype='int32') * start_word, x], axis=0),
        lambda x: x[:-1],
        OneHot(n_classes+2),
    ]
    layers = [
        Parallel([
            encoder,
            []
        ], shared_input=True),
        Parallel([
            decoder_from_z,
            decoder_from_words
        ], shared_input=False),
        lambda x: T.concatenate(x, axis=2),
        LNLSTM(200+n_classes+2, lstm_size, name="declstm"),
        Linear(lstm_size, n_classes, name="classifier"),
        SoftMax()
    ]

    model = LMReconstructionModel(layers, aux_loss=True, alpha=alpha)

    return model
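A hedged usage sketch for this make_model(): because the encoder applies five stride-2 convolutions and the bottleneck is sized as sample_size // (2**5) * 512, sample_size should be a multiple of 32. The argument values below are placeholders, not values from the original code.

model = make_model(
    z=64,             # latent dimensionality
    sample_size=128,  # should be a multiple of 2**5 = 32
    dropword_p=0.3,
    n_classes=100,
    lstm_size=1000,
    alpha=0.2,
)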
Example 10
def make_model(z, sample_size, dropword_p, n_classes, encdec_layers,
               charcnn_size, charcnn_layers, alpha):
    assert sample_size % (2**encdec_layers) == 0
    if encdec_layers == 2:
        encoder = [
            OneHot(n_classes),
            LayoutRNNToCNN(),
            Convolution1d(3,
                          128,
                          n_classes,
                          pad=1,
                          stride=2,
                          causal=False,
                          name="conv1"),
            BatchNormalization(128, name="bn1", collect=False),
            ReLU(),
            Convolution1d(3,
                          256,
                          128,
                          pad=1,
                          stride=2,
                          causal=False,
                          name="conv2"),
            BatchNormalization(256, name="bn2", collect=False),
            ReLU(),
            Flatten(),
            Linear(sample_size // 4 * 256, z * 2, name="fc_encode"),
            Sampler(z),
        ]
        decoder_from_z = [
            Linear(z, sample_size // 4 * 256, name="fc_decode"),
            ReLU(),
            Reshape((-1, 256, sample_size // 4, 1)),
            Deconvolution1D(256, 128, 3, pad=1, stride=2, name="deconv2"),
            BatchNormalization(128, name="deconv_bn2", collect=False),
            ReLU(),
            Deconvolution1D(128, 200, 3, pad=1, stride=2, name="deconv1"),
            BatchNormalization(200, name="deconv_bn1", collect=False),
            ReLU(),
            LayoutCNNToRNN(),
            Parallel([[
                Linear(200, n_classes, name="aux_classifier"),
                SoftMax(),
                Store()
            ], []],
                     shared_input=True), lambda x: x[1]
        ]
    elif encdec_layers == 3:
        encoder = [
            OneHot(n_classes),
            LayoutRNNToCNN(),
            Convolution1d(3,
                          128,
                          n_classes,
                          pad=1,
                          stride=2,
                          causal=False,
                          name="conv1"),
            BatchNormalization(128, name="bn1"),
            ReLU(),
            Convolution1d(3,
                          256,
                          128,
                          pad=1,
                          stride=2,
                          causal=False,
                          name="conv2"),
            BatchNormalization(256, name="bn2"),
            ReLU(),
            Convolution1d(3,
                          512,
                          256,
                          pad=1,
                          stride=2,
                          causal=False,
                          name="conv3"),
            BatchNormalization(512, name="bn3"),
            ReLU(),
            Flatten(),
            Linear(sample_size // 8 * 512, z * 2, name="fc_encode"),
            Sampler(z),
        ]
        decoder_from_z = [
            Linear(z, sample_size // 8 * 512, name="fc_decode"),
            ReLU(),
            Reshape((-1, 512, sample_size // 8, 1)),
            Deconvolution1D(512, 256, 3, pad=1, stride=2, name="deconv3"),
            BatchNormalization(256, name="deconv_bn3", collect=False),
            ReLU(),
            Deconvolution1D(256, 128, 3, pad=1, stride=2, name="deconv2"),
            BatchNormalization(128, name="deconv_bn2", collect=False),
            ReLU(),
            Deconvolution1D(128, 200, 3, pad=1, stride=2, name="deconv1"),
            BatchNormalization(200, name="deconv_bn1", collect=False),
            ReLU(),
            LayoutCNNToRNN(),
            Parallel([[
                Linear(200, n_classes, name="aux_classifier"),
                SoftMax(),
                Store()
            ], []],
                     shared_input=True), lambda x: x[1]
        ]
    else:
        raise Exception("unsupported number of encdec layers %d" %
                        encdec_layers)

    start_word = n_classes
    dummy_word = n_classes + 1
    decoder_from_words = [
        Dropword(dropword_p, dummy_word=dummy_word),
        lambda x: T.concatenate(
            [T.ones((1, x.shape[1]), dtype='int32') * start_word, x], axis=0),
        lambda x: x[:-1],
        OneHot(n_classes + 2),
    ]
    layers = [
        Parallel([encoder, []], shared_input=True),
        Parallel([decoder_from_z, decoder_from_words], shared_input=False),
        lambda x: T.concatenate(x, axis=2),
        LayoutRNNToCNN(),
        Convolution1d(1,
                      charcnn_size * 2,
                      200 + n_classes + 2,
                      name="decconvresize"),
        BatchNormalization(charcnn_size * 2, name="decbnresize"),
        Gated(),
    ]
    for i in range(charcnn_layers):
        layers.append(
            HighwayConvolution1d(3,
                                 charcnn_size,
                                 dilation=1,
                                 name="decconv%d" % i))
    layers.extend([
        LayoutCNNToRNN(),
        Linear(charcnn_size, n_classes, name="classifier"),
        SoftMax()
    ])

    model = LMReconstructionModel(layers, aux_loss=True, alpha=alpha)

    return model
Example 11
 def __init__(self):
     super(NNModel, self).__init__()
     self.layers = [Linear(784, 100), Linear(100, 100), Linear(100, 10)]
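If NNModel chains its layers sequentially, its forward pass could be sketched as follows; this is an assumption, since the base class is not shown here, and it reuses the per-layer forward() convention from Example 1.

def forward(self, x):
    # Hypothetical sequential pass over the 784 -> 100 -> 100 -> 10 stack;
    # assumes each Linear exposes forward() as in Example 1.
    for layer in self.layers:
        x = layer.forward(x)
    return x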