Example #1
 def __collect_child_model(self):
     self.child_models = []
     for i, n_unit in enumerate(self.n_units):
         if i == 0: continue
         self.child_models.append(
             ChildChainList(
                 L.Maxout(self.n_units[i - 1], n_unit, self.pool_size)))
Example #2
File: test_maxout.py  Project: ejlb/chainer
    def setUp(self):
        # x, W, and b are set so that the result of forward
        # propagation is stable, meaning that small perturbations
        # of them do not change :math:`argmax_{j} W_{ij\cdot} x + b_{ij}`.

        x_shape = (self.batchsize, ) + self.in_shape
        self.x = numpy.random.uniform(-0.05, 0.05, x_shape).astype(
            numpy.float32) + 1
        self.gy = numpy.random.uniform(
            -0.05, 0.05, (self.batchsize, self.out_size)).astype(numpy.float32)

        in_size = numpy.prod(self.in_shape)
        initialW = numpy.random.uniform(
            -0.05, 0.05,
            (self.out_size, self.pool_size, in_size)).astype(numpy.float32)
        for o in six.moves.range(self.out_size):
            w = numpy.arange(in_size, dtype=numpy.float32) + 1
            for c in six.moves.range(self.pool_size):
                initialW[o, c, :] += w * c

        if self.initial_bias == 'random':
            initial_bias = numpy.random.uniform(
                -0.05, 0.05, (self.out_size, self.pool_size))
        elif self.initial_bias == 'scalar':
            initial_bias = numpy.full((self.out_size, self.pool_size),
                                      5,
                                      dtype=numpy.float32)
        elif self.initial_bias is None:
            initial_bias = None

        self.link = links.Maxout(in_size, self.out_size, self.pool_size,
                                 self.wscale, initialW, initial_bias)

        self.y = _maxout(self.x, initialW, initial_bias)
        self.link.zerograds()
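
The comment at the top of this setUp refers to the reference maxout computation from which the expected output self.y is built. Below is a minimal NumPy sketch of that computation, assuming x is already flattened to (batchsize, in_size) and W uses the (out_size, pool_size, in_size) layout above; this is an illustration, not the actual _maxout helper from test_maxout.py.

    import numpy

    def maxout_reference(x, W, b=None):
        # x: (batchsize, in_size), W: (out_size, pool_size, in_size),
        # b: (out_size, pool_size) or None.
        # y[n, o, j] is the j-th affine projection of sample n for output unit o.
        y = numpy.einsum('ni,oji->noj', x, W)
        if b is not None:
            y = y + b
        # Maxout keeps, per output unit, the maximum over the pool axis j.
        # The setUp above arranges x, W, and b so that which j wins does not
        # change under small perturbations, keeping gradient checks stable.
        return y.max(axis=2)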
Example #3
 def setUp(self):
     self.initialW = numpy.random.uniform(
         -1, 1, (2, 3, 4)).astype(numpy.float32)
     self.initial_bias = numpy.random.uniform(
         -1, 1, (3, 4)).astype(numpy.float32)
     self.link = links.Maxout(
         2, 3, 4, initialW=self.initialW,
         initial_bias=self.initial_bias)
Example #4
    def __init__(self,
                 Vo,
                 Eo,
                 Ho,
                 Ha,
                 Hi,
                 Hl,
                 attn_cls=AttentionModule,
                 init_orth=False,
                 cell_type=rnn_cells.LSTMCell,
                 use_goto_attention=False):
        #         assert cell_type in "gru dgru lstm slow_gru".split()
        #         self.cell_type = cell_type
        #         if cell_type == "gru":
        #             gru = faster_gru.GRU(Ho, Eo + Hi)
        #         elif cell_type == "dgru":
        #             gru = DoubleGRU(Ho, Eo + Hi)
        #         elif cell_type == "lstm":
        #             gru = L.StatelessLSTM(Eo + Hi, Ho) #, forget_bias_init = 3)
        #         elif cell_type == "slow_gru":
        #             gru = L.GRU(Ho, Eo + Hi)

        if isinstance(cell_type, (str, unicode)):
            cell_type = rnn_cells.create_cell_model_from_string(cell_type)

        gru = cell_type(Eo + Hi, Ho)

        log.info("constructing decoder [%r]" % (cell_type, ))
        if use_goto_attention:
            log.info("using 'Goto' attention")
        super(Decoder, self).__init__(
            emb=L.EmbedID(Vo, Eo),
            #             gru = L.GRU(Ho, Eo + Hi),
            gru=gru,
            maxo=L.Maxout(Eo + Hi + Ho, Hl, 2),
            lin_o=L.Linear(Hl, Vo, nobias=False),
            attn_module=attn_cls(
                Hi,
                Ha,
                Ho,
                init_orth=init_orth,
                prev_word_embedding_size=Eo if use_goto_attention else None))
        #         self.add_param("initial_state", (1, Ho))
        self.add_param("bos_embeding", (1, Eo))

        self.use_goto_attention = use_goto_attention
        self.Hi = Hi
        self.Ho = Ho
        self.Eo = Eo
        #         self.initial_state.data[...] = np.random.randn(Ho)
        self.bos_embeding.data[...] = np.random.randn(Eo)

        if init_orth:
            ortho_init(self.gru)
            ortho_init(self.lin_o)
            ortho_init(self.maxo)
Example #5
 def test_invalid_initial_bias_ndarray(self):
     invalid_dim = 1
     initial_bias = self.initial_bias = numpy.random.uniform(
         -1, 1,
         (self.out_size, self.pool_size, invalid_dim)).astype(numpy.float32)
     with self.assertRaises(ValueError):
         links.Maxout(self.in_size,
                      self.out_size,
                      self.pool_size,
                      initial_bias=initial_bias)
Example #6
 def setUp(self):
     self.in_size = 2
     self.out_size = 3
     self.pool_size = 4
     self.initialW = numpy.random.uniform(
         -1, 1, (self.out_size, self.pool_size, self.in_size)
     ).astype(numpy.float32)
     self.initial_bias = numpy.random.uniform(
         -1, 1, (self.out_size, self.pool_size)
     ).astype(numpy.float32)
     self.link = links.Maxout(
         self.in_size, self.out_size, self.pool_size,
         initialW=self.initialW, initial_bias=self.initial_bias)
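
For context, a minimal usage sketch of a link constructed like the one in this setUp (hypothetical input values, not part of the test): a batch of rows with in_size features maps to a Variable with out_size columns.

    import numpy
    from chainer import links

    link = links.Maxout(2, 3, 4)  # in_size=2, out_size=3, pool_size=4
    x = numpy.random.uniform(-1, 1, (5, 2)).astype(numpy.float32)
    y = link(x)  # chainer.Variable with shape (5, 3)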
Example #7
 def __init__(self,
              vocabulary_size: int,
              word_embeddings_size: int,
              hidden_layer_size: int,
              attention_hidden_layer_size: int,
              encoder_output_size: int,
              maxout_layer_size: int,
              maxout_pool_size: int = 2,
              ignore_label: int = -1,
              dynamic_attention: bool = False):
     super(Decoder, self).__init__()
     with self.init_scope():
         self.embed_id = L.EmbedID(vocabulary_size,
                                   word_embeddings_size,
                                   ignore_label=ignore_label)
         self.rnn = L.StatelessLSTM(
             word_embeddings_size + encoder_output_size,
             hidden_layer_size
         )
         self.maxout = L.Maxout(word_embeddings_size +
                                encoder_output_size +
                                hidden_layer_size,
                                maxout_layer_size,
                                maxout_pool_size)
         self.linear = L.Linear(maxout_layer_size, vocabulary_size)
         if dynamic_attention:
             self.attention = DynamicAttentionModule(
                 encoder_output_size,
                 attention_hidden_layer_size,
                 hidden_layer_size,
                 word_embeddings_size
             )
         else:
             self.attention = AttentionModule(
                 encoder_output_size,
                 attention_hidden_layer_size,
                 hidden_layer_size,
                 word_embeddings_size
             )
         self.bos_state = Parameter(
             initializer=self.xp.random.randn(
                 1,
                 hidden_layer_size
             ).astype('f')
         )
     self.vocabulary_size = vocabulary_size
     self.word_embeddings_size = word_embeddings_size
     self.hidden_layer_size = hidden_layer_size
     self.encoder_output_size = encoder_output_size
Example #8
    def test_forward(self):
        x = np.random.uniform(0, 1,
                              [self.batch_size, self.d_in]).astype(np.float32)
        c = L.Maxout(self.d_in,
                     self.d_hid,
                     self.pool_size,
                     initial_bias=np.random.uniform(
                         0, 1, self.pool_size * self.d_hid))
        t = nn.Maxout.from_chainer(c)

        tx = Variable(torch.from_numpy(x))

        co = c(x)
        to = t(tx)
        self.assertTrue(np.allclose(co.data, to.data.numpy()),
                        'co:\n{}\nto:\n{}'.format(co.data, to.data.numpy()))
Example #9
 def __init__(self,
              n_vocab,
              n_units,
              n_attention_units,
              n_encoder_output_units,
              n_maxout_units,
              n_maxout_pools=2):
     super(Decoder, self).__init__()
     with self.init_scope():
         self.embed_y = L.EmbedID(n_vocab, n_units, ignore_label=-1)
         self.lstm = L.StatelessLSTM(n_units + n_encoder_output_units,
                                     n_units)
         self.maxout = L.Maxout(n_units + n_encoder_output_units + n_units,
                                n_maxout_units, n_maxout_pools)
         self.w = L.Linear(n_maxout_units, n_vocab)
         self.attention = AttentionModule(n_encoder_output_units,
                                          n_attention_units, n_units)
     self.n_units = n_units
Example #10
    def __init__(self):
        initializer = chainer.initializers.HeNormal()
        super(GoogLeNet, self).__init__(
            # input channels, output channels, filter size (px)
            # output: 209*209 maps, one per output channel
            # Network in Network <http://arxiv.org/abs/1312.4400v3>
            # 60.9% model --------------------------------------
            #conv1=L.Convolution2D(3, 8, 7),
            #conv2=L.Convolution2D(8, 16, 5),
            #conv3=L.Convolution2D(16, 32, 3),
            #conv4=L.Convolution2D(32, 48, 3),
            conv1=L.Convolution2D(3, 8, 7, wscale=w),
            conv2=L.Convolution2D(8, 16, 5, wscale=w),
            conv3=L.Convolution2D(16, 32, 3, wscale=w),
            conv4=L.Convolution2D(32, 48, 3, wscale=w),
            mo=L.Maxout(4800, 32, 6, wscale=w),

            #-----------------------------------------vasily
            #conv1 = F.Convolution2D(  3,  64, 4, stride = 2, pad = 1, initialW=initializer),
            #conv2 = F.Convolution2D( 64, 128, 4, stride = 2, pad = 1, initialW=initializer),
            #conv3 = F.Convolution2D(128, 256, 4, stride = 2, pad = 1, initialW=initializer),
            #conv4 = F.Convolution2D(256, 512, 4, stride = 2, pad = 1, initialW=initializer),
            #fl	= L.Linear(100352, 2, initialW=initializer),
            #bn1   = F.BatchNormalization(64),
            #bn2   = F.BatchNormalization(128),
            #bn3   = F.BatchNormalization(256),
            #bn4   = F.BatchNormalization(512))

            ###l3=F.Linear(500,7),
            #bn5=L.BatchNormalization(500),
            #l1=L.Linear(256, 512),
            #l2=L.Linear(512, 7)
            #'''
            #conv1=L.Convolution2D(3, 16, 3),
            #bn4=L.BatchNormalization(16),
            #conv2=L.Convolution2D(16, 32, 3),
            #bn5=L.BatchNormalization(32),
            #conv3=L.Convolution2D(32, 64, 3),
            #bn6=L.BatchNormalization(64),
            #l1=L.Linear(43264, 1000),
            #l2=L.Linear(1000, 7),
            #l3=L.Linear(1000, 7)
        )
        self.train = True
Example #11
    def setUp(self):
        self.in_size = 2
        self.out_size = 3
        self.pool_size = 4

        if self.initializer == 'Initializer':
            self.initialW = constant.Constant(1.0)
            self.initial_bias = constant.Constant(2.0)
        elif self.initializer == 'scalar':
            self.initialW = 1.0
            self.initial_bias = 2.0
        elif self.initializer == 'ndarray':
            self.initialW = numpy.random.uniform(
                -1, 1, (self.out_size, self.pool_size, self.in_size)).astype(
                    self.dtype)
            self.initial_bias = numpy.random.uniform(
                -1, 1, (self.out_size, self.pool_size)).astype(self.dtype)
        elif self.initializer == 'callable':

            def callable_initialW(array):
                assert array.dtype == self.dtype
                assert array.shape == (self.out_size, self.pool_size,
                                       self.in_size)
                array.fill(1.0)

            self.initialW = callable_initialW

            def callable_initial_bias(array):
                assert array.dtype == self.dtype
                assert array.shape == (self.out_size, self.pool_size)
                array.fill(2.0)

            self.initial_bias = callable_initial_bias
        else:
            raise ValueError('invalid parameter')

        with chainer.using_config('dtype', self.dtype):
            self.link = links.Maxout(self.in_size,
                                     self.out_size,
                                     self.pool_size,
                                     initialW=self.initialW,
                                     initial_bias=self.initial_bias)
Example #12
    def __init__(self,
                 Vo,
                 Eo,
                 Ho,
                 Ha,
                 Hi,
                 Hl,
                 attn_cls=AttentionModule,
                 init_orth=False,
                 cell_type=rnn_cells.LSTMCell,
                 is_multitarget=False):
        if (type(cell_type) == types.FunctionType):
            gru = cell_type(Eo + Hi, Ho)
        else:
            gru = rnn_cells.create_cell_model_from_string(cell_type)(Eo + Hi,
                                                                     Ho)

        #gru = cell_type(Eo + Hi, Ho)

        log.info("constructing decoder [%r]" % (cell_type, ))

        super(Decoder, self).__init__(
            emb=L.EmbedID(Vo, Eo),
            #             gru = L.GRU(Ho, Eo + Hi),
            gru=gru,
            maxo=L.Maxout(Eo + Hi + Ho, Hl, 2),
            lin_o=L.Linear(Hl, Vo, nobias=False),
            attn_module=attn_cls(Hi, Ha, Ho, init_orth=init_orth))
        #         self.add_param("initial_state", (1, Ho))
        self.add_param("bos_embeding", (1, Eo))

        self.Hi = Hi
        self.Ho = Ho
        self.Eo = Eo
        self.is_multitarget = is_multitarget
        #         self.initial_state.data[...] = np.random.randn(Ho)
        self.bos_embeding.data[...] = np.random.randn(Eo)

        if init_orth:
            ortho_init(self.gru)
            ortho_init(self.lin_o)
            ortho_init(self.maxo)
Example #13
 def __init__(self,
              n_vocab,
              n_units,
              n_attention_units,
              n_encoder_output_units,
              n_maxout_units,
              n_maxout_pools=2):
     super(Decoder, self).__init__()
     with self.init_scope():
         self.embed_y = L.EmbedID(n_vocab, n_units, ignore_label=-1)
         self.lstm = L.StatelessLSTM(n_units + n_encoder_output_units,
                                     n_units)
         self.maxout = L.Maxout(n_units + n_encoder_output_units + n_units,
                                n_maxout_units, n_maxout_pools)
         self.w = L.Linear(n_maxout_units, n_vocab)
         self.attention = AttentionModule(n_encoder_output_units,
                                          n_attention_units, n_units)
         self.bos_state = Parameter(
             initializer=self.xp.random.randn(1, n_units).astype('f'))
     self.n_units = n_units
Example #14
    def setUp(self):
        # x, W, and b are set so that the result of forward
        # propagation is stable, meaning that small perturbations
        # of them do not change :math:`argmax_{j} W_{\cdot ij} x + b_{ij}`.

        x_shape = (self.batchsize, ) + self.in_shape
        self.x = numpy.random.uniform(
            -0.05, 0.05, x_shape).astype(numpy.float32) + 1
        self.gy = numpy.random.uniform(
            -0.05, 0.05, (self.batchsize, self.out_size)
        ).astype(numpy.float32)

        in_size = numpy.prod(self.in_shape)
        initialW = numpy.random.uniform(
            -0.05, 0.05, (in_size, self.num_channel, self.out_size)
        ).astype(numpy.float32)
        for c in six.moves.range(self.num_channel):
            w = numpy.arange(in_size, dtype=numpy.float32) + 1
            for o in six.moves.range(self.out_size):
                initialW[:, c, o] += w * o

        initial_bias = None
        if self.initial_bias == 'random':
            initial_bias = numpy.random.uniform(
                -0.05, 0.05, (self.num_channel, self.out_size))

        self.link = links.Maxout(in_size, self.num_channel, self.out_size,
                                 self.wscale, initialW, initial_bias)

        W = self.link.W.data.copy()
        b = None
        if self.link.b is not None:
            b = self.link.b.data.copy()

        self.y = _maxout(self.x, W, b)
        self.link.zerograds()
Example #15
File: test_maxout.py  Project: ejlb/chainer
 def setUp(self):
     self.link = links.Maxout(2, 3, 4)
     self.x = numpy.random.uniform(-1, 1, (10, 7)).astype(numpy.float32)
Example #16
 def add_last_layer(self):
     self.add_link(
         L.Maxout(self.n_units[-1], self.last_unit, self.pool_size))
Example #17
 def test_scalar_initial_bias(self):
     with chainer.using_config('dtype', self.dtype):
         link = links.Maxout(2, 3, 4, initial_bias=0)
     assert link.linear.b.dtype == self.dtype
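
Example #17 reads link.linear.b: links.Maxout wraps a Linear layer whose output size is out_size * pool_size, and the maxout is then taken over groups of pool_size consecutive outputs, which is why the bias dtype follows the configured dtype. A small sketch of that relationship (assuming this internal layout; details may differ across Chainer versions):

    from chainer import links

    link = links.Maxout(2, 3, 4, initial_bias=0)
    # The wrapped Linear projects in_size -> out_size * pool_size.
    assert link.linear.W.shape == (3 * 4, 2)
    assert link.linear.b.shape == (3 * 4,)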