def __collect_child_model(self):
    self.child_models = []
    for i, n_unit in enumerate(self.n_units):
        if i == 0:
            continue
        self.child_models.append(
            ChildChainList(
                L.Maxout(self.n_units[i - 1], n_unit, self.pool_size)))
def setUp(self):
    # x, W, and b are set so that the result of forward
    # propagation gets stable, meaning that their small perturbations
    # do not change :math:`argmax_{j} W_{ij\cdot} x + b_{ij}`.
    x_shape = (self.batchsize,) + self.in_shape
    self.x = numpy.random.uniform(-0.05, 0.05, x_shape).astype(
        numpy.float32) + 1
    self.gy = numpy.random.uniform(
        -0.05, 0.05, (self.batchsize, self.out_size)).astype(numpy.float32)

    in_size = numpy.prod(self.in_shape)
    initialW = numpy.random.uniform(
        -0.05, 0.05,
        (self.out_size, self.pool_size, in_size)).astype(numpy.float32)
    for o in six.moves.range(self.out_size):
        w = numpy.arange(in_size, dtype=numpy.float32) + 1
        for c in six.moves.range(self.pool_size):
            initialW[o, c, :] += w * c

    if self.initial_bias == 'random':
        initial_bias = numpy.random.uniform(
            -0.05, 0.05, (self.out_size, self.pool_size))
    elif self.initial_bias == 'scalar':
        initial_bias = numpy.full(
            (self.out_size, self.pool_size), 5, dtype=numpy.float32)
    elif self.initial_bias is None:
        initial_bias = None

    self.link = links.Maxout(in_size, self.out_size, self.pool_size,
                             self.wscale, initialW, initial_bias)
    self.y = _maxout(self.x, initialW, initial_bias)
    self.link.zerograds()
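# For reference, a minimal sketch of what the `_maxout` helper above might
# compute under this test's W layout, (out_size, pool_size, in_size), with an
# optional bias of shape (out_size, pool_size). The name `_maxout_reference`
# and this implementation are illustrative assumptions, not the helper's
# actual source.
def _maxout_reference(x, W, b=None):
    x = x.reshape(len(x), -1)               # flatten to (batch, in_size)
    z = numpy.tensordot(x, W, axes=(1, 2))  # (batch, out_size, pool_size)
    if b is not None:
        z = z + b                           # bias broadcasts over the batch
    return z.max(axis=2)                    # maximum over the pool dimension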
def setUp(self):
    self.initialW = numpy.random.uniform(
        -1, 1, (2, 3, 4)).astype(numpy.float32)
    self.initial_bias = numpy.random.uniform(
        -1, 1, (3, 4)).astype(numpy.float32)
    self.link = links.Maxout(
        2, 3, 4, initialW=self.initialW, initial_bias=self.initial_bias)
def __init__(self, Vo, Eo, Ho, Ha, Hi, Hl, attn_cls=AttentionModule,
             init_orth=False, cell_type=rnn_cells.LSTMCell,
             use_goto_attention=False):
    # Accept either a cell class or its name as a string.
    if isinstance(cell_type, (str, unicode)):
        cell_type = rnn_cells.create_cell_model_from_string(cell_type)
    gru = cell_type(Eo + Hi, Ho)
    log.info("constructing decoder [%r]" % (cell_type,))
    if use_goto_attention:
        log.info("using 'Goto' attention")
    super(Decoder, self).__init__(
        emb=L.EmbedID(Vo, Eo),
        gru=gru,
        maxo=L.Maxout(Eo + Hi + Ho, Hl, 2),
        lin_o=L.Linear(Hl, Vo, nobias=False),
        attn_module=attn_cls(
            Hi, Ha, Ho, init_orth=init_orth,
            prev_word_embedding_size=Eo if use_goto_attention else None))
    self.add_param("bos_embeding", (1, Eo))
    self.use_goto_attention = use_goto_attention
    self.Hi = Hi
    self.Ho = Ho
    self.Eo = Eo
    self.bos_embeding.data[...] = np.random.randn(Eo)
    if init_orth:
        ortho_init(self.gru)
        ortho_init(self.lin_o)
        ortho_init(self.maxo)
def test_invalid_initial_bias_ndarray(self):
    invalid_dim = 1
    initial_bias = self.initial_bias = numpy.random.uniform(
        -1, 1,
        (self.out_size, self.pool_size, invalid_dim)).astype(numpy.float32)
    with self.assertRaises(ValueError):
        links.Maxout(self.in_size, self.out_size, self.pool_size,
                     initial_bias=initial_bias)
def setUp(self):
    self.in_size = 2
    self.out_size = 3
    self.pool_size = 4
    self.initialW = numpy.random.uniform(
        -1, 1, (self.out_size, self.pool_size, self.in_size)
    ).astype(numpy.float32)
    self.initial_bias = numpy.random.uniform(
        -1, 1, (self.out_size, self.pool_size)
    ).astype(numpy.float32)
    self.link = links.Maxout(
        self.in_size, self.out_size, self.pool_size,
        initialW=self.initialW, initial_bias=self.initial_bias)
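# A minimal forward-shape sketch (an illustrative assumption, not one of the
# original tests): with in_size=2, out_size=3, pool_size=4, the link computes
# 3 * 4 linear projections of the input and keeps the maximum of each group
# of 4, mapping a (batch, 2) input to a (batch, 3) output.
def _example_forward():
    link = links.Maxout(2, 3, 4)
    x = numpy.random.uniform(-1, 1, (5, 2)).astype(numpy.float32)
    y = link(x)
    assert y.shape == (5, 3)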
def __init__(self,
             vocabulary_size: int,
             word_embeddings_size: int,
             hidden_layer_size: int,
             attention_hidden_layer_size: int,
             encoder_output_size: int,
             maxout_layer_size: int,
             maxout_pool_size: int = 2,
             ignore_label: int = -1,
             dynamic_attention: bool = False):
    super(Decoder, self).__init__()
    with self.init_scope():
        self.embed_id = L.EmbedID(vocabulary_size, word_embeddings_size,
                                  ignore_label=ignore_label)
        self.rnn = L.StatelessLSTM(
            word_embeddings_size + encoder_output_size,
            hidden_layer_size)
        self.maxout = L.Maxout(
            word_embeddings_size + encoder_output_size + hidden_layer_size,
            maxout_layer_size, maxout_pool_size)
        self.linear = L.Linear(maxout_layer_size, vocabulary_size)
        if dynamic_attention:
            self.attention = DynamicAttentionModule(
                encoder_output_size, attention_hidden_layer_size,
                hidden_layer_size, word_embeddings_size)
        else:
            self.attention = AttentionModule(
                encoder_output_size, attention_hidden_layer_size,
                hidden_layer_size, word_embeddings_size)
        self.bos_state = Parameter(
            initializer=self.xp.random.randn(
                1, hidden_layer_size).astype('f'))
    self.vocabulary_size = vocabulary_size
    self.word_embeddings_size = word_embeddings_size
    self.hidden_layer_size = hidden_layer_size
    self.encoder_output_size = encoder_output_size
def test_forward(self):
    x = np.random.uniform(
        0, 1, [self.batch_size, self.d_in]).astype(np.float32)
    c = L.Maxout(self.d_in, self.d_hid, self.pool_size,
                 initial_bias=np.random.uniform(
                     0, 1, self.pool_size * self.d_hid))
    t = nn.Maxout.from_chainer(c)
    tx = Variable(torch.from_numpy(x))
    co = c(x)
    to = t(tx)
    self.assertTrue(
        np.allclose(co.data, to.data.numpy()),
        'co:\n{}\nto:\n{}'.format(co.data, to.data.numpy()))
def __init__(self, n_vocab, n_units, n_attention_units,
             n_encoder_output_units, n_maxout_units, n_maxout_pools=2):
    super(Decoder, self).__init__()
    with self.init_scope():
        self.embed_y = L.EmbedID(n_vocab, n_units, ignore_label=-1)
        self.lstm = L.StatelessLSTM(
            n_units + n_encoder_output_units, n_units)
        self.maxout = L.Maxout(
            n_units + n_encoder_output_units + n_units,
            n_maxout_units, n_maxout_pools)
        self.w = L.Linear(n_maxout_units, n_vocab)
        self.attention = AttentionModule(
            n_encoder_output_units, n_attention_units, n_units)
    self.n_units = n_units
def __init__(self):
    # Network in Network <http://arxiv.org/abs/1312.4400v3>
    # Arguments: input channels, output channels, filter size (px);
    # each conv produces a 209*209 map per output channel.
    # 60.9% model --------------------------------------
    # `w` is a weight-scale (wscale) constant defined elsewhere in the module.
    super(GoogLeNet, self).__init__(
        conv1=L.Convolution2D(3, 8, 7, wscale=w),
        conv2=L.Convolution2D(8, 16, 5, wscale=w),
        conv3=L.Convolution2D(16, 32, 3, wscale=w),
        conv4=L.Convolution2D(32, 48, 3, wscale=w),
        mo=L.Maxout(4800, 32, 6, wscale=w))
    self.train = True
def setUp(self):
    self.in_size = 2
    self.out_size = 3
    self.pool_size = 4
    if self.initializer == 'Initializer':
        self.initialW = constant.Constant(1.0)
        self.initial_bias = constant.Constant(2.0)
    elif self.initializer == 'scalar':
        self.initialW = 1.0
        self.initial_bias = 2.0
    elif self.initializer == 'ndarray':
        self.initialW = numpy.random.uniform(
            -1, 1, (self.out_size, self.pool_size, self.in_size)).astype(
                self.dtype)
        self.initial_bias = numpy.random.uniform(
            -1, 1, (self.out_size, self.pool_size)).astype(self.dtype)
    elif self.initializer == 'callable':
        def callable_initialW(array):
            assert array.dtype == self.dtype
            assert array.shape == (
                self.out_size, self.pool_size, self.in_size)
            array.fill(1.0)
        self.initialW = callable_initialW

        def callable_initial_bias(array):
            assert array.dtype == self.dtype
            assert array.shape == (self.out_size, self.pool_size)
            array.fill(2.0)
        self.initial_bias = callable_initial_bias
    else:
        raise ValueError('invalid parameter')

    with chainer.using_config('dtype', self.dtype):
        self.link = links.Maxout(
            self.in_size, self.out_size, self.pool_size,
            initialW=self.initialW, initial_bias=self.initial_bias)
def __init__(self, Vo, Eo, Ho, Ha, Hi, Hl, attn_cls=AttentionModule,
             init_orth=False, cell_type=rnn_cells.LSTMCell,
             is_multitarget=False):
    # Accept either a cell factory or the name of one as a string.
    if isinstance(cell_type, types.FunctionType):
        gru = cell_type(Eo + Hi, Ho)
    else:
        gru = rnn_cells.create_cell_model_from_string(cell_type)(
            Eo + Hi, Ho)
    log.info("constructing decoder [%r]" % (cell_type,))
    super(Decoder, self).__init__(
        emb=L.EmbedID(Vo, Eo),
        gru=gru,
        maxo=L.Maxout(Eo + Hi + Ho, Hl, 2),
        lin_o=L.Linear(Hl, Vo, nobias=False),
        attn_module=attn_cls(Hi, Ha, Ho, init_orth=init_orth))
    self.add_param("bos_embeding", (1, Eo))
    self.Hi = Hi
    self.Ho = Ho
    self.Eo = Eo
    self.is_multitarget = is_multitarget
    self.bos_embeding.data[...] = np.random.randn(Eo)
    if init_orth:
        ortho_init(self.gru)
        ortho_init(self.lin_o)
        ortho_init(self.maxo)
def __init__(self, n_vocab, n_units, n_attention_units,
             n_encoder_output_units, n_maxout_units, n_maxout_pools=2):
    super(Decoder, self).__init__()
    with self.init_scope():
        self.embed_y = L.EmbedID(n_vocab, n_units, ignore_label=-1)
        self.lstm = L.StatelessLSTM(
            n_units + n_encoder_output_units, n_units)
        self.maxout = L.Maxout(
            n_units + n_encoder_output_units + n_units,
            n_maxout_units, n_maxout_pools)
        self.w = L.Linear(n_maxout_units, n_vocab)
        self.attention = AttentionModule(
            n_encoder_output_units, n_attention_units, n_units)
        self.bos_state = Parameter(
            initializer=self.xp.random.randn(1, n_units).astype('f'))
    self.n_units = n_units
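# A hedged sketch of how one decoder step might wire these links together,
# in the spirit of a Bahdanau-style maxout readout; `decode_step`, its
# argument names, and the exact concatenation order are assumptions for
# illustration, not taken from the constructors above.
import chainer.functions as F

def decode_step(decoder, prev_embedding, context, c, h):
    # Advance the LSTM on the previous word embedding and attention context.
    c, h = decoder.lstm(c, h, F.concat((prev_embedding, context)))
    # Maxout readout over [embedding; context; hidden], then project to vocab.
    readout = decoder.maxout(F.concat((prev_embedding, context, h)))
    logits = decoder.w(readout)  # unnormalized scores over the vocabulary
    return logits, c, h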
def setUp(self):
    # x, W, and b are set so that the result of forward
    # propagation gets stable, meaning that their small perturbations
    # do not change :math:`argmax_{j} W_{\cdot ij} x + b_{ij}`.
    x_shape = (self.batchsize,) + self.in_shape
    self.x = numpy.random.uniform(
        -0.05, 0.05, x_shape).astype(numpy.float32) + 1
    self.gy = numpy.random.uniform(
        -0.05, 0.05,
        (self.batchsize, self.out_size)).astype(numpy.float32)

    in_size = numpy.prod(self.in_shape)
    initialW = numpy.random.uniform(
        -0.05, 0.05,
        (in_size, self.num_channel, self.out_size)).astype(numpy.float32)
    for c in six.moves.range(self.num_channel):
        w = numpy.arange(in_size, dtype=numpy.float32) + 1
        for o in six.moves.range(self.out_size):
            initialW[:, c, o] += w * o

    initial_bias = None
    if self.initial_bias == 'random':
        initial_bias = numpy.random.uniform(
            -0.05, 0.05, (self.num_channel, self.out_size))

    self.link = links.Maxout(in_size, self.num_channel, self.out_size,
                             self.wscale, initialW, initial_bias)
    W = self.link.W.data.copy()
    b = None
    if self.link.b is not None:
        b = self.link.b.data.copy()
    self.y = _maxout(self.x, W, b)
    self.link.zerograds()
def setUp(self):
    self.link = links.Maxout(2, 3, 4)
    self.x = numpy.random.uniform(-1, 1, (10, 7)).astype(numpy.float32)
def add_last_layer(self):
    self.add_link(
        L.Maxout(self.n_units[-1], self.last_unit, self.pool_size))
def test_scalar_initial_bias(self):
    with chainer.using_config('dtype', self.dtype):
        link = links.Maxout(2, 3, 4, initial_bias=0)
        assert link.linear.b.dtype == self.dtype
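# An illustrative check of the behavior exercised above (an assumption about
# the wrapped layer's internals, not an original test): a scalar initial_bias
# is broadcast over all out_size * pool_size bias entries of the underlying
# linear layer that `Maxout` wraps.
def _example_scalar_bias():
    link = links.Maxout(2, 3, 4, initial_bias=0)
    assert link.linear.b.shape == (3 * 4,)
    assert (link.linear.b.array == 0).all()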