def __init__(self, vocab_size, embed_size, batch_size, max_len):
    """Build the character-CNN encoder.

    Seven parallel convolutions (filter widths 1-7 over one-hot rows of
    width ``vocab_size``) feed two Highway layers and a final linear
    projection down to ``embed_size``.
    """
    # Filter counts per width 1..7; they sum to 1700, the Highway width.
    filter_counts = (200, 200, 200, 200, 250, 300, 350)
    conv_links = {
        'conv_%d' % (width + 1): L.Convolution2D(
            1, n_out, ksize=(width + 1, vocab_size))
        for width, n_out in enumerate(filter_counts)
    }
    total_filters = sum(filter_counts)  # 1700
    super(EncoderCNN, self).__init__(
        highway_1=L.Highway(total_filters, activate=F.tanh),
        highway_2=L.Highway(total_filters, activate=F.tanh),
        linear=L.Linear(total_filters, embed_size),
        **conv_links)
    self.vocab_size = vocab_size
    self.batch_size = batch_size
    self.max_len = max_len
    # Identity matrix used to one-hot encode character ids.
    self.identity = xp.identity(vocab_size)
def setUp(self):
    """Create a Highway link with random parameters and the expected output.

    The expected ``self.y`` is computed on CPU with plain numpy so the
    test can compare it against the link's forward pass.
    """
    shape = (5, self.in_out_size)
    self.x = numpy.random.uniform(-1, 1, shape).astype(numpy.float32)
    self.gy = numpy.random.uniform(-1, 1, shape).astype(numpy.float32)
    self.link = links.Highway(self.in_out_size, activate=functions.tanh)
    # Overwrite all four parameter arrays in place with fresh random
    # values (order matters for RNG reproducibility: Wh, bh, Wt, bt).
    for param in (self.link.plain.W, self.link.plain.b,
                  self.link.transform.W, self.link.transform.b):
        param.data[...] = numpy.random.uniform(-1, 1, param.data.shape)
    self.link.cleargrads()
    # Keep CPU copies of the parameters for the reference computation.
    self.Wh = self.link.plain.W.data.copy()
    self.bh = self.link.plain.b.data.copy()
    self.Wt = self.link.transform.W.data.copy()
    self.bt = self.link.transform.b.data.copy()
    # Reference output: y = tanh(xWh+bh) * g + x * (1 - g),
    # g = sigmoid(xWt+bt).
    plain = numpy.tanh(self.x.dot(self.Wh.T) + self.bh)
    gate = self.sigmoid(self.x.dot(self.Wt.T) + self.bt)
    self.y = plain * gate + self.x * (numpy.ones_like(self.x) - gate)
def __init__(self, vocab_size, embed_units=15, num_highway_layers=1,
             highway_dropout=0.0, ngrams=(1, 2, 3, 4, 5, 6), stride=1,
             num_filters=None):
    """Character-CNN word encoder: embed chars, convolve per n-gram width,
    then pass the concatenated filter outputs through Highway layers.
    """
    super(CNNWordEncoder, self).__init__()
    if num_filters is None:
        # Default follows the "small" model of Kim et al.,
        # http://www.people.fas.harvard.edu/~yoonkim/data/char-nlm.pdf
        # (Table 2): filter count is a constant multiple of the width.
        num_filters = [n * self.FILTER_MULTIPLIER for n in ngrams]
    assert len(num_filters) == len(ngrams)
    assert num_highway_layers >= 0
    out_size = sum(num_filters)
    with self.init_scope():
        self.embed_layer = L.EmbedID(vocab_size, embed_units,
                                     ignore_label=self.IGNORE_LABEL)
        self.cnn_blocks = ['cnn_%d' % n for n in ngrams]
        self.min_width = max(ngrams)
        self.highways = ['highway_%d' % i
                         for i in range(num_highway_layers)]
        # One convolution per n-gram width; registered by name so the
        # forward pass can look them up via getattr.
        for name, width, n_out in zip(self.cnn_blocks, ngrams, num_filters):
            setattr(self, name,
                    L.Convolution2D(1, n_out, (width, embed_units), stride))
        for name in self.highways:
            # init_bt=-2 is the transform-gate bias used in the Kim paper.
            setattr(self, name, L.Highway(out_size, init_bt=-2))
    self.vocab_size = vocab_size
    self.embed_units = embed_units
    self.num_highway_layers = num_highway_layers
    self.highway_dropout = highway_dropout
    # Highway layers keep the dimensionality, so out_size is final.
    self.out_size = out_size
    self.cache = dict()
def __init__(self, n_cell, size_hidden, rate_dropout):
    """Two parallel RNN encoders whose concatenated states feed a
    Highway layer and a 4-way linear classifier."""
    super(ONT, self).__init__()
    self.rate_dropout = rate_dropout
    # Both encoders read 300-d inputs; their hidden states are
    # concatenated downstream, hence the doubled width below.
    concat_size = size_hidden * 2
    with self.init_scope():
        self.rnn_a = L.NStepRNNReLU(n_cell, 300, size_hidden, rate_dropout)
        self.rnn_b = L.NStepRNNReLU(n_cell, 300, size_hidden, rate_dropout)
        self.l1 = L.Highway(concat_size)
        self.l2 = L.Linear(concat_size, 4)
def __init__(self, in_size, bank_k, proj_filters1, proj_filters2):
    """CBHG module: a bank of 1-D convolutions, two projection
    convolutions, four Highway layers and a bidirectional GRU.

    Fix: the original stored the conv bank and the Highway layers in
    plain Python lists. Chainer only registers child links assigned as
    attributes (or held in a ChainList), so those parameters were
    invisible to optimizers, serializers and to_gpu. Wrapping them in
    ``chainer.ChainList`` registers them while keeping indexing and
    iteration working for existing callers.
    """
    super(CBHG, self).__init__()
    with self.init_scope():
        # Conv bank: kernel sizes 1..bank_k, 128 filters each.
        self.conv1d_banks = chainer.ChainList(*[
            Conv1DwithBatchNorm(in_size, 128, k + 1)
            for k in range(bank_k)
        ])
        self.conv1d_proj1 = Conv1DwithBatchNorm(128, proj_filters1, 3)
        self.conv1d_proj2 = Conv1DwithBatchNorm(proj_filters1,
                                                proj_filters2, 3)
        # Four stacked Highway layers over the projected features.
        self.highways = chainer.ChainList(*[
            L.Highway(proj_filters2) for _ in range(4)
        ])
        # The parameters of the original paper are probably wrong.
        self.gru = L.NStepBiGRU(1, proj_filters2, 128, dropout=0)
def __init__(self, config):
    """Stack of ``config.highway_n_layer`` Highway layers of width
    ``config.enc_dim``, optionally moved to the first configured GPU."""
    super(HighwayNetwork, self).__init__()
    self.seq_length = config.seq_length
    self.enc_dim = config.enc_dim
    self.layer_num = config.highway_n_layer
    # Compute the attribute names once; used both for registration
    # and for the optional GPU transfer below.
    layer_names = ['highway_layer_{0}'.format(i)
                   for i in range(self.layer_num)]
    with self.init_scope():
        for name in layer_names:
            setattr(self, name, L.Highway(self.enc_dim))
    if config.gpu[0] >= 0:
        for name in layer_names:
            getattr(self, name).to_gpu(config.gpu[0])
def __init__(self, in_out_size, n_layers, nobias=False, activate='relu',
             init_Wh=None, init_Wt=None, init_bh=None, init_bt=-1):
    """Chain of ``n_layers`` identical Highway layers.

    ``activate`` is a function name resolved through ``mF.get_function``;
    the initializer arguments are forwarded to every layer.
    """
    stack = chainer.ChainList()
    for _ in range(n_layers):
        # Resolve the activation per layer, matching the original
        # one-call-per-link behavior.
        stack.add_link(
            L.Highway(in_out_size, nobias, mF.get_function(activate),
                      init_Wh, init_Wt, init_bh, init_bt))
    super().__init__()
    with self.init_scope():
        self.layers = stack
def __init__(self, hdims, dropout=0.0):
    """Adversarial head: a Highway layer followed by a binary
    (2-class) linear classifier over ``hdims`` features."""
    super(adversarial, self).__init__()
    # Plain hyper-parameter, stored after link initialization.
    self.dropout = dropout
    with self.init_scope():
        self.highway = L.Highway(hdims)
        self.h_to_y = L.Linear(hdims, 2)
def __init__(self, vsize_enc, vsize_dec, nlayers_enc, nlayers_dec,
             n_units, gpuid, attn=False):
    '''
    Bidirectional LSTM encoder-decoder with a character-CNN + Highway
    front end and optional attention.

    vsize: vocabulary size (separate for encoder and decoder)
    nlayers: # layers (separate for encoder and decoder)
    attn: if True, use attention
    n_units: hidden size per direction (decoder uses 2*n_units)
    gpuid: GPU device id; negative means CPU/numpy
    '''
    super(EncoderDecoder, self).__init__()
    #--------------------------------------------------------------------
    # add encoder layers
    #--------------------------------------------------------------------
    # add embedding layer
    self.add_link("embed_enc", L.EmbedID(vsize_enc, n_units))
    #add CNN layer: 8 convolutions with kernel heights 1..8 over
    # width-200 input rows (stride 1x200, single in/out channel)
    self.cnn_enc=["L{0:d}_cnn".format(i) for i in range(1,9)]
    for i,cnn_name in enumerate(self.cnn_enc):
        self.add_link(cnn_name,L.Convolution2D(1,1,(i+1,200),stride=(1,200)))
        # self.add_link("cnn_enc",L.Convolution2D(1,1,(64,i)))
    #add highway layer (4 layers)
    # NOTE(review): the Highway width is hard-coded to 4 — this looks
    # suspicious (it is unrelated to n_units); confirm against the
    # forward pass before relying on it.
    self.hw_enc=["L{0:d}_hw".format(i) for i in range(1,5)]
    for i,hw_name in enumerate(self.hw_enc):
        self.add_link(hw_name,L.Highway(4))
    # add LSTM layers (forward direction)
    self.lstm_enc = ["L{0:d}_enc".format(i) for i in range(nlayers_enc)]
    for lstm_name in self.lstm_enc:
        self.add_link(lstm_name, L.LSTM(n_units, n_units))
    # reverse LSTM layer (backward direction, same depth)
    self.lstm_rev_enc = ["L{0:d}_rev_enc".format(i) for i in range(nlayers_enc)]
    for lstm_name in self.lstm_rev_enc:
        self.add_link(lstm_name, L.LSTM(n_units, n_units))
    #--------------------------------------------------------------------
    # add decoder layers
    #--------------------------------------------------------------------
    # add embedding layer (decoder works on concatenated fwd+rev states,
    # hence 2*n_units)
    self.add_link("embed_dec", L.EmbedID(vsize_dec, 2*n_units))
    # add LSTM layers
    self.lstm_dec = ["L{0:d}_dec".format(i) for i in range(nlayers_dec)]
    for lstm_name in self.lstm_dec:
        self.add_link(lstm_name, L.LSTM(2*n_units, 2*n_units))
    if attn > 0:
        # add context layer for attention (concat of decoder state and
        # attention context -> decoder width)
        self.add_link("context", L.Linear(4*n_units, 2*n_units))
    self.attn = attn
    # add output layer projecting to the decoder vocabulary
    self.add_link("out", L.Linear(2*n_units, vsize_dec))
    # Store GPU id
    self.gpuid = gpuid
    self.n_units = n_units
    # Pick the array module matching the device (cupy on GPU, numpy on CPU).
    xp = cuda.cupy if self.gpuid >= 0 else np
    # create masking array for pad id
    self.mask_pad_id = xp.ones(vsize_dec,
                               dtype=xp.float32)
    # make the class weight for pad id equal to 0
    # this way loss will not be computed for this predicted loss
    # (index 0 is assumed to be the pad token id)
    self.mask_pad_id[0] = 0
def __init__(self, vocab_size, ParameterClass):
    """CNN + BiLSTM classifier over radical/character sequences.

    Six parallel convolutions with different window/stride pairs
    capture features at radical and character granularity; their
    pooled outputs pass through a Highway layer, a BiLSTM, a second
    Highway layer with soft attention, and a 2-class output layer.
    """
    self.a = ParameterClass
    self.vocab_size = vocab_size
    # Multiple filters (radical-level and character-level) with
    # matching strides.
    self.cnn_window_sizes = [1, 2, 3, 3, 6, 9]
    self.cnn_stride_sizes = [1, 1, 1, 3, 3, 3]
    # Filter count scales with the window/stride ratio.
    self.cnn_filter_nums = [
        int(50 * (w / r))
        for w, r in zip(self.cnn_window_sizes, self.cnn_stride_sizes)]
    # Pooling window depends on the conv window and stride so that
    # pooling covers one word at a time.
    self.pooling_window_sizes = [
        int((self.a.radical_len * self.a.character_len - w) / r + 1)
        for w, r in zip(self.cnn_window_sizes, self.cnn_stride_sizes)]
    initializer = chainer.initializers.HeNormal()
    super(Model, self).__init__()
    with self.init_scope():
        # Embedding
        self.embed = L.EmbedID(
            self.vocab_size, self.a.embed_dim, initialW=initializer)
        # Convolutions conv0..conv5 (all convs created before the batch
        # norms, preserving the original link-creation order so any
        # RNG-based weight initialization matches).
        for k in range(6):
            setattr(self, 'conv{0}'.format(k), L.Convolution2D(
                1, self.cnn_filter_nums[k],
                ksize=(self.cnn_window_sizes[k], self.a.embed_dim),
                stride=(self.cnn_stride_sizes[k], self.a.embed_dim)))
        self.cnn_output_dim = sum(self.cnn_filter_nums)
        # BatchNormalization before pooling, bnorm0..bnorm5.
        for k in range(6):
            setattr(self, 'bnorm{0}'.format(k),
                    L.BatchNormalization(self.cnn_filter_nums[k]))
        # Highway 1
        self.hw1 = L.Highway(
            self.cnn_output_dim, activate=F.tanh,
            init_Wh=initializer, init_Wt=initializer)
        # BiLSTM: bidirectional output doubles the feature width.
        self.bi_lstm_dim = self.cnn_output_dim * 2
        self.bi_lstm = L.NStepBiLSTM(
            n_layers=1, in_size=self.cnn_output_dim,
            out_size=self.cnn_output_dim, dropout=0.0)
        # Highway 2 + soft-attention query vector.
        self.hw2 = L.Highway(
            self.bi_lstm_dim, activate=F.tanh,
            init_Wh=initializer, init_Wt=initializer)
        self.u_a = chainer.Parameter(initializer, (1, self.bi_lstm_dim))
        # Output layer (+ BatchNormalization over the 2 logits).
        self.fc = L.Linear(self.bi_lstm_dim, 2, initialW=initializer)
        self.bnorm_last = L.BatchNormalization(2)