Example #1
0
 def __init__(self, vocab_size, embed_size, batch_size, max_len):
     """Build the character-CNN encoder (old-style Chainer link registration).

     Seven convolutions slide n-gram windows (height n, full ``vocab_size``
     width) over a one-hot input; their output channels sum to 1700, which
     feeds two tanh highway layers and a final projection to ``embed_size``.
     """
     # (window height, output channels) for each n-gram convolution.
     filter_specs = [(1, 200), (2, 200), (3, 200), (4, 200),
                     (5, 250), (6, 300), (7, 350)]
     child_links = {
         'conv_%d' % height: L.Convolution2D(1,
                                             channels,
                                             ksize=(height, vocab_size))
         for height, channels in filter_specs
     }
     total_channels = 1700  # sum of all conv output channels above
     child_links['highway_1'] = L.Highway(total_channels, activate=F.tanh)
     child_links['highway_2'] = L.Highway(total_channels, activate=F.tanh)
     child_links['linear'] = L.Linear(total_channels, embed_size)
     super(EncoderCNN, self).__init__(**child_links)
     self.vocab_size = vocab_size
     self.batch_size = batch_size
     self.max_len = max_len
     # One-hot lookup table, presumably used to encode token ids — confirm
     # against the forward pass (not visible in this chunk).
     self.identity = xp.identity(vocab_size)
Example #2
0
    def setUp(self):
        """Create a Highway link with random weights plus a NumPy reference output."""
        shape = (5, self.in_out_size)
        self.x = numpy.random.uniform(-1, 1, shape).astype(numpy.float32)
        self.gy = numpy.random.uniform(-1, 1, shape).astype(numpy.float32)
        self.link = links.Highway(self.in_out_size, activate=functions.tanh)

        # Randomize both branches in place (plain first, then transform, so the
        # RNG draw order matches the original), collecting the raw arrays.
        drawn = []
        for branch in (self.link.plain, self.link.transform):
            for arr in (branch.W.data, branch.b.data):
                arr[...] = numpy.random.uniform(-1, 1, arr.shape)
                drawn.append(arr)
        Wh, bh, Wt, bt = drawn
        self.link.cleargrads()

        # Snapshot CPU copies so the reference stays valid if the link is
        # later moved to GPU.
        self.Wh = Wh.copy()  # fixed on CPU
        self.bh = bh.copy()  # fixed on CPU
        self.Wt = Wt.copy()  # fixed on CPU
        self.bt = bt.copy()  # fixed on CPU

        # Expected output: y = tanh(x Wh^T + bh) * t + x * (1 - t)
        # with gate t = sigmoid(x Wt^T + bt).
        plain_out = numpy.tanh(self.x.dot(Wh.T) + bh)
        gate = self.sigmoid(self.x.dot(Wt.T) + bt)
        self.y = (plain_out * gate
                  + self.x * (numpy.ones_like(self.x) - gate))
Example #3
0
    def __init__(self, vocab_size, embed_units=15, num_highway_layers=1,
                 highway_dropout=0.0, ngrams=(1, 2, 3, 4, 5, 6), stride=1, num_filters=None):
        """Character-CNN word encoder (Kim et al. char-NLM style).

        Args:
            vocab_size: size of the character vocabulary.
            embed_units: character embedding dimensionality.
            num_highway_layers: number of Highway layers on the CNN output.
            highway_dropout: dropout ratio stored for the forward pass.
            ngrams: convolution window heights (one CNN block per width).
            stride: vertical stride of each convolution.
            num_filters: output channels per n-gram; defaults to
                ``n * FILTER_MULTIPLIER`` following Table 2 (small model) of
                http://www.people.fas.harvard.edu/~yoonkim/data/char-nlm.pdf

        Raises:
            ValueError: if ``num_filters`` and ``ngrams`` differ in length,
                or ``num_highway_layers`` is negative.
        """
        super(CNNWordEncoder, self).__init__()
        if num_filters is None:
            num_filters = [n * self.FILTER_MULTIPLIER for n in ngrams]
        # Validate with explicit exceptions: ``assert`` is stripped under -O.
        if len(num_filters) != len(ngrams):
            raise ValueError('num_filters must have one entry per ngram')
        if num_highway_layers < 0:
            raise ValueError('num_highway_layers must be non-negative')
        out_size = sum(num_filters)
        with self.init_scope():
            self.embed_layer = L.EmbedID(vocab_size, embed_units,
                                         ignore_label=self.IGNORE_LABEL)
            self.cnn_blocks = ['cnn_%d' % n for n in ngrams]
            # NOTE(review): named min_width but holds the LARGEST n-gram —
            # presumably the minimum word width the widest CNN requires;
            # confirm against the forward pass.
            self.min_width = max(ngrams)
            self.highways = ['highway_%d' % i for i in range(num_highway_layers)]
            # setattr inside init_scope registers each link on the chain.
            for i, name in enumerate(self.cnn_blocks):
                setattr(self, name, L.Convolution2D(1,
                                                    num_filters[i],
                                                    (ngrams[i], embed_units),
                                                    stride))
            for name in self.highways:
                # init_bt=-2 biases the gates toward carry, as in Kim et al.
                setattr(self, name, L.Highway(out_size, init_bt=-2))
        self.vocab_size = vocab_size
        self.embed_units = embed_units
        self.num_highway_layers = num_highway_layers
        self.highway_dropout = highway_dropout
        # Highway layers preserve dimensionality, so the encoder output size
        # equals the total number of CNN filters.
        self.out_size = out_size
        self.cache = dict()
Example #4
0
 def __init__(self, n_cell, size_hidden, rate_dropout):
     """Two parallel NStepRNNReLU encoders feeding a highway + linear head."""
     super(ONT, self).__init__()
     self.rate_dropout = rate_dropout
     # Head width is twice the hidden size — presumably the two RNN outputs
     # are concatenated downstream; confirm against the forward pass.
     pair_size = size_hidden * 2
     with self.init_scope():
         # Identical RNN stacks for the two input branches (300-dim inputs).
         for branch in ('rnn_a', 'rnn_b'):
             setattr(self, branch,
                     L.NStepRNNReLU(n_cell, 300, size_hidden, rate_dropout))
         self.l1 = L.Highway(pair_size)
         self.l2 = L.Linear(pair_size, 4)  # 4-way output
Example #5
0
 def __init__(self, in_size, bank_k, proj_filters1, proj_filters2):
     """CBHG module: conv1d bank -> two projection convs -> 4 highways -> BiGRU.

     Args:
         in_size: channel count of the input sequence.
         bank_k: number of bank convolutions (kernel widths 1..bank_k).
         proj_filters1: channels of the first projection conv.
         proj_filters2: channels of the second projection conv (and the
             highway/GRU input size).
     """
     import chainer  # local import: file-level import block not visible here
     super(CBHG, self).__init__()
     with self.init_scope():
         # BUG FIX: plain Python lists of links are NOT registered by
         # init_scope (only Link/ChainList attribute assignments are), so
         # the bank and highway parameters were invisible to optimizers and
         # serializers. ChainList registers them while remaining iterable
         # and indexable exactly like the original lists.
         self.conv1d_banks = chainer.ChainList(*[
             Conv1DwithBatchNorm(in_size, 128, i + 1) for i in range(bank_k)
         ])
         self.conv1d_proj1 = Conv1DwithBatchNorm(128, proj_filters1, 3)
         self.conv1d_proj2 = Conv1DwithBatchNorm(proj_filters1,
                                                 proj_filters2, 3)
         self.highways = chainer.ChainList(*[
             L.Highway(proj_filters2) for i in range(4)
         ])  # The parameters of the original paper are probably wrong.
         self.gru = L.NStepBiGRU(1, proj_filters2, 128, dropout=0)
Example #6
0
File: net.py  Project: penzant/diin-chainer
    def __init__(self, config):
        """Stack of ``config.highway_n_layer`` Highway layers over enc_dim features."""
        super(HighwayNetwork, self).__init__()
        self.seq_length = config.seq_length
        self.enc_dim = config.enc_dim
        self.layer_num = config.highway_n_layer

        # Generate the attribute names once; both loops below reuse them.
        names = ['highway_layer_{0}'.format(idx)
                 for idx in range(self.layer_num)]
        with self.init_scope():
            for attr in names:
                setattr(self, attr, L.Highway(self.enc_dim))

            # Move every layer onto the primary GPU when one is configured.
            if config.gpu[0] >= 0:
                for attr in names:
                    getattr(self, attr).to_gpu(config.gpu[0])
Example #7
0
 def __init__(self,
              in_out_size,
              n_layers,
              nobias=False,
              activate='relu',
              init_Wh=None,
              init_Wt=None,
              init_bh=None,
              init_bt=-1):
     """Stack of ``n_layers`` identically-configured Highway links.

     Args mirror ``chainer.links.Highway``; ``activate`` is a function name
     resolved through ``mF.get_function``.
     """
     # Plain for-loop: the original abused a list comprehension purely for
     # its add_link side effect, discarding the resulting list.
     layers = chainer.ChainList()
     for _ in range(n_layers):
         layers.add_link(
             L.Highway(in_out_size, nobias, mF.get_function(activate),
                       init_Wh, init_Wt, init_bh, init_bt))
     super().__init__()
     with self.init_scope():
         # Assigning inside init_scope registers the ChainList as a child.
         self.layers = layers
 def __init__(self, hdims, dropout=0.0):
     """Adversarial head: one Highway layer then a 2-way linear classifier.

     Args:
         hdims: feature dimensionality of the input (and highway output).
         dropout: dropout ratio stored for use in the forward pass.
     """
     # Call the parent constructor FIRST so the Link machinery is fully set
     # up before any attributes are assigned on self (the original assigned
     # self.dropout before super().__init__()).
     super(adversarial, self).__init__()
     self.dropout = dropout
     with self.init_scope():
         self.highway = L.Highway(hdims)
         self.h_to_y = L.Linear(hdims, 2)
Example #9
0
File: enc_dec_batch.py  Project: wwzoe/MT
    def __init__(self, vsize_enc, vsize_dec,
                 nlayers_enc, nlayers_dec,
                 n_units, gpuid, attn=False):
        '''Build an encoder-decoder with CNN, highway and bi-directional LSTM layers.

        vsize_enc / vsize_dec: vocabulary sizes (encoder / decoder side)
        nlayers_enc / nlayers_dec: number of LSTM layers per side
        n_units: hidden unit count; the decoder runs at 2*n_units, matching
                 the concatenated forward+reverse encoder states
        gpuid:   GPU device id; negative means CPU
        attn:    if truthy, add a context layer for attention
        '''
        super(EncoderDecoder, self).__init__()
        #--------------------------------------------------------------------
        # add encoder layers
        #--------------------------------------------------------------------

        # add embedding layer
        self.add_link("embed_enc", L.EmbedID(vsize_enc, n_units))

        # add CNN layers: window heights 1..8, single output channel, width
        # 200 with horizontal stride 200 (one step across the feature axis).
        self.cnn_enc=["L{0:d}_cnn".format(i) for i in range(1,9)]
        for i,cnn_name in enumerate(self.cnn_enc):
            self.add_link(cnn_name,L.Convolution2D(1,1,(i+1,200),stride=(1,200)))
        # add highway layers.
        # NOTE(review): L.Highway(4) builds 4-unit highway layers — this looks
        # inconsistent with the CNN output width; confirm the intended size.
        self.hw_enc=["L{0:d}_hw".format(i) for i in range(1,5)]
        for i,hw_name in enumerate(self.hw_enc):
            self.add_link(hw_name,L.Highway(4))

        # add LSTM layers (forward direction)
        self.lstm_enc = ["L{0:d}_enc".format(i) for i in range(nlayers_enc)]
        for lstm_name in self.lstm_enc:
            self.add_link(lstm_name, L.LSTM(n_units, n_units))

        # reverse-direction LSTM layers (same depth as forward)
        self.lstm_rev_enc = ["L{0:d}_rev_enc".format(i) for i in range(nlayers_enc)]
        for lstm_name in self.lstm_rev_enc:
            self.add_link(lstm_name, L.LSTM(n_units, n_units))

        #--------------------------------------------------------------------
        # add decoder layers
        #--------------------------------------------------------------------

        # add embedding layer (decoder width is 2*n_units)
        self.add_link("embed_dec", L.EmbedID(vsize_dec, 2*n_units))

        # add LSTM layers
        self.lstm_dec = ["L{0:d}_dec".format(i) for i in range(nlayers_dec)]
        for lstm_name in self.lstm_dec:
            self.add_link(lstm_name, L.LSTM(2*n_units, 2*n_units))

        # ``attn > 0`` also works for the boolean default (True > 0).
        if attn > 0:
            # add context layer for attention
            self.add_link("context", L.Linear(4*n_units, 2*n_units))
        self.attn = attn

        # add output layer
        self.add_link("out", L.Linear(2*n_units, vsize_dec))

        # Store GPU id
        self.gpuid = gpuid
        self.n_units = n_units

        # pick the array module matching the device
        xp = cuda.cupy if self.gpuid >= 0 else np

        # create masking array for pad id
        self.mask_pad_id = xp.ones(vsize_dec, dtype=xp.float32)
        # make the class weight for pad id equal to 0
        # this way loss will not be computed for this predicted loss
        self.mask_pad_id[0] = 0
    def __init__(self, vocab_size, ParameterClass):
        """CNN + highway + BiLSTM + soft-attention classifier.

        Args:
            vocab_size: embedding vocabulary size.
            ParameterClass: hyper-parameter object (embed_dim, radical_len,
                character_len, ...) kept on ``self.a``.
        """
        self.a = ParameterClass
        self.vocab_size = vocab_size
        # Multiple filters (radical-level and character-level) capture
        # features at different granularities.
        self.cnn_window_sizes = [1, 2, 3, 3, 6, 9]
        self.cnn_stride_sizes = [1, 1, 1, 3, 3, 3]
        # Filter counts scale with the window/stride ratio.
        self.cnn_filter_nums = [int(50*(w/r)) for w, r in
                                zip(self.cnn_window_sizes,
                                    self.cnn_stride_sizes)]
        # Pooling windows depend on the convolution window and stride
        # (one pooled value per word).
        self.pooling_window_sizes = [
            int((self.a.radical_len * self.a.character_len - w) / r + 1)
            for w, r in zip(self.cnn_window_sizes, self.cnn_stride_sizes)]

        initializer = chainer.initializers.HeNormal()

        super(Model, self).__init__()
        with self.init_scope():
            # Embedding
            self.embed = L.EmbedID(
                            self.vocab_size,
                            self.a.embed_dim,
                            initialW=initializer)

            # Convolutions (6 kinds). setattr inside init_scope registers
            # each link exactly like the original hand-unrolled conv0..conv5.
            for i, (w, r, f) in enumerate(zip(self.cnn_window_sizes,
                                              self.cnn_stride_sizes,
                                              self.cnn_filter_nums)):
                setattr(self, 'conv%d' % i, L.Convolution2D(
                            1,
                            f,
                            ksize=(w, self.a.embed_dim),
                            stride=(r, self.a.embed_dim)))
            self.cnn_output_dim = sum(self.cnn_filter_nums)

            # Pre-pooling BatchNormalization, one per convolution
            # (replaces the hand-unrolled bnorm0..bnorm5).
            for i, f in enumerate(self.cnn_filter_nums):
                setattr(self, 'bnorm%d' % i, L.BatchNormalization(f))

            # Highway 1 over the concatenated CNN features
            self.hw1 = L.Highway(
                        self.cnn_output_dim,
                        activate=F.tanh,
                        init_Wh=initializer,
                        init_Wt=initializer)

            # BiLSTM (forward+backward outputs concatenate to bi_lstm_dim)
            self.bi_lstm_dim = self.cnn_output_dim * 2
            self.bi_lstm = L.NStepBiLSTM(
                            n_layers=1,
                            in_size=self.cnn_output_dim,
                            out_size=self.cnn_output_dim,
                            dropout=0.0)

            # Highway 2 + soft attention
            self.hw2 = L.Highway(
                        self.bi_lstm_dim,
                        activate=F.tanh,
                        init_Wh=initializer,
                        init_Wt=initializer)
            # Attention query vector — presumably dotted against the hw2
            # outputs in the forward pass; confirm there.
            self.u_a = chainer.Parameter(initializer,
                                         (1, self.bi_lstm_dim))
            # Output layer (+ BatchNormalization), 2-way classification
            self.fc = L.Linear(self.bi_lstm_dim, 2, initialW=initializer)
            self.bnorm_last = L.BatchNormalization(2)