def init_model(vocab_size, char_type_size):
    model = FunctionSet(
        embed=F.EmbedID(vocab_size, embed_units),
        char_type_embed=F.EmbedID(char_type_size, char_type_embed_units),
        #dict_embed = F.Linear(12, dict_embed_units),
        hidden1=F.Linear(
            window * (embed_units + char_type_embed_units) * 3 + hidden_units,
            hidden_units),
        i_gate=F.Linear(
            window * (embed_units + char_type_embed_units) * 3 + hidden_units,
            hidden_units),
        f_gate=F.Linear(
            window * (embed_units + char_type_embed_units) * 3 + hidden_units,
            hidden_units),
        o_gate=F.Linear(
            window * (embed_units + char_type_embed_units) * 3 + hidden_units,
            hidden_units),
        output=F.Linear(hidden_units + 12, label_num),
    )
    if opt_selection == 'Adagrad':
        opt = optimizers.AdaGrad(lr=learning_rate)
    elif opt_selection == 'SGD':
        opt = optimizers.SGD()
    elif opt_selection == 'Adam':
        opt = optimizers.Adam()
    else:
        opt = optimizers.AdaGrad(lr=learning_rate)
        print('AdaGrad is chosen as the default')
    opt.setup(model)
    return model, opt
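A minimal sketch of how the returned pair is typically driven in a Chainer v1 training loop; `forward`, `batches`, and the batch variables below are hypothetical placeholders, not part of the original code:

model, opt = init_model(vocab_size, char_type_size)
for x_batch, t_batch in batches:              # hypothetical mini-batch iterator
    opt.zero_grads()                          # clear accumulated gradients
    loss = forward(model, x_batch, t_batch)   # user-defined forward pass returning a Variable
    loss.backward()                           # backpropagate through the graph
    opt.update()                              # apply the AdaGrad/SGD/Adam update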
Example 2
def init_model():
    #Make models
    if use_pre2 == 'pre': pre_unit = 4
    else: pre_unit = 0
    if use_null == 'null': null_unit = 6
    else: null_unit = 0
    if args.phrase == 'phrase':
        phrase_unit = 4
        model = chainer.FunctionSet(
            trainable=chainer.FunctionSet(
                w0=F.Linear(n_units * 2 + null_unit * 2, n_label),
                ww0=F.Linear(
                    n_units * 2 + pre_unit + null_unit * 2 + phrase_unit,
                    n_units + null_unit),
                ww1=F.Linear(
                    n_units * 2 + pre_unit + null_unit * 2 + phrase_unit,
                    n_units + null_unit),
            ),
            w1_f=F.Linear(n_units * 2 + null_unit * 2,
                          n_units + null_unit),  #source input
            w2_f=F.Linear(n_units + null_unit,
                          n_units * 2 + null_unit * 2),  #source output
            w1_e=F.Linear(n_units * 2 + null_unit * 2,
                          n_units + null_unit),  #target input
            w2_e=F.Linear(n_units + null_unit,
                          n_units * 2 + null_unit * 2),  #target output
            embed_f=F.EmbedID(vocab_f['len_vocab'],
                              n_units),  #source word embedding
            embed_e=F.EmbedID(vocab_e['len_vocab'],
                              n_units),  #target word embedding
        )
    else:
        model = chainer.FunctionSet(
            trainable=chainer.FunctionSet(w0=F.Linear(
                n_units * 4 + null_unit * 4, n_label), ),
            w1_f=F.Linear(n_units * 2 + null_unit * 2,
                          n_units + null_unit),  #source input
            w2_f=F.Linear(n_units + null_unit,
                          n_units * 2 + null_unit * 2),  #source output
            w1_e=F.Linear(n_units * 2 + null_unit * 2,
                          n_units + null_unit),  #target input
            w2_e=F.Linear(n_units + null_unit,
                          n_units * 2 + null_unit * 2),  #target output
            embed_f=F.EmbedID(vocab_f['len_vocab'],
                              n_units),  #source word embedding
            embed_e=F.EmbedID(vocab_e['len_vocab'],
                              n_units),  #target word embedding 
        )
    if opt_name == 'SGD':
        optimizer = optimizers.SGD(lr=0.02)  # (lr=opt_score)  # lr=0.01
    elif opt_name == 'AdaGrad':
        optimizer = optimizers.AdaGrad(lr=0.001)  # (lr=opt_score)  # lr=0.001
    elif opt_name == 'AdaDelta':
        optimizer = optimizers.AdaDelta(rho=0.9)  # (rho=opt_score)  # rho=0.9
    elif opt_name == 'Adam':
        optimizer = optimizers.Adam(
            alpha=0.0001)  # (alpha=opt_score)  # alpha=0.0001
    else:
        raise ValueError('unknown optimizer: {}'.format(opt_name))
    optimizer.setup(model)  # .collect_parameters()
    return model, optimizer
Example 3
 def __make_model(self):
     self.__model = wrapper.make_model(
         w_xh=functions.EmbedID(2 * self.__n_context * len(self.__vocab),
                                self.__n_hidden),
         w_hy=functions.Linear(self.__n_hidden, self.__n_labels),
         trans=functions.EmbedID(
             self.__n_labels * self.__n_labels,
             1),  # weight of the transition between each pair of labels (0,1); treated as a probability via softmax
     )
Example 4
def init_model(vocab_size):
    model = chainer.FunctionSet(
        embed=F.EmbedID(vocab_size, embed_units),
        hidden1=F.Linear(window * embed_units, hidden_units),
        output=F.Linear(hidden_units, label_num),
        trans=F.EmbedID(label_num, label_num),
    )
    #opt = optimizers.AdaGrad(lr=learning_rate)
    opt = optimizers.Adam()
    opt.setup(model)
    return model, opt
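A minimal sketch (an assumption, not from the original script, and assuming numpy is available as np) of how the `trans` table can be read at decoding time; row i holds the transition scores from label i to every possible next label:

prev_labels = np.array([2, 0], dtype=np.int32)          # hypothetical previous labels for a batch of 2
trans_scores = model.trans(chainer.Variable(prev_labels))
# trans_scores has shape (2, label_num); these scores can be added to the
# per-position `output` scores before choosing the best label sequence.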
Example 5
 def __make_model(self):
     self.__model = wrapper.make_model(
         # encoder
         w_xi = functions.EmbedID(len(self.__src_vocab), self.__n_embed),
         w_ip = functions.Linear(self.__n_embed, 4 * self.__n_hidden),
         w_pp = functions.Linear(self.__n_hidden, 4 * self.__n_hidden),
         # decoder
         w_pq = functions.Linear(self.__n_hidden, 4 * self.__n_hidden),
         w_qj = functions.Linear(self.__n_hidden, self.__n_embed),
         w_jy = functions.Linear(self.__n_embed, len(self.__trg_vocab)),
         w_yq = functions.EmbedID(len(self.__trg_vocab), 4 * self.__n_hidden),
         w_qq = functions.Linear(self.__n_hidden, 4 * self.__n_hidden),
     )
 def make_model(self):
     self.model = wrapper.make_model(
         # encoder
         weight_xi=functions.EmbedID(len(self.src_vocab), self.n_embed),
         weight_ip=functions.Linear(self.n_embed, 4 * self.n_hidden),
         weight_pp=functions.Linear(self.n_hidden, 4 * self.n_hidden),
         # decoder
         weight_pq=functions.Linear(self.n_hidden, 4 * self.n_hidden),
         weight_qj=functions.Linear(self.n_hidden, self.n_embed),
         weight_jy=functions.Linear(self.n_embed, len(self.trg_vocab)),
         weight_yq=functions.EmbedID(len(self.trg_vocab),
                                     4 * self.n_hidden),
         weight_qq=functions.Linear(self.n_hidden, 4 * self.n_hidden),
     )
def init_model(vocab_size, char_type_size):
    model = chainer.FunctionSet(
        embed=F.EmbedID(vocab_size, embed_units),
        char_type_embed=F.EmbedID(char_type_size, char_type_embed_units),
        hidden1=F.Linear(window * (embed_units + char_type_embed_units) + hidden_units, hidden_units),
        i_gate=F.Linear(window * (embed_units + char_type_embed_units) + hidden_units, hidden_units),
        f_gate=F.Linear(window * (embed_units + char_type_embed_units) + hidden_units, hidden_units),
        o_gate=F.Linear(window * (embed_units + char_type_embed_units) + hidden_units, hidden_units),
        output=F.Linear(hidden_units, label_num),
    )
    #opt = optimizers.AdaGrad(lr=learning_rate)
    #opt = optimizers.SGD()
    opt = optimizers.Adam()
    opt.setup(model)
    return model, opt
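A minimal sketch (an assumption, not part of the original function) of how these parameters are typically wired into an LSTM-style step; `window_feats`, `prev_c`, and `prev_h` are hypothetical Variables:

x = F.concat((window_feats, prev_h))   # windowed char/type embeddings + previous hidden state
candidate = F.tanh(model.hidden1(x))   # candidate cell input
i = F.sigmoid(model.i_gate(x))         # input gate
f = F.sigmoid(model.f_gate(x))         # forget gate
o = F.sigmoid(model.o_gate(x))         # output gate
c = f * prev_c + i * candidate         # new cell state
h = o * F.tanh(c)                      # new hidden state, fed to model.output for label scores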
Example 8
    def __init__(self, n_units, vocab_in, vocab_out, loadpath=None, gpu=-1):
        self.xp = np

        self.tagger = igo.tagger.Tagger(self.DIC_DIR)

        self.vocab_in = vocab_in
        self.vocab_out = vocab_out

        self.n_units = n_units

        if loadpath:
            with open(loadpath, 'rb') as f:
                self.model = pickle.load(f)

        else:
            self.model = chainer.FunctionSet(
                embed=F.EmbedID(len(self.vocab_in), n_units),
                l1_x=F.Linear(self.n_units, 4 * self.n_units),
                l1_h=F.Linear(self.n_units, 4 * self.n_units),
                l2_x=F.Linear(self.n_units, 4 * self.n_units),
                l2_h=F.Linear(self.n_units, 4 * self.n_units),
                l3=F.Linear(self.n_units, len(self.vocab_out)),
            )

        self.optimizer = optimizers.Adam()
        self.optimizer.setup(self.model)
Example 9
    def __init__(self,
                 n_vocab,
                 doc_length,
                 wv_size,
                 filter_sizes=[3, 4, 5],
                 hidden_units=[100, 2],
                 output_channel=100,
                 initialW=None,
                 non_static=False):
        super(NNModel, self).__init__()
        self.filter_sizes = filter_sizes
        self.hidden_units = hidden_units
        self.doc_length = doc_length
        self.non_static = non_static

        self.add_link(
            'embed',
            F.EmbedID(n_vocab, wv_size, initialW=initialW, ignore_label=0))
        for filter_h in self.filter_sizes:
            filter_w = wv_size
            filter_shape = (filter_h, filter_w)
            self.add_link('conv' + str(filter_h),
                          L.Convolution2D(1, output_channel, filter_shape))

        for i in range(len(hidden_units)):
            self.add_link('l' + str(i), L.Linear(None, hidden_units[i]))
Example 10
    def setUp(self):
        self.func = functions.EmbedID(3, 2)
        self.func.gW.fill(0)

        self.W = self.func.W.copy()  # fixed on CPU
        self.x = numpy.array([0, 1, 0], dtype=numpy.int32)
        self.gy = numpy.random.uniform(-1, 1, (3, 2)).astype(numpy.float32)
Example 11
 def __init__(self, n_vocab, n_docs, n_units, loss_func):
     super(DistributedBoW, self).__init__(
         embed=F.EmbedID(n_vocab + n_docs,
                         n_units,
                         initialW=I.Uniform(1. / n_units)),
         loss_func=loss_func,
     )
def make_rnnlm_model(n_vocab, n_embed, n_hidden):
    return make_model(
        w_xe=functions.EmbedID(n_vocab, n_embed),
        w_eh=functions.Linear(n_embed, n_hidden),
        w_hh=functions.Linear(n_hidden, n_hidden),
        w_hy=functions.Linear(n_hidden, n_vocab),
    )
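A minimal sketch (an assumption, since the wrapper's return type is not shown) of a single step of the RNN language model defined above, assuming the returned object exposes the parameters as callables; `word` and `h_prev` are hypothetical Variables:

e = model.w_xe(word)                                     # embed the current word id
h = functions.tanh(model.w_eh(e) + model.w_hh(h_prev))   # recurrent hidden update
y = model.w_hy(h)                                        # unnormalized scores over the vocabulary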
Example 13
    def __init__(self,
                 deep,
                 gpu,
                 word2index,
                 in_units,
                 hidden_units,
                 out_units,
                 loss_func,
                 train,
                 drop_ratio=0.0):
        n_vocab = len(word2index)
        l2r_embedding = F.EmbedID(n_vocab, in_units)
        r2l_embedding = F.EmbedID(n_vocab, in_units)

        if deep:
            super(BiLstmContext, self).__init__(
                l2r_embed=l2r_embedding,
                r2l_embed=r2l_embedding,
                loss_func=loss_func,
                l2r_1=L.LSTM(in_units, hidden_units),
                r2l_1=L.LSTM(in_units, hidden_units),
                l3=L.Linear(2 * hidden_units, 2 * hidden_units),
                l4=L.Linear(2 * hidden_units, out_units),
            )
        else:
            super(BiLstmContext,
                  self).__init__(l2r_embed=l2r_embedding,
                                 r2l_embed=r2l_embedding,
                                 loss_func=loss_func,
                                 l2r_1=L.LSTM(in_units, hidden_units),
                                 r2l_1=L.LSTM(in_units, hidden_units),
                                 lp_l2r=L.Linear(hidden_units, out_units // 2),
                                 lp_r2l=L.Linear(hidden_units, out_units // 2))
        if gpu >= 0:
            self.to_gpu()
        l2r_embedding.W.data = self.xp.random.normal(
            0, math.sqrt(1. / l2r_embedding.W.data.shape[0]),
            l2r_embedding.W.data.shape).astype(np.float32)
        r2l_embedding.W.data = self.xp.random.normal(
            0, math.sqrt(1. / r2l_embedding.W.data.shape[0]),
            r2l_embedding.W.data.shape).astype(np.float32)

        self.word2index = word2index
        self.train = train
        self.deep = deep
        self.drop_ratio = drop_ratio
Example 14
 def __init__(self, embed_size, hidden_size, source_vocab):
     super(Encoder, self).__init__(
         word_id_2_embed=F.EmbedID(source_vocab,
                                   embed_size,
                                   ignore_label=-1),
         embed_2_lstm_input=F.Linear(embed_size, hidden_size * 4),
         pre_hidden_2_lstm_input=F.Linear(hidden_size, hidden_size * 4),
     )
Example 15
 def __init__(self, embed_size, hidden_size, target_vocab):
     super(Decoder, self).__init__(
         word_id_2_embed=F.EmbedID(target_vocab,
                                   embed_size,
                                   ignore_label=-1),
         embed_2_lstm_input=F.Linear(embed_size, hidden_size * 4),
         pre_hidden_2_lstm_input=F.Linear(hidden_size, hidden_size * 4),
         hidden_2_word_id=F.Linear(hidden_size, target_vocab),
     )
Example 16
    def __init__(self,
                 emb_dim,
                 vocab_size,
                 layers,
                 suppress_output=False,
                 lstm=False,
                 irnn=False,
                 active=F.relu,
                 eos_id=0):
        """
        Recurrent Neural Network with multiple layers.
        in_dim -> layers[0] -> ... -> layers[-1] -> out_dim (optional)

        :param int emb_dim: dimension of embeddings
        :param int vocab_size: size of vocabulary
        :param layers: dimensions of hidden layers
        :type layers: list of int
        :param bool suppress_output: suppress output
        :param bool lstm: whether to use LSTM
        :param bool irnn: whether to use IRNN
        :param chainer.Function active: activation function between layers of vanilla RNN
        :param int eos_id: ID of <BOS> and <EOS>
        """
        assert not (lstm and irnn)

        self.emb_dim = emb_dim
        self.vocab_size = vocab_size
        self.layers = layers
        self.suppress_output = suppress_output
        self.lstm = lstm
        self.irnn = irnn
        self.active = active
        self.eos_id = eos_id

        # set up NN architecture
        model = chainer.FunctionSet(emb=F.EmbedID(vocab_size, emb_dim), )
        # add hidden layers
        layer_dims = [emb_dim] + layers
        for i in range(len(layers)):
            in_dim = layer_dims[i]
            out_dim = layer_dims[i + 1]
            if lstm:
                linear = F.Linear(in_dim, out_dim * 4)
                hidden = F.Linear(out_dim, out_dim * 4)
            else:
                linear = F.Linear(in_dim, out_dim)
                hidden = F.Linear(out_dim, out_dim)
                if irnn:
                    # initialize hidden connection with identity matrix
                    hidden.W = np.eye(out_dim, dtype=np.float32)
            setattr(model, 'l{}_x'.format(i + 1), linear)
            setattr(model, 'l{}_h'.format(i + 1), hidden)
        if not suppress_output:
            # add output layer
            setattr(model, 'l_y', F.Linear(layer_dims[-1], vocab_size))
        self.model = model
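A minimal usage sketch; the class name `Rnn` is an assumption, since the snippet only shows its __init__:

rnn = Rnn(emb_dim=128, vocab_size=10000, layers=[256, 256], lstm=True)
# rnn.model now exposes emb, l1_x/l1_h, l2_x/l2_h and l_y; with lstm=True each
# layer step is typically c, h = F.lstm(c_prev, l_x(x) + l_h(h_prev)).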
Example 17
 def __init__(self, n_vocab, n_units, n_labels, train=True):
     super(BLSTM, self).__init__(
         embed=F.EmbedID(n_vocab, n_units, ignore_label=-1),
         # fl=L.LSTM(n_units, n_units),
         # bl=L.LSTM(n_units, n_units),
         ll=L.Linear(n_units, 2)
     )
     for param in self.params():
         param.data[...] = np.random.uniform(-0.1, 0.1, param.data.shape)
     self.train = train
Example 18
 def __init__(self, n_vocab, n_units):
     super(CharRNN, self).__init__(
         embed=F.EmbedID(n_vocab, n_units),
         l1_x=F.Linear(n_units, 4 * n_units),
         l1_h=F.Linear(n_units, 4 * n_units),
         l2_h=F.Linear(n_units, 4 * n_units),
         l2_x=F.Linear(n_units, 4 * n_units),
         l3=F.Linear(n_units, n_vocab),
     )
     for param in self.parameters:
         param[:] = np.random.uniform(-0.08, 0.08, param.shape)
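A minimal sketch (an assumption, not part of the CharRNN snippet) of one forward step as it would appear inside a forward method, using chainer.functions.lstm; `x` is an int32 Variable of character ids and `state` a hypothetical dict holding the previous cells and hidden states:

h0 = self.embed(x)
h1_in = self.l1_x(h0) + self.l1_h(state['h1'])
c1, h1 = F.lstm(state['c1'], h1_in)
h2_in = self.l2_x(h1) + self.l2_h(state['h2'])
c2, h2 = F.lstm(state['c2'], h2_in)
y = self.l3(h2)                        # logits over the character vocabulary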
Example 19
 def __init__(self,
              src_vocab,
              trg_vocab,
              n_embed=256,
              n_hidden=512,
              algorithm='Adam'):
     self.src_vocab = src_vocab
     self.trg_vocab = trg_vocab
     self.n_embed = n_embed
     self.n_hidden = n_hidden
     self.algorithm = algorithm
     self.model = FunctionSet(embed_x=F.EmbedID(len(src_vocab), n_embed),
                              en_x_to_h=F.Linear(n_embed, 4 * n_hidden),
                              en_h_to_h=F.Linear(n_hidden, 4 * n_hidden),
                              en_h_to_de_h=F.Linear(n_hidden, 4 * n_hidden),
                              de_h_to_embed_y=F.Linear(n_hidden, n_embed),
                              embed_y_to_y=F.Linear(n_embed,
                                                    len(trg_vocab)),
                              y_to_h=F.EmbedID(len(trg_vocab),
                                               4 * n_hidden),
                              de_h_to_h=F.Linear(n_hidden, 4 * n_hidden))
 def __make_model(self):
     self.__model = wrapper.make_model(
         w_xe = functions.EmbedID(len(self.__vocab), self.__n_embed),
         w_ea = functions.Linear(self.__n_embed, 4 * self.__n_hidden),
         w_aa = functions.Linear(self.__n_hidden, 4 * self.__n_hidden),
         w_eb = functions.Linear(self.__n_embed, 4 * self.__n_hidden),
         w_bb = functions.Linear(self.__n_hidden, 4 * self.__n_hidden),
         w_ay1 = functions.Linear(self.__n_hidden, 1),
         w_by1 = functions.Linear(self.__n_hidden, 1),
         w_ay2 = functions.Linear(self.__n_hidden, 1),
         w_by2 = functions.Linear(self.__n_hidden, 1),
     )
Example 21
 def make_model(self):
     initialW = np.random.uniform
     self.model = self.wrapper.make_model(
         # encoder
         w_xi=functions.EmbedID(len(self.src_vocab), self.n_embed),
         w_ip=functions.Linear(
             self.n_embed,
             4 * self.n_hidden,
             initialW=initialW(-0.1, 0.1,
                               (4 * self.n_hidden, self.n_embed))),
         w_pp=functions.Linear(
             self.n_hidden,
             4 * self.n_hidden,
             initialW=initialW(-0.1, 0.1,
                               (4 * self.n_hidden, self.n_hidden))),
         # decoder
         w_pq=functions.Linear(
             self.n_hidden,
             4 * self.n_hidden,
             initialW=initialW(-0.1, 0.1,
                               (4 * self.n_hidden, self.n_hidden))),
         w_qj=functions.Linear(
             self.n_hidden,
             self.n_embed,
             initialW=initialW(-0.1, 0.1, (self.n_embed, self.n_hidden))),
         w_jy=functions.Linear(
             self.n_embed,
             len(self.trg_vocab),
             initialW=initialW(-0.1, 0.1,
                               (len(self.trg_vocab), self.n_embed))),
         w_yq=functions.EmbedID(len(self.trg_vocab), 4 * self.n_hidden),
         w_qq=functions.Linear(
             self.n_hidden,
             4 * self.n_hidden,
             initialW=initialW(-0.1, 0.1,
                               (4 * self.n_hidden, self.n_hidden))),
     )
Example 22
 def __make_model(self):
     self.__model = wrapper.make_model(
         # input embedding
         w_xi=functions.EmbedID(len(self.__src_vocab), self.__n_embed),
         # forward encoder
         w_ia=functions.Linear(self.__n_embed, 4 * self.__n_hidden),
         w_aa=functions.Linear(self.__n_hidden, 4 * self.__n_hidden),
         # backward encoder
         w_ib=functions.Linear(self.__n_embed, 4 * self.__n_hidden),
         w_bb=functions.Linear(self.__n_hidden, 4 * self.__n_hidden),
         # attentional weight estimator
         w_aw=functions.Linear(self.__n_hidden, self.__n_hidden),
         w_bw=functions.Linear(self.__n_hidden, self.__n_hidden),
         w_pw=functions.Linear(self.__n_hidden, self.__n_hidden),
         w_we=functions.Linear(self.__n_hidden, 1),
         # decoder
         w_ap=functions.Linear(self.__n_hidden, self.__n_hidden),
         w_bp=functions.Linear(self.__n_hidden, self.__n_hidden),
         w_yp=functions.EmbedID(len(self.__trg_vocab), 4 * self.__n_hidden),
         w_pp=functions.Linear(self.__n_hidden, 4 * self.__n_hidden),
         w_cp=functions.Linear(self.__n_hidden, 4 * self.__n_hidden),
         w_dp=functions.Linear(self.__n_hidden, 4 * self.__n_hidden),
         w_py=functions.Linear(self.__n_hidden, len(self.__trg_vocab)),
     )
Example 23
 def __init__(self, n_vocab, n_units, batch_size):
     super(CharIRNN, self).__init__(
         embed=F.EmbedID(n_vocab, n_units),
         l1_x=F.Linear(n_units, n_units),
         l1_h=F.Linear(n_units, n_units),
         l2_h=F.Linear(n_units, n_units),
         l2_x=F.Linear(n_units, n_units),
         l3=F.Linear(n_units, n_vocab),
     )
     self.sorted_funcs = sorted(six.iteritems(self.__dict__))
     for param in self.parameters:
         param[:] = np.random.uniform(-0.08, 0.08, param.shape)
     self.l1_h.W = np.eye(self.l1_h.W.shape[0], dtype=np.float32) * 0.5
     self.l2_h.W = np.eye(self.l2_h.W.shape[0], dtype=np.float32) * 0.5
     self.reset_state(batch_size)
Example 24
    def _create_and_initialize_model(self, tags, vocab):
        # The model is an LSTM tagger:
        # (word_id -> word_emb)_t -> LSTM -> (distribution over tag_id)_t

        self.model = chainer.FunctionSet()
        self.model.embed = F.EmbedID(len(vocab), self.n_lstm_cells)
        self.model.lstm_x_to_h = F.Linear(self.n_lstm_cells,
                                          4 * self.n_lstm_cells)
        self.model.lstm_h_to_h = F.Linear(self.n_lstm_cells,
                                          4 * self.n_lstm_cells)
        self.model.yclf = F.Linear(self.n_lstm_cells, len(tags))

        # Randomly initialize the parameters.
        for param in self.model.parameters:
            param[:] = np.random.uniform(-0.1, 0.1, param.shape)
    def __init__(self, caption_model_place, cnn_model_place, index2word_place, gpu_id=-1, beamsize=3):
        # basic parameters you may need to modify
        self.gpu_id = gpu_id  # GPU ID; use -1 to run on the CPU
        self.beamsize = beamsize

        # GPU setting
        global xp
        if self.gpu_id >= 0:
            xp = cuda.cupy
            cuda.get_device(gpu_id).use()
        else:
            xp = np

        # Prepare the dataset
        with open(index2word_place, 'rb') as f:
            self.index2word = pickle.load(f)
        vocab = self.index2word

        # Load the Caffe model
        with open(cnn_model_place, 'rb') as f:
            self.func = pickle.load(f)

        # Model preparation
        image_feature_dim = 1024  # dimension of the image feature
        self.n_units = 512  # number of units per layer
        n_units = 512
        self.model = FunctionSet()
        self.model.img_feature2vec = F.Linear(image_feature_dim, n_units)  # corresponds to the last layer of CNN(I); parameters W, b
        self.model.embed = F.EmbedID(len(vocab), n_units)  # corresponds to W_e * S_t; parameter W
        self.model.l1_x = F.Linear(n_units, 4 * n_units)  # parameters W, b
        self.model.l1_h = F.Linear(n_units, 4 * n_units)  # parameters W, b
        self.model.out = F.Linear(n_units, len(vocab))  # parameters W, b
        serializers.load_hdf5(caption_model_place, self.model)  # load the pre-trained caption model

        #To GPU
        if gpu_id >= 0:
            self.model.to_gpu()
            self.func.to_gpu()

        # To avoid overflow:
        # for reasons unknown, this model overflows on the very first run, but only on the CPU,
        # so we intentionally trigger that overflow once here so it never happens afterwards.
        if gpu_id < 0:
            numpy_image = np.ones((3, 224, 224), dtype=np.float32)
            self.generate(numpy_image)
Example 26
    def __init__(self,
                 emb_dim,
                 vocab_size,
                 layer_dims,
                 feature_dim,
                 suppress_output,
                 eos_id=0):
        """
        Recurrent Neural Network with multiple layers.
        in_dim -> layers[0] -> ... -> layers[-1] -> out_dim (optional)

        :param int emb_dim: dimension of embeddings
        :param int vocab_size: size of vocabulary
        :param layer_dims: dimensions of hidden layers
        :type layer_dims: list of int
        :param int feature_dim: dimension of the external feature
        :param bool suppress_output: whether to suppress output
        :param int eos_id: ID of <BOS> and <EOS>
        """
        super(Rnn, self).__init__(emb=F.EmbedID(vocab_size, emb_dim))

        self.emb_dim = emb_dim
        self.vocab_size = vocab_size
        self.layer_dims = layer_dims
        self.feature_dim = feature_dim
        self.suppress_output = suppress_output
        self.eos_id = eos_id

        # add hidden layer_dims
        ls_xh = ChainList()
        ls_hh = ChainList()
        ls_fh = ChainList()
        layer_dims = [emb_dim] + layer_dims
        for in_dim, out_dim in zip(layer_dims, layer_dims[1:]):
            ls_xh.add_link(F.Linear(in_dim, out_dim * 4))
            ls_hh.add_link(F.Linear(out_dim, out_dim * 4))
            ls_fh.add_link(F.Linear(feature_dim, out_dim * 4))
        self.add_link('ls_xh', ls_xh)
        self.add_link('ls_hh', ls_hh)
        self.add_link('ls_fh', ls_fh)

        if not suppress_output:
            # add output layer
            self.add_link('l_y', F.Linear(layer_dims[-1], self.vocab_size))
Example 27
 def __init__(self, n_vocab, n_units, loss_func):
     super(ContinuousBoW, self).__init__(
         embed=F.EmbedID(
             n_vocab, n_units, initialW=I.Uniform(1. / n_units)),
         loss_func=loss_func,
     )
Example 28
def load_data(filename):
    words = open(filename).read().replace('\n', '<eos>').strip().split()
    dataset = np.ndarray((len(words), ), dtype=np.int32)
    for i, word in enumerate(words):
        if word not in vocab:
            vocab[word] = len(vocab)
        dataset[i] = vocab[word]
    return dataset


train_data = load_data('ptb.train.txt')
valid_data = load_data('ptb.valid.txt')
test_data = load_data('ptb.test.txt')
print('#vocab =', len(vocab))

# Prepare RNNLM model
model = chainer.FunctionSet(embed=F.EmbedID(len(vocab), n_units),
                            l1_x=F.Linear(n_units, 4 * n_units),
                            l1_h=F.Linear(n_units, 4 * n_units),
                            l2_x=F.Linear(n_units, 4 * n_units),
                            l2_h=F.Linear(n_units, 4 * n_units),
                            l3=F.Linear(n_units, len(vocab)))
for param in model.parameters:
    param[:] = np.random.uniform(-0.1, 0.1, param.shape)
if args.gpu >= 0:
    cuda.check_cuda_available()
    cuda.get_device(args.gpu).use()
    model.to_gpu()


def forward_one_step(x_data, y_data, state, train=True):
    # Neural net architecture
Example 29
 def make_model(self):
     initialW = np.random.uniform
     self.model = self.wrapper.make_model(
         # input embedding
         w_xi=functions.EmbedID(len(self.src_vocab), self.n_embed),
         # forward encoder
         w_ia=functions.Linear(
             self.n_embed,
             4 * self.n_hidden,
             initialW=initialW(-0.1, 0.1,
                               (4 * self.n_hidden, self.n_embed))),
         w_aa=functions.Linear(
             self.n_hidden,
             4 * self.n_hidden,
             initialW=initialW(-0.1, 0.1,
                               (4 * self.n_hidden, self.n_hidden))),
         # backward encoder
         w_ib=functions.Linear(
             self.n_embed,
             4 * self.n_hidden,
             initialW=initialW(-0.1, 0.1,
                               (4 * self.n_hidden, self.n_embed))),
         w_bb=functions.Linear(
             self.n_hidden,
             4 * self.n_hidden,
             initialW=initialW(-0.1, 0.1,
                               (4 * self.n_hidden, self.n_hidden))),
         # attentional weight estimator
         w_aw=functions.Linear(
             self.n_hidden,
             self.n_hidden,
             initialW=initialW(-0.1, 0.1, (self.n_hidden, self.n_hidden))),
         w_bw=functions.Linear(
             self.n_hidden,
             self.n_hidden,
             initialW=initialW(-0.1, 0.1, (self.n_hidden, self.n_hidden))),
         w_pw=functions.Linear(
             self.n_hidden,
             self.n_hidden,
             initialW=initialW(-0.1, 0.1, (self.n_hidden, self.n_hidden))),
         w_we=functions.Linear(self.n_hidden,
                               1,
                               initialW=initialW(-0.1, 0.1,
                                                 (1, self.n_hidden))),
         # decoder
         w_ap=functions.Linear(
             self.n_hidden,
             self.n_hidden,
             initialW=initialW(-0.1, 0.1, (self.n_hidden, self.n_hidden))),
         w_bp=functions.Linear(
             self.n_hidden,
             self.n_hidden,
             initialW=initialW(-0.1, 0.1, (self.n_hidden, self.n_hidden))),
         w_yp=functions.EmbedID(len(self.trg_vocab), 4 * self.n_hidden),
         w_pp=functions.Linear(
             self.n_hidden,
             4 * self.n_hidden,
             initialW=initialW(-0.1, 0.1,
                               (4 * self.n_hidden, self.n_hidden))),
         w_cp=functions.Linear(
             self.n_hidden,
             4 * self.n_hidden,
             initialW=initialW(-0.1, 0.1,
                               (4 * self.n_hidden, self.n_hidden))),
         w_dp=functions.Linear(
             self.n_hidden,
             4 * self.n_hidden,
             initialW=initialW(-0.1, 0.1,
                               (4 * self.n_hidden, self.n_hidden))),
         w_py=functions.Linear(
             self.n_hidden,
             len(self.trg_vocab),
             initialW=initialW(-0.1, 0.1,
                               (len(self.trg_vocab), self.n_hidden))),
     )
Example 30
        traverse(tree, train=False, evaluate=result)

    acc_node = 100.0 * result['correct_node'] / result['total_node']
    acc_root = 100.0 * result['correct_root'] / result['total_root']
    print(' Node accuracy: {0:.2f} % ({1:,d}/{2:,d})'.format(
        acc_node, result['correct_node'], result['total_node']))
    print(' Root accuracy: {0:.2f} % ({1:,d}/{2:,d})'.format(
        acc_root, result['correct_root'], result['total_root']))

vocab = {}
train_trees = read_corpus('trees/train.txt', vocab)
test_trees = read_corpus('trees/test.txt', vocab)
develop_trees = read_corpus('trees/dev.txt', vocab)

model = chainer.FunctionSet(
    embed=F.EmbedID(len(vocab), n_units),
    l=F.Linear(n_units * 2, n_units),
    w=F.Linear(n_units, n_label),
)
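A minimal sketch (an assumption, not from the original script) of how the three parameters are combined while traversing a parse-tree node; `word_id`, `left_h`, and `right_h` are hypothetical Variables:

v_leaf = model.embed(word_id)                       # leaf node: word embedding
h = F.tanh(model.l(F.concat((left_h, right_h))))    # internal node: compose the two children
y = model.w(h)                                      # scores over the n_label sentiment classes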

if args.gpu >= 0:
    model.to_gpu()

# Setup optimizer
optimizer = optimizers.AdaGrad(lr=0.1)
optimizer.setup(model)

accum_loss = 0
count = 0
start_at = time.time()
cur_at = start_at