Example No. 1
    def __init__(self, lstm_num, position_num, position_units, value_units,
                 n_units, activation):

        lstm_stack = chainer.ChainList()
        for i in range(lstm_num):
            lstm_stack.add_link(L.LSTM(n_units, n_units))

        output_pos_layers = chainer.ChainList()
        for i in range(position_units - 1):
            output_pos_layers.add_link(
                L.Bilinear(position_num, n_units, n_units))

        output_pos_mid_layers = chainer.ChainList()
        for i in range(position_units - 1):
            output_pos_mid_layers.add_link(
                L.Linear(n_units, n_units + position_num))

        output_val_layers = chainer.ChainList()
        for i in range(value_units - 1):
            output_val_layers.add_link(L.Bilinear(1, n_units, n_units))

        output_val_mid_layers = chainer.ChainList()
        for i in range(position_units - 1):
            output_val_mid_layers.add_link(L.Linear(n_units, n_units + 2))

        super(LogLSTM,
              self).__init__(input_layer=L.Linear(
                  position_num * position_units + value_units, n_units),
                             input_mid_layer=L.Linear(n_units, n_units),
                             lstms=lstm_stack,
                             output_pos1=L.Linear(n_units, position_num),
                             output_pos_layers=output_pos_layers,
                             output_pos_mid_layers=output_pos_mid_layers,
                             output_lastpos=L.Bilinear(position_num, n_units,
                                                       n_units),
                             output_val_layers=output_val_layers,
                             output_val_mid_layers=output_val_mid_layers,
                             output_last_value=L.Linear(n_units, 2))
        self.position_num = position_num
        self.lstm_num = lstm_num
        self.position_units = position_units
        self.value_units = value_units
        self.n_units = n_units
        if activation == 'sigmoid':
            self.activate = F.sigmoid
        elif activation == 'relu':
            self.activate = F.relu
        else:
            pass  # no matching activation; self.activate is left unset
Example No. 2
 def __init__(self,
              hidden_dim,
              out_dim,
              head,
              activation=functions.identity):
     """
     :param hidden_dim: dimension of atom representation
     :param out_dim: dimension of molecular representation
     :param head: number of heads in attention mechanism
     """
     super(DeepNieFineCoattention, self).__init__()
     with self.init_scope():
         self.energy_layer = links.Bilinear(hidden_dim, hidden_dim, 1)
         self.attention_layer_1 = GraphLinear(head, 1, nobias=True)
         self.attention_layer_2 = GraphLinear(head, 1, nobias=True)
         self.prev_lt_layer_1 = GraphLinear(hidden_dim,
                                            hidden_dim,
                                            nobias=False)
         self.prev_lt_layer_2 = GraphLinear(hidden_dim,
                                            hidden_dim,
                                            nobias=False)
         self.lt_layer_1 = GraphLinear(hidden_dim, head, nobias=True)
         self.lt_layer_2 = GraphLinear(hidden_dim, head, nobias=True)
         self.j_layer = GraphLinear(hidden_dim, out_dim)
     self.hidden_dim = hidden_dim
     self.out_dim = out_dim
     self.head = head
     self.activation = activation
Example No. 3
 def __init__(self,
              hidden_dim,
              out_dim,
              head,
              activation=functions.identity):
     """
     :param hidden_dim: dimension of atom representation
     :param out_dim: dimension of molecular representation
     :param head: number of heads in attention mechanism
     """
     super(ExtremeDeepNieFineCoattention, self).__init__()
     self.n_lt_layers = 3
     with self.init_scope():
         self.energy_layer = links.Bilinear(hidden_dim, hidden_dim, 1)
         self.attention_layer_1 = GraphLinear(head, 1, nobias=True)
         self.attention_layer_2 = GraphLinear(head, 1, nobias=True)
         # self.prev_lt_layer_1 = GraphLinear(hidden_dim, hidden_dim, nobias=False)
         self.prev_lt_layers_1 = chainer.ChainList(*[
             GraphLinear(hidden_dim, hidden_dim, nobias=False)
             for _ in range(self.n_lt_layers)
         ])
         # self.prev_lt_layer_2 = GraphLinear(hidden_dim, hidden_dim, nobias=False)
         self.prev_lt_layers_2 = chainer.ChainList(*[
             GraphLinear(hidden_dim, hidden_dim, nobias=False)
             for _ in range(self.n_lt_layers)
         ])
         self.lt_layer_1 = GraphLinear(hidden_dim, head, nobias=True)
         self.lt_layer_2 = GraphLinear(hidden_dim, head, nobias=True)
         self.j_layer = GraphLinear(hidden_dim, out_dim)
     # modification for concat
     self.hidden_dim = hidden_dim
     self.out_dim = out_dim
     self.head = head
     self.activation = activation
Example No. 4
 def __init__(self, n_outputs):
     super(GoogLeNet, self).__init__(
         conv1=L.Convolution2D(3, 64, 7, stride=2, pad=3),
         conv2_reduce=L.Convolution2D(64, 64, 1),
         conv2=L.Convolution2D(64, 192, 3, stride=1, pad=1),
         inc3a=L.Inception(192, 64, 96, 128, 16, 32, 32),
         inc3b=L.Inception(256, 128, 128, 192, 32, 96, 64),
         inc4a=L.Inception(480, 192, 96, 208, 16, 48, 64),
         inc4b=L.Inception(512, 160, 112, 224, 24, 64, 64),
         inc4c=L.Inception(512, 128, 128, 256, 24, 64, 64),
         inc4d=L.Inception(512, 112, 144, 288, 32, 64, 64),
         inc4e=L.Inception(528, 256, 160, 320, 32, 128, 128),
         inc5a=L.Inception(832, 256, 160, 320, 32, 128, 128),
         inc5b=L.Inception(832, 384, 192, 384, 48, 128, 128),
         loss3_fc1=L.Linear(4096, 300),
         loss3_fc2=L.Linear(300, n_outputs),
         doc_fc1=L.Linear(1000, 600),
         doc_fc2=L.Linear(600, 300),
         bi1=L.Bilinear(300, 300, 300),
         loss1_conv=L.Convolution2D(512, 128, 1),
         loss1_fc1=L.Linear(4 * 4 * 128, 1024),
         loss1_fc2=L.Linear(1024, n_outputs),
         loss2_conv=L.Convolution2D(528, 128, 1),
         loss2_fc1=L.Linear(4 * 4 * 128, 1024),
         loss2_fc2=L.Linear(1024, n_outputs),
     )
Example No. 5
 def __init__(self, model_path):
     Param.load(self, model_path / 'tagger_defs.txt')
     self.extractor = FeatureExtractor(model_path)
     self.in_dim = self.word_dim + self.char_dim
     super(BiaffineJaLSTMParser,
           self).__init__(emb_word=L.EmbedID(self.n_words, self.word_dim),
                          emb_char=L.EmbedID(self.n_chars,
                                             50,
                                             ignore_label=IGNORE),
                          conv_char=L.Convolution2D(1,
                                                    self.char_dim, (3, 50),
                                                    stride=1,
                                                    pad=(1, 0)),
                          lstm_f=L.NStepLSTM(self.nlayers, self.in_dim,
                                             self.hidden_dim, 0.32),
                          lstm_b=L.NStepLSTM(self.nlayers, self.in_dim,
                                             self.hidden_dim, 0.32),
                          arc_dep=L.Linear(2 * self.hidden_dim,
                                           self.dep_dim),
                          arc_head=L.Linear(2 * self.hidden_dim,
                                            self.dep_dim),
                          rel_dep=L.Linear(2 * self.hidden_dim,
                                           self.dep_dim),
                          rel_head=L.Linear(2 * self.hidden_dim,
                                            self.dep_dim),
                          biaffine_arc=Biaffine(self.dep_dim),
                          biaffine_tag=L.Bilinear(self.dep_dim,
                                                  self.dep_dim,
                                                  len(self.targets)))
Example No. 6
    def __init__(self,
                 left_dim,
                 right_dim,
                 out_dim,
                 ntn_out_dim=8,
                 hidden_dims=(16, ),
                 activation=relu):
        super(Bilinear, self).__init__()

        ntn_layer = links.Bilinear(left_size=left_dim,
                                   right_size=right_dim,
                                   out_size=ntn_out_dim)
        mlp_layers = [
            links.Linear(in_size=None, out_size=hidden_dim)
            for hidden_dim in hidden_dims
        ]

        with self.init_scope():
            self.ntn_layer = ntn_layer
            self.mlp_layers = chainer.ChainList(*mlp_layers)
            self.l_out = links.Linear(in_size=None, out_size=out_dim)

        self.left_dim = left_dim
        self.right_dim = right_dim
        self.out_dim = out_dim
        self.hidden_dims = hidden_dims
        self.activation = activation
Example No. 7
    def __init__(self,
                 hidden_dim,
                 out_dim,
                 head,
                 activation=functions.tanh,
                 weight_tying=True):
        """
        :param hidden_dim: dimension of atom representation
        :param out_dim: dimension of molecular representation
        :param head: number of heads in attention mechanism
        """
        super(ParallelCoattention, self).__init__()
        n_entities = 1 if weight_tying else 2
        with self.init_scope():
            self.energy_layers = chainer.ChainList(*[
                links.Bilinear(hidden_dim, out_dim, head)
                for _ in range(n_entities)
            ])

            self.j_layer = GraphLinear(hidden_dim, out_dim)

        self.hidden_dim = hidden_dim
        self.out_dim = out_dim
        self.head = head
        self.activation = activation
        self.weight_tying = weight_tying
Example No. 8
 def __init__(self, left_size, right_size, out_size):
     super(AttnAggregator, self).__init__()
     with self.init_scope():
         self.bilinear_layer = links.Bilinear(left_size, right_size,
                                              out_size)
     self.left_size = left_size
     self.right_size = right_size
     self.out_size = out_size
Example No. 9
    def __init__(self,
                 model_path,
                 word_dim=None,
                 char_dim=None,
                 nlayers=2,
                 hidden_dim=128,
                 dep_dim=100,
                 dropout_ratio=0.5):
        self.model_path = model_path
        defs_file = model_path + "/tagger_defs.txt"
        if word_dim is None:
            self.train = False
            Param.load(self, defs_file)
            self.extractor = FeatureExtractor(model_path)
        else:
            self.train = True
            p = Param(self)
            p.dep_dim = dep_dim
            p.word_dim = word_dim
            p.char_dim = char_dim
            p.hidden_dim = hidden_dim
            p.nlayers = nlayers
            p.n_words = len(read_model_defs(model_path + "/words.txt"))
            p.n_chars = len(read_model_defs(model_path + "/chars.txt"))
            p.targets = read_model_defs(model_path + "/target.txt")
            p.dump(defs_file)

        self.in_dim = self.word_dim + self.char_dim
        self.dropout_ratio = dropout_ratio
        super(BiaffineJaLSTMParser,
              self).__init__(emb_word=L.EmbedID(self.n_words, self.word_dim),
                             emb_char=L.EmbedID(self.n_chars,
                                                50,
                                                ignore_label=IGNORE),
                             conv_char=L.Convolution2D(1,
                                                       self.char_dim, (3, 50),
                                                       stride=1,
                                                       pad=(1, 0)),
                             lstm_f=L.NStepLSTM(self.nlayers, self.in_dim,
                                                self.hidden_dim, 0.32),
                             lstm_b=L.NStepLSTM(self.nlayers, self.in_dim,
                                                self.hidden_dim, 0.32),
                             arc_dep=L.Linear(2 * self.hidden_dim,
                                              self.dep_dim),
                             arc_head=L.Linear(2 * self.hidden_dim,
                                               self.dep_dim),
                             rel_dep=L.Linear(2 * self.hidden_dim,
                                              self.dep_dim),
                             rel_head=L.Linear(2 * self.hidden_dim,
                                               self.dep_dim),
                             biaffine_arc=Biaffine(self.dep_dim),
                             biaffine_tag=L.Bilinear(self.dep_dim,
                                                     self.dep_dim,
                                                     len(self.targets)))
Example No. 10
 def __init__(self, hidden_dim, out_dim, activation=functions.tanh):
     """
     :param hidden_dim: dimension of atom representation
     :param out_dim: dimension of molecular representation
     """
     super(LinearTransformFineCoattention, self).__init__()
     with self.init_scope():
         self.energy_layer = links.Bilinear(hidden_dim, hidden_dim, 1)
         self.j_layer = GraphLinear(hidden_dim, out_dim)
     self.hidden_dim = hidden_dim
     self.out_dim = out_dim
     self.activation = activation
Example No. 11
 def __init__(self, hidden_dim, out_dim, activation=functions.tanh):
     """
     :param hidden_dim: dimension of atom representation
     :param out_dim: dimension of molecular representation
     """
     super(PoolingFineCoattention, self).__init__()
     with self.init_scope():
         self.energy_layer = links.Bilinear(hidden_dim, hidden_dim, 1)
         self.j_layer = GraphLinear(hidden_dim, out_dim)
     self.hidden_dim = hidden_dim
     self.out_dim = out_dim
     self.activation = activation
Example No. 12
 def __init__(self, n_outputs):
     super(RetweetNet, self).__init__(
         conv1=L.Convolution2D(3, 96, ksize=11, stride=4, pad=0),
         conv2=L.Convolution2D(96, 256, ksize=5, pad=2),
         conv3=L.Convolution2D(256, 384, ksize=3, pad=1),
         conv4=L.Convolution2D(384, 384, ksize=3, pad=1),
         conv5=L.Convolution2D(384, 256, ksize=3, pad=1),
         fc6=L.Linear(12544, 4096),
         fc7=L.Linear(4096, 300),
         doc_fc1=L.Linear(1000, 600),
         doc_fc2=L.Linear(600, 300),
         bi1=L.Bilinear(300, 300, 200),
         fc8=L.Linear(200, n_outputs),
     )
Example No. 13
    def setUp(self):
        self.f = links.Bilinear(self.in_shape[0], self.in_shape[1],
                                self.out_size, True)
        W = self.f.W.data
        W[...] = numpy.random.uniform(-1, 1, W.shape)
        self.f.zerograds()

        self.W = W.copy()

        self.e1 = _uniform(self.batch_size, self.in_shape[0])
        self.e2 = _uniform(self.batch_size, self.in_shape[1])
        self.gy = _uniform(self.batch_size, self.out_size)

        self.y = numpy.einsum('ij,ik,jkl->il', self.e1, self.e2, self.W)
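This setup exercises the nobias form of links.Bilinear, whose forward output is exactly the Einstein sum the test builds by hand: y[i, l] = sum over j, k of e1[i, j] * e2[i, k] * W[j, k, l]. A minimal standalone check of that equivalence, assuming only numpy and chainer are available and using illustrative sizes (batch 2, left 3, right 4, out 5) rather than the test class attributes:

import numpy
import chainer.links as links

e1 = numpy.random.uniform(-1, 1, (2, 3)).astype(numpy.float32)
e2 = numpy.random.uniform(-1, 1, (2, 4)).astype(numpy.float32)

f = links.Bilinear(3, 4, 5, nobias=True)   # left_size, right_size, out_size
expected = numpy.einsum('ij,ik,jkl->il', e1, e2, f.W.data)  # manual bilinear form
actual = f(e1, e2).data                                     # forward pass of the link

numpy.testing.assert_allclose(actual, expected, rtol=1e-4)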
Example No. 14
 def __init__(self):
     super(MDL_full, self).__init__(
         convR1=L.Convolution2D(3,  96, 11, stride=4),
         convR2=L.Convolution2D(96, 256,  5, pad=2),
         convR3=L.Convolution2D(256, 384,  3, pad=1),
         convR4=L.Convolution2D(384, 384,  3, pad=1),
         convR5=L.Convolution2D(384, 256,  3, pad=1),
         fcR6=L.Linear(9216, 4096),
         fcR7=L.Linear(4096, 4096),
         convD1=L.Convolution2D(3,  96, 11, stride=4),
         convD2=L.Convolution2D(96, 256,  5, pad=2),
         convD3=L.Convolution2D(256, 384,  3, pad=1),
         convD4=L.Convolution2D(384, 384,  3, pad=1),
         convD5=L.Convolution2D(384, 256,  3, pad=1),
         fcD6=L.Linear(9216, 4096),
         fcD7=L.Linear(4096, 4096),
         fc8=L.Bilinear(4096, 4096, 4096),
         fc9=L.Linear(4096, 1000),
     )
     self.train = True
Example No. 15
    def setUp(self):
        self.f = links.Bilinear(self.in_shape[0], self.in_shape[1],
                                self.out_size)
        self.f.W.data[...] = _uniform(*self.f.W.data.shape)
        self.f.V1.data[...] = _uniform(*self.f.V1.data.shape)
        self.f.V2.data[...] = _uniform(*self.f.V2.data.shape)
        self.f.b.data[...] = _uniform(*self.f.b.data.shape)
        self.f.zerograds()

        self.W = self.f.W.data.copy()
        self.V1 = self.f.V1.data.copy()
        self.V2 = self.f.V2.data.copy()
        self.b = self.f.b.data.copy()

        self.e1 = _uniform(self.batch_size, self.in_shape[0])
        self.e2 = _uniform(self.batch_size, self.in_shape[1])
        self.gy = _uniform(self.batch_size, self.out_size)

        self.y = (numpy.einsum('ij,ik,jkl->il', self.e1, self.e2, self.W) +
                  self.e1.dot(self.V1) + self.e2.dot(self.V2) + self.b)
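With the default (biased) constructor used here, the forward output adds two linear terms and a bias on top of the bilinear term, which is what the last two lines reproduce by hand. A small shape sketch of the parameters involved, assuming illustrative sizes (left 3, right 4, out 5) rather than the test class attributes:

import chainer.links as links

f = links.Bilinear(3, 4, 5)      # left_size=3, right_size=4, out_size=5
assert f.W.shape == (3, 4, 5)    # bilinear weight, contracted with both e1 and e2
assert f.V1.shape == (3, 5)      # linear term applied to e1
assert f.V2.shape == (4, 5)      # linear term applied to e2
assert f.b.shape == (5,)         # bias broadcast over the batch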
Example No. 16
    def __init__(self, n_outputs):
        super(GoogLeNetBN, self).__init__(
            conv1=L.Convolution2D(3, 64, 7, stride=2, pad=3, nobias=True),
            norm1=L.BatchNormalization(64),
            conv2=L.Convolution2D(64, 192, 3, pad=1, nobias=True),
            norm2=L.BatchNormalization(192),
            inc3a=L.InceptionBN(192, 64, 64, 64, 64, 96, 'avg', 32),
            inc3b=L.InceptionBN(256, 64, 64, 96, 64, 96, 'avg', 64),
            inc3c=L.InceptionBN(320, 0, 128, 160, 64, 96, 'max', stride=2),
            inc4a=L.InceptionBN(576, 224, 64, 96, 96, 128, 'avg', 128),
            inc4b=L.InceptionBN(576, 192, 96, 128, 96, 128, 'avg', 128),
            inc4c=L.InceptionBN(576, 128, 128, 160, 128, 160, 'avg', 128),
            inc4d=L.InceptionBN(576, 64, 128, 192, 160, 192, 'avg', 128),
            inc4e=L.InceptionBN(576, 0, 128, 192, 192, 256, 'max', stride=2),
            inc5a=L.InceptionBN(1024, 352, 192, 320, 160, 224, 'avg', 128),
            inc5b=L.InceptionBN(1024, 352, 192, 320, 192, 224, 'max', 128),
            linz=L.Linear(1024, 300),
            out=L.Linear(300, n_outputs),
            outimg=L.Linear(1024, n_outputs),
            outdoc=L.Linear(1000, n_outputs),

            doc_fc1=L.Linear(1000, 600),
            doc_fc2=L.Linear(600, 300),

            conva=L.Convolution2D(576, 128, 1, nobias=True),
            norma=L.BatchNormalization(128),
            lina=L.Linear(3200, 1024, nobias=True),
            norma2=L.BatchNormalization(1024),
            outa=L.Linear(1024, n_outputs),

            convb=L.Convolution2D(576, 128, 1, nobias=True),
            normb=L.BatchNormalization(128),
            linb=L.Linear(3200, 1024, nobias=True),
            normb2=L.BatchNormalization(1024),
            outb=L.Linear(1024, n_outputs),
            bi1=L.Bilinear(300, 300, 300)
        )
Example No. 17
 def __init__(self,
              mem_units,
              label_num,
              attention_method,
              attention_target,
              dropout_ratio,
              is_regression=False,
              only_attn=False):
     super().__init__(atw1=L.Linear(mem_units, mem_units),
                      atw1con=L.Linear(2 * mem_units, mem_units),
                      atw1gate=L.Linear(2 * mem_units, mem_units),
                      atw1bi=L.Bilinear(mem_units, mem_units, 1),
                      atw2=L.Linear(mem_units, 1),
                      atout_class=L.Linear(mem_units + mem_units,
                                           label_num),
                      atout_reg=L.Linear(mem_units + mem_units, 1),
                      out_class=L.Linear(mem_units, label_num),
                      out_reg=L.Linear(mem_units, 1))
     self.__count = {'total_root': 0, 'correct_root': 0}
     self.__attention_method = attention_method
     self.__attention_target = attention_target
     self.__is_regression = is_regression
     self.__dropout_ratio = dropout_ratio
     self.__only_attn = only_attn
Example No. 18
 def check_invalid(self, initialW, initial_bias, nobias):
     with self.assertRaises(AssertionError):
         links.Bilinear(self.in_shape[0], self.in_shape[1], self.out_size,
                        nobias, initialW, initial_bias)
Example No. 19
 def check_normal(self, initialW, initial_bias, nobias):
     links.Bilinear(self.in_shape[0], self.in_shape[1], self.out_size,
                    nobias, initialW, initial_bias)
Example No. 20
 def __init__(self, nr_in, nr_out):
     Chain.__init__(self,
         fwd=L.LSTM(nr_in, nr_out),
         bwd=L.LSTM(nr_in, nr_out),
         mix=L.Bilinear(nr_out, nr_out, nr_out))
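The chain's __call__ is not shown in this excerpt. A hypothetical forward sketch, assuming the two LSTMs are stepped over the sequence in opposite directions and their final hidden states are combined by the Bilinear link (the names xs, h_f and h_b are illustrative only):

def __call__(self, xs):
    self.fwd.reset_state()
    self.bwd.reset_state()
    for x in xs:                   # step the forward LSTM left to right
        h_f = self.fwd(x)
    for x in reversed(xs):         # step the backward LSTM right to left
        h_b = self.bwd(x)
    return self.mix(h_f, h_b)      # bilinear mix of the two final states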
Example No. 21
    # make model
    print("make model")
    n_units = 4000
    '''
    model = chainer.FunctionSet(
            l1=L.Linear(5096,  n_units),
            l2=L.Linear(n_units, 4))
    '''
    model = chainer.FunctionSet(
        l1_x1=L.Linear(1000, n_units),
        l1_x2=L.Linear(4096, n_units),
        l2_x1=L.Linear(n_units, 150),
        l2_x2=L.Linear(n_units, 150),
        l3=L.Linear(100, 4),
        l4=L.Bilinear(150, 150, 100),
    )

    cuda.get_device(0).use()
    model.to_gpu()
    xp = cuda.cupy

    def forward(x_data, y_data, train=True):
        # input data and teacher (target) labels

        x1 = chainer.Variable(xp.asarray(x_data[:, :1000]))
        x2 = chainer.Variable(xp.asarray(x_data[:, 1000:]))
        t = chainer.Variable(xp.asarray(y_data))
        '''
        x = chainer.Variable(xp.asarray(x_data))
        t = chainer.Variable(xp.asarray(y_data))
Example No. 22
    def __init__(self,
                 encoder,
                 num_labels=46,
                 num_aux_lbls=0,
                 mlp_arc_units=100,
                 mlp_lbl_units=100,
                 mlp_tag_units=100,
                 arc_dropout=0.0,
                 lbl_dropout=0.5,
                 tag_dropout=0.2,
                 treeify='chu',
                 visualise=False,
                 debug=False,
                 sub_attn=False,
                 add_feat=False,
                 apply_mtl=False,
                 alpha=1.0,
                 beta=0.0):

        super(GraphParser, self).__init__()
        self.num_labels = num_labels
        self.num_aux_lbls = num_aux_lbls
        self.mlp_arc_units = mlp_arc_units
        self.mlp_lbl_units = mlp_lbl_units
        self.mlp_tag_units = mlp_tag_units
        self.arc_dropout = arc_dropout
        self.lbl_dropout = lbl_dropout
        self.tag_dropout = tag_dropout
        self.treeify = treeify.lower()
        self.visualise = visualise
        self.debug = debug
        self.sleep_time = 0.
        self.sub_attn = sub_attn
        self.add_feat = add_feat

        # MTL parameters
        self.apply_mtl = apply_mtl
        self.alpha = alpha
        self.beta = beta

        assert (treeify in self.TREE_OPTS)
        self.unit_mult = 2 if encoder.use_bilstm else 1

        with self.init_scope():

            self.encoder = encoder
            self.vT = L.Linear(mlp_arc_units, 1)

            # head
            self.H_arc = L.Linear(self.unit_mult * self.encoder.num_units,
                                  mlp_arc_units)

            # dependent
            self.D_arc = L.Linear(self.unit_mult * self.encoder.num_units,
                                  mlp_arc_units)

            if self.sub_attn:
                # parameters for subword attention
                self.units_dim = self.encoder.embedder.word_encoder.num_units
                self.max_sub_len = self.encoder.embedder.word_encoder.max_sub_len

                # k = softmax([sub_feats_head] x V_attn x h_dep)
                self.V_attn = L.Bilinear(self.units_dim * self.max_sub_len,
                                         mlp_arc_units, self.max_sub_len)

                # g = sigmoid(Wglob x h_head + m_head)
                self.W_glob = L.Linear(mlp_arc_units, mlp_arc_units)
                self.W_loc = L.Linear(mlp_arc_units, mlp_arc_units)

                # parameters for computing the scores
                self.W_head = L.Linear(mlp_arc_units, mlp_arc_units)
                self.W_dependent = L.Linear(mlp_arc_units, mlp_arc_units)
                self.U_lbl_attn = L.Linear(mlp_arc_units, mlp_lbl_units)

            if self.beta > 0:
                self.l1_tag = L.Linear(self.unit_mult * self.encoder.num_units,
                                       self.mlp_tag_units)
                self.l2_tag = L.Linear(self.mlp_tag_units, self.mlp_tag_units)
                self.out_tag = L.Linear(self.mlp_tag_units, self.num_aux_lbls)

            self.V_lblT = L.Linear(mlp_lbl_units, self.num_labels)
            self.U_lbl = L.Linear(self.unit_mult * self.encoder.num_units,
                                  mlp_lbl_units)
            self.W_lbl = L.Linear(self.unit_mult * self.encoder.num_units,
                                  mlp_lbl_units)
Example No. 23
    def __init__(self,
                 vocab,
                 n_units,
                 mem_units,
                 attention_method,
                 is_regression=False,
                 train=True,
                 forget_bias=False,
                 is_leaf_as_chunk=False):
        n_vocab = vocab.get_vocab_size()
        comp_type = Composition.tree_attention_lstm
        if forget_bias:
            super().__init__(
                embed=L.EmbedID(n_vocab, n_units),
                embed2hidden=L.Linear(n_units, mem_units),
                updatel=L.Linear(mem_units * 4, mem_units),
                updater=L.Linear(mem_units * 4, mem_units),
                inputl=L.Linear(mem_units * 4, mem_units),
                inputr=L.Linear(mem_units * 4, mem_units),
                forgetl=L.Linear(mem_units * comp_type.forget_in_size(),
                                 mem_units,
                                 initial_bias=np.ones(mem_units)),
                forgetr=L.Linear(mem_units * comp_type.forget_in_size(),
                                 mem_units,
                                 initial_bias=np.ones(mem_units)),
                outputl=L.Linear(mem_units * 4, mem_units),
                outputr=L.Linear(mem_units * 4, mem_units))
        else:
            super().__init__(
                embed=L.EmbedID(n_vocab, n_units),
                embed2hidden=L.Linear(n_units, mem_units),
                updatel=L.Linear(mem_units * 4, mem_units),
                updater=L.Linear(mem_units * 4, mem_units),
                inputl=L.Linear(mem_units * 4, mem_units),
                inputr=L.Linear(mem_units * 4, mem_units),
                forgetl=L.Linear(mem_units * comp_type.forget_in_size(),
                                 mem_units),
                forgetr=L.Linear(mem_units * comp_type.forget_in_size(),
                                 mem_units),
                outputl=L.Linear(mem_units * 4, mem_units),
                outputr=L.Linear(mem_units * 4, mem_units),
                atw1=L.Linear(mem_units, mem_units),
                atw1con=L.Linear(2 * mem_units, mem_units),
                atw1gate=L.Linear(2 * mem_units, mem_units),
                atw1bi=L.Bilinear(mem_units, mem_units, 1),
                atw2=L.Linear(mem_units, 1),
            )
        self.__attention_method = attention_method
        self.__train = train
        self.__vocab = vocab
        self.is_leaf_as_chunk = is_leaf_as_chunk
        self.mem_units = mem_units
        self.n_units = n_units
        self.is_regression = is_regression
        self.comp_type = comp_type

        # init embed
        if self.__vocab.embed_model is not None:
            for i in range(self.__vocab.get_vocab_size()):
                word = self.__vocab.id2word(i)
                if word in self.__vocab.embed_model:
                    vec = self.__vocab.embed_model[word]
                    self.embed.W.data[i] = vec