def __init__(self, n_vocab, n_char, n_tag, args):
        feature_dim = args[
            'word_embedding_dim'] + 2 * args['char_embedding_dim']
        super(ModelBase,
              self).__init__(char_embed=L.EmbedID(n_char,
                                                  args['char_embedding_dim'],
                                                  ignore_label=-1),
                             bi_char=L.NStepBiLSTM(1,
                                                   args['char_embedding_dim'],
                                                   args['char_embedding_dim'],
                                                   0),
                             word_embed=L.EmbedID(n_vocab,
                                                  args['word_embedding_dim'],
                                                  ignore_label=-1),
                             bi_word=L.NStepBiLSTM(1, feature_dim,
                                                   int(feature_dim / 2), 0),
                             l=L.Linear(feature_dim, n_tag),
                             crf=L.CRF1d(n_tag))

        # Initialize hyperparameter values
        self.char_embedding_dim = args['char_embedding_dim']
        self.tag_embedding_dim = args['tag_embedding_dim']
        self.dropout_ratio = args['dropout_ratio']
        self.lr_param = args['lr_param']
        self.threshold = args['threshold']
        self.decay_rate = args['decay_rate']
        self.batch_size = args['batch_size']

        if args['mode'] == 'train':
            # Set the forget gate biases to 1.0 (b1 and b5 are the
            # forget-gate bias vectors of each NStepBiLSTM layer).
            for w in self.bi_char:
                w.b1.data[:] = 1.0
                w.b5.data[:] = 1.0
            for w in self.bi_word:
                w.b1.data[:] = 1.0
                w.b5.data[:] = 1.0
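
The sketch below (not part of the original snippet) shows one plausible way this BiLSTM-CRF could compute its training loss and Viterbi predictions; the function name, the argument layout, and the assumption that `hs` already holds the per-sentence BiLSTM outputs are all illustrative.

import chainer.functions as F

def crf_loss_and_decode(model, hs, ts):
    # hs: list of (length_i, feature_dim) feature matrices, longest first
    # ts: list of (length_i,) int32 gold tag arrays, in the same order
    ys = [model.l(h) for h in hs]          # per-token tag scores
    ys_t = F.transpose_sequence(ys)        # CRF1d expects time-major input
    ts_t = F.transpose_sequence(ts)
    loss = model.crf(ys_t, ts_t)           # negative log-likelihood
    _, path_t = model.crf.argmax(ys_t)     # Viterbi decoding (time-major)
    predictions = F.transpose_sequence(path_t)
    return loss, predictions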
Example #2
 def __init__(
         self,
         n_units: int,
         in_size: int,
         n_outs_bio: int,
         n_outs_tag: int,
         blstm_stack: int = 1,
         lossfun='crf',
         dropout=0.2,
         weight_bio=0.5,
         weight_tag=0.5
 ):
     assert n_units > 0
     assert n_outs_bio > 0
     assert n_outs_tag > 0
     assert blstm_stack >= 0
     assert 0 <= dropout <= 1
     assert weight_bio >= 0 and weight_tag >= 0
     assert weight_bio + weight_tag <= 1.0
     self.blstm_stack = blstm_stack
     self.inds = None
     self.xs_src_len = None
     self.lossfun = lossfun
     self.dropout = dropout
     self.weight_bio = weight_bio
     self.weight_tag = weight_tag
     super(BLC, self).__init__()
     with self.init_scope():
         # Stack BiLSTM
         if blstm_stack > 0:
             self.bilstm = L.NStepBiLSTM(
                 n_layers=blstm_stack,
                 in_size=in_size,
                 out_size=n_units,
                 dropout=self.dropout
             )
         self.out_layer_bio = L.Linear(None, n_outs_bio)
         self.out_layer_tag = L.Linear(None, n_outs_tag)
         # CRF layer
         if lossfun == 'crf':
             self.crf_bio = L.CRF1d(n_outs_bio)
             self.crf_tag = L.CRF1d(n_outs_tag)
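
The two CRF heads above suggest a weighted multi-task loss; the following is a rough sketch of how `weight_bio` and `weight_tag` might combine them (the helper name and the time-major conversion are assumptions, not code from the repository).

import chainer.functions as F

def blc_combined_loss(model, hs, ys_bio, ys_tag):
    # hs: per-sentence BiLSTM outputs, longest first
    # ys_bio / ys_tag: per-sentence int32 label arrays for the two tag sets
    bio_scores = F.transpose_sequence([model.out_layer_bio(h) for h in hs])
    tag_scores = F.transpose_sequence([model.out_layer_tag(h) for h in hs])
    loss_bio = model.crf_bio(bio_scores, F.transpose_sequence(ys_bio))
    loss_tag = model.crf_tag(tag_scores, F.transpose_sequence(ys_tag))
    return model.weight_bio * loss_bio + model.weight_tag * loss_tag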
Example #3
    def setUp(self):
        self.n_label = 3
        self.initial_cost = numpy.empty((self.n_label, self.n_label),
                                        dtype=self.dtype)

        if self.initializer is None:
            initializer = initializers.constant.Zero()

        elif self.initializer == 'random':
            initializer = initializers.GlorotUniform()

        initializer(self.initial_cost)
        with chainer.using_config('dtype', self.dtype):
            self.link = links.CRF1d(self.n_label,
                                    initial_cost=self.initial_cost)
Example #4
 def __init__(self, n_vocab, n_tag, embed_dim, hidden_dim, dropout):
     super(CRFTaggerBase, self).__init__(embed=L.EmbedID(n_vocab,
                                                         embed_dim,
                                                         ignore_label=-1),
                                         l1=L.NStepLSTM(1,
                                                        embed_dim,
                                                        embed_dim,
                                                        dropout=0,
                                                        use_cudnn=True),
                                         l2=L.Linear(embed_dim, n_tag),
                                         crf=L.CRF1d(n_tag))
     self.dropout = bool(dropout)
Example #5
 def __init__(self,
              char_vocab=60,
              char_emb_dim=50,
              char_window_size=5,
              char_init_emb=None,
              char_hidden_dim=100,
              tag_num=2):
     # CRF layer
     super(CharSeg, self).__init__(
         char_emb=L.EmbedID(char_vocab, char_emb_dim),
         char_conv=L.Convolution2D(1,
                                   char_hidden_dim,
                                   ksize=(char_emb_dim, char_window_size),
                                   stride=(1, 1),
                                   pad=0),
         predict=L.Linear(char_hidden_dim, tag_num),
         # Register the CRF as a child link (and use tag_num rather than a
         # hard-coded label count) so its transition cost is trained and saved.
         crf=L.CRF1d(n_label=tag_num),
     )
Example #6
    def setUp(self):
        self._config_user = chainer.using_config('dtype', self.dtype)
        self._config_user.__enter__()
        self.n_label = 3

        self.lengths = [3, 3]
        self.batches = [2, 2, 2]

        self.xs = [numpy.random.uniform(-1, 1, (b, 3)).astype(self.dtype)
                   for b in self.batches]
        self.ys = [numpy.random.randint(
            0, self.n_label, (b,)).astype(numpy.int32)
            for b in self.batches]

        self.link = links.CRF1d(n_label=self.n_label)
        self.cost_shape = (self.n_label, self.n_label)

        if self.dtype == numpy.float16:
            self.check_forward_options = {'rtol': 5e-3, 'atol': 1e-2}
        else:
            self.check_forward_options = {'atol': 1e-4}
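
With the time-major `xs` and `ys` built in this setUp, a forward check could exercise the link roughly as follows (a sketch, not the original test body):

loss = self.link(self.xs, self.ys)         # scalar negative log-likelihood
score, path = self.link.argmax(self.xs)    # per-sequence Viterbi scores; labels in the same time-major layout as xs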
Example #7
 def __init__(self, n_vocab, n_char, n_tag, embed_dim, hidden_dim, dropout):
     super(CRFTaggerBase, self).__init__(
         embed=L.EmbedID(n_vocab, embed_dim, ignore_label=-1),
         # character embedding dimension is hard-coded to 25
         char_embed=L.EmbedID(n_char, 25, ignore_label=-1),
         forward_l1=L.NStepLSTM(1,
                                embed_dim + 50,
                                embed_dim + 50,
                                dropout=0,
                                use_cudnn=True),
         backward_l1=L.NStepLSTM(1,
                                 embed_dim + 50,
                                 embed_dim + 50,
                                 dropout=0,
                                 use_cudnn=True),
         l2=L.Linear((embed_dim + 50) * 2, n_tag),
         forward_char=L.NStepLSTM(1, 25, 25, dropout=0, use_cudnn=True),
         backward_char=L.NStepLSTM(1, 25, 25, dropout=0, use_cudnn=True),
         crf=L.CRF1d(n_tag))
     self.dropout = bool(dropout)
Example #8
 def __init__(self, n_words, n_tags):
     super().__init__(embed=L.EmbedID(n_words, n_tags), crf=L.CRF1d(n_tags))
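
A hypothetical end-to-end usage of this minimal embed+CRF model; `EmbedCRFTagger` stands in for the unnamed class, and the word/tag ids below are toy data.

import numpy as np
import chainer.functions as F
from chainer import optimizers

model = EmbedCRFTagger(n_words=100, n_tags=5)   # hypothetical class name
optimizer = optimizers.Adam()
optimizer.setup(model)

# Two toy sentences of word ids and their gold tag ids, longest first.
xs = [np.array([3, 1, 4, 1], np.int32), np.array([5, 9, 2], np.int32)]
ts = [np.array([0, 1, 2, 1], np.int32), np.array([3, 4, 0], np.int32)]

scores = F.transpose_sequence([model.embed(x) for x in xs])  # embeddings used as unary tag scores
loss = model.crf(scores, F.transpose_sequence(ts))
model.cleargrads()
loss.backward()
optimizer.update()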
Example #9
    def __init__(self,
                 n_vocab=None,
                 n_char_vocab=None,
                 emb_dim=100,
                 hidden_dim=200,
                 init_emb=None,
                 use_dropout=0.33,
                 n_layers=1,
                 n_label=0,
                 use_crf=True,
                 use_bi=True,
                 char_input_dim=100,
                 char_hidden_dim=100,
                 rnn_name='bilstm',
                 demo=False,
                 use_cudnn=True,
                 n_add_feature_dim=0,
                 n_add_feature=0,
                 n_vocab_add=[]):
        # feature_dim = emb_dim + add_dim + pos_dim
        n_dir = 2 if use_bi else 1
        feature_dim = emb_dim + n_add_feature_dim * n_add_feature
        self.n_add_feature_dim = n_add_feature_dim
        self.n_add_feature = n_add_feature
        use_char = False
        if n_char_vocab is not None:
            use_char = True
            feature_dim += char_hidden_dim

        rnn_names = ['bilstm', 'lstm', 'bigru', 'gru', 'birnn', 'rnn']
        rnn_links = [
            L.NStepBiLSTM, L.NStepLSTM, L.NStepBiGRU, L.NStepGRU,
            L.NStepBiRNNTanh, L.NStepRNNTanh
        ]
        if rnn_name not in rnn_names:
            candidate = ','.join(rnn_names)
            raise ValueError(
                'Invalid RNN name: "%s". Please select from [%s]' %
                (rnn_name, candidate))

        rnn_link = rnn_links[rnn_names.index(rnn_name)]

        super(BiLSTM_CNN_CRF, self).__init__(
            word_embed=L.EmbedID(n_vocab, emb_dim, ignore_label=-1),
            rnn=my_rnn_link(rnn_link, n_layers, feature_dim, hidden_dim,
                            use_dropout, use_cudnn),
            output_layer=L.Linear(hidden_dim * n_dir, n_label),
        )
        if init_emb is not None:
            self.word_embed.W.data[:] = init_emb[:]

        if use_char:
            char_cnn = CharCNNEncoder(emb_dim=char_input_dim,
                                      window_size=3,
                                      hidden_dim=char_hidden_dim,
                                      vocab_size=n_char_vocab,
                                      init_emb=None,
                                      PAD_IDX=0)
            self.add_link('char_cnn', char_cnn)

        if self.n_add_feature:
            for i in six.moves.range(self.n_add_feature):
                n_add_vocab = n_vocab_add[i]
                add_embed = L.EmbedID(n_add_vocab,
                                      n_add_feature_dim,
                                      ignore_label=-1)
                self.add_link('add_embed_' + str(i), add_embed)

        # if n_pos:
        #     pos_embed = L.EmbedID(n_pos, pos_dim, ignore_label=-1)
        #     self.add_link('pos_embed', pos_embed)

        if use_crf:
            if demo:
                import my_crf
                self.add_link('lossfun', my_crf.CRF1d(n_label=n_label))
            else:
                self.add_link('lossfun', L.CRF1d(n_label=n_label))

        # self.n_pos = n_pos
        self.hidden_dim = hidden_dim
        self.train = True
        self.use_dropout = use_dropout
        self.n_layers = n_layers
        self.use_char = use_char

        # Set the forget gate biases to 1.0 (b1 and b5 are the forget-gate bias vectors).
        for w in self.rnn:
            w.b1.data[:] = 1.0
            w.b5.data[:] = 1.0
Example #10
 def __init__(self, n_vocab, n_pos):
     super(CRF, self).__init__()
     with self.init_scope():
         self.feature = L.EmbedID(n_vocab, n_pos)
         self.crf = L.CRF1d(n_pos)
Example #11
 def _setup_decoder(self):
     self.crf = L.CRF1d(self.num_tag_vocab, initial_cost=self.initializer)
Example #12
 def __init__(self, n_vocab, n_pos):
     super(CRF, self).__init__(
         feature=L.EmbedID(n_vocab, n_pos),
         crf=L.CRF1d(n_pos),
     )
Example #13
    def __init__(self,
                 n_vocab,
                 unigram_embed_dim,
                 n_bigrams,
                 bigram_embed_dim,
                 n_chunks,
                 chunk_embed_dim,
                 rnn_unit_type,
                 rnn_bidirection,
                 rnn_n_layers1,
                 rnn_n_units1,
                 rnn_n_layers2,
                 rnn_n_units2,
                 mlp_n_layers,
                 mlp_n_units,
                 n_labels,
                 use_crf=True,
                 feat_dim=0,
                 embed_dropout=0,
                 rnn_dropout=0,
                 biaffine_dropout=0,
                 mlp_dropout=0,
                 chunk_vector_dropout=0,
                 pretrained_unigram_embed_dim=0,
                 pretrained_bigram_embed_dim=0,
                 pretrained_chunk_embed_dim=0,
                 pretrained_embed_usage=ModelUsage.NONE,
                 chunk_pooling_type=constants.AVG,
                 min_chunk_len=1,
                 max_chunk_len=0,
                 chunk_loss_ratio=0,
                 biaffine_type='',
                 file=sys.stderr):

        chainer.Chain.__init__(self)

        self.chunk_loss_ratio = chunk_loss_ratio
        self.chunk_pooling_type = chunk_pooling_type
        self.use_attention = (chunk_pooling_type == constants.WAVG
                              or chunk_pooling_type == constants.WCON)
        self.use_concat = (chunk_pooling_type == constants.CON
                           or chunk_pooling_type == constants.WCON)
        self.use_average = not self.use_concat
        self.use_rnn2 = rnn_n_layers2 > 0 and rnn_n_units2 > 0
        self.chunk_embed_dim_merged = (
            chunk_embed_dim +
            (pretrained_chunk_embed_dim
             if pretrained_embed_usage == ModelUsage.CONCAT else 0))
        if self.use_concat:
            self.chunk_concat_num = sum(
                [i for i in range(min_chunk_len, max_chunk_len + 1)])
            self.chunk_embed_out_dim = self.chunk_embed_dim_merged * self.chunk_concat_num
        else:
            self.chunk_embed_out_dim = self.chunk_embed_dim_merged

        with self.init_scope():
            print('### Parameters', file=sys.stderr)
            print('# Chunk pooling type: {}'.format(self.chunk_pooling_type),
                  file=sys.stderr)
            print('# Chunk loss ratio: {}'.format(self.chunk_loss_ratio),
                  file=sys.stderr)

            # embedding layers

            self.pretrained_embed_usage = pretrained_embed_usage

            self.embed_dropout = embed_dropout
            print('# Embedding dropout ratio={}'.format(self.embed_dropout),
                  file=sys.stderr)
            self.unigram_embed, self.pretrained_unigram_embed = models.util.construct_embeddings(
                n_vocab, unigram_embed_dim, pretrained_unigram_embed_dim,
                pretrained_embed_usage)
            if self.pretrained_embed_usage != ModelUsage.NONE:
                print('# Pretrained embedding usage: {}'.format(
                    self.pretrained_embed_usage),
                      file=sys.stderr)
            print('# Unigram embedding matrix: W={}'.format(
                self.unigram_embed.W.shape),
                  file=sys.stderr)
            embed_dim = self.unigram_embed.W.shape[1]
            if self.pretrained_unigram_embed is not None:
                if self.pretrained_embed_usage == ModelUsage.CONCAT:
                    embed_dim += self.pretrained_unigram_embed.W.shape[1]
                print('# Pretrained unigram embedding matrix: W={}'.format(
                    self.pretrained_unigram_embed.W.shape),
                      file=sys.stderr)

            if n_bigrams > 0 and bigram_embed_dim > 0:
                self.bigram_embed, self.pretrained_bigram_embed = models.util.construct_embeddings(
                    n_bigrams, bigram_embed_dim, pretrained_bigram_embed_dim,
                    pretrained_embed_usage)
                if self.pretrained_embed_usage != ModelUsage.NONE:
                    print('# Pretrained embedding usage: {}'.format(
                        self.pretrained_embed_usage),
                          file=sys.stderr)
                print('# Bigram embedding matrix: W={}'.format(
                    self.bigram_embed.W.shape),
                      file=sys.stderr)
                embed_dim += self.bigram_embed.W.shape[1]
                if self.pretrained_bigram_embed is not None:
                    if self.pretrained_embed_usage == ModelUsage.CONCAT:
                        embed_dim += self.pretrained_bigram_embed.W.shape[1]
                    print('# Pretrained bigram embedding matrix: W={}'.format(
                        self.pretrained_bigram_embed.W.shape),
                          file=sys.stderr)

            self.additional_feat_dim = feat_dim
            if feat_dim > 0:
                embed_dim += feat_dim
                print('# Additional features dimension: {}'.format(feat_dim),
                      file=sys.stderr)

            self.chunk_embed, self.pretrained_chunk_embed = models.util.construct_embeddings(
                n_chunks, chunk_embed_dim, pretrained_chunk_embed_dim,
                pretrained_embed_usage)
            print('# Chunk embedding matrix: W={}'.format(
                self.chunk_embed.W.shape),
                  file=sys.stderr)
            if self.pretrained_chunk_embed is not None:
                print('# Pretrained chunk embedding matrix: W={}'.format(
                    self.pretrained_chunk_embed.W.shape),
                      file=sys.stderr)

            self.rnn_unit_type = rnn_unit_type
            self.rnn = models.util.construct_RNN(rnn_unit_type,
                                                 rnn_bidirection,
                                                 rnn_n_layers1, embed_dim,
                                                 rnn_n_units1, rnn_dropout)
            rnn_output_dim1 = rnn_n_units1 * (2 if rnn_bidirection else 1)

            # biaffine b/w token and chunk
            if self.use_attention:
                use_U = 'u' in biaffine_type or 'U' in biaffine_type
                use_V = 'v' in biaffine_type or 'V' in biaffine_type
                use_b = 'b' in biaffine_type or 'B' in biaffine_type

                biaffine_left_dim = rnn_output_dim1
                self.biaffine = BiaffineCombination(
                    biaffine_left_dim,
                    self.chunk_embed_dim_merged,
                    use_U=use_U,
                    use_V=use_V,
                    use_b=use_b)
                self.biaffine_dropout = biaffine_dropout
                print(
                    '# Biaffine layer for attention:   W={}, U={}, V={}, b={}, dropout={}'.format(
                        self.biaffine.W.shape,
                        self.biaffine.U.shape if self.biaffine.U is not None else None,
                        self.biaffine.V.shape if self.biaffine.V is not None else None,
                        self.biaffine.b.shape if self.biaffine.b is not None else None,
                        self.biaffine_dropout),
                    file=sys.stderr)

            # chunk vector dropout

            self.chunk_vector_dropout = chunk_vector_dropout
            print('# Chunk vector dropout={}'.format(
                self.chunk_vector_dropout),
                  file=sys.stderr)

            # recurrent layers2

            if self.use_rnn2:
                rnn_input_dim2 = rnn_output_dim1 + self.chunk_embed_out_dim

                self.rnn2 = models.util.construct_RNN(
                    rnn_unit_type, rnn_bidirection, rnn_n_layers2,
                    rnn_input_dim2, rnn_n_units2, rnn_dropout)
                rnn_output_dim2 = rnn_n_units2 * (2 if rnn_bidirection else 1)
                mlp_input_dim = rnn_output_dim2
            else:
                mlp_input_dim = rnn_output_dim1 + self.chunk_embed_out_dim

            # MLP

            print('# MLP', file=sys.stderr)
            self.mlp = MLP(mlp_input_dim,
                           n_labels,
                           n_hidden_units=mlp_n_units,
                           n_layers=mlp_n_layers,
                           output_activation=F.identity,
                           dropout=mlp_dropout)

            # CRF or softmax

            self.use_crf = use_crf
            if self.use_crf:
                self.crf = L.CRF1d(n_labels)
                print('# CRF cost: {}'.format(self.crf.cost.shape),
                      file=sys.stderr)
            else:
                self.softmax_cross_entropy = softmax_cross_entropy.softmax_cross_entropy
Example #14
    def __init__(
        self,
        n_vocab,
        unigram_embed_dim,
        n_bigrams,
        bigram_embed_dim,
        n_attrs,
        attr_embed_dim,
        rnn_unit_type,
        rnn_bidirection,
        rnn_n_layers,
        rnn_n_units,
        mlp_n_layers,
        mlp_n_units,
        n_labels,
        use_crf=True,
        feat_dim=0,
        mlp_n_additional_units=0,
        embed_dropout=0,
        rnn_dropout=0,
        mlp_dropout=0,
        pretrained_unigram_embed_dim=0,
        pretrained_bigram_embed_dim=0,
        pretrained_embed_usage=ModelUsage.NONE,
    ):
        super().__init__()

        with self.init_scope():
            print('### Parameters', file=sys.stderr)

            # embedding layer(s)

            self.pretrained_embed_usage = pretrained_embed_usage

            self.embed_dropout = embed_dropout
            print('# Embedding dropout ratio={}'.format(self.embed_dropout),
                  file=sys.stderr)
            self.unigram_embed, self.pretrained_unigram_embed = models.util.construct_embeddings(
                n_vocab, unigram_embed_dim, pretrained_unigram_embed_dim,
                pretrained_embed_usage)
            if self.pretrained_embed_usage != ModelUsage.NONE:
                print('# Pretrained embedding usage: {}'.format(
                    self.pretrained_embed_usage),
                      file=sys.stderr)
            print('# Unigram embedding matrix: W={}'.format(
                self.unigram_embed.W.shape),
                  file=sys.stderr)
            embed_dim = self.unigram_embed.W.shape[1]
            if self.pretrained_unigram_embed is not None:
                if self.pretrained_embed_usage == ModelUsage.CONCAT:
                    embed_dim += self.pretrained_unigram_embed.W.shape[1]
                print('# Pretrained unigram embedding matrix: W={}'.format(
                    self.pretrained_unigram_embed.W.shape),
                      file=sys.stderr)

            if n_bigrams > 0 and bigram_embed_dim > 0:
                self.bigram_embed, self.pretrained_bigram_embed = models.util.construct_embeddings(
                    n_bigrams, bigram_embed_dim, pretrained_bigram_embed_dim,
                    pretrained_embed_usage)
                if self.pretrained_embed_usage != ModelUsage.NONE:
                    print('# Pretrained embedding usage: {}'.format(
                        self.pretrained_embed_usage),
                          file=sys.stderr)
                print('# Bigram embedding matrix: W={}'.format(
                    self.bigram_embed.W.shape),
                      file=sys.stderr)
                embed_dim += self.bigram_embed.W.shape[1]
                if self.pretrained_bigram_embed is not None:
                    if self.pretrained_embed_usage == ModelUsage.CONCAT:
                        embed_dim += self.pretrained_bigram_embed.W.shape[1]
                    print('# Pretrained bigram embedding matrix: W={}'.format(
                        self.pretrained_bigram_embed.W.shape),
                          file=sys.stderr)

            if n_attrs > 0 and attr_embed_dim > 0:
                self.attr_embed = L.EmbedID(n_attrs, attr_embed_dim)
                embed_dim += attr_embed_dim
                print('# Attribute embedding matrix: W={}'.format(
                    self.attr_embed.W.shape),
                      file=sys.stderr)
            self.attr_embed_dim = attr_embed_dim

            self.additional_feat_dim = feat_dim
            if feat_dim > 0:
                embed_dim += feat_dim
                print('# Additional features dimension: {}'.format(feat_dim),
                      file=sys.stderr)

            # recurrent layers

            self.rnn_unit_type = rnn_unit_type
            self.rnn = models.util.construct_RNN(rnn_unit_type,
                                                 rnn_bidirection, rnn_n_layers,
                                                 embed_dim, rnn_n_units,
                                                 rnn_dropout)
            rnn_output_dim = rnn_n_units * (2 if rnn_bidirection else 1)

            # MLP

            print('# MLP', file=sys.stderr)
            mlp_in = rnn_output_dim + mlp_n_additional_units
            self.mlp = MLP(mlp_in,
                           n_labels,
                           n_hidden_units=mlp_n_units,
                           n_layers=mlp_n_layers,
                           output_activation=F.identity,
                           dropout=mlp_dropout)

            # CRF or softmax

            self.use_crf = use_crf
            if self.use_crf:
                self.crf = L.CRF1d(n_labels)
                print('# CRF cost: {}'.format(self.crf.cost.shape),
                      file=sys.stderr)
            else:
                self.softmax_cross_entropy = softmax_cross_entropy.softmax_cross_entropy
Example #15
 def __init__(self, rnn):
     super(RNNCRFModel, self).__init__()
     with self.init_scope():
         self.rnn = rnn
         self.crf = L.CRF1d(const.N_CHORDS)
Example #16
 def __init__(self):
     super(NBLSTMCRF, self).__init__()
     with self.init_scope():
         self.blstm = NSBLSTM()
         self.crf = L.CRF1d(const.N_CHORDS)