def __init__(self, n_vocab, n_char, n_tag, args):
    feature_dim = args['word_embedding_dim'] + 2 * args['char_embedding_dim']
    super(ModelBase, self).__init__(
        char_embed=L.EmbedID(n_char, args['char_embedding_dim'], ignore_label=-1),
        bi_char=L.NStepBiLSTM(1, args['char_embedding_dim'], args['char_embedding_dim'], 0),
        word_embed=L.EmbedID(n_vocab, args['word_embedding_dim'], ignore_label=-1),
        bi_word=L.NStepBiLSTM(1, feature_dim, int(feature_dim / 2), 0),
        l=L.Linear(feature_dim, n_tag),
        crf=L.CRF1d(n_tag))
    # Initialize hyperparameter values
    self.char_embedding_dim = args['char_embedding_dim']
    self.tag_embedding_dim = args['tag_embedding_dim']
    self.dropout_ratio = args['dropout_ratio']
    self.lr_param = args['lr_param']
    self.threshold = args['threshold']
    self.decay_rate = args['decay_rate']
    self.batch_size = args['batch_size']
    if args['mode'] == 'train':
        # Set the forget gate biases (b1 and b5) of both BiLSTMs to 1.0
        for w in self.bi_char:
            w.b1.data[:] = 1.0
            w.b5.data[:] = 1.0
        for w in self.bi_word:
            w.b1.data[:] = 1.0
            w.b5.data[:] = 1.0
def __init__(
        self,
        n_units: int,
        in_size: int,
        n_outs_bio: int,
        n_outs_tag: int,
        blstm_stack: int = 1,
        lossfun='crf',
        dropout=0.2,
        weight_bio=0.5,
        weight_tag=0.5
):
    assert n_units > 0
    assert n_outs_bio > 0
    assert n_outs_tag > 0
    assert blstm_stack >= 0
    assert 0 <= dropout <= 1
    assert weight_bio >= 0 and weight_tag >= 0
    assert weight_bio + weight_tag <= 1.0

    self.blstm_stack = blstm_stack
    self.inds = None
    self.xs_src_len = None
    self.lossfun = lossfun
    self.dropout = dropout
    self.weight_bio = weight_bio
    self.weight_tag = weight_tag

    super(BLC, self).__init__()
    with self.init_scope():
        # Stacked BiLSTM
        if blstm_stack > 0:
            self.bilstm = L.NStepBiLSTM(
                n_layers=blstm_stack, in_size=in_size,
                out_size=n_units, dropout=self.dropout)
        self.out_layer_bio = L.Linear(None, n_outs_bio)
        self.out_layer_tag = L.Linear(None, n_outs_tag)
        # CRF layers
        if lossfun == 'crf':
            self.crf_bio = L.CRF1d(n_outs_bio)
            self.crf_tag = L.CRF1d(n_outs_tag)
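The constructor above only builds the two CRF heads; a minimal sketch of how the weight_bio/weight_tag coefficients could combine their losses at training time is given below. This is an assumption, not the original forward pass: compute_loss, hs_bio, hs_tag, ts_bio and ts_tag are hypothetical names, and it presumes lossfun == 'crf'.

import chainer.functions as F

def compute_loss(self, hs_bio, hs_tag, ts_bio, ts_tag):
    # hs_* are per-sentence emission scores, ts_* gold int32 label sequences,
    # all sorted by length in descending order (required by transpose_sequence).
    # CRF1d expects time-major inputs, so transpose the batch first.
    loss_bio = self.crf_bio(F.transpose_sequence(hs_bio),
                            F.transpose_sequence(ts_bio))
    loss_tag = self.crf_tag(F.transpose_sequence(hs_tag),
                            F.transpose_sequence(ts_tag))
    # Weighted multi-task objective using the constructor's weights
    return self.weight_bio * loss_bio + self.weight_tag * loss_tag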
def setUp(self):
    self.n_label = 3
    self.initial_cost = numpy.empty(
        (self.n_label, self.n_label), dtype=self.dtype)
    if self.initializer is None:
        initializer = initializers.constant.Zero()
    elif self.initializer == 'random':
        initializer = initializers.GlorotUniform()
    initializer(self.initial_cost)
    with chainer.using_config('dtype', self.dtype):
        self.link = links.CRF1d(
            self.n_label, initial_cost=self.initial_cost)
def __init__(self, n_vocab, n_tag, embed_dim, hidden_dim, dropout):
    super(CRFTaggerBase, self).__init__(
        embed=L.EmbedID(n_vocab, embed_dim, ignore_label=-1),
        l1=L.NStepLSTM(1, embed_dim, embed_dim, dropout=0, use_cudnn=True),
        l2=L.Linear(embed_dim, n_tag),
        crf=L.CRF1d(n_tag))
    if dropout:
        self.dropout = True
    else:
        self.dropout = False
def __init__(self, char_vocab=60, char_emb_dim=50, char_window_size=5,
             char_init_emb=None, char_hidden_dim=100, tag_num=2):
    super(CharSeg, self).__init__(
        char_emb=L.EmbedID(char_vocab, char_emb_dim),
        char_conv=L.Convolution2D(
            1, char_hidden_dim,
            ksize=(char_emb_dim, char_window_size),
            stride=(1, 1), pad=0),
        predict=L.Linear(char_hidden_dim, tag_num),
        # CRF layer: passed through the Chain constructor so it is registered
        # as a child link and its transition cost matrix is trained
        crf=L.CRF1d(n_label=2),
    )
def setUp(self):
    self._config_user = chainer.using_config('dtype', self.dtype)
    self._config_user.__enter__()
    self.n_label = 3
    self.lengths = [3, 3]
    self.batches = [2, 2, 2]
    self.xs = [numpy.random.uniform(-1, 1, (b, 3)).astype(self.dtype)
               for b in self.batches]
    self.ys = [numpy.random.randint(
        0, self.n_label, (b,)).astype(numpy.int32)
        for b in self.batches]
    self.link = links.CRF1d(n_label=self.n_label)
    self.cost_shape = (self.n_label, self.n_label)
    if self.dtype == numpy.float16:
        self.check_forward_options = {'rtol': 5e-3, 'atol': 1e-2}
    else:
        self.check_forward_options = {'atol': 1e-4}
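The data built in setUp is already in the time-major layout CRF1d expects (one (batch, n_label) array per time step). The sketch below is not part of the original test; it only illustrates how that data would be fed to the link for the loss and for Viterbi decoding.

def check_usage(self):
    # Negative log-likelihood, averaged over the batch by default
    loss = self.link(self.xs, self.ys)
    # Viterbi decoding: best per-sequence scores and a time-major label path
    score, path = self.link.argmax(self.xs)
    assert len(path) == len(self.xs)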
def __init__(self, n_vocab, n_char, n_tag, embed_dim, hidden_dim, dropout):
    super(CRFTaggerBase, self).__init__(
        embed=L.EmbedID(n_vocab, embed_dim, ignore_label=-1),
        # character embedding dimension is hard-coded to 25
        char_embed=L.EmbedID(n_char, 25, ignore_label=-1),
        forward_l1=L.NStepLSTM(1, embed_dim + 50, embed_dim + 50, dropout=0, use_cudnn=True),
        backward_l1=L.NStepLSTM(1, embed_dim + 50, embed_dim + 50, dropout=0, use_cudnn=True),
        l2=L.Linear((embed_dim + 50) * 2, n_tag),
        forward_char=L.NStepLSTM(1, 25, 25, dropout=0, use_cudnn=True),
        backward_char=L.NStepLSTM(1, 25, 25, dropout=0, use_cudnn=True),
        crf=L.CRF1d(n_tag))
    if dropout:
        self.dropout = True
    else:
        self.dropout = False
def __init__(self, n_words, n_tags):
    super().__init__(
        embed=L.EmbedID(n_words, n_tags),
        crf=L.CRF1d(n_tags))
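For this minimal model the embedding layer maps each word id directly to per-tag scores. A hedged sketch of the typical forward pass for such a two-layer tagger follows; forward, xs and ys are illustrative names, not part of the snippet above.

import chainer.functions as F

def forward(self, xs, ys):
    # xs: list of int32 word-id arrays, ys: matching tag-id arrays,
    # both sorted by length in descending order
    hs = [self.embed(x) for x in xs]   # per-word tag scores
    hs = F.transpose_sequence(hs)      # batch-major -> time-major
    ts = F.transpose_sequence(ys)
    return self.crf(hs, ts)            # CRF negative log-likelihood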
def __init__(self, n_vocab=None, n_char_vocab=None, emb_dim=100, hidden_dim=200,
             init_emb=None, use_dropout=0.33, n_layers=1, n_label=0,
             use_crf=True, use_bi=True, char_input_dim=100, char_hidden_dim=100,
             rnn_name='bilstm', demo=False, use_cudnn=True,
             n_add_feature_dim=0, n_add_feature=0, n_vocab_add=[]):
    # feature_dim = emb_dim + add_dim + pos_dim
    n_dir = 2 if use_bi else 1
    feature_dim = emb_dim + n_add_feature_dim * n_add_feature
    self.n_add_feature_dim = n_add_feature_dim
    self.n_add_feature = n_add_feature

    use_char = False
    if n_char_vocab is not None:
        use_char = True
        feature_dim += char_hidden_dim

    rnn_names = ['bilstm', 'lstm', 'bigru', 'gru', 'birnn', 'rnn']
    rnn_links = [L.NStepBiLSTM, L.NStepLSTM, L.NStepBiGRU, L.NStepGRU,
                 L.NStepBiRNNTanh, L.NStepRNNTanh]
    if rnn_name not in rnn_names:
        candidate = ','.join(rnn_names)
        raise ValueError('Invalid RNN name: "%s". Please select from [%s]'
                         % (rnn_name, candidate))
    rnn_link = rnn_links[rnn_names.index(rnn_name)]

    super(BiLSTM_CNN_CRF, self).__init__(
        word_embed=L.EmbedID(n_vocab, emb_dim, ignore_label=-1),
        rnn=my_rnn_link(rnn_link, n_layers, feature_dim, hidden_dim,
                        use_dropout, use_cudnn),
        output_layer=L.Linear(hidden_dim * n_dir, n_label),
    )

    if init_emb is not None:
        self.word_embed.W.data[:] = init_emb[:]

    if use_char:
        char_cnn = CharCNNEncoder(emb_dim=char_input_dim, window_size=3,
                                  hidden_dim=char_hidden_dim,
                                  vocab_size=n_char_vocab, init_emb=None,
                                  PAD_IDX=0)
        self.add_link('char_cnn', char_cnn)

    if self.n_add_feature:
        for i in six.moves.range(self.n_add_feature):
            n_add_vocab = n_vocab_add[i]
            add_embed = L.EmbedID(n_add_vocab, n_add_feature_dim, ignore_label=-1)
            self.add_link('add_embed_' + str(i), add_embed)

    # if n_pos:
    #     pos_embed = L.EmbedID(n_pos, pos_dim, ignore_label=-1)
    #     self.add_link('pos_embed', pos_embed)

    if use_crf:
        if demo:
            import my_crf
            self.add_link('lossfun', my_crf.CRF1d(n_label=n_label))
        else:
            self.add_link('lossfun', L.CRF1d(n_label=n_label))

    # self.n_pos = n_pos
    self.hidden_dim = hidden_dim
    self.train = True
    self.use_dropout = use_dropout
    self.n_layers = n_layers
    self.use_char = use_char

    # Forget gate bias => 1.0
    # MEMO: Values 1 and 5 reference the forget gate.
    for w in self.rnn:
        w.b1.data[:] = 1.0
        w.b5.data[:] = 1.0
def __init__(self, n_vocab, n_pos):
    super(CRF, self).__init__()
    with self.init_scope():
        self.feature = L.EmbedID(n_vocab, n_pos)
        self.crf = L.CRF1d(n_pos)
def _setup_decoder(self):
    self.crf = L.CRF1d(self.num_tag_vocab, initial_cost=self.initializer)
def __init__(self, n_vocab, n_pos):
    super(CRF, self).__init__(
        feature=L.EmbedID(n_vocab, n_pos),
        crf=L.CRF1d(n_pos),
    )
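This is the same embed-plus-CRF tagger in the keyword-argument Chain style. The snippet does not show decoding; the sketch below is an assumption illustrating how CRF1d.argmax is typically used to recover per-sentence tag sequences (predict is a hypothetical method name).

import chainer.functions as F

def predict(self, xs):
    # xs: list of int32 word-id arrays, sorted by length in descending order
    hs = [self.feature(x) for x in xs]
    hs = F.transpose_sequence(hs)      # batch-major -> time-major
    _, path = self.crf.argmax(hs)      # Viterbi path, time-major
    return F.transpose_sequence(path)  # back to one tag sequence per sentence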
def __init__(self, n_vocab, unigram_embed_dim, n_bigrams, bigram_embed_dim,
             n_chunks, chunk_embed_dim, rnn_unit_type, rnn_bidirection,
             rnn_n_layers1, rnn_n_units1, rnn_n_layers2, rnn_n_units2,
             mlp_n_layers, mlp_n_units, n_labels, use_crf=True, feat_dim=0,
             embed_dropout=0, rnn_dropout=0, biaffine_dropout=0,
             mlp_dropout=0, chunk_vector_dropout=0,
             pretrained_unigram_embed_dim=0, pretrained_bigram_embed_dim=0,
             pretrained_chunk_embed_dim=0,
             pretrained_embed_usage=ModelUsage.NONE,
             chunk_pooling_type=constants.AVG, min_chunk_len=1,
             max_chunk_len=0, chunk_loss_ratio=0, biaffine_type='',
             file=sys.stderr):
    chainer.Chain.__init__(self)

    self.chunk_loss_ratio = chunk_loss_ratio
    self.chunk_pooling_type = chunk_pooling_type
    self.use_attention = (chunk_pooling_type == constants.WAVG or
                          chunk_pooling_type == constants.WCON)
    self.use_concat = (chunk_pooling_type == constants.CON or
                       chunk_pooling_type == constants.WCON)
    self.use_average = not self.use_concat
    self.use_rnn2 = rnn_n_layers2 > 0 and rnn_n_units2 > 0
    self.chunk_embed_dim_merged = (
        chunk_embed_dim +
        (pretrained_chunk_embed_dim
         if pretrained_embed_usage == ModelUsage.CONCAT else 0))
    if self.use_concat:
        self.chunk_concat_num = sum(
            [i for i in range(min_chunk_len, max_chunk_len + 1)])
        self.chunk_embed_out_dim = self.chunk_embed_dim_merged * self.chunk_concat_num
    else:
        self.chunk_embed_out_dim = self.chunk_embed_dim_merged

    with self.init_scope():
        print('### Parameters', file=sys.stderr)
        print('# Chunk pooling type: {}'.format(self.chunk_pooling_type),
              file=sys.stderr)
        print('# Chunk loss ratio: {}'.format(self.chunk_loss_ratio),
              file=sys.stderr)

        # embedding layers
        self.pretrained_embed_usage = pretrained_embed_usage
        self.embed_dropout = embed_dropout
        print('# Embedding dropout ratio={}'.format(self.embed_dropout),
              file=sys.stderr)

        self.unigram_embed, self.pretrained_unigram_embed = models.util.construct_embeddings(
            n_vocab, unigram_embed_dim, pretrained_unigram_embed_dim,
            pretrained_embed_usage)
        if self.pretrained_embed_usage != ModelUsage.NONE:
            print('# Pretrained embedding usage: {}'.format(
                self.pretrained_embed_usage), file=sys.stderr)
        print('# Unigram embedding matrix: W={}'.format(
            self.unigram_embed.W.shape), file=sys.stderr)
        embed_dim = self.unigram_embed.W.shape[1]
        if self.pretrained_unigram_embed is not None:
            if self.pretrained_embed_usage == ModelUsage.CONCAT:
                embed_dim += self.pretrained_unigram_embed.W.shape[1]
            print('# Pretrained unigram embedding matrix: W={}'.format(
                self.pretrained_unigram_embed.W.shape), file=sys.stderr)

        if n_bigrams > 0 and bigram_embed_dim > 0:
            self.bigram_embed, self.pretrained_bigram_embed = models.util.construct_embeddings(
                n_bigrams, bigram_embed_dim, pretrained_bigram_embed_dim,
                pretrained_embed_usage)
            if self.pretrained_embed_usage != ModelUsage.NONE:
                print('# Pretrained embedding usage: {}'.format(
                    self.pretrained_embed_usage), file=sys.stderr)
            print('# Bigram embedding matrix: W={}'.format(
                self.bigram_embed.W.shape), file=sys.stderr)
            embed_dim += self.bigram_embed.W.shape[1]
            if self.pretrained_bigram_embed is not None:
                if self.pretrained_embed_usage == ModelUsage.CONCAT:
                    embed_dim += self.pretrained_bigram_embed.W.shape[1]
                print('# Pretrained bigram embedding matrix: W={}'.format(
                    self.pretrained_bigram_embed.W.shape), file=sys.stderr)

        self.additional_feat_dim = feat_dim
        if feat_dim > 0:
            embed_dim += feat_dim
            print('# Additional features dimension: {}'.format(feat_dim),
                  file=sys.stderr)

        self.chunk_embed, self.pretrained_chunk_embed = models.util.construct_embeddings(
            n_chunks, chunk_embed_dim, pretrained_chunk_embed_dim,
            pretrained_embed_usage)
        print('# Chunk embedding matrix: W={}'.format(
            self.chunk_embed.W.shape), file=sys.stderr)
        if self.pretrained_chunk_embed is not None:
            print('# Pretrained chunk embedding matrix: W={}'.format(
                self.pretrained_chunk_embed.W.shape), file=sys.stderr)

        self.rnn_unit_type = rnn_unit_type
        self.rnn = models.util.construct_RNN(
            rnn_unit_type, rnn_bidirection, rnn_n_layers1, embed_dim,
            rnn_n_units1, rnn_dropout)
        rnn_output_dim1 = rnn_n_units1 * (2 if rnn_bidirection else 1)

        # biaffine b/w token and chunk
        if self.use_attention:
            use_U = 'u' in biaffine_type or 'U' in biaffine_type
            use_V = 'v' in biaffine_type or 'V' in biaffine_type
            use_b = 'b' in biaffine_type or 'B' in biaffine_type
            biaffine_left_dim = rnn_output_dim1
            self.biaffine = BiaffineCombination(
                biaffine_left_dim, self.chunk_embed_dim_merged,
                use_U=use_U, use_V=use_V, use_b=use_b)
            self.biaffine_dropout = biaffine_dropout
            print('# Biaffine layer for attention: W={}, U={}, V={}, b={}, dropout={}'.format(
                self.biaffine.W.shape,
                self.biaffine.U.shape if self.biaffine.U is not None else None,
                self.biaffine.V.shape if self.biaffine.V is not None else None,
                self.biaffine.b.shape if self.biaffine.b is not None else None,
                self.biaffine_dropout), file=sys.stderr)

        # chunk vector dropout
        self.chunk_vector_dropout = chunk_vector_dropout
        print('# Chunk vector dropout={}'.format(self.chunk_vector_dropout),
              file=sys.stderr)

        # recurrent layers 2
        if self.use_rnn2:
            rnn_input_dim2 = rnn_output_dim1 + self.chunk_embed_out_dim
            self.rnn2 = models.util.construct_RNN(
                rnn_unit_type, rnn_bidirection, rnn_n_layers2,
                rnn_input_dim2, rnn_n_units2, rnn_dropout)
            rnn_output_dim2 = rnn_n_units2 * (2 if rnn_bidirection else 1)
            mlp_input_dim = rnn_output_dim2
        else:
            mlp_input_dim = rnn_output_dim1 + self.chunk_embed_out_dim

        # MLP
        print('# MLP', file=sys.stderr)
        self.mlp = MLP(mlp_input_dim, n_labels, n_hidden_units=mlp_n_units,
                       n_layers=mlp_n_layers, output_activation=F.identity,
                       dropout=mlp_dropout)

        # CRF or softmax
        self.use_crf = use_crf
        if self.use_crf:
            self.crf = L.CRF1d(n_labels)
            print('# CRF cost: {}'.format(self.crf.cost.shape),
                  file=sys.stderr)
        else:
            self.softmax_cross_entropy = softmax_cross_entropy.softmax_cross_entropy
def __init__(
        self, n_vocab, unigram_embed_dim, n_bigrams, bigram_embed_dim,
        n_attrs, attr_embed_dim, rnn_unit_type, rnn_bidirection,
        rnn_n_layers, rnn_n_units, mlp_n_layers, mlp_n_units, n_labels,
        use_crf=True, feat_dim=0, mlp_n_additional_units=0,
        embed_dropout=0, rnn_dropout=0, mlp_dropout=0,
        pretrained_unigram_embed_dim=0, pretrained_bigram_embed_dim=0,
        pretrained_embed_usage=ModelUsage.NONE,
):
    super().__init__()
    with self.init_scope():
        print('### Parameters', file=sys.stderr)

        # embedding layer(s)
        self.pretrained_embed_usage = pretrained_embed_usage
        self.embed_dropout = embed_dropout
        print('# Embedding dropout ratio={}'.format(self.embed_dropout),
              file=sys.stderr)

        self.unigram_embed, self.pretrained_unigram_embed = models.util.construct_embeddings(
            n_vocab, unigram_embed_dim, pretrained_unigram_embed_dim,
            pretrained_embed_usage)
        if self.pretrained_embed_usage != ModelUsage.NONE:
            print('# Pretrained embedding usage: {}'.format(
                self.pretrained_embed_usage), file=sys.stderr)
        print('# Unigram embedding matrix: W={}'.format(
            self.unigram_embed.W.shape), file=sys.stderr)
        embed_dim = self.unigram_embed.W.shape[1]
        if self.pretrained_unigram_embed is not None:
            if self.pretrained_embed_usage == ModelUsage.CONCAT:
                embed_dim += self.pretrained_unigram_embed.W.shape[1]
            print('# Pretrained unigram embedding matrix: W={}'.format(
                self.pretrained_unigram_embed.W.shape), file=sys.stderr)

        if n_bigrams > 0 and bigram_embed_dim > 0:
            self.bigram_embed, self.pretrained_bigram_embed = models.util.construct_embeddings(
                n_bigrams, bigram_embed_dim, pretrained_bigram_embed_dim,
                pretrained_embed_usage)
            if self.pretrained_embed_usage != ModelUsage.NONE:
                print('# Pretrained embedding usage: {}'.format(
                    self.pretrained_embed_usage), file=sys.stderr)
            print('# Bigram embedding matrix: W={}'.format(
                self.bigram_embed.W.shape), file=sys.stderr)
            embed_dim += self.bigram_embed.W.shape[1]
            if self.pretrained_bigram_embed is not None:
                if self.pretrained_embed_usage == ModelUsage.CONCAT:
                    embed_dim += self.pretrained_bigram_embed.W.shape[1]
                print('# Pretrained bigram embedding matrix: W={}'.format(
                    self.pretrained_bigram_embed.W.shape), file=sys.stderr)

        if n_attrs > 0 and attr_embed_dim > 0:
            self.attr_embed = L.EmbedID(n_attrs, attr_embed_dim)
            embed_dim += attr_embed_dim
            print('# Attribute embedding matrix: W={}'.format(
                self.attr_embed.W.shape), file=sys.stderr)
        self.attr_embed_dim = attr_embed_dim

        self.additional_feat_dim = feat_dim
        if feat_dim > 0:
            embed_dim += feat_dim
            print('# Additional features dimension: {}'.format(feat_dim),
                  file=sys.stderr)

        # recurrent layers
        self.rnn_unit_type = rnn_unit_type
        self.rnn = models.util.construct_RNN(
            rnn_unit_type, rnn_bidirection, rnn_n_layers, embed_dim,
            rnn_n_units, rnn_dropout)
        rnn_output_dim = rnn_n_units * (2 if rnn_bidirection else 1)

        # MLP
        print('# MLP', file=sys.stderr)
        mlp_in = rnn_output_dim + mlp_n_additional_units
        self.mlp = MLP(mlp_in, n_labels, n_hidden_units=mlp_n_units,
                       n_layers=mlp_n_layers, output_activation=F.identity,
                       dropout=mlp_dropout)

        # CRF or softmax
        self.use_crf = use_crf
        if self.use_crf:
            self.crf = L.CRF1d(n_labels)
            print('# CRF cost: {}'.format(self.crf.cost.shape),
                  file=sys.stderr)
        else:
            self.softmax_cross_entropy = softmax_cross_entropy.softmax_cross_entropy
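The constructor above only chooses between a CRF layer and plain softmax cross-entropy; the actual loss computation lives elsewhere in the original model. The following is a hedged sketch of how the use_crf flag typically selects the output loss (compute_loss, hs and ts are illustrative names and an assumption about the interface).

import chainer.functions as F

def compute_loss(self, hs, ts):
    # hs: per-sentence MLP score sequences, ts: gold int32 label sequences,
    # sorted by length in descending order
    if self.use_crf:
        # Sequence-level CRF loss over time-major scores
        return self.crf(F.transpose_sequence(hs), F.transpose_sequence(ts))
    else:
        # Token-wise softmax cross-entropy over the flattened batch
        return self.softmax_cross_entropy(F.concat(hs, axis=0),
                                          F.concat(ts, axis=0))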
def __init__(self, rnn):
    super(RNNCRFModel, self).__init__()
    with self.init_scope():
        self.rnn = rnn
        self.crf = L.CRF1d(const.N_CHORDS)
def __init__(self):
    super(NBLSTMCRF, self).__init__()
    with self.init_scope():
        self.blstm = NSBLSTM()
        self.crf = L.CRF1d(const.N_CHORDS)