def __init__(self, word_encoder, in_sizes=None, out_sizes=None, dropout=0.2):
    """Wrap a word encoder together with extra embedding tables.

    Args:
        word_encoder: Link registered as ``self.word_encoder``; its
            ``out_size`` attribute is the base of this link's ``out_size``.
        in_sizes: Input sizes of the extra ``EmbedID`` tables, one entry per
            table. ``None`` or an empty sequence means no extra table.
        out_sizes: Output sizes of the extra tables, paired
            position-by-position with ``in_sizes``.
        dropout: Stored unchanged as ``self.dropout``.

    Raises:
        ValueError: If ``in_sizes`` and ``out_sizes`` differ in length.
    """
    in_sizes = in_sizes or []
    out_sizes = out_sizes or []
    # zip() silently truncates to the shorter sequence, whereas out_size adds
    # up every entry of out_sizes. A length mismatch would therefore report
    # an out_size that does not match the tables actually registered, so it
    # is rejected up front.
    if len(in_sizes) != len(out_sizes):
        raise ValueError(
            "in_sizes and out_sizes must have the same length "
            "(got %d and %d)" % (len(in_sizes), len(out_sizes)))

    self.in_sizes = in_sizes
    self.out_sizes = out_sizes
    super(SubwordEmbedder, self).__init__()
    with self.init_scope():
        self.word_encoder = word_encoder
        # Extra tables are numbered starting from 1.
        for index, (in_size, out_size) in enumerate(
                zip(in_sizes, out_sizes), 1):
            embed_layer = L.EmbedID(
                in_size, out_size, ignore_label=CHAINER_IGNORE_LABEL)
            self.set_embed(index, embed_layer)
    self.dropout = dropout
    # Total output size: the word encoder's size plus every extra table.
    self.out_size = self.word_encoder.out_size
    if out_sizes:
        self.out_size += sum(out_sizes)
    self.is_subword = True
def __init__(self, n_layers, n_source_vocab, n_target_vocab, n_embed,
             n_units, n_latent, type_unit, word_dropout, denoising_rate):
    """Create the embeddings, GRU encoder/decoder and latent projections.

    ``type_unit`` is accepted but not used by this constructor. The other
    arguments size the links or are stored on the instance unchanged.
    """
    super(Seq2seq, self).__init__()

    # Size shared by the latent projections: n_units * n_layers.
    state_size = n_units * n_layers
    gru_dropout = 0.5

    with self.init_scope():
        self.embed_x = L.EmbedID(n_source_vocab, n_embed)
        self.encoder = L.NStepGRU(n_layers, n_embed, n_units, gru_dropout)
        self.W_mu = L.Linear(state_size, n_latent)
        self.W_ln_var = L.Linear(state_size, n_latent)
        self.W_h = L.Linear(n_latent, state_size)
        self.decoder = L.NStepGRU(n_layers, n_embed, n_units, gru_dropout)
        self.W = L.Linear(n_units, n_target_vocab)
        self.embed_y = L.EmbedID(n_target_vocab, n_embed)

    # Plain hyper-parameters kept for later use.
    self.n_layers = n_layers
    self.n_units = n_units
    self.n_embed = n_embed
    self.n_latent = n_latent
    self.n_target_vocab = n_target_vocab
    self.word_dropout = word_dropout
    self.denoising_rate = denoising_rate

    self.C = 0
    self.k = 10  # original author's note: unstable if 5
def __init__(self, vocab_size, embed_size, hidden_size):
    """Instantiate the encoder.

    Args:
        vocab_size: Number of distinct words in use.
        embed_size: Size of a word's vector representation.
        hidden_size: Size of the hidden layer.
    """
    # Both linear links output four times the hidden size.
    quad_hidden = 4 * hidden_size

    word_embedding = L.EmbedID(vocab_size, embed_size, ignore_label=-1)
    from_embedding = L.Linear(embed_size, quad_hidden)
    from_hidden = L.Linear(hidden_size, quad_hidden)

    super(LSTMEncoder, self).__init__(
        xe=word_embedding,
        eh=from_embedding,
        hh=from_hidden,
    )
def __init__(self, n_vocab, n_hid, communicator, rank_next, rank_prev):
    """Create an embedding, a multi-node LSTM and two linear layers.

    Args:
        n_vocab: Input size of the embedding table.
        n_hid: Output size of ``l2`` and input size of ``l3``.
        communicator: Passed to ``create_multi_node_n_step_rnn``.
        rank_next: Passed as ``rank_out`` of the multi-node RNN.
        rank_prev: Passed as ``rank_in`` of the multi-node RNN.
    """
    super(Model, self).__init__()

    # Fixed sizes of the recurrent part.
    rnn_layers = 1
    rnn_width = 10

    with self.init_scope():
        self.l1 = L.EmbedID(n_vocab, rnn_width, ignore_label=-1)

        local_lstm = L.NStepLSTM(
            n_layers=rnn_layers,
            in_size=rnn_width,
            out_size=rnn_width,
            dropout=0.1,
        )
        self.rnn = chainermn.links.create_multi_node_n_step_rnn(
            local_lstm, communicator, rank_in=rank_prev, rank_out=rank_next)

        self.l2 = L.Linear(rnn_width, n_hid)
        self.l3 = L.Linear(n_hid, 1)
def __init__(self, path_encoder, class_n, n_w_vocab=None, w_emb_size=50,
             embed_initial=None, dropout=0):
    """Combine a path encoder with a word embedding and two linear layers.

    Args:
        path_encoder: Link registered as ``self.path_encoder``; its
            ``n_units`` attribute sizes the first linear layer.
        class_n: Output size of the final linear layer.
        n_w_vocab: Input size of the word embedding table.
        w_emb_size: Output size of the word embedding table.
        embed_initial: ``initialW`` for the word embedding table.
        dropout: Stored unchanged as ``self.dropout``.
    """
    super(LexNET_h, self).__init__()
    with self.init_scope():
        self.path_encoder = path_encoder
        hidden_units = path_encoder.n_units
        self.n_units = hidden_units
        self.concat_w_embedding = L.EmbedID(
            n_w_vocab, w_emb_size, initialW=embed_initial)
        # Input sizes are left as None for both linear layers.
        self.fl1 = L.Linear(None, hidden_units)
        self.fl2 = L.Linear(None, class_n)
    self.dropout = dropout
def __init__(self, vocab_size, embed_size, hidden_size, out_size):
    """Initialise the class.

    Args:
        vocab_size: Number of words.
        embed_size: Size of the embedding vectors.
        hidden_size: Size of the hidden layer.
        out_size: Size of the output layer.
    """
    # Links used for encoding.
    word_embedding = L.EmbedID(vocab_size, embed_size, ignore_label=-1)
    step_lstm = L.LSTM(embed_size, hidden_size)
    sequence_lstm = L.NStepLSTM(1, embed_size, hidden_size, 0.1)
    hidden_to_hidden = L.Linear(hidden_size, hidden_size)

    # Link used for classification.
    hidden_to_output = L.Linear(hidden_size, out_size)

    super(LSTM_SentenceClassifier, self).__init__(
        xe=word_embedding,
        eh=step_lstm,
        encoder=sequence_lstm,
        hh=hidden_to_hidden,
        hy=hidden_to_output,
    )
def __init__(self, feature_size=16777216, embed_size=5, hidden_size1=200,
             hidden_size2=200, hidden_size3=200, nzdim=32):
    """Create the shared embedding, the FM term and the deep layers.

    Args:
        feature_size: Input size of both embedding tables.
        embed_size: Output size of the shared feature embedding.
        hidden_size1: Output size of ``L2``.
        hidden_size2: Output size of ``L3``.
        hidden_size3: Output size of ``L4``.
        nzdim: Multiplied by ``embed_size`` to give the input size of
            ``L2``.
    """
    super(DeepFM, self).__init__(
        # Shared feature embedding
        embed=L.EmbedID(feature_size, embed_size, ignore_label=-1),
        # FM component
        L1=L.EmbedID(feature_size, 1),
        # Deep component (to capture higher-order interactions)
        L2=L.Linear(nzdim * embed_size, hidden_size1),
        L3=L.Linear(hidden_size1, hidden_size2),
        L4=L.Linear(hidden_size2, hidden_size3),
        L5=L.Linear(hidden_size3, 1),
    )
    self.feature_size = feature_size
    self.embed_size = embed_size
    self.hidden_size1 = hidden_size1
    self.hidden_size2 = hidden_size2
    # Previously the only size hyper-parameter not kept on the instance;
    # stored for consistency with hidden_size1 and hidden_size2.
    self.hidden_size3 = hidden_size3
    self.nzdim = nzdim
def __init__(self, n_vocab, n_units, train=True):
    """Create an embedding, two LSTM layers and an output projection.

    Args:
        n_vocab: Input size of the embedding and output size of ``l3``.
        n_units: Width of the embedding and of both LSTM layers.
        train: Stored unchanged as ``self.train``.
    """
    links = {
        'embed': L.EmbedID(n_vocab, n_units),
        'l1': L.LSTM(n_units, n_units),
        'l2': L.LSTM(n_units, n_units),
        'l3': L.Linear(n_units, n_vocab),
    }
    super(RNNLM, self).__init__(**links)

    self.n_units = n_units
    self.n_vocab = n_vocab
    # Parameters keep each link's own initialisation here; an earlier
    # uniform(-0.1, 0.1) re-initialisation of all parameters is disabled.
    self.train = train
def __init__(self, n_vocab, n_units, loss_func, ori_con_data):
    """Create the embedding table and tie selected rows together.

    Args:
        n_vocab: Input size of the embedding table.
        n_units: Output size of the embedding table; also sets the scale
            of the uniform initialiser (``1 / n_units``).
        loss_func: Stored as ``self.loss_func``.
        ori_con_data: Indexable collection of rows; for each row, the
            embedding at index ``row[0]`` is overwritten with the embedding
            at index ``row[1]``.
    """
    super(ContinuousBoW, self).__init__()
    with self.init_scope():
        print('make first embed')
        uniform_init = I.Uniform(1. / n_units)
        self.embed = L.EmbedID(n_vocab, n_units, initialW=uniform_init)
        print('finish make first embed')

        # Give a derived word the same initial vector as its original word.
        for row_index in range(len(ori_con_data)):
            target_id = ori_con_data[row_index][0]
            source_id = ori_con_data[row_index][1]
            self.embed.W.data[target_id] = self.embed.W.data[source_id]

        self.loss_func = loss_func
def __init__(self, opt, shared=None):
    """Set up the agent: options, dictionary, links, optimizer and weights.

    Args:
        opt: Option mapping. Keys read here: ``no_cuda``, ``gpu``,
            ``rnntype``, ``hiddensize``, ``numlayers``, ``dropout``,
            ``learningrate`` and optionally ``model_file``. The key
            ``cuda`` is written back into ``opt``.
        shared: Passed to the parent constructor. When truthy, none of the
            model set-up below is performed.

    NOTE(review): the block nesting was reconstructed from a
    whitespace-collapsed source; everything except the final
    ``episode_done`` assignment is assumed to sit under ``if not shared``
    - confirm against the original file.
    """
    super(RNNAgent, self).__init__(opt, shared)
    if not shared:
        # Skipped for shared instantiations.
        opt['cuda'] = not opt['no_cuda'] and chainer.cuda.available
        # Select the array module (cupy or numpy) as a module-level global.
        global xp
        if opt['cuda']:
            print('[ Using CUDA ]')
            cuda.get_device(opt['gpu']).use()
            xp = cuda.cupy
        else:
            xp = np
        self.id = 'RNN'
        self.dict = DictionaryAgent(opt)
        self.observation = {}
        self.rnn_type = opt['rnntype']
        self.hidden_size = opt['hiddensize']
        self.num_layers = opt['numlayers']
        self.dropout_rate = opt['dropout']
        self.learning_rate = opt['learningrate']
        self.use_cuda = opt.get('cuda', False)
        self.path = opt.get('model_file', None)
        # Short aliases: vocabulary size, hidden size, layer count, dropout.
        vs = len(self.dict)
        hs = self.hidden_size
        nl = self.num_layers
        dr = self.dropout_rate
        # NOTE(review): super(Agent, self) starts the method lookup *after*
        # ``Agent`` in the MRO - presumably reaching a chainer Chain base;
        # confirm against the class hierarchy.
        super(Agent, self).__init__(embedding=L.EmbedID(vs, hs), projection=L.Linear(hs, vs))
        # NOTE(review): no 'rnn' link is registered when rnn_type is neither
        # 'GRU' nor 'LSTM'.
        if self.rnn_type == 'GRU':
            super(Agent, self).add_link('rnn', L.NStepGRU(nl, hs, hs, dr))
        elif self.rnn_type == 'LSTM':
            super(Agent, self).add_link('rnn', L.NStepLSTM(nl, hs, hs, dr))
        # Function handles kept on the instance.
        self.dropout = F.dropout
        self.softmax = F.softmax
        self.loss = F.softmax_cross_entropy
        # Plain SGD with a gradient-clipping hook (threshold 5).
        self.optimizer = chainer.optimizers.SGD(lr=self.learning_rate)
        self.optimizer.setup(self)
        self.optimizer.add_hook(chainer.optimizer.GradientClipping(5))
        if self.use_cuda:
            self.cuda()
        # Load saved parameters only when the file actually exists.
        if opt.get('model_file') and os.path.isfile(opt['model_file']):
            print('Loading existing model parameters from ' + opt['model_file'])
            self.load(opt['model_file'])
    self.episode_done = True
def __init__(self, Vi, Ei, Hi, cell_type):
    """Create an embedding and two recurrent cells built by ``cell_type``.

    Args:
        Vi: Input size of the embedding table.
        Ei: Output size of the embedding table and first argument passed to
            ``cell_type``.
        Hi: Second argument passed to ``cell_type``; stored as ``self.Hi``.
        cell_type: Callable invoked as ``cell_type(Ei, Hi)`` once for each
            of the two cells.
    """
    forward_cell = cell_type(Ei, Hi)
    backward_cell = cell_type(Ei, Hi)
    log.info("constructing encoder [%s]" % (cell_type,))

    children = {
        'emb': L.EmbedID(Vi, Ei),
        'gru_f': forward_cell,
        'gru_b': backward_cell,
    }
    super(EncoderNSteps, self).__init__(**children)
    self.Hi = Hi
def __init__(self, comm, n_layers, n_source_vocab, n_target_vocab, n_units):
    """Create the source embedding and the multi-node encoder LSTM.

    ``n_target_vocab`` is accepted but not used by this constructor.

    Args:
        comm: Communicator passed to the multi-node RNN; stored as
            ``self.comm``.
        n_layers: Number of LSTM layers.
        n_source_vocab: Input size of the source embedding table.
        n_target_vocab: Unused here.
        n_units: Width of the embedding and of the LSTM.
    """
    source_embedding = L.EmbedID(n_source_vocab, n_units)
    local_lstm = L.NStepLSTM(n_layers, n_units, n_units, 0.1)
    # The corresponding decoder LSTM will be invoked on process 1.
    multi_node_lstm = chainermn.links.create_multi_node_n_step_rnn(
        local_lstm, comm, rank_in=None, rank_out=1)

    super(Encoder, self).__init__(
        embed_x=source_embedding,
        mn_encoder=multi_node_lstm,
    )
    self.comm = comm
    self.n_layers = n_layers
    self.n_units = n_units
def __init__(self, n_layers, n_source_vocab, n_target_vocab, n_source_char,
             n_target_char, n_units, n_sentences):
    """Create word/character embeddings, GRU stacks and output projections.

    Args:
        n_layers: Number of layers of every GRU stack.
        n_source_vocab: Input size of ``embed_x``.
        n_target_vocab: Input size of ``embed_y`` and output size of ``W``.
        n_source_char: Input size of ``embed_xc``.
        n_target_char: Input size of ``embed_yc`` and output size of
            ``W_char``.
        n_units: Base width; some links use two or four times this value.
        n_sentences: Stored as ``self.n_sentences``; the length of its
            string form is stored as ``self.n_sen``.
    """
    double_units = n_units * 2
    gru_dropout = 0.1

    def make_gru(gru_cls, size):
        # GRU stack whose input and hidden sizes are both ``size``.
        return gru_cls(n_layers, size, size, gru_dropout)

    # Filled in the same order in which the links are to be registered.
    links = {}
    links['embed_x'] = L.EmbedID(n_source_vocab, n_units)
    links['embed_y'] = L.EmbedID(n_target_vocab, double_units)
    links['embed_xc'] = L.EmbedID(n_source_char, n_units)
    links['embed_yc'] = L.EmbedID(n_target_char, n_units)
    links['encoder_f'] = make_gru(L.NStepGRU, n_units)
    links['encoder_b'] = make_gru(L.NStepGRU, n_units)
    links['char_encoder'] = make_gru(L.NStepGRU, n_units)
    links['decoder'] = make_gru(My.NStepGRU, double_units)
    links['char_decoder'] = make_gru(L.NStepGRU, n_units)
    links['char_att_decoder'] = make_gru(My.NStepGRU, n_units)
    links['W'] = L.Linear(double_units, n_target_vocab)
    links['W_hat'] = L.Linear(n_units * 4, n_units)
    links['W_char'] = L.Linear(n_units, n_target_char)
    super(Seq2seq, self).__init__(**links)

    self.n_layers = n_layers
    self.n_units = n_units
    self.n_params = 7
    self.n_sentences = n_sentences
    self.n_process = 0
    self.n_sen = len(str(n_sentences))
def __init__(self, n_vocab, n_units, n_layers=2, dropout=0.5):
    """Create the language-model links and re-initialise their parameters.

    Args:
        n_vocab: Input size of the embedding and output size of the final
            linear layer.
        n_units: Width of the embedding and of the LSTM stack.
        n_layers: Number of LSTM layers.
        dropout: Dropout ratio passed to the LSTM stack; also stored as
            ``self.dropout``.
    """
    super(RNNForLM, self).__init__()
    with self.init_scope():
        self.embed = L.EmbedID(n_vocab, n_units)
        self.rnn = L.NStepLSTM(n_layers, n_units, n_units, dropout)
        self.output = L.Linear(n_units, n_vocab)

    self.dropout = dropout
    self.n_units = n_units
    self.n_layers = n_layers

    # Overwrite every parameter in place with uniform noise in [-0.1, 0.1).
    for parameter in self.params():
        values = parameter.data
        values[...] = np.random.uniform(-0.1, 0.1, values.shape)

    self.loss = 0.
    self.reset_state()
def __init__(self, embed_mat, mid_size, out_size=4, dropout=0.25):
    """Create an embedding seeded from ``embed_mat``, a BiLSTM and two heads.

    Args:
        embed_mat: Two-dimensional array; its shape gives the embedding
            table's input and output sizes and it is used as ``initialW``.
        mid_size: Output size of the bidirectional LSTM.
        out_size: Output size of the ``l3`` layer.
        dropout: Dropout ratio passed to the bidirectional LSTM.
    """
    super(BiLSTM, self).__init__()
    n_rows, n_cols = embed_mat.shape
    # Both linear heads take twice the LSTM output size as input.
    doubled = mid_size * 2

    with self.init_scope():
        self.embed = L.EmbedID(
            in_size=n_rows,
            out_size=n_cols,
            initialW=embed_mat,
            ignore_label=-1,
        )
        self.bi_lstm = L.NStepBiLSTM(
            n_layers=1,
            in_size=n_cols,
            out_size=mid_size,
            dropout=dropout,
        )
        self.l_attn = L.Linear(doubled, 1)
        self.l3 = L.Linear(doubled, out_size)
def __init__(self, train, alphabet_size, layer_type='multiple', embed_input=False, depth='zero', width=128, inception=True, conv_init=None, bias_init=None): self.depth = depth # After input layer self.width = width self.embed_input = embed_input self.layer_type = layer_type kwargs = {} input = alphabet_size factor = 3 if layer_type == 'multiple-wide' else 1 if self.embed_input: kwargs['embed'] = L.EmbedID(input + 1, 20, ignore_label=0) input = 20 if self.depth in ['zero', 'one', 'two', 'three']: if inception: kwargs['l0'] = NaiveWider1DInceptionLayer( input, 16 * factor, 32 * factor, 48 * factor, 64 * factor, 64 * factor, 64 * factor, 48 * factor) else: kwargs['l0'] = L.Convolution2D(input, 336 * factor, (1, 3), pad=(0, 1), initialW=conv_init, initial_bias=bias_init) input = 336 * factor if self.depth in ['one', 'two', 'three']: kwargs['l1'], input = self._get_layer(input, 1 * factor) if self.depth in ['two', 'three']: self.width = self.width / 2 kwargs['l2'], input = self._get_layer(input, 2 * factor) kwargs['l3'], input = self._get_layer(input, 2 * factor) if self.depth in ['three']: self.width = self.width / 2 kwargs['l4'], input = self._get_layer(input, 4 * factor) kwargs['l5'], input = self._get_layer(input, 4 * factor) super(SimpleIngredient, self).__init__(train=train, alphabet_size=input, out_feature=input, **kwargs)
def __init__(self, n_vocab, embed_dim, out_size, conv_width=2, train=True):
    """Create an embedding, two QRNN layers and an output projection.

    ``conv_width`` is accepted but not used by this constructor.

    Args:
        n_vocab: Input size of the embedding and output size of ``fc``.
        embed_dim: Output size of the embedding; stored as
            ``self.embed_dim``.
        out_size: Output size of both QRNN layers.
        conv_width: Unused here.
        train: Stored as ``self.train``.
    """
    self.embed_dim = embed_dim

    embedding = L.EmbedID(in_size=n_vocab, out_size=embed_dim)
    first_qrnn = QRNNLayer(in_size=embed_dim, out_size=out_size)
    second_qrnn = QRNNLayer(in_size=out_size, out_size=out_size)
    readout = L.Linear(in_size=out_size, out_size=n_vocab)
    super(QRNNLangModel, self).__init__(
        embed=embedding,
        layer1=first_qrnn,
        layer2=second_qrnn,
        fc=readout,
    )

    # When validating, set this to False manually.
    self.train = train
    self.c_layer1 = None
    self.c_layer2 = None
def __init__(self, vocab_size, embed_size):
    """Create an embedding, three convolutions and two linear layers.

    Args:
        vocab_size: Input size of the embedding table; stored on the
            instance.
        embed_size: Output size of the embedding table and width of every
            convolution kernel; stored on the instance.
    """
    n_filters = 128

    def conv(kernel_height):
        # Arguments are: in_channel, out_channel, kernel_size.
        return L.Convolution2D(1, n_filters, (kernel_height, embed_size))

    embedding = L.EmbedID(vocab_size, embed_size, ignore_label=-1)
    conv_2 = conv(2)
    conv_3 = conv(3)
    conv_4 = conv(4)
    hidden = L.Linear(384, n_filters)
    output = L.Linear(n_filters, 2)

    super(cnn, self).__init__(
        x_i=embedding,
        i_c1=conv_2,
        i_c2=conv_3,
        i_c3=conv_4,
        h_h=hidden,
        h_z=output,
    )
    self.vocab_size = vocab_size
    self.embed_size = embed_size
def __init__(self, n_layers, n_source_vocab, n_target_vocab, n_units, h=8,
             dropout=0.1, max_length=500, use_label_smoothing=False,
             embed_position=False):
    """Create the embeddings, encoder and decoder of the model.

    Args:
        n_layers: Passed to ``Encoder`` and ``Decoder``.
        n_source_vocab: Input size of the source embedding.
        n_target_vocab: Input size of the target embedding.
        n_units: Width of the embeddings; passed to encoder and decoder.
        h: Passed to ``Encoder`` and ``Decoder``.
        dropout: Passed to ``Encoder`` and ``Decoder``; stored on the
            instance.
        max_length: Input size of the optional position embedding and first
            argument of ``initialize_position_encoding``.
        use_label_smoothing: Stored on the instance.
        embed_position: When truthy, a position embedding table is also
            created.
    """
    super(Transformer, self).__init__()

    # Keyword arguments shared by both word embedding tables.
    word_embed_kwargs = {'ignore_label': -1, 'initialW': linear_init}

    with self.init_scope():
        self.embed_x = L.EmbedID(n_source_vocab, n_units, **word_embed_kwargs)
        self.embed_y = L.EmbedID(n_target_vocab, n_units, **word_embed_kwargs)
        self.encoder = Encoder(n_layers, n_units, h, dropout)
        self.decoder = Decoder(n_layers, n_units, h, dropout)
        if embed_position:
            self.embed_pos = L.EmbedID(max_length, n_units, ignore_label=-1)

    self.n_layers = n_layers
    self.n_units = n_units
    self.n_target_vocab = n_target_vocab
    self.dropout = dropout
    self.use_label_smoothing = use_label_smoothing
    self.initialize_position_encoding(max_length, n_units)
    # Square root of the model width.
    self.scale_emb = self.n_units ** 0.5
def __init__(self, n_layer, n_vocab, n_units, dropout, cudnn, initialW=None):
    """Create an embedding and a bidirectional GRU of fixed width 50.

    ``n_units`` and ``cudnn`` are accepted but not used by this constructor.

    Args:
        n_layer: Number of layers of the bidirectional GRU.
        n_vocab: Input size of the embedding table.
        n_units: Unused here.
        dropout: Dropout ratio of the bidirectional GRU.
        cudnn: Unused here.
        initialW: ``initialW`` of the embedding table.
    """
    fixed_width = 50
    embedding = L.EmbedID(
        n_vocab, fixed_width, initialW=initialW, ignore_label=0)
    bi_gru = L.NStepBiGRU(n_layer, fixed_width, fixed_width, dropout)
    super(BiRNN, self).__init__(embed=embedding, l1=bi_gru)
def __init__(self, gpu, n_vocab, n_aspect, n_units, hidden_units, out_units,
             loss_func, train=True, drop_ratio=0.0):
    """Create the two directional embeddings, LSTMs and the linear layers.

    Args:
        gpu: When ``>= 0`` the link is moved to the GPU via ``to_gpu()``.
        n_vocab: Input size of both embedding tables.
        n_aspect: Input size of the ``l1`` layer.
        n_units: Width of the embeddings and output size of ``l1``.
        hidden_units: Output size of each LSTM.
        out_units: Output size of the ``l4`` layer.
        loss_func: Stored as ``self.loss_func`` inside the init scope.
        train: Stored as ``self.train``.
        drop_ratio: Stored as ``self.drop_ratio``.
    """
    super(AspectSentiContext2Vec, self).__init__()
    both_directions = 2 * hidden_units

    with self.init_scope():
        self.l2r_embed = L.EmbedID(n_vocab, n_units)
        self.r2l_embed = L.EmbedID(n_vocab, n_units)
        self.l1 = L.Linear(n_aspect, n_units)
        self.loss_func = loss_func
        self.l2r_1 = L.LSTM(n_units, hidden_units)
        self.r2l_1 = L.LSTM(n_units, hidden_units)
        self.l3 = L.Linear(both_directions, both_directions)
        self.l4 = L.Linear(both_directions, out_units)

    if gpu >= 0:
        self.to_gpu()

    # Replace both embedding matrices with float32 normal noise whose
    # standard deviation is sqrt(1 / number_of_rows).
    for table in (self.l2r_embed, self.r2l_embed):
        table.W.data = self.xp.random.normal(
            0,
            math.sqrt(1.0 / table.W.data.shape[0]),
            table.W.data.shape,
        ).astype(self.xp.float32)

    self.train = train
    self.drop_ratio = drop_ratio
def __init__(self, n_layers, n_vocab, n_units, dropout=0.1, wv=None):
    """Create an embedding, three convolutions and an MLP.

    Args:
        n_layers: Passed to ``MLP``.
        n_vocab: Input size of the embedding table.
        n_units: Output size of the embedding and input channel count of
            every convolution.
        dropout: Passed to ``MLP``; stored as ``self.dropout``.
        wv: ``initialW`` of the embedding table; when ``None`` the
            module-level ``embed_init`` is used instead. Stored as
            ``self.wv``.
    """
    out_units = n_units // 3
    merged_units = out_units * 3
    super(CNNEncoder, self).__init__()

    def make_conv(window):
        # Kernel of height ``window``, padded by window - 1 along that axis.
        return L.Convolution2D(
            n_units, out_units, ksize=(window, 1), stride=1,
            pad=(window - 1, 0), nobias=True)

    with self.init_scope():
        initial_weights = embed_init if wv is None else wv
        self.embed = L.EmbedID(
            n_vocab, n_units, ignore_label=-1, initialW=initial_weights)
        self.wv = wv
        self.cnn_w3 = make_conv(3)
        self.cnn_w4 = make_conv(4)
        self.cnn_w5 = make_conv(5)
        self.mlp = MLP(n_layers, merged_units, dropout)

    self.out_units = merged_units
    self.dropout = dropout
def __init__(self, model_path, word_dim=None, afix_dim=None, nlayers=2,
             hidden_dim=128, dep_dim=100, dropout_ratio=0.5):
    """Build the parser either from saved definitions or for training.

    When ``word_dim`` is ``None`` the hyper-parameters are loaded from
    ``<model_path>/tagger_defs.txt``; otherwise they are collected from the
    arguments and the vocabulary files under ``model_path`` and written to
    that definitions file.

    Args:
        model_path: Directory holding ``tagger_defs.txt`` and, when
            training, ``words.txt``, ``suffixes.txt``, ``prefixes.txt`` and
            ``target.txt``.
        word_dim: Word embedding size; ``None`` selects the load branch.
        afix_dim: Affix embedding size (training only).
        nlayers: Number of LSTM layers (training only).
        hidden_dim: LSTM hidden size (training only).
        dep_dim: Output size of the arc/rel projections (training only).
        dropout_ratio: Stored as ``self.dropout_ratio``.

    NOTE(review): ``self.word_dim``, ``self.afix_dim``, ``self.n_words``,
    ``self.targets`` etc. are read below but never assigned directly in
    this method; presumably ``Param.load`` and attribute writes on
    ``Param(self)`` set them on ``self`` - confirm against ``Param``.
    """
    self.model_path = model_path
    defs_file = model_path + "/tagger_defs.txt"
    if word_dim is None:
        # No dimensions given: restore settings from the definitions file.
        self.train = False
        Param.load(self, defs_file)
        self.extractor = FeatureExtractor(model_path, length=True)
    else:
        # training
        self.train = True
        p = Param(self)
        p.dep_dim = dep_dim
        p.word_dim = word_dim
        p.afix_dim = afix_dim
        p.hidden_dim = hidden_dim
        p.nlayers = nlayers
        p.n_words = len(read_model_defs(model_path + "/words.txt"))
        p.n_suffixes = len(read_model_defs(model_path + "/suffixes.txt"))
        p.n_prefixes = len(read_model_defs(model_path + "/prefixes.txt"))
        p.targets = read_model_defs(model_path + "/target.txt")
        p.dump(defs_file)
    # LSTM input size: one word embedding plus eight affix embeddings.
    self.in_dim = self.word_dim + 8 * self.afix_dim
    self.dropout_ratio = dropout_ratio
    super(FastBiaffineLSTMParser, self).__init__(
        emb_word=L.EmbedID(self.n_words, self.word_dim, ignore_label=IGNORE),
        emb_suf=L.EmbedID(self.n_suffixes, self.afix_dim, ignore_label=IGNORE),
        emb_prf=L.EmbedID(self.n_prefixes, self.afix_dim, ignore_label=IGNORE),
        lstm_f=FixedLengthNStepLSTM(self.nlayers, self.in_dim, self.hidden_dim, 0.32),
        lstm_b=FixedLengthNStepLSTM(self.nlayers, self.in_dim, self.hidden_dim, 0.32),
        # The projections take twice the hidden size as input.
        arc_dep=Linear(2 * self.hidden_dim, self.dep_dim),
        arc_head=Linear(2 * self.hidden_dim, self.dep_dim),
        rel_dep=Linear(2 * self.hidden_dim, self.dep_dim),
        rel_head=Linear(2 * self.hidden_dim, self.dep_dim),
        biaffine_arc=Biaffine(self.dep_dim),
        biaffine_tag=Bilinear(self.dep_dim, self.dep_dim, len(self.targets)))
def __init__(self, n_vocab_c, n_vocab_r, n_layer, embed_dim, input_channel,
             output_channel, wordvec_unchain, train=True):
    """Store the settings and, for a one-layer model, create the links.

    Links are only created when ``n_layer == 1``; for any other value the
    parent constructor is not called by this method. ``input_channel`` is
    accepted but not used here.

    Args:
        n_vocab_c: Input size of the ``embed_c`` table.
        n_vocab_r: Input size of the ``embed_r`` table.
        n_layer: Stored as ``self.n_layer``; see above.
        embed_dim: Word-embedding vector size.
        input_channel: Unused here.
        output_channel: Output channel count of the convolution.
        wordvec_unchain: Stored as ``self.wordvec_unchain``.
        train: Stored as ``self.train``.
    """
    self.train = train
    self.n_layer = n_layer
    self.embed_dim = embed_dim
    self.wordvec_unchain = wordvec_unchain

    if self.n_layer != 1:
        return

    def small_normal(n_rows):
        # Normal(0, 0.01) initial values, one row per vocabulary entry.
        return np.random.normal(
            loc=0.0, scale=0.01, size=(n_rows, embed_dim))

    embed_c = L.EmbedID(n_vocab_c, embed_dim, initialW=small_normal(n_vocab_c))
    embed_r = L.EmbedID(n_vocab_r, embed_dim, initialW=small_normal(n_vocab_r))
    conv1 = L.Convolution2D(1, output_channel, (4, embed_dim), pad=(3, 0))
    # An alternative input size of 2 + 4 would add the 4 lexical features
    # of the WikiQA task; only the 2 base inputs are used here.
    l1 = L.Linear(in_size=2, out_size=1)

    super(ABCNN_2, self).__init__(
        embed_c=embed_c,
        embed_r=embed_r,
        conv1=conv1,
        l1=l1,
    )
def __init__(self, in_vocab_size, hidden_dim, layer_num, out_vocab_size,
             gru, bidirectional, pyramidal, dropout_ratio,
             src_vocab_size=None):
    """Create the embeddings, the stacked RNN encoder and the decoder.

    Args:
        in_vocab_size: Input size of the target-side embedding (and of the
            shared embedding when ``src_vocab_size`` is ``None``).
        hidden_dim: Embedding width and size passed to each RNN layer.
        layer_num: Number of encoder RNN layers; passed to the decoder.
        out_vocab_size: Passed to the decoder.
        gru: Truthy selects ``GruRnn`` layers, otherwise ``LstmRnn``.
        bidirectional: Truthy wraps two RNNs per layer in a ``BiRnn``.
        pyramidal: Passed to ``MultiLayerRnn``.
        dropout_ratio: Passed to ``MultiLayerRnn`` and the decoder.
        src_vocab_size: Input size of a separate source-side embedding;
            ``None`` means one embedding is shared by both sides.
    """
    super(AttentionalEncoderDecoder, self).__init__()

    if src_vocab_size is None:
        # Use the same vocabulary for source and target.
        shared_emb = L.EmbedID(
            in_vocab_size, hidden_dim, ignore_label=IGNORE_ID)
        self.add_link('word_emb', shared_emb)
        self.word_emb_src = self.word_emb_trg = shared_emb
    else:
        source_emb = L.EmbedID(
            src_vocab_size, hidden_dim, ignore_label=IGNORE_ID)
        target_emb = L.EmbedID(
            in_vocab_size, hidden_dim, ignore_label=IGNORE_ID)
        # self.word_emb_src / self.word_emb_trg are read below, so this
        # relies on add_link exposing each link under its name.
        self.add_link('word_emb_src', source_emb)
        self.add_link('word_emb_trg', target_emb)

    layer_cls = GruRnn if gru else LstmRnn
    layers = ChainList()
    for _ in range(layer_num):
        if bidirectional:
            layer = BiRnn(layer_cls(hidden_dim), layer_cls(hidden_dim))
        else:
            layer = layer_cls(hidden_dim)
        layers.add_link(layer)
    stacked_rnn = MultiLayerRnn(
        layers, [hidden_dim] * layer_num, pyramidal, dropout_ratio)

    self.add_link('encoder', Encoder(self.word_emb_src, stacked_rnn))
    decoder = AttentionalDecoder(
        self.word_emb_trg, hidden_dim, layer_num, out_vocab_size, gru,
        dropout_ratio)
    self.add_link('decoder', decoder)

    self.in_vocab_size = in_vocab_size
    self.hidden_dim = hidden_dim
    self.layer_num = layer_num
    self.out_vocab_size = out_vocab_size
    self.gru = gru
    self.bidirectional = bidirectional
    self.pyramidal = pyramidal
def __init__(self,
             vocabulary_size: int,
             word_embeddings_size: int,
             hidden_layer_size: int,
             attention_hidden_layer_size: int,
             encoder_output_size: int,
             maxout_layer_size: int,
             maxout_pool_size: int = 2,
             ignore_label: int = -1,
             dynamic_attention: bool = False):
    """Create the decoder's embedding, LSTM, maxout, output and attention.

    Args:
        vocabulary_size: Input size of the embedding and output size of the
            final linear layer.
        word_embeddings_size: Output size of the embedding.
        hidden_layer_size: Output size of the stateless LSTM and width of
            the ``bos_state`` parameter.
        attention_hidden_layer_size: Passed to the attention module.
        encoder_output_size: Added to the embedding size to form the LSTM
            input size; passed to the attention module.
        maxout_layer_size: Output size of the maxout layer.
        maxout_pool_size: Pool size of the maxout layer.
        ignore_label: ``ignore_label`` of the embedding.
        dynamic_attention: Truthy selects ``DynamicAttentionModule``,
            otherwise ``AttentionModule``.
    """
    super(Decoder, self).__init__()

    lstm_input_size = word_embeddings_size + encoder_output_size
    attention_cls = (
        DynamicAttentionModule if dynamic_attention else AttentionModule)

    with self.init_scope():
        self.embed_id = L.EmbedID(
            vocabulary_size, word_embeddings_size, ignore_label=ignore_label)
        self.rnn = L.StatelessLSTM(lstm_input_size, hidden_layer_size)
        self.maxout = L.Maxout(
            lstm_input_size + hidden_layer_size,
            maxout_layer_size,
            maxout_pool_size)
        self.linear = L.Linear(maxout_layer_size, vocabulary_size)
        self.attention = attention_cls(
            encoder_output_size,
            attention_hidden_layer_size,
            hidden_layer_size,
            word_embeddings_size)
        # Learnable (1, hidden_layer_size) state initialised from randn.
        bos_initial = self.xp.random.randn(1, hidden_layer_size).astype('f')
        self.bos_state = Parameter(initializer=bos_initial)

    self.vocabulary_size = vocabulary_size
    self.word_embeddings_size = word_embeddings_size
    self.hidden_layer_size = hidden_layer_size
    self.encoder_output_size = encoder_output_size
def __init__(self, n_embed=9, n_rel=7, d=25, k=75):
    """Re-create the base model's links with doubled widths.

    Calls ``Base.__init__`` and then, inside the init scope, deletes the
    attributes ``embed``, ``V``, ``Vc`` and ``C`` and assigns new links
    under the same names, alongside four new parameters.

    Args:
        n_embed: Number of rows of the entity embedding table.
        n_rel: Output size of the ``C`` layer.
        d: Embedding size; the embedding table created here has ``2 * d``
            columns.
        k: Output size of ``Vc`` and first dimension of ``wc_re``/``wc_im``.

    NOTE(review): presumably ``Base.__init__`` already registered
    ``embed``, ``V``, ``Vc`` and ``C``, which is why they are deleted
    before being re-assigned - confirm against ``Base``.
    """
    Base.__init__(self, n_embed, n_rel, d, k)
    with self.init_scope():
        # Set initializer
        u_initializer = chainer.initializers.Uniform(dtype=self.xp.float32)
        initial_embed = self.xp.random.uniform(-0.01, 0.01, (n_embed, 2 * d))
        # Entity vectors
        del self.embed
        self.embed = L.EmbedID(n_embed, 2 * d, initialW=initial_embed)
        # RNTN layer
        # - Tensors W
        self.w_re = chainer.Parameter(shape=(2 * d, d), initializer=u_initializer)
        self.w_im = chainer.Parameter(shape=(2 * d, d), initializer=u_initializer)
        # - Standard layer V
        del self.V
        self.V = L.Linear(in_size=4 * d, out_size=2 * d, initialW=u_initializer)
        # Comparison layer
        # - Tensors W
        self.wc_re = chainer.Parameter(shape=(k, d), initializer=u_initializer)
        self.wc_im = chainer.Parameter(shape=(k, d), initializer=u_initializer)
        # - Standard layer V
        del self.Vc
        self.Vc = L.Linear(in_size=4 * d, out_size=k, initialW=u_initializer)
        # Converter matrix; as written it maps k -> n_rel.
        del self.C
        self.C = L.Linear(in_size=k, out_size=n_rel, initialW=u_initializer)
        # Other parameters
        self.comp = True
def __init__(self, base_data_path, special_tokens, metadata_path=None, initialW=None):
    """Set up a fixed vector table plus a trainable table for special tokens.

    Args:
        base_data_path: ``None`` or the path to pre-trained vectors; the
            file name must end in ``.pklb`` or ``.bin``.
        special_tokens: List (possibly empty) of trainable tokens appended
            to the loaded token list.
        metadata_path: ``None`` or the path to the metadata of a training
            result, passed to ``restore_metadata``.
        initialW: ``initialW`` of the special-token embedding.

    Raises:
        Exception: If ``base_data_path`` has an unsupported extension.

    NOTE(review): block nesting was reconstructed from a
    whitespace-collapsed source - confirm against the original file.
    ``'W'`` is added as a persistent value in both branches; presumably
    only one of ``base_data_path`` / ``metadata_path`` is expected to be
    given - verify against callers.
    """
    super(JaEmbedID, self).__init__()
    self.ignore_label = -1
    self.base_data_path = base_data_path
    if not (self.base_data_path is None):
        # Pick the loader from the file extension.
        if self.base_data_path.endswith('.pklb'):
            tl, tv = self.load_vectors_from_pklb(self.base_data_path)
        elif self.base_data_path.endswith('.bin'):
            tl, tv = self.load_vectors_from_bin(self.base_data_path)
        else:
            raise Exception('unknown format for embedding base')
        self.token_list = tl
        # The loaded vectors are stored as a persistent value.
        self.add_persistent('W', self.xp.asarray(tv, dtype='float32'))
        self.vector_dim = tv.shape[1]
        # Special tokens are appended after the loaded ("normal") tokens.
        self.normal_token_len = len(self.token_list)
        self.token_list.extend(special_tokens)
        self.total_token_len = len(self.token_list)
        self.special_token_len = \
            self.total_token_len - self.normal_token_len
    if not (metadata_path is None):
        # presumably restore_metadata sets normal_token_len, vector_dim,
        # special_token_len and token_list, which are read below - confirm.
        self.restore_metadata(metadata_path)
        # An empty matrix allocated for model-loading after initializing.
        W_shape = (self.normal_token_len, self.vector_dim)
        self.add_persistent('W', self.xp.zeros(W_shape, dtype='float32'))
    with self.init_scope():
        # Only the special tokens get a registered embedding link.
        self.embed_special = L.EmbedID(self.special_token_len, self.vector_dim, ignore_label=self.ignore_label, initialW=initialW)
    self.token2id = self.make_dict()
def __init__(self, vocab_size, hidden_size, dropout_ratio, ignore_label):
    """Create the word/image embeddings, the LSTM and the caption decoder.

    Args:
        vocab_size: Input size of the word embedding and output size of the
            caption decoder.
        hidden_size: Width of the word embedding, the image projection and
            the LSTM.
        dropout_ratio: Dropout ratio of the LSTM; stored on the instance.
        ignore_label: ``ignore_label`` of the word embedding.
    """
    super(NStepLSTMLanguageModel, self).__init__()

    def small_normal():
        # A fresh Normal(0.01) initialiser for each linear layer.
        return initializers.Normal(0.01)

    with self.init_scope():
        self.embed_word = L.EmbedID(
            vocab_size,
            hidden_size,
            initialW=initializers.Normal(1.0),
            ignore_label=ignore_label,
        )
        # Only one size is given to this linear layer.
        self.embed_img = L.Linear(hidden_size, initialW=small_normal())
        self.lstm = L.NStepLSTM(1, hidden_size, hidden_size, dropout_ratio)
        self.decode_caption = L.Linear(
            hidden_size, vocab_size, initialW=small_normal())

    self.dropout_ratio = dropout_ratio
def __init__(self, mode, n_layer, n_unit, n_vocab, gpu=-1, dropout=0.5):
    """Create the links for the requested sequence-to-sequence variant.

    Args:
        mode: ``"normal"`` (``NstepLstmNet`` encoder, decoder width
            ``n_unit``), ``"bilstm"`` (``BiNstepLstmNet`` encoder, decoder
            width ``2 * n_unit``) or ``"attention"`` (as ``"bilstm"`` plus
            a ``GrobalAttentionNet`` link).
        n_layer: Passed to the encoder network.
        n_unit: Embedding width and encoder unit count.
        n_vocab: Input size of the embedding and output size of ``dec``.
        gpu: Passed to the encoder network.
        dropout: Passed to the encoder network.

    Raises:
        ValueError: If ``mode`` is not one of the three supported values.
            Previously an unknown mode silently skipped the parent
            constructor and left the chain unusable.
    """
    self.mode = mode

    if mode == "normal":
        encoder = NstepLstmNet
        width = n_unit
        with_attention = False
    elif mode == "bilstm":
        encoder = BiNstepLstmNet
        width = n_unit * 2
        with_attention = False
    elif mode == "attention":
        encoder = BiNstepLstmNet
        width = n_unit * 2
        with_attention = True
    else:
        raise ValueError(
            "unknown mode %r; expected 'normal', 'bilstm' or 'attention'"
            % (mode,))

    # Links are created and registered in the same order for every mode:
    # embed, bilstm, (attention,) lstm, dec.
    links = {}
    links["embed"] = L.EmbedID(n_vocab, n_unit, ignore_label=-1)
    links["bilstm"] = encoder(n_layer, n_unit, n_unit, gpu, dropout)
    if with_attention:
        links["attention"] = GrobalAttentionNet(width, width)
    links["lstm"] = L.LSTM(width, width)
    links["dec"] = L.Linear(width, n_vocab)
    super(Seq2Seq, self).__init__(**links)