def __init__(self, input_dimension: int, output_dimension: int, hidden_dimension: int, attention: bool = False):
    super().__init__()
    with super().init_scope():
        self._embed_input = L.EmbedID(input_dimension, hidden_dimension)
        self._embed_output = L.EmbedID(output_dimension, hidden_dimension)
        self._encoder = L.NStepLSTM(
            n_layers=1, in_size=hidden_dimension, out_size=hidden_dimension, dropout=0.1)
        self._decoder = L.NStepLSTM(
            n_layers=1, in_size=hidden_dimension, out_size=hidden_dimension, dropout=0.1)
        # A better name is wanted for the matrix that inverts the embedding.
        self._extract_output = L.Linear(hidden_dimension, output_dimension)
        self._use_attention = attention
        if attention:
            self._attention_layer = L.Linear(2 * hidden_dimension, hidden_dimension)
        else:
            self._attention_layer = None
    self._hyper_params = (input_dimension, output_dimension, hidden_dimension, attention)
def __init__(self, args, n_vocab_q, n_vocab_d, train=True):
    self.train = train
    self.n_layer = args.n_layer
    self.n_hdim = args.n_hdim
    self.embed_dim = args.embed_dim
    self.n_vocab_q = n_vocab_q
    self.n_vocab_d = n_vocab_d
    self.encode_type = args.encode_type
    self.weighted_sum = args.weighted_sum
    self.cnn_out_channels = args.cnn_out_channels
    self.cnn_ksize = args.cnn_ksize
    self.load_parameter = args.load_parameter
    self.device = args.gpu
    super(Network_deep, self).__init__(
        embed_q=L.EmbedID(self.n_vocab_q, self.embed_dim,
                          initialW=RS.normal(scale=0.5, size=(n_vocab_q, self.embed_dim)),
                          ignore_label=0),
        embed_d=L.EmbedID(self.n_vocab_d, self.embed_dim,
                          initialW=RS.normal(scale=0.5, size=(n_vocab_d, self.embed_dim)),
                          ignore_label=0),
        term_weight_q=EmbedID_minus_pad(self.n_vocab_q, 1,
                                        initialW=RS.normal(scale=0.5, size=(n_vocab_q, 1)),
                                        ignore_label=0),
        term_weight_d=EmbedID_minus_pad(self.n_vocab_d, 1,
                                        initialW=RS.normal(scale=0.5, size=(n_vocab_d, 1)),
                                        ignore_label=0),
        lstm_q=L.NStepLSTM(n_layers=1, in_size=self.embed_dim,
                           out_size=self.embed_dim, dropout=0.5),
        lstm_d=L.NStepLSTM(n_layers=1, in_size=self.embed_dim,
                           out_size=self.embed_dim, dropout=0.5),
        conv_q=L.Convolution2D(in_channels=1, out_channels=self.cnn_out_channels,
                               ksize=(self.cnn_ksize, self.embed_dim)),
        conv_d=L.Convolution2D(in_channels=1, out_channels=self.cnn_out_channels,
                               ksize=(self.cnn_ksize, self.embed_dim)),
        l1=L.Linear(in_size=self.cnn_out_channels * 2, out_size=self.n_hdim),
        l2=L.Linear(in_size=self.n_hdim, out_size=self.n_hdim),
        l3=L.Linear(in_size=self.n_hdim, out_size=self.n_hdim),
        l4=L.Linear(in_size=self.n_hdim, out_size=self.n_hdim),
        lo=L.Linear(in_size=self.n_hdim, out_size=1),
    )
def __init__(self, n_units, encoder_dropout=0.1, decoder_dropout=0.1):
    super(EncoderDecoderAttractor, self).__init__()
    with self.init_scope():
        self.encoder = L.NStepLSTM(1, n_units, n_units, encoder_dropout)
        self.decoder = L.NStepLSTM(1, n_units, n_units, decoder_dropout)
        self.counter = L.Linear(n_units, 1)
    self.n_units = n_units
def __init__(self, model_path):
    Param.load(self, model_path / 'tagger_defs.txt')
    self.extractor = FeatureExtractor(model_path)
    self.in_dim = self.word_dim + self.char_dim
    super(BiaffineJaLSTMParser, self).__init__(
        emb_word=L.EmbedID(self.n_words, self.word_dim),
        emb_char=L.EmbedID(self.n_chars, 50, ignore_label=IGNORE),
        conv_char=L.Convolution2D(1, self.char_dim, (3, 50), stride=1, pad=(1, 0)),
        lstm_f=L.NStepLSTM(self.nlayers, self.in_dim, self.hidden_dim, 0.32),
        lstm_b=L.NStepLSTM(self.nlayers, self.in_dim, self.hidden_dim, 0.32),
        arc_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
        arc_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
        rel_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
        rel_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
        biaffine_arc=Biaffine(self.dep_dim),
        biaffine_tag=L.Bilinear(self.dep_dim, self.dep_dim, len(self.targets)))
def __init__(self, n_lstm_layers, n_mid_units, n_out, win_size, batch_size,
             att_units_size, frame_level=True, dropout=0.5):
    super(RNN, self).__init__()
    # actual number of LSTM layers is 2 * n_lstm_layers
    initializer = chainer.initializers.Normal()
    n_word_out = n_out[0]
    n_char_out = n_out[1]
    self.batch_size = batch_size

    # local attention related
    xp = cuda.cupy
    self.Zu_init = xp.zeros((batch_size, n_word_out), dtype=np.float32)
    self.pad_size = int((win_size - 1) / 2)
    self.pad_zero = xp.zeros((self.pad_size, n_mid_units), dtype=np.float32)
    self.pad_inf = xp.full((self.pad_size, 1), -1e20, dtype=np.float32)
    self.win_size = win_size
    self.att_size = 1 if frame_level else n_mid_units

    with self.init_scope():
        self.l1 = L.Linear(None, n_mid_units, initialW=initializer)
        self.encoder = L.NStepLSTM(2, n_mid_units, n_mid_units, dropout)
        self.lstm2 = L.NStepLSTM(n_lstm_layers - 2, n_mid_units, n_mid_units, dropout)
        self.attend = Additive_Attention(n_mid_units, n_word_out, win_size,
                                         batch_size, att_units_size)
        # self.attend_ln = L.LayerNormalization(n_mid_units, initial_gamma=initializer)
        self.output = L.Linear(n_mid_units, n_word_out, initialW=initializer)
def make_model(self, env):
    n_hidden_channels = 20
    obs_size = env.observation_space.low.size
    if self.recurrent:
        v = StatelessRecurrentSequential(
            L.NStepLSTM(1, obs_size, n_hidden_channels, 0),
            L.Linear(None, 1, initialW=chainer.initializers.LeCunNormal(1e-1)),
        )
        if self.discrete:
            n_actions = env.action_space.n
            pi = StatelessRecurrentSequential(
                L.NStepLSTM(1, obs_size, n_hidden_channels, 0),
                policies.FCSoftmaxPolicy(
                    n_hidden_channels,
                    n_actions,
                    n_hidden_layers=0,
                    nonlinearity=F.tanh,
                    last_wscale=1e-1,
                )
            )
        else:
            action_size = env.action_space.low.size
            pi = StatelessRecurrentSequential(
                L.NStepLSTM(1, obs_size, n_hidden_channels, 0),
                policies.FCGaussianPolicy(
                    n_hidden_channels,
                    action_size,
                    n_hidden_layers=0,
                    nonlinearity=F.tanh,
                    mean_wscale=1e-1,
                )
            )
        return StatelessRecurrentBranched(pi, v)
    else:
        v = chainer.Sequential(
            L.Linear(None, n_hidden_channels),
            F.tanh,
            L.Linear(None, 1, initialW=chainer.initializers.LeCunNormal(1e-1)),
        )
        if self.discrete:
            n_actions = env.action_space.n
            pi = policies.FCSoftmaxPolicy(
                obs_size,
                n_actions,
                n_hidden_layers=1,
                n_hidden_channels=n_hidden_channels,
                nonlinearity=F.tanh,
                last_wscale=1e-1,
            )
        else:
            action_size = env.action_space.low.size
            pi = policies.FCGaussianPolicy(
                obs_size,
                action_size,
                n_hidden_layers=1,
                n_hidden_channels=n_hidden_channels,
                nonlinearity=F.tanh,
                mean_wscale=1e-1,
            )
        return A3CSeparateModel(pi=pi, v=v)
def __init__(self, n_cell, size_hidden, rate_dropout):
    super(ONT_LSTM, self).__init__()
    self.rate_dropout = rate_dropout
    with self.init_scope():
        self.rnn_a = L.NStepLSTM(n_cell, 300, size_hidden, rate_dropout)
        self.rnn_b = L.NStepLSTM(n_cell, 300, size_hidden, rate_dropout)
        self.l1 = L.Highway(size_hidden * 2)
        self.l2 = L.Linear(size_hidden * 2, 4)
def __init__(self, lay, vocab, k, dout):
    super(seq2seq, self).__init__(
        embedx=L.EmbedID(vocab, k),
        embedy=L.EmbedID(vocab, k),
        encoder=L.NStepLSTM(lay, k, k, dout),
        decoder=L.NStepLSTM(lay, k, k, dout),
        W=L.Linear(k, vocab),
    )
def __init__(self, model_path, word_dim=None, afix_dim=None, nlayers=2,
             hidden_dim=128, dep_dim=100, dropout_ratio=0.5):
    self.model_path = model_path
    defs_file = model_path + "/tagger_defs.txt"
    if word_dim is None:
        self.train = False
        Param.load(self, defs_file)
        self.extractor = FeatureExtractor(model_path)
    else:
        # training
        self.train = True
        p = Param(self)
        p.dep_dim = dep_dim
        p.word_dim = word_dim
        p.afix_dim = afix_dim
        p.hidden_dim = hidden_dim
        p.nlayers = nlayers
        p.n_words = len(read_model_defs(model_path + "/words.txt"))
        p.n_suffixes = len(read_model_defs(model_path + "/suffixes.txt"))
        p.n_prefixes = len(read_model_defs(model_path + "/prefixes.txt"))
        p.targets = read_model_defs(model_path + "/target.txt")
        p.dump(defs_file)
    self.in_dim = self.word_dim + 8 * self.afix_dim
    self.dropout_ratio = dropout_ratio
    super(FastBiaffineLSTMParser, self).__init__(
        emb_word=L.EmbedID(self.n_words, self.word_dim, ignore_label=IGNORE),
        emb_suf=L.EmbedID(self.n_suffixes, self.afix_dim, ignore_label=IGNORE),
        emb_prf=L.EmbedID(self.n_prefixes, self.afix_dim, ignore_label=IGNORE),
        lstm_f=L.NStepLSTM(self.nlayers, self.in_dim, self.hidden_dim, 0.32),
        lstm_b=L.NStepLSTM(self.nlayers, self.in_dim, self.hidden_dim, 0.32),
        arc_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
        arc_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
        rel_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
        rel_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
        biaffine_arc=Biaffine(self.dep_dim),
        biaffine_tag=Bilinear(self.dep_dim, self.dep_dim, len(self.targets)))
def __init__(self, n_vocab, n_lay=1, n_unit=100, dropout=0.5):
    super().__init__()
    with self.init_scope():
        self.embedx = L.EmbedID(n_vocab, n_unit)
        self.embedy = L.EmbedID(n_vocab, n_unit)
        self.encoder = L.NStepLSTM(n_lay, n_unit, n_unit, dropout)
        self.decoder = L.NStepLSTM(n_lay, n_unit, n_unit, dropout)
        self.W = L.Linear(n_unit, n_vocab)
def __init__(self, n_layers, n_source_vocab, n_target_vocab, n_units):
    super(Seq2seq, self).__init__(
        embed_x=L.EmbedID(n_source_vocab, n_units),
        embed_y=L.EmbedID(n_target_vocab, n_units),
        encoder=L.NStepLSTM(n_layers, n_units, n_units, 0.1),
        decoder=L.NStepLSTM(n_layers, n_units, n_units, 0.1),
        W=L.Linear(n_units, n_target_vocab),
    )
    self.n_layers = n_layers
    self.n_units = n_units
def __init__(self, n_layers, n_source_vocab, n_target_vocab, n_units):
    super(Seq2seq, self).__init__()
    with self.init_scope():
        self.embed_x = L.EmbedID(n_source_vocab, n_units)
        self.embed_y = L.EmbedID(n_target_vocab, n_units)
        self.encoder = L.NStepLSTM(n_layers, n_units, n_units, 0.1)
        self.decoder = L.NStepLSTM(n_layers, n_units, n_units, 0.1)
        self.W = L.Linear(n_units, n_target_vocab)
    self.n_layers = n_layers
    self.n_units = n_units
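# Hedged aside, not part of any project above: the two Seq2seq constructors
# directly above show the same model in Chainer's two idioms, the pre-v2 style
# that passes child links to super().__init__() and the v2+ style that assigns
# them under init_scope(). Either way, an NStepLSTM built like these encoders
# is later called with initial hidden/cell states and a *list* of
# variable-length sequences; passing None uses zero-initialised states.
# Names below (enc, xs) are illustrative only.
import numpy as np
import chainer.links as L

enc = L.NStepLSTM(n_layers=2, in_size=8, out_size=8, dropout=0.1)
xs = [np.random.rand(5, 8).astype(np.float32),   # sequence of length 5
      np.random.rand(3, 8).astype(np.float32)]   # sequence of length 3
hy, cy, ys = enc(None, None, xs)                 # hy, cy: (n_layers, batch, out_size)
# ys is a list with one (length, out_size) output array per input sequence.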
def __init__(self, model_path, word_dim=None, char_dim=None, nlayers=2,
             hidden_dim=128, dep_dim=100, dropout_ratio=0.5):
    self.model_path = model_path
    defs_file = model_path + "/tagger_defs.txt"
    if word_dim is None:
        self.train = False
        Param.load(self, defs_file)
        self.extractor = FeatureExtractor(model_path)
    else:
        self.train = True
        p = Param(self)
        p.dep_dim = dep_dim
        p.word_dim = word_dim
        p.char_dim = char_dim
        p.hidden_dim = hidden_dim
        p.nlayers = nlayers
        p.n_words = len(read_model_defs(model_path + "/words.txt"))
        p.n_chars = len(read_model_defs(model_path + "/chars.txt"))
        p.targets = read_model_defs(model_path + "/target.txt")
        p.dump(defs_file)
    self.in_dim = self.word_dim + self.char_dim
    self.dropout_ratio = dropout_ratio
    super(BiaffineJaLSTMParser, self).__init__(
        emb_word=L.EmbedID(self.n_words, self.word_dim),
        emb_char=L.EmbedID(self.n_chars, 50, ignore_label=IGNORE),
        conv_char=L.Convolution2D(1, self.char_dim, (3, 50), stride=1, pad=(1, 0)),
        lstm_f=L.NStepLSTM(self.nlayers, self.in_dim, self.hidden_dim, 0.32),
        lstm_b=L.NStepLSTM(self.nlayers, self.in_dim, self.hidden_dim, 0.32),
        arc_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
        arc_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
        rel_dep=L.Linear(2 * self.hidden_dim, self.dep_dim),
        rel_head=L.Linear(2 * self.hidden_dim, self.dep_dim),
        biaffine_arc=Biaffine(self.dep_dim),
        biaffine_tag=L.Bilinear(self.dep_dim, self.dep_dim, len(self.targets)))
def __init__(self, n_layers, n_source_pose_node, n_target_rhythm, n_units):
    super(XSNet, self).__init__()
    with self.init_scope():
        self.embed_x = L.Linear(n_source_pose_node, n_units)
        self.embed_y = L.EmbedID(n_target_rhythm, n_units)
        self.encoder = L.NStepLSTM(n_layers, n_units, n_units, 0.1)
        self.decoder = L.NStepLSTM(n_layers, n_units, n_units, 0.1)
        self.W = L.Linear(n_units, n_target_rhythm)
    self.n_layers = n_layers
    self.n_units = n_units
def __init__(self, n_layers, n_vocab, n_units):
    super(Seq2seq, self).__init__(
        embed_x=L.EmbedID(n_vocab, n_units),
        embed_y=L.EmbedID(n_vocab, n_units),
        encoder=L.NStepLSTM(n_layers, n_units, n_units, 0.3),
        decoder=L.NStepLSTM(n_layers, n_units, n_units, 0.3),
        W=L.Linear(n_units, n_vocab),
        bnormDec=L.BatchNormalization(n_units),
    )
    self.n_layers = n_layers
    self.n_units = n_units
def __init__(self, n_layers, n_vocab, n_units, dropout=0.1):
    super(BiLSTMEncoder, self).__init__()
    with self.init_scope():
        self.embed = L.EmbedID(n_vocab, n_units, initialW=embed_init)
        self.encoder_forward = L.NStepLSTM(n_layers, n_units, n_units, dropout)
        self.encoder_backward = L.NStepLSTM(n_layers, n_units, n_units, dropout)
    self.n_layers = n_layers
    self.out_units = n_units
    self.dropout = dropout
    self.n_dknn_layers = n_layers
def __init__(self, graph_conv, attn=None, mlp=None, symmetric=None, siamese=True,
             another_graph_conv=None, use_i_lstm=False, use_s_lstm=False):
    """Initializes the graph convolution predictor.

    Args:
        graph_conv: The graph convolution network required to obtain
            molecule feature representation.
        mlp: Multi layer perceptron; used as the final fully connected layer.
            Set it to `None` if no operation is necessary after the
            `graph_conv` calculation.
    """
    super(GraphConvPredictorForPair, self).__init__()
    with self.init_scope():
        self.graph_conv = graph_conv
        if not siamese:
            self.another_graph_conv = another_graph_conv
        if use_s_lstm:
            self.s_lstm_1 = L.NStepLSTM(n_layers=1,
                                        in_size=self.graph_conv.out_dim,
                                        out_size=self.graph_conv.out_dim,
                                        dropout=0.)
            self.s_lstm_2 = L.NStepLSTM(n_layers=1,
                                        in_size=self.graph_conv.out_dim,
                                        out_size=self.graph_conv.out_dim,
                                        dropout=0.)
        if use_i_lstm:
            self.i_lstm = L.NStepLSTM(n_layers=1,
                                      in_size=self.graph_conv.out_dim * 2,
                                      out_size=self.graph_conv.out_dim * 2,
                                      dropout=0.)
        # Register mlp/attn as child links only if they actually are links.
        if isinstance(mlp, chainer.Link):
            self.mlp = mlp
        if isinstance(attn, chainer.Link):
            self.attn = attn
    if not isinstance(mlp, chainer.Link):
        self.mlp = mlp
    if not isinstance(attn, chainer.Link):
        self.attn = attn
    self.symmetric = symmetric
    self.siamese = siamese
    self.use_i_lstm = use_i_lstm
    self.use_s_lstm = use_s_lstm
def __init__(self, n_layers, n_source_vocab, n_target_vocab, n_units,
             dropout, rnn_algo, v5=False):
    super(Seq2seq, self).__init__()
    with self.init_scope():
        self.embed_x = L.EmbedID(n_source_vocab, n_units)
        self.embed_y = L.EmbedID(n_target_vocab, n_units)
        if v5:
            self.encoder = L.NStepLSTM(n_layers, n_units, n_units, dropout)
            self.decoder = L.NStepLSTM(n_layers, n_units, n_units, dropout)
        else:
            self.encoder = L.NStepLSTM(n_layers, n_units, n_units, dropout,
                                       rnn_algo=rnn_algo)
            self.decoder = L.NStepLSTM(n_layers, n_units, n_units, dropout,
                                       rnn_algo=rnn_algo)
        self.W = L.Linear(n_units, n_target_vocab)
    self.n_layers = n_layers
    self.n_units = n_units
def __init__(self, vocab, rnn, layers, units, dout):
    super(Encoder, self).__init__()
    with self.init_scope():
        vocabsize = len(vocab) + 3  # 3 means number of special tags
        initW = None
        self.emb = L.EmbedID(vocabsize, units, initW, IGNORE)
        self.rnn = L.NStepLSTM(layers, units, units, dout)
def __init__(self, n_layers, insize, outsize, initialW=None, use_bi_lstm=False):
    super(NodeRNN, self).__init__()
    if not initialW:
        initialW = initializers.HeNormal()
    self.n_layer = n_layers
    with self.init_scope():
        if use_bi_lstm:
            self.lstm = L.NStepBiLSTM(self.n_layer, 1024, outsize // 2,
                                      initialW=initialW, dropout=0.1)  # dropout = 0.0
        else:
            self.lstm = L.NStepLSTM(self.n_layer, 1024, outsize,
                                    initialW=initialW, dropout=0.1)
        self.fc1 = L.Linear(insize, 1024, initialW=initialW)
        self.fc2 = L.Linear(1024, 1024, initialW=initialW)
def setUp(self):
    shape = (self.n_layer, len(self.lengths), self.out_size)
    if self.hidden_none:
        self.h = self.c = numpy.zeros(shape, 'f')
    else:
        self.h = numpy.random.uniform(-1, 1, shape).astype('f')
        self.c = numpy.random.uniform(-1, 1, shape).astype('f')
    self.xs = [
        numpy.random.uniform(-1, 1, (l, self.in_size)).astype('f')
        for l in self.lengths
    ]
    self.gh = numpy.random.uniform(-1, 1, shape).astype('f')
    self.gc = numpy.random.uniform(-1, 1, shape).astype('f')
    self.gys = [
        numpy.random.uniform(-1, 1, (l, self.out_size)).astype('f')
        for l in self.lengths
    ]
    self.rnn = links.NStepLSTM(
        self.n_layer, self.in_size, self.out_size, self.dropout)
    for layer in self.rnn:
        for p in layer.params():
            p.array[...] = numpy.random.uniform(-1, 1, p.shape)
    self.rnn.cleargrads()
def _test_three_recurrent_children(self, gpu):
    # Test if https://github.com/chainer/chainer/issues/6053 is addressed
    in_size = 2
    out_size = 6
    rseq = StatelessRecurrentSequential(
        L.NStepLSTM(1, in_size, 3, 0),
        L.NStepGRU(2, 3, 4, 0),
        L.NStepRNNTanh(5, 4, out_size, 0),
    )
    if gpu >= 0:
        chainer.cuda.get_device_from_id(gpu).use()
        rseq.to_gpu()
    xp = rseq.xp
    seqs_x = [
        xp.random.uniform(-1, 1, size=(4, in_size)).astype(np.float32),
        xp.random.uniform(-1, 1, size=(1, in_size)).astype(np.float32),
        xp.random.uniform(-1, 1, size=(3, in_size)).astype(np.float32),
    ]
    # Make and load a recurrent state to check if the order is correct.
    _, rs = rseq.n_step_forward(seqs_x, None, output_mode='concat')
    _, _ = rseq.n_step_forward(seqs_x, rs, output_mode='concat')
    _, rs = rseq.n_step_forward(seqs_x, None, output_mode='split')
    _, _ = rseq.n_step_forward(seqs_x, rs, output_mode='split')
def setUp(self):
    shape = (self.n_layer, len(self.lengths), self.out_size)
    self.h = numpy.random.uniform(-1, 1, shape).astype('f')
    self.c = numpy.random.uniform(-1, 1, shape).astype('f')
    self.xs = [
        numpy.random.uniform(-1, 1, (l, self.in_size)).astype('f')
        for l in self.lengths
    ]
    self.gh = numpy.random.uniform(-1, 1, shape).astype('f')
    self.gc = numpy.random.uniform(-1, 1, shape).astype('f')
    self.gys = [
        numpy.random.uniform(-1, 1, (l, self.out_size)).astype('f')
        for l in self.lengths
    ]
    self.rnn = links.NStepLSTM(
        self.n_layer, self.in_size, self.out_size, self.dropout,
        use_cudnn=self.use_cudnn)
    for layer in self.rnn:
        for p in layer.params():
            p.data[...] = numpy.random.uniform(-1, 1, p.data.shape)
    self.rnn.zerograds()
def __init__(self, n_layers=1, n_inputs=384, n_outputs=4, n_units=300,
             dropout_rate=0.1, class_weight=None):
    self.dropout_rate = dropout_rate
    self.class_weight = class_weight
    super(SER, self).__init__()
    with self.init_scope():
        self.l1 = L.NStepLSTM(n_layers, n_inputs, n_units, dropout_rate)
        self.l2 = L.Linear(n_units, n_outputs,
                           initialW=initializers.HeNormal())
def __init__(self, n_layers, n_source_vocab, trans_data, n_units, v_eos_src, n_maxsize):
    super(Seq2Tree_Flatten, self).__init__()
    # for each nodetype, for each move, the result array.
    self.trans_data = trans_data
    self.embed_idx = []
    ns = 0

    def inc():
        nonlocal ns
        ns += 1
        return ns - 1

    self.embed_idx = [[[inc() for v in vs] for vs in moves]
                      for moves in self.trans_data]
    self.embed_root_idx = ns
    self.embed_y_size = ns + 1

    self.choicerange = []
    self.choice_idx = []
    self.is_trivial = []
    s = 0
    for d in self.trans_data:
        ist = len(d) <= 1
        self.is_trivial.append(ist)
        # if ist:
        #     self.choicerange.append(None)
        #     self.choice_idx.append([0])
        #     continue
        b = s
        s += len(d)
        self.choicerange.append((b, s))
        self.choice_idx.append(list(range(b, s)))
    # self.choice_num_sum = sum(list(map(lambda d: len(d), self.trans_data)))
    self.type_size = len(self.embed_idx)
    self.n_all_choice = sum(map(lambda x: len(x), self.trans_data))

    with self.init_scope():
        self.embed_x = L.EmbedID(n_source_vocab, n_units)
        # self.embed_y = L.EmbedID(self.embed_y_size, n_units)  # maybe mergable
        self.embed_y_0 = L.EmbedID(self.embed_y_size, n_units)  # maybe mergable
        self.embed_y_1 = L.EmbedID(self.type_size, n_units)  # maybe mergable
        self.encoder = L.NStepBiLSTM(n_layers, n_units, n_units, 0.1)
        self.decoder = L.NStepLSTM(n_layers, n_units * 2, n_units * 2, 0.1)
        self.Wc = L.Linear(n_units * 4, n_units)
        self.Ws = L.Linear(n_units, self.n_all_choice)
        # self.att = Attention(n_units)
        self.att = GlobalGeneralAttention(n_units)

    self.n_layers = n_layers
    self.n_units = n_units
    self.v_eos_src = v_eos_src
    self.n_maxsize = n_maxsize
    self.rootidx = len(trans_data) - 1
def __init__(self, n_layers, insize, outsize, initialW=None, use_bi_lstm=False):
    super(ConnLabelRNN, self).__init__()
    if not initialW:
        initialW = initializers.HeNormal()
    self.n_layer = n_layers
    with self.init_scope():
        if use_bi_lstm:
            self.lstm1 = L.NStepBiLSTM(self.n_layer, insize, 512,
                                       initialW=initialW, dropout=0.1)
        else:
            self.lstm1 = L.NStepLSTM(self.n_layer, insize, 1024,
                                     initialW=initialW, dropout=0.1)
        self.fc2 = L.Linear(1024, 512)
        self.fc3 = L.Linear(512, outsize)
def __init__(self, n_layer, n_vocab, n_units, dropout, cudnn):
    super(RNN, self).__init__(
        embed=L.EmbedID(n_vocab, n_units),
        l1=L.NStepLSTM(n_layer, n_units, n_units, dropout, use_cudnn=cudnn),
        l2=L.Linear(n_units, n_vocab),
    )
def __init__(self, charVocSize, charEmbedSize, wordEmbedSize):
    super().__init__()
    embed = L.EmbedID(charVocSize, charEmbedSize)
    lstm = L.NStepLSTM(1, charEmbedSize, wordEmbedSize, dropout=0.)
    self.add_link('embed', embed)
    self.add_link('lstm', lstm)
def __init__(self, ch):
    super(Link_NStepLSTM, self).__init__(L.NStepLSTM(1, 1, 1, 0))
    hd = ch.children().__next__()
    if not (hd.w0 is None):
        self.n_in = hd.w0.shape[1]
    else:
        self.n_in = None

    self.out_size = ch.out_size
    self.n_layers = ch.n_layers
    self.dropout = ch.dropout

    self.ws = []
    self.bs = []
    for i in range(self.n_layers):
        ws = []
        bs = []
        for j in range(8):
            ws.append(helper.make_tensor_value_info(
                ('/%d/w%d' % (i, j)), TensorProto.FLOAT, ["TODO"]))
            bs.append(helper.make_tensor_value_info(
                ('/%d/b%d' % (i, j)), TensorProto.FLOAT, ["TODO"]))
        self.ws.append(ws)
        self.bs.append(bs)
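# Hedged background for the w0..w7 / b0..b7 loop above: each layer of an
# NStepLSTM is a child link holding eight weight matrices and eight bias
# vectors (four gates, each with an input-to-hidden and a hidden-to-hidden
# part), which is what this ONNX wrapper enumerates and what the test setUps
# above iterate over. Introspection sketch; all names are illustrative.
import chainer.links as L

lstm = L.NStepLSTM(n_layers=2, in_size=4, out_size=6, dropout=0.0)
layer0 = lstm[0]                                  # NStepLSTM behaves like a ChainList
print(sorted(name for name, _ in layer0.namedparams()))
# expected: ['/b0', ..., '/b7', '/w0', ..., '/w7']
print(layer0.w0.shape, layer0.w4.shape)           # (6, 4) and (6, 6)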
def __init__(self, n_layers, insize, outsize, initialW=None, use_bi_lstm=False):
    super(EdgeRNN, self).__init__()
    self.n_layer = n_layers
    self.outsize = outsize
    if use_bi_lstm:
        assert outsize % 2 == 0, outsize
    if not initialW:
        initialW = initializers.HeNormal()
    with self.init_scope():
        self.fc1 = L.Linear(insize, 256, initialW=initialW)
        self.fc2 = L.Linear(256, 256, initialW=initialW)
        if use_bi_lstm:
            self.lstm3 = L.NStepBiLSTM(self.n_layer, 256, outsize // 2,
                                       initialW=initialW, dropout=0.1)  # dropout = 0.0
        else:
            self.lstm3 = L.NStepLSTM(self.n_layer, 256, outsize,
                                     initialW=initialW, dropout=0.1)
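# Hedged note on the `outsize // 2` pattern in NodeRNN and EdgeRNN above (and
# the 512-vs-1024 split in ConnLabelRNN): NStepBiLSTM concatenates the forward
# and backward directions, so each timestep's output has 2 * out_size features;
# halving out_size keeps the bidirectional branch shape-compatible with the
# unidirectional one. Minimal shape check, names illustrative only.
import numpy as np
import chainer.links as L

bi = L.NStepBiLSTM(1, 16, 8, 0.0)                         # out_size=8 per direction
_, _, ys = bi(None, None, [np.zeros((4, 16), dtype=np.float32)])
assert ys[0].shape == (4, 16)                             # 2 * 8 features per step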