import numpy as np
# TimeEmbedding / TimeLSTM / TimeAffine and the other Time* layers are
# assumed importable from the surrounding codebase.


class Encoder:
    def __init__(self, vocab_size, wordvec_size, hidden_size):
        V = vocab_size
        D = wordvec_size
        H = hidden_size
        rn = np.random.randn

        # Xavier-style 1/sqrt(fan-in) scaling for the LSTM weights
        embed_W = (rn(V, D) / 100).astype('f')
        lstm_Wx = (rn(D, 4 * H) / np.sqrt(D)).astype('f')
        lstm_Wh = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
        lstm_b = np.zeros(4 * H).astype('f')

        self.embed = TimeEmbedding(embed_W)
        self.lstm = TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=False)

        self.params = self.embed.params + self.lstm.params
        self.grads = self.embed.grads + self.lstm.grads
        self.hs = None

    def forward(self, xs):
        xs = self.embed.forward(xs)
        hs = self.lstm.forward(xs)
        self.hs = hs
        return hs[:, -1, :]  # only the last hidden state is passed on

    def backward(self, dh):
        # The gradient enters at the last time step only
        dhs = np.zeros_like(self.hs)
        dhs[:, -1, :] = dh

        dout = self.lstm.backward(dhs)
        dout = self.embed.backward(dout)
        return dout
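The encoder compresses the whole input sequence into the single vector `hs[:, -1, :]`. A minimal shape check, with hypothetical sizes and assuming the Time* layers above are importable:

encoder = Encoder(vocab_size=100, wordvec_size=16, hidden_size=32)
xs = np.random.randint(0, 100, size=(4, 10))  # (batch, time) word IDs
h = encoder.forward(xs)
print(h.shape)                                # (4, 32): one vector per sequence

dh = np.ones_like(h)                          # dummy upstream gradient
encoder.backward(dh)                          # unrolls back through all 10 steps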
def __init__(self, vocab_size, wordvec_size, hidden_size):
    V = vocab_size
    D = wordvec_size
    H = hidden_size
    rn = np.random.randn

    # Initialise weights (Xavier-style 1/sqrt(fan-in) scaling)
    embed_W = (rn(V, D) / 100).astype('f')
    rnn_Wx = (rn(D, H) / np.sqrt(D)).astype('f')
    rnn_Wh = (rn(H, H) / np.sqrt(H)).astype('f')
    rnn_b = np.zeros(H).astype('f')
    affine_W = (rn(H, V) / np.sqrt(H)).astype('f')
    affine_b = np.zeros(V).astype('f')

    # Generate layers
    self.layers = [
        TimeEmbedding(embed_W),
        TimeRNN(rnn_Wx, rnn_Wh, rnn_b, stateful=True),
        TimeAffine(affine_W, affine_b)
    ]
    self.loss_layer = TimeSoftmaxWithLoss()
    self.rnn_layer = self.layers[1]

    # Collect all weights and gradients into one flat list each
    self.params = []
    self.grads = []
    for layer in self.layers:
        self.params += layer.params
        self.grads += layer.grads
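All recurrent weights above use the same 1/sqrt(fan-in) scale so activations keep a roughly constant magnitude across time steps, while the embedding gets a smaller fixed 0.01 scale. A sketch of that initializer factored into a helper (init_weight is a hypothetical name, not part of this codebase):

def init_weight(fan_in, fan_out):
    # std = 1/sqrt(fan_in), matching the scaling used above
    return (np.random.randn(fan_in, fan_out) / np.sqrt(fan_in)).astype('f')

rnn_Wx = init_weight(100, 64)  # e.g. D=100, H=64
print(rnn_Wx.std())            # roughly 0.1 == 1/sqrt(100)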
def __init__(self, vocab_size=10000, wordvec_size=650,
             hidden_size=650, dropout_ratio=0.5):
    V = vocab_size
    D = wordvec_size
    H = hidden_size
    rn = np.random.randn

    # Initialise weights
    embed_W = (rn(V, D) / 100).astype('f')
    lstm_Wx1 = (rn(D, 4 * H) / np.sqrt(D)).astype('f')
    lstm_Wh1 = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
    lstm_b1 = np.zeros(4 * H).astype('f')
    # The second LSTM receives the first LSTM's H-dimensional output,
    # so its input weight is (H, 4H), not (D, 4H)
    lstm_Wx2 = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
    lstm_Wh2 = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
    lstm_b2 = np.zeros(4 * H).astype('f')
    affine_b = np.zeros(V).astype('f')

    # Generate layers; the final TimeAffine reuses embed_W.T (weight tying)
    self.layers = [
        TimeEmbedding(embed_W),
        TimeDropout(dropout_ratio),
        TimeLSTM(lstm_Wx1, lstm_Wh1, lstm_b1, stateful=True),
        TimeDropout(dropout_ratio),
        TimeLSTM(lstm_Wx2, lstm_Wh2, lstm_b2, stateful=True),
        TimeDropout(dropout_ratio),
        TimeAffine(embed_W.T, affine_b)
    ]
    self.loss_layer = TimeSoftmaxWithLoss()
    self.softmax = Softmax()
    self.lstm_layers = [self.layers[2], self.layers[4]]
    self.dropout_layers = [self.layers[1], self.layers[3], self.layers[5]]

    # Collect all weights and gradients into one flat list each
    self.params = []
    self.grads = []
    for layer in self.layers:
        self.params += layer.params
        self.grads += layer.grads
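Passing embed_W.T to the output TimeAffine ties the input embedding and the output projection, removing one V x H parameter block. In NumPy the transpose is a view of the same buffer, so the two layers genuinely share storage; a quick standalone check with hypothetical sizes:

V, D = 1000, 64
embed_W = (np.random.randn(V, D) / 100).astype('f')
affine_W = embed_W.T       # weight tying: a (D, V) view of the same data

embed_W[0, 0] = 1.0        # an update to the embedding...
print(affine_W[0, 0])      # ...shows up in the output layer: 1.0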
class Decoder:
    def __init__(self, vocab_size, wordvec_size, hidden_size):
        V = vocab_size
        D = wordvec_size
        H = hidden_size
        rn = np.random.randn

        embed_W = (rn(V, D) / 100).astype('f')
        lstm_Wx = (rn(D, 4 * H) / np.sqrt(D)).astype('f')
        lstm_Wh = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
        lstm_b = np.zeros(4 * H).astype('f')
        affine_W = (rn(H, V) / np.sqrt(H)).astype('f')
        affine_b = np.zeros(V).astype('f')

        self.embed = TimeEmbedding(embed_W)
        self.lstm = TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=True)
        self.affine = TimeAffine(affine_W, affine_b)

        self.params = []
        self.grads = []
        for layer in (self.embed, self.lstm, self.affine):
            self.params += layer.params
            self.grads += layer.grads

    def forward(self, xs, h):
        self.lstm.set_state(h)  # seed the LSTM with the encoder's context vector

        out = self.embed.forward(xs)
        out = self.lstm.forward(out)
        score = self.affine.forward(out)
        return score

    def backward(self, dscore):
        dout = self.affine.backward(dscore)
        dout = self.lstm.backward(dout)
        dout = self.embed.backward(dout)
        dh = self.lstm.dh  # gradient w.r.t. the encoder's hidden state
        return dh

    def generate(self, h, start_id, sample_size):
        sampled = []
        sample_id = start_id
        self.lstm.set_state(h)

        for _ in range(sample_size):
            x = np.array(sample_id).reshape((1, 1))
            out = self.embed.forward(x)
            out = self.lstm.forward(out)
            score = self.affine.forward(out)

            sample_id = np.argmax(score.flatten())  # greedy: take the top-scoring word
            sampled.append(int(sample_id))

        return sampled
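generate performs greedy decoding: starting from start_id, it feeds each argmax prediction back in as the next input while the stateful LSTM carries the context forward. A usage sketch with hypothetical sizes, wiring the encoder and decoder together:

encoder = Encoder(vocab_size=60, wordvec_size=16, hidden_size=32)
decoder = Decoder(vocab_size=60, wordvec_size=16, hidden_size=32)

xs = np.random.randint(0, 60, size=(1, 7))  # one source sequence
h = encoder.forward(xs)                     # context vector, shape (1, 32)
sampled = decoder.generate(h, start_id=0, sample_size=5)
print(sampled)                              # five greedily chosen word IDs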
def __init__(self, **kwargs):
    super(ModelTest, self).__init__()
    self.time_emb = TimeEmbedding(20, 64)
class PeekyDecoder:
    def __init__(self, vocab_size, wordvec_size, hidden_size):
        V = vocab_size
        D = wordvec_size
        H = hidden_size
        rn = np.random.randn

        embed_W = (rn(V, D) / 100).astype('f')
        # The LSTM input is h concatenated with the embedding: H + D dims
        lstm_Wx = (rn(H + D, 4 * H) / np.sqrt(H + D)).astype('f')
        lstm_Wh = (rn(H, 4 * H) / np.sqrt(H)).astype('f')
        lstm_b = np.zeros(4 * H).astype('f')
        # The affine input is h concatenated with the LSTM output: 2H dims
        affine_W = (rn(H + H, V) / np.sqrt(H + H)).astype('f')
        affine_b = np.zeros(V).astype('f')

        self.embed = TimeEmbedding(embed_W)
        self.lstm = TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=True)
        self.affine = TimeAffine(affine_W, affine_b)

        self.params = []
        self.grads = []
        for layer in (self.embed, self.lstm, self.affine):
            self.params += layer.params
            self.grads += layer.grads
        self.cache = None

    def forward(self, xs, h):
        N, T = xs.shape
        N, H = h.shape

        self.lstm.set_state(h)

        out = self.embed.forward(xs)
        # "Peek": feed the encoder's h to every time step of both layers
        hs = np.repeat(h, T, axis=0).reshape(N, T, H)
        out = np.concatenate((hs, out), axis=2)

        out = self.lstm.forward(out)
        out = np.concatenate((hs, out), axis=2)

        score = self.affine.forward(out)
        self.cache = H
        return score

    def backward(self, dscore):
        H = self.cache

        dout = self.affine.backward(dscore)
        dhs0 = dout[:, :, :H]  # split off the h part before overwriting dout
        dout = dout[:, :, H:]
        dout = self.lstm.backward(dout)
        dembed = dout[:, :, H:]
        dhs1 = dout[:, :, :H]
        self.embed.backward(dembed)

        # h was fed to every time step, so its gradients sum over T
        dhs = dhs0 + dhs1
        dh = self.lstm.dh + np.sum(dhs, axis=1)
        return dh

    def generate(self, h, start_id, sample_size):
        sampled = []
        char_id = start_id
        self.lstm.set_state(h)

        H = h.shape[1]
        peeky_h = h.reshape(1, 1, H)
        for _ in range(sample_size):
            x = np.array([char_id]).reshape((1, 1))
            out = self.embed.forward(x)

            out = np.concatenate((peeky_h, out), axis=2)
            out = self.lstm.forward(out)
            out = np.concatenate((peeky_h, out), axis=2)
            score = self.affine.forward(out)

            char_id = np.argmax(score.flatten())
            sampled.append(int(char_id))

        return sampled
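The peeky concatenations change the expected input widths, which is why lstm_Wx is (H+D, 4H) and affine_W is (2H, V) above. A standalone shape sketch with hypothetical sizes:

N, T, D, H = 2, 5, 16, 32
h = np.zeros((N, H), dtype='f')
embedded = np.zeros((N, T, D), dtype='f')

hs = np.repeat(h, T, axis=0).reshape(N, T, H)   # h broadcast to every step
lstm_in = np.concatenate((hs, embedded), axis=2)
print(lstm_in.shape)                            # (2, 5, 48) == (N, T, H + D)

lstm_out = np.zeros((N, T, H), dtype='f')
affine_in = np.concatenate((hs, lstm_out), axis=2)
print(affine_in.shape)                          # (2, 5, 64) == (N, T, 2H)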
# Keras functional-API model; imports assumed from a Keras 2.x environment
# (TimeEmbedding here is a custom layer from this codebase).
from keras.layers import Input, Embedding, RepeatVector, Concatenate, Dense
from keras.models import Model
from keras.optimizers import Adam
from keras.regularizers import l1


def __init__(self, conf, char_embed_matrix=None, term_embed_matrix=None,
             name='hybridconvmodel.h5', train_embed=False, train_top=True):
    self.batch_size = conf.batch_size
    self.MAX_LEN = conf.MAX_LEN
    self.PE = conf.PE
    self.name = name

    # Char-level branch
    input = Input(shape=(conf.MAX_LEN,), dtype='int32')
    topic_in = Input(shape=(20,), dtype='float32')
    if char_embed_matrix is None:
        x = Embedding(conf.V, 32)(input)
    else:
        embed1 = Embedding(char_embed_matrix.shape[0],
                           char_embed_matrix.shape[1],
                           weights=[char_embed_matrix],
                           trainable=train_embed)
        x = embed1(input)
    xt_repeat = RepeatVector(conf.MAX_LEN)(topic_in)
    x = Concatenate()([x, xt_repeat])
    if self.PE:
        e_input = Input(shape=(conf.MAX_LEN,), dtype='int32', name='PE_in')
        ex = Embedding(self.MAX_LEN, 32, name='PE')(e_input)
        if conf.CPE:
            ex2 = TimeEmbedding()(e_input)
            x = Concatenate()([x, ex, ex2])
        else:
            x = Concatenate()([x, ex])
    hs_char = self.feed_forward(x, train_top)

    # Term-level branch
    input_term = Input(shape=(conf.MAX_LEN_TERM,), dtype='int32')
    if term_embed_matrix is None:
        xterm = Embedding(conf.V, 32)(input_term)
    else:
        embed1 = Embedding(term_embed_matrix.shape[0],
                           term_embed_matrix.shape[1],
                           weights=[term_embed_matrix],
                           trainable=train_embed)
        xterm = embed1(input_term)
    xt_repeat = RepeatVector(conf.MAX_LEN_TERM)(topic_in)
    xterm = Concatenate()([xterm, xt_repeat])
    # xterm = Dense(64, activation='relu')(xterm)
    if conf.PE:
        eterm_input = Input(shape=(conf.MAX_LEN_TERM,), dtype='int32',
                            name='PE_term_in')
        ex_term = Embedding(conf.MAX_LEN_TERM, 32, name='PEterm')(eterm_input)
        if conf.CPE:
            ex_term2 = TimeEmbedding()(eterm_input)
            xterm = Concatenate()([xterm, ex_term, ex_term2])
        else:
            xterm = Concatenate()([xterm, ex_term])
    hs_term = self.feed_forward(xterm, train_top)

    # Hand-crafted features
    # l1_weight = 5e-6, kernel_regularizer=l1(l1_weight)
    input_feat = Input(shape=(conf.NUM_FEAT,), dtype='float32')
    hfeat = Dense(8, activation='relu', trainable=train_top)(input_feat)

    if conf.use_tfidf:
        l1_weight = 5e-6
        NV = 10000
        ds_dim = 128
        tfidf_in = Input(shape=(NV,), dtype='float32')
        term_tfidf_in = Input(shape=(NV,), dtype='float32')
        htfidf = Dense(ds_dim, activation='relu', trainable=train_top,
                       kernel_regularizer=l1(l1_weight))(tfidf_in)
        hterm_tfidf = Dense(ds_dim, activation='relu', trainable=train_top,
                            kernel_regularizer=l1(l1_weight))(term_tfidf_in)
        hs = Concatenate()([hs_char, hs_term, hfeat, topic_in,
                            htfidf, hterm_tfidf])
        z = Dense(128, activation='relu', trainable=train_top)(hs)
    else:
        hs = Concatenate()([hs_char, hs_term, hfeat, topic_in])
        # hs = BatchNormalization()(hs)
        z = Dense(128, activation='relu', trainable=train_top)(hs)
        # z = BatchNormalization()(z)
    z = Dense(conf.C, activation='softmax', trainable=train_top)(z)

    if self.PE:
        if conf.use_tfidf:
            model = Model([input, e_input, input_term, eterm_input,
                           input_feat, topic_in, tfidf_in, term_tfidf_in], z)
        else:
            model = Model([input, e_input, input_term, eterm_input,
                           input_feat, topic_in], z)
    else:
        model = Model([input, input_term, input_feat, topic_in], z)
    # opt = Adagrad(lr=lr)
    opt = Adam(lr=conf.lr)
    model.compile(opt, 'categorical_crossentropy', metrics=['acc'])
    self.model = model
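A usage sketch for this constructor, with a hypothetical conf namespace supplying the fields read above and HybridConvModel standing in for whatever the enclosing class is actually named; with PE=False and use_tfidf=False, the model expects exactly the four inputs listed in the final Model(...) branch:

from types import SimpleNamespace

conf = SimpleNamespace(batch_size=32, MAX_LEN=100, MAX_LEN_TERM=50,
                       PE=False, CPE=False, V=5000, NUM_FEAT=8,
                       use_tfidf=False, C=4, lr=1e-3)
m = HybridConvModel(conf)  # hypothetical class name

n = 8
x_char = np.random.randint(0, conf.V, size=(n, conf.MAX_LEN))
x_term = np.random.randint(0, conf.V, size=(n, conf.MAX_LEN_TERM))
x_feat = np.random.rand(n, conf.NUM_FEAT).astype('float32')
x_topic = np.random.rand(n, 20).astype('float32')
y = np.eye(conf.C)[np.random.randint(0, conf.C, size=n)]  # one-hot labels

m.model.fit([x_char, x_term, x_feat, x_topic], y, epochs=1)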