def prediction(self, y_emb, state, context, keep_prob=1.0):
    """
    maxout -> readout -> softmax
    p(y_j) \propto f(y_{j-1}, s_{j-1}, c_{j})

    :param y_emb: embedding of the previous target word y_{j-1}
    :param state: previous decoder hidden state s_{j-1}
    :param context: attention context vector c_{j}
    :param keep_prob: dropout keep probability applied to the readout layer
    :return: softmax probabilities over the target vocabulary
    """
    features = [state, y_emb, context]
    # maxout over the concatenated features, then a linear readout
    maxhid = nn.maxout(features,
                       [[self.dim_hid, self.dim_y, self.dim_value],
                        self.dim_maxout],
                       self.max_part, True)
    readout = nn.linear(maxhid, [self.dim_readout, self.dim_maxout][::-1],
                        False, scope="readout")

    if keep_prob < 1.0:
        readout = nn.dropout(readout, keep_prob=keep_prob)

    logits = nn.linear(readout, [self.dim_readout, self.n_y_vocab], True,
                       scope="logits")

    # softmax expects a matrix; flatten (time, batch, vocab) to
    # (time * batch, vocab) when given a 3-d tensor
    if logits.ndim == 3:
        new_shape = [logits.shape[0] * logits.shape[1], -1]
        logits = logits.reshape(new_shape)

    probs = T.nnet.softmax(logits)

    return probs
def prediction(prev_inputs, prev_state, context, keep_prob=1.0):
    features = [prev_state, prev_inputs, context]
    maxhid = nn.maxout(features, [[thdim, tedim, 2 * shdim], maxdim],
                       maxpart, True)
    readout = nn.linear(maxhid, [maxdim, deephid], False, scope="deepout")

    if keep_prob < 1.0:
        readout = nn.dropout(readout, keep_prob=keep_prob)

    logits = nn.linear(readout, [deephid, tvsize], True, scope="logits")

    if logits.ndim == 3:
        new_shape = [logits.shape[0] * logits.shape[1], -1]
        logits = logits.reshape(new_shape)

    probs = theano.tensor.nnet.softmax(logits)

    return probs
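# Hedged sketch (added for illustration; not part of the original code): what the
# nn.maxout calls above are assumed to compute, written in plain numpy. A maxout
# layer (Goodfellow et al., 2013) projects the input to out_dim * max_part units
# and keeps the maximum over each group of max_part consecutive units. The names
# maxout_numpy, W and b are hypothetical.
import numpy as np

def maxout_numpy(x, W, b, max_part):
    # x: (batch, in_dim), W: (in_dim, out_dim * max_part), b: (out_dim * max_part,)
    z = np.dot(x, W) + b                         # (batch, out_dim * max_part)
    z = z.reshape(z.shape[0], -1, max_part)      # (batch, out_dim, max_part)
    return z.max(axis=-1)                        # (batch, out_dim)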
def __init__(self, emb_size, shidden_size, thidden_size, ahidden_size,
             mhidden_size, maxpart, dhidden_size, voc_size,
             config=decoder_config()):
    scope = config.scope
    ctx_size = 2 * shidden_size

    with variable_scope(scope):
        init_transform = feedforward(shidden_size, thidden_size,
                                     config.init_transform)
        annotation_transform = linear(ctx_size, ahidden_size,
                                      config.annotation_transform)
        state_transform = linear(thidden_size, ahidden_size,
                                 config.state_transform)
        context_transform = linear(ahidden_size, 1, config.context_transform)
        rnn = gru([emb_size, ctx_size], thidden_size, config.rnn)
        maxout_transform = maxout([thidden_size, emb_size, ctx_size],
                                  mhidden_size, maxpart, config.maxout)
        deepout_transform = linear(mhidden_size, dhidden_size, config.deepout)
        classify_transform = linear(dhidden_size, voc_size, config.classify)

    params = []
    params.extend(init_transform.parameter)
    params.extend(annotation_transform.parameter)
    params.extend(state_transform.parameter)
    params.extend(context_transform.parameter)
    params.extend(rnn.parameter)
    params.extend(maxout_transform.parameter)
    params.extend(deepout_transform.parameter)
    params.extend(classify_transform.parameter)

    def attention(state, xmask, mapped_annotation):
        mapped_state = state_transform(state)
        hidden = theano.tensor.tanh(mapped_state + mapped_annotation)
        score = context_transform(hidden)
        score = score.reshape((score.shape[0], score.shape[1]))
        # masked softmax over the source positions; subtracting the column-wise
        # maximum keeps the exponentials numerically stable
        alpha = theano.tensor.exp(score - score.max(0))
        alpha = alpha * xmask
        alpha = alpha / theano.tensor.sum(alpha, 0)
        return alpha

    def compute_initstate(annotation):
        # initialize the decoder state from the backward half of the first
        # annotation, i.e. the backward encoder state at the first source word
        hb = annotation[0, :, -(annotation.shape[2] // 2):]
        inis = init_transform(hb)
        mapped_annotation = annotation_transform(annotation)

        return inis, mapped_annotation

    def compute_context(state, xmask, annotation, mapped_annotation):
        alpha = attention(state, xmask, mapped_annotation)
        # the context is the attention-weighted sum of the annotations
        context = theano.tensor.sum(alpha[:, :, None] * annotation, 0)
        return [alpha, context]

    def compute_probability(yemb, state, context):
        maxhid = maxout_transform([state, yemb, context])
        readout = deepout_transform(maxhid)
        preact = classify_transform(readout)
        prob = theano.tensor.nnet.softmax(preact)

        return prob

    def compute_state(yemb, ymask, state, context):
        new_state, states = rnn([yemb, context], state)
        # keep the previous state at masked (padded) target positions
        ymask = ymask[:, None]
        new_state = (1.0 - ymask) * state + ymask * new_state

        return new_state

    def compute_attention_score(yseq, xmask, ymask, annotation):
        initstate, mapped_annotation = compute_initstate(annotation)

        def step(yemb, ymask, state, xmask, annotation, mannotation):
            outs = compute_context(state, xmask, annotation, mannotation)
            alpha, context = outs
            new_state = compute_state(yemb, ymask, state, context)
            return [new_state, alpha]

        seq = [yseq, ymask]
        oinfo = [initstate, None]
        nonseq = [xmask, annotation, mapped_annotation]
        (states, alpha), updates = theano.scan(step, seq, oinfo, nonseq)

        return alpha

    def forward(yseq, xmask, ymask, annotation):
        # shift the target embeddings right by one step for teacher forcing;
        # the first input is a zero vector standing in for the start symbol
        yshift = theano.tensor.zeros_like(yseq)
        yshift = theano.tensor.set_subtensor(yshift[1:], yseq[:-1])

        initstate, mapped_annotation = compute_initstate(annotation)

        def step(yemb, ymask, state, xmask, annotation, mannotation):
            outs = compute_context(state, xmask, annotation, mannotation)
            alpha, context = outs
            new_state = compute_state(yemb, ymask, state, context)
            return [new_state, context]

        seq = [yseq, ymask]
        oinfo = [initstate, None]
        nonseq = [xmask, annotation, mapped_annotation]
        (states, contexts), updates = theano.scan(step, seq, oinfo, nonseq)

        # prepend the initial state so each prediction conditions on the
        # previous decoder state s_{j-1}
        inis = initstate[None, :, :]
        all_states = theano.tensor.concatenate([inis, states], 0)
        prev_states = all_states[:-1]

        maxhid = maxout_transform([prev_states, yshift, contexts])
        readout = deepout_transform(maxhid)
        preact = classify_transform(readout)
        preact = preact.reshape((preact.shape[0] * preact.shape[1], -1))
        prob = theano.tensor.nnet.softmax(preact)

        return prob

    self.name = scope
    self.config = config
    self.forward = forward
    self.parameter = params
    self.compute_initstate = compute_initstate
    self.compute_context = compute_context
    self.compute_probability = compute_probability
    self.compute_state = compute_state
    self.compute_attention_score = compute_attention_score
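# Hedged sketch (added for illustration; not part of the original code): the
# additive attention computed by the `attention` closure above, written out in
# plain numpy. Ws, Wa and v stand in for the weights of state_transform,
# annotation_transform and context_transform; shapes follow the time-major
# layout of the Theano code.
import numpy as np

def additive_attention(state, annotation, xmask, Ws, Wa, v):
    # state: (batch, thidden), annotation: (srclen, batch, ctx),
    # xmask: (srclen, batch), Ws: (thidden, ahidden), Wa: (ctx, ahidden),
    # v: (ahidden, 1)
    hidden = np.tanh(np.dot(state, Ws) + np.dot(annotation, Wa))
    score = np.dot(hidden, v)[:, :, 0]             # (srclen, batch)
    alpha = np.exp(score - score.max(axis=0))      # stabilized exponentials
    alpha = alpha * xmask                          # drop padded source positions
    alpha = alpha / alpha.sum(axis=0)              # normalize over source length
    context = (alpha[:, :, None] * annotation).sum(axis=0)
    return alpha, context                          # weights and context vector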